Implement Multi-Process Handling (#7)

Refactor the code to support starting multiple back-end llama.cpp servers. This functionality is exposed as `profiles`, keeping the configuration format simple.
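For illustration, here is a minimal sketch of how a profiles-style configuration could map onto Go types. The field and key names are hypothetical, not the project's actual schema:

```go
// Hypothetical config types for a profiles-style setup; the actual
// llama-swap schema may differ.
package config

// ModelConfig describes how to launch one back-end llama.cpp server.
type ModelConfig struct {
	Cmd     string   `yaml:"cmd"`     // command line used to start the server
	Proxy   string   `yaml:"proxy"`   // upstream address the proxy forwards to
	Aliases []string `yaml:"aliases"` // alternate model names that map here
}

// Config groups models into named profiles; each profile lists the
// models that should be available together.
type Config struct {
	Models   map[string]ModelConfig `yaml:"models"`
	Profiles map[string][]string    `yaml:"profiles"`
}
```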

Changes: 

* Refactor proxy tests to prepare for multi-process support
* Update proxy/ProxyManager to support multiple processes (#7)
* Add support for Groups in the configuration
* Improve handling of model alias configs
* Implement multi-model swapping (see the sketch after this list)
* Improve code clarity in swapModel
* Improve docs; rename groups to profiles in the config
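The ProxyManager internals aren't shown in this excerpt, but conceptually multi-model swapping means tracking one back-end process per running model and stopping whatever is no longer needed before starting the requested model. A rough sketch under those assumptions; names like `Process`, `swapModel`, and `startProcess` are illustrative, not the actual implementation:

```go
// Illustrative sketch of multi-process swapping; not the actual
// ProxyManager code from this commit.
package proxy

import (
	"os/exec"
	"sync"
)

// Process wraps one running llama.cpp server.
type Process struct {
	cmd *exec.Cmd
}

// Stop kills the underlying server process.
func (p *Process) Stop() { _ = p.cmd.Process.Kill() }

// ProxyManager tracks the back ends that are currently running.
type ProxyManager struct {
	mu        sync.Mutex
	processes map[string]*Process // running back ends, keyed by model name
}

// swapModel ensures the requested model is running, stopping other
// back ends first so only the desired process stays resident.
func (pm *ProxyManager) swapModel(model string) (*Process, error) {
	pm.mu.Lock()
	defer pm.mu.Unlock()

	if p, ok := pm.processes[model]; ok {
		return p, nil // already running, nothing to swap
	}
	for name, p := range pm.processes {
		p.Stop()
		delete(pm.processes, name)
	}
	p, err := startProcess(model)
	if err != nil {
		return nil, err
	}
	pm.processes[model] = p
	return p, nil
}

// startProcess spawns a llama.cpp server for the given model; the
// command line here is a placeholder.
func startProcess(model string) (*Process, error) {
	cmd := exec.Command("llama-server", "--model", model)
	if err := cmd.Start(); err != nil {
		return nil, err
	}
	return &Process{cmd: cmd}, nil
}
```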
Benson Wong authored on 2024-11-23 19:45:13 -08:00, committed by GitHub
parent 533162ce6a
commit 73ad85ea69
10 changed files with 361 additions and 124 deletions


@@ -16,12 +16,16 @@ func main() {
 	flag.Parse() // Parse the command-line flags
 
-	// Set up the handler function using the provided response message
-	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+	responseMessageHandler := func(w http.ResponseWriter, r *http.Request) {
 		// Set the header to text/plain
 		w.Header().Set("Content-Type", "text/plain")
 		fmt.Fprintln(w, *responseMessage)
-	})
+	}
+
+	// Set up the handler function using the provided response message
+	http.HandleFunc("/v1/chat/completions", responseMessageHandler)
+	http.HandleFunc("/v1/completions", responseMessageHandler)
+	http.HandleFunc("/test", responseMessageHandler)
 
 	http.HandleFunc("/env", func(w http.ResponseWriter, r *http.Request) {
 		w.Header().Set("Content-Type", "text/plain")
@@ -43,6 +47,11 @@ func main() {
 		w.Write([]byte(response))
 	})
+
+	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		w.Header().Set("Content-Type", "text/plain")
+		fmt.Fprintf(w, "%s %s", r.Method, r.URL.Path)
+	})
 
 	address := "127.0.0.1:" + *port // Address with the specified port
 	fmt.Printf("Server is listening on port %s\n", *port)
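With these routes in place, the proxy tests can hit the OpenAI-style endpoints and assert on the fixed response message, while any other path echoes the method and path back from the catch-all handler. A hypothetical check against a responder listening on port 8080 (the port is an assumption) might look like:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Assumes the test responder above is listening on 127.0.0.1:8080.
	for _, path := range []string{"/v1/chat/completions", "/v1/completions", "/other"} {
		resp, err := http.Get("http://127.0.0.1:8080" + path)
		if err != nil {
			panic(err)
		}
		body, _ := io.ReadAll(resp.Body)
		resp.Body.Close()
		// /v1/* paths return the configured response message;
		// unknown paths echo "GET <path>" from the catch-all handler.
		fmt.Printf("%s -> %s\n", path, body)
	}
}
```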