From d682589fb11d5c3d4b0fa83a6ec8c44e7cc95d4e Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Fri, 4 Oct 2024 11:55:27 -0700 Subject: [PATCH] support environment variables --- Makefile | 5 +++++ README.md | 4 +++- bin/simple-responder/simple-responder.go | 12 ++++++++++++ config.example.yaml | 17 ++++++++++++++--- proxy/config.go | 1 + proxy/manager.go | 8 +++++--- 6 files changed, 40 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 7f0303c..e59f21c 100644 --- a/Makefile +++ b/Makefile @@ -19,6 +19,11 @@ linux: @echo "Building Linux binary..." GOOS=linux GOARCH=amd64 go build -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64 +# for testing things +simple-responder: + @echo "Building simple responder" + go build -o $(BUILD_DIR)/simple-responder bin/simple-responder/simple-responder.go + # Ensure build directory exists $(BUILD_DIR): mkdir -p $(BUILD_DIR) diff --git a/README.md b/README.md index 1e63f59..8a45361 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,9 @@ healthCheckTimeout: 60 # define models models: "llama": + env: + - "CUDA_VISIBLE_DEVICES=0" + cmd: "llama-server --port 8999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf" # address where llama-ser @@ -31,7 +34,6 @@ models: "qwen": cmd: "llama-server --port 8999 -m path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf" proxy: "http://127.0.0.1:8999" - aliases: ``` ## Testing with CURL diff --git a/bin/simple-responder/simple-responder.go b/bin/simple-responder/simple-responder.go index aea99fe..f20ea40 100644 --- a/bin/simple-responder/simple-responder.go +++ b/bin/simple-responder/simple-responder.go @@ -4,6 +4,7 @@ import ( "flag" "fmt" "net/http" + "os" ) func main() { @@ -17,7 +18,18 @@ func main() { // Set up the handler function using the provided response message http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + // Set the header to text/plain + w.Header().Set("Content-Type", "text/plain") + fmt.Fprintln(w, *responseMessage) + + // Get environment variables + envVars := os.Environ() + + 
// Write each environment variable to the response + for _, envVar := range envVars { + fmt.Fprintln(w, envVar) + } }) // Set up the /health endpoint handler function diff --git a/config.example.yaml b/config.example.yaml index 6bc09e4..8dc9086 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -11,10 +11,21 @@ models: aliases: - "gpt-4o-mini" "qwen": - cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf " + cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf" proxy: "http://127.0.0.1:8999" aliases: - "gpt-3.5-turbo" - "broken": - cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf " + + "simple": + # example of setting environment variables + env: + - "CUDA_VISIBLE_DEVICES=0,1" + - "env1=hello" + cmd: "build/simple-responder --port 8999" proxy: "http://127.0.0.1:8999" + + # don't use this, just for testing if things are broken + "broken": + cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf" + proxy: "http://127.0.0.1:8999" + diff --git a/proxy/config.go b/proxy/config.go index 81b3d75..106b992 100644 --- a/proxy/config.go +++ b/proxy/config.go @@ -10,6 +10,7 @@ type ModelConfig struct { Cmd string `yaml:"cmd"` Proxy string `yaml:"proxy"` Aliases []string `yaml:"aliases"` + Env []string `yaml:"env"` } type Config struct { diff --git a/proxy/manager.go b/proxy/manager.go index 3221293..b33dd6f 100644 --- a/proxy/manager.go +++ b/proxy/manager.go @@ -61,6 +61,8 @@ func (pm *ProxyManager) swapModel(requestedModel string) error { cmd := exec.Command(args[0], args[1:]...) 
cmd.Stdout = os.Stdout cmd.Stderr = os.Stderr + cmd.Env = modelConfig.Env + err := cmd.Start() if err != nil { return err @@ -99,9 +101,9 @@ func (pm *ProxyManager) checkHealthEndpoint() error { resp, err := client.Do(req) if err != nil { if strings.Contains(err.Error(), "connection refused") { - // llama.cpp /health endpoint commes up fast, give it 5 seconds - // happens when llama.cpp exited, keeps the code simple if TCP dial is not - // able to talk to the proxy endpoint + + // if the TCP dial is still refused after 5 seconds, + // exit quickly with an error. if time.Since(startTime) > 5*time.Second { return fmt.Errorf("/healthy endpoint took more than 5 seconds to respond") }