support environment variables

2024-10-04 11:55:27 -07:00
parent 43119e807f
commit d682589fb1
6 changed files with 40 additions and 7 deletions
--- a/5
+++ b/5
@@ -19,6 +19,11 @@ linux:
 	@echo "Building Linux binary..."
 	GOOS=linux GOARCH=amd64 go build -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64

+# for testing things
+simple-responder:
+	@echo "Building simple responder"
+	go build -o $(BUILD_DIR)/simple-responder bin/simple-responder/simple-responder.go
+
 # Ensure build directory exists
 $(BUILD_DIR):
 	mkdir -p $(BUILD_DIR)
--- a/README.md
+++ b/README.md
@@ -18,6 +18,9 @@ healthCheckTimeout: 60
 # define models
 models:
  "llama":
+    env:
+      - "CUDA_VISIBLE_DEVICES=0"
+
    cmd: "llama-server --port 8999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf"

    # address where llama-ser
@@ -31,7 +34,6 @@ models:
  "qwen":
    cmd: "llama-server --port 8999 -m path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
    proxy: "http://127.0.0.1:8999"
-    aliases:
 ```

 ## Testing with CURL
--- a/bin/simple-responder/simple-responder.go
+++ b/bin/simple-responder/simple-responder.go
@@ -4,6 +4,7 @@ import (
 	"flag"
 	"fmt"
 	"net/http"
+	"os"
 )

 func main() {
@@ -17,7 +18,18 @@ func main() {

 	// Set up the handler function using the provided response message
 	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		// Set the header to text/plain
+		w.Header().Set("Content-Type", "text/plain")
+
 		fmt.Fprintln(w, *responseMessage)
+
+		// Get environment variables
+		envVars := os.Environ()
+
+		// Write each environment variable to the response
+		for _, envVar := range envVars {
+			fmt.Fprintln(w, envVar)
+		}
 	})

 	// Set up the /health endpoint handler function
--- a/config.example.yaml
+++ b/config.example.yaml
@@ -11,10 +11,21 @@ models:
    aliases:
    - "gpt-4o-mini"
  "qwen":
-    cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf "
+    cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
    proxy: "http://127.0.0.1:8999"
    aliases:
    - "gpt-3.5-turbo"
-  "broken":
-    cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf "
+
+  "simple":
+    # example of setting environment variables
+    env:
+      - "CUDA_VISIBLE_DEVICES=0,1"
+      - "env1=hello"
+    cmd: "build/simple-responder --port 8999"
    proxy: "http://127.0.0.1:8999"
+
+  # don't use this model; it points at a missing file and exists only to test failure handling
+  "broken":
+    cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf"
+    proxy: "http://127.0.0.1:8999"
+
--- a/proxy/config.go
+++ b/proxy/config.go
@@ -10,6 +10,7 @@ type ModelConfig struct {
 	Cmd     string   `yaml:"cmd"`
 	Proxy   string   `yaml:"proxy"`
 	Aliases []string `yaml:"aliases"`
+	Env     []string `yaml:"env"`
 }

 type Config struct {
--- a/proxy/manager.go
+++ b/proxy/manager.go
@@ -61,6 +61,8 @@ func (pm *ProxyManager) swapModel(requestedModel string) error {
 	cmd := exec.Command(args[0], args[1:]...)
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
+	cmd.Env = modelConfig.Env
+
 	err := cmd.Start()
 	if err != nil {
 		return err
@@ -99,9 +101,9 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
 		resp, err := client.Do(req)
 		if err != nil {
 			if strings.Contains(err.Error(), "connection refused") {
-				// llama.cpp /health endpoint commes up fast, give it 5 seconds
-				// happens when llama.cpp exited, keeps the code simple if TCP dial is not
-				// able to talk to the proxy endpoint
+
+				// if the TCP dial is still refused (no HTTP response at all)
+				// more than 5 seconds after starting, give up and exit quickly.
 				if time.Since(startTime) > 5*time.Second {
 					return fmt.Errorf("/healthy endpoint took more than 5 seconds to respond")
 				}