support environment variables

Benson Wong
2024-10-04 11:55:27 -07:00
parent 43119e807f
commit d682589fb1
6 changed files with 40 additions and 7 deletions

View File

@@ -19,6 +19,11 @@ linux:
 	@echo "Building Linux binary..."
 	GOOS=linux GOARCH=amd64 go build -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64

+# for testing things
+simple-responder:
+	@echo "Building simple responder"
+	go build -o $(BUILD_DIR)/simple-responder bin/simple-responder/simple-responder.go
+
 # Ensure build directory exists
 $(BUILD_DIR):
 	mkdir -p $(BUILD_DIR)

View File

@@ -18,6 +18,9 @@ healthCheckTimeout: 60
 # define models
 models:
   "llama":
+    env:
+      - "CUDA_VISIBLE_DEVICES=0"
     cmd: "llama-server --port 8999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf"

     # address where llama-ser
@@ -31,7 +34,6 @@ models:
   "qwen":
     cmd: "llama-server --port 8999 -m path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
     proxy: "http://127.0.0.1:8999"
-    aliases:
 ```

 ## Testing with CURL

View File

@@ -4,6 +4,7 @@ import (
 	"flag"
 	"fmt"
 	"net/http"
+	"os"
 )

 func main() {
@@ -17,7 +18,18 @@ func main() {
 	// Set up the handler function using the provided response message
 	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		// Set the header to text/plain
+		w.Header().Set("Content-Type", "text/plain")
 		fmt.Fprintln(w, *responseMessage)
+
+		// Get environment variables
+		envVars := os.Environ()
+
+		// Write each environment variable to the response
+		for _, envVar := range envVars {
+			fmt.Fprintln(w, envVar)
+		}
 	})

 	// Set up the /health endpoint handler function
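
Since the responder now echoes its environment into the response body, one way to verify end to end that variables from the config reach the child process is a tiny client that fetches the root endpoint and scans for an expected KEY=VALUE line. A minimal sketch, assuming the responder is running on port 8999 as in the example configs:

package main

import (
	"fmt"
	"io"
	"net/http"
	"strings"
)

func main() {
	// assumes simple-responder is listening on :8999 (see the example configs)
	resp, err := http.Get("http://127.0.0.1:8999/")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}

	// the body is the response message followed by one KEY=VALUE line per variable
	for _, line := range strings.Split(string(body), "\n") {
		if strings.HasPrefix(line, "env1=") {
			fmt.Println("found:", line) // e.g. "found: env1=hello"
		}
	}
}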

View File

@@ -11,10 +11,21 @@ models:
     aliases:
     - "gpt-4o-mini"
   "qwen":
-    cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf "
+    cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
     proxy: "http://127.0.0.1:8999"
     aliases:
     - "gpt-3.5-turbo"
-  "broken":
-    cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf "
+  "simple":
+    # example of setting environment variables
+    env:
+      - "CUDA_VISIBLE_DEVICES=0,1"
+      - "env1=hello"
+    cmd: "build/simple-responder --port 8999"
     proxy: "http://127.0.0.1:8999"
+
+  # don't use this, just for testing if things are broken
+  "broken":
+    cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf"
+    proxy: "http://127.0.0.1:8999"

View File

@@ -10,6 +10,7 @@ type ModelConfig struct {
 	Cmd     string   `yaml:"cmd"`
 	Proxy   string   `yaml:"proxy"`
 	Aliases []string `yaml:"aliases"`
+	Env     []string `yaml:"env"`
 }

 type Config struct {
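
For reference, the new `env` key is a plain YAML sequence of strings, so it unmarshals directly into the `Env []string` field above. A minimal round-trip sketch, assuming gopkg.in/yaml.v3 (the YAML package this project actually imports isn't shown in the diff):

package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

// mirrors the ModelConfig struct from the diff above
type ModelConfig struct {
	Cmd     string   `yaml:"cmd"`
	Proxy   string   `yaml:"proxy"`
	Aliases []string `yaml:"aliases"`
	Env     []string `yaml:"env"`
}

func main() {
	data := []byte(`
cmd: "build/simple-responder --port 8999"
proxy: "http://127.0.0.1:8999"
env:
  - "CUDA_VISIBLE_DEVICES=0,1"
  - "env1=hello"
`)

	var mc ModelConfig
	if err := yaml.Unmarshal(data, &mc); err != nil {
		panic(err)
	}
	fmt.Println(mc.Env) // [CUDA_VISIBLE_DEVICES=0,1 env1=hello]
}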

View File

@@ -61,6 +61,8 @@ func (pm *ProxyManager) swapModel(requestedModel string) error {
 	cmd := exec.Command(args[0], args[1:]...)
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
+	cmd.Env = modelConfig.Env
+
 	err := cmd.Start()
 	if err != nil {
 		return err
@@ -99,9 +101,9 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
 	resp, err := client.Do(req)
 	if err != nil {
 		if strings.Contains(err.Error(), "connection refused") {
-			// llama.cpp's /health endpoint comes up fast, give it 5 seconds
-			// happens when llama.cpp exited, keeps the code simple if TCP dial is not
-			// able to talk to the proxy endpoint
+			// if the TCP dial can't get any HTTP response after 5 seconds,
+			// exit quickly
 			if time.Since(startTime) > 5*time.Second {
 				return fmt.Errorf("/healthy endpoint took more than 5 seconds to respond")
 			}
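
One subtlety in `cmd.Env = modelConfig.Env`: in Go's os/exec package, a nil Env makes the child inherit the parent's environment, while any non-nil slice replaces it entirely, so a model that sets `env:` runs with only the listed variables (no PATH, HOME, and so on). If inheriting plus overriding were the intent, a hedged alternative sketch:

// sketch: overlay the per-model variables on top of the parent's
// environment instead of replacing it wholesale
if len(modelConfig.Env) > 0 {
	cmd.Env = append(os.Environ(), modelConfig.Env...)
}
// with an empty env list, cmd.Env stays nil and the child inherits
// the parent's environment by default; for duplicate keys, os/exec
// uses the last value in the slice, so the per-model entries win

As committed, `env:` replaces the environment outright, which is stricter but also means variables like PATH must be listed explicitly when the child process needs them.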