support environment variables

 Makefile | 5 +++++

--- a/Makefile
+++ b/Makefile
@@ -19,6 +19,11 @@ linux:
 	@echo "Building Linux binary..."
 	GOOS=linux GOARCH=amd64 go build -o $(BUILD_DIR)/$(APP_NAME)-linux-amd64
 
+# for testing things
+simple-responder:
+	@echo "Building simple responder"
+	go build -o $(BUILD_DIR)/simple-responder bin/simple-responder/simple-responder.go
+
 # Ensure build directory exists
 $(BUILD_DIR):
 	mkdir -p $(BUILD_DIR)

@@ -18,6 +18,9 @@ healthCheckTimeout: 60
 # define models
 models:
   "llama":
+    env:
+      - "CUDA_VISIBLE_DEVICES=0"
+
     cmd: "llama-server --port 8999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf"
 
     # address where llama-ser

@@ -31,7 +34,6 @@ models:
   "qwen":
     cmd: "llama-server --port 8999 -m path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
     proxy: "http://127.0.0.1:8999"
-    aliases:
 ```
 
 ## Testing with CURL

@@ -4,6 +4,7 @@ import (
 	"flag"
 	"fmt"
 	"net/http"
+	"os"
 )
 
 func main() {

@@ -17,7 +18,18 @@ func main() {
 
 	// Set up the handler function using the provided response message
 	http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
+		// Set the header to text/plain
+		w.Header().Set("Content-Type", "text/plain")
+
 		fmt.Fprintln(w, *responseMessage)
+
+		// Get environment variables
+		envVars := os.Environ()
+
+		// Write each environment variable to the response
+		for _, envVar := range envVars {
+			fmt.Fprintln(w, envVar)
+		}
 	})
 
 	// Set up the /health endpoint handler function

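With this handler in place, the responder echoes its response message followed by its entire environment, which makes it easy to confirm that `env` entries from the config actually reach the child process. A minimal sketch of such a check, assuming the responder is already running on port 8999 as configured elsewhere in this commit:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Assumes simple-responder is listening on :8999, as in the
	// config examples in this commit.
	resp, err := http.Get("http://127.0.0.1:8999/")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}

	// Expect the response message on the first line, then one
	// KEY=value line per environment variable (e.g. env1=hello).
	fmt.Print(string(body))
}
```

The same check works from the shell with `curl http://127.0.0.1:8999/ | grep env1`.
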
@@ -11,10 +11,21 @@ models:
     aliases:
       - "gpt-4o-mini"
   "qwen":
-    cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf "
+    cmd: "models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf"
     proxy: "http://127.0.0.1:8999"
     aliases:
       - "gpt-3.5-turbo"
-  "broken":
-    cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf "
+
+  "simple":
+    # example of setting environment variables
+    env:
+      - "CUDA_VISIBLE_DEVICES=0,1"
+      - "env1=hello"
+    cmd: "build/simple-responder --port 8999"
     proxy: "http://127.0.0.1:8999"
+
+  # don't use this, just for testing if things are broken
+  "broken":
+    cmd: "models/llama-server-osx --port 8999 -m models/doesnotexist.gguf"
+    proxy: "http://127.0.0.1:8999"
+

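Each `env` entry uses the flat `"KEY=value"` string form that Go's `os.Environ()` and `exec.Cmd.Env` already use, so the parsed list can be handed to the child process as-is. As a sketch, a validation pass one could run over the list; `validateEnv` is a hypothetical helper for illustration, not part of this commit:

```go
package main

import (
	"fmt"
	"strings"
)

// validateEnv rejects entries that are not in "KEY=value" form.
// Hypothetical helper; the commit passes the list straight through
// to exec.Cmd.Env without validation.
func validateEnv(env []string) error {
	for _, entry := range env {
		key, _, ok := strings.Cut(entry, "=")
		if !ok || key == "" {
			return fmt.Errorf("invalid env entry %q, want KEY=value", entry)
		}
	}
	return nil
}

func main() {
	fmt.Println(validateEnv([]string{"CUDA_VISIBLE_DEVICES=0,1", "env1=hello"})) // <nil>
	fmt.Println(validateEnv([]string{"novalue"}))                                // error
}
```
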
@@ -10,6 +10,7 @@ type ModelConfig struct {
 	Cmd     string   `yaml:"cmd"`
 	Proxy   string   `yaml:"proxy"`
 	Aliases []string `yaml:"aliases"`
+	Env     []string `yaml:"env"`
 }
 
 type Config struct {

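With the new `Env` field, a model's `env:` list deserializes directly into a string slice. A minimal sketch of the round trip, assuming a `gopkg.in/yaml.v3`-style unmarshaller (the struct is trimmed to the fields visible in this hunk):

```go
package main

import (
	"fmt"

	"gopkg.in/yaml.v3"
)

type ModelConfig struct {
	Cmd     string   `yaml:"cmd"`
	Proxy   string   `yaml:"proxy"`
	Aliases []string `yaml:"aliases"`
	Env     []string `yaml:"env"`
}

func main() {
	data := []byte(`
cmd: "build/simple-responder --port 8999"
proxy: "http://127.0.0.1:8999"
env:
  - "CUDA_VISIBLE_DEVICES=0,1"
  - "env1=hello"
`)
	var mc ModelConfig
	if err := yaml.Unmarshal(data, &mc); err != nil {
		panic(err)
	}
	fmt.Println(mc.Env) // [CUDA_VISIBLE_DEVICES=0,1 env1=hello]
}
```
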
@@ -61,6 +61,8 @@ func (pm *ProxyManager) swapModel(requestedModel string) error {
 	cmd := exec.Command(args[0], args[1:]...)
 	cmd.Stdout = os.Stdout
 	cmd.Stderr = os.Stderr
+	cmd.Env = modelConfig.Env
+
 	err := cmd.Start()
 	if err != nil {
 		return err

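One subtlety of `cmd.Env = modelConfig.Env`: when `exec.Cmd.Env` is non-nil, the child process receives only those variables and does not inherit the parent's environment (`PATH`, `HOME`, and so on). If inheritance were desired, the usual pattern is to layer the configured entries on top of `os.Environ()`; a sketch of that alternative (not what this commit does):

```go
package main

import (
	"os"
	"os/exec"
)

func main() {
	// "env" is the standard Unix utility that prints its environment.
	cmd := exec.Command("env")
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr

	// Inherit the parent environment and append the configured
	// variables; on duplicate keys, the last entry wins.
	configured := []string{"CUDA_VISIBLE_DEVICES=0,1", "env1=hello"}
	cmd.Env = append(os.Environ(), configured...)

	if err := cmd.Run(); err != nil {
		panic(err)
	}
}
```
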
@@ -99,9 +101,9 @@ func (pm *ProxyManager) checkHealthEndpoint() error {
 	resp, err := client.Do(req)
 	if err != nil {
 		if strings.Contains(err.Error(), "connection refused") {
-			// llama.cpp /health endpoint commes up fast, give it 5 seconds
-			// happens when llama.cpp exited, keeps the code simple if TCP dial is not
-			// able to talk to the proxy endpoint
+
+			// if TCP dial can't get any HTTP response after 5 seconds,
+			// exit quickly.
 			if time.Since(startTime) > 5*time.Second {
 				return fmt.Errorf("/healthy endpoint took more than 5 seconds to respond")
 			}

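For context, the enclosing loop in `checkHealthEndpoint` (not shown in full here) polls the upstream `/health` endpoint until it answers, and the changed branch gives up five seconds after `startTime` while the TCP connection is still being refused. A standalone sketch of that poll-until-deadline shape; the function name, retry interval, and overall structure are assumptions, not the project's exact code:

```go
package main

import (
	"fmt"
	"net/http"
	"strings"
	"time"
)

// waitHealthy polls url until it returns 200 OK, giving up early if
// the TCP connection is still refused after refusedDeadline (the
// upstream server starts its HTTP listener quickly, so a long
// refusal means the process likely exited).
func waitHealthy(url string, refusedDeadline time.Duration) error {
	startTime := time.Now()
	for {
		resp, err := http.Get(url)
		if err == nil {
			resp.Body.Close()
			if resp.StatusCode == http.StatusOK {
				return nil
			}
		} else if strings.Contains(err.Error(), "connection refused") &&
			time.Since(startTime) > refusedDeadline {
			return fmt.Errorf("health endpoint took more than %s to respond", refusedDeadline)
		}
		time.Sleep(250 * time.Millisecond)
	}
}

func main() {
	fmt.Println(waitHealthy("http://127.0.0.1:8999/health", 5*time.Second))
}
```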