Implement Multi-Process Handling (#7)

Refactor the code to support starting multiple backend llama.cpp servers. This functionality is exposed as `profiles` to keep the configuration format simple.
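
The commit message does not show the configuration shape itself; as a rough sketch (the `Config` and `Profiles` names are assumptions for illustration, while the `ModelConfig` fields mirror the ones used in the tests below), a profile-aware config could be represented like this:

```go
// Hypothetical sketch only: one ModelConfig per backend llama.cpp server,
// plus named profiles that group several models to be run together.
package proxy

// ModelConfig describes how to launch and reach one llama.cpp server.
type ModelConfig struct {
	Cmd           string // command line used to start the server
	Proxy         string // upstream URL requests are forwarded to
	CheckEndpoint string // path polled to decide the server is ready
	UnloadAfter   int    // idle TTL in seconds before the process is stopped
}

// Config maps model names (and aliases) to launch settings, and profile
// names to the set of models that should be started together.
type Config struct {
	Models   map[string]ModelConfig
	Profiles map[string][]string
}
```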

Changes: 

* refactor proxy tests to get ready for multi-process support
* update proxy/ProxyManager to support multiple processes (#7)
* add support for Groups in configuration
* improve handling of Model alias configs
* implement multi-model swapping (see the sketch after this list)
* improve code clarity for swapModel
* improve docs, rename groups to profiles in config
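
The sketch below illustrates what profile-based swapping could look like; the `ProxyManager` fields, `swapToProfile`, and the meaning of the second `NewProcess` argument are assumptions here (not the code introduced by this commit), while `Process`, `NewProcess`, and `LogMonitor` are the types exercised by the tests in the diff:

```go
// Hypothetical sketch of multi-model swapping: stop whatever is currently
// running, then start every model listed in the requested profile.
package proxy

import (
	"fmt"
	"sync"
)

type ProxyManager struct {
	sync.Mutex
	config     Config              // models and profiles, as sketched above
	logMonitor *LogMonitor         // shared sink for upstream process output
	running    map[string]*Process // currently loaded processes, keyed by model name
}

func (pm *ProxyManager) swapToProfile(profile string) error {
	pm.Lock()
	defer pm.Unlock()

	models, ok := pm.config.Profiles[profile]
	if !ok {
		return fmt.Errorf("unknown profile: %s", profile)
	}

	// Unload everything that is currently running before starting the new set.
	for name, proc := range pm.running {
		proc.Stop()
		delete(pm.running, name)
	}
	if pm.running == nil {
		pm.running = make(map[string]*Process)
	}

	for _, name := range models {
		cfg, ok := pm.config.Models[name]
		if !ok {
			return fmt.Errorf("profile %q references unknown model %q", profile, name)
		}
		// The second NewProcess argument mirrors the tests below; its exact
		// meaning (likely a timeout in seconds) is an assumption here.
		pm.running[name] = NewProcess(name, 5, cfg, pm.logMonitor)
	}
	return nil
}
```

Treating a single model as a profile of one keeps the old swap-one-model behaviour as a special case of this flow.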
Author: Benson Wong
Date: 2024-11-23 19:45:13 -08:00 (committed via GitHub)
Commit: 73ad85ea69 (parent 533162ce6a)
10 changed files with 361 additions and 124 deletions

@@ -1,54 +1,15 @@
package proxy

import (
	"fmt"
	"io"
	"math/rand"
	"net/http"
	"net/http/httptest"
	"os"
	"path/filepath"
	"runtime"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
)

// Check if the binary exists
func TestMain(m *testing.M) {
	binaryPath := getBinaryPath()
	if _, err := os.Stat(binaryPath); os.IsNotExist(err) {
		fmt.Printf("simple-responder not found at %s, did you `make simple-responder`?\n", binaryPath)
		os.Exit(1)
	}
	m.Run()
}

// Helper function to get the binary path
func getBinaryPath() string {
	goos := runtime.GOOS
	goarch := runtime.GOARCH
	return filepath.Join("..", "build", fmt.Sprintf("simple-responder_%s_%s", goos, goarch))
}

func getTestSimpleResponderConfig(expectedMessage string) ModelConfig {
	// Define the range
	min := 12000
	max := 13000

	// Generate a random number between 12000 and 13000
	randomPort := rand.Intn(max-min+1) + min

	binaryPath := getBinaryPath()

	// Create a process configuration
	return ModelConfig{
		Cmd:           fmt.Sprintf("%s --port %d --respond '%s'", binaryPath, randomPort, expectedMessage),
		Proxy:         fmt.Sprintf("http://127.0.0.1:%d", randomPort),
		CheckEndpoint: "/health",
	}
}

func TestProcess_AutomaticallyStartsUpstream(t *testing.T) {
	logMonitor := NewLogMonitorWriter(io.Discard)
	expectedMessage := "testing91931"
@@ -56,7 +17,9 @@ func TestProcess_AutomaticallyStartsUpstream(t *testing.T) {
	// Create a process
	process := NewProcess("test-process", 5, config, logMonitor)
-	req := httptest.NewRequest("GET", "/", nil)
+	defer process.Stop()
+	req := httptest.NewRequest("GET", "/test", nil)
	w := httptest.NewRecorder()

	// process is automatically started
@@ -92,6 +55,8 @@ func TestProcess_BrokenModelConfig(t *testing.T) {
	}
	process := NewProcess("broken", 1, config, NewLogMonitor())
+	defer process.Stop()
	req := httptest.NewRequest("GET", "/", nil)
	w := httptest.NewRecorder()

	process.ProxyRequest(w, req)
@@ -99,6 +64,7 @@ func TestProcess_BrokenModelConfig(t *testing.T) {
	assert.Contains(t, w.Body.String(), "unable to start process")
}

+// test that the process unloads after the TTL
func TestProcess_UnloadAfterTTL(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping long auto unload TTL test")
@@ -111,7 +77,9 @@ func TestProcess_UnloadAfterTTL(t *testing.T) {
	assert.Equal(t, 3, config.UnloadAfter)
	process := NewProcess("ttl", 2, config, NewLogMonitorWriter(io.Discard))
-	req := httptest.NewRequest("GET", "/", nil)
+	defer process.Stop()
+	req := httptest.NewRequest("GET", "/test", nil)
	w := httptest.NewRecorder()

	// Proxy the request (auto start)