Implement Multi-Process Handling (#7)

Refactor the code to support starting multiple backend llama.cpp servers. This functionality is exposed as `profiles` to keep the configuration format simple.
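
The commit message does not show the configuration shape itself; as a rough sketch (the `Config` and `Profiles` names are assumptions for illustration, while the `ModelConfig` fields mirror the ones used in the tests below), a profile-aware config could be represented like this:

```go
// Hypothetical sketch only: one ModelConfig per backend llama.cpp server,
// plus named profiles that group several models to be run together.
package proxy

// ModelConfig describes how to launch and reach one llama.cpp server.
type ModelConfig struct {
	Cmd           string // command line used to start the server
	Proxy         string // upstream URL requests are forwarded to
	CheckEndpoint string // path polled to decide the server is ready
	UnloadAfter   int    // idle TTL in seconds before the process is stopped
}

// Config maps model names (and aliases) to launch settings, and profile
// names to the set of models that should be started together.
type Config struct {
	Models   map[string]ModelConfig
	Profiles map[string][]string
}
```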

Changes: 

* refactor proxy tests to get ready for multi-process support
* update proxy/ProxyManager to support multiple processes (#7)
* add support for Groups in configuration
* improve handling of Model alias configs
* implement multi-model swapping (see the sketch after this list)
* improve code clarity for swapModel
* improve docs, rename groups to profiles in config
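
The sketch below illustrates what profile-based swapping could look like; the `ProxyManager` fields, `swapToProfile`, and the meaning of the second `NewProcess` argument are assumptions here (not the code introduced by this commit), while `Process`, `NewProcess`, and `LogMonitor` are the types exercised by the tests in the diff:

```go
// Hypothetical sketch of multi-model swapping: stop whatever is currently
// running, then start every model listed in the requested profile.
package proxy

import (
	"fmt"
	"sync"
)

type ProxyManager struct {
	sync.Mutex
	config     Config              // models and profiles, as sketched above
	logMonitor *LogMonitor         // shared sink for upstream process output
	running    map[string]*Process // currently loaded processes, keyed by model name
}

func (pm *ProxyManager) swapToProfile(profile string) error {
	pm.Lock()
	defer pm.Unlock()

	models, ok := pm.config.Profiles[profile]
	if !ok {
		return fmt.Errorf("unknown profile: %s", profile)
	}

	// Unload everything that is currently running before starting the new set.
	for name, proc := range pm.running {
		proc.Stop()
		delete(pm.running, name)
	}
	if pm.running == nil {
		pm.running = make(map[string]*Process)
	}

	for _, name := range models {
		cfg, ok := pm.config.Models[name]
		if !ok {
			return fmt.Errorf("profile %q references unknown model %q", profile, name)
		}
		// The second NewProcess argument mirrors the tests below; its exact
		// meaning (likely a timeout in seconds) is an assumption here.
		pm.running[name] = NewProcess(name, 5, cfg, pm.logMonitor)
	}
	return nil
}
```

Treating a single model as a profile of one keeps the old swap-one-model behaviour as a special case of this flow.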
Author: Benson Wong
Date: 2024-11-23 19:45:13 -08:00 (committed via GitHub)
Commit: 73ad85ea69 (parent 533162ce6a)
10 changed files with 361 additions and 124 deletions

@@ -1,54 +1,15 @@
package proxy

import (
	"fmt"
	"io"
	"math/rand"
	"net/http"
	"net/http/httptest"
	"os"
	"path/filepath"
	"runtime"
	"testing"
	"time"

	"github.com/stretchr/testify/assert"
)

// Check if the binary exists
func TestMain(m *testing.M) {
	binaryPath := getBinaryPath()
	if _, err := os.Stat(binaryPath); os.IsNotExist(err) {
		fmt.Printf("simple-responder not found at %s, did you `make simple-responder`?\n", binaryPath)
		os.Exit(1)
	}
	m.Run()
}

// Helper function to get the binary path
func getBinaryPath() string {
	goos := runtime.GOOS
	goarch := runtime.GOARCH
	return filepath.Join("..", "build", fmt.Sprintf("simple-responder_%s_%s", goos, goarch))
}

func getTestSimpleResponderConfig(expectedMessage string) ModelConfig {
	// Define the range
	min := 12000
	max := 13000

	// Generate a random number between 12000 and 13000
	randomPort := rand.Intn(max-min+1) + min

	binaryPath := getBinaryPath()

	// Create a process configuration
	return ModelConfig{
		Cmd:           fmt.Sprintf("%s --port %d --respond '%s'", binaryPath, randomPort, expectedMessage),
		Proxy:         fmt.Sprintf("http://127.0.0.1:%d", randomPort),
		CheckEndpoint: "/health",
	}
}

func TestProcess_AutomaticallyStartsUpstream(t *testing.T) {
	logMonitor := NewLogMonitorWriter(io.Discard)
	expectedMessage := "testing91931"
@@ -56,7 +17,9 @@ func TestProcess_AutomaticallyStartsUpstream(t *testing.T) {
	// Create a process
	process := NewProcess("test-process", 5, config, logMonitor)
-	req := httptest.NewRequest("GET", "/", nil)
+	defer process.Stop()
+	req := httptest.NewRequest("GET", "/test", nil)
	w := httptest.NewRecorder()

	// process is automatically started
@@ -92,6 +55,8 @@ func TestProcess_BrokenModelConfig(t *testing.T) {
	}
	process := NewProcess("broken", 1, config, NewLogMonitor())
+	defer process.Stop()
	req := httptest.NewRequest("GET", "/", nil)
	w := httptest.NewRecorder()

	process.ProxyRequest(w, req)
@@ -99,6 +64,7 @@ func TestProcess_BrokenModelConfig(t *testing.T) {
	assert.Contains(t, w.Body.String(), "unable to start process")
}

+// test that the process unloads after the TTL
func TestProcess_UnloadAfterTTL(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping long auto unload TTL test")
@@ -111,7 +77,9 @@ func TestProcess_UnloadAfterTTL(t *testing.T) {
	assert.Equal(t, 3, config.UnloadAfter)
	process := NewProcess("ttl", 2, config, NewLogMonitorWriter(io.Discard))
-	req := httptest.NewRequest("GET", "/", nil)
+	defer process.Stop()
+	req := httptest.NewRequest("GET", "/test", nil)
	w := httptest.NewRecorder()

	// Proxy the request (auto start)