Implement Multi-Process Handling (#7)

Refactor the code to support starting multiple backend llama.cpp servers. This functionality is exposed as `profiles`, which provide a simple configuration format. Changes: * refactor proxy tests to get ready for multi-process support * update proxy/ProxyManager to support multiple processes (#7) * add support for Groups in configuration * improve handling of Model alias configs * implement multi-model swapping * improve code clarity for swapModel * improve docs, rename groups to profiles in config
2024-11-23 19:45:13 -08:00
parent 533162ce6a
commit 73ad85ea69
10 changed files with 361 additions and 124 deletions
--- a/proxy/proxymanager_test.go
+++ b/proxy/proxymanager_test.go
@@ -0,0 +1,76 @@
+package proxy
+
+import (
+	"bytes"
+	"fmt"
+	"net/http"
+	"net/http/httptest"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// TestProxyManager_SwapProcessCorrectly requests model1 and then model2
+// without a profile and expects exactly one process to remain at the end.
+func TestProxyManager_SwapProcessCorrectly(t *testing.T) {
+	cfg := &Config{
+		HealthCheckTimeout: 15,
+		Models: map[string]ModelConfig{
+			"model1": getTestSimpleResponderConfig("model1"),
+			"model2": getTestSimpleResponderConfig("model2"),
+		},
+	}
+	pm := New(cfg)
+	defer pm.StopProcesses()
+
+	for _, name := range []string{"model1", "model2"} {
+		payload := bytes.NewBufferString(fmt.Sprintf(`{"model":"%s"}`, name))
+		rec := httptest.NewRecorder()
+		pm.HandlerFunc(rec, httptest.NewRequest(http.MethodPost, "/v1/chat/completions", payload))
+
+		assert.Equal(t, http.StatusOK, rec.Code)
+		assert.Contains(t, rec.Body.String(), name)
+
+		// the test expects the key "/<model>" for a request without a profile prefix
+		_, loaded := pm.currentProcesses["/"+name]
+		assert.True(t, loaded, "expected %s key in currentProcesses", name)
+	}
+
+	// only one process may remain loaded after both requests
+	assert.Len(t, pm.currentProcesses, 1)
+}
+
+// TestProxyManager_SwapMultiProcess requests both members of the "test"
+// profile as "test/<model>" and expects two processes to be loaded afterwards.
+func TestProxyManager_SwapMultiProcess(t *testing.T) {
+	cfg := &Config{
+		HealthCheckTimeout: 15,
+		Models: map[string]ModelConfig{
+			"model1": getTestSimpleResponderConfig("model1"),
+			"model2": getTestSimpleResponderConfig("model2"),
+		},
+		Profiles: map[string][]string{
+			"test": {"model1", "model2"},
+		},
+	}
+	pm := New(cfg)
+	defer pm.StopProcesses()
+
+	// expected response fragment -> model name sent in the request body
+	requests := map[string]string{"model1": "test/model1", "model2": "test/model2"}
+	for wantID, requested := range requests {
+		payload := bytes.NewBufferString(fmt.Sprintf(`{"model":"%s"}`, requested))
+		rec := httptest.NewRecorder()
+		pm.HandlerFunc(rec, httptest.NewRequest(http.MethodPost, "/v1/chat/completions", payload))
+
+		assert.Equal(t, http.StatusOK, rec.Code)
+		assert.Contains(t, rec.Body.String(), wantID)
+	}
+
+	// both profile members must be present, keyed by the requested name
+	assert.Len(t, pm.currentProcesses, 2)
+	_, hasFirst := pm.currentProcesses["test/model1"]
+	_, hasSecond := pm.currentProcesses["test/model2"]
+	assert.True(t, hasFirst, "expected test/model1 key in currentProcesses")
+	assert.True(t, hasSecond, "expected test/model2 key in currentProcesses")
+}