From 04b4760e7e70bdcfb071050d8e85aa140a449df5 Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Sun, 1 Dec 2024 09:10:50 -0800 Subject: [PATCH] change profile split character to : (colon) (#21) - change from `/` to `:` for multiple models loaded as part of a profile - breaking change now, but allows for more compatibility with other inference engines that may have model references like `coding:Qwen/Qwen-2.5-Coder-32B` --- proxy/proxymanager.go | 33 +++++++++++++++++++++------------ proxy/proxymanager_test.go | 28 +++++++++++++++++++--------- 2 files changed, 40 insertions(+), 21 deletions(-) diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go index 8c5f271..7994e97 100644 --- a/proxy/proxymanager.go +++ b/proxy/proxymanager.go @@ -14,6 +14,10 @@ import ( "github.com/gin-gonic/gin" ) +const ( + PROFILE_SPLIT_CHAR = ":" +) + type ProxyManager struct { sync.Mutex @@ -106,15 +110,15 @@ func (pm *ProxyManager) swapModel(requestedModel string) (*Process, error) { defer pm.Unlock() // Check if requestedModel contains a / - groupName, modelName := "", requestedModel - if idx := strings.Index(requestedModel, "/"); idx != -1 { - groupName = requestedModel[:idx] + profileName, modelName := "", requestedModel + if idx := strings.Index(requestedModel, PROFILE_SPLIT_CHAR); idx != -1 { + profileName = requestedModel[:idx] modelName = requestedModel[idx+1:] } - if groupName != "" { - if _, found := pm.config.Profiles[groupName]; !found { - return nil, fmt.Errorf("model group not found %s", groupName) + if profileName != "" { + if _, found := pm.config.Profiles[profileName]; !found { + return nil, fmt.Errorf("model group not found %s", profileName) } } @@ -125,7 +129,8 @@ func (pm *ProxyManager) swapModel(requestedModel string) (*Process, error) { } // exit early when already running, otherwise stop everything and swap - requestedProcessKey := groupName + "/" + realModelName + requestedProcessKey := ProcessKeyName(profileName, realModelName) + if process, found := pm.currentProcesses[requestedProcessKey]; found { return process, nil } @@ -133,25 +138,25 @@ func (pm *ProxyManager) swapModel(requestedModel string) (*Process, error) { // stop all running models pm.stopProcesses() - if groupName == "" { + if profileName == "" { modelConfig, modelID, found := pm.config.FindConfig(realModelName) if !found { return nil, fmt.Errorf("could not find configuration for %s", realModelName) } process := NewProcess(modelID, pm.config.HealthCheckTimeout, modelConfig, pm.logMonitor) - processKey := groupName + "/" + modelID + processKey := ProcessKeyName(profileName, modelID) pm.currentProcesses[processKey] = process } else { - for _, modelName := range pm.config.Profiles[groupName] { + for _, modelName := range pm.config.Profiles[profileName] { if realModelName, found := pm.config.RealModelName(modelName); found { modelConfig, modelID, found := pm.config.FindConfig(realModelName) if !found { - return nil, fmt.Errorf("could not find configuration for %s in group %s", realModelName, groupName) + return nil, fmt.Errorf("could not find configuration for %s in group %s", realModelName, profileName) } process := NewProcess(modelID, pm.config.HealthCheckTimeout, modelConfig, pm.logMonitor) - processKey := groupName + "/" + modelID + processKey := ProcessKeyName(profileName, modelID) pm.currentProcesses[processKey] = process } } @@ -201,3 +206,7 @@ func (pm *ProxyManager) proxyNoRouteHandler(c *gin.Context) { c.AbortWithError(http.StatusBadRequest, fmt.Errorf("no strategy to handle request")) } + +func ProcessKeyName(groupName, modelName string) string { + return groupName + PROFILE_SPLIT_CHAR + modelName +} diff --git a/proxy/proxymanager_test.go b/proxy/proxymanager_test.go index d17cdf7..7b8110c 100644 --- a/proxy/proxymanager_test.go +++ b/proxy/proxymanager_test.go @@ -33,7 +33,7 @@ func TestProxyManager_SwapProcessCorrectly(t *testing.T) { assert.Equal(t, http.StatusOK, w.Code) assert.Contains(t, w.Body.String(), modelName) - _, exists := proxy.currentProcesses["/"+modelName] + _, exists := proxy.currentProcesses[ProcessKeyName("", modelName)] assert.True(t, exists, "expected %s key in currentProcesses", modelName) } @@ -43,21 +43,31 @@ func TestProxyManager_SwapProcessCorrectly(t *testing.T) { } func TestProxyManager_SwapMultiProcess(t *testing.T) { + + model1 := "path1/model1" + model2 := "path2/model2" + + profileModel1 := ProcessKeyName("test", model1) + profileModel2 := ProcessKeyName("test", model2) + config := &Config{ HealthCheckTimeout: 15, Models: map[string]ModelConfig{ - "model1": getTestSimpleResponderConfig("model1"), - "model2": getTestSimpleResponderConfig("model2"), + model1: getTestSimpleResponderConfig("model1"), + model2: getTestSimpleResponderConfig("model2"), }, Profiles: map[string][]string{ - "test": {"model1", "model2"}, + "test": {model1, model2}, }, } proxy := New(config) defer proxy.StopProcesses() - for modelID, requestedModel := range map[string]string{"model1": "test/model1", "model2": "test/model2"} { + for modelID, requestedModel := range map[string]string{ + "model1": profileModel1, + "model2": profileModel2, + } { reqBody := fmt.Sprintf(`{"model":"%s"}`, requestedModel) req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody)) w := httptest.NewRecorder() @@ -69,11 +79,11 @@ func TestProxyManager_SwapMultiProcess(t *testing.T) { // make sure there's two loaded models assert.Len(t, proxy.currentProcesses, 2) - _, exists := proxy.currentProcesses["test/model1"] - assert.True(t, exists, "expected test/model1 key in currentProcesses") + _, exists := proxy.currentProcesses[profileModel1] + assert.True(t, exists, "expected "+profileModel1+" key in currentProcesses") - _, exists = proxy.currentProcesses["test/model2"] - assert.True(t, exists, "expected test/model2 key in currentProcesses") + _, exists = proxy.currentProcesses[profileModel2] + assert.True(t, exists, "expected "+profileModel2+" key in currentProcesses") } // When a request for a different model comes in ProxyManager should wait until