diff --git a/README.md b/README.md
index eeb7a36..9755fd3 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,7 @@ Written in golang, it is very easy to install (single binary with no dependancie
 - ✅ Remote log monitoring at `/log`
 - ✅ Direct access to upstream HTTP server via `/upstream/:model_id` ([demo](https://github.com/mostlygeek/llama-swap/pull/31))
 - ✅ Manually unload models via `/unload` endpoint ([#58](https://github.com/mostlygeek/llama-swap/issues/58))
+- ✅ List currently running models via `/running` endpoint ([#61](https://github.com/mostlygeek/llama-swap/issues/61))
 - ✅ Automatic unloading of models after timeout by setting a `ttl`
 - ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc)
 - ✅ Docker and Podman support
diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go
index 2b2c49e..1cdffa2 100644
--- a/proxy/proxymanager.go
+++ b/proxy/proxymanager.go
@@ -108,6 +108,8 @@ func New(config *Config) *ProxyManager {
 
 	pm.ginEngine.GET("/unload", pm.unloadAllModelsHandler)
 
+	pm.ginEngine.GET("/running", pm.listRunningProcessesHandler)
+
 	pm.ginEngine.GET("/", func(c *gin.Context) {
 		// Set the Content-Type header to text/html
 		c.Header("Content-Type", "text/html")
@@ -387,6 +389,26 @@ func (pm *ProxyManager) unloadAllModelsHandler(c *gin.Context) {
 	c.String(http.StatusOK, "OK")
 }
 
+func (pm *ProxyManager) listRunningProcessesHandler(context *gin.Context) {
+	context.Header("Content-Type", "application/json")
+	runningProcesses := make([]gin.H, 0) // Empty slice (not nil) so no models serializes as [] rather than null.
+
+	for _, process := range pm.currentProcesses {
+		// Append the process ID and state (multiple entries if profiles are being used).
+		runningProcesses = append(runningProcesses, gin.H{
+			"model": process.ID,
+			"state": process.state,
+		})
+	}
+
+	// Put the results under the `running` key.
+	response := gin.H{
+		"running": runningProcesses,
+	}
+
+	context.JSON(http.StatusOK, response) // Always return 200 OK.
+}
+
 func ProcessKeyName(groupName, modelName string) string {
 	return groupName + PROFILE_SPLIT_CHAR + modelName
 }
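For reference, the response body produced by `listRunningProcessesHandler` looks like the sketch below. The `running`, `model`, and `state` keys come straight from the handler; the `ready` value and the `model1` ID are taken from the tests that follow, so treat the concrete values as illustrative only:

```json
{
  "running": [
    { "model": "model1", "state": "ready" }
  ]
}
```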
diff --git a/proxy/proxymanager_test.go b/proxy/proxymanager_test.go
index 7388242..d09ccf2 100644
--- a/proxy/proxymanager_test.go
+++ b/proxy/proxymanager_test.go
@@ -349,3 +349,114 @@ func TestProxyManager_StripProfileSlug(t *testing.T) {
 	assert.Equal(t, http.StatusOK, w.Code)
 	assert.Contains(t, w.Body.String(), "ok")
 }
+
+// Test issue #61: `Listing the current list of models and the loaded model.`
+func TestProxyManager_RunningEndpoint(t *testing.T) {
+
+	// Shared configuration for all subtests.
+	config := &Config{
+		HealthCheckTimeout: 15,
+		Models: map[string]ModelConfig{
+			"model1": getTestSimpleResponderConfig("model1"),
+			"model2": getTestSimpleResponderConfig("model2"),
+		},
+		Profiles: map[string][]string{
+			"test": {"model1", "model2"},
+		},
+	}
+
+	// Define a helper struct to parse the JSON response.
+	type RunningResponse struct {
+		Running []struct {
+			Model string `json:"model"`
+			State string `json:"state"`
+		} `json:"running"`
+	}
+
+	// Create the proxy once for all subtests.
+	proxy := New(config)
+	defer proxy.StopProcesses()
+
+	t.Run("no models loaded", func(t *testing.T) {
+		req := httptest.NewRequest("GET", "/running", nil)
+		w := httptest.NewRecorder()
+		proxy.HandlerFunc(w, req)
+
+		assert.Equal(t, http.StatusOK, w.Code)
+
+		var response RunningResponse
+
+		// The body must be valid JSON.
+		assert.NoError(t, json.Unmarshal(w.Body.Bytes(), &response))
+
+		// The `running` array should be empty.
+		assert.Empty(t, response.Running, "expected no running models")
+	})
+
+	t.Run("single model loaded", func(t *testing.T) {
+		// Load a single model.
+		reqBody := `{"model":"model1"}`
+		req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
+		w := httptest.NewRecorder()
+		proxy.HandlerFunc(w, req)
+		assert.Equal(t, http.StatusOK, w.Code)
+
+		// Simulate a browser call to the `/running` endpoint.
+		req = httptest.NewRequest("GET", "/running", nil)
+		w = httptest.NewRecorder()
+		proxy.HandlerFunc(w, req)
+
+		var response RunningResponse
+		assert.NoError(t, json.Unmarshal(w.Body.Bytes(), &response))
+
+		// There should be exactly one array element.
+		assert.Len(t, response.Running, 1)
+
+		// Is this the right model?
+		assert.Equal(t, "model1", response.Running[0].Model)
+
+		// Is the model loaded?
+		assert.Equal(t, "ready", response.Running[0].State)
+	})
+
+	t.Run("multiple models via profile", func(t *testing.T) {
+		// Load more than one model.
+		for _, model := range []string{"model1", "model2"} {
+			profileModel := ProcessKeyName("test", model)
+			reqBody := fmt.Sprintf(`{"model":"%s"}`, profileModel)
+			req := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(reqBody))
+			w := httptest.NewRecorder()
+			proxy.HandlerFunc(w, req)
+			assert.Equal(t, http.StatusOK, w.Code)
+		}
+
+		// Simulate the browser call.
+		req := httptest.NewRequest("GET", "/running", nil)
+		w := httptest.NewRecorder()
+		proxy.HandlerFunc(w, req)
+
+		var response RunningResponse
+
+		// The JSON response must be valid.
+		assert.NoError(t, json.Unmarshal(w.Body.Bytes(), &response))
+
+		// The response should contain both models.
+		assert.Len(t, response.Running, 2)
+
+		expectedModels := map[string]struct{}{
+			"model1": {},
+			"model2": {},
+		}
+
+		// Check each entry's model name and state.
+		for _, entry := range response.Running {
+			_, exists := expectedModels[entry.Model]
+			assert.True(t, exists, "unexpected model %s", entry.Model)
+			assert.Equal(t, "ready", entry.State)
+			delete(expectedModels, entry.Model)
+		}
+
+		// Every expected model was deleted as it was verified, so none should remain.
+		assert.Empty(t, expectedModels, "unexpected additional models in response")
+	})
+}
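To poke at the endpoint from outside the test suite, here is a minimal client sketch. It assumes a llama-swap instance listening on `localhost:8080` (the address is an assumption; adjust it to wherever your instance listens) and decodes the same `{"running": [...]}` shape the tests above assert on:

```go
package main

import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
)

// runningResponse mirrors the JSON produced by the /running endpoint.
type runningResponse struct {
	Running []struct {
		Model string `json:"model"`
		State string `json:"state"`
	} `json:"running"`
}

func main() {
	// The address is an assumption for illustration.
	resp, err := http.Get("http://localhost:8080/running")
	if err != nil {
		log.Fatal(err)
	}
	defer resp.Body.Close()

	var rr runningResponse
	if err := json.NewDecoder(resp.Body).Decode(&rr); err != nil {
		log.Fatal(err)
	}

	if len(rr.Running) == 0 {
		fmt.Println("no models loaded")
		return
	}
	for _, p := range rr.Running {
		fmt.Printf("%s: %s\n", p.Model, p.State)
	}
}
```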