From c867a6c9a2e8b3158ea0099fc849b96dd5d7308e Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Mon, 30 Jun 2025 23:02:44 -0700 Subject: [PATCH] Add name and description to v1/models list (#179) * Add support for name and description in v1/models list * add configuration example for name and description --- config.example.yaml | 14 +++++++++++++- proxy/config.go | 6 ++++++ proxy/config_posix_test.go | 4 ++++ proxy/proxymanager.go | 36 ++++++++++++++++++++++-------------- proxy/proxymanager_test.go | 30 ++++++++++++++++++++++++++++-- 5 files changed, 73 insertions(+), 17 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index e2fe99b..3090799 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -49,7 +49,19 @@ models: cmd: | # ${latest-llama} is a macro that is defined above ${latest-llama} - --model path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf + --model path/to/llama-8B-Q4_K_M.gguf + + # name: a display name for the model + # - optional, default: empty string + # - if set, it will be used in the v1/models API response + # - if not set, it will be omitted in the JSON model record + name: "llama 3.1 8B" + + # description: a description for the model + # - optional, default: empty string + # - if set, it will be used in the v1/models API response + # - if not set, it will be omitted in the JSON model record + description: "A small but capable model used for quick testing" # env: define an array of environment variables to inject into cmd's environment # - optional, default: empty array diff --git a/proxy/config.go b/proxy/config.go index e9821e9..9a6dba9 100644 --- a/proxy/config.go +++ b/proxy/config.go @@ -28,6 +28,10 @@ type ModelConfig struct { Unlisted bool `yaml:"unlisted"` UseModelName string `yaml:"useModelName"` + // #179 for /v1/models + Name string `yaml:"name"` + Description string `yaml:"description"` + // Limit concurrency of HTTP requests to process ConcurrencyLimit int `yaml:"concurrencyLimit"` @@ -48,6 +52,8 @@ func (m 
*ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error { Unlisted: false, UseModelName: "", ConcurrencyLimit: 0, + Name: "", + Description: "", } // the default cmdStop to taskkill /f /t /pid ${PID} diff --git a/proxy/config_posix_test.go b/proxy/config_posix_test.go index 4a62a99..91d12a0 100644 --- a/proxy/config_posix_test.go +++ b/proxy/config_posix_test.go @@ -104,6 +104,8 @@ models: model1: cmd: path/to/cmd --arg1 one proxy: "http://localhost:8080" + name: "Model 1" + description: "This is model 1" aliases: - "m1" - "model-one" @@ -168,6 +170,8 @@ groups: Aliases: []string{"m1", "model-one"}, Env: []string{"VAR1=value1", "VAR2=value2"}, CheckEndpoint: "/health", + Name: "Model 1", + Description: "This is model 1", }, "model2": { Cmd: "path/to/server --arg1 one", diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go index 268458b..bbb8101 100644 --- a/proxy/proxymanager.go +++ b/proxy/proxymanager.go @@ -2,7 +2,6 @@ package proxy import ( "bytes" - "encoding/json" "fmt" "io" "mime/multipart" @@ -289,32 +288,41 @@ func (pm *ProxyManager) swapProcessGroup(requestedModel string) (*ProcessGroup, } func (pm *ProxyManager) listModelsHandler(c *gin.Context) { - data := []interface{}{} + data := make([]gin.H, 0, len(pm.config.Models)) + createdTime := time.Now().Unix() + for id, modelConfig := range pm.config.Models { if modelConfig.Unlisted { continue } - data = append(data, map[string]interface{}{ + record := gin.H{ "id": id, "object": "model", - "created": time.Now().Unix(), + "created": createdTime, "owned_by": "llama-swap", - }) + } + + if name := strings.TrimSpace(modelConfig.Name); name != "" { + record["name"] = name + } + if desc := strings.TrimSpace(modelConfig.Description); desc != "" { + record["description"] = desc + } + + data = append(data, record) } - // Set the Content-Type header to application/json - c.Header("Content-Type", "application/json") - - if origin := c.Request.Header.Get("Origin"); origin != "" { + // Set CORS headers 
if origin exists + if origin := c.GetHeader("Origin"); origin != "" { c.Header("Access-Control-Allow-Origin", origin) } - // Encode the data as JSON and write it to the response writer - if err := json.NewEncoder(c.Writer).Encode(map[string]interface{}{"object": "list", "data": data}); err != nil { - pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error encoding JSON %s", err.Error())) - return - } + // Use gin's JSON method which handles content-type and encoding + c.JSON(http.StatusOK, gin.H{ + "object": "list", + "data": data, + }) } func (pm *ProxyManager) proxyToUpstream(c *gin.Context) { diff --git a/proxy/proxymanager_test.go b/proxy/proxymanager_test.go index d62af6a..47397f2 100644 --- a/proxy/proxymanager_test.go +++ b/proxy/proxymanager_test.go @@ -183,11 +183,20 @@ func TestProxyManager_SwapMultiProcessParallelRequests(t *testing.T) { } func TestProxyManager_ListModelsHandler(t *testing.T) { + + model1Config := getTestSimpleResponderConfig("model1") + model1Config.Name = "Model 1" + model1Config.Description = "Model 1 description is used for testing" + + model2Config := getTestSimpleResponderConfig("model2") + model2Config.Name = " " // empty whitespace only strings will get ignored + model2Config.Description = " " + config := Config{ HealthCheckTimeout: 15, Models: map[string]ModelConfig{ - "model1": getTestSimpleResponderConfig("model1"), - "model2": getTestSimpleResponderConfig("model2"), + "model1": model1Config, + "model2": model2Config, "model3": getTestSimpleResponderConfig("model3"), }, LogLevel: "error", @@ -213,6 +222,7 @@ func TestProxyManager_ListModelsHandler(t *testing.T) { var response struct { Data []map[string]interface{} `json:"data"` } + if err := json.Unmarshal(w.Body.Bytes(), &response); err != nil { t.Fatalf("Failed to parse JSON response: %v", err) } @@ -227,6 +237,7 @@ func TestProxyManager_ListModelsHandler(t *testing.T) { "model3": {}, } + // validate each model record in the response for _, model := range response.Data { modelID, ok 
:= model["id"].(string) assert.True(t, ok, "model ID should be a string") @@ -245,6 +256,21 @@ func TestProxyManager_ListModelsHandler(t *testing.T) { ownedBy, ok := model["owned_by"].(string) assert.True(t, ok, "owned_by should be a string") assert.Equal(t, "llama-swap", ownedBy) + + // check for optional name and description + if modelID == "model1" { + name, ok := model["name"].(string) + assert.True(t, ok, "name should be a string") + assert.Equal(t, "Model 1", name) + description, ok := model["description"].(string) + assert.True(t, ok, "description should be a string") + assert.Equal(t, "Model 1 description is used for testing", description) + } else { + _, exists := model["name"] + assert.False(t, exists, "unexpected name field for model: %s", modelID) + _, exists = model["description"] + assert.False(t, exists, "unexpected description field for model: %s", modelID) + } } // Ensure all expected models were returned