diff --git a/README.md b/README.md index 012c0ca..5ff77fc 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ Written in golang, it is very easy to install (single binary with no dependencie - ✅ llama-server (llama.cpp) supported endpoints: - `v1/rerank`, `v1/reranking`, `/rerank` - `/infill` - for code infilling + - `/completion` - for text completion - ✅ llama-swap custom API endpoints - `/ui` - web UI - `/log` - remote log monitoring diff --git a/misc/simple-responder/simple-responder.go b/misc/simple-responder/simple-responder.go index 2a23987..6c65140 100644 --- a/misc/simple-responder/simple-responder.go +++ b/misc/simple-responder/simple-responder.go @@ -153,6 +153,19 @@ func main() { }) + // llama-server compatibility: /completion + r.POST("/completion", func(c *gin.Context) { + c.Header("Content-Type", "application/json") + c.JSON(http.StatusOK, gin.H{ + "responseMessage": *responseMessage, + "usage": gin.H{ + "completion_tokens": 10, + "prompt_tokens": 25, + "total_tokens": 35, + }, + }) + }) + // issue #41 r.POST("/v1/audio/transcriptions", func(c *gin.Context) { // Parse the multipart form diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go index 9b1d49b..67da376 100644 --- a/proxy/proxymanager.go +++ b/proxy/proxymanager.go @@ -203,6 +203,9 @@ func (pm *ProxyManager) setupGinEngine() { // llama-server's /infill endpoint for code infilling pm.ginEngine.POST("/infill", mm, pm.proxyOAIHandler) + // llama-server's /completion endpoint + pm.ginEngine.POST("/completion", mm, pm.proxyOAIHandler) + // Support audio/speech endpoint pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler) pm.ginEngine.POST("/v1/audio/transcriptions", pm.proxyOAIPostFormHandler) diff --git a/proxy/proxymanager_test.go b/proxy/proxymanager_test.go index f8ba25f..7de9b66 100644 --- a/proxy/proxymanager_test.go +++ b/proxy/proxymanager_test.go @@ -833,6 +833,28 @@ func TestProxyManager_HealthEndpoint(t *testing.T) { assert.Equal(t, "OK", rec.Body.String()) } +// 
Ensure llama-server's /completion endpoint is proxied correctly +func TestProxyManager_CompletionEndpoint(t *testing.T) { + config := AddDefaultGroupToConfig(Config{ + HealthCheckTimeout: 15, + Models: map[string]ModelConfig{ + "model1": getTestSimpleResponderConfig("model1"), + }, + LogLevel: "error", + }) + + proxy := New(config) + defer proxy.StopProcesses(StopWaitForInflightRequest) + + reqBody := `{"model":"model1"}` + req := httptest.NewRequest("POST", "/completion", bytes.NewBufferString(reqBody)) + w := httptest.NewRecorder() + + proxy.ServeHTTP(w, req) + assert.Equal(t, http.StatusOK, w.Code) + assert.Contains(t, w.Body.String(), "model1") +} + func TestProxyManager_StartupHooks(t *testing.T) { // using real YAML as the configuration has gotten more complex