add /completion endpoint (#275)

* feat: add /completion endpoint
* chore: reformat using gofmt
Authored by Yandrik on 2025-08-29 06:41:02 +02:00, committed by GitHub
parent 52b329f7bc
commit 977f1856bb
4 changed files with 39 additions and 0 deletions

@@ -23,6 +23,7 @@ Written in golang, it is very easy to install (single binary with no dependencie
 - ✅ llama-server (llama.cpp) supported endpoints:
   - `v1/rerank`, `v1/reranking`, `/rerank`
   - `/infill` - for code infilling
+  - `/completion` - for completion endpoint
 - ✅ llama-swap custom API endpoints
   - `/ui` - web UI
   - `/log` - remote log monitoring
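Since llama-swap only proxies this llama.cpp endpoint, a plain HTTP POST is enough to try it out. A minimal sketch (the listen address, model name, and body fields here are assumptions; llama-swap selects the backend from the `model` field in the JSON body):

    package main

    import (
        "fmt"
        "io"
        "net/http"
        "strings"
    )

    func main() {
        // llama.cpp-style completion request; "model" tells llama-swap which
        // backend to swap in, everything else passes through to llama-server.
        body := `{"model": "model1", "prompt": "The capital of France is", "n_predict": 16}`

        resp, err := http.Post("http://localhost:8080/completion", "application/json", strings.NewReader(body))
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()

        out, _ := io.ReadAll(resp.Body)
        fmt.Println(resp.Status, string(out))
    }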

@@ -153,6 +153,19 @@ func main() {
     })
+
+    // llama-server compatibility: /completion
+    r.POST("/completion", func(c *gin.Context) {
+        c.Header("Content-Type", "application/json")
+        c.JSON(http.StatusOK, gin.H{
+            "responseMessage": *responseMessage,
+            "usage": gin.H{
+                "completion_tokens": 10,
+                "prompt_tokens":     25,
+                "total_tokens":      35,
+            },
+        })
+    })
     // issue #41
     r.POST("/v1/audio/transcriptions", func(c *gin.Context) {
         // Parse the multipart form
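A side note on the canned response above: `*responseMessage` dereferences what looks like a command-line flag of this test responder, and `getTestSimpleResponderConfig("model1")` in the new test below appears to wire the model name into it. That is what lets the test assert that the proxied body contains "model1". The hard-coded `usage` counts are placeholder numbers, not real token accounting.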

@@ -203,6 +203,9 @@ func (pm *ProxyManager) setupGinEngine() {
     // llama-server's /infill endpoint for code infilling
     pm.ginEngine.POST("/infill", mm, pm.proxyOAIHandler)
+
+    // llama-server's /completion endpoint
+    pm.ginEngine.POST("/completion", mm, pm.proxyOAIHandler)
     // Support audio/speech endpoint
     pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
     pm.ginEngine.POST("/v1/audio/transcriptions", pm.proxyOAIPostFormHandler)

@@ -833,6 +833,28 @@ func TestProxyManager_HealthEndpoint(t *testing.T) {
     assert.Equal(t, "OK", rec.Body.String())
 }

+// Ensure the custom llama-server /completion endpoint proxies correctly
+func TestProxyManager_CompletionEndpoint(t *testing.T) {
+    config := AddDefaultGroupToConfig(Config{
+        HealthCheckTimeout: 15,
+        Models: map[string]ModelConfig{
+            "model1": getTestSimpleResponderConfig("model1"),
+        },
+        LogLevel: "error",
+    })
+
+    proxy := New(config)
+    defer proxy.StopProcesses(StopWaitForInflightRequest)
+
+    reqBody := `{"model":"model1"}`
+    req := httptest.NewRequest("POST", "/completion", bytes.NewBufferString(reqBody))
+    w := httptest.NewRecorder()
+    proxy.ServeHTTP(w, req)
+
+    assert.Equal(t, http.StatusOK, w.Code)
+    assert.Contains(t, w.Body.String(), "model1")
+}
+
 func TestProxyManager_StartupHooks(t *testing.T) {
     // using real YAML as the configuration has gotten more complex
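The new test follows the same pattern as its neighbours: the config points "model1" at the simple responder patched above, so asserting that the response body contains "model1" confirms the request was proxied to the right backend. To run it in isolation, something like `go test -run TestProxyManager_CompletionEndpoint ./proxy` should work (the exact package path is an assumption; like the rest of the suite, it requires the simple responder test binary to be built first).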