add /completion endpoint (#275)
* feat: add /completion endpoint
* chore: reformat using gofmt
@@ -23,6 +23,7 @@ Written in golang, it is very easy to install (single binary with no dependencie
 - ✅ llama-server (llama.cpp) supported endpoints:
   - `v1/rerank`, `v1/reranking`, `/rerank`
   - `/infill` - for code infilling
+  - `/completion` - for completion endpoint
 - ✅ llama-swap custom API endpoints
   - `/ui` - web UI
   - `/log` - remote log monitoring
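For anyone picking this up from the README: the sketch below shows one way to hit the new route through llama-swap. The listen address (`localhost:8080`) and model name (`llama-8b`) are placeholders for whatever your own config uses; llama-swap picks the upstream from the `model` field, and `prompt`/`n_predict` are ordinary llama.cpp `/completion` parameters.

```go
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Placeholder host/port and model name; substitute your own llama-swap
	// listen address and a model key from your config.
	body := []byte(`{"model": "llama-8b", "prompt": "// add two ints\nfunc add(", "n_predict": 64}`)

	resp, err := http.Post("http://localhost:8080/completion", "application/json", bytes.NewReader(body))
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	out, _ := io.ReadAll(resp.Body)
	fmt.Println(string(out)) // raw llama-server /completion JSON, passed through
}
```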
@@ -153,6 +153,19 @@ func main() {
 	})
 
+	// llama-server compatibility: /completion
+	r.POST("/completion", func(c *gin.Context) {
+		c.Header("Content-Type", "application/json")
+		c.JSON(http.StatusOK, gin.H{
+			"responseMessage": *responseMessage,
+			"usage": gin.H{
+				"completion_tokens": 10,
+				"prompt_tokens":     25,
+				"total_tokens":      35,
+			},
+		})
+	})
+
 	// issue #41
 	r.POST("/v1/audio/transcriptions", func(c *gin.Context) {
 		// Parse the multipart form
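The handler above belongs to the simple-responder test stub, so the payload is canned. Assuming the stub's response message was set to `model1` (as in the new test further down), a POST to its `/completion` returns roughly:

```json
{
  "responseMessage": "model1",
  "usage": {
    "completion_tokens": 10,
    "prompt_tokens": 25,
    "total_tokens": 35
  }
}
```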
@@ -203,6 +203,9 @@ func (pm *ProxyManager) setupGinEngine() {
 	// llama-server's /infill endpoint for code infilling
 	pm.ginEngine.POST("/infill", mm, pm.proxyOAIHandler)
 
+	// llama-server's /completion endpoint
+	pm.ginEngine.POST("/completion", mm, pm.proxyOAIHandler)
+
 	// Support audio/speech endpoint
 	pm.ginEngine.POST("/v1/audio/speech", pm.proxyOAIHandler)
 	pm.ginEngine.POST("/v1/audio/transcriptions", pm.proxyOAIPostFormHandler)
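`/completion` reuses the same `mm` middleware and `proxyOAIHandler` as the existing OpenAI-style routes, since llama-swap has to know which model a request targets before it can proxy it. As a rough, hypothetical sketch of that idea (invented names, not the project's actual handler): the proxy reads the `model` field out of the JSON body and restores the body before forwarding.

```go
package main

import (
	"bytes"
	"encoding/json"
	"io"
	"net/http"

	"github.com/gin-gonic/gin"
)

// routeByModel is a hypothetical stand-in for proxyOAIHandler: it extracts
// the "model" field so the proxy knows which llama-server instance should
// handle the request, then restores the body for forwarding.
func routeByModel(c *gin.Context) {
	body, err := io.ReadAll(c.Request.Body)
	if err != nil {
		c.AbortWithStatus(http.StatusBadRequest)
		return
	}
	// Put the bytes back so the eventual reverse proxy can re-read them.
	c.Request.Body = io.NopCloser(bytes.NewReader(body))

	var req struct {
		Model string `json:"model"`
	}
	if err := json.Unmarshal(body, &req); err != nil || req.Model == "" {
		c.AbortWithStatusJSON(http.StatusBadRequest, gin.H{"error": "missing model field"})
		return
	}

	// The real handler would now proxy to the process serving req.Model;
	// this sketch just reports the routing decision.
	c.JSON(http.StatusOK, gin.H{"routed_to": req.Model})
}

func main() {
	r := gin.Default()
	r.POST("/completion", routeByModel)
	r.Run(":8080")
}
```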
@@ -833,6 +833,28 @@ func TestProxyManager_HealthEndpoint(t *testing.T) {
 	assert.Equal(t, "OK", rec.Body.String())
 }
 
+// Ensure the custom llama-server /completion endpoint proxies correctly
+func TestProxyManager_CompletionEndpoint(t *testing.T) {
+	config := AddDefaultGroupToConfig(Config{
+		HealthCheckTimeout: 15,
+		Models: map[string]ModelConfig{
+			"model1": getTestSimpleResponderConfig("model1"),
+		},
+		LogLevel: "error",
+	})
+
+	proxy := New(config)
+	defer proxy.StopProcesses(StopWaitForInflightRequest)
+
+	reqBody := `{"model":"model1"}`
+	req := httptest.NewRequest("POST", "/completion", bytes.NewBufferString(reqBody))
+	w := httptest.NewRecorder()
+
+	proxy.ServeHTTP(w, req)
+	assert.Equal(t, http.StatusOK, w.Code)
+	assert.Contains(t, w.Body.String(), "model1")
+}
+
 func TestProxyManager_StartupHooks(t *testing.T) {
 
 	// using real YAML as the configuration has gotten more complex