Add /upstream endpoint (#30)
* remove catch-all route to upstream proxy (it was broken anyway) * add /upstream/:model_id to swap and route to upstream path * add /upstream HTML endpoint and unlisted option * add /upstream endpoint to show a list of available models * add `unlisted` configuration option to omit a model from /v1/models and /upstream lists * add favicon.ico
This commit is contained in:
11
README.md
11
README.md
@@ -8,7 +8,7 @@ llama-swap is an OpenAI API compatible server that gives you complete control ov
|
||||
Features:
|
||||
|
||||
- ✅ Easy to deploy: single binary with no dependencies
|
||||
- ✅ Single yaml configuration file
|
||||
- ✅ Easy to config: single yaml file
|
||||
- ✅ On-demand model switching
|
||||
- ✅ Full control over server settings per model
|
||||
- ✅ OpenAI API support (`v1/completions` and `v1/chat/completions`)
|
||||
@@ -16,7 +16,8 @@ Features:
|
||||
- ✅ Run multiple models at once with `profiles`
|
||||
- ✅ Remote log monitoring at `/log`
|
||||
- ✅ Automatic unloading of models from GPUs after timeout
|
||||
- ✅ Use any local server that provides an OpenAI compatible API (llama.cpp, vllm, tabbyAPI, etc)
|
||||
- ✅ Use any local OpenAI compatible server (llama.cpp, vllm, tabbyAPI, etc)
|
||||
- ✅ Direct access to proxied upstream HTTP server via `/upstream/:model_id`
|
||||
|
||||
## Releases
|
||||
|
||||
@@ -73,6 +74,12 @@ models:
|
||||
--model path/to/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
|
||||
proxy: http://127.0.0.1:8999
|
||||
|
||||
# unlisted models do not show up in /v1/models or /upstream lists
|
||||
# but they can still be requested as normal
|
||||
"qwen-unlisted":
|
||||
cmd: llama-server --port 9999 -m Llama-3.2-1B-Instruct-Q4_K_M.gguf -ngl 0
|
||||
unlisted: true
|
||||
|
||||
# profiles make it easy to manage multi-model (and GPU) configurations.
|
||||
#
|
||||
# Tips:
|
||||
|
||||
@@ -33,6 +33,7 @@ models:
|
||||
- env1=hello
|
||||
cmd: build/simple-responder --port 8999
|
||||
proxy: http://127.0.0.1:8999
|
||||
unlisted: true
|
||||
|
||||
# use "none" to skip check. Caution this may cause some requests to fail
|
||||
# until the upstream server is ready for traffic
|
||||
@@ -42,9 +43,11 @@ models:
|
||||
"broken":
|
||||
cmd: models/llama-server-osx --port 8999 -m models/doesnotexist.gguf
|
||||
proxy: http://127.0.0.1:8999
|
||||
unlisted: true
|
||||
"broken_timeout":
|
||||
cmd: models/llama-server-osx --port 8999 -m models/qwen2.5-0.5b-instruct-q8_0.gguf
|
||||
proxy: http://127.0.0.1:9000
|
||||
unlisted: true
|
||||
|
||||
# creating a coding profile with models for code generation and general questions
|
||||
profiles:
|
||||
|
||||
BIN
misc/assets/favicon-raw.png
Normal file
BIN
misc/assets/favicon-raw.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 51 KiB |
@@ -16,6 +16,7 @@ type ModelConfig struct {
|
||||
Env []string `yaml:"env"`
|
||||
CheckEndpoint string `yaml:"checkEndpoint"`
|
||||
UnloadAfter int `yaml:"ttl"`
|
||||
Unlisted bool `yaml:"unlisted"`
|
||||
}
|
||||
|
||||
func (m *ModelConfig) SanitizedCommand() ([]string, error) {
|
||||
|
||||
BIN
proxy/html/favicon.ico
Normal file
BIN
proxy/html/favicon.ico
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 15 KiB |
@@ -2,10 +2,12 @@ package proxy
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"embed"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
@@ -18,6 +20,15 @@ const (
|
||||
PROFILE_SPLIT_CHAR = ":"
|
||||
)
|
||||
|
||||
//go:embed html/favicon.ico
|
||||
var faviconData []byte
|
||||
|
||||
//go:embed html/logs.html
|
||||
var logsHTML []byte
|
||||
|
||||
// make sure embed is kept there by the IDE auto-package importer
|
||||
var _ = embed.FS{}
|
||||
|
||||
type ProxyManager struct {
|
||||
sync.Mutex
|
||||
|
||||
@@ -48,7 +59,12 @@ func New(config *Config) *ProxyManager {
|
||||
pm.ginEngine.GET("/logs/stream", pm.streamLogsHandler)
|
||||
pm.ginEngine.GET("/logs/streamSSE", pm.streamLogsHandlerSSE)
|
||||
|
||||
pm.ginEngine.NoRoute(pm.proxyNoRouteHandler)
|
||||
pm.ginEngine.GET("/upstream", pm.upstreamIndex)
|
||||
pm.ginEngine.Any("/upstream/:model_id/*upstreamPath", pm.proxyToUpstream)
|
||||
|
||||
pm.ginEngine.GET("/favicon.ico", func(c *gin.Context) {
|
||||
c.Data(http.StatusOK, "image/x-icon", faviconData)
|
||||
})
|
||||
|
||||
// Disable console color for testing
|
||||
gin.DisableConsoleColor()
|
||||
@@ -86,7 +102,11 @@ func (pm *ProxyManager) stopProcesses() {
|
||||
|
||||
func (pm *ProxyManager) listModelsHandler(c *gin.Context) {
|
||||
data := []interface{}{}
|
||||
for id := range pm.config.Models {
|
||||
for id, modelConfig := range pm.config.Models {
|
||||
if modelConfig.Unlisted {
|
||||
continue
|
||||
}
|
||||
|
||||
data = append(data, map[string]interface{}{
|
||||
"id": id,
|
||||
"object": "model",
|
||||
@@ -113,7 +133,7 @@ func (pm *ProxyManager) swapModel(requestedModel string) (*Process, error) {
|
||||
pm.Lock()
|
||||
defer pm.Unlock()
|
||||
|
||||
// Check if requestedModel contains a /
|
||||
// Check if requestedModel contains a PROFILE_SPLIT_CHAR
|
||||
profileName, modelName := "", requestedModel
|
||||
if idx := strings.Index(requestedModel, PROFILE_SPLIT_CHAR); idx != -1 {
|
||||
profileName = requestedModel[:idx]
|
||||
@@ -170,6 +190,48 @@ func (pm *ProxyManager) swapModel(requestedModel string) (*Process, error) {
|
||||
return pm.currentProcesses[requestedProcessKey], nil
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) proxyToUpstream(c *gin.Context) {
|
||||
requestedModel := c.Param("model_id")
|
||||
|
||||
if requestedModel == "" {
|
||||
c.AbortWithError(http.StatusBadRequest, fmt.Errorf("model id required in path"))
|
||||
return
|
||||
}
|
||||
|
||||
if process, err := pm.swapModel(requestedModel); err != nil {
|
||||
c.AbortWithError(http.StatusNotFound, fmt.Errorf("unable to swap to model, %s", err.Error()))
|
||||
} else {
|
||||
// rewrite the path
|
||||
c.Request.URL.Path = c.Param("upstreamPath")
|
||||
process.ProxyRequest(c.Writer, c.Request)
|
||||
}
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) upstreamIndex(c *gin.Context) {
|
||||
var html strings.Builder
|
||||
|
||||
html.WriteString("<!doctype HTML>\n<html><body><h1>Available Models</h1><ul>")
|
||||
|
||||
// Extract keys and sort them
|
||||
var modelIDs []string
|
||||
for modelID, modelConfig := range pm.config.Models {
|
||||
if modelConfig.Unlisted {
|
||||
continue
|
||||
}
|
||||
|
||||
modelIDs = append(modelIDs, modelID)
|
||||
}
|
||||
sort.Strings(modelIDs)
|
||||
|
||||
// Iterate over sorted keys
|
||||
for _, modelID := range modelIDs {
|
||||
html.WriteString(fmt.Sprintf("<li><a href=\"/upstream/%s\">%s</a></li>", modelID, modelID))
|
||||
}
|
||||
html.WriteString("</ul></body></html>")
|
||||
c.Header("Content-Type", "text/html")
|
||||
c.String(http.StatusOK, html.String())
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) proxyChatRequestHandler(c *gin.Context) {
|
||||
bodyBytes, err := io.ReadAll(c.Request.Body)
|
||||
if err != nil {
|
||||
@@ -201,16 +263,6 @@ func (pm *ProxyManager) proxyChatRequestHandler(c *gin.Context) {
|
||||
}
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) proxyNoRouteHandler(c *gin.Context) {
|
||||
// since maps are unordered, just use the first available process if one exists
|
||||
for _, process := range pm.currentProcesses {
|
||||
process.ProxyRequest(c.Writer, c.Request)
|
||||
return
|
||||
}
|
||||
|
||||
c.AbortWithError(http.StatusBadRequest, fmt.Errorf("no strategy to handle request"))
|
||||
}
|
||||
|
||||
func ProcessKeyName(groupName, modelName string) string {
|
||||
return groupName + PROFILE_SPLIT_CHAR + modelName
|
||||
}
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
package proxy
|
||||
|
||||
import (
|
||||
"embed"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"strings"
|
||||
@@ -9,12 +8,6 @@ import (
|
||||
"github.com/gin-gonic/gin"
|
||||
)
|
||||
|
||||
//go:embed html/logs.html
|
||||
var logsHTML []byte
|
||||
|
||||
// make sure embed is kept there by the IDE auto-package importer
|
||||
var _ = embed.FS{}
|
||||
|
||||
func (pm *ProxyManager) sendLogsHandlers(c *gin.Context) {
|
||||
|
||||
accept := c.GetHeader("Accept")
|
||||
|
||||
Reference in New Issue
Block a user