llama-swap/proxy/proxymanager.go

package proxy

import (
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"strconv"
	"sync"
	"time"

	"github.com/gin-gonic/gin"
)

type ProxyManager struct {
	sync.Mutex

	config         *Config
	currentProcess *Process
	logMonitor     *LogMonitor
	ginEngine      *gin.Engine
}

func New(config *Config) *ProxyManager {
	pm := &ProxyManager{
		config:         config,
		currentProcess: nil,
		logMonitor:     NewLogMonitor(),
		ginEngine:      gin.New(),
	}

	// Set up routes using the Gin engine
	pm.ginEngine.POST("/v1/chat/completions", pm.proxyChatRequestHandler)

	// Support legacy /v1/completions api, see issue #12
	pm.ginEngine.POST("/v1/completions", pm.proxyChatRequestHandler)

	pm.ginEngine.GET("/v1/models", pm.listModelsHandler)

	// in proxymanager_loghandlers.go
	pm.ginEngine.GET("/logs", pm.sendLogsHandlers)
	pm.ginEngine.GET("/logs/stream", pm.streamLogsHandler)
	pm.ginEngine.GET("/logs/streamSSE", pm.streamLogsHandlerSSE)

	pm.ginEngine.NoRoute(pm.proxyRequestHandler)

	// Disable console color for testing
	gin.DisableConsoleColor()

	return pm
}

func (pm *ProxyManager) Run(addr ...string) error {
	return pm.ginEngine.Run(addr...)
}

func (pm *ProxyManager) HandlerFunc(w http.ResponseWriter, r *http.Request) {
	pm.ginEngine.ServeHTTP(w, r)
}

func (pm *ProxyManager) listModelsHandler(c *gin.Context) {
	data := []interface{}{}
	for id := range pm.config.Models {
		data = append(data, map[string]interface{}{
			"id":       id,
			"object":   "model",
			"created":  time.Now().Unix(),
			"owned_by": "llama-swap",
		})
	}

	// Set the Content-Type header to application/json
	c.Header("Content-Type", "application/json")

	// Encode the data as JSON and write it to the response writer
	if err := json.NewEncoder(c.Writer).Encode(map[string]interface{}{"data": data}); err != nil {
		c.AbortWithError(http.StatusInternalServerError, fmt.Errorf("error encoding JSON"))
		return
	}
}

func (pm *ProxyManager) swapModel(requestedModel string) error {
	pm.Lock()
	defer pm.Unlock()

	// find the model configuration matching requestedModel
	modelConfig, modelID, found := pm.config.FindConfig(requestedModel)
	if !found {
		return fmt.Errorf("could not find configuration for %s", requestedModel)
	}

	// do nothing as it's already the correct process
	if pm.currentProcess != nil {
		if pm.currentProcess.ID == modelID {
			return nil
		} else {
			pm.currentProcess.Stop()
		}
	}

	pm.currentProcess = NewProcess(modelID, pm.config.HealthCheckTimeout, modelConfig, pm.logMonitor)
	return nil
}

func (pm *ProxyManager) proxyChatRequestHandler(c *gin.Context) {
	bodyBytes, err := io.ReadAll(c.Request.Body)
	if err != nil {
		c.AbortWithError(http.StatusBadRequest, fmt.Errorf("invalid JSON"))
		return
	}
	var requestBody map[string]interface{}
	if err := json.Unmarshal(bodyBytes, &requestBody); err != nil {
		c.AbortWithError(http.StatusBadRequest, fmt.Errorf("invalid JSON"))
		return
	}
	model, ok := requestBody["model"].(string)
	if !ok {
		c.AbortWithError(http.StatusBadRequest, fmt.Errorf("missing or invalid 'model' key"))
		return
	}

	if err := pm.swapModel(model); err != nil {
		c.AbortWithError(http.StatusNotFound, fmt.Errorf("unable to swap to model, %s", err.Error()))
		return
	}

	c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))

	// dechunk it as we already have all the body bytes see issue #11
	c.Request.Header.Del("transfer-encoding")
	c.Request.Header.Add("content-length", strconv.Itoa(len(bodyBytes)))

	pm.currentProcess.ProxyRequest(c.Writer, c.Request)
}

func (pm *ProxyManager) proxyRequestHandler(c *gin.Context) {
	if pm.currentProcess != nil {
		pm.currentProcess.ProxyRequest(c.Writer, c.Request)
	} else {
		c.AbortWithError(http.StatusBadRequest, fmt.Errorf("no strategy to handle request"))
	}
}