diff --git a/README.md b/README.md index 4e39919..1d5ac80 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,10 @@ llama-swap's configuration is purposefully simple. ```yaml # Seconds to wait for llama.cpp to load and be ready to serve requests # Default (and minimum) is 15 seconds -healthCheckTimeout: 60gi +healthCheckTimeout: 60 + +# Write HTTP logs (useful for troubleshooting), defaults to false +logRequests: true # define valid model values and the upstream server start models: diff --git a/config.example.yaml b/config.example.yaml index f155173..89621cd 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -2,6 +2,9 @@ # Default (and minimum): 15 seconds healthCheckTimeout: 15 +# Log HTTP requests (helpful for troubleshooting), defaults to false +logRequests: true + models: "llama": cmd: > diff --git a/proxy/config.go b/proxy/config.go index f3bef77..3206ae9 100644 --- a/proxy/config.go +++ b/proxy/config.go @@ -25,6 +25,7 @@ func (m *ModelConfig) SanitizedCommand() ([]string, error) { type Config struct { HealthCheckTimeout int `yaml:"healthCheckTimeout"` + LogRequests bool `yaml:"logRequests"` Models map[string]ModelConfig `yaml:"models"` Profiles map[string][]string `yaml:"profiles"` diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go index 7a15771..05e7c14 100644 --- a/proxy/proxymanager.go +++ b/proxy/proxymanager.go @@ -46,6 +46,38 @@ func New(config *Config) *ProxyManager { ginEngine: gin.New(), } + if config.LogRequests { + pm.ginEngine.Use(func(c *gin.Context) { + // Start timer + start := time.Now() + + // Process request + c.Next() + + // Stop timer + duration := time.Since(start) + + // Log request details + clientIP := c.ClientIP() + method := c.Request.Method + path := c.Request.URL.Path + statusCode := c.Writer.Status() + bodySize := c.Writer.Size() + + fmt.Fprintf(pm.logMonitor, "[llama-swap] %s [%s] \"%s %s %s\" %d %d \"%s\" %v\n", + clientIP, + time.Now().Format("2006-01-02 15:04:05"), + method, + path, + c.Request.Proto, 
statusCode, + bodySize, + c.Request.UserAgent(), + duration, + ) + }) + } + // Set up routes using the Gin engine pm.ginEngine.POST("/v1/chat/completions", pm.proxyOAIHandler) // Support legacy /v1/completions api, see issue #12 @@ -127,7 +159,7 @@ func (pm *ProxyManager) listModelsHandler(c *gin.Context) { // Encode the data as JSON and write it to the response writer if err := json.NewEncoder(c.Writer).Encode(map[string]interface{}{"data": data}); err != nil { - c.AbortWithError(http.StatusInternalServerError, fmt.Errorf("error encoding JSON")) + pm.sendErrorResponse(c, http.StatusInternalServerError, fmt.Sprintf("error encoding JSON %s", err.Error())) return } } @@ -197,12 +229,12 @@ func (pm *ProxyManager) proxyToUpstream(c *gin.Context) { requestedModel := c.Param("model_id") if requestedModel == "" { - c.AbortWithError(http.StatusBadRequest, fmt.Errorf("model id required in path")) + pm.sendErrorResponse(c, http.StatusBadRequest, "model id required in path") return } if process, err := pm.swapModel(requestedModel); err != nil { - c.AbortWithError(http.StatusNotFound, fmt.Errorf("unable to swap to model, %s", err.Error())) + pm.sendErrorResponse(c, http.StatusNotFound, fmt.Sprintf("unable to swap to model, %s", err.Error())) } else { // rewrite the path c.Request.URL.Path = c.Param("upstreamPath") @@ -238,22 +270,23 @@ func (pm *ProxyManager) upstreamIndex(c *gin.Context) { func (pm *ProxyManager) proxyOAIHandler(c *gin.Context) { bodyBytes, err := io.ReadAll(c.Request.Body) if err != nil { - c.AbortWithError(http.StatusBadRequest, fmt.Errorf("invalid JSON")) + pm.sendErrorResponse(c, http.StatusBadRequest, "could not ready request body") return } + var requestBody map[string]interface{} if err := json.Unmarshal(bodyBytes, &requestBody); err != nil { - c.AbortWithError(http.StatusBadRequest, fmt.Errorf("invalid JSON")) + pm.sendErrorResponse(c, http.StatusBadRequest, fmt.Sprintf("invalid JSON: %s", err.Error())) return } model, ok := 
requestBody["model"].(string) if !ok { - c.AbortWithError(http.StatusBadRequest, fmt.Errorf("missing or invalid 'model' key")) + pm.sendErrorResponse(c, http.StatusBadRequest, "missing or invalid 'model' key") return } if process, err := pm.swapModel(model); err != nil { - c.AbortWithError(http.StatusNotFound, fmt.Errorf("unable to swap to model, %s", err.Error())) + pm.sendErrorResponse(c, http.StatusNotFound, fmt.Sprintf("unable to swap to model, %s", err.Error())) return } else { c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) @@ -266,6 +299,16 @@ func (pm *ProxyManager) proxyOAIHandler(c *gin.Context) { } } +func (pm *ProxyManager) sendErrorResponse(c *gin.Context, statusCode int, message string) { + acceptHeader := c.GetHeader("Accept") + + if strings.Contains(acceptHeader, "application/json") { + c.JSON(statusCode, gin.H{"error": message}) + } else { + c.String(statusCode, message) + } +} + func ProcessKeyName(groupName, modelName string) string { return groupName + PROFILE_SPLIT_CHAR + modelName }