From 7eec51f3f2b662e7a419d7dca4d1d39aacbe834a Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Tue, 19 Nov 2024 09:40:44 -0800 Subject: [PATCH] Dechunk HTTP requests by default (#11) ProxyManager already has all the Request body's data. There is never a need to use chunked transfer encoding to the upstream process. --- config.example.yaml | 6 +++--- proxy/process.go | 2 +- proxy/proxymanager.go | 6 ++++++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index 315951c..e28a941 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -7,7 +7,7 @@ models: cmd: > models/llama-server-osx --port 8999 - -m models/Llama-3.2-1B-Instruct-Q4_K_M.gguf + -m models/Llama-3.2-1B-Instruct-Q4_0.gguf proxy: http://127.0.0.1:8999 # list of model name aliases this llama.cpp instance can serve @@ -18,7 +18,7 @@ models: checkEndpoint: /health "qwen": - cmd: models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf + cmd: models/llama-server-osx --port 8999 -m models/qwen2.5-0.5b-instruct-q8_0.gguf proxy: http://127.0.0.1:8999 aliases: - gpt-3.5-turbo @@ -40,5 +40,5 @@ models: cmd: models/llama-server-osx --port 8999 -m models/doesnotexist.gguf proxy: http://127.0.0.1:8999 "broken_timeout": - cmd: models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf + cmd: models/llama-server-osx --port 8999 -m models/qwen2.5-0.5b-instruct-q8_0.gguf proxy: http://127.0.0.1:9000 \ No newline at end of file diff --git a/proxy/process.go b/proxy/process.go index d086edc..8a8e465 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -178,7 +178,7 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusInternalServerError) return } - req.Header = r.Header + req.Header = r.Header.Clone() resp, err := client.Do(req) if err != nil { http.Error(w, err.Error(), http.StatusBadGateway) diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go 
index e9d810a..8fc1035 100644 --- a/proxy/proxymanager.go +++ b/proxy/proxymanager.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "net/http" + "strconv" "sync" "time" @@ -121,6 +122,11 @@ func (pm *ProxyManager) proxyChatRequestHandler(c *gin.Context) { } c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) + + // dechunk it as we already have all the body bytes see issue #11 + c.Request.Header.Del("transfer-encoding") + c.Request.Header.Add("content-length", strconv.Itoa(len(bodyBytes))) + pm.currentProcess.ProxyRequest(c.Writer, c.Request) }