Merge pull request #13 from mostlygeek/set-content-length
Dechunk HTTP requests by default (#11)
This commit is contained in:
@@ -7,7 +7,7 @@ models:
|
||||
cmd: >
|
||||
models/llama-server-osx
|
||||
--port 8999
|
||||
-m models/Llama-3.2-1B-Instruct-Q4_K_M.gguf
|
||||
-m models/Llama-3.2-1B-Instruct-Q4_0.gguf
|
||||
proxy: http://127.0.0.1:8999
|
||||
|
||||
# list of model name aliases this llama.cpp instance can serve
|
||||
@@ -18,7 +18,7 @@ models:
|
||||
checkEndpoint: /health
|
||||
|
||||
"qwen":
|
||||
cmd: models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
|
||||
cmd: models/llama-server-osx --port 8999 -m models/qwen2.5-0.5b-instruct-q8_0.gguf
|
||||
proxy: http://127.0.0.1:8999
|
||||
aliases:
|
||||
- gpt-3.5-turbo
|
||||
@@ -40,5 +40,5 @@ models:
|
||||
cmd: models/llama-server-osx --port 8999 -m models/doesnotexist.gguf
|
||||
proxy: http://127.0.0.1:8999
|
||||
"broken_timeout":
|
||||
cmd: models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf
|
||||
cmd: models/llama-server-osx --port 8999 -m models/qwen2.5-0.5b-instruct-q8_0.gguf
|
||||
proxy: http://127.0.0.1:9000
|
||||
@@ -178,7 +178,7 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) {
|
||||
http.Error(w, err.Error(), http.StatusInternalServerError)
|
||||
return
|
||||
}
|
||||
req.Header = r.Header
|
||||
req.Header = r.Header.Clone()
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
http.Error(w, err.Error(), http.StatusBadGateway)
|
||||
|
||||
@@ -6,6 +6,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
@@ -121,6 +122,11 @@ func (pm *ProxyManager) proxyChatRequestHandler(c *gin.Context) {
|
||||
}
|
||||
|
||||
c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes))
|
||||
|
||||
// Dechunk the request: the whole body is already buffered in bodyBytes, so drop transfer-encoding and send an explicit content-length instead (see issue #11).
|
||||
c.Request.Header.Del("transfer-encoding")
|
||||
c.Request.Header.Add("content-length", strconv.Itoa(len(bodyBytes)))
|
||||
|
||||
pm.currentProcess.ProxyRequest(c.Writer, c.Request)
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user