From 7eec51f3f2b662e7a419d7dca4d1d39aacbe834a Mon Sep 17 00:00:00 2001 From: Benson Wong Date: Tue, 19 Nov 2024 09:40:44 -0800 Subject: [PATCH] Dechunk HTTP requests by default (#11) ProxyManager already has all the Request body's data. There is never a need to use chunked transfer encoding to the upstream process. --- config.example.yaml | 6 +++--- proxy/process.go | 2 +- proxy/proxymanager.go | 6 ++++++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/config.example.yaml b/config.example.yaml index 315951c..e28a941 100644 --- a/config.example.yaml +++ b/config.example.yaml @@ -7,7 +7,7 @@ models: cmd: > models/llama-server-osx --port 8999 - -m models/Llama-3.2-1B-Instruct-Q4_K_M.gguf + -m models/Llama-3.2-1B-Instruct-Q4_0.gguf proxy: http://127.0.0.1:8999 # list of model name aliases this llama.cpp instance can serve @@ -18,7 +18,7 @@ models: checkEndpoint: /health "qwen": - cmd: models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf + cmd: models/llama-server-osx --port 8999 -m models/qwen2.5-0.5b-instruct-q8_0.gguf proxy: http://127.0.0.1:8999 aliases: - gpt-3.5-turbo @@ -40,5 +40,5 @@ models: cmd: models/llama-server-osx --port 8999 -m models/doesnotexist.gguf proxy: http://127.0.0.1:8999 "broken_timeout": - cmd: models/llama-server-osx --port 8999 -m models/Qwen2.5-1.5B-Instruct-Q4_K_M.gguf + cmd: models/llama-server-osx --port 8999 -m models/qwen2.5-0.5b-instruct-q8_0.gguf proxy: http://127.0.0.1:9000 \ No newline at end of file diff --git a/proxy/process.go b/proxy/process.go index d086edc..8a8e465 100644 --- a/proxy/process.go +++ b/proxy/process.go @@ -178,7 +178,7 @@ func (p *Process) ProxyRequest(w http.ResponseWriter, r *http.Request) { http.Error(w, err.Error(), http.StatusInternalServerError) return } - req.Header = r.Header + req.Header = r.Header.Clone() resp, err := client.Do(req) if err != nil { http.Error(w, err.Error(), http.StatusBadGateway) diff --git a/proxy/proxymanager.go b/proxy/proxymanager.go 
index e9d810a..8fc1035 100644 --- a/proxy/proxymanager.go +++ b/proxy/proxymanager.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "net/http" + "strconv" "sync" "time" @@ -121,6 +122,11 @@ func (pm *ProxyManager) proxyChatRequestHandler(c *gin.Context) { } c.Request.Body = io.NopCloser(bytes.NewBuffer(bodyBytes)) + + // dechunk it as we already have all the body bytes see issue #11 + c.Request.Header.Del("transfer-encoding") + c.Request.Header.Add("content-length", strconv.Itoa(len(bodyBytes))) + pm.currentProcess.ProxyRequest(c.Writer, c.Request) }