Stream loading state when swapping models (#371)

Swapping models can take a long time, leaving the client with a long silence while the new model loads. Rather than silently loading the model in the background, this PR allows llama-swap to send status updates in the `reasoning_content` of a streaming chat response. Fixes: #366
2025-10-29 00:09:39 -07:00
parent f852689104
commit a89b803d4a
8 changed files with 375 additions and 51 deletions
--- a/proxy/config/config.go
+++ b/proxy/config/config.go
@@ -129,6 +129,9 @@ type Config struct {

 	// hooks, see: #209
 	Hooks HooksConfig `yaml:"hooks"`
+
+	// send model loading status updates in the reasoning_content of streaming chat responses
+	SendLoadingState bool `yaml:"sendLoadingState"`
 }

 func (c *Config) RealModelName(search string) (string, bool) {
@@ -350,6 +353,13 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
 			)
 		}

+		// if sendLoadingState is nil, set it to the global config value
+		// see #366
+		if modelConfig.SendLoadingState == nil {
+			v := config.SendLoadingState // copy it
+			modelConfig.SendLoadingState = &v
+		}
+
 		config.Models[modelId] = modelConfig
 	}