Stream loading state when swapping models (#371)

Swapping models can take a long time, leaving the client with a long stretch of silence while the new model loads. Rather than silently loading the model in the background, this PR allows llama-swap to send status updates in the reasoning_content of a streaming chat response. Fixes: #366
2025-10-29 00:09:39 -07:00
parent f852689104
commit a89b803d4a
8 changed files with 375 additions and 51 deletions
--- a/proxy/config/model_config.go
+++ b/proxy/config/model_config.go
@@ -35,6 +35,9 @@ type ModelConfig struct {
 	// Metadata: see #264
 	// Arbitrary metadata that can be exposed through the API
 	Metadata map[string]any `yaml:"metadata"`
+
+	// override global setting
+	SendLoadingState *bool `yaml:"sendLoadingState"`
 }

 func (m *ModelConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {