Stream loading state when swapping models (#371)

Swapping models can take a long time, leaving the client with a long silence while the new model loads. Rather than silently loading the model in the background, this PR allows llama-swap to send status updates in the reasoning_content of a streaming chat response.

Fixes: #366
This commit is contained in:
Benson Wong
2025-10-29 00:09:39 -07:00
committed by GitHub
parent f852689104
commit a89b803d4a
8 changed files with 375 additions and 51 deletions

View File

@@ -129,6 +129,9 @@ type Config struct {
// hooks, see: #209
Hooks HooksConfig `yaml:"hooks"`
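// SendLoadingState is the global default for sending model loading status
// updates in the reasoning_content of a streaming response; a model that
// does not set its own sendLoadingState inherits this value. See: #366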
SendLoadingState bool `yaml:"sendLoadingState"`
}
func (c *Config) RealModelName(search string) (string, bool) {
@@ -350,6 +353,13 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
)
}
// if sendLoadingState is nil, set it to the global config value
// see #366
if modelConfig.SendLoadingState == nil {
v := config.SendLoadingState // copy it
modelConfig.SendLoadingState = &v
}
config.Models[modelId] = modelConfig
}