Add barebones but working implementation of model preload (#209, #235)

Add barebones but working implementation of model preload

* add config test for Preload hook
* improve TestProxyManager_StartupHooks
* docs for new hook configuration
* add a .dev to .gitignore
This commit is contained in:
Benson Wong
2025-08-14 10:27:28 -07:00
committed by GitHub
parent 74c69f39ef
commit 5dc6b3e6d9
10 changed files with 199 additions and 13 deletions

View File

@@ -15,6 +15,7 @@ import (
"time"
"github.com/gin-gonic/gin"
"github.com/mostlygeek/llama-swap/event"
"github.com/tidwall/gjson"
"github.com/tidwall/sjson"
)
@@ -96,6 +97,35 @@ func New(config Config) *ProxyManager {
}
pm.setupGinEngine()
// Run any startup hooks: when the configuration lists models under
// Hooks.OnStartup.Preload, try to load each of them once during New.
if len(config.Hooks.OnStartup.Preload) > 0 {
// Preload in a background goroutine so that New returns pm without waiting
// for the models. The original author marked this as a tentative choice.
// NOTE(review): nothing visible here waits for or cancels this goroutine --
// confirm its lifetime is acceptable or handled elsewhere.
go func() {
// Response writer handed to ProxyRequest below. DiscardWriter is declared
// outside this view; presumably it drops whatever is written -- confirm.
discardWriter := &DiscardWriter{}
// Models are processed one at a time, in the order given by the config.
for _, realModelName := range config.Hooks.OnStartup.Preload {
proxyLogger.Infof("Preloading model: %s", realModelName)
// The second return value of swapProcessGroup is not needed here.
processGroup, _, err := pm.swapProcessGroup(realModelName)
if err != nil {
// Report the failure through an event and the log, then continue with
// the next model instead of aborting the whole preload list.
event.Emit(ModelPreloadedEvent{
ModelName: realModelName,
Success: false,
})
proxyLogger.Errorf("Failed to preload model %s: %v", realModelName, err)
continue
} else {
// Build a bare GET "/" request and pass it through the process group for
// this model, with the response going to discardWriter. Presumably this
// is what makes the model actually start -- confirm against ProxyRequest.
// The error from http.NewRequest is deliberately ignored here.
req, _ := http.NewRequest("GET", "/", nil)
// NOTE(review): any result of ProxyRequest is not inspected, so the
// Success: true event below is emitted unconditionally after this call --
// confirm that is intended.
processGroup.ProxyRequest(realModelName, discardWriter, req)
event.Emit(ModelPreloadedEvent{
ModelName: realModelName,
Success: true,
})
}
}
}()
}
return pm
}