add support for automatically unloading a model (#10) (#14)

* Make starting upstream process on-demand (#10) * Add automatic unload of model after TTL is reached * add `ttl` configuration parameter to models in seconds, default is 0 (never unload)
2024-11-19 16:32:51 -08:00
parent ba39ed4c18
commit 533162ce6a
8 changed files with 149 additions and 54 deletions
--- a/README.md
+++ b/README.md
@@ -39,6 +39,11 @@ models:
    # until the server is ready
    checkEndpoint: /custom-endpoint

+    # automatically unload the model after 5 seconds
+    # ttl is in seconds; set a value greater than 0 to enable unloading
+    # default: 0 = never unload model
+    ttl: 5
+
  "qwen":
    # environment variables to pass to the command
    env: