Add docs for model.concurrencyLimit #263 [skip ci]

This commit is contained in:
Benson Wong
2025-08-22 16:08:37 -07:00
parent 7acbaf4712
commit c55d0cc842

View File

@@ -129,6 +129,15 @@ models:
# - recommended to stick to sampling parameters
strip_params: "temperature, top_p, top_k"
# concurrencyLimit: overrides the allowed number of active parallel requests to a model
# - optional, default: 0 (0 keeps the internal default limit)
# - useful for limiting the number of active parallel requests a model can process
# - must be set per model
# - any number greater than 0 will override the internal default value of 10
# - any request that exceeds the limit will receive an HTTP 429 Too Many Requests response
# - recommended: omit this setting and use the default
concurrencyLimit: 0
# Unlisted model example:
"qwen-unlisted":
# unlisted: boolean, true or false