Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2026-06-09 07:16:44 +02:00)
server: allow API calls to set a lower thinking budget if a global budget is set
This commit is contained in:
@@ -1123,8 +1123,11 @@ json oaicompat_chat_params_parse(
|
||||
// Reasoning budget: pass parameters through to sampling layer
|
||||
{
|
||||
int reasoning_budget = opt.reasoning_budget;
|
||||
if (reasoning_budget == -1 && body.contains("thinking_budget_tokens")) {
|
||||
reasoning_budget = json_value(body, "thinking_budget_tokens", -1);
|
||||
if (body.contains("thinking_budget_tokens")) {
|
||||
int req_budget = json_value(body, "thinking_budget_tokens", -1);
|
||||
if (reasoning_budget == -1 || (req_budget != -1 && req_budget < reasoning_budget)) {
|
||||
reasoning_budget = req_budget;
|
||||
}
|
||||
}
|
||||
|
||||
if (!chat_params.thinking_end_tag.empty()) {
|
||||
|
||||
Reference in New Issue
Block a user