common/chat : fix LFM2/LFM2.5 reasoning round-trip and <think> leak (#24234)

* common/chat : fix LFM2 reasoning round-trip and stray <think> leak
* Gate by reasoning format and whether the template supports <think>
This commit is contained in:
Tarek Dakhran
2026-06-06 22:39:21 +02:00
committed by GitHub
parent 31e82494c0
commit 98d5e8ba8a
3 changed files with 263 additions and 179 deletions
+15 -4
View File
@@ -1625,8 +1625,17 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
const std::string THINK_END = "</think>";
const std::string GEN_PROMPT = "<|im_start|>assistant\n";
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
// Copy reasoning to the "thinking" field the template expects
auto adjusted_messages = json::array();
for (auto msg : inputs.messages) {
if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
msg["thinking"] = msg.at("reasoning_content");
}
adjusted_messages.push_back(msg);
}
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs, adjusted_messages);
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs, adjusted_messages);
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
data.supports_thinking = true;
data.preserved_tokens = { TOOL_CALL_START, TOOL_CALL_END, THINK_START, THINK_END };
@@ -1639,7 +1648,9 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
data.thinking_end_tag = THINK_END;
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
// Gate by reasoning format and whether the template supports <think>
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE &&
tmpl.source().find(THINK_START) != std::string::npos;
auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
if (inputs.has_continuation()) {
@@ -1658,7 +1669,7 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
auto end = p.end();
auto reasoning = p.eps();
if (extract_reasoning && inputs.enable_thinking) {
if (extract_reasoning) {
reasoning = p.optional(THINK_START + p.reasoning(p.until(THINK_END)) + THINK_END);
}
+115
View File
@@ -0,0 +1,115 @@
{#- Template preamble: the BOS token is emitted before anything else. -#}
{{- bos_token -}}
{#- preserve_thinking falls back to false when the caller does not supply it. Further down it decides whether reasoning of assistant turns that are not after the last user message is kept. -#}
{%- set preserve_thinking = preserve_thinking | default(false) -%}
{#- format_arg_value: renders one tool-call argument value.
    Strings are wrapped in single quotes (the value is not escaped), mappings are serialized with tojson, and every other value goes through the string filter. -#}
{%- macro format_arg_value(arg_value) -%}
{%- if arg_value is string -%}
{{- "'" + arg_value + "'" -}}
{%- elif arg_value is mapping -%}
{{- arg_value | tojson -}}
{%- else -%}
{{- arg_value | string -}}
{%- endif -%}
{%- endmacro -%}
{#- parse_content: flattens message content into a single string.
    A plain string is emitted unchanged. Any other value is iterated as a list of parts: image parts become a fixed image placeholder, text parts contribute their text field, and parts of any other type are appended as JSON. -#}
{%- macro parse_content(content) -%}
{%- if content is string -%}
{{- content -}}
{%- else -%}
{#- A namespace is used as the accumulator because a plain set inside a for loop does not persist across iterations. -#}
{%- set _ns = namespace(result="") -%}
{%- for item in content -%}
{%- if item["type"] == "image" -%}
{%- set _ns.result = _ns.result + "<image>" -%}
{%- elif item["type"] == "text" -%}
{%- set _ns.result = _ns.result + item["text"] -%}
{%- else -%}
{%- set _ns.result = _ns.result + item | tojson -%}
{%- endif -%}
{%- endfor -%}
{{- _ns.result -}}
{%- endif -%}
{%- endmacro -%}
{#- render_tool_calls: serializes the tool calls of an assistant message.
    Each call is rendered as name(arg=value, ...) using format_arg_value for the values; the calls are joined with a comma and a space, put inside square brackets, and wrapped in the tool-call start and end markers.
    NOTE(review): the arguments are iterated with items(), so they are presumably already a mapping rather than a JSON string; confirm against the caller. -#}
{%- macro render_tool_calls(tool_calls) -%}
{#- Namespaces collect the rendered pieces so that values appended inside the loops survive each iteration. -#}
{%- set tool_calls_ns = namespace(tool_calls=[]) -%}
{%- for tool_call in tool_calls -%}
{%- set func_name = tool_call["function"]["name"] -%}
{%- set func_args = tool_call["function"]["arguments"] -%}
{%- set args_ns = namespace(arg_strings=[]) -%}
{%- for arg_name, arg_value in func_args.items() -%}
{%- set args_ns.arg_strings = args_ns.arg_strings + [arg_name + "=" + format_arg_value(arg_value)] -%}
{%- endfor -%}
{%- set tool_calls_ns.tool_calls = tool_calls_ns.tool_calls + [func_name + "(" + (args_ns.arg_strings | join(", ")) + ")"] -%}
{%- endfor -%}
{{- "<|tool_call_start|>[" + (tool_calls_ns.tool_calls | join(", ")) + "]<|tool_call_end|>" -}}
{%- endmacro -%}
{#- Builds the system turn. ns.system_prompt accumulates its text; ns.last_user_index starts at -1 and is filled in later by the scan over the messages. -#}
{%- set ns = namespace(system_prompt="", last_user_index=-1) -%}
{#- A leading system message is consumed here: its content (when non-empty) seeds the system prompt and the message is removed from the list that is rendered afterwards. -#}
{%- if messages[0]["role"] == "system" -%}
{%- if messages[0].get("content") -%}
{%- set ns.system_prompt = parse_content(messages[0]["content"]) -%}
{%- endif -%}
{%- set messages = messages[1:] -%}
{%- endif -%}
{#- When tools are given, a bracketed tool list is appended to the system prompt, separated from existing text by a newline. Tools that are not already strings are serialized with tojson; entries are separated by a comma and a space. -#}
{%- if tools -%}
{%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: [" -%}
{%- for tool in tools -%}
{%- if tool is not string -%}
{%- set tool = tool | tojson -%}
{%- endif -%}
{%- set ns.system_prompt = ns.system_prompt + tool -%}
{%- if not loop.last -%}
{%- set ns.system_prompt = ns.system_prompt + ", " -%}
{%- endif -%}
{%- endfor -%}
{%- set ns.system_prompt = ns.system_prompt + "]" -%}
{%- endif -%}
{#- The system turn is only emitted when there is something to put in it. -#}
{%- if ns.system_prompt -%}
{{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
{%- endif -%}
{#- Records the zero-based position of the last user message in the list being rendered (a leading system message has already been removed from it). The value stays at its initial -1 when there is no user message. -#}
{%- for message in messages -%}
{%- if message["role"] == "user" -%}
{%- set ns.last_user_index = loop.index0 -%}
{%- endif -%}
{%- endfor -%}
{#- Renders every remaining message as one turn: start marker, role, newline, body, end marker. -#}
{%- for message in messages -%}
{{- "<|im_start|>" + message.role + "\n" -}}
{%- if message.role == "assistant" -%}
{#- NOTE(review): the generation block presumably marks the assistant-produced span for tooling that understands the tag; confirm. -#}
{%- generation -%}
{#- Reasoning from the thinking field is emitted inside think tags only when preserve_thinking is set or the turn comes after the last user message. -#}
{%- if message.thinking is defined and (preserve_thinking or loop.index0 > ns.last_user_index) -%}
{{- "<think>" + message.thinking + "</think>" -}}
{%- endif -%}
{#- Sentinel handling: when the content ends with this tag and the message also has tool calls, the tag is cut from the content and re-emitted after the tool calls so it stays at the very end of the turn. -#}
{%- set _cfm_tag = "CONTINUE_FINAL_MESSAGE_TAG " -%}
{%- set _has_cfm = false -%}
{%- if message.content is defined -%}
{%- set content = parse_content(message.content) -%}
{#- For turns whose reasoning is not preserved, everything up to and including the last closing think tag found in the content is dropped and the remainder is trimmed. -#}
{%- if not (preserve_thinking or loop.index0 > ns.last_user_index) -%}
{%- if "</think>" in content -%}
{%- set content = content.split("</think>")[-1] | trim -%}
{%- endif -%}
{%- endif -%}
{%- if message.tool_calls is defined and content.endswith(_cfm_tag) -%}
{%- set _has_cfm = true -%}
{%- set _trunc_len = (content | length) - (_cfm_tag | length) -%}
{{- content[:_trunc_len] -}}
{%- else -%}
{{- content -}}
{%- endif -%}
{%- endif -%}
{%- if message.tool_calls is defined -%}
{{- render_tool_calls(message.tool_calls) -}}
{%- endif -%}
{%- if _has_cfm -%}
{{- _cfm_tag -}}
{%- endif -%}
{{- "<|im_end|>\n" -}}
{%- endgeneration -%}
{%- else %}
{#- Non-assistant turns: the flattened content is emitted when it is non-empty, followed by the end marker. -#}
{%- if message.get("content") -%}
{{- parse_content(message["content"]) -}}
{%- endif -%}
{{- "<|im_end|>\n" -}}
{%- endif %}
{%- endfor -%}
{#- When a generation prompt is requested, an assistant turn is opened and left unterminated for the model to complete. -#}
{%- if add_generation_prompt -%}
{{- "<|im_start|>assistant\n" -}}
{%- endif -%}
+133 -175
View File
@@ -1825,6 +1825,104 @@ static void test_convert_responses_to_chatcmpl() {
}
}
// Shared LFM2 parser cases - all variants use one output format and parser
static void test_lfm2_parser(const std::string & template_path, bool detailed_debug) {
auto tst = peg_tester(template_path, detailed_debug);
// Basic content only
tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
// Single tool call without reasoning
tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
.tools({ special_function_tool })
.expect(message_assist_call)
.run();
// Tool call with string argument
tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>")
.tools({ get_time_tool })
.expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
.run();
// Python literals become JSON
tst.test("<|tool_call_start|>[toggle(enabled=True)]<|tool_call_end|>")
.tools({ toggle_tool })
.expect(message_with_tool_calls("toggle", R"({"enabled": true})"))
.run();
tst.test("<|tool_call_start|>[set_nullable(value=None)]<|tool_call_end|>")
.tools({ nullable_tool })
.expect(message_with_tool_calls("set_nullable", R"({"value": null})"))
.run();
// Nested Python literal
tst.test("<|tool_call_start|>[set_config(config={\"enabled\": True, \"count\": 3})]<|tool_call_end|>")
.tools({ config_tool })
.expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "count": 3}})"))
.run();
// JSON literals are accepted too
tst.test("<|tool_call_start|>[set_config(config={\"enabled\": true, \"note\": null})]<|tool_call_end|>")
.tools({ config_tool })
.expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "note": null}})"))
.run();
// Dotted function name with structured args
tst.test("<|tool_call_start|>[Calendar.create_event(title=\"demo\", participants=[\"Alice\", \"Bob\"], "
"metadata={\"priority\": \"high\", \"reminder\": true})]<|tool_call_end|>")
.tools({ calendar_create_event_tool })
.expect(message_with_tool_calls(
"Calendar.create_event",
R"({"title": "demo", "participants": ["Alice", "Bob"], "metadata": {"priority": "high", "reminder": true}})"))
.run();
// Markdown links stay content
tst.test("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org).")
.tools({ get_time_tool })
.expect(simple_assist_msg("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org)."))
.run();
// Python tool with multiline code in string
tst.test("<|tool_call_start|>[python(code=\"def hello():\\n print('hey')\")]<|tool_call_end|>")
.tools({ python_tool })
.expect_tool_calls({
{ "python", R"#({"code": "def hello():\\n print('hey')"})#", "" }
})
.run();
// Content before tool call (no reasoning)
tst.test("Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
.tools({ get_time_tool })
.expect(message_with_reasoning_content_and_multiple_tool_calls(
"", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
))
.run();
// Multiple tool calls (parallel)
tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>")
.parallel_tool_calls(true)
.tools({ special_function_tool, special_function_tool_with_optional_param })
.expect_tool_calls({
{ "special_function", R"({"arg1": 1})", {} },
{ "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
})
.run();
// Partial tool call (streaming)
tst.test("<|tool_call_start|>[special_function(arg1=")
.tools({ special_function_tool })
.is_partial(true)
.expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
.run();
// Tool call with empty arguments
tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>")
.tools({ empty_args_tool })
.expect(simple_assist_msg("", "", "empty_args", "{}"))
.run();
}
static void test_template_output_peg_parsers(bool detailed_debug) {
LOG_DBG("%s\n", __func__);
@@ -4038,49 +4136,30 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
.run();
}
// LFM2-8B-A1B tests - uses <|tool_list_start|>/<|tool_list_end|> and <|tool_call_start|>[name(args)]<|tool_call_end|>
for (const char * tmpl : {
"models/templates/LFM2-8B-A1B.jinja",
"models/templates/LFM2.5-Instruct.jinja",
"models/templates/LFM2.5-8B-A1B.jinja",
}) {
test_lfm2_parser(tmpl, detailed_debug);
}
// Thinking cases only apply to LFM2.5-8B-A1B, the one LFM2 template that emits <think>
{
auto tst = peg_tester("models/templates/LFM2-8B-A1B.jinja", detailed_debug);
auto tst = peg_tester("models/templates/LFM2.5-8B-A1B.jinja", detailed_debug);
// Basic content only
tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
// Reasoning is parsed independent of enable_thinking
// Single tool call without reasoning
tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
.tools({ special_function_tool })
.expect(message_assist_call)
.run();
// Tool call with string argument
tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>")
.tools({ get_time_tool })
.expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
.run();
// Tool call with reasoning (enable_thinking=true)
// Tool call with reasoning
tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
.enable_thinking(true)
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
.tools({ special_function_tool })
.expect(message_assist_call_thoughts)
.run();
// Multiple tool calls (parallel)
tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>")
.parallel_tool_calls(true)
.tools({
special_function_tool, special_function_tool_with_optional_param
})
.expect_tool_calls({
{ "special_function", R"({"arg1": 1})", {} },
{ "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
})
.run();
// Tool call with reasoning and content
tst.test("<think>I need to call a function</think>"
"Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
.enable_thinking(true)
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
.tools({ get_time_tool })
.expect(message_with_reasoning_content_and_multiple_tool_calls(
@@ -4088,32 +4167,9 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
))
.run();
// Python tool with multiline code in string
tst.test("<|tool_call_start|>[python(code=\"def hello():\\n print('hey')\")]<|tool_call_end|>")
.tools({ python_tool })
.expect_tool_calls({
{ "python", R"#({"code": "def hello():\\n print('hey')"})#", "" }
})
.run();
// Partial tool call (streaming)
tst.test("<|tool_call_start|>[special_function(arg1=")
.tools({ special_function_tool })
.is_partial(true)
.expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
.run();
// Tool call with empty arguments
tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>")
.tools({ empty_args_tool })
.expect(simple_assist_msg("", "", "empty_args", "{}"))
.run();
// fake tool call marker in reasoning
tst.test(
"<think>Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm</think>"
"<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
.enable_thinking(true)
// Fake tool call marker inside reasoning is not parsed as a call
tst.test("<think>Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm</think>"
"<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
.tools({ special_function_tool })
.expect_reasoning("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm")
@@ -4122,127 +4178,21 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
})
.run();
// Continuation tests
tst.test("world!\nWhat's up?")
// enable_thinking=false still captures emitted reasoning
tst.test("<think>I'm\nthinking</think>Hello, world!\nWhat's up?")
.enable_thinking(false)
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
.enable_thinking(true)
.messages({ message_user, message_assist_prefill_content })
.add_generation_prompt(false)
.continue_final_message(COMMON_CHAT_CONTINUATION_CONTENT)
.expect_reasoning("I'm thinking")
.expect_content("Hello, world!\nWhat's up?")
.expect(message_assist_thoughts)
.run();
tst.test(" thinking</think>Hello, world!\nWhat's up?")
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
.enable_thinking(true)
.messages({ message_user, message_assist_prefill_reasoning })
.add_generation_prompt(false)
.continue_final_message(COMMON_CHAT_CONTINUATION_REASONING)
.expect_reasoning("I'm thinking")
.expect_content("Hello, world!\nWhat's up?")
.run();
}
// LFM2.5 tests - format <|tool_call_start|>[name(args)]<|tool_call_end|>
{
auto tst = peg_tester("models/templates/LFM2.5-Instruct.jinja", detailed_debug);
// Basic content only
tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
// Single tool call without reasoning
tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
.tools({ special_function_tool })
.expect(message_assist_call)
.run();
// Tool call with string argument
tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>")
.tools({ get_time_tool })
.expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
.run();
// Python literals become JSON.
tst.test("<|tool_call_start|>[toggle(enabled=True)]<|tool_call_end|>")
.tools({ toggle_tool })
.expect(message_with_tool_calls("toggle", R"({"enabled": true})"))
.run();
tst.test("<|tool_call_start|>[set_nullable(value=None)]<|tool_call_end|>")
.tools({ nullable_tool })
.expect(message_with_tool_calls("set_nullable", R"({"value": null})"))
.run();
// Nested Python literal.
tst.test("<|tool_call_start|>[set_config(config={\"enabled\": True, \"count\": 3})]<|tool_call_end|>")
.tools({ config_tool })
.expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "count": 3}})"))
.run();
// JSON literals are accepted too.
tst.test("<|tool_call_start|>[set_config(config={\"enabled\": true, \"note\": null})]<|tool_call_end|>")
.tools({ config_tool })
.expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "note": null}})"))
.run();
// Dotted function name with structured args.
tst.test("<|tool_call_start|>[Calendar.create_event(title=\"demo\", participants=[\"Alice\", \"Bob\"], "
"metadata={\"priority\": \"high\", \"reminder\": true})]<|tool_call_end|>")
.tools({ calendar_create_event_tool })
.expect(message_with_tool_calls(
"Calendar.create_event",
R"({"title": "demo", "participants": ["Alice", "Bob"], "metadata": {"priority": "high", "reminder": true}})"))
.run();
// Markdown links stay content.
tst.test("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org).")
.tools({ get_time_tool })
.expect(simple_assist_msg("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org)."))
.run();
// Tool call with reasoning (enable_thinking=true)
tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
.enable_thinking(true)
.enable_thinking(false)
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
.tools({ special_function_tool })
.expect(message_assist_call_thoughts)
.run();
// Multiple tool calls (parallel)
tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>")
.parallel_tool_calls(true)
.tools({
special_function_tool, special_function_tool_with_optional_param
})
.expect_tool_calls({
{ "special_function", R"({"arg1": 1})", {} },
{ "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
})
.run();
// Tool call with content before tool call
tst.test("Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
.tools({ get_time_tool })
.expect(message_with_reasoning_content_and_multiple_tool_calls(
"", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
))
.run();
// Partial tool call (streaming)
tst.test("<|tool_call_start|>[special_function(arg1=")
.tools({ special_function_tool })
.is_partial(true)
.expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
.run();
// Tool call with empty arguments
tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>")
.tools({ empty_args_tool })
.expect(simple_assist_msg("", "", "empty_args", "{}"))
.run();
// Continuation tests
// Continuation: prefill content
tst.test("world!\nWhat's up?")
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
.enable_thinking(true)
@@ -4253,6 +4203,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
.expect_content("Hello, world!\nWhat's up?")
.run();
// Continuation: prefill reasoning
tst.test(" thinking</think>Hello, world!\nWhat's up?")
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
.enable_thinking(true)
@@ -5478,18 +5429,25 @@ static void test_template_generation_prompt() {
check(tmpls, continuation_reasoning(), "<|im_assistant|>assistant<|im_middle|><think>I'm");
}
{
auto tmpls = read_templates("models/templates/LFM2-8B-A1B.jinja");
for (const char * tmpl : {
"models/templates/LFM2-8B-A1B.jinja",
"models/templates/LFM2.5-Instruct.jinja",
"models/templates/LFM2.5-8B-A1B.jinja",
}) {
auto tmpls = read_templates(tmpl);
check(tmpls, basic(), "<|im_start|>assistant\n");
check(tmpls, continuation_content(), "<|im_start|>assistant\n<think>I'm thinking</think>Hello, ");
check(tmpls, continuation_reasoning(), "<|im_start|>assistant\n<think>I'm");
}
{
auto tmpls = read_templates("models/templates/LFM2.5-Instruct.jinja");
check(tmpls, basic(), "<|im_start|>assistant\n");
check(tmpls, continuation_content(), "<|im_start|>assistant\n<think>I'm thinking</think>Hello, ");
check(tmpls, continuation_reasoning(), "<|im_start|>assistant\n<think>I'm");
// 8B-A1B renders prior-turn reasoning via the "thinking" field
auto tmpls = read_templates("models/templates/LFM2.5-8B-A1B.jinja");
common_chat_templates_inputs inputs;
inputs.messages = { message_user, message_assist_call_thoughts, tool_msg };
inputs.add_generation_prompt = true;
auto params = common_chat_templates_apply(tmpls.get(), inputs);
assert_contains(params.prompt, "<think>I'm\nthinking</think>");
}
{