common/chat : fix LFM2/LFM2.5 reasoning round-trip and <think> leak (#24234)

* common/chat : fix LFM2 reasoning round-trip and stray <think> leak * Gate by reasoning format and whether the template supports <think>
2026-06-09 07:16:44 +02:00 · 2026-06-06 22:39:21 +02:00
parent 31e82494c0
commit 98d5e8ba8a
3 changed files with 263 additions and 179 deletions
@@ -1625,8 +1625,17 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
    const std::string THINK_END       = "</think>";
    const std::string GEN_PROMPT      = "<|im_start|>assistant\n";

-    data.prompt            = common_chat_template_direct_apply_impl(tmpl, inputs);
-    data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
+    // Copy reasoning to the "thinking" field the template expects
+    auto adjusted_messages = json::array();
+    for (auto msg : inputs.messages) {
+        if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
+            msg["thinking"] = msg.at("reasoning_content");
+        }
+        adjusted_messages.push_back(msg);
+    }
+
+    data.prompt            = common_chat_template_direct_apply_impl(tmpl, inputs, adjusted_messages);
+    data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs, adjusted_messages);
    data.format            = COMMON_CHAT_FORMAT_PEG_NATIVE;
    data.supports_thinking = true;
    data.preserved_tokens  = { TOOL_CALL_START, TOOL_CALL_END, THINK_START, THINK_END };
@@ -1639,7 +1648,9 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
    data.thinking_end_tag   = THINK_END;

    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
-    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    // Gate by reasoning format and whether the template supports <think>
+    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE &&
+                             tmpl.source().find(THINK_START) != std::string::npos;
    auto include_grammar   = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;

    if (inputs.has_continuation()) {
@@ -1658,7 +1669,7 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
        auto end = p.end();

        auto reasoning = p.eps();
-        if (extract_reasoning && inputs.enable_thinking) {
+        if (extract_reasoning) {
            reasoning = p.optional(THINK_START + p.reasoning(p.until(THINK_END)) + THINK_END);
        }

@@ -0,0 +1,115 @@
+{{- bos_token -}}
+{%- set preserve_thinking = preserve_thinking | default(false) -%}
+
+{%- macro format_arg_value(arg_value) -%}
+    {%- if arg_value is string -%}
+        {{- "'" + arg_value + "'" -}}
+    {%- elif arg_value is mapping -%}
+        {{- arg_value | tojson -}}
+    {%- else -%}
+        {{- arg_value | string -}}
+    {%- endif -%}
+{%- endmacro -%}
+
+{%- macro parse_content(content) -%}
+    {%- if content is string -%}
+        {{- content -}}
+    {%- else -%}
+        {%- set _ns = namespace(result="") -%}
+        {%- for item in content -%}
+            {%- if item["type"] == "image" -%}
+                {%- set _ns.result = _ns.result + "<image>" -%}
+            {%- elif item["type"] == "text" -%}
+                {%- set _ns.result = _ns.result + item["text"] -%}
+            {%- else -%}
+                {%- set _ns.result = _ns.result + item | tojson -%}
+            {%- endif -%}
+        {%- endfor -%}
+        {{- _ns.result -}}
+    {%- endif -%}
+{%- endmacro -%}
+
+{%- macro render_tool_calls(tool_calls) -%}
+    {%- set tool_calls_ns = namespace(tool_calls=[]) -%}
+    {%- for tool_call in tool_calls -%}
+        {%- set func_name = tool_call["function"]["name"] -%}
+        {%- set func_args = tool_call["function"]["arguments"] -%}
+        {%- set args_ns = namespace(arg_strings=[]) -%}
+        {%- for arg_name, arg_value in func_args.items() -%}
+            {%- set args_ns.arg_strings = args_ns.arg_strings + [arg_name + "=" + format_arg_value(arg_value)] -%}
+        {%- endfor -%}
+        {%- set tool_calls_ns.tool_calls = tool_calls_ns.tool_calls + [func_name + "(" + (args_ns.arg_strings | join(", ")) + ")"] -%}
+    {%- endfor -%}
+    {{- "<|tool_call_start|>[" + (tool_calls_ns.tool_calls | join(", ")) + "]<|tool_call_end|>" -}}
+{%- endmacro -%}
+
+{%- set ns = namespace(system_prompt="", last_user_index=-1) -%}
+{%- if messages[0]["role"] == "system" -%}
+    {%- if messages[0].get("content") -%}
+        {%- set ns.system_prompt = parse_content(messages[0]["content"]) -%}
+    {%- endif -%}
+    {%- set messages = messages[1:] -%}
+{%- endif -%}
+{%- if tools -%}
+    {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: [" -%}
+    {%- for tool in tools -%}
+        {%- if tool is not string -%}
+            {%- set tool = tool | tojson -%}
+        {%- endif -%}
+        {%- set ns.system_prompt = ns.system_prompt + tool -%}
+        {%- if not loop.last -%}
+            {%- set ns.system_prompt = ns.system_prompt + ", " -%}
+        {%- endif -%}
+    {%- endfor -%}
+    {%- set ns.system_prompt = ns.system_prompt + "]" -%}
+{%- endif -%}
+{%- if ns.system_prompt -%}
+    {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
+{%- endif -%}
+{%- for message in messages -%}
+    {%- if message["role"] == "user" -%}
+        {%- set ns.last_user_index = loop.index0 -%}
+    {%- endif -%}
+{%- endfor -%}
+{%- for message in messages -%}
+    {{- "<|im_start|>" + message.role + "\n" -}}
+    {%- if message.role == "assistant" -%}
+        {%- generation -%}
+        {%- if message.thinking is defined and (preserve_thinking or loop.index0 > ns.last_user_index) -%}
+            {{- "<think>" + message.thinking + "</think>" -}}
+        {%- endif -%}
+        {%- set _cfm_tag = "CONTINUE_FINAL_MESSAGE_TAG " -%}
+        {%- set _has_cfm = false -%}
+        {%- if message.content is defined -%}
+            {%- set content = parse_content(message.content) -%}
+            {%- if not (preserve_thinking or loop.index0 > ns.last_user_index) -%}
+                {%- if "</think>" in content -%}
+                    {%- set content = content.split("</think>")[-1] | trim -%}
+                {%- endif -%}
+            {%- endif -%}
+            {%- if message.tool_calls is defined and content.endswith(_cfm_tag) -%}
+                {%- set _has_cfm = true -%}
+                {%- set _trunc_len = (content | length) - (_cfm_tag | length) -%}
+                {{- content[:_trunc_len] -}}
+            {%- else -%}
+                {{- content -}}
+            {%- endif -%}
+        {%- endif -%}
+        {%- if message.tool_calls is defined -%}
+            {{- render_tool_calls(message.tool_calls) -}}
+        {%- endif -%}
+        {%- if _has_cfm -%}
+            {{- _cfm_tag -}}
+        {%- endif -%}
+        {{- "<|im_end|>\n" -}}
+        {%- endgeneration -%}
+    {%- else %}
+        {%- if message.get("content") -%}
+            {{- parse_content(message["content"]) -}}
+        {%- endif -%}
+        {{- "<|im_end|>\n" -}}
+    {%- endif %}
+{%- endfor -%}
+{%- if add_generation_prompt -%}
+    {{- "<|im_start|>assistant\n" -}}
+{%- endif -%}
@@ -1825,6 +1825,104 @@ static void test_convert_responses_to_chatcmpl() {
    }
 }

+// Shared LFM2 parser cases - all variants use one output format and parser
+static void test_lfm2_parser(const std::string & template_path, bool detailed_debug) {
+    auto tst = peg_tester(template_path, detailed_debug);
+
+    // Basic content only
+    tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+
+    // Single tool call without reasoning
+    tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
+        .tools({ special_function_tool })
+        .expect(message_assist_call)
+        .run();
+
+    // Tool call with string argument
+    tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>")
+        .tools({ get_time_tool })
+        .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
+        .run();
+
+    // Python literals become JSON
+    tst.test("<|tool_call_start|>[toggle(enabled=True)]<|tool_call_end|>")
+        .tools({ toggle_tool })
+        .expect(message_with_tool_calls("toggle", R"({"enabled": true})"))
+        .run();
+
+    tst.test("<|tool_call_start|>[set_nullable(value=None)]<|tool_call_end|>")
+        .tools({ nullable_tool })
+        .expect(message_with_tool_calls("set_nullable", R"({"value": null})"))
+        .run();
+
+    // Nested Python literal
+    tst.test("<|tool_call_start|>[set_config(config={\"enabled\": True, \"count\": 3})]<|tool_call_end|>")
+        .tools({ config_tool })
+        .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "count": 3}})"))
+        .run();
+
+    // JSON literals are accepted too
+    tst.test("<|tool_call_start|>[set_config(config={\"enabled\": true, \"note\": null})]<|tool_call_end|>")
+        .tools({ config_tool })
+        .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "note": null}})"))
+        .run();
+
+    // Dotted function name with structured args
+    tst.test("<|tool_call_start|>[Calendar.create_event(title=\"demo\", participants=[\"Alice\", \"Bob\"], "
+             "metadata={\"priority\": \"high\", \"reminder\": true})]<|tool_call_end|>")
+        .tools({ calendar_create_event_tool })
+        .expect(message_with_tool_calls(
+            "Calendar.create_event",
+            R"({"title": "demo", "participants": ["Alice", "Bob"], "metadata": {"priority": "high", "reminder": true}})"))
+        .run();
+
+    // Markdown links stay content
+    tst.test("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org).")
+        .tools({ get_time_tool })
+        .expect(simple_assist_msg("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org)."))
+        .run();
+
+    // Python tool with multiline code in string
+    tst.test("<|tool_call_start|>[python(code=\"def hello():\\n    print('hey')\")]<|tool_call_end|>")
+        .tools({ python_tool })
+        .expect_tool_calls({
+            { "python", R"#({"code": "def hello():\\n    print('hey')"})#", "" }
+        })
+        .run();
+
+    // Content before tool call (no reasoning)
+    tst.test("Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
+        .tools({ get_time_tool })
+        .expect(message_with_reasoning_content_and_multiple_tool_calls(
+            "", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
+        ))
+        .run();
+
+    // Multiple tool calls (parallel)
+    tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>")
+        .parallel_tool_calls(true)
+        .tools({ special_function_tool, special_function_tool_with_optional_param })
+        .expect_tool_calls({
+            { "special_function", R"({"arg1": 1})", {} },
+            { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
+        })
+        .run();
+
+    // Partial tool call (streaming)
+    tst.test("<|tool_call_start|>[special_function(arg1=")
+        .tools({ special_function_tool })
+        .is_partial(true)
+        .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
+        .run();
+
+    // Tool call with empty arguments
+    tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>")
+        .tools({ empty_args_tool })
+        .expect(simple_assist_msg("", "", "empty_args", "{}"))
+        .run();
+
+}
+
 static void test_template_output_peg_parsers(bool detailed_debug) {
    LOG_DBG("%s\n", __func__);

@@ -4038,49 +4136,30 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
            .run();
    }

-    // LFM2-8B-A1B tests - uses <|tool_list_start|>/<|tool_list_end|> and <|tool_call_start|>[name(args)]<|tool_call_end|>
+    for (const char * tmpl : {
+             "models/templates/LFM2-8B-A1B.jinja",
+             "models/templates/LFM2.5-Instruct.jinja",
+             "models/templates/LFM2.5-8B-A1B.jinja",
+         }) {
+        test_lfm2_parser(tmpl, detailed_debug);
+    }
+
+    // Thinking cases only apply to LFM2.5-8B-A1B, the one LFM2 template that emits <think>
    {
-        auto tst = peg_tester("models/templates/LFM2-8B-A1B.jinja", detailed_debug);
+        auto tst = peg_tester("models/templates/LFM2.5-8B-A1B.jinja", detailed_debug);

-        // Basic content only
-        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
+        // Reasoning is parsed independent of enable_thinking

-        // Single tool call without reasoning
-        tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
-            .tools({ special_function_tool })
-            .expect(message_assist_call)
-            .run();
-
-        // Tool call with string argument
-        tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>")
-            .tools({ get_time_tool })
-            .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
-            .run();
-
-        // Tool call with reasoning (enable_thinking=true)
+        // Tool call with reasoning
        tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
-            .enable_thinking(true)
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .tools({ special_function_tool })
            .expect(message_assist_call_thoughts)
            .run();

-        // Multiple tool calls (parallel)
-        tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>")
-            .parallel_tool_calls(true)
-            .tools({
-                special_function_tool, special_function_tool_with_optional_param
-            })
-            .expect_tool_calls({
-                { "special_function", R"({"arg1": 1})", {} },
-                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
-            })
-            .run();
-
        // Tool call with reasoning and content
        tst.test("<think>I need to call a function</think>"
                 "Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
-            .enable_thinking(true)
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .tools({ get_time_tool })
            .expect(message_with_reasoning_content_and_multiple_tool_calls(
@@ -4088,32 +4167,9 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
            ))
            .run();

-        // Python tool with multiline code in string
-        tst.test("<|tool_call_start|>[python(code=\"def hello():\\n    print('hey')\")]<|tool_call_end|>")
-            .tools({ python_tool })
-            .expect_tool_calls({
-                { "python", R"#({"code": "def hello():\\n    print('hey')"})#", "" }
-            })
-            .run();
-
-        // Partial tool call (streaming)
-        tst.test("<|tool_call_start|>[special_function(arg1=")
-            .tools({ special_function_tool })
-            .is_partial(true)
-            .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
-            .run();
-
-        // Tool call with empty arguments
-        tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>")
-            .tools({ empty_args_tool })
-            .expect(simple_assist_msg("", "", "empty_args", "{}"))
-            .run();
-
-        // fake tool call marker in reasoning
-        tst.test(
-               "<think>Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm</think>"
-               "<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
-            .enable_thinking(true)
+        // Fake tool call marker inside reasoning is not parsed as a call
+        tst.test("<think>Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm</think>"
+                 "<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .tools({ special_function_tool })
            .expect_reasoning("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm")
@@ -4122,127 +4178,21 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
            })
            .run();

-        // Continuation tests
-        tst.test("world!\nWhat's up?")
+        // enable_thinking=false still captures emitted reasoning
+        tst.test("<think>I'm\nthinking</think>Hello, world!\nWhat's up?")
+            .enable_thinking(false)
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-            .enable_thinking(true)
-            .messages({ message_user, message_assist_prefill_content })
-            .add_generation_prompt(false)
-            .continue_final_message(COMMON_CHAT_CONTINUATION_CONTENT)
-            .expect_reasoning("I'm thinking")
-            .expect_content("Hello, world!\nWhat's up?")
+            .expect(message_assist_thoughts)
            .run();

-        tst.test(" thinking</think>Hello, world!\nWhat's up?")
-            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-            .enable_thinking(true)
-            .messages({ message_user, message_assist_prefill_reasoning })
-            .add_generation_prompt(false)
-            .continue_final_message(COMMON_CHAT_CONTINUATION_REASONING)
-            .expect_reasoning("I'm thinking")
-            .expect_content("Hello, world!\nWhat's up?")
-            .run();
-    }
-
-    // LFM2.5 tests - format <|tool_call_start|>[name(args)]<|tool_call_end|>
-    {
-        auto tst = peg_tester("models/templates/LFM2.5-Instruct.jinja", detailed_debug);
-
-        // Basic content only
-        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
-
-        // Single tool call without reasoning
-        tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
-            .tools({ special_function_tool })
-            .expect(message_assist_call)
-            .run();
-
-        // Tool call with string argument
-        tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>")
-            .tools({ get_time_tool })
-            .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
-            .run();
-
-        // Python literals become JSON.
-        tst.test("<|tool_call_start|>[toggle(enabled=True)]<|tool_call_end|>")
-            .tools({ toggle_tool })
-            .expect(message_with_tool_calls("toggle", R"({"enabled": true})"))
-            .run();
-
-        tst.test("<|tool_call_start|>[set_nullable(value=None)]<|tool_call_end|>")
-            .tools({ nullable_tool })
-            .expect(message_with_tool_calls("set_nullable", R"({"value": null})"))
-            .run();
-
-        // Nested Python literal.
-        tst.test("<|tool_call_start|>[set_config(config={\"enabled\": True, \"count\": 3})]<|tool_call_end|>")
-            .tools({ config_tool })
-            .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "count": 3}})"))
-            .run();
-
-        // JSON literals are accepted too.
-        tst.test("<|tool_call_start|>[set_config(config={\"enabled\": true, \"note\": null})]<|tool_call_end|>")
-            .tools({ config_tool })
-            .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "note": null}})"))
-            .run();
-
-        // Dotted function name with structured args.
-        tst.test("<|tool_call_start|>[Calendar.create_event(title=\"demo\", participants=[\"Alice\", \"Bob\"], "
-                 "metadata={\"priority\": \"high\", \"reminder\": true})]<|tool_call_end|>")
-            .tools({ calendar_create_event_tool })
-            .expect(message_with_tool_calls(
-                "Calendar.create_event",
-                R"({"title": "demo", "participants": ["Alice", "Bob"], "metadata": {"priority": "high", "reminder": true}})"))
-            .run();
-
-        // Markdown links stay content.
-        tst.test("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org).")
-            .tools({ get_time_tool })
-            .expect(simple_assist_msg("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org)."))
-            .run();
-
-        // Tool call with reasoning (enable_thinking=true)
        tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
-            .enable_thinking(true)
+            .enable_thinking(false)
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .tools({ special_function_tool })
            .expect(message_assist_call_thoughts)
            .run();

-        // Multiple tool calls (parallel)
-        tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>")
-            .parallel_tool_calls(true)
-            .tools({
-                special_function_tool, special_function_tool_with_optional_param
-            })
-            .expect_tool_calls({
-                { "special_function", R"({"arg1": 1})", {} },
-                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
-            })
-            .run();
-
-        // Tool call with content before tool call
-        tst.test("Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
-            .tools({ get_time_tool })
-            .expect(message_with_reasoning_content_and_multiple_tool_calls(
-                "", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
-            ))
-            .run();
-
-        // Partial tool call (streaming)
-        tst.test("<|tool_call_start|>[special_function(arg1=")
-            .tools({ special_function_tool })
-            .is_partial(true)
-            .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
-            .run();
-
-        // Tool call with empty arguments
-        tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>")
-            .tools({ empty_args_tool })
-            .expect(simple_assist_msg("", "", "empty_args", "{}"))
-            .run();
-
-        // Continuation tests
+        // Continuation: prefill content
        tst.test("world!\nWhat's up?")
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .enable_thinking(true)
@@ -4253,6 +4203,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
            .expect_content("Hello, world!\nWhat's up?")
            .run();

+        // Continuation: prefill reasoning
        tst.test(" thinking</think>Hello, world!\nWhat's up?")
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .enable_thinking(true)
@@ -5478,18 +5429,25 @@ static void test_template_generation_prompt() {
        check(tmpls, continuation_reasoning(), "<|im_assistant|>assistant<|im_middle|><think>I'm");
    }

-    {
-        auto tmpls = read_templates("models/templates/LFM2-8B-A1B.jinja");
+    for (const char * tmpl : {
+             "models/templates/LFM2-8B-A1B.jinja",
+             "models/templates/LFM2.5-Instruct.jinja",
+             "models/templates/LFM2.5-8B-A1B.jinja",
+         }) {
+        auto tmpls = read_templates(tmpl);
        check(tmpls, basic(),                  "<|im_start|>assistant\n");
        check(tmpls, continuation_content(),   "<|im_start|>assistant\n<think>I'm thinking</think>Hello, ");
        check(tmpls, continuation_reasoning(), "<|im_start|>assistant\n<think>I'm");
    }

    {
-        auto tmpls = read_templates("models/templates/LFM2.5-Instruct.jinja");
-        check(tmpls, basic(),                  "<|im_start|>assistant\n");
-        check(tmpls, continuation_content(),   "<|im_start|>assistant\n<think>I'm thinking</think>Hello, ");
-        check(tmpls, continuation_reasoning(), "<|im_start|>assistant\n<think>I'm");
+        // 8B-A1B renders prior-turn reasoning via the "thinking" field
+        auto tmpls = read_templates("models/templates/LFM2.5-8B-A1B.jinja");
+        common_chat_templates_inputs inputs;
+        inputs.messages              = { message_user, message_assist_call_thoughts, tool_msg };
+        inputs.add_generation_prompt = true;
+        auto params = common_chat_templates_apply(tmpls.get(), inputs);
+        assert_contains(params.prompt, "<think>I'm\nthinking</think>");
    }

    {