common/chat : fix LFM2/LFM2.5 reasoning round-trip and <think> leak (#24234)

* common/chat : fix LFM2 reasoning round-trip and stray <think> leak
* Gate by reasoning format and whether the template supports <think>
2026-06-09 07:16:44 +02:00 · 2026-06-06 22:39:21 +02:00
parent 31e82494c0
commit 98d5e8ba8a
3 changed files with 263 additions and 179 deletions
@@ -1625,8 +1625,17 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
    const std::string THINK_END       = "</think>";
    const std::string GEN_PROMPT      = "<|im_start|>assistant\n";
-    data.prompt            = common_chat_template_direct_apply_impl(tmpl, inputs);
+    // Copy reasoning to the "thinking" field the template expects
-    data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
+    auto adjusted_messages = json::array();
    for (auto msg : inputs.messages) {
        if (msg.contains("reasoning_content") && msg.at("reasoning_content").is_string()) {
            msg["thinking"] = msg.at("reasoning_content");
        }
        adjusted_messages.push_back(msg);
    }
    data.prompt            = common_chat_template_direct_apply_impl(tmpl, inputs, adjusted_messages);
    data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs, adjusted_messages);
    data.format            = COMMON_CHAT_FORMAT_PEG_NATIVE;
    data.supports_thinking = true;
    data.preserved_tokens  = { TOOL_CALL_START, TOOL_CALL_END, THINK_START, THINK_END };
@@ -1639,7 +1648,9 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
    data.thinking_end_tag   = THINK_END;
    auto has_tools         = inputs.tools.is_array() && !inputs.tools.empty();
-    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+    // Gate by reasoning format and whether the template supports <think>
    auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE &&
                             tmpl.source().find(THINK_START) != std::string::npos;
    auto include_grammar   = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
    if (inputs.has_continuation()) {
@@ -1658,7 +1669,7 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
        auto end = p.end();
        auto reasoning = p.eps();
-        if (extract_reasoning && inputs.enable_thinking) {
+        if (extract_reasoning) {
            reasoning = p.optional(THINK_START + p.reasoning(p.until(THINK_END)) + THINK_END);
        }
@@ -0,0 +1,115 @@
 {#- Emit the BOS token first, then default preserve_thinking to false when it is not supplied. -#}
 {{- bos_token -}}
 {%- set preserve_thinking = preserve_thinking | default(false) -%}
 {#- format_arg_value: render one tool-call argument value.
     Strings are wrapped in single quotes (no escaping is applied), mappings are
     serialized with tojson, and every other value goes through the string filter. -#}
 {%- macro format_arg_value(arg_value) -%}
    {%- if arg_value is string -%}
        {{- "'" + arg_value + "'" -}}
    {%- elif arg_value is mapping -%}
        {{- arg_value | tojson -}}
    {%- else -%}
        {{- arg_value | string -}}
    {%- endif -%}
 {%- endmacro -%}
 {#- parse_content: flatten a message's content into plain text.
     A string is emitted unchanged. Anything else is iterated as a list of items:
     "image" items become the literal <image>, "text" items contribute their text
     field, and any other item is appended as JSON. -#}
 {%- macro parse_content(content) -%}
    {%- if content is string -%}
        {{- content -}}
    {%- else -%}
        {#- A namespace is used so the accumulated text survives the for-loop scope. -#}
        {%- set _ns = namespace(result="") -%}
        {%- for item in content -%}
            {%- if item["type"] == "image" -%}
                {%- set _ns.result = _ns.result + "<image>" -%}
            {%- elif item["type"] == "text" -%}
                {%- set _ns.result = _ns.result + item["text"] -%}
            {%- else -%}
                {%- set _ns.result = _ns.result + item | tojson -%}
            {%- endif -%}
        {%- endfor -%}
        {{- _ns.result -}}
    {%- endif -%}
 {%- endmacro -%}
 {#- render_tool_calls: emit every tool call of a message as one bracketed,
     comma-separated list of name(arg=value, ...) entries, wrapped in the
     tool_call_start / tool_call_end markers. The arguments of each call must be
     a mapping because .items() is called on them. -#}
 {%- macro render_tool_calls(tool_calls) -%}
    {%- set tool_calls_ns = namespace(tool_calls=[]) -%}
    {%- for tool_call in tool_calls -%}
        {%- set func_name = tool_call["function"]["name"] -%}
        {%- set func_args = tool_call["function"]["arguments"] -%}
        {#- Collect "name=value" strings for this call; values go through format_arg_value. -#}
        {%- set args_ns = namespace(arg_strings=[]) -%}
        {%- for arg_name, arg_value in func_args.items() -%}
            {%- set args_ns.arg_strings = args_ns.arg_strings + [arg_name + "=" + format_arg_value(arg_value)] -%}
        {%- endfor -%}
        {%- set tool_calls_ns.tool_calls = tool_calls_ns.tool_calls + [func_name + "(" + (args_ns.arg_strings | join(", ")) + ")"] -%}
    {%- endfor -%}
    {{- "<|tool_call_start|>[" + (tool_calls_ns.tool_calls | join(", ")) + "]<|tool_call_end|>" -}}
 {%- endmacro -%}
 {#- ns carries state across loops: the accumulated system prompt text and the
     index of the last user message (-1 when there is none). -#}
 {%- set ns = namespace(system_prompt="", last_user_index=-1) -%}
 {#- A leading system message is folded into ns.system_prompt and removed from messages. -#}
 {%- if messages[0]["role"] == "system" -%}
    {%- if messages[0].get("content") -%}
        {%- set ns.system_prompt = parse_content(messages[0]["content"]) -%}
    {%- endif -%}
    {%- set messages = messages[1:] -%}
 {%- endif -%}
 {#- Tool definitions are appended to the system prompt as "List of tools: [...]";
     tools that are not already strings are serialized with tojson. -#}
 {%- if tools -%}
    {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: [" -%}
    {%- for tool in tools -%}
        {%- if tool is not string -%}
            {%- set tool = tool | tojson -%}
        {%- endif -%}
        {%- set ns.system_prompt = ns.system_prompt + tool -%}
        {%- if not loop.last -%}
            {%- set ns.system_prompt = ns.system_prompt + ", " -%}
        {%- endif -%}
    {%- endfor -%}
    {%- set ns.system_prompt = ns.system_prompt + "]" -%}
 {%- endif -%}
 {#- The system turn is emitted only when there is something to put in it. -#}
 {%- if ns.system_prompt -%}
    {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
 {%- endif -%}
 {#- First pass: record the index of the last user message. -#}
 {%- for message in messages -%}
    {%- if message["role"] == "user" -%}
        {%- set ns.last_user_index = loop.index0 -%}
    {%- endif -%}
 {%- endfor -%}
 {#- Second pass: render every remaining message as one im_start ... im_end turn. -#}
 {%- for message in messages -%}
    {{- "<|im_start|>" + message.role + "\n" -}}
    {%- if message.role == "assistant" -%}
        {%- generation -%}
        {#- The thinking field is rendered only when preserve_thinking is set or the
            message comes after the last user message. -#}
        {%- if message.thinking is defined and (preserve_thinking or loop.index0 > ns.last_user_index) -%}
            {{- "<think>" + message.thinking + "</think>" -}}
        {%- endif -%}
        {%- set _cfm_tag = "CONTINUE_FINAL_MESSAGE_TAG " -%}
        {%- set _has_cfm = false -%}
        {%- if message.content is defined -%}
            {%- set content = parse_content(message.content) -%}
            {#- When thinking is not being kept for this message, drop everything up to
                and including the last closing think tag in the content and trim the rest. -#}
            {%- if not (preserve_thinking or loop.index0 > ns.last_user_index) -%}
                {%- if "</think>" in content -%}
                    {%- set content = content.split("</think>")[-1] | trim -%}
                {%- endif -%}
            {%- endif -%}
            {#- If the content ends with the continue-final-message marker and tool calls
                are present, the marker is cut off here and re-emitted after the tool calls. -#}
            {%- if message.tool_calls is defined and content.endswith(_cfm_tag) -%}
                {%- set _has_cfm = true -%}
                {%- set _trunc_len = (content | length) - (_cfm_tag | length) -%}
                {{- content[:_trunc_len] -}}
            {%- else -%}
                {{- content -}}
            {%- endif -%}
        {%- endif -%}
        {%- if message.tool_calls is defined -%}
            {{- render_tool_calls(message.tool_calls) -}}
        {%- endif -%}
        {%- if _has_cfm -%}
            {{- _cfm_tag -}}
        {%- endif -%}
        {{- "<|im_end|>\n" -}}
        {%- endgeneration -%}
    {%- else %}
        {%- if message.get("content") -%}
            {{- parse_content(message["content"]) -}}
        {%- endif -%}
        {{- "<|im_end|>\n" -}}
    {%- endif %}
 {%- endfor -%}
 {#- Open an assistant turn for the model to complete when a generation prompt is requested. -#}
 {%- if add_generation_prompt -%}
    {{- "<|im_start|>assistant\n" -}}
 {%- endif -%}
@@ -1825,6 +1825,104 @@ static void test_convert_responses_to_chatcmpl() {
    }
 }
 // Shared LFM2 parser cases - all variants use one output format and parser
 static void test_lfm2_parser(const std::string & template_path, bool detailed_debug) {
    auto tst = peg_tester(template_path, detailed_debug);
    // Basic content only
    tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
    // Single tool call without reasoning
    tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
        .tools({ special_function_tool })
        .expect(message_assist_call)
        .run();
    // Tool call with string argument
    tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>")
        .tools({ get_time_tool })
        .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
        .run();
    // Python literals become JSON
    tst.test("<|tool_call_start|>[toggle(enabled=True)]<|tool_call_end|>")
        .tools({ toggle_tool })
        .expect(message_with_tool_calls("toggle", R"({"enabled": true})"))
        .run();
    tst.test("<|tool_call_start|>[set_nullable(value=None)]<|tool_call_end|>")
        .tools({ nullable_tool })
        .expect(message_with_tool_calls("set_nullable", R"({"value": null})"))
        .run();
    // Nested Python literal
    tst.test("<|tool_call_start|>[set_config(config={\"enabled\": True, \"count\": 3})]<|tool_call_end|>")
        .tools({ config_tool })
        .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "count": 3}})"))
        .run();
    // JSON literals are accepted too
    tst.test("<|tool_call_start|>[set_config(config={\"enabled\": true, \"note\": null})]<|tool_call_end|>")
        .tools({ config_tool })
        .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "note": null}})"))
        .run();
    // Dotted function name with structured args
    tst.test("<|tool_call_start|>[Calendar.create_event(title=\"demo\", participants=[\"Alice\", \"Bob\"], "
             "metadata={\"priority\": \"high\", \"reminder\": true})]<|tool_call_end|>")
        .tools({ calendar_create_event_tool })
        .expect(message_with_tool_calls(
            "Calendar.create_event",
            R"({"title": "demo", "participants": ["Alice", "Bob"], "metadata": {"priority": "high", "reminder": true}})"))
        .run();
    // Markdown links stay content
    tst.test("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org).")
        .tools({ get_time_tool })
        .expect(simple_assist_msg("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org)."))
        .run();
    // Python tool with multiline code in string
    tst.test("<|tool_call_start|>[python(code=\"def hello():\\n    print('hey')\")]<|tool_call_end|>")
        .tools({ python_tool })
        .expect_tool_calls({
            { "python", R"#({"code": "def hello():\\n    print('hey')"})#", "" }
        })
        .run();
    // Content before tool call (no reasoning)
    tst.test("Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
        .tools({ get_time_tool })
        .expect(message_with_reasoning_content_and_multiple_tool_calls(
            "", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
        ))
        .run();
    // Multiple tool calls (parallel)
    tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>")
        .parallel_tool_calls(true)
        .tools({ special_function_tool, special_function_tool_with_optional_param })
        .expect_tool_calls({
            { "special_function", R"({"arg1": 1})", {} },
            { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
        })
        .run();
    // Partial tool call (streaming)
    tst.test("<|tool_call_start|>[special_function(arg1=")
        .tools({ special_function_tool })
        .is_partial(true)
        .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
        .run();
    // Tool call with empty arguments
    tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>")
        .tools({ empty_args_tool })
        .expect(simple_assist_msg("", "", "empty_args", "{}"))
        .run();
 }
 static void test_template_output_peg_parsers(bool detailed_debug) {
    LOG_DBG("%s\n", __func__);
@@ -4038,49 +4136,30 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
            .run();
    }
-    // LFM2-8B-A1B tests - uses <|tool_list_start|>/<|tool_list_end|> and <|tool_call_start|>[name(args)]<|tool_call_end|>
+    for (const char * tmpl : {
             "models/templates/LFM2-8B-A1B.jinja",
             "models/templates/LFM2.5-Instruct.jinja",
             "models/templates/LFM2.5-8B-A1B.jinja",
         }) {
        test_lfm2_parser(tmpl, detailed_debug);
    }
    // Thinking cases only apply to LFM2.5-8B-A1B, the one LFM2 template that emits <think>
    {
-        auto tst = peg_tester("models/templates/LFM2-8B-A1B.jinja", detailed_debug);
+        auto tst = peg_tester("models/templates/LFM2.5-8B-A1B.jinja", detailed_debug);
-        // Basic content only
+        // Reasoning is parsed independent of enable_thinking
        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
-        // Single tool call without reasoning
+        // Tool call with reasoning
        tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
            .tools({ special_function_tool })
            .expect(message_assist_call)
            .run();
        // Tool call with string argument
        tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>")
            .tools({ get_time_tool })
            .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
            .run();
        // Tool call with reasoning (enable_thinking=true)
        tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
            .enable_thinking(true)
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .tools({ special_function_tool })
            .expect(message_assist_call_thoughts)
            .run();
        // Multiple tool calls (parallel)
        tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>")
            .parallel_tool_calls(true)
            .tools({
                special_function_tool, special_function_tool_with_optional_param
            })
            .expect_tool_calls({
                { "special_function", R"({"arg1": 1})", {} },
                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
            })
            .run();
        // Tool call with reasoning and content
        tst.test("<think>I need to call a function</think>"
                 "Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
            .enable_thinking(true)
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .tools({ get_time_tool })
            .expect(message_with_reasoning_content_and_multiple_tool_calls(
@@ -4088,32 +4167,9 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
            ))
            .run();
-        // Python tool with multiline code in string
+        // Fake tool call marker inside reasoning is not parsed as a call
-        tst.test("<|tool_call_start|>[python(code=\"def hello():\\n    print('hey')\")]<|tool_call_end|>")
+        tst.test("<think>Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm</think>"
-            .tools({ python_tool })
+                 "<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
            .expect_tool_calls({
                { "python", R"#({"code": "def hello():\\n    print('hey')"})#", "" }
            })
            .run();
        // Partial tool call (streaming)
        tst.test("<|tool_call_start|>[special_function(arg1=")
            .tools({ special_function_tool })
            .is_partial(true)
            .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
            .run();
        // Tool call with empty arguments
        tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>")
            .tools({ empty_args_tool })
            .expect(simple_assist_msg("", "", "empty_args", "{}"))
            .run();
        // fake tool call marker in reasoning
        tst.test(
               "<think>Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm</think>"
               "<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
            .enable_thinking(true)
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .tools({ special_function_tool })
            .expect_reasoning("Let me think about <|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|> hmm")
@@ -4122,127 +4178,21 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
            })
            .run();
-        // Continuation tests
+        // enable_thinking=false still captures emitted reasoning
-        tst.test("world!\nWhat's up?")
+        tst.test("<think>I'm\nthinking</think>Hello, world!\nWhat's up?")
            .enable_thinking(false)
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
-            .enable_thinking(true)
+            .expect(message_assist_thoughts)
            .messages({ message_user, message_assist_prefill_content })
            .add_generation_prompt(false)
            .continue_final_message(COMMON_CHAT_CONTINUATION_CONTENT)
            .expect_reasoning("I'm thinking")
            .expect_content("Hello, world!\nWhat's up?")
            .run();
        tst.test(" thinking</think>Hello, world!\nWhat's up?")
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .enable_thinking(true)
            .messages({ message_user, message_assist_prefill_reasoning })
            .add_generation_prompt(false)
            .continue_final_message(COMMON_CHAT_CONTINUATION_REASONING)
            .expect_reasoning("I'm thinking")
            .expect_content("Hello, world!\nWhat's up?")
            .run();
    }
    // LFM2.5 tests - format <|tool_call_start|>[name(args)]<|tool_call_end|>
    {
        auto tst = peg_tester("models/templates/LFM2.5-Instruct.jinja", detailed_debug);
        // Basic content only
        tst.test("Hello, world!\nWhat's up?").expect(message_assist).run();
        // Single tool call without reasoning
        tst.test("<|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
            .tools({ special_function_tool })
            .expect(message_assist_call)
            .run();
        // Tool call with string argument
        tst.test("<|tool_call_start|>[get_time(city=\"XYZCITY\")]<|tool_call_end|>")
            .tools({ get_time_tool })
            .expect(message_with_tool_calls("get_time", "{\"city\":\"XYZCITY\"}"))
            .run();
        // Python literals become JSON.
        tst.test("<|tool_call_start|>[toggle(enabled=True)]<|tool_call_end|>")
            .tools({ toggle_tool })
            .expect(message_with_tool_calls("toggle", R"({"enabled": true})"))
            .run();
        tst.test("<|tool_call_start|>[set_nullable(value=None)]<|tool_call_end|>")
            .tools({ nullable_tool })
            .expect(message_with_tool_calls("set_nullable", R"({"value": null})"))
            .run();
        // Nested Python literal.
        tst.test("<|tool_call_start|>[set_config(config={\"enabled\": True, \"count\": 3})]<|tool_call_end|>")
            .tools({ config_tool })
            .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "count": 3}})"))
            .run();
        // JSON literals are accepted too.
        tst.test("<|tool_call_start|>[set_config(config={\"enabled\": true, \"note\": null})]<|tool_call_end|>")
            .tools({ config_tool })
            .expect(message_with_tool_calls("set_config", R"({"config": {"enabled": true, "note": null}})"))
            .run();
        // Dotted function name with structured args.
        tst.test("<|tool_call_start|>[Calendar.create_event(title=\"demo\", participants=[\"Alice\", \"Bob\"], "
                 "metadata={\"priority\": \"high\", \"reminder\": true})]<|tool_call_end|>")
            .tools({ calendar_create_event_tool })
            .expect(message_with_tool_calls(
                "Calendar.create_event",
                R"({"title": "demo", "participants": ["Alice", "Bob"], "metadata": {"priority": "high", "reminder": true}})"))
            .run();
        // Markdown links stay content.
        tst.test("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org).")
            .tools({ get_time_tool })
            .expect(simple_assist_msg("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org)."))
            .run();
        // Tool call with reasoning (enable_thinking=true)
        tst.test("<think>I'm\nthinking</think><|tool_call_start|>[special_function(arg1=1)]<|tool_call_end|>")
-            .enable_thinking(true)
+            .enable_thinking(false)
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .tools({ special_function_tool })
            .expect(message_assist_call_thoughts)
            .run();
-        // Multiple tool calls (parallel)
+        // Continuation: prefill content
        tst.test("<|tool_call_start|>[special_function(arg1=1), special_function_with_opt(arg1=1, arg2=2)]<|tool_call_end|>")
            .parallel_tool_calls(true)
            .tools({
                special_function_tool, special_function_tool_with_optional_param
            })
            .expect_tool_calls({
                { "special_function", R"({"arg1": 1})", {} },
                { "special_function_with_opt", R"({"arg1": 1, "arg2": 2})", {} },
            })
            .run();
        // Tool call with content before tool call
        tst.test("Let me check the time.<|tool_call_start|>[get_time(city=\"Paris\")]<|tool_call_end|>")
            .tools({ get_time_tool })
            .expect(message_with_reasoning_content_and_multiple_tool_calls(
                "", "Let me check the time.", { { "get_time", "{\"city\":\"Paris\"}" } }
            ))
            .run();
        // Partial tool call (streaming)
        tst.test("<|tool_call_start|>[special_function(arg1=")
            .tools({ special_function_tool })
            .is_partial(true)
            .expect(simple_assist_msg("", "", "special_function", "{\"arg1\": "))
            .run();
        // Tool call with empty arguments
        tst.test("<|tool_call_start|>[empty_args()]<|tool_call_end|>")
            .tools({ empty_args_tool })
            .expect(simple_assist_msg("", "", "empty_args", "{}"))
            .run();
        // Continuation tests
        tst.test("world!\nWhat's up?")
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .enable_thinking(true)
@@ -4253,6 +4203,7 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
            .expect_content("Hello, world!\nWhat's up?")
            .run();
        // Continuation: prefill reasoning
        tst.test(" thinking</think>Hello, world!\nWhat's up?")
            .reasoning_format(COMMON_REASONING_FORMAT_AUTO)
            .enable_thinking(true)
@@ -5478,18 +5429,25 @@ static void test_template_generation_prompt() {
        check(tmpls, continuation_reasoning(), "<|im_assistant|>assistant<|im_middle|><think>I'm");
    }
-    {
+    for (const char * tmpl : {
-        auto tmpls = read_templates("models/templates/LFM2-8B-A1B.jinja");
+             "models/templates/LFM2-8B-A1B.jinja",
             "models/templates/LFM2.5-Instruct.jinja",
             "models/templates/LFM2.5-8B-A1B.jinja",
         }) {
        auto tmpls = read_templates(tmpl);
        check(tmpls, basic(),                  "<|im_start|>assistant\n");
        check(tmpls, continuation_content(),   "<|im_start|>assistant\n<think>I'm thinking</think>Hello, ");
        check(tmpls, continuation_reasoning(), "<|im_start|>assistant\n<think>I'm");
    }
    {
-        auto tmpls = read_templates("models/templates/LFM2.5-Instruct.jinja");
+        // 8B-A1B renders prior-turn reasoning via the "thinking" field
-        check(tmpls, basic(),                  "<|im_start|>assistant\n");
+        auto tmpls = read_templates("models/templates/LFM2.5-8B-A1B.jinja");
-        check(tmpls, continuation_content(),   "<|im_start|>assistant\n<think>I'm thinking</think>Hello, ");
+        common_chat_templates_inputs inputs;
-        check(tmpls, continuation_reasoning(), "<|im_start|>assistant\n<think>I'm");
+        inputs.messages              = { message_user, message_assist_call_thoughts, tool_msg };
        inputs.add_generation_prompt = true;
        auto params = common_chat_templates_apply(tmpls.get(), inputs);
        assert_contains(params.prompt, "<think>I'm\nthinking</think>");
    }
    {