mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-09 07:16:44 +02:00
common/chat : unify and fix LFM2/LFM2.5 tool parser (#24178)
This commit is contained in:
@@ -87,6 +87,8 @@ static std::string normalize_quotes_to_json(const std::string & input) {
|
||||
bool in_single_quoted = false;
|
||||
bool in_double_quoted = false;
|
||||
|
||||
auto is_word_char = [](char ch) { return std::isalnum(static_cast<unsigned char>(ch)) || ch == '_'; };
|
||||
|
||||
for (size_t i = 0; i < input.size(); ++i) {
|
||||
char c = input[i];
|
||||
|
||||
@@ -151,6 +153,29 @@ static std::string normalize_quotes_to_json(const std::string & input) {
|
||||
in_single_quoted = true;
|
||||
result += '"';
|
||||
}
|
||||
} else if (!in_single_quoted && !in_double_quoted && (c == 'T' || c == 'F' || c == 'N') &&
|
||||
(i == 0 || !is_word_char(input[i - 1]))) {
|
||||
// Python literals -> JSON; prefix match keeps streamed partials monotonic.
|
||||
static constexpr std::pair<std::string_view, std::string_view> literals[] = {
|
||||
{ "True", "true" }, { "False", "false" }, { "None", "null" },
|
||||
};
|
||||
size_t n = 0;
|
||||
while (i + n < input.size() && is_word_char(input[i + n])) {
|
||||
++n;
|
||||
}
|
||||
std::string_view token(input.data() + i, n);
|
||||
bool matched = false;
|
||||
for (const auto & [py, js] : literals) {
|
||||
if (py.substr(0, n) == token) {
|
||||
result += js.substr(0, n);
|
||||
i += n - 1;
|
||||
matched = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!matched) {
|
||||
result += c;
|
||||
}
|
||||
} else {
|
||||
result += c;
|
||||
}
|
||||
@@ -353,12 +378,8 @@ void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
|
||||
}
|
||||
value_to_add += escape_json_string_inner(value_content);
|
||||
} else if (!value_content.empty()) {
|
||||
// For potential containers, normalize Python-style single quotes to JSON double quotes
|
||||
bool is_potential_container = value_content[0] == '[' || value_content[0] == '{';
|
||||
if (is_potential_container) {
|
||||
value_content = normalize_container_value(value_content);
|
||||
}
|
||||
value_to_add += value_content;
|
||||
// Pythonic scalars/containers -> JSON.
|
||||
value_to_add += normalize_container_value(value_content);
|
||||
}
|
||||
|
||||
args_target() += value_to_add;
|
||||
@@ -466,11 +487,34 @@ common_peg_parser common_chat_peg_builder::standard_constructed_tools(
|
||||
return force_tool_calls ? section : optional(section);
|
||||
}
|
||||
|
||||
// Like python_value(), but the leaf also accepts JSON-cased true/false/null, used by LFM2/LFM2.5
|
||||
common_peg_parser common_chat_peg_builder::python_or_json_value() {
|
||||
return rule("python-or-json-value", [this]() {
|
||||
auto ws = space();
|
||||
auto value = python_or_json_value();
|
||||
|
||||
auto member = sequence({ python_string(), ws, literal(":"), ws, value });
|
||||
auto members = sequence({ member, zero_or_more(sequence({ ws, literal(","), ws, member })) });
|
||||
auto dict = rule("python-or-json-dict", [&]() {
|
||||
return sequence({ literal("{"), ws, choice({ literal("}"), sequence({ members, ws, literal("}") }) }), ws });
|
||||
});
|
||||
|
||||
auto elements = sequence({ value, zero_or_more(sequence({ literal(","), ws, value })) });
|
||||
auto array = rule("python-or-json-array", [&]() {
|
||||
return sequence({ literal("["), ws, choice({ literal("]"), sequence({ elements, ws, literal("]") }) }), ws });
|
||||
});
|
||||
|
||||
return choice({ dict, array, python_string(), python_number(),
|
||||
python_bool(), python_null(), json_bool(), json_null() });
|
||||
});
|
||||
}
|
||||
|
||||
// Python-style tool calls: name(arg1="value1", arg2=123)
|
||||
// Used only by LFM2 for now, so we don't merge it into autoparser
|
||||
common_peg_parser common_chat_peg_builder::python_style_tool_calls(
|
||||
const ordered_json & tools,
|
||||
bool parallel_tool_calls) {
|
||||
bool parallel_tool_calls,
|
||||
bool allow_json_literals) {
|
||||
if (!tools.is_array() || tools.empty()) {
|
||||
return eps();
|
||||
}
|
||||
@@ -504,7 +548,7 @@ common_peg_parser common_chat_peg_builder::python_style_tool_calls(
|
||||
if (is_string_type) {
|
||||
arg_value_parser = string_value_parser;
|
||||
} else {
|
||||
arg_value_parser = tool_arg_value(python_value());
|
||||
arg_value_parser = tool_arg_value(allow_json_literals ? python_or_json_value() : python_value());
|
||||
}
|
||||
|
||||
// Full argument: name="value" or name=value
|
||||
|
||||
@@ -132,9 +132,13 @@ class common_chat_peg_builder : public common_peg_parser_builder {
|
||||
// Helper for Python-style function call format: name(arg1="value1", arg2=123)
|
||||
// Used by LFM2 and similar templates
|
||||
common_peg_parser python_style_tool_calls(const nlohmann::ordered_json & tools,
|
||||
bool parallel_tool_calls);
|
||||
bool parallel_tool_calls,
|
||||
bool allow_json_literals);
|
||||
|
||||
private:
|
||||
// Python values plus JSON true/false/null.
|
||||
common_peg_parser python_or_json_value();
|
||||
|
||||
// Implementation helpers for standard_json_tools — one per JSON tool call layout mode
|
||||
common_peg_parser build_json_tools_function_is_key(const nlohmann::ordered_json & tools,
|
||||
const std::string & args_key,
|
||||
@@ -195,4 +199,3 @@ struct tagged_peg_parser {
|
||||
|
||||
tagged_peg_parser build_tagged_peg_parser(
|
||||
const std::function<common_peg_parser(common_peg_parser_builder & builder)> & fn);
|
||||
|
||||
|
||||
+26
-115
@@ -1608,42 +1608,40 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp
|
||||
return data;
|
||||
}
|
||||
|
||||
// LFM2 format: uses <|tool_list_start|>[...]<|tool_list_end|> in system prompt
|
||||
// and <|tool_call_start|>[name(arg="val")]<|tool_call_end|> for tool calls.
|
||||
// - Reasoning: <think>{reasoning}</think> (optional)
|
||||
// - Content: text before a tool call (optional)
|
||||
// - Tool calls: Python-style, e.g. [function_name(arg1="value1", arg2="value2")]
|
||||
// Tool calls can appear multiple times (parallel tool calls supported)
|
||||
static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl,
|
||||
const autoparser::generation_params & inputs) {
|
||||
// LFM2/LFM2.5 parser. Tool calls are almost Python-style and parallel-capable
|
||||
// (except dotted names and JSON literals true/false/null).
|
||||
// Always wrapped in <|tool_call_start|>[name(args)]<|tool_call_end|> with optional <think> reasoning.
|
||||
// tool_list_tokens preserves LFM2 system tool-list markers.
|
||||
static common_chat_params common_chat_params_init_lfm2(const common_chat_template & tmpl,
|
||||
const autoparser::generation_params & inputs,
|
||||
bool tool_list_tokens) {
|
||||
common_chat_params data;
|
||||
|
||||
const std::string TOOL_CALL_START = "<|tool_call_start|>";
|
||||
const std::string TOOL_CALL_END = "<|tool_call_end|>";
|
||||
const std::string TOOL_LIST_START = "<|tool_list_start|>";
|
||||
const std::string TOOL_LIST_END = "<|tool_list_end|>";
|
||||
const std::string THINK_START = "<think>";
|
||||
const std::string THINK_END = "</think>";
|
||||
const std::string GEN_PROMPT = "<|im_start|>assistant\n";
|
||||
|
||||
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
|
||||
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
|
||||
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
|
||||
data.supports_thinking = true;
|
||||
data.preserved_tokens = {
|
||||
"<|tool_list_start|>",
|
||||
"<|tool_list_end|>",
|
||||
"<|tool_call_start|>",
|
||||
"<|tool_call_end|>",
|
||||
"<think>",
|
||||
"</think>",
|
||||
};
|
||||
data.preserved_tokens = { TOOL_CALL_START, TOOL_CALL_END, THINK_START, THINK_END };
|
||||
if (tool_list_tokens) {
|
||||
data.preserved_tokens.push_back(TOOL_LIST_START);
|
||||
data.preserved_tokens.push_back(TOOL_LIST_END);
|
||||
}
|
||||
|
||||
data.thinking_start_tag = THINK_START;
|
||||
data.thinking_end_tag = THINK_END;
|
||||
|
||||
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
|
||||
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
|
||||
auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
|
||||
|
||||
const std::string TOOL_CALL_START = "<|tool_call_start|>";
|
||||
const std::string TOOL_CALL_END = "<|tool_call_end|>";
|
||||
const std::string THINK_START = "<think>";
|
||||
const std::string THINK_END = "</think>";
|
||||
const std::string GEN_PROMPT = "<|im_start|>assistant\n";
|
||||
|
||||
data.thinking_start_tag = THINK_START;
|
||||
data.thinking_end_tag = THINK_END;
|
||||
|
||||
if (inputs.has_continuation()) {
|
||||
const auto & msg = inputs.continue_msg;
|
||||
|
||||
@@ -1670,7 +1668,7 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
|
||||
auto tool_calls = p.rule("tool-calls",
|
||||
p.trigger_rule("tool-call",
|
||||
p.literal(TOOL_CALL_START) +
|
||||
p.python_style_tool_calls(inputs.tools, inputs.parallel_tool_calls) +
|
||||
p.python_style_tool_calls(inputs.tools, inputs.parallel_tool_calls, /* allow_json_literals = */ true) +
|
||||
p.literal(TOOL_CALL_END)
|
||||
)
|
||||
);
|
||||
@@ -1697,93 +1695,6 @@ static common_chat_params common_chat_params_init_lfm2(const common_chat_templat
|
||||
{ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, TOOL_CALL_START }
|
||||
};
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
// LFM2.5 format: uses plain "List of tools: [...]" in system prompt, no wrapper tokens.
|
||||
// Tool calls are bare [name(arg="val")], though model may optionally emit <|tool_call_start|>.
|
||||
// - Reasoning: <think>{reasoning}</think> (optional)
|
||||
// - Content: text before a tool call (optional)
|
||||
// - Tool calls: Python-style, e.g. [function_name(arg1="value1", arg2="value2")]
|
||||
// Tool calls can appear multiple times (parallel tool calls supported)
|
||||
static common_chat_params common_chat_params_init_lfm2_5(const common_chat_template & tmpl,
|
||||
const autoparser::generation_params & inputs) {
|
||||
common_chat_params data;
|
||||
|
||||
data.prompt = common_chat_template_direct_apply_impl(tmpl, inputs);
|
||||
data.generation_prompt = common_chat_template_generation_prompt_impl(tmpl, inputs);
|
||||
data.format = COMMON_CHAT_FORMAT_PEG_NATIVE;
|
||||
data.supports_thinking = true;
|
||||
data.preserved_tokens = {
|
||||
"<|tool_call_start|>",
|
||||
"<|tool_call_end|>",
|
||||
"<think>",
|
||||
"</think>",
|
||||
};
|
||||
|
||||
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
|
||||
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
|
||||
auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
|
||||
|
||||
const std::string THINK_START = "<think>";
|
||||
const std::string THINK_END = "</think>";
|
||||
const std::string GEN_PROMPT = "<|im_start|>assistant\n";
|
||||
|
||||
data.thinking_start_tag = THINK_START;
|
||||
data.thinking_end_tag = THINK_END;
|
||||
|
||||
if (inputs.has_continuation()) {
|
||||
const auto & msg = inputs.continue_msg;
|
||||
|
||||
data.generation_prompt = GEN_PROMPT + THINK_START + msg.reasoning_content;
|
||||
if (inputs.continue_final_message == COMMON_CHAT_CONTINUATION_CONTENT) {
|
||||
data.generation_prompt += THINK_END + msg.render_content();
|
||||
}
|
||||
|
||||
data.prompt += data.generation_prompt;
|
||||
}
|
||||
|
||||
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
|
||||
auto generation_prompt = p.literal(GEN_PROMPT);
|
||||
auto end = p.end();
|
||||
|
||||
auto reasoning = p.eps();
|
||||
if (extract_reasoning && inputs.enable_thinking) {
|
||||
reasoning = p.optional(THINK_START + p.reasoning(p.until(THINK_END)) + THINK_END);
|
||||
}
|
||||
|
||||
if (!has_tools || inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_NONE) {
|
||||
return generation_prompt + reasoning + p.content(p.rest()) + end;
|
||||
}
|
||||
|
||||
auto tool_calls = p.rule("tool-calls",
|
||||
p.trigger_rule("tool-call",
|
||||
p.python_style_tool_calls(inputs.tools, inputs.parallel_tool_calls)
|
||||
)
|
||||
);
|
||||
|
||||
auto content = p.content(p.until_one_of({"<|tool_call_start|>", "["}));
|
||||
auto maybe_start = p.optional(p.literal("<|tool_call_start|>"));
|
||||
return generation_prompt + reasoning + content + maybe_start + tool_calls + end;
|
||||
});
|
||||
|
||||
data.parser = parser.save();
|
||||
|
||||
if (include_grammar) {
|
||||
data.grammar_lazy = inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_AUTO;
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool.at("function");
|
||||
auto schema = function.at("parameters");
|
||||
builder.resolve_refs(schema);
|
||||
});
|
||||
parser.build_grammar(builder, data.grammar_lazy);
|
||||
});
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const std::string name = tool.at("function").at("name");
|
||||
data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "[" + name + "(" });
|
||||
});
|
||||
}
|
||||
|
||||
return data;
|
||||
}
|
||||
@@ -2298,14 +2209,14 @@ std::optional<common_chat_params> common_chat_try_specialized_template(
|
||||
|
||||
if (is_lfm2_template(src)) {
|
||||
LOG_DBG("Using specialized template: LFM2\n");
|
||||
return common_chat_params_init_lfm2(tmpl, params);
|
||||
return common_chat_params_init_lfm2(tmpl, params, /* tool_list_tokens = */ true);
|
||||
}
|
||||
|
||||
// LFM2.5 format detection: template uses plain "List of tools: [...]" with no special tokens
|
||||
if (src.find("List of tools: [") != std::string::npos &&
|
||||
src.find("<|tool_list_start|>") == std::string::npos) {
|
||||
LOG_DBG("Using specialized template: LFM2.5\n");
|
||||
return common_chat_params_init_lfm2_5(tmpl, params);
|
||||
return common_chat_params_init_lfm2(tmpl, params, /* tool_list_tokens = */ false);
|
||||
}
|
||||
|
||||
// GigaChatV3 format detection
|
||||
|
||||
Reference in New Issue
Block a user