mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-16 10:46:43 +02:00
Compare commits
26 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 32120c10e3 | |||
| d5fb104293 | |||
| 635b65ad7a | |||
| e3a74b2990 | |||
| ac79caa7ce | |||
| fdd109883d | |||
| 4196b477da | |||
| ad39ccaa19 | |||
| 7dad2f1a17 | |||
| e36a602ba3 | |||
| 38d546330a | |||
| a1eb756c0b | |||
| 581e8eca8b | |||
| 0ae3f450f0 | |||
| e3cab403bf | |||
| 9dbc6621ae | |||
| 6eab47181c | |||
| e3bb1add8c | |||
| d8a3f523c8 | |||
| 72be44f1d2 | |||
| 8872ab5467 | |||
| 987fbd821d | |||
| c035ff4902 | |||
| 272088b9f2 | |||
| a6dff71270 | |||
| 2a6c391a5e |
@@ -7,7 +7,7 @@ ARG APP_REVISION=N/A
|
||||
|
||||
FROM docker.io/intel/deep-learning-essentials:$ONEAPI_VERSION AS build
|
||||
|
||||
ARG GGML_SYCL_F16=OFF
|
||||
ARG GGML_SYCL_F16=ON
|
||||
ARG LEVEL_ZERO_VERSION=1.28.2
|
||||
ARG LEVEL_ZERO_UBUNTU_VERSION=u24.04
|
||||
RUN apt-get update && \
|
||||
@@ -24,7 +24,8 @@ COPY . .
|
||||
|
||||
RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
|
||||
echo "GGML_SYCL_F16 is set" \
|
||||
&& export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
|
||||
&& export OPT_SYCL_F16="-DGGML_SYCL_F16=ON" \
|
||||
&& export SYCL_PROGRAM_COMPILE_OPTIONS="-cl-fp32-correctly-rounded-divide-sqrt"; \
|
||||
fi && \
|
||||
echo "Building with dynamic libs" && \
|
||||
cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${OPT_SYCL_F16} && \
|
||||
|
||||
@@ -37,7 +37,7 @@ LLM inference in C/C++
|
||||
|
||||
Getting started with llama.cpp is straightforward. Here are several ways to install it on your machine:
|
||||
|
||||
- Install `llama.cpp` using [brew, nix or winget](docs/install.md)
|
||||
- Install `llama.cpp` using [brew, nix, winget, or conda-forge](docs/install.md)
|
||||
- Run with Docker - see our [Docker documentation](docs/docker.md)
|
||||
- Download pre-built binaries from the [releases page](https://github.com/ggml-org/llama.cpp/releases)
|
||||
- Build from source by cloning this repository - check out [our build guide](docs/build.md)
|
||||
|
||||
@@ -103,6 +103,10 @@ common_chat_params peg_generator::generate_parser(const common_chat_template &
|
||||
data.grammar_triggers = {
|
||||
{ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, trigger_marker }
|
||||
};
|
||||
if (autoparser.tools.format.openai_wrapper_trigger) {
|
||||
// model emits the OpenAI function wrapper, trigger on it
|
||||
data.grammar_triggers.push_back({ COMMON_GRAMMAR_TRIGGER_TYPE_WORD, "{\"type\": \"function\"," });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -134,7 +138,7 @@ common_peg_arena autoparser::build_parser(const generation_params & inputs, cons
|
||||
auto response_format = p.rule("response-format", p.content(p.schema(p.json(), "response-format-schema", inputs.json_schema)));
|
||||
parser = ctx.reasoning_parser + p.space() + p.choice({
|
||||
p.literal("```json") + p.space() + response_format + p.space() + p.literal("```"),
|
||||
response_format
|
||||
p.space() + response_format + p.space()
|
||||
}) + p.end();
|
||||
pure_content = false;
|
||||
} else if (has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE && jinja_caps.supports_tool_calls) {
|
||||
@@ -224,13 +228,13 @@ common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_cont
|
||||
auto single_tool_parser = p.standard_json_tools(
|
||||
format.per_call_start, format.per_call_end, inputs.tools, inputs.parallel_tool_calls,
|
||||
inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED, name_field, args_field, format.tools_array_wrapped,
|
||||
format.fun_name_is_key, format.id_field, format.gen_id_field, format.parameter_order);
|
||||
format.fun_name_is_key, format.id_field, format.gen_id_field, format.parameter_order, format.openai_wrapper_trigger);
|
||||
tools_parser = p.trigger_rule("tool-calls", p.one_or_more(single_tool_parser + p.space()));
|
||||
} else {
|
||||
tools_parser = p.standard_json_tools(
|
||||
format.section_start, format.section_end, inputs.tools, inputs.parallel_tool_calls,
|
||||
inputs.tool_choice == COMMON_CHAT_TOOL_CHOICE_REQUIRED, name_field, args_field, format.tools_array_wrapped,
|
||||
format.fun_name_is_key, format.id_field, format.gen_id_field, format.parameter_order);
|
||||
format.fun_name_is_key, format.id_field, format.gen_id_field, format.parameter_order, format.openai_wrapper_trigger);
|
||||
}
|
||||
|
||||
// Handle content wrappers if present
|
||||
@@ -393,8 +397,7 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
|
||||
(schema_info.resolves_to_string(param_schema) ?
|
||||
p.tool_arg_string_value(until_suffix) :
|
||||
p.tool_arg_json_value(p.schema(
|
||||
p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false)) +
|
||||
p.space()) +
|
||||
p.json(), "tool-" + name + "-arg-" + param_name + "-schema", param_schema, false))) +
|
||||
p.tool_arg_close(p.literal(arguments.value_suffix)));
|
||||
|
||||
auto named_arg = p.rule("tool-" + name + "-arg-" + param_name, arg);
|
||||
|
||||
@@ -181,6 +181,7 @@ struct tool_format_analysis {
|
||||
|
||||
bool fun_name_is_key = false; // In JSON format function name is JSON key, i.e. { "<funname>": { ... arguments ... } }
|
||||
bool tools_array_wrapped = false; // Tool calls wrapped in JSON array [...]
|
||||
bool openai_wrapper_trigger = false; // model emits the OpenAI function wrapper, trigger on it
|
||||
|
||||
std::string function_field = "function";
|
||||
std::string name_field = "name";
|
||||
|
||||
@@ -165,6 +165,14 @@ static std::vector<std::function<void(const common_chat_template & tmpl, autopar
|
||||
LOG_DBG(ANSI_ORANGE "[Patch: Apriel 1.6]\n" ANSI_RESET);
|
||||
}
|
||||
},
|
||||
// template uses the JSON {name, parameters} tool instruction, emits the OpenAI function wrapper
|
||||
[](const common_chat_template & tmpl, autoparser & analysis) -> void {
|
||||
if (tmpl.src.find("Respond in the format {\"name\": function name") != std::string::npos &&
|
||||
tmpl.src.find("Do not use variables.") != std::string::npos) {
|
||||
analysis.tools.format.openai_wrapper_trigger = true;
|
||||
LOG_DBG(ANSI_ORANGE "[Patch: JSON name/parameters tool instruction]\n" ANSI_RESET);
|
||||
}
|
||||
},
|
||||
|
||||
});
|
||||
|
||||
@@ -1229,8 +1237,8 @@ void analyze_tools::extract_argument_name_markers() {
|
||||
left_result.tags["pre"] == right_result.tags["pre"] &&
|
||||
left_result.tags["suffix"] == right_result.tags["suffix"]) {
|
||||
// Name is inside a structure (e.g., JSON key): prefix is the shared wrapper
|
||||
arguments.name_prefix = trim_whitespace(left_result.tags["pre"]);
|
||||
arguments.name_suffix = trim_leading_whitespace(left_result.tags["suffix"]);
|
||||
arguments.name_prefix = left_result.tags["pre"];
|
||||
arguments.name_suffix = left_result.tags["suffix"];
|
||||
} else if (diff.left.substr(0, ARG_FIRST.length()) == ARG_FIRST && diff.right.substr(0, ARG_SECOND.length()) == ARG_SECOND) {
|
||||
// Name is directly in the diff: prefix comes from last marker in diff.prefix
|
||||
auto pre_parser = build_tagged_peg_parser([&](common_peg_parser_builder & p) {
|
||||
@@ -1315,8 +1323,7 @@ void analyze_tools::extract_argument_value_markers() {
|
||||
value_suffix = value_suffix.substr(0, end_marker_pos);
|
||||
}
|
||||
}
|
||||
value_suffix = trim_leading_whitespace(value_suffix);
|
||||
if (!value_suffix.empty()) {
|
||||
if (!trim_whitespace(value_suffix).empty()) {
|
||||
arguments.value_suffix = value_suffix;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -363,7 +363,7 @@ void common_chat_peg_mapper::map(const common_peg_ast_node & node) {
|
||||
}
|
||||
|
||||
if ((is_arg_value || is_arg_string_value) && current_tool) {
|
||||
std::string value_content = std::string(trim_trailing_space(trim_leading_space(node.text, 1), 1));
|
||||
std::string value_content = std::string(node.text);
|
||||
|
||||
std::string value_to_add;
|
||||
if (value_content.empty() && is_arg_string_value) {
|
||||
@@ -540,10 +540,11 @@ common_peg_parser common_chat_peg_builder::python_style_tool_calls(
|
||||
auto arg_name_parser = literal(prop_name);
|
||||
|
||||
common_peg_parser arg_value_parser = eps();
|
||||
auto string_value_parser = choice({
|
||||
literal("\"") + tool_arg_string_value(string_content('"')) + literal("\""),
|
||||
literal("'") + tool_arg_string_value(string_content('\'')) + literal("'")
|
||||
});
|
||||
// Quoted literal as a value: normalize_quotes_to_json preserves escapes.
|
||||
auto string_value_parser = tool_arg_value(choice({
|
||||
literal("\"") + string_content('"') + literal("\""),
|
||||
literal("'") + string_content('\'') + literal("'")
|
||||
}));
|
||||
|
||||
if (is_string_type) {
|
||||
arg_value_parser = string_value_parser;
|
||||
@@ -745,7 +746,8 @@ common_peg_parser common_chat_peg_builder::build_json_tools_flat_keys(
|
||||
const std::string & effective_args_key,
|
||||
const std::string & call_id_key,
|
||||
const std::string & gen_call_id_key,
|
||||
const std::vector<std::string> & parameters_order) {
|
||||
const std::vector<std::string> & parameters_order,
|
||||
bool accept_openai_wrapper) {
|
||||
|
||||
auto tool_choices = choice();
|
||||
auto name_key_parser = literal("\"" + effective_name_key + "\"");
|
||||
@@ -807,7 +809,13 @@ common_peg_parser common_chat_peg_builder::build_json_tools_flat_keys(
|
||||
return idx_a < idx_b;
|
||||
});
|
||||
|
||||
auto ordered_body = tool_open(literal("{")) + space();
|
||||
// accept an optional leading "type": "function" field when the model emits the OpenAI wrapper
|
||||
common_peg_parser type_field = eps();
|
||||
if (accept_openai_wrapper) {
|
||||
type_field = optional(literal("\"type\"") + space() + literal(":") + space() +
|
||||
literal("\"function\"") + space() + literal(",") + space());
|
||||
}
|
||||
auto ordered_body = tool_open(literal("{")) + space() + type_field;
|
||||
for (size_t i = 0; i < parser_pairs.size(); i++) {
|
||||
ordered_body = ordered_body + parser_pairs[i].first;
|
||||
if (i < parser_pairs.size() - 1) {
|
||||
@@ -870,7 +878,8 @@ common_peg_parser common_chat_peg_builder::standard_json_tools(
|
||||
bool function_is_key,
|
||||
const std::string & call_id_key,
|
||||
const std::string & gen_call_id_key,
|
||||
const std::vector<std::string> & parameters_order) {
|
||||
const std::vector<std::string> & parameters_order,
|
||||
bool accept_openai_wrapper) {
|
||||
if (!tools.is_array() || tools.empty()) {
|
||||
return eps();
|
||||
}
|
||||
@@ -888,7 +897,7 @@ common_peg_parser common_chat_peg_builder::standard_json_tools(
|
||||
if (!name_spec.first.empty() || !args_spec.first.empty()) {
|
||||
tool_choices = build_json_tools_nested_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key);
|
||||
} else {
|
||||
tool_choices = build_json_tools_flat_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key, parameters_order);
|
||||
tool_choices = build_json_tools_flat_keys(tools, effective_name_key, effective_args_key, call_id_key, gen_call_id_key, parameters_order, accept_openai_wrapper);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -120,7 +120,8 @@ class common_chat_peg_builder : public common_peg_parser_builder {
|
||||
bool function_is_key = false,
|
||||
const std::string & call_id_key = "",
|
||||
const std::string & gen_call_id_key = "",
|
||||
const std::vector<std::string> & parameters_order = {});
|
||||
const std::vector<std::string> & parameters_order = {},
|
||||
bool accept_openai_wrapper = false);
|
||||
|
||||
// Legacy-compatible helper for building XML/tagged style tool calls
|
||||
// Used by tests and manual parsers
|
||||
@@ -157,7 +158,8 @@ class common_chat_peg_builder : public common_peg_parser_builder {
|
||||
const std::string & effective_args_key,
|
||||
const std::string & call_id_key,
|
||||
const std::string & gen_call_id_key,
|
||||
const std::vector<std::string> & parameters_order);
|
||||
const std::vector<std::string> & parameters_order,
|
||||
bool accept_openai_wrapper);
|
||||
};
|
||||
|
||||
inline common_peg_arena build_chat_peg_parser(
|
||||
|
||||
+3
-2
@@ -2678,8 +2678,9 @@ common_chat_msg common_chat_peg_parse(const common_peg_arena & src_pars
|
||||
}
|
||||
return msg;
|
||||
}
|
||||
throw std::runtime_error(std::string("Failed to parse input at pos ") + std::to_string(result.end) + ": " +
|
||||
effective_input.substr(result.end));
|
||||
LOG_WRN("%s: unparsed %s output: %s\n", __func__, common_chat_format_name(params.format), effective_input.substr(result.end).c_str());
|
||||
LOG_DBG("%s: full %s output triggering error:\n=== BEGIN ===\n%s\n=== END ===\n", __func__, common_chat_format_name(params.format), effective_input.c_str());
|
||||
throw std::runtime_error(std::string("The model produced output that does not match the expected ") + common_chat_format_name(params.format) + " format");
|
||||
}
|
||||
|
||||
common_chat_msg msg;
|
||||
|
||||
+32
-17
@@ -1272,13 +1272,13 @@ common_peg_parser common_peg_parser_builder::string_content(char delimiter) {
|
||||
|
||||
common_peg_parser common_peg_parser_builder::double_quoted_string() {
|
||||
return rule("double-quoted-string", [this]() {
|
||||
return sequence({literal("\""), string_content('"'), literal("\""), space()});
|
||||
return sequence({literal("\""), string_content('"'), literal("\"")});
|
||||
});
|
||||
}
|
||||
|
||||
common_peg_parser common_peg_parser_builder::single_quoted_string() {
|
||||
return rule("single-quoted-string", [this]() {
|
||||
return sequence({literal("'"), string_content('\''), literal("'"), space()});
|
||||
return sequence({literal("'"), string_content('\''), literal("'")});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1301,25 +1301,25 @@ common_peg_parser common_peg_parser_builder::json_number() {
|
||||
// At EOF in partial mode, chars returns NEED_MORE → negate propagates NEED_MORE → number not committed.
|
||||
// This prevents premature commits of partial numbers (e.g. "3" when "3.14" is incoming).
|
||||
auto not_number_continuation = negate(chars("[0-9.eE+-]", 1, 1));
|
||||
return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation, space() });
|
||||
return sequence({ optional(literal("-")), int_part, optional(frac), optional(exp), not_number_continuation });
|
||||
});
|
||||
}
|
||||
|
||||
common_peg_parser common_peg_parser_builder::json_string() {
|
||||
return rule("json-string", [this]() {
|
||||
return sequence({literal("\""), string_content('"'), literal("\""), space()});
|
||||
return sequence({literal("\""), string_content('"'), literal("\"")});
|
||||
});
|
||||
}
|
||||
|
||||
common_peg_parser common_peg_parser_builder::json_bool() {
|
||||
return rule("json-bool", [this]() {
|
||||
return sequence({choice({literal("true"), literal("false")}), space()});
|
||||
return choice({literal("true"), literal("false")});
|
||||
});
|
||||
}
|
||||
|
||||
common_peg_parser common_peg_parser_builder::json_null() {
|
||||
return rule("json-null", [this]() {
|
||||
return sequence({literal("null"), space()});
|
||||
return literal("null");
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1334,8 +1334,7 @@ common_peg_parser common_peg_parser_builder::json_object() {
|
||||
choice({
|
||||
literal("}"),
|
||||
sequence({members, ws, literal("}")})
|
||||
}),
|
||||
ws
|
||||
})
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -1350,8 +1349,7 @@ common_peg_parser common_peg_parser_builder::json_array() {
|
||||
choice({
|
||||
literal("]"),
|
||||
sequence({elements, ws, literal("]")})
|
||||
}),
|
||||
ws
|
||||
})
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -1381,16 +1379,13 @@ common_peg_parser common_peg_parser_builder::python_number() {
|
||||
|
||||
common_peg_parser common_peg_parser_builder::python_bool() {
|
||||
return rule("python-bool", [this]() {
|
||||
return sequence({
|
||||
choice({literal("True"), literal("False")}),
|
||||
space()
|
||||
});
|
||||
return choice({literal("True"), literal("False")});
|
||||
});
|
||||
}
|
||||
|
||||
common_peg_parser common_peg_parser_builder::python_null() {
|
||||
return rule("python-none", [this]() {
|
||||
return sequence({literal("None"), space()});
|
||||
return literal("None");
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1512,6 +1507,7 @@ static std::string gbnf_excluding_pattern(const std::vector<std::string> & strin
|
||||
auto pieces = matcher.collect_prefix_and_next();
|
||||
|
||||
std::string pattern;
|
||||
std::string trailing; // optional proper-prefix of a delimiter, allowed only at the very end
|
||||
for (size_t i = 0; i < pieces.size(); ++i) {
|
||||
if (i > 0) {
|
||||
pattern += " | ";
|
||||
@@ -1527,13 +1523,32 @@ static std::string gbnf_excluding_pattern(const std::vector<std::string> & strin
|
||||
}
|
||||
|
||||
if (!pre.empty()) {
|
||||
pattern += gbnf_format_literal(common_unicode_cpts_to_utf8(pre)) + " [^" + cls + "]";
|
||||
std::string pre_literal = gbnf_format_literal(common_unicode_cpts_to_utf8(pre));
|
||||
pattern += pre_literal + " [^" + cls + "]";
|
||||
// Each interior alternative consumes a delimiter-prefix plus a disambiguating
|
||||
// char, so the repetition alone cannot match a value that *ends* on a proper
|
||||
// prefix of a delimiter (e.g. a trailing "\n" when the delimiter is
|
||||
// "\n</parameter>\n"). The runtime until() (greedy first-match) accepts such
|
||||
// values, so without this the grammar would reject input the parser accepts.
|
||||
// Allow the value to terminate on any proper prefix as an optional tail.
|
||||
// This makes the grammar a slight superset of the runtime language (a value
|
||||
// may end on the longest prefix, which greedy first-match would not itself
|
||||
// produce); harmless for constrained generation, which only needs to admit
|
||||
// every runtime-valid string.
|
||||
if (!trailing.empty()) {
|
||||
trailing += " | ";
|
||||
}
|
||||
trailing += pre_literal;
|
||||
} else {
|
||||
pattern += "[^" + cls + "]";
|
||||
}
|
||||
}
|
||||
|
||||
return "(" + pattern + ")*";
|
||||
std::string result = "(" + pattern + ")*";
|
||||
if (!trailing.empty()) {
|
||||
result += " (" + trailing + ")?";
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::unordered_set<std::string> collect_reachable_rules(
|
||||
|
||||
+30
-1
@@ -140,6 +140,8 @@ struct common_speculative_impl {
|
||||
size_t n_gen_tokens = 0; // number of tokens generated by this implementation.
|
||||
size_t n_acc_tokens = 0; // number of tokens accepted by the target model.
|
||||
|
||||
std::vector<size_t> n_acc_tokens_per_pos; // number of tokens accepted per draft position.
|
||||
|
||||
// TODO: track performance of most recent calls
|
||||
const bool gen_perf = true; // whether to generate performance stats.
|
||||
|
||||
@@ -2059,6 +2061,15 @@ void common_speculative_accept(common_speculative * spec, llama_seq_id seq_id, u
|
||||
|
||||
{
|
||||
common_time_meas tm(impl->t_accept_us, !impl->gen_perf);
|
||||
|
||||
if (impl->n_acc_tokens_per_pos.size() < n_accepted) {
|
||||
impl->n_acc_tokens_per_pos.resize(n_accepted, 0);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < n_accepted; ++i) {
|
||||
impl->n_acc_tokens_per_pos[i]++;
|
||||
}
|
||||
|
||||
if (n_accepted > 0) {
|
||||
impl->n_acc_drafts++;
|
||||
impl->n_acc_tokens += n_accepted;
|
||||
@@ -2093,13 +2104,31 @@ void common_speculative_print_stats(const common_speculative * spec) {
|
||||
str_perf = "";
|
||||
}
|
||||
|
||||
LOG_INF("statistics %16s: #calls(b,g,a) = %4zu %6zu %6zu, #gen drafts = %6zu, #acc drafts = %5zu, #gen tokens = %6zu, #acc tokens = %5zu%s\n",
|
||||
std::string str_stats;
|
||||
if (impl->n_call_accept > 0) {
|
||||
const double mean =
|
||||
1.0 + (double) impl->n_acc_tokens / (double) impl->n_call_accept;
|
||||
std::ostringstream tmp;
|
||||
tmp << std::fixed << std::setprecision(3);
|
||||
for (size_t i = 0; i < impl->n_acc_tokens_per_pos.size(); ++i) {
|
||||
if (i > 0) {
|
||||
tmp << ", ";
|
||||
}
|
||||
tmp << (double) impl->n_acc_tokens_per_pos[i] / (double) impl->n_call_accept;
|
||||
}
|
||||
std::ostringstream oss;
|
||||
oss << std::fixed << std::setprecision(2) << mean;
|
||||
str_stats = ", #mean acc len = " + oss.str() + ", #acc rate/pos = (" + tmp.str() + ")";
|
||||
}
|
||||
|
||||
LOG_INF("statistics %16s: #calls(b,g,a) = %4zu %6zu %6zu, #gen drafts = %6zu, #acc drafts = %5zu, #gen tokens = %6zu, #acc tokens = %5zu%s%s\n",
|
||||
common_speculative_type_to_str(impl->type).c_str(),
|
||||
impl->n_call_begin, impl->n_call_draft, impl->n_call_accept,
|
||||
impl->n_gen_drafts,
|
||||
impl->n_acc_drafts,
|
||||
impl->n_gen_tokens,
|
||||
impl->n_acc_tokens,
|
||||
str_stats.c_str(),
|
||||
str_perf.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
+12
-10
@@ -253,6 +253,7 @@ When targeting an intel GPU, the user should expect one or more devices among th
|
||||
#### Intel GPU
|
||||
|
||||
```sh
|
||||
# Uses FP32, consider using FP16 for better performance in most cases
|
||||
./examples/sycl/build.sh
|
||||
```
|
||||
|
||||
@@ -262,12 +263,12 @@ or
|
||||
# Export relevant ENV variables
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
|
||||
# Option 1: Use FP32 (recommended for better performance in most cases)
|
||||
cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||
|
||||
# Option 2: Use FP16
|
||||
# Option 1: Use FP16 (recommended for better performance in most cases)
|
||||
cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
|
||||
|
||||
# Option 2: Use FP32
|
||||
cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||
|
||||
# build all binary
|
||||
cmake --build build --config Release -j -v
|
||||
```
|
||||
@@ -469,6 +470,7 @@ Choose one of following methods to build from source code.
|
||||
##### Option 1: Script
|
||||
|
||||
```sh
|
||||
# Uses FP32, consider using FP16 for better performance in most cases
|
||||
.\examples\sycl\win-build-sycl.bat
|
||||
```
|
||||
|
||||
@@ -479,11 +481,11 @@ On the oneAPI command line window, step into the llama.cpp main directory and ru
|
||||
```
|
||||
@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
|
||||
|
||||
# Option 1: Use FP32 (recommended for better performance in most cases)
|
||||
cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
|
||||
# Option 1: Use FP16 (recommended for better performance in most cases)
|
||||
cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DGGML_SYCL_F16=ON
|
||||
|
||||
# Option 2: Or FP16
|
||||
cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DGGML_SYCL_F16=ON
|
||||
# Option 2: Or FP32
|
||||
cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
|
||||
|
||||
cmake --build build --config Release -j
|
||||
```
|
||||
@@ -491,10 +493,10 @@ cmake --build build --config Release -j
|
||||
Or, use CMake presets to build:
|
||||
|
||||
```sh
|
||||
cmake --preset x64-windows-sycl-release
|
||||
cmake -DGGML_SYCL_F16=ON --preset x64-windows-sycl-release
|
||||
cmake --build build-x64-windows-sycl-release -j --target llama-completion
|
||||
|
||||
cmake -DGGML_SYCL_F16=ON --preset x64-windows-sycl-release
|
||||
cmake --preset x64-windows-sycl-release
|
||||
cmake --build build-x64-windows-sycl-release -j --target llama-completion
|
||||
|
||||
cmake --preset x64-windows-sycl-debug
|
||||
|
||||
+30
-2
@@ -1,12 +1,40 @@
|
||||
# Install pre-built version of llama.cpp
|
||||
|
||||
| Install via | Windows | Mac | Linux |
|
||||
|-------------|---------|-----|-------|
|
||||
| Install via | Windows | Mac | Linux |
|
||||
|-------------|---------|------|-------|
|
||||
| conda-forge | ✅ | ✅ | ✅ |
|
||||
| Winget | ✅ | | |
|
||||
| Homebrew | | ✅ | ✅ |
|
||||
| MacPorts | | ✅ | |
|
||||
| Nix | | ✅ | ✅ |
|
||||
|
||||
## conda-forge (Windows, Mac and Linux)
|
||||
|
||||
conda-forge provides builds for:
|
||||
- CUDA (Windows and Linux)
|
||||
- Vulkan (Windows and Linux)
|
||||
- Apple Metal (macOS)
|
||||
|
||||
```sh
|
||||
conda install -c conda-forge llama-cpp
|
||||
```
|
||||
|
||||
```sh
|
||||
mamba install -c conda-forge llama-cpp
|
||||
```
|
||||
|
||||
```sh
|
||||
# Project-local installation
|
||||
pixi add llama-cpp
|
||||
|
||||
# Global installation
|
||||
pixi global install llama-cpp
|
||||
```
|
||||
|
||||
This distribution is managed on [`conda-forge/llama-cpp-feedstock`](https://github.com/conda-forge/llama.cpp-feedstock/).
|
||||
|
||||
Shall you have any problems, please open an issue on [its issue tracker](https://github.com/conda-forge/llama.cpp-feedstock/issues).
|
||||
|
||||
## Winget (Windows)
|
||||
|
||||
```sh
|
||||
|
||||
+6
-5
@@ -24,6 +24,7 @@ Legend:
|
||||
| ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | ❌ |
|
||||
| CEIL | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| CLAMP | ❌ | ✅ | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | ❌ | ❌ |
|
||||
| COL2IM_1D | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ | ❌ |
|
||||
| CONV_2D | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ |
|
||||
@@ -43,10 +44,10 @@ Legend:
|
||||
| DUP | ❌ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| ELU | ❌ | ✅ | ✅ | 🟡 | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| EXP | ❌ | ✅ | ✅ | 🟡 | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| EXPM1 | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ |
|
||||
| EXPM1 | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| FILL | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| FLASH_ATTN_EXT | ❌ | 🟡 | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | ❌ |
|
||||
| FLOOR | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | 🟡 | ✅ | ✅ | ❌ | ❌ |
|
||||
| FLOOR | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| GATED_DELTA_NET | ❌ | ❌ | ✅ | ❌ | 🟡 | ❌ | ✅ | 🟡 | ✅ | ❌ | ❌ |
|
||||
| GATED_LINEAR_ATTN | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
| GEGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
@@ -77,7 +78,7 @@ Legend:
|
||||
| OUT_PROD | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ❌ | ❌ | ❌ | 🟡 |
|
||||
| PAD | ❌ | 🟡 | ✅ | 🟡 | 🟡 | 🟡 | 🟡 | ✅ | ✅ | ❌ | ❌ |
|
||||
| PAD_REFLECT_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
| POOL_1D | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
|
||||
| POOL_1D | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ |
|
||||
| POOL_2D | ❌ | 🟡 | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| REGLU | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| RELU | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
@@ -88,7 +89,7 @@ Legend:
|
||||
| ROLL | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| ROPE | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| ROPE_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| ROUND | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | 🟡 | ✅ | ✅ | ❌ | ❌ |
|
||||
| ROUND | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| RWKV_WKV6 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| RWKV_WKV7 | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| SCALE | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
@@ -117,6 +118,6 @@ Legend:
|
||||
| TIMESTEP_EMBEDDING | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ |
|
||||
| TOP_K | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ❌ | ❌ |
|
||||
| TRI | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| TRUNC | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | 🟡 | ✅ | ✅ | ❌ | ❌ |
|
||||
| TRUNC | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| UPSCALE | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | ❌ |
|
||||
| XIELU | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ |
|
||||
|
||||
+264
-119
@@ -27,20 +27,20 @@
|
||||
"SYCL0","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","EXP","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","EXP","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","EXPM1","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","SYCL"
|
||||
"SYCL0","EXPM1","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","SYCL"
|
||||
"SYCL0","EXPM1","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","EXPM1","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SOFTPLUS","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SOFTPLUS","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","SYCL"
|
||||
"SYCL0","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","SYCL"
|
||||
"SYCL0","FLOOR","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","FLOOR","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CEIL","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CEIL","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","SYCL"
|
||||
"SYCL0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","SYCL"
|
||||
"SYCL0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","0","no","SYCL"
|
||||
"SYCL0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","0","no","SYCL"
|
||||
"SYCL0","ROUND","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","ROUND","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","TRUNC","type=f16,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","TRUNC","type=f16,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","ABS","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","ABS","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SGN","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
@@ -69,20 +69,20 @@
|
||||
"SYCL0","HARDSIGMOID","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","EXP","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","EXP","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","EXPM1","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL"
|
||||
"SYCL0","EXPM1","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL"
|
||||
"SYCL0","EXPM1","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","EXPM1","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SOFTPLUS","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SOFTPLUS","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","GELU_ERF","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","GELU_ERF","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","FLOOR","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL"
|
||||
"SYCL0","FLOOR","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL"
|
||||
"SYCL0","FLOOR","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","FLOOR","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CEIL","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CEIL","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","ROUND","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL"
|
||||
"SYCL0","ROUND","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL"
|
||||
"SYCL0","TRUNC","type=f16,ne_a=[128,2,2,2],v=1","support","0","no","SYCL"
|
||||
"SYCL0","TRUNC","type=f16,ne_a=[5,7,11,13],v=1","support","0","no","SYCL"
|
||||
"SYCL0","ROUND","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","ROUND","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","TRUNC","type=f16,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","TRUNC","type=f16,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","ABS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","ABS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SGN","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
|
||||
@@ -111,8 +111,8 @@
|
||||
"SYCL0","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","EXP","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","EXP","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","EXPM1","type=f32,ne_a=[128,2,2,2],v=0","support","0","no","SYCL"
|
||||
"SYCL0","EXPM1","type=f32,ne_a=[5,7,11,13],v=0","support","0","no","SYCL"
|
||||
"SYCL0","EXPM1","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","EXPM1","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SOFTPLUS","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SOFTPLUS","type=f32,ne_a=[5,7,11,13],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=0","support","1","yes","SYCL"
|
||||
@@ -153,20 +153,20 @@
|
||||
"SYCL0","HARDSIGMOID","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","EXP","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","EXP","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","EXPM1","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","SYCL"
|
||||
"SYCL0","EXPM1","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","SYCL"
|
||||
"SYCL0","EXPM1","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","EXPM1","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SOFTPLUS","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SOFTPLUS","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","GELU_ERF","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","GELU_ERF","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","FLOOR","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","SYCL"
|
||||
"SYCL0","FLOOR","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","SYCL"
|
||||
"SYCL0","FLOOR","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","FLOOR","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CEIL","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CEIL","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","ROUND","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","SYCL"
|
||||
"SYCL0","ROUND","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","SYCL"
|
||||
"SYCL0","TRUNC","type=f32,ne_a=[128,2,2,2],v=1","support","0","no","SYCL"
|
||||
"SYCL0","TRUNC","type=f32,ne_a=[5,7,11,13],v=1","support","0","no","SYCL"
|
||||
"SYCL0","ROUND","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","ROUND","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","TRUNC","type=f32,ne_a=[128,2,2,2],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","TRUNC","type=f32,ne_a=[5,7,11,13],v=1","support","1","yes","SYCL"
|
||||
"SYCL0","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=0","support","1","yes","SYCL"
|
||||
"SYCL0","REGLU","type=f16,ne_a=[5,7,11,13],v=0,swapped=0","support","1","yes","SYCL"
|
||||
"SYCL0","REGLU","type=f16,ne_a=[128,2,2,2],v=0,swapped=1","support","1","yes","SYCL"
|
||||
@@ -582,42 +582,42 @@
|
||||
"SYCL0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q8_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q8_0,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[384,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[384,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[384,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[384,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[192,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[192,3,1,1],nr23=[2,3],r=2,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[192,3,7,1],nr23=[2,3],r=2,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[192,3,7,1],nr23=[2,3],r=2,v=1","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[384,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[384,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[384,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q1_0,type_idx=i64,ne=[384,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=mxfp4,type_idx=i64,ne=[96,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[192,3,1,1],nr23=[2,3],r=2,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[192,3,1,1],nr23=[2,3],r=2,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[192,3,7,1],nr23=[2,3],r=2,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,5,7,3],nr23=[1,1],r=1,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[256,11,1,7],nr23=[2,3],r=7,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=nvfp4,type_idx=i64,ne=[192,3,7,1],nr23=[2,3],r=2,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,5,1,3],nr23=[1,1],r=1,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q2_K,type_idx=i64,ne=[256,11,1,1],nr23=[2,3],r=7,v=0","support","0","no","SYCL"
|
||||
"SYCL0","SET_ROWS","type=q2_K,type_idx=i64,ne=[768,3,1,1],nr23=[2,3],r=2,v=0","support","0","no","SYCL"
|
||||
@@ -914,57 +914,58 @@
|
||||
"SYCL0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=0,p1=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_2D","pool_type=max,type_input=f32,ne_input=[10,10,3,1],k0=3,k1=3,s0=2,s1=2,p0=1,p1=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=1,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=1,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=1,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=2,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=2,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=2,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=1,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=1,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=1,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=2,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=2,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=2,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=1,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=1,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=1,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=2,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=2,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=2,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=1,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=1,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=1,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=2,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=2,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=2,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=1,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=1,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=1,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=1,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=1,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=1,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=2,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=2,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=2,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=2,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=2,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=2,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=1,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=1,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=1,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=1,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=1,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=1,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=2,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=2,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=2,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=2,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=2,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=avg,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=2,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=1,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=1,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=1,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=1,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=1,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=1,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=2,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=2,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=2,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=1,s0=2,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=1,s0=2,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=1,s0=2,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=1,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=1,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=1,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=1,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=1,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=1,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=2,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=2,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=2,p0=0","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[10,3,2,1],k0=3,s0=2,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[11,1,3,2],k0=3,s0=2,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","POOL_1D","pool_type=max,type_input=f32,ne_input=[128,2,1,3],k0=3,s0=2,p0=1","support","1","yes","SYCL"
|
||||
"SYCL0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","1","yes","SYCL"
|
||||
"SYCL0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","1","yes","SYCL"
|
||||
"SYCL0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[3000,128,1,1],ne_kernel=[3,128,1280,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","1","yes","SYCL"
|
||||
"SYCL0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[3000,384,1,1],ne_kernel=[3,384,384,1],s0=1,s1=0,p0=1,p1=0,d0=1,d1=0,is_2D=0","support","1","yes","SYCL"
|
||||
"SYCL0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=0,p1=0,d0=1,d1=0,is_2D=0","support","1","yes","SYCL"
|
||||
"SYCL0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=0,p1=0,d0=3,d1=0,is_2D=0","support","1","yes","SYCL"
|
||||
"SYCL0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[20,2,2,1],ne_kernel=[3,2,2,1],s0=1,s1=0,p0=3,p1=0,d0=1,d1=0,is_2D=0","support","1","yes","SYCL"
|
||||
@@ -1050,6 +1051,8 @@
|
||||
"SYCL0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[12,12,2,2560],ne_kernel=[3,3,2,2560],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","SYCL"
|
||||
"SYCL0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[5,5,1,32],ne_kernel=[3,4,1,32],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","SYCL"
|
||||
"SYCL0","IM2COL","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[2,2,1536,729],ne_kernel=[2,2,1536,4096],s0=1,s1=1,p0=0,p1=0,d0=1,d1=1,is_2D=1","support","1","yes","SYCL"
|
||||
"SYCL0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[128,128,1,2],ne_kernel=[32,33,1,2],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","SYCL"
|
||||
"SYCL0","IM2COL","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[128,128,2,1],ne_kernel=[33,34,2,1],s0=1,s1=1,p0=1,p1=1,d0=1,d1=1,is_2D=1","support","1","yes","SYCL"
|
||||
"SYCL0","IM2COL_3D","type_input=f32,type_kernel=f32,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f32,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","IM2COL_3D","type_input=f32,type_kernel=f16,dst_type=f16,ne_input=[10,10,10,9],ne_kernel=[3,3,3,1],IC=3,s0=1,s1=1,s2=1,p0=1,p1=1,p2=1,d0=1,d1=1,d2=1,v=0","support","1","yes","SYCL"
|
||||
@@ -5047,6 +5050,39 @@
|
||||
"SYCL0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,2,2,1],s0=1,p0=0,d0=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONV_TRANSPOSE_1D","ne_input=[3,2,1,1],ne_kernel=[3,1,2,1],s0=1,p0=0,d0=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONV_TRANSPOSE_1D","ne_input=[2,1,1,1],ne_kernel=[3,1,1,1],s0=1,p0=0,d0=1","support","1","yes","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f32,K=16,OC=32,T_in=197,s0=8,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f32,K=4,OC=3,T_in=7,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f32,K=1,OC=5,T_in=13,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f32,K=6,OC=4,T_in=11,s0=3,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f32,K=2,OC=3,T_in=9,s0=3,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f32,K=5,OC=4,T_in=11,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f32,K=8,OC=4,T_in=13,s0=4,p0=2","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f32,K=4,OC=3,T_in=1,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f32,K=16,OC=1,T_in=197,s0=8,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f32,K=1,OC=5,T_in=13,s0=3,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f32,K=8,OC=2,T_in=3,s0=2,p0=5","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f16,K=16,OC=32,T_in=197,s0=8,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f16,K=4,OC=3,T_in=7,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f16,K=1,OC=5,T_in=13,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f16,K=6,OC=4,T_in=11,s0=3,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f16,K=2,OC=3,T_in=9,s0=3,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f16,K=5,OC=4,T_in=11,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f16,K=8,OC=4,T_in=13,s0=4,p0=2","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f16,K=4,OC=3,T_in=1,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f16,K=16,OC=1,T_in=197,s0=8,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f16,K=1,OC=5,T_in=13,s0=3,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=f16,K=8,OC=2,T_in=3,s0=2,p0=5","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=bf16,K=16,OC=32,T_in=197,s0=8,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=bf16,K=4,OC=3,T_in=7,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=bf16,K=1,OC=5,T_in=13,s0=1,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=bf16,K=6,OC=4,T_in=11,s0=3,p0=1","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=bf16,K=2,OC=3,T_in=9,s0=3,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=bf16,K=5,OC=4,T_in=11,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=bf16,K=8,OC=4,T_in=13,s0=4,p0=2","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=bf16,K=4,OC=3,T_in=1,s0=2,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=bf16,K=16,OC=1,T_in=197,s0=8,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=bf16,K=1,OC=5,T_in=13,s0=3,p0=0","support","0","no","SYCL"
|
||||
"SYCL0","COL2IM_1D","type=bf16,K=8,OC=2,T_in=3,s0=2,p0=5","support","0","no","SYCL"
|
||||
"SYCL0","CONV_TRANSPOSE_2D","kernel_type=f32,ne_input=[3,2,3,1],ne_kernel=[2,2,1,3],stride=1","support","0","no","SYCL"
|
||||
"SYCL0","CONV_TRANSPOSE_2D","kernel_type=f32,ne_input=[10,10,9,1],ne_kernel=[3,3,1,9],stride=2","support","0","no","SYCL"
|
||||
"SYCL0","CONV_TRANSPOSE_2D","kernel_type=f32,ne_input=[129,63,35,1],ne_kernel=[3,3,48,35],stride=1","support","0","no","SYCL"
|
||||
@@ -5069,6 +5105,7 @@
|
||||
"SYCL0","REPEAT","type=f32,ne=[10,5,4,1],nr=[1,1,1,2]","support","1","yes","SYCL"
|
||||
"SYCL0","REPEAT","type=i32,ne=[10,5,4,1],nr=[2,1,1,1]","support","1","yes","SYCL"
|
||||
"SYCL0","REPEAT","type=i16,ne=[10,5,4,1],nr=[1,1,1,2]","support","1","yes","SYCL"
|
||||
"SYCL0","REPEAT","type=bf16,ne=[10,5,4,1],nr=[2,1,1,1]","support","1","yes","SYCL"
|
||||
"SYCL0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,1,1]","support","1","yes","SYCL"
|
||||
"SYCL0","REPEAT","type=f32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","yes","SYCL"
|
||||
"SYCL0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,2,1,1]","support","1","yes","SYCL"
|
||||
@@ -5076,6 +5113,7 @@
|
||||
"SYCL0","REPEAT","type=f32,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","yes","SYCL"
|
||||
"SYCL0","REPEAT","type=i32,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","yes","SYCL"
|
||||
"SYCL0","REPEAT","type=i16,ne=[10,5,4,3],nr=[1,1,1,2]","support","1","yes","SYCL"
|
||||
"SYCL0","REPEAT","type=bf16,ne=[10,5,4,3],nr=[2,1,1,1]","support","1","yes","SYCL"
|
||||
"SYCL0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,1,1,1],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[2,1,1,1],v=0","support","1","yes","SYCL"
|
||||
"SYCL0","REPEAT_BACK","type=f32,ne=[8,6,4,2],nr=[1,2,1,1],v=0","support","1","yes","SYCL"
|
||||
@@ -6185,6 +6223,7 @@
|
||||
"SYCL0","MUL_MAT_HADAMARD","type_a=f32,type_b=f32,m=128,n=1,k=128,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_HADAMARD","type_a=f32,type_b=f32,m=64,n=1,k=64,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_HADAMARD","type_a=f32,type_b=f32,m=256,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_HADAMARD","type_a=f32,type_b=f32,m=512,n=1,k=512,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_HADAMARD","type_a=f32,type_b=f32,m=128,n=32,k=128,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_HADAMARD","type_a=f32,type_b=f32,m=128,n=4,k=128,bs=[2,3],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT","type_a=f32,type_b=f32,m=16,n=1,k=256,bs=[1,1],nr=[1,1],per=[0,1,2,3],k_v=0,o=1","support","1","yes","SYCL"
|
||||
@@ -7603,6 +7642,31 @@
|
||||
"SYCL0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=16,n_used=16,b=1,m=50,n=200,k=64","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=1,n_used=1,b=0,m=8,n=16,k=1","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=32,n_used=2,b=0,m=2880,n=32,k=2880","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=3","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=f16,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=3","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=bf16,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=3","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=q4_0,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=96","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=q4_1,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=96","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=q5_0,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=96","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=q5_1,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=96","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=q8_0,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=96","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=q1_0,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=384","support","0","no","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=mxfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=96","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=nvfp4,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=192","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=q2_K,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=768","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=q3_K,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=768","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=q4_K,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=768","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=q5_K,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=768","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=q6_K,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=768","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=iq2_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=768","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=iq2_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=768","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=iq2_s,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=768","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=iq3_xxs,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=768","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=iq1_s,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=768","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=iq1_m,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=768","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=iq4_nl,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=96","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=iq3_s,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=768","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=iq4_xs,type_b=f32,n_mats=4,n_used=2,b=0,m=64,n=16,k=768","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=1,k=256","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=4,k=256","support","1","yes","SYCL"
|
||||
"SYCL0","MUL_MAT_ID","type_a=f32,type_b=f32,n_mats=4,n_used=1,b=0,m=512,n=5,k=256","support","1","yes","SYCL"
|
||||
@@ -9686,10 +9750,10 @@
|
||||
"SYCL0","COS","type=f16,ne=[10,2,2,2]","support","0","no","SYCL"
|
||||
"SYCL0","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","0","no","SYCL"
|
||||
"SYCL0","LEAKY_RELU","type=f16,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","SYCL"
|
||||
"SYCL0","FLOOR","type=f16,ne=[10,2,2,2]","support","0","no","SYCL"
|
||||
"SYCL0","FLOOR","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL"
|
||||
"SYCL0","CEIL","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL"
|
||||
"SYCL0","ROUND","type=f16,ne=[10,2,2,2]","support","0","no","SYCL"
|
||||
"SYCL0","TRUNC","type=f16,ne=[10,2,2,2]","support","0","no","SYCL"
|
||||
"SYCL0","ROUND","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL"
|
||||
"SYCL0","TRUNC","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL"
|
||||
"SYCL0","SQR","type=f16,ne=[7,1,5,3]","support","0","no","SYCL"
|
||||
"SYCL0","SQR","type=f16,ne=[1024,1024,1,1]","support","0","no","SYCL"
|
||||
"SYCL0","SQRT","type=f16,ne=[7,1,5,3]","support","0","no","SYCL"
|
||||
@@ -9704,14 +9768,14 @@
|
||||
"SYCL0","CLAMP","type=f16,ne=[1024,1024,1,1],min=-0.500000,max=0.500000","support","0","no","SYCL"
|
||||
"SYCL0","LEAKY_RELU","type=f16,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","SYCL"
|
||||
"SYCL0","LEAKY_RELU","type=f16,ne_a=[1024,1024,1,1],negative_slope=0.100000","support","1","yes","SYCL"
|
||||
"SYCL0","FLOOR","type=f16,ne=[7,1,5,3]","support","0","no","SYCL"
|
||||
"SYCL0","FLOOR","type=f16,ne=[1024,1024,1,1]","support","0","no","SYCL"
|
||||
"SYCL0","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL"
|
||||
"SYCL0","FLOOR","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL"
|
||||
"SYCL0","CEIL","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL"
|
||||
"SYCL0","CEIL","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL"
|
||||
"SYCL0","ROUND","type=f16,ne=[7,1,5,3]","support","0","no","SYCL"
|
||||
"SYCL0","ROUND","type=f16,ne=[1024,1024,1,1]","support","0","no","SYCL"
|
||||
"SYCL0","TRUNC","type=f16,ne=[7,1,5,3]","support","0","no","SYCL"
|
||||
"SYCL0","TRUNC","type=f16,ne=[1024,1024,1,1]","support","0","no","SYCL"
|
||||
"SYCL0","ROUND","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL"
|
||||
"SYCL0","ROUND","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL"
|
||||
"SYCL0","TRUNC","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL"
|
||||
"SYCL0","TRUNC","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL"
|
||||
"SYCL0","SQR","type=f32,ne=[10,5,4,3]","support","1","yes","SYCL"
|
||||
"SYCL0","SQRT","type=f32,ne=[10,3,3,2]","support","1","yes","SYCL"
|
||||
"SYCL0","LOG","type=f32,ne=[10,5,4,3]","support","1","yes","SYCL"
|
||||
@@ -10845,37 +10909,117 @@
|
||||
"SYCL0","ROPE","type=f16,ne_a=[128,32,2,1],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=1","support","1","yes","SYCL"
|
||||
"SYCL0","ROPE","type=f16,ne_a=[128,32,2,3],n_dims=128,mode=24,n_ctx=512,fs=1.424500,ef=0.746500,af=1.424500,ff=1,v=1,inplace=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=0","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=1","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=2","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=0,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=1,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=2,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=f16,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=bf16,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i8,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i16,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i32,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","CONCAT","type=i64,ne_a=[11,12,13,14],ne_b_d=7,dim=3,v=3","support","1","yes","SYCL"
|
||||
"SYCL0","ARGSORT","type=f32,ne=[3,1,1,1],order=0","support","1","yes","SYCL"
|
||||
"SYCL0","ARGSORT","type=f32,ne=[4,1,1,1],order=0","support","1","yes","SYCL"
|
||||
"SYCL0","ARGSORT","type=f32,ne=[7,1,1,1],order=0","support","1","yes","SYCL"
|
||||
@@ -16515,6 +16659,7 @@
|
||||
"SYCL0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=128,nb=2,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_K=q4_0,type_V=f16,permute=[0,1,2,3]","support","1","yes","SYCL"
|
||||
"SYCL0","FLASH_ATTN_EXT","hsk=72,hsv=72,nh=4,nr23=[1,1],kv=96,nb=2,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_K=q4_0,type_V=q8_0,permute=[0,1,2,3]","support","1","yes","SYCL"
|
||||
"SYCL0","FLASH_ATTN_EXT","hsk=64,hsv=64,nh=4,nr23=[1,1],kv=96,nb=2,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_K=f16,type_V=f32,permute=[0,1,2,3]","support","1","yes","SYCL"
|
||||
"SYCL0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=256,nb=1,mask=0,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_K=f16,type_V=q4_0,permute=[0,1,2,3]","support","1","yes","SYCL"
|
||||
"SYCL0","FLASH_ATTN_EXT","hsk=128,hsv=128,nh=4,nr23=[1,1],kv=96,nb=2,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_K=q1_0,type_V=q1_0,permute=[0,1,2,3]","support","0","no","SYCL"
|
||||
"SYCL0","FLASH_ATTN_EXT","hsk=128,hsv=64,nh=4,nr23=[1,1],kv=128,nb=2,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_K=q1_0,type_V=q4_0,permute=[0,1,2,3]","support","0","no","SYCL"
|
||||
"SYCL0","FLASH_ATTN_EXT","hsk=64,hsv=128,nh=4,nr23=[1,1],kv=128,nb=2,mask=1,sinks=0,max_bias=0.000000,logit_softcap=0.000000,prec=f32,type_K=q4_0,type_V=q1_0,permute=[0,1,2,3]","support","0","no","SYCL"
|
||||
|
||||
|
Can't render this file because it is too large.
|
+35
-5
@@ -3,15 +3,45 @@
|
||||
# Copyright (C) 2024 Intel Corporation
|
||||
# SPDX-License-Identifier: MIT
|
||||
|
||||
print_usage() {
|
||||
echo "Usage: ./build.sh [fp32|fp16] [--help]"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " fp32 Build with FP32 precision (default)"
|
||||
echo " fp16 Build with FP16 precision (faster for long-prompt inference)"
|
||||
echo " --help Print this help message"
|
||||
}
|
||||
|
||||
PRECISION=fp32
|
||||
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
--help)
|
||||
print_usage
|
||||
exit 0
|
||||
;;
|
||||
fp32|fp16)
|
||||
PRECISION="$arg"
|
||||
;;
|
||||
*)
|
||||
echo "Error: unknown option '$arg'"
|
||||
print_usage
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
mkdir -p build
|
||||
cd build
|
||||
source /opt/intel/oneapi/setvars.sh
|
||||
|
||||
#for FP16
|
||||
#cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DLLAMA_OPENSSL=OFF # faster for long-prompt inference
|
||||
|
||||
#for FP32
|
||||
cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_OPENSSL=OFF
|
||||
if [ "$PRECISION" = "fp16" ]; then
|
||||
#for FP16
|
||||
cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON -DLLAMA_OPENSSL=OFF # faster for long-prompt inference
|
||||
else
|
||||
#for FP32
|
||||
cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_OPENSSL=OFF
|
||||
fi
|
||||
|
||||
#build example/main
|
||||
#cmake --build . --config Release --target main
|
||||
|
||||
@@ -3,6 +3,23 @@
|
||||
:: Copyright (C) 2024 Intel Corporation
|
||||
:: SPDX-License-Identifier: MIT
|
||||
|
||||
IF /I "%1"=="--help" (
|
||||
echo Usage: win-build-sycl.bat [fp32^|fp16] [--help]
|
||||
echo.
|
||||
echo Options:
|
||||
echo fp32 Build with FP32 precision ^(default^)
|
||||
echo fp16 Build with FP16 precision ^(faster for long-prompt inference^)
|
||||
echo --help Print this help message
|
||||
exit /B 0
|
||||
)
|
||||
|
||||
SET PRECISION=%1
|
||||
IF "%PRECISION%"=="" SET PRECISION=fp32
|
||||
IF /I NOT "%PRECISION%"=="fp32" IF /I NOT "%PRECISION%"=="fp16" (
|
||||
echo Error: invalid value '%PRECISION%'. Use 'fp32' or 'fp16'.
|
||||
echo Usage: win-build-sycl.bat [fp32^|fp16] [--help]
|
||||
exit /B 1
|
||||
)
|
||||
|
||||
IF not exist build (mkdir build)
|
||||
cd build
|
||||
@@ -11,12 +28,14 @@ if %errorlevel% neq 0 goto ERROR
|
||||
@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
|
||||
if %errorlevel% neq 0 goto ERROR
|
||||
|
||||
:: for FP16
|
||||
:: faster for long-prompt inference
|
||||
:: cmake -G "MinGW Makefiles" .. -DLLAMA_OPENSSL=OFF -DGGML_SYCL=ON -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_SYCL_F16=ON
|
||||
|
||||
:: for FP32
|
||||
cmake -G "Ninja" .. -DLLAMA_OPENSSL=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release
|
||||
IF /I "%PRECISION%"=="fp16" (
|
||||
:: for FP16
|
||||
:: faster for long-prompt inference
|
||||
cmake -G "MinGW Makefiles" .. -DLLAMA_OPENSSL=OFF -DGGML_SYCL=ON -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_SYCL_F16=ON
|
||||
) ELSE (
|
||||
:: for FP32
|
||||
cmake -G "Ninja" .. -DLLAMA_OPENSSL=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release
|
||||
)
|
||||
if %errorlevel% neq 0 goto ERROR
|
||||
|
||||
:: build all binary
|
||||
|
||||
@@ -293,7 +293,6 @@
|
||||
#define ggml_gemm_q8_0_4x8_q8_0_generic ggml_gemm_q8_0_4x8_q8_0
|
||||
#elif defined(__wasm__)
|
||||
// quants.c
|
||||
#define ggml_vec_dot_q4_1_q8_1_generic ggml_vec_dot_q4_1_q8_1
|
||||
#define ggml_vec_dot_tq1_0_q8_K_generic ggml_vec_dot_tq1_0_q8_K
|
||||
#define ggml_vec_dot_tq2_0_q8_K_generic ggml_vec_dot_tq2_0_q8_K
|
||||
#define ggml_vec_dot_iq2_xxs_q8_K_generic ggml_vec_dot_iq2_xxs_q8_K
|
||||
|
||||
@@ -1418,6 +1418,9 @@ typedef decltype(kernel_repeat<float>) kernel_repeat_t;
|
||||
|
||||
template [[host_name("kernel_repeat_f32")]] kernel kernel_repeat_t kernel_repeat<float>;
|
||||
template [[host_name("kernel_repeat_f16")]] kernel kernel_repeat_t kernel_repeat<half>;
|
||||
#if defined(GGML_METAL_HAS_BF16)
|
||||
template [[host_name("kernel_repeat_bf16")]] kernel kernel_repeat_t kernel_repeat<bfloat>;
|
||||
#endif
|
||||
template [[host_name("kernel_repeat_i32")]] kernel kernel_repeat_t kernel_repeat<int>;
|
||||
template [[host_name("kernel_repeat_i16")]] kernel kernel_repeat_t kernel_repeat<short>;
|
||||
|
||||
|
||||
@@ -287,6 +287,13 @@ inline void ggml_sycl_op_bin_bcast(ggml_backend_sycl_context & ctx, const ggml_t
|
||||
ne10, ne11, ne12, ne13, ne0, ne1, ne2, ne3, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb0, nb1, nb2,
|
||||
nb3, ggml_is_contiguous(src0), ggml_is_contiguous(src1), ggml_is_permuted(src0), ggml_is_permuted(src1),
|
||||
main_stream);
|
||||
#ifdef GGML_SYCL_HAS_BF16
|
||||
} else if (src0->type == GGML_TYPE_BF16 && src1->type == GGML_TYPE_BF16 && dst->type == GGML_TYPE_BF16) {
|
||||
op()((const sycl::ext::oneapi::bfloat16 *) src0->data, (const sycl::ext::oneapi::bfloat16 *) src1->data,
|
||||
(sycl::ext::oneapi::bfloat16 *) dst->data, ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, ne0, ne1, ne2,
|
||||
ne3, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb0, nb1, nb2, nb3, ggml_is_contiguous(src0),
|
||||
ggml_is_contiguous(src1), ggml_is_permuted(src0), ggml_is_permuted(src1), main_stream);
|
||||
#endif
|
||||
} else {
|
||||
fprintf(stderr, "%s: unsupported types: dst: %s, src0: %s, src1: %s\n", __func__, ggml_type_name(dst->type),
|
||||
ggml_type_name(src0->type), ggml_type_name(src1->type));
|
||||
|
||||
@@ -59,7 +59,7 @@ bool gpu_has_xmx(sycl::device &dev) {
|
||||
return dev.has(sycl::aspect::ext_intel_matrix);
|
||||
}
|
||||
|
||||
static int ggml_sycl_get_env(const char *env_name, int default_val) {
|
||||
int ggml_sycl_get_env(const char *env_name, int default_val) {
|
||||
char *user_device_string = getenv(env_name);
|
||||
int user_number = default_val;
|
||||
|
||||
@@ -86,7 +86,7 @@ int64_t downsample_sycl_global_range(int64_t accumulate_block_num, int64_t block
|
||||
|
||||
#ifdef GGML_SYCL_SUPPORT_LEVEL_ZERO
|
||||
static bool ggml_sycl_use_level_zero_device_alloc(sycl::queue &q) {
|
||||
return ggml_sycl_get_env("GGML_SYCL_ENABLE_LEVEL_ZERO", 1) &&
|
||||
return g_ggml_sycl_enable_level_zero &&
|
||||
q.get_device().is_gpu() &&
|
||||
q.get_backend() == sycl::backend::ext_oneapi_level_zero;
|
||||
}
|
||||
@@ -94,8 +94,6 @@ static bool ggml_sycl_use_level_zero_device_alloc(sycl::queue &q) {
|
||||
|
||||
// Use Level Zero zeMemAllocDevice to avoid sycl::malloc_device triggering
|
||||
// DMA-buf/TTM system RAM staging in the xe kernel driver during multi-GPU inference.
|
||||
// The decision is made from the queue and runtime env because large buffers can be
|
||||
// allocated before ggml_check_sycl() initializes g_ggml_sycl_enable_level_zero.
|
||||
void * ggml_sycl_malloc_device(size_t size, sycl::queue &q) {
|
||||
#ifdef GGML_SYCL_SUPPORT_LEVEL_ZERO
|
||||
if (ggml_sycl_use_level_zero_device_alloc(q)) {
|
||||
|
||||
@@ -225,6 +225,7 @@ struct sycl_device_info {
|
||||
int max_wg_per_cu; // max work groups per compute unit - refer to
|
||||
// cudaOccupancyMaxActiveBlocksPerMultiprocessor
|
||||
bool vmm; // virtual memory support
|
||||
bool l0_discrete_gpu; // Level Zero backend and not an integrated GPU
|
||||
size_t vmm_granularity; // granularity of virtual memory
|
||||
size_t total_vram;
|
||||
sycl_hw_info hw_info;
|
||||
@@ -644,6 +645,8 @@ constexpr size_t ceil_div(const size_t m, const size_t n) {
|
||||
|
||||
bool gpu_has_xmx(sycl::device &dev);
|
||||
|
||||
int ggml_sycl_get_env(const char *env_name, int default_val);
|
||||
|
||||
template <int N, class T> std::string debug_get_array_str(const std::string & prefix, const T array[N]) {
|
||||
if (LIKELY(!g_ggml_sycl_debug)) {
|
||||
return "";
|
||||
|
||||
@@ -10,6 +10,8 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
|
||||
#include "ggml.h"
|
||||
|
||||
#include "concat.hpp"
|
||||
|
||||
static inline size_t elem_size(ggml_type t) {
|
||||
@@ -192,11 +194,29 @@ void ggml_sycl_op_concat(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
|
||||
case GGML_TYPE_F32:
|
||||
concat_impl_sycl<float>(ctx, dst);
|
||||
break;
|
||||
case GGML_TYPE_F16:
|
||||
concat_impl_sycl<sycl::half>(ctx, dst);
|
||||
break;
|
||||
#ifdef GGML_SYCL_HAS_BF16
|
||||
case GGML_TYPE_BF16:
|
||||
concat_impl_sycl<sycl::ext::oneapi::bfloat16>(ctx, dst);
|
||||
break;
|
||||
#endif
|
||||
case GGML_TYPE_I32:
|
||||
concat_impl_sycl<int32_t>(ctx, dst);
|
||||
break;
|
||||
case GGML_TYPE_I16:
|
||||
concat_impl_sycl<int16_t>(ctx, dst);
|
||||
break;
|
||||
case GGML_TYPE_I64:
|
||||
concat_impl_sycl<int64_t>(ctx, dst);
|
||||
break;
|
||||
case GGML_TYPE_I8:
|
||||
concat_impl_sycl<int8_t>(ctx, dst);
|
||||
break;
|
||||
default:
|
||||
GGML_ASSERT(false && "ggml_sycl_op_concat: unsupported type");
|
||||
fprintf(stderr, "%s: unsupported types: dst: %s\n", __func__, ggml_type_name(dst->type));
|
||||
GGML_ASSERT(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -48,6 +48,287 @@ inline void cpy_blck_f32_q8_0(const char * cxi, char * cdsti) {
|
||||
}
|
||||
}
|
||||
|
||||
inline void cpy_blck_f32_q1_0(const char * cxi, char * cdsti) {
|
||||
const float * xi = (const float *) cxi;
|
||||
block_q1_0 * dsti = (block_q1_0 *) cdsti;
|
||||
|
||||
float sum_abs = 0.0f;
|
||||
for (int j = 0; j < QK1_0; ++j) {
|
||||
sum_abs += sycl::fabs((float) xi[j]);
|
||||
}
|
||||
|
||||
dsti->d = sum_abs / QK1_0;
|
||||
|
||||
for (int j = 0; j < QK1_0 / 8; ++j) {
|
||||
dsti->qs[j] = 0;
|
||||
}
|
||||
|
||||
for (int j = 0; j < QK1_0; ++j) {
|
||||
if (xi[j] >= 0.0f) {
|
||||
dsti->qs[j / 8] |= (1u << (j % 8));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inline int best_index_mxfp4(const float x, const float e) {
|
||||
int best_index = 0;
|
||||
float best_err = sycl::fabs((float) (kvalues_mxfp4[0] * e - x));
|
||||
for (int i = 1; i < 16; ++i) {
|
||||
const float err = sycl::fabs((float) (kvalues_mxfp4[i] * e - x));
|
||||
if (err < best_err) {
|
||||
best_index = i;
|
||||
best_err = err;
|
||||
}
|
||||
}
|
||||
return best_index;
|
||||
}
|
||||
|
||||
inline int nearest_int_sycl(float x) {
|
||||
const float val = x + 12582912.0f;
|
||||
int i;
|
||||
memcpy(&i, &val, sizeof(int));
|
||||
return (i & 0x007fffff) - 0x00400000;
|
||||
}
|
||||
|
||||
inline int nearest_int_ggml_sycl(float x) {
|
||||
return (int) sycl::round((float) x);
|
||||
}
|
||||
|
||||
inline uint8_t clamp_u8(const int x, const int lo, const int hi) {
|
||||
return (uint8_t) dpct::max(lo, dpct::min(hi, x));
|
||||
}
|
||||
|
||||
inline int8_t clamp_i8(const int x, const int lo, const int hi) {
|
||||
return (int8_t) dpct::max(lo, dpct::min(hi, x));
|
||||
}
|
||||
|
||||
constexpr float GROUP_MAX_EPS_SYCL = 1e-15f;
|
||||
|
||||
inline float make_qx_quants_sycl(int n, int nmax, const float * x, int8_t * L, int rmse_type, const float * qw) {
|
||||
float max = 0.0f;
|
||||
float amax = 0.0f;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
const float ax = sycl::fabs(x[i]);
|
||||
if (ax > amax) {
|
||||
amax = ax;
|
||||
max = x[i];
|
||||
}
|
||||
}
|
||||
if (amax < GROUP_MAX_EPS_SYCL) {
|
||||
for (int i = 0; i < n; ++i) {
|
||||
L[i] = 0;
|
||||
}
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
float iscale = -nmax / max;
|
||||
if (rmse_type == 0) {
|
||||
for (int i = 0; i < n; ++i) {
|
||||
int l = nearest_int_ggml_sycl(iscale * x[i]);
|
||||
L[i] = (int8_t) (nmax + dpct::max(-nmax, dpct::min(nmax - 1, l)));
|
||||
}
|
||||
return 1.0f / iscale;
|
||||
}
|
||||
|
||||
bool return_early = false;
|
||||
if (rmse_type < 0) {
|
||||
rmse_type = -rmse_type;
|
||||
return_early = true;
|
||||
}
|
||||
|
||||
float sumlx = 0.0f;
|
||||
float suml2 = 0.0f;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
int l = nearest_int_ggml_sycl(iscale * x[i]);
|
||||
l = dpct::max(-nmax, dpct::min(nmax - 1, l));
|
||||
L[i] = (int8_t) (l + nmax);
|
||||
|
||||
const float w = qw ? qw[i] : (rmse_type == 1 ? x[i] * x[i] :
|
||||
rmse_type == 2 ? 1.0f : rmse_type == 3 ? sycl::fabs(x[i]) : sycl::sqrt(sycl::fabs(x[i])));
|
||||
|
||||
sumlx += w * x[i] * l;
|
||||
suml2 += w * l * l;
|
||||
}
|
||||
|
||||
float scale = suml2 ? sumlx / suml2 : 0.0f;
|
||||
if (return_early) {
|
||||
return suml2 > 0.0f ? 0.5f * (scale + 1.0f / iscale) : 1.0f / iscale;
|
||||
}
|
||||
|
||||
float best = scale * sumlx;
|
||||
for (int is = -9; is <= 9; ++is) {
|
||||
if (is == 0) {
|
||||
continue;
|
||||
}
|
||||
iscale = -(nmax + 0.1f * is) / max;
|
||||
sumlx = 0.0f;
|
||||
suml2 = 0.0f;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
int l = nearest_int_ggml_sycl(iscale * x[i]);
|
||||
l = dpct::max(-nmax, dpct::min(nmax - 1, l));
|
||||
const float w = qw ? qw[i] : (rmse_type == 1 ? x[i] * x[i] :
|
||||
rmse_type == 2 ? 1.0f : rmse_type == 3 ? sycl::fabs(x[i]) : sycl::sqrt(sycl::fabs(x[i])));
|
||||
sumlx += w * x[i] * l;
|
||||
suml2 += w * l * l;
|
||||
}
|
||||
|
||||
if (suml2 > 0.0f && sumlx * sumlx > best * suml2) {
|
||||
for (int i = 0; i < n; ++i) {
|
||||
int l = nearest_int_ggml_sycl(iscale * x[i]);
|
||||
L[i] = (int8_t) (nmax + dpct::max(-nmax, dpct::min(nmax - 1, l)));
|
||||
}
|
||||
scale = sumlx / suml2;
|
||||
best = scale * sumlx;
|
||||
}
|
||||
}
|
||||
|
||||
return scale;
|
||||
}
|
||||
|
||||
inline float make_q3_quants_sycl(int n, int nmax, const float * x, int8_t * L, bool do_rmse) {
|
||||
float max = 0.0f;
|
||||
float amax = 0.0f;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
const float ax = sycl::fabs(x[i]);
|
||||
if (ax > amax) {
|
||||
amax = ax;
|
||||
max = x[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (amax < GROUP_MAX_EPS_SYCL) {
|
||||
for (int i = 0; i < n; ++i) {
|
||||
L[i] = 0;
|
||||
}
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
const float iscale = -nmax / max;
|
||||
if (do_rmse) {
|
||||
float sumlx = 0.0f;
|
||||
float suml2 = 0.0f;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
int l = nearest_int_ggml_sycl(iscale * x[i]);
|
||||
l = dpct::max(-nmax, dpct::min(nmax - 1, l));
|
||||
L[i] = (int8_t) l;
|
||||
const float w = x[i] * x[i];
|
||||
sumlx += w * x[i] * l;
|
||||
suml2 += w * l * l;
|
||||
}
|
||||
|
||||
for (int itry = 0; itry < 5; ++itry) {
|
||||
int n_changed = 0;
|
||||
for (int i = 0; i < n; ++i) {
|
||||
const float w = x[i] * x[i];
|
||||
float slx = sumlx - w * x[i] * L[i];
|
||||
if (slx > 0.0f) {
|
||||
float sl2 = suml2 - w * L[i] * L[i];
|
||||
int new_l = nearest_int_ggml_sycl(x[i] * sl2 / slx);
|
||||
new_l = dpct::max(-nmax, dpct::min(nmax - 1, new_l));
|
||||
if (new_l != L[i]) {
|
||||
slx += w * x[i] * new_l;
|
||||
sl2 += w * new_l * new_l;
|
||||
if (sl2 > 0.0f && slx * slx * suml2 > sumlx * sumlx * sl2) {
|
||||
L[i] = (int8_t) new_l;
|
||||
sumlx = slx;
|
||||
suml2 = sl2;
|
||||
++n_changed;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!n_changed) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; ++i) {
|
||||
L[i] += nmax;
|
||||
}
|
||||
return suml2 > 0.0f ? sumlx / suml2 : 0.0f;
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; ++i) {
|
||||
int l = nearest_int_ggml_sycl(iscale * x[i]);
|
||||
l = dpct::max(-nmax, dpct::min(nmax - 1, l));
|
||||
L[i] = (int8_t) (l + nmax);
|
||||
}
|
||||
|
||||
return 1.0f / iscale;
|
||||
}
|
||||
|
||||
inline void set_scale_min_k4(int j, uint8_t * q, uint8_t d, uint8_t m) {
|
||||
if (j < 4) {
|
||||
q[j] = (q[j] & 0xC0) | (d & 0x3F);
|
||||
q[j + 4] = (q[j + 4] & 0xC0) | (m & 0x3F);
|
||||
} else {
|
||||
q[j + 4] = (d & 0x0F) | ((m & 0x0F) << 4);
|
||||
q[j - 4] = (q[j - 4] & 0x3F) | ((d >> 4) << 6);
|
||||
q[j - 0] = (q[j - 0] & 0x3F) | ((m >> 4) << 6);
|
||||
}
|
||||
}
|
||||
|
||||
inline void get_scale_min_k4_local(int j, const uint8_t * q, uint8_t & d, uint8_t & m) {
|
||||
if (j < 4) {
|
||||
d = q[j] & 63;
|
||||
m = q[j + 4] & 63;
|
||||
} else {
|
||||
d = (q[j + 4] & 0xF) | ((q[j - 4] >> 6) << 4);
|
||||
m = (q[j + 4] >> 4) | ((q[j - 0] >> 6) << 4);
|
||||
}
|
||||
}
|
||||
|
||||
inline void cpy_blck_f32_mxfp4(const char * cxi, char * cdsti) {
|
||||
const float * xi = (const float *) cxi;
|
||||
block_mxfp4 * dsti = (block_mxfp4 *) cdsti;
|
||||
|
||||
float amax = 0.0f;
|
||||
for (int j = 0; j < QK_MXFP4; ++j) {
|
||||
amax = sycl::fmax(amax, sycl::fabs((float) xi[j]));
|
||||
}
|
||||
|
||||
const uint8_t e = amax > 0.0f ? (uint8_t) (sycl::floor(sycl::log2(amax)) - 2 + 127) : 0;
|
||||
const float d = GGML_E8M0_TO_FP32_HALF(e);
|
||||
|
||||
dsti->e = e;
|
||||
|
||||
for (int j = 0; j < QK_MXFP4 / 2; ++j) {
|
||||
const uint8_t x0 = best_index_mxfp4(xi[0 + j], d);
|
||||
const uint8_t x1 = best_index_mxfp4(xi[QK_MXFP4 / 2 + j], d);
|
||||
|
||||
dsti->qs[j] = x0;
|
||||
dsti->qs[j] |= x1 << 4;
|
||||
}
|
||||
}
|
||||
|
||||
inline void cpy_blck_f32_nvfp4(const char * cxi, char * cdsti) {
|
||||
const float * xi = (const float *) cxi;
|
||||
block_nvfp4 * dsti = (block_nvfp4 *) cdsti;
|
||||
|
||||
constexpr int n_sub = QK_NVFP4 / QK_NVFP4_SUB;
|
||||
|
||||
for (int s = 0; s < n_sub; ++s) {
|
||||
const float * xb = xi + s * QK_NVFP4_SUB;
|
||||
|
||||
float amax = 0.0f;
|
||||
for (int j = 0; j < QK_NVFP4_SUB; ++j) {
|
||||
amax = sycl::fmax(amax, sycl::fabs((float) xb[j]));
|
||||
}
|
||||
|
||||
const uint8_t ue = ggml_fp32_to_ue4m3(amax / 6.0f);
|
||||
dsti->d[s] = ue;
|
||||
const float d = ggml_ue4m3_to_fp32(ue);
|
||||
|
||||
for (int j = 0; j < QK_NVFP4_SUB / 2; ++j) {
|
||||
const uint8_t x0 = best_index_mxfp4(xb[0 + j], d);
|
||||
const uint8_t x1 = best_index_mxfp4(xb[QK_NVFP4_SUB / 2 + j], d);
|
||||
|
||||
dsti->qs[s * (QK_NVFP4_SUB / 2) + j] = x0 | (x1 << 4);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline void cpy_blck_f32_q4_0(const char * cxi, char * cdsti) {
|
||||
const float * xi = (const float *) cxi;
|
||||
block_q4_0 * dsti = (block_q4_0 *) cdsti;
|
||||
|
||||
+428
-311
File diff suppressed because it is too large
Load Diff
@@ -124,6 +124,11 @@ static __dpct_inline__ T op_exp(T x) {
|
||||
return sycl::exp(x);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static __dpct_inline__ T op_expm1(T x) {
|
||||
return sycl::expm1(x);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static __dpct_inline__ T op_log(T x) {
|
||||
if (x <= static_cast<T>(0)) {
|
||||
@@ -266,13 +271,6 @@ static void unary_op_clamp_kernel(const T * x, T * dst, const int k, const sycl:
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static void unary_op_floor_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
|
||||
SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
|
||||
dst[i] = op_floor(x[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static void unary_op_ceil_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
|
||||
SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
|
||||
@@ -280,20 +278,6 @@ static void unary_op_ceil_kernel(const T * x, T * dst, const int k, const sycl::
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static void unary_op_round_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
|
||||
SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
|
||||
dst[i] = op_round(x[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static void unary_op_trunc_kernel(const T * x, T * dst, const int k, const sycl::nd_item<1> &item_ct1) {
|
||||
SYCL_GLOBAL_ID_LOOP(k, item_ct1) {
|
||||
dst[i] = op_trunc(x[i]);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
static void clamp(const T * x, T * dst, const float min, const float max, const int k,
|
||||
const sycl::nd_item<1> &item_ct1) {
|
||||
@@ -605,6 +589,12 @@ static inline void ggml_sycl_op_exp(ggml_backend_sycl_context & ctx, ggml_tensor
|
||||
});
|
||||
}
|
||||
|
||||
static inline void ggml_sycl_op_expm1(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
||||
ggml_sycl_detail::ggml_sycl_op_unary(ctx, dst, [](auto x) {
|
||||
return op_expm1(x);
|
||||
});
|
||||
}
|
||||
|
||||
static inline void ggml_sycl_op_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
||||
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
|
||||
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
|
||||
@@ -728,16 +718,9 @@ static inline void ggml_sycl_op_clamp(ggml_backend_sycl_context & ctx, ggml_tens
|
||||
}
|
||||
|
||||
static inline void ggml_sycl_op_floor(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
||||
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
|
||||
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
|
||||
const int num_blocks = ceil_div(k_elements, 256);
|
||||
stream->parallel_for(
|
||||
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
|
||||
sycl::range<1>(256)),
|
||||
[=](sycl::nd_item<1> item_ct1) {
|
||||
unary_op_floor_kernel(src, dst_ptr, k_elements, item_ct1);
|
||||
});
|
||||
});
|
||||
ggml_sycl_detail::ggml_sycl_op_unary(ctx, dst, [](auto x) {
|
||||
return op_floor(x);
|
||||
});
|
||||
}
|
||||
|
||||
static inline void ggml_sycl_op_ceil(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
||||
@@ -747,29 +730,15 @@ static inline void ggml_sycl_op_ceil(ggml_backend_sycl_context & ctx, ggml_tenso
|
||||
}
|
||||
|
||||
static inline void ggml_sycl_op_round(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
||||
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
|
||||
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
|
||||
const int num_blocks = ceil_div(k_elements, 256);
|
||||
stream->parallel_for(
|
||||
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
|
||||
sycl::range<1>(256)),
|
||||
[=](sycl::nd_item<1> item_ct1) {
|
||||
unary_op_round_kernel(src, dst_ptr, k_elements, item_ct1);
|
||||
});
|
||||
});
|
||||
ggml_sycl_detail::ggml_sycl_op_unary(ctx, dst, [](auto x) {
|
||||
return op_round(x);
|
||||
});
|
||||
}
|
||||
|
||||
static inline void ggml_sycl_op_trunc(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
||||
ggml_sycl_detail::dispatch_ggml_sycl_op_unary(ctx, dst,
|
||||
[](const auto* src, auto* dst_ptr, int k_elements, queue_ptr stream) {
|
||||
const int num_blocks = ceil_div(k_elements, 256);
|
||||
stream->parallel_for(
|
||||
sycl::nd_range<1>(sycl::range<1>(num_blocks) * sycl::range<1>(256),
|
||||
sycl::range<1>(256)),
|
||||
[=](sycl::nd_item<1> item_ct1) {
|
||||
unary_op_trunc_kernel(src, dst_ptr, k_elements, item_ct1);
|
||||
});
|
||||
});
|
||||
ggml_sycl_detail::ggml_sycl_op_unary(ctx, dst, [](auto x) {
|
||||
return op_trunc(x);
|
||||
});
|
||||
}
|
||||
|
||||
static inline void ggml_sycl_op_acc(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
|
||||
@@ -1018,6 +987,11 @@ void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
||||
ggml_sycl_op_exp(ctx, dst);
|
||||
}
|
||||
|
||||
void ggml_sycl_expm1(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
||||
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
||||
ggml_sycl_op_expm1(ctx, dst);
|
||||
}
|
||||
|
||||
void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
||||
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
||||
ggml_sycl_op_log(ctx, dst);
|
||||
|
||||
@@ -59,6 +59,8 @@ void ggml_sycl_hardswish(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_sycl_exp(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_sycl_expm1(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_sycl_log(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
||||
|
||||
void ggml_sycl_softplus(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
||||
|
||||
+258
-161
@@ -70,6 +70,7 @@
|
||||
#include "ggml-sycl/diag.hpp"
|
||||
#include "ggml-sycl/solve_tri.hpp"
|
||||
#include "ggml-sycl/gated_delta_net.hpp"
|
||||
#include "ggml-sycl/pool.hpp"
|
||||
|
||||
static bool g_sycl_loaded = false;
|
||||
int g_ggml_sycl_debug = 0;
|
||||
@@ -147,11 +148,31 @@ static ggml_sycl_device_info ggml_sycl_init() {
|
||||
GGML_LOG_WARN("SYCL GPU device %d does not use Level Zero backend, disabling Level Zero memory API\n", i);
|
||||
info.ext_oneapi_level_zero = false;
|
||||
}
|
||||
|
||||
#ifdef GGML_SYCL_SUPPORT_LEVEL_ZERO
|
||||
if (info.ext_oneapi_level_zero && device.is_gpu() && device.default_queue().get_backend() == sycl::backend::ext_oneapi_level_zero) {
|
||||
ze_device_handle_t ze_dev = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(device.default_queue().get_device());
|
||||
ze_device_properties_t props = {};
|
||||
props.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
|
||||
ze_result_t r = zeDeviceGetProperties(ze_dev, &props);
|
||||
info.devices[i].l0_discrete_gpu = r == ZE_RESULT_SUCCESS && !(props.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
for (int id = 0; id < info.device_count; ++id) {
|
||||
info.default_tensor_split[id] /= total_vram;
|
||||
}
|
||||
|
||||
#ifdef GGML_SYCL_SUPPORT_LEVEL_ZERO
|
||||
// Large buffers can be allocated before ggml_check_sycl() initializes other
|
||||
// g_ggml_sycl_enable_* globals, so initialize this one as early as we can.
|
||||
g_ggml_sycl_enable_level_zero =
|
||||
info.ext_oneapi_level_zero && ggml_sycl_get_env("GGML_SYCL_ENABLE_LEVEL_ZERO", 1);
|
||||
#else
|
||||
g_ggml_sycl_enable_level_zero = 0;
|
||||
#endif
|
||||
|
||||
return info;
|
||||
}
|
||||
|
||||
@@ -236,38 +257,19 @@ void ggml_backend_sycl_print_sycl_devices() {
|
||||
print_device_opt_feature(device_count);
|
||||
}
|
||||
|
||||
static inline int get_sycl_env(const char *env_name, int default_val) {
|
||||
char *user_device_string = getenv(env_name);
|
||||
int user_number = default_val;
|
||||
|
||||
unsigned n;
|
||||
if (user_device_string != NULL &&
|
||||
sscanf(user_device_string, " %u", &n) == 1) {
|
||||
user_number = (int)n;
|
||||
} else {
|
||||
user_number = default_val;
|
||||
}
|
||||
return user_number;
|
||||
}
|
||||
|
||||
static void ggml_check_sycl() try {
|
||||
static bool initialized = false;
|
||||
|
||||
if (!initialized) {
|
||||
g_ggml_sycl_debug = get_sycl_env("GGML_SYCL_DEBUG", 0);
|
||||
g_ggml_sycl_disable_optimize = get_sycl_env("GGML_SYCL_DISABLE_OPT", 0);
|
||||
g_ggml_sycl_disable_graph = get_sycl_env("GGML_SYCL_DISABLE_GRAPH", 1);
|
||||
g_ggml_sycl_disable_dnn = get_sycl_env("GGML_SYCL_DISABLE_DNN", 0);
|
||||
g_ggml_sycl_enable_vmm = get_sycl_env("GGML_SYCL_ENABLE_VMM", 1);
|
||||
g_ggml_sycl_prioritize_dmmv = get_sycl_env("GGML_SYCL_PRIORITIZE_DMMV", 0);
|
||||
#ifdef GGML_SYCL_SUPPORT_LEVEL_ZERO
|
||||
g_ggml_sycl_enable_level_zero = get_sycl_env("GGML_SYCL_ENABLE_LEVEL_ZERO", ggml_sycl_info().ext_oneapi_level_zero);
|
||||
#else
|
||||
g_ggml_sycl_enable_level_zero = 0;
|
||||
#endif
|
||||
g_ggml_sycl_debug = ggml_sycl_get_env("GGML_SYCL_DEBUG", 0);
|
||||
g_ggml_sycl_disable_optimize = ggml_sycl_get_env("GGML_SYCL_DISABLE_OPT", 0);
|
||||
g_ggml_sycl_disable_graph = ggml_sycl_get_env("GGML_SYCL_DISABLE_GRAPH", 1);
|
||||
g_ggml_sycl_disable_dnn = ggml_sycl_get_env("GGML_SYCL_DISABLE_DNN", 0);
|
||||
g_ggml_sycl_enable_vmm = ggml_sycl_get_env("GGML_SYCL_ENABLE_VMM", 1);
|
||||
g_ggml_sycl_prioritize_dmmv = ggml_sycl_get_env("GGML_SYCL_PRIORITIZE_DMMV", 0);
|
||||
|
||||
#ifdef SYCL_FLASH_ATTN
|
||||
g_ggml_sycl_enable_flash_attention = get_sycl_env("GGML_SYCL_ENABLE_FLASH_ATTN", 1);
|
||||
g_ggml_sycl_enable_flash_attention = ggml_sycl_get_env("GGML_SYCL_ENABLE_FLASH_ATTN", 1);
|
||||
#else
|
||||
g_ggml_sycl_enable_flash_attention = 0;
|
||||
#endif
|
||||
@@ -330,7 +332,7 @@ static void ggml_check_sycl() try {
|
||||
GGML_LOG_INFO(" GGML_SYCL_ENABLE_VMM: virtual memory extension is not available\n");
|
||||
#endif
|
||||
GGML_LOG_INFO(" GGML_SYCL_PRIORITIZE_DMMV: %d\n", g_ggml_sycl_prioritize_dmmv);
|
||||
g_ggml_sycl_use_async_mem_op_requested = get_sycl_env("GGML_SYCL_USE_ASYNC_MEM_OP", 1);
|
||||
g_ggml_sycl_use_async_mem_op_requested = ggml_sycl_get_env("GGML_SYCL_USE_ASYNC_MEM_OP", 1);
|
||||
GGML_LOG_INFO(" GGML_SYCL_USE_ASYNC_MEM_OP: %d\n", g_ggml_sycl_use_async_mem_op_requested);
|
||||
|
||||
#ifdef SYCL_FLASH_ATTN
|
||||
@@ -569,26 +571,18 @@ catch (sycl::exception const &exc) {
|
||||
}
|
||||
|
||||
#ifdef GGML_SYCL_SUPPORT_LEVEL_ZERO
|
||||
static bool ggml_sycl_is_l0_discrete_gpu(sycl::queue &q) {
|
||||
if (!q.get_device().is_gpu() || q.get_backend() != sycl::backend::ext_oneapi_level_zero) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ze_device_handle_t ze_dev = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(q.get_device());
|
||||
ze_device_properties_t props = {};
|
||||
props.stype = ZE_STRUCTURE_TYPE_DEVICE_PROPERTIES;
|
||||
ze_result_t r = zeDeviceGetProperties(ze_dev, &props);
|
||||
return r == ZE_RESULT_SUCCESS && !(props.flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED);
|
||||
static bool ggml_sycl_is_l0_discrete_gpu(int device) {
|
||||
return ggml_sycl_info().devices[device].l0_discrete_gpu;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void dev2dev_memcpy(sycl::queue &q_dst, sycl::queue &q_src, void *ptr_dst,
|
||||
static void dev2dev_memcpy(int device_dst, sycl::queue &q_dst, int device_src, sycl::queue &q_src, void *ptr_dst,
|
||||
const void *ptr_src, size_t size) {
|
||||
#ifdef GGML_SYCL_SUPPORT_LEVEL_ZERO
|
||||
// Use Level Zero direct copy for dGPU-to-dGPU transfers.
|
||||
const bool l0_copy_supported =
|
||||
ggml_sycl_is_l0_discrete_gpu(q_dst) && ggml_sycl_is_l0_discrete_gpu(q_src);
|
||||
if (g_ggml_sycl_enable_level_zero && l0_copy_supported) {
|
||||
const bool l0_copy_supported = g_ggml_sycl_enable_level_zero &&
|
||||
ggml_sycl_is_l0_discrete_gpu(device_dst) && ggml_sycl_is_l0_discrete_gpu(device_src);
|
||||
if (l0_copy_supported) {
|
||||
auto ze_ctx = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(q_dst.get_context());
|
||||
auto ze_dev = sycl::get_native<sycl::backend::ext_oneapi_level_zero>(q_dst.get_device());
|
||||
ze_command_queue_desc_t cq_desc = {ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC, nullptr, 0, 0,
|
||||
@@ -651,7 +645,7 @@ ggml_backend_sycl_buffer_cpy_tensor(ggml_backend_buffer_t buffer,
|
||||
size_t size = ggml_nbytes(src);
|
||||
|
||||
//todo. it's dirty solutino to walkaroud known issue:device2device cross GPUs.
|
||||
dev2dev_memcpy(*stream_dst, *stream_src, dst->data, src->data, size);
|
||||
dev2dev_memcpy(dst_ctx->device, *stream_dst, src_ctx->device, *stream_src, dst->data, src->data, size);
|
||||
|
||||
//todo, it's known issue:error in device2device cross GPUs. reused when the issue is fixed. DON"T remove
|
||||
#if 0
|
||||
@@ -1947,69 +1941,6 @@ static void scale_f32(const float * x, float * dst, const float scale, const flo
|
||||
}
|
||||
|
||||
|
||||
template <typename Ti, typename To>
|
||||
static void pool2d_nchw_kernel(
|
||||
const int ih, const int iw, const int oh, const int ow,
|
||||
const int kh, const int kw, const int sh, const int sw,
|
||||
const int ph, const int pw, const int parallel_elements,
|
||||
const Ti* src, To* dst, const enum ggml_op_pool op,
|
||||
const sycl::nd_item<3> &item_ct1) {
|
||||
int idx = item_ct1.get_local_id(2) +
|
||||
item_ct1.get_group(2) * item_ct1.get_local_range(2);
|
||||
if (idx >= parallel_elements) {
|
||||
return;
|
||||
}
|
||||
|
||||
const int I_HW = ih * iw;
|
||||
const int O_HW = oh * ow;
|
||||
const int nc = idx / O_HW;
|
||||
const int cur_oh = idx % O_HW / ow;
|
||||
const int cur_ow = idx % O_HW % ow;
|
||||
const Ti* i_ptr = src + nc * I_HW;
|
||||
To* o_ptr = dst + nc * O_HW;
|
||||
const int start_h = cur_oh * sh - ph;
|
||||
const int bh = sycl::max(0, start_h);
|
||||
const int eh = sycl::min(ih, start_h + kh);
|
||||
const int start_w = cur_ow * sw - pw;
|
||||
const int bw = sycl::max(0, start_w);
|
||||
const int ew = sycl::min(iw, start_w + kw);
|
||||
|
||||
To res = 0;
|
||||
|
||||
switch (op) {
|
||||
case GGML_OP_POOL_AVG: res = 0; break;
|
||||
case GGML_OP_POOL_MAX: res = -FLT_MAX; break;
|
||||
default:
|
||||
res = (To) sycl::nan(uint32_t(0));
|
||||
break;
|
||||
}
|
||||
|
||||
for (int i = bh; i < eh; i += 1) {
|
||||
for (int j = bw; j < ew; j += 1) {
|
||||
#if DPCT_COMPATIBILITY_TEMP >= 350
|
||||
/*
|
||||
DPCT1098:106: The '*' expression is used instead of the __ldg
|
||||
call. These two expressions do not provide the exact same
|
||||
functionality. Check the generated code for potential precision
|
||||
and/or performance issues.
|
||||
*/
|
||||
Ti cur = *(i_ptr + i * iw + j);
|
||||
#else
|
||||
Ti cur = i_ptr[i * iw + j];
|
||||
#endif
|
||||
switch (op) {
|
||||
case GGML_OP_POOL_AVG: res += (cur / (kh * kw)); break;
|
||||
case GGML_OP_POOL_MAX: res = sycl::max(res, (To)cur); break;
|
||||
default:
|
||||
res = (To) sycl::nan(uint32_t(0));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
o_ptr[cur_oh * ow + cur_ow] = res;
|
||||
}
|
||||
|
||||
|
||||
static void ggml_mul_mat_p021_f16_f32_sycl(const void *vx, const float *y,
|
||||
float *dst, const int ncols_x,
|
||||
const int nrows_x,
|
||||
@@ -2558,45 +2489,6 @@ catch (sycl::exception const &exc) {
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
static void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
||||
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
||||
dpct::queue_ptr main_stream = ctx.stream();
|
||||
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
|
||||
const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
|
||||
float * dst_dd = static_cast<float *>(dst->data);
|
||||
|
||||
const int32_t * opts = (const int32_t *)dst->op_params;
|
||||
enum ggml_op_pool op = static_cast<ggml_op_pool>(opts[0]);
|
||||
const int k0 = opts[1];
|
||||
const int k1 = opts[2];
|
||||
const int s0 = opts[3];
|
||||
const int s1 = opts[4];
|
||||
const int p0 = opts[5];
|
||||
const int p1 = opts[6];
|
||||
|
||||
const int64_t IH = dst->src[0]->ne[1];
|
||||
const int64_t IW = dst->src[0]->ne[0];
|
||||
|
||||
const int64_t N = dst->ne[3];
|
||||
const int64_t OC = dst->ne[2];
|
||||
const int64_t OH = dst->ne[1];
|
||||
const int64_t OW = dst->ne[0];
|
||||
|
||||
const int parallel_elements = N * OC * OH * OW;
|
||||
const int num_blocks = (parallel_elements + SYCL_POOL2D_BLOCK_SIZE - 1) / SYCL_POOL2D_BLOCK_SIZE;
|
||||
sycl::range<3> block_nums(1, 1, num_blocks);
|
||||
main_stream->parallel_for(
|
||||
sycl::nd_range<3>(block_nums *
|
||||
sycl::range<3>(1, 1, SYCL_IM2COL_BLOCK_SIZE),
|
||||
sycl::range<3>(1, 1, SYCL_IM2COL_BLOCK_SIZE)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
pool2d_nchw_kernel(IH, IW, OH, OW, k1, k0, s1, s0, p1, p0,
|
||||
parallel_elements, src0_dd, dst_dd, op,
|
||||
item_ct1);
|
||||
});
|
||||
}
|
||||
|
||||
inline void ggml_sycl_op_sum(ggml_backend_sycl_context & ctx, ggml_tensor *dst) {
|
||||
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
||||
@@ -3056,7 +2948,7 @@ static void ggml_sycl_op_mul_mat(ggml_backend_sycl_context & ctx, const ggml_ten
|
||||
src1_ddf_i_source += (i0 * ne11 + src1_col_0) * ne10;
|
||||
|
||||
SYCL_CHECK(
|
||||
CHECK_TRY_ERROR(dev2dev_memcpy(*stream, *main_stream, src1_ddf_i, src1_ddf_i_source,
|
||||
CHECK_TRY_ERROR(dev2dev_memcpy(i, *stream, ctx.device, *main_stream, src1_ddf_i, src1_ddf_i_source,
|
||||
src1_ncols * ne10 * sizeof(float))));
|
||||
}
|
||||
}
|
||||
@@ -3793,6 +3685,149 @@ static bool reorder_qw_q4_k(uint8_t * data_device, size_t size, size_t offset, d
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reorder each expert slice into a self-contained SoA layout.
|
||||
static bool reorder_qw_q4_k_moe(uint8_t * data_device, size_t expert_bytes, int64_t n_expert, dpct::queue_ptr stream) {
|
||||
GGML_ASSERT(expert_bytes % sizeof(block_q4_K) == 0);
|
||||
const int blocks_per_expert = (int) (expert_bytes / sizeof(block_q4_K));
|
||||
const size_t total_bytes = expert_bytes * (size_t) n_expert;
|
||||
|
||||
sycl_reorder_temp_buffer tmp(stream, total_bytes);
|
||||
if (!tmp) {
|
||||
GGML_LOG_WARN("%s: failed to allocate %zu bytes for reorder temp buffer, skipping reorder\n", __func__, total_bytes);
|
||||
return false;
|
||||
}
|
||||
uint8_t * tmp_buf = static_cast<uint8_t *>(tmp.ptr);
|
||||
|
||||
sycl::event copy_event;
|
||||
SYCL_CHECK(CHECK_TRY_ERROR(copy_event = stream->memcpy(tmp_buf, data_device, total_bytes)));
|
||||
if (!g_ggml_sycl_use_async_mem_op) {
|
||||
copy_event.wait();
|
||||
}
|
||||
|
||||
const int total_blocks = blocks_per_expert * (int) n_expert;
|
||||
auto reorder_event = stream->parallel_for(total_blocks, [=](auto gb_) {
|
||||
const int gb = gb_;
|
||||
const int e = gb / blocks_per_expert;
|
||||
const int ib = gb % blocks_per_expert;
|
||||
const block_q4_K * x = (const block_q4_K *) (tmp_buf + (size_t) e * expert_bytes);
|
||||
uint8_t * base = data_device + (size_t) e * expert_bytes;
|
||||
|
||||
auto * qs_ptr = base;
|
||||
auto * scales_ptr = qs_ptr + QK_K / 2 * blocks_per_expert;
|
||||
auto * dm_ptr = (sycl::half2 *) (scales_ptr + K_SCALE_SIZE * blocks_per_expert);
|
||||
|
||||
for (int j = 0; j < QK_K / 2; ++j) {
|
||||
qs_ptr[ib * (QK_K / 2) + j] = x[ib].qs[j];
|
||||
}
|
||||
for (int j = 0; j < K_SCALE_SIZE; ++j) {
|
||||
scales_ptr[ib * K_SCALE_SIZE + j] = x[ib].scales[j];
|
||||
}
|
||||
dm_ptr[ib] = x[ib].dm;
|
||||
});
|
||||
if (!g_ggml_sycl_use_async_mem_op) {
|
||||
reorder_event.wait_and_throw();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reorder each Q5_K expert slice into [qs][qh][scales][dm].
|
||||
static bool reorder_qw_q5_k_moe(uint8_t * data_device, size_t expert_bytes, int64_t n_expert, dpct::queue_ptr stream) {
|
||||
GGML_ASSERT(expert_bytes % sizeof(block_q5_K) == 0);
|
||||
const int blocks_per_expert = (int) (expert_bytes / sizeof(block_q5_K));
|
||||
const size_t total_bytes = expert_bytes * (size_t) n_expert;
|
||||
|
||||
sycl_reorder_temp_buffer tmp(stream, total_bytes);
|
||||
if (!tmp) {
|
||||
GGML_LOG_WARN("%s: failed to allocate %zu bytes for reorder temp buffer, skipping reorder\n", __func__, total_bytes);
|
||||
return false;
|
||||
}
|
||||
uint8_t * tmp_buf = static_cast<uint8_t *>(tmp.ptr);
|
||||
|
||||
sycl::event copy_event;
|
||||
SYCL_CHECK(CHECK_TRY_ERROR(copy_event = stream->memcpy(tmp_buf, data_device, total_bytes)));
|
||||
if (!g_ggml_sycl_use_async_mem_op) {
|
||||
copy_event.wait();
|
||||
}
|
||||
|
||||
const int total_blocks = blocks_per_expert * (int) n_expert;
|
||||
auto reorder_event = stream->parallel_for(total_blocks, [=](auto gb_) {
|
||||
const int gb = gb_;
|
||||
const int e = gb / blocks_per_expert;
|
||||
const int ib = gb % blocks_per_expert;
|
||||
const block_q5_K * x = (const block_q5_K *) (tmp_buf + (size_t) e * expert_bytes);
|
||||
uint8_t * base = data_device + (size_t) e * expert_bytes;
|
||||
|
||||
auto * qs_ptr = base;
|
||||
auto * qh_ptr = qs_ptr + (QK_K / 2) * blocks_per_expert;
|
||||
auto * scales_ptr = qh_ptr + (QK_K / 8) * blocks_per_expert;
|
||||
auto * dm_ptr = (sycl::half2 *) (scales_ptr + K_SCALE_SIZE * blocks_per_expert);
|
||||
|
||||
for (int j = 0; j < QK_K / 2; ++j) {
|
||||
qs_ptr[ib * (QK_K / 2) + j] = x[ib].qs[j];
|
||||
}
|
||||
for (int j = 0; j < QK_K / 8; ++j) {
|
||||
qh_ptr[ib * (QK_K / 8) + j] = x[ib].qh[j];
|
||||
}
|
||||
for (int j = 0; j < K_SCALE_SIZE; ++j) {
|
||||
scales_ptr[ib * K_SCALE_SIZE + j] = x[ib].scales[j];
|
||||
}
|
||||
dm_ptr[ib] = x[ib].dm;
|
||||
});
|
||||
if (!g_ggml_sycl_use_async_mem_op) {
|
||||
reorder_event.wait_and_throw();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reorder each Q6_K expert slice into [ql][qh][scales][d].
|
||||
static bool reorder_qw_q6_k_moe(uint8_t * data_device, size_t expert_bytes, int64_t n_expert, dpct::queue_ptr stream) {
|
||||
GGML_ASSERT(expert_bytes % sizeof(block_q6_K) == 0);
|
||||
const int blocks_per_expert = (int) (expert_bytes / sizeof(block_q6_K));
|
||||
const size_t total_bytes = expert_bytes * (size_t) n_expert;
|
||||
|
||||
sycl_reorder_temp_buffer tmp(stream, total_bytes);
|
||||
if (!tmp) {
|
||||
GGML_LOG_WARN("%s: failed to allocate %zu bytes for reorder temp buffer, skipping reorder\n", __func__, total_bytes);
|
||||
return false;
|
||||
}
|
||||
uint8_t * tmp_buf = static_cast<uint8_t *>(tmp.ptr);
|
||||
|
||||
sycl::event copy_event;
|
||||
SYCL_CHECK(CHECK_TRY_ERROR(copy_event = stream->memcpy(tmp_buf, data_device, total_bytes)));
|
||||
if (!g_ggml_sycl_use_async_mem_op) {
|
||||
copy_event.wait();
|
||||
}
|
||||
|
||||
const int total_blocks = blocks_per_expert * (int) n_expert;
|
||||
auto reorder_event = stream->parallel_for(total_blocks, [=](auto gb_) {
|
||||
const int gb = gb_;
|
||||
const int e = gb / blocks_per_expert;
|
||||
const int ib = gb % blocks_per_expert;
|
||||
const block_q6_K * x = (const block_q6_K *) (tmp_buf + (size_t) e * expert_bytes);
|
||||
uint8_t * base = data_device + (size_t) e * expert_bytes;
|
||||
|
||||
auto * ql_ptr = base;
|
||||
auto * qh_ptr = ql_ptr + (QK_K / 2) * blocks_per_expert;
|
||||
auto * scales_ptr = qh_ptr + (QK_K / 4) * blocks_per_expert;
|
||||
auto * d_ptr = (sycl::half *) (scales_ptr + (QK_K / 16) * blocks_per_expert);
|
||||
|
||||
for (int j = 0; j < QK_K / 2; ++j) {
|
||||
ql_ptr[ib * (QK_K / 2) + j] = x[ib].ql[j];
|
||||
}
|
||||
for (int j = 0; j < QK_K / 4; ++j) {
|
||||
qh_ptr[ib * (QK_K / 4) + j] = x[ib].qh[j];
|
||||
}
|
||||
for (int j = 0; j < QK_K / 16; ++j) {
|
||||
scales_ptr[ib * (QK_K / 16) + j] = x[ib].scales[j];
|
||||
}
|
||||
d_ptr[ib] = x[ib].d;
|
||||
});
|
||||
if (!g_ggml_sycl_use_async_mem_op) {
|
||||
reorder_event.wait_and_throw();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool reorder_qw_q3_k(uint8_t * data_device, size_t size, size_t offset, dpct::queue_ptr stream) {
|
||||
GGML_ASSERT(size % sizeof(block_q3_K) == 0);
|
||||
GGML_ASSERT(offset % sizeof(block_q3_K) == 0);
|
||||
@@ -3948,6 +3983,22 @@ static bool reorder_qw(const ggml_tensor * src0, dpct::queue_ptr stream) {
|
||||
size_t nrows = src0->ne[1];
|
||||
size_t size = ggml_nbytes(src0);
|
||||
|
||||
// MoE expert weights are addressed per expert via nb[2], so each slice must
|
||||
// remain self-contained after reorder.
|
||||
if (src0->ne[2] > 1) {
|
||||
GGML_ASSERT((size_t) size == (size_t) src0->ne[2] * src0->nb[2]);
|
||||
switch (src0->type) {
|
||||
case GGML_TYPE_Q4_K:
|
||||
return reorder_qw_q4_k_moe(data_device, src0->nb[2], src0->ne[2], stream);
|
||||
case GGML_TYPE_Q5_K:
|
||||
return reorder_qw_q5_k_moe(data_device, src0->nb[2], src0->ne[2], stream);
|
||||
case GGML_TYPE_Q6_K:
|
||||
return reorder_qw_q6_k_moe(data_device, src0->nb[2], src0->ne[2], stream);
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
switch (src0->type) {
|
||||
case GGML_TYPE_Q4_0:
|
||||
return reorder_qw_q4_0(data_device, ncols, nrows, size, 0, stream);
|
||||
@@ -3962,7 +4013,6 @@ static bool reorder_qw(const ggml_tensor * src0, dpct::queue_ptr stream) {
|
||||
case GGML_TYPE_Q6_K:
|
||||
return reorder_qw_q6_k(data_device, size, 0, stream);
|
||||
default:
|
||||
GGML_ABORT("reorder_qw() called with unsupported type");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -4010,6 +4060,23 @@ static void opt_for_reorder(ggml_backend_sycl_context * ctx, const ggml_tensor *
|
||||
}
|
||||
}
|
||||
|
||||
// Lazily reorder supported MoE expert weights once their fused path is used.
|
||||
static void opt_for_reorder_id(ggml_backend_sycl_context * ctx, const ggml_tensor * src0) {
|
||||
if (g_ggml_sycl_disable_optimize || !ctx->opt_feature.reorder) {
|
||||
return;
|
||||
}
|
||||
if (src0->type != GGML_TYPE_Q4_K && src0->type != GGML_TYPE_Q5_K && src0->type != GGML_TYPE_Q6_K) {
|
||||
return;
|
||||
}
|
||||
ggml_tensor_extra_gpu * extra = static_cast<ggml_tensor_extra_gpu *>(src0->extra);
|
||||
if (!extra || extra->optimized_feature.reorder) {
|
||||
return;
|
||||
}
|
||||
if (reorder_qw(src0, ctx->stream())) {
|
||||
extra->optimized_feature.reorder = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static bool can_use_dequantize_mul_mat_vec(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
|
||||
// The F16/BF16 qk=1 kernel iterates with stride 2*DMMV_X, requiring ne[0] to be
|
||||
@@ -4175,11 +4242,6 @@ static bool ggml_sycl_mul_mat_id_mmvq_fused(
|
||||
if (ne10 != src0->ne[0] || ne10 % QK8_1 != 0) return false;
|
||||
if (!ggml_is_contiguous(src1)) return false;
|
||||
|
||||
// Reorder layout not supported; fall back.
|
||||
const ggml_tensor_extra_gpu * src0_extra =
|
||||
static_cast<const ggml_tensor_extra_gpu *>(src0->extra);
|
||||
if (src0_extra && src0_extra->optimized_feature.reorder) return false;
|
||||
|
||||
const int64_t n_ids_per_group = ids->ne[0];
|
||||
if (ids->ne[1] != 1) return false;
|
||||
if (ne11 != 1 && ne11 != n_ids_per_group) return false;
|
||||
@@ -4189,16 +4251,37 @@ static bool ggml_sycl_mul_mat_id_mmvq_fused(
|
||||
const int n_experts_used = (int) n_ids_per_group;
|
||||
const int nrows = (int) src0->ne[1];
|
||||
|
||||
// Lazily reorder the (Q4_K) expert weights into a per-expert SoA layout, then run the reorder
|
||||
// GEMV. Placed after the bail checks so a non-dispatchable op does not pay the reorder cost.
|
||||
opt_for_reorder_id(&ctx, src0);
|
||||
const ggml_tensor_extra_gpu * src0_extra =
|
||||
static_cast<const ggml_tensor_extra_gpu *>(src0->extra);
|
||||
const bool use_reorder = src0_extra && src0_extra->optimized_feature.reorder;
|
||||
|
||||
ggml_sycl_pool_alloc<char> src1_q8_alloc(ctx.pool(),
|
||||
(size_t) ne11 * src1_padded_cols * sizeof(block_q8_1) / QK8_1);
|
||||
char * src1_ddq = src1_q8_alloc.get();
|
||||
quantize_row_q8_1_sycl<quantize_q8_1>(
|
||||
(const float *) src1->data, src1_ddq, (int) ne10, (int) ne11,
|
||||
src1_padded_cols, stream);
|
||||
if (use_reorder) {
|
||||
quantize_row_q8_1_sycl<quantize_and_reorder_q8_1_soa>(
|
||||
(const float *) src1->data, src1_ddq, (int) ne10, (int) ne11,
|
||||
src1_padded_cols, stream);
|
||||
} else {
|
||||
quantize_row_q8_1_sycl<quantize_q8_1>(
|
||||
(const float *) src1->data, src1_ddq, (int) ne10, (int) ne11,
|
||||
src1_padded_cols, stream);
|
||||
}
|
||||
|
||||
const size_t bytes_per_qrow = (size_t) src1_padded_cols * sizeof(block_q8_1) / QK8_1;
|
||||
const size_t src1_row_stride = (ne11 == 1) ? 0 : bytes_per_qrow;
|
||||
|
||||
if (use_reorder) {
|
||||
return ggml_sycl_mul_mat_vec_q_id_reorder(
|
||||
src0->type, src0->data, src1_ddq, (const int32_t *) ids->data,
|
||||
(float *) dst->data, (int) ne10, nrows, n_experts_used,
|
||||
/*expert_weight_stride=*/ src0->nb[2],
|
||||
/*dst_row_stride=*/ dst->nb[1],
|
||||
src1_row_stride, stream);
|
||||
}
|
||||
return ggml_sycl_mul_mat_vec_q_id(
|
||||
src0->type, src0->data, src1_ddq, (const int32_t *) ids->data,
|
||||
(float *) dst->data, (int) ne10, nrows, n_experts_used,
|
||||
@@ -4435,6 +4518,11 @@ static void ggml_sycl_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst)
|
||||
ggml_sycl_op_pool2d(ctx, dst);
|
||||
}
|
||||
|
||||
static void ggml_sycl_pool1d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
||||
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/1);
|
||||
ggml_sycl_op_pool1d(ctx, dst);
|
||||
}
|
||||
|
||||
static void ggml_sycl_im2col(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
||||
scope_op_debug_print scope_dbg_print(__func__, dst, /*num_src=*/2);
|
||||
ggml_sycl_op_im2col(ctx, dst);
|
||||
@@ -4592,6 +4680,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
|
||||
case GGML_UNARY_OP_EXP:
|
||||
ggml_sycl_exp(ctx, dst);
|
||||
break;
|
||||
case GGML_UNARY_OP_EXPM1:
|
||||
ggml_sycl_expm1(ctx, dst);
|
||||
break;
|
||||
case GGML_UNARY_OP_SOFTPLUS:
|
||||
ggml_sycl_softplus(ctx, dst);
|
||||
break;
|
||||
@@ -4748,6 +4839,9 @@ static bool ggml_sycl_compute_forward(ggml_backend_sycl_context & ctx, struct gg
|
||||
case GGML_OP_POOL_2D:
|
||||
ggml_sycl_pool2d(ctx, dst);
|
||||
break;
|
||||
case GGML_OP_POOL_1D:
|
||||
ggml_sycl_pool1d(ctx, dst);
|
||||
break;
|
||||
case GGML_OP_SUM:
|
||||
ggml_sycl_sum(ctx, dst);
|
||||
break;
|
||||
@@ -5238,6 +5332,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
||||
case GGML_UNARY_OP_GELU_QUICK:
|
||||
case GGML_UNARY_OP_GELU_ERF:
|
||||
case GGML_UNARY_OP_EXP:
|
||||
case GGML_UNARY_OP_EXPM1:
|
||||
case GGML_UNARY_OP_SOFTPLUS:
|
||||
case GGML_UNARY_OP_ELU:
|
||||
case GGML_UNARY_OP_CEIL:
|
||||
@@ -5245,11 +5340,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
||||
case GGML_UNARY_OP_FLOOR:
|
||||
case GGML_UNARY_OP_ROUND:
|
||||
case GGML_UNARY_OP_TRUNC:
|
||||
#if defined (GGML_SYCL_F16)
|
||||
return ggml_is_contiguous(op->src[0]) && (op->type == op->src[0]->type);
|
||||
#else
|
||||
return ggml_is_contiguous(op->src[0]) && (op->src[0]->type == GGML_TYPE_F32 && op->type == GGML_TYPE_F32) && (op->type == op->src[0]->type);
|
||||
#endif
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
@@ -5342,10 +5433,15 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
||||
|
||||
case GGML_OP_SET_ROWS:
|
||||
{
|
||||
return ((op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16 || op->type == GGML_TYPE_BF16 ||
|
||||
|
||||
auto res = ((op->type == GGML_TYPE_F32 || op->type == GGML_TYPE_F16 || op->type == GGML_TYPE_BF16 ||
|
||||
op->type == GGML_TYPE_Q8_0 || op->type == GGML_TYPE_Q5_1 || op->type == GGML_TYPE_Q5_0 ||
|
||||
op->type == GGML_TYPE_Q4_1 || op->type == GGML_TYPE_Q4_0 || op->type == GGML_TYPE_IQ4_NL) &&
|
||||
op->type == GGML_TYPE_Q1_0 ||
|
||||
op->type == GGML_TYPE_Q4_1 || op->type == GGML_TYPE_Q4_0 || op->type == GGML_TYPE_IQ4_NL ||
|
||||
op->type == GGML_TYPE_MXFP4 || op->type == GGML_TYPE_NVFP4) &&
|
||||
op->src[0]->type == GGML_TYPE_F32 &&
|
||||
(op->src[1]->type == GGML_TYPE_I64 || op->src[1]->type == GGML_TYPE_I32));
|
||||
return res;
|
||||
}
|
||||
break;
|
||||
case GGML_OP_CPY:
|
||||
@@ -5502,6 +5598,7 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
|
||||
k > 0 && k <= 32;
|
||||
}
|
||||
case GGML_OP_POOL_2D:
|
||||
case GGML_OP_POOL_1D:
|
||||
return true;
|
||||
case GGML_OP_ACC:
|
||||
return ggml_is_contiguous(op->src[0]) && ggml_is_contiguous(op->src[1]);
|
||||
|
||||
+178
-67
@@ -662,13 +662,12 @@ static void reorder_mul_mat_vec_q4_0_q8_1_sycl(const void * vx, const void * vy,
|
||||
GGML_ASSERT(ncols % QK4_0 == 0);
|
||||
// Round up to a whole number of subgroup-sized workgroups; out-of-range rows are skipped inside the kernel.
|
||||
constexpr size_t num_subgroups = WARP_SIZE;
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups) * (int) num_subgroups;
|
||||
|
||||
const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, (block_num_y * WARP_SIZE));
|
||||
const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups);
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
|
||||
cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
[=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
mul_mat_vec_q_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q4_0>>(vx, vy, dst, ncols, nrows,
|
||||
nd_item);
|
||||
@@ -683,13 +682,13 @@ static void reorder_mul_mat_vec_q4_0_q8_1_sycl_ncols(
|
||||
const int stride_col_y_bytes, const int stride_col_dst,
|
||||
dpct::queue_ptr stream) {
|
||||
GGML_ASSERT(ncols % QK4_0 == 0);
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y);
|
||||
constexpr size_t num_subgroups = 16;
|
||||
GGML_ASSERT(block_num_y % num_subgroups == 0);
|
||||
const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, block_num_y * WARP_SIZE);
|
||||
const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
constexpr size_t num_subgroups = WARP_SIZE;
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups);
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
|
||||
cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
[=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
mul_mat_vec_q_reorder_ncols<reorder_vec_dot_q_sycl<GGML_TYPE_Q4_0>, ncols_dst>(
|
||||
vx, vy, dst, ncols, nrows, stride_col_y_bytes, stride_col_dst, nd_item);
|
||||
@@ -1080,13 +1079,12 @@ static void reorder_mul_mat_vec_q8_0_q8_1_sycl(const void * vx, const void * vy,
|
||||
GGML_ASSERT(ncols % QK8_0 == 0);
|
||||
// Round up to a whole number of subgroup-sized workgroups; out-of-range rows are skipped inside the kernel.
|
||||
constexpr size_t num_subgroups = WARP_SIZE;
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups) * (int) num_subgroups;
|
||||
|
||||
const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, (block_num_y * WARP_SIZE));
|
||||
const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups);
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
|
||||
cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
[=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
mul_mat_vec_q_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q8_0>>(vx, vy, dst, ncols, nrows,
|
||||
nd_item);
|
||||
@@ -1101,13 +1099,13 @@ static void reorder_mul_mat_vec_q8_0_q8_1_sycl_ncols(
|
||||
const int stride_col_y_bytes, const int stride_col_dst,
|
||||
dpct::queue_ptr stream) {
|
||||
GGML_ASSERT(ncols % QK8_0 == 0);
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y);
|
||||
constexpr size_t num_subgroups = 16;
|
||||
GGML_ASSERT(block_num_y % num_subgroups == 0);
|
||||
const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, block_num_y * WARP_SIZE);
|
||||
const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
constexpr size_t num_subgroups = WARP_SIZE;
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups);
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
|
||||
cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
[=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
mul_mat_vec_q_reorder_ncols<reorder_vec_dot_q_sycl<GGML_TYPE_Q8_0>, ncols_dst>(
|
||||
vx, vy, dst, ncols, nrows, stride_col_y_bytes, stride_col_dst, nd_item);
|
||||
@@ -1289,13 +1287,12 @@ static void reorder_mul_mat_vec_q3_k_q8_1_sycl(const void * vx, const void * vy,
|
||||
|
||||
// Round up to a whole number of subgroup-sized workgroups; out-of-range rows are skipped inside the kernel.
|
||||
constexpr size_t num_subgroups = WARP_SIZE;
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups) * (int) num_subgroups;
|
||||
|
||||
const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, block_num_y * WARP_SIZE);
|
||||
const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups);
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
|
||||
cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
[=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
mul_mat_vec_q_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q3_K>>(vx, vy, dst, ncols, nrows,
|
||||
nd_item);
|
||||
@@ -1310,13 +1307,13 @@ static void reorder_mul_mat_vec_q3_k_q8_1_sycl_ncols(
|
||||
const int stride_col_y_bytes, const int stride_col_dst,
|
||||
dpct::queue_ptr stream) {
|
||||
GGML_ASSERT(ncols % QK_K == 0);
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y);
|
||||
constexpr size_t num_subgroups = 16;
|
||||
GGML_ASSERT(block_num_y % num_subgroups == 0);
|
||||
const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, block_num_y * WARP_SIZE);
|
||||
const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
constexpr size_t num_subgroups = WARP_SIZE;
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups);
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
|
||||
cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
[=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
mul_mat_vec_q_reorder_ncols<reorder_vec_dot_q_sycl<GGML_TYPE_Q3_K>, ncols_dst>(
|
||||
vx, vy, dst, ncols, nrows, stride_col_y_bytes, stride_col_dst, nd_item);
|
||||
@@ -1457,13 +1454,12 @@ static void reorder_mul_mat_vec_q4_k_q8_1_sycl(const void * vx, const void * vy,
|
||||
|
||||
// Round up to a whole number of subgroup-sized workgroups; out-of-range rows are skipped inside the kernel.
|
||||
constexpr size_t num_subgroups = WARP_SIZE;
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups) * (int) num_subgroups;
|
||||
|
||||
const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, block_num_y * WARP_SIZE);
|
||||
const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups);
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
|
||||
cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
[=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
mul_mat_vec_q_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q4_K>>(vx, vy, dst, ncols,
|
||||
nrows, nd_item);
|
||||
@@ -1478,13 +1474,14 @@ static void reorder_mul_mat_vec_q4_k_q8_1_sycl_ncols(
|
||||
const int stride_col_y_bytes, const int stride_col_dst,
|
||||
dpct::queue_ptr stream) {
|
||||
GGML_ASSERT(ncols % QK_K == 0);
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y);
|
||||
constexpr size_t num_subgroups = 16;
|
||||
GGML_ASSERT(block_num_y % num_subgroups == 0);
|
||||
const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, block_num_y * WARP_SIZE);
|
||||
const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
constexpr size_t num_subgroups = WARP_SIZE;
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups);
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
|
||||
cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
[=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
mul_mat_vec_q_reorder_ncols<reorder_vec_dot_q_sycl<GGML_TYPE_Q4_K>, ncols_dst>(
|
||||
vx, vy, dst, ncols, nrows, stride_col_y_bytes, stride_col_dst, nd_item);
|
||||
@@ -1583,15 +1580,13 @@ static void reorder_mul_mat_vec_q5_k_q8_1_sycl(const void * vx, const void * vy,
|
||||
const int nrows, dpct::queue_ptr stream) {
|
||||
GGML_ASSERT(ncols % QK_K == 0);
|
||||
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y);
|
||||
constexpr size_t num_subgroups = 16;
|
||||
GGML_ASSERT(block_num_y % num_subgroups == 0);
|
||||
|
||||
const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, block_num_y * WARP_SIZE);
|
||||
const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
constexpr size_t num_subgroups = WARP_SIZE;
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups);
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
|
||||
cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
[=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
mul_mat_vec_q_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q5_K>>(vx, vy, dst, ncols,
|
||||
nrows, nd_item);
|
||||
@@ -1606,13 +1601,14 @@ static void reorder_mul_mat_vec_q5_k_q8_1_sycl_ncols(
|
||||
const int stride_col_y_bytes, const int stride_col_dst,
|
||||
dpct::queue_ptr stream) {
|
||||
GGML_ASSERT(ncols % QK_K == 0);
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y);
|
||||
constexpr size_t num_subgroups = 16;
|
||||
GGML_ASSERT(block_num_y % num_subgroups == 0);
|
||||
const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, block_num_y * WARP_SIZE);
|
||||
const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
constexpr size_t num_subgroups = WARP_SIZE;
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups);
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
|
||||
cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
[=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
mul_mat_vec_q_reorder_ncols<reorder_vec_dot_q_sycl<GGML_TYPE_Q5_K>, ncols_dst>(
|
||||
vx, vy, dst, ncols, nrows, stride_col_y_bytes, stride_col_dst, nd_item);
|
||||
@@ -1643,13 +1639,13 @@ static void reorder_mul_mat_vec_q6_k_q8_1_sycl(const void * vx, const void * vy,
|
||||
GGML_ASSERT(ncols % QK_K == 0);
|
||||
// Round up to a whole number of subgroup-sized workgroups; out-of-range rows are skipped inside the kernel.
|
||||
constexpr size_t num_subgroups = WARP_SIZE;
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups) * (int) num_subgroups;
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups);
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, block_num_y * WARP_SIZE);
|
||||
const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
|
||||
cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
[=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
mul_mat_vec_q_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q6_K>>(vx, vy, dst, ncols, nrows,
|
||||
nd_item);
|
||||
@@ -1664,13 +1660,13 @@ static void reorder_mul_mat_vec_q6_k_q8_1_sycl_ncols(
|
||||
const int stride_col_y_bytes, const int stride_col_dst,
|
||||
dpct::queue_ptr stream) {
|
||||
GGML_ASSERT(ncols % QK_K == 0);
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y);
|
||||
constexpr size_t num_subgroups = 16;
|
||||
GGML_ASSERT(block_num_y % num_subgroups == 0);
|
||||
const sycl::range<3> global_size(1, GGML_SYCL_MMV_Y, block_num_y * WARP_SIZE);
|
||||
const sycl::range<3> workgroup_size(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
constexpr size_t num_subgroups = WARP_SIZE;
|
||||
const int block_num_y = ceil_div(nrows, GGML_SYCL_MMV_Y * (int) num_subgroups);
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, num_subgroups * WARP_SIZE);
|
||||
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
cgh.parallel_for(sycl::nd_range<3>(global_size, workgroup_size),
|
||||
cgh.parallel_for(sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
[=](sycl::nd_item<3> nd_item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
mul_mat_vec_q_reorder_ncols<reorder_vec_dot_q_sycl<GGML_TYPE_Q6_K>, ncols_dst>(
|
||||
vx, vy, dst, ncols, nrows, stride_col_y_bytes, stride_col_dst, nd_item);
|
||||
@@ -2472,3 +2468,118 @@ bool ggml_sycl_mul_mat_vec_q_id(
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Reorder (SoA) MoE expert GEMV: MoE expert/row/lane indexing (from mul_mat_vec_q_moe) with the
|
||||
// dense-reorder per-block reads (from mul_mat_vec_q_reorder). Each expert slice in vx_base is a
|
||||
// self-contained SoA, so nblocks = nrows*(ncols/qk) per expert and the constant expert stride holds.
|
||||
template <typename reorder_vec_dot_q_sycl>
|
||||
static void mul_mat_vec_q_moe_reorder(
|
||||
const void * __restrict__ vx_base, const void * __restrict__ vy_base,
|
||||
float * __restrict__ dst_base, const int32_t * __restrict__ ids_dev,
|
||||
const int ncols, const int nrows,
|
||||
const size_t expert_weight_stride, const size_t dst_row_stride,
|
||||
const size_t src1_row_stride,
|
||||
const sycl::nd_item<3> & item_ct1) {
|
||||
using block_type = ggml_sycl_reordered::block_q_t<reorder_vec_dot_q_sycl::gtype>;
|
||||
using block_traits = typename block_type::traits;
|
||||
|
||||
const int expert_idx = item_ct1.get_group(1);
|
||||
const int i02 = ids_dev[expert_idx];
|
||||
|
||||
const char * vx = (const char *) vx_base + (size_t) i02 * expert_weight_stride;
|
||||
const char * vy = (const char *) vy_base + (size_t) expert_idx * src1_row_stride;
|
||||
float * dst = (float *) ((char *) dst_base + (size_t) expert_idx * dst_row_stride);
|
||||
|
||||
const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) + item_ct1.get_local_id(1);
|
||||
if (row >= nrows) {
|
||||
return;
|
||||
}
|
||||
|
||||
const auto sg = item_ct1.get_sub_group();
|
||||
|
||||
const int blocks_per_row = ncols / block_traits::qk;
|
||||
constexpr int blocks_per_subgroup = ceil_div(block_traits::vdr_mmvq * WARP_SIZE, block_traits::qi);
|
||||
constexpr int block_elements_per_subgroup = block_traits::qi / block_traits::vdr_mmvq;
|
||||
const int nblocks = nrows * (ncols / block_traits::qk);
|
||||
|
||||
static_assert(blocks_per_subgroup > 0);
|
||||
static_assert(block_elements_per_subgroup > 0);
|
||||
|
||||
float partial_sum = 0.0f;
|
||||
for (int i = sg.get_local_linear_id() / block_elements_per_subgroup; i < blocks_per_row; i += blocks_per_subgroup) {
|
||||
const int ibx = row * blocks_per_row + i;
|
||||
|
||||
const auto bx_offset = block_type::get_block_offset(ibx, nblocks);
|
||||
const auto d_offset = block_type::get_d_offset(nrows, ncols, ibx);
|
||||
|
||||
const int iby = i * block_type::block_to_q8_1_ratio();
|
||||
const int8_t * q8_1_quant_ptr = (const int8_t *) vy + iby * QK8_1;
|
||||
const sycl::half2 * q8_1_ds_ptr = (const sycl::half2 *) ((const char *) vy + ncols + iby * sizeof(sycl::half2));
|
||||
|
||||
#pragma unroll
|
||||
for (int elem = 0; elem < block_elements_per_subgroup; elem += WARP_SIZE) {
|
||||
const int iqs = elem + block_traits::vdr_mmvq * (sg.get_local_linear_id() % block_elements_per_subgroup);
|
||||
partial_sum += reorder_vec_dot_q_sycl()(vx, bx_offset, d_offset, q8_1_quant_ptr, q8_1_ds_ptr, iqs);
|
||||
}
|
||||
}
|
||||
|
||||
auto sum = sycl::reduce_over_group(sg, partial_sum, std::plus<>());
|
||||
if (sg.leader()) {
|
||||
dst[row] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename reorder_vec_dot_q_sycl>
|
||||
static void launch_mul_mat_vec_q_moe_reorder(
|
||||
const void * vx_base, const void * vy, const int32_t * ids_dev,
|
||||
float * dst_base, const int ncols, const int nrows, const int n_experts_used,
|
||||
const size_t expert_weight_stride, const size_t dst_row_stride,
|
||||
const size_t src1_row_stride,
|
||||
dpct::queue_ptr stream) {
|
||||
const int block_num_y = (nrows + GGML_SYCL_MMV_Y - 1) / GGML_SYCL_MMV_Y;
|
||||
const sycl::range<3> block_nums(1, (unsigned) n_experts_used, (unsigned) block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
stream->submit([&](sycl::handler & cgh) {
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
[=](sycl::nd_item<3> item) [[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
mul_mat_vec_q_moe_reorder<reorder_vec_dot_q_sycl>(
|
||||
vx_base, vy, dst_base, ids_dev, ncols, nrows,
|
||||
expert_weight_stride, dst_row_stride, src1_row_stride, item);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
bool ggml_sycl_mul_mat_vec_q_id_reorder(
|
||||
enum ggml_type src0_type,
|
||||
const void * vx_base,
|
||||
const void * vy,
|
||||
const int32_t * ids_dev,
|
||||
float * dst_base,
|
||||
int ncols,
|
||||
int nrows,
|
||||
int n_experts_used,
|
||||
size_t expert_weight_stride,
|
||||
size_t dst_row_stride,
|
||||
size_t src1_row_stride,
|
||||
dpct::queue_ptr stream) {
|
||||
switch (src0_type) {
|
||||
case GGML_TYPE_Q4_K:
|
||||
launch_mul_mat_vec_q_moe_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q4_K>>(
|
||||
vx_base, vy, ids_dev, dst_base, ncols, nrows, n_experts_used,
|
||||
expert_weight_stride, dst_row_stride, src1_row_stride, stream);
|
||||
return true;
|
||||
case GGML_TYPE_Q5_K:
|
||||
launch_mul_mat_vec_q_moe_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q5_K>>(
|
||||
vx_base, vy, ids_dev, dst_base, ncols, nrows, n_experts_used,
|
||||
expert_weight_stride, dst_row_stride, src1_row_stride, stream);
|
||||
return true;
|
||||
case GGML_TYPE_Q6_K:
|
||||
launch_mul_mat_vec_q_moe_reorder<reorder_vec_dot_q_sycl<GGML_TYPE_Q6_K>>(
|
||||
vx_base, vy, ids_dev, dst_base, ncols, nrows, n_experts_used,
|
||||
expert_weight_stride, dst_row_stride, src1_row_stride, stream);
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -40,4 +40,21 @@ bool ggml_sycl_mul_mat_vec_q_id(
|
||||
size_t src1_row_stride, // 0 = shared src1, else per-expert stride in bytes
|
||||
dpct::queue_ptr stream);
|
||||
|
||||
// Reorder (SoA) variant of the fused MoE expert GEMV.
|
||||
// vx_base: each expert slice (stride expert_weight_stride == src0->nb[2]) is a self-contained reorder/SoA layout.
|
||||
// vy: src1 quantized with quantize_and_reorder_q8_1_soa (per-row SoA). Returns false if src0_type isn't handled.
|
||||
bool ggml_sycl_mul_mat_vec_q_id_reorder(
|
||||
enum ggml_type src0_type,
|
||||
const void * vx_base,
|
||||
const void * vy,
|
||||
const int32_t * ids_dev,
|
||||
float * dst_base,
|
||||
int ncols,
|
||||
int nrows,
|
||||
int n_experts_used,
|
||||
size_t expert_weight_stride,
|
||||
size_t dst_row_stride,
|
||||
size_t src1_row_stride,
|
||||
dpct::queue_ptr stream);
|
||||
|
||||
#endif // GGML_SYCL_MMVQ_HPP
|
||||
|
||||
@@ -0,0 +1,185 @@
|
||||
//
|
||||
// MIT license
|
||||
// Copyright (C) 2026 Intel Corporation
|
||||
// SPDX-License-Identifier: MIT
|
||||
//
|
||||
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
|
||||
#include "pool.hpp"
|
||||
#include <float.h>
|
||||
|
||||
template <typename Ti, typename To>
|
||||
static void pool2d_nchw_kernel(
|
||||
const int ih, const int iw, const int oh, const int ow,
|
||||
const int kh, const int kw, const int sh, const int sw,
|
||||
const int ph, const int pw, const int parallel_elements,
|
||||
const Ti* src, To* dst, const enum ggml_op_pool op,
|
||||
const sycl::nd_item<3> &item_ct1) {
|
||||
int idx = item_ct1.get_local_id(2) +
|
||||
item_ct1.get_group(2) * item_ct1.get_local_range(2);
|
||||
if (idx >= parallel_elements) {
|
||||
return;
|
||||
}
|
||||
|
||||
const int I_HW = ih * iw;
|
||||
const int O_HW = oh * ow;
|
||||
const int nc = idx / O_HW;
|
||||
const int cur_oh = idx % O_HW / ow;
|
||||
const int cur_ow = idx % O_HW % ow;
|
||||
const Ti* i_ptr = src + nc * I_HW;
|
||||
To* o_ptr = dst + nc * O_HW;
|
||||
const int start_h = cur_oh * sh - ph;
|
||||
const int bh = sycl::max(0, start_h);
|
||||
const int eh = sycl::min(ih, start_h + kh);
|
||||
const int start_w = cur_ow * sw - pw;
|
||||
const int bw = sycl::max(0, start_w);
|
||||
const int ew = sycl::min(iw, start_w + kw);
|
||||
|
||||
To res = 0;
|
||||
|
||||
switch (op) {
|
||||
case GGML_OP_POOL_AVG: res = 0; break;
|
||||
case GGML_OP_POOL_MAX: res = -FLT_MAX; break;
|
||||
default:
|
||||
res = (To) sycl::nan(uint32_t(0));
|
||||
break;
|
||||
}
|
||||
|
||||
for (int i = bh; i < eh; i += 1) {
|
||||
for (int j = bw; j < ew; j += 1) {
|
||||
Ti cur = i_ptr[i * iw + j];
|
||||
switch (op) {
|
||||
case GGML_OP_POOL_AVG: res += (cur / (kh * kw)); break;
|
||||
case GGML_OP_POOL_MAX: res = sycl::max(res, (To)cur); break;
|
||||
default:
|
||||
res = (To) sycl::nan(uint32_t(0));
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
o_ptr[cur_oh * ow + cur_ow] = res;
|
||||
}
|
||||
|
||||
template <typename Ti, typename To>
|
||||
static void pool1d_ncw_kernel(
|
||||
const int iw, const int ow,
|
||||
const int k, const int s,
|
||||
const int p, const int parallel_elements,
|
||||
const Ti * src, To * dst, const enum ggml_op_pool op,
|
||||
const sycl::nd_item<3> & item_ct1) {
|
||||
int idx = item_ct1.get_local_id(2) +
|
||||
item_ct1.get_group(2) * item_ct1.get_local_range(2);
|
||||
if (idx >= parallel_elements) {
|
||||
return;
|
||||
}
|
||||
|
||||
const int nc = idx / ow;
|
||||
const int cur_ow = idx % ow;
|
||||
const Ti * i_ptr = src + nc * iw;
|
||||
To * o_ptr = dst + nc * ow;
|
||||
const int start = cur_ow * s - p;
|
||||
const int b = sycl::max(0, start);
|
||||
const int e = sycl::min(iw, start + k);
|
||||
|
||||
To res = 0;
|
||||
switch (op) {
|
||||
case GGML_OP_POOL_AVG: res = 0; break;
|
||||
case GGML_OP_POOL_MAX: res = -FLT_MAX; break;
|
||||
default:
|
||||
res = (To) sycl::nan(uint32_t(0));
|
||||
break;
|
||||
}
|
||||
|
||||
for (int j = b; j < e; j += 1) {
|
||||
Ti cur = i_ptr[j];
|
||||
switch (op) {
|
||||
case GGML_OP_POOL_AVG: res += cur; break;
|
||||
case GGML_OP_POOL_MAX: res = sycl::max(res, (To) cur); break;
|
||||
default:
|
||||
res = (To) sycl::nan(uint32_t(0));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const int count = e - b;
|
||||
if (op == GGML_OP_POOL_AVG) {
|
||||
res = (count > 0) ? (res / count) : (To) 0;
|
||||
}
|
||||
o_ptr[cur_ow] = res;
|
||||
}
|
||||
|
||||
void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
||||
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
||||
dpct::queue_ptr main_stream = ctx.stream();
|
||||
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
|
||||
const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
|
||||
float * dst_dd = static_cast<float *>(dst->data);
|
||||
|
||||
const int32_t * opts = (const int32_t *)dst->op_params;
|
||||
enum ggml_op_pool op = static_cast<ggml_op_pool>(opts[0]);
|
||||
const int k0 = opts[1];
|
||||
const int k1 = opts[2];
|
||||
const int s0 = opts[3];
|
||||
const int s1 = opts[4];
|
||||
const int p0 = opts[5];
|
||||
const int p1 = opts[6];
|
||||
|
||||
const int64_t IH = dst->src[0]->ne[1];
|
||||
const int64_t IW = dst->src[0]->ne[0];
|
||||
|
||||
const int64_t N = dst->ne[3];
|
||||
const int64_t OC = dst->ne[2];
|
||||
const int64_t OH = dst->ne[1];
|
||||
const int64_t OW = dst->ne[0];
|
||||
|
||||
const int parallel_elements = N * OC * OH * OW;
|
||||
const int num_blocks = (parallel_elements + SYCL_POOL2D_BLOCK_SIZE - 1) / SYCL_POOL2D_BLOCK_SIZE;
|
||||
sycl::range<3> block_nums(1, 1, num_blocks);
|
||||
main_stream->parallel_for(
|
||||
sycl::nd_range<3>(block_nums *
|
||||
sycl::range<3>(1, 1, SYCL_IM2COL_BLOCK_SIZE),
|
||||
sycl::range<3>(1, 1, SYCL_IM2COL_BLOCK_SIZE)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
pool2d_nchw_kernel(IH, IW, OH, OW, k1, k0, s1, s0, p1, p0,
|
||||
parallel_elements, src0_dd, dst_dd, op,
|
||||
item_ct1);
|
||||
});
|
||||
}
|
||||
|
||||
void ggml_sycl_op_pool1d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) {
|
||||
GGML_ASSERT(dst->src[0]->type == GGML_TYPE_F32);
|
||||
GGML_ASSERT( dst->type == GGML_TYPE_F32);
|
||||
dpct::queue_ptr main_stream = ctx.stream();
|
||||
SYCL_CHECK(ggml_sycl_set_device(ctx.device));
|
||||
const float * src0_dd = static_cast<const float *>(dst->src[0]->data);
|
||||
float * dst_dd = static_cast<float *>(dst->data);
|
||||
|
||||
const int32_t * opts = (const int32_t *)dst->op_params;
|
||||
enum ggml_op_pool op = static_cast<ggml_op_pool>(opts[0]);
|
||||
const int k0 = opts[1];
|
||||
const int s0 = opts[2];
|
||||
const int p0 = opts[3];
|
||||
|
||||
const int64_t IW = dst->src[0]->ne[0];
|
||||
const int64_t OW = dst->ne[0];
|
||||
const int64_t NC = dst->ne[3] * dst->ne[2] * dst->ne[1];
|
||||
|
||||
const int parallel_elements = NC * OW;
|
||||
const int num_blocks = (parallel_elements + SYCL_POOL1D_BLOCK_SIZE - 1) / SYCL_POOL1D_BLOCK_SIZE;
|
||||
sycl::range<3> block_nums(1, 1, num_blocks);
|
||||
main_stream->parallel_for(
|
||||
sycl::nd_range<3>(block_nums *
|
||||
sycl::range<3>(1, 1, SYCL_POOL1D_BLOCK_SIZE),
|
||||
sycl::range<3>(1, 1, SYCL_POOL1D_BLOCK_SIZE)),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
pool1d_ncw_kernel(IW, OW, k0, s0, p0,
|
||||
parallel_elements, src0_dd, dst_dd, op,
|
||||
item_ct1);
|
||||
});
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
//
|
||||
// MIT license
|
||||
// Copyright (C) 2026 Intel Corporation
|
||||
// SPDX-License-Identifier: MIT
|
||||
//
|
||||
|
||||
//
|
||||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||||
// See https://llvm.org/LICENSE.txt for license information.
|
||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||||
//
|
||||
|
||||
#ifndef GGML_SYCL_POOL_HPP
|
||||
#define GGML_SYCL_POOL_HPP
|
||||
|
||||
#include "common.hpp"
|
||||
#include "presets.hpp"
|
||||
|
||||
void ggml_sycl_op_pool2d(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
||||
void ggml_sycl_op_pool1d(ggml_backend_sycl_context & ctx, ggml_tensor * dst);
|
||||
|
||||
#endif // GGML_SYCL_POOL_HPP
|
||||
@@ -46,6 +46,7 @@
|
||||
#define SYCL_PAD_BLOCK_SIZE 256
|
||||
#define SYCL_ACC_BLOCK_SIZE 256
|
||||
#define SYCL_IM2COL_BLOCK_SIZE 256
|
||||
#define SYCL_POOL1D_BLOCK_SIZE 256
|
||||
#define SYCL_POOL2D_BLOCK_SIZE 256
|
||||
#define SYCL_ARGMAX_BLOCK_SIZE 256
|
||||
#define SYCL_CONV_TRANPOSE_1D_BLOCK_SIZE 256
|
||||
|
||||
@@ -135,7 +135,7 @@ static void set_rows_sycl(
|
||||
|
||||
stream->parallel_for(
|
||||
sycl::nd_range<1>(grid_size * block_size, block_size),
|
||||
[=](sycl::nd_item<1> item_ct1) {
|
||||
[=](sycl::nd_item<1> item_ct1) [[intel::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
k_set_rows<TIn, TIdx, TOut>(
|
||||
src0_d, src1_d, dst_d,
|
||||
ne00, ne01, ne02,
|
||||
@@ -202,6 +202,9 @@ static void set_rows_sycl(ggml_backend_sycl_context & ctx, const ggml_tensor * s
|
||||
case GGML_TYPE_Q8_0:
|
||||
set_rows_sycl_q<TIdx, block_q8_0, QK8_0, cpy_blck_f32_q8_0>(src0_d, src1_d, (block_q8_0 *)dst->data, ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb1, nb2, nb3, stream);
|
||||
break;
|
||||
case GGML_TYPE_Q1_0:
|
||||
set_rows_sycl_q<TIdx, block_q1_0, QK1_0, cpy_blck_f32_q1_0>(src0_d, src1_d, (block_q1_0 *)dst->data, ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb1, nb2, nb3, stream);
|
||||
break;
|
||||
case GGML_TYPE_Q5_1:
|
||||
set_rows_sycl_q<TIdx, block_q5_1, QK5_1, cpy_blck_f32_q5_1>(src0_d, src1_d, (block_q5_1 *)dst->data, ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb1, nb2, nb3, stream);
|
||||
break;
|
||||
@@ -217,7 +220,12 @@ static void set_rows_sycl(ggml_backend_sycl_context & ctx, const ggml_tensor * s
|
||||
case GGML_TYPE_IQ4_NL:
|
||||
set_rows_sycl_q<TIdx, block_iq4_nl, QK4_NL, cpy_blck_f32_iq4_nl>(src0_d, src1_d, (block_iq4_nl *)dst->data, ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb1, nb2, nb3, stream);
|
||||
break;
|
||||
|
||||
case GGML_TYPE_MXFP4:
|
||||
set_rows_sycl_q<TIdx, block_mxfp4, QK_MXFP4, cpy_blck_f32_mxfp4>(src0_d, src1_d, (block_mxfp4 *)dst->data, ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb1, nb2, nb3, stream);
|
||||
break;
|
||||
case GGML_TYPE_NVFP4:
|
||||
set_rows_sycl_q<TIdx, block_nvfp4, QK_NVFP4, cpy_blck_f32_nvfp4>(src0_d, src1_d, (block_nvfp4 *)dst->data, ne00, ne01, ne02, ne03, ne10, ne11, ne12, ne13, nb00, nb01, nb02, nb03, nb10, nb11, nb12, nb13, nb1, nb2, nb3, stream);
|
||||
break;
|
||||
default:
|
||||
GGML_ABORT("Unsupported tensor type!");
|
||||
break;
|
||||
|
||||
@@ -56,7 +56,7 @@ static void soft_max_f32(const float * x,
|
||||
: block_size_template;
|
||||
const int nthreads = block_size;
|
||||
const int nwarps = nthreads / WARP_SIZE;
|
||||
size_t nreduce = nwarps / WARP_SIZE;
|
||||
const size_t nreduce = nwarps / WARP_SIZE;
|
||||
|
||||
const int tid = item_ct1.get_local_id(2);
|
||||
|
||||
@@ -105,17 +105,15 @@ static void soft_max_f32(const float * x,
|
||||
max_val = warp_reduce_max<WARP_SIZE>(max_val);
|
||||
|
||||
if (block_size > WARP_SIZE) {
|
||||
if (warp_id == 0) {
|
||||
buf_iw[lane_id] = -INFINITY;
|
||||
}
|
||||
item_ct1.barrier();
|
||||
|
||||
if (lane_id == 0) {
|
||||
buf_iw[warp_id] = max_val;
|
||||
}
|
||||
item_ct1.barrier();
|
||||
|
||||
max_val = buf_iw[lane_id];
|
||||
max_val = -INFINITY;
|
||||
for (int i = lane_id; i < nwarps; i += WARP_SIZE) {
|
||||
max_val = sycl::max(max_val, buf_iw[i]);
|
||||
}
|
||||
max_val = warp_reduce_max<WARP_SIZE>(max_val);
|
||||
}
|
||||
float tmp = 0.0f; // partial sum
|
||||
@@ -290,7 +288,8 @@ static void soft_max_f32_sycl(const float *x, const T *mask,
|
||||
|
||||
cgh.parallel_for(
|
||||
sycl::nd_range<3>(block_nums * block_dims, block_dims),
|
||||
[=](sycl::nd_item<3> item_ct1) {
|
||||
[=](sycl::nd_item<3> item_ct1)
|
||||
[[sycl::reqd_sub_group_size(WARP_SIZE)]] {
|
||||
soft_max_f32<false, 0, 0>(
|
||||
x, mask, sinks, dst, params,
|
||||
dpct_local_acc_ct1
|
||||
|
||||
@@ -798,7 +798,7 @@ struct vk_device_struct {
|
||||
|
||||
vk_pipeline pipeline_add_id_f32;
|
||||
|
||||
vk_pipeline pipeline_concat_f32, pipeline_concat_f16, pipeline_concat_i32;
|
||||
vk_pipeline pipeline_concat_i8, pipeline_concat_i16, pipeline_concat_i32, pipeline_concat_i64;
|
||||
vk_pipeline pipeline_upscale_nearest_f32, pipeline_upscale_bilinear_f32, pipeline_upscale_bicubic_f32, pipeline_upscale_bilinear_antialias_f32;
|
||||
vk_pipeline pipeline_scale_f32;
|
||||
vk_pipeline pipeline_sqr_f32;
|
||||
@@ -902,14 +902,17 @@ struct vk_device_struct {
|
||||
vk_pipeline pipeline_im2col_3d_f32, pipeline_im2col_3d_f32_f16;
|
||||
vk_pipeline pipeline_timestep_embedding_f32;
|
||||
vk_pipeline pipeline_conv_transpose_1d_f32;
|
||||
vk_pipeline pipeline_col2im_1d_f32;
|
||||
vk_pipeline pipeline_col2im_1d_f16;
|
||||
vk_pipeline pipeline_col2im_1d_bf16;
|
||||
vk_pipeline pipeline_snake_f32;
|
||||
vk_pipeline pipeline_snake_f16;
|
||||
vk_pipeline pipeline_snake_bf16;
|
||||
vk_pipeline pipeline_pool2d_f32;
|
||||
vk_pipeline pipeline_rwkv_wkv6_f32;
|
||||
vk_pipeline pipeline_rwkv_wkv7_f32;
|
||||
// [size_idx][kda] where size_idx: 0=d32, 1=d64, 2=d128
|
||||
vk_pipeline pipeline_gated_delta_net[3][2];
|
||||
// [size_idx][kda] where size_idx: 0=d16, 1=d32, 2=d64, 3=d128
|
||||
vk_pipeline pipeline_gated_delta_net[4][2];
|
||||
vk_pipeline pipeline_ssm_scan_f32_d128;
|
||||
vk_pipeline pipeline_ssm_scan_f32_d256;
|
||||
vk_pipeline pipeline_ssm_conv_f32;
|
||||
@@ -1552,6 +1555,16 @@ struct vk_op_timestep_embedding_push_constants {
|
||||
uint32_t max_period;
|
||||
};
|
||||
|
||||
struct vk_op_col2im_1d_push_constants {
|
||||
uint32_t T_out;
|
||||
uint32_t OC;
|
||||
uint32_t K_OC;
|
||||
uint32_t T_in;
|
||||
uint32_t K;
|
||||
int32_t stride;
|
||||
int32_t p0;
|
||||
};
|
||||
|
||||
struct vk_op_conv_transpose_1d_push_constants {
|
||||
uint32_t Cout;
|
||||
uint32_t Cin;
|
||||
@@ -3067,8 +3080,10 @@ static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
|
||||
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
|
||||
vk::MemoryPropertyFlagBits::eDeviceLocal});
|
||||
} else if (device->uma) {
|
||||
// Fall back to host memory type
|
||||
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal,
|
||||
// On UMA, prefer host-visible memory so direct tensor borrowing works.
|
||||
// If unavailable, fall back to device-local memory.
|
||||
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
|
||||
vk::MemoryPropertyFlagBits::eDeviceLocal,
|
||||
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent});
|
||||
} else if (device->disable_host_visible_vidmem) {
|
||||
if (device->allow_sysmem_fallback) {
|
||||
@@ -4996,9 +5011,10 @@ static void ggml_vk_load_shaders(vk_device& device, vk_pipeline requested) {
|
||||
ggml_vk_create_pipeline(device, device->pipeline_acc_f32, "acc_f32", acc_f32_len, acc_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {0, 1}, 1);
|
||||
ggml_vk_create_pipeline(device, device->pipeline_set_f32, "set_f32", acc_f32_len, acc_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {0, 0}, 1);
|
||||
|
||||
ggml_vk_create_pipeline(device, device->pipeline_concat_f32, "concat_f32", concat_f32_len, concat_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {}, 1);
|
||||
ggml_vk_create_pipeline(device, device->pipeline_concat_f16, "concat_f16", concat_f16_len, concat_f16_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {}, 1);
|
||||
ggml_vk_create_pipeline(device, device->pipeline_concat_i8, "concat_i8", concat_i8_len, concat_i8_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {}, 1);
|
||||
ggml_vk_create_pipeline(device, device->pipeline_concat_i16, "concat_i16", concat_i16_len, concat_i16_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {}, 1);
|
||||
ggml_vk_create_pipeline(device, device->pipeline_concat_i32, "concat_i32", concat_i32_len, concat_i32_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {}, 1);
|
||||
ggml_vk_create_pipeline(device, device->pipeline_concat_i64, "concat_i64", concat_i64_len, concat_i64_data, "main", 3, sizeof(vk_op_binary_push_constants), {512, 1, 1}, {}, 1);
|
||||
|
||||
ggml_vk_create_pipeline(device, device->pipeline_upscale_nearest_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_NEAREST}, 1);
|
||||
ggml_vk_create_pipeline(device, device->pipeline_upscale_bilinear_f32, "upscale_f32", upscale_f32_len, upscale_f32_data, "main", 2, sizeof(vk_op_upscale_push_constants), {512, 1, 1}, {GGML_SCALE_MODE_BILINEAR}, 1);
|
||||
@@ -5202,6 +5218,9 @@ static void ggml_vk_load_shaders(vk_device& device, vk_pipeline requested) {
|
||||
ggml_vk_create_pipeline(device, device->pipeline_timestep_embedding_f32, "timestep_embedding_f32", timestep_embedding_f32_len, timestep_embedding_f32_data, "main", 2, sizeof(vk_op_timestep_embedding_push_constants), {256, 1, 1}, {}, 1);
|
||||
|
||||
ggml_vk_create_pipeline(device, device->pipeline_conv_transpose_1d_f32, "conv_transpose_1d_f32", conv_transpose_1d_f32_len, conv_transpose_1d_f32_data, "main", 3, sizeof(vk_op_conv_transpose_1d_push_constants), {1, 1, 1}, {}, 1);
|
||||
ggml_vk_create_pipeline(device, device->pipeline_col2im_1d_f32, "col2im_1d_f32", col2im_1d_f32_len, col2im_1d_f32_data, "main", 2, sizeof(vk_op_col2im_1d_push_constants), {256, 1, 1}, {}, 1, true);
|
||||
ggml_vk_create_pipeline(device, device->pipeline_col2im_1d_f16, "col2im_1d_f16", col2im_1d_f16_len, col2im_1d_f16_data, "main", 2, sizeof(vk_op_col2im_1d_push_constants), {256, 1, 1}, {}, 1, true);
|
||||
ggml_vk_create_pipeline(device, device->pipeline_col2im_1d_bf16, "col2im_1d_bf16", col2im_1d_bf16_len, col2im_1d_bf16_data, "main", 2, sizeof(vk_op_col2im_1d_push_constants), {256, 1, 1}, {}, 1, true);
|
||||
|
||||
ggml_vk_create_pipeline(device, device->pipeline_snake_f32, "snake_f32", snake_f32_len, snake_f32_data, "main", 4, sizeof(vk_op_snake_push_constants), {256, 1, 1}, {}, 1);
|
||||
ggml_vk_create_pipeline(device, device->pipeline_snake_f16, "snake_f16", snake_f16_len, snake_f16_data, "main", 4, sizeof(vk_op_snake_push_constants), {256, 1, 1}, {}, 1);
|
||||
@@ -5214,14 +5233,14 @@ static void ggml_vk_load_shaders(vk_device& device, vk_pipeline requested) {
|
||||
ggml_vk_create_pipeline(device, device->pipeline_rwkv_wkv7_f32, "rwkv_wkv7_f32", rwkv_wkv7_f32_len, rwkv_wkv7_f32_data, "main", 8, sizeof(vk_op_rwkv_wkv7_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
||||
|
||||
{
|
||||
const uint32_t gdn_sizes[] = {32, 64, 128};
|
||||
const uint32_t gdn_sizes[] = {16, 32, 64, 128};
|
||||
const char * gdn_names[][2] = {
|
||||
{"gated_delta_net_f32_d16", "gated_delta_net_f32_d16_kda"},
|
||||
{"gated_delta_net_f32_d32", "gated_delta_net_f32_d32_kda"},
|
||||
{"gated_delta_net_f32_d64", "gated_delta_net_f32_d64_kda"},
|
||||
{"gated_delta_net_f32_d128", "gated_delta_net_f32_d128_kda"},
|
||||
};
|
||||
const bool use_subgroup_reduce = device->subgroup_arithmetic;
|
||||
for (uint32_t si = 0; si < 3; si++) {
|
||||
for (uint32_t si = 0; si < 4; si++) {
|
||||
const uint32_t S_V = gdn_sizes[si];
|
||||
GGML_ASSERT(is_pow2(S_V));
|
||||
|
||||
@@ -5235,10 +5254,29 @@ static void ggml_vk_load_shaders(vk_device& device, vk_pipeline requested) {
|
||||
lanes_per_column = std::min(S_V, device->subgroup_size);
|
||||
}
|
||||
|
||||
const bool need_clustered_shader = lanes_per_column != 1 && (lanes_per_column < device->subgroup_size);
|
||||
// gated_delta_net.comp relies on S_V % COLS_PER_WG == 0 and
|
||||
// S_V % LANES_PER_COLUMN == 0 to avoid bounds checks.
|
||||
while (lanes_per_column > 1u) {
|
||||
const bool valid_lanes = (device->subgroup_size % lanes_per_column) == 0 &&
|
||||
(S_V % lanes_per_column) == 0;
|
||||
const uint32_t cols_per_wg = valid_lanes ? device->subgroup_size / lanes_per_column : 0;
|
||||
if (valid_lanes && cols_per_wg > 0 && (S_V % cols_per_wg) == 0) {
|
||||
break;
|
||||
}
|
||||
lanes_per_column >>= 1u;
|
||||
}
|
||||
|
||||
GGML_ASSERT((device->subgroup_size % lanes_per_column) == 0);
|
||||
GGML_ASSERT((S_V % lanes_per_column) == 0);
|
||||
GGML_ASSERT((S_V % (device->subgroup_size / lanes_per_column)) == 0);
|
||||
|
||||
const bool need_partial_subgroup_reduce = lanes_per_column != 1u && lanes_per_column < device->subgroup_size;
|
||||
const bool use_clustered_reduce = device->subgroup_arithmetic && device->subgroup_clustered && need_partial_subgroup_reduce;
|
||||
const bool use_subgroup_reduce = device->subgroup_arithmetic && !need_partial_subgroup_reduce;
|
||||
const bool use_subgroup_ops = use_clustered_reduce || use_subgroup_reduce;
|
||||
size_t gdn_len;
|
||||
const void * gdn_data;
|
||||
if (use_subgroup_reduce && need_clustered_shader) {
|
||||
if (use_clustered_reduce) {
|
||||
gdn_len = gated_delta_net_f32_len;
|
||||
gdn_data = (const void *)gated_delta_net_f32_data;
|
||||
} else if (use_subgroup_reduce) {
|
||||
@@ -5255,7 +5293,7 @@ static void ggml_vk_load_shaders(vk_device& device, vk_pipeline requested) {
|
||||
for (uint32_t kda = 0; kda < 2; kda++) {
|
||||
ggml_vk_create_pipeline(device, device->pipeline_gated_delta_net[si][kda],
|
||||
gdn_names[si][kda], gdn_len, gdn_data, "main", 7, sizeof(vk_op_gated_delta_net_push_constants),
|
||||
wg_denoms, {S_V, kda, device->subgroup_size, lanes_per_column}, 1, true, use_subgroup_reduce, device->subgroup_size);
|
||||
wg_denoms, {S_V, kda, device->subgroup_size, lanes_per_column}, 1, true, use_subgroup_ops, device->subgroup_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -10318,17 +10356,27 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
|
||||
return ctx->device->pipeline_add_id_f32;
|
||||
}
|
||||
return nullptr;
|
||||
case GGML_OP_CONCAT:
|
||||
if (src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
|
||||
return ctx->device->pipeline_concat_f32;
|
||||
case GGML_OP_CONCAT: {
|
||||
if (src0->type != src1->type || src0->type != dst->type) {
|
||||
return nullptr;
|
||||
}
|
||||
if (src0->type == GGML_TYPE_F16 && src1->type == GGML_TYPE_F16 && dst->type == GGML_TYPE_F16) {
|
||||
return ctx->device->pipeline_concat_f16;
|
||||
if (ggml_blck_size(src0->type) != 1) {
|
||||
return nullptr;
|
||||
}
|
||||
if (src0->type == GGML_TYPE_I32 && src1->type == GGML_TYPE_I32 && dst->type == GGML_TYPE_I32) {
|
||||
const size_t type_size = ggml_type_size(src0->type);
|
||||
switch (type_size) {
|
||||
case 1:
|
||||
return ctx->device->pipeline_concat_i8;
|
||||
case 2:
|
||||
return ctx->device->pipeline_concat_i16;
|
||||
case 4:
|
||||
return ctx->device->pipeline_concat_i32;
|
||||
case 8:
|
||||
return ctx->device->pipeline_concat_i64;
|
||||
default:
|
||||
return nullptr;
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
case GGML_OP_UPSCALE:
|
||||
if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
|
||||
uint32_t mode = (ggml_get_op_params_i32(dst, 0) & (0xFF | GGML_SCALE_FLAG_ANTIALIAS));
|
||||
@@ -10691,6 +10739,13 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
|
||||
return ctx->device->pipeline_conv_transpose_1d_f32;
|
||||
}
|
||||
return nullptr;
|
||||
case GGML_OP_COL2IM_1D:
|
||||
switch (src0->type) {
|
||||
case GGML_TYPE_F32: return ctx->device->pipeline_col2im_1d_f32;
|
||||
case GGML_TYPE_F16: return ctx->device->pipeline_col2im_1d_f16;
|
||||
case GGML_TYPE_BF16: return ctx->device->pipeline_col2im_1d_bf16;
|
||||
default: return nullptr;
|
||||
}
|
||||
case GGML_OP_POOL_2D:
|
||||
if (src0->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32) {
|
||||
return ctx->device->pipeline_pool2d_f32;
|
||||
@@ -10712,9 +10767,10 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
|
||||
const uint32_t kda = (dst->src[3]->ne[0] == (int64_t)S_v) ? 1 : 0;
|
||||
uint32_t si;
|
||||
switch (S_v) {
|
||||
case 32: si = 0; break;
|
||||
case 64: si = 1; break;
|
||||
case 128: si = 2; break;
|
||||
case 16: si = 0; break;
|
||||
case 32: si = 1; break;
|
||||
case 64: si = 2; break;
|
||||
case 128: si = 3; break;
|
||||
default: return nullptr;
|
||||
}
|
||||
return ctx->device->pipeline_gated_delta_net[si][kda];
|
||||
@@ -11136,6 +11192,10 @@ static void ggml_vk_op_f32(ggml_backend_vk_context * ctx, vk_context& subctx, co
|
||||
{
|
||||
elements = {uint32_t(src0->ne[1]), 1, 1}; // parallelize in {Cout, 1, 1}
|
||||
} break;
|
||||
case GGML_OP_COL2IM_1D:
|
||||
{
|
||||
elements = { uint32_t(dst->ne[0]), uint32_t(dst->ne[1]), 1 };
|
||||
} break;
|
||||
case GGML_OP_POOL_2D:
|
||||
{
|
||||
const uint32_t N = dst->ne[3];
|
||||
@@ -12925,6 +12985,32 @@ static void ggml_vk_conv_transpose_1d(ggml_backend_vk_context * ctx, vk_context&
|
||||
ggml_vk_op_f32(ctx, subctx, src0, src1, nullptr, nullptr, dst, GGML_OP_CONV_TRANSPOSE_1D, std::move(p));
|
||||
}
|
||||
|
||||
static void ggml_vk_col2im_1d(ggml_backend_vk_context * ctx, vk_context& subctx, const ggml_tensor * src0, ggml_tensor * dst) {
|
||||
// src0: [K_OC, T_in] columns from matmul
|
||||
// dst: [T_out, OC]
|
||||
|
||||
const int32_t stride = dst->op_params[0];
|
||||
const int32_t oc = dst->op_params[1];
|
||||
const int32_t p0 = dst->op_params[2];
|
||||
|
||||
const uint32_t K_OC = static_cast<uint32_t>(src0->ne[0]);
|
||||
const uint32_t T_in = static_cast<uint32_t>(src0->ne[1]);
|
||||
const uint32_t T_out = static_cast<uint32_t>(dst->ne[0]);
|
||||
const uint32_t OC = static_cast<uint32_t>(oc);
|
||||
const uint32_t K = K_OC / OC;
|
||||
|
||||
vk_op_col2im_1d_push_constants p{};
|
||||
p.T_out = T_out;
|
||||
p.OC = OC;
|
||||
p.K_OC = K_OC;
|
||||
p.T_in = T_in;
|
||||
p.K = K;
|
||||
p.stride = stride;
|
||||
p.p0 = p0;
|
||||
|
||||
ggml_vk_op_f32(ctx, subctx, src0, nullptr, nullptr, nullptr, dst, GGML_OP_COL2IM_1D, std::move(p));
|
||||
}
|
||||
|
||||
// Dispatch the fused snake activation: y = x + sin^2(a * x) * inv_b.
|
||||
// Match the naive mul -> sin -> sqr -> mul -> add chain and run the
|
||||
// dedicated kernel directly. The pattern is validated by
|
||||
@@ -14412,6 +14498,10 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr
|
||||
case GGML_OP_TIMESTEP_EMBEDDING:
|
||||
ggml_vk_timestep_embedding(ctx, compute_ctx, src0, node);
|
||||
|
||||
break;
|
||||
case GGML_OP_COL2IM_1D:
|
||||
ggml_vk_col2im_1d(ctx, compute_ctx, src0, node);
|
||||
|
||||
break;
|
||||
case GGML_OP_CONV_TRANSPOSE_1D:
|
||||
ggml_vk_conv_transpose_1d(ctx, compute_ctx, src0, src1, node);
|
||||
@@ -17042,8 +17132,14 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
|
||||
case GGML_OP_SET:
|
||||
return op->src[0]->type == op->src[1]->type && op->src[0]->type == op->type &&
|
||||
(op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_TYPE_I32);
|
||||
case GGML_OP_CONCAT:
|
||||
return ggml_type_size(op->src[0]->type) == ggml_type_size(GGML_TYPE_F32);
|
||||
case GGML_OP_CONCAT: {
|
||||
if (op->src[0]->type != op->src[1]->type || op->src[0]->type != op->type) {
|
||||
return false;
|
||||
}
|
||||
const size_t type_size = ggml_type_size(op->type);
|
||||
return ggml_blck_size(op->type) == 1 &&
|
||||
(type_size == 1 || type_size == 2 || type_size == 4 || type_size == 8);
|
||||
}
|
||||
case GGML_OP_ADD1:
|
||||
return (op->src[0]->type == GGML_TYPE_F32 && op->src[1]->type == GGML_TYPE_F32)
|
||||
|| (op->src[0]->type == GGML_TYPE_F16 && op->src[1]->type == GGML_TYPE_F32)
|
||||
@@ -17119,7 +17215,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
|
||||
case GGML_OP_GATED_DELTA_NET:
|
||||
{
|
||||
const uint32_t S_v = op->src[2]->ne[0];
|
||||
if (S_v != 32 && S_v != 64 && S_v != 128) {
|
||||
if (S_v != 16 && S_v != 32 && S_v != 64 && S_v != 128) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < 6; i++) {
|
||||
@@ -17171,6 +17267,13 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
|
||||
return op->src[0]->type == GGML_TYPE_F32;
|
||||
case GGML_OP_CONV_TRANSPOSE_1D:
|
||||
return op->src[0]->type == GGML_TYPE_F32 && op->src[1]->type == GGML_TYPE_F32;
|
||||
case GGML_OP_COL2IM_1D:
|
||||
return (op->src[0]->type == GGML_TYPE_F32 ||
|
||||
op->src[0]->type == GGML_TYPE_F16 ||
|
||||
op->src[0]->type == GGML_TYPE_BF16) &&
|
||||
op->type == op->src[0]->type &&
|
||||
ggml_is_contiguous(op->src[0]) &&
|
||||
ggml_is_contiguous(op);
|
||||
case GGML_OP_CONV_2D:
|
||||
case GGML_OP_CONV_TRANSPOSE_2D:
|
||||
{
|
||||
@@ -18002,6 +18105,11 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_cgraph *
|
||||
const int32_t p0 = tensor->op_params[1];
|
||||
const int32_t d0 = tensor->op_params[2];
|
||||
tensor_clone = ggml_conv_transpose_1d(ggml_ctx, src_clone[0], src_clone[1], s0, p0, d0);
|
||||
} else if (tensor->op == GGML_OP_COL2IM_1D) {
|
||||
const int32_t stride = tensor->op_params[0];
|
||||
const int32_t oc = tensor->op_params[1];
|
||||
const int32_t p0 = tensor->op_params[2];
|
||||
tensor_clone = ggml_col2im_1d(ggml_ctx, src_clone[0], stride, oc, p0);
|
||||
} else if (tensor->op == GGML_OP_POOL_2D) {
|
||||
enum ggml_op_pool op = static_cast<ggml_op_pool>(tensor->op_params[0]);
|
||||
const int32_t k0 = tensor->op_params[1];
|
||||
|
||||
@@ -0,0 +1,61 @@
|
||||
#version 450
|
||||
|
||||
#include "types.glsl"
|
||||
|
||||
layout (binding = 0) readonly buffer A {A_TYPE data_a[];}; // columns: [K_OC, T_in]
|
||||
layout (binding = 1) writeonly buffer D {D_TYPE data_d[];}; // output: [T_out, OC]
|
||||
|
||||
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;
|
||||
|
||||
layout (push_constant) uniform parameter {
|
||||
uint32_t T_out;
|
||||
uint32_t OC;
|
||||
uint32_t K_OC;
|
||||
uint32_t T_in;
|
||||
uint32_t K;
|
||||
int32_t stride;
|
||||
int32_t p0;
|
||||
} p;
|
||||
|
||||
// Load A_TYPE to float
|
||||
float load_col(uint32_t idx) {
|
||||
#if defined(DATA_A_BF16)
|
||||
return bf16_to_fp32(uint32_t(data_a[idx]));
|
||||
#else
|
||||
return float(data_a[idx]);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Store float as D_TYPE
|
||||
void store_dst(uint32_t idx, float v) {
|
||||
#if defined(DATA_A_BF16)
|
||||
data_d[idx] = D_TYPE(fp32_to_bf16(v));
|
||||
#else
|
||||
data_d[idx] = D_TYPE(v);
|
||||
#endif
|
||||
}
|
||||
|
||||
void main() {
|
||||
const uint32_t t_out = gl_GlobalInvocationID.x;
|
||||
const uint32_t oc = gl_GlobalInvocationID.y;
|
||||
if (t_out >= p.T_out || oc >= p.OC) return;
|
||||
|
||||
const int32_t t_abs = int32_t(t_out) + p.p0; // absolute position in uncropped signal
|
||||
|
||||
// Gather: only the ceil(K/stride) columns that scatter into t_abs, no modulo
|
||||
int32_t t_in_min = (t_abs - int32_t(p.K) + p.stride) / p.stride;
|
||||
if (t_in_min < 0) t_in_min = 0;
|
||||
int32_t t_in_max = t_abs / p.stride;
|
||||
if (t_in_max >= int32_t(p.T_in)) t_in_max = int32_t(p.T_in) - 1;
|
||||
|
||||
float val = 0.0;
|
||||
for (int32_t t_in = t_in_min; t_in <= t_in_max; t_in++) {
|
||||
int32_t k = t_abs - t_in * p.stride;
|
||||
// col layout: [K_OC, T_in], column index = oc * K + k
|
||||
uint32_t col_idx = (oc * p.K + uint32_t(k)) + uint32_t(t_in) * p.K_OC;
|
||||
val += load_col(col_idx);
|
||||
}
|
||||
|
||||
// dst layout: [T_out, OC], element (t_out, oc) = t_out + oc * T_out
|
||||
store_dst(t_out + oc * p.T_out, val);
|
||||
}
|
||||
@@ -862,9 +862,10 @@ void process_shaders() {
|
||||
|
||||
string_to_spv("pad_f32", "pad.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
|
||||
|
||||
string_to_spv("concat_f32", "concat.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
|
||||
string_to_spv("concat_f16", "concat.comp", {{"A_TYPE", "float16_t"}, {"B_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}, {"OPTIMIZATION_ERROR_WORKAROUND", "1"}});
|
||||
string_to_spv("concat_i32", "concat.comp", {{"A_TYPE", "int"}, {"B_TYPE", "int"}, {"D_TYPE", "int"}});
|
||||
string_to_spv("concat_i8", "concat.comp", {{"A_TYPE", "uint8_t"}, {"B_TYPE", "uint8_t"}, {"D_TYPE", "uint8_t"}});
|
||||
string_to_spv("concat_i16", "concat.comp", {{"A_TYPE", "uint16_t"}, {"B_TYPE", "uint16_t"}, {"D_TYPE", "uint16_t"}});
|
||||
string_to_spv("concat_i32", "concat.comp", {{"A_TYPE", "uint"}, {"B_TYPE", "uint"}, {"D_TYPE", "uint"}});
|
||||
string_to_spv("concat_i64", "concat.comp", {{"A_TYPE", "uvec2"}, {"B_TYPE", "uvec2"}, {"D_TYPE", "uvec2"}});
|
||||
|
||||
string_to_spv("upscale_f32", "upscale.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
|
||||
|
||||
@@ -1002,6 +1003,9 @@ void process_shaders() {
|
||||
string_to_spv("timestep_embedding_f32", "timestep_embedding.comp", merge_maps(base_dict, {{"A_TYPE", "float"}, {"D_TYPE", "float"}}));
|
||||
|
||||
string_to_spv("conv_transpose_1d_f32", "conv_transpose_1d.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
|
||||
string_to_spv("col2im_1d_f32", "col2im_1d.comp", {{"DATA_A_F32", "1"}, {"A_TYPE", "float"}, {"D_TYPE", "float"}});
|
||||
string_to_spv("col2im_1d_f16", "col2im_1d.comp", {{"DATA_A_F16", "1"}, {"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
|
||||
string_to_spv("col2im_1d_bf16", "col2im_1d.comp", {{"DATA_A_BF16", "1"}, {"A_TYPE", "uint16_t"}, {"D_TYPE", "uint16_t"}});
|
||||
|
||||
string_to_spv("snake_f32", "snake.comp", {{"DATA_A_F32", "1"}, {"A_TYPE", "float"}, {"D_TYPE", "float"}});
|
||||
string_to_spv("snake_f16", "snake.comp", {{"DATA_A_F16", "1"}, {"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
|
||||
|
||||
@@ -129,7 +129,7 @@ void test_gbnf_generation(testing &t) {
|
||||
});
|
||||
|
||||
assert_gbnf_equal(t, R"""(
|
||||
root ::= ([^<] | "<" [^/] | "</" [^t] | "</t" [^a] | "</ta" [^g] | "</tag" [^>])*
|
||||
root ::= ([^<] | "<" [^/] | "</" [^t] | "</t" [^a] | "</ta" [^g] | "</tag" [^>])* ("<" | "</" | "</t" | "</ta" | "</tag")?
|
||||
space ::= | " " | "\n"{1,2} [ \t]{0,20}
|
||||
)""", gbnf);
|
||||
});
|
||||
|
||||
@@ -130,12 +130,12 @@ static void init_tensor_uniform(ggml_tensor * tensor, float min = -1.0f, float m
|
||||
}
|
||||
}
|
||||
ggml_backend_tensor_set(tensor, dataq.data(), 0, dataq.size());
|
||||
} else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16 || tensor->type == GGML_TYPE_I32) {
|
||||
} else if (tensor->type == GGML_TYPE_I8 || tensor->type == GGML_TYPE_I16) {
|
||||
// This is going to create some weird integers though.
|
||||
ggml_backend_tensor_set(tensor, data.data(), 0, ggml_nbytes(tensor));
|
||||
ggml_backend_tensor_set(tensor, data.data(), 0, nels * ggml_type_size(tensor->type));
|
||||
} else if (tensor->type == GGML_TYPE_I64) {
|
||||
// Integers with a size of 8 bytes can be set by mirroring the float data, the specific values are again not really meaningful.
|
||||
const size_t nbytes_half = ggml_nbytes(tensor)/2;
|
||||
const size_t nbytes_half = nels * sizeof(float);
|
||||
ggml_backend_tensor_set(tensor, data.data(), 0*nbytes_half, nbytes_half);
|
||||
ggml_backend_tensor_set(tensor, data.data(), 1*nbytes_half, nbytes_half);
|
||||
} else {
|
||||
|
||||
@@ -1369,7 +1369,7 @@ static void test_nemotron_tool_format(testing & t) {
|
||||
// Check argument markers (note: markers retain trailing newlines for proper parsing)
|
||||
t.assert_equal("arg_name_prefix should be '<parameter='", "<parameter=", analysis.tools.arguments.name_prefix);
|
||||
t.assert_equal("arg_name_suffix should be '>\\n'", ">\n", analysis.tools.arguments.name_suffix);
|
||||
t.assert_equal("arg_value_suffix should be '</parameter>\\n'", "</parameter>\n", analysis.tools.arguments.value_suffix);
|
||||
t.assert_equal("arg_value_suffix should be '\\n</parameter>\\n'", "\n</parameter>\n", analysis.tools.arguments.value_suffix);
|
||||
|
||||
// Check format classification
|
||||
t.assert_true("tool format should be TAG_WITH_TAGGED", analysis.tools.format.mode == tool_format::TAG_WITH_TAGGED);
|
||||
@@ -2030,12 +2030,11 @@ static void test_tagged_args_with_embedded_quotes(testing & t) {
|
||||
return p.content(p.until("<seed:tool_call>")) + p.optional(tool_section) + p.end();
|
||||
});
|
||||
|
||||
// The exact input from the failing test
|
||||
std::string input =
|
||||
"<seed:tool_call>\n"
|
||||
"<function=edit>\n"
|
||||
"<parameter=filename>\n"
|
||||
"foo.cpp\n"
|
||||
"<parameter=filename>"
|
||||
"foo.cpp"
|
||||
"</parameter>\n"
|
||||
"<parameter=oldString>"
|
||||
"def foo(arg = \"14\"):\n"
|
||||
|
||||
+114
-6
@@ -1882,11 +1882,29 @@ static void test_lfm2_parser(const std::string & template_path, bool detailed_de
|
||||
.expect(simple_assist_msg("Use this format: [link text](url). Example: [Wikipedia](https://www.wikipedia.org)."))
|
||||
.run();
|
||||
|
||||
// Python tool with multiline code in string
|
||||
// Python tool with multiline code in string: the \n in the literal decodes to a real
|
||||
// newline, emitted as a JSON \n escape (not a doubled backslash).
|
||||
tst.test("<|tool_call_start|>[python(code=\"def hello():\\n print('hey')\")]<|tool_call_end|>")
|
||||
.tools({ python_tool })
|
||||
.expect_tool_calls({
|
||||
{ "python", R"#({"code": "def hello():\\n print('hey')"})#", "" }
|
||||
{ "python", R"#({"code": "def hello():\n print('hey')"})#", "" }
|
||||
})
|
||||
.run();
|
||||
|
||||
// String escape sequences decode to their actual characters (newline + tab here),
|
||||
// so a "write a two line file" style call produces real line breaks, not literal "\n".
|
||||
tst.test("<|tool_call_start|>[python(code=\"First line\\nSecond line\\tindented\")]<|tool_call_end|>")
|
||||
.tools({ python_tool })
|
||||
.expect_tool_calls({
|
||||
{ "python", R"#({"code": "First line\nSecond line\tindented"})#", "" }
|
||||
})
|
||||
.run();
|
||||
|
||||
// Escaped quotes inside a string argument survive the round-trip.
|
||||
tst.test("<|tool_call_start|>[python(code=\"print(\\\"hi\\\")\")]<|tool_call_end|>")
|
||||
.tools({ python_tool })
|
||||
.expect_tool_calls({
|
||||
{ "python", R"#({"code": "print(\"hi\")"})#", "" }
|
||||
})
|
||||
.run();
|
||||
|
||||
@@ -1935,6 +1953,10 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
||||
}
|
||||
})";
|
||||
|
||||
const char * const_schema = R"({
|
||||
"const": "42"
|
||||
})";
|
||||
|
||||
{
|
||||
// Qwen3.5 (basically same as Nemotron, but keeping separate tests just in case)
|
||||
auto tst = peg_tester("models/templates/Qwen3.5-4B.jinja", detailed_debug);
|
||||
@@ -2020,6 +2042,80 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
||||
})
|
||||
.run();
|
||||
|
||||
tst.test(
|
||||
"<tool_call>\n"
|
||||
"<function=edit>\n"
|
||||
"<parameter=filename>\n"
|
||||
"foo.c\n"
|
||||
"</parameter>\n"
|
||||
"<parameter=oldString>\n"
|
||||
"#iclunde\n"
|
||||
"</parameter>\n"
|
||||
"<parameter=newString>\n"
|
||||
"#include\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>")
|
||||
.enable_thinking(false)
|
||||
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
|
||||
.tools({
|
||||
edit_tool
|
||||
})
|
||||
.expect_tool_calls({
|
||||
{ "edit", "{\"filename\": \"foo.c\", \"oldString\": \"#iclunde\", \"newString\": \"#include\"}", {} },
|
||||
})
|
||||
.run();
|
||||
|
||||
// a parameter value that itself ends in a newline (e.g. a source file with a
|
||||
// trailing newline). The structural delimiter is "\n</parameter>\n", so the value
|
||||
// "#include\n" renders as "...#include\n\n</parameter>\n". The trailing newline must
|
||||
// be preserved faithfully (no stripping), and the generated grammar must admit a
|
||||
// value ending on a delimiter prefix. Regression test for gbnf_excluding_pattern.
|
||||
tst.test(
|
||||
"<tool_call>\n"
|
||||
"<function=edit>\n"
|
||||
"<parameter=filename>\n"
|
||||
"foo.c\n"
|
||||
"</parameter>\n"
|
||||
"<parameter=oldString>\n"
|
||||
"#iclunde\n"
|
||||
"</parameter>\n"
|
||||
"<parameter=newString>\n"
|
||||
"#include\n"
|
||||
"\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>")
|
||||
.enable_thinking(false)
|
||||
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
|
||||
.tools({
|
||||
edit_tool
|
||||
})
|
||||
.expect_tool_calls({
|
||||
{ "edit", "{\"filename\": \"foo.c\", \"oldString\": \"#iclunde\", \"newString\": \"#include\\n\"}", {} },
|
||||
})
|
||||
.run();
|
||||
|
||||
|
||||
// test code that starts with indent
|
||||
tst.test(
|
||||
"<tool_call>\n"
|
||||
"<function=python>\n"
|
||||
"<parameter=code>\n"
|
||||
" print(\"Hello, world!\")\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>")
|
||||
.enable_thinking(false)
|
||||
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
|
||||
.tools({
|
||||
python_tool
|
||||
})
|
||||
.expect_tool_calls({
|
||||
{ "python", "{\"code\": \" print(\\\"Hello, world!\\\")\"}", {} },
|
||||
})
|
||||
.run();
|
||||
|
||||
tst.test(
|
||||
"I need to output the invoice details in JSON\n"
|
||||
"</think>\n\n"
|
||||
@@ -3196,18 +3292,16 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
||||
tst.test(
|
||||
"<seed:tool_call>\n"
|
||||
"<function=edit>\n"
|
||||
"<parameter=filename>\n"
|
||||
"foo.cpp\n"
|
||||
"<parameter=filename>"
|
||||
"foo.cpp"
|
||||
"</parameter>\n"
|
||||
"<parameter=oldString>"
|
||||
"def foo(arg = \"14\"):\n"
|
||||
" return arg + \"bar\"\n"
|
||||
"\n"
|
||||
"</parameter>\n"
|
||||
"<parameter=newString>"
|
||||
"def foo(arg = \"15\"):\n"
|
||||
" pass\n"
|
||||
"\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</seed:tool_call>")
|
||||
@@ -4927,6 +5021,20 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
||||
auto tst = peg_tester("models/templates/meta-llama-Llama-3.1-8B-Instruct.jinja", detailed_debug);
|
||||
tst.test("Hello, world!\nWhat's up?").tools({ special_function_tool }).expect(message_assist).expect_reconstruction().run();
|
||||
|
||||
tst.test(
|
||||
"```json\n\"42\" \n```")
|
||||
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
|
||||
.json_schema(const_schema)
|
||||
.expect_content(R"("42")")
|
||||
.run();
|
||||
|
||||
tst.test(
|
||||
"\"42\" \n")
|
||||
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
|
||||
.json_schema(const_schema)
|
||||
.expect_content(R"("42")")
|
||||
.run();
|
||||
|
||||
// Continuation tests
|
||||
tst.test("world!\nWhat's up?")
|
||||
.messages({ message_user, message_assist_prefill_content })
|
||||
|
||||
@@ -323,6 +323,7 @@ struct cmd_params {
|
||||
std::vector<std::string> hf_repo;
|
||||
std::vector<std::string> hf_file;
|
||||
std::string hf_token;
|
||||
bool offline;
|
||||
std::vector<int> n_prompt;
|
||||
std::vector<int> n_gen;
|
||||
std::vector<std::pair<int, int>> n_pg;
|
||||
@@ -367,6 +368,7 @@ static const cmd_params cmd_params_defaults = {
|
||||
/* hf_repo */ {},
|
||||
/* hf_file */ {},
|
||||
/* hf_token */ "",
|
||||
/* offline */ false,
|
||||
/* n_prompt */ { 512 },
|
||||
/* n_gen */ { 128 },
|
||||
/* n_pg */ {},
|
||||
@@ -437,6 +439,8 @@ static void print_usage(int /* argc */, char ** argv) {
|
||||
printf(" (default: unused)\n");
|
||||
printf(" -hft, --hf-token <token> Hugging Face access token\n");
|
||||
printf(" (default: value from HF_TOKEN environment variable)\n");
|
||||
printf(" --offline Offline mode: forces use of cache, prevents network access\n");
|
||||
printf(" (default: disabled)\n");
|
||||
printf(" -p, --n-prompt <n> (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str());
|
||||
printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
|
||||
printf(" -pg <pp,tg> (default: %s)\n", join(transform_to_str(cmd_params_defaults.n_pg, pair_str), ",").c_str());
|
||||
@@ -558,6 +562,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
||||
break;
|
||||
}
|
||||
params.hf_token = argv[i];
|
||||
} else if (arg == "--offline") {
|
||||
params.offline = true;
|
||||
} else if (arg == "-p" || arg == "--n-prompt") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
@@ -1040,6 +1046,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
||||
|
||||
common_download_opts opts;
|
||||
opts.bearer_token = params.hf_token;
|
||||
opts.offline = params.offline;
|
||||
auto download_result = common_download_model(model, opts);
|
||||
if (download_result.model_path.empty()) {
|
||||
fprintf(stderr, "error: failed to download model from HuggingFace\n");
|
||||
|
||||
@@ -247,7 +247,9 @@ int32_t mtmd_helper_decode_image_chunk(
|
||||
llama_pos n_past,
|
||||
llama_seq_id seq_id,
|
||||
int32_t n_batch,
|
||||
llama_pos * new_n_past) {
|
||||
llama_pos * new_n_past,
|
||||
mtmd_helper_post_decode_callback callback,
|
||||
void * user_data) {
|
||||
GGML_ASSERT(n_batch > 0);
|
||||
auto chunk_type = mtmd_input_chunk_get_type(chunk);
|
||||
const char * name = chunk_type == MTMD_INPUT_CHUNK_TYPE_IMAGE ? "image" : "audio";
|
||||
@@ -302,10 +304,23 @@ int32_t mtmd_helper_decode_image_chunk(
|
||||
int32_t ret = llama_decode(lctx, batch_embd_view);
|
||||
if (ret != 0) {
|
||||
LOG_ERR("failed to decode %s\n", name);
|
||||
llama_set_causal_attn(lctx, true); // restore causal attn
|
||||
if (use_non_causal) {
|
||||
llama_set_causal_attn(lctx, true);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (callback != nullptr) {
|
||||
ret = callback(batch_embd_view, user_data);
|
||||
if (ret != 0) {
|
||||
LOG_ERR("post-decode callback failed\n");
|
||||
if (use_non_causal) {
|
||||
llama_set_causal_attn(lctx, true);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
LOG_INF("%s decoded (batch %d/%d) in %" PRId64 " ms\n", name, i_batch+1, n_img_batches, ggml_time_ms() - t1);
|
||||
|
||||
i_batch++;
|
||||
@@ -379,7 +394,7 @@ int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
|
||||
LOG_INF("%s slice encoded in %" PRId64 " ms\n", name, ggml_time_ms() - t0);
|
||||
|
||||
float * embd = mtmd_get_output_embd(ctx);
|
||||
ret = mtmd_helper_decode_image_chunk(ctx, lctx, chunk, embd, n_past, seq_id, n_batch, new_n_past);
|
||||
ret = mtmd_helper_decode_image_chunk(ctx, lctx, chunk, embd, n_past, seq_id, n_batch, new_n_past, nullptr, nullptr);
|
||||
if (ret != 0) {
|
||||
LOG_ERR("failed to decode %s\n", name);
|
||||
llama_batch_free(text_batch);
|
||||
|
||||
@@ -91,6 +91,8 @@ MTMD_API int32_t mtmd_helper_eval_chunk_single(mtmd_context * ctx,
|
||||
bool logits_last,
|
||||
llama_pos * new_n_past);
|
||||
|
||||
typedef int32_t (*mtmd_helper_post_decode_callback)(struct llama_batch batch, void * user_data);
|
||||
|
||||
// helper function to decode an image whose embeddings have already been calculated
|
||||
// this helper will handle batching and pre/post decoding setup (for ex. gemma 3 requires non-causal attention)
|
||||
// ret 0 on success, -1 on chunk not being a valid image chunk, 1 on decode failure
|
||||
@@ -101,7 +103,9 @@ MTMD_API int32_t mtmd_helper_decode_image_chunk(mtmd_context * ctx,
|
||||
llama_pos n_past,
|
||||
llama_seq_id seq_id,
|
||||
int32_t n_batch,
|
||||
llama_pos * new_n_past);
|
||||
llama_pos * new_n_past,
|
||||
mtmd_helper_post_decode_callback callback,
|
||||
void * user_data);
|
||||
|
||||
//
|
||||
// video input helpers (requires ffmpeg/ffprobe installed on the system)
|
||||
|
||||
+8
-9
@@ -96,16 +96,15 @@ struct mtmd_image_tokens {
|
||||
// [BOI] [row0 tokens + newline] ... [row(ny-1) tokens + newline] [EOI]
|
||||
return (nx + 1) * ny + 2;
|
||||
}
|
||||
// [QWEN_VIDEO] this logic is quite ugly, it's mostly to make qwen-vl temporal merge work, can be improved in the future
|
||||
if (batch_f32.entries.size() == 1 || n_temporal_merge == 1) {
|
||||
return nx * ny;
|
||||
}
|
||||
uint32_t nz = batch_f32.entries.size();
|
||||
// TODO: simplify this by repeating the last frame until it fits the temporal merge
|
||||
if (nz % n_temporal_merge != 0) {
|
||||
nz = nz / n_temporal_merge + 1;
|
||||
} else {
|
||||
nz = nz / n_temporal_merge;
|
||||
if (n_temporal_merge > 1) {
|
||||
// [QWEN_VIDEO] this logic is quite ugly, it's mostly to make qwen-vl temporal merge work, can be improved in the future
|
||||
// TODO: simplify this by repeating the last frame until it fits the temporal merge
|
||||
if (nz % n_temporal_merge != 0) {
|
||||
nz = nz / n_temporal_merge + 1;
|
||||
} else {
|
||||
nz = nz / n_temporal_merge;
|
||||
}
|
||||
}
|
||||
return nx * ny * nz;
|
||||
}
|
||||
|
||||
@@ -40,6 +40,7 @@ def main(args_in: list[str] | None = None) -> None:
|
||||
required=True)
|
||||
parser.add_argument("--hf-repo", type=str, help="Hugging Face model repository", required=True)
|
||||
parser.add_argument("--hf-file", type=str, help="Hugging Face model file", required=True)
|
||||
parser.add_argument("--offline", action="store_true", default=False, help="Offline mode: forces use of cache, prevents network access")
|
||||
parser.add_argument("-ngl", "--n-gpu-layers", type=int, help="layers to the GPU for computation", required=True)
|
||||
parser.add_argument("--ctx-size", type=int, help="Set the size of the prompt context", required=True)
|
||||
parser.add_argument("--parallel", type=int, help="Set the number of slots for process requests", required=True)
|
||||
@@ -268,6 +269,8 @@ def start_server_background(args):
|
||||
]
|
||||
server_args.extend(['--hf-repo', args.hf_repo])
|
||||
server_args.extend(['--hf-file', args.hf_file])
|
||||
if args.offline:
|
||||
server_args.append('--offline')
|
||||
server_args.extend(['--n-gpu-layers', args.n_gpu_layers])
|
||||
server_args.extend(['--ctx-size', args.ctx_size])
|
||||
server_args.extend(['--parallel', args.parallel])
|
||||
|
||||
@@ -539,37 +539,6 @@ bool server_tokens::validate(const struct llama_context * ctx) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
int32_t server_tokens::process_chunk(
|
||||
llama_context * ctx,
|
||||
mtmd_context * mctx,
|
||||
size_t idx,
|
||||
llama_pos pos,
|
||||
int32_t seq_id,
|
||||
size_t & n_tokens_out) const {
|
||||
const auto & chunk = find_chunk(idx);
|
||||
const char * name = mtmd_input_chunk_get_type(chunk.get()) == MTMD_INPUT_CHUNK_TYPE_IMAGE
|
||||
? "image" : "audio";
|
||||
SRV_INF("processing %s...\n", name);
|
||||
int32_t n_batch = llama_n_batch(ctx);
|
||||
int64_t t0 = ggml_time_ms();
|
||||
llama_pos new_n_past; // unused for now
|
||||
int32_t result = mtmd_helper_eval_chunk_single(mctx, ctx,
|
||||
chunk.get(),
|
||||
pos,
|
||||
seq_id,
|
||||
n_batch,
|
||||
true, // logits last
|
||||
&new_n_past);
|
||||
SRV_INF("%s processed in %" PRId64 " ms\n", name, ggml_time_ms() - t0);
|
||||
if (result != 0) {
|
||||
LOG_ERR("mtmd_helper_eval failed with status %d", result);
|
||||
n_tokens_out = 0;
|
||||
return result;
|
||||
}
|
||||
n_tokens_out = mtmd_input_chunk_get_n_tokens(chunk.get());
|
||||
return 0;
|
||||
}
|
||||
|
||||
server_tokens server_tokens::clone() const {
|
||||
server_tokens res;
|
||||
res.has_mtmd = has_mtmd;
|
||||
|
||||
@@ -221,15 +221,6 @@ public:
|
||||
// make sure all text tokens are within the vocab range
|
||||
bool validate(const struct llama_context * ctx) const;
|
||||
|
||||
// encode and decode the image chunk
|
||||
int32_t process_chunk(
|
||||
llama_context * ctx,
|
||||
mtmd_context * mctx,
|
||||
size_t idx,
|
||||
llama_pos pos,
|
||||
int32_t seq_id,
|
||||
size_t & n_tokens_out) const;
|
||||
|
||||
server_tokens clone() const;
|
||||
};
|
||||
|
||||
|
||||
@@ -15,11 +15,6 @@
|
||||
#include "mtmd.h"
|
||||
#include "mtmd-helper.h"
|
||||
|
||||
#include "ggml-cpp.h"
|
||||
|
||||
// TODO: tmp until the mtmd draft processing is refactored [TAG_MTMD_DRAFT_PROCESSING]
|
||||
#include "../../src/llama-ext.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cinttypes>
|
||||
@@ -81,7 +76,6 @@ struct server_slot {
|
||||
// multimodal
|
||||
mtmd_context * mctx = nullptr;
|
||||
mtmd::batch_ptr mbatch = nullptr;
|
||||
std::array<llama_context *, 2> mtgt = {nullptr, nullptr}; // [0] for main context, [1] for optional draft context
|
||||
|
||||
// speculative decoding
|
||||
common_speculative * spec;
|
||||
@@ -207,6 +201,8 @@ struct server_slot {
|
||||
// Speculative decoding stats
|
||||
int32_t n_draft_total = 0; // Total draft tokens generated
|
||||
int32_t n_draft_accepted = 0; // Draft tokens actually accepted
|
||||
int32_t n_draft_verif_steps = 0; // Total draft token verification steps by the target model
|
||||
std::vector<int32_t> n_accepted_per_pos; // Accepted tokens per draft position
|
||||
|
||||
void reset() {
|
||||
SLT_DBG(*this, "%s", "\n");
|
||||
@@ -233,6 +229,8 @@ struct server_slot {
|
||||
// clear speculative decoding stats
|
||||
n_draft_total = 0;
|
||||
n_draft_accepted = 0;
|
||||
n_draft_verif_steps = 0;
|
||||
n_accepted_per_pos.clear();
|
||||
|
||||
task_prev = std::move(task);
|
||||
task.reset();
|
||||
@@ -244,15 +242,6 @@ struct server_slot {
|
||||
|
||||
// clear multimodal state
|
||||
mbatch.reset();
|
||||
mtgt[0] = ctx_tgt;
|
||||
mtgt[1] = nullptr;
|
||||
if (ctx_dft && llama_get_ctx_other(ctx_dft) != ctx_tgt) {
|
||||
// TODO: in the future, figure out how to infuse target embeddings to the images
|
||||
// for now, we re-decode the same chunk in both ctx_tgt and ctx_dft
|
||||
// maybe we simply need to call `common_speculative_process()` ?
|
||||
// [TAG_MTMD_DRAFT_PROCESSING]
|
||||
mtgt[1] = ctx_dft;
|
||||
}
|
||||
}
|
||||
|
||||
void init_sampler() const {
|
||||
@@ -524,10 +513,22 @@ struct server_slot {
|
||||
llama_perf_context(ctx_tgt).n_reused);
|
||||
|
||||
if (n_draft_total > 0) {
|
||||
const float draft_ratio = (float) n_draft_accepted / n_draft_total;
|
||||
const float draft_ratio = (float) n_draft_accepted / n_draft_total;
|
||||
const double mean_acc_len = n_draft_verif_steps > 0 ? 1.0 + (double) n_draft_accepted / (double) n_draft_verif_steps : 1.0;
|
||||
|
||||
std::string acceptance_rates_per_pos;
|
||||
if (n_draft_verif_steps > 0) {
|
||||
for (size_t i = 0; i < n_accepted_per_pos.size(); ++i) {
|
||||
if (i > 0) {
|
||||
acceptance_rates_per_pos += ", ";
|
||||
}
|
||||
acceptance_rates_per_pos += string_format("%.3f", (double) n_accepted_per_pos[i] / (double) n_draft_verif_steps);
|
||||
}
|
||||
}
|
||||
|
||||
SLT_INF(*this,
|
||||
"draft acceptance = %0.5f (%5d accepted / %5d generated)\n",
|
||||
draft_ratio, n_draft_accepted, n_draft_total);
|
||||
"draft acceptance = %0.5f (%5d accepted / %5d generated), mean acceptance length = %5.2f, acceptance rate per position = (%s)\n",
|
||||
draft_ratio, n_draft_accepted, n_draft_total, mean_acc_len, acceptance_rates_per_pos.c_str());
|
||||
}
|
||||
|
||||
common_speculative_print_stats(spec);
|
||||
@@ -598,32 +599,38 @@ struct server_slot {
|
||||
int process_mtmd_chunk(size_t idx, size_t & n_tokens_out) {
|
||||
GGML_ASSERT(mctx);
|
||||
const auto & input_tokens = task->tokens;
|
||||
auto & chunk = input_tokens.find_chunk(idx);
|
||||
const auto & chunk = input_tokens.find_chunk(idx);
|
||||
int32_t res = 0;
|
||||
|
||||
auto try_decode = [&]() -> int32_t {
|
||||
if (mbatch) {
|
||||
float * embd = mtmd_batch_get_output_embd(mbatch.get(), chunk.get());
|
||||
if (embd) {
|
||||
for (auto * lctx : mtgt) {
|
||||
if (lctx == nullptr) {
|
||||
continue;
|
||||
}
|
||||
llama_pos new_n_past; // unused for now
|
||||
res = mtmd_helper_decode_image_chunk(
|
||||
mctx,
|
||||
lctx,
|
||||
chunk.get(),
|
||||
embd,
|
||||
prompt.tokens.pos_next(),
|
||||
id,
|
||||
llama_n_batch(lctx),
|
||||
&new_n_past
|
||||
);
|
||||
if (res != 0) {
|
||||
SLT_ERR(*this, "failed to decode mtmd chunk, idx = %zu, res = %d\n", idx, res);
|
||||
return -1;
|
||||
void * cb_data = spec;
|
||||
static auto cb = [](llama_batch batch, void * user_data) {
|
||||
common_speculative * spec = static_cast<common_speculative *>(user_data);
|
||||
if (!common_speculative_process(spec, batch)) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
};
|
||||
|
||||
llama_pos new_n_past; // unused for now
|
||||
res = mtmd_helper_decode_image_chunk(
|
||||
mctx,
|
||||
ctx_tgt,
|
||||
chunk.get(),
|
||||
embd,
|
||||
prompt.tokens.pos_next(),
|
||||
id,
|
||||
llama_n_batch(ctx_tgt),
|
||||
&new_n_past,
|
||||
cb,
|
||||
cb_data
|
||||
);
|
||||
if (res != 0) {
|
||||
SLT_ERR(*this, "failed to decode mtmd chunk, idx = %zu, res = %d\n", idx, res);
|
||||
return -1;
|
||||
}
|
||||
n_tokens_out = mtmd_input_chunk_get_n_tokens(chunk.get());
|
||||
return 0; // success
|
||||
@@ -636,7 +643,8 @@ struct server_slot {
|
||||
res = try_decode();
|
||||
if (res == 0) {
|
||||
return 0;
|
||||
} else if (res < 0) {
|
||||
}
|
||||
if (res < 0) {
|
||||
// fatal error
|
||||
return res;
|
||||
}
|
||||
@@ -3350,48 +3358,6 @@ private:
|
||||
// TODO: avoid restoring the draft context and re-evaluating the drafted tokens when not needed [TAG_SPEC_AVOID_DRAFT_REEVAL]
|
||||
// for now, always re-evaluate for simplicity
|
||||
// ref: https://github.com/ggml-org/llama.cpp/pull/22728#issuecomment-4400925384
|
||||
//
|
||||
// | spec type | need re-eval |
|
||||
// | --- | --- |
|
||||
// | draft model | no | because the draft model does not use embeddings from the target
|
||||
// | MTP (std) | yes |
|
||||
// | MTP Gemma4 | no | because the KV cache is shared
|
||||
// | Eagle3 | yes |
|
||||
// | DFlash | yes | https://github.com/ggml-org/llama.cpp/pull/22728#issuecomment-4405406982
|
||||
//
|
||||
// note: this logic is now moved in `common_speculative_process()`
|
||||
// keeping the sketch here until for a bit, until the logic is finalized
|
||||
//
|
||||
//if (ctx_dft) {
|
||||
// // TODO: update as needed for MTP, Eagle3, etc.
|
||||
// const bool need_tgt_embd = false;
|
||||
|
||||
// if (need_tgt_embd) {
|
||||
// llama_synchronize(ctx_tgt);
|
||||
// }
|
||||
|
||||
// // the logic here varies depending on the speculative decoding method
|
||||
// // - some draft contexts require embeddings from the target context, others don't
|
||||
// // - some draft contexts involve an encoder step to transform the target embeddings to draft embeddings
|
||||
// // TODO: extract this in a function ?
|
||||
// {
|
||||
// // TODO: hook the embeddings from the last target batch here
|
||||
// if (llama_model_has_encoder(model_dft.get())) {
|
||||
// //llama_encode(ctx_dft, ...);
|
||||
|
||||
// GGML_ABORT("not implemented yet\n");
|
||||
// }
|
||||
|
||||
// const int ret = llama_decode(ctx_dft.get(), batch_view);
|
||||
|
||||
// if (ret != 0) {
|
||||
// SRV_ERR("failed to decode draft batch, ret = %d\n", ret);
|
||||
|
||||
// // TODO: handle error
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
if (!common_speculative_process(spec.get(), batch_view)) {
|
||||
SRV_ERR("%s", "failed to process speculative batch\n");
|
||||
|
||||
@@ -3593,6 +3559,14 @@ private:
|
||||
|
||||
// update how many tokens out of those tested were accepted
|
||||
slot.n_draft_accepted += ids.size() - 1;
|
||||
slot.n_draft_verif_steps += 1;
|
||||
|
||||
if (slot.n_accepted_per_pos.empty()) {
|
||||
slot.n_accepted_per_pos.resize(common_speculative_n_max(¶ms_base.speculative), 0);
|
||||
}
|
||||
for (size_t i = 0; i < ids.size() - 1 && i < slot.n_accepted_per_pos.size(); ++i) {
|
||||
slot.n_accepted_per_pos[i]++;
|
||||
}
|
||||
|
||||
// add accepted tokens to the prompt
|
||||
slot.prompt.tokens.keep_first(slot.prompt.n_tokens() - n_draft);
|
||||
|
||||
Generated
+4
-3
@@ -35,6 +35,7 @@
|
||||
"bits-ui": "2.18.1",
|
||||
"clsx": "2.1.1",
|
||||
"dexie": "4.4.3",
|
||||
"dompurify": "3.4.5",
|
||||
"eslint": "9.39.4",
|
||||
"eslint-config-prettier": "10.1.8",
|
||||
"eslint-plugin-storybook": "10.4.2",
|
||||
@@ -8651,9 +8652,9 @@
|
||||
"peer": true
|
||||
},
|
||||
"node_modules/dompurify": {
|
||||
"version": "3.4.8",
|
||||
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.4.8.tgz",
|
||||
"integrity": "sha512-yb1cEmaOum7wFvOCSQxyfgVlv5D47Rc30iZWoMpbDIWTnJ6grDDQyu2KFJzB2k7u0pMuJcQ1zphH//fFnw2tjQ==",
|
||||
"version": "3.4.5",
|
||||
"resolved": "https://registry.npmjs.org/dompurify/-/dompurify-3.4.5.tgz",
|
||||
"integrity": "sha512-OrwIBKsdNSVEeubdJ1HBv/wNENRM9ytAVCv7YXt//A3vPdVMNuACRqK9mXCGCBW2ln7BT/A4X0jXHo2Gu89miA==",
|
||||
"dev": true,
|
||||
"license": "(MPL-2.0 OR Apache-2.0)",
|
||||
"optionalDependencies": {
|
||||
|
||||
@@ -54,6 +54,7 @@
|
||||
"bits-ui": "2.18.1",
|
||||
"clsx": "2.1.1",
|
||||
"dexie": "4.4.3",
|
||||
"dompurify": "3.4.5",
|
||||
"eslint": "9.39.4",
|
||||
"eslint-config-prettier": "10.1.8",
|
||||
"eslint-plugin-storybook": "10.4.2",
|
||||
|
||||
@@ -18,6 +18,8 @@
|
||||
import { rehypeEnhanceCodeBlocks } from './plugins/rehype/enhance-code-blocks';
|
||||
import { rehypeEnhanceMermaidBlocks } from './plugins/rehype/enhance-mermaid-blocks';
|
||||
import { rehypeMermaidPre } from './plugins/rehype/mermaid-pre';
|
||||
import { rehypeSvgPre } from './plugins/rehype/svg-pre';
|
||||
import { rehypeEnhanceSvgBlocks } from './plugins/rehype/enhance-svg-blocks';
|
||||
import { rehypeResolveAttachmentImages } from './plugins/rehype/resolve-attachment-images';
|
||||
import { rehypeRtlSupport } from './plugins/rehype/rehype-rtl-support';
|
||||
import { remarkLiteralHtml } from './plugins/remark/literal-html';
|
||||
@@ -38,11 +40,26 @@
|
||||
DATA_ERROR_BOUND_ATTR,
|
||||
DATA_ERROR_HANDLED_ATTR,
|
||||
BOOL_TRUE_STRING,
|
||||
SETTINGS_KEYS
|
||||
SETTINGS_KEYS,
|
||||
MERMAID_WRAPPER_CLASS,
|
||||
MERMAID_BLOCK_CLASS,
|
||||
MERMAID_LANGUAGE,
|
||||
MERMAID_SYNTAX_ATTR,
|
||||
MERMAID_RENDERED_ATTR,
|
||||
SVG_WRAPPER_CLASS,
|
||||
SVG_BLOCK_CLASS,
|
||||
SVG_LANGUAGE,
|
||||
XML_LANGUAGE,
|
||||
SVG_TAG_PREFIX,
|
||||
SVG_SOURCE_ATTR,
|
||||
SVG_RENDERED_ATTR,
|
||||
SVG_INLINE_SHADOW_STYLE
|
||||
} from '$lib/constants';
|
||||
import { ColorMode, UrlProtocol } from '$lib/enums';
|
||||
import { FileTypeText } from '$lib/enums/files.enums';
|
||||
import { highlightCode, detectIncompleteCodeBlock, type IncompleteCodeBlock } from '$lib/utils';
|
||||
import { sanitizeSvg } from '$lib/utils/sanitize-svg';
|
||||
import { mountSvgShadow } from '$lib/utils/svg-shadow';
|
||||
import '$styles/katex-custom.scss';
|
||||
import githubDarkCss from 'highlight.js/styles/github-dark.css?inline';
|
||||
import githubLightCss from 'highlight.js/styles/github.css?inline';
|
||||
@@ -77,11 +94,32 @@
|
||||
let renderedBlocks = $state<MarkdownBlock[]>([]);
|
||||
let unstableBlockHtml = $state('');
|
||||
let incompleteCodeBlock = $state<IncompleteCodeBlock | null>(null);
|
||||
const streamingSvgCode = $derived.by(() => {
|
||||
const block = incompleteCodeBlock;
|
||||
if (!block) return null;
|
||||
if (block.language === SVG_LANGUAGE) return block.code;
|
||||
if (block.language === XML_LANGUAGE && block.code.trimStart().startsWith(SVG_TAG_PREFIX))
|
||||
return block.code;
|
||||
return null;
|
||||
});
|
||||
const liveSvgHtml = $derived(streamingSvgCode !== null ? sanitizeSvg(streamingSvgCode) : '');
|
||||
let previewDialogOpen = $state(false);
|
||||
let previewCode = $state('');
|
||||
let previewLanguage = $state('text');
|
||||
let mermaidPreviewOpen = $state(false);
|
||||
let mermaidPreviewSvgHtml = $state('');
|
||||
let svgPreviewLive = $state(false);
|
||||
let streamingSvgHost = $state<HTMLDivElement | null>(null);
|
||||
|
||||
// While the zoom dialog is open on a streaming svg, mirror the live render into it
|
||||
$effect(() => {
|
||||
if (svgPreviewLive && liveSvgHtml) mermaidPreviewSvgHtml = liveSvgHtml;
|
||||
});
|
||||
|
||||
// Mount the streaming svg into its shadow host on every chunk so it renders live
|
||||
$effect(() => {
|
||||
if (streamingSvgHost) mountSvgShadow(streamingSvgHost, liveSvgHtml, SVG_INLINE_SHADOW_STYLE);
|
||||
});
|
||||
|
||||
let streamingCodeScrollContainer = $state<HTMLDivElement>();
|
||||
|
||||
@@ -124,8 +162,10 @@
|
||||
.use(rehypeRestoreTableHtml) // Restore limited HTML (e.g., <br>, <ul>) inside Markdown tables
|
||||
.use(rehypeEnhanceLinks) // Add target="_blank" to links
|
||||
.use(rehypeMermaidPre) // Convert mermaid blocks to <pre class="mermaid">
|
||||
.use(rehypeSvgPre) // Convert svg blocks to <pre class="svg-block">
|
||||
.use(rehypeEnhanceCodeBlocks) // Wrap code blocks with header and actions
|
||||
.use(rehypeEnhanceMermaidBlocks) // Wrap mermaid blocks with header and actions
|
||||
.use(rehypeEnhanceSvgBlocks) // Wrap svg blocks with header and actions
|
||||
.use(rehypeResolveAttachmentImages, { attachments })
|
||||
.use(rehypeRtlSupport) // Add bidirectional text support
|
||||
.use(rehypeStringify, { allowDangerousHtml: true }); // Convert to HTML string
|
||||
@@ -462,17 +502,19 @@
|
||||
const target = event.target as HTMLElement;
|
||||
|
||||
// Check if clicking on copy or preview button in mermaid block
|
||||
const copyBtn = target.closest('.mermaid-block-wrapper .copy-code-btn');
|
||||
const previewBtn = target.closest('.mermaid-block-wrapper .preview-code-btn');
|
||||
const copyBtn = target.closest(`.${MERMAID_WRAPPER_CLASS} .copy-code-btn`);
|
||||
const previewBtn = target.closest(`.${MERMAID_WRAPPER_CLASS} .preview-code-btn`);
|
||||
|
||||
if (copyBtn || previewBtn) {
|
||||
const wrapper = target.closest('.mermaid-block-wrapper');
|
||||
const wrapper = target.closest(`.${MERMAID_WRAPPER_CLASS}`);
|
||||
if (!wrapper) return;
|
||||
|
||||
const preElement = wrapper.querySelector<HTMLElement>('pre.mermaid[data-mermaid-syntax]');
|
||||
const preElement = wrapper.querySelector<HTMLElement>(
|
||||
`pre.${MERMAID_BLOCK_CLASS}[${MERMAID_SYNTAX_ATTR}]`
|
||||
);
|
||||
if (!preElement) return;
|
||||
|
||||
const mermaidSyntax = preElement.dataset.mermaidSyntax ?? '';
|
||||
const mermaidSyntax = preElement.getAttribute(MERMAID_SYNTAX_ATTR) ?? '';
|
||||
|
||||
if (copyBtn) {
|
||||
event.preventDefault();
|
||||
@@ -491,19 +533,70 @@
|
||||
const svg = preElement.querySelector('svg');
|
||||
if (!svg) return;
|
||||
mermaidPreviewSvgHtml = svg.outerHTML;
|
||||
svgPreviewLive = false;
|
||||
mermaidPreviewOpen = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Check if clicking on copy or preview button in svg block
|
||||
const svgCopyBtn = target.closest(`.${SVG_WRAPPER_CLASS} .copy-code-btn`);
|
||||
const svgPreviewBtn = target.closest(`.${SVG_WRAPPER_CLASS} .preview-code-btn`);
|
||||
|
||||
if (svgCopyBtn || svgPreviewBtn) {
|
||||
const wrapper = target.closest(`.${SVG_WRAPPER_CLASS}`);
|
||||
if (!wrapper) return;
|
||||
|
||||
const preElement = wrapper.querySelector<HTMLElement>(
|
||||
`pre.${SVG_BLOCK_CLASS}[${SVG_SOURCE_ATTR}]`
|
||||
);
|
||||
if (!preElement) return;
|
||||
|
||||
if (svgCopyBtn) {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
try {
|
||||
await copyToClipboard(preElement.getAttribute(SVG_SOURCE_ATTR) ?? '');
|
||||
} catch (error) {
|
||||
console.error('Failed to copy svg source:', error);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
if (svgPreviewBtn) {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
mermaidPreviewSvgHtml = sanitizeSvg(preElement.getAttribute(SVG_SOURCE_ATTR) ?? '');
|
||||
svgPreviewLive = false;
|
||||
mermaidPreviewOpen = true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Open preview when clicking the svg block itself. A final block carries its
|
||||
// source, a streaming block does not and is mirrored live into the dialog.
|
||||
const svgEl = target.closest(`.${SVG_BLOCK_CLASS}`);
|
||||
if (svgEl) {
|
||||
const source = svgEl.getAttribute(SVG_SOURCE_ATTR);
|
||||
if (source !== null) {
|
||||
mermaidPreviewSvgHtml = sanitizeSvg(source);
|
||||
svgPreviewLive = false;
|
||||
} else {
|
||||
svgPreviewLive = true;
|
||||
}
|
||||
mermaidPreviewOpen = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// Otherwise, open preview when clicking on the mermaid diagram itself
|
||||
const mermaidEl = target.closest('.mermaid');
|
||||
const mermaidEl = target.closest(`.${MERMAID_BLOCK_CLASS}`);
|
||||
if (!mermaidEl) return;
|
||||
|
||||
const svg = mermaidEl.querySelector('svg');
|
||||
if (!svg) return;
|
||||
|
||||
mermaidPreviewSvgHtml = svg.outerHTML;
|
||||
svgPreviewLive = false;
|
||||
mermaidPreviewOpen = true;
|
||||
}
|
||||
|
||||
@@ -515,6 +608,7 @@
|
||||
mermaidPreviewOpen = open;
|
||||
if (!open) {
|
||||
mermaidPreviewSvgHtml = '';
|
||||
svgPreviewLive = false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -527,12 +621,14 @@
|
||||
async function renderMermaidDiagrams() {
|
||||
if (!containerRef) return;
|
||||
|
||||
const nodes = containerRef.querySelectorAll('pre.mermaid:not([data-mermaid-rendered])');
|
||||
const nodes = containerRef.querySelectorAll(
|
||||
`pre.${MERMAID_BLOCK_CLASS}:not([${MERMAID_RENDERED_ATTR}])`
|
||||
);
|
||||
if (nodes.length === 0) return;
|
||||
|
||||
// Mark nodes immediately to prevent duplicate renders if called again during streaming.
|
||||
// This avoids needing a guard that would block node discovery.
|
||||
nodes.forEach((node) => node.setAttribute('data-mermaid-rendered', 'true'));
|
||||
nodes.forEach((node) => node.setAttribute(MERMAID_RENDERED_ATTR, 'true'));
|
||||
|
||||
// Read mode before await so Svelte tracks it reactively.
|
||||
const isDark = mode.current === ColorMode.DARK;
|
||||
@@ -565,6 +661,34 @@
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Renders svg diagrams that haven't been rendered yet.
|
||||
* Sanitizes the source before injecting and marks each node so it renders once.
|
||||
* An empty sanitize result keeps the raw source as escaped text.
|
||||
*/
|
||||
function renderSvgDiagrams() {
|
||||
if (!containerRef) return;
|
||||
|
||||
const nodes = containerRef.querySelectorAll<HTMLElement>(
|
||||
`pre.${SVG_BLOCK_CLASS}:not([${SVG_RENDERED_ATTR}])`
|
||||
);
|
||||
if (nodes.length === 0) return;
|
||||
|
||||
nodes.forEach((node) => {
|
||||
node.setAttribute(SVG_RENDERED_ATTR, 'true');
|
||||
|
||||
const source = node.getAttribute(SVG_SOURCE_ATTR) ?? node.textContent ?? '';
|
||||
const clean = sanitizeSvg(source);
|
||||
|
||||
if (clean) {
|
||||
node.textContent = '';
|
||||
const host = document.createElement('div');
|
||||
node.appendChild(host);
|
||||
mountSvgShadow(host, clean, SVG_INLINE_SHADOW_STYLE);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles image load errors by replacing the image with a fallback UI.
|
||||
* Shows a placeholder with a link to open the image in a new tab.
|
||||
@@ -647,6 +771,7 @@
|
||||
setupCodeBlockActions();
|
||||
setupImageErrorHandlers();
|
||||
renderMermaidDiagrams();
|
||||
renderSvgDiagrams();
|
||||
}
|
||||
});
|
||||
|
||||
@@ -689,7 +814,7 @@
|
||||
{/if}
|
||||
|
||||
{#if incompleteCodeBlock}
|
||||
{#if incompleteCodeBlock.language === 'mermaid'}
|
||||
{#if incompleteCodeBlock.language === MERMAID_LANGUAGE}
|
||||
<div class="mermaid-block-wrapper streaming-mermaid-block">
|
||||
<div class="code-block-header">
|
||||
<span class="code-language">mermaid</span>
|
||||
@@ -705,6 +830,30 @@
|
||||
<span class="mermaid-loading-text">Generating diagram...</span>
|
||||
</div>
|
||||
</div>
|
||||
{:else if streamingSvgCode !== null}
|
||||
<div class="svg-block-wrapper streaming-svg-block">
|
||||
<div class="code-block-header">
|
||||
<span class="code-language">svg</span>
|
||||
<div class="code-block-actions">
|
||||
<ActionIconCopyToClipboard
|
||||
text={incompleteCodeBlock.code}
|
||||
canCopy={false}
|
||||
ariaLabel="Diagram incomplete"
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
{#if liveSvgHtml}
|
||||
<div class="svg-scroll-container">
|
||||
<div class={SVG_BLOCK_CLASS}>
|
||||
<div bind:this={streamingSvgHost}></div>
|
||||
</div>
|
||||
</div>
|
||||
{:else}
|
||||
<div class="mermaid-loading-placeholder">
|
||||
<span class="mermaid-loading-text">Rendering svg...</span>
|
||||
</div>
|
||||
{/if}
|
||||
</div>
|
||||
{:else}
|
||||
<div class="code-block-wrapper streaming-code-block relative">
|
||||
<div class="code-block-header">
|
||||
|
||||
@@ -560,8 +560,9 @@ div.markdown-user-content :global(.table-wrapper) {
|
||||
border-color: var(--primary);
|
||||
}
|
||||
|
||||
/* Mermaid diagrams */
|
||||
.markdown-content :global(pre.mermaid) {
|
||||
/* Mermaid and svg blocks share the same block styling */
|
||||
.markdown-content :global(pre.mermaid),
|
||||
.markdown-content :global(.svg-block) {
|
||||
background: transparent;
|
||||
border: none;
|
||||
padding: 0;
|
||||
@@ -572,13 +573,25 @@ div.markdown-user-content :global(.table-wrapper) {
|
||||
position: relative;
|
||||
}
|
||||
|
||||
/* The svg block fills its flex container so the shadow host has a definite width to render into */
|
||||
.markdown-content :global(.svg-block) {
|
||||
width: 100%;
|
||||
}
|
||||
|
||||
/* Hide mermaid code text until rendered - prevents flash */
|
||||
.markdown-content :global(pre.mermaid:not([data-mermaid-rendered])),
|
||||
.markdown-content :global(pre.mermaid[data-mermaid-rendered]:not(:has(svg))) {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.markdown-content :global(pre.mermaid:hover) {
|
||||
/* Hide svg source until rendered - prevents flash. A rendered-but-unsanitized
|
||||
block (oversized source) keeps its raw text visible as a safe fallback. */
|
||||
.markdown-content :global(pre.svg-block:not([data-svg-rendered])) {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.markdown-content :global(pre.mermaid:hover),
|
||||
.markdown-content :global(.svg-block:hover) {
|
||||
opacity: 0.85;
|
||||
}
|
||||
|
||||
@@ -590,8 +603,9 @@ div.markdown-user-content :global(.table-wrapper) {
|
||||
padding: 3rem 1rem;
|
||||
}
|
||||
|
||||
/* Mermaid block wrapper - matches code block styling */
|
||||
.markdown-content :global(.mermaid-block-wrapper) {
|
||||
/* Diagram block wrapper - matches code block styling */
|
||||
.markdown-content :global(.mermaid-block-wrapper),
|
||||
.markdown-content :global(.svg-block-wrapper) {
|
||||
margin: 1.5rem 0;
|
||||
border-radius: 0.75rem;
|
||||
overflow: hidden;
|
||||
@@ -603,11 +617,13 @@ div.markdown-user-content :global(.table-wrapper) {
|
||||
max-height: var(--max-message-height);
|
||||
}
|
||||
|
||||
.markdown-content:global(.dark) :global(.mermaid-block-wrapper) {
|
||||
.markdown-content:global(.dark) :global(.mermaid-block-wrapper),
|
||||
.markdown-content:global(.dark) :global(.svg-block-wrapper) {
|
||||
border-color: color-mix(in oklch, var(--border) 20%, transparent);
|
||||
}
|
||||
|
||||
.markdown-content :global(.mermaid-scroll-container) {
|
||||
.markdown-content :global(.mermaid-scroll-container),
|
||||
.markdown-content :global(.svg-scroll-container) {
|
||||
min-height: 350px;
|
||||
max-height: var(--max-message-height);
|
||||
overflow-y: auto;
|
||||
@@ -618,17 +634,20 @@ div.markdown-user-content :global(.table-wrapper) {
|
||||
padding: 3rem 1rem 1rem;
|
||||
}
|
||||
|
||||
.full-height-code-blocks :global(.mermaid-block-wrapper) {
|
||||
.full-height-code-blocks :global(.mermaid-block-wrapper),
|
||||
.full-height-code-blocks :global(.svg-block-wrapper) {
|
||||
max-height: none;
|
||||
}
|
||||
|
||||
.full-height-code-blocks :global(.mermaid-scroll-container) {
|
||||
.full-height-code-blocks :global(.mermaid-scroll-container),
|
||||
.full-height-code-blocks :global(.svg-scroll-container) {
|
||||
max-height: none;
|
||||
overflow-y: visible;
|
||||
}
|
||||
|
||||
/* Mermaid block uses same header styling as code blocks */
|
||||
.markdown-content :global(.mermaid-block-wrapper .code-block-header) {
|
||||
/* Diagram block uses same header styling as code blocks */
|
||||
.markdown-content :global(.mermaid-block-wrapper .code-block-header),
|
||||
.markdown-content :global(.svg-block-wrapper .code-block-header) {
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
@@ -640,14 +659,16 @@ div.markdown-user-content :global(.table-wrapper) {
|
||||
right: 0;
|
||||
}
|
||||
|
||||
.markdown-content :global(.mermaid-block-wrapper .code-block-actions) {
|
||||
.markdown-content :global(.mermaid-block-wrapper .code-block-actions),
|
||||
.markdown-content :global(.svg-block-wrapper .code-block-actions) {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
/* Mermaid pre element - remove default margins */
|
||||
.markdown-content :global(.mermaid-block-wrapper pre.mermaid) {
|
||||
/* Diagram pre element - remove default margins */
|
||||
.markdown-content :global(.mermaid-block-wrapper pre.mermaid),
|
||||
.markdown-content :global(.svg-block-wrapper pre.svg-block) {
|
||||
background: transparent;
|
||||
border: none;
|
||||
padding: 0;
|
||||
@@ -655,7 +676,6 @@ div.markdown-user-content :global(.table-wrapper) {
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
/* Mermaid SVG should be bigger */
|
||||
.markdown-content :global(.mermaid-block-wrapper pre.mermaid svg) {
|
||||
width: unset !important;
|
||||
height: auto;
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
*/
|
||||
|
||||
import { copyCodeToClipboard, copyToClipboard } from '$lib/utils';
|
||||
import { MERMAID_WRAPPER_CLASS, MERMAID_BLOCK_CLASS, MERMAID_SYNTAX_ATTR } from '$lib/constants';
|
||||
|
||||
export interface PreviewState {
|
||||
previewDialogOpen: boolean;
|
||||
@@ -106,17 +107,19 @@ export function createHandleMermaidClick(mermaidState: MermaidPreviewState) {
|
||||
const target = event.target as HTMLElement;
|
||||
|
||||
// Check if clicking on copy or preview button in mermaid block
|
||||
const copyBtn = target.closest('.mermaid-block-wrapper .copy-code-btn');
|
||||
const previewBtn = target.closest('.mermaid-block-wrapper .preview-code-btn');
|
||||
const copyBtn = target.closest(`.${MERMAID_WRAPPER_CLASS} .copy-code-btn`);
|
||||
const previewBtn = target.closest(`.${MERMAID_WRAPPER_CLASS} .preview-code-btn`);
|
||||
|
||||
if (copyBtn || previewBtn) {
|
||||
const wrapper = target.closest('.mermaid-block-wrapper');
|
||||
const wrapper = target.closest(`.${MERMAID_WRAPPER_CLASS}`);
|
||||
if (!wrapper) return;
|
||||
|
||||
const preElement = wrapper.querySelector<HTMLElement>('pre.mermaid[data-mermaid-syntax]');
|
||||
const preElement = wrapper.querySelector<HTMLElement>(
|
||||
`pre.${MERMAID_BLOCK_CLASS}[${MERMAID_SYNTAX_ATTR}]`
|
||||
);
|
||||
if (!preElement) return;
|
||||
|
||||
const mermaidSyntax = preElement.dataset.mermaidSyntax ?? '';
|
||||
const mermaidSyntax = preElement.getAttribute(MERMAID_SYNTAX_ATTR) ?? '';
|
||||
|
||||
if (copyBtn) {
|
||||
event.preventDefault();
|
||||
@@ -141,7 +144,7 @@ export function createHandleMermaidClick(mermaidState: MermaidPreviewState) {
|
||||
}
|
||||
|
||||
// Otherwise, open preview when clicking on the mermaid diagram itself
|
||||
const mermaidEl = target.closest('.mermaid');
|
||||
const mermaidEl = target.closest(`.${MERMAID_BLOCK_CLASS}`);
|
||||
if (!mermaidEl) return;
|
||||
|
||||
const svg = mermaidEl.querySelector('svg');
|
||||
|
||||
+18
-9
@@ -13,7 +13,14 @@
|
||||
import type { Plugin } from 'unified';
|
||||
import type { Root, Element, ElementContent } from 'hast';
|
||||
import { visit } from 'unist-util-visit';
|
||||
import { MERMAID_WRAPPER_CLASS, MERMAID_SCROLL_CONTAINER_CLASS } from '$lib/constants';
|
||||
import {
|
||||
MERMAID_WRAPPER_CLASS,
|
||||
MERMAID_SCROLL_CONTAINER_CLASS,
|
||||
MERMAID_BLOCK_CLASS,
|
||||
MERMAID_LANGUAGE,
|
||||
MERMAID_SYNTAX_ATTR,
|
||||
MERMAID_ID_ATTR
|
||||
} from '$lib/constants';
|
||||
import {
|
||||
createBlockHeader,
|
||||
createCopyButton,
|
||||
@@ -43,11 +50,13 @@ export const rehypeEnhanceMermaidBlocks: Plugin<[], Root> = () => {
|
||||
const className = node.properties?.className;
|
||||
if (!Array.isArray(className)) return;
|
||||
|
||||
const isMermaid = className.some((cls) => typeof cls === 'string' && cls === 'mermaid');
|
||||
const isMermaid = className.some(
|
||||
(cls) => typeof cls === 'string' && cls === MERMAID_BLOCK_CLASS
|
||||
);
|
||||
|
||||
if (!isMermaid) return;
|
||||
|
||||
const mermaidId = generateBlockId('mermaid', 'idxMermaidBlock');
|
||||
const mermaidId = generateBlockId(MERMAID_LANGUAGE, 'idxMermaidBlock');
|
||||
|
||||
// Extract the mermaid syntax (text content of the pre element)
|
||||
const diagramText = node.children
|
||||
@@ -60,22 +69,22 @@ export const rehypeEnhanceMermaidBlocks: Plugin<[], Root> = () => {
|
||||
// Store the mermaid syntax in data attribute for copy functionality
|
||||
node.properties = {
|
||||
...node.properties,
|
||||
'data-mermaid-syntax': diagramText,
|
||||
'data-mermaid-id': mermaidId
|
||||
[MERMAID_SYNTAX_ATTR]: diagramText,
|
||||
[MERMAID_ID_ATTR]: mermaidId
|
||||
};
|
||||
|
||||
const actions = [
|
||||
createCopyButton(mermaidId, 'data-mermaid-id', 'Copy mermaid syntax'),
|
||||
createPreviewButton(mermaidId, 'data-mermaid-id', 'Preview diagram')
|
||||
createCopyButton(mermaidId, MERMAID_ID_ATTR, 'Copy mermaid syntax'),
|
||||
createPreviewButton(mermaidId, MERMAID_ID_ATTR, 'Preview diagram')
|
||||
];
|
||||
|
||||
const header = createBlockHeader('mermaid', mermaidId, 'data-mermaid-id', actions);
|
||||
const header = createBlockHeader(MERMAID_LANGUAGE, mermaidId, MERMAID_ID_ATTR, actions);
|
||||
const wrapper = createWrapper(
|
||||
header,
|
||||
node,
|
||||
MERMAID_WRAPPER_CLASS,
|
||||
MERMAID_SCROLL_CONTAINER_CLASS,
|
||||
{ 'data-mermaid-id': mermaidId }
|
||||
{ [MERMAID_ID_ATTR]: mermaidId }
|
||||
);
|
||||
|
||||
// Replace pre with wrapper in parent
|
||||
|
||||
+80
@@ -0,0 +1,80 @@
|
||||
/**
|
||||
* Rehype plugin to enhance svg blocks with wrapper, header, and action buttons.
|
||||
*
|
||||
* Wraps <pre class="svg-block"> elements with a container that includes:
|
||||
* - Language label ("svg")
|
||||
* - Copy button (copies svg source to clipboard)
|
||||
* - Preview button (opens fullscreen preview dialog)
|
||||
*
|
||||
* Operates directly on the HAST tree and reuses the shared code-block builders.
|
||||
*/
|
||||
|
||||
import type { Plugin } from 'unified';
|
||||
import type { Root, Element, ElementContent } from 'hast';
|
||||
import { visit } from 'unist-util-visit';
|
||||
import {
|
||||
SVG_WRAPPER_CLASS,
|
||||
SVG_SCROLL_CONTAINER_CLASS,
|
||||
SVG_BLOCK_CLASS,
|
||||
SVG_LANGUAGE,
|
||||
SVG_SOURCE_ATTR,
|
||||
SVG_ID_ATTR
|
||||
} from '$lib/constants';
|
||||
import {
|
||||
createBlockHeader,
|
||||
createCopyButton,
|
||||
createPreviewButton,
|
||||
createWrapper,
|
||||
generateBlockId
|
||||
} from './code-block-utils';
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
idxSvgBlock?: number;
|
||||
}
|
||||
}
|
||||
|
||||
export const rehypeEnhanceSvgBlocks: Plugin<[], Root> = () => {
|
||||
return (tree: Root) => {
|
||||
visit(tree, 'element', (node: Element, index, parent) => {
|
||||
if (node.tagName !== 'pre' || !parent || index === undefined) return;
|
||||
|
||||
const className = node.properties?.className;
|
||||
if (!Array.isArray(className)) return;
|
||||
|
||||
const isSvg = className.some((cls) => typeof cls === 'string' && cls === SVG_BLOCK_CLASS);
|
||||
|
||||
if (!isSvg) return;
|
||||
|
||||
const svgId = generateBlockId(SVG_LANGUAGE, 'idxSvgBlock');
|
||||
|
||||
// Extract the svg source (text content of the pre element)
|
||||
const svgSource = node.children
|
||||
.map((child) => {
|
||||
if (child.type === 'text') return child.value;
|
||||
return '';
|
||||
})
|
||||
.join('');
|
||||
|
||||
// Store the svg source in data attribute for copy and render
|
||||
node.properties = {
|
||||
...node.properties,
|
||||
[SVG_SOURCE_ATTR]: svgSource,
|
||||
[SVG_ID_ATTR]: svgId
|
||||
};
|
||||
|
||||
const actions = [
|
||||
createCopyButton(svgId, SVG_ID_ATTR, 'Copy svg source'),
|
||||
createPreviewButton(svgId, SVG_ID_ATTR, 'Preview svg')
|
||||
];
|
||||
|
||||
const header = createBlockHeader(SVG_LANGUAGE, svgId, SVG_ID_ATTR, actions);
|
||||
const wrapper = createWrapper(header, node, SVG_WRAPPER_CLASS, SVG_SCROLL_CONTAINER_CLASS, {
|
||||
[SVG_ID_ATTR]: svgId
|
||||
});
|
||||
|
||||
// Replace pre with wrapper in parent
|
||||
(parent.children as ElementContent[])[index] = wrapper;
|
||||
});
|
||||
};
|
||||
};
|
||||
+4
-64
@@ -1,67 +1,7 @@
|
||||
import type { Plugin } from 'unified';
|
||||
import type { Root, Element, ElementContent, Text } from 'hast';
|
||||
import { visit } from 'unist-util-visit';
|
||||
import { createPreTransform } from './pre-transform';
|
||||
import { MERMAID_BLOCK_CLASS, MERMAID_LANGUAGE } from '$lib/constants';
|
||||
|
||||
/**
|
||||
* Recursively extracts all text content from a HAST node.
|
||||
* Handles nested elements (e.g., span wrappers from syntax highlighting).
|
||||
* Converts mermaid code blocks to <pre class="mermaid"> for client-side rendering.
|
||||
*/
|
||||
function extractText(node: ElementContent): string {
|
||||
if (node.type === 'text') return node.value;
|
||||
if (node.type === 'element') {
|
||||
return (node.children ?? []).map(extractText).join('');
|
||||
}
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
|
||||
* Rehype plugin to convert mermaid code blocks to <pre class="mermaid"> elements.
|
||||
*
|
||||
* Transforms:
|
||||
* <pre><code class="language-mermaid">graph TD; A-->B</code></pre>
|
||||
* into:
|
||||
* <pre class="mermaid">graph TD; A-->B</pre>
|
||||
*
|
||||
* The mermaid library renders these client-side via mermaid.run().
|
||||
*
|
||||
* Must run BEFORE rehypeEnhanceCodeBlocks so mermaid blocks are not wrapped
|
||||
* with code block headers/buttons (they have no <code> child, so they're skipped).
|
||||
*/
|
||||
export const rehypeMermaidPre: Plugin<[], Root> = () => {
|
||||
return (tree: Root) => {
|
||||
visit(tree, 'element', (node: Element, index, parent) => {
|
||||
if (node.tagName !== 'pre' || !parent || index === undefined) return;
|
||||
|
||||
const codeElement = node.children.find(
|
||||
(child): child is Element => child.type === 'element' && child.tagName === 'code'
|
||||
);
|
||||
|
||||
if (!codeElement) return;
|
||||
|
||||
const className = codeElement.properties?.className;
|
||||
if (!Array.isArray(className)) return;
|
||||
|
||||
const isMermaid = className.some(
|
||||
(cls) => typeof cls === 'string' && cls === 'language-mermaid'
|
||||
);
|
||||
|
||||
if (!isMermaid) return;
|
||||
|
||||
// Recursively extract text to handle nested spans from syntax highlighting
|
||||
const diagramText = codeElement.children.map(extractText).join('').trim();
|
||||
|
||||
if (!diagramText) return;
|
||||
|
||||
const mermaidPre: Element = {
|
||||
type: 'element',
|
||||
tagName: 'pre',
|
||||
properties: {
|
||||
className: ['mermaid']
|
||||
},
|
||||
children: [{ type: 'text', value: diagramText } as Text]
|
||||
};
|
||||
|
||||
(parent.children as ElementContent[])[index] = mermaidPre;
|
||||
});
|
||||
};
|
||||
};
|
||||
export const rehypeMermaidPre = createPreTransform(MERMAID_LANGUAGE, MERMAID_BLOCK_CLASS);
|
||||
|
||||
+79
@@ -0,0 +1,79 @@
|
||||
import type { Plugin } from 'unified';
|
||||
import type { Root, Element, ElementContent, Text } from 'hast';
|
||||
import { visit } from 'unist-util-visit';
|
||||
|
||||
/**
|
||||
* Recursively extracts all text content from a HAST node.
|
||||
* Handles nested elements (e.g., span wrappers from syntax highlighting).
|
||||
*/
|
||||
function extractText(node: ElementContent): string {
|
||||
if (node.type === 'text') return node.value;
|
||||
if (node.type === 'element') {
|
||||
return (node.children ?? []).map(extractText).join('');
|
||||
}
|
||||
return '';
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a rehype plugin that converts <pre><code class="language-{language}">
|
||||
* blocks into <pre class="{targetClass}"> elements carrying the raw text.
|
||||
*
|
||||
* Accepts one or more source languages, and an optional contentGuard that
|
||||
* receives the trimmed text and decides whether the block qualifies. The guard
|
||||
* lets a shared fence language be claimed only when its content matches, e.g.
|
||||
* an xml block is converted to svg only when it starts with <svg.
|
||||
*
|
||||
* The result has no <code> child, so rehypeEnhanceCodeBlocks skips it. Rendering
|
||||
* happens client-side, so no markup is injected at this stage. Must run BEFORE
|
||||
* rehypeEnhanceCodeBlocks.
|
||||
*/
|
||||
export function createPreTransform(
|
||||
languages: string | string[],
|
||||
targetClass: string,
|
||||
contentGuard?: (text: string) => boolean
|
||||
): Plugin<[], Root> {
|
||||
const codeClasses = (Array.isArray(languages) ? languages : [languages]).map(
|
||||
(language) => `language-${language}`
|
||||
);
|
||||
|
||||
return () => {
|
||||
return (tree: Root) => {
|
||||
visit(tree, 'element', (node: Element, index, parent) => {
|
||||
if (node.tagName !== 'pre' || !parent || index === undefined) return;
|
||||
|
||||
const codeElement = node.children.find(
|
||||
(child): child is Element => child.type === 'element' && child.tagName === 'code'
|
||||
);
|
||||
|
||||
if (!codeElement) return;
|
||||
|
||||
const className = codeElement.properties?.className;
|
||||
if (!Array.isArray(className)) return;
|
||||
|
||||
const matches = className.some(
|
||||
(cls) => typeof cls === 'string' && codeClasses.includes(cls)
|
||||
);
|
||||
|
||||
if (!matches) return;
|
||||
|
||||
// Recursively extract text to handle nested spans from syntax highlighting
|
||||
const text = codeElement.children.map(extractText).join('').trim();
|
||||
|
||||
if (!text) return;
|
||||
|
||||
if (contentGuard && !contentGuard(text)) return;
|
||||
|
||||
const pre: Element = {
|
||||
type: 'element',
|
||||
tagName: 'pre',
|
||||
properties: {
|
||||
className: [targetClass]
|
||||
},
|
||||
children: [{ type: 'text', value: text } as Text]
|
||||
};
|
||||
|
||||
(parent.children as ElementContent[])[index] = pre;
|
||||
});
|
||||
};
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
import { createPreTransform } from './pre-transform';
|
||||
import { SVG_BLOCK_CLASS, SVG_LANGUAGE, XML_LANGUAGE, SVG_TAG_PREFIX } from '$lib/constants';
|
||||
|
||||
/**
|
||||
* Converts svg code blocks to <pre class="svg-block"> for client-side rendering.
|
||||
* Also claims xml blocks whose content starts with <svg, since models often emit
|
||||
* svg inside an xml fence.
|
||||
*/
|
||||
export const rehypeSvgPre = createPreTransform(
|
||||
[SVG_LANGUAGE, XML_LANGUAGE],
|
||||
SVG_BLOCK_CLASS,
|
||||
(text) => text.startsWith(SVG_TAG_PREFIX)
|
||||
);
|
||||
@@ -1,5 +1,7 @@
|
||||
<script lang="ts">
|
||||
import MermaidPreviewControls from './MermaidPreviewControls.svelte';
|
||||
import { mountSvgShadow } from '$lib/utils/svg-shadow';
|
||||
import { SVG_DIALOG_SHADOW_STYLE } from '$lib/constants';
|
||||
|
||||
interface Props {
|
||||
svgHtml: string;
|
||||
@@ -7,6 +9,13 @@
|
||||
|
||||
let { svgHtml }: Props = $props();
|
||||
|
||||
let svgHost = $state<HTMLDivElement | null>(null);
|
||||
|
||||
// Re-mount on every svgHtml change so a live streaming svg keeps rendering while zoomed
|
||||
$effect(() => {
|
||||
if (svgHost) mountSvgShadow(svgHost, svgHtml, SVG_DIALOG_SHADOW_STYLE);
|
||||
});
|
||||
|
||||
// Zoom and pan state
|
||||
let scale = $state(1);
|
||||
let translateX = $state(0);
|
||||
@@ -99,8 +108,7 @@
|
||||
onpointerup={handlePointerUp}
|
||||
onpointerleave={handlePointerUp}
|
||||
>
|
||||
<!-- eslint-disable-next-line no-at-html-tags -->
|
||||
{@html svgHtml}
|
||||
<div bind:this={svgHost}></div>
|
||||
</div>
|
||||
|
||||
<MermaidPreviewControls
|
||||
@@ -111,16 +119,3 @@
|
||||
onResetView={resetView}
|
||||
/>
|
||||
</div>
|
||||
|
||||
<style lang="postcss" scoped>
|
||||
/* Styles for SVGs rendered via {@html} — no Tailwind class can target child elements */
|
||||
.mermaid-preview-diagram :global(svg) {
|
||||
min-height: min(50vh, 12rem);
|
||||
min-width: min(80vw, 20rem);
|
||||
max-width: none !important;
|
||||
max-height: none !important;
|
||||
height: auto !important;
|
||||
width: auto !important;
|
||||
display: block;
|
||||
}
|
||||
</style>
|
||||
|
||||
@@ -29,6 +29,7 @@ export * from './latex-protection';
|
||||
export * from './literal-html';
|
||||
export * from './markdown';
|
||||
export * from './mermaid-blocks';
|
||||
export * from './svg-blocks';
|
||||
export * from './max-bundle-size';
|
||||
export * from './mcp';
|
||||
export * from './mcp-form';
|
||||
|
||||
@@ -1,2 +1,9 @@
|
||||
export const MERMAID_WRAPPER_CLASS = 'mermaid-block-wrapper';
|
||||
export const MERMAID_SCROLL_CONTAINER_CLASS = 'mermaid-scroll-container';
|
||||
export const MERMAID_BLOCK_CLASS = 'mermaid';
|
||||
|
||||
export const MERMAID_LANGUAGE = 'mermaid';
|
||||
|
||||
export const MERMAID_SYNTAX_ATTR = 'data-mermaid-syntax';
|
||||
export const MERMAID_ID_ATTR = 'data-mermaid-id';
|
||||
export const MERMAID_RENDERED_ATTR = 'data-mermaid-rendered';
|
||||
|
||||
@@ -0,0 +1,49 @@
|
||||
export const SVG_WRAPPER_CLASS = 'svg-block-wrapper';
|
||||
export const SVG_SCROLL_CONTAINER_CLASS = 'svg-scroll-container';
|
||||
export const SVG_BLOCK_CLASS = 'svg-block';
|
||||
|
||||
export const SVG_LANGUAGE = 'svg';
|
||||
export const XML_LANGUAGE = 'xml';
|
||||
export const SVG_TAG_PREFIX = '<svg';
|
||||
|
||||
export const SVG_SOURCE_ATTR = 'data-svg-source';
|
||||
export const SVG_ID_ATTR = 'data-svg-id';
|
||||
export const SVG_RENDERED_ATTR = 'data-svg-rendered';
|
||||
|
||||
/**
|
||||
* Hard size ceiling for a single inline svg block.
|
||||
* Above this the source is left as raw text instead of being rendered.
|
||||
*/
|
||||
export const SVG_MAX_BYTES = 256 * 1024;
|
||||
|
||||
/**
|
||||
* DOMPurify config for untrusted svg coming from model output.
|
||||
*
|
||||
* foreignObject and script stay forbidden unconditionally, they are the only
|
||||
* inline svg vectors that execute arbitrary html or js. Everything else is
|
||||
* allowed for maximum rendering compatibility: href and xlink:href stay so
|
||||
* use, image, a and animateMotion work, and DOMPurify still neutralizes
|
||||
* javascript: and data: uri schemes natively. External resource refs are
|
||||
* allowed by design on a local first tool, the user browser fetches them.
|
||||
*
|
||||
* The sanitized svg is always mounted inside a shadow root (see svg-shadow),
|
||||
* so an author <style> stays scoped to that root and can not reach the page.
|
||||
*/
|
||||
export const SVG_SANITIZE_CONFIG = {
|
||||
USE_PROFILES: { svg: true, svgFilters: true },
|
||||
FORBID_TAGS: ['foreignObject', 'script']
|
||||
};
|
||||
|
||||
/**
|
||||
* Shadow root style for an inline svg block. Mirrors the centered, padded
|
||||
* sizing the light dom used before the svg moved behind a shadow boundary.
|
||||
*/
|
||||
export const SVG_INLINE_SHADOW_STYLE =
|
||||
':host{display:block;width:100%;text-align:center}svg{display:block;margin:0 auto;width:auto;height:auto;max-width:100%;max-height:70vh;min-height:8rem;padding:3rem 1rem}';
|
||||
|
||||
/**
|
||||
* Shadow root style for the zoom dialog svg. Lets the svg grow past its
|
||||
* intrinsic size so pan and zoom have room to work.
|
||||
*/
|
||||
export const SVG_DIALOG_SHADOW_STYLE =
|
||||
':host{display:inline-block}svg{min-height:min(50vh,12rem);min-width:min(80vw,20rem);max-width:none;max-height:none;height:auto;width:auto;display:block}';
|
||||
@@ -0,0 +1,22 @@
|
||||
import DOMPurify from 'dompurify';
|
||||
import { SVG_MAX_BYTES, SVG_SANITIZE_CONFIG, SVG_TAG_PREFIX } from '$lib/constants';
|
||||
|
||||
/**
|
||||
* Sanitizes a raw svg string for safe inline rendering.
|
||||
* Returns the cleaned svg markup, or an empty string when the input is not a
|
||||
* usable svg, exceeds the size ceiling, or sanitizes to nothing. An empty
|
||||
* return tells the caller to keep the raw code block instead of rendering.
|
||||
*/
|
||||
export function sanitizeSvg(source: string): string {
|
||||
const trimmed = source.trim();
|
||||
|
||||
if (!trimmed || trimmed.length > SVG_MAX_BYTES) return '';
|
||||
|
||||
if (!trimmed.startsWith(SVG_TAG_PREFIX)) return '';
|
||||
|
||||
const clean = DOMPurify.sanitize(trimmed, SVG_SANITIZE_CONFIG) as unknown as string;
|
||||
|
||||
if (!clean || !clean.includes(SVG_TAG_PREFIX)) return '';
|
||||
|
||||
return clean;
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
/**
|
||||
* Mounts svg markup inside an open shadow root on the host element.
|
||||
* The shadow boundary scopes the svg <style> and its animations to the host,
|
||||
* so model authored css can not reach the surrounding page. The caller passes
|
||||
* markup that is already sanitized, this only isolates and sizes it.
|
||||
*/
|
||||
export function mountSvgShadow(host: HTMLElement, markup: string, style: string): void {
|
||||
const root = host.shadowRoot ?? host.attachShadow({ mode: 'open' });
|
||||
root.innerHTML = markup ? `<style>${style}</style>${markup}` : '';
|
||||
}
|
||||
Reference in New Issue
Block a user