mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-17 02:57:39 +02:00
Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 74ade52741 | |||
| c1304d7b28 | |||
| 02810c7aa8 | |||
| a1824902b5 | |||
| 32120c10e3 | |||
| d5fb104293 | |||
| 635b65ad7a |
+62
-2
@@ -140,6 +140,8 @@ struct common_speculative_impl {
|
||||
size_t n_gen_tokens = 0; // number of tokens generated by this implementation.
|
||||
size_t n_acc_tokens = 0; // number of tokens accepted by the target model.
|
||||
|
||||
std::vector<size_t> n_acc_tokens_per_pos; // number of tokens accepted per draft position.
|
||||
|
||||
// TODO: track performance of most recent calls
|
||||
const bool gen_perf = true; // whether to generate performance stats.
|
||||
|
||||
@@ -416,6 +418,9 @@ struct common_speculative_impl_draft_eagle3 : public common_speculative_impl {
|
||||
|
||||
std::vector<common_sampler_ptr> smpls;
|
||||
|
||||
// backend sampler chain per seq, attached to ctx_dft
|
||||
std::vector<llama_sampler *> backend_chains;
|
||||
|
||||
int32_t n_embd_dec = 0; // draft hidden size
|
||||
int32_t n_embd_enc = 0; // target_layer_ids_n * target_hidden_size
|
||||
int32_t n_embd_tgt = 0; // target model hidden size
|
||||
@@ -441,7 +446,7 @@ struct common_speculative_impl_draft_eagle3 : public common_speculative_impl {
|
||||
, params(params.draft)
|
||||
{
|
||||
LOG_INF("%s: adding speculative implementation 'draft-eagle3'\n", __func__);
|
||||
LOG_INF("%s: - n_max=%d, n_min=%d, p_min=%f\n", __func__, params.draft.n_max, params.draft.n_min, params.draft.p_min);
|
||||
LOG_INF("%s: - n_max=%d, n_min=%d, p_min=%f, backend_sampling=%d\n", __func__, params.draft.n_max, params.draft.n_min, params.draft.p_min, (int) params.draft.backend_sampling);
|
||||
|
||||
auto * ctx_tgt = this->params.ctx_tgt;
|
||||
auto * ctx_dft = this->params.ctx_dft;
|
||||
@@ -476,6 +481,22 @@ struct common_speculative_impl_draft_eagle3 : public common_speculative_impl {
|
||||
s.reset(common_sampler_init(llama_get_model(ctx_dft), sparams));
|
||||
}
|
||||
|
||||
// offload draft sampling to the backend
|
||||
backend_chains.assign(n_seq, nullptr);
|
||||
if (this->params.backend_sampling) {
|
||||
for (llama_seq_id seq_id = 0; seq_id < (llama_seq_id) n_seq; ++seq_id) {
|
||||
llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
|
||||
llama_sampler_chain_add(chain, llama_sampler_init_top_k(10));
|
||||
|
||||
if (!llama_set_sampler(ctx_dft, seq_id, chain)) {
|
||||
LOG_WRN("%s: backend offload failed for seq_id=%d; using CPU sampler\n", __func__, (int) seq_id);
|
||||
llama_sampler_free(chain);
|
||||
chain = nullptr;
|
||||
}
|
||||
backend_chains[seq_id] = chain;
|
||||
}
|
||||
}
|
||||
|
||||
// turn on extraction of the target layers' input embeddings
|
||||
for (uint32_t k = 0; k < target_layer_ids_n; ++k) {
|
||||
llama_set_embeddings_layer_inp(ctx_tgt, (uint32_t) target_layer_ids[k], true);
|
||||
@@ -494,6 +515,18 @@ struct common_speculative_impl_draft_eagle3 : public common_speculative_impl {
|
||||
}
|
||||
|
||||
~common_speculative_impl_draft_eagle3() override {
|
||||
auto * ctx_dft = this->params.ctx_dft;
|
||||
for (llama_seq_id seq_id = 0; seq_id < (llama_seq_id) backend_chains.size(); ++seq_id) {
|
||||
if (backend_chains[seq_id] == nullptr) {
|
||||
continue;
|
||||
}
|
||||
if (ctx_dft) {
|
||||
llama_set_sampler(ctx_dft, seq_id, nullptr);
|
||||
}
|
||||
llama_sampler_free(backend_chains[seq_id]);
|
||||
}
|
||||
backend_chains.clear();
|
||||
|
||||
if (batch.token != nullptr) {
|
||||
free(batch.token);
|
||||
batch.token = nullptr;
|
||||
@@ -2059,6 +2092,15 @@ void common_speculative_accept(common_speculative * spec, llama_seq_id seq_id, u
|
||||
|
||||
{
|
||||
common_time_meas tm(impl->t_accept_us, !impl->gen_perf);
|
||||
|
||||
if (impl->n_acc_tokens_per_pos.size() < n_accepted) {
|
||||
impl->n_acc_tokens_per_pos.resize(n_accepted, 0);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < n_accepted; ++i) {
|
||||
impl->n_acc_tokens_per_pos[i]++;
|
||||
}
|
||||
|
||||
if (n_accepted > 0) {
|
||||
impl->n_acc_drafts++;
|
||||
impl->n_acc_tokens += n_accepted;
|
||||
@@ -2093,13 +2135,31 @@ void common_speculative_print_stats(const common_speculative * spec) {
|
||||
str_perf = "";
|
||||
}
|
||||
|
||||
LOG_INF("statistics %16s: #calls(b,g,a) = %4zu %6zu %6zu, #gen drafts = %6zu, #acc drafts = %5zu, #gen tokens = %6zu, #acc tokens = %5zu%s\n",
|
||||
std::string str_stats;
|
||||
if (impl->n_call_accept > 0) {
|
||||
const double mean =
|
||||
1.0 + (double) impl->n_acc_tokens / (double) impl->n_call_accept;
|
||||
std::ostringstream tmp;
|
||||
tmp << std::fixed << std::setprecision(3);
|
||||
for (size_t i = 0; i < impl->n_acc_tokens_per_pos.size(); ++i) {
|
||||
if (i > 0) {
|
||||
tmp << ", ";
|
||||
}
|
||||
tmp << (double) impl->n_acc_tokens_per_pos[i] / (double) impl->n_call_accept;
|
||||
}
|
||||
std::ostringstream oss;
|
||||
oss << std::fixed << std::setprecision(2) << mean;
|
||||
str_stats = ", #mean acc len = " + oss.str() + ", #acc rate/pos = (" + tmp.str() + ")";
|
||||
}
|
||||
|
||||
LOG_INF("statistics %16s: #calls(b,g,a) = %4zu %6zu %6zu, #gen drafts = %6zu, #acc drafts = %5zu, #gen tokens = %6zu, #acc tokens = %5zu%s%s\n",
|
||||
common_speculative_type_to_str(impl->type).c_str(),
|
||||
impl->n_call_begin, impl->n_call_draft, impl->n_call_accept,
|
||||
impl->n_gen_drafts,
|
||||
impl->n_acc_drafts,
|
||||
impl->n_gen_tokens,
|
||||
impl->n_acc_tokens,
|
||||
str_stats.c_str(),
|
||||
str_perf.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -911,8 +911,8 @@ struct vk_device_struct {
|
||||
vk_pipeline pipeline_pool2d_f32;
|
||||
vk_pipeline pipeline_rwkv_wkv6_f32;
|
||||
vk_pipeline pipeline_rwkv_wkv7_f32;
|
||||
// [size_idx][kda] where size_idx: 0=d32, 1=d64, 2=d128
|
||||
vk_pipeline pipeline_gated_delta_net[3][2];
|
||||
// [size_idx][kda] where size_idx: 0=d16, 1=d32, 2=d64, 3=d128
|
||||
vk_pipeline pipeline_gated_delta_net[4][2];
|
||||
vk_pipeline pipeline_ssm_scan_f32_d128;
|
||||
vk_pipeline pipeline_ssm_scan_f32_d256;
|
||||
vk_pipeline pipeline_ssm_conv_f32;
|
||||
@@ -3080,8 +3080,10 @@ static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
|
||||
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
|
||||
vk::MemoryPropertyFlagBits::eDeviceLocal});
|
||||
} else if (device->uma) {
|
||||
// Fall back to host memory type
|
||||
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal,
|
||||
// On UMA, prefer host-visible memory so direct tensor borrowing works.
|
||||
// If unavailable, fall back to device-local memory.
|
||||
buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
|
||||
vk::MemoryPropertyFlagBits::eDeviceLocal,
|
||||
vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent});
|
||||
} else if (device->disable_host_visible_vidmem) {
|
||||
if (device->allow_sysmem_fallback) {
|
||||
@@ -5231,14 +5233,14 @@ static void ggml_vk_load_shaders(vk_device& device, vk_pipeline requested) {
|
||||
ggml_vk_create_pipeline(device, device->pipeline_rwkv_wkv7_f32, "rwkv_wkv7_f32", rwkv_wkv7_f32_len, rwkv_wkv7_f32_data, "main", 8, sizeof(vk_op_rwkv_wkv7_push_constants), {1, 1, 1}, {device->subgroup_size}, 1);
|
||||
|
||||
{
|
||||
const uint32_t gdn_sizes[] = {32, 64, 128};
|
||||
const uint32_t gdn_sizes[] = {16, 32, 64, 128};
|
||||
const char * gdn_names[][2] = {
|
||||
{"gated_delta_net_f32_d16", "gated_delta_net_f32_d16_kda"},
|
||||
{"gated_delta_net_f32_d32", "gated_delta_net_f32_d32_kda"},
|
||||
{"gated_delta_net_f32_d64", "gated_delta_net_f32_d64_kda"},
|
||||
{"gated_delta_net_f32_d128", "gated_delta_net_f32_d128_kda"},
|
||||
};
|
||||
const bool use_subgroup_reduce = device->subgroup_arithmetic;
|
||||
for (uint32_t si = 0; si < 3; si++) {
|
||||
for (uint32_t si = 0; si < 4; si++) {
|
||||
const uint32_t S_V = gdn_sizes[si];
|
||||
GGML_ASSERT(is_pow2(S_V));
|
||||
|
||||
@@ -5252,10 +5254,29 @@ static void ggml_vk_load_shaders(vk_device& device, vk_pipeline requested) {
|
||||
lanes_per_column = std::min(S_V, device->subgroup_size);
|
||||
}
|
||||
|
||||
const bool need_clustered_shader = lanes_per_column != 1 && (lanes_per_column < device->subgroup_size);
|
||||
// gated_delta_net.comp relies on S_V % COLS_PER_WG == 0 and
|
||||
// S_V % LANES_PER_COLUMN == 0 to avoid bounds checks.
|
||||
while (lanes_per_column > 1u) {
|
||||
const bool valid_lanes = (device->subgroup_size % lanes_per_column) == 0 &&
|
||||
(S_V % lanes_per_column) == 0;
|
||||
const uint32_t cols_per_wg = valid_lanes ? device->subgroup_size / lanes_per_column : 0;
|
||||
if (valid_lanes && cols_per_wg > 0 && (S_V % cols_per_wg) == 0) {
|
||||
break;
|
||||
}
|
||||
lanes_per_column >>= 1u;
|
||||
}
|
||||
|
||||
GGML_ASSERT((device->subgroup_size % lanes_per_column) == 0);
|
||||
GGML_ASSERT((S_V % lanes_per_column) == 0);
|
||||
GGML_ASSERT((S_V % (device->subgroup_size / lanes_per_column)) == 0);
|
||||
|
||||
const bool need_partial_subgroup_reduce = lanes_per_column != 1u && lanes_per_column < device->subgroup_size;
|
||||
const bool use_clustered_reduce = device->subgroup_arithmetic && device->subgroup_clustered && need_partial_subgroup_reduce;
|
||||
const bool use_subgroup_reduce = device->subgroup_arithmetic && !need_partial_subgroup_reduce;
|
||||
const bool use_subgroup_ops = use_clustered_reduce || use_subgroup_reduce;
|
||||
size_t gdn_len;
|
||||
const void * gdn_data;
|
||||
if (use_subgroup_reduce && need_clustered_shader) {
|
||||
if (use_clustered_reduce) {
|
||||
gdn_len = gated_delta_net_f32_len;
|
||||
gdn_data = (const void *)gated_delta_net_f32_data;
|
||||
} else if (use_subgroup_reduce) {
|
||||
@@ -5272,7 +5293,7 @@ static void ggml_vk_load_shaders(vk_device& device, vk_pipeline requested) {
|
||||
for (uint32_t kda = 0; kda < 2; kda++) {
|
||||
ggml_vk_create_pipeline(device, device->pipeline_gated_delta_net[si][kda],
|
||||
gdn_names[si][kda], gdn_len, gdn_data, "main", 7, sizeof(vk_op_gated_delta_net_push_constants),
|
||||
wg_denoms, {S_V, kda, device->subgroup_size, lanes_per_column}, 1, true, use_subgroup_reduce, device->subgroup_size);
|
||||
wg_denoms, {S_V, kda, device->subgroup_size, lanes_per_column}, 1, true, use_subgroup_ops, device->subgroup_size);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -10746,9 +10767,10 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
|
||||
const uint32_t kda = (dst->src[3]->ne[0] == (int64_t)S_v) ? 1 : 0;
|
||||
uint32_t si;
|
||||
switch (S_v) {
|
||||
case 32: si = 0; break;
|
||||
case 64: si = 1; break;
|
||||
case 128: si = 2; break;
|
||||
case 16: si = 0; break;
|
||||
case 32: si = 1; break;
|
||||
case 64: si = 2; break;
|
||||
case 128: si = 3; break;
|
||||
default: return nullptr;
|
||||
}
|
||||
return ctx->device->pipeline_gated_delta_net[si][kda];
|
||||
@@ -17193,7 +17215,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
|
||||
case GGML_OP_GATED_DELTA_NET:
|
||||
{
|
||||
const uint32_t S_v = op->src[2]->ne[0];
|
||||
if (S_v != 32 && S_v != 64 && S_v != 128) {
|
||||
if (S_v != 16 && S_v != 32 && S_v != 64 && S_v != 128) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < 6; i++) {
|
||||
|
||||
+58
-45
@@ -1088,6 +1088,10 @@ ggml_tensor * llm_graph_context::build_lora_mm(
|
||||
ggml_tensor * w_s) const {
|
||||
ggml_tensor * res = ggml_mul_mat(ctx0, w, cur);
|
||||
|
||||
if (w_s) {
|
||||
res = ggml_mul(ctx0, res, w_s);
|
||||
}
|
||||
|
||||
for (const auto & lora : *loras) {
|
||||
llama_adapter_lora_weight * lw = lora.first->get_weight(w);
|
||||
if (lw == nullptr) {
|
||||
@@ -1106,18 +1110,24 @@ ggml_tensor * llm_graph_context::build_lora_mm(
|
||||
res = ggml_add(ctx0, res, ab_cur);
|
||||
}
|
||||
|
||||
if (w_s) {
|
||||
res = ggml_mul(ctx0, res, w_s);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
ggml_tensor * llm_graph_context::build_lora_mm_id(
|
||||
ggml_tensor * w, // ggml_tensor * as
|
||||
ggml_tensor * cur, // ggml_tensor * b
|
||||
ggml_tensor * ids) const {
|
||||
ggml_tensor * ids,
|
||||
ggml_tensor * w_s) const {
|
||||
ggml_tensor * res = ggml_mul_mat_id(ctx0, w, cur, ids);
|
||||
|
||||
if (w_s) {
|
||||
const int64_t n_expert = w_s->ne[0];
|
||||
const int64_t n_tokens = cur->ne[2];
|
||||
ggml_tensor * s = ggml_reshape_3d(ctx0, w_s, 1, n_expert, 1);
|
||||
s = ggml_repeat_4d(ctx0, s, 1, n_expert, n_tokens, 1);
|
||||
s = ggml_get_rows(ctx0, s, ids);
|
||||
res = ggml_mul(ctx0, res, s);
|
||||
}
|
||||
for (const auto & lora : *loras) {
|
||||
llama_adapter_lora_weight * lw = lora.first->get_weight(w);
|
||||
if (lw == nullptr) {
|
||||
@@ -1269,6 +1279,29 @@ ggml_tensor * llm_graph_context::build_ffn(
|
||||
llm_ffn_op_type type_op,
|
||||
llm_ffn_gate_type type_gate,
|
||||
int il) const {
|
||||
// NVFP4 support is currently restricted to
|
||||
// 1) LORA absence (*_s would be applied after LORA residual, which is incorrect)
|
||||
// 2) bias absence (*_s would be applied after bias addition, which is incorrect)
|
||||
// TODO: disambiguate LLM-architectural scales (which use *_s) from NVFP4 scale_2 (which also uses *_s currently)
|
||||
auto has_lora = [this](ggml_tensor * w) {
|
||||
if (!w) {
|
||||
return false;
|
||||
}
|
||||
for (const auto & lora : *loras) {
|
||||
if (lora.first->get_weight(w) != nullptr) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
GGML_ASSERT(!up_s || !up_b || !up || up->type != GGML_TYPE_NVFP4);
|
||||
GGML_ASSERT(!gate_s || !gate_b || !gate || gate->type != GGML_TYPE_NVFP4);
|
||||
GGML_ASSERT(!down_s || !down_b || !down || down->type != GGML_TYPE_NVFP4);
|
||||
GGML_ASSERT(!up_s || !up || up->type != GGML_TYPE_NVFP4 || !has_lora(up));
|
||||
GGML_ASSERT(!gate_s || !gate || gate->type != GGML_TYPE_NVFP4 || !has_lora(gate));
|
||||
GGML_ASSERT(!down_s || !down || down->type != GGML_TYPE_NVFP4 || !has_lora(down));
|
||||
|
||||
ggml_tensor * tmp = up ? build_lora_mm(up, cur) : cur;
|
||||
cb(tmp, "ffn_up", il);
|
||||
|
||||
@@ -1627,23 +1660,18 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
|
||||
|
||||
if (gate_up_exps) {
|
||||
// merged gate_up path: one mul_mat_id, then split into gate and up views
|
||||
ggml_tensor * gate_up = build_lora_mm_id(gate_up_exps, cur, selected_experts); // [n_ff*2, n_expert_used, n_tokens]
|
||||
ggml_tensor * gate_up = build_lora_mm_id(gate_up_exps, cur, selected_experts, up_exps_s); // [n_ff*2, n_expert_used, n_tokens]
|
||||
cb(gate_up, "ffn_moe_gate_up", il);
|
||||
|
||||
if (up_exps_s) {
|
||||
cb(gate_up, "ffn_moe_gate_up_scaled", il);
|
||||
}
|
||||
|
||||
if (gate_up_exps_b) {
|
||||
gate_up = ggml_add_id(ctx0, gate_up, gate_up_exps_b, selected_experts);
|
||||
cb(gate_up, "ffn_moe_gate_up_biased", il);
|
||||
}
|
||||
|
||||
// apply per-expert scale2 to merged gate_up (use up_exps_s since gate and up are fused)
|
||||
if (up_exps_s) {
|
||||
ggml_tensor * s = ggml_reshape_3d(ctx0, up_exps_s, 1, n_expert, 1);
|
||||
s = ggml_repeat_4d(ctx0, s, 1, n_expert, n_tokens, 1);
|
||||
s = ggml_get_rows(ctx0, s, selected_experts); // [1, n_expert_used, n_tokens]
|
||||
gate_up = ggml_mul(ctx0, gate_up, s);
|
||||
cb(gate_up, "ffn_moe_gate_up_scaled", il);
|
||||
}
|
||||
|
||||
const int64_t n_ff = gate_up->ne[0] / 2;
|
||||
cur = ggml_view_3d(ctx0, gate_up, n_ff, gate_up->ne[1], gate_up->ne[2], gate_up->nb[1], gate_up->nb[2], 0);
|
||||
cb(cur, "ffn_moe_gate", il);
|
||||
@@ -1651,43 +1679,33 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
|
||||
cb(up, "ffn_moe_up", il);
|
||||
} else {
|
||||
// separate gate and up path
|
||||
up = build_lora_mm_id(up_exps, cur, selected_experts); // [n_ff, n_expert_used, n_tokens]
|
||||
up = build_lora_mm_id(up_exps, cur, selected_experts, up_exps_s); // [n_ff, n_expert_used, n_tokens]
|
||||
cb(up, "ffn_moe_up", il);
|
||||
|
||||
if (up_exps_s) {
|
||||
cb(up, "ffn_moe_up_scaled", il);
|
||||
}
|
||||
|
||||
if (up_exps_b) {
|
||||
up = ggml_add_id(ctx0, up, up_exps_b, selected_experts);
|
||||
cb(up, "ffn_moe_up_biased", il);
|
||||
}
|
||||
|
||||
// apply per-expert scale2 to up
|
||||
if (up_exps_s) {
|
||||
ggml_tensor * s = ggml_reshape_3d(ctx0, up_exps_s, 1, n_expert, 1);
|
||||
s = ggml_repeat_4d(ctx0, s, 1, n_expert, n_tokens, 1);
|
||||
s = ggml_get_rows(ctx0, s, selected_experts); // [1, n_expert_used, n_tokens]
|
||||
up = ggml_mul(ctx0, up, s);
|
||||
cb(up, "ffn_moe_up_scaled", il);
|
||||
}
|
||||
|
||||
if (gate_exps) {
|
||||
cur = build_lora_mm_id(gate_exps, cur, selected_experts); // [n_ff, n_expert_used, n_tokens]
|
||||
cur = build_lora_mm_id(gate_exps, cur, selected_experts, gate_exps_s); // [n_ff, n_expert_used, n_tokens]
|
||||
cb(cur, "ffn_moe_gate", il);
|
||||
} else {
|
||||
cur = up;
|
||||
}
|
||||
|
||||
if (gate_exps_s) {
|
||||
cb(cur, "ffn_moe_gate_scaled", il);
|
||||
}
|
||||
|
||||
if (gate_exps_b) {
|
||||
cur = ggml_add_id(ctx0, cur, gate_exps_b, selected_experts);
|
||||
cb(cur, "ffn_moe_gate_biased", il);
|
||||
}
|
||||
|
||||
// apply per-expert scale2 to gate
|
||||
if (gate_exps_s) {
|
||||
ggml_tensor * s = ggml_reshape_3d(ctx0, gate_exps_s, 1, n_expert, 1);
|
||||
s = ggml_repeat_4d(ctx0, s, 1, n_expert, n_tokens, 1);
|
||||
s = ggml_get_rows(ctx0, s, selected_experts); // [1, n_expert_used, n_tokens]
|
||||
cur = ggml_mul(ctx0, cur, s);
|
||||
cb(cur, "ffn_moe_gate_scaled", il);
|
||||
}
|
||||
}
|
||||
|
||||
const bool has_gate = gate_exps || gate_up_exps;
|
||||
@@ -1759,23 +1777,18 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
|
||||
GGML_ABORT("fatal error");
|
||||
}
|
||||
|
||||
experts = build_lora_mm_id(down_exps, cur, selected_experts); // [n_embd, n_expert_used, n_tokens]
|
||||
experts = build_lora_mm_id(down_exps, cur, selected_experts, down_exps_s); // [n_embd, n_expert_used, n_tokens]
|
||||
cb(experts, "ffn_moe_down", il);
|
||||
|
||||
if (down_exps_s) {
|
||||
cb(experts, "ffn_moe_down_scaled", il);
|
||||
}
|
||||
|
||||
if (down_exps_b) {
|
||||
experts = ggml_add_id(ctx0, experts, down_exps_b, selected_experts);
|
||||
cb(experts, "ffn_moe_down_biased", il);
|
||||
}
|
||||
|
||||
// apply per-expert scale2 to down
|
||||
if (down_exps_s) {
|
||||
ggml_tensor * s = ggml_reshape_3d(ctx0, down_exps_s, 1, n_expert, 1);
|
||||
s = ggml_repeat_4d(ctx0, s, 1, n_expert, n_tokens, 1);
|
||||
s = ggml_get_rows(ctx0, s, selected_experts); // [1, n_expert_used, n_tokens]
|
||||
experts = ggml_mul(ctx0, experts, s);
|
||||
cb(experts, "ffn_moe_down_scaled", il);
|
||||
}
|
||||
|
||||
if (!weight_before_ffn) {
|
||||
experts = ggml_mul(ctx0, experts, weights);
|
||||
cb(experts, "ffn_moe_weighted", il);
|
||||
|
||||
+3
-2
@@ -853,11 +853,12 @@ struct llm_graph_context {
|
||||
ggml_tensor * cur,
|
||||
ggml_tensor * w_s = nullptr) const;
|
||||
|
||||
// do mat_mul_id, while optionally apply lora
|
||||
// do mat_mul_id, while optionally apply lora and per-expert scale
|
||||
ggml_tensor * build_lora_mm_id(
|
||||
ggml_tensor * w, // ggml_tensor * as
|
||||
ggml_tensor * cur, // ggml_tensor * b
|
||||
ggml_tensor * ids) const;
|
||||
ggml_tensor * ids,
|
||||
ggml_tensor * w_s = nullptr) const;
|
||||
|
||||
ggml_tensor * build_norm(
|
||||
ggml_tensor * cur,
|
||||
|
||||
@@ -201,6 +201,8 @@ struct server_slot {
|
||||
// Speculative decoding stats
|
||||
int32_t n_draft_total = 0; // Total draft tokens generated
|
||||
int32_t n_draft_accepted = 0; // Draft tokens actually accepted
|
||||
int32_t n_draft_verif_steps = 0; // Total draft token verification steps by the target model
|
||||
std::vector<int32_t> n_accepted_per_pos; // Accepted tokens per draft position
|
||||
|
||||
void reset() {
|
||||
SLT_DBG(*this, "%s", "\n");
|
||||
@@ -227,6 +229,8 @@ struct server_slot {
|
||||
// clear speculative decoding stats
|
||||
n_draft_total = 0;
|
||||
n_draft_accepted = 0;
|
||||
n_draft_verif_steps = 0;
|
||||
n_accepted_per_pos.clear();
|
||||
|
||||
task_prev = std::move(task);
|
||||
task.reset();
|
||||
@@ -509,10 +513,22 @@ struct server_slot {
|
||||
llama_perf_context(ctx_tgt).n_reused);
|
||||
|
||||
if (n_draft_total > 0) {
|
||||
const float draft_ratio = (float) n_draft_accepted / n_draft_total;
|
||||
const float draft_ratio = (float) n_draft_accepted / n_draft_total;
|
||||
const double mean_acc_len = n_draft_verif_steps > 0 ? 1.0 + (double) n_draft_accepted / (double) n_draft_verif_steps : 1.0;
|
||||
|
||||
std::string acceptance_rates_per_pos;
|
||||
if (n_draft_verif_steps > 0) {
|
||||
for (size_t i = 0; i < n_accepted_per_pos.size(); ++i) {
|
||||
if (i > 0) {
|
||||
acceptance_rates_per_pos += ", ";
|
||||
}
|
||||
acceptance_rates_per_pos += string_format("%.3f", (double) n_accepted_per_pos[i] / (double) n_draft_verif_steps);
|
||||
}
|
||||
}
|
||||
|
||||
SLT_INF(*this,
|
||||
"draft acceptance = %0.5f (%5d accepted / %5d generated)\n",
|
||||
draft_ratio, n_draft_accepted, n_draft_total);
|
||||
"draft acceptance = %0.5f (%5d accepted / %5d generated), mean acceptance length = %5.2f, acceptance rate per position = (%s)\n",
|
||||
draft_ratio, n_draft_accepted, n_draft_total, mean_acc_len, acceptance_rates_per_pos.c_str());
|
||||
}
|
||||
|
||||
common_speculative_print_stats(spec);
|
||||
@@ -3543,6 +3559,14 @@ private:
|
||||
|
||||
// update how many tokens out of those tested were accepted
|
||||
slot.n_draft_accepted += ids.size() - 1;
|
||||
slot.n_draft_verif_steps += 1;
|
||||
|
||||
if (slot.n_accepted_per_pos.empty()) {
|
||||
slot.n_accepted_per_pos.resize(common_speculative_n_max(&params_base.speculative), 0);
|
||||
}
|
||||
for (size_t i = 0; i < ids.size() - 1 && i < slot.n_accepted_per_pos.size(); ++i) {
|
||||
slot.n_accepted_per_pos[i]++;
|
||||
}
|
||||
|
||||
// add accepted tokens to the prompt
|
||||
slot.prompt.tokens.keep_first(slot.prompt.n_tokens() - n_draft);
|
||||
|
||||
@@ -41,6 +41,7 @@
|
||||
DATA_ERROR_HANDLED_ATTR,
|
||||
BOOL_TRUE_STRING,
|
||||
SETTINGS_KEYS,
|
||||
CODE_BLOCK_HEADER_CLASS,
|
||||
MERMAID_WRAPPER_CLASS,
|
||||
MERMAID_BLOCK_CLASS,
|
||||
MERMAID_LANGUAGE,
|
||||
@@ -53,7 +54,11 @@
|
||||
SVG_TAG_PREFIX,
|
||||
SVG_SOURCE_ATTR,
|
||||
SVG_RENDERED_ATTR,
|
||||
SVG_INLINE_SHADOW_STYLE
|
||||
SVG_INLINE_SHADOW_STYLE,
|
||||
TOGGLE_SOURCE_BTN_CLASS,
|
||||
DIAGRAM_VIEW_MODE_ATTR,
|
||||
DIAGRAM_VIEW_RENDERED,
|
||||
DIAGRAM_VIEW_SOURCE
|
||||
} from '$lib/constants';
|
||||
import { ColorMode, UrlProtocol } from '$lib/enums';
|
||||
import { FileTypeText } from '$lib/enums/files.enums';
|
||||
@@ -501,6 +506,23 @@
|
||||
async function handleMermaidClick(event: MouseEvent) {
|
||||
const target = event.target as HTMLElement;
|
||||
|
||||
// Toggle a diagram block between its rendered view and its source view.
|
||||
// Shared by mermaid and svg, css drives the visibility from the wrapper mode.
|
||||
const toggleBtn = target.closest(`.${TOGGLE_SOURCE_BTN_CLASS}`);
|
||||
if (toggleBtn) {
|
||||
event.preventDefault();
|
||||
event.stopPropagation();
|
||||
|
||||
const wrapper = toggleBtn.closest(`.${MERMAID_WRAPPER_CLASS}, .${SVG_WRAPPER_CLASS}`);
|
||||
if (!wrapper) return;
|
||||
|
||||
const isSource = wrapper.getAttribute(DIAGRAM_VIEW_MODE_ATTR) === DIAGRAM_VIEW_SOURCE;
|
||||
const next = isSource ? DIAGRAM_VIEW_RENDERED : DIAGRAM_VIEW_SOURCE;
|
||||
wrapper.setAttribute(DIAGRAM_VIEW_MODE_ATTR, next);
|
||||
toggleBtn.setAttribute('aria-pressed', String(!isSource));
|
||||
return;
|
||||
}
|
||||
|
||||
// Check if clicking on copy or preview button in mermaid block
|
||||
const copyBtn = target.closest(`.${MERMAID_WRAPPER_CLASS} .copy-code-btn`);
|
||||
const previewBtn = target.closest(`.${MERMAID_WRAPPER_CLASS} .preview-code-btn`);
|
||||
@@ -573,6 +595,11 @@
|
||||
}
|
||||
}
|
||||
|
||||
// A click on the header chrome targets the action buttons, never the
|
||||
// diagram. Guard so a header click can not fall through to the click to
|
||||
// zoom branches below, whatever the scroll position or stacking.
|
||||
if (target.closest(`.${CODE_BLOCK_HEADER_CLASS}`)) return;
|
||||
|
||||
// Open preview when clicking the svg block itself. A final block carries its
|
||||
// source, a streaming block does not and is mirrored live into the dialog.
|
||||
const svgEl = target.closest(`.${SVG_BLOCK_CLASS}`);
|
||||
|
||||
@@ -300,7 +300,8 @@ div.markdown-user-content :global(.table-wrapper) {
|
||||
}
|
||||
|
||||
.markdown-content :global(.copy-code-btn),
|
||||
.markdown-content :global(.preview-code-btn) {
|
||||
.markdown-content :global(.preview-code-btn),
|
||||
.markdown-content :global(.toggle-source-btn) {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
@@ -312,15 +313,22 @@ div.markdown-user-content :global(.table-wrapper) {
|
||||
}
|
||||
|
||||
.markdown-content :global(.copy-code-btn:hover),
|
||||
.markdown-content :global(.preview-code-btn:hover) {
|
||||
.markdown-content :global(.preview-code-btn:hover),
|
||||
.markdown-content :global(.toggle-source-btn:hover) {
|
||||
transform: scale(1.05);
|
||||
}
|
||||
|
||||
.markdown-content :global(.copy-code-btn:active),
|
||||
.markdown-content :global(.preview-code-btn:active) {
|
||||
.markdown-content :global(.preview-code-btn:active),
|
||||
.markdown-content :global(.toggle-source-btn:active) {
|
||||
transform: scale(0.95);
|
||||
}
|
||||
|
||||
/* Pressed state marks the source view as active */
|
||||
.markdown-content :global(.toggle-source-btn[aria-pressed='true']) {
|
||||
color: var(--primary);
|
||||
}
|
||||
|
||||
.markdown-content :global(.code-block-wrapper pre) {
|
||||
background: transparent;
|
||||
margin: 0;
|
||||
@@ -629,8 +637,8 @@ div.markdown-user-content :global(.table-wrapper) {
|
||||
overflow-y: auto;
|
||||
overflow-x: auto;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
align-items: safe center;
|
||||
justify-content: safe center;
|
||||
padding: 3rem 1rem 1rem;
|
||||
}
|
||||
|
||||
@@ -645,7 +653,9 @@ div.markdown-user-content :global(.table-wrapper) {
|
||||
overflow-y: visible;
|
||||
}
|
||||
|
||||
/* Diagram block uses same header styling as code blocks */
|
||||
/* Diagram block uses same header styling as code blocks. The header floats over
|
||||
scrollable diagram content and stays transparent, so the overflow shows up to
|
||||
the box edge. It keeps a z-index so it stays the click target above content. */
|
||||
.markdown-content :global(.mermaid-block-wrapper .code-block-header),
|
||||
.markdown-content :global(.svg-block-wrapper .code-block-header) {
|
||||
display: flex;
|
||||
@@ -657,6 +667,7 @@ div.markdown-user-content :global(.table-wrapper) {
|
||||
top: 0;
|
||||
left: 0;
|
||||
right: 0;
|
||||
z-index: 2;
|
||||
}
|
||||
|
||||
.markdown-content :global(.mermaid-block-wrapper .code-block-actions),
|
||||
@@ -683,6 +694,31 @@ div.markdown-user-content :global(.table-wrapper) {
|
||||
padding: 3rem 1rem;
|
||||
}
|
||||
|
||||
/* Source view stays hidden while the block renders, css swaps the two views
|
||||
from the wrapper mode so the click handler only flips one attribute. The view
|
||||
reuses the code block scroll container, so it matches the app code blocks. */
|
||||
.markdown-content :global(.diagram-source) {
|
||||
display: none;
|
||||
text-align: left;
|
||||
}
|
||||
|
||||
.markdown-content :global(.diagram-source pre) {
|
||||
background: transparent;
|
||||
margin: 0;
|
||||
border-radius: 0;
|
||||
border: none;
|
||||
font-size: 0.875rem;
|
||||
}
|
||||
|
||||
.markdown-content :global([data-view-mode='source'] .mermaid-scroll-container),
|
||||
.markdown-content :global([data-view-mode='source'] .svg-scroll-container) {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.markdown-content :global([data-view-mode='source'] .diagram-source) {
|
||||
display: block;
|
||||
}
|
||||
|
||||
/* Streaming mermaid block - empty preview box */
|
||||
.mermaid-streaming-block {
|
||||
min-height: 300px;
|
||||
|
||||
+63
-7
@@ -7,12 +7,16 @@ import type { Element, ElementContent } from 'hast';
|
||||
import {
|
||||
CODE_BLOCK_HEADER_CLASS,
|
||||
CODE_BLOCK_ACTIONS_CLASS,
|
||||
CODE_BLOCK_SCROLL_CONTAINER_CLASS,
|
||||
CODE_LANGUAGE_CLASS,
|
||||
COPY_CODE_BTN_CLASS,
|
||||
PREVIEW_CODE_BTN_CLASS,
|
||||
TOGGLE_SOURCE_BTN_CLASS,
|
||||
DIAGRAM_SOURCE_CLASS,
|
||||
RELATIVE_CLASS,
|
||||
COPY_ICON_SVG,
|
||||
PREVIEW_ICON_SVG
|
||||
PREVIEW_ICON_SVG,
|
||||
CODE_ICON_SVG
|
||||
} from '$lib/constants';
|
||||
|
||||
export interface BlockIdGenerator {
|
||||
@@ -32,14 +36,16 @@ export function createIconElement(svg: string): Element {
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a button element with icon.
|
||||
* Creates a button element with icon. Extra properties merge onto the button,
|
||||
* which lets a stateful button carry attributes like aria-pressed.
|
||||
*/
|
||||
export function createButton(
|
||||
className: string,
|
||||
title: string,
|
||||
iconSvg: string,
|
||||
id: string,
|
||||
idAttribute: string
|
||||
idAttribute: string,
|
||||
extraProperties: Record<string, string> = {}
|
||||
): Element {
|
||||
return {
|
||||
type: 'element',
|
||||
@@ -48,7 +54,8 @@ export function createButton(
|
||||
className: [className],
|
||||
[idAttribute]: id,
|
||||
title,
|
||||
type: 'button'
|
||||
type: 'button',
|
||||
...extraProperties
|
||||
},
|
||||
children: [createIconElement(iconSvg)]
|
||||
};
|
||||
@@ -72,6 +79,52 @@ export function createPreviewButton(
|
||||
return createButton(PREVIEW_CODE_BTN_CLASS, title, PREVIEW_ICON_SVG, id, idAttribute);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a button that toggles a diagram block between its rendered view and
|
||||
* its source view. aria-pressed starts false, the rendered view is the default.
|
||||
*/
|
||||
export function createToggleSourceButton(
|
||||
id: string,
|
||||
idAttribute: string,
|
||||
title: string = 'Toggle source'
|
||||
): Element {
|
||||
return createButton(TOGGLE_SOURCE_BTN_CLASS, title, CODE_ICON_SVG, id, idAttribute, {
|
||||
'aria-pressed': 'false'
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a source view for a diagram block. It reuses the code block scroll
|
||||
* container so it matches the app code blocks, and wraps the highlighted code
|
||||
* element captured at transform time. A missing code element falls back to a
|
||||
* plain code node built from the raw source.
|
||||
*/
|
||||
export function createSourceView(
|
||||
codeElement: Element | undefined,
|
||||
source: string,
|
||||
language: string
|
||||
): Element {
|
||||
const code: Element = codeElement ?? {
|
||||
type: 'element',
|
||||
tagName: 'code',
|
||||
properties: { className: ['hljs', `language-${language}`] },
|
||||
children: [{ type: 'text', value: source }]
|
||||
};
|
||||
return {
|
||||
type: 'element',
|
||||
tagName: 'div',
|
||||
properties: { className: [DIAGRAM_SOURCE_CLASS, CODE_BLOCK_SCROLL_CONTAINER_CLASS] },
|
||||
children: [
|
||||
{
|
||||
type: 'element',
|
||||
tagName: 'pre',
|
||||
properties: {},
|
||||
children: [code]
|
||||
}
|
||||
]
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a block header with language label and action buttons.
|
||||
*/
|
||||
@@ -116,14 +169,17 @@ export function createScrollContainer(preElement: Element, scrollContainerClass:
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a wrapper element with header and scroll container.
|
||||
* Creates a wrapper element with header and scroll container. Extra children
|
||||
* append after the scroll container, which lets a block carry a source view
|
||||
* alongside its rendered output.
|
||||
*/
|
||||
export function createWrapper(
|
||||
header: Element,
|
||||
preElement: Element,
|
||||
wrapperClass: string,
|
||||
scrollContainerClass: string,
|
||||
additionalAttributes?: Record<string, string>
|
||||
additionalAttributes?: Record<string, string>,
|
||||
extraChildren: Element[] = []
|
||||
): Element {
|
||||
return {
|
||||
type: 'element',
|
||||
@@ -132,7 +188,7 @@ export function createWrapper(
|
||||
className: [wrapperClass, RELATIVE_CLASS],
|
||||
...additionalAttributes
|
||||
} as Element['properties'],
|
||||
children: [header, createScrollContainer(preElement, scrollContainerClass)]
|
||||
children: [header, createScrollContainer(preElement, scrollContainerClass), ...extraChildren]
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
+14
-2
@@ -19,12 +19,17 @@ import {
|
||||
MERMAID_BLOCK_CLASS,
|
||||
MERMAID_LANGUAGE,
|
||||
MERMAID_SYNTAX_ATTR,
|
||||
MERMAID_ID_ATTR
|
||||
MERMAID_ID_ATTR,
|
||||
DIAGRAM_VIEW_MODE_ATTR,
|
||||
DIAGRAM_VIEW_RENDERED
|
||||
} from '$lib/constants';
|
||||
import type { DiagramPreData } from './pre-transform';
|
||||
import {
|
||||
createBlockHeader,
|
||||
createCopyButton,
|
||||
createPreviewButton,
|
||||
createToggleSourceButton,
|
||||
createSourceView,
|
||||
createWrapper,
|
||||
generateBlockId
|
||||
} from './code-block-utils';
|
||||
@@ -75,16 +80,23 @@ export const rehypeEnhanceMermaidBlocks: Plugin<[], Root> = () => {
|
||||
|
||||
const actions = [
|
||||
createCopyButton(mermaidId, MERMAID_ID_ATTR, 'Copy mermaid syntax'),
|
||||
createToggleSourceButton(mermaidId, MERMAID_ID_ATTR, 'Toggle mermaid source'),
|
||||
createPreviewButton(mermaidId, MERMAID_ID_ATTR, 'Preview diagram')
|
||||
];
|
||||
|
||||
const header = createBlockHeader(MERMAID_LANGUAGE, mermaidId, MERMAID_ID_ATTR, actions);
|
||||
const preservedCode = (node.data as DiagramPreData | undefined)?.sourceCode;
|
||||
const sourceView = createSourceView(preservedCode, diagramText, MERMAID_LANGUAGE);
|
||||
const wrapper = createWrapper(
|
||||
header,
|
||||
node,
|
||||
MERMAID_WRAPPER_CLASS,
|
||||
MERMAID_SCROLL_CONTAINER_CLASS,
|
||||
{ [MERMAID_ID_ATTR]: mermaidId }
|
||||
{
|
||||
[MERMAID_ID_ATTR]: mermaidId,
|
||||
[DIAGRAM_VIEW_MODE_ATTR]: DIAGRAM_VIEW_RENDERED
|
||||
},
|
||||
[sourceView]
|
||||
);
|
||||
|
||||
// Replace pre with wrapper in parent
|
||||
|
||||
+20
-4
@@ -18,12 +18,17 @@ import {
|
||||
SVG_BLOCK_CLASS,
|
||||
SVG_LANGUAGE,
|
||||
SVG_SOURCE_ATTR,
|
||||
SVG_ID_ATTR
|
||||
SVG_ID_ATTR,
|
||||
DIAGRAM_VIEW_MODE_ATTR,
|
||||
DIAGRAM_VIEW_RENDERED
|
||||
} from '$lib/constants';
|
||||
import type { DiagramPreData } from './pre-transform';
|
||||
import {
|
||||
createBlockHeader,
|
||||
createCopyButton,
|
||||
createPreviewButton,
|
||||
createToggleSourceButton,
|
||||
createSourceView,
|
||||
createWrapper,
|
||||
generateBlockId
|
||||
} from './code-block-utils';
|
||||
@@ -65,13 +70,24 @@ export const rehypeEnhanceSvgBlocks: Plugin<[], Root> = () => {
|
||||
|
||||
const actions = [
|
||||
createCopyButton(svgId, SVG_ID_ATTR, 'Copy svg source'),
|
||||
createToggleSourceButton(svgId, SVG_ID_ATTR, 'Toggle svg source'),
|
||||
createPreviewButton(svgId, SVG_ID_ATTR, 'Preview svg')
|
||||
];
|
||||
|
||||
const header = createBlockHeader(SVG_LANGUAGE, svgId, SVG_ID_ATTR, actions);
|
||||
const wrapper = createWrapper(header, node, SVG_WRAPPER_CLASS, SVG_SCROLL_CONTAINER_CLASS, {
|
||||
[SVG_ID_ATTR]: svgId
|
||||
});
|
||||
const preservedCode = (node.data as DiagramPreData | undefined)?.sourceCode;
|
||||
const sourceView = createSourceView(preservedCode, svgSource, SVG_LANGUAGE);
|
||||
const wrapper = createWrapper(
|
||||
header,
|
||||
node,
|
||||
SVG_WRAPPER_CLASS,
|
||||
SVG_SCROLL_CONTAINER_CLASS,
|
||||
{
|
||||
[SVG_ID_ATTR]: svgId,
|
||||
[DIAGRAM_VIEW_MODE_ATTR]: DIAGRAM_VIEW_RENDERED
|
||||
},
|
||||
[sourceView]
|
||||
);
|
||||
|
||||
// Replace pre with wrapper in parent
|
||||
(parent.children as ElementContent[])[index] = wrapper;
|
||||
|
||||
+13
-1
@@ -2,6 +2,15 @@ import type { Plugin } from 'unified';
|
||||
import type { Root, Element, ElementContent, Text } from 'hast';
|
||||
import { visit } from 'unist-util-visit';
|
||||
|
||||
/**
|
||||
* Metadata that a diagram `pre` element carries on its unist `data` field.
|
||||
* `sourceCode` holds the highlighted code element captured before the pre
|
||||
* became a render target; the enhancer reuses it to build a matching source view.
|
||||
*/
|
||||
export interface DiagramPreData {
|
||||
/** Highlighted code element preserved from the original code block. */
sourceCode: Element;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recursively extracts all text content from a HAST node.
|
||||
* Handles nested elements (e.g., span wrappers from syntax highlighting).
|
||||
@@ -69,7 +78,10 @@ export function createPreTransform(
|
||||
properties: {
|
||||
className: [targetClass]
|
||||
},
|
||||
children: [{ type: 'text', value: text } as Text]
|
||||
children: [{ type: 'text', value: text } as Text],
|
||||
// Keep the highlighted code element so the block can offer a source
|
||||
// view that matches the app code blocks without re highlighting.
|
||||
data: { sourceCode: codeElement } satisfies DiagramPreData
|
||||
};
|
||||
|
||||
(parent.children as ElementContent[])[index] = pre;
|
||||
|
||||
@@ -0,0 +1,9 @@
|
||||
// Shared constants for diagram blocks (mermaid and svg) that toggle between a
|
||||
// rendered view and a source view. The wrapper carries the active mode, CSS
|
||||
// drives the visibility, and the click handler only flips the attribute.
|
||||
|
||||
// Attribute on the block wrapper that holds the active view mode.
export const DIAGRAM_VIEW_MODE_ATTR = 'data-view-mode';
|
||||
// Mode value for the rendered view; the mermaid and svg enhancers set it initially.
export const DIAGRAM_VIEW_RENDERED = 'rendered';
|
||||
// Mode value for the source view.
export const DIAGRAM_VIEW_SOURCE = 'source';
|
||||
// Class placed on the source view container built by createSourceView.
export const DIAGRAM_SOURCE_CLASS = 'diagram-source';
|
||||
// Class passed to createButton for the toggle source button.
export const TOGGLE_SOURCE_BTN_CLASS = 'toggle-source-btn';
|
||||
@@ -39,3 +39,5 @@ export const MODALITY_LABELS = {
|
||||
// Inline SVG markup for the copy icon (lucide "copy", 16x16, stroke follows currentColor).
export const COPY_ICON_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-copy-icon lucide-copy"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg>`;
|
||||
|
||||
// Inline SVG markup for the preview icon (lucide "eye", 16x16, stroke follows currentColor).
export const PREVIEW_ICON_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-eye lucide-eye-icon"><path d="M2.062 12.345a1 1 0 0 1 0-.69C3.5 7.73 7.36 5 12 5s8.5 2.73 9.938 6.655a1 1 0 0 1 0 .69C20.5 16.27 16.64 19 12 19s-8.5-2.73-9.938-6.655"/><circle cx="12" cy="12" r="3"/></svg>`;
|
||||
|
||||
// Inline SVG markup for the code icon (lucide "code"); used by the toggle source button.
export const CODE_ICON_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-code lucide-code-icon"><path d="m16 18 6-6-6-6"/><path d="m8 6-6 6 6 6"/></svg>`;
|
||||
|
||||
@@ -30,6 +30,7 @@ export * from './literal-html';
|
||||
export * from './markdown';
|
||||
export * from './mermaid-blocks';
|
||||
export * from './svg-blocks';
|
||||
export * from './diagram-blocks';
|
||||
export * from './max-bundle-size';
|
||||
export * from './mcp';
|
||||
export * from './mcp-form';
|
||||
|
||||
Vendored
+1
-1
@@ -41,7 +41,7 @@ if (LLAMA_BUILD_BORINGSSL)
|
||||
set(FIPS OFF CACHE BOOL "Enable FIPS (BoringSSL)")
|
||||
|
||||
set(BORINGSSL_GIT "https://boringssl.googlesource.com/boringssl" CACHE STRING "BoringSSL git repository")
|
||||
set(BORINGSSL_VERSION "0.20260526.0" CACHE STRING "BoringSSL version")
|
||||
set(BORINGSSL_VERSION "0.20260616.0" CACHE STRING "BoringSSL version")
|
||||
|
||||
message(STATUS "Fetching BoringSSL version ${BORINGSSL_VERSION}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user