vendor : update BoringSSL to 0.20260616.0 (#24693 )

ui: add source toggle to mermaid and svg blocks (#24652 )
* ui: add source toggle to mermaid and svg blocks Add a toggle button next to copy and preview that switches a rendered mermaid or svg block to its source code and back. The button is shared by both block types and the rendered view stays the default. The source view reuses the code block scroll container and the highlighted code element captured at transform time, so it matches the app code blocks without highlighting again. Make tall diagrams scroll like text code blocks: safe centering keeps the diagram centered when it fits and falls back to start alignment when it overflows, so the top stays reachable instead of clipping above. Keep the block header opaque and layered above the scrolled diagram, and ignore header clicks in the zoom handler, so a button click never falls through to the zoom dialog. * ui: transparent diagram block header, address review from @allozaur
2026-06-17 02:57:39 +02:00 · 2026-06-16 20:24:28 +02:00 · 2026-06-16 14:14:22 +02:00 · 2026-06-16 11:52:38 +02:00 · 2026-06-16 12:05:52 +03:00 · 2026-06-16 09:36:52 +02:00
14 changed files with 290 additions and 72 deletions
@@ -418,6 +418,9 @@ struct common_speculative_impl_draft_eagle3 : public common_speculative_impl {

    std::vector<common_sampler_ptr> smpls;

+    // backend sampler chain per seq, attached to ctx_dft
+    std::vector<llama_sampler *> backend_chains;
+
    int32_t n_embd_dec = 0;       // draft hidden size
    int32_t n_embd_enc = 0;       // target_layer_ids_n * target_hidden_size
    int32_t n_embd_tgt = 0;       // target model hidden size
@@ -443,7 +446,7 @@ struct common_speculative_impl_draft_eagle3 : public common_speculative_impl {
        , params(params.draft)
    {
        LOG_INF("%s: adding speculative implementation 'draft-eagle3'\n", __func__);
-        LOG_INF("%s: - n_max=%d, n_min=%d, p_min=%f\n", __func__, params.draft.n_max, params.draft.n_min, params.draft.p_min);
+        LOG_INF("%s: - n_max=%d, n_min=%d, p_min=%f, backend_sampling=%d\n", __func__, params.draft.n_max, params.draft.n_min, params.draft.p_min, (int) params.draft.backend_sampling);

        auto * ctx_tgt = this->params.ctx_tgt;
        auto * ctx_dft = this->params.ctx_dft;
@@ -478,6 +481,22 @@ struct common_speculative_impl_draft_eagle3 : public common_speculative_impl {
            s.reset(common_sampler_init(llama_get_model(ctx_dft), sparams));
        }

+        // offload draft sampling to the backend
+        backend_chains.assign(n_seq, nullptr);
+        if (this->params.backend_sampling) {
+            for (llama_seq_id seq_id = 0; seq_id < (llama_seq_id) n_seq; ++seq_id) {
+                llama_sampler * chain = llama_sampler_chain_init(llama_sampler_chain_default_params());
+                llama_sampler_chain_add(chain, llama_sampler_init_top_k(10));
+
+                if (!llama_set_sampler(ctx_dft, seq_id, chain)) {
+                    LOG_WRN("%s: backend offload failed for seq_id=%d; using CPU sampler\n", __func__, (int) seq_id);
+                    llama_sampler_free(chain);
+                    chain = nullptr;
+                }
+                backend_chains[seq_id] = chain;
+            }
+        }
+
        // turn on extraction of the target layers' input embeddings
        for (uint32_t k = 0; k < target_layer_ids_n; ++k) {
            llama_set_embeddings_layer_inp(ctx_tgt, (uint32_t) target_layer_ids[k], true);
@@ -496,6 +515,18 @@ struct common_speculative_impl_draft_eagle3 : public common_speculative_impl {
    }

    ~common_speculative_impl_draft_eagle3() override {
+        auto * ctx_dft = this->params.ctx_dft;
+        for (llama_seq_id seq_id = 0; seq_id < (llama_seq_id) backend_chains.size(); ++seq_id) {
+            if (backend_chains[seq_id] == nullptr) {
+                continue;
+            }
+            if (ctx_dft) {
+                llama_set_sampler(ctx_dft, seq_id, nullptr);
+            }
+            llama_sampler_free(backend_chains[seq_id]);
+        }
+        backend_chains.clear();
+
        if (batch.token != nullptr) {
            free(batch.token);
            batch.token = nullptr;
@@ -3080,8 +3080,10 @@ static vk_buffer ggml_vk_create_buffer_device(vk_device& device, size_t size) {
            buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
                                                       vk::MemoryPropertyFlagBits::eDeviceLocal});
        } else if (device->uma) {
-            // Fall back to host memory type
-            buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal,
+            // On UMA, prefer host-visible memory so direct tensor borrowing works.
+            // If unavailable, fall back to device-local memory.
+            buf = ggml_vk_create_buffer(device, size, {vk::MemoryPropertyFlagBits::eDeviceLocal | vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent,
+                                                       vk::MemoryPropertyFlagBits::eDeviceLocal,
                                                       vk::MemoryPropertyFlagBits::eHostVisible | vk::MemoryPropertyFlagBits::eHostCoherent});
        } else if (device->disable_host_visible_vidmem) {
            if (device->allow_sysmem_fallback) {
@@ -1088,6 +1088,10 @@ ggml_tensor * llm_graph_context::build_lora_mm(
          ggml_tensor * w_s) const {
    ggml_tensor * res = ggml_mul_mat(ctx0, w, cur);

+    if (w_s) {
+        res = ggml_mul(ctx0, res, w_s);
+    }
+
    for (const auto & lora : *loras) {
        llama_adapter_lora_weight * lw = lora.first->get_weight(w);
        if (lw == nullptr) {
@@ -1106,18 +1110,24 @@ ggml_tensor * llm_graph_context::build_lora_mm(
        res = ggml_add(ctx0, res, ab_cur);
    }

-    if (w_s) {
-        res = ggml_mul(ctx0, res, w_s);
-    }
-
    return res;
 }

 ggml_tensor * llm_graph_context::build_lora_mm_id(
          ggml_tensor * w,   // ggml_tensor * as
          ggml_tensor * cur, // ggml_tensor * b
-          ggml_tensor * ids) const {
+          ggml_tensor * ids,
+          ggml_tensor * w_s) const {
    ggml_tensor * res = ggml_mul_mat_id(ctx0, w, cur, ids);
+
+    if (w_s) {
+        const int64_t n_expert = w_s->ne[0];
+        const int64_t n_tokens = cur->ne[2];
+        ggml_tensor * s = ggml_reshape_3d(ctx0, w_s, 1, n_expert, 1);
+        s = ggml_repeat_4d(ctx0, s, 1, n_expert, n_tokens, 1);
+        s = ggml_get_rows(ctx0, s, ids);
+        res = ggml_mul(ctx0, res, s);
+    }
    for (const auto & lora : *loras) {
        llama_adapter_lora_weight * lw = lora.first->get_weight(w);
        if (lw == nullptr) {
@@ -1269,6 +1279,29 @@ ggml_tensor * llm_graph_context::build_ffn(
     llm_ffn_op_type   type_op,
   llm_ffn_gate_type   type_gate,
                 int   il) const {
+    // NVFP4 support is currently restricted to
+    // 1) LORA absence (*_s would be applied after LORA residual, which is incorrect)
+    // 2) bias absence (*_s would be applied after bias addition, which is incorrect)
+    // TODO: disambiguate LLM-architectural scales (which use *_s) from NVFP4 scale_2 (which also uses *_s currently)
+    auto has_lora = [this](ggml_tensor * w) {
+        if (!w) {
+            return false;
+        }
+        for (const auto & lora : *loras) {
+            if (lora.first->get_weight(w) != nullptr) {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    GGML_ASSERT(!up_s   || !up_b   || !up   || up->type   != GGML_TYPE_NVFP4);
+    GGML_ASSERT(!gate_s || !gate_b || !gate || gate->type != GGML_TYPE_NVFP4);
+    GGML_ASSERT(!down_s || !down_b || !down || down->type != GGML_TYPE_NVFP4);
+    GGML_ASSERT(!up_s   || !up   || up->type   != GGML_TYPE_NVFP4 || !has_lora(up));
+    GGML_ASSERT(!gate_s || !gate || gate->type != GGML_TYPE_NVFP4 || !has_lora(gate));
+    GGML_ASSERT(!down_s || !down || down->type != GGML_TYPE_NVFP4 || !has_lora(down));
+
    ggml_tensor * tmp = up ? build_lora_mm(up, cur) : cur;
    cb(tmp, "ffn_up", il);

@@ -1627,23 +1660,18 @@ ggml_tensor * llm_graph_context::build_moe_ffn(

    if (gate_up_exps) {
        // merged gate_up path: one mul_mat_id, then split into gate and up views
-        ggml_tensor * gate_up = build_lora_mm_id(gate_up_exps, cur, selected_experts); // [n_ff*2, n_expert_used, n_tokens]
+        ggml_tensor * gate_up = build_lora_mm_id(gate_up_exps, cur, selected_experts, up_exps_s); // [n_ff*2, n_expert_used, n_tokens]
        cb(gate_up, "ffn_moe_gate_up", il);

+        if (up_exps_s) {
+            cb(gate_up, "ffn_moe_gate_up_scaled", il);
+        }
+
        if (gate_up_exps_b) {
            gate_up = ggml_add_id(ctx0, gate_up, gate_up_exps_b, selected_experts);
            cb(gate_up, "ffn_moe_gate_up_biased", il);
        }

-        // apply per-expert scale2 to merged gate_up (use up_exps_s since gate and up are fused)
-        if (up_exps_s) {
-            ggml_tensor * s = ggml_reshape_3d(ctx0, up_exps_s, 1, n_expert, 1);
-            s = ggml_repeat_4d(ctx0, s, 1, n_expert, n_tokens, 1);
-            s = ggml_get_rows(ctx0, s, selected_experts); // [1, n_expert_used, n_tokens]
-            gate_up = ggml_mul(ctx0, gate_up, s);
-            cb(gate_up, "ffn_moe_gate_up_scaled", il);
-        }
-
        const int64_t n_ff = gate_up->ne[0] / 2;
        cur = ggml_view_3d(ctx0, gate_up, n_ff, gate_up->ne[1], gate_up->ne[2], gate_up->nb[1], gate_up->nb[2], 0);
        cb(cur, "ffn_moe_gate", il);
@@ -1651,43 +1679,33 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
        cb(up, "ffn_moe_up", il);
    } else {
        // separate gate and up path
-        up = build_lora_mm_id(up_exps, cur, selected_experts); // [n_ff, n_expert_used, n_tokens]
+        up = build_lora_mm_id(up_exps, cur, selected_experts, up_exps_s); // [n_ff, n_expert_used, n_tokens]
        cb(up, "ffn_moe_up", il);

+        if (up_exps_s) {
+            cb(up, "ffn_moe_up_scaled", il);
+        }
+
        if (up_exps_b) {
            up = ggml_add_id(ctx0, up, up_exps_b, selected_experts);
            cb(up, "ffn_moe_up_biased", il);
        }

-        // apply per-expert scale2 to up
-        if (up_exps_s) {
-            ggml_tensor * s = ggml_reshape_3d(ctx0, up_exps_s, 1, n_expert, 1);
-            s = ggml_repeat_4d(ctx0, s, 1, n_expert, n_tokens, 1);
-            s = ggml_get_rows(ctx0, s, selected_experts); // [1, n_expert_used, n_tokens]
-            up = ggml_mul(ctx0, up, s);
-            cb(up, "ffn_moe_up_scaled", il);
-        }
-
        if (gate_exps) {
-            cur = build_lora_mm_id(gate_exps, cur, selected_experts); // [n_ff, n_expert_used, n_tokens]
+            cur = build_lora_mm_id(gate_exps, cur, selected_experts, gate_exps_s); // [n_ff, n_expert_used, n_tokens]
            cb(cur, "ffn_moe_gate", il);
        } else {
            cur = up;
        }

+        if (gate_exps_s) {
+            cb(cur, "ffn_moe_gate_scaled", il);
+        }
+
        if (gate_exps_b) {
            cur = ggml_add_id(ctx0, cur, gate_exps_b, selected_experts);
            cb(cur, "ffn_moe_gate_biased", il);
        }
-
-        // apply per-expert scale2 to gate
-        if (gate_exps_s) {
-            ggml_tensor * s = ggml_reshape_3d(ctx0, gate_exps_s, 1, n_expert, 1);
-            s = ggml_repeat_4d(ctx0, s, 1, n_expert, n_tokens, 1);
-            s = ggml_get_rows(ctx0, s, selected_experts); // [1, n_expert_used, n_tokens]
-            cur = ggml_mul(ctx0, cur, s);
-            cb(cur, "ffn_moe_gate_scaled", il);
-        }
    }

    const bool has_gate = gate_exps || gate_up_exps;
@@ -1759,23 +1777,18 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
            GGML_ABORT("fatal error");
    }

-    experts = build_lora_mm_id(down_exps, cur, selected_experts); // [n_embd, n_expert_used, n_tokens]
+    experts = build_lora_mm_id(down_exps, cur, selected_experts, down_exps_s); // [n_embd, n_expert_used, n_tokens]
    cb(experts, "ffn_moe_down", il);

+    if (down_exps_s) {
+        cb(experts, "ffn_moe_down_scaled", il);
+    }
+
    if (down_exps_b) {
        experts = ggml_add_id(ctx0, experts, down_exps_b, selected_experts);
        cb(experts, "ffn_moe_down_biased", il);
    }

-    // apply per-expert scale2 to down
-    if (down_exps_s) {
-        ggml_tensor * s = ggml_reshape_3d(ctx0, down_exps_s, 1, n_expert, 1);
-        s = ggml_repeat_4d(ctx0, s, 1, n_expert, n_tokens, 1);
-        s = ggml_get_rows(ctx0, s, selected_experts); // [1, n_expert_used, n_tokens]
-        experts = ggml_mul(ctx0, experts, s);
-        cb(experts, "ffn_moe_down_scaled", il);
-    }
-
    if (!weight_before_ffn) {
        experts = ggml_mul(ctx0, experts, weights);
        cb(experts, "ffn_moe_weighted", il);
@@ -853,11 +853,12 @@ struct llm_graph_context {
              ggml_tensor * cur,
              ggml_tensor * w_s = nullptr) const;

-    // do mat_mul_id, while optionally apply lora
+    // do mat_mul_id, while optionally apply lora and per-expert scale
    ggml_tensor * build_lora_mm_id(
              ggml_tensor * w,   // ggml_tensor * as
              ggml_tensor * cur, // ggml_tensor * b
-              ggml_tensor * ids) const;
+              ggml_tensor * ids,
+              ggml_tensor * w_s = nullptr) const;

    ggml_tensor * build_norm(
             ggml_tensor * cur,
@@ -41,6 +41,7 @@
 		DATA_ERROR_HANDLED_ATTR,
 		BOOL_TRUE_STRING,
 		SETTINGS_KEYS,
+		CODE_BLOCK_HEADER_CLASS,
 		MERMAID_WRAPPER_CLASS,
 		MERMAID_BLOCK_CLASS,
 		MERMAID_LANGUAGE,
@@ -53,7 +54,11 @@
 		SVG_TAG_PREFIX,
 		SVG_SOURCE_ATTR,
 		SVG_RENDERED_ATTR,
-		SVG_INLINE_SHADOW_STYLE
+		SVG_INLINE_SHADOW_STYLE,
+		TOGGLE_SOURCE_BTN_CLASS,
+		DIAGRAM_VIEW_MODE_ATTR,
+		DIAGRAM_VIEW_RENDERED,
+		DIAGRAM_VIEW_SOURCE
 	} from '$lib/constants';
 	import { ColorMode, UrlProtocol } from '$lib/enums';
 	import { FileTypeText } from '$lib/enums/files.enums';
@@ -501,6 +506,23 @@
 	async function handleMermaidClick(event: MouseEvent) {
 		const target = event.target as HTMLElement;

+		// Toggle a diagram block between its rendered view and its source view.
+		// Shared by mermaid and svg, css drives the visibility from the wrapper mode.
+		const toggleBtn = target.closest(`.${TOGGLE_SOURCE_BTN_CLASS}`);
+		if (toggleBtn) {
+			event.preventDefault();
+			event.stopPropagation();
+
+			const wrapper = toggleBtn.closest(`.${MERMAID_WRAPPER_CLASS}, .${SVG_WRAPPER_CLASS}`);
+			if (!wrapper) return;
+
+			const isSource = wrapper.getAttribute(DIAGRAM_VIEW_MODE_ATTR) === DIAGRAM_VIEW_SOURCE;
+			const next = isSource ? DIAGRAM_VIEW_RENDERED : DIAGRAM_VIEW_SOURCE;
+			wrapper.setAttribute(DIAGRAM_VIEW_MODE_ATTR, next);
+			toggleBtn.setAttribute('aria-pressed', String(!isSource));
+			return;
+		}
+
 		// Check if clicking on copy or preview button in mermaid block
 		const copyBtn = target.closest(`.${MERMAID_WRAPPER_CLASS} .copy-code-btn`);
 		const previewBtn = target.closest(`.${MERMAID_WRAPPER_CLASS} .preview-code-btn`);
@@ -573,6 +595,11 @@
 			}
 		}

+		// A click on the header chrome targets the action buttons, never the
+		// diagram. Guard so a header click cannot fall through to the
+		// click-to-zoom branches below, whatever the scroll position or stacking.
+		if (target.closest(`.${CODE_BLOCK_HEADER_CLASS}`)) return;
+
 		// Open preview when clicking the svg block itself. A final block carries its
 		// source, a streaming block does not and is mirrored live into the dialog.
 		const svgEl = target.closest(`.${SVG_BLOCK_CLASS}`);
@@ -300,7 +300,8 @@ div.markdown-user-content :global(.table-wrapper) {
 }

 .markdown-content :global(.copy-code-btn),
-.markdown-content :global(.preview-code-btn) {
+.markdown-content :global(.preview-code-btn),
+.markdown-content :global(.toggle-source-btn) {
 	display: flex;
 	align-items: center;
 	justify-content: center;
@@ -312,15 +313,22 @@ div.markdown-user-content :global(.table-wrapper) {
 }

 .markdown-content :global(.copy-code-btn:hover),
-.markdown-content :global(.preview-code-btn:hover) {
+.markdown-content :global(.preview-code-btn:hover),
+.markdown-content :global(.toggle-source-btn:hover) {
 	transform: scale(1.05);
 }

 .markdown-content :global(.copy-code-btn:active),
-.markdown-content :global(.preview-code-btn:active) {
+.markdown-content :global(.preview-code-btn:active),
+.markdown-content :global(.toggle-source-btn:active) {
 	transform: scale(0.95);
 }

+/* Pressed state marks the source view as active */
+.markdown-content :global(.toggle-source-btn[aria-pressed='true']) {
+	color: var(--primary);
+}
+
 .markdown-content :global(.code-block-wrapper pre) {
 	background: transparent;
 	margin: 0;
@@ -629,8 +637,8 @@ div.markdown-user-content :global(.table-wrapper) {
 	overflow-y: auto;
 	overflow-x: auto;
 	display: flex;
-	align-items: center;
-	justify-content: center;
+	align-items: safe center;
+	justify-content: safe center;
 	padding: 3rem 1rem 1rem;
 }

@@ -645,7 +653,9 @@ div.markdown-user-content :global(.table-wrapper) {
 	overflow-y: visible;
 }

-/* Diagram block uses same header styling as code blocks */
+/* Diagram block uses same header styling as code blocks. The header floats over
+   scrollable diagram content and stays transparent, so the overflow shows up to
+   the box edge. It keeps a z-index so it stays the click target above content. */
 .markdown-content :global(.mermaid-block-wrapper .code-block-header),
 .markdown-content :global(.svg-block-wrapper .code-block-header) {
 	display: flex;
@@ -657,6 +667,7 @@ div.markdown-user-content :global(.table-wrapper) {
 	top: 0;
 	left: 0;
 	right: 0;
+	z-index: 2;
 }

 .markdown-content :global(.mermaid-block-wrapper .code-block-actions),
@@ -683,6 +694,31 @@ div.markdown-user-content :global(.table-wrapper) {
 	padding: 3rem 1rem;
 }

+/* Source view stays hidden while the block renders, css swaps the two views
+   from the wrapper mode so the click handler only flips one attribute. The view
+   reuses the code block scroll container, so it matches the app code blocks. */
+.markdown-content :global(.diagram-source) {
+	display: none;
+	text-align: left;
+}
+
+.markdown-content :global(.diagram-source pre) {
+	background: transparent;
+	margin: 0;
+	border-radius: 0;
+	border: none;
+	font-size: 0.875rem;
+}
+
+.markdown-content :global([data-view-mode='source'] .mermaid-scroll-container),
+.markdown-content :global([data-view-mode='source'] .svg-scroll-container) {
+	display: none;
+}
+
+.markdown-content :global([data-view-mode='source'] .diagram-source) {
+	display: block;
+}
+
 /* Streaming mermaid block - empty preview box */
 .mermaid-streaming-block {
 	min-height: 300px;
@@ -7,12 +7,16 @@ import type { Element, ElementContent } from 'hast';
 import {
 	CODE_BLOCK_HEADER_CLASS,
 	CODE_BLOCK_ACTIONS_CLASS,
+	CODE_BLOCK_SCROLL_CONTAINER_CLASS,
 	CODE_LANGUAGE_CLASS,
 	COPY_CODE_BTN_CLASS,
 	PREVIEW_CODE_BTN_CLASS,
+	TOGGLE_SOURCE_BTN_CLASS,
+	DIAGRAM_SOURCE_CLASS,
 	RELATIVE_CLASS,
 	COPY_ICON_SVG,
-	PREVIEW_ICON_SVG
+	PREVIEW_ICON_SVG,
+	CODE_ICON_SVG
 } from '$lib/constants';

 export interface BlockIdGenerator {
@@ -32,14 +36,16 @@ export function createIconElement(svg: string): Element {
 }

 /**
- * Creates a button element with icon.
+ * Creates a button element with icon. Extra properties merge onto the button,
+ * which lets a stateful button carry attributes like aria-pressed.
 */
 export function createButton(
 	className: string,
 	title: string,
 	iconSvg: string,
 	id: string,
-	idAttribute: string
+	idAttribute: string,
+	extraProperties: Record<string, string> = {}
 ): Element {
 	return {
 		type: 'element',
@@ -48,7 +54,8 @@ export function createButton(
 			className: [className],
 			[idAttribute]: id,
 			title,
-			type: 'button'
+			type: 'button',
+			...extraProperties
 		},
 		children: [createIconElement(iconSvg)]
 	};
@@ -72,6 +79,52 @@ export function createPreviewButton(
 	return createButton(PREVIEW_CODE_BTN_CLASS, title, PREVIEW_ICON_SVG, id, idAttribute);
 }

+/**
+ * Creates a button that toggles a diagram block between its rendered view and
+ * its source view. aria-pressed starts false, the rendered view is the default.
+ */
+export function createToggleSourceButton(
+	id: string,
+	idAttribute: string,
+	title: string = 'Toggle source'
+): Element {
+	return createButton(TOGGLE_SOURCE_BTN_CLASS, title, CODE_ICON_SVG, id, idAttribute, {
+		'aria-pressed': 'false'
+	});
+}
+
+/**
+ * Creates a source view for a diagram block. It reuses the code block scroll
+ * container so it matches the app code blocks, and wraps the highlighted code
+ * element captured at transform time. A missing code element falls back to a
+ * plain code node built from the raw source.
+ */
+export function createSourceView(
+	codeElement: Element | undefined,
+	source: string,
+	language: string
+): Element {
+	const code: Element = codeElement ?? {
+		type: 'element',
+		tagName: 'code',
+		properties: { className: ['hljs', `language-${language}`] },
+		children: [{ type: 'text', value: source }]
+	};
+	return {
+		type: 'element',
+		tagName: 'div',
+		properties: { className: [DIAGRAM_SOURCE_CLASS, CODE_BLOCK_SCROLL_CONTAINER_CLASS] },
+		children: [
+			{
+				type: 'element',
+				tagName: 'pre',
+				properties: {},
+				children: [code]
+			}
+		]
+	};
+}
+
 /**
 * Creates a block header with language label and action buttons.
 */
@@ -116,14 +169,17 @@ export function createScrollContainer(preElement: Element, scrollContainerClass:
 }

 /**
- * Creates a wrapper element with header and scroll container.
+ * Creates a wrapper element with header and scroll container. Extra children
+ * append after the scroll container, which lets a block carry a source view
+ * alongside its rendered output.
 */
 export function createWrapper(
 	header: Element,
 	preElement: Element,
 	wrapperClass: string,
 	scrollContainerClass: string,
-	additionalAttributes?: Record<string, string>
+	additionalAttributes?: Record<string, string>,
+	extraChildren: Element[] = []
 ): Element {
 	return {
 		type: 'element',
@@ -132,7 +188,7 @@ export function createWrapper(
 			className: [wrapperClass, RELATIVE_CLASS],
 			...additionalAttributes
 		} as Element['properties'],
-		children: [header, createScrollContainer(preElement, scrollContainerClass)]
+		children: [header, createScrollContainer(preElement, scrollContainerClass), ...extraChildren]
 	};
 }

@@ -19,12 +19,17 @@ import {
 	MERMAID_BLOCK_CLASS,
 	MERMAID_LANGUAGE,
 	MERMAID_SYNTAX_ATTR,
-	MERMAID_ID_ATTR
+	MERMAID_ID_ATTR,
+	DIAGRAM_VIEW_MODE_ATTR,
+	DIAGRAM_VIEW_RENDERED
 } from '$lib/constants';
+import type { DiagramPreData } from './pre-transform';
 import {
 	createBlockHeader,
 	createCopyButton,
 	createPreviewButton,
+	createToggleSourceButton,
+	createSourceView,
 	createWrapper,
 	generateBlockId
 } from './code-block-utils';
@@ -75,16 +80,23 @@ export const rehypeEnhanceMermaidBlocks: Plugin<[], Root> = () => {

 			const actions = [
 				createCopyButton(mermaidId, MERMAID_ID_ATTR, 'Copy mermaid syntax'),
+				createToggleSourceButton(mermaidId, MERMAID_ID_ATTR, 'Toggle mermaid source'),
 				createPreviewButton(mermaidId, MERMAID_ID_ATTR, 'Preview diagram')
 			];

 			const header = createBlockHeader(MERMAID_LANGUAGE, mermaidId, MERMAID_ID_ATTR, actions);
+			const preservedCode = (node.data as DiagramPreData | undefined)?.sourceCode;
+			const sourceView = createSourceView(preservedCode, diagramText, MERMAID_LANGUAGE);
 			const wrapper = createWrapper(
 				header,
 				node,
 				MERMAID_WRAPPER_CLASS,
 				MERMAID_SCROLL_CONTAINER_CLASS,
-				{ [MERMAID_ID_ATTR]: mermaidId }
+				{
+					[MERMAID_ID_ATTR]: mermaidId,
+					[DIAGRAM_VIEW_MODE_ATTR]: DIAGRAM_VIEW_RENDERED
+				},
+				[sourceView]
 			);

 			// Replace pre with wrapper in parent
@@ -18,12 +18,17 @@ import {
 	SVG_BLOCK_CLASS,
 	SVG_LANGUAGE,
 	SVG_SOURCE_ATTR,
-	SVG_ID_ATTR
+	SVG_ID_ATTR,
+	DIAGRAM_VIEW_MODE_ATTR,
+	DIAGRAM_VIEW_RENDERED
 } from '$lib/constants';
+import type { DiagramPreData } from './pre-transform';
 import {
 	createBlockHeader,
 	createCopyButton,
 	createPreviewButton,
+	createToggleSourceButton,
+	createSourceView,
 	createWrapper,
 	generateBlockId
 } from './code-block-utils';
@@ -65,13 +70,24 @@ export const rehypeEnhanceSvgBlocks: Plugin<[], Root> = () => {

 			const actions = [
 				createCopyButton(svgId, SVG_ID_ATTR, 'Copy svg source'),
+				createToggleSourceButton(svgId, SVG_ID_ATTR, 'Toggle svg source'),
 				createPreviewButton(svgId, SVG_ID_ATTR, 'Preview svg')
 			];

 			const header = createBlockHeader(SVG_LANGUAGE, svgId, SVG_ID_ATTR, actions);
-			const wrapper = createWrapper(header, node, SVG_WRAPPER_CLASS, SVG_SCROLL_CONTAINER_CLASS, {
-				[SVG_ID_ATTR]: svgId
-			});
+			const preservedCode = (node.data as DiagramPreData | undefined)?.sourceCode;
+			const sourceView = createSourceView(preservedCode, svgSource, SVG_LANGUAGE);
+			const wrapper = createWrapper(
+				header,
+				node,
+				SVG_WRAPPER_CLASS,
+				SVG_SCROLL_CONTAINER_CLASS,
+				{
+					[SVG_ID_ATTR]: svgId,
+					[DIAGRAM_VIEW_MODE_ATTR]: DIAGRAM_VIEW_RENDERED
+				},
+				[sourceView]
+			);

 			// Replace pre with wrapper in parent
 			(parent.children as ElementContent[])[index] = wrapper;
@@ -2,6 +2,15 @@ import type { Plugin } from 'unified';
 import type { Root, Element, ElementContent, Text } from 'hast';
 import { visit } from 'unist-util-visit';

+/**
+ * Metadata a diagram pre carries on its unist data field. The source code holds
+ * the highlighted code element captured before the pre became a render target,
+ * which the enhancer reuses to build a matching source view.
+ */
+export interface DiagramPreData {
+	sourceCode: Element;
+}
+
 /**
 * Recursively extracts all text content from a HAST node.
 * Handles nested elements (e.g., span wrappers from syntax highlighting).
@@ -69,7 +78,10 @@ export function createPreTransform(
 					properties: {
 						className: [targetClass]
 					},
-					children: [{ type: 'text', value: text } as Text]
+					children: [{ type: 'text', value: text } as Text],
+					// Keep the highlighted code element so the block can offer a source
+					// view that matches the app code blocks without re-highlighting.
+					data: { sourceCode: codeElement } satisfies DiagramPreData
 				};

 				(parent.children as ElementContent[])[index] = pre;
@@ -0,0 +1,9 @@
+// Shared constants for diagram blocks (mermaid and svg) that toggle between a
+// rendered view and a source view. The wrapper carries the active mode, css
+// drives the visibility, the click handler only flips the attribute.
+
+export const DIAGRAM_VIEW_MODE_ATTR = 'data-view-mode';
+export const DIAGRAM_VIEW_RENDERED = 'rendered';
+export const DIAGRAM_VIEW_SOURCE = 'source';
+export const DIAGRAM_SOURCE_CLASS = 'diagram-source';
+export const TOGGLE_SOURCE_BTN_CLASS = 'toggle-source-btn';
@@ -39,3 +39,5 @@ export const MODALITY_LABELS = {
 export const COPY_ICON_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-copy-icon lucide-copy"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg>`;

 export const PREVIEW_ICON_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-eye lucide-eye-icon"><path d="M2.062 12.345a1 1 0 0 1 0-.69C3.5 7.73 7.36 5 12 5s8.5 2.73 9.938 6.655a1 1 0 0 1 0 .69C20.5 16.27 16.64 19 12 19s-8.5-2.73-9.938-6.655"/><circle cx="12" cy="12" r="3"/></svg>`;
+
+export const CODE_ICON_SVG = `<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-code lucide-code-icon"><path d="m16 18 6-6-6-6"/><path d="m8 6-6 6 6 6"/></svg>`;
@@ -30,6 +30,7 @@ export * from './literal-html';
 export * from './markdown';
 export * from './mermaid-blocks';
 export * from './svg-blocks';
+export * from './diagram-blocks';
 export * from './max-bundle-size';
 export * from './mcp';
 export * from './mcp-form';
@@ -41,7 +41,7 @@ if (LLAMA_BUILD_BORINGSSL)
    set(FIPS OFF CACHE BOOL "Enable FIPS (BoringSSL)")

    set(BORINGSSL_GIT "https://boringssl.googlesource.com/boringssl" CACHE STRING "BoringSSL git repository")
-    set(BORINGSSL_VERSION "0.20260526.0" CACHE STRING "BoringSSL version")
+    set(BORINGSSL_VERSION "0.20260616.0" CACHE STRING "BoringSSL version")

    message(STATUS "Fetching BoringSSL version ${BORINGSSL_VERSION}")
Author	SHA1	Message	Date
Alessandro de Oliveira Faria (A.K.A.CABELO)	74ade52741	vendor : update BoringSSL to 0.20260616.0 (#24693 )	2026-06-16 20:24:28 +02:00
Pascal	c1304d7b28	ui: add source toggle to mermaid and svg blocks (#24652 ) * ui: add source toggle to mermaid and svg blocks Add a toggle button next to copy and preview that switches a rendered mermaid or svg block to its source code and back. The button is shared by both block types and the rendered view stays the default. The source view reuses the code block scroll container and the highlighted code element captured at transform time, so it matches the app code blocks without highlighting again. Make tall diagrams scroll like text code blocks: safe centering keeps the diagram centered when it fits and falls back to start alignment when it overflows, so the top stays reachable instead of clipping above. Keep the block header opaque and layered above the scrolled diagram, and ignore header clicks in the zoom handler, so a button click never falls through to the zoom dialog. * ui: transparent diagram block header, address review from @allozaur	2026-06-16 14:14:22 +02:00
Oliver Simons	02810c7aa8	Fix and restrict NVFP4 edge-cases in llama-graph (#24331 ) * Move post-GEMM MUL required for dequant b4 lora and bias add see https://github.com/ggml-org/llama.cpp/pull/23484 : 1. For lora, I would presume we want fully dequantized values before doing the residuals, but this depends on how the LORAs were generated. Literature tells me LORA happens post-mul but pre-bias add https://github.com/ggml-org/llama.cpp/pull/8332 2. For ModelOPT, bias-add should happen on [fully-dequantized values](https://github.com/NVIDIA/Model-Optimizer/blob/b49f9b9e2d747af992d78a3aa7f10efe5a8847e1/modelopt/torch/quantization/backends/nvfp4_gemm.py#L59-L64) * Restrict build_ffn for NVFP4 to supported combinations	2026-06-16 11:52:38 +02:00
Ruixiang Wang	a1824902b5	spec: add backend sampling support for eagle3 (#24655 )	2026-06-16 12:05:52 +03:00
Winston Ma	32120c10e3	vulkan: prefer host-visible memory buffers on UMA devices (#22930 ) * implement UMA host-visible memory * update based on 0cc4m's suggestion	2026-06-16 09:36:52 +02:00