llama : use LLM_KV for quantization_version & file_type (#24802 )

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
arg: try fixing test-args-parser randomly fails (#24826 )
2026-06-20 20:57:40 +02:00 · 2026-06-20 20:07:01 +02:00 · 2026-06-20 19:45:27 +02:00 · 2026-06-20 23:08:59 +08:00 · 2026-06-20 15:34:47 +02:00 · 2026-06-20 05:54:42 -05:00
16 changed files with 220 additions and 46 deletions
@@ -4,20 +4,6 @@ ARG BUILD_DATE=N/A
 ARG APP_VERSION=N/A
 ARG APP_REVISION=N/A

-ARG NODE_VERSION=24
-
-FROM docker.io/node:$NODE_VERSION AS web
-
-ARG APP_VERSION
-
-WORKDIR /app/tools/ui
-
-COPY tools/ui/package.json tools/ui/package-lock.json ./
-RUN npm ci
-
-COPY tools/ui/ ./
-RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build
-
 ### Build Llama.cpp stage
 FROM docker.io/gcc:${GCC_VERSION} AS build

@@ -34,8 +20,6 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
 WORKDIR /app
 COPY . .

-COPY --from=web /app/tools/ui/dist tools/ui/dist
-
 RUN --mount=type=cache,target=/root/.ccache \
    --mount=type=cache,target=/app/build \
    cmake -S . -B build -G Ninja \
@@ -11,7 +11,6 @@
 build*/

 tools/ui/node_modules/
-tools/ui/dist/

 models/*

@@ -58,6 +58,13 @@ jobs:
          git tag ${{ steps.srctag.outputs.name }} || exit 0
          git push origin ${{ steps.srctag.outputs.name }} || exit 0

+  build_ui:
+    name: Build UI
+    needs: create_tag
+    uses: ./.github/workflows/ui-build.yml
+    with:
+      hf_ui_version: ${{ needs.create_tag.outputs.source_tag }}
+
  prepare_matrices:
    name: Prepare Docker matrices
    runs-on: ubuntu-24.04
@@ -79,7 +86,7 @@ jobs:
          [
            { "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04" },
            { "tag": "cpu", "dockerfile": ".devops/cpu.Dockerfile", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-arm" },
-            { "tag": "cpu", "dockerfile": ".devops/s390x.Dockerfile", "platforms": "linux/s390x", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-s390x" },
+            { "tag": "cpu", "dockerfile": ".devops/s390x.Dockerfile", "platforms": "linux/s390x", "full": true, "light": true, "server": true, "free_disk_space": false, "runs_on": "ubuntu-24.04-s390x", "prebuilt_ui": true },
            { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.8.1", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
            { "tag": "cuda cuda12", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "12.8.1", "platforms": "linux/arm64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04-arm" },
            { "tag": "cuda13", "dockerfile": ".devops/cuda.Dockerfile", "cuda_version": "13.3.0", "platforms": "linux/amd64", "full": true, "light": true, "server": true, "free_disk_space": true, "runs_on": "ubuntu-24.04" },
@@ -135,7 +142,7 @@ jobs:

  push_to_registry:
    name: Push Docker image to Docker Registry
-    needs: [prepare_matrices, create_tag]
+    needs: [prepare_matrices, create_tag, build_ui]

    runs-on: ${{ matrix.config.runs_on }}
    strategy:
@@ -150,6 +157,13 @@ jobs:
          fetch-depth: 0
          ref: ${{ needs.create_tag.outputs.source_tag }}

+      - name: Download prebuilt UI
+        if: ${{ matrix.config.prebuilt_ui == true }}
+        uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8
+        with:
+          name: ui-build
+          path: tools/ui/dist
+
      - name: Set up QEMU
        if: ${{ contains(matrix.config.platforms, 'linux/amd64') }}
        uses: docker/setup-qemu-action@ce360397dd3f832beb865e1373c09c0e9f86d70a # v4
@@ -1627,6 +1627,7 @@ jobs:
            **Windows:**
            - [Windows x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-x64.zip)
            - [Windows arm64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-arm64.zip)
+            - [Windows arm64 (OpenCL Adreno)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-opencl-adreno-arm64.zip)
            - [Windows x64 (CUDA 12)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip) - [CUDA 12.4 DLLs](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-12.4-x64.zip)
            - [Windows x64 (CUDA 13)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-13.3-x64.zip) - [CUDA 13.3 DLLs](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-13.3-x64.zip)
            - [Windows x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-vulkan-x64.zip)
@@ -924,8 +924,8 @@ static utf8_argv make_utf8_argv() {
 bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
 #ifdef _WIN32
    auto utf8 = make_utf8_argv();
-    if (!utf8.ptrs.empty()) {
-        argc = static_cast<int>(utf8.buf.size());
+    // repair argv only when it matches the process command line
+    if (static_cast<int>(utf8.buf.size()) == argc) {
        argv = utf8.ptrs.data();
    }
 #endif
@@ -2897,7 +2897,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
            params.server_tools = parse_csv_row(value);
        }
    ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_TOOLS"));
-        add_opt(common_arg(
+    add_opt(common_arg(
        {"-ag", "--agent"},
        {"-no-ag", "--no-agent"},
        "whether to enable CORS proxy and all built-in tools - do not enable in untrusted environments (default: disabled)",
@@ -932,8 +932,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::

    // copy the KV pairs from the input file
    gguf_set_kv     (ctx_out.get(), ml.metadata);
-    gguf_set_val_u32(ctx_out.get(), "general.quantization_version", GGML_QNT_VERSION); // TODO: use LLM_KV
-    gguf_set_val_u32(ctx_out.get(), "general.file_type", ftype); // TODO: use LLM_KV
+    gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_GENERAL_QUANTIZATION_VERSION).c_str(), GGML_QNT_VERSION);
+    gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_GENERAL_FILE_TYPE).c_str(), ftype);

    // Remove split metadata
    gguf_remove_key(ctx_out.get(), ml.llm_kv(LLM_KV_SPLIT_NO).c_str());
@@ -10,7 +10,7 @@
 #undef NDEBUG
 #include <cassert>

-int main(void) {
+static void test(void) {
    common_params params;

    printf("test-arg-parser: make sure there is no duplicated arguments in any examples\n\n");
@@ -210,3 +210,13 @@ int main(void) {

    printf("test-arg-parser: all tests OK\n\n");
 }
+
+int main(void) {
+    try {
+        test();
+    } catch (std::exception & e) {
+        fprintf(stderr, "test-arg-parser: exception: %s\n", e.what());
+        return 1;
+    }
+    return 0;
+}
@@ -7,9 +7,18 @@
 #include <unordered_set>
 #include <list>
 #include <map>
+#include <algorithm>
+#include <cctype>

 #include "server-http.h"

+static std::string proxy_header_to_lower(std::string header) {
+    std::transform(header.begin(), header.end(), header.begin(), [](unsigned char c) {
+        return std::tolower(c);
+    });
+    return header;
+}
+
 static server_http_res_ptr proxy_request(const server_http_req & req, std::string method) {
    std::string target_url = req.get_param("url");
    common_http_url parsed_url = common_http_parse_url(target_url);
@@ -33,11 +42,18 @@ static server_http_res_ptr proxy_request(const server_http_req & req, std::strin
    SRV_INF("proxying %s request to %s://%s:%i%s\n", method.c_str(), parsed_url.scheme.c_str(), parsed_url.host.c_str(), parsed_url.port, parsed_url.path.c_str());

    std::map<std::string, std::string> headers;
+    const std::string proxy_header_prefix = "x-llama-server-proxy-header-";
    for (auto [key, value] : req.headers) {
-        auto new_key = key;
-        if (string_starts_with(new_key, "x-proxy-header-")) {
-            string_replace_all(new_key, "x-proxy-header-", "");
+        const std::string lowered_key = proxy_header_to_lower(key);
+        if (!string_starts_with(lowered_key, proxy_header_prefix)) {
+            continue;
        }
+
+        auto new_key = key.substr(proxy_header_prefix.size());
+        if (new_key.empty()) {
+            continue;
+        }
+
        headers[new_key] = value;
    }

@@ -1,6 +1,8 @@
 import pytest
 from openai import OpenAI
 from utils import *
+import threading
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer

 server = ServerPreset.tinyllama2()

@@ -105,6 +107,49 @@ def test_cors_options(origin: str, cors_header: str, cors_header_value: str):
    assert res.headers[cors_header] == cors_header_value


+def test_cors_proxy_only_forwards_explicit_proxy_headers():
+    class CaptureHeadersHandler(BaseHTTPRequestHandler):
+        def do_GET(self):
+            self.server.captured_headers = dict(self.headers)
+            self.send_response(200)
+            self.end_headers()
+            self.wfile.write(b"ok")
+
+        def log_message(self, format, *args):
+            pass
+
+    target = ThreadingHTTPServer(("127.0.0.1", 0), CaptureHeadersHandler)
+    target.captured_headers = {}
+    target_thread = threading.Thread(target=target.serve_forever, daemon=True)
+    target_thread.start()
+
+    try:
+        server = ServerPreset.tinyllama2()
+        server.api_key = TEST_API_KEY
+        server.ui_mcp_proxy = True
+        server.start()
+
+        res = server.make_request("GET", f"/cors-proxy?url=http://127.0.0.1:{target.server_port}/capture", headers={
+            "Authorization": f"Bearer {TEST_API_KEY}",
+            "Proxy-Authorization": "Basic secret",
+            "X-Api-Key": TEST_API_KEY,
+            "Cookie": "session=secret",
+            "x-llama-server-proxy-header-accept": "application/json",
+            "x-llama-server-proxy-header-authorization": "Bearer explicit",
+        })
+
+        assert res.status_code == 200
+        captured = {key.lower(): value for key, value in target.captured_headers.items()}
+        assert captured["accept"] == "application/json"
+        assert captured["authorization"] == "Bearer explicit"
+        assert "proxy-authorization" not in captured
+        assert "x-api-key" not in captured
+        assert "cookie" not in captured
+    finally:
+        target.shutdown()
+        target.server_close()
+
+
@pytest.mark.parametrize(
    "media_path, image_url, success",
    [
@@ -51,6 +51,9 @@ export const EXPECTED_THEMED_ICON_PAIR_COUNT = 2;
 /** CORS proxy URL query parameter name */
 export const CORS_PROXY_URL_PARAM = 'url';

+/** Header prefix for headers that should be forwarded by the CORS proxy */
+export const CORS_PROXY_HEADER_PREFIX = 'x-llama-server-proxy-header-';
+
 /** Number of trailing characters to keep visible when partially redacting mcp-session-id */
 export const MCP_SESSION_ID_VISIBLE_CHARS = 5;

@@ -16,6 +16,7 @@ import {
 	DEFAULT_MCP_CONFIG,
 	DEFAULT_CLIENT_VERSION,
 	DEFAULT_IMAGE_MIME_TYPE,
+	CORS_PROXY_HEADER_PREFIX,
 	MCP_PARTIAL_REDACT_HEADERS,
 	CORS_PROXY_ENDPOINT
 } from '$lib/constants';
@@ -133,6 +134,20 @@ export class MCPService {
 		return details;
 	}

+	private static addRequestHeaders(
+		requestHeaders: Headers,
+		headers: HeadersInit,
+		useProxy: boolean
+	) {
+		for (const [key, value] of new Headers(headers).entries()) {
+			const proxiedKey =
+				useProxy && !key.toLowerCase().startsWith(CORS_PROXY_HEADER_PREFIX)
+					? `${CORS_PROXY_HEADER_PREFIX}${key}`
+					: key;
+			requestHeaders.set(proxiedKey, value);
+		}
+	}
+
 	private static summarizeError(error: unknown): Record<string, unknown> {
 		if (error instanceof Error) {
 			return {
@@ -271,15 +286,11 @@ export class MCPService {
 				const requestHeaders = new Headers(baseInit.headers);

 				if (typeof Request !== 'undefined' && input instanceof Request) {
-					for (const [key, value] of input.headers.entries()) {
-						requestHeaders.set(key, value);
-					}
+					this.addRequestHeaders(requestHeaders, input.headers, useProxy);
 				}

 				if (init?.headers) {
-					for (const [key, value] of new Headers(init.headers).entries()) {
-						requestHeaders.set(key, value);
-					}
+					this.addRequestHeaders(requestHeaders, init.headers, useProxy);
 				}

 				const request = this.createDiagnosticRequestDetails(
@@ -1,5 +1,5 @@
 import { config } from '$lib/stores/settings.svelte';
-import { REDACTED_HEADERS } from '$lib/constants';
+import { CORS_PROXY_HEADER_PREFIX, REDACTED_HEADERS } from '$lib/constants';
 import { redactValue } from './redact';

 /**
@@ -52,11 +52,20 @@ export function sanitizeHeaders(

 	for (const [key, value] of normalized.entries()) {
 		const normalizedKey = key.toLowerCase();
-		const partialChars = partialRedactHeaders?.get(normalizedKey);
+		const unproxiedKey = normalizedKey.startsWith(CORS_PROXY_HEADER_PREFIX)
+			? normalizedKey.slice(CORS_PROXY_HEADER_PREFIX.length)
+			: normalizedKey;
+		const partialChars =
+			partialRedactHeaders?.get(normalizedKey) ?? partialRedactHeaders?.get(unproxiedKey);

 		if (partialChars !== undefined) {
 			sanitized[key] = redactValue(value, partialChars);
-		} else if (REDACTED_HEADERS.has(normalizedKey) || redactedHeaders.has(normalizedKey)) {
+		} else if (
+			REDACTED_HEADERS.has(normalizedKey) ||
+			REDACTED_HEADERS.has(unproxiedKey) ||
+			redactedHeaders.has(normalizedKey) ||
+			redactedHeaders.has(unproxiedKey)
+		) {
 			sanitized[key] = redactValue(value);
 		} else {
 			sanitized[key] = value;
@@ -3,7 +3,11 @@
 */

 import { base } from '$app/paths';
-import { CORS_PROXY_ENDPOINT, CORS_PROXY_URL_PARAM } from '$lib/constants';
+import {
+	CORS_PROXY_ENDPOINT,
+	CORS_PROXY_HEADER_PREFIX,
+	CORS_PROXY_URL_PARAM
+} from '$lib/constants';

 /**
 * Build a proxied URL that routes through llama-server's CORS proxy.
@@ -28,7 +32,7 @@ export function buildProxiedHeaders(headers: Record<string, string>): Record<str
 	const proxiedHeaders: Record<string, string> = {};

 	for (const [key, value] of Object.entries(headers)) {
-		proxiedHeaders[`x-proxy-header-${key}`] = value;
+		proxiedHeaders[`${CORS_PROXY_HEADER_PREFIX}${key}`] = value;
 	}

 	return proxiedHeaders;
@@ -39,8 +39,8 @@ test.describe('PWA Service Worker', () => {
 		const swContent = await swResponse.text();

 		// Precache contains SvelteKit content-hashed bundle paths
-		expect(swContent).toMatch(/"_app\/immutable\/bundle\.[a-zA-Z0-9-]+\.js"/);
-		expect(swContent).toMatch(/"_app\/immutable\/assets\/bundle\.[a-zA-Z0-9-]+\.css"/);
+		expect(swContent).toMatch(/"_app\/immutable\/bundle\.[a-zA-Z0-9_-]+\.js"/);
+		expect(swContent).toMatch(/"_app\/immutable\/assets\/bundle\.[a-zA-Z0-9_-]+\.css"/);
 		expect(swContent).toMatch(/"manifest\.webmanifest"/);
 		expect(swContent).toMatch(/"_app\/version\.json"/);
 		expect(swContent).toMatch(/NavigationRoute/);
@@ -99,8 +99,8 @@ test.describe('PWA Service Worker', () => {
 		const html = await response.text();

 		// SvelteKit outputs content-hashed bundle names in _app/immutable/
-		expect(html).toMatch(/href="(\.\/|\/)_app\/immutable\/bundle\.[a-zA-Z0-9-]+\.js"/);
-		expect(html).toMatch(/href="(\.\/|\/)_app\/immutable\/assets\/bundle\.[a-zA-Z0-9-]+\.css"/);
-		expect(html).toMatch(/import\("(\.\/|\/)_app\/immutable\/bundle\.[a-zA-Z0-9-]+\.js"\)/);
+		expect(html).toMatch(/href="(\.\/|\/)_app\/immutable\/bundle\.[a-zA-Z0-9_-]+\.js"/);
+		expect(html).toMatch(/href="(\.\/|\/)_app\/immutable\/assets\/bundle\.[a-zA-Z0-9_-]+\.css"/);
+		expect(html).toMatch(/import\("(\.\/|\/)_app\/immutable\/bundle\.[a-zA-Z0-9_-]+\.js"\)/);
 	});
 });
@@ -3,6 +3,7 @@ import { Client } from '@modelcontextprotocol/sdk/client';
 import { MCPService } from '$lib/services/mcp.service';
 import { MCPConnectionPhase, MCPTransportType } from '$lib/enums';
 import type { MCPConnectionLog, MCPServerConfig } from '$lib/types';
+import { CORS_PROXY_HEADER_PREFIX } from '$lib/constants';

 type DiagnosticFetchFactory = (
 	serverName: string,
@@ -16,11 +17,12 @@ type DiagnosticFetchFactory = (
 const createDiagnosticFetch = (
 	config: MCPServerConfig,
 	onLog?: (log: MCPConnectionLog) => void,
-	baseInit: RequestInit = {}
+	baseInit: RequestInit = {},
+	useProxy = false
 ) =>
 	(
 		MCPService as unknown as { createDiagnosticFetch: DiagnosticFetchFactory }
-	).createDiagnosticFetch('test-server', config, baseInit, new URL(config.url), false, onLog);
+	).createDiagnosticFetch('test-server', config, baseInit, new URL(config.url), useProxy, onLog);

 describe('MCPService', () => {
 	afterEach(() => {
@@ -94,6 +96,64 @@ describe('MCPService', () => {
 		});
 	});

+	it('wraps dynamic request headers when using the CORS proxy', async () => {
+		const logs: MCPConnectionLog[] = [];
+		const proxiedAuthToken = `${CORS_PROXY_HEADER_PREFIX}x-auth-token`;
+		const proxiedContentType = `${CORS_PROXY_HEADER_PREFIX}content-type`;
+		const proxiedSessionId = `${CORS_PROXY_HEADER_PREFIX}mcp-session-id`;
+		const response = new Response('{}', {
+			status: 200,
+			headers: { 'content-type': 'application/json' }
+		});
+		const fetchMock = vi.fn().mockResolvedValue(response);
+
+		vi.stubGlobal('fetch', fetchMock);
+
+		const config: MCPServerConfig = {
+			url: 'https://example.com/mcp',
+			transport: MCPTransportType.STREAMABLE_HTTP,
+			useProxy: true
+		};
+
+		const controller = createDiagnosticFetch(
+			config,
+			(log) => logs.push(log),
+			{
+				headers: {
+					authorization: 'Bearer llama-server-key',
+					[proxiedAuthToken]: 'target-token'
+				}
+			},
+			true
+		);
+
+		await controller.fetch('http://localhost:8080/cors-proxy?url=https%3A%2F%2Fexample.com%2Fmcp', {
+			method: 'POST',
+			headers: {
+				'content-type': 'application/json',
+				'mcp-session-id': 'session-request-12345'
+			},
+			body: '{}'
+		});
+
+		const sentHeaders = fetchMock.mock.calls[0]?.[1]?.headers as Headers;
+		expect(sentHeaders.get('authorization')).toBe('Bearer llama-server-key');
+		expect(sentHeaders.get(proxiedAuthToken)).toBe('target-token');
+		expect(sentHeaders.get(proxiedContentType)).toBe('application/json');
+		expect(sentHeaders.get(proxiedSessionId)).toBe('session-request-12345');
+		expect(sentHeaders.has('content-type')).toBe(false);
+		expect(sentHeaders.has('mcp-session-id')).toBe(false);
+		expect(logs[0].details).toMatchObject({
+			request: {
+				headers: {
+					authorization: '[redacted]',
+					[proxiedAuthToken]: '[redacted]',
+					[proxiedSessionId]: '....12345'
+				}
+			}
+		});
+	});
+
 	it('partially redacts mcp-session-id in diagnostic request and response logs', async () => {
 		const logs: MCPConnectionLog[] = [];
 		const response = new Response('{}', {
@@ -1,5 +1,6 @@
 import { describe, expect, it } from 'vitest';
 import { sanitizeHeaders } from '$lib/utils/api-headers';
+import { CORS_PROXY_HEADER_PREFIX } from '$lib/constants';

 describe('sanitizeHeaders', () => {
 	it('returns empty object for undefined input', () => {
@@ -52,4 +53,21 @@ describe('sanitizeHeaders', () => {
 		const result = sanitizeHeaders(headers, ['X-CUSTOM-TOKEN']);
 		expect(result['x-custom-token']).toBe('[redacted]');
 	});
+
+	it('redacts proxied sensitive and custom target headers', () => {
+		const proxiedAuthorization = `${CORS_PROXY_HEADER_PREFIX}authorization`;
+		const proxiedSessionId = `${CORS_PROXY_HEADER_PREFIX}mcp-session-id`;
+		const proxiedVendorKey = `${CORS_PROXY_HEADER_PREFIX}x-vendor-key`;
+		const headers = new Headers({
+			[proxiedAuthorization]: 'Bearer secret',
+			[proxiedSessionId]: 'session-12345',
+			[proxiedVendorKey]: 'vendor-secret'
+		});
+		const partial = new Map([['mcp-session-id', 5]]);
+		const result = sanitizeHeaders(headers, ['x-vendor-key'], partial);
+
+		expect(result[proxiedAuthorization]).toBe('[redacted]');
+		expect(result[proxiedSessionId]).toBe('....12345');
+		expect(result[proxiedVendorKey]).toBe('[redacted]');
+	});
 });
Author	SHA1	Message	Date
Adrien Gallouët	84de01a1f1	llama : use LLM_KV for quantization_version & file_type (#24802 ) Signed-off-by: Adrien Gallouët <angt@huggingface.co>	2026-06-20 20:07:01 +02:00
Xuan-Son Nguyen	75f460ac28	arg: try fixing test-args-parser randomly fails (#24826 ) * arg: try fixing test-args-parser randomly fails * return ref * try triggering the workflow * exception wrapper * wip * test * test 2 * arg: guard win32 utf8 argv override make_utf8_argv rebuilds argv from GetCommandLineW to fix utf8 handling of non ascii arguments on windows. the override runs unconditionally inside common_params_parse, so it also clobbers a programmatic argv passed by a caller. test-arg-parser builds a synthetic argv but then sees the real process command line instead, the model argument is never parsed, and the assert that expects success aborts via fastfail (0xC0000409). this shows up as a random failure in the openvino windows workflow. only override argv when its length matches the caller argc, so the utf8 repair still applies to real binaries while a programmatic argv stays intact. --------- Co-authored-by: Pascal <admin@serveurperso.com>	2026-06-20 19:45:27 +02:00
Muhammad Salem	8452824611	release: add missing link for win opencl adreno arm64 (#24809 )	2026-06-20 23:08:59 +08:00
Matti4	e27f308597	server: avoid forwarding auth headers in CORS proxy (#24373 ) * server: avoid forwarding auth headers in CORS proxy * format * fix test * fix e2e test --------- Co-authored-by: Xuan Son Nguyen <son@huggingface.co>	2026-06-20 15:34:47 +02:00
Aldehir Rojas	67e9fd3b74	docker : prebuild web UI for s390x build [no release] (#24829 )	2026-06-20 05:54:42 -05:00