cmake : add utf8 compilation options for msvc (#17682)

Server: Change Invalid Schema from Server Error (500) to User Error (400) (#17572)
* Make invalid schema a user error (400)
* Move invalid_argument exception handler to ex_wrapper
* Fix test
* Simplify test back to original pattern
2026-07-02 18:47:43 +02:00 · 2025-12-02 19:50:57 +02:00 · 2025-12-02 17:33:50 +01:00 · 2025-12-02 18:21:11 +02:00 · 2025-12-02 20:35:23 +08:00 · 2025-12-02 12:52:45 +01:00
13 changed files with 110 additions and 102 deletions
@@ -7,7 +7,7 @@
 /ci/                                    @ggerganov
 /cmake/                                 @ggerganov
 /common/CMakeLists.txt                  @ggerganov
-/common/arg.*                           @ggerganov @ericcurtin
+/common/arg.*                           @ggerganov
 /common/base64.hpp.*                    @ggerganov
 /common/build-info.*                    @ggerganov
 /common/common.*                        @ggerganov
@@ -87,8 +87,7 @@
 /tools/perplexity/                      @ggerganov
 /tools/quantize/                        @ggerganov
 /tools/rpc/                             @rgerganov
-/tools/run/                             @ericcurtin
-/tools/server/*                         @ngxson @ggerganov @ericcurtin # no subdir
+/tools/server/*                         @ngxson @ggerganov # no subdir
 /tools/server/webui/                    @allozaur
 /tools/tokenize/                        @ggerganov
 /tools/tts/                             @ggerganov
@@ -163,7 +163,7 @@ common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::strin
    if (tool_choice == "required") {
        return COMMON_CHAT_TOOL_CHOICE_REQUIRED;
    }
-    throw std::runtime_error("Invalid tool_choice: " + tool_choice);
+    throw std::invalid_argument("Invalid tool_choice: " + tool_choice);
 }

 bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates) {
@@ -186,17 +186,17 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
    try {

        if (!messages.is_array()) {
-            throw std::runtime_error("Expected 'messages' to be an array, got " + messages.dump());
+            throw std::invalid_argument("Expected 'messages' to be an array, got " + messages.dump());
        }

        for (const auto & message : messages) {
            if (!message.is_object()) {
-                throw std::runtime_error("Expected 'message' to be an object, got " + message.dump());
+                throw std::invalid_argument("Expected 'message' to be an object, got " + message.dump());
            }

            common_chat_msg msg;
            if (!message.contains("role")) {
-                throw std::runtime_error("Missing 'role' in message: " + message.dump());
+                throw std::invalid_argument("Missing 'role' in message: " + message.dump());
            }
            msg.role = message.at("role");

@@ -209,11 +209,11 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                } else if (content.is_array()) {
                    for (const auto & part : content) {
                        if (!part.contains("type")) {
-                            throw std::runtime_error("Missing content part type: " + part.dump());
+                            throw std::invalid_argument("Missing content part type: " + part.dump());
                        }
                        const auto & type = part.at("type");
                        if (type != "text") {
-                            throw std::runtime_error("Unsupported content part type: " + type.dump());
+                            throw std::invalid_argument("Unsupported content part type: " + type.dump());
                        }
                        common_chat_msg_content_part msg_part;
                        msg_part.type = type;
@@ -221,25 +221,25 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                        msg.content_parts.push_back(msg_part);
                    }
                } else if (!content.is_null()) {
-                    throw std::runtime_error("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
+                    throw std::invalid_argument("Invalid 'content' type: expected string or array, got " + content.dump() + " (ref: https://github.com/ggml-org/llama.cpp/issues/8367)");
                }
            }
            if (has_tool_calls) {
                for (const auto & tool_call : message.at("tool_calls")) {
                    common_chat_tool_call tc;
                    if (!tool_call.contains("type")) {
-                        throw std::runtime_error("Missing tool call type: " + tool_call.dump());
+                        throw std::invalid_argument("Missing tool call type: " + tool_call.dump());
                    }
                    const auto & type = tool_call.at("type");
                    if (type != "function") {
-                        throw std::runtime_error("Unsupported tool call type: " + tool_call.dump());
+                        throw std::invalid_argument("Unsupported tool call type: " + tool_call.dump());
                    }
                    if (!tool_call.contains("function")) {
-                        throw std::runtime_error("Missing tool call function: " + tool_call.dump());
+                        throw std::invalid_argument("Missing tool call function: " + tool_call.dump());
                    }
                    const auto & fc = tool_call.at("function");
                    if (!fc.contains("name")) {
-                        throw std::runtime_error("Missing tool call name: " + tool_call.dump());
+                        throw std::invalid_argument("Missing tool call name: " + tool_call.dump());
                    }
                    tc.name = fc.at("name");
                    tc.arguments = fc.at("arguments");
@@ -250,7 +250,7 @@ std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const json & messa
                }
            }
            if (!has_content && !has_tool_calls) {
-                throw std::runtime_error("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
+                throw std::invalid_argument("Expected 'content' or 'tool_calls' (ref: https://github.com/ggml-org/llama.cpp/issues/8367 & https://github.com/ggml-org/llama.cpp/issues/12279)");
            }
            if (message.contains("reasoning_content")) {
                msg.reasoning_content = message.at("reasoning_content");
@@ -353,18 +353,18 @@ std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const json & too
    try {
        if (!tools.is_null()) {
            if (!tools.is_array()) {
-                throw std::runtime_error("Expected 'tools' to be an array, got " + tools.dump());
+                throw std::invalid_argument("Expected 'tools' to be an array, got " + tools.dump());
            }
            for (const auto & tool : tools) {
                if (!tool.contains("type")) {
-                    throw std::runtime_error("Missing tool type: " + tool.dump());
+                    throw std::invalid_argument("Missing tool type: " + tool.dump());
                }
                const auto & type = tool.at("type");
                if (!type.is_string() || type != "function") {
-                    throw std::runtime_error("Unsupported tool type: " + tool.dump());
+                    throw std::invalid_argument("Unsupported tool type: " + tool.dump());
                }
                if (!tool.contains("function")) {
-                    throw std::runtime_error("Missing tool function: " + tool.dump());
+                    throw std::invalid_argument("Missing tool function: " + tool.dump());
                }

                const auto & function = tool.at("function");
@@ -974,7 +974,7 @@ public:

    void check_errors() {
        if (!_errors.empty()) {
-            throw std::runtime_error("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
+            throw std::invalid_argument("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
        }
        if (!_warnings.empty()) {
            fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", string_join(_warnings, "; ").c_str());
@@ -2842,6 +2842,10 @@ class Mistral3Model(LlamaModel):
            self.gguf_writer.add_attn_temperature_scale(rope_params["llama_4_scaling_beta"])

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
+        # TODO: probably not worth supporting quantized weight, as official BF16 is also available
+        if name.endswith("weight_scale_inv"):
+            raise ValueError("This is a quantized weight, please use BF16 weight instead")
+
        name = name.replace("language_model.", "")
        if "multi_modal_projector" in name or "vision_tower" in name:
            return []
@@ -408,62 +408,67 @@ if (MSVC)
        /wd4996  # Disable POSIX deprecation warnings
        /wd4702  # Unreachable code warnings
    )
-    function(disable_msvc_warnings target_name)
+    set(MSVC_COMPILE_OPTIONS
+        "$<$<COMPILE_LANGUAGE:C>:/utf-8>"
+        "$<$<COMPILE_LANGUAGE:CXX>:/utf-8>"
+    )
+    function(configure_msvc_target target_name)
        if(TARGET ${target_name})
            target_compile_options(${target_name} PRIVATE ${MSVC_WARNING_FLAGS})
+            target_compile_options(${target_name} PRIVATE ${MSVC_COMPILE_OPTIONS})
        endif()
    endfunction()

-    disable_msvc_warnings(ggml-base)
-    disable_msvc_warnings(ggml)
-    disable_msvc_warnings(ggml-cpu)
-    disable_msvc_warnings(ggml-cpu-x64)
-    disable_msvc_warnings(ggml-cpu-sse42)
-    disable_msvc_warnings(ggml-cpu-sandybridge)
-    disable_msvc_warnings(ggml-cpu-haswell)
-    disable_msvc_warnings(ggml-cpu-skylakex)
-    disable_msvc_warnings(ggml-cpu-icelake)
-    disable_msvc_warnings(ggml-cpu-alderlake)
+    configure_msvc_target(ggml-base)
+    configure_msvc_target(ggml)
+    configure_msvc_target(ggml-cpu)
+    configure_msvc_target(ggml-cpu-x64)
+    configure_msvc_target(ggml-cpu-sse42)
+    configure_msvc_target(ggml-cpu-sandybridge)
+    configure_msvc_target(ggml-cpu-haswell)
+    configure_msvc_target(ggml-cpu-skylakex)
+    configure_msvc_target(ggml-cpu-icelake)
+    configure_msvc_target(ggml-cpu-alderlake)

    if (GGML_BUILD_EXAMPLES)
-        disable_msvc_warnings(common-ggml)
-        disable_msvc_warnings(common)
+        configure_msvc_target(common-ggml)
+        configure_msvc_target(common)

-        disable_msvc_warnings(mnist-common)
-        disable_msvc_warnings(mnist-eval)
-        disable_msvc_warnings(mnist-train)
+        configure_msvc_target(mnist-common)
+        configure_msvc_target(mnist-eval)
+        configure_msvc_target(mnist-train)

-        disable_msvc_warnings(gpt-2-ctx)
-        disable_msvc_warnings(gpt-2-alloc)
-        disable_msvc_warnings(gpt-2-backend)
-        disable_msvc_warnings(gpt-2-sched)
-        disable_msvc_warnings(gpt-2-quantize)
-        disable_msvc_warnings(gpt-2-batched)
+        configure_msvc_target(gpt-2-ctx)
+        configure_msvc_target(gpt-2-alloc)
+        configure_msvc_target(gpt-2-backend)
+        configure_msvc_target(gpt-2-sched)
+        configure_msvc_target(gpt-2-quantize)
+        configure_msvc_target(gpt-2-batched)

-        disable_msvc_warnings(gpt-j)
-        disable_msvc_warnings(gpt-j-quantize)
+        configure_msvc_target(gpt-j)
+        configure_msvc_target(gpt-j-quantize)

-        disable_msvc_warnings(magika)
-        disable_msvc_warnings(yolov3-tiny)
-        disable_msvc_warnings(sam)
+        configure_msvc_target(magika)
+        configure_msvc_target(yolov3-tiny)
+        configure_msvc_target(sam)

-        disable_msvc_warnings(simple-ctx)
-        disable_msvc_warnings(simple-backend)
+        configure_msvc_target(simple-ctx)
+        configure_msvc_target(simple-backend)
    endif()

    if (GGML_BUILD_TESTS)
-        disable_msvc_warnings(test-mul-mat)
-        disable_msvc_warnings(test-arange)
-        disable_msvc_warnings(test-backend-ops)
-        disable_msvc_warnings(test-cont)
-        disable_msvc_warnings(test-conv-transpose)
-        disable_msvc_warnings(test-conv-transpose-1d)
-        disable_msvc_warnings(test-conv1d)
-        disable_msvc_warnings(test-conv2d)
-        disable_msvc_warnings(test-conv2d-dw)
-        disable_msvc_warnings(test-customop)
-        disable_msvc_warnings(test-dup)
-        disable_msvc_warnings(test-opt)
-        disable_msvc_warnings(test-pool)
+        configure_msvc_target(test-mul-mat)
+        configure_msvc_target(test-arange)
+        configure_msvc_target(test-backend-ops)
+        configure_msvc_target(test-cont)
+        configure_msvc_target(test-conv-transpose)
+        configure_msvc_target(test-conv-transpose-1d)
+        configure_msvc_target(test-conv1d)
+        configure_msvc_target(test-conv2d)
+        configure_msvc_target(test-conv2d-dw)
+        configure_msvc_target(test-customop)
+        configure_msvc_target(test-dup)
+        configure_msvc_target(test-opt)
+        configure_msvc_target(test-pool)
    endif ()
 endif()
@@ -1240,10 +1240,8 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
                                tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
                                ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c);
                            }
-                            if (sched->n_copies > 1) {
-                                ggml_set_input(tensor_copy);
-                                ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
-                            }
+                            ggml_set_input(tensor_copy);
+                            ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
                            tensor_id_copy(src_id, src_backend_id, c) = tensor_copy;
                            SET_CAUSE(tensor_copy, "4.cpy");
                        }
@@ -2564,6 +2564,10 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev, const ggml_ten
            return true;
        case GGML_OP_OUT_PROD:
            {
+#ifdef ASCEND_310P
+                // Ger is not supported on 310p device
+                return false;
+#endif
                switch (op->src[0]->type) {
                    case GGML_TYPE_F16:
                    case GGML_TYPE_F32:
@@ -683,22 +683,14 @@ bool ggml_is_numa(void) {
 }

 #if defined(__ARM_ARCH)
-
-#if defined(__linux__) && defined(__aarch64__)
-#include <sys/auxv.h>
-#endif
-
-static void ggml_init_arm_arch_features(void) {
 #if defined(__aarch64__) && defined(__ARM_FEATURE_SVE)
-#if defined(__linux__)
-    ggml_arm_arch_features.sve_cnt = PR_SVE_VL_LEN_MASK & prctl(PR_SVE_GET_VL);
-#else
-    // TODO: add support of SVE for non-linux systems
-#error "TODO: SVE is not supported on this platform. To use SVE, sve_cnt needs to be initialized here."
-#endif
-#endif
+#include <arm_sve.h>
+static void ggml_init_arm_arch_features(void) {
+    ggml_arm_arch_features.sve_cnt = svcntb();
 }
-
+#else
+static void ggml_init_arm_arch_features(void) {}
+#endif
 #endif // __ARM_ARCH

 struct ggml_tensor * ggml_new_i32(struct ggml_context * ctx, int32_t value) {
@@ -485,7 +485,7 @@ struct llama_mlock::impl {
        if (suggest && getrlimit(RLIMIT_MEMLOCK, &lock_limit)) {
            suggest = false;
        }
-        if (suggest && (lock_limit.rlim_max > lock_limit.rlim_cur + size)) {
+        if (suggest && ((uint64_t)lock_limit.rlim_max > (uint64_t)lock_limit.rlim_cur + size)) {
            suggest = false;
        }
 #endif
@@ -1375,7 +1375,7 @@ int main() {
        try {
            tc.verify(json_schema_to_grammar(nlohmann::ordered_json::parse(tc.schema), true));
            tc.verify_status(SUCCESS);
-        } catch (const std::runtime_error & ex) {
+        } catch (const std::invalid_argument & ex) {
            fprintf(stderr, "Error: %s\n", ex.what());
            tc.verify_status(FAILURE);
        }
@@ -819,26 +819,26 @@ json oaicompat_chat_params_parse(
            auto schema_wrapper = json_value(response_format, "json_schema", json::object());
            json_schema = json_value(schema_wrapper, "schema", json::object());
        } else if (!response_type.empty() && response_type != "text") {
-            throw std::runtime_error("response_format type must be one of \"text\" or \"json_object\", but got: " + response_type);
+            throw std::invalid_argument("response_format type must be one of \"text\" or \"json_object\", but got: " + response_type);
        }
    }

    // get input files
    if (!body.contains("messages")) {
-        throw std::runtime_error("'messages' is required");
+        throw std::invalid_argument("'messages' is required");
    }
    json & messages = body.at("messages");
    if (!messages.is_array()) {
-        throw std::runtime_error("Expected 'messages' to be an array");
+        throw std::invalid_argument("Expected 'messages' to be an array");
    }
    for (auto & msg : messages) {
        std::string role = json_value(msg, "role", std::string());
        if (role != "assistant" && !msg.contains("content")) {
-            throw std::runtime_error("All non-assistant messages must contain 'content'");
+            throw std::invalid_argument("All non-assistant messages must contain 'content'");
        }
        if (role == "assistant") {
            if (!msg.contains("content") && !msg.contains("tool_calls")) {
-                throw std::runtime_error("Assistant message must contain either 'content' or 'tool_calls'!");
+                throw std::invalid_argument("Assistant message must contain either 'content' or 'tool_calls'!");
            }
            if (!msg.contains("content")) {
                continue; // avoid errors with no content
@@ -850,7 +850,7 @@ json oaicompat_chat_params_parse(
        }

        if (!content.is_array()) {
-            throw std::runtime_error("Expected 'content' to be a string or an array");
+            throw std::invalid_argument("Expected 'content' to be a string or an array");
        }

        for (auto & p : content) {
@@ -884,11 +884,11 @@ json oaicompat_chat_params_parse(
                    // try to decode base64 image
                    std::vector<std::string> parts = string_split<std::string>(url, /*separator*/ ',');
                    if (parts.size() != 2) {
-                        throw std::runtime_error("Invalid image_url.url value");
+                        throw std::invalid_argument("Invalid image_url.url value");
                    } else if (!string_starts_with(parts[0], "data:image/")) {
-                        throw std::runtime_error("Invalid image_url.url format: " + parts[0]);
+                        throw std::invalid_argument("Invalid image_url.url format: " + parts[0]);
                    } else if (!string_ends_with(parts[0], "base64")) {
-                        throw std::runtime_error("image_url.url must be base64 encoded");
+                        throw std::invalid_argument("image_url.url must be base64 encoded");
                    } else {
                        auto base64_data = parts[1];
                        auto decoded_data = base64_decode(base64_data);
@@ -911,7 +911,7 @@ json oaicompat_chat_params_parse(
                std::string format = json_value(input_audio, "format", std::string());
                // while we also support flac, we don't allow it here so we matches the OAI spec
                if (format != "wav" && format != "mp3") {
-                    throw std::runtime_error("input_audio.format must be either 'wav' or 'mp3'");
+                    throw std::invalid_argument("input_audio.format must be either 'wav' or 'mp3'");
                }
                auto decoded_data = base64_decode(data); // expected to be base64 encoded
                out_files.push_back(decoded_data);
@@ -922,7 +922,7 @@ json oaicompat_chat_params_parse(
                p.erase("input_audio");

            } else if (type != "text") {
-                throw std::runtime_error("unsupported content[].type");
+                throw std::invalid_argument("unsupported content[].type");
            }
        }
    }
@@ -940,7 +940,7 @@ json oaicompat_chat_params_parse(
    inputs.enable_thinking       = opt.enable_thinking;
    if (!inputs.tools.empty() && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
        if (body.contains("grammar")) {
-            throw std::runtime_error("Cannot use custom grammar constraints with tools.");
+            throw std::invalid_argument("Cannot use custom grammar constraints with tools.");
        }
        llama_params["parse_tool_calls"] = true;
    }
@@ -959,7 +959,7 @@ json oaicompat_chat_params_parse(
    } else if (enable_thinking_kwarg == "false") {
        inputs.enable_thinking = false;
    } else if (!enable_thinking_kwarg.empty() && enable_thinking_kwarg[0] == '"') {
-        throw std::runtime_error("invalid type for \"enable_thinking\" (expected boolean, got string)");
+        throw std::invalid_argument("invalid type for \"enable_thinking\" (expected boolean, got string)");
    }

    // if the assistant message appears at the end of list, we do not add end-of-turn token
@@ -972,14 +972,14 @@ json oaicompat_chat_params_parse(

        /* sanity check, max one assistant message at the end of the list */
        if (!inputs.messages.empty() && inputs.messages.back().role == "assistant"){
-            throw std::runtime_error("Cannot have 2 or more assistant messages at the end of the list.");
+            throw std::invalid_argument("Cannot have 2 or more assistant messages at the end of the list.");
        }

        /* TODO: test this properly */
        inputs.reasoning_format = COMMON_REASONING_FORMAT_NONE;

        if ( inputs.enable_thinking ) {
-            throw std::runtime_error("Assistant response prefill is incompatible with enable_thinking.");
+            throw std::invalid_argument("Assistant response prefill is incompatible with enable_thinking.");
        }

        inputs.add_generation_prompt = true;
@@ -1020,18 +1020,18 @@ json oaicompat_chat_params_parse(
    // Handle "n" field
    int n_choices = json_value(body, "n", 1);
    if (n_choices != 1) {
-        throw std::runtime_error("Only one completion choice is allowed");
+        throw std::invalid_argument("Only one completion choice is allowed");
    }

    // Handle "logprobs" field
    // TODO: The response format of this option is not yet OAI-compatible, but seems like no one really using it; We may need to fix it in the future
    if (json_value(body, "logprobs", false)) {
        if (has_tools && stream) {
-            throw std::runtime_error("logprobs is not supported with tools + stream");
+            throw std::invalid_argument("logprobs is not supported with tools + stream");
        }
        llama_params["n_probs"] = json_value(body, "top_logprobs", 20);
    } else if (body.contains("top_logprobs") && !body.at("top_logprobs").is_null()) {
-        throw std::runtime_error("top_logprobs requires logprobs to be set to true");
+        throw std::invalid_argument("top_logprobs requires logprobs to be set to true");
    }

    // Copy remaining properties to llama_params
@@ -34,18 +34,24 @@ static inline void signal_handler(int signal) {
 static server_http_context::handler_t ex_wrapper(server_http_context::handler_t func) {
    return [func = std::move(func)](const server_http_req & req) -> server_http_res_ptr {
        std::string message;
+        error_type error;
        try {
            return func(req);
+        } catch (const std::invalid_argument & e) {
+            error = ERROR_TYPE_INVALID_REQUEST;
+            message = e.what();
        } catch (const std::exception & e) {
+            error = ERROR_TYPE_SERVER;
            message = e.what();
        } catch (...) {
+            error = ERROR_TYPE_SERVER;
            message = "unknown error";
        }

        auto res = std::make_unique<server_http_res>();
        res->status = 500;
        try {
-            json error_data = format_error_response(message, ERROR_TYPE_SERVER);
+            json error_data = format_error_response(message, error);
            res->status = json_value(error_data, "code", 500);
            res->data = safe_json_to_str({{ "error", error_data }});
            SRV_WRN("got exception: %s\n", res->data.c_str());
@@ -199,7 +199,7 @@ def test_completion_with_response_format(response_format: dict, n_predicted: int
        choice = res.body["choices"][0]
        assert match_regex(re_content, choice["message"]["content"])
    else:
-        assert res.status_code != 200
+        assert res.status_code == 400
        assert "error" in res.body
Author	SHA1	Message	Date
xiaobing318	e251e5ebbe	cmake : add utf8 compilation options for msvc (#17682)	2025-12-02 19:50:57 +02:00
Chad Voegele	c4357dcc35	Server: Change Invalid Schema from Server Error (500) to User Error (400) (#17572) * Make invalid schema a user error (400) * Move invalid_argument exception handler to ex_wrapper * Fix test * Simplify test back to original pattern	2025-12-02 17:33:50 +01:00
Adrien Gallouët	e148380c7c	ggml : use svcntb() for SVE vector length detection (#17474) Signed-off-by: Adrien Gallouët <angt@huggingface.co>	2025-12-02 18:21:11 +02:00
TianHao324	a2b0fe8d37	CANN: Disable Ger operator of OUT_PROD on 310p device (#17563)	2025-12-02 20:35:23 +08:00
Daniel Bevenius	7f3a72a8ed	ggml : remove redundant n_copies check when setting input/output (#17612) This commit removes a redundant check for sched->n_copies > 1 when setting input and output flags on tensor copies in ggml_backend_sched_split_graph. The motivation for this change is to clarify the code as the outer if statement already performs this check.	2025-12-02 12:52:45 +01:00
Eric Curtin	b9a37717b0	codeowners : remove ericcurtin (#17658) Taking a break from llama.cpp. I wasn't around at the start of llama.cpp but I want to thank @ggerganov and @slaren for creating a neat community here. Signed-off-by: Eric Curtin <eric.curtin@docker.com>	2025-12-02 12:18:15 +01:00
Adrien Gallouët	f3a9674ae8	llama : fix signed comparison warning on FreeBSD (#17497) This ensures correct RLIM_INFINITY handling and compatibility on all platforms (32/64-bit). warning: comparison of integers of different signs: 'rlim_t' (aka 'long') and 'size_t' (aka 'unsigned long') [-Wsign-compare] 488 \| if (suggest && (lock_limit.rlim_max > lock_limit.rlim_cur + size)) { \| ~~~~~~~~~~~~~~~~~~~ ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Adrien Gallouët <angt@huggingface.co>	2025-12-02 12:05:38 +01:00
Xuan-Son Nguyen	2c453c6c77	convert: add error message for mistral3 quantized weight (#17686)	2025-12-02 11:48:31 +01:00