Compare commits

..

3 Commits

Author SHA1 Message Date
Adrien Gallouët 84de01a1f1 llama : use LLM_KV for quantization_version & file_type (#24802)
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
2026-06-20 20:07:01 +02:00
Xuan-Son Nguyen 75f460ac28 arg: try fixing test-args-parser randomly fails (#24826)
* arg: try fixing test-args-parser randomly fails

* return ref

* try triggering the workflow

* exception wrapper

* wip

* test

* test 2

* arg: guard win32 utf8 argv override

make_utf8_argv rebuilds argv from GetCommandLineW to fix utf8 handling of
non ascii arguments on windows. the override runs unconditionally inside
common_params_parse, so it also clobbers a programmatic argv passed by a
caller. test-arg-parser builds a synthetic argv but then sees the real
process command line instead, the model argument is never parsed, and the
assert that expects success aborts via fastfail (0xC0000409). this shows up
as a random failure in the openvino windows workflow.

only override argv when its length matches the caller argc, so the utf8
repair still applies to real binaries while a programmatic argv stays intact.

---------

Co-authored-by: Pascal <admin@serveurperso.com>
2026-06-20 19:45:27 +02:00
Muhammad Salem 8452824611 release: add missing link for win opencl adreno arm64 (#24809) 2026-06-20 23:08:59 +08:00
4 changed files with 17 additions and 6 deletions
+1
View File
@@ -1627,6 +1627,7 @@ jobs:
**Windows:**
- [Windows x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-x64.zip)
- [Windows arm64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-arm64.zip)
- [Windows arm64 (OpenCL Adreno)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-opencl-adreno-arm64.zip)
- [Windows x64 (CUDA 12)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip) - [CUDA 12.4 DLLs](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-12.4-x64.zip)
- [Windows x64 (CUDA 13)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-13.3-x64.zip) - [CUDA 13.3 DLLs](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-13.3-x64.zip)
- [Windows x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-vulkan-x64.zip)
+3 -3
View File
@@ -924,8 +924,8 @@ static utf8_argv make_utf8_argv() {
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
#ifdef _WIN32
auto utf8 = make_utf8_argv();
if (!utf8.ptrs.empty()) {
argc = static_cast<int>(utf8.buf.size());
// repair argv only when it matches the process command line
if (static_cast<int>(utf8.buf.size()) == argc) {
argv = utf8.ptrs.data();
}
#endif
@@ -2897,7 +2897,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.server_tools = parse_csv_row(value);
}
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_TOOLS"));
add_opt(common_arg(
add_opt(common_arg(
{"-ag", "--agent"},
{"-no-ag", "--no-agent"},
"whether to enable CORS proxy and all built-in tools - do not enable in untrusted environments (default: disabled)",
+2 -2
View File
@@ -932,8 +932,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
// copy the KV pairs from the input file
gguf_set_kv (ctx_out.get(), ml.metadata);
gguf_set_val_u32(ctx_out.get(), "general.quantization_version", GGML_QNT_VERSION); // TODO: use LLM_KV
gguf_set_val_u32(ctx_out.get(), "general.file_type", ftype); // TODO: use LLM_KV
gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_GENERAL_QUANTIZATION_VERSION).c_str(), GGML_QNT_VERSION);
gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_GENERAL_FILE_TYPE).c_str(), ftype);
// Remove split metadata
gguf_remove_key(ctx_out.get(), ml.llm_kv(LLM_KV_SPLIT_NO).c_str());
+11 -1
View File
@@ -10,7 +10,7 @@
#undef NDEBUG
#include <cassert>
int main(void) {
static void test(void) {
common_params params;
printf("test-arg-parser: make sure there is no duplicated arguments in any examples\n\n");
@@ -210,3 +210,13 @@ int main(void) {
printf("test-arg-parser: all tests OK\n\n");
}
int main(void) {
try {
test();
} catch (std::exception & e) {
fprintf(stderr, "test-arg-parser: exception: %s\n", e.what());
return 1;
}
return 0;
}