Compare commits

..

3 Commits

Author SHA1 Message Date
Xuan-Son Nguyen e475fa2b5f mtmd, arg: fix utf8 handling on windows (#24779)
* mtmd, arg: fix utf8 handling on windows

* also fix ggml_fopen

* fix build fail

* also fix CLI
2026-06-19 22:28:38 +02:00
Xuan-Son Nguyen 175147e8f6 server: remove all internal mentions about "webui" (#24817) 2026-06-19 22:12:46 +02:00
Mikolaj Kucharski fabde3bf51 arg: Add comment line support to --api-key-file (#23168) 2026-06-19 17:33:54 +02:00
17 changed files with 124 additions and 42 deletions
+40 -2
View File
@@ -17,6 +17,7 @@
# define NOMINMAX
#endif
#include <windows.h>
#include <shellapi.h>
#endif
#define JSON_ASSERT GGML_ASSERT
@@ -893,7 +894,44 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<com
return true;
}
#ifdef _WIN32
struct utf8_argv {
std::vector<std::string> buf;
std::vector<char*> ptrs;
};
static utf8_argv make_utf8_argv() {
utf8_argv out;
int wargc = 0;
LPWSTR* wargv = CommandLineToArgvW(GetCommandLineW(), &wargc);
if (!wargv) return out;
out.buf.reserve(wargc);
for (int i = 0; i < wargc; ++i) {
int n = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wargv[i], -1, nullptr, 0, nullptr, nullptr);
if (n <= 0) { out.buf.emplace_back(); continue; }
auto& s = out.buf.emplace_back();
s.resize(static_cast<size_t>(n - 1));
(void)WideCharToMultiByte(CP_UTF8, 0, wargv[i], -1, s.data(), n, nullptr, nullptr);
}
LocalFree(wargv);
out.ptrs.reserve(out.buf.size() + 1);
for (auto& s : out.buf) out.ptrs.push_back(s.data());
out.ptrs.push_back(nullptr);
return out;
}
#endif
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
#ifdef _WIN32
auto utf8 = make_utf8_argv();
if (!utf8.ptrs.empty()) {
argc = static_cast<int>(utf8.buf.size());
argv = utf8.ptrs.data();
}
#endif
auto ctx_arg = common_params_parser_init(params, ex, print_usage);
const common_params params_org = ctx_arg.params; // the example can modify the default params
@@ -2911,7 +2949,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_API_KEY"));
add_opt(common_arg(
{"--api-key-file"}, "FNAME",
"path to file containing API keys (default: none)",
"path to file containing API keys, one per line; lines starting with a hash are treated as comments (default: none)",
[](common_params & params, const std::string & value) {
std::ifstream key_file(value);
if (!key_file) {
@@ -2919,7 +2957,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
}
std::string key;
while (std::getline(key_file, key)) {
if (!key.empty()) {
if (!key.empty() && key[0] != '#') {
params.api_keys.push_back(key);
}
}
+12
View File
@@ -1074,6 +1074,18 @@ std::vector<common_file_info> fs_list(const std::string & path, bool include_dir
return files;
}
std::ifstream fs_open_ifstream(const std::string & fname, std::ios_base::openmode mode) {
#ifdef _WIN32
int wlen = MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, NULL, 0);
if (!wlen) { return std::ifstream(); }
std::vector<wchar_t> wfname(wlen);
(void)MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, wfname.data(), wlen);
return std::ifstream(wfname.data(), mode);
#else
return std::ifstream(fname, mode);
#endif
}
//
// TTY utils
//
+3
View File
@@ -842,6 +842,9 @@ struct common_file_info {
};
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
// fs open, also handle UTF8 on Windows
std::ifstream fs_open_ifstream(const std::string & fname, std::ios_base::openmode mode);
//
// TTY utils
//
+7 -10
View File
@@ -600,18 +600,15 @@ FILE * ggml_fopen(const char * fname, const char * mode) {
// convert fname (UTF-8)
wchar_t * wfname = ggml_mbstowcs(fname);
if (wfname) {
// convert mode (ANSI)
wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
wchar_t * wmode_p = wmode;
do {
*wmode_p++ = (wchar_t)*mode;
} while (*mode++);
// open file
file = _wfopen(wfname, wmode);
// convert mode (UTF-8)
wchar_t * wmode = ggml_mbstowcs(mode);
if (wmode) {
// open file
file = _wfopen(wfname, wmode);
GGML_FREE(wmode);
}
GGML_FREE(wfname);
GGML_FREE(wmode);
}
return file;
+1 -1
View File
@@ -202,7 +202,7 @@ struct cli_context {
// TODO: support remote files in the future (http, https, etc)
std::string load_input_file(const std::string & fname, bool is_media) {
std::ifstream file(fname, std::ios::binary);
std::ifstream file = fs_open_ifstream(fname, std::ios::binary);
if (!file) {
return "";
}
+24
View File
@@ -13,6 +13,14 @@
#include <sstream>
#include <vector>
#include <memory>
#include <fstream>
#ifdef _WIN32
#ifndef NOMINMAX
#define NOMINMAX
#endif
#include <windows.h>
#endif
// Internal header for clip.cpp
@@ -661,6 +669,22 @@ struct clip_image_f32_batch {
// common utils
//
#ifdef _WIN32
static std::ifstream open_ifstream_binary(const std::string & fname) {
int wlen = MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, NULL, 0);
if (!wlen) {
throw std::runtime_error("failed to convert filename to UTF-16: " + fname);
}
std::vector<wchar_t> wfname(wlen);
(void)MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, wfname.data(), wlen);
return std::ifstream(wfname.data(), std::ios::binary);
}
#else
static std::ifstream open_ifstream_binary(const std::string & fname) {
return std::ifstream(fname, std::ios::binary);
}
#endif
static std::string string_format(const char * fmt, ...) {
va_list ap;
va_list ap2;
+1 -1
View File
@@ -1752,7 +1752,7 @@ struct clip_model_loader {
std::map<std::string, size_t> tensor_offset;
std::vector<ggml_tensor *> tensors_to_load;
auto fin = std::ifstream(fname, std::ios::binary);
auto fin = open_ifstream_binary(fname);
if (!fin) {
throw std::runtime_error(string_format("%s: failed to open %s\n", __func__, fname.c_str()));
}
+3
View File
@@ -396,6 +396,9 @@ int main(int argc, char ** argv) {
int n_predict = params.n_predict < 0 ? INT_MAX : params.n_predict;
console::init(params.simple_io, params.use_color);
atexit([]() { console::cleanup(); });
// Ctrl+C handling
{
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
+17 -1
View File
@@ -582,13 +582,29 @@ mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx,
}
mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname, bool placeholder) {
std::vector<unsigned char> buf;
#ifdef _WIN32
int wlen = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0);
if (!wlen) {
LOG_ERR("Unable to convert filename to UTF-16: %s\n", fname);
return {nullptr, nullptr};
}
std::vector<wchar_t> wfname(wlen);
wlen = MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wlen);
if (!wlen) {
LOG_ERR("Unable to convert filename to UTF-16: %s\n", fname);
return {nullptr, nullptr};
}
FILE * f = _wfopen(wfname.data(), L"rb");
#else
FILE * f = fopen(fname, "rb");
#endif
if (!f) {
LOG_ERR("Unable to open file %s: %s\n", fname, strerror(errno));
return {nullptr, nullptr};
}
std::vector<unsigned char> buf;
fseek(f, 0, SEEK_END);
long file_size = ftell(f);
fseek(f, 0, SEEK_SET);
+1 -1
View File
@@ -198,7 +198,7 @@ For the full list of features, please refer to [server's changelog](https://gith
| `--embedding, --embeddings` | restrict to only support embedding use case; use only with dedicated embedding models (default: disabled)<br/>(env: LLAMA_ARG_EMBEDDINGS) |
| `--rerank, --reranking` | enable reranking endpoint on server (default: disabled)<br/>(env: LLAMA_ARG_RERANKING) |
| `--api-key KEY` | API key to use for authentication, multiple keys can be provided as a comma-separated list (default: none)<br/>(env: LLAMA_API_KEY) |
| `--api-key-file FNAME` | path to file containing API keys (default: none)<br/>(env: LLAMA_ARG_API_KEY_FILE) |
| `--api-key-file FNAME` | path to file containing API keys, one per line; lines starting with a hash are treated as comments (default: none)<br/>(env: LLAMA_ARG_API_KEY_FILE) |
| `--ssl-key-file FNAME` | path to file a PEM-encoded SSL private key<br/>(env: LLAMA_ARG_SSL_KEY_FILE) |
| `--ssl-cert-file FNAME` | path to file a PEM-encoded SSL certificate<br/>(env: LLAMA_ARG_SSL_CERT_FILE) |
| `--chat-template-kwargs STRING` | sets additional params for the json template parser, must be a valid json object string, e.g. '{"key1":"value1","key2":"value2"}'<br/>(env: LLAMA_ARG_CHAT_TEMPLATE_KWARGS) |
+1 -8
View File
@@ -825,8 +825,7 @@ private:
server_metrics metrics;
json json_ui_settings = json::object(); // Primary: new name
json json_webui_settings = json::object(); // Deprecated: use json_ui_settings instead (kept for compat)
json json_ui_settings = json::object();
// Necessary similarity of prompt for slot selection
float slot_prompt_similarity = 0.0f;
@@ -1308,7 +1307,6 @@ private:
try {
json json_settings = json::parse(cfg);
json_ui_settings = json_settings;
json_webui_settings = json_settings; // deprecated: keep in sync
} catch (const std::exception & e) {
SRV_ERR("%s: failed to parse UI config: %s\n", __func__, e.what());
return false;
@@ -3687,7 +3685,6 @@ server_context_meta server_context::get_meta() const {
/* has_inp_audio */ impl->chat_params.allow_audio,
/* has_inp_video */ impl->chat_params.allow_video,
/* json_ui_settings */ impl->json_ui_settings,
/* json_webui_settings */ impl->json_webui_settings, // Deprecated
/* slot_n_ctx */ impl->get_slot_n_ctx(),
/* pooling_type */ llama_pooling_type(impl->ctx_tgt),
@@ -4300,12 +4297,8 @@ void server_routes::init_routes() {
{ "endpoint_slots", params.endpoint_slots },
{ "endpoint_props", params.endpoint_props },
{ "endpoint_metrics", params.endpoint_metrics },
// New keys
{ "ui", params.ui },
{ "ui_settings", meta->json_ui_settings },
// Deprecated: use ui/ui_settings instead (kept for backward compat)
{ "webui", params.ui },
{ "webui_settings", meta->json_ui_settings },
{ "chat_template", tmpl_default },
{ "chat_template_caps", meta->chat_template_caps },
{ "bos_token", meta->bos_token_str },
+1 -2
View File
@@ -22,8 +22,7 @@ struct server_context_meta {
bool has_inp_image;
bool has_inp_audio;
bool has_inp_video;
json json_ui_settings; // Primary: new name
json json_webui_settings; // Deprecated: use json_ui_settings instead (kept for backward compat)
json json_ui_settings;
int slot_n_ctx;
enum llama_pooling_type pooling_type;
-1
View File
@@ -1474,7 +1474,6 @@ void server_models_routes::init_routes() {
}},
// New key
{"ui_settings", ui_settings},
{"webui_settings", webui_settings},
{"build_info", std::string(llama_build_info())},
{"cors_proxy_enabled", params.ui_mcp_proxy},
});
-2
View File
@@ -207,7 +207,6 @@ public:
struct server_models_routes {
common_params params;
json ui_settings = json::object(); // Primary: new name
json webui_settings = json::object(); // Deprecated: use ui_settings (kept for compat)
std::atomic<bool> stopping = false; // for graceful disconnecting SSE clients during shutdown
server_models models;
server_models_routes(const common_params & params, int argc, char ** argv)
@@ -217,7 +216,6 @@ struct server_models_routes {
try {
json json_settings = json::parse(cfg);
ui_settings = json_settings;
webui_settings = json_settings; // Deprecated: keep in sync
} catch (const std::exception & e) {
LOG_ERR("%s: failed to parse UI config: %s\n", __func__, e.what());
throw;
+4 -4
View File
@@ -79,9 +79,9 @@ def test_load_split_model():
assert match_regex("(little|girl)+", res.body["content"])
def test_no_webui():
def test_no_ui():
global server
# default: webui enabled
# default: UI enabled
server.start()
url = f"http://{server.server_host}:{server.server_port}"
res = requests.get(url)
@@ -89,8 +89,8 @@ def test_no_webui():
assert "<!doctype html>" in res.text
server.stop()
# with --no-webui
server.no_webui = True
# with --no-ui, the UI should be disabled
server.no_ui = True
server.start()
res = requests.get(url)
assert res.status_code == 404
+3 -3
View File
@@ -12,7 +12,7 @@ def create_server():
def test_mcp_no_proxy():
global server
server.webui_mcp_proxy = False
server.ui_mcp_proxy = False
server.start()
res = server.make_request("GET", "/cors-proxy")
@@ -21,7 +21,7 @@ def test_mcp_no_proxy():
def test_mcp_proxy():
global server
server.webui_mcp_proxy = True
server.ui_mcp_proxy = True
server.start()
url = f"http://{server.server_host}:{server.server_port}/cors-proxy?url=http://example.com"
@@ -32,7 +32,7 @@ def test_mcp_proxy():
def test_mcp_proxy_custom_port():
global server
server.webui_mcp_proxy = True
server.ui_mcp_proxy = True
server.start()
# try getting the server's models API via the proxy
+6 -6
View File
@@ -94,7 +94,7 @@ class ServerProcess:
enable_ctx_shift: int | None = False
spec_draft_n_min: int | None = None
spec_draft_n_max: int | None = None
no_webui: bool | None = None
no_ui: bool | None = None
jinja: bool | None = None
reasoning_format: Literal['deepseek', 'none', 'nothink'] | None = None
reasoning: Literal['on', 'off', 'auto'] | None = None
@@ -107,7 +107,7 @@ class ServerProcess:
cache_ram: int | None = None
no_cache_idle_slots: bool = False
log_path: str | None = None
webui_mcp_proxy: bool = False
ui_mcp_proxy: bool = False
backend_sampling: bool = False
gcp_compat: bool = False
@@ -225,8 +225,8 @@ class ServerProcess:
server_args.extend(["--spec-draft-n-max", self.spec_draft_n_max])
if self.spec_draft_n_min:
server_args.extend(["--spec-draft-n-min", self.spec_draft_n_min])
if self.no_webui:
server_args.append("--no-webui")
if self.no_ui:
server_args.append("--no-ui")
if self.no_models_autoload:
server_args.append("--no-models-autoload")
if self.jinja:
@@ -251,8 +251,8 @@ class ServerProcess:
server_args.extend(["--cache-ram", self.cache_ram])
if self.no_cache_idle_slots:
server_args.append("--no-cache-idle-slots")
if self.webui_mcp_proxy:
server_args.append("--webui-mcp-proxy")
if self.ui_mcp_proxy:
server_args.append("--ui-mcp-proxy")
if self.backend_sampling:
server_args.append("--backend_sampling")
if self.gcp_compat: