mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-20 04:37:38 +02:00
Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| f449e05537 | |||
| 2b686a9120 | |||
| 4b48a53b6c | |||
| e475fa2b5f | |||
| 175147e8f6 | |||
| fabde3bf51 | |||
| 0d2d9ccbf6 |
+40
-4
@@ -17,6 +17,7 @@
|
||||
# define NOMINMAX
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#include <shellapi.h>
|
||||
#endif
|
||||
|
||||
#define JSON_ASSERT GGML_ASSERT
|
||||
@@ -302,7 +303,6 @@ static handle_model_result common_params_handle_model(struct common_params_model
|
||||
|
||||
if (!model.docker_repo.empty()) {
|
||||
model.path = common_docker_resolve_model(model.docker_repo);
|
||||
model.name = model.docker_repo;
|
||||
} else if (!model.hf_repo.empty()) {
|
||||
// If -m was used with -hf, treat the model "path" as the hf_file to download
|
||||
if (model.hf_file.empty() && !model.path.empty()) {
|
||||
@@ -322,7 +322,6 @@ static handle_model_result common_params_handle_model(struct common_params_model
|
||||
throw std::runtime_error("failed to download model from Hugging Face");
|
||||
}
|
||||
|
||||
model.name = model.hf_repo;
|
||||
model.path = download_result.model_path;
|
||||
|
||||
if (!download_result.mmproj_path.empty()) {
|
||||
@@ -893,7 +892,44 @@ bool common_params_to_map(int argc, char ** argv, llama_example ex, std::map<com
|
||||
return true;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
// UTF-8 copy of the process command line, laid out like a C `argv`.
//
// `buf` owns the converted argument strings; `ptrs` holds one `char *` per entry of `buf`
// followed by a terminating nullptr. The pointers refer to the character storage of the
// strings in `buf`, so a *copy* of this struct would carry pointers into the source object's
// strings. Copying is therefore disabled. Moving is allowed: moving the vector transfers its
// element storage, so the strings (and the pointers into them) stay where they are.
struct utf8_argv {
    std::vector<std::string> buf;
    std::vector<char*> ptrs;

    utf8_argv() = default;
    utf8_argv(utf8_argv &&) noexcept = default;
    utf8_argv & operator=(utf8_argv &&) noexcept = default;
    utf8_argv(const utf8_argv &) = delete;
    utf8_argv & operator=(const utf8_argv &) = delete;
};

// Re-read the command line through the wide-character Win32 API and convert every argument
// to UTF-8.
//
// Returns an empty `utf8_argv` (no strings, no pointers) when the command line cannot be
// obtained. An argument that cannot be converted is replaced by an empty string so that the
// positions of the remaining arguments are preserved.
static utf8_argv make_utf8_argv() {
    utf8_argv out;

    int wargc = 0;
    LPWSTR * wargv = CommandLineToArgvW(GetCommandLineW(), &wargc);
    if (!wargv) {
        return out;
    }

    out.buf.reserve(wargc);
    for (int i = 0; i < wargc; ++i) {
        std::string arg;

        // first pass: required size in bytes, including the terminating NUL
        const int n = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wargv[i], -1, nullptr, 0, nullptr, nullptr);
        if (n > 0) {
            arg.resize(static_cast<size_t>(n));
            // second pass: same flags as the size query, and the result is checked so that a
            // failed conversion does not leave a zero-filled string behind
            const int written = WideCharToMultiByte(CP_UTF8, WC_ERR_INVALID_CHARS, wargv[i], -1, arg.data(), n, nullptr, nullptr);
            if (written == n) {
                arg.resize(static_cast<size_t>(n - 1)); // drop the NUL written by the API
            } else {
                arg.clear();
            }
        }

        out.buf.push_back(std::move(arg));
    }
    LocalFree(wargv);

    // build the argv-style view only after `buf` has reached its final size
    out.ptrs.reserve(out.buf.size() + 1);
    for (auto & s : out.buf) {
        out.ptrs.push_back(s.data());
    }
    out.ptrs.push_back(nullptr);

    return out;
}
#endif
|
||||
|
||||
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) {
|
||||
#ifdef _WIN32
|
||||
auto utf8 = make_utf8_argv();
|
||||
if (!utf8.ptrs.empty()) {
|
||||
argc = static_cast<int>(utf8.buf.size());
|
||||
argv = utf8.ptrs.data();
|
||||
}
|
||||
#endif
|
||||
|
||||
auto ctx_arg = common_params_parser_init(params, ex, print_usage);
|
||||
const common_params params_org = ctx_arg.params; // the example can modify the default params
|
||||
|
||||
@@ -2911,7 +2947,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_API_KEY"));
|
||||
add_opt(common_arg(
|
||||
{"--api-key-file"}, "FNAME",
|
||||
"path to file containing API keys (default: none)",
|
||||
"path to file containing API keys, one per line; lines starting with a hash are treated as comments (default: none)",
|
||||
[](common_params & params, const std::string & value) {
|
||||
std::ifstream key_file(value);
|
||||
if (!key_file) {
|
||||
@@ -2919,7 +2955,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
}
|
||||
std::string key;
|
||||
while (std::getline(key_file, key)) {
|
||||
if (!key.empty()) {
|
||||
if (!key.empty() && key[0] != '#') {
|
||||
params.api_keys.push_back(key);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1074,6 +1074,18 @@ std::vector<common_file_info> fs_list(const std::string & path, bool include_dir
|
||||
return files;
|
||||
}
|
||||
|
||||
// Open `fname` for reading with the given open mode.
//
// On Windows `fname` is interpreted as UTF-8 and converted to UTF-16 before opening, so that
// paths containing non-ANSI characters work. On other platforms the name is passed through
// unchanged.
//
// The returned stream evaluates to false (`!stream` is true) when the file could not be
// opened *or* when the name could not be converted; callers only need the usual stream check.
std::ifstream fs_open_ifstream(const std::string & fname, std::ios_base::openmode mode) {
#ifdef _WIN32
    // a default-constructed ifstream reports no error, so the failure has to be flagged
    // explicitly for `if (!file)` checks at the call sites to work
    const auto make_failed = []() {
        std::ifstream bad;
        bad.setstate(std::ios_base::failbit);
        return bad;
    };

    // size query: number of wide characters needed, including the terminating NUL
    const int wlen = MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, nullptr, 0);
    if (wlen <= 0) {
        return make_failed();
    }

    std::vector<wchar_t> wfname(static_cast<size_t>(wlen));
    if (MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, wfname.data(), wlen) == 0) {
        return make_failed();
    }

    return std::ifstream(wfname.data(), mode);
#else
    return std::ifstream(fname, mode);
#endif
}
|
||||
|
||||
//
|
||||
// TTY utils
|
||||
//
|
||||
|
||||
+13
-1
@@ -295,7 +295,16 @@ struct common_params_model {
|
||||
std::string hf_repo = ""; // HF repo // NOLINT
|
||||
std::string hf_file = ""; // HF file // NOLINT
|
||||
std::string docker_repo = ""; // Docker repo // NOLINT
|
||||
std::string name = ""; // in format <user>/<model>[:<tag>] (tag is optional) // NOLINT
|
||||
|
||||
std::string get_name() {
|
||||
if (!hf_repo.empty()) {
|
||||
return hf_repo;
|
||||
}
|
||||
if (!docker_repo.empty()) {
|
||||
return docker_repo;
|
||||
}
|
||||
return path;
|
||||
}
|
||||
};
|
||||
|
||||
// draft-model-based speculative decoding parameters
|
||||
@@ -842,6 +851,9 @@ struct common_file_info {
|
||||
};
|
||||
std::vector<common_file_info> fs_list(const std::string & path, bool include_directories);
|
||||
|
||||
// fs open, also handle UTF8 on Windows
|
||||
std::ifstream fs_open_ifstream(const std::string & fname, std::ios_base::openmode mode);
|
||||
|
||||
//
|
||||
// TTY utils
|
||||
//
|
||||
|
||||
@@ -3788,7 +3788,7 @@ static void ggml_webgpu_init_memset_pipeline(webgpu_global_context & ctx) {
|
||||
ctx->memset_pipeline = ggml_webgpu_create_pipeline(ctx->device, wgsl_memset, "memset", constants);
|
||||
}
|
||||
|
||||
static void create_webgpu_device(ggml_backend_webgpu_reg_context * ctx) {
|
||||
static void ggml_backend_webgpu_request_adapter(wgpu::Instance & instance, wgpu::Adapter & adapter) {
|
||||
wgpu::RequestAdapterOptions options = {};
|
||||
|
||||
#ifndef __EMSCRIPTEN__
|
||||
@@ -3800,17 +3800,20 @@ static void create_webgpu_device(ggml_backend_webgpu_reg_context * ctx) {
|
||||
options.nextInChain = &adapterTogglesDesc;
|
||||
#endif
|
||||
|
||||
ctx->webgpu_global_ctx->instance.WaitAny(
|
||||
ctx->webgpu_global_ctx->instance.RequestAdapter(
|
||||
&options, wgpu::CallbackMode::AllowSpontaneous,
|
||||
[&ctx](wgpu::RequestAdapterStatus status, wgpu::Adapter adapter, const char * message) {
|
||||
if (status != wgpu::RequestAdapterStatus::Success) {
|
||||
GGML_LOG_ERROR("ggml_webgpu: Failed to get an adapter: %s\n", message);
|
||||
return;
|
||||
}
|
||||
ctx->webgpu_global_ctx->adapter = std::move(adapter);
|
||||
}),
|
||||
UINT64_MAX);
|
||||
instance.WaitAny(instance.RequestAdapter(
|
||||
&options, wgpu::CallbackMode::AllowSpontaneous,
|
||||
[&adapter](wgpu::RequestAdapterStatus status, wgpu::Adapter _adapter, const char * message) {
|
||||
if (status != wgpu::RequestAdapterStatus::Success) {
|
||||
GGML_LOG_ERROR("ggml_webgpu: Failed to get an adapter: %s\n", message);
|
||||
return;
|
||||
}
|
||||
adapter = std::move(_adapter);
|
||||
}),
|
||||
UINT64_MAX);
|
||||
}
|
||||
|
||||
static void create_webgpu_device(ggml_backend_webgpu_reg_context * ctx) {
|
||||
ggml_backend_webgpu_request_adapter(ctx->webgpu_global_ctx->instance, ctx->webgpu_global_ctx->adapter);
|
||||
GGML_ASSERT(ctx->webgpu_global_ctx->adapter != nullptr);
|
||||
|
||||
ctx->webgpu_global_ctx->adapter.GetLimits(&ctx->webgpu_global_ctx->capabilities.limits);
|
||||
@@ -4543,20 +4546,7 @@ ggml_backend_reg_t ggml_backend_webgpu_reg() {
|
||||
// Probe for adapter support
|
||||
wgpu::Adapter adapter;
|
||||
if (ctx->webgpu_global_ctx->instance != nullptr) {
|
||||
wgpu::RequestAdapterOptions options = {};
|
||||
|
||||
// probe for adapter support
|
||||
ctx->webgpu_global_ctx->instance.WaitAny(
|
||||
ctx->webgpu_global_ctx->instance.RequestAdapter(
|
||||
&options, wgpu::CallbackMode::AllowSpontaneous,
|
||||
[&adapter](wgpu::RequestAdapterStatus status, wgpu::Adapter _adapter, const char * message) {
|
||||
if (status != wgpu::RequestAdapterStatus::Success) {
|
||||
GGML_LOG_ERROR("ggml_webgpu: Failed to get an adapter: %s\n", message);
|
||||
return;
|
||||
}
|
||||
adapter = std::move(_adapter);
|
||||
}),
|
||||
UINT64_MAX);
|
||||
ggml_backend_webgpu_request_adapter(ctx->webgpu_global_ctx->instance, adapter);
|
||||
}
|
||||
|
||||
// WebGPU backend requires f16 support and, on native, implicit device synchronization.
|
||||
|
||||
+7
-10
@@ -600,18 +600,15 @@ FILE * ggml_fopen(const char * fname, const char * mode) {
|
||||
// convert fname (UTF-8)
|
||||
wchar_t * wfname = ggml_mbstowcs(fname);
|
||||
if (wfname) {
|
||||
// convert mode (ANSI)
|
||||
wchar_t * wmode = GGML_MALLOC((strlen(mode) + 1) * sizeof(wchar_t));
|
||||
wchar_t * wmode_p = wmode;
|
||||
do {
|
||||
*wmode_p++ = (wchar_t)*mode;
|
||||
} while (*mode++);
|
||||
|
||||
// open file
|
||||
file = _wfopen(wfname, wmode);
|
||||
// convert mode (UTF-8)
|
||||
wchar_t * wmode = ggml_mbstowcs(mode);
|
||||
if (wmode) {
|
||||
// open file
|
||||
file = _wfopen(wfname, wmode);
|
||||
GGML_FREE(wmode);
|
||||
}
|
||||
|
||||
GGML_FREE(wfname);
|
||||
GGML_FREE(wmode);
|
||||
}
|
||||
|
||||
return file;
|
||||
|
||||
@@ -5,7 +5,7 @@ import os
|
||||
import sys
|
||||
import subprocess
|
||||
|
||||
HTTPLIB_VERSION = "refs/tags/v0.47.0"
|
||||
HTTPLIB_VERSION = "refs/tags/v0.48.0"
|
||||
|
||||
vendor = {
|
||||
"https://github.com/nlohmann/json/releases/latest/download/json.hpp": "vendor/nlohmann/json.hpp",
|
||||
|
||||
+1
-1
@@ -202,7 +202,7 @@ struct cli_context {
|
||||
|
||||
// TODO: support remote files in the future (http, https, etc)
|
||||
std::string load_input_file(const std::string & fname, bool is_media) {
|
||||
std::ifstream file(fname, std::ios::binary);
|
||||
std::ifstream file = fs_open_ifstream(fname, std::ios::binary);
|
||||
if (!file) {
|
||||
return "";
|
||||
}
|
||||
|
||||
@@ -13,6 +13,14 @@
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include <fstream>
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
// Internal header for clip.cpp
|
||||
|
||||
@@ -661,6 +669,22 @@ struct clip_image_f32_batch {
|
||||
// common utils
|
||||
//
|
||||
|
||||
// Open `fname` for binary reading.
//
// On Windows the name is treated as UTF-8 and converted to UTF-16 first; a name that cannot
// be converted raises std::runtime_error. On other platforms the name is used as is.
// A file that cannot be opened is reported through the state of the returned stream.
static std::ifstream open_ifstream_binary(const std::string & fname) {
#ifdef _WIN32
    // size query: number of wide characters needed, including the terminating NUL
    const int wlen = MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, NULL, 0);
    if (!wlen) {
        throw std::runtime_error("failed to convert filename to UTF-16: " + fname);
    }

    std::vector<wchar_t> wfname(wlen);
    // the actual conversion can fail as well; do not open a file from an unfilled buffer
    if (!MultiByteToWideChar(CP_UTF8, 0, fname.c_str(), -1, wfname.data(), wlen)) {
        throw std::runtime_error("failed to convert filename to UTF-16: " + fname);
    }

    return std::ifstream(wfname.data(), std::ios::binary);
#else
    return std::ifstream(fname, std::ios::binary);
#endif
}
|
||||
|
||||
static std::string string_format(const char * fmt, ...) {
|
||||
va_list ap;
|
||||
va_list ap2;
|
||||
|
||||
+1
-1
@@ -1752,7 +1752,7 @@ struct clip_model_loader {
|
||||
std::map<std::string, size_t> tensor_offset;
|
||||
std::vector<ggml_tensor *> tensors_to_load;
|
||||
|
||||
auto fin = std::ifstream(fname, std::ios::binary);
|
||||
auto fin = open_ifstream_binary(fname);
|
||||
if (!fin) {
|
||||
throw std::runtime_error(string_format("%s: failed to open %s\n", __func__, fname.c_str()));
|
||||
}
|
||||
|
||||
@@ -396,6 +396,9 @@ int main(int argc, char ** argv) {
|
||||
|
||||
int n_predict = params.n_predict < 0 ? INT_MAX : params.n_predict;
|
||||
|
||||
console::init(params.simple_io, params.use_color);
|
||||
atexit([]() { console::cleanup(); });
|
||||
|
||||
// Ctrl+C handling
|
||||
{
|
||||
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
|
||||
|
||||
@@ -582,13 +582,29 @@ mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_buf(mtmd_context * ctx,
|
||||
}
|
||||
|
||||
mtmd_helper_bitmap_wrapper mtmd_helper_bitmap_init_from_file(mtmd_context * ctx, const char * fname, bool placeholder) {
|
||||
std::vector<unsigned char> buf;
|
||||
#ifdef _WIN32
|
||||
int wlen = MultiByteToWideChar(CP_UTF8, 0, fname, -1, NULL, 0);
|
||||
if (!wlen) {
|
||||
LOG_ERR("Unable to convert filename to UTF-16: %s\n", fname);
|
||||
return {nullptr, nullptr};
|
||||
}
|
||||
std::vector<wchar_t> wfname(wlen);
|
||||
wlen = MultiByteToWideChar(CP_UTF8, 0, fname, -1, wfname.data(), wlen);
|
||||
if (!wlen) {
|
||||
LOG_ERR("Unable to convert filename to UTF-16: %s\n", fname);
|
||||
return {nullptr, nullptr};
|
||||
}
|
||||
FILE * f = _wfopen(wfname.data(), L"rb");
|
||||
#else
|
||||
FILE * f = fopen(fname, "rb");
|
||||
#endif
|
||||
if (!f) {
|
||||
LOG_ERR("Unable to open file %s: %s\n", fname, strerror(errno));
|
||||
return {nullptr, nullptr};
|
||||
}
|
||||
|
||||
std::vector<unsigned char> buf;
|
||||
|
||||
fseek(f, 0, SEEK_END);
|
||||
long file_size = ftell(f);
|
||||
fseek(f, 0, SEEK_SET);
|
||||
|
||||
@@ -180,6 +180,17 @@ That requires `JSON.stringify` when formatted to message content:
|
||||
}
|
||||
```
|
||||
|
||||
### Router mode: how child <--> router communicates
|
||||
|
||||
Upon spawning a new child process using `subprocess`, both the child and the router listen on the combined stdout/stderr stream.
|
||||
|
||||
For the direction from child to router:
|
||||
- Generic messages are logs; they are forwarded to the router's stdout
|
||||
- Special state update messages are prefixed by `cmd_child_to_router:state:`, followed by a JSON object. See `server_models::handle_child_state` for more details
|
||||
|
||||
For the direction from router to child:
|
||||
- When the router sends `cmd_router_to_child:exit`, the child should exit gracefully; if the child is still running after `DEFAULT_STOP_TIMEOUT`, it is force-killed
|
||||
|
||||
### Model management API (router mode)
|
||||
|
||||
Model management API was added via PR [#23976](https://github.com/ggml-org/llama.cpp/pull/23976)
|
||||
|
||||
@@ -198,7 +198,7 @@ For the full list of features, please refer to [server's changelog](https://gith
|
||||
| `--embedding, --embeddings` | restrict to only support embedding use case; use only with dedicated embedding models (default: disabled)<br/>(env: LLAMA_ARG_EMBEDDINGS) |
|
||||
| `--rerank, --reranking` | enable reranking endpoint on server (default: disabled)<br/>(env: LLAMA_ARG_RERANKING) |
|
||||
| `--api-key KEY` | API key to use for authentication, multiple keys can be provided as a comma-separated list (default: none)<br/>(env: LLAMA_API_KEY) |
|
||||
| `--api-key-file FNAME` | path to file containing API keys (default: none)<br/>(env: LLAMA_ARG_API_KEY_FILE) |
|
||||
| `--api-key-file FNAME` | path to file containing API keys, one per line; lines starting with a hash are treated as comments (default: none)<br/>(env: LLAMA_ARG_API_KEY_FILE) |
|
||||
| `--ssl-key-file FNAME` | path to file a PEM-encoded SSL private key<br/>(env: LLAMA_ARG_SSL_KEY_FILE) |
|
||||
| `--ssl-cert-file FNAME` | path to file a PEM-encoded SSL certificate<br/>(env: LLAMA_ARG_SSL_CERT_FILE) |
|
||||
| `--chat-template-kwargs STRING` | sets additional params for the json template parser, must be a valid json object string, e.g. '{"key1":"value1","key2":"value2"}'<br/>(env: LLAMA_ARG_CHAT_TEMPLATE_KWARGS) |
|
||||
|
||||
@@ -12,6 +12,7 @@
|
||||
#include <random>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <limits>
|
||||
|
||||
json format_error_response(const std::string & message, const enum error_type type) {
|
||||
std::string type_str;
|
||||
@@ -1238,7 +1239,7 @@ json format_response_rerank(
|
||||
// other utils
|
||||
//
|
||||
|
||||
std::vector<llama_token_data> get_token_probabilities(llama_context * ctx, int idx) {
|
||||
std::vector<llama_token_data> get_token_probabilities(llama_context * ctx, int idx, size_t n_top) {
|
||||
std::vector<llama_token_data> cur;
|
||||
|
||||
const auto * logits = llama_get_logits_ith(ctx, idx);
|
||||
@@ -1257,21 +1258,34 @@ std::vector<llama_token_data> get_token_probabilities(llama_context * ctx, int i
|
||||
}
|
||||
}
|
||||
|
||||
// sort tokens by logits
|
||||
std::sort(cur.begin(), cur.end(), [](const llama_token_data & a, const llama_token_data & b) {
|
||||
return a.logit > b.logit;
|
||||
});
|
||||
// sort tokens by logits (partial: only the leading `n_top` need ordering)
|
||||
if (n_top > cur.size()) {
|
||||
n_top = cur.size();
|
||||
}
|
||||
if (n_top > 0) {
|
||||
std::partial_sort(cur.begin(), cur.begin() + n_top, cur.end(),
|
||||
[](const llama_token_data & a, const llama_token_data & b) {
|
||||
return a.logit > b.logit;
|
||||
});
|
||||
}
|
||||
|
||||
// apply softmax
|
||||
float max_l = cur[0].logit;
|
||||
float max_l = -std::numeric_limits<float>::infinity();
|
||||
if (n_top > 0) {
|
||||
max_l = cur[0].logit; // partial_sort guarantees the absolute maximum is at index 0
|
||||
} else {
|
||||
for (const auto & t : cur) {
|
||||
max_l = std::max(max_l, t.logit);
|
||||
}
|
||||
}
|
||||
float cum_sum = 0.0f;
|
||||
for (size_t i = 0; i < cur.size(); ++i) {
|
||||
float p = expf(cur[i].logit - max_l);
|
||||
cur[i].p = p;
|
||||
for (auto & t : cur) {
|
||||
float p = expf(t.logit - max_l);
|
||||
t.p = p;
|
||||
cum_sum += p;
|
||||
}
|
||||
for (size_t i = 0; i < cur.size(); ++i) {
|
||||
cur[i].p /= cum_sum;
|
||||
for (auto & t : cur) {
|
||||
t.p /= cum_sum;
|
||||
}
|
||||
|
||||
return cur;
|
||||
|
||||
@@ -326,7 +326,7 @@ json format_response_rerank(
|
||||
// other utils
|
||||
//
|
||||
|
||||
std::vector<llama_token_data> get_token_probabilities(llama_context * ctx, int idx);
|
||||
std::vector<llama_token_data> get_token_probabilities(llama_context * ctx, int idx, size_t n_top);
|
||||
|
||||
std::string safe_json_to_str(const json & data);
|
||||
|
||||
|
||||
@@ -63,11 +63,6 @@ enum slot_state {
|
||||
SLOT_STATE_GENERATING,
|
||||
};
|
||||
|
||||
enum server_state {
|
||||
SERVER_STATE_LOADING_MODEL, // Server is starting up, model not fully loaded yet
|
||||
SERVER_STATE_READY, // Server is ready and model is loaded
|
||||
};
|
||||
|
||||
struct server_slot {
|
||||
int id;
|
||||
|
||||
@@ -773,6 +768,8 @@ public:
|
||||
// note: chat_params must not be refreshed upon exiting sleeping state
|
||||
server_chat_params chat_params;
|
||||
|
||||
server_state_callback_t callback_state = [](server_state, json) -> void {};
|
||||
|
||||
server_context_impl() {
|
||||
mtmd_helper_log_set(common_log_default_callback, nullptr);
|
||||
}
|
||||
@@ -825,8 +822,7 @@ private:
|
||||
|
||||
server_metrics metrics;
|
||||
|
||||
json json_ui_settings = json::object(); // Primary: new name
|
||||
json json_webui_settings = json::object(); // Deprecated: use json_ui_settings instead (kept for compat)
|
||||
json json_ui_settings = json::object();
|
||||
|
||||
// Necessary similarity of prompt for slot selection
|
||||
float slot_prompt_similarity = 0.0f;
|
||||
@@ -1245,8 +1241,8 @@ private:
|
||||
if (!params_base.model_alias.empty()) {
|
||||
// backward compat: use first alias as model name
|
||||
model_name = *params_base.model_alias.begin();
|
||||
} else if (!params_base.model.name.empty()) {
|
||||
model_name = params_base.model.name;
|
||||
} else if (!params_base.model.get_name().empty()) {
|
||||
model_name = params_base.model.get_name();
|
||||
} else {
|
||||
// fallback: derive model name from file name
|
||||
auto model_path = std::filesystem::path(params_base.model.path);
|
||||
@@ -1308,7 +1304,6 @@ private:
|
||||
try {
|
||||
json json_settings = json::parse(cfg);
|
||||
json_ui_settings = json_settings;
|
||||
json_webui_settings = json_settings; // deprecated: keep in sync
|
||||
} catch (const std::exception & e) {
|
||||
SRV_ERR("%s: failed to parse UI config: %s\n", __func__, e.what());
|
||||
return false;
|
||||
@@ -1826,8 +1821,7 @@ private:
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// TODO: optimize this with min-p optimization
|
||||
std::vector<llama_token_data> cur = get_token_probabilities(ctx_tgt, idx);
|
||||
std::vector<llama_token_data> cur = get_token_probabilities(ctx_tgt, idx, n_probs_request);
|
||||
const size_t max_probs = cur.size();
|
||||
const size_t n_probs = std::min(max_probs, n_probs_request);
|
||||
|
||||
@@ -3687,7 +3681,6 @@ server_context_meta server_context::get_meta() const {
|
||||
/* has_inp_audio */ impl->chat_params.allow_audio,
|
||||
/* has_inp_video */ impl->chat_params.allow_video,
|
||||
/* json_ui_settings */ impl->json_ui_settings,
|
||||
/* json_webui_settings */ impl->json_webui_settings, // Deprecated
|
||||
/* slot_n_ctx */ impl->get_slot_n_ctx(),
|
||||
/* pooling_type */ llama_pooling_type(impl->ctx_tgt),
|
||||
|
||||
@@ -3738,8 +3731,11 @@ struct server_res_generator : server_http_res {
|
||||
}
|
||||
};
|
||||
|
||||
void server_context::on_sleeping_changed(std::function<void(bool)> callback) {
|
||||
impl->queue_tasks.on_sleeping_state(std::move(callback));
|
||||
// Store `callback` as the state callback of this context and hook it up to the task queue's
// sleeping-state notification: `true` is reported as SERVER_STATE_SLEEPING, `false` as
// SERVER_STATE_READY, both with an empty payload.
void server_context::set_state_callback(server_state_callback_t callback) {
    impl->callback_state = std::move(callback);

    auto forward_sleeping = [this](bool is_sleeping) {
        if (is_sleeping) {
            impl->callback_state(SERVER_STATE_SLEEPING, {});
        } else {
            impl->callback_state(SERVER_STATE_READY, {});
        }
    };
    impl->queue_tasks.on_sleeping_state(std::move(forward_sleeping));
}
|
||||
|
||||
// compute the number of tokens before the last user message in the prompt
|
||||
@@ -4300,12 +4296,8 @@ void server_routes::init_routes() {
|
||||
{ "endpoint_slots", params.endpoint_slots },
|
||||
{ "endpoint_props", params.endpoint_props },
|
||||
{ "endpoint_metrics", params.endpoint_metrics },
|
||||
// New keys
|
||||
{ "ui", params.ui },
|
||||
{ "ui_settings", meta->json_ui_settings },
|
||||
// Deprecated: use ui/ui_settings instead (kept for backward compat)
|
||||
{ "webui", params.ui },
|
||||
{ "webui_settings", meta->json_ui_settings },
|
||||
{ "chat_template", tmpl_default },
|
||||
{ "chat_template_caps", meta->chat_template_caps },
|
||||
{ "bos_token", meta->bos_token_str },
|
||||
|
||||
@@ -22,8 +22,7 @@ struct server_context_meta {
|
||||
bool has_inp_image;
|
||||
bool has_inp_audio;
|
||||
bool has_inp_video;
|
||||
json json_ui_settings; // Primary: new name
|
||||
json json_webui_settings; // Deprecated: use json_ui_settings instead (kept for backward compat)
|
||||
json json_ui_settings;
|
||||
int slot_n_ctx;
|
||||
enum llama_pooling_type pooling_type;
|
||||
|
||||
@@ -53,6 +52,31 @@ struct server_context_meta {
|
||||
uint64_t model_size;
|
||||
};
|
||||
|
||||
enum server_state {
|
||||
// SERVER_STATE_DOWNLOADING,
|
||||
SERVER_STATE_LOADING,
|
||||
SERVER_STATE_READY,
|
||||
SERVER_STATE_SLEEPING,
|
||||
};
|
||||
|
||||
static std::string server_state_to_str(server_state state) {
|
||||
switch (state) {
|
||||
case SERVER_STATE_LOADING: return "loading";
|
||||
case SERVER_STATE_READY: return "ready";
|
||||
case SERVER_STATE_SLEEPING: return "sleeping";
|
||||
default: GGML_ASSERT(false && "invalid server_state");
|
||||
}
|
||||
}
|
||||
|
||||
static server_state server_state_from_str(const std::string & str) {
|
||||
if (str == "loading") return SERVER_STATE_LOADING;
|
||||
if (str == "ready") return SERVER_STATE_READY;
|
||||
if (str == "sleeping") return SERVER_STATE_SLEEPING;
|
||||
GGML_ASSERT(false && "invalid server_state string");
|
||||
}
|
||||
|
||||
using server_state_callback_t = std::function<void(server_state, json /* payload */)>;
|
||||
|
||||
struct server_context {
|
||||
std::unique_ptr<server_context_impl> impl;
|
||||
|
||||
@@ -80,9 +104,8 @@ struct server_context {
|
||||
// not thread-safe, should only be used from the main thread
|
||||
server_context_meta get_meta() const;
|
||||
|
||||
// register a callback to be called when sleeping state changes
|
||||
// must be set before load_model() is called
|
||||
void on_sleeping_changed(std::function<void(bool)> callback);
|
||||
// note: must be set before load_model() is called
|
||||
void set_state_callback(server_state_callback_t callback);
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
#include "server-common.h"
|
||||
#include "server-models.h"
|
||||
#include "server-context.h"
|
||||
|
||||
#include "build-info.h"
|
||||
#include "preset.h"
|
||||
@@ -44,9 +45,7 @@ extern char **environ;
|
||||
#define DEFAULT_STOP_TIMEOUT 10 // seconds
|
||||
|
||||
#define CMD_ROUTER_TO_CHILD_EXIT "cmd_router_to_child:exit"
|
||||
#define CMD_CHILD_TO_ROUTER_READY "cmd_child_to_router:ready" // also sent when waking up from sleep
|
||||
#define CMD_CHILD_TO_ROUTER_SLEEP "cmd_child_to_router:sleep"
|
||||
#define CMD_CHILD_TO_ROUTER_INFO "cmd_child_to_router:info:" // followed by json string
|
||||
#define CMD_CHILD_TO_ROUTER_STATE "cmd_child_to_router:state:" // followed by json string
|
||||
|
||||
// address for child process, this is needed because router may run on 0.0.0.0
|
||||
// ref: https://github.com/ggml-org/llama.cpp/issues/17862
|
||||
@@ -904,12 +903,8 @@ void server_models::load(const std::string & name) {
|
||||
while (fgets(buffer, vec_buf.size(), stdout_file) != nullptr) {
|
||||
LOG("[%5d] %s", port, buffer);
|
||||
std::string str(buffer);
|
||||
if (string_starts_with(buffer, CMD_CHILD_TO_ROUTER_READY)) {
|
||||
this->update_status(name, SERVER_MODEL_STATUS_LOADED, 0);
|
||||
} else if (string_starts_with(buffer, CMD_CHILD_TO_ROUTER_INFO)) {
|
||||
this->update_loaded_info(name, str);
|
||||
} else if (string_starts_with(buffer, CMD_CHILD_TO_ROUTER_SLEEP)) {
|
||||
this->update_status(name, SERVER_MODEL_STATUS_SLEEPING, 0);
|
||||
if (string_starts_with(buffer, CMD_CHILD_TO_ROUTER_STATE)) {
|
||||
this->handle_child_state(name, str);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -976,7 +971,10 @@ void server_models::load(const std::string & name) {
|
||||
subprocess_destroy(&child_proc->get());
|
||||
|
||||
// update status and exit code
|
||||
this->update_status(name, SERVER_MODEL_STATUS_UNLOADED, exit_code);
|
||||
this->update_status(name, {
|
||||
SERVER_MODEL_STATUS_UNLOADED,
|
||||
exit_code
|
||||
});
|
||||
SRV_INF("instance name=%s exited with status %d\n", name.c_str(), exit_code);
|
||||
});
|
||||
|
||||
@@ -1016,7 +1014,8 @@ struct server_models_download_res : public common_download_callback {
|
||||
common_download_model(model, opts);
|
||||
is_ok = true;
|
||||
} catch (const std::exception & e) {
|
||||
SRV_ERR("download failed for model name=%s: %s\n", model.name.c_str(), e.what());
|
||||
auto model_name = model.get_name();
|
||||
SRV_ERR("download failed for model name=%s: %s\n", model_name.c_str(), e.what());
|
||||
is_ok = false;
|
||||
}
|
||||
return is_ok;
|
||||
@@ -1036,7 +1035,7 @@ struct server_models_download_res : public common_download_callback {
|
||||
};
|
||||
|
||||
void server_models::download(common_params_model && model, common_download_opts && opts) {
|
||||
std::string name = model.name;
|
||||
std::string name = model.get_name();
|
||||
GGML_ASSERT(name == model.hf_repo);
|
||||
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
@@ -1064,9 +1063,10 @@ void server_models::download(common_params_model && model, common_download_opts
|
||||
inst.th = std::thread([this, dl = std::move(dl)]() {
|
||||
dl->opts.callback = dl.get();
|
||||
bool ok = dl->run();
|
||||
auto model_name = dl->model.get_name();
|
||||
SRV_INF("download finished for model name=%s with status=%s\n",
|
||||
dl->model.name.c_str(), ok ? "success" : "failure");
|
||||
update_download_progress(dl->model.name, {}, true, ok);
|
||||
model_name.c_str(), ok ? "success" : "failure");
|
||||
update_download_progress(model_name, {}, true, ok);
|
||||
// need_reload is set inside update_download_progress under the mutex;
|
||||
// the next load_models() call will clean up this instance
|
||||
});
|
||||
@@ -1130,21 +1130,27 @@ void server_models::unload_all() {
|
||||
}
|
||||
}
|
||||
|
||||
void server_models::update_status(const std::string & name, server_model_status status, int exit_code) {
|
||||
void server_models::update_status(const std::string & name, const update_status_args & args) {
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
auto it = mapping.find(name);
|
||||
if (it != mapping.end()) {
|
||||
auto & meta = it->second.meta;
|
||||
meta.status = status;
|
||||
meta.exit_code = exit_code;
|
||||
meta.status = args.status;
|
||||
meta.exit_code = args.exit_code;
|
||||
if (!args.loaded_info.is_null()) {
|
||||
meta.loaded_info = args.loaded_info;
|
||||
}
|
||||
}
|
||||
// broadcast status change to SSE
|
||||
{
|
||||
json data = {
|
||||
{"status", server_model_status_to_string(status)},
|
||||
{"status", server_model_status_to_string(args.status)},
|
||||
};
|
||||
if (status == SERVER_MODEL_STATUS_UNLOADED) {
|
||||
data["exit_code"] = exit_code;
|
||||
if (args.status == SERVER_MODEL_STATUS_UNLOADED) {
|
||||
data["exit_code"] = args.exit_code;
|
||||
}
|
||||
if (!args.loaded_info.is_null()) {
|
||||
data["info"] = args.loaded_info;
|
||||
}
|
||||
// note: notify_sse doesn't acquire the lock, so no deadlock here
|
||||
notify_sse("status_change", name, data);
|
||||
@@ -1152,29 +1158,6 @@ void server_models::update_status(const std::string & name, server_model_status
|
||||
cv.notify_all();
|
||||
}
|
||||
|
||||
void server_models::update_loaded_info(const std::string & name, std::string & raw_info) {
|
||||
if (!string_starts_with(raw_info, CMD_CHILD_TO_ROUTER_INFO)) {
|
||||
SRV_WRN("invalid loaded info format from child for model name=%s: %s\n", name.c_str(), raw_info.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
json info;
|
||||
try {
|
||||
info = json::parse(raw_info.substr(strlen(CMD_CHILD_TO_ROUTER_INFO)));
|
||||
} catch (const std::exception & e) {
|
||||
SRV_WRN("failed to parse loaded info from child for model name=%s: %s\n", name.c_str(), e.what());
|
||||
return;
|
||||
}
|
||||
|
||||
std::unique_lock<std::mutex> lk(mutex);
|
||||
auto it = mapping.find(name);
|
||||
if (it != mapping.end()) {
|
||||
auto & meta = it->second.meta;
|
||||
meta.loaded_info = info;
|
||||
}
|
||||
cv.notify_all();
|
||||
}
|
||||
|
||||
void server_models::update_download_progress(const std::string & name, const common_download_progress & progress, bool done, bool ok) {
|
||||
json curr;
|
||||
{
|
||||
@@ -1323,21 +1306,54 @@ server_http_res_ptr server_models::proxy_request(const server_http_req & req, co
|
||||
return proxy;
|
||||
}
|
||||
|
||||
bool server_models::is_child_server() {
|
||||
void server_models::handle_child_state(const std::string & name, const std::string & raw_input) {
|
||||
server_state state;
|
||||
json payload;
|
||||
|
||||
try {
|
||||
json data = json::parse(raw_input.substr(strlen(CMD_CHILD_TO_ROUTER_STATE)));
|
||||
state = server_state_from_str(json_value(data, "state", std::string()));
|
||||
payload = json_value(data, "payload", json{});
|
||||
} catch (const std::exception & e) {
|
||||
SRV_ERR("failed to parse child state update for name=%s: %s\n", name.c_str(), e.what());
|
||||
return;
|
||||
}
|
||||
|
||||
switch (state) {
|
||||
case SERVER_STATE_LOADING:
|
||||
{
|
||||
// do nothing for now
|
||||
// TODO: report loading progress for first load and wakeup from sleep
|
||||
} break;
|
||||
case SERVER_STATE_READY:
|
||||
{
|
||||
update_status(name, {
|
||||
SERVER_MODEL_STATUS_LOADED,
|
||||
0,
|
||||
// note: payload can be empty if this is a wakeup from sleep
|
||||
payload.size() > 0 ? payload : nullptr
|
||||
});
|
||||
} break;
|
||||
case SERVER_STATE_SLEEPING:
|
||||
{
|
||||
update_status(name, { SERVER_MODEL_STATUS_SLEEPING });
|
||||
} break;
|
||||
default:
|
||||
// should never happen, but just in case
|
||||
GGML_ASSERT(false && "unexpected state from child server");
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// server_child
|
||||
//
|
||||
|
||||
// True when the LLAMA_SERVER_ROUTER_PORT environment variable is set (to any value).
bool server_child::is_child() {
    return std::getenv("LLAMA_SERVER_ROUTER_PORT") != nullptr;
}
|
||||
|
||||
std::thread server_models::setup_child_server(const std::function<void(int)> & shutdown_handler, const json & model_info) {
|
||||
// send a notification to the router server that a model instance is ready
|
||||
common_log_pause(common_log_main());
|
||||
fflush(stdout);
|
||||
fprintf(stdout, "%s\n", CMD_CHILD_TO_ROUTER_READY);
|
||||
fflush(stdout);
|
||||
fprintf(stdout, "%s%s\n", CMD_CHILD_TO_ROUTER_INFO, safe_json_to_str(model_info).c_str());
|
||||
fflush(stdout);
|
||||
common_log_resume(common_log_main());
|
||||
|
||||
std::thread server_child::setup(const std::function<void(int)> & shutdown_handler) {
|
||||
// setup thread for monitoring stdin
|
||||
return std::thread([shutdown_handler]() {
|
||||
// wait for EOF on stdin
|
||||
@@ -1363,10 +1379,14 @@ std::thread server_models::setup_child_server(const std::function<void(int)> & s
|
||||
});
|
||||
}
|
||||
|
||||
void server_models::notify_router_sleeping_state(bool is_sleeping) {
|
||||
void server_child::notify_to_router(const std::string & state, const json & payload) {
|
||||
json data = {
|
||||
{"state", state},
|
||||
{"payload", payload},
|
||||
};
|
||||
common_log_pause(common_log_main());
|
||||
fflush(stdout);
|
||||
fprintf(stdout, "%s\n", is_sleeping ? CMD_CHILD_TO_ROUTER_SLEEP : CMD_CHILD_TO_ROUTER_READY);
|
||||
fprintf(stdout, "%s%s\n", CMD_CHILD_TO_ROUTER_STATE, safe_json_to_str(data).c_str());
|
||||
fflush(stdout);
|
||||
common_log_resume(common_log_main());
|
||||
}
|
||||
@@ -1474,7 +1494,6 @@ void server_models_routes::init_routes() {
|
||||
}},
|
||||
// New key
|
||||
{"ui_settings", ui_settings},
|
||||
{"webui_settings", webui_settings},
|
||||
{"build_info", std::string(llama_build_info())},
|
||||
{"cors_proxy_enabled", params.ui_mcp_proxy},
|
||||
});
|
||||
@@ -1645,7 +1664,6 @@ void server_models_routes::init_routes() {
|
||||
common_params_model model;
|
||||
common_download_opts opts;
|
||||
|
||||
model.name = name;
|
||||
model.hf_repo = name;
|
||||
opts.bearer_token = params.hf_token;
|
||||
opts.download_mmproj = true;
|
||||
|
||||
@@ -171,8 +171,12 @@ public:
|
||||
void download(common_params_model && model, common_download_opts && opts);
|
||||
|
||||
// update the status of a model instance (thread-safe)
|
||||
void update_status(const std::string & name, server_model_status status, int exit_code);
|
||||
void update_loaded_info(const std::string & name, std::string & raw_info);
|
||||
// Arguments for update_status(name, args): the new status plus optional details.
// Callers brace-initialize this positionally, so the member order matters.
struct update_status_args {
|
||||
// new status of the model instance
server_model_status status;
|
||||
int exit_code = 0; // only valid if status == UNLOADED
|
||||
// extra info attached to the status; handle_child_state() passes the child's
// payload here when it reports READY, nullptr means "no info"
json loaded_info = nullptr;
|
||||
};
|
||||
void update_status(const std::string & name, const update_status_args & args);
|
||||
void update_download_progress(const std::string & name, const common_download_progress & progress, bool done, bool ok = true);
|
||||
|
||||
// remove a cache model from disk and update the list (thread-safe)
|
||||
@@ -193,21 +197,32 @@ public:
|
||||
// proxy an HTTP request to the model instance
|
||||
server_http_res_ptr proxy_request(const server_http_req & req, const std::string & method, const std::string & name, bool update_last_used);
|
||||
|
||||
// handle message sent from server_child::notify_to_router()
|
||||
// raw input must start with CMD_CHILD_TO_ROUTER_STATE, followed by a JSON string
|
||||
// this function is not thread-safe, must be called from instance's monitoring thread
|
||||
// payload per state:
|
||||
// state = loading -> payload = {} (TODO: add progress info)
|
||||
// state = ready -> payload = model_info (json), or {} if wakeup from sleeping
|
||||
// state = sleeping -> payload = {}
|
||||
void handle_child_state(const std::string & name, const std::string & raw_input);
|
||||
};
|
||||
|
||||
struct server_child {
|
||||
// return true if the current process is a child server instance
|
||||
static bool is_child_server();
|
||||
bool is_child();
|
||||
|
||||
// notify the router server that a model instance is ready
|
||||
// register the shutdown_handler to be called by the router
|
||||
// return the monitoring thread (to be joined by the caller)
|
||||
static std::thread setup_child_server(const std::function<void(int)> & shutdown_handler, const json & model_info);
|
||||
std::thread setup(const std::function<void(int)> & shutdown_handler);
|
||||
|
||||
// notify the router server that the sleeping state has changed
|
||||
static void notify_router_sleeping_state(bool sleeping);
|
||||
// notify router server for status changes (e.g. loading, downloading, sleeping, etc.)
|
||||
// message will be handled by server_models::handle_child_state() on the router side
|
||||
void notify_to_router(const std::string & state_name, const json & payload);
|
||||
};
|
||||
|
||||
struct server_models_routes {
|
||||
common_params params;
|
||||
json ui_settings = json::object(); // Primary: new name
|
||||
json webui_settings = json::object(); // Deprecated: use ui_settings (kept for compat)
|
||||
std::atomic<bool> stopping = false; // for graceful disconnecting SSE clients during shutdown
|
||||
server_models models;
|
||||
server_models_routes(const common_params & params, int argc, char ** argv)
|
||||
@@ -217,7 +232,6 @@ struct server_models_routes {
|
||||
try {
|
||||
json json_settings = json::parse(cfg);
|
||||
ui_settings = json_settings;
|
||||
webui_settings = json_settings; // Deprecated: keep in sync
|
||||
} catch (const std::exception & e) {
|
||||
LOG_ERR("%s: failed to parse UI config: %s\n", __func__, e.what());
|
||||
throw;
|
||||
|
||||
+17
-12
@@ -90,8 +90,10 @@ int llama_server(int argc, char ** argv) {
|
||||
llama_numa_init(params.numa);
|
||||
|
||||
// router server never loads a model and must not touch the GPU
|
||||
const bool is_router_server = params.model.path.empty()
|
||||
&& params.model.hf_repo.empty();
|
||||
|
||||
// skip device enumeration so the CUDA primary context stays uncreated
|
||||
const bool is_router_server = params.model.path.empty();
|
||||
common_params_print_info(params, !is_router_server);
|
||||
|
||||
if (!is_router_server) {
|
||||
@@ -113,8 +115,9 @@ int llama_server(int argc, char ** argv) {
|
||||
}
|
||||
|
||||
// for consistency between server router mode and single-model mode, we set the same model name as alias
|
||||
if (params.model_alias.empty() && !params.model.name.empty()) {
|
||||
params.model_alias.insert(params.model.name);
|
||||
auto model_name = params.model.get_name();
|
||||
if (params.model_alias.empty() && !model_name.empty()) {
|
||||
params.model_alias.insert(model_name);
|
||||
}
|
||||
|
||||
// struct that contains llama context and inference
|
||||
@@ -255,6 +258,7 @@ int llama_server(int argc, char ** argv) {
|
||||
// Start the server
|
||||
//
|
||||
|
||||
server_child child; // only used in non-router mode
|
||||
std::function<void()> clean_up;
|
||||
|
||||
if (is_router_server) {
|
||||
@@ -300,15 +304,16 @@ int llama_server(int argc, char ** argv) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// load the model
|
||||
SRV_INF("%s", "loading model\n");
|
||||
|
||||
if (server_models::is_child_server()) {
|
||||
ctx_server.on_sleeping_changed([&](bool sleeping) {
|
||||
server_models::notify_router_sleeping_state(sleeping);
|
||||
// setup communication child --> router if necessary
|
||||
if (child.is_child()) {
|
||||
ctx_server.set_state_callback([&](server_state state, json payload) {
|
||||
child.notify_to_router(server_state_to_str(state), payload);
|
||||
});
|
||||
}
|
||||
|
||||
// load the model
|
||||
SRV_INF("%s", "loading model\n");
|
||||
|
||||
if (!ctx_server.load_model(params)) {
|
||||
clean_up();
|
||||
if (ctx_http.thread.joinable()) {
|
||||
@@ -365,9 +370,9 @@ int llama_server(int argc, char ** argv) {
|
||||
|
||||
// optionally, notify router server that this instance is ready
|
||||
std::thread monitor_thread;
|
||||
if (server_models::is_child_server()) {
|
||||
json model_info = routes.get_model_info();
|
||||
monitor_thread = server_models::setup_child_server(shutdown_handler, model_info);
|
||||
if (child.is_child()) {
|
||||
monitor_thread = child.setup(shutdown_handler);
|
||||
child.notify_to_router(server_state_to_str(SERVER_STATE_READY), routes.get_model_info());
|
||||
}
|
||||
|
||||
// this call blocks the main thread until queue_tasks.terminate() is called
|
||||
|
||||
@@ -79,9 +79,9 @@ def test_load_split_model():
|
||||
assert match_regex("(little|girl)+", res.body["content"])
|
||||
|
||||
|
||||
def test_no_webui():
|
||||
def test_no_ui():
|
||||
global server
|
||||
# default: webui enabled
|
||||
# default: UI enabled
|
||||
server.start()
|
||||
url = f"http://{server.server_host}:{server.server_port}"
|
||||
res = requests.get(url)
|
||||
@@ -89,8 +89,8 @@ def test_no_webui():
|
||||
assert "<!doctype html>" in res.text
|
||||
server.stop()
|
||||
|
||||
# with --no-webui
|
||||
server.no_webui = True
|
||||
# with --no-ui, the UI should be disabled
|
||||
server.no_ui = True
|
||||
server.start()
|
||||
res = requests.get(url)
|
||||
assert res.status_code == 404
|
||||
|
||||
@@ -12,7 +12,7 @@ def create_server():
|
||||
|
||||
def test_mcp_no_proxy():
|
||||
global server
|
||||
server.webui_mcp_proxy = False
|
||||
server.ui_mcp_proxy = False
|
||||
server.start()
|
||||
|
||||
res = server.make_request("GET", "/cors-proxy")
|
||||
@@ -21,7 +21,7 @@ def test_mcp_no_proxy():
|
||||
|
||||
def test_mcp_proxy():
|
||||
global server
|
||||
server.webui_mcp_proxy = True
|
||||
server.ui_mcp_proxy = True
|
||||
server.start()
|
||||
|
||||
url = f"http://{server.server_host}:{server.server_port}/cors-proxy?url=http://example.com"
|
||||
@@ -32,7 +32,7 @@ def test_mcp_proxy():
|
||||
|
||||
def test_mcp_proxy_custom_port():
|
||||
global server
|
||||
server.webui_mcp_proxy = True
|
||||
server.ui_mcp_proxy = True
|
||||
server.start()
|
||||
|
||||
# try getting the server's models API via the proxy
|
||||
|
||||
@@ -94,7 +94,7 @@ class ServerProcess:
|
||||
enable_ctx_shift: int | None = False
|
||||
spec_draft_n_min: int | None = None
|
||||
spec_draft_n_max: int | None = None
|
||||
no_webui: bool | None = None
|
||||
no_ui: bool | None = None
|
||||
jinja: bool | None = None
|
||||
reasoning_format: Literal['deepseek', 'none', 'nothink'] | None = None
|
||||
reasoning: Literal['on', 'off', 'auto'] | None = None
|
||||
@@ -107,7 +107,7 @@ class ServerProcess:
|
||||
cache_ram: int | None = None
|
||||
no_cache_idle_slots: bool = False
|
||||
log_path: str | None = None
|
||||
webui_mcp_proxy: bool = False
|
||||
ui_mcp_proxy: bool = False
|
||||
backend_sampling: bool = False
|
||||
gcp_compat: bool = False
|
||||
|
||||
@@ -225,8 +225,8 @@ class ServerProcess:
|
||||
server_args.extend(["--spec-draft-n-max", self.spec_draft_n_max])
|
||||
if self.spec_draft_n_min:
|
||||
server_args.extend(["--spec-draft-n-min", self.spec_draft_n_min])
|
||||
if self.no_webui:
|
||||
server_args.append("--no-webui")
|
||||
if self.no_ui:
|
||||
server_args.append("--no-ui")
|
||||
if self.no_models_autoload:
|
||||
server_args.append("--no-models-autoload")
|
||||
if self.jinja:
|
||||
@@ -251,8 +251,8 @@ class ServerProcess:
|
||||
server_args.extend(["--cache-ram", self.cache_ram])
|
||||
if self.no_cache_idle_slots:
|
||||
server_args.append("--no-cache-idle-slots")
|
||||
if self.webui_mcp_proxy:
|
||||
server_args.append("--webui-mcp-proxy")
|
||||
if self.ui_mcp_proxy:
|
||||
server_args.append("--ui-mcp-proxy")
|
||||
if self.backend_sampling:
|
||||
server_args.append("--backend_sampling")
|
||||
if self.gcp_compat:
|
||||
|
||||
Vendored
+96
-145
@@ -5809,11 +5809,9 @@ std::string decode_query_component(const std::string &component,
|
||||
|
||||
for (size_t i = 0; i < component.size(); i++) {
|
||||
if (component[i] == '%' && i + 2 < component.size()) {
|
||||
std::string hex = component.substr(i + 1, 2);
|
||||
char *end;
|
||||
unsigned long value = std::strtoul(hex.c_str(), &end, 16);
|
||||
if (end == hex.c_str() + 2) {
|
||||
result += static_cast<char>(value);
|
||||
auto val = 0;
|
||||
if (detail::from_hex_to_i(component, i + 1, 2, val)) {
|
||||
result += static_cast<char>(val);
|
||||
i += 2;
|
||||
} else {
|
||||
result += component[i];
|
||||
@@ -12551,6 +12549,21 @@ bool parse_ipv4(const std::string &str, unsigned char *out) {
|
||||
return *p == '\0';
|
||||
}
|
||||
|
||||
// Parse an IP literal (IPv4 or IPv6) into raw network-order bytes.
|
||||
// `out` must have room for at least 16 bytes. Returns the address length
|
||||
// (4 for IPv4, 16 for IPv6) on success, or 0 if the string is not an IP
|
||||
// literal. Used to match a host against iPAddress SANs the same way the
|
||||
// OpenSSL backend does via X509_check_ip.
|
||||
size_t parse_ip_address(const std::string &str, unsigned char *out) {
|
||||
if (is_ipv4_address(str)) { return parse_ipv4(str, out) ? 4 : 0; }
|
||||
struct in6_addr addr6 = {};
|
||||
if (inet_pton(AF_INET6, str.c_str(), &addr6) == 1) {
|
||||
memcpy(out, &addr6, 16);
|
||||
return 16;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
// Enumerate Windows system certificates and call callback with DER data
|
||||
template <typename Callback>
|
||||
@@ -12852,6 +12865,30 @@ int openssl_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) {
|
||||
return callback(verify_ctx) ? 1 : 0;
|
||||
}
|
||||
|
||||
// X509_STORE_get0_objects is deprecated since OpenSSL 4.0 because it is not
|
||||
// thread-safe; X509_STORE_get1_objects (OpenSSL 3.3+) returns a snapshot
|
||||
// that must be released with release_store_objects
|
||||
#if !defined(OPENSSL_IS_BORINGSSL) && !defined(LIBRESSL_VERSION_NUMBER) && \
|
||||
OPENSSL_VERSION_NUMBER >= 0x30300000L
|
||||
#define CPPHTTPLIB_HAS_X509_STORE_GET1_OBJECTS
|
||||
#endif
|
||||
|
||||
// Fetch the objects held by an X509_STORE.
//
// When CPPHTTPLIB_HAS_X509_STORE_GET1_OBJECTS is defined this uses
// X509_STORE_get1_objects, otherwise X509_STORE_get0_objects. Either way the
// result must be handed to release_store_objects() once the caller is done.
STACK_OF(X509_OBJECT) * get_store_objects(X509_STORE *store) {
#ifdef CPPHTTPLIB_HAS_X509_STORE_GET1_OBJECTS
  STACK_OF(X509_OBJECT) *objs = X509_STORE_get1_objects(store);
#else
  STACK_OF(X509_OBJECT) *objs = X509_STORE_get0_objects(store);
#endif
  return objs;
}
|
||||
|
||||
// Counterpart of get_store_objects(): frees the object stack when it came
// from the get1 variant, and does nothing for the get0 variant.
void release_store_objects(STACK_OF(X509_OBJECT) * objs) {
#ifndef CPPHTTPLIB_HAS_X509_STORE_GET1_OBJECTS
  // get0 variant returns an internal pointer; nothing to free
  (void)objs;
#else
  sk_X509_OBJECT_pop_free(objs, X509_OBJECT_free);
#endif
}
|
||||
|
||||
} // namespace impl
|
||||
|
||||
ctx_t create_client_context() {
|
||||
@@ -13373,11 +13410,19 @@ std::string get_cert_subject_cn(cert_t cert) {
|
||||
auto subject_name = X509_get_subject_name(x509);
|
||||
if (!subject_name) return "";
|
||||
|
||||
char buf[256];
|
||||
auto len =
|
||||
X509_NAME_get_text_by_NID(subject_name, NID_commonName, buf, sizeof(buf));
|
||||
if (len < 0) return "";
|
||||
return std::string(buf, static_cast<size_t>(len));
|
||||
// X509_NAME_get_text_by_NID is deprecated since OpenSSL 4.0
|
||||
auto idx = X509_NAME_get_index_by_NID(subject_name, NID_commonName, -1);
|
||||
if (idx < 0) return "";
|
||||
|
||||
auto entry = X509_NAME_get_entry(subject_name, idx);
|
||||
if (!entry) return "";
|
||||
|
||||
auto data = X509_NAME_ENTRY_get_data(entry);
|
||||
if (!data) return "";
|
||||
|
||||
return std::string(
|
||||
reinterpret_cast<const char *>(ASN1_STRING_get0_data(data)),
|
||||
static_cast<size_t>(ASN1_STRING_length(data)));
|
||||
}
|
||||
|
||||
std::string get_cert_issuer_name(cert_t cert) {
|
||||
@@ -13582,8 +13627,9 @@ size_t get_ca_certs(ctx_t ctx, std::vector<cert_t> &certs) {
|
||||
auto store = SSL_CTX_get_cert_store(ssl_ctx);
|
||||
if (!store) { return 0; }
|
||||
|
||||
auto objs = X509_STORE_get0_objects(store);
|
||||
auto objs = impl::get_store_objects(store);
|
||||
if (!objs) { return 0; }
|
||||
auto se = detail::scope_exit([&] { impl::release_store_objects(objs); });
|
||||
|
||||
auto count = sk_X509_OBJECT_num(objs);
|
||||
for (decltype(count) i = 0; i < count; i++) {
|
||||
@@ -13609,8 +13655,9 @@ std::vector<std::string> get_ca_names(ctx_t ctx) {
|
||||
auto store = SSL_CTX_get_cert_store(ssl_ctx);
|
||||
if (!store) { return names; }
|
||||
|
||||
auto objs = X509_STORE_get0_objects(store);
|
||||
auto objs = impl::get_store_objects(store);
|
||||
if (!objs) { return names; }
|
||||
auto se = detail::scope_exit([&] { impl::release_store_objects(objs); });
|
||||
|
||||
auto count = sk_X509_OBJECT_num(objs);
|
||||
for (decltype(count) i = 0; i < count; i++) {
|
||||
@@ -13716,110 +13763,6 @@ std::string verify_error_string(long error_code) {
|
||||
|
||||
} // namespace tls
|
||||
|
||||
// Checks whether the server certificate identifies the host this client
// connected to (host_).
//
// Identity rules, from RFC 2818 section 3.1 "Server Identity":
//   - If a subjectAltName extension of type dNSName is present, it MUST be
//     used as the identity. Otherwise the (most specific) Common Name field
//     of the Subject MUST be used; this is existing practice but deprecated,
//     and CAs are encouraged to use dNSName instead.
//   - Matching follows the rules of RFC 2459. If several identities of a
//     given type are present, a match on any one of them is acceptable.
//   - Names may contain the wildcard character '*', which matches any single
//     domain name component or component fragment: *.a.com matches foo.a.com
//     but not bar.foo.a.com; f*.com matches foo.com but not bar.com.
//   - If the URI is given as an IP address rather than a hostname, an
//     iPAddress subjectAltName must be present and must exactly match it.
//
// Note: the Common Name check runs whenever the subjectAltName check does not
// succeed.
bool SSLClient::verify_host(X509 *server_cert) const {
  if (verify_host_with_subject_alt_name(server_cert)) { return true; }
  return verify_host_with_common_name(server_cert);
}
|
||||
|
||||
bool
|
||||
SSLClient::verify_host_with_subject_alt_name(X509 *server_cert) const {
|
||||
auto ret = false;
|
||||
|
||||
auto type = GEN_DNS;
|
||||
|
||||
struct in6_addr addr6 = {};
|
||||
struct in_addr addr = {};
|
||||
size_t addr_len = 0;
|
||||
|
||||
#ifndef __MINGW32__
|
||||
if (inet_pton(AF_INET6, host_.c_str(), &addr6)) {
|
||||
type = GEN_IPADD;
|
||||
addr_len = sizeof(struct in6_addr);
|
||||
} else if (inet_pton(AF_INET, host_.c_str(), &addr)) {
|
||||
type = GEN_IPADD;
|
||||
addr_len = sizeof(struct in_addr);
|
||||
}
|
||||
#endif
|
||||
|
||||
auto alt_names = static_cast<const struct stack_st_GENERAL_NAME *>(
|
||||
X509_get_ext_d2i(server_cert, NID_subject_alt_name, nullptr, nullptr));
|
||||
|
||||
if (alt_names) {
|
||||
auto dsn_matched = false;
|
||||
auto ip_matched = false;
|
||||
|
||||
auto count = sk_GENERAL_NAME_num(alt_names);
|
||||
|
||||
for (decltype(count) i = 0; i < count && !dsn_matched; i++) {
|
||||
auto val = sk_GENERAL_NAME_value(alt_names, i);
|
||||
if (!val || val->type != type) { continue; }
|
||||
|
||||
auto name =
|
||||
reinterpret_cast<const char *>(ASN1_STRING_get0_data(val->d.ia5));
|
||||
if (name == nullptr) { continue; }
|
||||
|
||||
auto name_len = static_cast<size_t>(ASN1_STRING_length(val->d.ia5));
|
||||
|
||||
switch (type) {
|
||||
case GEN_DNS:
|
||||
dsn_matched =
|
||||
detail::match_hostname(std::string(name, name_len), host_);
|
||||
break;
|
||||
|
||||
case GEN_IPADD:
|
||||
if (!memcmp(&addr6, name, addr_len) || !memcmp(&addr, name, addr_len)) {
|
||||
ip_matched = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (dsn_matched || ip_matched) { ret = true; }
|
||||
}
|
||||
|
||||
GENERAL_NAMES_free(const_cast<STACK_OF(GENERAL_NAME) *>(
|
||||
reinterpret_cast<const STACK_OF(GENERAL_NAME) *>(alt_names)));
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Matches host_ against the Common Name of the certificate's subject using
// detail::match_hostname. Returns false when the certificate has no subject
// name or the Common Name cannot be read.
bool SSLClient::verify_host_with_common_name(X509 *server_cert) const {
  const auto subject = X509_get_subject_name(server_cert);
  if (subject == nullptr) { return false; }

  char cn[BUFSIZ];
  auto cn_len =
      X509_NAME_get_text_by_NID(subject, NID_commonName, cn, sizeof(cn));
  if (cn_len == -1) { return false; }

  return detail::match_hostname(std::string(cn, static_cast<size_t>(cn_len)),
                                host_);
}
|
||||
|
||||
#endif // CPPHTTPLIB_OPENSSL_SUPPORT
|
||||
|
||||
/*
|
||||
@@ -14622,10 +14565,10 @@ bool verify_hostname(cert_t cert, const char *hostname) {
|
||||
auto mcert = static_cast<const mbedtls_x509_crt *>(cert);
|
||||
std::string host_str(hostname);
|
||||
|
||||
// Check if hostname is an IP address
|
||||
bool is_ip = impl::is_ipv4_address(host_str);
|
||||
unsigned char ip_bytes[4];
|
||||
if (is_ip) { impl::parse_ipv4(host_str, ip_bytes); }
|
||||
// Check if hostname is an IP address (IPv4 or IPv6)
|
||||
unsigned char ip_bytes[16];
|
||||
auto ip_len = impl::parse_ip_address(host_str, ip_bytes);
|
||||
auto is_ip = ip_len > 0;
|
||||
|
||||
// Check Subject Alternative Names (SAN)
|
||||
// In Mbed TLS 3.x, subject_alt_names contains raw values without ASN.1 tags
|
||||
@@ -14637,9 +14580,9 @@ bool verify_hostname(cert_t cert, const char *hostname) {
|
||||
size_t len = san->buf.len;
|
||||
|
||||
if (is_ip) {
|
||||
// Check if this SAN is an IPv4 address (4 bytes)
|
||||
if (len == 4 && memcmp(p, ip_bytes, 4) == 0) { return true; }
|
||||
// Check if this SAN is an IPv6 address (16 bytes) - skip for now
|
||||
// For an IP host, only a matching iPAddress SAN of the same family
|
||||
// (4 bytes for IPv4, 16 bytes for IPv6) may authenticate it.
|
||||
if (len == ip_len && memcmp(p, ip_bytes, ip_len) == 0) { return true; }
|
||||
} else {
|
||||
// Check if this SAN is a DNS name (printable ASCII string)
|
||||
bool is_dns = len > 0;
|
||||
@@ -14654,21 +14597,25 @@ bool verify_hostname(cert_t cert, const char *hostname) {
|
||||
san = san->next;
|
||||
}
|
||||
|
||||
// Fallback: Check Common Name (CN) in subject
|
||||
char cn[256];
|
||||
int ret = mbedtls_x509_dn_gets(cn, sizeof(cn), &mcert->subject);
|
||||
if (ret > 0) {
|
||||
std::string cn_str(cn);
|
||||
// Fallback: Check Common Name (CN) in subject. Skipped for IP-literal hosts:
|
||||
// an IP identity is only valid via an iPAddress SAN, never the CN (RFC 9110;
|
||||
// the OpenSSL backend's X509_check_ip behaves the same way).
|
||||
if (!is_ip) {
|
||||
char cn[256];
|
||||
int ret = mbedtls_x509_dn_gets(cn, sizeof(cn), &mcert->subject);
|
||||
if (ret > 0) {
|
||||
std::string cn_str(cn);
|
||||
|
||||
// Look for "CN=" in the DN string
|
||||
size_t cn_pos = cn_str.find("CN=");
|
||||
if (cn_pos != std::string::npos) {
|
||||
size_t start = cn_pos + 3;
|
||||
size_t end = cn_str.find(',', start);
|
||||
std::string cn_value =
|
||||
cn_str.substr(start, end == std::string::npos ? end : end - start);
|
||||
// Look for "CN=" in the DN string
|
||||
size_t cn_pos = cn_str.find("CN=");
|
||||
if (cn_pos != std::string::npos) {
|
||||
size_t start = cn_pos + 3;
|
||||
size_t end = cn_str.find(',', start);
|
||||
std::string cn_value =
|
||||
cn_str.substr(start, end == std::string::npos ? end : end - start);
|
||||
|
||||
if (detail::match_hostname(cn_value, host_str)) { return true; }
|
||||
if (detail::match_hostname(cn_value, host_str)) { return true; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15774,10 +15721,10 @@ bool verify_hostname(cert_t cert, const char *hostname) {
|
||||
auto x509 = static_cast<WOLFSSL_X509 *>(cert);
|
||||
std::string host_str(hostname);
|
||||
|
||||
// Check if hostname is an IP address
|
||||
bool is_ip = impl::is_ipv4_address(host_str);
|
||||
unsigned char ip_bytes[4];
|
||||
if (is_ip) { impl::parse_ipv4(host_str, ip_bytes); }
|
||||
// Check if hostname is an IP address (IPv4 or IPv6)
|
||||
unsigned char ip_bytes[16];
|
||||
auto ip_len = impl::parse_ip_address(host_str, ip_bytes);
|
||||
auto is_ip = ip_len > 0;
|
||||
|
||||
// Check Subject Alternative Names
|
||||
auto *san_names = static_cast<WOLF_STACK_OF(WOLFSSL_GENERAL_NAME) *>(
|
||||
@@ -15804,10 +15751,12 @@ bool verify_hostname(cert_t cert, const char *hostname) {
|
||||
}
|
||||
}
|
||||
} else if (is_ip && names->type == WOLFSSL_GEN_IPADD) {
|
||||
// IP address
|
||||
// IP address: only an iPAddress SAN of the same family (4 bytes for
|
||||
// IPv4, 16 bytes for IPv6) may authenticate the host.
|
||||
unsigned char *ip_data = wolfSSL_ASN1_STRING_data(names->d.iPAddress);
|
||||
int ip_len = wolfSSL_ASN1_STRING_length(names->d.iPAddress);
|
||||
if (ip_data && ip_len == 4 && memcmp(ip_data, ip_bytes, 4) == 0) {
|
||||
auto san_ip_len = wolfSSL_ASN1_STRING_length(names->d.iPAddress);
|
||||
if (ip_data && san_ip_len == static_cast<int>(ip_len) &&
|
||||
memcmp(ip_data, ip_bytes, ip_len) == 0) {
|
||||
wolfSSL_sk_free(san_names);
|
||||
return true;
|
||||
}
|
||||
@@ -15816,8 +15765,10 @@ bool verify_hostname(cert_t cert, const char *hostname) {
|
||||
wolfSSL_sk_free(san_names);
|
||||
}
|
||||
|
||||
// Fallback: Check Common Name (CN) in subject
|
||||
WOLFSSL_X509_NAME *subject = wolfSSL_X509_get_subject_name(x509);
|
||||
// Fallback: Check Common Name (CN) in subject. Skipped for IP-literal hosts:
|
||||
// an IP identity is only valid via an iPAddress SAN, never the CN (RFC 9110;
|
||||
// the OpenSSL backend's X509_check_ip behaves the same way).
|
||||
auto subject = is_ip ? nullptr : wolfSSL_X509_get_subject_name(x509);
|
||||
if (subject) {
|
||||
char cn[256] = {};
|
||||
int cn_len = wolfSSL_X509_NAME_get_text_by_NID(subject, NID_commonName, cn,
|
||||
|
||||
Vendored
+63
-18
@@ -8,8 +8,8 @@
|
||||
#ifndef CPPHTTPLIB_HTTPLIB_H
|
||||
#define CPPHTTPLIB_HTTPLIB_H
|
||||
|
||||
#define CPPHTTPLIB_VERSION "0.47.0"
|
||||
#define CPPHTTPLIB_VERSION_NUM "0x002f00"
|
||||
#define CPPHTTPLIB_VERSION "0.48.0"
|
||||
#define CPPHTTPLIB_VERSION_NUM "0x003000"
|
||||
|
||||
#ifdef _WIN32
|
||||
#if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x0A00
|
||||
@@ -686,18 +686,70 @@ inline from_chars_result<T> from_chars(const char *first, const char *last,
|
||||
return {p, std::errc{}};
|
||||
}
|
||||
|
||||
// from_chars for double (simple wrapper for strtod)
|
||||
// from_chars for double (hand-written, locale-independent)
|
||||
//
|
||||
// The only double consumed by this library is the HTTP quality value, whose
|
||||
// grammar is (RFC 9110 12.4.2):
|
||||
// qvalue = ( "0" [ "." 0*3DIGIT ] ) / ( "1" [ "." 0*3("0") ] )
|
||||
// i.e. a non-negative decimal with no sign, exponent, "inf"/"nan", or wide
|
||||
// magnitude. So this parser recognizes exactly 1*DIGIT [ "." *DIGIT ] with
|
||||
// '.' always the decimal separator (std::strtod would instead read it from the
|
||||
// global C locale, mis-parsing q-values once an embedder calls
|
||||
// setlocale(LC_ALL, "") into a comma-decimal locale). The caller range-checks
|
||||
// the result to [0, 1], so inputs outside that range need not be distinguished
|
||||
// here. Allocation-free, single pass, and free of the overflow/rounding edge
|
||||
// cases that exponent and wide-range handling would introduce.
|
||||
inline from_chars_result<double> from_chars(const char *first, const char *last,
|
||||
double &value) {
|
||||
std::string s(first, last);
|
||||
char *endptr = nullptr;
|
||||
errno = 0;
|
||||
value = std::strtod(s.c_str(), &endptr);
|
||||
if (endptr == s.c_str()) { return {first, std::errc::invalid_argument}; }
|
||||
if (errno == ERANGE) {
|
||||
return {first + (endptr - s.c_str()), std::errc::result_out_of_range};
|
||||
value = 0.0;
|
||||
const char *p = first;
|
||||
|
||||
// Each 1eN is exactly representable, so a single final division by the
|
||||
// matching entry yields a correctly-rounded result.
|
||||
static const double powers_of_ten[] = {
|
||||
1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
|
||||
1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18};
|
||||
const int max_frac_digits =
|
||||
static_cast<int>(sizeof(powers_of_ten) / sizeof(powers_of_ten[0])) - 1;
|
||||
|
||||
// Accumulate digits into a 64-bit integer and remember how many were
|
||||
// fractional. Two independent caps keep this bounded and safe:
|
||||
// * accumulation saturates before mantissa could overflow uint64_t, and
|
||||
// * frac_digits is capped at max_frac_digits so it is always a valid index
|
||||
// into powers_of_ten (without this an input like "0.000...0" would never
|
||||
// grow mantissa, so the saturation cap alone would not bound it).
|
||||
// Both caps only drop digits far beyond the precision a q-value needs; any
|
||||
// value they would change is well outside [0, 1] and rejected by the caller.
|
||||
uint64_t mantissa = 0;
|
||||
int frac_digits = 0;
|
||||
bool seen_digit = false;
|
||||
|
||||
const uint64_t limit = ((std::numeric_limits<uint64_t>::max)() - 9) / 10;
|
||||
auto accumulate = [&](char c) {
|
||||
if (mantissa <= limit) {
|
||||
mantissa = mantissa * 10 + static_cast<uint64_t>(c - '0');
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
for (; p != last && '0' <= *p && *p <= '9'; ++p) {
|
||||
seen_digit = true;
|
||||
accumulate(*p);
|
||||
}
|
||||
return {first + (endptr - s.c_str()), std::errc{}};
|
||||
|
||||
if (p != last && *p == '.') {
|
||||
++p;
|
||||
for (; p != last && '0' <= *p && *p <= '9'; ++p) {
|
||||
seen_digit = true;
|
||||
if (frac_digits < max_frac_digits && accumulate(*p)) { ++frac_digits; }
|
||||
}
|
||||
}
|
||||
|
||||
if (!seen_digit) { return {first, std::errc::invalid_argument}; }
|
||||
|
||||
value = static_cast<double>(mantissa) / powers_of_ten[frac_digits];
|
||||
return {p, std::errc{}};
|
||||
}
|
||||
|
||||
inline bool parse_port(const char *s, size_t len, int &port) {
|
||||
@@ -2826,13 +2878,6 @@ private:
|
||||
#endif
|
||||
|
||||
friend class ClientImpl;
|
||||
|
||||
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
|
||||
private:
|
||||
bool verify_host(X509 *server_cert) const;
|
||||
bool verify_host_with_subject_alt_name(X509 *server_cert) const;
|
||||
bool verify_host_with_common_name(X509 *server_cert) const;
|
||||
#endif
|
||||
};
|
||||
#endif // CPPHTTPLIB_SSL_ENABLED
|
||||
|
||||
|
||||
Reference in New Issue
Block a user