mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-19 20:27:39 +02:00
Compare commits
7 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| fabde3bf51 | |||
| 0d2d9ccbf6 | |||
| 8c2d6f6475 | |||
| 38724ab593 | |||
| e2e7a9b2d0 | |||
| b14e3fb90c | |||
| 159d093a43 |
@@ -13,6 +13,20 @@ ARG APP_REVISION=N/A
|
||||
# BUILD STAGE
|
||||
# Compile all binary files and libraries
|
||||
# ==============================================================================
|
||||
ARG NODE_VERSION=24
|
||||
|
||||
FROM docker.io/node:$NODE_VERSION AS web
|
||||
|
||||
ARG APP_VERSION
|
||||
|
||||
WORKDIR /app/tools/ui
|
||||
|
||||
COPY tools/ui/package.json tools/ui/package-lock.json ./
|
||||
RUN npm ci
|
||||
|
||||
COPY tools/ui/ ./
|
||||
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build
|
||||
|
||||
FROM ${CANN_BASE_IMAGE} AS build
|
||||
|
||||
# -- Install build dependencies --
|
||||
@@ -26,6 +40,8 @@ WORKDIR /app
|
||||
# -- Copy project files --
|
||||
COPY . .
|
||||
|
||||
COPY --from=web /app/tools/ui/dist tools/ui/dist
|
||||
|
||||
# -- Set CANN environment variables (required for compilation) --
|
||||
# Using ENV instead of `source` allows environment variables to persist across the entire image layer
|
||||
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
|
||||
|
||||
@@ -3,6 +3,20 @@ ARG BUILD_DATE=N/A
|
||||
ARG APP_VERSION=N/A
|
||||
ARG APP_REVISION=N/A
|
||||
|
||||
ARG NODE_VERSION=24
|
||||
|
||||
FROM docker.io/node:$NODE_VERSION AS web
|
||||
|
||||
ARG APP_VERSION
|
||||
|
||||
WORKDIR /app/tools/ui
|
||||
|
||||
COPY tools/ui/package.json tools/ui/package-lock.json ./
|
||||
RUN npm ci
|
||||
|
||||
COPY tools/ui/ ./
|
||||
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build
|
||||
|
||||
FROM docker.io/ubuntu:$UBUNTU_VERSION AS build
|
||||
|
||||
ARG TARGETARCH
|
||||
@@ -16,6 +30,8 @@ WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
COPY --from=web /app/tools/ui/dist tools/ui/dist
|
||||
|
||||
RUN if [ "$TARGETARCH" = "amd64" ] || [ "$TARGETARCH" = "arm64" ]; then \
|
||||
cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
|
||||
else \
|
||||
|
||||
@@ -11,6 +11,20 @@ ARG BUILD_DATE=N/A
|
||||
ARG APP_VERSION=N/A
|
||||
ARG APP_REVISION=N/A
|
||||
|
||||
ARG NODE_VERSION=24
|
||||
|
||||
FROM docker.io/node:$NODE_VERSION AS web
|
||||
|
||||
ARG APP_VERSION
|
||||
|
||||
WORKDIR /app/tools/ui
|
||||
|
||||
COPY tools/ui/package.json tools/ui/package-lock.json ./
|
||||
RUN npm ci
|
||||
|
||||
COPY tools/ui/ ./
|
||||
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build
|
||||
|
||||
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
|
||||
|
||||
ARG GCC_VERSION
|
||||
@@ -26,6 +40,8 @@ WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
COPY --from=web /app/tools/ui/dist tools/ui/dist
|
||||
|
||||
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
|
||||
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
|
||||
fi && \
|
||||
|
||||
@@ -5,6 +5,20 @@ ARG APP_REVISION=N/A
|
||||
|
||||
## Build Image
|
||||
|
||||
ARG NODE_VERSION=24
|
||||
|
||||
FROM docker.io/node:$NODE_VERSION AS web
|
||||
|
||||
ARG APP_VERSION
|
||||
|
||||
WORKDIR /app/tools/ui
|
||||
|
||||
COPY tools/ui/package.json tools/ui/package-lock.json ./
|
||||
RUN npm ci
|
||||
|
||||
COPY tools/ui/ ./
|
||||
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build
|
||||
|
||||
FROM docker.io/intel/deep-learning-essentials:$ONEAPI_VERSION AS build
|
||||
|
||||
ARG GGML_SYCL_F16=ON
|
||||
@@ -22,6 +36,8 @@ WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
COPY --from=web /app/tools/ui/dist tools/ui/dist
|
||||
|
||||
RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
|
||||
echo "GGML_SYCL_F16 is set" \
|
||||
&& export OPT_SYCL_F16="-DGGML_SYCL_F16=ON" \
|
||||
|
||||
@@ -10,6 +10,20 @@ ARG BUILD_DATE=N/A
|
||||
ARG APP_VERSION=N/A
|
||||
ARG APP_REVISION=N/A
|
||||
|
||||
ARG NODE_VERSION=24
|
||||
|
||||
FROM docker.io/node:$NODE_VERSION AS web
|
||||
|
||||
ARG APP_VERSION
|
||||
|
||||
WORKDIR /app/tools/ui
|
||||
|
||||
COPY tools/ui/package.json tools/ui/package-lock.json ./
|
||||
RUN npm ci
|
||||
|
||||
COPY tools/ui/ ./
|
||||
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build
|
||||
|
||||
FROM ${BASE_MUSA_DEV_CONTAINER} AS build
|
||||
|
||||
# MUSA architecture to build for (defaults to all supported archs)
|
||||
@@ -29,6 +43,8 @@ WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
COPY --from=web /app/tools/ui/dist tools/ui/dist
|
||||
|
||||
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
|
||||
export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
|
||||
fi && \
|
||||
|
||||
@@ -22,6 +22,20 @@ ARG BUILD_DATE=N/A
|
||||
ARG APP_VERSION=N/A
|
||||
ARG APP_REVISION=N/A
|
||||
|
||||
ARG NODE_VERSION=24
|
||||
|
||||
FROM docker.io/node:$NODE_VERSION AS web
|
||||
|
||||
ARG APP_VERSION
|
||||
|
||||
WORKDIR /app/tools/ui
|
||||
|
||||
COPY tools/ui/package.json tools/ui/package-lock.json ./
|
||||
RUN npm ci
|
||||
|
||||
COPY tools/ui/ ./
|
||||
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build
|
||||
|
||||
## Build Image
|
||||
FROM docker.io/ubuntu:${UBUNTU_VERSION} AS build
|
||||
|
||||
@@ -69,6 +83,8 @@ WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
COPY --from=web /app/tools/ui/dist tools/ui/dist
|
||||
|
||||
# Build Stage
|
||||
RUN bash -c "source ${OpenVINO_DIR}/setupvars.sh && \
|
||||
cmake -B build/ReleaseOV -G Ninja \
|
||||
|
||||
@@ -11,6 +11,20 @@ ARG BUILD_DATE=N/A
|
||||
ARG APP_VERSION=N/A
|
||||
ARG APP_REVISION=N/A
|
||||
|
||||
ARG NODE_VERSION=24
|
||||
|
||||
FROM docker.io/node:$NODE_VERSION AS web
|
||||
|
||||
ARG APP_VERSION
|
||||
|
||||
WORKDIR /app/tools/ui
|
||||
|
||||
COPY tools/ui/package.json tools/ui/package-lock.json ./
|
||||
RUN npm ci
|
||||
|
||||
COPY tools/ui/ ./
|
||||
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build
|
||||
|
||||
### Build image
|
||||
FROM ${BASE_ROCM_DEV_CONTAINER} AS build
|
||||
|
||||
@@ -38,6 +52,8 @@ WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
COPY --from=web /app/tools/ui/dist tools/ui/dist
|
||||
|
||||
RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
|
||||
cmake -S . -B build \
|
||||
-DGGML_HIP=ON \
|
||||
|
||||
@@ -4,6 +4,20 @@ ARG BUILD_DATE=N/A
|
||||
ARG APP_VERSION=N/A
|
||||
ARG APP_REVISION=N/A
|
||||
|
||||
ARG NODE_VERSION=24
|
||||
|
||||
FROM docker.io/node:$NODE_VERSION AS web
|
||||
|
||||
ARG APP_VERSION
|
||||
|
||||
WORKDIR /app/tools/ui
|
||||
|
||||
COPY tools/ui/package.json tools/ui/package-lock.json ./
|
||||
RUN npm ci
|
||||
|
||||
COPY tools/ui/ ./
|
||||
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build
|
||||
|
||||
### Build Llama.cpp stage
|
||||
FROM docker.io/gcc:${GCC_VERSION} AS build
|
||||
|
||||
@@ -20,6 +34,8 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
|
||||
WORKDIR /app
|
||||
COPY . .
|
||||
|
||||
COPY --from=web /app/tools/ui/dist tools/ui/dist
|
||||
|
||||
RUN --mount=type=cache,target=/root/.ccache \
|
||||
--mount=type=cache,target=/app/build \
|
||||
cmake -S . -B build -G Ninja \
|
||||
|
||||
@@ -3,6 +3,20 @@ ARG BUILD_DATE=N/A
|
||||
ARG APP_VERSION=N/A
|
||||
ARG APP_REVISION=N/A
|
||||
|
||||
ARG NODE_VERSION=24
|
||||
|
||||
FROM docker.io/node:$NODE_VERSION AS web
|
||||
|
||||
ARG APP_VERSION
|
||||
|
||||
WORKDIR /app/tools/ui
|
||||
|
||||
COPY tools/ui/package.json tools/ui/package-lock.json ./
|
||||
RUN npm ci
|
||||
|
||||
COPY tools/ui/ ./
|
||||
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build
|
||||
|
||||
FROM docker.io/ubuntu:$UBUNTU_VERSION AS build
|
||||
|
||||
# Install build tools
|
||||
@@ -17,6 +31,8 @@ WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
COPY --from=web /app/tools/ui/dist tools/ui/dist
|
||||
|
||||
RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=ON -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON && \
|
||||
cmake --build build --config Release -j$(nproc)
|
||||
|
||||
|
||||
@@ -3,6 +3,20 @@ ARG BUILD_DATE=N/A
|
||||
ARG APP_VERSION=N/A
|
||||
ARG APP_REVISION=N/A
|
||||
|
||||
ARG NODE_VERSION=24
|
||||
|
||||
FROM docker.io/node:$NODE_VERSION AS web
|
||||
|
||||
ARG APP_VERSION
|
||||
|
||||
WORKDIR /app/tools/ui
|
||||
|
||||
COPY tools/ui/package.json tools/ui/package-lock.json ./
|
||||
RUN npm ci
|
||||
|
||||
COPY tools/ui/ ./
|
||||
RUN LLAMA_BUILD_NUMBER="$APP_VERSION" npm run build
|
||||
|
||||
FROM docker.io/ubuntu:$UBUNTU_VERSION AS build
|
||||
|
||||
RUN apt-get update && \
|
||||
@@ -14,6 +28,8 @@ WORKDIR /app
|
||||
|
||||
COPY . .
|
||||
|
||||
COPY --from=web /app/tools/ui/dist tools/ui/dist
|
||||
|
||||
RUN cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DLLAMA_BUILD_TESTS=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DGGML_ZENDNN=ON && \
|
||||
cmake --build build -j $(nproc)
|
||||
|
||||
|
||||
@@ -10,6 +10,9 @@
|
||||
|
||||
build*/
|
||||
|
||||
tools/ui/node_modules/
|
||||
tools/ui/dist/
|
||||
|
||||
models/*
|
||||
|
||||
/llama-cli
|
||||
|
||||
+20
-54
@@ -2830,62 +2830,26 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
params.api_prefix = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_API_PREFIX"));
|
||||
// Deprecated: use --ui-config instead (kept for backward compat)
|
||||
add_opt(common_arg(
|
||||
{"--webui-config"}, "JSON",
|
||||
"[DEPRECATED: use --ui-config] JSON that provides default WebUI settings (overrides WebUI defaults)",
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.ui_config_json = value;
|
||||
params.webui_config_json = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_WEBUI_CONFIG"));
|
||||
|
||||
add_opt(common_arg(
|
||||
{"--ui-config"}, "JSON",
|
||||
{"--ui-config", "--webui-config"}, "JSON",
|
||||
"JSON that provides default UI settings (overrides UI defaults)",
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.ui_config_json = value;
|
||||
params.webui_config_json = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_UI_CONFIG"));
|
||||
|
||||
// Deprecated: use --ui-config-file instead (kept for backward compat)
|
||||
add_opt(common_arg(
|
||||
{"--webui-config-file"}, "PATH",
|
||||
"[DEPRECATED: use --ui-config-file] JSON file that provides default WebUI settings (overrides WebUI defaults)",
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.ui_config_json = read_file(value);
|
||||
params.webui_config_json = params.ui_config_json;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_WEBUI_CONFIG_FILE"));
|
||||
|
||||
add_opt(common_arg(
|
||||
{"--ui-config-file"}, "PATH",
|
||||
{"--ui-config-file", "--webui-config-file"}, "PATH",
|
||||
"JSON file that provides default UI settings (overrides UI defaults)",
|
||||
[](common_params & params, const std::string & value) {
|
||||
params.ui_config_json = read_file(value);
|
||||
params.webui_config_json = params.ui_config_json;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_UI_CONFIG_FILE"));
|
||||
|
||||
// Deprecated: use --ui-mcp-proxy instead (kept for backward compat)
|
||||
add_opt(common_arg(
|
||||
{"--webui-mcp-proxy"},
|
||||
{"--no-webui-mcp-proxy"},
|
||||
"[DEPRECATED: use --ui-mcp-proxy/--no-ui-mcp-proxy] experimental: whether to enable MCP CORS proxy",
|
||||
[](common_params & params, bool value) {
|
||||
params.ui_mcp_proxy = value;
|
||||
params.webui_mcp_proxy = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_WEBUI_MCP_PROXY"));
|
||||
|
||||
add_opt(common_arg(
|
||||
{"--ui-mcp-proxy"},
|
||||
{"--no-ui-mcp-proxy"},
|
||||
{"--ui-mcp-proxy", "--webui-mcp-proxy"},
|
||||
{"--no-ui-mcp-proxy", "--no-webui-mcp-proxy"},
|
||||
"experimental: whether to enable MCP CORS proxy - do not enable in untrusted environments (default: disabled)",
|
||||
[](common_params & params, bool value) {
|
||||
params.ui_mcp_proxy = value;
|
||||
params.webui_mcp_proxy = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_UI_MCP_PROXY"));
|
||||
add_opt(common_arg(
|
||||
@@ -2897,24 +2861,26 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
params.server_tools = parse_csv_row(value);
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_TOOLS"));
|
||||
// Deprecated: use --ui/--no-ui instead (kept for backward compat)
|
||||
add_opt(common_arg(
|
||||
{"--webui"},
|
||||
{"--no-webui"},
|
||||
"[DEPRECATED: use --ui/--no-ui] whether to enable the Web UI",
|
||||
add_opt(common_arg(
|
||||
{"-ag", "--agent"},
|
||||
{"-no-ag", "--no-agent"},
|
||||
"whether to enable CORS proxy and all built-in tools - do not enable in untrusted environments (default: disabled)",
|
||||
[](common_params & params, bool value) {
|
||||
params.ui = value;
|
||||
params.webui = value;
|
||||
if (value) {
|
||||
params.server_tools = {"all"};
|
||||
params.ui_mcp_proxy = true;
|
||||
} else {
|
||||
params.server_tools.clear();
|
||||
params.ui_mcp_proxy = false;
|
||||
}
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_WEBUI"));
|
||||
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_AGENT"));
|
||||
add_opt(common_arg(
|
||||
{"--ui"},
|
||||
{"--no-ui"},
|
||||
{"--ui", "--webui"},
|
||||
{"--no-ui", "--no-webui"},
|
||||
string_format("whether to enable the Web UI (default: %s)", params.ui ? "enabled" : "disabled"),
|
||||
[](common_params & params, bool value) {
|
||||
params.ui = value;
|
||||
params.webui = value;
|
||||
}
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_UI"));
|
||||
add_opt(common_arg(
|
||||
@@ -2945,7 +2911,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_API_KEY"));
|
||||
add_opt(common_arg(
|
||||
{"--api-key-file"}, "FNAME",
|
||||
"path to file containing API keys (default: none)",
|
||||
"path to file containing API keys, one per line; lines starting with a hash are treated as comments (default: none)",
|
||||
[](common_params & params, const std::string & value) {
|
||||
std::ifstream key_file(value);
|
||||
if (!key_file) {
|
||||
@@ -2953,7 +2919,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
|
||||
}
|
||||
std::string key;
|
||||
while (std::getline(key_file, key)) {
|
||||
if (!key.empty()) {
|
||||
if (!key.empty() && key[0] != '#') {
|
||||
params.api_keys.push_back(key);
|
||||
}
|
||||
}
|
||||
|
||||
+3
-1
@@ -2034,7 +2034,7 @@ bool common_prompt_batch_decode(
|
||||
}
|
||||
|
||||
size_t common_prompt_checkpoint::size() const {
|
||||
return data_tgt.size() + data_dft.size();
|
||||
return data_tgt.size() + data_dft.size() + data_spec.size();
|
||||
}
|
||||
|
||||
bool common_prompt_checkpoint::empty() const {
|
||||
@@ -2049,6 +2049,7 @@ void common_prompt_checkpoint::clear() {
|
||||
|
||||
data_tgt.clear();
|
||||
data_dft.clear();
|
||||
data_spec.clear();
|
||||
}
|
||||
|
||||
void common_prompt_checkpoint::update_pos(
|
||||
@@ -2138,4 +2139,5 @@ void common_prompt_checkpoint::clear_tgt() {
|
||||
|
||||
void common_prompt_checkpoint::clear_dft() {
|
||||
data_dft.clear();
|
||||
data_spec.clear();
|
||||
}
|
||||
|
||||
+5
-7
@@ -363,7 +363,7 @@ struct common_params_speculative {
|
||||
|
||||
uint32_t need_n_rs_seq() const {
|
||||
bool needs_rs_seq = std::any_of(types.begin(), types.end(), [&](auto t) {
|
||||
return t == COMMON_SPECULATIVE_TYPE_DRAFT_MTP;
|
||||
return t == COMMON_SPECULATIVE_TYPE_DRAFT_MTP || t == COMMON_SPECULATIVE_TYPE_DRAFT_EAGLE3;
|
||||
});
|
||||
|
||||
return needs_rs_seq ? draft.n_max : 0u;
|
||||
@@ -624,12 +624,6 @@ struct common_params {
|
||||
|
||||
// UI configs
|
||||
bool ui = true;
|
||||
|
||||
// Deprecated: use ui, ui_mcp_proxy, ui_config_json instead
|
||||
bool webui = ui;
|
||||
bool webui_mcp_proxy = false;
|
||||
std::string webui_config_json;
|
||||
|
||||
bool ui_mcp_proxy = false;
|
||||
std::string ui_config_json;
|
||||
|
||||
@@ -1065,6 +1059,10 @@ struct common_prompt_checkpoint {
|
||||
std::vector<uint8_t> data_tgt;
|
||||
std::vector<uint8_t> data_dft;
|
||||
|
||||
// (optional) speculative-decoding implementation state stashed with the checkpoint
|
||||
// (e.g. eagle3's deferred-boundary g_embd row)
|
||||
std::vector<uint8_t> data_spec;
|
||||
|
||||
size_t size() const;
|
||||
|
||||
bool empty() const;
|
||||
|
||||
@@ -161,6 +161,10 @@ struct common_speculative_impl {
|
||||
|
||||
virtual void accept(llama_seq_id seq_id, uint16_t n_accepted, bool is_other) = 0;
|
||||
|
||||
// (optional) serialize/restore per-seq internal state (e.g. eagle3's deferred boundary).
|
||||
virtual bool get_state(llama_seq_id /*seq_id*/, std::vector<uint8_t> & /*data*/) const { return false; }
|
||||
virtual void set_state(llama_seq_id /*seq_id*/, const std::vector<uint8_t> & /*data*/) {}
|
||||
|
||||
// true if this implementation requires the target context to extract post-norm embeddings
|
||||
virtual bool need_embd() const = 0;
|
||||
|
||||
@@ -841,6 +845,49 @@ struct common_speculative_impl_draft_eagle3 : public common_speculative_impl {
|
||||
(size_t) n_embd_dec * sizeof(float));
|
||||
}
|
||||
|
||||
// we only need to stash the deferred boundary's g_embd row for recurrent/hybrid targets:
|
||||
// their single-position checkpoints drop it on restore
|
||||
bool need_boundary_stash() const {
|
||||
const llama_model * model_tgt = llama_get_model(params.ctx_tgt);
|
||||
return llama_model_is_recurrent(model_tgt) || llama_model_is_hybrid(model_tgt);
|
||||
}
|
||||
|
||||
bool get_state(llama_seq_id seq_id, std::vector<uint8_t> & data) const override {
|
||||
if (!need_boundary_stash()) {
|
||||
return false;
|
||||
}
|
||||
if (seq_id < 0 || seq_id >= (llama_seq_id) n_seq || pending_pos_last[seq_id] < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const llama_pos pos = pending_pos_last[seq_id];
|
||||
const std::vector<float> & g = pending_g_last[seq_id];
|
||||
|
||||
data.resize(sizeof(llama_pos) + g.size() * sizeof(float));
|
||||
std::memcpy(data.data(), &pos, sizeof(llama_pos));
|
||||
std::memcpy(data.data() + sizeof(llama_pos), g.data(), g.size() * sizeof(float));
|
||||
return true;
|
||||
}
|
||||
|
||||
void set_state(llama_seq_id seq_id, const std::vector<uint8_t> & data) override {
|
||||
if (!need_boundary_stash()) {
|
||||
return;
|
||||
}
|
||||
if (seq_id < 0 || seq_id >= (llama_seq_id) n_seq) {
|
||||
return;
|
||||
}
|
||||
if (data.size() != sizeof(llama_pos) + (size_t) n_embd_dec * sizeof(float)) {
|
||||
return;
|
||||
}
|
||||
|
||||
llama_pos pos = -1;
|
||||
std::memcpy(&pos, data.data(), sizeof(llama_pos));
|
||||
|
||||
pending_pos_last[seq_id] = pos;
|
||||
pending_g_last[seq_id].resize(n_embd_dec);
|
||||
std::memcpy(pending_g_last[seq_id].data(), data.data() + sizeof(llama_pos), (size_t) n_embd_dec * sizeof(float));
|
||||
}
|
||||
|
||||
bool need_embd() const override {
|
||||
return false;
|
||||
}
|
||||
@@ -2118,6 +2165,31 @@ void common_speculative_accept(common_speculative * spec, llama_seq_id seq_id, u
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: support the case of more than one speculative implementations having a state
|
||||
bool common_speculative_get_state(common_speculative * spec, llama_seq_id seq_id, std::vector<uint8_t> & data) {
|
||||
if (spec == nullptr) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for (auto & impl : spec->impls) {
|
||||
if (impl->get_state(seq_id, data)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void common_speculative_set_state(common_speculative * spec, llama_seq_id seq_id, const std::vector<uint8_t> & data) {
|
||||
if (spec == nullptr) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (auto & impl : spec->impls) {
|
||||
impl->set_state(seq_id, data);
|
||||
}
|
||||
}
|
||||
|
||||
void common_speculative_print_stats(const common_speculative * spec) {
|
||||
if (spec == nullptr) {
|
||||
return;
|
||||
|
||||
@@ -68,6 +68,10 @@ void common_speculative_draft(common_speculative * spec);
|
||||
// informs the speculative context that n_accepted tokens were accepted by the target model
|
||||
void common_speculative_accept(common_speculative * spec, llama_seq_id, uint16_t n_accepted);
|
||||
|
||||
// (optional) get/set internal state
|
||||
bool common_speculative_get_state(common_speculative * spec, llama_seq_id seq_id, std::vector<uint8_t> & data);
|
||||
void common_speculative_set_state(common_speculative * spec, llama_seq_id seq_id, const std::vector<uint8_t> & data);
|
||||
|
||||
// print statistics about the speculative decoding
|
||||
void common_speculative_print_stats(const common_speculative * spec);
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@ import os
|
||||
import sys
|
||||
import subprocess
|
||||
|
||||
HTTPLIB_VERSION = "refs/tags/v0.47.0"
|
||||
HTTPLIB_VERSION = "refs/tags/v0.48.0"
|
||||
|
||||
vendor = {
|
||||
"https://github.com/nlohmann/json/releases/latest/download/json.hpp": "vendor/nlohmann/json.hpp",
|
||||
|
||||
@@ -156,6 +156,8 @@ llama_model_qwen35::graph::graph(const llama_model & model, const llm_graph_para
|
||||
|
||||
// MTP/NextN layers are loaded as extra decoder blocks but not executed in the main pass.
|
||||
for (int il = 0; il < n_layer; ++il) {
|
||||
res->t_layer_inp[il] = inpL;
|
||||
|
||||
ggml_tensor * inpSA = inpL;
|
||||
|
||||
cur = build_norm(inpL, model.layers[il].attn_norm, nullptr, LLM_NORM_RMS, il);
|
||||
|
||||
@@ -179,6 +179,8 @@ llama_model_qwen35moe::graph::graph(const llama_model & model, const llm_graph_p
|
||||
|
||||
// MTP/NextN layers are loaded as extra decoder blocks but not executed in the main pass.
|
||||
for (int il = 0; il < n_layer; ++il) {
|
||||
res->t_layer_inp[il] = inpL;
|
||||
|
||||
ggml_tensor * inpSA = inpL;
|
||||
|
||||
cur = build_norm(inpL, model.layers[il].attn_norm, nullptr, LLM_NORM_RMS, il);
|
||||
|
||||
+2
-1
@@ -161,7 +161,7 @@
|
||||
| `-mmu, --mmproj-url URL` | URL to a multimodal projector file. see tools/mtmd/README.md<br/>(env: LLAMA_ARG_MMPROJ_URL) |
|
||||
| `--mmproj-auto, --no-mmproj, --no-mmproj-auto` | whether to use multimodal projector file (if available), useful when using -hf (default: enabled)<br/>(env: LLAMA_ARG_MMPROJ_AUTO) |
|
||||
| `--mmproj-offload, --no-mmproj-offload` | whether to enable GPU offloading for multimodal projector (default: enabled)<br/>(env: LLAMA_ARG_MMPROJ_OFFLOAD) |
|
||||
| `--image, --audio FILE` | path to an image or audio file. use with multimodal models, use comma-separated values for multiple files |
|
||||
| `--image, --audio, --video FILE` | path to an image, audio, or video file. use with multimodal models, use comma-separated values for multiple files |
|
||||
| `--image-min-tokens N` | minimum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)<br/>(env: LLAMA_ARG_IMAGE_MIN_TOKENS) |
|
||||
| `--image-max-tokens N` | maximum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)<br/>(env: LLAMA_ARG_IMAGE_MAX_TOKENS) |
|
||||
| `--chat-template-kwargs STRING` | sets additional params for the json template parser, must be a valid json object string, e.g. '{"key1":"value1","key2":"value2"}'<br/>(env: LLAMA_ARG_CHAT_TEMPLATE_KWARGS) |
|
||||
@@ -174,6 +174,7 @@
|
||||
| `--chat-template-file JINJA_TEMPLATE_FILE` | set custom jinja chat template file (default: template taken from model's metadata)<br/>if suffix/prefix are specified, template will be disabled<br/>only commonly used templates are accepted (unless --jinja is set before this flag):<br/>list of built-in templates:<br/>bailing, bailing-think, bailing2, chatglm3, chatglm4, chatml, command-r, deepseek, deepseek-ocr, deepseek2, deepseek3, exaone-moe, exaone3, exaone4, falcon3, gemma, gigachat, glmedge, gpt-oss, granite, granite-4.0, granite-4.1, grok-2, hunyuan-dense, hunyuan-moe, hunyuan-vl, kimi-k2, llama2, llama2-sys, llama2-sys-bos, llama2-sys-strip, llama3, llama4, megrez, minicpm, mistral-v1, mistral-v3, mistral-v3-tekken, mistral-v7, mistral-v7-tekken, monarch, openchat, orion, pangu-embedded, phi3, phi4, rwkv-world, seed_oss, smolvlm, solar-open, vicuna, vicuna-orca, yandex, zephyr<br/>(env: LLAMA_ARG_CHAT_TEMPLATE_FILE) |
|
||||
| `--skip-chat-parsing, --no-skip-chat-parsing` | force a pure content parser, even if a Jinja template is specified; model will output everything in the content section, including any reasoning and/or tool calls (default: disabled)<br/>(env: LLAMA_ARG_SKIP_CHAT_PARSING) |
|
||||
| `--simple-io` | use basic IO for better compatibility in subprocesses and limited consoles |
|
||||
| `--log-prompts-dir PATH` | Log prompts to directory (only used for debugging, default: disabled) |
|
||||
| `--spec-draft-hf, -hfd, -hfrd, --hf-repo-draft <user>/<model>[:quant]` | Same as --hf-repo, but for the draft model (default: unused)<br/>(env: LLAMA_ARG_SPEC_DRAFT_HF_REPO) |
|
||||
| `--spec-draft-threads, -td, --threads-draft N` | number of threads to use during generation (default: same as --threads) |
|
||||
| `--spec-draft-threads-batch, -tbd, --threads-batch-draft N` | number of threads to use during batch and prompt processing (default: same as --threads-draft) |
|
||||
|
||||
+29
-1
@@ -1675,6 +1675,9 @@ struct clip_model_loader {
|
||||
// note: some models having hparams.image_size == 0, which means the image size is dynamic
|
||||
throw std::runtime_error(string_format("%s: image_size (%d) cannot be negative\n", __func__, hparams.image_size));
|
||||
}
|
||||
if (hparams.image_size > 65536) {
|
||||
throw std::runtime_error(string_format("%s: image_size (%d) is too large (max 65536)\n", __func__, hparams.image_size));
|
||||
}
|
||||
if (hparams.patch_size <= 0) {
|
||||
throw std::runtime_error(string_format("%s: patch_size (%d) must be greater than 0\n", __func__, hparams.patch_size));
|
||||
}
|
||||
@@ -1723,6 +1726,19 @@ struct clip_model_loader {
|
||||
LOG_INF("%s: audio_n_fft: %d\n", __func__, hparams.audio_n_fft);
|
||||
LOG_INF("%s: audio_window_len: %d\n", __func__, hparams.audio_window_len);
|
||||
LOG_INF("%s: audio_hop_len: %d\n", __func__, hparams.audio_hop_len);
|
||||
|
||||
// GEMMA4UA is encoder-free: it uses n_mel_bins as a raw-waveform frame size (640) and has no FFT/filterbank, so the mel-range and FFT
|
||||
// checks below do not apply to it.
|
||||
const bool fft_based = model.proj_type != PROJECTOR_TYPE_GEMMA4UA;
|
||||
|
||||
// Validate audio hparams loaded from GGUF metadata
|
||||
if (hparams.n_mel_bins <= 0 || (fft_based && hparams.n_mel_bins > 256)) {
|
||||
throw std::runtime_error(string_format("%s: n_mel_bins (%d) must be in range [1, 256]\n", __func__, hparams.n_mel_bins));
|
||||
}
|
||||
if (fft_based && (hparams.audio_sample_rate <= 0 || hparams.audio_n_fft <= 0 || hparams.audio_hop_len <= 0 || hparams.audio_window_len <= 0)) {
|
||||
throw std::runtime_error(string_format("%s: audio hparams invalid: sample_rate=%d n_fft=%d window_len=%d hop_len=%d\n",
|
||||
__func__, hparams.audio_sample_rate, hparams.audio_n_fft, hparams.audio_window_len, hparams.audio_hop_len));
|
||||
}
|
||||
}
|
||||
LOG_INF("\n");
|
||||
LOG_INF("%s: model size: %.2f MiB\n", __func__, model_size / 1024.0 / 1024.0);
|
||||
@@ -2831,6 +2847,12 @@ struct clip_model_loader {
|
||||
img.set_size({sz, sz}, false, false);
|
||||
LOG_INF("%s: warmup with image size = %d x %d\n", __func__, sz, sz);
|
||||
} else {
|
||||
// GEMMA4UA uses n_mel_bins as a raw-waveform frame size (640), not a mel-bin count,
|
||||
// so the [1, 256] bound only applies to FFT-based models.
|
||||
const bool fft_based = ctx_clip.model.proj_type != PROJECTOR_TYPE_GEMMA4UA;
|
||||
if (hparams.n_mel_bins <= 0 || (fft_based && hparams.n_mel_bins > 256)) {
|
||||
throw std::runtime_error(string_format("%s: invalid n_mel_bins (%d), must be in [1, 256]\n", __func__, hparams.n_mel_bins));
|
||||
}
|
||||
img.set_size({hparams.warmup_audio_size, hparams.n_mel_bins}, false, false);
|
||||
LOG_INF("%s: warmup with audio size = %d\n", __func__, hparams.warmup_audio_size);
|
||||
}
|
||||
@@ -2994,7 +3016,13 @@ struct clip_model_loader {
|
||||
}
|
||||
return;
|
||||
}
|
||||
output = gguf_get_val_u32(ctx_gguf.get(), i);
|
||||
const uint32_t val = gguf_get_val_u32(ctx_gguf.get(), i);
|
||||
// sanity check
|
||||
if (val > (uint32_t) INT32_MAX) {
|
||||
throw std::runtime_error(string_format("%s: value %u for key '%s' exceeds INT32_MAX\n",
|
||||
__func__, val, key.c_str()));
|
||||
}
|
||||
output = (int) val;
|
||||
}
|
||||
|
||||
void get_f32(const std::string & key, float & output, bool required = true) const {
|
||||
|
||||
@@ -24,6 +24,9 @@ struct clip_image_size {
|
||||
return !(*this == other);
|
||||
}
|
||||
int area() const {
|
||||
// avoid overflow when computing area
|
||||
GGML_ASSERT(width >= 0 && width <= 46000);
|
||||
GGML_ASSERT(height >= 0 && height <= 46000);
|
||||
return width * height;
|
||||
}
|
||||
};
|
||||
|
||||
+76
-63
@@ -32,8 +32,8 @@ void mtmd_audio_cache::fill_hann_window(uint32_t length, bool periodic) {
|
||||
}
|
||||
}
|
||||
|
||||
void mtmd_audio_cache::fill_mel_filterbank_matrix(int n_mel,
|
||||
int n_fft,
|
||||
void mtmd_audio_cache::fill_mel_filterbank_matrix(int64_t n_mel,
|
||||
int64_t n_fft,
|
||||
int sample_rate,
|
||||
float fmin,
|
||||
float fmax,
|
||||
@@ -86,11 +86,16 @@ void mtmd_audio_cache::fill_mel_filterbank_matrix(int n_mel,
|
||||
hz_pts[i] = mel_to_hz(mel_pts[i]);
|
||||
}
|
||||
|
||||
const int n_fft_bins = n_fft / 2 + 1;
|
||||
const int64_t n_fft_bins = n_fft / 2 + 1;
|
||||
|
||||
// Validate allocation size
|
||||
if ((size_t)n_mel * (size_t)n_fft_bins > SIZE_MAX) {
|
||||
GGML_ASSERT(false && "mel filterbank allocation too large");
|
||||
}
|
||||
|
||||
// filterbank
|
||||
std::vector<float> out(n_mel * n_fft_bins, 0);
|
||||
for (int m = 0; m < n_mel; ++m) {
|
||||
std::vector<float> out((size_t)n_mel * (size_t)n_fft_bins, 0);
|
||||
for (int64_t m = 0; m < n_mel; ++m) {
|
||||
const double f_left = hz_pts[m];
|
||||
const double f_center = hz_pts[m + 1];
|
||||
const double f_right = hz_pts[m + 2];
|
||||
@@ -266,8 +271,8 @@ static void ifft(const mtmd_audio_cache & cache, float * in, int N, float * out)
|
||||
}
|
||||
|
||||
struct filter_params {
|
||||
int32_t n_mel;
|
||||
int32_t n_fft_bins;
|
||||
int64_t n_mel;
|
||||
int64_t n_fft_bins;
|
||||
int32_t hann_window_size;
|
||||
int32_t hop_length;
|
||||
int32_t sample_rate;
|
||||
@@ -293,8 +298,8 @@ static void log_mel_spectrogram_worker_thread(int ith,
|
||||
std::vector<float> fft_in(frame_size * 2, 0.0);
|
||||
std::vector<float> fft_out(frame_size * 2 * 2 * 2);
|
||||
|
||||
int n_fft_bins = params.n_fft_bins;
|
||||
int i = ith;
|
||||
int64_t n_fft_bins = params.n_fft_bins;
|
||||
int64_t i = ith;
|
||||
|
||||
const auto & filters = cache.filters;
|
||||
|
||||
@@ -302,17 +307,18 @@ static void log_mel_spectrogram_worker_thread(int ith,
|
||||
GGML_ASSERT(n_fft_bins == 1 + (frame_size / 2));
|
||||
GGML_ASSERT(cache.sin_vals.size() == cache.cos_vals.size());
|
||||
// calculate FFT only when fft_in are not all zero
|
||||
for (; i < std::min(n_samples / frame_step + 1, out.n_len); i += n_threads) {
|
||||
const int offset = i * frame_step;
|
||||
for (; i < std::min((int64_t)(n_samples / frame_step + 1), out.n_len); i += n_threads) {
|
||||
const int64_t offset = i * frame_step;
|
||||
|
||||
// apply Hann window (~10% faster)
|
||||
for (int j = 0; j < std::min(frame_size, n_samples - offset); j++) {
|
||||
const int valid_len = std::min(frame_size, std::max(0, n_samples - (int)offset));
|
||||
for (int j = 0; j < valid_len; j++) {
|
||||
fft_in[j] = hann[j] * samples[offset + j];
|
||||
}
|
||||
|
||||
// fill the rest with zeros
|
||||
if (n_samples - offset < frame_size) {
|
||||
std::fill(fft_in.begin() + (n_samples - offset), fft_in.end(), 0.0);
|
||||
if (valid_len < frame_size) {
|
||||
std::fill(fft_in.begin() + valid_len, fft_in.end(), 0.0);
|
||||
}
|
||||
|
||||
// FFT
|
||||
@@ -325,7 +331,7 @@ static void log_mel_spectrogram_worker_thread(int ith,
|
||||
}
|
||||
|
||||
// mel spectrogram
|
||||
for (int j = 0; j < out.n_mel; j++) {
|
||||
for (int64_t j = 0; j < out.n_mel; j++) {
|
||||
double sum = 0.0;
|
||||
// unroll loop (suggested by GH user @lunixbochs)
|
||||
int k = 0;
|
||||
@@ -339,21 +345,21 @@ static void log_mel_spectrogram_worker_thread(int ith,
|
||||
}
|
||||
// handle n_fft remainder
|
||||
for (; k < n_fft_bins; k++) {
|
||||
sum += fft_out[k] * filters.data[j * n_fft_bins + k];
|
||||
sum += fft_out[k] * filters.data[(size_t)j * n_fft_bins + k];
|
||||
}
|
||||
sum = std::max(sum, (double)params.mel_floor);
|
||||
sum = params.use_natural_log
|
||||
? log(sum)
|
||||
: log10(sum);
|
||||
out.data[j * out.n_len + i] = sum;
|
||||
out.data[(size_t)j * out.n_len + i] = sum;
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise fft_out are all zero
|
||||
double sum = params.use_natural_log ? log(1e-10) : log10(1e-10);
|
||||
for (; i < out.n_len; i += n_threads) {
|
||||
for (int j = 0; j < out.n_mel; j++) {
|
||||
out.data[j * out.n_len + i] = sum;
|
||||
for (int64_t j = 0; j < out.n_mel; j++) {
|
||||
out.data[(size_t)j * out.n_len + i] = sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -437,16 +443,21 @@ static bool log_mel_spectrogram(
|
||||
GGML_ASSERT(params.hop_length > 0);
|
||||
out.n_mel = params.n_mel;
|
||||
out.n_len = (n_samples - frame_size) / frame_step + 1;
|
||||
// TODO: handle these checks better
|
||||
if (out.n_mel > 0 && (unsigned long)out.n_len > SIZE_MAX / out.n_mel) {
|
||||
LOG_ERR("%s: size overflow\n", __func__);
|
||||
// Validate dimensions before allocation to prevent integer overflow
|
||||
if (out.n_mel <= 0 || out.n_len <= 0) {
|
||||
LOG_ERR("%s: invalid mel dimensions n_mel=%lld n_len=%lld\n", __func__, (long long)out.n_mel, (long long)out.n_len);
|
||||
return false;
|
||||
}
|
||||
const size_t total_size = (size_t)out.n_mel * (size_t)out.n_len;
|
||||
if (total_size > SIZE_MAX / sizeof(float)) {
|
||||
LOG_ERR("%s: size overflow: n_mel=%lld n_len=%lld\n", __func__, (long long)out.n_mel, (long long)out.n_len);
|
||||
return false;
|
||||
}
|
||||
if (n_samples < frame_size) {
|
||||
LOG_ERR("%s: not enough samples after padding\n", __func__);
|
||||
return false;
|
||||
}
|
||||
out.data.resize(out.n_mel * out.n_len);
|
||||
out.data.resize(total_size);
|
||||
|
||||
{
|
||||
std::vector<std::thread> workers(n_threads - 1);
|
||||
@@ -464,38 +475,39 @@ static bool log_mel_spectrogram(
|
||||
}
|
||||
}
|
||||
|
||||
const int effective_n_len = n_samples_in / frame_step;
|
||||
const int64_t effective_n_len = n_samples_in / frame_step;
|
||||
if (params.norm_per_feature) {
|
||||
GGML_ASSERT(effective_n_len > 1);
|
||||
for (int i = 0; i < out.n_mel; i++) {
|
||||
for (int64_t i = 0; i < out.n_mel; i++) {
|
||||
double mean = 0;
|
||||
for (int j = 0; j < effective_n_len; ++j) {
|
||||
mean += out.data[i * out.n_len + j];
|
||||
for (int64_t j = 0; j < effective_n_len; ++j) {
|
||||
mean += out.data[(size_t)i * out.n_len + j];
|
||||
}
|
||||
mean /= effective_n_len;
|
||||
|
||||
double var = 0.0;
|
||||
for (int j = 0; j < effective_n_len; ++j) {
|
||||
const double value = out.data[i * out.n_len + j] - mean;
|
||||
for (int64_t j = 0; j < effective_n_len; ++j) {
|
||||
const double value = out.data[(size_t)i * out.n_len + j] - mean;
|
||||
var += value * value;
|
||||
}
|
||||
var /= effective_n_len - 1; // unbiased
|
||||
const double mstd = std::sqrt(var + 1e-5);
|
||||
|
||||
for (int j = 0; j < effective_n_len; ++j) {
|
||||
auto &value = out.data[i * out.n_len + j];
|
||||
for (int64_t j = 0; j < effective_n_len; ++j) {
|
||||
auto &value = out.data[(size_t)i * out.n_len + j];
|
||||
value = (value - mean) / mstd;
|
||||
}
|
||||
|
||||
// pad the rest with zeros
|
||||
for (int j = effective_n_len; j < out.n_len; ++j) {
|
||||
out.data[i * out.n_len + j] = 0.0;
|
||||
for (int64_t j = effective_n_len; j < out.n_len; ++j) {
|
||||
out.data[(size_t)i * out.n_len + j] = 0.0;
|
||||
}
|
||||
}
|
||||
} else if (!params.no_padding) {
|
||||
// Whisper-style clamping and normalization (NOT used by Gemma4)
|
||||
double mmax = -1e20;
|
||||
for (int i = 0; i < out.n_mel*out.n_len; i++) {
|
||||
const size_t mel_size = (size_t)out.n_mel * (size_t)out.n_len;
|
||||
for (size_t i = 0; i < mel_size; i++) {
|
||||
if (out.data[i] > mmax) {
|
||||
mmax = out.data[i];
|
||||
}
|
||||
@@ -503,7 +515,7 @@ static bool log_mel_spectrogram(
|
||||
|
||||
mmax -= 8.0;
|
||||
|
||||
for (int i = 0; i < out.n_mel*out.n_len; i++) {
|
||||
for (size_t i = 0; i < mel_size; i++) {
|
||||
if (out.data[i] < mmax) {
|
||||
out.data[i] = mmax;
|
||||
}
|
||||
@@ -582,13 +594,13 @@ bool mtmd_audio_preprocessor_whisper::preprocess(const float * s
|
||||
// because the cgraph in clip.cpp only accepts 3000 frames each, we need to split the mel
|
||||
// we always expect the mel to have 3000 silent frames at the end
|
||||
if (DEBUG) {
|
||||
printf("output: n_mel = %d, n_len = %d\n", out_full.n_mel, out_full.n_len);
|
||||
printf("output: n_mel = %d, n_len = %d\n", (int) out_full.n_mel, (int) out_full.n_len);
|
||||
}
|
||||
const size_t frames_per_chunk = 3000;
|
||||
GGML_ASSERT((size_t) out_full.n_len > frames_per_chunk);
|
||||
for (size_t off = 0; off < (size_t) out_full.n_len; off += frames_per_chunk) {
|
||||
int n_len = std::min(frames_per_chunk, (size_t) out_full.n_len - off);
|
||||
if ((size_t) n_len < frames_per_chunk) {
|
||||
int64_t n_len = std::min((int64_t)frames_per_chunk, out_full.n_len - (int64_t)off);
|
||||
if (n_len < (int64_t)frames_per_chunk) {
|
||||
break; // last incomplete chunk will always be a padded chunk, safe to ignore
|
||||
}
|
||||
|
||||
@@ -596,10 +608,10 @@ bool mtmd_audio_preprocessor_whisper::preprocess(const float * s
|
||||
out_chunk.n_len = n_len;
|
||||
out_chunk.n_mel = out_full.n_mel;
|
||||
out_chunk.n_len_org = out_full.n_mel; // unused
|
||||
out_chunk.data.reserve(out_chunk.n_mel * out_chunk.n_len);
|
||||
out_chunk.data.reserve((size_t)out_chunk.n_mel * (size_t)out_chunk.n_len);
|
||||
|
||||
for (int i = 0; i < out_full.n_mel; i++) {
|
||||
auto src = out_full.data.begin() + i * out_full.n_len + off;
|
||||
for (int64_t i = 0; i < out_full.n_mel; i++) {
|
||||
auto src = out_full.data.begin() + (size_t)i * out_full.n_len + off;
|
||||
out_chunk.data.insert(out_chunk.data.end(), src, src + frames_per_chunk);
|
||||
}
|
||||
|
||||
@@ -681,8 +693,8 @@ bool mtmd_audio_preprocessor_qwen3a::preprocess(const float * sa
|
||||
|
||||
// The effective frame count: center-padded STFT gives ~n_samples/hop_length frames.
|
||||
// We take min(mel_full.n_len, n_samples/hop + 1) to avoid including excess frames.
|
||||
const int n_eff = std::min(mel_full.n_len,
|
||||
(int)(n_samples / hparams.audio_hop_len) + 1);
|
||||
const int64_t n_eff = std::min(mel_full.n_len,
|
||||
(int64_t)(n_samples / hparams.audio_hop_len) + 1);
|
||||
|
||||
// Split into inference windows matching n_window_infer=800 from model config.
|
||||
// Each window is padded to the next multiple of chunk_size for the cgraph.
|
||||
@@ -690,18 +702,18 @@ bool mtmd_audio_preprocessor_qwen3a::preprocess(const float * sa
|
||||
const int chunk_size = 100; // conv sub-chunk size (n_window * 2, n_window=50)
|
||||
const int window_size = 800; // mel frames per forward pass (n_window_infer=800)
|
||||
|
||||
for (int off = 0; off < n_eff; off += window_size) {
|
||||
const int win_eff = std::min(window_size, n_eff - off);
|
||||
const int n_chunks = (win_eff + chunk_size - 1) / chunk_size;
|
||||
const int n_padded = n_chunks * chunk_size;
|
||||
for (int64_t off = 0; off < n_eff; off += window_size) {
|
||||
const int64_t win_eff = std::min((int64_t)window_size, n_eff - off);
|
||||
const int64_t n_chunks = (win_eff + chunk_size - 1) / chunk_size;
|
||||
const int64_t n_padded = n_chunks * chunk_size;
|
||||
|
||||
mtmd_audio_mel out;
|
||||
out.n_mel = mel_full.n_mel;
|
||||
out.n_len = n_padded;
|
||||
out.n_len_org = win_eff;
|
||||
out.data.assign(out.n_mel * out.n_len, 0.0f);
|
||||
for (int m = 0; m < out.n_mel; m++) {
|
||||
const int copy_len = std::min(win_eff, mel_full.n_len - off);
|
||||
out.data.assign((size_t)out.n_mel * (size_t)out.n_len, 0.0f);
|
||||
for (int64_t m = 0; m < out.n_mel; m++) {
|
||||
const int64_t copy_len = std::min((int64_t)win_eff, mel_full.n_len - off);
|
||||
if (copy_len > 0) {
|
||||
std::copy(mel_full.data.begin() + (size_t)m * mel_full.n_len + off,
|
||||
mel_full.data.begin() + (size_t)m * mel_full.n_len + off + copy_len,
|
||||
@@ -823,37 +835,38 @@ bool mtmd_audio_preprocessor_granite_speech::preprocess(const float *
|
||||
}
|
||||
|
||||
double mmax = -1e20;
|
||||
for (int i = 0; i < mel.n_mel * mel.n_len; i++) {
|
||||
const size_t mel_size = (size_t)mel.n_mel * (size_t)mel.n_len;
|
||||
for (size_t i = 0; i < mel_size; i++) {
|
||||
if (mel.data[i] > mmax) {
|
||||
mmax = mel.data[i];
|
||||
}
|
||||
}
|
||||
mmax -= 8.0;
|
||||
|
||||
for (int i = 0; i < mel.n_mel * mel.n_len; i++) {
|
||||
for (size_t i = 0; i < mel_size; i++) {
|
||||
if (mel.data[i] < mmax) {
|
||||
mel.data[i] = mmax;
|
||||
}
|
||||
mel.data[i] = (mel.data[i] + 4.0) / 4.0;
|
||||
}
|
||||
|
||||
int n_frames = mel.n_len;
|
||||
int64_t n_frames = mel.n_len;
|
||||
if (n_frames % 2 == 1) {
|
||||
n_frames--;
|
||||
}
|
||||
const int n_mel = mel.n_mel;
|
||||
const int n_stacked = n_frames / 2;
|
||||
const int64_t n_mel = mel.n_mel;
|
||||
const int64_t n_stacked = n_frames / 2;
|
||||
|
||||
mtmd_audio_mel stacked;
|
||||
stacked.n_mel = 2 * n_mel;
|
||||
stacked.n_len = n_stacked;
|
||||
stacked.n_len_org = (int)n_samples;
|
||||
stacked.data.resize(2 * n_mel * n_stacked);
|
||||
stacked.n_len_org = (int64_t)n_samples;
|
||||
stacked.data.resize((size_t)2 * (size_t)n_mel * (size_t)n_stacked);
|
||||
|
||||
for (int t = 0; t < n_stacked; t++) {
|
||||
for (int m = 0; m < n_mel; m++) {
|
||||
stacked.data[m * n_stacked + t] = mel.data[m * mel.n_len + 2 * t];
|
||||
stacked.data[(m + n_mel) * n_stacked + t] = mel.data[m * mel.n_len + 2 * t + 1];
|
||||
for (int64_t t = 0; t < n_stacked; t++) {
|
||||
for (int64_t m = 0; m < n_mel; m++) {
|
||||
stacked.data[(size_t)m * n_stacked + t] = mel.data[(size_t)m * mel.n_len + 2 * t];
|
||||
stacked.data[(size_t)(m + n_mel) * n_stacked + t] = mel.data[(size_t)m * mel.n_len + 2 * t + 1];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -921,8 +934,8 @@ bool mtmd_audio_preprocessor_gemma4a::preprocess(const float * s
|
||||
const int hop = hparams.audio_hop_len;
|
||||
const int n_with_left = (int)chunk_len + pad_left;
|
||||
// PyTorch: unfold(size=frame_length+1, step=hop) on semicausal-padded waveform
|
||||
const int pt_frames = (n_with_left - (hparams.audio_window_len + 1)) / hop + 1;
|
||||
const int n_padded_needed = (pt_frames - 1) * hop + fft_size;
|
||||
const int64_t pt_frames = (n_with_left - (hparams.audio_window_len + 1)) / hop + 1;
|
||||
const int64_t n_padded_needed = (pt_frames - 1) * hop + fft_size;
|
||||
const int total_pad = std::max((int)(n_padded_needed - (int)chunk_len), pad_left);
|
||||
std::vector<float> padded_samples(total_pad + chunk_len, 0.0f);
|
||||
std::copy(chunk_ptr, chunk_ptr + chunk_len, padded_samples.data() + pad_left);
|
||||
|
||||
@@ -10,16 +10,16 @@
|
||||
#define MTMD_INTERNAL_HEADER
|
||||
|
||||
struct mtmd_audio_mel {
|
||||
int n_len;
|
||||
int n_len_org;
|
||||
int n_mel;
|
||||
int64_t n_len;
|
||||
int64_t n_len_org;
|
||||
int64_t n_mel;
|
||||
|
||||
std::vector<float> data;
|
||||
};
|
||||
|
||||
struct mtmd_audio_mel_filters {
|
||||
int32_t n_mel;
|
||||
int32_t n_fft;
|
||||
int64_t n_mel;
|
||||
int64_t n_fft;
|
||||
|
||||
std::vector<float> data;
|
||||
};
|
||||
@@ -39,8 +39,8 @@ struct mtmd_audio_cache {
|
||||
|
||||
// Build mel filterbank matrix [n_mel × n_fft_bins] at runtime.
|
||||
// n_fft_bins must be (N_fft / 2 + 1). Example: if N_fft=512 -> n_fft_bins=257.
|
||||
void fill_mel_filterbank_matrix(int n_mel,
|
||||
int n_fft,
|
||||
void fill_mel_filterbank_matrix(int64_t n_mel,
|
||||
int64_t n_fft,
|
||||
int sample_rate, // e.g. 16000
|
||||
float fmin = 0.0f, // e.g. 0.0
|
||||
float fmax = -1.0f, // e.g. sr/2; pass -1 for auto
|
||||
|
||||
+4
-1
@@ -1295,9 +1295,12 @@ struct mtmd_tokenizer {
|
||||
for (auto & mel_spec : mel_spec_chunks) {
|
||||
const bool is_placeholder = mel_spec.data.empty();
|
||||
|
||||
// Validate dimensions fit in clip_image_size (int)
|
||||
GGML_ASSERT(mel_spec.n_len <= INT32_MAX && mel_spec.n_len >= 0);
|
||||
GGML_ASSERT(mel_spec.n_mel <= INT32_MAX && mel_spec.n_mel >= 0);
|
||||
clip_image_f32 mel_f32;
|
||||
mel_f32.set_size(
|
||||
{mel_spec.n_len, mel_spec.n_mel},
|
||||
{(int)mel_spec.n_len, (int)mel_spec.n_mel},
|
||||
is_placeholder, /* is_audio */ true);
|
||||
mel_f32.cpy_buf(mel_spec.data);
|
||||
|
||||
|
||||
+9
-11
@@ -175,13 +175,12 @@ For the full list of features, please refer to [server's changelog](https://gith
|
||||
| `-np, --parallel N` | number of server slots (default: -1, -1 = auto)<br/>(env: LLAMA_ARG_N_PARALLEL) |
|
||||
| `-cb, --cont-batching, -nocb, --no-cont-batching` | whether to enable continuous batching (a.k.a dynamic batching) (default: enabled)<br/>(env: LLAMA_ARG_CONT_BATCHING) |
|
||||
| `-mm, --mmproj FILE` | path to a multimodal projector file. see tools/mtmd/README.md<br/>note: if -hf is used, this argument can be omitted<br/>(env: LLAMA_ARG_MMPROJ) |
|
||||
| `-tk, --talker-model FILE` | path to the qwen3-omni talker gguf, enables the /v1/audio/speech endpoint<br/>(env: LLAMA_ARG_TALKER_MODEL) |
|
||||
| `-c2w, --code2wav-model FILE` | path to the qwen3-omni code2wav gguf, the talker code detokenizer<br/>(env: LLAMA_ARG_CODE2WAV_MODEL) |
|
||||
| `-mmu, --mmproj-url URL` | URL to a multimodal projector file. see tools/mtmd/README.md<br/>(env: LLAMA_ARG_MMPROJ_URL) |
|
||||
| `--mmproj-auto, --no-mmproj, --no-mmproj-auto` | whether to use multimodal projector file (if available), useful when using -hf (default: enabled)<br/>(env: LLAMA_ARG_MMPROJ_AUTO) |
|
||||
| `--mmproj-offload, --no-mmproj-offload` | whether to enable GPU offloading for multimodal projector (default: enabled)<br/>(env: LLAMA_ARG_MMPROJ_OFFLOAD) |
|
||||
| `--image-min-tokens N` | minimum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)<br/>(env: LLAMA_ARG_IMAGE_MIN_TOKENS) |
|
||||
| `--image-max-tokens N` | maximum number of tokens each image can take, only used by vision models with dynamic resolution (default: read from model)<br/>(env: LLAMA_ARG_IMAGE_MAX_TOKENS) |
|
||||
| `--mtmd-batch-max-tokens N` | maximum number of image tokens per batch when encoding images (default: 1024)<br/>(env: LLAMA_ARG_MTMD_BATCH_MAX_TOKENS) |
|
||||
| `-a, --alias STRING` | set model name aliases, comma-separated (to be used by API)<br/>(env: LLAMA_ARG_ALIAS) |
|
||||
| `--tags STRING` | set model tags, comma-separated (informational, not used for routing)<br/>(env: LLAMA_ARG_TAGS) |
|
||||
| `--embd-normalize N` | normalisation for embeddings (default: 2) (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm) |
|
||||
@@ -190,23 +189,21 @@ For the full list of features, please refer to [server's changelog](https://gith
|
||||
| `--reuse-port` | allow multiple sockets to bind to the same port (default: disabled)<br/>(env: LLAMA_ARG_REUSE_PORT) |
|
||||
| `--path PATH` | path to serve static files from (default: )<br/>(env: LLAMA_ARG_STATIC_PATH) |
|
||||
| `--api-prefix PREFIX` | prefix path the server serves from, without the trailing slash (default: )<br/>(env: LLAMA_ARG_API_PREFIX) |
|
||||
| `--webui-config JSON` | [DEPRECATED: use --ui-config] JSON that provides default WebUI settings (overrides WebUI defaults)<br/>(env: LLAMA_ARG_WEBUI_CONFIG) |
|
||||
| `--ui-config JSON` | JSON that provides default UI settings (overrides UI defaults)<br/>(env: LLAMA_ARG_UI_CONFIG) |
|
||||
| `--webui-config-file PATH` | [DEPRECATED: use --ui-config-file] JSON file that provides default WebUI settings (overrides WebUI defaults)<br/>(env: LLAMA_ARG_WEBUI_CONFIG_FILE) |
|
||||
| `--ui-config-file PATH` | JSON file that provides default UI settings (overrides UI defaults)<br/>(env: LLAMA_ARG_UI_CONFIG_FILE) |
|
||||
| `--webui-mcp-proxy, --no-webui-mcp-proxy` | [DEPRECATED: use --ui-mcp-proxy/--no-ui-mcp-proxy] experimental: whether to enable MCP CORS proxy<br/>(env: LLAMA_ARG_WEBUI_MCP_PROXY) |
|
||||
| `--ui-mcp-proxy, --no-ui-mcp-proxy` | experimental: whether to enable MCP CORS proxy - do not enable in untrusted environments (default: disabled)<br/>(env: LLAMA_ARG_UI_MCP_PROXY) |
|
||||
| `--ui-config, --webui-config JSON` | JSON that provides default UI settings (overrides UI defaults)<br/>(env: LLAMA_ARG_UI_CONFIG) |
|
||||
| `--ui-config-file, --webui-config-file PATH` | JSON file that provides default UI settings (overrides UI defaults)<br/>(env: LLAMA_ARG_UI_CONFIG_FILE) |
|
||||
| `--ui-mcp-proxy, --webui-mcp-proxy, --no-ui-mcp-proxy, --no-webui-mcp-proxy` | experimental: whether to enable MCP CORS proxy - do not enable in untrusted environments (default: disabled)<br/>(env: LLAMA_ARG_UI_MCP_PROXY) |
|
||||
| `--tools TOOL1,TOOL2,...` | experimental: whether to enable built-in tools for AI agents - do not enable in untrusted environments (default: no tools)<br/>specify "all" to enable all tools<br/>available tools: read_file, file_glob_search, grep_search, exec_shell_command, write_file, edit_file, apply_diff, get_datetime<br/>(env: LLAMA_ARG_TOOLS) |
|
||||
| `--webui, --no-webui` | [DEPRECATED: use --ui/--no-ui] whether to enable the Web UI<br/>(env: LLAMA_ARG_WEBUI) |
|
||||
| `--ui, --no-ui` | whether to enable the Web UI (default: enabled)<br/>(env: LLAMA_ARG_UI) |
|
||||
| `-ag, --agent, -no-ag, --no-agent` | whether to enable CORS proxy and all built-in tools - do not enable in untrusted environments (default: disabled)<br/>(env: LLAMA_ARG_AGENT) |
|
||||
| `--ui, --webui, --no-ui, --no-webui` | whether to enable the Web UI (default: enabled)<br/>(env: LLAMA_ARG_UI) |
|
||||
| `--embedding, --embeddings` | restrict to only support embedding use case; use only with dedicated embedding models (default: disabled)<br/>(env: LLAMA_ARG_EMBEDDINGS) |
|
||||
| `--rerank, --reranking` | enable reranking endpoint on server (default: disabled)<br/>(env: LLAMA_ARG_RERANKING) |
|
||||
| `--api-key KEY` | API key to use for authentication, multiple keys can be provided as a comma-separated list (default: none)<br/>(env: LLAMA_API_KEY) |
|
||||
| `--api-key-file FNAME` | path to file containing API keys (default: none)<br/>(env: LLAMA_ARG_API_KEY_FILE) |
|
||||
| `--api-key-file FNAME` | path to file containing API keys, one per line; lines starting with a hash are treated as comments (default: none)<br/>(env: LLAMA_ARG_API_KEY_FILE) |
|
||||
| `--ssl-key-file FNAME` | path to file a PEM-encoded SSL private key<br/>(env: LLAMA_ARG_SSL_KEY_FILE) |
|
||||
| `--ssl-cert-file FNAME` | path to file a PEM-encoded SSL certificate<br/>(env: LLAMA_ARG_SSL_CERT_FILE) |
|
||||
| `--chat-template-kwargs STRING` | sets additional params for the json template parser, must be a valid json object string, e.g. '{"key1":"value1","key2":"value2"}'<br/>(env: LLAMA_ARG_CHAT_TEMPLATE_KWARGS) |
|
||||
| `-to, --timeout N` | server read/write timeout in seconds (default: 3600)<br/>(env: LLAMA_ARG_TIMEOUT) |
|
||||
| `--sse-ping-interval N` | server SSE ping interval in seconds (-1 = disabled, default: 30)<br/>(env: LLAMA_ARG_SSE_PING_INTERVAL) |
|
||||
| `--threads-http N` | number of threads used to process HTTP requests (default: -1)<br/>(env: LLAMA_ARG_THREADS_HTTP) |
|
||||
| `--cache-prompt, --no-cache-prompt` | whether to enable prompt caching (default: enabled)<br/>(env: LLAMA_ARG_CACHE_PROMPT) |
|
||||
| `--cache-reuse N` | min chunk size to attempt reusing from the cache via KV shifting, requires prompt caching to be enabled (default: 0)<br/>[(card)](https://ggml.ai/f0.png)<br/>(env: LLAMA_ARG_CACHE_REUSE) |
|
||||
@@ -231,6 +228,7 @@ For the full list of features, please refer to [server's changelog](https://gith
|
||||
| `-sps, --slot-prompt-similarity SIMILARITY` | how much the prompt of a request must match the prompt of a slot in order to use that slot (default: 0.10, 0.0 = disabled) |
|
||||
| `--lora-init-without-apply` | load LoRA adapters without applying them (apply later via POST /lora-adapters) (default: disabled) |
|
||||
| `--sleep-idle-seconds SECONDS` | number of seconds of idleness after which the server will sleep (default: -1; -1 = disabled) |
|
||||
| `--log-prompts-dir PATH` | Log prompts to directory (only used for debugging, default: disabled) |
|
||||
| `--spec-draft-hf, -hfd, -hfrd, --hf-repo-draft <user>/<model>[:quant]` | Same as --hf-repo, but for the draft model (default: unused)<br/>(env: LLAMA_ARG_SPEC_DRAFT_HF_REPO) |
|
||||
| `--spec-draft-threads, -td, --threads-draft N` | number of threads to use during generation (default: same as --threads) |
|
||||
| `--spec-draft-threads-batch, -tbd, --threads-batch-draft N` | number of threads to use during batch and prompt processing (default: same as --threads-draft) |
|
||||
|
||||
@@ -1302,11 +1302,8 @@ private:
|
||||
}
|
||||
}
|
||||
|
||||
// populate UI settings (from either new ui_config_json or deprecated webui_config_json)
|
||||
{
|
||||
const std::string & cfg = !params_base.ui_config_json.empty()
|
||||
? params_base.ui_config_json
|
||||
: params_base.webui_config_json;
|
||||
const std::string & cfg = params_base.ui_config_json;
|
||||
if (!cfg.empty()) {
|
||||
try {
|
||||
json json_settings = json::parse(cfg);
|
||||
@@ -2172,6 +2169,8 @@ private:
|
||||
|
||||
cur.update_tgt(ctx_tgt, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
|
||||
cur.update_dft(ctx_dft.get(), slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
|
||||
// stash the draft's speculative state with the checkpoint
|
||||
common_speculative_get_state(spec.get(), slot.id, cur.data_spec);
|
||||
|
||||
SLT_INF(slot,
|
||||
"created context checkpoint %d of %d (pos_min = %d, pos_max = %d, n_tokens = %" PRId64 ", size = %.3f MiB)\n",
|
||||
@@ -2565,7 +2564,10 @@ private:
|
||||
n_keep = std::min(slot.n_ctx - 4, n_keep);
|
||||
|
||||
const int n_left = slot.prompt.n_tokens() - n_keep;
|
||||
const int n_discard = slot.task->params.n_discard ? slot.task->params.n_discard : (n_left / 2);
|
||||
int n_discard = slot.task->params.n_discard ? slot.task->params.n_discard : (n_left / 2);
|
||||
|
||||
// ref: https://github.com/ggml-org/llama.cpp/pull/24786
|
||||
n_discard = std::clamp(n_discard, 0, std::max(0, n_left - 1));
|
||||
|
||||
SLT_WRN(slot, "slot context shift, n_keep = %d, n_left = %d, n_discard = %d\n", n_keep, n_left, n_discard);
|
||||
|
||||
@@ -2995,6 +2997,8 @@ private:
|
||||
// restore the context checkpoint
|
||||
it->load_tgt(ctx_tgt, slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
|
||||
it->load_dft(ctx_dft.get(), slot.id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);
|
||||
// restore the draft's speculative state
|
||||
common_speculative_set_state(spec.get(), slot.id, it->data_spec);
|
||||
|
||||
pos_next = std::min(pos_next, std::max(it->pos_min + 1, it->pos_max));
|
||||
n_past = std::min(slot.prompt.tokens.size_up_to_pos(pos_next), (size_t) it->n_tokens);
|
||||
@@ -4297,18 +4301,18 @@ void server_routes::init_routes() {
|
||||
{ "endpoint_props", params.endpoint_props },
|
||||
{ "endpoint_metrics", params.endpoint_metrics },
|
||||
// New keys
|
||||
{ "ui", params.ui },
|
||||
{ "ui_settings", meta->json_ui_settings },
|
||||
{ "ui", params.ui },
|
||||
{ "ui_settings", meta->json_ui_settings },
|
||||
// Deprecated: use ui/ui_settings instead (kept for backward compat)
|
||||
{ "webui", params.webui },
|
||||
{ "webui_settings", meta->json_webui_settings },
|
||||
{ "webui", params.ui },
|
||||
{ "webui_settings", meta->json_ui_settings },
|
||||
{ "chat_template", tmpl_default },
|
||||
{ "chat_template_caps", meta->chat_template_caps },
|
||||
{ "bos_token", meta->bos_token_str },
|
||||
{ "eos_token", meta->eos_token_str },
|
||||
{ "build_info", meta->build_info },
|
||||
{ "is_sleeping", queue_tasks.is_sleeping() },
|
||||
{ "cors_proxy_enabled", params.ui_mcp_proxy || params.webui_mcp_proxy },
|
||||
{ "cors_proxy_enabled", params.ui_mcp_proxy },
|
||||
};
|
||||
if (params.use_jinja) {
|
||||
if (!tmpl_tools.empty()) {
|
||||
|
||||
@@ -1462,9 +1462,9 @@ void server_models_routes::init_routes() {
|
||||
auto res = std::make_unique<server_http_res>();
|
||||
res_ok(res, {
|
||||
// TODO: add support for this on web UI
|
||||
{"role", "router"},
|
||||
{"max_instances", params.models_max},
|
||||
{"models_autoload", params.models_autoload},
|
||||
{"role", "router"},
|
||||
{"max_instances", params.models_max},
|
||||
{"models_autoload", params.models_autoload},
|
||||
// this is a dummy response to make sure the UI doesn't break
|
||||
{"model_alias", "llama-server"},
|
||||
{"model_path", "none"},
|
||||
@@ -1473,11 +1473,10 @@ void server_models_routes::init_routes() {
|
||||
{"n_ctx", 0},
|
||||
}},
|
||||
// New key
|
||||
{"ui_settings", ui_settings},
|
||||
// Deprecated: use ui_settings instead (kept for backward compat)
|
||||
{"webui_settings", webui_settings},
|
||||
{"build_info", std::string(llama_build_info())},
|
||||
{"cors_proxy_enabled", params.ui_mcp_proxy || params.webui_mcp_proxy},
|
||||
{"ui_settings", ui_settings},
|
||||
{"webui_settings", webui_settings},
|
||||
{"build_info", std::string(llama_build_info())},
|
||||
{"cors_proxy_enabled", params.ui_mcp_proxy},
|
||||
});
|
||||
return res;
|
||||
}
|
||||
|
||||
@@ -212,10 +212,7 @@ struct server_models_routes {
|
||||
server_models models;
|
||||
server_models_routes(const common_params & params, int argc, char ** argv)
|
||||
: params(params), models(params, argc, argv) {
|
||||
// Support both new ui_config_json and deprecated webui_config_json
|
||||
const std::string & cfg = !this->params.ui_config_json.empty()
|
||||
? this->params.ui_config_json
|
||||
: this->params.webui_config_json;
|
||||
const std::string & cfg = this->params.ui_config_json;
|
||||
if (!cfg.empty()) {
|
||||
try {
|
||||
json json_settings = json::parse(cfg);
|
||||
|
||||
@@ -227,8 +227,7 @@ int llama_server(int argc, char ** argv) {
|
||||
ctx_http.register_gcp_compat();
|
||||
|
||||
// CORS proxy (EXPERIMENTAL, only used by the Web UI for MCP)
|
||||
// Supports both new ui_mcp_proxy and deprecated webui_mcp_proxy fields
|
||||
if (params.ui_mcp_proxy || params.webui_mcp_proxy) {
|
||||
if (params.ui_mcp_proxy) {
|
||||
SRV_WRN("%s", "-----------------\n");
|
||||
SRV_WRN("%s", "CORS proxy is enabled, do not expose server to untrusted environments\n");
|
||||
SRV_WRN("%s", "This feature is EXPERIMENTAL and may be removed or changed in future versions\n");
|
||||
|
||||
Vendored
+96
-145
@@ -5809,11 +5809,9 @@ std::string decode_query_component(const std::string &component,
|
||||
|
||||
for (size_t i = 0; i < component.size(); i++) {
|
||||
if (component[i] == '%' && i + 2 < component.size()) {
|
||||
std::string hex = component.substr(i + 1, 2);
|
||||
char *end;
|
||||
unsigned long value = std::strtoul(hex.c_str(), &end, 16);
|
||||
if (end == hex.c_str() + 2) {
|
||||
result += static_cast<char>(value);
|
||||
auto val = 0;
|
||||
if (detail::from_hex_to_i(component, i + 1, 2, val)) {
|
||||
result += static_cast<char>(val);
|
||||
i += 2;
|
||||
} else {
|
||||
result += component[i];
|
||||
@@ -12551,6 +12549,21 @@ bool parse_ipv4(const std::string &str, unsigned char *out) {
|
||||
return *p == '\0';
|
||||
}
|
||||
|
||||
// Parse an IP literal (IPv4 or IPv6) into raw network-order bytes.
|
||||
// `out` must have room for at least 16 bytes. Returns the address length
|
||||
// (4 for IPv4, 16 for IPv6) on success, or 0 if the string is not an IP
|
||||
// literal. Used to match a host against iPAddress SANs the same way the
|
||||
// OpenSSL backend does via X509_check_ip.
|
||||
size_t parse_ip_address(const std::string &str, unsigned char *out) {
|
||||
if (is_ipv4_address(str)) { return parse_ipv4(str, out) ? 4 : 0; }
|
||||
struct in6_addr addr6 = {};
|
||||
if (inet_pton(AF_INET6, str.c_str(), &addr6) == 1) {
|
||||
memcpy(out, &addr6, 16);
|
||||
return 16;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
// Enumerate Windows system certificates and call callback with DER data
|
||||
template <typename Callback>
|
||||
@@ -12852,6 +12865,30 @@ int openssl_verify_callback(int preverify_ok, X509_STORE_CTX *ctx) {
|
||||
return callback(verify_ctx) ? 1 : 0;
|
||||
}
|
||||
|
||||
// X509_STORE_get0_objects is deprecated since OpenSSL 4.0 because it is not
|
||||
// thread-safe; X509_STORE_get1_objects (OpenSSL 3.3+) returns a snapshot
|
||||
// that must be released with release_store_objects
|
||||
#if !defined(OPENSSL_IS_BORINGSSL) && !defined(LIBRESSL_VERSION_NUMBER) && \
|
||||
OPENSSL_VERSION_NUMBER >= 0x30300000L
|
||||
#define CPPHTTPLIB_HAS_X509_STORE_GET1_OBJECTS
|
||||
#endif
|
||||
|
||||
STACK_OF(X509_OBJECT) * get_store_objects(X509_STORE *store) {
|
||||
#ifdef CPPHTTPLIB_HAS_X509_STORE_GET1_OBJECTS
|
||||
return X509_STORE_get1_objects(store);
|
||||
#else
|
||||
return X509_STORE_get0_objects(store);
|
||||
#endif
|
||||
}
|
||||
|
||||
void release_store_objects(STACK_OF(X509_OBJECT) * objs) {
|
||||
#ifdef CPPHTTPLIB_HAS_X509_STORE_GET1_OBJECTS
|
||||
sk_X509_OBJECT_pop_free(objs, X509_OBJECT_free);
|
||||
#else
|
||||
(void)objs; // get0 variant returns an internal pointer; nothing to free
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace impl
|
||||
|
||||
ctx_t create_client_context() {
|
||||
@@ -13373,11 +13410,19 @@ std::string get_cert_subject_cn(cert_t cert) {
|
||||
auto subject_name = X509_get_subject_name(x509);
|
||||
if (!subject_name) return "";
|
||||
|
||||
char buf[256];
|
||||
auto len =
|
||||
X509_NAME_get_text_by_NID(subject_name, NID_commonName, buf, sizeof(buf));
|
||||
if (len < 0) return "";
|
||||
return std::string(buf, static_cast<size_t>(len));
|
||||
// X509_NAME_get_text_by_NID is deprecated since OpenSSL 4.0
|
||||
auto idx = X509_NAME_get_index_by_NID(subject_name, NID_commonName, -1);
|
||||
if (idx < 0) return "";
|
||||
|
||||
auto entry = X509_NAME_get_entry(subject_name, idx);
|
||||
if (!entry) return "";
|
||||
|
||||
auto data = X509_NAME_ENTRY_get_data(entry);
|
||||
if (!data) return "";
|
||||
|
||||
return std::string(
|
||||
reinterpret_cast<const char *>(ASN1_STRING_get0_data(data)),
|
||||
static_cast<size_t>(ASN1_STRING_length(data)));
|
||||
}
|
||||
|
||||
std::string get_cert_issuer_name(cert_t cert) {
|
||||
@@ -13582,8 +13627,9 @@ size_t get_ca_certs(ctx_t ctx, std::vector<cert_t> &certs) {
|
||||
auto store = SSL_CTX_get_cert_store(ssl_ctx);
|
||||
if (!store) { return 0; }
|
||||
|
||||
auto objs = X509_STORE_get0_objects(store);
|
||||
auto objs = impl::get_store_objects(store);
|
||||
if (!objs) { return 0; }
|
||||
auto se = detail::scope_exit([&] { impl::release_store_objects(objs); });
|
||||
|
||||
auto count = sk_X509_OBJECT_num(objs);
|
||||
for (decltype(count) i = 0; i < count; i++) {
|
||||
@@ -13609,8 +13655,9 @@ std::vector<std::string> get_ca_names(ctx_t ctx) {
|
||||
auto store = SSL_CTX_get_cert_store(ssl_ctx);
|
||||
if (!store) { return names; }
|
||||
|
||||
auto objs = X509_STORE_get0_objects(store);
|
||||
auto objs = impl::get_store_objects(store);
|
||||
if (!objs) { return names; }
|
||||
auto se = detail::scope_exit([&] { impl::release_store_objects(objs); });
|
||||
|
||||
auto count = sk_X509_OBJECT_num(objs);
|
||||
for (decltype(count) i = 0; i < count; i++) {
|
||||
@@ -13716,110 +13763,6 @@ std::string verify_error_string(long error_code) {
|
||||
|
||||
} // namespace tls
|
||||
|
||||
bool SSLClient::verify_host(X509 *server_cert) const {
|
||||
/* Quote from RFC2818 section 3.1 "Server Identity"
|
||||
|
||||
If a subjectAltName extension of type dNSName is present, that MUST
|
||||
be used as the identity. Otherwise, the (most specific) Common Name
|
||||
field in the Subject field of the certificate MUST be used. Although
|
||||
the use of the Common Name is existing practice, it is deprecated and
|
||||
Certification Authorities are encouraged to use the dNSName instead.
|
||||
|
||||
Matching is performed using the matching rules specified by
|
||||
[RFC2459]. If more than one identity of a given type is present in
|
||||
the certificate (e.g., more than one dNSName name, a match in any one
|
||||
of the set is considered acceptable.) Names may contain the wildcard
|
||||
character * which is considered to match any single domain name
|
||||
component or component fragment. E.g., *.a.com matches foo.a.com but
|
||||
not bar.foo.a.com. f*.com matches foo.com but not bar.com.
|
||||
|
||||
In some cases, the URI is specified as an IP address rather than a
|
||||
hostname. In this case, the iPAddress subjectAltName must be present
|
||||
in the certificate and must exactly match the IP in the URI.
|
||||
|
||||
*/
|
||||
return verify_host_with_subject_alt_name(server_cert) ||
|
||||
verify_host_with_common_name(server_cert);
|
||||
}
|
||||
|
||||
bool
|
||||
SSLClient::verify_host_with_subject_alt_name(X509 *server_cert) const {
|
||||
auto ret = false;
|
||||
|
||||
auto type = GEN_DNS;
|
||||
|
||||
struct in6_addr addr6 = {};
|
||||
struct in_addr addr = {};
|
||||
size_t addr_len = 0;
|
||||
|
||||
#ifndef __MINGW32__
|
||||
if (inet_pton(AF_INET6, host_.c_str(), &addr6)) {
|
||||
type = GEN_IPADD;
|
||||
addr_len = sizeof(struct in6_addr);
|
||||
} else if (inet_pton(AF_INET, host_.c_str(), &addr)) {
|
||||
type = GEN_IPADD;
|
||||
addr_len = sizeof(struct in_addr);
|
||||
}
|
||||
#endif
|
||||
|
||||
auto alt_names = static_cast<const struct stack_st_GENERAL_NAME *>(
|
||||
X509_get_ext_d2i(server_cert, NID_subject_alt_name, nullptr, nullptr));
|
||||
|
||||
if (alt_names) {
|
||||
auto dsn_matched = false;
|
||||
auto ip_matched = false;
|
||||
|
||||
auto count = sk_GENERAL_NAME_num(alt_names);
|
||||
|
||||
for (decltype(count) i = 0; i < count && !dsn_matched; i++) {
|
||||
auto val = sk_GENERAL_NAME_value(alt_names, i);
|
||||
if (!val || val->type != type) { continue; }
|
||||
|
||||
auto name =
|
||||
reinterpret_cast<const char *>(ASN1_STRING_get0_data(val->d.ia5));
|
||||
if (name == nullptr) { continue; }
|
||||
|
||||
auto name_len = static_cast<size_t>(ASN1_STRING_length(val->d.ia5));
|
||||
|
||||
switch (type) {
|
||||
case GEN_DNS:
|
||||
dsn_matched =
|
||||
detail::match_hostname(std::string(name, name_len), host_);
|
||||
break;
|
||||
|
||||
case GEN_IPADD:
|
||||
if (!memcmp(&addr6, name, addr_len) || !memcmp(&addr, name, addr_len)) {
|
||||
ip_matched = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (dsn_matched || ip_matched) { ret = true; }
|
||||
}
|
||||
|
||||
GENERAL_NAMES_free(const_cast<STACK_OF(GENERAL_NAME) *>(
|
||||
reinterpret_cast<const STACK_OF(GENERAL_NAME) *>(alt_names)));
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool SSLClient::verify_host_with_common_name(X509 *server_cert) const {
|
||||
const auto subject_name = X509_get_subject_name(server_cert);
|
||||
|
||||
if (subject_name != nullptr) {
|
||||
char name[BUFSIZ];
|
||||
auto name_len = X509_NAME_get_text_by_NID(subject_name, NID_commonName,
|
||||
name, sizeof(name));
|
||||
|
||||
if (name_len != -1) {
|
||||
return detail::match_hostname(
|
||||
std::string(name, static_cast<size_t>(name_len)), host_);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif // CPPHTTPLIB_OPENSSL_SUPPORT
|
||||
|
||||
/*
|
||||
@@ -14622,10 +14565,10 @@ bool verify_hostname(cert_t cert, const char *hostname) {
|
||||
auto mcert = static_cast<const mbedtls_x509_crt *>(cert);
|
||||
std::string host_str(hostname);
|
||||
|
||||
// Check if hostname is an IP address
|
||||
bool is_ip = impl::is_ipv4_address(host_str);
|
||||
unsigned char ip_bytes[4];
|
||||
if (is_ip) { impl::parse_ipv4(host_str, ip_bytes); }
|
||||
// Check if hostname is an IP address (IPv4 or IPv6)
|
||||
unsigned char ip_bytes[16];
|
||||
auto ip_len = impl::parse_ip_address(host_str, ip_bytes);
|
||||
auto is_ip = ip_len > 0;
|
||||
|
||||
// Check Subject Alternative Names (SAN)
|
||||
// In Mbed TLS 3.x, subject_alt_names contains raw values without ASN.1 tags
|
||||
@@ -14637,9 +14580,9 @@ bool verify_hostname(cert_t cert, const char *hostname) {
|
||||
size_t len = san->buf.len;
|
||||
|
||||
if (is_ip) {
|
||||
// Check if this SAN is an IPv4 address (4 bytes)
|
||||
if (len == 4 && memcmp(p, ip_bytes, 4) == 0) { return true; }
|
||||
// Check if this SAN is an IPv6 address (16 bytes) - skip for now
|
||||
// For an IP host, only a matching iPAddress SAN of the same family
|
||||
// (4 bytes for IPv4, 16 bytes for IPv6) may authenticate it.
|
||||
if (len == ip_len && memcmp(p, ip_bytes, ip_len) == 0) { return true; }
|
||||
} else {
|
||||
// Check if this SAN is a DNS name (printable ASCII string)
|
||||
bool is_dns = len > 0;
|
||||
@@ -14654,21 +14597,25 @@ bool verify_hostname(cert_t cert, const char *hostname) {
|
||||
san = san->next;
|
||||
}
|
||||
|
||||
// Fallback: Check Common Name (CN) in subject
|
||||
char cn[256];
|
||||
int ret = mbedtls_x509_dn_gets(cn, sizeof(cn), &mcert->subject);
|
||||
if (ret > 0) {
|
||||
std::string cn_str(cn);
|
||||
// Fallback: Check Common Name (CN) in subject. Skipped for IP-literal hosts:
|
||||
// an IP identity is only valid via an iPAddress SAN, never the CN (RFC 9110;
|
||||
// the OpenSSL backend's X509_check_ip behaves the same way).
|
||||
if (!is_ip) {
|
||||
char cn[256];
|
||||
int ret = mbedtls_x509_dn_gets(cn, sizeof(cn), &mcert->subject);
|
||||
if (ret > 0) {
|
||||
std::string cn_str(cn);
|
||||
|
||||
// Look for "CN=" in the DN string
|
||||
size_t cn_pos = cn_str.find("CN=");
|
||||
if (cn_pos != std::string::npos) {
|
||||
size_t start = cn_pos + 3;
|
||||
size_t end = cn_str.find(',', start);
|
||||
std::string cn_value =
|
||||
cn_str.substr(start, end == std::string::npos ? end : end - start);
|
||||
// Look for "CN=" in the DN string
|
||||
size_t cn_pos = cn_str.find("CN=");
|
||||
if (cn_pos != std::string::npos) {
|
||||
size_t start = cn_pos + 3;
|
||||
size_t end = cn_str.find(',', start);
|
||||
std::string cn_value =
|
||||
cn_str.substr(start, end == std::string::npos ? end : end - start);
|
||||
|
||||
if (detail::match_hostname(cn_value, host_str)) { return true; }
|
||||
if (detail::match_hostname(cn_value, host_str)) { return true; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15774,10 +15721,10 @@ bool verify_hostname(cert_t cert, const char *hostname) {
|
||||
auto x509 = static_cast<WOLFSSL_X509 *>(cert);
|
||||
std::string host_str(hostname);
|
||||
|
||||
// Check if hostname is an IP address
|
||||
bool is_ip = impl::is_ipv4_address(host_str);
|
||||
unsigned char ip_bytes[4];
|
||||
if (is_ip) { impl::parse_ipv4(host_str, ip_bytes); }
|
||||
// Check if hostname is an IP address (IPv4 or IPv6)
|
||||
unsigned char ip_bytes[16];
|
||||
auto ip_len = impl::parse_ip_address(host_str, ip_bytes);
|
||||
auto is_ip = ip_len > 0;
|
||||
|
||||
// Check Subject Alternative Names
|
||||
auto *san_names = static_cast<WOLF_STACK_OF(WOLFSSL_GENERAL_NAME) *>(
|
||||
@@ -15804,10 +15751,12 @@ bool verify_hostname(cert_t cert, const char *hostname) {
|
||||
}
|
||||
}
|
||||
} else if (is_ip && names->type == WOLFSSL_GEN_IPADD) {
|
||||
// IP address
|
||||
// IP address: only an iPAddress SAN of the same family (4 bytes for
|
||||
// IPv4, 16 bytes for IPv6) may authenticate the host.
|
||||
unsigned char *ip_data = wolfSSL_ASN1_STRING_data(names->d.iPAddress);
|
||||
int ip_len = wolfSSL_ASN1_STRING_length(names->d.iPAddress);
|
||||
if (ip_data && ip_len == 4 && memcmp(ip_data, ip_bytes, 4) == 0) {
|
||||
auto san_ip_len = wolfSSL_ASN1_STRING_length(names->d.iPAddress);
|
||||
if (ip_data && san_ip_len == static_cast<int>(ip_len) &&
|
||||
memcmp(ip_data, ip_bytes, ip_len) == 0) {
|
||||
wolfSSL_sk_free(san_names);
|
||||
return true;
|
||||
}
|
||||
@@ -15816,8 +15765,10 @@ bool verify_hostname(cert_t cert, const char *hostname) {
|
||||
wolfSSL_sk_free(san_names);
|
||||
}
|
||||
|
||||
// Fallback: Check Common Name (CN) in subject
|
||||
WOLFSSL_X509_NAME *subject = wolfSSL_X509_get_subject_name(x509);
|
||||
// Fallback: Check Common Name (CN) in subject. Skipped for IP-literal hosts:
|
||||
// an IP identity is only valid via an iPAddress SAN, never the CN (RFC 9110;
|
||||
// the OpenSSL backend's X509_check_ip behaves the same way).
|
||||
auto subject = is_ip ? nullptr : wolfSSL_X509_get_subject_name(x509);
|
||||
if (subject) {
|
||||
char cn[256] = {};
|
||||
int cn_len = wolfSSL_X509_NAME_get_text_by_NID(subject, NID_commonName, cn,
|
||||
|
||||
Vendored
+63
-18
@@ -8,8 +8,8 @@
|
||||
#ifndef CPPHTTPLIB_HTTPLIB_H
|
||||
#define CPPHTTPLIB_HTTPLIB_H
|
||||
|
||||
#define CPPHTTPLIB_VERSION "0.47.0"
|
||||
#define CPPHTTPLIB_VERSION_NUM "0x002f00"
|
||||
#define CPPHTTPLIB_VERSION "0.48.0"
|
||||
#define CPPHTTPLIB_VERSION_NUM "0x003000"
|
||||
|
||||
#ifdef _WIN32
|
||||
#if defined(_WIN32_WINNT) && _WIN32_WINNT < 0x0A00
|
||||
@@ -686,18 +686,70 @@ inline from_chars_result<T> from_chars(const char *first, const char *last,
|
||||
return {p, std::errc{}};
|
||||
}
|
||||
|
||||
// from_chars for double (simple wrapper for strtod)
|
||||
// from_chars for double (hand-written, locale-independent)
|
||||
//
|
||||
// The only double consumed by this library is the HTTP quality value, whose
|
||||
// grammar is (RFC 9110 12.4.2):
|
||||
// qvalue = ( "0" [ "." 0*3DIGIT ] ) / ( "1" [ "." 0*3("0") ] )
|
||||
// i.e. a non-negative decimal with no sign, exponent, "inf"/"nan", or wide
|
||||
// magnitude. So this parser recognizes exactly 1*DIGIT [ "." *DIGIT ] with
|
||||
// '.' always the decimal separator (std::strtod would instead read it from the
|
||||
// global C locale, mis-parsing q-values once an embedder calls
|
||||
// setlocale(LC_ALL, "") into a comma-decimal locale). The caller range-checks
|
||||
// the result to [0, 1], so inputs outside that range need not be distinguished
|
||||
// here. Allocation-free, single pass, and free of the overflow/rounding edge
|
||||
// cases that exponent and wide-range handling would introduce.
|
||||
inline from_chars_result<double> from_chars(const char *first, const char *last,
|
||||
double &value) {
|
||||
std::string s(first, last);
|
||||
char *endptr = nullptr;
|
||||
errno = 0;
|
||||
value = std::strtod(s.c_str(), &endptr);
|
||||
if (endptr == s.c_str()) { return {first, std::errc::invalid_argument}; }
|
||||
if (errno == ERANGE) {
|
||||
return {first + (endptr - s.c_str()), std::errc::result_out_of_range};
|
||||
value = 0.0;
|
||||
const char *p = first;
|
||||
|
||||
// Each 1eN is exactly representable, so a single final division by the
|
||||
// matching entry yields a correctly-rounded result.
|
||||
static const double powers_of_ten[] = {
|
||||
1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
|
||||
1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18};
|
||||
const int max_frac_digits =
|
||||
static_cast<int>(sizeof(powers_of_ten) / sizeof(powers_of_ten[0])) - 1;
|
||||
|
||||
// Accumulate digits into a 64-bit integer and remember how many were
|
||||
// fractional. Two independent caps keep this bounded and safe:
|
||||
// * accumulation saturates before mantissa could overflow uint64_t, and
|
||||
// * frac_digits is capped at max_frac_digits so it is always a valid index
|
||||
// into powers_of_ten (without this an input like "0.000...0" would never
|
||||
// grow mantissa, so the saturation cap alone would not bound it).
|
||||
// Both caps only drop digits far beyond the precision a q-value needs; any
|
||||
// value they would change is well outside [0, 1] and rejected by the caller.
|
||||
uint64_t mantissa = 0;
|
||||
int frac_digits = 0;
|
||||
bool seen_digit = false;
|
||||
|
||||
const uint64_t limit = ((std::numeric_limits<uint64_t>::max)() - 9) / 10;
|
||||
auto accumulate = [&](char c) {
|
||||
if (mantissa <= limit) {
|
||||
mantissa = mantissa * 10 + static_cast<uint64_t>(c - '0');
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
for (; p != last && '0' <= *p && *p <= '9'; ++p) {
|
||||
seen_digit = true;
|
||||
accumulate(*p);
|
||||
}
|
||||
return {first + (endptr - s.c_str()), std::errc{}};
|
||||
|
||||
if (p != last && *p == '.') {
|
||||
++p;
|
||||
for (; p != last && '0' <= *p && *p <= '9'; ++p) {
|
||||
seen_digit = true;
|
||||
if (frac_digits < max_frac_digits && accumulate(*p)) { ++frac_digits; }
|
||||
}
|
||||
}
|
||||
|
||||
if (!seen_digit) { return {first, std::errc::invalid_argument}; }
|
||||
|
||||
value = static_cast<double>(mantissa) / powers_of_ten[frac_digits];
|
||||
return {p, std::errc{}};
|
||||
}
|
||||
|
||||
inline bool parse_port(const char *s, size_t len, int &port) {
|
||||
@@ -2826,13 +2878,6 @@ private:
|
||||
#endif
|
||||
|
||||
friend class ClientImpl;
|
||||
|
||||
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
|
||||
private:
|
||||
bool verify_host(X509 *server_cert) const;
|
||||
bool verify_host_with_subject_alt_name(X509 *server_cert) const;
|
||||
bool verify_host_with_common_name(X509 *server_cert) const;
|
||||
#endif
|
||||
};
|
||||
#endif // CPPHTTPLIB_SSL_ENABLED
|
||||
|
||||
|
||||
Reference in New Issue
Block a user