Compare commits

...

9 Commits

Author SHA1 Message Date
Adrien Gallouët a4ea7a188f vendor : update BoringSSL to 0.20260204.0 (#19333)
Signed-off-by: Adrien Gallouët <angt@huggingface.co>
2026-02-05 09:53:35 +01:00
Georgi Gerganov 7a4f97d196 metal : add diag (#19330) 2026-02-05 10:08:45 +02:00
Oleksandr Kuvshynov a498c75ad1 vulkan: fix GPU deduplication logic. (#19222)
* vulkan: fix GPU deduplication logic.

As reported in https://github.com/ggml-org/llama.cpp/issues/19221, the
(same uuid, same driver) logic is problematic for windows+intel igpu.

Let's just avoid filtering for MoltenVK which is apple-specific, and
keep the logic the  same as before 88d23ad5 - just dedup based on UUID.

Verified that MacOS + 4xVega still reports 4 GPUs with this version.

* vulkan: only skip dedup when both drivers are moltenVk
2026-02-05 09:06:59 +01:00
Jeff Bolz 3409ab842d vulkan: Set k_load_shmem to false when K is too large (#19301) 2026-02-05 08:48:33 +01:00
Jeff Bolz c342c3b93d vulkan: fix non-contig rope (#19299) 2026-02-05 08:38:59 +01:00
will-lms af252d0758 metal : add missing includes (#19348) 2026-02-05 08:05:09 +02:00
Sigbjørn Skjæret 11fb327bf3 vendor : add missing llama_add_compile_flags (#19322)
* add missing llama_add_compile_flags

* disable all warnings for ssl, crypto and fipsmodule
2026-02-05 02:27:38 +01:00
Aaron Teo e6e934c5ea vendor: update cpp-httplib version (#19313)
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
2026-02-05 05:15:03 +08:00
Daniel Bevenius b536eb0233 codeowners : add danbev for examples/debug (#19332)
* codeowners : add danbev for examples/debug

* Add @pwilkin to CODEOWNERS for debug

---------

Co-authored-by: Piotr Wilkin (ilintar) <piotr.wilkin@syndatis.com>
2026-02-04 20:20:40 +01:00
21 changed files with 533 additions and 168 deletions
+1
View File
@@ -27,6 +27,7 @@
/examples/batched.swift/ @ggerganov
/examples/batched/ @ggerganov
/examples/convert-llama2c-to-ggml/ @ggerganov
/examples/debug/ @danbev @pwilkin
/examples/deprecation-warning/ @ggerganov
/examples/diffusion/ @am17an
/examples/embedding/ @ggerganov
+20
View File
@@ -176,6 +176,26 @@ ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_set_rows(ggml_me
return res;
}
ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_diag(ggml_metal_library_t lib, const ggml_tensor * op) {
char base[256];
char name[256];
const int n = op->src[0]->ne[0];
snprintf(base, 256, "kernel_diag_%s", ggml_type_name(op->src[0]->type));
snprintf(name, 256, "%s_n=%d", base, n);
ggml_metal_pipeline_with_params res = ggml_metal_library_get_pipeline(lib, name);
if (!res.pipeline) {
res = ggml_metal_library_compile_pipeline(lib, base, name, nullptr);
}
res.nsg = 1;
res.smem = 0;
return res;
}
ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_repeat(ggml_metal_library_t lib, ggml_type tsrc) {
char base[256];
char name[256];
+1
View File
@@ -108,6 +108,7 @@ struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_pool_1d
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_pool_2d (ggml_metal_library_t lib, const struct ggml_tensor * op, enum ggml_op_pool op_pool);
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_get_rows (ggml_metal_library_t lib, enum ggml_type tsrc);
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_set_rows (ggml_metal_library_t lib, enum ggml_type tidx, enum ggml_type tdst);
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_diag (ggml_metal_library_t lib, const struct ggml_tensor * op);
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_repeat (ggml_metal_library_t lib, enum ggml_type tsrc);
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_unary (ggml_metal_library_t lib, const struct ggml_tensor * op);
struct ggml_metal_pipeline_with_params ggml_metal_library_get_pipeline_glu (ggml_metal_library_t lib, const struct ggml_tensor * op);
+3 -1
View File
@@ -1152,8 +1152,8 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
return has_simdgroup_reduction;
case GGML_OP_RWKV_WKV6:
case GGML_OP_RWKV_WKV7:
case GGML_OP_SOLVE_TRI:
return true;
case GGML_OP_SOLVE_TRI:
case GGML_OP_MUL_MAT:
case GGML_OP_MUL_MAT_ID:
return has_simdgroup_reduction;
@@ -1235,6 +1235,8 @@ bool ggml_metal_device_supports_op(ggml_metal_device_t dev, const struct ggml_te
return false;
};
}
case GGML_OP_DIAG:
return true;
case GGML_OP_OPT_STEP_ADAMW:
case GGML_OP_OPT_STEP_SGD:
return has_simdgroup_reduction;
+19
View File
@@ -792,6 +792,25 @@ typedef struct {
uint64_t nb3;
} ggml_metal_kargs_set_rows;
typedef struct {
int32_t ne00;
int32_t ne01;
int32_t ne02;
int32_t ne03;
uint64_t nb00;
uint64_t nb01;
uint64_t nb02;
uint64_t nb03;
int32_t ne0;
int32_t ne1;
int32_t ne2;
int32_t ne3;
uint64_t nb0;
uint64_t nb1;
uint64_t nb2;
uint64_t nb3;
} ggml_metal_kargs_diag;
typedef struct {
int64_t ne00;
int64_t ne01;
+46
View File
@@ -361,6 +361,10 @@ static int ggml_metal_op_encode_impl(ggml_metal_op_t ctx, int idx) {
{
n_fuse = ggml_metal_op_set_rows(ctx, idx);
} break;
case GGML_OP_DIAG:
{
n_fuse = ggml_metal_op_diag(ctx, idx);
} break;
case GGML_OP_L2_NORM:
{
n_fuse = ggml_metal_op_l2_norm(ctx, idx);
@@ -1259,6 +1263,48 @@ int ggml_metal_op_set_rows(ggml_metal_op_t ctx, int idx) {
return 1;
}
int ggml_metal_op_diag(ggml_metal_op_t ctx, int idx) {
ggml_tensor * op = ctx->node(idx);
ggml_metal_library_t lib = ctx->lib;
ggml_metal_encoder_t enc = ctx->enc;
GGML_TENSOR_LOCALS(int32_t, ne0, op->src[0], ne);
GGML_TENSOR_LOCALS(uint64_t, nb0, op->src[0], nb);
GGML_TENSOR_LOCALS(int32_t, ne, op, ne);
GGML_TENSOR_LOCALS(uint64_t, nb, op, nb);
ggml_metal_kargs_diag args = {
/*.ne00 =*/ne00,
/*.ne01 =*/ne01,
/*.ne02 =*/ne02,
/*.ne03 =*/ne03,
/*.nb00 =*/nb00,
/*.nb01 =*/nb01,
/*.nb02 =*/nb02,
/*.nb03 =*/nb03,
/*.ne0 =*/ne0,
/*.ne1 =*/ne1,
/*.ne2 =*/ne2,
/*.ne3 =*/ne3,
/*.nb0 =*/nb0,
/*.nb1 =*/nb1,
/*.nb2 =*/nb2,
/*.nb3 =*/nb3,
};
auto pipeline = ggml_metal_library_get_pipeline_diag(lib, op);
ggml_metal_encoder_set_pipeline(enc, pipeline);
ggml_metal_encoder_set_bytes(enc, &args, sizeof(args), 0);
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op->src[0]), 1);
ggml_metal_encoder_set_buffer(enc, ggml_metal_get_buffer_id(op), 2);
ggml_metal_encoder_dispatch_threadgroups(enc, ne1, ne2, ne3, 32, 1, 1);
return 1;
}
int ggml_metal_op_soft_max(ggml_metal_op_t ctx, int idx) {
ggml_tensor * op = ctx->node(idx);
+1
View File
@@ -56,6 +56,7 @@ int ggml_metal_op_sum_rows (ggml_metal_op_t ctx, int idx);
int ggml_metal_op_cumsum (ggml_metal_op_t ctx, int idx);
int ggml_metal_op_get_rows (ggml_metal_op_t ctx, int idx);
int ggml_metal_op_set_rows (ggml_metal_op_t ctx, int idx);
int ggml_metal_op_diag (ggml_metal_op_t ctx, int idx);
int ggml_metal_op_soft_max (ggml_metal_op_t ctx, int idx);
int ggml_metal_op_ssm_conv (ggml_metal_op_t ctx, int idx);
int ggml_metal_op_ssm_scan (ggml_metal_op_t ctx, int idx);
+3
View File
@@ -7,6 +7,9 @@
#include "ggml-metal-context.h"
#include "ggml-metal-ops.h"
#include <mutex>
#include <string>
#define GGML_METAL_NAME "MTL"
#define GGML_METAL_MAX_DEVICES 16
+20
View File
@@ -8815,6 +8815,26 @@ kernel void kernel_set_rows_f(
}
}
kernel void kernel_diag_f32(
constant ggml_metal_kargs_diag & args,
device const char * src0,
device char * dst,
uint3 tgpig[[threadgroup_position_in_grid]],
ushort tiitg[[thread_index_in_threadgroup]]) {
constexpr short NW = N_SIMDWIDTH;
const int32_t i3 = tgpig.z;
const int32_t i2 = tgpig.y;
const int32_t i1 = tgpig.x;
device const float * src0_ptr = (device const float *)(src0 + i2*args.nb02 + i3*args.nb03);
device float * dst_ptr = (device float *)(dst + i1*args.nb01 + i2*args.nb2 + i3*args.nb3);
for (int i0 = tiitg; i0 < args.ne0; i0 += NW) {
dst_ptr[i0] = i0 == i1 ? src0_ptr[i0] : 0.0f;
}
}
constant bool FC_mul_mm_bc_inp [[function_constant(FC_MUL_MM + 0)]];
constant bool FC_mul_mm_bc_out [[function_constant(FC_MUL_MM + 1)]];
+33 -18
View File
@@ -1263,25 +1263,30 @@ struct vk_op_diag_mask_push_constants {
struct vk_op_rope_push_constants {
uint32_t rope_mode;
uint32_t ncols;
uint32_t nrows;
uint32_t n_dims;
float freq_scale;
uint32_t p_delta_rows;
float freq_base;
float ext_factor;
float attn_factor;
float corr_dims[2];
float theta_scale;
uint32_t has_ff;
uint32_t ne02;
uint32_t s1;
uint32_t s2;
int32_t sections[4];
uint32_t is_imrope;
uint32_t is_back;
uint32_t set_rows_stride;
uint32_t ne00;
uint32_t ne01;
uint32_t ne02;
uint32_t nb01;
uint32_t nb02;
uint32_t nb03;
uint32_t nb11;
uint32_t nb12;
uint32_t nb13;
};
static_assert(sizeof(vk_op_rope_push_constants) <= 128, "sizeof(vk_op_rope_push_constants) must be <= 128");
// For fused rms_norm+mul+rope(+view+set_rows)
struct vk_op_rms_norm_mul_rope_push_constants {
@@ -3199,9 +3204,10 @@ static void ggml_vk_load_shaders(vk_device& device) {
const uint32_t D_lsb = D ^ (D & (D-1));
uint32_t D_split = std::min(std::min(device->subgroup_size, 8u), D_lsb / 4);
// Nvidia prefers shared memory use to load large tiles of K
// Nvidia prefers shared memory use to load large tiles of K.
// Switch to loading from global memory when it would use too much shared memory.
// AMD prefers loading K directly from global memory
const uint32_t k_load_shmem = device->vendor_id == VK_VENDOR_ID_NVIDIA ? 1 : 0;
const uint32_t k_load_shmem = device->vendor_id == VK_VENDOR_ID_NVIDIA && hsk < 256 ? 1 : 0;
return {wg_size, rows_cols[0], rows_cols[1], hsk, hsv, clamp, D_split, device->subgroup_size, k_load_shmem};
};
@@ -5555,9 +5561,9 @@ static void ggml_vk_instance_init() {
// Check if there are two physical devices corresponding to the same GPU
// This handles the case where the same GPU appears with different drivers (e.g., RADV + AMDVLK on Linux),
// see https://github.com/ggml-org/llama.cpp/pull/7582 for original deduplication.
// However, for MoltenVK on macOS, multiple GPUs on the same card may report the same UUID,
// see https://github.com/KhronosGroup/MoltenVK/issues/2683. Until this is fixed, we'll only deduplicate
// when drivers differ (same driver + same UUID = likely different GPUs)
// MoltenVK on macOS may report the same UUID for distinct GPUs on multi-GPU cards,
// see https://github.com/KhronosGroup/MoltenVK/issues/2683. Skip when both old/new
// driver is MoltenVK
auto old_device = std::find_if(
vk_instance.device_indices.begin(),
vk_instance.device_indices.end(),
@@ -5574,11 +5580,9 @@ static void ggml_vk_instance_init() {
old_id.deviceLUIDValid && new_id.deviceLUIDValid &&
std::equal(std::begin(old_id.deviceLUID), std::end(old_id.deviceLUID), std::begin(new_id.deviceLUID))
);
bool both_molten_vk = (new_driver.driverID == vk::DriverId::eMoltenvk && old_driver.driverID == vk::DriverId::eMoltenvk);
// Only deduplicate if same UUID AND different drivers
// (same driver + same UUID on MoltenVK = likely different GPUs on multi-GPU card)
bool different_driver = (old_driver.driverID != new_driver.driverID);
return same_uuid && different_driver;
return same_uuid && !both_molten_vk;
}
);
if (old_device == vk_instance.device_indices.end()) {
@@ -8407,7 +8411,7 @@ static bool ggml_vk_flash_attn_coopmat_shmem_support(const vk_device& device, co
const uint32_t sfshstride = (hsk <= 128) ? (Br + 8) : Br;
const uint32_t sfsh = Bc * sfshstride * acctype;
const bool k_load_shmem = device->vendor_id == VK_VENDOR_ID_NVIDIA;
const bool k_load_shmem = device->vendor_id == VK_VENDOR_ID_NVIDIA && hsk < 256;
const uint32_t kshstride = (k_load_shmem ? hsk_pad : MatBr) / 4 + 2;
const uint32_t vsh_stride = MatBc / 4 * row_split;
const uint32_t ksh = ((kshstride >= vsh_stride) ? (Bc * kshstride) : (Bc * vsh_stride)) * f16vec4;
@@ -10405,12 +10409,22 @@ static vk_op_rope_push_constants ggml_vk_make_rope_constants(const ggml_tensor *
uint32_t nb01 = src0->nb[1] / ggml_type_size(src0->type);
uint32_t nb02 = src0->nb[2] / ggml_type_size(src0->type);
uint32_t nb03 = src0->nb[3] / ggml_type_size(src0->type);
uint32_t nb11 = dst->nb[1] / ggml_type_size(dst->type);
uint32_t nb12 = dst->nb[2] / ggml_type_size(dst->type);
uint32_t nb13 = dst->nb[3] / ggml_type_size(dst->type);
vk_op_rope_push_constants rope {
(uint32_t)mode, (uint32_t)src0->ne[0], (uint32_t)ggml_nrows(src0), (uint32_t)n_dims, freq_scale, (uint32_t)src0->ne[1],
freq_base, ext_factor, attn_factor, {corr_dims[0], corr_dims[1]}, theta_scale,
has_ff, (uint32_t)src0->ne[2], nb01, nb02,
(uint32_t)mode, (uint32_t)ggml_nrows(src0), (uint32_t)n_dims, freq_scale,
freq_base, ext_factor, attn_factor, {corr_dims[0], corr_dims[1]}, theta_scale, has_ff,
{ sections[0], sections[1], sections[2], sections[3] }, is_imrope, backprop, set_rows_stride,
(uint32_t)src0->ne[0],
(uint32_t)src0->ne[1],
(uint32_t)src0->ne[2],
nb01, nb02, nb03,
nb11, nb12, nb13,
};
return rope;
@@ -14798,6 +14812,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
case GGML_OP_REPEAT_BACK:
return op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32;
case GGML_OP_ROPE:
return ggml_is_contiguous_rows(op) && ggml_is_contiguous_rows(op->src[0]);
case GGML_OP_ROPE_BACK:
case GGML_OP_NONE:
case GGML_OP_RESHAPE:
@@ -112,12 +112,11 @@ void rms_norm(uint num_iters) {
#if RMS_NORM_ROPE_FUSION
barrier();
rope_params rp = p.rope;
uint rope_row = (samp*nchannels + channel)*nrows + row;
for (uint t = 2*tid; t < ncols; t += 2*BLOCK_SIZE) {
if (rp.rope_mode == GGML_ROPE_TYPE_NEOX) {
rope_neox(t, rope_row, rp);
rope_neox(t, row, channel, samp, rp);
} else if (rp.rope_mode == GGML_ROPE_TYPE_NORMAL) {
rope_norm(t, rope_row, rp);
rope_norm(t, row, channel, samp, rp);
}
}
#endif
@@ -4,12 +4,12 @@ float rope_yarn_ramp(const float low, const float high, const uint i0) {
return 1.0f - min(1.0f, max(0.0f, y));
}
uint rope_a_coord(const uint i0, const uint i01, const uint i02, rope_params p) {
uint rope_a_coord(const uint i0, const uint i01, const uint i02, const uint i03, rope_params p) {
#if RMS_NORM_ROPE_FUSION
// Per-row offset in shared memory
const uint ix = i0;
#else
const uint ix = i02*p.nb02 + i01*p.nb01 + i0;
const uint ix = i03*p.nb03 + i02*p.nb02 + i01*p.nb01 + i0;
#endif
return ix;
}
@@ -34,26 +34,19 @@ void rope_yarn(const float theta_extrap, const uint i0, out float cos_theta, out
sin_theta = sin(theta) * mscale;
}
void rope_norm(const uint i0, const uint i1, rope_params p) {
uint ne0 = p.ncols;
uint ne1 = p.p_delta_rows;
if (i0 >= ne0) {
void rope_norm(const uint i0, const uint i1, const uint i2, const uint i3, rope_params p) {
if (i0 >= p.ne00) {
return;
}
// i1 is actually i2*nb2+i1, but the rows are contiguous
const uint i01 = i1 % ne1;
const uint i02 = i1 / ne1;
uint idst = i1*ne0 + i0;
const uint ix = rope_a_coord(i0, i01, i02, p);
uint idst = i0 + i1 * p.nb11 + i2 * p.nb12 + i3 * p.nb13;
const uint ix = rope_a_coord(i0, i1, i2, i3, p);
// Fusion optimization: ROPE + VIEW + SET_ROWS.
// The rope output is viewed as a 1D tensor and offset based on a row index in rope_data_i.
if (p.set_rows_stride != 0) {
idst = i01*ne0 + i0;
idst += rope_data_i[i02].x * p.set_rows_stride;
idst = i1*p.nb11 + i0;
idst += rope_data_i[i2].x * p.set_rows_stride;
}
if (i0 >= p.n_dims) {
@@ -63,7 +56,7 @@ void rope_norm(const uint i0, const uint i1, rope_params p) {
return;
}
const float theta_base = rope_data_pos[i02] * pow(p.theta_scale, i0/2.0f);
const float theta_base = rope_data_pos[i2] * pow(p.theta_scale, i0/2.0f);
const float freq_factor = p.has_ff != 0 ? rope_data_ff[i0/2] : 1.0f;
@@ -77,25 +70,19 @@ void rope_norm(const uint i0, const uint i1, rope_params p) {
rope_data_d[idst + 1] = ROPE_D_TYPE(x0*sin_theta + x1*cos_theta);
}
void rope_neox(const uint i0, const uint i1, rope_params p) {
uint ne0 = p.ncols;
uint ne1 = p.p_delta_rows;
if (i0 >= ne0) {
void rope_neox(const uint i0, const uint i1, const uint i2, const uint i3, rope_params p) {
if (i0 >= p.ne00) {
return;
}
const uint i01 = i1 % ne1;
const uint i02 = i1 / ne1;
uint idst = i1*ne0 + i0/2;
const uint ix = rope_a_coord(i0/2, i01, i02, p);
uint idst = i0/2 + i1 * p.nb11 + i2 * p.nb12 + i3 * p.nb13;
const uint ix = rope_a_coord(i0/2, i1, i2, i3, p);
// Fusion optimization: ROPE + VIEW + SET_ROWS.
// The rope output is viewed as a 1D tensor and offset based on a row index in rope_data_i.
if (p.set_rows_stride != 0) {
idst = i01*ne0 + i0/2;
idst += rope_data_i[i02].x * p.set_rows_stride;
idst = i1*p.nb11 + i0/2;
idst += rope_data_i[i2].x * p.set_rows_stride;
}
if (i0 >= p.n_dims) {
@@ -105,7 +92,7 @@ void rope_neox(const uint i0, const uint i1, rope_params p) {
return;
}
const float theta_base = rope_data_pos[i02] * pow(p.theta_scale, i0/2.0f);
const float theta_base = rope_data_pos[i2] * pow(p.theta_scale, i0/2.0f);
const float freq_factor = p.has_ff != 0 ? rope_data_ff[i0/2] : 1.0f;
@@ -120,26 +107,19 @@ void rope_neox(const uint i0, const uint i1, rope_params p) {
}
void rope_multi(const uint i0, const uint i1, rope_params p) {
uint ne0 = p.ncols;
uint ne1 = p.p_delta_rows;
uint ne2 = p.ne02;
if (i0 >= ne0) {
void rope_multi(const uint i0, const uint i1, const uint i2, const uint i3, rope_params p) {
if (i0 >= p.ne00) {
return;
}
const uint i01 = i1 % ne1;
const uint i02 = i1 / ne1;
uint idst = i1*ne0 + i0/2;
const uint ix = rope_a_coord(i0/2, i01, i02, p);
uint idst = i0/2 + i1 * p.nb11 + i2 * p.nb12 + i3 * p.nb13;
const uint ix = rope_a_coord(i0/2, i1, i2, i3, p);
// Fusion optimization: ROPE + VIEW + SET_ROWS.
// The rope output is viewed as a 1D tensor and offset based on a row index in rope_data_i.
if (p.set_rows_stride != 0) {
idst = i01*ne0 + i0/2;
idst += rope_data_i[i02].x * p.set_rows_stride;
idst = i1*p.nb11 + i0/2;
idst += rope_data_i[i2].x * p.set_rows_stride;
}
if (i0 >= p.n_dims) {
@@ -156,26 +136,26 @@ void rope_multi(const uint i0, const uint i1, rope_params p) {
float theta_base = 0.0;
if (p.is_imrope != 0) {
if (sector % 3 == 1 && sector < 3 * p.sections[1]) {
theta_base = rope_data_pos[i02 + ne2 * 1]*pow(p.theta_scale, i0/2.0f);
theta_base = rope_data_pos[i2 + p.ne02 * 1]*pow(p.theta_scale, i0/2.0f);
} else if (sector % 3 == 2 && sector < 3 * p.sections[2]) {
theta_base = rope_data_pos[i02 + ne2 * 2]*pow(p.theta_scale, i0/2.0f);
theta_base = rope_data_pos[i2 + p.ne02 * 2]*pow(p.theta_scale, i0/2.0f);
} else if (sector % 3 == 0 && sector < 3 * p.sections[0]) {
theta_base = rope_data_pos[i02]*pow(p.theta_scale, i0/2.0f);
theta_base = rope_data_pos[i2]*pow(p.theta_scale, i0/2.0f);
} else {
theta_base = rope_data_pos[i02 + ne2 * 3]*pow(p.theta_scale, i0/2.0f);
theta_base = rope_data_pos[i2 + p.ne02 * 3]*pow(p.theta_scale, i0/2.0f);
}
} else {
if (sector < p.sections[0]) {
theta_base = rope_data_pos[i02]*pow(p.theta_scale, i0/2.0f);
theta_base = rope_data_pos[i2]*pow(p.theta_scale, i0/2.0f);
}
else if (sector >= p.sections[0] && sector < sec_w) {
theta_base = rope_data_pos[i02 + ne2 * 1]*pow(p.theta_scale, i0/2.0f);
theta_base = rope_data_pos[i2 + p.ne02 * 1]*pow(p.theta_scale, i0/2.0f);
}
else if (sector >= sec_w && sector < sec_w + p.sections[2]) {
theta_base = rope_data_pos[i02 + ne2 * 2]*pow(p.theta_scale, i0/2.0f);
theta_base = rope_data_pos[i2 + p.ne02 * 2]*pow(p.theta_scale, i0/2.0f);
}
else if (sector >= sec_w + p.sections[2]) {
theta_base = rope_data_pos[i02 + ne2 * 3]*pow(p.theta_scale, i0/2.0f);
theta_base = rope_data_pos[i2 + p.ne02 * 3]*pow(p.theta_scale, i0/2.0f);
}
}
@@ -191,20 +171,13 @@ void rope_multi(const uint i0, const uint i1, rope_params p) {
rope_data_d[idst + p.n_dims/2] = ROPE_D_TYPE(x0*sin_theta + x1*cos_theta);
}
void rope_vision(const uint i0, const uint i1, rope_params p) {
uint ne0 = p.ncols;
uint ne1 = p.p_delta_rows;
uint ne2 = p.ne02;
if (i0 >= ne0) {
void rope_vision(const uint i0, const uint i1, const uint i2, const uint i3, rope_params p) {
if (i0 >= p.ne00) {
return;
}
const uint i01 = i1 % ne1;
const uint i02 = i1 / ne1;
const uint idst = i1*ne0 + i0/2;
const uint ix = rope_a_coord(i0/2, i01, i02, p);
const uint idst = i0/2 + i1 * p.nb11 + i2 * p.nb12 + i3 * p.nb13;
const uint ix = rope_a_coord(i0/2, i1, i2, i3, p);
const int sect_dims = p.sections[0] + p.sections[1];
const int sec_w = p.sections[1] + p.sections[0];
@@ -213,11 +186,11 @@ void rope_vision(const uint i0, const uint i1, rope_params p) {
float theta_base = 0.0;
if (sector < p.sections[0]) {
const uint p0 = sector;
theta_base = rope_data_pos[i02]*pow(p.theta_scale, p0);
theta_base = rope_data_pos[i2]*pow(p.theta_scale, p0);
}
else if (sector >= p.sections[0] && sector < sec_w) {
const uint p0 = sector - p.sections[0];
theta_base = rope_data_pos[i02 + ne2]*pow(p.theta_scale, p0);
theta_base = rope_data_pos[i2 + p.ne02]*pow(p.theta_scale, p0);
}
const float freq_factor = p.has_ff != 0 ? rope_data_ff[i0/2] : 1.0f;
@@ -5,10 +5,13 @@
void main() {
const uint i0 = 2*gl_GlobalInvocationID.y;
// i1 is actually i2*nb2+i1, but the rows are contiguous
const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
if (i1 >= pc.nrows) {
const uint row = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
if (row >= pc.nrows) {
return;
}
rope_multi(i0, i1, pc);
const uint i3 = row / (pc.ne01*pc.ne02);
const uint i2 = (row - i3 * pc.ne01*pc.ne02) / pc.ne01;
const uint i1 = (row - i3 * pc.ne01*pc.ne02 - i2 * pc.ne01);
rope_multi(i0, i1, i2, i3, pc);
}
@@ -5,10 +5,13 @@
void main() {
const uint i0 = 2*gl_GlobalInvocationID.y;
// i1 is actually i2*nb2+i1, but the rows are contiguous
const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
if (i1 >= pc.nrows) {
const uint row = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
if (row >= pc.nrows) {
return;
}
rope_neox(i0, i1, pc);
const uint i3 = row / (pc.ne01*pc.ne02);
const uint i2 = (row - i3 * pc.ne01*pc.ne02) / pc.ne01;
const uint i1 = (row - i3 * pc.ne01*pc.ne02 - i2 * pc.ne01);
rope_neox(i0, i1, i2, i3, pc);
}
@@ -5,10 +5,13 @@
void main() {
const uint i0 = 2*gl_GlobalInvocationID.y;
// i1 is actually i2*nb2+i1, but the rows are contiguous
const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
if (i1 >= pc.nrows) {
const uint row = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
if (row >= pc.nrows) {
return;
}
rope_norm(i0, i1, pc);
const uint i3 = row / (pc.ne01*pc.ne02);
const uint i2 = (row - i3 * pc.ne01*pc.ne02) / pc.ne01;
const uint i1 = (row - i3 * pc.ne01*pc.ne02 - i2 * pc.ne01);
rope_norm(i0, i1, i2, i3, pc);
}
@@ -5,24 +5,29 @@
struct rope_params {
uint rope_mode;
uint ncols;
uint nrows;
uint n_dims;
float freq_scale;
uint p_delta_rows;
float freq_base;
float ext_factor;
float attn_factor;
float corr_dims[2];
float theta_scale;
uint has_ff;
uint ne02;
uint nb01;
uint nb02;
int sections[4];
uint is_imrope;
uint is_back;
uint set_rows_stride;
uint ne00;
uint ne01;
uint ne02;
uint nb01;
uint nb02;
uint nb03;
uint nb11;
uint nb12;
uint nb13;
};
#endif // !defined(GGML_ROPE_PARAMS)
@@ -5,10 +5,13 @@
void main() {
const uint i0 = 2*gl_GlobalInvocationID.y;
// i1 is actually i2*nb2+i1, but the rows are contiguous
const uint i1 = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
if (i1 >= pc.nrows) {
const uint row = gl_GlobalInvocationID.x + 32768 * gl_GlobalInvocationID.z;
if (row >= pc.nrows) {
return;
}
rope_vision(i0, i1, pc);
const uint i3 = row / (pc.ne01*pc.ne02);
const uint i2 = (row - i3 * pc.ne01*pc.ne02) / pc.ne01;
const uint i1 = (row - i3 * pc.ne01*pc.ne02 - i2 * pc.ne01);
rope_vision(i0, i1, i2, i3, pc);
}
+2 -2
View File
@@ -12,8 +12,8 @@ vendor = {
# "https://github.com/mackron/miniaudio/raw/refs/tags/0.11.23/miniaudio.h": "vendor/miniaudio/miniaudio.h",
"https://github.com/mackron/miniaudio/raw/669ed3e844524fcd883231b13095baee9f6de304/miniaudio.h": "vendor/miniaudio/miniaudio.h",
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.1/httplib.h": "vendor/cpp-httplib/httplib.h",
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.1/LICENSE": "vendor/cpp-httplib/LICENSE",
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.2/httplib.h": "vendor/cpp-httplib/httplib.h",
"https://raw.githubusercontent.com/yhirose/cpp-httplib/refs/tags/v0.30.2/LICENSE": "vendor/cpp-httplib/LICENSE",
"https://raw.githubusercontent.com/sheredom/subprocess.h/b49c56e9fe214488493021017bf3954b91c7c1f5/subprocess.h": "vendor/sheredom/subprocess.h",
}
+25 -3
View File
@@ -3,9 +3,14 @@ license_add_file("cpp-httplib" "LICENSE")
find_package(Threads REQUIRED)
llama_add_compile_flags()
add_library(${TARGET} STATIC httplib.cpp httplib.h)
if (NOT MSVC)
# disable warnings in 3rd party code
# disable warnings in 3rd party code
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
target_compile_options(${TARGET} PRIVATE /w)
else()
target_compile_options(${TARGET} PRIVATE -w)
endif()
@@ -34,7 +39,7 @@ if (LLAMA_BUILD_BORINGSSL)
set(FIPS OFF CACHE BOOL "Enable FIPS (BoringSSL)")
set(BORINGSSL_GIT "https://boringssl.googlesource.com/boringssl" CACHE STRING "BoringSSL git repository")
set(BORINGSSL_VERSION "0.20251002.0" CACHE STRING "BoringSSL version")
set(BORINGSSL_VERSION "0.20260204.0" CACHE STRING "BoringSSL version")
message(STATUS "Fetching BoringSSL version ${BORINGSSL_VERSION}")
@@ -146,6 +151,23 @@ elseif (LLAMA_OPENSSL)
endif()
endif()
# disable warnings in 3rd party code
if(LLAMA_BUILD_BORINGSSL OR LLAMA_BUILD_LIBRESSL)
if (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
target_compile_options(ssl PRIVATE /w)
target_compile_options(crypto PRIVATE /w)
if(LLAMA_BUILD_BORINGSSL)
target_compile_options(fipsmodule PRIVATE /w)
endif()
else()
target_compile_options(ssl PRIVATE -w)
target_compile_options(crypto PRIVATE -w)
if(LLAMA_BUILD_BORINGSSL)
target_compile_options(fipsmodule PRIVATE -w)
endif()
endif()
endif()
if (CPPHTTPLIB_OPENSSL_SUPPORT)
target_compile_definitions(${TARGET} PUBLIC CPPHTTPLIB_OPENSSL_SUPPORT) # used in server.cpp
if (APPLE AND CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+190 -48
View File
@@ -117,6 +117,8 @@ time_t parse_http_date(const std::string &date_str) {
#ifdef _WIN32
return _mkgmtime(&tm_buf);
#elif defined _AIX
return mktime(&tm_buf);
#else
return timegm(&tm_buf);
#endif
@@ -1376,7 +1378,7 @@ int getaddrinfo_with_timeout(const char *node, const char *service,
// Allocate on the heap, so the resolver thread can keep using the data.
auto state = std::make_shared<GetAddrInfoState>();
state->node = node;
if (node) { state->node = node; }
state->service = service;
state->hints = *hints;
@@ -2896,10 +2898,20 @@ bool parse_range_header(const std::string &s, Ranges &ranges) try {
return;
}
const auto first =
static_cast<ssize_t>(lhs.empty() ? -1 : std::stoll(lhs));
const auto last =
static_cast<ssize_t>(rhs.empty() ? -1 : std::stoll(rhs));
ssize_t first = -1;
if (!lhs.empty()) {
ssize_t v;
auto res = detail::from_chars(lhs.data(), lhs.data() + lhs.size(), v);
if (res.ec == std::errc{}) { first = v; }
}
ssize_t last = -1;
if (!rhs.empty()) {
ssize_t v;
auto res = detail::from_chars(rhs.data(), rhs.data() + rhs.size(), v);
if (res.ec == std::errc{}) { last = v; }
}
if ((first == -1 && last == -1) ||
(first != -1 && last != -1 && first > last)) {
all_valid_ranges = false;
@@ -2974,25 +2986,17 @@ bool parse_accept_header(const std::string &s,
return;
}
#ifdef CPPHTTPLIB_NO_EXCEPTIONS
{
std::istringstream iss(quality_str);
iss >> accept_entry.quality;
// Check if conversion was successful and entire string was consumed
if (iss.fail() || !iss.eof()) {
double v = 0.0;
auto res = detail::from_chars(
quality_str.data(), quality_str.data() + quality_str.size(), v);
if (res.ec == std::errc{}) {
accept_entry.quality = v;
} else {
has_invalid_entry = true;
return;
}
}
#else
try {
accept_entry.quality = std::stod(quality_str);
} catch (...) {
has_invalid_entry = true;
return;
}
#endif
// Check if quality is in valid range [0.0, 1.0]
if (accept_entry.quality < 0.0 || accept_entry.quality > 1.0) {
has_invalid_entry = true;
@@ -5570,13 +5574,26 @@ bool Server::read_content(Stream &strm, Request &req, Response &res) {
strm, req, res,
// Regular
[&](const char *buf, size_t n) {
// Prevent arithmetic overflow when checking sizes.
// Avoid computing (req.body.size() + n) directly because
// adding two unsigned `size_t` values can wrap around and
// produce a small result instead of indicating overflow.
// Instead, check using subtraction: ensure `n` does not
// exceed the remaining capacity `max_size() - size()`.
if (req.body.size() >= req.body.max_size() ||
n > req.body.max_size() - req.body.size()) {
return false;
}
// Limit decompressed body size to payload_max_length_ to protect
// against "zip bomb" attacks where a small compressed payload
// decompresses to a massive size.
if (req.body.size() + n > payload_max_length_ ||
req.body.size() + n > req.body.max_size()) {
if (payload_max_length_ > 0 &&
(req.body.size() >= payload_max_length_ ||
n > payload_max_length_ - req.body.size())) {
return false;
}
req.body.append(buf, n);
return true;
},
@@ -5666,22 +5683,29 @@ bool Server::read_content_core(
// oversized request and fail early (causing connection close). For SSL
// builds we cannot reliably peek the decrypted application bytes, so keep
// the original behaviour.
#if !defined(CPPHTTPLIB_OPENSSL_SUPPORT) && !defined(_WIN32)
#if !defined(CPPHTTPLIB_OPENSSL_SUPPORT)
if (!req.has_header("Content-Length") &&
!detail::is_chunked_transfer_encoding(req.headers)) {
socket_t s = strm.socket();
if (s != INVALID_SOCKET) {
// Peek up to payload_max_length_ + 1 bytes. If more than
// payload_max_length_ bytes are pending, reject the request.
size_t to_peek =
(payload_max_length_ > 0)
? (std::min)(payload_max_length_ + 1, static_cast<size_t>(4096))
: 1;
std::vector<char> peekbuf(to_peek);
ssize_t n = ::recv(s, peekbuf.data(), to_peek, MSG_PEEK);
if (n > 0 && static_cast<size_t>(n) > payload_max_length_) {
// Indicate failure so connection will be closed.
return false;
// Only peek if payload_max_length is set to a finite value
if (payload_max_length_ > 0 &&
payload_max_length_ < (std::numeric_limits<size_t>::max)()) {
socket_t s = strm.socket();
if (s != INVALID_SOCKET) {
// Peek to check if there is any pending data
char peekbuf[1];
ssize_t n = ::recv(s, peekbuf, 1, MSG_PEEK);
if (n > 0) {
// There is data, so read it with payload limit enforcement
auto result = detail::read_content_without_length(
strm, payload_max_length_, out);
if (result == detail::ReadContentResult::PayloadTooLarge) {
res.status = StatusCode::PayloadTooLarge_413;
return false;
} else if (result != detail::ReadContentResult::Success) {
return false;
}
return true;
}
}
}
return true;
@@ -6656,7 +6680,8 @@ void ClientImpl::close_socket(Socket &socket) {
}
bool ClientImpl::read_response_line(Stream &strm, const Request &req,
Response &res) const {
Response &res,
bool skip_100_continue) const {
std::array<char, 2048> buf{};
detail::stream_line_reader line_reader(strm, buf.data(), buf.size());
@@ -6677,8 +6702,8 @@ bool ClientImpl::read_response_line(Stream &strm, const Request &req,
res.status = std::stoi(std::string(m[2]));
res.reason = std::string(m[3]);
// Ignore '100 Continue'
while (res.status == StatusCode::Continue_100) {
// Ignore '100 Continue' (only when not using Expect: 100-continue explicitly)
while (skip_100_continue && res.status == StatusCode::Continue_100) {
if (!line_reader.getline()) { return false; } // CRLF
if (!line_reader.getline()) { return false; } // next response line
@@ -7463,7 +7488,8 @@ bool ClientImpl::write_content_with_provider(Stream &strm,
}
bool ClientImpl::write_request(Stream &strm, Request &req,
bool close_connection, Error &error) {
bool close_connection, Error &error,
bool skip_body) {
// Prepare additional headers
if (close_connection) {
if (!req.has_header("Connection")) {
@@ -7582,7 +7608,59 @@ bool ClientImpl::write_request(Stream &strm, Request &req,
}
}
// After sending request line and headers, wait briefly for an early server
// response (e.g. 4xx) and avoid sending a potentially large request body
// unnecessarily. This workaround is only enabled on Windows because Unix
// platforms surface write errors (EPIPE) earlier; on Windows kernel send
// buffering can accept large writes even when the peer already responded.
// Check the stream first (which covers SSL via `is_readable()`), then
// fall back to select on the socket. Only perform the wait for very large
// request bodies to avoid interfering with normal small requests and
// reduce side-effects. Poll briefly (up to 50ms as default) for an early
// response. Skip this check when using Expect: 100-continue, as the protocol
// handles early responses properly.
#if defined(_WIN32)
if (!skip_body &&
req.body.size() > CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_THRESHOLD &&
req.path.size() > CPPHTTPLIB_REQUEST_URI_MAX_LENGTH) {
auto start = std::chrono::high_resolution_clock::now();
for (;;) {
// Prefer socket-level readiness to avoid SSL_pending() false-positives
// from SSL internals. If the underlying socket is readable, assume an
// early response may be present.
auto sock = strm.socket();
if (sock != INVALID_SOCKET && detail::select_read(sock, 0, 0) > 0) {
return false;
}
// Fallback to stream-level check for non-socket streams or when the
// socket isn't reporting readable. Avoid using `is_readable()` for
// SSL, since `SSL_pending()` may report buffered records that do not
// indicate a complete application-level response yet.
if (!is_ssl() && strm.is_readable()) { return false; }
auto now = std::chrono::high_resolution_clock::now();
auto elapsed =
std::chrono::duration_cast<std::chrono::milliseconds>(now - start)
.count();
if (elapsed >= CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_TIMEOUT_MSECOND) {
break;
}
std::this_thread::sleep_for(std::chrono::milliseconds(1));
}
}
#endif
// Body
if (skip_body) { return true; }
return write_request_body(strm, req, error);
}
bool ClientImpl::write_request_body(Stream &strm, Request &req,
Error &error) {
if (req.body.empty()) {
return write_content_with_provider(strm, req, error);
}
@@ -7758,8 +7836,20 @@ void ClientImpl::output_error_log(const Error &err,
bool ClientImpl::process_request(Stream &strm, Request &req,
Response &res, bool close_connection,
Error &error) {
// Send request
if (!write_request(strm, req, close_connection, error)) { return false; }
// Auto-add Expect: 100-continue for large bodies
if (CPPHTTPLIB_EXPECT_100_THRESHOLD > 0 && !req.has_header("Expect")) {
auto body_size = req.body.empty() ? req.content_length_ : req.body.size();
if (body_size >= CPPHTTPLIB_EXPECT_100_THRESHOLD) {
req.set_header("Expect", "100-continue");
}
}
// Check for Expect: 100-continue
auto expect_100_continue = req.get_header_value("Expect") == "100-continue";
// Send request (skip body if using Expect: 100-continue)
auto write_request_success =
write_request(strm, req, close_connection, error, expect_100_continue);
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
if (is_ssl()) {
@@ -7774,14 +7864,48 @@ bool ClientImpl::process_request(Stream &strm, Request &req,
}
#endif
// Handle Expect: 100-continue with timeout
if (expect_100_continue && CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND > 0) {
time_t sec = CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND / 1000;
time_t usec = (CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND % 1000) * 1000;
auto ret = detail::select_read(strm.socket(), sec, usec);
if (ret <= 0) {
// Timeout or error: send body anyway (server didn't respond in time)
if (!write_request_body(strm, req, error)) { return false; }
expect_100_continue = false; // Switch to normal response handling
}
}
// Receive response and headers
if (!read_response_line(strm, req, res) ||
// When using Expect: 100-continue, don't auto-skip `100 Continue` response
if (!read_response_line(strm, req, res, !expect_100_continue) ||
!detail::read_headers(strm, res.headers)) {
error = Error::Read;
if (write_request_success) { error = Error::Read; }
output_error_log(error, &req);
return false;
}
if (!write_request_success) { return false; }
// Handle Expect: 100-continue response
if (expect_100_continue) {
if (res.status == StatusCode::Continue_100) {
// Server accepted, send the body
if (!write_request_body(strm, req, error)) { return false; }
// Read the actual response
res.headers.clear();
res.body.clear();
if (!read_response_line(strm, req, res) ||
!detail::read_headers(strm, res.headers)) {
error = Error::Read;
output_error_log(error, &req);
return false;
}
}
// If not 100 Continue, server returned an error; proceed with that response
}
// Body
if ((res.status != StatusCode::NoContent_204) && req.method != "HEAD" &&
req.method != "CONNECT") {
@@ -9543,7 +9667,7 @@ bool SSLClient::load_certs() {
last_openssl_error_ = ERR_get_error();
ret = false;
}
} else {
} else if (!ca_cert_store_) {
auto loaded = false;
#ifdef _WIN32
loaded =
@@ -9790,7 +9914,11 @@ bool SSLClient::verify_host_with_common_name(X509 *server_cert) const {
bool SSLClient::check_host_name(const char *pattern,
size_t pattern_len) const {
if (host_.size() == pattern_len && host_ == pattern) { return true; }
// Exact match (case-insensitive)
if (host_.size() == pattern_len &&
detail::case_ignore::equal(host_, std::string(pattern, pattern_len))) {
return true;
}
// Wildcard match
// https://bugs.launchpad.net/ubuntu/+source/firefox-3.0/+bug/376484
@@ -9805,9 +9933,23 @@ bool SSLClient::check_host_name(const char *pattern,
auto itr = pattern_components.begin();
for (const auto &h : host_components_) {
auto &p = *itr;
if (p != h && p != "*") {
auto partial_match = (p.size() > 0 && p[p.size() - 1] == '*' &&
!p.compare(0, p.size() - 1, h));
if (!httplib::detail::case_ignore::equal(p, h) && p != "*") {
bool partial_match = false;
if (!p.empty() && p[p.size() - 1] == '*') {
const auto prefix_length = p.size() - 1;
if (prefix_length == 0) {
partial_match = true;
} else if (h.size() >= prefix_length) {
partial_match =
std::equal(p.begin(),
p.begin() + static_cast<std::string::difference_type>(
prefix_length),
h.begin(), [](const char ca, const char cb) {
return httplib::detail::case_ignore::to_lower(ca) ==
httplib::detail::case_ignore::to_lower(cb);
});
}
}
if (!partial_match) { return false; }
}
++itr;
+93 -9
View File
@@ -8,8 +8,8 @@
#ifndef CPPHTTPLIB_HTTPLIB_H
#define CPPHTTPLIB_HTTPLIB_H
#define CPPHTTPLIB_VERSION "0.30.1"
#define CPPHTTPLIB_VERSION_NUM "0x001E01"
#define CPPHTTPLIB_VERSION "0.30.2"
#define CPPHTTPLIB_VERSION_NUM "0x001E02"
/*
* Platform compatibility check
@@ -98,6 +98,22 @@
#define CPPHTTPLIB_CLIENT_MAX_TIMEOUT_MSECOND 0
#endif
#ifndef CPPHTTPLIB_EXPECT_100_THRESHOLD
#define CPPHTTPLIB_EXPECT_100_THRESHOLD 1024
#endif
#ifndef CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND
#define CPPHTTPLIB_EXPECT_100_TIMEOUT_MSECOND 1000
#endif
#ifndef CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_THRESHOLD
#define CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_THRESHOLD (1024 * 1024)
#endif
#ifndef CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_TIMEOUT_MSECOND
#define CPPHTTPLIB_WAIT_EARLY_SERVER_RESPONSE_TIMEOUT_MSECOND 50
#endif
#ifndef CPPHTTPLIB_IDLE_INTERVAL_SECOND
#define CPPHTTPLIB_IDLE_INTERVAL_SECOND 0
#endif
@@ -286,8 +302,10 @@ using socket_t = int;
#include <atomic>
#include <cassert>
#include <cctype>
#include <chrono>
#include <climits>
#include <condition_variable>
#include <cstdlib>
#include <cstring>
#include <errno.h>
#include <exception>
@@ -305,6 +323,7 @@ using socket_t = int;
#include <sstream>
#include <string>
#include <sys/stat.h>
#include <system_error>
#include <thread>
#include <unordered_map>
#include <unordered_set>
@@ -494,6 +513,69 @@ private:
bool execute_on_destruction;
};
// Simple from_chars implementation for integer and double types (C++17
// substitute)
template <typename T> struct from_chars_result {
const char *ptr;
std::errc ec;
};
template <typename T>
inline from_chars_result<T> from_chars(const char *first, const char *last,
T &value, int base = 10) {
value = 0;
const char *p = first;
bool negative = false;
if (p != last && *p == '-') {
negative = true;
++p;
}
if (p == last) { return {first, std::errc::invalid_argument}; }
T result = 0;
for (; p != last; ++p) {
char c = *p;
int digit = -1;
if ('0' <= c && c <= '9') {
digit = c - '0';
} else if ('a' <= c && c <= 'z') {
digit = c - 'a' + 10;
} else if ('A' <= c && c <= 'Z') {
digit = c - 'A' + 10;
} else {
break;
}
if (digit < 0 || digit >= base) { break; }
if (result > ((std::numeric_limits<T>::max)() - digit) / base) {
return {p, std::errc::result_out_of_range};
}
result = result * base + digit;
}
if (p == first || (negative && p == first + 1)) {
return {first, std::errc::invalid_argument};
}
value = negative ? -result : result;
return {p, std::errc{}};
}
// from_chars for double (simple wrapper for strtod)
inline from_chars_result<double> from_chars(const char *first, const char *last,
double &value) {
std::string s(first, last);
char *endptr = nullptr;
errno = 0;
value = std::strtod(s.c_str(), &endptr);
if (endptr == s.c_str()) { return {first, std::errc::invalid_argument}; }
if (errno == ERANGE) {
return {first + (endptr - s.c_str()), std::errc::result_out_of_range};
}
return {first + (endptr - s.c_str()), std::errc{}};
}
} // namespace detail
enum SSLVerifierResponse {
@@ -1848,10 +1930,11 @@ private:
Result send_(Request &&req);
socket_t create_client_socket(Error &error) const;
bool read_response_line(Stream &strm, const Request &req,
Response &res) const;
bool read_response_line(Stream &strm, const Request &req, Response &res,
bool skip_100_continue = true) const;
bool write_request(Stream &strm, Request &req, bool close_connection,
Error &error);
Error &error, bool skip_body = false);
bool write_request_body(Stream &strm, Request &req, Error &error);
void prepare_default_headers(Request &r, bool for_stream,
const std::string &ct);
bool redirect(Request &req, Response &res, Error &error);
@@ -3243,10 +3326,11 @@ private:
msg.id = value;
} else if (field == "retry") {
// Parse retry interval in milliseconds
try {
retry_ms = std::stoi(value);
} catch (...) {
// Invalid retry value, ignore
{
int v = 0;
auto res =
detail::from_chars(value.data(), value.data() + value.size(), v);
if (res.ec == std::errc{}) { retry_ms = v; }
}
}
// Unknown fields are ignored per SSE spec