server: add SSE ping interval (#24013)

This commit is contained in:
Xuan-Son Nguyen
2026-06-02 14:14:55 +02:00
committed by GitHub
parent a468b89018
commit 60130d18f9
5 changed files with 29 additions and 8 deletions
+21 -2
View File
@@ -3693,6 +3693,7 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
auto res = create_response();
auto completion_id = gen_chatcmplid();
auto & rd = res->rd;
auto & params = this->params;
try {
std::vector<server_task> tasks;
@@ -3828,7 +3829,7 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
}
res->status = 200;
res->content_type = "text/event-stream";
res->next = [res_this = res.get(), res_type, &req](std::string & output) -> bool {
res->next = [res_this = res.get(), res_type, &req, &params](std::string & output) -> bool {
static auto format_error = [](task_response_type res_type, const json & res_json) {
if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
return format_anthropic_sse({
@@ -3873,7 +3874,25 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
}
// receive subsequent results
auto result = rd.next(req.should_stop);
bool timeout = false;
int64_t start_time = ggml_time_ms();
auto result = rd.next([&timeout, &req, &start_time, &params]() {
if (req.should_stop()) {
return true; // should_stop condition met
} else if (params.sse_ping_interval > 0 && ggml_time_ms() - start_time > (int64_t)params.sse_ping_interval * 1000) {
timeout = true;
return true; // timeout
}
return false;
});
if (timeout) {
// some clients (e.g. undici) will time out if no data is received for a while, so we need to send a ping to keep the connection alive
SRV_DBG("%s", "sending SSE ping\n");
output = ":\n\n";
return true;
}
if (result == nullptr) {
SRV_DBG("%s", "stopping streaming due to should_stop condition\n");
GGML_ASSERT(req.should_stop());
-4
View File
@@ -381,10 +381,6 @@ server_task_result_ptr server_response_reader::next(const std::function<bool()>
if (result == nullptr) {
// timeout, check stop condition
if (should_stop()) {
const int64_t time_elapsed_ms = ggml_time_ms() - time_start_ms;
if (time_elapsed_ms > 30000) {
SRV_WRN("%s", "request cancelled after 30s, potentially a client-side timeout; please check your client's code\n");
}
return nullptr;
}
} else {
-2
View File
@@ -169,8 +169,6 @@ struct server_response_reader {
bool cancelled = false;
int polling_interval_seconds;
const int64_t time_start_ms = ggml_time_ms();
// tracking generation state and partial tool calls
// only used by streaming completions
std::vector<task_result_state> states;