server: add SSE ping interval (#24013)

2026-06-09 07:16:44 +02:00 · 2026-06-02 14:14:55 +02:00
parent a468b89018
commit 60130d18f9
5 changed files with 29 additions and 8 deletions
@@ -3693,6 +3693,7 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
    auto res = create_response();
    auto completion_id = gen_chatcmplid();
    auto & rd = res->rd;
+    auto & params = this->params;

    try {
        std::vector<server_task> tasks;
@@ -3828,7 +3829,7 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
        }
        res->status = 200;
        res->content_type = "text/event-stream";
-        res->next = [res_this = res.get(), res_type, &req](std::string & output) -> bool {
+        res->next = [res_this = res.get(), res_type, &req, &params](std::string & output) -> bool {
            static auto format_error = [](task_response_type res_type, const json & res_json) {
                if (res_type == TASK_RESPONSE_TYPE_ANTHROPIC) {
                    return format_anthropic_sse({
@@ -3873,7 +3874,25 @@ std::unique_ptr<server_res_generator> server_routes::handle_completions_impl(
                }

                // receive subsequent results
-                auto result = rd.next(req.should_stop);
+                bool timeout = false;
+                int64_t start_time = ggml_time_ms();
+                auto result = rd.next([&timeout, &req, &start_time, &params]() {
+                    if (req.should_stop()) {
+                        return true; // should_stop condition met
+                    } else if (params.sse_ping_interval > 0 && ggml_time_ms() - start_time > (int64_t)params.sse_ping_interval * 1000) {
+                        timeout = true;
+                        return true; // timeout
+                    }
+                    return false;
+                });
+
+                if (timeout) {
+                    // some clients (e.g. undici) will time out if no data is received for a while, so we need to send a ping to keep the connection alive
+                    SRV_DBG("%s", "sending SSE ping\n");
+                    output = ":\n\n";
+                    return true;
+                }
+
                if (result == nullptr) {
                    SRV_DBG("%s", "stopping streaming due to should_stop condition\n");
                    GGML_ASSERT(req.should_stop());
@@ -381,10 +381,6 @@ server_task_result_ptr server_response_reader::next(const std::function<bool()>
        if (result == nullptr) {
            // timeout, check stop condition
            if (should_stop()) {
-                const int64_t time_elapsed_ms = ggml_time_ms() - time_start_ms;
-                if (time_elapsed_ms > 30000) {
-                    SRV_WRN("%s", "request cancelled after 30s, potentially a client-side timeout; please check your client's code\n");
-                }
                return nullptr;
            }
        } else {
@@ -169,8 +169,6 @@ struct server_response_reader {
    bool cancelled = false;
    int polling_interval_seconds;

-    const int64_t time_start_ms = ggml_time_ms();
-
    // tracking generation state and partial tool calls
    // only used by streaming completions
    std::vector<task_result_state> states;