Compare commits

..

1 Commits

Author SHA1 Message Date
Eric Curtin d3d06debe3 server : add pidfile option
So we can track the pid of this process

Signed-off-by: Eric Curtin <ecurtin@redhat.com>
2025-06-17 21:47:53 +01:00
9 changed files with 139 additions and 83 deletions
+4
View File
@@ -3373,5 +3373,9 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
}
).set_examples({LLAMA_EXAMPLE_SERVER}));
add_opt(common_arg({ "--pidfile" }, "FILE", "path to PID file for server process",
[](common_params & params, const std::string & value) { params.pidfile = value; })
.set_examples({ LLAMA_EXAMPLE_SERVER }));
return ctx_arg;
}
+1
View File
@@ -366,6 +366,7 @@ struct common_params {
std::string hostname = "127.0.0.1";
std::string public_path = ""; // NOLINT
std::string chat_template = ""; // NOLINT
std::string pidfile = ""; // path to PID file for server process // NOLINT
bool use_jinja = false; // NOLINT
bool enable_chat_template = true;
common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
+22 -16
View File
@@ -556,8 +556,11 @@ class TextModel(ModelBase):
logger.info(f"gguf: experts used count = {n_experts_used}")
if (head_dim := self.hparams.get("head_dim")) is not None:
self.gguf_writer.add_key_length(head_dim)
self.gguf_writer.add_value_length(head_dim)
# Workaround for incorrect AutoConfig value for DeepSeekV3 (is set correctly in DeepSeekV2Model class)
# https://github.com/huggingface/transformers/blob/19224c3642705c5b6988c9f5f4251f83323d05ae/src/transformers/models/deepseek_v3/configuration_deepseek_v3.py#L210
if self.hparams.get("model_type") != "deepseek_v3":
self.gguf_writer.add_key_length(head_dim)
self.gguf_writer.add_value_length(head_dim)
self.gguf_writer.add_file_type(self.ftype)
logger.info(f"gguf: file type = {self.ftype}")
@@ -1898,7 +1901,9 @@ class LlamaModel(TextModel):
hparams = self.hparams
self.gguf_writer.add_vocab_size(hparams["vocab_size"])
if (rope_dim := hparams.get("head_dim")) is None:
if "head_dim" in hparams:
rope_dim = hparams["head_dim"]
else:
rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
self.gguf_writer.add_rope_dimension_count(rope_dim)
@@ -1980,8 +1985,7 @@ class LlamaModel(TextModel):
if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
if rope_scaling.get("rope_type", '').lower() == "llama3":
base = self.hparams.get("rope_theta", 10000.0)
if (dim := self.hparams.get("head_dim")) is None:
dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
dim = self.hparams.get("head_dim", self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
factor = rope_scaling.get("factor", 8.0)
@@ -2317,7 +2321,9 @@ class DeciModel(TextModel):
hparams = self.hparams
self.gguf_writer.add_vocab_size(hparams["vocab_size"])
if (rope_dim := hparams.get("head_dim")) is None:
if "head_dim" in hparams:
rope_dim = hparams["head_dim"]
else:
rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
self.gguf_writer.add_rope_dimension_count(rope_dim)
@@ -2357,8 +2363,7 @@ class DeciModel(TextModel):
if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
if rope_scaling.get("rope_type", '').lower() == "llama3":
base = self.hparams.get("rope_theta", 10000.0)
if (dim := self.hparams.get("head_dim")) is None:
dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
dim = self.hparams.get("head_dim", self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
factor = rope_scaling.get("factor", 8.0)
@@ -3676,7 +3681,9 @@ class InternLM3Model(TextModel):
hparams = self.hparams
self.gguf_writer.add_vocab_size(hparams["vocab_size"])
if (rope_dim := hparams.get("head_dim")) is None:
if "head_dim" in hparams:
rope_dim = hparams["head_dim"]
else:
rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
self.gguf_writer.add_rope_dimension_count(rope_dim)
@@ -5091,7 +5098,9 @@ class DeepseekModel(TextModel):
def set_gguf_parameters(self):
super().set_gguf_parameters()
hparams = self.hparams
if (rope_dim := hparams.get("head_dim")) is None:
if "head_dim" in hparams:
rope_dim = hparams["head_dim"]
else:
rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
self.gguf_writer.add_rope_dimension_count(rope_dim)
@@ -5981,8 +5990,7 @@ class ExaoneModel(TextModel):
if rope_scaling := self.find_hparam(["rope_scaling"], optional=True):
if rope_scaling.get("rope_type", '').lower() == "llama3":
base = self.hparams.get("rope_theta", 10000.0)
if (dim := self.hparams.get("head_dim")) is None:
dim = self.hparams["hidden_size"] // self.hparams["num_attention_heads"]
dim = self.hparams.get("head_dim", self.hparams["hidden_size"] // self.hparams["num_attention_heads"])
freqs = 1.0 / (base ** (torch.arange(0, dim, 2, dtype=torch.float32) / dim))
factor = rope_scaling.get("factor", 8.0)
@@ -6094,8 +6102,7 @@ class BailingMoeModel(TextModel):
def set_gguf_parameters(self):
super().set_gguf_parameters()
hparams = self.hparams
if (rope_dim := hparams.get("head_dim")) is None:
rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
rope_dim = hparams.get("head_dim") or hparams["hidden_size"] // hparams["num_attention_heads"]
self.gguf_writer.add_rope_dimension_count(rope_dim)
rope_scaling = self.hparams.get("rope_scaling") or {}
@@ -6127,8 +6134,7 @@ class BailingMoeModel(TextModel):
n_head = self.hparams["num_attention_heads"]
n_kv_head = self.hparams.get("num_key_value_heads")
n_embd = self.hparams["hidden_size"]
if (head_dim := self.hparams.get("head_dim")) is None:
head_dim = n_embd // n_head
head_dim = self.hparams.get("head_dim") or n_embd // n_head
output_name = self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT)
-44
View File
@@ -368,8 +368,6 @@ if (MSVC)
/wd4005 # Macro redefinition
/wd4244 # Conversion from one type to another type, possible loss of data
/wd4267 # Conversion from 'size_t' to a smaller type, possible loss of data
/wd4305 # Conversion from 'type1' to 'type2', possible loss of data
/wd4566 # Conversion from 'char' to 'wchar_t', possible loss of data
/wd4996 # Disable POSIX deprecation warnings
/wd4702 # Unreachable code warnings
)
@@ -389,46 +387,4 @@ if (MSVC)
disable_msvc_warnings(ggml-cpu-skylakex)
disable_msvc_warnings(ggml-cpu-icelake)
disable_msvc_warnings(ggml-cpu-alderlake)
if (GGML_BUILD_EXAMPLES)
disable_msvc_warnings(common-ggml)
disable_msvc_warnings(common)
disable_msvc_warnings(mnist-common)
disable_msvc_warnings(mnist-eval)
disable_msvc_warnings(mnist-train)
disable_msvc_warnings(gpt-2-ctx)
disable_msvc_warnings(gpt-2-alloc)
disable_msvc_warnings(gpt-2-backend)
disable_msvc_warnings(gpt-2-sched)
disable_msvc_warnings(gpt-2-quantize)
disable_msvc_warnings(gpt-2-batched)
disable_msvc_warnings(gpt-j)
disable_msvc_warnings(gpt-j-quantize)
disable_msvc_warnings(magika)
disable_msvc_warnings(yolov3-tiny)
disable_msvc_warnings(sam)
disable_msvc_warnings(simple-ctx)
disable_msvc_warnings(simple-backend)
endif()
if (GGML_BUILD_TESTS)
disable_msvc_warnings(test-mul-mat)
disable_msvc_warnings(test-arange)
disable_msvc_warnings(test-backend-ops)
disable_msvc_warnings(test-cont)
disable_msvc_warnings(test-conv-transpose)
disable_msvc_warnings(test-conv-transpose-1d)
disable_msvc_warnings(test-conv1d)
disable_msvc_warnings(test-conv2d)
disable_msvc_warnings(test-conv2d-dw)
disable_msvc_warnings(test-customop)
disable_msvc_warnings(test-dup)
disable_msvc_warnings(test-opt)
disable_msvc_warnings(test-pool)
endif ()
endif()
+4 -8
View File
@@ -144,15 +144,9 @@ if (Vulkan_FOUND)
${VULKAN_SHADER_GEN_CMAKE_ARGS}
BUILD_COMMAND ${CMAKE_COMMAND} --build . --config $<CONFIG>
# NOTE: When DESTDIR is set using Makefile generators and
# "make install" triggers the build step, vulkan-shaders-gen
# would be installed into the DESTDIR prefix, so it is unset
# to ensure that does not happen.
INSTALL_COMMAND ${CMAKE_COMMAND} -E env --unset=DESTDIR
${CMAKE_COMMAND} --install . --config $<CONFIG>
INSTALL_COMMAND ${CMAKE_COMMAND} --install . --config $<CONFIG>
)
ExternalProject_Add_StepTargets(vulkan-shaders-gen build install)
set (_ggml_vk_host_suffix $<IF:$<STREQUAL:${CMAKE_HOST_SYSTEM_NAME},Windows>,.exe,>)
set (_ggml_vk_genshaders_dir "${CMAKE_BINARY_DIR}/$<CONFIG>")
@@ -178,6 +172,8 @@ if (Vulkan_FOUND)
DEPENDS ${_ggml_vk_shader_files}
vulkan-shaders-gen
vulkan-shaders-gen-build
vulkan-shaders-gen-install
COMMENT "Generate vulkan shaders"
)
+6
View File
@@ -888,6 +888,12 @@ struct ggml_context {
struct ggml_object * objects_end;
};
struct ggml_context_container {
bool used;
struct ggml_context context;
};
//
// data types
//
+1 -1
View File
@@ -1 +1 @@
8cda0a3c19f2c7dc493887353c42f6956bc268b1
6a7d170c04789f6ebcf320ed03c1b16973f93bd7
+2 -2
View File
@@ -333,7 +333,7 @@ int32_t llm_chat_apply_template(
std::string role(message->role);
if (role == "system") {
// there is no system message for gemma, but we will merge it with user prompt, so nothing is broken
system_prompt += trim(message->content);
system_prompt = trim(message->content);
continue;
}
// in gemma, "assistant" is "model"
@@ -355,7 +355,7 @@ int32_t llm_chat_apply_template(
std::string role(message->role);
if (role == "system") {
// there is no system message support, we will merge it with user prompt
system_prompt += message->content;
system_prompt = message->content;
continue;
} else if (role == "user") {
ss << "Human: ";
+99 -12
View File
@@ -14,22 +14,31 @@
// mime type for sending response
#define MIMETYPE_JSON "application/json; charset=utf-8"
#include <signal.h>
#include <atomic>
#include <chrono>
#include <cinttypes>
#include <condition_variable>
#include <cstddef>
#include <deque>
#include <memory>
#include <mutex>
#include <thread>
#include <unordered_map>
#include <unordered_set>
// auto generated files (see README.md for details)
#include "index.html.gz.hpp"
#include "loading.html.hpp"
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstddef>
#include <cinttypes>
#include <deque>
#include <memory>
#include <mutex>
#include <signal.h>
#include <thread>
#include <unordered_map>
#include <unordered_set>
#ifdef _WIN32
#include <process.h>
#define getpid _getpid
#define pid_t int;
#else
#include <unistd.h>
#endif
using json = nlohmann::ordered_json;
@@ -3691,6 +3700,77 @@ inline void signal_handler(int signal) {
shutdown_handler(signal);
}
static bool check_pid_alive(const pid_t pid) {
if (pid <= 0) {
return false;
}
// Process is alive or exists but is inaccessible
if (kill(pid, 0) == 0 || errno == EPERM) {
return true; // Process is alive
}
return false; // Process does not exist or other error
}
class PidFile {
public:
FILE * file = nullptr;
std::string fname;
bool rm = false;
FILE * open(const std::string & filename, const char * mode, const bool r = false) {
file = ggml_fopen(filename.c_str(), mode);
fname = filename;
rm = r;
return file;
}
void close() {
fclose(file);
file = nullptr;
if (rm) {
// Remove stale pidfile
unlink(fname.c_str());
}
}
~PidFile() {
if (file) {
close();
}
}
};
static bool is_old_pid_alive(const std::string & filename) {
pid_t oldpid = 0;
PidFile f;
if (f.open(filename, "r")) {
if (fscanf(f.file, "%d", &oldpid) == 1) {
if (check_pid_alive(oldpid)) {
LOG_ERR("Process already running with PID %d\n", oldpid);
return true;
}
}
}
return false;
}
static int create_pidfile(const std::string & pidfile, PidFile & f) {
if (!f.open(pidfile.c_str(), "w", true)) {
LOG_ERR("Unable to open pidfile %s: %s\n", pidfile.c_str(), strerror(errno));
return -1;
}
fprintf(f.file, "%d\n", getpid());
fflush(f.file);
return 0;
}
int main(int argc, char ** argv) {
// own arguments required by this example
common_params params;
@@ -3699,6 +3779,13 @@ int main(int argc, char ** argv) {
return 1;
}
PidFile f;
if (!params.pidfile.empty()) {
if (is_old_pid_alive(params.pidfile) || create_pidfile(params.pidfile, f)) {
return 1;
}
}
common_init();
// struct that contains llama context and inference