Compare commits

...

3 Commits

Author SHA1 Message Date
Molly Sophia adef81781a server : allow setting --reverse-prompt arg (#14799)
Signed-off-by: Molly Sophia <mollysophia379@gmail.com>
2025-07-22 09:24:22 +08:00
R0CKSTAR 48b86c4fdb cuda: remove linking to cublasLt (#14790)
Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
2025-07-22 07:45:26 +08:00
Sigbjørn Skjæret 38d3af1b73 opencl: fix im2col when KW!=KH (#14803) 2025-07-21 13:55:10 -07:00
5 changed files with 11 additions and 6 deletions
+1 -1
View File
@@ -1612,7 +1612,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
[](common_params & params, const std::string & value) {
params.antiprompt.emplace_back(value);
}
-).set_examples({LLAMA_EXAMPLE_MAIN}));
+).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}));
add_opt(common_arg(
{"-sp", "--special"},
string_format("special tokens output enabled (default: %s)", params.special ? "true" : "false"),
+3 -3
View File
@@ -102,12 +102,12 @@ if (CUDAToolkit_FOUND)
if (GGML_STATIC)
if (WIN32)
# As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library
-target_link_libraries(ggml-cuda PRIVATE CUDA::cudart_static CUDA::cublas CUDA::cublasLt)
+target_link_libraries(ggml-cuda PRIVATE CUDA::cudart_static CUDA::cublas)
else ()
-target_link_libraries(ggml-cuda PRIVATE CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static)
+target_link_libraries(ggml-cuda PRIVATE CUDA::cudart_static CUDA::cublas_static)
endif()
else()
-target_link_libraries(ggml-cuda PRIVATE CUDA::cudart CUDA::cublas CUDA::cublasLt)
+target_link_libraries(ggml-cuda PRIVATE CUDA::cudart CUDA::cublas)
endif()
if (GGML_CUDA_NO_VMM)
+1 -1
View File
@@ -31,7 +31,7 @@ kernel void kernel_im2col_f16(
src1 = (global float*)((global char*)src1 + offset1);
dst = (global half*)((global char*)dst + offsetd);
-long ksize = OW * (KH > 1 ? KW : 1);
+long ksize = OW * KH;
long kx = i / ksize;
long kd = kx * ksize;
long ky = (i - kd) / OW;
+1 -1
View File
@@ -31,7 +31,7 @@ kernel void kernel_im2col_f32(
src1 = (global float*)((global char*)src1 + offset1);
dst = (global float*)((global char*)dst + offsetd);
-long ksize = OW * (KH > 1 ? KW : 1);
+long ksize = OW * KH;
long kx = i / ksize;
long kd = kx * ksize;
long ky = (i - kd) / OW;
+5
View File
@@ -253,6 +253,7 @@ struct server_task {
defaults.sampling = params_base.sampling;
defaults.speculative = params_base.speculative;
defaults.n_keep = params_base.n_keep;
+defaults.antiprompt = params_base.antiprompt;
// enabling this will output extra debug information in the HTTP responses from the server
params.verbose = params_base.verbosity > 9;
@@ -490,6 +491,10 @@ struct server_task {
}
}
}
+// set reverse prompt from cli args if not set in the request
+if (params.antiprompt.empty()) {
+params.antiprompt = defaults.antiprompt;
+}
}
{