Files
llama.cpp/tests/CMakeLists.txt
Pascal 26021699bc ggml : add GGML_OP_COL2IM_1D (#24206)
* cpu: add GGML_OP_COL2IM_1D

Add the overlap-add (scatter-add) step of a 1D transposed convolution.
A ConvTranspose1d factorizes as a GEMM followed by col2im: a weight
pre-permuted to [IC, K*OC] is contracted against the [IC, T_in] input
with mul_mat to produce a column matrix [K*OC, T_in], and col2im_1d
scatters those columns back into the [T_out, OC] signal, with
T_out = (T_in - 1)*s0 + K - 2*p0.

Keeping the contraction as a plain mul_mat leaves the heavy work on the
optimized (and quantizable) matmul kernels, so col2im_1d only does the
cheap overlap-add.

CPU uses a gather formulation parallelized over output channels,
supporting F32, F16 and BF16 with an F32 accumulator.

* tests: add backend coverage for GGML_OP_COL2IM_1D

Add test_col2im_1d next to the conv_transpose_1d cases, covering F32,
F16 and BF16 across eight geometries: the canonical kernel = 2*stride
DAC upsampling shape, overlap, no overlap, cropping (p0 = 1 and
p0 = stride/2), kernel < stride with zeroed gaps, kernel not a
multiple of stride, and a single column unfold.

Perf mode gets three real vocoder stage shapes reporting memory
bandwidth. max_nmse_err relaxes to 5e-4 for F16 and BF16.

* cpu: harden GGML_OP_COL2IM_1D

ggml_col2im_1d validates s0, oc, p0 and input contiguity at graph
build time, before the oc division, protecting every backend at once.
The kernel asserts the contiguity its flat indexing assumes and its
doc states the full output length including the crop term.

The kernel parallelizes over the time axis: the split stays balanced
down to OC = 1, where the previous channel split was single threaded.
Values are bit identical on the three real vocoder chains, two out of
three improve.

* tests: extend the GGML_OP_COL2IM_1D grid

The eval grid grows to eleven geometries: OC = 1 (mono output stage),
K = 1 with stride > 1 (sparse scatter, every gap position zeroed) and
a crop down to T_out = 2 where all the gather bounds act at once.

* tests: add col2im_1d equivalence test

tests/test-col2im-1d.cpp proves mul_mat + col2im_1d matches the
native ggml_conv_transpose_1d on the CPU backend, F32 bit exact, F16
and BF16 through casts of the column matrix. test-backend-ops cannot
cover this for a CPU only op since the CPU backend is its own
reference there.

* rpc: bump protocol patch version for GGML_OP_COL2IM_1D

GGML_OP_COUNT goes from 96 to 97 with the new op, which trips the
static_assert in ggml-rpc.h. Bump RPC_PROTO_PATCH_VERSION since the
op is appended and no existing op code shifts.
2026-06-09 12:01:37 +03:00

312 lines
14 KiB
CMake

llama_add_compile_flags()
function(llama_build source)
set(TEST_SOURCES ${source} ${ARGN})
if (DEFINED LLAMA_TEST_NAME)
set(TEST_TARGET ${LLAMA_TEST_NAME})
else()
get_filename_component(TEST_TARGET ${source} NAME_WE)
endif()
add_executable(${TEST_TARGET} ${TEST_SOURCES})
target_link_libraries(${TEST_TARGET} PRIVATE llama llama-common)
if (LLAMA_TESTS_INSTALL)
install(TARGETS ${TEST_TARGET} RUNTIME)
endif()
endfunction()
function(llama_test target)
include(CMakeParseArguments)
set(options)
set(oneValueArgs NAME LABEL WORKING_DIRECTORY)
set(multiValueArgs ARGS)
cmake_parse_arguments(LLAMA_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (NOT DEFINED LLAMA_TEST_LABEL)
set(LLAMA_TEST_LABEL "main")
endif()
if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY)
set(LLAMA_TEST_WORKING_DIRECTORY .)
endif()
if (DEFINED LLAMA_TEST_NAME)
set(TEST_NAME ${LLAMA_TEST_NAME})
else()
set(TEST_NAME ${target})
endif()
set(TEST_TARGET ${target})
add_test(
NAME ${TEST_NAME}
WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}
COMMAND $<TARGET_FILE:${TEST_TARGET}>
${LLAMA_TEST_ARGS})
set_property(TEST ${TEST_NAME} PROPERTY LABELS ${LLAMA_TEST_LABEL})
endfunction()
function(llama_test_cmd target)
include(CMakeParseArguments)
set(options)
set(oneValueArgs NAME LABEL WORKING_DIRECTORY)
set(multiValueArgs ARGS)
cmake_parse_arguments(LLAMA_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (NOT DEFINED LLAMA_TEST_LABEL)
set(LLAMA_TEST_LABEL "main")
endif()
if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY)
set(LLAMA_TEST_WORKING_DIRECTORY .)
endif()
if (DEFINED LLAMA_TEST_NAME)
set(TEST_NAME ${LLAMA_TEST_NAME})
else()
set(TEST_NAME ${target})
endif()
add_test(
NAME ${TEST_NAME}
WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}
COMMAND ${target}
${LLAMA_TEST_ARGS})
set_property(TEST ${TEST_NAME} PROPERTY LABELS ${LLAMA_TEST_LABEL})
endfunction()
# Builds and runs a test source file.
# Optional args:
# - NAME: name of the executable & test target (defaults to the source file name without extension)
# - LABEL: label for the test (defaults to main)
# - ARGS: arguments to pass to the test executable
# - WORKING_DIRECTORY
function(llama_build_and_test source)
include(CMakeParseArguments)
set(options)
set(oneValueArgs NAME LABEL WORKING_DIRECTORY)
set(multiValueArgs ARGS)
cmake_parse_arguments(LLAMA_TEST "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
set(TEST_SOURCES ${source} ${LLAMA_TEST_UNPARSED_ARGUMENTS} get-model.cpp)
if (NOT DEFINED LLAMA_TEST_LABEL)
set(LLAMA_TEST_LABEL "main")
endif()
if (NOT DEFINED LLAMA_TEST_WORKING_DIRECTORY)
set(LLAMA_TEST_WORKING_DIRECTORY .)
endif()
if (DEFINED LLAMA_TEST_NAME)
set(TEST_TARGET ${LLAMA_TEST_NAME})
else()
get_filename_component(TEST_TARGET ${source} NAME_WE)
endif()
add_executable(${TEST_TARGET} ${TEST_SOURCES})
if (LLAMA_TESTS_INSTALL)
install(TARGETS ${TEST_TARGET} RUNTIME)
endif()
target_link_libraries(${TEST_TARGET} PRIVATE llama-common)
add_test(
NAME ${TEST_TARGET}
WORKING_DIRECTORY ${LLAMA_TEST_WORKING_DIRECTORY}
COMMAND $<TARGET_FILE:${TEST_TARGET}>
${LLAMA_TEST_ARGS})
set_property(TEST ${TEST_TARGET} PROPERTY LABELS ${LLAMA_TEST_LABEL})
endfunction()
# build test-tokenizer-0 target once and add many tests
llama_build(test-tokenizer-0.cpp)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-bert-bge ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-bert-bge.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-command-r ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-command-r.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-coder ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-deepseek-coder.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-deepseek-llm ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-deepseek-llm.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-falcon ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-falcon.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-gemma-4 ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gemma-4.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-gpt-2 ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gpt-2.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-bpe ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-bpe.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-llama-spm ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-spm.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-mpt ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-mpt.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-phi-3 ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-phi-3.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen2 ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-qwen2.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-qwen35 ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-qwen35.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-refact ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-refact.gguf)
llama_test(test-tokenizer-0 NAME test-tokenizer-0-starcoder ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-starcoder.gguf)
if (NOT WIN32)
llama_test_cmd(
${CMAKE_CURRENT_SOURCE_DIR}/test-tokenizers-repo.sh
NAME test-tokenizers-ggml-vocabs
WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}
ARGS https://huggingface.co/ggml-org/vocabs ${PROJECT_SOURCE_DIR}/models/ggml-vocabs
)
endif()
if (LLAMA_LLGUIDANCE)
llama_build_and_test(test-grammar-llguidance.cpp ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-bpe.gguf)
endif ()
if (NOT WIN32 OR NOT BUILD_SHARED_LIBS)
# these tests are disabled on Windows because they use internal functions not exported with LLAMA_API (when building with shared libraries)
llama_build_and_test(test-sampling.cpp)
llama_build_and_test(test-reasoning-budget.cpp)
llama_build_and_test(test-grammar-parser.cpp)
llama_build_and_test(test-grammar-integration.cpp)
llama_build_and_test(test-llama-grammar.cpp)
llama_build_and_test(test-chat.cpp WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
target_include_directories(test-chat PRIVATE ${PROJECT_SOURCE_DIR}/tools/server)
target_link_libraries(test-chat PRIVATE server-context)
# TODO: disabled on loongarch64 because the ggml-ci node lacks Python 3.8
if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64")
llama_build_and_test(test-json-schema-to-grammar.cpp WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
target_include_directories(test-json-schema-to-grammar PRIVATE ${PROJECT_SOURCE_DIR}/tools/server)
endif()
if (NOT GGML_BACKEND_DL)
llama_build(test-quantize-stats.cpp)
endif()
llama_build(test-gbnf-validator.cpp)
# build test-tokenizer-1-bpe target once and add many tests
llama_build(test-tokenizer-1-bpe.cpp)
# TODO: disabled due to slowness
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-aquila ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-aquila.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-falcon ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-falcon.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-2 ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gpt-2.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-gpt-neox ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-gpt-neox.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-llama-bpe ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-bpe.gguf --ignore-merges)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-mpt ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-mpt.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-refact ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-refact.gguf)
#llama_test(test-tokenizer-1-bpe NAME test-tokenizer-1-starcoder ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-starcoder.gguf)
# build test-tokenizer-1-spm target once and add many tests
llama_build(test-tokenizer-1-spm.cpp)
llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-llama-spm ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-llama-spm.gguf)
#llama_test(test-tokenizer-1-spm NAME test-tokenizer-1-baichuan ARGS ${PROJECT_SOURCE_DIR}/models/ggml-vocab-baichuan.gguf)
# llama_build_and_test(test-double-float.cpp) # SLOW
llama_build_and_test(test-llama-archs.cpp)
endif()
llama_build_and_test(test-chat-peg-parser.cpp peg-parser/simple-tokenize.cpp)
llama_build_and_test(test-jinja.cpp)
llama_test(test-jinja NAME test-jinja-py ARGS -py LABEL python)
llama_build_and_test(test-chat-auto-parser.cpp WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
llama_build_and_test(test-chat-template.cpp)
llama_build_and_test(test-json-partial.cpp)
llama_build_and_test(test-log.cpp)
llama_build_and_test(
test-peg-parser.cpp
peg-parser/simple-tokenize.cpp
peg-parser/test-basic.cpp
peg-parser/test-gbnf-generation.cpp
peg-parser/test-json-parser.cpp
peg-parser/test-json-serialization.cpp
peg-parser/test-python-dict-parser.cpp
peg-parser/test-unicode.cpp
peg-parser/tests.h
)
llama_build_and_test(test-regex-partial.cpp)
if (NOT ${CMAKE_SYSTEM_PROCESSOR} MATCHES "s390x")
set(MODEL_NAME "tinyllamas/stories15M-q4_0.gguf")
set(MODEL_HASH "SHA256=66967fbece6dbe97886593fdbb73589584927e29119ec31f08090732d1861739")
else()
set(MODEL_NAME "tinyllamas/stories15M-be.Q4_0.gguf")
set(MODEL_HASH "SHA256=9aec857937849d976f30397e97eb1cabb53eb9dcb1ce4611ba8247fb5f44c65d")
endif()
set(MODEL_DEST "${CMAKE_BINARY_DIR}/${MODEL_NAME}")
add_test(NAME test-download-model COMMAND ${CMAKE_COMMAND}
-DDEST=${MODEL_DEST}
-DNAME=${MODEL_NAME}
-DHASH=${MODEL_HASH}
-P ${CMAKE_SOURCE_DIR}/cmake/download-models.cmake
)
set_tests_properties(test-download-model PROPERTIES FIXTURES_SETUP test-download-model)
llama_build_and_test(test-thread-safety.cpp ARGS -m "${MODEL_DEST}" -ngl 99 -p "The meaning of life is" -n 128 -c 256 -ub 32 -np 4 -t 2)
set_tests_properties(test-thread-safety PROPERTIES FIXTURES_REQUIRED test-download-model)
llama_build_and_test(test-arg-parser.cpp)
if (NOT LLAMA_SANITIZE_ADDRESS AND NOT GGML_SCHED_NO_REALLOC)
# TODO: repair known memory leaks
llama_build_and_test(test-opt.cpp)
endif()
llama_build_and_test(test-gguf.cpp)
llama_build_and_test(test-backend-ops.cpp)
llama_build_and_test(test-model-load-cancel.cpp LABEL "model")
llama_build_and_test(test-autorelease.cpp LABEL "model")
llama_build_and_test(test-backend-sampler.cpp LABEL "model")
# Test for state restore with fragmented KV cache
# Requires a model, uses same args pattern as test-thread-safety
llama_build_and_test(test-state-restore-fragmented.cpp LABEL "model" ARGS -m "${MODEL_DEST}")
set_tests_properties(test-state-restore-fragmented PROPERTIES FIXTURES_REQUIRED test-download-model)
llama_build_and_test(test-recurrent-state-rollback.cpp LABEL "model" ARGS -m "${MODEL_DEST}")
set_tests_properties(test-recurrent-state-rollback PROPERTIES FIXTURES_REQUIRED test-download-model)
# Test state save/load functionality
llama_build_and_test(test-save-load-state.cpp LABEL "model" ARGS -m "${MODEL_DEST}")
set_tests_properties(test-save-load-state PROPERTIES FIXTURES_REQUIRED test-download-model)
if (NOT GGML_BACKEND_DL)
# these tests use the backends directly and cannot be built with dynamic loading
llama_build_and_test(test-barrier.cpp)
llama_build_and_test(test-quantize-fns.cpp)
llama_build_and_test(test-quantize-perf.cpp)
llama_build_and_test(test-rope.cpp)
llama_build_and_test(test-col2im-1d.cpp)
endif()
# libmtmd
set(LLAMA_TEST_NAME test-mtmd-c-api)
llama_build_and_test(test-mtmd-c-api.c)
target_link_libraries(${LLAMA_TEST_NAME} PRIVATE mtmd)
unset(LLAMA_TEST_NAME)
# GGUF model data fetcher library for tests that need real model metadata
# Only compile when cpp-httplib has SSL support (CPPHTTPLIB_OPENSSL_SUPPORT)
if (TARGET cpp-httplib)
get_target_property(_cpp_httplib_defs cpp-httplib INTERFACE_COMPILE_DEFINITIONS)
if (_cpp_httplib_defs MATCHES "CPPHTTPLIB_OPENSSL_SUPPORT")
add_library(gguf-model-data STATIC gguf-model-data.cpp)
target_link_libraries(gguf-model-data PRIVATE llama-common cpp-httplib)
target_include_directories(gguf-model-data PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
add_executable(test-gguf-model-data test-gguf-model-data.cpp)
target_link_libraries(test-gguf-model-data PRIVATE gguf-model-data llama-common)
llama_test(test-gguf-model-data LABEL "model")
# test-quant-type-selection requires gguf-model-data for remote model metadata
llama_build_and_test(test-quant-type-selection.cpp LABEL "model")
target_link_libraries(test-quant-type-selection PRIVATE gguf-model-data)
target_compile_definitions(test-quant-type-selection PRIVATE
SNAPSHOT_DIR="${CMAKE_CURRENT_SOURCE_DIR}/snapshots")
endif()
endif()
# dummy executable - not installed
get_filename_component(TEST_TARGET test-c.c NAME_WE)
add_executable(${TEST_TARGET} test-c.c)
target_link_libraries(${TEST_TARGET} PRIVATE llama)
llama_build_and_test(test-alloc.cpp)
target_include_directories(test-alloc PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src)
llama_build(export-graph-ops.cpp)
target_include_directories(export-graph-ops PRIVATE ${PROJECT_SOURCE_DIR}/ggml/src)
if (TARGET gguf-model-data)
target_link_libraries(export-graph-ops PRIVATE gguf-model-data)
target_compile_definitions(export-graph-ops PRIVATE LLAMA_HF_FETCH)
endif()