Compare commits

..

5 Commits

Author SHA1 Message Date
Giuseppe Scrivano 1eeb523c3e vulkan: optimize UMA buffer operations and fix driver hangs (#16059)
* vulkan: optimize UMA buffer operations and fix driver hangs

The previous implementation was blocking the GPU for extended periods,
causing the i915 driver to reset the context due to the hangcheck
protection.

[32628.443070] i915 0000:00:02.0: [drm] GPU HANG: ecode 12:1:85dffffb, in llama-server [194114]
[32628.443091] i915 0000:00:02.0: [drm] llama-server[194114] context reset due to GPU hang

* vulkan: implement deferred_memset on UMA

---------

Signed-off-by: Giuseppe Scrivano <gscrivan@redhat.com>
2025-09-21 08:31:55 +02:00
Jeff Bolz 5bb4a3edec vulkan: fix validation error about VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR (#16086) 2025-09-21 08:23:37 +02:00
Georgi Gerganov 7f766929ca sync : ggml 2025-09-20 13:02:14 +03:00
Daniel Bevenius 405921dcef ggml : introduce semantic versioning (ggml/1336)
* ggml : introduce semantic versioning

This commit introduces semantic versioning for the GGML library.

The motivation for this is that the current versioning, using build
numbers, makes it difficult to track changes and releases for projects
that use ggml.

The release steps are the following:
1. Sync the changes from llama.cpp using sync-llama-am.sh and after the
   PR has been approved and merged move to step 2.
2. Run scripts/release.sh and specify the type of release, major, minor,
   or patch. This script will handle incrementing the version
   (major|minor|patch), create a new commit with the version change,
   create a tag for the version, and prepare for the next development
   iteration.
3. Inspect the commits/tag and push to master. This will trigger the
   github release workflow which is triggered for new tags which will
   then publish a new release on github.

Example usage:
```console
$ ./scripts/release.sh major --dry-run
[dry-run] - No changes will be made

Step 1: Reading current version...
Current version: 0.9.0-dev
New release version: 1.0.0

Step 2: Updating version in ggml/CMakeLists.txt...
  [dry-run] Would update GGML_VERSION_MAJOR to 1
  [dry-run] Would update GGML_VERSION_MINOR to 0
  [dry-run] Would update GGML_VERSION_PATCH to 0
  [dry-run] Would remove -dev suffix

Step 3: Committing version bump...
  [dry-run] Would commit: 'ggml : bump version to 1.0.0'

Step 4: Creating git tag...
  [dry-run] Would create tag: v1.0.0 with message 'Release version 1.0.0'

Step 5: Preparing for next development cycle...
  [dry-run] Would update GGML_VERSION_MINOR to 1
  [dry-run] Would add -dev suffix back

Step 6: Committing development version...
  [dry-run] Would commit: 'ggml : prepare for development of 1.1.0-dev'

[dry-run] Summary (no changes were made):
  • Would have released version: 1.0.0
  • Would have created tag: v1.0.0
  • Would have set next development version: 1.1.0-dev
```

Refs: https://github.com/ggml-org/ggml/issues/1333

* ggml: create branch for release candidate and check master

* ggml : sign the git tag
2025-09-20 13:02:14 +03:00
Gregor Jasny fa6383ca7e CUDA : conditionally add cuda architectures (ggml/1341) 2025-09-20 13:02:14 +03:00
4 changed files with 87 additions and 26 deletions
+37 -21
View File
@@ -1,5 +1,41 @@
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
project("ggml" C CXX ASM)
### GGML Version
set(GGML_VERSION_MAJOR 0)
set(GGML_VERSION_MINOR 9)
set(GGML_VERSION_PATCH 0)
set(GGML_VERSION_DEV "-dev") # "-dev" for development, "" for releases
set(GGML_VERSION_BASE "${GGML_VERSION_MAJOR}.${GGML_VERSION_MINOR}.${GGML_VERSION_PATCH}")
find_program(GIT_EXE NAMES git git.exe NO_CMAKE_FIND_ROOT_PATH)
if(GIT_EXE)
# Get current git commit hash
execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
OUTPUT_VARIABLE GGML_BUILD_COMMIT
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_QUIET
)
# Check if the working directory is dirty (i.e., has uncommitted changes)
execute_process(COMMAND ${GIT_EXE} diff-index --quiet HEAD -- .
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
RESULT_VARIABLE GGML_GIT_DIRTY
ERROR_QUIET
)
endif()
# Build the version string with optional -dev suffix and dirty flag
set(GGML_VERSION "${GGML_VERSION_BASE}${GGML_VERSION_DEV}")
if(GGML_GIT_DIRTY AND NOT GGML_GIT_DIRTY EQUAL 0)
set(GGML_VERSION "${GGML_VERSION}-dirty")
endif()
if(NOT GGML_BUILD_COMMIT)
set(GGML_BUILD_COMMIT "unknown")
endif()
include(CheckIncludeFileCXX)
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
@@ -300,26 +336,6 @@ endif()
# Create CMake package
#
# Generate version info based on git commit.
if(NOT DEFINED GGML_BUILD_NUMBER)
find_program(GIT_EXE NAMES git git.exe REQUIRED NO_CMAKE_FIND_ROOT_PATH)
execute_process(COMMAND ${GIT_EXE} rev-list --count HEAD
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
OUTPUT_VARIABLE GGML_BUILD_NUMBER
OUTPUT_STRIP_TRAILING_WHITESPACE
)
if(GGML_BUILD_NUMBER EQUAL 1)
message(WARNING "GGML build version fixed at 1 likely due to a shallow clone.")
endif()
execute_process(COMMAND ${GIT_EXE} rev-parse --short HEAD
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
OUTPUT_VARIABLE GGML_BUILD_COMMIT
OUTPUT_STRIP_TRAILING_WHITESPACE
)
endif()
# Capture variables prefixed with GGML_.
@@ -348,7 +364,7 @@ set(GGML_VARIABLES_EXPANDED ${variable_set_statements})
# Create the CMake package and set install location.
set(GGML_INSTALL_VERSION 0.0.${GGML_BUILD_NUMBER})
set(GGML_INSTALL_VERSION ${GGML_VERSION})
set(GGML_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
set(GGML_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
set(GGML_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
+7 -3
View File
@@ -25,10 +25,14 @@ if (CUDAToolkit_FOUND)
if (GGML_NATIVE AND CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.6" AND CMAKE_VERSION VERSION_GREATER_EQUAL "3.24")
set(CMAKE_CUDA_ARCHITECTURES "native")
else()
if (CUDAToolkit_VERSION VERSION_LESS "13")
list(APPEND CMAKE_CUDA_ARCHITECTURES 50-virtual 61-virtual 70-virtual)
endif ()
list(APPEND CMAKE_CUDA_ARCHITECTURES 75-virtual 80-virtual 86-real)
if (CUDAToolkit_VERSION VERSION_GREATER_EQUAL "11.8")
set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real;89-real")
else()
set(CMAKE_CUDA_ARCHITECTURES "50-virtual;61-virtual;70-virtual;75-virtual;80-virtual;86-real")
list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real)
endif()
endif()
endif()
+42 -1
View File
@@ -1185,6 +1185,14 @@ struct vk_staging_memcpy {
size_t n;
};
struct vk_staging_memset {
vk_staging_memset(void * _dst, uint32_t _val, size_t _n) : dst(_dst), val(_val), n(_n) {}
void * dst;
uint32_t val;
size_t n;
};
struct vk_context_struct {
vk_submission * s;
std::vector<vk_sequence> seqs;
@@ -1193,6 +1201,7 @@ struct vk_context_struct {
std::vector<vk_staging_memcpy> in_memcpys;
std::vector<vk_staging_memcpy> out_memcpys;
std::vector<vk_staging_memset> memsets;
vk_command_pool * p {};
};
@@ -1584,7 +1593,9 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
}
vk::ComputePipelineCreateInfo compute_pipeline_create_info(
vk::PipelineCreateFlags{},
device->pipeline_executable_properties_support ?
vk::PipelineCreateFlagBits::eCaptureStatisticsKHR :
vk::PipelineCreateFlags{},
pipeline_shader_create_info,
pipeline->layout);
@@ -5194,6 +5205,14 @@ static void deferred_memcpy(void * dst, const void * src, size_t size, std::vect
}
}
static void deferred_memset(void * dst, uint32_t val, size_t size, std::vector<vk_staging_memset>* memsets = nullptr) {
if (memsets == nullptr) {
memset(dst, val, size);
} else {
memsets->emplace_back(dst, val, size);
}
}
static void ggml_vk_ensure_sync_staging_buffer(vk_device& device, size_t size) {
if (device->sync_staging == nullptr || device->sync_staging->size < size) {
VK_LOG_MEMORY("ggml_vk_ensure_sync_staging_buffer(" << size << ")");
@@ -5389,6 +5408,10 @@ static void ggml_vk_buffer_write_2d(vk_buffer& dst, size_t offset, const void *
memcpy(cpy.dst, cpy.src, cpy.n);
}
for (auto& mset : subctx->memsets) {
memset(mset.dst, mset.val, mset.n);
}
ggml_vk_submit(subctx, dst->device->fence);
VK_CHECK(dst->device->device.waitForFences({ dst->device->fence }, true, UINT64_MAX), "vk_buffer_write_2d waitForFences");
dst->device->device.resetFences({ dst->device->fence });
@@ -5528,12 +5551,25 @@ static void ggml_vk_buffer_copy(vk_buffer& dst, size_t dst_offset, vk_buffer& sr
static void ggml_vk_buffer_memset_async(vk_context& ctx, vk_buffer& dst, size_t offset, uint32_t c, size_t size) {
VK_LOG_DEBUG("ggml_vk_buffer_memset_async(" << offset << ", " << c << ", " << size << ")");
if (dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible &&
dst->device->uma) {
deferred_memset((uint8_t*)dst->ptr + offset, c, size, &ctx->memsets);
return;
}
// Fall back to GPU fillBuffer for non-UMA or non-host-visible buffers
ctx->s->buffer.fillBuffer(dst->buffer, offset, size, c);
}
static void ggml_vk_buffer_memset(vk_buffer& dst, size_t offset, uint32_t c, size_t size) {
VK_LOG_DEBUG("ggml_vk_buffer_memset(" << offset << ", " << c << ", " << size << ")");
if (dst->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible &&
dst->device->uma) {
memset((uint8_t*)dst->ptr + offset, c, size);
return;
}
std::lock_guard<std::recursive_mutex> guard(dst->device->mutex);
vk_context subctx = ggml_vk_create_temporary_context(dst->device->transfer_queue.cmd_pool);
ggml_vk_ctx_begin(dst->device, subctx);
@@ -11168,6 +11204,10 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_cgraph *
memcpy(cpy.dst, cpy.src, cpy.n);
}
for (auto& mset : subctx->memsets) {
memset(mset.dst, mset.val, mset.n);
}
if (almost_ready && !ctx->almost_ready_fence_pending && !use_fence) {
ggml_vk_submit(subctx, ctx->almost_ready_fence);
ctx->almost_ready_fence_pending = true;
@@ -11190,6 +11230,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_cgraph *
}
subctx->in_memcpys.clear();
subctx->out_memcpys.clear();
subctx->memsets.clear();
}
return true;
+1 -1
View File
@@ -1 +1 @@
323951f1bdcdfbd5b5ff3a9a7c3770e63b1a560e
978f6e1993f2eeb4e99b63d4e70b4401c0a2dae2