ggml-webgpu: Add clang-format job (#24308)

* Add clang-format job

* try local formatting
This commit is contained in:
Reese Levine
2026-06-08 20:54:24 -07:00
committed by GitHub
parent 1e1aca09da
commit 3ac3c20c96
3 changed files with 37 additions and 11 deletions
+23
View File
@@ -35,6 +35,29 @@ env:
LLAMA_ARG_LOG_TIMESTAMPS: 1 LLAMA_ARG_LOG_TIMESTAMPS: 1
jobs: jobs:
format:
runs-on: ubuntu-24.04
steps:
- name: Clone
uses: actions/checkout@v6
- name: Install clang-format 22
run: |
wget -qO- https://apt.llvm.org/llvm-snapshot.gpg.key |
sudo tee /etc/apt/trusted.gpg.d/apt.llvm.org.asc > /dev/null
sudo add-apt-repository -y \
"deb http://apt.llvm.org/noble/ llvm-toolchain-noble-22 main"
sudo apt-get update
sudo apt-get install -y clang-format-22
- name: Check formatting
run: |
find ggml/src/ggml-webgpu \
-type f \( -name '*.cpp' -o -name '*.hpp' -o -name '*.h' \) \
-print0 |
xargs -0 clang-format-22 --dry-run --Werror
macos: macos:
runs-on: macos-latest runs-on: macos-latest
@@ -644,7 +644,8 @@ inline size_t ggml_webgpu_flash_attn_tensor_offset(const ggml_tensor * tensor) {
inline bool ggml_webgpu_flash_attn_float_vec4_aligned(const ggml_tensor * K, size_t storage_offset_alignment) { inline bool ggml_webgpu_flash_attn_float_vec4_aligned(const ggml_tensor * K, size_t storage_offset_alignment) {
const uint32_t offset_elems = const uint32_t offset_elems =
(uint32_t) ((ggml_webgpu_flash_attn_tensor_offset(K) & (storage_offset_alignment - 1)) / ggml_type_size(K->type)); (uint32_t) ((ggml_webgpu_flash_attn_tensor_offset(K) & (storage_offset_alignment - 1)) /
ggml_type_size(K->type));
return offset_elems % GGML_WEBGPU_FLASH_ATTN_TILE_KV_VEC_WIDTH == 0u; return offset_elems % GGML_WEBGPU_FLASH_ATTN_TILE_KV_VEC_WIDTH == 0u;
} }
@@ -655,8 +656,10 @@ inline bool ggml_webgpu_flash_attn_float_vec4_aligned(const ggml_tensor * K,
ggml_webgpu_flash_attn_float_vec4_aligned(V, storage_offset_alignment); ggml_webgpu_flash_attn_float_vec4_aligned(V, storage_offset_alignment);
} }
inline bool ggml_webgpu_flash_attn_kv_direct( inline bool ggml_webgpu_flash_attn_kv_direct(const ggml_tensor * Q,
const ggml_tensor * Q, const ggml_tensor * K, const ggml_tensor * V, uint32_t kv_direct_align) { const ggml_tensor * K,
const ggml_tensor * V,
uint32_t kv_direct_align) {
return K->type == GGML_TYPE_F16 && V->type == GGML_TYPE_F16 && (Q->ne[0] % kv_direct_align == 0) && return K->type == GGML_TYPE_F16 && V->type == GGML_TYPE_F16 && (Q->ne[0] % kv_direct_align == 0) &&
(K->ne[1] % GGML_WEBGPU_KV_SEQ_PAD == 0); (K->ne[1] % GGML_WEBGPU_KV_SEQ_PAD == 0);
} }
@@ -671,10 +674,10 @@ inline ggml_webgpu_flash_attn_common_pipeline_key ggml_webgpu_flash_attn_make_co
key.dst_type = context.dst->type; key.dst_type = context.dst->type;
key.head_dim_qk = (uint32_t) context.src0->ne[0]; key.head_dim_qk = (uint32_t) context.src0->ne[0];
key.head_dim_v = (uint32_t) context.src2->ne[0]; key.head_dim_v = (uint32_t) context.src2->ne[0];
key.kv_direct = ggml_webgpu_flash_attn_kv_direct(context.src0, context.src1, context.src2, kv_direct_align); key.kv_direct = ggml_webgpu_flash_attn_kv_direct(context.src0, context.src1, context.src2, kv_direct_align);
key.kv_overlap = ggml_webgpu_tensor_overlap(context.src1, context.src2); key.kv_overlap = ggml_webgpu_tensor_overlap(context.src1, context.src2);
key.has_mask = context.src3 != nullptr; key.has_mask = context.src3 != nullptr;
key.has_sinks = context.src4 != nullptr; key.has_sinks = context.src4 != nullptr;
key.uses_logit_softcap = ggml_get_op_params_f32(context.dst, 2) != 0.0f; key.uses_logit_softcap = ggml_get_op_params_f32(context.dst, 2) != 0.0f;
return key; return key;
} }
@@ -1727,7 +1730,7 @@ class ggml_webgpu_shader_lib {
key.type = context.dst->type; key.type = context.dst->type;
key.d_state = (int) context.src0->ne[0]; key.d_state = (int) context.src0->ne[0];
key.xbc_overlap = ggml_webgpu_tensor_overlap(context.src1, context.src4) && key.xbc_overlap = ggml_webgpu_tensor_overlap(context.src1, context.src4) &&
ggml_webgpu_tensor_overlap(context.src1, context.src5); ggml_webgpu_tensor_overlap(context.src1, context.src5);
auto it = ssm_scan_pipelines.find(key); auto it = ssm_scan_pipelines.find(key);
if (it != ssm_scan_pipelines.end()) { if (it != ssm_scan_pipelines.end()) {
+3 -3
View File
@@ -4253,9 +4253,9 @@ static bool ggml_backend_webgpu_device_supports_op(ggml_backend_dev_t dev, const
const uint32_t q_tile = const uint32_t q_tile =
use_subgroup_matrix ? capabilities.sg_mat_m : GGML_WEBGPU_FLASH_ATTN_TILE_Q_TILE; use_subgroup_matrix ? capabilities.sg_mat_m : GGML_WEBGPU_FLASH_ATTN_TILE_Q_TILE;
const uint32_t kv_granularity = use_subgroup_matrix ? capabilities.sg_mat_n : 1u; const uint32_t kv_granularity = use_subgroup_matrix ? capabilities.sg_mat_n : 1u;
const bool kv_direct = use_subgroup_matrix ? const bool kv_direct = use_subgroup_matrix ?
ggml_webgpu_flash_attn_kv_direct(src0, src1, src2, capabilities.sg_mat_k) : ggml_webgpu_flash_attn_kv_direct(src0, src1, src2, capabilities.sg_mat_k) :
false; false;
const uint32_t max_kv_tile = ggml_webgpu_flash_attn_max_kv_tile( const uint32_t max_kv_tile = ggml_webgpu_flash_attn_max_kv_tile(
capabilities.limits.maxComputeWorkgroupStorageSize, q_tile, kv_granularity, (uint32_t) src0->ne[0], capabilities.limits.maxComputeWorkgroupStorageSize, q_tile, kv_granularity, (uint32_t) src0->ne[0],
(uint32_t) src2->ne[0], op->src[3] != nullptr, kv_direct); (uint32_t) src2->ne[0], op->src[3] != nullptr, kv_direct);