Compare commits

...

7 Commits

Author SHA1 Message Date
Meng, Hengyu 52604860f9 [SYCL] Disable iqx on windows as WA (#6435)
* disable iqx on windows as WA

* array instead of global_memory
2024-04-03 10:34:40 +08:00
Georgi Gerganov f87f7b8986 flake.lock: Update (#6402)
Flake lock file updates:

• Updated input 'nixpkgs':
    'github:NixOS/nixpkgs/44d0940ea560dee511026a53f0e2e2cde489b4d4' (2024-03-23)
  → 'github:NixOS/nixpkgs/d8fe5e6c92d0d190646fb9f1056741a229980089' (2024-03-29)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-04-01 09:05:57 -07:00
Johannes Gäßler 33a5244806 compare-llama-bench.py: fix long hexsha args (#6424) 2024-04-01 13:30:43 +02:00
Pierrick Hymbert 226e819371 ci: server: verify deps are coherent with the commit (#6409)
* ci: server: verify deps are coherent with the commit

* ci: server: change the ref to build as now it's a pull event target
2024-04-01 12:36:40 +02:00
Georgi Gerganov c50a82ce0f readme : update hot topics 2024-03-31 11:56:30 +03:00
Pierrick Hymbert 37e7854c10 ci: bench: fix Resource not accessible by integration on PR event (#6393) 2024-03-30 12:36:07 +02:00
Mohammadreza Hendiani c342d070c6 Fedora build update (#6388)
* fixed deprecated address

* fixed deprecated address

* fixed deprecated address

* Added 'Apache-2.0' SPDX license identifier due to 'kompute.cc' submodule licensing. Explanation of licensing method: https://docs.fedoraproject.org/en-US/legal/spdx/#_and_expressions

* Added 'Apache-2.0' SPDX license identifier due to 'kompute.cc' submodule licensing. Explanation of licensing method: https://docs.fedoraproject.org/en-US/legal/spdx/#_and_expressions

* Added 'Apache-2.0' SPDX license identifier due to 'kompute.cc' submodule licensing. Explanation of licensing method: https://docs.fedoraproject.org/en-US/legal/spdx/#_and_expressions

* reverted back to only the MIT license
2024-03-29 22:59:56 +01:00
10 changed files with 87 additions and 136 deletions
+1 -1
View File
@@ -1,5 +1,5 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
+1 -1
View File
@@ -1,5 +1,5 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
+1 -1
View File
@@ -1,5 +1,5 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
+1 -2
View File
@@ -25,7 +25,7 @@ on:
branches:
- master
paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
pull_request:
pull_request_target:
types: [opened, synchronize, reopened]
paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
schedule:
@@ -143,7 +143,6 @@ jobs:
- name: Commit status
uses: Sibz/github-status-action@v1
continue-on-error: true # If not authorized on external repo
with:
authToken: ${{secrets.GITHUB_TOKEN}}
sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
+34 -10
View File
@@ -4,6 +4,10 @@ name: Server
on:
workflow_dispatch: # allows manual triggering
inputs:
sha:
description: 'Commit SHA1 to build'
required: false
type: string
slow_tests:
description: 'Run slow tests'
required: true
@@ -11,12 +15,12 @@ on:
push:
branches:
- master
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/tests/**.*']
pull_request:
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
pull_request_target:
types: [opened, synchronize, reopened]
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/tests/**.*']
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
schedule:
- cron: '0 0 * * *'
- cron: '2 4 * * *'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -44,25 +48,45 @@ jobs:
options: --cpus 4
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Dependencies
id: depends
run: |
apt-get update
apt-get -y install \
build-essential \
xxd \
git \
cmake \
python3-pip \
curl \
wget \
language-pack-en \
libcurl4-openssl-dev
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Verify server deps
id: verify_server_deps
run: |
git config --global --add safe.directory $(realpath .)
cd examples/server
git ls-files --others --modified
git status
./deps.sh
git status
not_ignored_files="$(git ls-files --others --modified)"
echo "Modified files: ${not_ignored_files}"
if [ -n "${not_ignored_files}" ]; then
echo "Repository is dirty or server deps are not built as expected"
echo "${not_ignored_files}"
exit 1
fi
- name: Build
id: cmake_build
run: |
+1 -1
View File
@@ -18,12 +18,12 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
### Hot topics
- Model sharding instructions using `gguf-split` https://github.com/ggerganov/llama.cpp/discussions/6404
- Fix major bug in Metal batched inference https://github.com/ggerganov/llama.cpp/pull/6225
- Multi-GPU pipeline parallelizm support https://github.com/ggerganov/llama.cpp/pull/6017
- Looking for contributions to add Deepseek support: https://github.com/ggerganov/llama.cpp/issues/5981
- Quantization blind testing: https://github.com/ggerganov/llama.cpp/discussions/5962
- Initial Mamba support has been added: https://github.com/ggerganov/llama.cpp/pull/5328
- Support loading sharded model, using `gguf-split` CLI https://github.com/ggerganov/llama.cpp/pull/6187
----
Generated
+3 -3
View File
@@ -20,11 +20,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1711163522,
"narHash": "sha256-YN/Ciidm+A0fmJPWlHBGvVkcarYWSC+s3NTPk/P+q3c=",
"lastModified": 1711703276,
"narHash": "sha256-iMUFArF0WCatKK6RzfUJknjem0H9m4KgorO/p3Dopkk=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "44d0940ea560dee511026a53f0e2e2cde489b4d4",
"rev": "d8fe5e6c92d0d190646fb9f1056741a229980089",
"type": "github"
},
"original": {
+3 -2
View File
@@ -447,10 +447,11 @@ static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_
#define GGML_COMMON_IMPL
#elif defined(GGML_COMMON_IMPL_SYCL)
#include <cstdint>
#define GGML_TABLE_BEGIN(type, name, size) static dpct::global_memory<const type, 1> name(sycl::range<1>(size), {
#define GGML_TABLE_END() });
#define GGML_TABLE_BEGIN(type, name, size) static const type name[size] = {
#define GGML_TABLE_END() };
#define GGML_COMMON_IMPL
#endif
+38 -114
View File
@@ -8079,7 +8079,7 @@ template <bool need_check> static void
template <int qk, int qi, typename block_q_t, int vdr, vec_dot_q_sycl_t vec_dot_q_sycl>
static void mul_mat_vec_q(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows,
const sycl::nd_item<3> &item_ct1,
const uint32_t *iq3xxs_grid_ptr, const uint64_t *ksigns64_ptr) {
const uint32_t *iq3xxs_grid_ptr=nullptr, const uint64_t *ksigns64_ptr=nullptr) {
const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) +
item_ct1.get_local_id(1);
@@ -9956,17 +9956,14 @@ static void dequantize_row_iq2_xxs_sycl(const void *vx, dst_t *y, const int k,
dpct::queue_ptr stream) {
const int nb = k / QK_K;
{
iq2xxs_grid.init(*stream);
ksigns_iq2xs.init(*stream);
kmask_iq2xs.init(*stream);
dpct::has_capability_or_fail(stream->get_device(),
{sycl::aspect::fp16});
stream->submit([&](sycl::handler &cgh) {
auto iq2xxs_grid_ptr_ct1 = iq2xxs_grid.get_ptr();
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
auto iq2xxs_grid_ptr_ct1 = &iq2xxs_grid[0];
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
sycl::range<3>(1, 1, 32),
@@ -9985,17 +9982,14 @@ static void dequantize_row_iq2_xs_sycl(const void *vx, dst_t *y, const int k,
dpct::queue_ptr stream) {
const int nb = k / QK_K;
{
iq2xs_grid.init(*stream);
ksigns_iq2xs.init(*stream);
kmask_iq2xs.init(*stream);
dpct::has_capability_or_fail(stream->get_device(),
{sycl::aspect::fp16});
stream->submit([&](sycl::handler &cgh) {
auto iq2xs_grid_ptr_ct1 = iq2xs_grid.get_ptr();
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
sycl::range<3>(1, 1, 32),
@@ -10014,17 +10008,14 @@ static void dequantize_row_iq3_xxs_sycl(const void *vx, dst_t *y, const int k,
dpct::queue_ptr stream) {
const int nb = k / QK_K;
{
iq3xxs_grid.init(*stream);
ksigns_iq2xs.init(*stream);
kmask_iq2xs.init(*stream);
dpct::has_capability_or_fail(stream->get_device(),
{sycl::aspect::fp16});
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
sycl::range<3>(1, 1, 32),
@@ -10043,17 +10034,14 @@ static void dequantize_row_iq3_s_sycl(const void *vx, dst_t *y, const int k,
dpct::queue_ptr stream) {
const int nb = k / QK_K;
{
iq3s_grid.init(*stream);
ksigns_iq2xs.init(*stream);
kmask_iq2xs.init(*stream);
dpct::has_capability_or_fail(stream->get_device(),
{sycl::aspect::fp16});
stream->submit([&](sycl::handler &cgh) {
auto iq3s_grid_ptr_ct1 = iq3s_grid.get_ptr();
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
sycl::range<3>(1, 1, 32),
@@ -10072,17 +10060,14 @@ static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int k,
dpct::queue_ptr stream) {
const int nb = k / QK_K;
{
iq1s_grid_gpu.init(*stream);
ksigns_iq2xs.init(*stream);
kmask_iq2xs.init(*stream);
dpct::has_capability_or_fail(stream->get_device(),
{sycl::aspect::fp16});
stream->submit([&](sycl::handler &cgh) {
auto iq1s_grid_ptr_ct1 = iq1s_grid_gpu.get_ptr();
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
cgh.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, nb) *
sycl::range<3>(1, 1, 32),
@@ -10415,12 +10400,8 @@ static void mul_mat_vec_q4_0_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -10428,8 +10409,7 @@ static void mul_mat_vec_q4_0_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK4_0, QI4_0, block_q4_0,
VDR_Q4_0_Q8_1_MMVQ, vec_dot_q4_0_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@@ -10444,12 +10424,8 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -10457,8 +10433,7 @@ static void mul_mat_vec_q4_1_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK4_0, QI4_1, block_q4_1,
VDR_Q4_1_Q8_1_MMVQ, vec_dot_q4_1_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@@ -10473,12 +10448,8 @@ static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -10486,8 +10457,7 @@ static void mul_mat_vec_q5_0_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK5_0, QI5_0, block_q5_0,
VDR_Q5_0_Q8_1_MMVQ, vec_dot_q5_0_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@@ -10502,12 +10472,8 @@ static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -10515,8 +10481,7 @@ static void mul_mat_vec_q5_1_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK5_1, QI5_1, block_q5_1,
VDR_Q5_1_Q8_1_MMVQ, vec_dot_q5_1_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@@ -10531,12 +10496,8 @@ static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -10544,8 +10505,7 @@ static void mul_mat_vec_q8_0_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK8_0, QI8_0, block_q8_0,
VDR_Q8_0_Q8_1_MMVQ, vec_dot_q8_0_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@@ -10560,12 +10520,8 @@ static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -10573,8 +10529,7 @@ static void mul_mat_vec_q2_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI2_K, block_q2_K,
VDR_Q2_K_Q8_1_MMVQ, vec_dot_q2_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@@ -10589,12 +10544,8 @@ static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -10602,8 +10553,7 @@ static void mul_mat_vec_q3_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI3_K, block_q3_K,
VDR_Q3_K_Q8_1_MMVQ, vec_dot_q3_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@@ -10618,12 +10568,8 @@ static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -10631,8 +10577,7 @@ static void mul_mat_vec_q4_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI4_K, block_q4_K,
VDR_Q4_K_Q8_1_MMVQ, vec_dot_q4_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@@ -10647,12 +10592,8 @@ static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -10660,8 +10601,7 @@ static void mul_mat_vec_q5_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI5_K, block_q5_K,
VDR_Q5_K_Q8_1_MMVQ, vec_dot_q5_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
@@ -10676,12 +10616,8 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -10689,13 +10625,13 @@ static void mul_mat_vec_q6_K_q8_1_sycl(const void *vx, const void *vy,
[[intel::reqd_sub_group_size(32)]] {
mul_mat_vec_q<QK_K, QI6_K, block_q6_K,
VDR_Q6_K_Q8_1_MMVQ, vec_dot_q6_K_q8_1>(
vx, vy, dst, ncols, nrows, item_ct1,
iq3xxs_grid_ptr_ct1, ksigns64_ptr_ct1);
vx, vy, dst, ncols, nrows, item_ct1);
});
});
}
}
static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy,
float *dst, const int ncols,
const int nrows,
@@ -10705,15 +10641,11 @@ static void mul_mat_vec_iq2_xxs_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq2xxs_grid.init(*stream);
ksigns_iq2xs.init(*stream);
kmask_iq2xs.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq2xxs_grid_ptr_ct1 = iq2xxs_grid.get_ptr();
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
auto iq2xxs_grid_ptr_ct1 = &iq2xxs_grid[0];
auto ksigns_iq2xs_ptr_ct1 = &ksigns_iq2xs[0];
auto kmask_iq2xs_ptr_ct1 = &kmask_iq2xs[0];
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -10736,12 +10668,10 @@ static void mul_mat_vec_iq2_xs_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq2xs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq2xs_grid_ptr_ct1 = iq2xs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
auto iq2xs_grid_ptr_ct1 = &iq2xs_grid[0];
auto ksigns64_ptr_ct1 = &ksigns64[0];
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -10764,12 +10694,10 @@ static void mul_mat_vec_iq3_xxs_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3xxs_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3xxs_grid_ptr_ct1 = iq3xxs_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
auto iq3xxs_grid_ptr_ct1 = &iq3xxs_grid[0];
auto ksigns64_ptr_ct1 = &ksigns64[0];
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -10792,12 +10720,10 @@ static void mul_mat_vec_iq3_s_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq3s_grid.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq3s_grid_ptr_ct1 = iq3s_grid.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
auto iq3s_grid_ptr_ct1 = &iq3s_grid[0];
auto ksigns64_ptr_ct1 = &ksigns64[0];
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
@@ -10820,12 +10746,10 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy,
const sycl::range<3> block_nums(1, 1, block_num_y);
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
{
iq1s_grid_gpu.init(*stream);
ksigns64.init(*stream);
stream->submit([&](sycl::handler &cgh) {
auto iq1s_grid_ptr_ct1 = iq1s_grid_gpu.get_ptr();
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
auto iq1s_grid_ptr_ct1 = &iq1s_grid_gpu[0];
auto ksigns64_ptr_ct1 = &ksigns64[0];
cgh.parallel_for(
sycl::nd_range<3>(block_nums * block_dims, block_dims),
+4 -1
View File
@@ -178,6 +178,9 @@ def get_commit_hexsha8(name):
for t in repo.tags:
if t.name == name:
return t.commit.hexsha[:8]
for c in repo.iter_commits("--all"):
if c.hexsha[:8] == name[:8]:
return c.hexsha[:8]
return None
@@ -224,7 +227,7 @@ if known_args.compare is not None:
hexsha8_compare = get_commit_hexsha8(known_args.compare)
name_compare = known_args.compare
if hexsha8_compare is None:
print(f"ERROR: cannot find data for baseline={known_args.compare}.")
print(f"ERROR: cannot find data for compare={known_args.compare}.")
sys.exit(1)
# Otherwise, search for the commit for llama-bench was most recently run
# and that is not a parent of master: