Compare commits

...

6 Commits

Author SHA1 Message Date
Georgi Gerganov f87f7b8986 flake.lock: Update (#6402)
Flake lock file updates:

• Updated input 'nixpkgs':
    'github:NixOS/nixpkgs/44d0940ea560dee511026a53f0e2e2cde489b4d4' (2024-03-23)
  → 'github:NixOS/nixpkgs/d8fe5e6c92d0d190646fb9f1056741a229980089' (2024-03-29)

Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
2024-04-01 09:05:57 -07:00
Johannes Gäßler 33a5244806 compare-llama-bench.py: fix long hexsha args (#6424) 2024-04-01 13:30:43 +02:00
Pierrick Hymbert 226e819371 ci: server: verify deps are coherent with the commit (#6409)
* ci: server: verify deps are coherent with the commit

* ci: server: change the ref to build as now it's a pull event target
2024-04-01 12:36:40 +02:00
Georgi Gerganov c50a82ce0f readme : update hot topics 2024-03-31 11:56:30 +03:00
Pierrick Hymbert 37e7854c10 ci: bench: fix Resource not accessible by integration on PR event (#6393) 2024-03-30 12:36:07 +02:00
Mohammadreza Hendiani c342d070c6 Fedora build update (#6388)
* fixed deprecated address

* fixed deprecated address

* fixed deprecated address

* Added 'Apache-2.0' SPDX license identifier due to 'kompute.cc' submodule licensing. Explanation of licensing method: https://docs.fedoraproject.org/en-US/legal/spdx/#_and_expressions

* Added 'Apache-2.0' SPDX license identifier due to 'kompute.cc' submodule licensing. Explanation of licensing method: https://docs.fedoraproject.org/en-US/legal/spdx/#_and_expressions

* Added 'Apache-2.0' SPDX license identifier due to 'kompute.cc' submodule licensing. Explanation of licensing method: https://docs.fedoraproject.org/en-US/legal/spdx/#_and_expressions

* reverted back to only the MIT license
2024-03-29 22:59:56 +01:00
8 changed files with 46 additions and 20 deletions
+1 -1
View File
@@ -1,5 +1,5 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
+1 -1
View File
@@ -1,5 +1,5 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
+1 -1
View File
@@ -1,5 +1,5 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://fedoraproject.org/wiki/How_to_create_an_RPM_package
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - boeroboy@gmail.com
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
+1 -2
View File
@@ -25,7 +25,7 @@ on:
branches:
- master
paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
pull_request:
pull_request_target:
types: [opened, synchronize, reopened]
paths: ['.github/workflows/bench.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/bench/**.*']
schedule:
@@ -143,7 +143,6 @@ jobs:
- name: Commit status
uses: Sibz/github-status-action@v1
continue-on-error: true # If not authorized on external repo
with:
authToken: ${{secrets.GITHUB_TOKEN}}
sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
+34 -10
View File
@@ -4,6 +4,10 @@ name: Server
on:
workflow_dispatch: # allows manual triggering
inputs:
sha:
description: 'Commit SHA1 to build'
required: false
type: string
slow_tests:
description: 'Run slow tests'
required: true
@@ -11,12 +15,12 @@ on:
push:
branches:
- master
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/tests/**.*']
pull_request:
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
pull_request_target:
types: [opened, synchronize, reopened]
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/tests/**.*']
paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
schedule:
- cron: '0 0 * * *'
- cron: '2 4 * * *'
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
@@ -44,25 +48,45 @@ jobs:
options: --cpus 4
steps:
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Dependencies
id: depends
run: |
apt-get update
apt-get -y install \
build-essential \
xxd \
git \
cmake \
python3-pip \
curl \
wget \
language-pack-en \
libcurl4-openssl-dev
- name: Clone
id: checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
- name: Verify server deps
id: verify_server_deps
run: |
git config --global --add safe.directory $(realpath .)
cd examples/server
git ls-files --others --modified
git status
./deps.sh
git status
not_ignored_files="$(git ls-files --others --modified)"
echo "Modified files: ${not_ignored_files}"
if [ -n "${not_ignored_files}" ]; then
echo "Repository is dirty or server deps are not built as expected"
echo "${not_ignored_files}"
exit 1
fi
- name: Build
id: cmake_build
run: |
+1 -1
View File
@@ -18,12 +18,12 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
### Hot topics
- Model sharding instructions using `gguf-split` https://github.com/ggerganov/llama.cpp/discussions/6404
- Fix major bug in Metal batched inference https://github.com/ggerganov/llama.cpp/pull/6225
- Multi-GPU pipeline parallelizm support https://github.com/ggerganov/llama.cpp/pull/6017
- Looking for contributions to add Deepseek support: https://github.com/ggerganov/llama.cpp/issues/5981
- Quantization blind testing: https://github.com/ggerganov/llama.cpp/discussions/5962
- Initial Mamba support has been added: https://github.com/ggerganov/llama.cpp/pull/5328
- Support loading sharded model, using `gguf-split` CLI https://github.com/ggerganov/llama.cpp/pull/6187
----
Generated
+3 -3
View File
@@ -20,11 +20,11 @@
},
"nixpkgs": {
"locked": {
"lastModified": 1711163522,
"narHash": "sha256-YN/Ciidm+A0fmJPWlHBGvVkcarYWSC+s3NTPk/P+q3c=",
"lastModified": 1711703276,
"narHash": "sha256-iMUFArF0WCatKK6RzfUJknjem0H9m4KgorO/p3Dopkk=",
"owner": "NixOS",
"repo": "nixpkgs",
"rev": "44d0940ea560dee511026a53f0e2e2cde489b4d4",
"rev": "d8fe5e6c92d0d190646fb9f1056741a229980089",
"type": "github"
},
"original": {
+4 -1
View File
@@ -178,6 +178,9 @@ def get_commit_hexsha8(name):
for t in repo.tags:
if t.name == name:
return t.commit.hexsha[:8]
for c in repo.iter_commits("--all"):
if c.hexsha[:8] == name[:8]:
return c.hexsha[:8]
return None
@@ -224,7 +227,7 @@ if known_args.compare is not None:
hexsha8_compare = get_commit_hexsha8(known_args.compare)
name_compare = known_args.compare
if hexsha8_compare is None:
print(f"ERROR: cannot find data for baseline={known_args.compare}.")
print(f"ERROR: cannot find data for compare={known_args.compare}.")
sys.exit(1)
# Otherwise, search for the commit for llama-bench was most recently run
# and that is not a parent of master: