mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-17 02:57:39 +02:00
Compare commits
40 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 46acb36767 | |||
| 131b058409 | |||
| 753e36f650 | |||
| 7ce2c77f88 | |||
| aab606a11f | |||
| b0bc9f4a9d | |||
| 4755afd1cb | |||
| 6e0438da3c | |||
| 727107707a | |||
| 69ff61397d | |||
| 044ec4b2a5 | |||
| 77178eedc8 | |||
| 15a333260a | |||
| 43241adf22 | |||
| a44bc969e4 | |||
| 2c4fb69246 | |||
| 3ca23481dd | |||
| 3fe8d7a17f | |||
| 68265ebfc6 | |||
| 381da2d9f0 | |||
| 0fd6c1f015 | |||
| 19885d205e | |||
| 76a936c893 | |||
| 463628372d | |||
| f30ea47a87 | |||
| d8fd0ccf6a | |||
| b3d978600f | |||
| 99b71c068f | |||
| 306d34be7a | |||
| 8030da7afe | |||
| 184215e783 | |||
| 48358b2e5b | |||
| 5cdb371731 | |||
| 44ca159faf | |||
| 05b06210c9 | |||
| 83796e62bc | |||
| 828defefb6 | |||
| caa106d4e0 | |||
| 3202361c5b | |||
| 332bdfd798 |
@@ -333,6 +333,7 @@ jobs:
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -G Xcode .. \
|
||||
-DLLAMA_METAL_EMBED_LIBRARY=ON \
|
||||
-DLLAMA_BUILD_EXAMPLES=OFF \
|
||||
-DLLAMA_BUILD_TESTS=OFF \
|
||||
-DLLAMA_BUILD_SERVER=OFF \
|
||||
@@ -361,6 +362,7 @@ jobs:
|
||||
mkdir build
|
||||
cd build
|
||||
cmake -G Xcode .. \
|
||||
-DLLAMA_METAL_EMBED_LIBRARY=ON \
|
||||
-DLLAMA_BUILD_EXAMPLES=OFF \
|
||||
-DLLAMA_BUILD_TESTS=OFF \
|
||||
-DLLAMA_BUILD_SERVER=OFF \
|
||||
@@ -425,6 +427,8 @@ jobs:
|
||||
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
|
||||
- build: 'vulkan'
|
||||
defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
|
||||
- build: 'arm64'
|
||||
defines: '-A ARM64 -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
|
||||
|
||||
steps:
|
||||
- name: Clone
|
||||
@@ -520,7 +524,7 @@ jobs:
|
||||
- name: Test
|
||||
id: cmake_test
|
||||
# not all machines have native AVX-512
|
||||
if: ${{ matrix.build != 'clblast' && matrix.build != 'kompute' && matrix.build != 'vulkan' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }}
|
||||
if: ${{ matrix.build != 'arm64' && matrix.build != 'clblast' && matrix.build != 'kompute' && matrix.build != 'vulkan' && (matrix.build != 'avx512' || env.HAS_AVX512F == '1') }}
|
||||
run: |
|
||||
cd build
|
||||
ctest -L main -C Release --verbose --timeout 900
|
||||
|
||||
@@ -25,17 +25,14 @@ jobs:
|
||||
strategy:
|
||||
matrix:
|
||||
sanitizer: [ADDRESS, THREAD, UNDEFINED]
|
||||
build_type: [Debug, Release]
|
||||
build_type: [Debug]
|
||||
include:
|
||||
- build_type: Release
|
||||
sanitizer: ""
|
||||
exclude:
|
||||
- build_type: Release
|
||||
sanitizer: ADDRESS
|
||||
- build_type: Release
|
||||
- build_type: Debug
|
||||
sanitizer: THREAD
|
||||
- build_type: Release
|
||||
sanitizer: UNDEFINED
|
||||
disabled_on_pr: true
|
||||
fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
|
||||
|
||||
container:
|
||||
image: ubuntu:latest
|
||||
@@ -81,13 +78,14 @@ jobs:
|
||||
|
||||
- name: Tests
|
||||
id: server_integration_tests
|
||||
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
|
||||
run: |
|
||||
cd examples/server/tests
|
||||
PORT=8888 ./tests.sh
|
||||
|
||||
- name: Slow tests
|
||||
id: server_integration_tests_slow
|
||||
if: ${{ github.event.schedule != '' && matrix.build_type == 'Release' || github.event.inputs.slow_tests == 'true' }}
|
||||
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
|
||||
run: |
|
||||
cd examples/server/tests
|
||||
PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow
|
||||
@@ -124,13 +122,14 @@ jobs:
|
||||
|
||||
- name: Tests
|
||||
id: server_integration_tests
|
||||
if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
|
||||
run: |
|
||||
cd examples/server/tests
|
||||
behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp
|
||||
|
||||
- name: Slow tests
|
||||
id: server_integration_tests_slow
|
||||
if: ${{ github.event.schedule != '' || github.event.inputs.slow_tests == 'true' }}
|
||||
if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
|
||||
run: |
|
||||
cd examples/server/tests
|
||||
behave.exe --stop --no-skipped --no-capture --tags slow
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
name: clang-tidy review post comments
|
||||
|
||||
on:
|
||||
workflow_dispatch:
|
||||
workflows: ["clang-tidy-review"]
|
||||
types:
|
||||
- completed
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: ZedThree/clang-tidy-review/post@v0.13.0
|
||||
# lgtm_comment_body, max_comments, and annotations need to be set on the posting workflow in a split setup
|
||||
with:
|
||||
# adjust options as necessary
|
||||
lgtm_comment_body: ''
|
||||
annotations: false
|
||||
max_comments: 25
|
||||
@@ -1,23 +0,0 @@
|
||||
name: clang-tidy-review
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- master
|
||||
|
||||
jobs:
|
||||
clang-tidy-review:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- uses: ZedThree/clang-tidy-review@v0.13.0
|
||||
id: review
|
||||
with:
|
||||
lgtm_comment_body: ''
|
||||
build_dir: build
|
||||
cmake_command: cmake . -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=on
|
||||
split_workflow: true
|
||||
|
||||
- uses: ZedThree/clang-tidy-review/upload@v0.13.0
|
||||
@@ -25,6 +25,8 @@
|
||||
.vscode/
|
||||
.idea/
|
||||
|
||||
ggml-metal-embed.metal
|
||||
|
||||
lcov-report/
|
||||
gcovr-report/
|
||||
|
||||
|
||||
+43
-32
@@ -118,6 +118,7 @@ option(LLAMA_SYCL "llama: use SYCL"
|
||||
option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF)
|
||||
set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device")
|
||||
option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF)
|
||||
set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeline parallelism")
|
||||
|
||||
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
|
||||
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
|
||||
@@ -147,6 +148,8 @@ set(THREADS_PREFER_PTHREAD_FLAG ON)
|
||||
find_package(Threads REQUIRED)
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
||||
add_compile_definitions(GGML_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES})
|
||||
|
||||
# enable libstdc++ assertions for debug builds
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>)
|
||||
@@ -197,9 +200,6 @@ if (LLAMA_METAL)
|
||||
add_compile_definitions(GGML_METAL_NDEBUG)
|
||||
endif()
|
||||
|
||||
# get full path to the file
|
||||
#add_compile_definitions(GGML_METAL_DIR_KERNELS="${CMAKE_CURRENT_SOURCE_DIR}/")
|
||||
|
||||
# copy ggml-common.h and ggml-metal.metal to bin directory
|
||||
configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY)
|
||||
configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY)
|
||||
@@ -208,53 +208,62 @@ if (LLAMA_METAL)
|
||||
enable_language(ASM)
|
||||
add_compile_definitions(GGML_METAL_EMBED_LIBRARY)
|
||||
|
||||
set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h")
|
||||
set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
|
||||
|
||||
file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated")
|
||||
set(EMBED_METALLIB_ASSEMBLY "${CMAKE_BINARY_DIR}/autogenerated/ggml-embed-metallib.s")
|
||||
|
||||
# merge ggml-common.h and ggml-metal.metal into a single file
|
||||
set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s")
|
||||
set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal")
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${EMBED_METALLIB_ASSEMBLY}
|
||||
COMMAND echo ".section __DATA,__ggml_metallib" > ${EMBED_METALLIB_ASSEMBLY}
|
||||
COMMAND echo ".globl _ggml_metallib_start" >> ${EMBED_METALLIB_ASSEMBLY}
|
||||
COMMAND echo "_ggml_metallib_start:" >> ${EMBED_METALLIB_ASSEMBLY}
|
||||
COMMAND echo ".incbin \\\"${METALLIB_SOURCE}\\\"" >> ${EMBED_METALLIB_ASSEMBLY}
|
||||
COMMAND echo ".globl _ggml_metallib_end" >> ${EMBED_METALLIB_ASSEMBLY}
|
||||
COMMAND echo "_ggml_metallib_end:" >> ${EMBED_METALLIB_ASSEMBLY}
|
||||
DEPENDS ${METALLIB_SOURCE}
|
||||
OUTPUT ${METALLIB_EMBED_ASM}
|
||||
COMMAND echo "Embedding Metal library"
|
||||
COMMAND sed -e '/\#include \"ggml-common.h\"/r ${METALLIB_COMMON}' -e '/\#include \"ggml-common.h\"/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED}
|
||||
COMMAND echo ".section __DATA,__ggml_metallib" > ${METALLIB_EMBED_ASM}
|
||||
COMMAND echo ".globl _ggml_metallib_start" >> ${METALLIB_EMBED_ASM}
|
||||
COMMAND echo "_ggml_metallib_start:" >> ${METALLIB_EMBED_ASM}
|
||||
COMMAND echo ".incbin \\\"${METALLIB_SOURCE_EMBED}\\\"" >> ${METALLIB_EMBED_ASM}
|
||||
COMMAND echo ".globl _ggml_metallib_end" >> ${METALLIB_EMBED_ASM}
|
||||
COMMAND echo "_ggml_metallib_end:" >> ${METALLIB_EMBED_ASM}
|
||||
DEPENDS ggml-metal.metal ggml-common.h
|
||||
COMMENT "Generate assembly for embedded Metal library"
|
||||
)
|
||||
|
||||
set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${EMBED_METALLIB_ASSEMBLY})
|
||||
endif()
|
||||
|
||||
if (LLAMA_METAL_SHADER_DEBUG)
|
||||
# custom command to do the following:
|
||||
# xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
|
||||
# xcrun -sdk macosx metallib ggml-metal.air -o default.metallib
|
||||
#
|
||||
# note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works
|
||||
# disabling fast math is needed in order to pass tests/test-backend-ops
|
||||
# note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1
|
||||
# note: unfortunately, we have to call it default.metallib instead of ggml.metallib
|
||||
# ref: https://github.com/ggerganov/whisper.cpp/issues/1720
|
||||
set(XC_FLAGS -fno-fast-math -fno-inline -g)
|
||||
if (LLAMA_QKK_64)
|
||||
set(XC_FLAGS ${XC_FLAGS} -DQK_K=64)
|
||||
set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM})
|
||||
else()
|
||||
if (LLAMA_METAL_SHADER_DEBUG)
|
||||
# custom command to do the following:
|
||||
# xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air
|
||||
# xcrun -sdk macosx metallib ggml-metal.air -o default.metallib
|
||||
#
|
||||
# note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works
|
||||
# disabling fast math is needed in order to pass tests/test-backend-ops
|
||||
# note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1
|
||||
# note: unfortunately, we have to call it default.metallib instead of ggml.metallib
|
||||
# ref: https://github.com/ggerganov/whisper.cpp/issues/1720
|
||||
set(XC_FLAGS -fno-fast-math -fno-inline -g)
|
||||
else()
|
||||
set(XC_FLAGS -O3)
|
||||
endif()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
|
||||
COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
|
||||
COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
|
||||
DEPENDS ggml-metal.metal
|
||||
COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air
|
||||
COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h
|
||||
COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal
|
||||
DEPENDS ggml-metal.metal ggml-common.h
|
||||
COMMENT "Compiling Metal kernels"
|
||||
)
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
ggml-metal ALL
|
||||
DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
|
||||
)
|
||||
endif()
|
||||
)
|
||||
endif() # LLAMA_METAL_EMBED_LIBRARY
|
||||
|
||||
set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS}
|
||||
${FOUNDATION_LIBRARY}
|
||||
@@ -1141,6 +1150,8 @@ endif()
|
||||
add_library(llama
|
||||
llama.cpp
|
||||
llama.h
|
||||
unicode.h
|
||||
unicode.cpp
|
||||
)
|
||||
|
||||
target_include_directories(llama PUBLIC .)
|
||||
|
||||
@@ -167,6 +167,10 @@ ifeq ($(UNAME_S),OpenBSD)
|
||||
MK_CPPFLAGS += -D_BSD_SOURCE
|
||||
endif
|
||||
|
||||
ifdef LLAMA_SCHED_MAX_COPIES
|
||||
MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(LLAMA_SCHED_MAX_COPIES)
|
||||
endif
|
||||
|
||||
ifdef LLAMA_DEBUG
|
||||
MK_CFLAGS += -O0 -g
|
||||
MK_CXXFLAGS += -O0 -g
|
||||
@@ -549,19 +553,20 @@ endif
|
||||
endif # LLAMA_METAL
|
||||
|
||||
ifdef LLAMA_METAL
|
||||
ggml-metal.o: ggml-metal.m ggml-metal.h
|
||||
ggml-metal.o: ggml-metal.m ggml-metal.h ggml.h
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
ifdef LLAMA_METAL_EMBED_LIBRARY
|
||||
ggml-metal-embed.o: ggml-metal.metal
|
||||
ggml-metal-embed.o: ggml-metal.metal ggml-common.h
|
||||
@echo "Embedding Metal library"
|
||||
@sed -e '/#include "ggml-common.h"/r ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml-metal.metal > ggml-metal-embed.metal
|
||||
$(eval TEMP_ASSEMBLY=$(shell mktemp))
|
||||
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
|
||||
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
|
||||
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
|
||||
@echo ".incbin \"$<\"" >> $(TEMP_ASSEMBLY)
|
||||
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
|
||||
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
|
||||
@echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
|
||||
@echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
|
||||
@echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
|
||||
@echo ".incbin \"ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
|
||||
@echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
|
||||
@echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
|
||||
@$(AS) $(TEMP_ASSEMBLY) -o $@
|
||||
@rm -f ${TEMP_ASSEMBLY}
|
||||
endif
|
||||
@@ -633,9 +638,12 @@ ggml-backend.o: ggml-backend.c ggml.h ggml-backend.h
|
||||
ggml-quants.o: ggml-quants.c ggml.h ggml-quants.h ggml-common.h
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o
|
||||
unicode.o: unicode.cpp unicode.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
llama.o: llama.cpp ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
|
||||
OBJS += ggml-alloc.o ggml-backend.o ggml-quants.o unicode.o
|
||||
|
||||
llama.o: llama.cpp unicode.h ggml.h ggml-alloc.h ggml-backend.h ggml-cuda.h ggml-metal.h llama.h
|
||||
$(CXX) $(CXXFLAGS) -c $< -o $@
|
||||
|
||||
COMMON_H_DEPS = common/common.h common/sampling.h common/log.h
|
||||
|
||||
@@ -31,6 +31,7 @@ let package = Package(
|
||||
sources: [
|
||||
"ggml.c",
|
||||
"llama.cpp",
|
||||
"unicode.cpp",
|
||||
"ggml-alloc.c",
|
||||
"ggml-backend.c",
|
||||
"ggml-quants.c",
|
||||
|
||||
@@ -10,12 +10,14 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)
|
||||
|
||||
### Recent API changes
|
||||
|
||||
- [2024 Mar 8] `llama_kv_cache_seq_rm()` returns a `bool` instead of `void`, and new `llama_n_max_seq()` returns the upper limit of acceptable `seq_id` in batches (relevant when dealing with multiple sequences) https://github.com/ggerganov/llama.cpp/pull/5328
|
||||
- [2024 Mar 13] Add `llama_synchronize()` + `llama_context_params.n_ubatch` https://github.com/ggerganov/llama.cpp/pull/6017
|
||||
- [2024 Mar 8] `llama_kv_cache_seq_rm()` returns a `bool` instead of `void`, and new `llama_n_seq_max()` returns the upper limit of acceptable `seq_id` in batches (relevant when dealing with multiple sequences) https://github.com/ggerganov/llama.cpp/pull/5328
|
||||
- [2024 Mar 4] Embeddings API updated https://github.com/ggerganov/llama.cpp/pull/5796
|
||||
- [2024 Mar 3] `struct llama_context_params` https://github.com/ggerganov/llama.cpp/pull/5849
|
||||
|
||||
### Hot topics
|
||||
|
||||
- Multi-GPU pipeline parallelism support https://github.com/ggerganov/llama.cpp/pull/6017
|
||||
- Looking for contributions to add Deepseek support: https://github.com/ggerganov/llama.cpp/issues/5981
|
||||
- Quantization blind testing: https://github.com/ggerganov/llama.cpp/discussions/5962
|
||||
- Initial Mamba support has been added: https://github.com/ggerganov/llama.cpp/pull/5328
|
||||
@@ -902,6 +904,9 @@ First, install the essential packages for termux:
|
||||
pkg install clang wget git cmake
|
||||
```
|
||||
Second, obtain the [Android NDK](https://developer.android.com/ndk) and then build with CMake:
|
||||
|
||||
You can execute the following commands on your computer to avoid downloading the NDK to your mobile. Of course, you can also do this in Termux.
|
||||
|
||||
```
|
||||
$ mkdir build-android
|
||||
$ cd build-android
|
||||
@@ -910,7 +915,28 @@ $ cmake -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DANDROI
|
||||
$ make
|
||||
```
|
||||
Install [termux](https://termux.dev/) on your device and run `termux-setup-storage` to get access to your SD card.
|
||||
Finally, copy the `llama` binary and the model files to your device storage. Here is a demo of an interactive session running on Pixel 5 phone:
|
||||
Finally, copy these built `llama` binaries and the model file to your device storage. Because the file permissions in the Android sdcard cannot be changed, you can copy the executable files to the `/data/data/com.termux/files/home/bin` path, and then execute the following commands in Termux to add executable permission:
|
||||
|
||||
(This assumes that you have pushed the built executable files to the `/sdcard/llama.cpp/bin` path using `adb push`.)
|
||||
```
|
||||
$ cp -r /sdcard/llama.cpp/bin /data/data/com.termux/files/home/
|
||||
$ cd /data/data/com.termux/files/home/bin
|
||||
$ chmod +x ./*
|
||||
```
|
||||
|
||||
Download model [llama-2-7b-chat.Q4_K_M.gguf](https://huggingface.co/TheBloke/Llama-2-7B-Chat-GGUF/blob/main/llama-2-7b-chat.Q4_K_M.gguf), and push it to `/sdcard/llama.cpp/`, then move it to `/data/data/com.termux/files/home/model/`
|
||||
|
||||
```
|
||||
$ mv /sdcard/llama.cpp/llama-2-7b-chat.Q4_K_M.gguf /data/data/com.termux/files/home/model/
|
||||
```
|
||||
|
||||
Now, you can start chatting:
|
||||
```
|
||||
$ cd /data/data/com.termux/files/home/bin
|
||||
$ ./main -m ../model/llama-2-7b-chat.Q4_K_M.gguf -n 128 -cml
|
||||
```
|
||||
|
||||
Here is a demo of an interactive session running on Pixel 5 phone:
|
||||
|
||||
https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b050-55b0b3b9274c.mp4
|
||||
|
||||
|
||||
@@ -115,6 +115,7 @@ pub fn build(b: *std.build.Builder) !void {
|
||||
const ggml_alloc = make.obj("ggml-alloc", "ggml-alloc.c");
|
||||
const ggml_backend = make.obj("ggml-backend", "ggml-backend.c");
|
||||
const ggml_quants = make.obj("ggml-quants", "ggml-quants.c");
|
||||
const unicode = make.obj("unicode", "unicode.cpp");
|
||||
const llama = make.obj("llama", "llama.cpp");
|
||||
const buildinfo = make.obj("common", "common/build-info.cpp");
|
||||
const common = make.obj("common", "common/common.cpp");
|
||||
@@ -125,14 +126,14 @@ pub fn build(b: *std.build.Builder) !void {
|
||||
const clip = make.obj("clip", "examples/llava/clip.cpp");
|
||||
const llava = make.obj("llava", "examples/llava/llava.cpp");
|
||||
|
||||
_ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, console, grammar_parser });
|
||||
_ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo });
|
||||
_ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo });
|
||||
_ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo });
|
||||
_ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train });
|
||||
_ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, train });
|
||||
_ = make.exe("main", "examples/main/main.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, sampling, console, grammar_parser });
|
||||
_ = make.exe("quantize", "examples/quantize/quantize.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo });
|
||||
_ = make.exe("perplexity", "examples/perplexity/perplexity.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo });
|
||||
_ = make.exe("embedding", "examples/embedding/embedding.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo });
|
||||
_ = make.exe("finetune", "examples/finetune/finetune.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, train });
|
||||
_ = make.exe("train-text-from-scratch", "examples/train-text-from-scratch/train-text-from-scratch.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, train });
|
||||
|
||||
const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, common, buildinfo, sampling, grammar_parser, clip, llava });
|
||||
const server = make.exe("server", "examples/server/server.cpp", &.{ ggml, ggml_alloc, ggml_backend, ggml_quants, llama, unicode, common, buildinfo, sampling, grammar_parser, clip, llava });
|
||||
if (server.target.isWindows()) {
|
||||
server.linkSystemLibrary("ws2_32");
|
||||
}
|
||||
|
||||
+33
-10
@@ -483,6 +483,12 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) {
|
||||
break;
|
||||
}
|
||||
params.n_batch = std::stoi(argv[i]);
|
||||
} else if (arg == "-ub" || arg == "--ubatch-size") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.n_ubatch = std::stoi(argv[i]);
|
||||
} else if (arg == "--keep") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
@@ -977,7 +983,9 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
||||
printf(" binary file containing multiple choice tasks.\n");
|
||||
printf(" -n N, --n-predict N number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
|
||||
printf(" -c N, --ctx-size N size of the prompt context (default: %d, 0 = loaded from model)\n", params.n_ctx);
|
||||
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
|
||||
printf(" -b N, --batch-size N logical maximum batch size (default: %d)\n", params.n_batch);
|
||||
printf(" -ub N, --ubatch-size N\n");
|
||||
printf(" physical maximum batch size (default: %d)\n", params.n_ubatch);
|
||||
printf(" --samplers samplers that will be used for generation in the order, separated by \';\'\n");
|
||||
printf(" (default: %s)\n", sampler_type_names.c_str());
|
||||
printf(" --sampling-seq simplified sequence for samplers that will be used (default: %s)\n", sampler_type_chars.c_str());
|
||||
@@ -1287,8 +1295,9 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
|
||||
auto cparams = llama_context_default_params();
|
||||
|
||||
cparams.n_ctx = params.n_ctx;
|
||||
cparams.n_seq_max = params.n_parallel;
|
||||
cparams.n_batch = params.n_batch;
|
||||
cparams.n_parallel = params.n_parallel;
|
||||
cparams.n_ubatch = params.n_ubatch;
|
||||
cparams.n_threads = params.n_threads;
|
||||
cparams.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
||||
cparams.seed = params.seed;
|
||||
@@ -1379,6 +1388,7 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
|
||||
std::vector<llama_token> tmp = { llama_token_bos(model), llama_token_eos(model), };
|
||||
llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch), 0, 0));
|
||||
llama_kv_cache_clear(lctx);
|
||||
llama_synchronize(lctx);
|
||||
llama_reset_timings(lctx);
|
||||
}
|
||||
|
||||
@@ -1786,17 +1796,17 @@ void dump_kv_cache_view(const llama_kv_cache_view & view, int row_size) {
|
||||
static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+";
|
||||
|
||||
printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d",
|
||||
view.n_cells, view.n_max_seq, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
|
||||
view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
|
||||
|
||||
llama_kv_cache_view_cell * c_curr = view.cells;
|
||||
llama_seq_id * cs_curr = view.cells_sequences;
|
||||
|
||||
for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) {
|
||||
for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
|
||||
if (i % row_size == 0) {
|
||||
printf("\n%5d: ", i);
|
||||
}
|
||||
int seq_count = 0;
|
||||
for (int j = 0; j < view.n_max_seq; j++) {
|
||||
for (int j = 0; j < view.n_seq_max; j++) {
|
||||
if (cs_curr[j] >= 0) { seq_count++; }
|
||||
}
|
||||
putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]);
|
||||
@@ -1809,14 +1819,14 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) {
|
||||
static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
|
||||
|
||||
printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n",
|
||||
view.n_cells, view.n_max_seq, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
|
||||
view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx);
|
||||
|
||||
std::unordered_map<llama_seq_id, size_t> seqs;
|
||||
llama_kv_cache_view_cell * c_curr = view.cells;
|
||||
llama_seq_id * cs_curr = view.cells_sequences;
|
||||
|
||||
for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) {
|
||||
for (int j = 0; j < view.n_max_seq; j++) {
|
||||
for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
|
||||
for (int j = 0; j < view.n_seq_max; j++) {
|
||||
if (cs_curr[j] < 0) { continue; }
|
||||
if (seqs.find(cs_curr[j]) == seqs.end()) {
|
||||
if (seqs.size() + 1 >= sizeof(slot_chars)) { break; }
|
||||
@@ -1835,11 +1845,11 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size) {
|
||||
|
||||
c_curr = view.cells;
|
||||
cs_curr = view.cells_sequences;
|
||||
for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_max_seq) {
|
||||
for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) {
|
||||
if (i % row_size == 0) {
|
||||
printf("\n%5d: ", i);
|
||||
}
|
||||
for (int j = 0; j < view.n_max_seq; j++) {
|
||||
for (int j = 0; j < view.n_seq_max; j++) {
|
||||
if (cs_curr[j] >= 0) {
|
||||
const auto & it = seqs.find(cs_curr[j]);
|
||||
putchar(it != seqs.end() ? int(slot_chars[it->second]) : '+');
|
||||
@@ -1867,3 +1877,16 @@ void llama_embd_normalize(const float * inp, float * out, int n) {
|
||||
}
|
||||
}
|
||||
|
||||
// Computes the cosine similarity between two embedding vectors.
//
//   embd1, embd2 - input vectors; the first n floats of each are read
//   n            - number of elements to compare
//
// Returns dot(embd1, embd2) / (|embd1| * |embd2|), accumulated in double
// precision and narrowed to float on return.
//
// If a vector has zero norm the quotient is undefined (0/0 or x/0), so the
// result is defined explicitly instead of returning NaN:
//   - both norms are zero (this includes n <= 0) -> 1.0f
//   - exactly one norm is zero                   -> 0.0f
float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n){
    double sum  = 0.0; // dot product of embd1 and embd2
    double sum1 = 0.0; // squared norm of embd1
    double sum2 = 0.0; // squared norm of embd2

    for (int i = 0; i < n; i++) {
        sum  += embd1[i] * embd2[i];
        sum1 += embd1[i] * embd1[i];
        sum2 += embd2[i] * embd2[i];
    }

    // guard against division by zero for zero-length / all-zero vectors
    if (sum1 == 0.0 || sum2 == 0.0) {
        return (sum1 == 0.0 && sum2 == 0.0) ? 1.0f : 0.0f;
    }

    return sum / (std::sqrt(sum1) * std::sqrt(sum2));
}
|
||||
|
||||
+3
-1
@@ -51,7 +51,8 @@ struct gpt_params {
|
||||
int32_t n_threads_batch_draft = -1;
|
||||
int32_t n_predict = -1; // new tokens to predict
|
||||
int32_t n_ctx = 512; // context size
|
||||
int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
|
||||
int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
|
||||
int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS)
|
||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||
int32_t n_draft = 5; // number of tokens to draft during speculative decoding
|
||||
int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
|
||||
@@ -267,3 +268,4 @@ void dump_kv_cache_view_seqs(const llama_kv_cache_view & view, int row_size = 40
|
||||
|
||||
void llama_embd_normalize(const float * inp, float * out, int n);
|
||||
|
||||
float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n);
|
||||
|
||||
@@ -17,6 +17,13 @@ struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Ensure that there is a "root" node.
|
||||
if (result->parsed_grammar.symbol_ids.find("root") == result->parsed_grammar.symbol_ids.end()) {
|
||||
fprintf(stderr, "%s: grammar does not contain a 'root' symbol\n", __func__);
|
||||
delete result;
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::vector<const llama_grammar_element *> grammar_rules(result->parsed_grammar.c_rules());
|
||||
|
||||
result->grammar = llama_grammar_init(
|
||||
|
||||
+73
-53
@@ -332,6 +332,9 @@ class Params:
|
||||
#
|
||||
|
||||
class BpeVocab:
|
||||
tokenizer_model = "gpt2"
|
||||
name = "bpe"
|
||||
|
||||
def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None:
|
||||
self.bpe_tokenizer = json.loads(open(str(fname_tokenizer), encoding="utf-8").read())
|
||||
if isinstance(self.bpe_tokenizer.get('model'), dict):
|
||||
@@ -390,6 +393,9 @@ class BpeVocab:
|
||||
|
||||
|
||||
class SentencePieceVocab:
|
||||
tokenizer_model = "llama"
|
||||
name = "spm"
|
||||
|
||||
def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None) -> None:
|
||||
self.sentencepiece_tokenizer = SentencePieceProcessor(str(fname_tokenizer))
|
||||
added_tokens: dict[str, int]
|
||||
@@ -453,6 +459,9 @@ class SentencePieceVocab:
|
||||
|
||||
|
||||
class HfVocab:
|
||||
tokenizer_model = "llama"
|
||||
name = "hfft"
|
||||
|
||||
def __init__(self, fname_tokenizer: Path, fname_added_tokens: Path | None = None) -> None:
|
||||
try:
|
||||
from transformers import AutoTokenizer
|
||||
@@ -553,7 +562,15 @@ class HfVocab:
|
||||
return f"<HfVocab with {self.vocab_size_base} base tokens and {len(self.added_tokens_list)} added tokens>"
|
||||
|
||||
|
||||
Vocab: TypeAlias = "BpeVocab | SentencePieceVocab | HfVocab"
|
||||
class NoVocab:
|
||||
tokenizer_model = "no_vocab"
|
||||
name = "no_vocab"
|
||||
|
||||
def __repr__(self) -> str:
|
||||
return "<NoVocab for a model without integrated vocabulary>"
|
||||
|
||||
|
||||
Vocab: TypeAlias = "BpeVocab | SentencePieceVocab | HfVocab | NoVocab"
|
||||
|
||||
|
||||
#
|
||||
@@ -935,8 +952,10 @@ def check_vocab_size(params: Params, vocab: Vocab, pad_vocab: bool = False) -> N
|
||||
# Handle special case where the model's vocab size is not set
|
||||
if params.n_vocab == -1:
|
||||
raise ValueError(
|
||||
f"The model's vocab size is set to -1 in params.json. Please update it manually. Maybe {vocab.vocab_size}?"
|
||||
f"The model's vocab size is set to -1 in params.json. Please update it manually.{f' Maybe {vocab.vocab_size}?' if hasattr(vocab, 'vocab_size') else ''}"
|
||||
)
|
||||
if isinstance(vocab, NoVocab):
|
||||
return # model has no vocab
|
||||
|
||||
# Check for a vocab size mismatch
|
||||
if params.n_vocab == vocab.vocab_size:
|
||||
@@ -977,6 +996,7 @@ class OutputFile:
|
||||
name = str(params.path_model.parent).split('/')[-1]
|
||||
|
||||
self.gguf.add_name (name)
|
||||
self.gguf.add_vocab_size (params.n_vocab)
|
||||
self.gguf.add_context_length (params.n_ctx)
|
||||
self.gguf.add_embedding_length (params.n_embd)
|
||||
self.gguf.add_block_count (params.n_layer)
|
||||
@@ -1013,21 +1033,9 @@ class OutputFile:
|
||||
if params.ftype is not None:
|
||||
self.gguf.add_file_type(params.ftype)
|
||||
|
||||
def handle_tokenizer_model(self, vocab: Vocab) -> str:
|
||||
# Map the vocab types to the supported tokenizer models
|
||||
tokenizer_model = {
|
||||
SentencePieceVocab: "llama",
|
||||
HfVocab: "llama",
|
||||
BpeVocab: "gpt2",
|
||||
}.get(type(vocab))
|
||||
|
||||
# Block if vocab type is not predefined
|
||||
if tokenizer_model is None:
|
||||
raise ValueError("Unknown vocab type: Not supported")
|
||||
|
||||
return tokenizer_model
|
||||
|
||||
def extract_vocabulary_from_model(self, vocab: Vocab) -> tuple[list[bytes], list[float], list[gguf.TokenType]]:
|
||||
assert not isinstance(vocab, NoVocab)
|
||||
|
||||
tokens = []
|
||||
scores = []
|
||||
toktypes = []
|
||||
@@ -1043,11 +1051,8 @@ class OutputFile:
|
||||
return tokens, scores, toktypes
|
||||
|
||||
def add_meta_vocab(self, vocab: Vocab) -> None:
|
||||
# Handle the tokenizer model
|
||||
tokenizer_model = self.handle_tokenizer_model(vocab)
|
||||
|
||||
# Ensure that tokenizer_model is added to the GGUF model
|
||||
self.gguf.add_tokenizer_model(tokenizer_model)
|
||||
self.gguf.add_tokenizer_model(vocab.tokenizer_model)
|
||||
|
||||
# Extract model vocabulary for model conversion
|
||||
tokens, scores, toktypes = self.extract_vocabulary_from_model(vocab)
|
||||
@@ -1074,6 +1079,26 @@ class OutputFile:
|
||||
def write_tensor_info(self) -> None:
|
||||
self.gguf.write_ti_data_to_file()
|
||||
|
||||
def write_tensor_data(self, ftype: GGMLFileType, model: LazyModel, concurrency: int) -> None:
|
||||
ndarrays_inner = bounded_parallel_map(OutputFile.do_item, model.items(), concurrency=concurrency)
|
||||
if ftype == GGMLFileType.MostlyQ8_0:
|
||||
ndarrays = bounded_parallel_map(
|
||||
OutputFile.maybe_do_quantize, ndarrays_inner, concurrency=concurrency, max_workers=concurrency,
|
||||
use_processpool_executor=True,
|
||||
)
|
||||
else:
|
||||
ndarrays = map(OutputFile.maybe_do_quantize, ndarrays_inner)
|
||||
|
||||
start = time.time()
|
||||
for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)):
|
||||
elapsed = time.time() - start
|
||||
size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
|
||||
padi = len(str(len(model)))
|
||||
print(
|
||||
f"[{i + 1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
|
||||
)
|
||||
self.gguf.write_tensor_data(ndarray)
|
||||
|
||||
def close(self) -> None:
|
||||
self.gguf.close()
|
||||
|
||||
@@ -1082,7 +1107,7 @@ class OutputFile:
|
||||
fname_out: Path, params: Params, vocab: Vocab, svocab: gguf.SpecialVocab,
|
||||
endianess: gguf.GGUFEndian = gguf.GGUFEndian.LITTLE, pad_vocab: bool = False,
|
||||
) -> None:
|
||||
check_vocab_size(params, vocab, pad_vocab = pad_vocab)
|
||||
check_vocab_size(params, vocab, pad_vocab=pad_vocab)
|
||||
|
||||
of = OutputFile(fname_out, endianess=endianess)
|
||||
|
||||
@@ -1120,8 +1145,11 @@ class OutputFile:
|
||||
|
||||
# meta data
|
||||
of.add_meta_arch(params)
|
||||
of.add_meta_vocab(vocab)
|
||||
of.add_meta_special_vocab(svocab)
|
||||
if isinstance(vocab, NoVocab):
|
||||
of.gguf.add_tokenizer_model(vocab.tokenizer_model)
|
||||
else:
|
||||
of.add_meta_vocab(vocab)
|
||||
of.add_meta_special_vocab(svocab)
|
||||
|
||||
# tensor info
|
||||
for name, lazy_tensor in model.items():
|
||||
@@ -1131,24 +1159,7 @@ class OutputFile:
|
||||
of.write_tensor_info()
|
||||
|
||||
# tensor data
|
||||
ndarrays_inner = bounded_parallel_map(OutputFile.do_item, model.items(), concurrency = concurrency)
|
||||
if ftype == GGMLFileType.MostlyQ8_0:
|
||||
ndarrays = bounded_parallel_map(
|
||||
OutputFile.maybe_do_quantize, ndarrays_inner, concurrency=concurrency, max_workers=concurrency,
|
||||
use_processpool_executor=True,
|
||||
)
|
||||
else:
|
||||
ndarrays = map(OutputFile.maybe_do_quantize, ndarrays_inner)
|
||||
|
||||
start = time.time()
|
||||
for i, ((name, lazy_tensor), ndarray) in enumerate(zip(model.items(), ndarrays)):
|
||||
elapsed = time.time() - start
|
||||
size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
|
||||
padi = len(str(len(model)))
|
||||
print(
|
||||
f"[{i+1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
|
||||
)
|
||||
of.gguf.write_tensor_data(ndarray)
|
||||
of.write_tensor_data(ftype, model, concurrency)
|
||||
|
||||
of.close()
|
||||
|
||||
@@ -1309,8 +1320,8 @@ class VocabFactory:
|
||||
return vtype, path
|
||||
raise FileNotFoundError(f"Could not find any of {[self._FILES[vt] for vt in vocab_types]}")
|
||||
|
||||
def _create_special_vocab(self, vocab: Vocab, vocabtype: str, model_parent_path: Path) -> gguf.SpecialVocab:
|
||||
load_merges = vocabtype == "bpe"
|
||||
def _create_special_vocab(self, vocab: Vocab, model_parent_path: Path) -> gguf.SpecialVocab:
|
||||
load_merges = vocab.name == "bpe"
|
||||
n_vocab = vocab.vocab_size if hasattr(vocab, "vocab_size") else None
|
||||
return gguf.SpecialVocab(
|
||||
model_parent_path,
|
||||
@@ -1319,30 +1330,34 @@ class VocabFactory:
|
||||
n_vocab=n_vocab,
|
||||
)
|
||||
|
||||
def load_vocab(self, vocab_types: list[str], model_parent_path: Path) -> tuple[Vocab, gguf.SpecialVocab]:
|
||||
def _create_vocab_by_path(self, vocab_types: list[str]) -> Vocab:
|
||||
vocab_type, path = self._select_file(vocab_types)
|
||||
print(f"Loading vocab file {path!r}, type {vocab_type!r}")
|
||||
|
||||
added_tokens_path = path.parent / "added_tokens.json"
|
||||
vocab: Vocab
|
||||
if vocab_type == "bpe":
|
||||
vocab = BpeVocab(
|
||||
return BpeVocab(
|
||||
path, added_tokens_path if added_tokens_path.exists() else None
|
||||
)
|
||||
elif vocab_type == "spm":
|
||||
vocab = SentencePieceVocab(
|
||||
if vocab_type == "spm":
|
||||
return SentencePieceVocab(
|
||||
path, added_tokens_path if added_tokens_path.exists() else None
|
||||
)
|
||||
elif vocab_type == "hfft":
|
||||
vocab = HfVocab(
|
||||
if vocab_type == "hfft":
|
||||
return HfVocab(
|
||||
path.parent, added_tokens_path if added_tokens_path.exists() else None
|
||||
)
|
||||
raise ValueError(vocab_type)
|
||||
|
||||
def load_vocab(self, vocab_types: list[str], model_parent_path: Path) -> tuple[Vocab, gguf.SpecialVocab]:
|
||||
vocab: Vocab
|
||||
if len(vocab_types) == 1 and "no_vocab" in vocab_types:
|
||||
vocab = NoVocab()
|
||||
else:
|
||||
raise ValueError(vocab_type)
|
||||
vocab = self._create_vocab_by_path(vocab_types)
|
||||
# FIXME: Respect --vocab-dir?
|
||||
special_vocab = self._create_special_vocab(
|
||||
vocab,
|
||||
vocab_type,
|
||||
model_parent_path,
|
||||
)
|
||||
return vocab, special_vocab
|
||||
@@ -1380,6 +1395,7 @@ def main(args_in: list[str] | None = None) -> None:
|
||||
parser.add_argument("--dump", action="store_true", help="don't convert, just show what's in the model")
|
||||
parser.add_argument("--dump-single", action="store_true", help="don't convert, just show what's in a single model file")
|
||||
parser.add_argument("--vocab-only", action="store_true", help="extract only the vocab")
|
||||
parser.add_argument("--no-vocab", action="store_true", help="store model without the vocab")
|
||||
parser.add_argument("--outtype", choices=output_choices, help="output format - note: q8_0 may be very slow (default: f16 or f32 based on input)")
|
||||
parser.add_argument("--vocab-dir", type=Path, help="directory containing tokenizer.model, if separate from model file")
|
||||
parser.add_argument("--vocab-type", help="vocab types to try in order, choose from 'spm', 'bpe', 'hfft' (default: spm,hfft)", default="spm,hfft")
|
||||
@@ -1392,6 +1408,10 @@ def main(args_in: list[str] | None = None) -> None:
|
||||
parser.add_argument("--skip-unknown", action="store_true", help="skip unknown tensor names instead of failing")
|
||||
|
||||
args = parser.parse_args(args_in)
|
||||
if args.no_vocab:
|
||||
if args.vocab_only:
|
||||
raise ValueError("no need to specify --vocab-only if using --no-vocab")
|
||||
args.vocab_type = "no_vocab"
|
||||
|
||||
if args.dump_single:
|
||||
model_plus = lazy_load_file(args.model)
|
||||
@@ -1442,7 +1462,7 @@ def main(args_in: list[str] | None = None) -> None:
|
||||
print(f"Wrote {outfile}")
|
||||
return
|
||||
|
||||
if model_plus.vocab is not None and args.vocab_dir is None:
|
||||
if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
|
||||
vocab = model_plus.vocab
|
||||
|
||||
print(f"Vocab info: {vocab}")
|
||||
|
||||
@@ -106,7 +106,7 @@ int main(int argc, char ** argv) {
|
||||
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
||||
|
||||
// ensure enough sequences are available
|
||||
ctx_params.n_parallel = *std::max_element(n_pl.begin(), n_pl.end());
|
||||
ctx_params.n_seq_max = *std::max_element(n_pl.begin(), n_pl.end());
|
||||
|
||||
llama_context * ctx = llama_new_context_with_model(model, ctx_params);
|
||||
|
||||
@@ -138,6 +138,8 @@ int main(int argc, char ** argv) {
|
||||
LOG_TEE("failed to decode the batch, n_batch = %d, ret = %d\n", n_batch, ret);
|
||||
return false;
|
||||
}
|
||||
|
||||
llama_synchronize(ctx);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
@@ -80,7 +80,7 @@ int main(int argc, char ** argv) {
|
||||
ctx_params.seed = 1234;
|
||||
ctx_params.n_ctx = n_kv_req;
|
||||
ctx_params.n_batch = std::max(n_len, n_parallel);
|
||||
ctx_params.n_parallel = n_parallel;
|
||||
ctx_params.n_seq_max = n_parallel;
|
||||
ctx_params.n_threads = params.n_threads;
|
||||
ctx_params.n_threads_batch = params.n_threads_batch == -1 ? params.n_threads : params.n_threads_batch;
|
||||
|
||||
|
||||
@@ -107,18 +107,25 @@ int main(int argc, char ** argv) {
|
||||
|
||||
// max batch size
|
||||
const uint64_t n_batch = params.n_batch;
|
||||
GGML_ASSERT(params.n_batch == params.n_ctx);
|
||||
GGML_ASSERT(params.n_batch >= params.n_ctx);
|
||||
|
||||
// tokenize the prompts and trim
|
||||
std::vector<std::vector<int32_t>> inputs;
|
||||
for (const auto & prompt : prompts) {
|
||||
auto inp = ::llama_tokenize(ctx, prompt, true);
|
||||
auto inp = ::llama_tokenize(ctx, prompt, true, false);
|
||||
if (inp.size() > n_batch) {
|
||||
inp.resize(n_batch);
|
||||
}
|
||||
inputs.push_back(inp);
|
||||
}
|
||||
|
||||
// add eos if not present
|
||||
for (auto & inp : inputs) {
|
||||
if (inp.empty() || inp.back() != llama_token_eos(model)) {
|
||||
inp.push_back(llama_token_eos(model));
|
||||
}
|
||||
}
|
||||
|
||||
// tokenization stats
|
||||
if (params.verbose_prompt) {
|
||||
for (int i = 0; i < (int) inputs.size(); i++) {
|
||||
@@ -167,15 +174,26 @@ int main(int argc, char ** argv) {
|
||||
float * out = emb + p * n_embd;
|
||||
batch_decode(ctx, batch, out, s, n_embd);
|
||||
|
||||
// print first 3 embeddings
|
||||
for (int j = 0; j < std::min(3, n_prompts); j++) {
|
||||
fprintf(stderr, "embedding %d: ", j);
|
||||
for (int i = 0; i < n_embd; i++) {
|
||||
fprintf(stderr, "%f ", emb[j * n_embd + i]);
|
||||
// print the first part of the embeddings
|
||||
fprintf(stdout, "\n");
|
||||
for (int j = 0; j < n_prompts; j++) {
|
||||
fprintf(stdout, "embedding %d: ", j);
|
||||
for (int i = 0; i < std::min(16, n_embd); i++) {
|
||||
fprintf(stdout, "%9.6f ", emb[j * n_embd + i]);
|
||||
}
|
||||
fprintf(stderr, "\n\n");
|
||||
fprintf(stdout, "\n");
|
||||
}
|
||||
|
||||
// print cosine similarity matrix
|
||||
fprintf(stdout, "\n");
|
||||
printf("cosine similarity matrix:\n\n");
|
||||
for (int i = 0; i < n_prompts; i++) {
|
||||
for (int j = 0; j < n_prompts; j++) {
|
||||
float sim = llama_embd_similarity_cos(emb + i * n_embd, emb + j * n_embd, n_embd);
|
||||
fprintf(stdout, "%6.2f ", sim);
|
||||
}
|
||||
fprintf(stdout, "\n");
|
||||
}
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
// clean up
|
||||
llama_print_timings(ctx);
|
||||
|
||||
@@ -211,6 +211,7 @@ static bool gguf_ex_read_1(const std::string & fname) {
|
||||
for (int j = 0; j < ggml_nelements(cur); ++j) {
|
||||
if (data[j] != 100 + i) {
|
||||
fprintf(stderr, "%s: tensor[%d]: data[%d] = %f\n", __func__, i, j, data[j]);
|
||||
gguf_free(ctx);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,22 +6,6 @@
|
||||
|
||||
// #define GRIT_DEBUG
|
||||
|
||||
static float dot_product(const std::vector<float> & v1, const std::vector<float> & v2) {
|
||||
float dot = 0.0f;
|
||||
for (uint64_t i = 0; i < v1.size(); ++i) {
|
||||
dot += v1[i] * v2[i];
|
||||
}
|
||||
return dot;
|
||||
}
|
||||
|
||||
static float norm(const std::vector<float> & v) {
|
||||
return std::sqrt(dot_product(v, v));
|
||||
}
|
||||
|
||||
static float cosine_similarity(const std::vector<float> & v1, const std::vector<float> & v2) {
|
||||
return dot_product(v1, v2) / (norm(v1) * norm(v2));
|
||||
}
|
||||
|
||||
static std::vector<std::vector<float>> encode(llama_context * ctx, const std::vector<std::string> & sentences, const std::string & instruction) {
|
||||
std::vector<std::vector<float>> result;
|
||||
|
||||
@@ -203,10 +187,12 @@ int main(int argc, char * argv[]) {
|
||||
const std::vector<std::vector<float>> d_rep = encode(ctx, documents, gritlm_instruction(""));
|
||||
const std::vector<std::vector<float>> q_rep = encode(ctx, queries, gritlm_instruction(instruction));
|
||||
|
||||
const float cosine_sim_q0_d0 = cosine_similarity(q_rep[0], d_rep[0]);
|
||||
const float cosine_sim_q0_d1 = cosine_similarity(q_rep[0], d_rep[1]);
|
||||
const float cosine_sim_q1_d0 = cosine_similarity(q_rep[1], d_rep[0]);
|
||||
const float cosine_sim_q1_d1 = cosine_similarity(q_rep[1], d_rep[1]);
|
||||
const int n_embd = llama_n_embd(mdl);
|
||||
|
||||
const float cosine_sim_q0_d0 = llama_embd_similarity_cos(q_rep[0].data(), d_rep[0].data(), n_embd);
|
||||
const float cosine_sim_q0_d1 = llama_embd_similarity_cos(q_rep[0].data(), d_rep[1].data(), n_embd);
|
||||
const float cosine_sim_q1_d0 = llama_embd_similarity_cos(q_rep[1].data(), d_rep[0].data(), n_embd);
|
||||
const float cosine_sim_q1_d1 = llama_embd_similarity_cos(q_rep[1].data(), d_rep[1].data(), n_embd);
|
||||
|
||||
std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[0].c_str(), documents[0].c_str(), cosine_sim_q0_d0);
|
||||
std::printf("Cosine similarity between \"%.50s\" and \"%.50s\" is: %.3f\n", queries[0].c_str(), documents[1].c_str(), cosine_sim_q0_d1);
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <ctime>
|
||||
#include <cstdlib>
|
||||
#include <iterator>
|
||||
#include <map>
|
||||
#include <numeric>
|
||||
@@ -103,6 +104,7 @@ static std::string get_cpu_info() {
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
}
|
||||
#endif
|
||||
// TODO: other platforms
|
||||
@@ -164,6 +166,7 @@ struct cmd_params {
|
||||
std::vector<int> n_prompt;
|
||||
std::vector<int> n_gen;
|
||||
std::vector<int> n_batch;
|
||||
std::vector<int> n_ubatch;
|
||||
std::vector<ggml_type> type_k;
|
||||
std::vector<ggml_type> type_v;
|
||||
std::vector<int> n_threads;
|
||||
@@ -183,7 +186,8 @@ static const cmd_params cmd_params_defaults = {
|
||||
/* model */ {"models/7B/ggml-model-q4_0.gguf"},
|
||||
/* n_prompt */ {512},
|
||||
/* n_gen */ {128},
|
||||
/* n_batch */ {512},
|
||||
/* n_batch */ {2048},
|
||||
/* n_ubatch */ {512},
|
||||
/* type_k */ {GGML_TYPE_F16},
|
||||
/* type_v */ {GGML_TYPE_F16},
|
||||
/* n_threads */ {get_num_physical_cores()},
|
||||
@@ -208,6 +212,7 @@ static void print_usage(int /* argc */, char ** argv) {
|
||||
printf(" -p, --n-prompt <n> (default: %s)\n", join(cmd_params_defaults.n_prompt, ",").c_str());
|
||||
printf(" -n, --n-gen <n> (default: %s)\n", join(cmd_params_defaults.n_gen, ",").c_str());
|
||||
printf(" -b, --batch-size <n> (default: %s)\n", join(cmd_params_defaults.n_batch, ",").c_str());
|
||||
printf(" -ub N, --ubatch-size <n> (default: %s)\n", join(cmd_params_defaults.n_ubatch, ",").c_str());
|
||||
printf(" -ctk <t>, --cache-type-k <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_k, ggml_type_name), ",").c_str());
|
||||
printf(" -ctv <t>, --cache-type-v <t> (default: %s)\n", join(transform_to_str(cmd_params_defaults.type_v, ggml_type_name), ",").c_str());
|
||||
printf(" -t, --threads <n> (default: %s)\n", join(cmd_params_defaults.n_threads, ",").c_str());
|
||||
@@ -217,7 +222,7 @@ static void print_usage(int /* argc */, char ** argv) {
|
||||
printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
|
||||
printf(" -mmp, --mmap <0|1> (default: %s)\n", join(cmd_params_defaults.use_mmap, ",").c_str());
|
||||
printf(" -embd, --embeddings <0|1> (default: %s)\n", join(cmd_params_defaults.embeddings, ",").c_str());
|
||||
printf(" -ts, --tensor_split <ts0/ts1/..> (default: 0)\n");
|
||||
printf(" -ts, --tensor-split <ts0/ts1/..> (default: 0)\n");
|
||||
printf(" -r, --repetitions <n> (default: %d)\n", cmd_params_defaults.reps);
|
||||
printf(" -o, --output <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
|
||||
printf(" -v, --verbose (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
|
||||
@@ -297,6 +302,13 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
||||
}
|
||||
auto p = split<int>(argv[i], split_delim);
|
||||
params.n_batch.insert(params.n_batch.end(), p.begin(), p.end());
|
||||
} else if (arg == "-ub" || arg == "--ubatch-size") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
auto p = split<int>(argv[i], split_delim);
|
||||
params.n_ubatch.insert(params.n_ubatch.end(), p.begin(), p.end());
|
||||
} else if (arg == "-ctk" || arg == "--cache-type-k") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
@@ -455,6 +467,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
||||
if (params.n_prompt.empty()) { params.n_prompt = cmd_params_defaults.n_prompt; }
|
||||
if (params.n_gen.empty()) { params.n_gen = cmd_params_defaults.n_gen; }
|
||||
if (params.n_batch.empty()) { params.n_batch = cmd_params_defaults.n_batch; }
|
||||
if (params.n_ubatch.empty()) { params.n_ubatch = cmd_params_defaults.n_ubatch; }
|
||||
if (params.type_k.empty()) { params.type_k = cmd_params_defaults.type_k; }
|
||||
if (params.type_v.empty()) { params.type_v = cmd_params_defaults.type_v; }
|
||||
if (params.n_gpu_layers.empty()) { params.n_gpu_layers = cmd_params_defaults.n_gpu_layers; }
|
||||
@@ -474,6 +487,7 @@ struct cmd_params_instance {
|
||||
int n_prompt;
|
||||
int n_gen;
|
||||
int n_batch;
|
||||
int n_ubatch;
|
||||
ggml_type type_k;
|
||||
ggml_type type_v;
|
||||
int n_threads;
|
||||
@@ -511,6 +525,7 @@ struct cmd_params_instance {
|
||||
|
||||
cparams.n_ctx = n_prompt + n_gen;
|
||||
cparams.n_batch = n_batch;
|
||||
cparams.n_ubatch = n_ubatch;
|
||||
cparams.type_k = type_k;
|
||||
cparams.type_v = type_v;
|
||||
cparams.offload_kqv = !no_kv_offload;
|
||||
@@ -532,6 +547,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
||||
for (const auto & mmp : params.use_mmap)
|
||||
for (const auto & embd : params.embeddings)
|
||||
for (const auto & nb : params.n_batch)
|
||||
for (const auto & nub : params.n_ubatch)
|
||||
for (const auto & tk : params.type_k)
|
||||
for (const auto & tv : params.type_v)
|
||||
for (const auto & nkvo : params.no_kv_offload)
|
||||
@@ -545,6 +561,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
||||
/* .n_prompt = */ n_prompt,
|
||||
/* .n_gen = */ 0,
|
||||
/* .n_batch = */ nb,
|
||||
/* .n_ubatch = */ nub,
|
||||
/* .type_k = */ tk,
|
||||
/* .type_v = */ tv,
|
||||
/* .n_threads = */ nt,
|
||||
@@ -568,6 +585,7 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
|
||||
/* .n_prompt = */ 0,
|
||||
/* .n_gen = */ n_gen,
|
||||
/* .n_batch = */ nb,
|
||||
/* .n_ubatch = */ nub,
|
||||
/* .type_k = */ tk,
|
||||
/* .type_v = */ tv,
|
||||
/* .n_threads = */ nt,
|
||||
@@ -604,6 +622,7 @@ struct test {
|
||||
uint64_t model_size;
|
||||
uint64_t model_n_params;
|
||||
int n_batch;
|
||||
int n_ubatch;
|
||||
int n_threads;
|
||||
ggml_type type_k;
|
||||
ggml_type type_v;
|
||||
@@ -627,6 +646,7 @@ struct test {
|
||||
model_size = llama_model_size(lmodel);
|
||||
model_n_params = llama_model_n_params(lmodel);
|
||||
n_batch = inst.n_batch;
|
||||
n_ubatch = inst.n_ubatch;
|
||||
n_threads = inst.n_threads;
|
||||
type_k = inst.type_k;
|
||||
type_v = inst.type_v;
|
||||
@@ -705,7 +725,8 @@ struct test {
|
||||
"cuda", "opencl", "vulkan", "kompute", "metal", "sycl", "gpu_blas", "blas",
|
||||
"cpu_info", "gpu_info",
|
||||
"model_filename", "model_type", "model_size", "model_n_params",
|
||||
"n_batch", "n_threads", "type_k", "type_v",
|
||||
"n_batch", "n_ubatch",
|
||||
"n_threads", "type_k", "type_v",
|
||||
"n_gpu_layers", "split_mode",
|
||||
"main_gpu", "no_kv_offload",
|
||||
"tensor_split", "use_mmap", "embeddings",
|
||||
@@ -719,7 +740,8 @@ struct test {
|
||||
enum field_type {STRING, BOOL, INT, FLOAT};
|
||||
|
||||
static field_type get_field_type(const std::string & field) {
|
||||
if (field == "build_number" || field == "n_batch" || field == "n_threads" ||
|
||||
if (field == "build_number" || field == "n_batch" || field == "n_ubatch" ||
|
||||
field == "n_threads" ||
|
||||
field == "model_size" || field == "model_n_params" ||
|
||||
field == "n_gpu_layers" || field == "main_gpu" ||
|
||||
field == "n_prompt" || field == "n_gen" ||
|
||||
@@ -759,7 +781,8 @@ struct test {
|
||||
std::to_string(metal), std::to_string(sycl), std::to_string(gpu_blas), std::to_string(blas),
|
||||
cpu_info, gpu_info,
|
||||
model_filename, model_type, std::to_string(model_size), std::to_string(model_n_params),
|
||||
std::to_string(n_batch), std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v),
|
||||
std::to_string(n_batch), std::to_string(n_ubatch),
|
||||
std::to_string(n_threads), ggml_type_name(type_k), ggml_type_name(type_v),
|
||||
std::to_string(n_gpu_layers), split_mode_str(split_mode),
|
||||
std::to_string(main_gpu), std::to_string(no_kv_offload),
|
||||
tensor_split_str, std::to_string(use_mmap), std::to_string(embeddings),
|
||||
@@ -957,6 +980,9 @@ struct markdown_printer : public printer {
|
||||
if (params.n_batch.size() > 1 || params.n_batch != cmd_params_defaults.n_batch) {
|
||||
fields.emplace_back("n_batch");
|
||||
}
|
||||
if (params.n_ubatch.size() > 1 || params.n_ubatch != cmd_params_defaults.n_ubatch) {
|
||||
fields.emplace_back("n_ubatch");
|
||||
}
|
||||
if (params.type_k.size() > 1 || params.type_k != cmd_params_defaults.type_k) {
|
||||
fields.emplace_back("type_k");
|
||||
}
|
||||
@@ -1096,25 +1122,40 @@ struct sql_printer : public printer {
|
||||
};
|
||||
|
||||
static void test_prompt(llama_context * ctx, int n_prompt, int n_past, int n_batch, int n_threads) {
|
||||
std::vector<llama_token> tokens(n_batch, llama_token_bos(llama_get_model(ctx)));
|
||||
int n_processed = 0;
|
||||
|
||||
llama_set_n_threads(ctx, n_threads, n_threads);
|
||||
|
||||
const llama_model * model = llama_get_model(ctx);
|
||||
const int32_t n_vocab = llama_n_vocab(model);
|
||||
|
||||
std::vector<llama_token> tokens(n_batch);
|
||||
|
||||
int n_processed = 0;
|
||||
|
||||
while (n_processed < n_prompt) {
|
||||
int n_tokens = std::min(n_prompt - n_processed, n_batch);
|
||||
tokens[0] = n_processed == 0 && llama_add_bos_token(model) ? llama_token_bos(model) : std::rand() % n_vocab;
|
||||
for (int i = 1; i < n_tokens; i++) {
|
||||
tokens[i] = std::rand() % n_vocab;
|
||||
}
|
||||
llama_decode(ctx, llama_batch_get_one(tokens.data(), n_tokens, n_past + n_processed, 0));
|
||||
n_processed += n_tokens;
|
||||
}
|
||||
|
||||
llama_synchronize(ctx);
|
||||
}
|
||||
|
||||
static void test_gen(llama_context * ctx, int n_gen, int n_past, int n_threads) {
|
||||
llama_token token = llama_token_bos(llama_get_model(ctx));
|
||||
|
||||
llama_set_n_threads(ctx, n_threads, n_threads);
|
||||
|
||||
const llama_model * model = llama_get_model(ctx);
|
||||
const int32_t n_vocab = llama_n_vocab(model);
|
||||
|
||||
llama_token token = llama_add_bos_token(model) ? llama_token_bos(model) : std::rand() % n_vocab;
|
||||
|
||||
for (int i = 0; i < n_gen; i++) {
|
||||
llama_decode(ctx, llama_batch_get_one(&token, 1, n_past + i, 0));
|
||||
llama_synchronize(ctx);
|
||||
token = std::rand() % n_vocab;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1203,7 +1244,8 @@ int main(int argc, char ** argv) {
|
||||
|
||||
// warmup run
|
||||
if (t.n_prompt > 0) {
|
||||
test_prompt(ctx, std::min(2, t.n_batch), 0, t.n_batch, t.n_threads);
|
||||
//test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
|
||||
test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
|
||||
}
|
||||
if (t.n_gen > 0) {
|
||||
test_gen(ctx, 1, 0, t.n_threads);
|
||||
@@ -1219,6 +1261,7 @@ int main(int argc, char ** argv) {
|
||||
if (t.n_gen > 0) {
|
||||
test_gen(ctx, t.n_gen, t.n_prompt, t.n_threads);
|
||||
}
|
||||
|
||||
uint64_t t_ns = get_time_ns() - t_start;
|
||||
t.samples_ns.push_back(t_ns);
|
||||
}
|
||||
|
||||
@@ -221,6 +221,7 @@ actor LlamaContext {
|
||||
if llama_decode(context, batch) != 0 {
|
||||
print("llama_decode() failed during prompt")
|
||||
}
|
||||
llama_synchronize(context)
|
||||
|
||||
let t_pp_end = ggml_time_us()
|
||||
|
||||
@@ -240,6 +241,7 @@ actor LlamaContext {
|
||||
if llama_decode(context, batch) != 0 {
|
||||
print("llama_decode() failed during text generation")
|
||||
}
|
||||
llama_synchronize(context)
|
||||
}
|
||||
|
||||
let t_tg_end = ggml_time_us()
|
||||
|
||||
@@ -63,12 +63,20 @@ Now both the LLaMA part and the image encoder is in the `llava-v1.5-7b` director
|
||||
```console
|
||||
git clone https://huggingface.co/liuhaotian/llava-v1.6-vicuna-7b
|
||||
```
|
||||
2) Use `llava-surgery-v2.py` which also supports llava-1.5 variants pytorch as well as safetensor models:
|
||||
|
||||
2) Install the required Python packages:
|
||||
|
||||
```sh
|
||||
pip install -r examples/llava/requirements.txt
|
||||
```
|
||||
|
||||
3) Use `llava-surgery-v2.py` which also supports llava-1.5 variants pytorch as well as safetensor models:
|
||||
```console
|
||||
python examples/llava/llava-surgery-v2.py -C -m ../llava-v1.6-vicuna-7b/
|
||||
```
|
||||
- you will find a llava.projector and a llava.clip file in your model directory
|
||||
3) Copy the llava.clip file into a subdirectory (like vit), rename it to pytorch_model.bin and add a fitting vit configuration to the directory:
|
||||
|
||||
4) Copy the llava.clip file into a subdirectory (like vit), rename it to pytorch_model.bin and add a fitting vit configuration to the directory:
|
||||
```console
|
||||
mkdir vit
|
||||
cp ../llava-v1.6-vicuna-7b/llava.clip vit/pytorch_model.bin
|
||||
@@ -76,18 +84,18 @@ cp ../llava-v1.6-vicuna-7b/llava.projector vit/
|
||||
curl -s -q https://huggingface.co/cmp-nct/llava-1.6-gguf/raw/main/config_vit.json -o vit/config.json
|
||||
```
|
||||
|
||||
4) Create the visual gguf model:
|
||||
5) Create the visual gguf model:
|
||||
```console
|
||||
python ./examples/llava/convert-image-encoder-to-gguf.py -m vit --llava-projector vit/llava.projector --output-dir vit --clip-model-is-vision
|
||||
```
|
||||
- This is similar to llava-1.5, the difference is that we tell the encoder that we are working with the pure vision model part of CLIP
|
||||
|
||||
5) Then convert the model to gguf format:
|
||||
6) Then convert the model to gguf format:
|
||||
```console
|
||||
python ./convert.py ../llava-v1.6-vicuna-7b/ --skip-unknown
|
||||
```
|
||||
|
||||
6) And finally we can run the llava-cli using the 1.6 model version:
|
||||
7) And finally we can run the llava-cli using the 1.6 model version:
|
||||
```console
|
||||
./llava-cli -m ../llava-v1.6-vicuna-7b/ggml-model-f16.gguf --mmproj vit/mmproj-model-f16.gguf --image some-image.jpg -c 4096
|
||||
```
|
||||
|
||||
@@ -995,6 +995,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
if (!new_clip->ctx_data) {
|
||||
fprintf(stderr, "%s: ggml_init() failed\n", __func__);
|
||||
clip_free(new_clip);
|
||||
gguf_free(ctx);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -1002,6 +1003,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
if (!fin) {
|
||||
printf("cannot open model file for loading tensors\n");
|
||||
clip_free(new_clip);
|
||||
gguf_free(ctx);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
@@ -1023,6 +1025,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
|
||||
if (!fin) {
|
||||
printf("%s: failed to seek for tensor %s\n", __func__, name);
|
||||
clip_free(new_clip);
|
||||
gguf_free(ctx);
|
||||
return nullptr;
|
||||
}
|
||||
int num_bytes = ggml_nbytes(cur);
|
||||
@@ -1908,6 +1911,7 @@ bool clip_model_quantize(const char * fname_inp, const char * fname_out, const i
|
||||
break;
|
||||
default:
|
||||
printf("Please use an input file in f32 or f16\n");
|
||||
gguf_free(ctx_out);
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@@ -878,6 +878,7 @@ int main(int argc, char ** argv) {
|
||||
const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
|
||||
const auto line_inp = ::llama_tokenize(ctx, buffer, false, false);
|
||||
const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true);
|
||||
|
||||
LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
|
||||
|
||||
embd_inp.insert(embd_inp.end(), line_pfx.begin(), line_pfx.end());
|
||||
|
||||
@@ -589,9 +589,10 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
|
||||
}
|
||||
}
|
||||
|
||||
const auto t_end = std::chrono::high_resolution_clock::now();
|
||||
|
||||
if (i == 0) {
|
||||
llama_synchronize(ctx);
|
||||
const auto t_end = std::chrono::high_resolution_clock::now();
|
||||
const float t_total = std::chrono::duration<float>(t_end - t_start).count();
|
||||
fprintf(stderr, "%s: %.2f seconds per pass - ETA ", __func__, t_total);
|
||||
int total_seconds = (int)(t_total*n_chunk/n_seq);
|
||||
@@ -841,7 +842,7 @@ static void hellaswag_score(llama_context * ctx, const gpt_params & params) {
|
||||
const int n_batch = params.n_batch;
|
||||
|
||||
const int max_tasks_per_batch = 32;
|
||||
const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_max_seq(ctx));
|
||||
const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_seq_max(ctx));
|
||||
|
||||
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
|
||||
|
||||
@@ -1118,7 +1119,7 @@ static void winogrande_score(llama_context * ctx, const gpt_params & params) {
|
||||
const int n_batch = params.n_batch;
|
||||
|
||||
const int max_tasks_per_batch = 128;
|
||||
const int max_seq = std::min(2*max_tasks_per_batch, (int) llama_n_max_seq(ctx));
|
||||
const int max_seq = std::min(2*max_tasks_per_batch, (int) llama_n_seq_max(ctx));
|
||||
|
||||
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
|
||||
|
||||
@@ -1470,7 +1471,7 @@ static void multiple_choice_score(llama_context * ctx, const gpt_params & params
|
||||
const int n_batch = params.n_batch;
|
||||
|
||||
const int max_tasks_per_batch = 32;
|
||||
const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_max_seq(ctx));
|
||||
const int max_seq = std::min(4*max_tasks_per_batch, (int) llama_n_seq_max(ctx));
|
||||
|
||||
llama_batch batch = llama_batch_init(n_ctx, 0, max_seq);
|
||||
|
||||
|
||||
@@ -123,10 +123,10 @@ You can consume the endpoints with Postman or NodeJS with axios library. You can
|
||||
### Docker
|
||||
|
||||
```bash
|
||||
docker run -p 8080:8080 -v /path/to/models:/models ggerganov/llama.cpp:server -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080
|
||||
docker run -p 8080:8080 -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:server -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080
|
||||
|
||||
# or, with CUDA:
|
||||
docker run -p 8080:8080 -v /path/to/models:/models --gpus all ggerganov/llama.cpp:server-cuda -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080 --n-gpu-layers 99
|
||||
docker run -p 8080:8080 -v /path/to/models:/models --gpus all ghcr.io/ggerganov/llama.cpp:server-cuda -m models/7B/ggml-model.gguf -c 512 --host 0.0.0.0 --port 8080 --n-gpu-layers 99
|
||||
```
|
||||
|
||||
## Testing with CURL
|
||||
@@ -556,9 +556,51 @@ Run with bash:
|
||||
bash chat.sh
|
||||
```
|
||||
|
||||
### API like OAI
|
||||
### OAI-like API
|
||||
|
||||
The HTTP server supports OAI-like API
|
||||
The HTTP server supports an OAI-like API: https://github.com/openai/openai-openapi
|
||||
|
||||
### API errors
|
||||
|
||||
The server returns errors in the same format as OAI: https://github.com/openai/openai-openapi
|
||||
|
||||
Example of an error:
|
||||
|
||||
```json
|
||||
{
|
||||
"error": {
|
||||
"code": 401,
|
||||
"message": "Invalid API Key",
|
||||
"type": "authentication_error"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Apart from the error types supported by OAI, we also have custom types that are specific to llama.cpp functionality:
|
||||
|
||||
**When the /metrics or /slots endpoint is disabled**
|
||||
|
||||
```json
|
||||
{
|
||||
"error": {
|
||||
"code": 501,
|
||||
"message": "This server does not support metrics endpoint.",
|
||||
"type": "not_supported_error"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
**When the server receives an invalid grammar via the `*/completions` endpoint**
|
||||
|
||||
```json
|
||||
{
|
||||
"error": {
|
||||
"code": 400,
|
||||
"message": "Failed to parse grammar",
|
||||
"type": "invalid_request_error"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Extending or building alternative Web Front End
|
||||
|
||||
|
||||
+235
-234
@@ -231,255 +231,256 @@ unsigned char completion_js[] = {
|
||||
0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74,
|
||||
0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65,
|
||||
0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d,
|
||||
0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x70, 0x61, 0x72, 0x73, 0x65, 0x28,
|
||||
0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72,
|
||||
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c,
|
||||
0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x63, 0x6f, 0x6e, 0x74,
|
||||
0x65, 0x6e, 0x74, 0x2e, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x73,
|
||||
0x28, 0x27, 0x73, 0x6c, 0x6f, 0x74, 0x20, 0x75, 0x6e, 0x61, 0x76, 0x61,
|
||||
0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x27, 0x29, 0x29, 0x20, 0x7b, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x72,
|
||||
0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74,
|
||||
0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x3d, 0x20, 0x4a, 0x53, 0x4f,
|
||||
0x4e, 0x2e, 0x70, 0x61, 0x72, 0x73, 0x65, 0x28, 0x72, 0x65, 0x73, 0x75,
|
||||
0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e,
|
||||
0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67,
|
||||
0x65, 0x2e, 0x69, 0x6e, 0x63, 0x6c, 0x75, 0x64, 0x65, 0x73, 0x28, 0x27,
|
||||
0x73, 0x6c, 0x6f, 0x74, 0x20, 0x75, 0x6e, 0x61, 0x76, 0x61, 0x69, 0x6c,
|
||||
0x61, 0x62, 0x6c, 0x65, 0x27, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x2f, 0x2f, 0x20, 0x54, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x61,
|
||||
0x6e, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x74, 0x6f, 0x20, 0x62,
|
||||
0x65, 0x20, 0x63, 0x61, 0x75, 0x67, 0x68, 0x74, 0x20, 0x62, 0x79, 0x20,
|
||||
0x75, 0x70, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x20, 0x63, 0x61, 0x6c,
|
||||
0x6c, 0x65, 0x72, 0x73, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x68, 0x72, 0x6f, 0x77,
|
||||
0x20, 0x6e, 0x65, 0x77, 0x20, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x27,
|
||||
0x73, 0x6c, 0x6f, 0x74, 0x20, 0x75, 0x6e, 0x61, 0x76, 0x61, 0x69, 0x6c,
|
||||
0x61, 0x62, 0x6c, 0x65, 0x27, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x65, 0x6c,
|
||||
0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f,
|
||||
0x6c, 0x65, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c,
|
||||
0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70, 0x70, 0x20, 0x65, 0x72, 0x72, 0x6f,
|
||||
0x72, 0x3a, 0x20, 0x24, 0x7b, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e,
|
||||
0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e,
|
||||
0x74, 0x7d, 0x60, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x74, 0x68, 0x72,
|
||||
0x6f, 0x77, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x45, 0x72, 0x72, 0x6f, 0x72,
|
||||
0x28, 0x27, 0x73, 0x6c, 0x6f, 0x74, 0x20, 0x75, 0x6e, 0x61, 0x76, 0x61,
|
||||
0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x27, 0x29, 0x3b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x7d, 0x20, 0x65, 0x6c, 0x73, 0x65, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x65, 0x72, 0x72,
|
||||
0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70,
|
||||
0x70, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20, 0x5b, 0x24, 0x7b, 0x72,
|
||||
0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e,
|
||||
0x63, 0x6f, 0x64, 0x65, 0x7d, 0x20, 0x2d, 0x20, 0x24, 0x7b, 0x72, 0x65,
|
||||
0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x74,
|
||||
0x79, 0x70, 0x65, 0x7d, 0x5d, 0x3a, 0x20, 0x24, 0x7b, 0x72, 0x65, 0x73,
|
||||
0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x6d, 0x65,
|
||||
0x73, 0x73, 0x61, 0x67, 0x65, 0x7d, 0x60, 0x29, 0x3b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68, 0x28, 0x65, 0x29,
|
||||
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65,
|
||||
0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c, 0x61, 0x6d,
|
||||
0x61, 0x2e, 0x63, 0x70, 0x70, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x20,
|
||||
0x24, 0x7b, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72,
|
||||
0x6f, 0x72, 0x7d, 0x60, 0x29, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x72, 0x65,
|
||||
0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20,
|
||||
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65, 0x72, 0x72,
|
||||
0x6f, 0x72, 0x20, 0x3d, 0x20, 0x4a, 0x53, 0x4f, 0x4e, 0x2e, 0x70, 0x61,
|
||||
0x72, 0x73, 0x65, 0x28, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e, 0x65,
|
||||
0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f,
|
||||
0x6c, 0x65, 0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x60, 0x6c, 0x6c,
|
||||
0x61, 0x6d, 0x61, 0x2e, 0x63, 0x70, 0x70, 0x20, 0x65, 0x72, 0x72, 0x6f,
|
||||
0x72, 0x3a, 0x20, 0x24, 0x7b, 0x72, 0x65, 0x73, 0x75, 0x6c, 0x74, 0x2e,
|
||||
0x65, 0x72, 0x72, 0x6f, 0x72, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e,
|
||||
0x74, 0x7d, 0x60, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61,
|
||||
0x74, 0x63, 0x68, 0x20, 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65, 0x2e, 0x6e, 0x61, 0x6d, 0x65,
|
||||
0x20, 0x21, 0x3d, 0x3d, 0x20, 0x27, 0x41, 0x62, 0x6f, 0x72, 0x74, 0x45,
|
||||
0x72, 0x72, 0x6f, 0x72, 0x27, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65, 0x2e, 0x65,
|
||||
0x72, 0x72, 0x6f, 0x72, 0x28, 0x22, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x20,
|
||||
0x65, 0x72, 0x72, 0x6f, 0x72, 0x3a, 0x20, 0x22, 0x2c, 0x20, 0x65, 0x29,
|
||||
0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x74, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x65, 0x3b, 0x0a, 0x20, 0x20, 0x7d,
|
||||
0x0a, 0x20, 0x20, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x6c, 0x79, 0x20, 0x7b,
|
||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c,
|
||||
0x6c, 0x65, 0x72, 0x2e, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28, 0x29, 0x3b,
|
||||
0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75,
|
||||
0x72, 0x6e, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a,
|
||||
0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20, 0x6c,
|
||||
0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e,
|
||||
0x20, 0x61, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x20, 0x74, 0x61,
|
||||
0x72, 0x67, 0x65, 0x74, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x79, 0x6f,
|
||||
0x75, 0x20, 0x63, 0x61, 0x6e, 0x20, 0x73, 0x75, 0x62, 0x73, 0x63, 0x72,
|
||||
0x69, 0x62, 0x65, 0x20, 0x74, 0x6f, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f,
|
||||
0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a, 0x2f, 0x2f,
|
||||
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70, 0x6f, 0x72,
|
||||
0x74, 0x20, 0x7b, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65,
|
||||
0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x7d, 0x20, 0x66,
|
||||
0x72, 0x6f, 0x6d, 0x20, 0x27, 0x2f, 0x63, 0x6f, 0x6d, 0x70, 0x6c, 0x65,
|
||||
0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6a, 0x73, 0x27, 0x0a, 0x2f, 0x2f, 0x0a,
|
||||
0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
|
||||
0x63, 0x6f, 0x6e, 0x6e, 0x20, 0x3d, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61,
|
||||
0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28,
|
||||
0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20,
|
||||
0x20, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x2e, 0x61, 0x64, 0x64, 0x45, 0x76,
|
||||
0x65, 0x6e, 0x74, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65, 0x72, 0x28,
|
||||
0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20, 0x28,
|
||||
0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a,
|
||||
0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f, 0x63, 0x75,
|
||||
0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x65, 0x28, 0x63,
|
||||
0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x2e,
|
||||
0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20,
|
||||
0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x65, 0x78, 0x70,
|
||||
0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c,
|
||||
0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67,
|
||||
0x65, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74,
|
||||
0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20, 0x7b,
|
||||
0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d, 0x20,
|
||||
0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x63,
|
||||
0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61,
|
||||
0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x45,
|
||||
0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x28, 0x29,
|
||||
0x3b, 0x0a, 0x20, 0x20, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28,
|
||||
0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c,
|
||||
0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d,
|
||||
0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72,
|
||||
0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73,
|
||||
0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c,
|
||||
0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c,
|
||||
0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e,
|
||||
0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e,
|
||||
0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20,
|
||||
0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74,
|
||||
0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x20,
|
||||
0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x20,
|
||||
0x63, 0x61, 0x74, 0x63, 0x68, 0x20, 0x28, 0x65, 0x29, 0x20, 0x7b, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x65, 0x2e, 0x6e, 0x61,
|
||||
0x6d, 0x65, 0x20, 0x21, 0x3d, 0x3d, 0x20, 0x27, 0x41, 0x62, 0x6f, 0x72,
|
||||
0x74, 0x45, 0x72, 0x72, 0x6f, 0x72, 0x27, 0x29, 0x20, 0x7b, 0x0a, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x6f, 0x6c, 0x65,
|
||||
0x2e, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x28, 0x22, 0x6c, 0x6c, 0x61, 0x6d,
|
||||
0x61, 0x20, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x3a, 0x20, 0x22, 0x2c, 0x20,
|
||||
0x65, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x74, 0x68, 0x72, 0x6f, 0x77, 0x20, 0x65, 0x3b, 0x0a, 0x20,
|
||||
0x20, 0x7d, 0x0a, 0x20, 0x20, 0x66, 0x69, 0x6e, 0x61, 0x6c, 0x6c, 0x79,
|
||||
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72,
|
||||
0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2e, 0x61, 0x62, 0x6f, 0x72, 0x74, 0x28,
|
||||
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x0a, 0x20, 0x20, 0x72, 0x65,
|
||||
0x74, 0x75, 0x72, 0x6e, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
|
||||
0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c,
|
||||
0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75,
|
||||
0x72, 0x6e, 0x20, 0x61, 0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x20,
|
||||
0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20,
|
||||
0x79, 0x6f, 0x75, 0x20, 0x63, 0x61, 0x6e, 0x20, 0x73, 0x75, 0x62, 0x73,
|
||||
0x63, 0x72, 0x69, 0x62, 0x65, 0x20, 0x74, 0x6f, 0x0a, 0x2f, 0x2f, 0x0a,
|
||||
0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a,
|
||||
0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x69, 0x6d, 0x70,
|
||||
0x6f, 0x72, 0x74, 0x20, 0x7b, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45,
|
||||
0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x7d,
|
||||
0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x27, 0x2f, 0x63, 0x6f, 0x6d, 0x70,
|
||||
0x6c, 0x65, 0x74, 0x69, 0x6f, 0x6e, 0x2e, 0x6a, 0x73, 0x27, 0x0a, 0x2f,
|
||||
0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73,
|
||||
0x74, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x20, 0x3d, 0x20, 0x6c, 0x6c, 0x61,
|
||||
0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65,
|
||||
0x74, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f,
|
||||
0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x6e, 0x2e, 0x61, 0x64, 0x64,
|
||||
0x45, 0x76, 0x65, 0x6e, 0x74, 0x4c, 0x69, 0x73, 0x74, 0x65, 0x6e, 0x65,
|
||||
0x72, 0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c,
|
||||
0x20, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20,
|
||||
0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64, 0x6f,
|
||||
0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74, 0x65,
|
||||
0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x65, 0x74, 0x61, 0x69,
|
||||
0x6c, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f,
|
||||
0x2f, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f, 0x0a, 0x65,
|
||||
0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
|
||||
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61,
|
||||
0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d,
|
||||
0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d,
|
||||
0x20, 0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20,
|
||||
0x3d, 0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20,
|
||||
0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74,
|
||||
0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x20, 0x3d, 0x20, 0x6e, 0x65, 0x77,
|
||||
0x20, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74,
|
||||
0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63,
|
||||
0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
|
||||
0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x66,
|
||||
0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63, 0x6f,
|
||||
0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f, 0x66,
|
||||
0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70,
|
||||
0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63,
|
||||
0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e,
|
||||
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e,
|
||||
0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64,
|
||||
0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x3b,
|
||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65,
|
||||
0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73,
|
||||
0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e,
|
||||
0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65,
|
||||
0x6e, 0x74, 0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22,
|
||||
0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20,
|
||||
0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x7d,
|
||||
0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68,
|
||||
0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e,
|
||||
0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74,
|
||||
0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72,
|
||||
0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68,
|
||||
0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75,
|
||||
0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x67,
|
||||
0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65,
|
||||
0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64,
|
||||
0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b,
|
||||
0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61,
|
||||
0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67,
|
||||
0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20,
|
||||
0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e,
|
||||
0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74,
|
||||
0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61,
|
||||
0x74, 0x63, 0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77,
|
||||
0x20, 0x43, 0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74,
|
||||
0x28, 0x22, 0x6d, 0x65, 0x73, 0x73, 0x61, 0x67, 0x65, 0x22, 0x2c, 0x20,
|
||||
0x28, 0x22, 0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20,
|
||||
0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68,
|
||||
0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x20, 0x7d, 0x29, 0x29,
|
||||
0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63, 0x68, 0x75, 0x6e,
|
||||
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72,
|
||||
0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e,
|
||||
0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65,
|
||||
0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76,
|
||||
0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74,
|
||||
0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x67, 0x65, 0x6e,
|
||||
0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74,
|
||||
0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74,
|
||||
0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64,
|
||||
0x61, 0x74, 0x61, 0x2e, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69,
|
||||
0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20,
|
||||
0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d,
|
||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x63,
|
||||
0x68, 0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69,
|
||||
0x6d, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61,
|
||||
0x72, 0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63,
|
||||
0x68, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43,
|
||||
0x75, 0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22,
|
||||
0x74, 0x69, 0x6d, 0x69, 0x6e, 0x67, 0x73, 0x22, 0x2c, 0x20, 0x7b, 0x20,
|
||||
0x64, 0x65, 0x74, 0x61, 0x69, 0x6c, 0x3a, 0x20, 0x63, 0x68, 0x75, 0x6e,
|
||||
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d, 0x69, 0x6e,
|
||||
0x67, 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65,
|
||||
0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68, 0x45, 0x76,
|
||||
0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75, 0x73, 0x74,
|
||||
0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x64, 0x6f, 0x6e,
|
||||
0x65, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61, 0x69, 0x6c,
|
||||
0x3a, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20,
|
||||
0x7d, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x28,
|
||||
0x29, 0x3b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20,
|
||||
0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65, 0x74, 0x3b,
|
||||
0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c, 0x6c, 0x20,
|
||||
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72,
|
||||
0x6e, 0x20, 0x61, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x20,
|
||||
0x74, 0x68, 0x61, 0x74, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65,
|
||||
0x73, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6d,
|
||||
0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x20, 0x74, 0x65, 0x78, 0x74, 0x2e,
|
||||
0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, 0x6f, 0x65, 0x73, 0x20, 0x6e,
|
||||
0x6f, 0x74, 0x20, 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x73,
|
||||
0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x0a, 0x2f, 0x2f, 0x0a,
|
||||
0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65, 0x3a, 0x0a,
|
||||
0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x6c,
|
||||
0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70,
|
||||
0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28,
|
||||
0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20, 0x3d, 0x3e,
|
||||
0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69,
|
||||
0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a,
|
||||
0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a, 0x2f, 0x2f,
|
||||
0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x72, 0x0a, 0x2f,
|
||||
0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
|
||||
0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d,
|
||||
0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61,
|
||||
0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x72, 0x6f, 0x6d,
|
||||
0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x64,
|
||||
0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72, 0x69, 0x74,
|
||||
0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x0a, 0x2f,
|
||||
0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e,
|
||||
0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d,
|
||||
0x69, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70,
|
||||
0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20, 0x3d, 0x20,
|
||||
0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x20, 0x3d,
|
||||
0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6e, 0x65, 0x77, 0x20, 0x50,
|
||||
0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x61, 0x73, 0x79, 0x6e, 0x63,
|
||||
0x20, 0x28, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x2c, 0x20, 0x72,
|
||||
0x65, 0x6a, 0x65, 0x63, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74,
|
||||
0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x74, 0x69, 0x6d,
|
||||
0x69, 0x6e, 0x67, 0x73, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72,
|
||||
0x67, 0x65, 0x74, 0x2e, 0x64, 0x69, 0x73, 0x70, 0x61, 0x74, 0x63, 0x68,
|
||||
0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x6e, 0x65, 0x77, 0x20, 0x43, 0x75,
|
||||
0x73, 0x74, 0x6f, 0x6d, 0x45, 0x76, 0x65, 0x6e, 0x74, 0x28, 0x22, 0x64,
|
||||
0x6f, 0x6e, 0x65, 0x22, 0x2c, 0x20, 0x7b, 0x20, 0x64, 0x65, 0x74, 0x61,
|
||||
0x69, 0x6c, 0x3a, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e,
|
||||
0x74, 0x20, 0x7d, 0x20, 0x7d, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x7d,
|
||||
0x29, 0x28, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72,
|
||||
0x6e, 0x20, 0x65, 0x76, 0x65, 0x6e, 0x74, 0x54, 0x61, 0x72, 0x67, 0x65,
|
||||
0x74, 0x3b, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x43, 0x61, 0x6c,
|
||||
0x6c, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x2c, 0x20, 0x72, 0x65, 0x74,
|
||||
0x75, 0x72, 0x6e, 0x20, 0x61, 0x20, 0x70, 0x72, 0x6f, 0x6d, 0x69, 0x73,
|
||||
0x65, 0x20, 0x74, 0x68, 0x61, 0x74, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c,
|
||||
0x76, 0x65, 0x73, 0x20, 0x74, 0x6f, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63,
|
||||
0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74, 0x65, 0x64, 0x20, 0x74, 0x65, 0x78,
|
||||
0x74, 0x2e, 0x20, 0x54, 0x68, 0x69, 0x73, 0x20, 0x64, 0x6f, 0x65, 0x73,
|
||||
0x20, 0x6e, 0x6f, 0x74, 0x20, 0x73, 0x75, 0x70, 0x70, 0x6f, 0x72, 0x74,
|
||||
0x20, 0x73, 0x74, 0x72, 0x65, 0x61, 0x6d, 0x69, 0x6e, 0x67, 0x0a, 0x2f,
|
||||
0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x45, 0x78, 0x61, 0x6d, 0x70, 0x6c, 0x65,
|
||||
0x3a, 0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65,
|
||||
0x28, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x29, 0x2e, 0x74, 0x68, 0x65,
|
||||
0x6e, 0x28, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x20,
|
||||
0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77,
|
||||
0x72, 0x69, 0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
|
||||
0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x29, 0x0a,
|
||||
0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x6f, 0x72,
|
||||
0x0a, 0x2f, 0x2f, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63,
|
||||
0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74,
|
||||
0x20, 0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x6c, 0x6c, 0x61,
|
||||
0x6d, 0x61, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x70, 0x72,
|
||||
0x6f, 0x6d, 0x70, 0x74, 0x29, 0x0a, 0x2f, 0x2f, 0x20, 0x20, 0x20, 0x20,
|
||||
0x20, 0x64, 0x6f, 0x63, 0x75, 0x6d, 0x65, 0x6e, 0x74, 0x2e, 0x77, 0x72,
|
||||
0x69, 0x74, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29,
|
||||
0x0a, 0x2f, 0x2f, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63,
|
||||
0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x50, 0x72,
|
||||
0x6f, 0x6d, 0x69, 0x73, 0x65, 0x20, 0x3d, 0x20, 0x28, 0x70, 0x72, 0x6f,
|
||||
0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x20,
|
||||
0x3d, 0x20, 0x7b, 0x7d, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67,
|
||||
0x20, 0x3d, 0x20, 0x7b, 0x7d, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a,
|
||||
0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x6e, 0x65, 0x77,
|
||||
0x20, 0x50, 0x72, 0x6f, 0x6d, 0x69, 0x73, 0x65, 0x28, 0x61, 0x73, 0x79,
|
||||
0x6e, 0x63, 0x20, 0x28, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76, 0x65, 0x2c,
|
||||
0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x29, 0x20, 0x3d, 0x3e, 0x20,
|
||||
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x6c, 0x65, 0x74, 0x20, 0x63, 0x6f,
|
||||
0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x3d, 0x20, 0x22, 0x22, 0x3b, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x74, 0x72, 0x79, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69,
|
||||
0x74, 0x20, 0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75,
|
||||
0x6e, 0x6b, 0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28,
|
||||
0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61,
|
||||
0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29,
|
||||
0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63,
|
||||
0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68,
|
||||
0x75, 0x6e, 0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e,
|
||||
0x74, 0x65, 0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20,
|
||||
0x7d, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x6f,
|
||||
0x6c, 0x76, 0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29,
|
||||
0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63,
|
||||
0x68, 0x20, 0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74,
|
||||
0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x7d, 0x0a, 0x20, 0x20, 0x7d, 0x29, 0x3b, 0x0a, 0x7d, 0x3b, 0x0a,
|
||||
0x0a, 0x2f, 0x2a, 0x2a, 0x0a, 0x20, 0x2a, 0x20, 0x28, 0x64, 0x65, 0x70,
|
||||
0x72, 0x65, 0x63, 0x61, 0x74, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x2a, 0x2f,
|
||||
0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73,
|
||||
0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x43, 0x6f, 0x6d, 0x70, 0x6c,
|
||||
0x65, 0x74, 0x65, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20,
|
||||
0x28, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e,
|
||||
0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2c, 0x20, 0x63, 0x61, 0x6c,
|
||||
0x6c, 0x62, 0x61, 0x63, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a,
|
||||
0x20, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20,
|
||||
0x28, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b,
|
||||
0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x72,
|
||||
0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73,
|
||||
0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x29, 0x29, 0x20, 0x7b,
|
||||
0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x63, 0x6f, 0x6e,
|
||||
0x74, 0x65, 0x6e, 0x74, 0x20, 0x2b, 0x3d, 0x20, 0x63, 0x68, 0x75, 0x6e,
|
||||
0x6b, 0x2e, 0x64, 0x61, 0x74, 0x61, 0x2e, 0x63, 0x6f, 0x6e, 0x74, 0x65,
|
||||
0x6e, 0x74, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x7d, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x73, 0x6f, 0x6c, 0x76,
|
||||
0x65, 0x28, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x6e, 0x74, 0x29, 0x3b, 0x0a,
|
||||
0x20, 0x20, 0x20, 0x20, 0x7d, 0x20, 0x63, 0x61, 0x74, 0x63, 0x68, 0x20,
|
||||
0x28, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x20, 0x20, 0x72, 0x65, 0x6a, 0x65, 0x63, 0x74, 0x28, 0x65,
|
||||
0x72, 0x72, 0x6f, 0x72, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x7d,
|
||||
0x0a, 0x20, 0x20, 0x7d, 0x29, 0x3b, 0x0a, 0x7d, 0x3b, 0x0a, 0x0a, 0x2f,
|
||||
0x2a, 0x2a, 0x0a, 0x20, 0x2a, 0x20, 0x28, 0x64, 0x65, 0x70, 0x72, 0x65,
|
||||
0x63, 0x61, 0x74, 0x65, 0x64, 0x29, 0x0a, 0x20, 0x2a, 0x2f, 0x0a, 0x65,
|
||||
0x78, 0x70, 0x6f, 0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20,
|
||||
0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x43, 0x6f, 0x6d, 0x70, 0x6c, 0x65, 0x74,
|
||||
0x65, 0x20, 0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x70,
|
||||
0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x72,
|
||||
0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x2c, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62,
|
||||
0x61, 0x63, 0x6b, 0x29, 0x20, 0x3d, 0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x66, 0x6f, 0x72, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x28, 0x63,
|
||||
0x6f, 0x6e, 0x73, 0x74, 0x20, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x20, 0x6f,
|
||||
0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x61, 0x72, 0x61,
|
||||
0x6d, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c, 0x20, 0x70,
|
||||
0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x7b, 0x20, 0x63, 0x6f, 0x6e,
|
||||
0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x7d, 0x29, 0x29, 0x20,
|
||||
0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x61, 0x6c, 0x6c, 0x62, 0x61,
|
||||
0x63, 0x6b, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x3b, 0x0a, 0x20,
|
||||
0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x47, 0x65, 0x74,
|
||||
0x20, 0x74, 0x68, 0x65, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x6c, 0x20, 0x69,
|
||||
0x6e, 0x66, 0x6f, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74, 0x68, 0x65,
|
||||
0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x20, 0x54, 0x68, 0x69,
|
||||
0x73, 0x20, 0x69, 0x73, 0x20, 0x75, 0x73, 0x65, 0x66, 0x75, 0x6c, 0x20,
|
||||
0x66, 0x6f, 0x72, 0x20, 0x67, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x20,
|
||||
0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78, 0x74, 0x20,
|
||||
0x77, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x64, 0x20, 0x73,
|
||||
0x6f, 0x20, 0x6f, 0x6e, 0x2e, 0x0a, 0x65, 0x78, 0x70, 0x6f, 0x72, 0x74,
|
||||
0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61,
|
||||
0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x20, 0x3d, 0x20,
|
||||
0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d, 0x3e, 0x20,
|
||||
0x7b, 0x0a, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x67, 0x65, 0x6e,
|
||||
0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74,
|
||||
0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20,
|
||||
0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x20,
|
||||
0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x66, 0x65, 0x74, 0x63,
|
||||
0x68, 0x28, 0x22, 0x2f, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x22, 0x29, 0x2e,
|
||||
0x74, 0x68, 0x65, 0x6e, 0x28, 0x72, 0x20, 0x3d, 0x3e, 0x20, 0x72, 0x2e,
|
||||
0x6a, 0x73, 0x6f, 0x6e, 0x28, 0x29, 0x29, 0x3b, 0x0a, 0x20, 0x20, 0x20,
|
||||
0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f,
|
||||
0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d, 0x20, 0x70,
|
||||
0x72, 0x6f, 0x70, 0x73, 0x2e, 0x64, 0x65, 0x66, 0x61, 0x75, 0x6c, 0x74,
|
||||
0x5f, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f,
|
||||
0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x20, 0x20,
|
||||
0x7d, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e, 0x20, 0x67,
|
||||
0x20, 0x6f, 0x66, 0x20, 0x6c, 0x6c, 0x61, 0x6d, 0x61, 0x28, 0x70, 0x61,
|
||||
0x72, 0x61, 0x6d, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x6d, 0x70, 0x74, 0x2c,
|
||||
0x20, 0x70, 0x61, 0x72, 0x61, 0x6d, 0x73, 0x2c, 0x20, 0x7b, 0x20, 0x63,
|
||||
0x6f, 0x6e, 0x74, 0x72, 0x6f, 0x6c, 0x6c, 0x65, 0x72, 0x20, 0x7d, 0x29,
|
||||
0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x20, 0x20, 0x63, 0x61, 0x6c, 0x6c,
|
||||
0x62, 0x61, 0x63, 0x6b, 0x28, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x29, 0x3b,
|
||||
0x0a, 0x20, 0x20, 0x7d, 0x0a, 0x7d, 0x0a, 0x0a, 0x2f, 0x2f, 0x20, 0x47,
|
||||
0x65, 0x74, 0x20, 0x74, 0x68, 0x65, 0x20, 0x6d, 0x6f, 0x64, 0x65, 0x6c,
|
||||
0x20, 0x69, 0x6e, 0x66, 0x6f, 0x20, 0x66, 0x72, 0x6f, 0x6d, 0x20, 0x74,
|
||||
0x68, 0x65, 0x20, 0x73, 0x65, 0x72, 0x76, 0x65, 0x72, 0x2e, 0x20, 0x54,
|
||||
0x68, 0x69, 0x73, 0x20, 0x69, 0x73, 0x20, 0x75, 0x73, 0x65, 0x66, 0x75,
|
||||
0x6c, 0x20, 0x66, 0x6f, 0x72, 0x20, 0x67, 0x65, 0x74, 0x74, 0x69, 0x6e,
|
||||
0x67, 0x20, 0x74, 0x68, 0x65, 0x20, 0x63, 0x6f, 0x6e, 0x74, 0x65, 0x78,
|
||||
0x74, 0x20, 0x77, 0x69, 0x6e, 0x64, 0x6f, 0x77, 0x20, 0x61, 0x6e, 0x64,
|
||||
0x20, 0x73, 0x6f, 0x20, 0x6f, 0x6e, 0x2e, 0x0a, 0x65, 0x78, 0x70, 0x6f,
|
||||
0x72, 0x74, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x6c, 0x6c, 0x61,
|
||||
0x6d, 0x61, 0x4d, 0x6f, 0x64, 0x65, 0x6c, 0x49, 0x6e, 0x66, 0x6f, 0x20,
|
||||
0x3d, 0x20, 0x61, 0x73, 0x79, 0x6e, 0x63, 0x20, 0x28, 0x29, 0x20, 0x3d,
|
||||
0x3e, 0x20, 0x7b, 0x0a, 0x20, 0x20, 0x69, 0x66, 0x20, 0x28, 0x21, 0x67,
|
||||
0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f, 0x73, 0x65,
|
||||
0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a
|
||||
0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x29, 0x20, 0x7b, 0x0a, 0x20, 0x20,
|
||||
0x20, 0x20, 0x63, 0x6f, 0x6e, 0x73, 0x74, 0x20, 0x70, 0x72, 0x6f, 0x70,
|
||||
0x73, 0x20, 0x3d, 0x20, 0x61, 0x77, 0x61, 0x69, 0x74, 0x20, 0x66, 0x65,
|
||||
0x74, 0x63, 0x68, 0x28, 0x22, 0x2f, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x22,
|
||||
0x29, 0x2e, 0x74, 0x68, 0x65, 0x6e, 0x28, 0x72, 0x20, 0x3d, 0x3e, 0x20,
|
||||
0x72, 0x2e, 0x6a, 0x73, 0x6f, 0x6e, 0x28, 0x29, 0x29, 0x3b, 0x0a, 0x20,
|
||||
0x20, 0x20, 0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f,
|
||||
0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x20, 0x3d,
|
||||
0x20, 0x70, 0x72, 0x6f, 0x70, 0x73, 0x2e, 0x64, 0x65, 0x66, 0x61, 0x75,
|
||||
0x6c, 0x74, 0x5f, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f,
|
||||
0x6e, 0x5f, 0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a,
|
||||
0x20, 0x20, 0x7d, 0x0a, 0x20, 0x20, 0x72, 0x65, 0x74, 0x75, 0x72, 0x6e,
|
||||
0x20, 0x67, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x5f,
|
||||
0x73, 0x65, 0x74, 0x74, 0x69, 0x6e, 0x67, 0x73, 0x3b, 0x0a, 0x7d, 0x0a
|
||||
};
|
||||
unsigned int completion_js_len = 5782;
|
||||
unsigned int completion_js_len = 5796;
|
||||
|
||||
@@ -96,18 +96,18 @@ export async function* llama(prompt, params = {}, config = {}) {
|
||||
}
|
||||
}
|
||||
if (result.error) {
|
||||
result.error = JSON.parse(result.error);
|
||||
if (result.error.content.includes('slot unavailable')) {
|
||||
// Throw an error to be caught by upstream callers
|
||||
throw new Error('slot unavailable');
|
||||
} else {
|
||||
console.error(`llama.cpp error: ${result.error.content}`);
|
||||
try {
|
||||
result.error = JSON.parse(result.error);
|
||||
if (result.error.message.includes('slot unavailable')) {
|
||||
// Throw an error to be caught by upstream callers
|
||||
throw new Error('slot unavailable');
|
||||
} else {
|
||||
console.error(`llama.cpp error [${result.error.code} - ${result.error.type}]: ${result.error.message}`);
|
||||
}
|
||||
} catch(e) {
|
||||
console.error(`llama.cpp error ${result.error}`)
|
||||
}
|
||||
}
|
||||
if (result.error) {
|
||||
result.error = JSON.parse(result.error);
|
||||
console.error(`llama.cpp error: ${result.error.content}`);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+142
-128
@@ -147,7 +147,7 @@ struct server_slot {
|
||||
int32_t n_decoded = 0;
|
||||
int32_t n_remaining = -1;
|
||||
int32_t i_batch = -1;
|
||||
int32_t n_predict = -1;
|
||||
int32_t n_predict = -1; // TODO: disambiguate from params.n_predict
|
||||
|
||||
int32_t n_prompt_tokens = 0;
|
||||
int32_t n_prompt_tokens_processed = 0;
|
||||
@@ -396,7 +396,7 @@ struct server_queue {
|
||||
// callback functions
|
||||
std::function<void(server_task &)> callback_new_task;
|
||||
std::function<void(server_task_multi &)> callback_finish_multitask;
|
||||
std::function<void(void)> callback_run_slots;
|
||||
std::function<void(void)> callback_update_slots;
|
||||
|
||||
// Add a new task to the end of the queue
|
||||
int post(server_task task) {
|
||||
@@ -435,8 +435,8 @@ struct server_queue {
|
||||
}
|
||||
|
||||
// Register the function to be called when all slots data is ready to be processed
|
||||
void on_run_slots(std::function<void(void)> callback) {
|
||||
callback_run_slots = std::move(callback);
|
||||
void on_update_slots(std::function<void(void)> callback) {
|
||||
callback_update_slots = std::move(callback);
|
||||
}
|
||||
|
||||
// Call when the state of one slot is changed
|
||||
@@ -461,7 +461,7 @@ struct server_queue {
|
||||
* - Wait until a new task arrives
|
||||
* - Process the task (i.e. maybe copy data into slot)
|
||||
* - Check if multitask is finished
|
||||
* - Run all slots
|
||||
* - Update all slots
|
||||
*/
|
||||
void start_loop() {
|
||||
running = true;
|
||||
@@ -499,9 +499,9 @@ struct server_queue {
|
||||
}
|
||||
|
||||
// all tasks in the current loop is processed, slots data is now ready
|
||||
LOG_VERBOSE("callback_run_slots", {});
|
||||
LOG_VERBOSE("callback_update_slots", {});
|
||||
|
||||
callback_run_slots();
|
||||
callback_update_slots();
|
||||
|
||||
LOG_VERBOSE("wait for new task", {});
|
||||
{
|
||||
@@ -739,7 +739,13 @@ struct server_context {
|
||||
default_generation_settings_for_props = get_formated_generation(slots.front());
|
||||
default_generation_settings_for_props["seed"] = -1;
|
||||
|
||||
batch = llama_batch_init(n_ctx, 0, params.n_parallel);
|
||||
// the update_slots() logic will always submit a maximum of n_batch tokens
|
||||
// note that n_batch can be > n_ctx (e.g. for non-causal attention models such as BERT where the KV cache is not used)
|
||||
{
|
||||
const int32_t n_batch = llama_n_batch(ctx);
|
||||
|
||||
batch = llama_batch_init(n_batch, 0, params.n_parallel);
|
||||
}
|
||||
|
||||
metrics.init();
|
||||
}
|
||||
@@ -805,9 +811,10 @@ struct server_context {
|
||||
return last_used;
|
||||
}
|
||||
|
||||
bool launch_slot_with_data(server_slot & slot, json data) const {
|
||||
bool launch_slot_with_task(server_slot & slot, const server_task & task) {
|
||||
slot_params default_params;
|
||||
llama_sampling_params default_sparams;
|
||||
auto & data = task.data;
|
||||
|
||||
if (data.count("__oaicompat") != 0) {
|
||||
slot.oaicompat = true;
|
||||
@@ -864,10 +871,15 @@ struct server_context {
|
||||
{
|
||||
const auto & prompt = data.find("prompt");
|
||||
if (prompt == data.end()) {
|
||||
slot.prompt = "";
|
||||
send_error(task, "Either \"prompt\" or \"messages\" must be provided", ERROR_TYPE_INVALID_REQUEST);
|
||||
return false;
|
||||
} else {
|
||||
slot.prompt = *prompt;
|
||||
}
|
||||
if (slot.prompt.is_array() && slot.prompt.size() == 0) {
|
||||
send_error(task, "\"prompt\" cannot be an empty array", ERROR_TYPE_INVALID_REQUEST);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// penalize user-provided tokens
|
||||
@@ -926,6 +938,7 @@ struct server_context {
|
||||
if (logit_bias != data.end() && logit_bias->is_array()) {
|
||||
const int n_vocab = llama_n_vocab(model);
|
||||
for (const auto & el : *logit_bias) {
|
||||
// TODO: we may want to throw errors here, in case "el" is incorrect
|
||||
if (el.is_array() && el.size() == 2) {
|
||||
float bias;
|
||||
if (el[1].is_number()) {
|
||||
@@ -985,6 +998,11 @@ struct server_context {
|
||||
llama_sampling_free(slot.ctx_sampling);
|
||||
}
|
||||
slot.ctx_sampling = llama_sampling_init(slot.sparams);
|
||||
if (slot.ctx_sampling == nullptr) {
|
||||
// for now, the only error that may happen here is invalid grammar
|
||||
send_error(task, "Failed to parse grammar", ERROR_TYPE_INVALID_REQUEST);
|
||||
return false;
|
||||
}
|
||||
llama_set_rng_seed(ctx, slot.params.seed);
|
||||
}
|
||||
|
||||
@@ -1024,8 +1042,10 @@ struct server_context {
|
||||
llama_batch_add(batch, system_tokens[i], i, { 0 }, false);
|
||||
}
|
||||
|
||||
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch) {
|
||||
const int32_t n_tokens = std::min(params.n_batch, (int32_t) (batch.n_tokens - i));
|
||||
const int32_t n_batch = llama_n_batch(ctx);
|
||||
|
||||
for (int32_t i = 0; i < batch.n_tokens; i += n_batch) {
|
||||
const int32_t n_tokens = std::min(params.n_batch, batch.n_tokens - i);
|
||||
llama_batch batch_view = {
|
||||
n_tokens,
|
||||
batch.token + i,
|
||||
@@ -1214,7 +1234,7 @@ struct server_context {
|
||||
{"mirostat_eta", slot.sparams.mirostat_eta},
|
||||
{"penalize_nl", slot.sparams.penalize_nl},
|
||||
{"stop", slot.params.antiprompt},
|
||||
{"n_predict", slot.params.n_predict},
|
||||
{"n_predict", slot.params.n_predict}, // TODO: fix duplicate key n_predict
|
||||
{"n_keep", params.n_keep},
|
||||
{"ignore_eos", ignore_eos},
|
||||
{"stream", slot.params.stream},
|
||||
@@ -1226,15 +1246,23 @@ struct server_context {
|
||||
};
|
||||
}
|
||||
|
||||
void send_error(const server_task & task, const std::string & error) {
|
||||
LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
|
||||
void send_error(const server_task & task, const std::string & error, const enum error_type type = ERROR_TYPE_SERVER) {
|
||||
send_error(task.id, task.id_multi, error, type);
|
||||
}
|
||||
|
||||
void send_error(const server_slot & slot, const std::string & error, const enum error_type type = ERROR_TYPE_SERVER) {
|
||||
send_error(slot.id_task, slot.id_multi, error, type);
|
||||
}
|
||||
|
||||
void send_error(const int id_task, const int id_multi, const std::string & error, const enum error_type type = ERROR_TYPE_SERVER) {
|
||||
LOG_TEE("task %i - error: %s\n", id_task, error.c_str());
|
||||
|
||||
server_task_result res;
|
||||
res.id = task.id;
|
||||
res.id_multi = task.id_multi;
|
||||
res.id = id_task;
|
||||
res.id_multi = id_multi;
|
||||
res.stop = false;
|
||||
res.error = true;
|
||||
res.data = { { "content", error } };
|
||||
res.data = format_error_response(error, type);
|
||||
|
||||
queue_results.send(res);
|
||||
}
|
||||
@@ -1468,9 +1496,8 @@ struct server_context {
|
||||
slot->infill = task.infill;
|
||||
slot->embedding = task.embedding;
|
||||
|
||||
if (!launch_slot_with_data(*slot, task.data)) {
|
||||
// send error result
|
||||
send_error(task, "internal_error");
|
||||
if (!launch_slot_with_task(*slot, task)) {
|
||||
LOG_ERROR("error while launching slot", task.data);
|
||||
break;
|
||||
}
|
||||
} break;
|
||||
@@ -1587,7 +1614,7 @@ struct server_context {
|
||||
queue_results.send(result);
|
||||
}
|
||||
|
||||
bool update_slots() {
|
||||
void update_slots() {
|
||||
if (system_need_update) {
|
||||
system_prompt_update();
|
||||
}
|
||||
@@ -1630,7 +1657,7 @@ struct server_context {
|
||||
kv_cache_clear();
|
||||
}
|
||||
|
||||
return true;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1719,7 +1746,8 @@ struct server_context {
|
||||
}
|
||||
|
||||
// process in chunks of params.n_batch
|
||||
int32_t n_batch = params.n_batch;
|
||||
int32_t n_batch = llama_n_batch(ctx);
|
||||
int32_t n_ubatch = llama_n_ubatch(ctx);
|
||||
|
||||
// next, batch any pending prompts without exceeding n_batch
|
||||
if (params.cont_batching || batch.n_tokens == 0) {
|
||||
@@ -1792,7 +1820,7 @@ struct server_context {
|
||||
|
||||
if (slot.embedding) {
|
||||
// this prompt is too large to process - discard it
|
||||
if (slot.n_prompt_tokens > n_batch) {
|
||||
if (slot.n_prompt_tokens > n_ubatch) {
|
||||
slot.state = SLOT_STATE_PROCESSING;
|
||||
slot.command = SLOT_COMMAND_NONE;
|
||||
slot.release();
|
||||
@@ -1975,8 +2003,7 @@ struct server_context {
|
||||
|
||||
if (batch.n_tokens == 0) {
|
||||
LOG_VERBOSE("no tokens to decode", {});
|
||||
|
||||
return true;
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_VERBOSE("decoding batch", {
|
||||
@@ -2033,7 +2060,13 @@ struct server_context {
|
||||
if (n_batch == 1 || ret < 0) {
|
||||
// if you get here, it means the KV cache is full - try increasing it via the context size
|
||||
LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
|
||||
return false;
|
||||
for (auto & slot : slots) {
|
||||
slot.state = SLOT_STATE_PROCESSING;
|
||||
slot.command = SLOT_COMMAND_NONE;
|
||||
slot.release();
|
||||
send_error(slot, "Input prompt is too big compared to KV size. Please try increasing KV size.");
|
||||
}
|
||||
break; // break loop of n_batch
|
||||
}
|
||||
|
||||
LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
|
||||
@@ -2042,12 +2075,12 @@ struct server_context {
|
||||
n_batch /= 2;
|
||||
i -= n_batch;
|
||||
|
||||
continue;
|
||||
continue; // continue loop of n_batch
|
||||
}
|
||||
|
||||
for (auto & slot : slots) {
|
||||
if (slot.state != SLOT_STATE_PROCESSING || slot.i_batch < (int) i || slot.i_batch >= (int) (i + n_tokens)) {
|
||||
continue;
|
||||
continue; // continue loop of slots
|
||||
}
|
||||
|
||||
// prompt evaluated for embedding
|
||||
@@ -2055,7 +2088,7 @@ struct server_context {
|
||||
send_embedding(slot, batch_view);
|
||||
slot.release();
|
||||
slot.i_batch = -1;
|
||||
continue;
|
||||
continue; // continue loop of slots
|
||||
}
|
||||
|
||||
completion_token_output result;
|
||||
@@ -2097,9 +2130,7 @@ struct server_context {
|
||||
}
|
||||
}
|
||||
|
||||
LOG_VERBOSE("slots updated", {});
|
||||
|
||||
return true;
|
||||
LOG_VERBOSE("run slots completed", {});
|
||||
}
|
||||
|
||||
json model_meta() const {
|
||||
@@ -2135,7 +2166,8 @@ static void server_print_usage(const char * argv0, const gpt_params & params, co
|
||||
printf(" --pooling {none,mean,cls} pooling type for embeddings, use model default if unspecified\n");
|
||||
printf(" -dt N, --defrag-thold N\n");
|
||||
printf(" KV cache defragmentation threshold (default: %.1f, < 0 - disabled)\n", params.defrag_thold);
|
||||
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
|
||||
printf(" -b N, --batch-size N logical maximum batch size (default: %d)\n", params.n_batch);
|
||||
printf(" -ub N, --ubatch-size N physical maximum batch size (default: %d)\n", params.n_ubatch);
|
||||
printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
|
||||
printf(" not recommended: doubles context memory required and no measurable increase in quality\n");
|
||||
if (llama_supports_mlock()) {
|
||||
@@ -2402,6 +2434,12 @@ static void server_params_parse(int argc, char ** argv, server_params & sparams,
|
||||
break;
|
||||
}
|
||||
params.n_batch = std::stoi(argv[i]);
|
||||
} else if (arg == "-ub" || arg == "--ubatch-size") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
break;
|
||||
}
|
||||
params.n_ubatch = std::stoi(argv[i]);
|
||||
} else if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers") {
|
||||
if (++i >= argc) {
|
||||
invalid_param = true;
|
||||
@@ -2741,36 +2779,37 @@ int main(int argc, char ** argv) {
|
||||
res.set_header("Access-Control-Allow-Credentials", "true");
|
||||
res.set_header("Access-Control-Allow-Methods", "POST");
|
||||
res.set_header("Access-Control-Allow-Headers", "*");
|
||||
return res.set_content("", "application/json; charset=utf-8");
|
||||
});
|
||||
|
||||
svr->set_logger(log_server_request);
|
||||
|
||||
svr->set_exception_handler([](const httplib::Request &, httplib::Response & res, std::exception_ptr ep) {
|
||||
const char fmt[] = "500 Internal Server Error\n%s";
|
||||
auto res_error = [](httplib::Response & res, json error_data) {
|
||||
json final_response {{"error", error_data}};
|
||||
res.set_content(final_response.dump(), "application/json; charset=utf-8");
|
||||
res.status = json_value(error_data, "code", 500);
|
||||
};
|
||||
|
||||
char buf[BUFSIZ];
|
||||
svr->set_exception_handler([&res_error](const httplib::Request &, httplib::Response & res, std::exception_ptr ep) {
|
||||
std::string message;
|
||||
try {
|
||||
std::rethrow_exception(std::move(ep));
|
||||
} catch (std::exception &e) {
|
||||
snprintf(buf, sizeof(buf), fmt, e.what());
|
||||
} catch (std::exception & e) {
|
||||
message = e.what();
|
||||
} catch (...) {
|
||||
snprintf(buf, sizeof(buf), fmt, "Unknown Exception");
|
||||
message = "Unknown Exception";
|
||||
}
|
||||
|
||||
res.set_content(buf, "text/plain; charset=utf-8");
|
||||
res.status = 500;
|
||||
json formatted_error = format_error_response(message, ERROR_TYPE_SERVER);
|
||||
LOG_VERBOSE("Got exception", formatted_error);
|
||||
res_error(res, formatted_error);
|
||||
});
|
||||
|
||||
svr->set_error_handler([](const httplib::Request &, httplib::Response & res) {
|
||||
if (res.status == 401) {
|
||||
res.set_content("Unauthorized", "text/plain; charset=utf-8");
|
||||
}
|
||||
if (res.status == 400) {
|
||||
res.set_content("Invalid request", "text/plain; charset=utf-8");
|
||||
}
|
||||
svr->set_error_handler([&res_error](const httplib::Request &, httplib::Response & res) {
|
||||
if (res.status == 404) {
|
||||
res.set_content("File Not Found", "text/plain; charset=utf-8");
|
||||
res_error(res, format_error_response("File Not Found", ERROR_TYPE_NOT_FOUND));
|
||||
}
|
||||
// for other error codes, we skip processing here because it's already done by res_error()
|
||||
});
|
||||
|
||||
// set timeouts and change hostname and port
|
||||
@@ -2835,7 +2874,7 @@ int main(int argc, char ** argv) {
|
||||
// Middlewares
|
||||
//
|
||||
|
||||
auto middleware_validate_api_key = [&sparams](const httplib::Request & req, httplib::Response & res) {
|
||||
auto middleware_validate_api_key = [&sparams, &res_error](const httplib::Request & req, httplib::Response & res) {
|
||||
// TODO: should we apply API key to all endpoints, including "/health" and "/models"?
|
||||
static const std::set<std::string> protected_endpoints = {
|
||||
"/props",
|
||||
@@ -2876,8 +2915,7 @@ int main(int argc, char ** argv) {
|
||||
// API key is invalid or not provided
|
||||
// TODO: make another middleware for CORS related logic
|
||||
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
|
||||
res.set_content("Unauthorized: Invalid API Key", "text/plain; charset=utf-8");
|
||||
res.status = 401; // Unauthorized
|
||||
res_error(res, format_error_response("Invalid API Key", ERROR_TYPE_AUTHENTICATION));
|
||||
|
||||
LOG_WARNING("Unauthorized: Invalid API Key", {});
|
||||
|
||||
@@ -2940,21 +2978,18 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
case SERVER_STATE_LOADING_MODEL:
|
||||
{
|
||||
res.set_content(R"({"status": "loading model"})", "application/json");
|
||||
res.status = 503; // HTTP Service Unavailable
|
||||
res_error(res, format_error_response("Loading model", ERROR_TYPE_UNAVAILABLE));
|
||||
} break;
|
||||
case SERVER_STATE_ERROR:
|
||||
{
|
||||
res.set_content(R"({"status": "error", "error": "Model failed to load"})", "application/json");
|
||||
res.status = 500; // HTTP Internal Server Error
|
||||
res_error(res, format_error_response("Model failed to load", ERROR_TYPE_SERVER));
|
||||
} break;
|
||||
}
|
||||
};
|
||||
|
||||
const auto handle_slots = [&](const httplib::Request &, httplib::Response & res) {
|
||||
if (!sparams.slots_endpoint) {
|
||||
res.status = 501;
|
||||
res.set_content("This server does not support slots endpoint.", "text/plain; charset=utf-8");
|
||||
res_error(res, format_error_response("This server does not support slots endpoint.", ERROR_TYPE_NOT_SUPPORTED));
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -2978,8 +3013,7 @@ int main(int argc, char ** argv) {
|
||||
|
||||
const auto handle_metrics = [&](const httplib::Request &, httplib::Response & res) {
|
||||
if (!sparams.metrics_endpoint) {
|
||||
res.status = 501;
|
||||
res.set_content("This server does not support metrics endpoint.", "text/plain; charset=utf-8");
|
||||
res_error(res, format_error_response("This server does not support metrics endpoint.", ERROR_TYPE_NOT_SUPPORTED));
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -3090,7 +3124,7 @@ int main(int argc, char ** argv) {
|
||||
res.set_content(data.dump(), "application/json; charset=utf-8");
|
||||
};
|
||||
|
||||
const auto handle_completions = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
|
||||
const auto handle_completions = [&ctx_server, &res_error](const httplib::Request & req, httplib::Response & res) {
|
||||
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
|
||||
|
||||
json data = json::parse(req.body);
|
||||
@@ -3105,8 +3139,7 @@ int main(int argc, char ** argv) {
|
||||
if (!result.error && result.stop) {
|
||||
res.set_content(result.data.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8");
|
||||
} else {
|
||||
res.status = 500;
|
||||
res.set_content(result.data["content"], "text/plain; charset=utf-8");
|
||||
res_error(res, result.data);
|
||||
}
|
||||
|
||||
ctx_server.queue_results.remove_waiting_task_id(id_task);
|
||||
@@ -3186,7 +3219,7 @@ int main(int argc, char ** argv) {
|
||||
res.set_content(models.dump(), "application/json; charset=utf-8");
|
||||
};
|
||||
|
||||
const auto handle_chat_completions = [&ctx_server, &sparams](const httplib::Request & req, httplib::Response & res) {
|
||||
const auto handle_chat_completions = [&ctx_server, &sparams, &res_error](const httplib::Request & req, httplib::Response & res) {
|
||||
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
|
||||
json data = oaicompat_completion_params_parse(ctx_server.model, json::parse(req.body), sparams.chat_template);
|
||||
|
||||
@@ -3195,24 +3228,24 @@ int main(int argc, char ** argv) {
|
||||
ctx_server.queue_results.add_waiting_task_id(id_task);
|
||||
ctx_server.request_completion(id_task, -1, data, false, false);
|
||||
|
||||
const auto completion_id = gen_chatcmplid();
|
||||
if (!json_value(data, "stream", false)) {
|
||||
server_task_result result = ctx_server.queue_results.recv(id_task);
|
||||
|
||||
if (!result.error && result.stop) {
|
||||
json result_oai = format_final_response_oaicompat(data, result.data);
|
||||
json result_oai = format_final_response_oaicompat(data, result.data, completion_id);
|
||||
|
||||
res.set_content(result_oai.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8");
|
||||
} else {
|
||||
res.status = 500;
|
||||
res.set_content(result.data["content"], "text/plain; charset=utf-8");
|
||||
res_error(res, result.data);
|
||||
}
|
||||
ctx_server.queue_results.remove_waiting_task_id(id_task);
|
||||
} else {
|
||||
const auto chunked_content_provider = [id_task, &ctx_server](size_t, httplib::DataSink & sink) {
|
||||
const auto chunked_content_provider = [id_task, &ctx_server, completion_id](size_t, httplib::DataSink & sink) {
|
||||
while (true) {
|
||||
server_task_result result = ctx_server.queue_results.recv(id_task);
|
||||
if (!result.error) {
|
||||
std::vector<json> result_array = format_partial_response_oaicompat(result.data);
|
||||
std::vector<json> result_array = format_partial_response_oaicompat(result.data, completion_id);
|
||||
|
||||
for (auto it = result_array.begin(); it != result_array.end(); ++it) {
|
||||
if (!it->empty()) {
|
||||
@@ -3258,7 +3291,7 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
};
|
||||
|
||||
const auto handle_infill = [&ctx_server](const httplib::Request & req, httplib::Response & res) {
|
||||
const auto handle_infill = [&ctx_server, &res_error](const httplib::Request & req, httplib::Response & res) {
|
||||
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
|
||||
|
||||
json data = json::parse(req.body);
|
||||
@@ -3273,8 +3306,7 @@ int main(int argc, char ** argv) {
|
||||
if (!result.error && result.stop) {
|
||||
res.set_content(result.data.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8");
|
||||
} else {
|
||||
res.status = 404;
|
||||
res.set_content(result.data["content"], "text/plain; charset=utf-8");
|
||||
res_error(res, result.data);
|
||||
}
|
||||
|
||||
ctx_server.queue_results.remove_waiting_task_id(id_task);
|
||||
@@ -3345,7 +3377,7 @@ int main(int argc, char ** argv) {
|
||||
return res.set_content(data.dump(), "application/json; charset=utf-8");
|
||||
};
|
||||
|
||||
const auto handle_embeddings = [¶ms, &ctx_server](const httplib::Request & req, httplib::Response & res) {
|
||||
const auto handle_embeddings = [¶ms, &ctx_server, &res_error](const httplib::Request & req, httplib::Response & res) {
|
||||
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
|
||||
if (!params.embedding) {
|
||||
res.status = 501;
|
||||
@@ -3356,65 +3388,58 @@ int main(int argc, char ** argv) {
|
||||
const json body = json::parse(req.body);
|
||||
bool is_openai = false;
|
||||
|
||||
// an input prompt can string or a list of tokens (integer)
|
||||
std::vector<json> prompts;
|
||||
// an input prompt can be a string or a list of tokens (integer)
|
||||
json prompt;
|
||||
if (body.count("input") != 0) {
|
||||
is_openai = true;
|
||||
if (body["input"].is_array()) {
|
||||
// support multiple prompts
|
||||
for (const json & elem : body["input"]) {
|
||||
prompts.push_back(elem);
|
||||
}
|
||||
} else {
|
||||
// single input prompt
|
||||
prompts.push_back(body["input"]);
|
||||
}
|
||||
prompt = body["input"];
|
||||
} else if (body.count("content") != 0) {
|
||||
// only support single prompt here
|
||||
std::string content = body["content"];
|
||||
prompts.push_back(content);
|
||||
// with "content", we only support single prompt
|
||||
prompt = std::vector<std::string>{body["content"]};
|
||||
} else {
|
||||
// TODO @ngxson : should return an error here
|
||||
prompts.push_back("");
|
||||
res_error(res, format_error_response("\"input\" or \"content\" must be provided", ERROR_TYPE_INVALID_REQUEST));
|
||||
return;
|
||||
}
|
||||
|
||||
// process all prompts
|
||||
json responses = json::array();
|
||||
for (auto & prompt : prompts) {
|
||||
// TODO @ngxson : maybe support multitask for this endpoint?
|
||||
// create and queue the task
|
||||
// create and queue the task
|
||||
json responses;
|
||||
{
|
||||
const int id_task = ctx_server.queue_tasks.get_new_id();
|
||||
|
||||
ctx_server.queue_results.add_waiting_task_id(id_task);
|
||||
ctx_server.request_completion(id_task, -1, { {"prompt", prompt}, { "n_predict", 0}}, false, true);
|
||||
ctx_server.request_completion(id_task, -1, {{"prompt", prompt}}, false, true);
|
||||
|
||||
// get the result
|
||||
server_task_result result = ctx_server.queue_results.recv(id_task);
|
||||
ctx_server.queue_results.remove_waiting_task_id(id_task);
|
||||
|
||||
// append to the responses
|
||||
responses.push_back(result.data);
|
||||
if (!result.error) {
|
||||
if (result.data.count("results")) {
|
||||
// result for multi-task
|
||||
responses = result.data["results"];
|
||||
} else {
|
||||
// result for single task
|
||||
responses = std::vector<json>{result.data};
|
||||
}
|
||||
} else {
|
||||
// error received, ignore everything else
|
||||
res_error(res, result.data);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// write JSON response
|
||||
json root;
|
||||
if (is_openai) {
|
||||
json res_oai = json::array();
|
||||
int i = 0;
|
||||
for (auto & elem : responses) {
|
||||
res_oai.push_back(json{
|
||||
{"embedding", json_value(elem, "embedding", json::array())},
|
||||
{"index", i++},
|
||||
{"object", "embedding"}
|
||||
});
|
||||
}
|
||||
root = format_embeddings_response_oaicompat(body, res_oai);
|
||||
} else {
|
||||
root = responses[0];
|
||||
}
|
||||
json root = is_openai
|
||||
? format_embeddings_response_oaicompat(body, responses)
|
||||
: responses[0];
|
||||
return res.set_content(root.dump(), "application/json; charset=utf-8");
|
||||
};
|
||||
|
||||
auto handle_static_file = [](unsigned char * content, size_t len, const char * mime_type) {
|
||||
return [content, len, mime_type](const httplib::Request &, httplib::Response & res) {
|
||||
res.set_content(reinterpret_cast<const char*>(content), len, mime_type);
|
||||
return false;
|
||||
};
|
||||
};
|
||||
|
||||
//
|
||||
// Router
|
||||
//
|
||||
@@ -3426,17 +3451,6 @@ int main(int argc, char ** argv) {
|
||||
}
|
||||
|
||||
// using embedded static files
|
||||
auto handle_static_file = [](unsigned char * content, size_t len, const char * mime_type) {
|
||||
return [content, len, mime_type](const httplib::Request &, httplib::Response & res) {
|
||||
res.set_content(reinterpret_cast<const char*>(content), len, mime_type);
|
||||
return false;
|
||||
};
|
||||
};
|
||||
|
||||
svr->Options(R"(/.*)", [](const httplib::Request &, httplib::Response & res) {
|
||||
// TODO @ngxson : I have no idea what it is... maybe this is redundant?
|
||||
return res.set_content("", "application/json; charset=utf-8");
|
||||
});
|
||||
svr->Get("/", handle_static_file(index_html, index_html_len, "text/html; charset=utf-8"));
|
||||
svr->Get("/index.js", handle_static_file(index_js, index_js_len, "text/javascript; charset=utf-8"));
|
||||
svr->Get("/completion.js", handle_static_file(completion_js, completion_js_len, "text/javascript; charset=utf-8"));
|
||||
@@ -3487,7 +3501,7 @@ int main(int argc, char ** argv) {
|
||||
&server_context::process_single_task, &ctx_server, std::placeholders::_1));
|
||||
ctx_server.queue_tasks.on_finish_multitask(std::bind(
|
||||
&server_context::on_finish_multitask, &ctx_server, std::placeholders::_1));
|
||||
ctx_server.queue_tasks.on_run_slots(std::bind(
|
||||
ctx_server.queue_tasks.on_update_slots(std::bind(
|
||||
&server_context::update_slots, &ctx_server));
|
||||
ctx_server.queue_results.on_multitask_update(std::bind(
|
||||
&server_queue::update_multitask,
|
||||
|
||||
@@ -9,6 +9,7 @@ Feature: llama.cpp server
|
||||
And 42 as server seed
|
||||
And 2 slots
|
||||
And 1024 as batch size
|
||||
And 1024 as ubatch size
|
||||
And 2048 KV cache size
|
||||
And embeddings extraction
|
||||
Then the server is starting
|
||||
|
||||
@@ -33,6 +33,7 @@ def step_server_config(context, server_fqdn, server_port):
|
||||
|
||||
context.model_alias = None
|
||||
context.n_batch = None
|
||||
context.n_ubatch = None
|
||||
context.n_ctx = None
|
||||
context.n_ga = None
|
||||
context.n_ga_w = None
|
||||
@@ -118,6 +119,10 @@ def step_server_metrics(context):
|
||||
def step_start_server(context):
|
||||
start_server_background(context)
|
||||
attempts = 0
|
||||
max_attempts = 20
|
||||
if 'GITHUB_ACTIONS' in os.environ:
|
||||
max_attempts *= 2
|
||||
|
||||
while True:
|
||||
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
|
||||
result = sock.connect_ex((context.server_fqdn, context.server_port))
|
||||
@@ -125,7 +130,7 @@ def step_start_server(context):
|
||||
print("\x1b[33;46mserver started!\x1b[0m")
|
||||
return
|
||||
attempts += 1
|
||||
if attempts > 20:
|
||||
if attempts > max_attempts:
|
||||
assert False, "server not started"
|
||||
print(f"waiting for server to start, connect error code = {result}...")
|
||||
time.sleep(0.1)
|
||||
@@ -278,6 +283,11 @@ def step_n_batch(context, n_batch):
|
||||
context.n_batch = n_batch
|
||||
|
||||
|
||||
@step('{n_ubatch:d} as ubatch size')
|
||||
def step_n_ubatch(context, n_ubatch):
|
||||
context.n_ubatch = n_ubatch
|
||||
|
||||
|
||||
@step('{seed:d} as seed')
|
||||
def step_seed(context, seed):
|
||||
context.seed = seed
|
||||
@@ -801,7 +811,7 @@ async def oai_chat_completions(user_prompt,
|
||||
stream=enable_streaming,
|
||||
seed=seed
|
||||
)
|
||||
except openai.error.APIError as e:
|
||||
except openai.error.AuthenticationError as e:
|
||||
if expect_api_error is not None and expect_api_error:
|
||||
return 401
|
||||
else:
|
||||
@@ -937,6 +947,9 @@ async def wait_for_health_status(context,
|
||||
print(f"Starting checking for health for expected_health_status={expected_health_status}\n")
|
||||
interval = 0.5
|
||||
counter = 0
|
||||
if 'GITHUB_ACTIONS' in os.environ:
|
||||
timeout *= 2
|
||||
|
||||
async with aiohttp.ClientSession() as session:
|
||||
while True:
|
||||
async with await session.get(f'{base_url}/health', params=params) as health_response:
|
||||
@@ -1029,6 +1042,8 @@ def start_server_background(context):
|
||||
]
|
||||
if context.n_batch:
|
||||
server_args.extend(['--batch-size', context.n_batch])
|
||||
if context.n_ubatch:
|
||||
server_args.extend(['--ubatch-size', context.n_ubatch])
|
||||
if context.n_gpu_layer:
|
||||
server_args.extend(['--n-gpu-layers', context.n_gpu_layer])
|
||||
if context.server_continuous_batching:
|
||||
|
||||
@@ -14,6 +14,17 @@
|
||||
|
||||
using json = nlohmann::json;
|
||||
|
||||
// https://community.openai.com/t/openai-chat-list-of-error-codes-and-types/357791/11
|
||||
enum error_type {
|
||||
ERROR_TYPE_INVALID_REQUEST,
|
||||
ERROR_TYPE_AUTHENTICATION,
|
||||
ERROR_TYPE_SERVER,
|
||||
ERROR_TYPE_NOT_FOUND,
|
||||
ERROR_TYPE_PERMISSION,
|
||||
ERROR_TYPE_UNAVAILABLE, // custom error
|
||||
ERROR_TYPE_NOT_SUPPORTED, // custom error
|
||||
};
|
||||
|
||||
extern bool server_verbose;
|
||||
extern bool server_log_json;
|
||||
|
||||
@@ -378,7 +389,7 @@ static json oaicompat_completion_params_parse(
|
||||
return llama_params;
|
||||
}
|
||||
|
||||
static json format_final_response_oaicompat(const json & request, json result, bool streaming = false) {
|
||||
static json format_final_response_oaicompat(const json & request, json result, const std::string & completion_id, bool streaming = false) {
|
||||
bool stopped_word = result.count("stopped_word") != 0;
|
||||
bool stopped_eos = json_value(result, "stopped_eos", false);
|
||||
int num_tokens_predicted = json_value(result, "tokens_predicted", 0);
|
||||
@@ -412,7 +423,7 @@ static json format_final_response_oaicompat(const json & request, json result, b
|
||||
{"prompt_tokens", num_prompt_tokens},
|
||||
{"total_tokens", num_tokens_predicted + num_prompt_tokens}
|
||||
}},
|
||||
{"id", gen_chatcmplid()}
|
||||
{"id", completion_id}
|
||||
};
|
||||
|
||||
if (server_verbose) {
|
||||
@@ -427,7 +438,7 @@ static json format_final_response_oaicompat(const json & request, json result, b
|
||||
}
|
||||
|
||||
// return value is vector as there is one case where we might need to generate two responses
|
||||
static std::vector<json> format_partial_response_oaicompat(json result) {
|
||||
static std::vector<json> format_partial_response_oaicompat(json result, const std::string & completion_id) {
|
||||
if (!result.contains("model") || !result.contains("oaicompat_token_ctr")) {
|
||||
return std::vector<json>({result});
|
||||
}
|
||||
@@ -471,7 +482,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
|
||||
{"role", "assistant"}
|
||||
}}}})},
|
||||
{"created", t},
|
||||
{"id", gen_chatcmplid()},
|
||||
{"id", completion_id},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}};
|
||||
|
||||
@@ -482,7 +493,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
|
||||
{"content", content}}}
|
||||
}})},
|
||||
{"created", t},
|
||||
{"id", gen_chatcmplid()},
|
||||
{"id", completion_id},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}};
|
||||
|
||||
@@ -509,7 +520,7 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
|
||||
json ret = json {
|
||||
{"choices", choices},
|
||||
{"created", t},
|
||||
{"id", gen_chatcmplid()},
|
||||
{"id", completion_id},
|
||||
{"model", modelname},
|
||||
{"object", "chat.completion.chunk"}
|
||||
};
|
||||
@@ -518,6 +529,16 @@ static std::vector<json> format_partial_response_oaicompat(json result) {
|
||||
}
|
||||
|
||||
static json format_embeddings_response_oaicompat(const json & request, const json & embeddings) {
|
||||
json data = json::array();
|
||||
int i = 0;
|
||||
for (auto & elem : embeddings) {
|
||||
data.push_back(json{
|
||||
{"embedding", json_value(elem, "embedding", json::array())},
|
||||
{"index", i++},
|
||||
{"object", "embedding"}
|
||||
});
|
||||
}
|
||||
|
||||
json res = json {
|
||||
{"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
|
||||
{"object", "list"},
|
||||
@@ -525,7 +546,7 @@ static json format_embeddings_response_oaicompat(const json & request, const jso
|
||||
{"prompt_tokens", 0},
|
||||
{"total_tokens", 0}
|
||||
}},
|
||||
{"data", embeddings}
|
||||
{"data", data}
|
||||
};
|
||||
|
||||
return res;
|
||||
@@ -542,3 +563,43 @@ static json format_detokenized_response(const std::string & content) {
|
||||
{"content", content}
|
||||
};
|
||||
}
|
||||
|
||||
static json format_error_response(const std::string & message, const enum error_type type) {
|
||||
std::string type_str;
|
||||
int code = 500;
|
||||
switch (type) {
|
||||
case ERROR_TYPE_INVALID_REQUEST:
|
||||
type_str = "invalid_request_error";
|
||||
code = 400;
|
||||
break;
|
||||
case ERROR_TYPE_AUTHENTICATION:
|
||||
type_str = "authentication_error";
|
||||
code = 401;
|
||||
break;
|
||||
case ERROR_TYPE_NOT_FOUND:
|
||||
type_str = "not_found_error";
|
||||
code = 404;
|
||||
break;
|
||||
case ERROR_TYPE_SERVER:
|
||||
type_str = "server_error";
|
||||
code = 500;
|
||||
break;
|
||||
case ERROR_TYPE_PERMISSION:
|
||||
type_str = "permission_error";
|
||||
code = 403;
|
||||
break;
|
||||
case ERROR_TYPE_NOT_SUPPORTED:
|
||||
type_str = "not_supported_error";
|
||||
code = 501;
|
||||
break;
|
||||
case ERROR_TYPE_UNAVAILABLE:
|
||||
type_str = "unavailable_error";
|
||||
code = 503;
|
||||
break;
|
||||
}
|
||||
return json {
|
||||
{"code", code},
|
||||
{"message", message},
|
||||
{"type", type_str},
|
||||
};
|
||||
}
|
||||
|
||||
@@ -13,8 +13,11 @@ source /opt/intel/oneapi/setvars.sh
|
||||
#for FP32
|
||||
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||
|
||||
#build example/main only
|
||||
#build example/main
|
||||
#cmake --build . --config Release --target main
|
||||
|
||||
#build example/llama-bench
|
||||
#cmake --build . --config Release --target llama-bench
|
||||
|
||||
#build all binary
|
||||
cmake --build . --config Release -v
|
||||
|
||||
@@ -9,18 +9,28 @@ source /opt/intel/oneapi/setvars.sh
|
||||
|
||||
if [ $# -gt 0 ]; then
|
||||
GGML_SYCL_DEVICE=$1
|
||||
GGML_SYCL_SINGLE_GPU=1
|
||||
else
|
||||
GGML_SYCL_DEVICE=0
|
||||
fi
|
||||
echo "use $GGML_SYCL_DEVICE as main GPU"
|
||||
|
||||
#export GGML_SYCL_DEBUG=1
|
||||
|
||||
|
||||
#ZES_ENABLE_SYSMAN=1, Support to get free memory of GPU by sycl::aspect::ext_intel_free_memory. Recommended to use when --split-mode = layer.
|
||||
|
||||
#use all GPUs with same max compute units
|
||||
ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
|
||||
if [ $GGML_SYCL_SINGLE_GPU -eq 1 ]; then
|
||||
echo "use $GGML_SYCL_DEVICE as main GPU"
|
||||
#use signle GPU only
|
||||
ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none
|
||||
else
|
||||
#use multiple GPUs with same max compute units
|
||||
ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
|
||||
fi
|
||||
|
||||
#use main GPU only
|
||||
#ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0 -mg $GGML_SYCL_DEVICE -sm none
|
||||
|
||||
#use multiple GPUs with same max compute units
|
||||
#ZES_ENABLE_SYSMAN=1 ./build/bin/main -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
|
||||
|
||||
|
||||
@@ -711,6 +711,7 @@ static bool load_checkpoint_file(const char * filename, struct my_llama_model *
|
||||
|
||||
load_checkpoint_gguf(fctx, f_ggml_ctx, model, train);
|
||||
|
||||
gguf_free(fctx);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
+45
-64
@@ -61,7 +61,6 @@ static bool ggml_op_can_inplace(enum ggml_op op) {
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: GGML_PAD ?
|
||||
static size_t aligned_offset(const void * buffer, size_t offset, size_t alignment) {
|
||||
assert(alignment && !(alignment & (alignment - 1))); // power of 2
|
||||
size_t align = (alignment - (((uintptr_t)buffer + offset) % alignment)) % alignment;
|
||||
@@ -69,25 +68,14 @@ static size_t aligned_offset(const void * buffer, size_t offset, size_t alignmen
|
||||
}
|
||||
|
||||
// tallocr
|
||||
struct ggml_tallocr {
|
||||
ggml_backend_buffer_t buffer;
|
||||
void * base;
|
||||
size_t alignment;
|
||||
size_t offset;
|
||||
};
|
||||
|
||||
ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer) {
|
||||
ggml_tallocr_t talloc = malloc(sizeof(struct ggml_tallocr));
|
||||
if (talloc == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer) {
|
||||
void * base = ggml_backend_buffer_get_base(buffer);
|
||||
size_t align = ggml_backend_buffer_get_alignment(buffer);
|
||||
|
||||
assert(align && !(align & (align - 1))); // power of 2
|
||||
|
||||
*talloc = (struct ggml_tallocr) {
|
||||
struct ggml_tallocr talloc = (struct ggml_tallocr) {
|
||||
/*.buffer = */ buffer,
|
||||
/*.base = */ base,
|
||||
/*.alignment = */ align,
|
||||
@@ -96,11 +84,7 @@ ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer) {
|
||||
return talloc;
|
||||
}
|
||||
|
||||
void ggml_tallocr_free(ggml_tallocr_t talloc) {
|
||||
free(talloc);
|
||||
}
|
||||
|
||||
void ggml_tallocr_alloc(ggml_tallocr_t talloc, struct ggml_tensor * tensor) {
|
||||
void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor) {
|
||||
size_t size = ggml_backend_buffer_get_alloc_size(talloc->buffer, tensor);
|
||||
size = GGML_PAD(size, talloc->alignment);
|
||||
|
||||
@@ -354,12 +338,16 @@ struct hash_node {
|
||||
bool allocated;
|
||||
};
|
||||
|
||||
//
|
||||
struct tensor_alloc {
|
||||
size_t offset;
|
||||
size_t size_max; // 0 = pre-allocated, unused, or view
|
||||
};
|
||||
|
||||
struct leaf_alloc {
|
||||
int buffer_id;
|
||||
struct tensor_alloc leaf;
|
||||
};
|
||||
|
||||
struct node_alloc {
|
||||
int buffer_id;
|
||||
struct tensor_alloc dst;
|
||||
@@ -378,7 +366,7 @@ struct ggml_gallocr {
|
||||
struct node_alloc * node_allocs; // [n_nodes]
|
||||
int n_nodes;
|
||||
|
||||
struct tensor_alloc * leaf_allocs; // [n_leafs]
|
||||
struct leaf_alloc * leaf_allocs; // [n_leafs]
|
||||
int n_leafs;
|
||||
};
|
||||
|
||||
@@ -543,13 +531,20 @@ static int get_node_buffer_id(const int * node_buffer_ids, int i) {
|
||||
return node_buffer_ids ? node_buffer_ids[i] : 0;
|
||||
}
|
||||
|
||||
static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids) {
|
||||
static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
|
||||
// clear hash tables
|
||||
memset(galloc->hash_set.keys, 0, galloc->hash_set.size * sizeof(struct ggml_tensor *));
|
||||
memset(galloc->hash_values, 0, galloc->hash_set.size * sizeof(struct hash_node));
|
||||
|
||||
// allocate leafs
|
||||
// these may be tensors that the application is not using in the graph, but may still want to allocate for other purposes
|
||||
for (int i = 0; i < graph->n_leafs; i++) {
|
||||
struct ggml_tensor * leaf = graph->leafs[i];
|
||||
ggml_gallocr_allocate_node(galloc, leaf, get_node_buffer_id(leaf_buffer_ids, i));
|
||||
}
|
||||
|
||||
// count number of children and views
|
||||
// allocate all graph inputs and leafs first to avoid overwriting them
|
||||
// allocate other graph inputs and leafs first to avoid overwriting them
|
||||
for (int i = 0; i < graph->n_nodes; i++) {
|
||||
struct ggml_tensor * node = graph->nodes[i];
|
||||
|
||||
@@ -577,19 +572,6 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
|
||||
}
|
||||
}
|
||||
|
||||
// allocate the remaining leafs that are unused on the graph
|
||||
// these are effectively static tensors that the application is not using in the graph, but may still want to allocate for other purposes
|
||||
for (int i = 0; i < graph->n_leafs; i++) {
|
||||
struct ggml_tensor * leaf = graph->leafs[i];
|
||||
struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
|
||||
|
||||
if (hn->n_children == 0) {
|
||||
assert(!hn->allocated);
|
||||
// since buffer ids are only given for nodes, these leafs are always allocated in the first buffer
|
||||
ggml_gallocr_allocate_node(galloc, leaf, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// allocate tensors
|
||||
for (int i = 0; i < graph->n_nodes; i++) {
|
||||
struct ggml_tensor * node = graph->nodes[i];
|
||||
@@ -652,7 +634,7 @@ static void ggml_gallocr_alloc_graph_impl(ggml_gallocr_t galloc, struct ggml_cgr
|
||||
}
|
||||
}
|
||||
|
||||
bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids) {
|
||||
bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids, const int * leaf_buffer_ids) {
|
||||
size_t hash_size = graph->visited_hash_table.size;
|
||||
|
||||
// initialize hash table
|
||||
@@ -676,7 +658,7 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
|
||||
}
|
||||
|
||||
// allocate in hash table
|
||||
ggml_gallocr_alloc_graph_impl(galloc, graph, node_buffer_ids);
|
||||
ggml_gallocr_alloc_graph_impl(galloc, graph, node_buffer_ids, leaf_buffer_ids);
|
||||
|
||||
// set the node_allocs from the hash table
|
||||
if (galloc->n_nodes < graph->n_nodes) {
|
||||
@@ -711,15 +693,16 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
|
||||
}
|
||||
if (galloc->n_leafs < graph->n_leafs) {
|
||||
free(galloc->leaf_allocs);
|
||||
galloc->leaf_allocs = calloc(sizeof(struct tensor_alloc), graph->n_leafs);
|
||||
galloc->leaf_allocs = calloc(sizeof(galloc->leaf_allocs[0]), graph->n_leafs);
|
||||
GGML_ASSERT(galloc->leaf_allocs != NULL);
|
||||
}
|
||||
galloc->n_leafs = graph->n_leafs;
|
||||
for (int i = 0; i < graph->n_leafs; i++) {
|
||||
struct ggml_tensor * leaf = graph->leafs[i];
|
||||
struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
|
||||
galloc->leaf_allocs[i].offset = hn->offset;
|
||||
galloc->leaf_allocs[i].size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
|
||||
galloc->leaf_allocs[i].buffer_id = hn->buffer_id;
|
||||
galloc->leaf_allocs[i].leaf.offset = hn->offset;
|
||||
galloc->leaf_allocs[i].leaf.size_max = ggml_backend_buft_get_alloc_size(galloc->bufts[hn->buffer_id], leaf);
|
||||
}
|
||||
|
||||
// reallocate buffers if needed
|
||||
@@ -727,7 +710,8 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
|
||||
size_t cur_size = galloc->buffers[i] ? ggml_backend_buffer_get_size(galloc->buffers[i]) : 0;
|
||||
size_t new_size = ggml_dyn_tallocr_max_size(galloc->buf_tallocs[i]);
|
||||
|
||||
if (new_size > cur_size) {
|
||||
// even if there are no tensors allocated in this buffer, we still need to allocate it to initialize views
|
||||
if (new_size > cur_size || galloc->buffers[i] == NULL) {
|
||||
#ifndef NDEBUG
|
||||
fprintf(stderr, "%s: reallocating %s buffer from size %.02f MiB to %.02f MiB\n", __func__, ggml_backend_buft_name(galloc->bufts[i]), cur_size / 1024.0 / 1024.0, new_size / 1024.0 / 1024.0);
|
||||
#endif
|
||||
@@ -744,30 +728,30 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
|
||||
}
|
||||
|
||||
bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph *graph) {
|
||||
return ggml_gallocr_reserve_n(galloc, graph, NULL);
|
||||
return ggml_gallocr_reserve_n(galloc, graph, NULL, NULL);
|
||||
}
|
||||
|
||||
static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * node, int buffer_id, struct tensor_alloc * tensor_alloc) {
|
||||
assert(node->data || node->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], node) <= tensor_alloc->size_max);
|
||||
static void ggml_gallocr_init_tensor(ggml_gallocr_t galloc, struct ggml_tensor * tensor, int buffer_id, struct tensor_alloc * tensor_alloc) {
|
||||
assert(tensor->data || tensor->view_src || ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max);
|
||||
|
||||
if (node->view_src != NULL) {
|
||||
if (node->buffer == NULL) {
|
||||
if (tensor->view_src != NULL) {
|
||||
if (tensor->buffer == NULL) {
|
||||
assert(tensor_alloc->offset == SIZE_MAX);
|
||||
if (node->view_src->buffer == NULL) {
|
||||
if (tensor->view_src->buffer == NULL) {
|
||||
// this tensor was allocated without ggml-backend
|
||||
return;
|
||||
}
|
||||
ggml_backend_view_init(galloc->buffers[buffer_id], node);
|
||||
ggml_backend_view_init(galloc->buffers[buffer_id], tensor);
|
||||
}
|
||||
} else {
|
||||
if (node->data == NULL) {
|
||||
if (tensor->data == NULL) {
|
||||
assert(tensor_alloc->offset != SIZE_MAX);
|
||||
assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], node) <= tensor_alloc->size_max);
|
||||
assert(ggml_backend_buffer_get_alloc_size(galloc->buffers[buffer_id], tensor) <= tensor_alloc->size_max);
|
||||
void * base = ggml_backend_buffer_get_base(galloc->buffers[buffer_id]);
|
||||
void * addr = (char *)base + tensor_alloc->offset;
|
||||
ggml_backend_tensor_alloc(galloc->buffers[buffer_id], node, addr);
|
||||
ggml_backend_tensor_alloc(galloc->buffers[buffer_id], tensor, addr);
|
||||
} else {
|
||||
if (node->buffer == NULL) {
|
||||
if (tensor->buffer == NULL) {
|
||||
// this tensor was allocated without ggml-backend
|
||||
return;
|
||||
}
|
||||
@@ -843,13 +827,18 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
|
||||
|
||||
// reset buffers
|
||||
for (int i = 0; i < galloc->n_buffers; i++) {
|
||||
// zero size buffers are not allocated
|
||||
if (galloc->buffers[i] != NULL) {
|
||||
ggml_backend_buffer_reset(galloc->buffers[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// allocate the graph tensors from the previous assignments
|
||||
// leafs
|
||||
for (int i = 0; i < graph->n_leafs; i++) {
|
||||
struct ggml_tensor * leaf = graph->leafs[i];
|
||||
struct leaf_alloc * leaf_alloc = &galloc->leaf_allocs[i];
|
||||
ggml_gallocr_init_tensor(galloc, leaf, leaf_alloc->buffer_id, &leaf_alloc->leaf);
|
||||
}
|
||||
// nodes
|
||||
for (int i = 0; i < graph->n_nodes; i++) {
|
||||
struct ggml_tensor * node = graph->nodes[i];
|
||||
@@ -863,12 +852,6 @@ bool ggml_gallocr_alloc_graph(ggml_gallocr_t galloc, struct ggml_cgraph * graph)
|
||||
}
|
||||
ggml_gallocr_init_tensor(galloc, node, node_alloc->buffer_id, &node_alloc->dst);
|
||||
}
|
||||
// leafs
|
||||
for (int i = 0; i < graph->n_leafs; i++) {
|
||||
struct ggml_tensor * leaf = graph->leafs[i];
|
||||
struct tensor_alloc * leaf_alloc = &galloc->leaf_allocs[i];
|
||||
ggml_gallocr_init_tensor(galloc, leaf, 0, leaf_alloc);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
@@ -900,12 +883,12 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
|
||||
return false;
|
||||
}
|
||||
|
||||
struct ggml_tallocr * tallocr = ggml_tallocr_new(buffer);
|
||||
struct ggml_tallocr tallocr = ggml_tallocr_new(buffer);
|
||||
|
||||
for (struct ggml_tensor * t = first; t != last; t = ggml_get_next_tensor(ctx, t)) {
|
||||
if (t->data == NULL) {
|
||||
if (t->view_src == NULL) {
|
||||
ggml_tallocr_alloc(tallocr, t);
|
||||
ggml_tallocr_alloc(&tallocr, t);
|
||||
} else if (t->buffer == NULL) {
|
||||
ggml_backend_view_init(buffer, t);
|
||||
}
|
||||
@@ -917,8 +900,6 @@ static bool alloc_tensor_range(struct ggml_context * ctx,
|
||||
}
|
||||
}
|
||||
|
||||
ggml_tallocr_free(tallocr);
|
||||
|
||||
*buffers = realloc(*buffers, sizeof(ggml_backend_buffer_t) * (*n_buffers + 1));
|
||||
(*buffers)[(*n_buffers)++] = buffer;
|
||||
|
||||
|
||||
+13
-5
@@ -11,11 +11,15 @@ typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
|
||||
typedef struct ggml_backend * ggml_backend_t;
|
||||
|
||||
// Tensor allocator
|
||||
typedef struct ggml_tallocr * ggml_tallocr_t;
|
||||
struct ggml_tallocr {
|
||||
ggml_backend_buffer_t buffer;
|
||||
void * base;
|
||||
size_t alignment;
|
||||
size_t offset;
|
||||
};
|
||||
|
||||
GGML_API ggml_tallocr_t ggml_tallocr_new(ggml_backend_buffer_t buffer);
|
||||
GGML_API void ggml_tallocr_free(ggml_tallocr_t talloc);
|
||||
GGML_API void ggml_tallocr_alloc(ggml_tallocr_t talloc, struct ggml_tensor * tensor);
|
||||
GGML_API struct ggml_tallocr ggml_tallocr_new(ggml_backend_buffer_t buffer);
|
||||
GGML_API void ggml_tallocr_alloc(struct ggml_tallocr * talloc, struct ggml_tensor * tensor);
|
||||
|
||||
// Graph allocator
|
||||
/*
|
||||
@@ -50,7 +54,11 @@ GGML_API void ggml_gallocr_free(ggml_gallocr_t galloc);
|
||||
// not strictly required for single buffer usage: ggml_gallocr_alloc_graph will reallocate the buffers automatically if needed
|
||||
// returns false if the buffer allocation failed
|
||||
GGML_API bool ggml_gallocr_reserve(ggml_gallocr_t galloc, struct ggml_cgraph * graph);
|
||||
GGML_API bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, const int * node_buffer_ids);
|
||||
GGML_API bool ggml_gallocr_reserve_n(
|
||||
ggml_gallocr_t galloc,
|
||||
struct ggml_cgraph * graph,
|
||||
const int * node_buffer_ids,
|
||||
const int * leaf_buffer_ids);
|
||||
|
||||
// automatic reallocation if the topology changes when using a single buffer
|
||||
// returns false if using multiple buffers and a re-allocation is needed (call ggml_gallocr_reserve_n first to set the node buffers)
|
||||
|
||||
+14
-3
@@ -86,12 +86,12 @@ extern "C" {
|
||||
// (optional) asynchronous tensor data access
|
||||
void (*GGML_CALL set_tensor_async)(ggml_backend_t backend, struct ggml_tensor * tensor, const void * data, size_t offset, size_t size);
|
||||
void (*GGML_CALL get_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * tensor, void * data, size_t offset, size_t size);
|
||||
bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend, const struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
bool (*GGML_CALL cpy_tensor_async)(ggml_backend_t backend_src, ggml_backend_t backend_dst, const struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
|
||||
// (optional) complete all pending operations
|
||||
void (*GGML_CALL synchronize)(ggml_backend_t backend);
|
||||
|
||||
// create a plan for ggml_cgraph and free it
|
||||
// compute graph with a plan (not used currently)
|
||||
ggml_backend_graph_plan_t (*GGML_CALL graph_plan_create) (ggml_backend_t backend, const struct ggml_cgraph * cgraph);
|
||||
void (*GGML_CALL graph_plan_free) (ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
|
||||
@@ -102,16 +102,27 @@ extern "C" {
|
||||
|
||||
// check if the backend supports an operation
|
||||
bool (*GGML_CALL supports_op)(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||
|
||||
// (optional) event synchronization
|
||||
ggml_backend_event_t (*GGML_CALL event_new) (ggml_backend_t backend);
|
||||
void (*GGML_CALL event_free) (ggml_backend_event_t event);
|
||||
void (*GGML_CALL event_record) (ggml_backend_event_t event);
|
||||
void (*GGML_CALL event_wait) (ggml_backend_t backend, ggml_backend_event_t event);
|
||||
void (*GGML_CALL event_synchronize) (ggml_backend_event_t event);
|
||||
};
|
||||
|
||||
struct ggml_backend {
|
||||
ggml_guid_t guid;
|
||||
|
||||
struct ggml_backend_i iface;
|
||||
|
||||
ggml_backend_context_t context;
|
||||
};
|
||||
|
||||
struct ggml_backend_event {
|
||||
ggml_backend_t backend;
|
||||
void * context;
|
||||
};
|
||||
|
||||
//
|
||||
// Backend registry
|
||||
//
|
||||
|
||||
+370
-147
@@ -221,29 +221,29 @@ void ggml_backend_tensor_get_async(ggml_backend_t backend, const struct ggml_ten
|
||||
GGML_CALL void ggml_backend_tensor_set(struct ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
||||
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
||||
|
||||
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
||||
GGML_ASSERT(buf != NULL && "tensor buffer not set");
|
||||
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
||||
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor write out of bounds");
|
||||
|
||||
if (!size) {
|
||||
return;
|
||||
}
|
||||
|
||||
tensor->buffer->iface.set_tensor(buf, tensor, data, offset, size);
|
||||
buf->iface.set_tensor(buf, tensor, data, offset, size);
|
||||
}
|
||||
|
||||
GGML_CALL void ggml_backend_tensor_get(const struct ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
||||
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
||||
|
||||
GGML_ASSERT(buf != NULL && "tensor buffer not set");
|
||||
GGML_ASSERT(tensor->data != NULL && "tensor not allocated");
|
||||
GGML_ASSERT(tensor->buffer != NULL && "tensor buffer not set");
|
||||
GGML_ASSERT(offset + size <= ggml_nbytes(tensor) && "tensor read out of bounds");
|
||||
|
||||
if (!size) {
|
||||
return;
|
||||
}
|
||||
|
||||
tensor->buffer->iface.get_tensor(buf, tensor, data, offset, size);
|
||||
buf->iface.get_tensor(buf, tensor, data, offset, size);
|
||||
}
|
||||
|
||||
void ggml_backend_synchronize(ggml_backend_t backend) {
|
||||
@@ -255,18 +255,30 @@ void ggml_backend_synchronize(ggml_backend_t backend) {
|
||||
}
|
||||
|
||||
ggml_backend_graph_plan_t ggml_backend_graph_plan_create(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
GGML_ASSERT(backend->iface.graph_plan_create != NULL);
|
||||
|
||||
return backend->iface.graph_plan_create(backend, cgraph);
|
||||
}
|
||||
|
||||
void ggml_backend_graph_plan_free(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
|
||||
GGML_ASSERT(backend->iface.graph_plan_free != NULL);
|
||||
|
||||
backend->iface.graph_plan_free(backend, plan);
|
||||
}
|
||||
|
||||
enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan) {
|
||||
GGML_ASSERT(backend->iface.graph_plan_compute != NULL);
|
||||
|
||||
return backend->iface.graph_plan_compute(backend, plan);
|
||||
}
|
||||
|
||||
enum ggml_status ggml_backend_graph_compute(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
enum ggml_status err = ggml_backend_graph_compute_async(backend, cgraph);
|
||||
ggml_backend_synchronize(backend);
|
||||
return err;
|
||||
}
|
||||
|
||||
bool ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph) {
|
||||
return backend->iface.graph_compute(backend, cgraph);
|
||||
}
|
||||
|
||||
@@ -314,34 +326,68 @@ void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst
|
||||
}
|
||||
}
|
||||
|
||||
void ggml_backend_tensor_copy_async(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst) {
|
||||
void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst) {
|
||||
GGML_ASSERT(ggml_are_same_layout(src, dst) && "cannot copy tensors with different layouts");
|
||||
|
||||
if (src == dst) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (ggml_backend_buft_supports_backend(src->buffer->buft, backend) && ggml_backend_buft_supports_backend(dst->buffer->buft, backend)) {
|
||||
if (backend->iface.cpy_tensor_async != NULL) {
|
||||
if (backend->iface.cpy_tensor_async(backend, src, dst)) {
|
||||
return;
|
||||
}
|
||||
if (backend_dst->iface.cpy_tensor_async != NULL) {
|
||||
if (backend_dst->iface.cpy_tensor_async(backend_src, backend_dst, src, dst)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
size_t nbytes = ggml_nbytes(src);
|
||||
// an async copy would normally happen after all the queued operations on both backends are completed
|
||||
// sync src, set_async dst
|
||||
if (ggml_backend_buffer_is_host(src->buffer)) {
|
||||
ggml_backend_tensor_set_async(backend, dst, src->data, 0, nbytes);
|
||||
}
|
||||
else {
|
||||
ggml_backend_synchronize(backend_src);
|
||||
ggml_backend_tensor_set_async(backend_dst, dst, src->data, 0, ggml_nbytes(src));
|
||||
} else {
|
||||
ggml_backend_synchronize(backend_src);
|
||||
ggml_backend_tensor_copy(src, dst);
|
||||
ggml_backend_synchronize(backend_dst);
|
||||
}
|
||||
}
|
||||
|
||||
// events
|
||||
|
||||
ggml_backend_event_t ggml_backend_event_new(ggml_backend_t backend) {
|
||||
if (backend->iface.event_new == NULL) {
|
||||
return NULL;
|
||||
}
|
||||
return backend->iface.event_new(backend);
|
||||
}
|
||||
|
||||
void ggml_backend_event_free(ggml_backend_event_t event) {
|
||||
if (event == NULL) {
|
||||
return;
|
||||
}
|
||||
event->backend->iface.event_free(event);
|
||||
}
|
||||
|
||||
void ggml_backend_event_record(ggml_backend_event_t event) {
|
||||
GGML_ASSERT(event->backend->iface.event_record != NULL);
|
||||
|
||||
event->backend->iface.event_record(event);
|
||||
}
|
||||
|
||||
void ggml_backend_event_synchronize(ggml_backend_event_t event) {
|
||||
GGML_ASSERT(event->backend->iface.event_synchronize != NULL);
|
||||
|
||||
event->backend->iface.event_synchronize(event);
|
||||
}
|
||||
|
||||
void ggml_backend_event_wait(ggml_backend_t backend, ggml_backend_event_t event) {
|
||||
GGML_ASSERT(backend->iface.event_wait != NULL);
|
||||
|
||||
backend->iface.event_wait(backend, event);
|
||||
}
|
||||
|
||||
// backend registry
|
||||
|
||||
#define GGML_MAX_BACKENDS_REG 16
|
||||
#define GGML_REG_MAX_BACKENDS 16
|
||||
|
||||
struct ggml_backend_reg {
|
||||
char name[128];
|
||||
@@ -350,7 +396,7 @@ struct ggml_backend_reg {
|
||||
void * user_data;
|
||||
};
|
||||
|
||||
static struct ggml_backend_reg ggml_backend_registry[GGML_MAX_BACKENDS_REG];
|
||||
static struct ggml_backend_reg ggml_backend_registry[GGML_REG_MAX_BACKENDS];
|
||||
static size_t ggml_backend_registry_count = 0;
|
||||
|
||||
GGML_CALL static ggml_backend_t ggml_backend_reg_cpu_init(const char * params, void * user_data);
|
||||
@@ -395,7 +441,7 @@ GGML_CALL static void ggml_backend_registry_init(void) {
|
||||
}
|
||||
|
||||
GGML_CALL void ggml_backend_register(const char * name, ggml_backend_init_fn init_fn, ggml_backend_buffer_type_t default_buffer_type, void * user_data) {
|
||||
GGML_ASSERT(ggml_backend_registry_count < GGML_MAX_BACKENDS_REG);
|
||||
GGML_ASSERT(ggml_backend_registry_count < GGML_REG_MAX_BACKENDS);
|
||||
|
||||
size_t id = ggml_backend_registry_count;
|
||||
|
||||
@@ -746,8 +792,12 @@ GGML_CALL static enum ggml_status ggml_backend_cpu_graph_compute(ggml_backend_t
|
||||
struct ggml_cplan cplan = ggml_graph_plan(cgraph, cpu_ctx->n_threads);
|
||||
|
||||
if (cpu_ctx->work_size < cplan.work_size) {
|
||||
// TODO: may be faster to free and use malloc to avoid the copy
|
||||
cpu_ctx->work_data = realloc(cpu_ctx->work_data, cplan.work_size);
|
||||
free(cpu_ctx->work_data);
|
||||
cpu_ctx->work_data = malloc(cplan.work_size);
|
||||
if (cpu_ctx->work_data == NULL) {
|
||||
cpu_ctx->work_size = 0;
|
||||
return GGML_STATUS_ALLOC_FAILED;
|
||||
}
|
||||
cpu_ctx->work_size = cplan.work_size;
|
||||
}
|
||||
cplan.work_data = cpu_ctx->work_data;
|
||||
@@ -784,6 +834,11 @@ static struct ggml_backend_i cpu_backend_i = {
|
||||
/* .graph_plan_compute = */ ggml_backend_cpu_graph_plan_compute,
|
||||
/* .graph_compute = */ ggml_backend_cpu_graph_compute,
|
||||
/* .supports_op = */ ggml_backend_cpu_supports_op,
|
||||
/* .event_new = */ NULL,
|
||||
/* .event_free = */ NULL,
|
||||
/* .event_record = */ NULL,
|
||||
/* .event_wait = */ NULL,
|
||||
/* .event_synchronize = */ NULL,
|
||||
};
|
||||
|
||||
static ggml_guid_t ggml_backend_cpu_guid(void) {
|
||||
@@ -939,15 +994,27 @@ static bool ggml_is_view_op(enum ggml_op op) {
|
||||
|
||||
// scheduler
|
||||
|
||||
#define GGML_MAX_BACKENDS 16
|
||||
#define GGML_MAX_SPLITS 256
|
||||
#define GGML_MAX_SPLIT_INPUTS 16
|
||||
#ifndef GGML_SCHED_MAX_BACKENDS
|
||||
#define GGML_SCHED_MAX_BACKENDS 16
|
||||
#endif
|
||||
|
||||
#ifndef GGML_SCHED_MAX_SPLITS
|
||||
#define GGML_SCHED_MAX_SPLITS 256
|
||||
#endif
|
||||
|
||||
#ifndef GGML_SCHED_MAX_SPLIT_INPUTS
|
||||
#define GGML_SCHED_MAX_SPLIT_INPUTS 16
|
||||
#endif
|
||||
|
||||
#ifndef GGML_SCHED_MAX_COPIES
|
||||
#define GGML_SCHED_MAX_COPIES 4
|
||||
#endif
|
||||
|
||||
struct ggml_backend_sched_split {
|
||||
int backend_id;
|
||||
int i_start;
|
||||
int i_end;
|
||||
struct ggml_tensor * inputs[GGML_MAX_SPLIT_INPUTS];
|
||||
struct ggml_tensor * inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
|
||||
int n_inputs;
|
||||
// graph view of this split
|
||||
struct ggml_cgraph graph;
|
||||
@@ -955,45 +1022,53 @@ struct ggml_backend_sched_split {
|
||||
|
||||
struct ggml_backend_sched {
|
||||
bool is_reset; // true if the scheduler has been reset since the last graph split
|
||||
bool is_alloc;
|
||||
|
||||
int n_backends;
|
||||
ggml_backend_t backends[GGML_MAX_BACKENDS];
|
||||
ggml_backend_buffer_type_t bufts[GGML_MAX_BACKENDS];
|
||||
|
||||
ggml_backend_t backends[GGML_SCHED_MAX_BACKENDS];
|
||||
ggml_backend_buffer_type_t bufts[GGML_SCHED_MAX_BACKENDS];
|
||||
ggml_gallocr_t galloc;
|
||||
|
||||
// hash keys of the nodes in the graph
|
||||
struct ggml_hash_set hash_set;
|
||||
// hash values
|
||||
int * tensor_backend_id;
|
||||
struct ggml_tensor * (* tensor_copies)[GGML_MAX_BACKENDS];
|
||||
struct ggml_tensor * (* tensor_copies)[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
|
||||
|
||||
int * node_backend_ids; // [n_nodes]
|
||||
int n_nodes;
|
||||
int * node_backend_ids; // [graph_size]
|
||||
int * leaf_backend_ids; // [graph_size]
|
||||
|
||||
// copy of the graph with modified inputs
|
||||
struct ggml_cgraph * graph;
|
||||
|
||||
struct ggml_backend_sched_split splits[GGML_MAX_SPLITS];
|
||||
// graph splits
|
||||
struct ggml_backend_sched_split splits[GGML_SCHED_MAX_SPLITS];
|
||||
int n_splits;
|
||||
|
||||
// pipeline parallelism support
|
||||
int n_copies;
|
||||
int cur_copy;
|
||||
ggml_backend_event_t events[GGML_SCHED_MAX_BACKENDS][GGML_SCHED_MAX_COPIES];
|
||||
struct ggml_tensor * graph_inputs[GGML_SCHED_MAX_SPLIT_INPUTS];
|
||||
int n_graph_inputs;
|
||||
|
||||
struct ggml_context * ctx;
|
||||
|
||||
ggml_backend_sched_eval_callback callback_eval;
|
||||
void * callback_eval_user_data;
|
||||
|
||||
// align context_buffer to GGML_MEM_ALIGN
|
||||
#ifdef _MSC_VER
|
||||
#ifdef _MSC_VER
|
||||
__declspec(align(GGML_MEM_ALIGN))
|
||||
#else
|
||||
#else
|
||||
__attribute__((aligned(GGML_MEM_ALIGN)))
|
||||
#endif
|
||||
char context_buffer[GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)];
|
||||
#endif
|
||||
char context_buffer[GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS*2*sizeof(struct ggml_tensor) + sizeof(struct ggml_cgraph)];
|
||||
};
|
||||
|
||||
#define hash_id(node) ggml_hash_find_or_insert(sched->hash_set, node)
|
||||
#define tensor_backend_id(node) sched->tensor_backend_id[hash_id(node)]
|
||||
#define tensor_backend(node) (tensor_backend_id(node) == -1 ? NULL : sched->backends[tensor_backend_id(node)])
|
||||
#define hash_id(tensor) ggml_hash_find_or_insert(sched->hash_set, tensor)
|
||||
#define tensor_backend_id(tensor) sched->tensor_backend_id[hash_id(tensor)]
|
||||
|
||||
// returns the priority of the backend, lower id is higher priority
|
||||
static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backend_t backend) {
|
||||
@@ -1005,7 +1080,8 @@ static int ggml_backend_sched_backend_id(ggml_backend_sched_t sched, ggml_backen
|
||||
return -1;
|
||||
}
|
||||
|
||||
static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, ggml_backend_buffer_t buffer) {
|
||||
static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, const struct ggml_tensor * tensor) {
|
||||
ggml_backend_buffer_t buffer = tensor->buffer;
|
||||
if (buffer == NULL) {
|
||||
return -1;
|
||||
}
|
||||
@@ -1016,12 +1092,16 @@ static int ggml_backend_sched_backend_from_buffer(ggml_backend_sched_t sched, gg
|
||||
return i;
|
||||
}
|
||||
}
|
||||
GGML_ASSERT(false && "tensor buffer type not supported by any backend");
|
||||
return -1; // silence warning
|
||||
|
||||
fprintf(stderr, "%s: error: no backend supports buffer type %s used in tensor %s\n",
|
||||
__func__, ggml_backend_buffer_name(buffer), tensor->name);
|
||||
GGML_ASSERT(false);
|
||||
|
||||
return -1;
|
||||
}
|
||||
|
||||
#if 0
|
||||
static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS][128]; // debug only
|
||||
static char causes[GGML_DEFAULT_GRAPH_SIZE*16 + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS][128]; // debug only
|
||||
#define SET_CAUSE(node, ...) sprintf(causes[hash_id(node)], __VA_ARGS__)
|
||||
#define GET_CAUSE(node) causes[hash_id(node)]
|
||||
#else
|
||||
@@ -1035,19 +1115,28 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
|
||||
|
||||
// assign pre-allocated nodes to their backend
|
||||
// dst
|
||||
int cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor->buffer);
|
||||
int cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor);
|
||||
if (cur_backend != -1) {
|
||||
SET_CAUSE(node, "1.dst");
|
||||
SET_CAUSE(tensor, "1.dst");
|
||||
return cur_backend;
|
||||
}
|
||||
|
||||
// view_src
|
||||
if (tensor->view_src != NULL) {
|
||||
cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor->view_src->buffer);
|
||||
cur_backend = ggml_backend_sched_backend_from_buffer(sched, tensor->view_src);
|
||||
if (cur_backend != -1) {
|
||||
SET_CAUSE(node, "1.vsrc");
|
||||
SET_CAUSE(tensor, "1.vsrc");
|
||||
return cur_backend;
|
||||
}
|
||||
}
|
||||
|
||||
// input
|
||||
if (tensor->flags & GGML_TENSOR_FLAG_INPUT) {
|
||||
cur_backend = sched->n_backends - 1; // last backend (assumed CPU)
|
||||
SET_CAUSE(tensor, "1.inp");
|
||||
return cur_backend;
|
||||
}
|
||||
|
||||
// assign nodes that use weights to the backend of the weights
|
||||
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
||||
const struct ggml_tensor * src = tensor->src[i];
|
||||
@@ -1055,9 +1144,9 @@ static int ggml_backend_sched_backend_id_from_cur(ggml_backend_sched_t sched, st
|
||||
continue;
|
||||
}
|
||||
if (src->buffer != NULL && src->buffer->usage == GGML_BACKEND_BUFFER_USAGE_WEIGHTS) {
|
||||
int src_backend = ggml_backend_sched_backend_from_buffer(sched, src->buffer);
|
||||
int src_backend = ggml_backend_sched_backend_from_buffer(sched, src);
|
||||
// operations with weights are always run on the same backend as the weights
|
||||
SET_CAUSE(node, "1.wgt%d", i);
|
||||
SET_CAUSE(tensor, "1.wgt%d", i);
|
||||
return src_backend;
|
||||
}
|
||||
}
|
||||
@@ -1093,7 +1182,7 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
|
||||
if (ggml_is_view_op(node->op)) {
|
||||
continue;
|
||||
}
|
||||
ggml_backend_t tensor_backend = tensor_backend(node);
|
||||
ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node);
|
||||
fprintf(stderr, "node #%3d (%10.10s): %20.20s (%5.5s) [%5.5s %8.8s]:", i, ggml_op_name(node->op), node->name,
|
||||
fmt_size(ggml_nbytes(node)), tensor_backend ? ggml_backend_name(tensor_backend) : "NULL", GET_CAUSE(node));
|
||||
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
||||
@@ -1101,7 +1190,7 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
|
||||
if (src == NULL) {
|
||||
continue;
|
||||
}
|
||||
ggml_backend_t src_backend = tensor_backend(src);
|
||||
ggml_backend_t src_backend = ggml_backend_sched_get_tensor_backend(sched, src);
|
||||
fprintf(stderr, " %20.20s (%5.5s) [%5.5s %8.8s]", src->name,
|
||||
fmt_size(ggml_nbytes(src)), src_backend ? ggml_backend_name(src_backend) : "NULL", GET_CAUSE(src));
|
||||
}
|
||||
@@ -1118,6 +1207,7 @@ static void ggml_backend_sched_print_assignments(ggml_backend_sched_t sched, str
|
||||
static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
||||
// reset splits
|
||||
sched->n_splits = 0;
|
||||
sched->n_graph_inputs = 0;
|
||||
sched->is_reset = false;
|
||||
|
||||
struct ggml_init_params params = {
|
||||
@@ -1163,7 +1253,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
||||
}
|
||||
}
|
||||
#ifdef DEBUG_PASS1
|
||||
fprintf(stderr, "PASS 1 ASSIGNMENTS\n"); sched_print_assignments(sched, graph);
|
||||
fprintf(stderr, "PASS 1 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
|
||||
#endif
|
||||
|
||||
// pass 2: expand current backend assignments
|
||||
@@ -1171,28 +1261,6 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
||||
// expand gpu backends (i.e. non last prio) up and down, ignoring cpu (the lowest priority backend)
|
||||
// thus, cpu will never be used unless weights are on cpu, or there are no gpu ops between cpu ops
|
||||
|
||||
// pass 2.1 expand gpu up
|
||||
{
|
||||
int cur_backend_id = -1;
|
||||
for (int i = graph->n_nodes - 1; i >= 0; i--) {
|
||||
struct ggml_tensor * node = graph->nodes[i];
|
||||
if (ggml_is_view_op(node->op)) {
|
||||
continue;
|
||||
}
|
||||
int tensor_backend_id = tensor_backend_id(node);
|
||||
if (tensor_backend_id != -1) {
|
||||
if (tensor_backend_id == sched->n_backends - 1) {
|
||||
// skip cpu (lowest prio backend)
|
||||
cur_backend_id = -1;
|
||||
} else {
|
||||
cur_backend_id = tensor_backend_id;
|
||||
}
|
||||
} else {
|
||||
tensor_backend_id(node) = cur_backend_id;
|
||||
SET_CAUSE(node, "2.1");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pass 2.2 expand gpu down
|
||||
{
|
||||
@@ -1217,7 +1285,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
||||
}
|
||||
}
|
||||
|
||||
// pass 2.3 expand rest up
|
||||
// pass 2.1 expand gpu up
|
||||
{
|
||||
int cur_backend_id = -1;
|
||||
for (int i = graph->n_nodes - 1; i >= 0; i--) {
|
||||
@@ -1227,14 +1295,20 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
||||
}
|
||||
int tensor_backend_id = tensor_backend_id(node);
|
||||
if (tensor_backend_id != -1) {
|
||||
cur_backend_id = tensor_backend_id;
|
||||
if (tensor_backend_id == sched->n_backends - 1) {
|
||||
// skip cpu (lowest prio backend)
|
||||
cur_backend_id = -1;
|
||||
} else {
|
||||
cur_backend_id = tensor_backend_id;
|
||||
}
|
||||
} else {
|
||||
tensor_backend_id(node) = cur_backend_id;
|
||||
SET_CAUSE(node, "2.3");
|
||||
SET_CAUSE(node, "2.1");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// pass 2.4 expand rest down
|
||||
{
|
||||
int cur_backend_id = -1;
|
||||
@@ -1252,8 +1326,26 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
||||
}
|
||||
}
|
||||
}
|
||||
// pass 2.3 expand rest up
|
||||
{
|
||||
int cur_backend_id = -1;
|
||||
for (int i = graph->n_nodes - 1; i >= 0; i--) {
|
||||
struct ggml_tensor * node = graph->nodes[i];
|
||||
if (ggml_is_view_op(node->op)) {
|
||||
continue;
|
||||
}
|
||||
int tensor_backend_id = tensor_backend_id(node);
|
||||
if (tensor_backend_id != -1) {
|
||||
cur_backend_id = tensor_backend_id;
|
||||
} else {
|
||||
tensor_backend_id(node) = cur_backend_id;
|
||||
SET_CAUSE(node, "2.3");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef DEBUG_PASS2
|
||||
fprintf(stderr, "PASS 2 ASSIGNMENTS\n"); sched_print_assignments(sched, graph);
|
||||
fprintf(stderr, "PASS 2 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
|
||||
#endif
|
||||
|
||||
// pass 3: assign backends to remaining src from dst and view_src
|
||||
@@ -1283,7 +1375,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
||||
}
|
||||
}
|
||||
#ifdef DEBUG_PASS3
|
||||
fprintf(stderr, "PASS 3 ASSIGNMENTS\n"); sched_print_assignments(sched, graph);
|
||||
fprintf(stderr, "PASS 3 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
|
||||
#endif
|
||||
|
||||
// pass 4: split graph, find tensors that need to be copied
|
||||
@@ -1315,7 +1407,7 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
||||
if (tensor_backend_id != cur_backend_id) {
|
||||
sched->splits[cur_split].i_end = i;
|
||||
cur_split++;
|
||||
GGML_ASSERT(cur_split < GGML_MAX_SPLITS);
|
||||
GGML_ASSERT(cur_split < GGML_SCHED_MAX_SPLITS);
|
||||
sched->splits[cur_split].backend_id = tensor_backend_id;
|
||||
sched->splits[cur_split].i_start = i;
|
||||
sched->splits[cur_split].n_inputs = 0;
|
||||
@@ -1328,25 +1420,57 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
||||
if (src == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
int src_backend_id = tensor_backend_id(src);
|
||||
assert(src_backend_id != -1); // all inputs should be assigned by now
|
||||
|
||||
if (src->flags & GGML_TENSOR_FLAG_INPUT) {
|
||||
size_t id = hash_id(src);
|
||||
if (sched->tensor_copies[id][src_backend_id][0] == NULL) {
|
||||
ggml_backend_t backend = sched->backends[src_backend_id];
|
||||
for (int c = 0; c < sched->n_copies; c++) {
|
||||
struct ggml_tensor * tensor_copy;
|
||||
if (c == sched->cur_copy) {
|
||||
tensor_copy = src; // use the original tensor as the current copy
|
||||
} else {
|
||||
tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
|
||||
ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c);
|
||||
}
|
||||
if (sched->n_copies > 1) {
|
||||
ggml_set_input(tensor_copy);
|
||||
ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
|
||||
}
|
||||
sched->tensor_copies[id][src_backend_id][c] = tensor_copy;
|
||||
tensor_backend_id(tensor_copy) = src_backend_id;
|
||||
SET_CAUSE(tensor_copy, "4.cpy");
|
||||
}
|
||||
int n_graph_inputs = sched->n_graph_inputs++;
|
||||
GGML_ASSERT(n_graph_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
|
||||
sched->graph_inputs[n_graph_inputs] = src;
|
||||
}
|
||||
}
|
||||
|
||||
if (src_backend_id != tensor_backend_id) {
|
||||
// create a copy of the input in the split's backend
|
||||
size_t id = hash_id(src);
|
||||
if (sched->tensor_copies[id][cur_backend_id] == NULL) {
|
||||
if (sched->tensor_copies[id][cur_backend_id][0] == NULL) {
|
||||
ggml_backend_t backend = sched->backends[cur_backend_id];
|
||||
struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
|
||||
ggml_format_name(tensor_copy, "%s#%s", ggml_backend_name(backend), src->name);
|
||||
|
||||
sched->tensor_copies[id][cur_backend_id] = tensor_copy;
|
||||
tensor_backend_id(tensor_copy) = cur_backend_id;
|
||||
SET_CAUSE(tensor_copy, "4.cpy");
|
||||
|
||||
for (int c = 0; c < sched->n_copies; c++) {
|
||||
struct ggml_tensor * tensor_copy = ggml_dup_tensor_layout(sched->ctx, src);
|
||||
ggml_format_name(tensor_copy, "%s#%s#%d", ggml_backend_name(backend), src->name, c);
|
||||
if (sched->n_copies > 1) {
|
||||
ggml_set_input(tensor_copy);
|
||||
ggml_set_output(tensor_copy); // prevent ggml-alloc from overwriting the tensor
|
||||
}
|
||||
sched->tensor_copies[id][cur_backend_id][c] = tensor_copy;
|
||||
tensor_backend_id(tensor_copy) = cur_backend_id;
|
||||
SET_CAUSE(tensor_copy, "4.cpy");
|
||||
}
|
||||
int n_inputs = sched->splits[cur_split].n_inputs++;
|
||||
GGML_ASSERT(n_inputs < GGML_MAX_SPLIT_INPUTS);
|
||||
GGML_ASSERT(n_inputs < GGML_SCHED_MAX_SPLIT_INPUTS);
|
||||
sched->splits[cur_split].inputs[n_inputs] = src;
|
||||
}
|
||||
node->src[j] = sched->tensor_copies[id][cur_backend_id];
|
||||
node->src[j] = sched->tensor_copies[id][cur_backend_id][sched->cur_copy];
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1354,37 +1478,39 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
||||
sched->n_splits = cur_split + 1;
|
||||
}
|
||||
#ifdef DEBUG_PASS4
|
||||
fprintf(stderr, "PASS 4 ASSIGNMENTS\n"); sched_print_assignments(sched, graph);
|
||||
fprintf(stderr, "PASS 4 ASSIGNMENTS\n"); ggml_backend_sched_print_assignments(sched, graph);
|
||||
#endif
|
||||
|
||||
#ifndef NDEBUG
|
||||
// sanity check: all sources should have the same backend as the node
|
||||
for (int i = 0; i < graph->n_nodes; i++) {
|
||||
struct ggml_tensor * node = graph->nodes[i];
|
||||
ggml_backend_t tensor_backend = tensor_backend(node);
|
||||
ggml_backend_t tensor_backend = ggml_backend_sched_get_tensor_backend(sched, node);
|
||||
if (tensor_backend == NULL) {
|
||||
fprintf(stderr, "!!!!!!! %s has no backend\n", node->name);
|
||||
}
|
||||
if (node->view_src != NULL && tensor_backend != tensor_backend(node->view_src)) {
|
||||
if (node->view_src != NULL && tensor_backend != ggml_backend_sched_get_tensor_backend(sched, node->view_src)) {
|
||||
fprintf(stderr, "!!!!!!! %s has backend %s, view_src %s has backend %s\n",
|
||||
node->name, tensor_backend ? ggml_backend_name(tensor_backend) : "NULL",
|
||||
node->view_src->name, tensor_backend(node->view_src) ? ggml_backend_name(tensor_backend(node->view_src)) : "NULL");
|
||||
node->view_src->name, ggml_backend_sched_get_tensor_backend(sched, node->view_src) ?
|
||||
ggml_backend_name(ggml_backend_sched_get_tensor_backend(sched, node->view_src)) : "NULL");
|
||||
}
|
||||
for (int j = 0; j < GGML_MAX_SRC; j++) {
|
||||
struct ggml_tensor * src = node->src[j];
|
||||
if (src == NULL) {
|
||||
continue;
|
||||
}
|
||||
ggml_backend_t src_backend = tensor_backend(src);
|
||||
ggml_backend_t src_backend = ggml_backend_sched_get_tensor_backend(sched, src);
|
||||
if (src_backend != tensor_backend /* && src_backend != NULL */) {
|
||||
fprintf(stderr, "!!!! %s has backend %s, src %d (%s) has backend %s\n",
|
||||
node->name, tensor_backend ? ggml_backend_name(tensor_backend) : "NULL",
|
||||
j, src->name, src_backend ? ggml_backend_name(src_backend) : "NULL");
|
||||
}
|
||||
if (src->view_src != NULL && src_backend != tensor_backend(src->view_src)) {
|
||||
if (src->view_src != NULL && src_backend != ggml_backend_sched_get_tensor_backend(sched, src->view_src)) {
|
||||
fprintf(stderr, "!!!!!!! [src] %s has backend %s, view_src %s has backend %s\n",
|
||||
src->name, src_backend ? ggml_backend_name(src_backend) : "NULL",
|
||||
src->view_src->name, tensor_backend(src->view_src) ? ggml_backend_name(tensor_backend(src->view_src)) : "NULL");
|
||||
src->view_src->name, ggml_backend_sched_get_tensor_backend(sched, src->view_src) ?
|
||||
ggml_backend_name(ggml_backend_sched_get_tensor_backend(sched, src->view_src)) : "NULL");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1392,18 +1518,20 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
||||
#endif
|
||||
|
||||
// create copies of the graph for each split
|
||||
// FIXME: avoid this copy, pass split inputs to ggml_gallocr_alloc_graph_n in some other way
|
||||
struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_MAX_SPLIT_INPUTS, false);
|
||||
// TODO: avoid this copy
|
||||
struct ggml_cgraph * graph_copy = ggml_new_graph_custom(sched->ctx, graph->n_nodes + sched->n_splits*GGML_SCHED_MAX_SPLIT_INPUTS, false);
|
||||
for (int i = 0; i < sched->n_splits; i++) {
|
||||
struct ggml_backend_sched_split * split = &sched->splits[i];
|
||||
split->graph = ggml_graph_view(graph, split->i_start, split->i_end);
|
||||
|
||||
// add inputs to the graph copy so that they are allocated by ggml-alloc at the start of the split
|
||||
for (int j = 0; j < split->n_inputs; j++) {
|
||||
struct ggml_tensor * input = split->inputs[j];
|
||||
struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split->backend_id];
|
||||
struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split->backend_id][sched->cur_copy];
|
||||
|
||||
// add a dependency to the input source so that it is not freed before the copy is done
|
||||
struct ggml_tensor * input_dep = ggml_view_tensor(sched->ctx, input);
|
||||
input_dep->src[0] = input;
|
||||
sched->node_backend_ids[graph_copy->n_nodes] = tensor_backend_id(input);
|
||||
graph_copy->nodes[graph_copy->n_nodes++] = input_dep;
|
||||
|
||||
@@ -1417,18 +1545,56 @@ static void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct gg
|
||||
graph_copy->nodes[graph_copy->n_nodes++] = graph->nodes[j];
|
||||
}
|
||||
}
|
||||
|
||||
if (sched->n_copies > 1) {
|
||||
// add input copies as leafs so that they are allocated first
|
||||
for (int i = 0; i < sched->n_graph_inputs; i++) {
|
||||
struct ggml_tensor * input = sched->graph_inputs[i];
|
||||
size_t id = hash_id(input);
|
||||
int backend_id = tensor_backend_id(input);
|
||||
for (int c = 0; c < sched->n_copies; c++) {
|
||||
struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c];
|
||||
sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
|
||||
graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < sched->n_splits; i++) {
|
||||
struct ggml_backend_sched_split * split = &sched->splits[i];
|
||||
int backend_id = split->backend_id;
|
||||
for (int j = 0; j < split->n_inputs; j++) {
|
||||
struct ggml_tensor * input = split->inputs[j];
|
||||
size_t id = hash_id(input);
|
||||
for (int c = 0; c < sched->n_copies; c++) {
|
||||
struct ggml_tensor * input_cpy = sched->tensor_copies[id][backend_id][c];
|
||||
sched->leaf_backend_ids[graph_copy->n_leafs] = backend_id;
|
||||
graph_copy->leafs[graph_copy->n_leafs++] = input_cpy;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// add leafs from the original graph
|
||||
for (int i = 0; i < graph->n_leafs; i++) {
|
||||
struct ggml_tensor * leaf = graph->leafs[i];
|
||||
sched->leaf_backend_ids[graph_copy->n_leafs] = tensor_backend_id(leaf);
|
||||
graph_copy->leafs[graph_copy->n_leafs++] = leaf;
|
||||
}
|
||||
|
||||
sched->graph = graph_copy;
|
||||
}
|
||||
|
||||
static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
|
||||
// ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids);
|
||||
// allocate graph
|
||||
if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) {
|
||||
// the re-allocation may cause the split inputs to be moved to a different address
|
||||
ggml_backend_sched_synchronize(sched);
|
||||
#ifndef NDEBUG
|
||||
fprintf(stderr, "ggml_backend_sched: failed to allocate graph, reserving\n");
|
||||
fprintf(stderr, "%s: failed to allocate graph, reserving\n", __func__);
|
||||
#endif
|
||||
ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids);
|
||||
ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids);
|
||||
if (!ggml_gallocr_alloc_graph(sched->galloc, sched->graph)) {
|
||||
fprintf(stderr, "ggml_backend_sched: failed to allocate graph\n");
|
||||
fprintf(stderr, "%s: failed to allocate graph\n", __func__);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -1437,9 +1603,6 @@ static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {
|
||||
}
|
||||
|
||||
static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t sched) {
|
||||
uint64_t copy_us[GGML_MAX_BACKENDS] = {0};
|
||||
uint64_t compute_us[GGML_MAX_BACKENDS] = {0};
|
||||
|
||||
struct ggml_backend_sched_split * splits = sched->splits;
|
||||
|
||||
for (int i = 0; i < sched->n_splits; i++) {
|
||||
@@ -1448,34 +1611,36 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
||||
ggml_backend_t split_backend = sched->backends[split_backend_id];
|
||||
|
||||
// copy the input tensors to the split backend
|
||||
uint64_t copy_start_us = ggml_time_us();
|
||||
for (int j = 0; j < split->n_inputs; j++) {
|
||||
ggml_backend_t input_backend = ggml_backend_sched_get_tensor_backend(sched, split->inputs[j]);
|
||||
struct ggml_tensor * input = split->inputs[j];
|
||||
struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split_backend_id];
|
||||
struct ggml_tensor * input_cpy = sched->tensor_copies[hash_id(input)][split_backend_id][sched->cur_copy];
|
||||
|
||||
GGML_ASSERT(input->buffer != NULL);
|
||||
GGML_ASSERT(input_cpy->buffer != NULL);
|
||||
if (input->flags & GGML_TENSOR_FLAG_INPUT) {
|
||||
// inputs from the user must be copied immediately to prevent the user overwriting the data before the copy is done
|
||||
if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
|
||||
ggml_backend_event_synchronize(sched->events[split_backend_id][sched->cur_copy]);
|
||||
} else {
|
||||
ggml_backend_synchronize(split_backend);
|
||||
}
|
||||
ggml_backend_tensor_copy(input, input_cpy);
|
||||
} else {
|
||||
if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
|
||||
ggml_backend_event_wait(split_backend, sched->events[split_backend_id][sched->cur_copy]);
|
||||
} else {
|
||||
ggml_backend_synchronize(split_backend);
|
||||
ggml_backend_synchronize(input_backend);
|
||||
}
|
||||
|
||||
ggml_backend_tensor_copy_async(split_backend, input, input_cpy);
|
||||
ggml_backend_tensor_copy_async(input_backend, split_backend, input, input_cpy);
|
||||
}
|
||||
}
|
||||
//ggml_backend_synchronize(split_backend); // necessary to measure copy time
|
||||
int64_t copy_end_us = ggml_time_us();
|
||||
copy_us[split_backend_id] += copy_end_us - copy_start_us;
|
||||
|
||||
#if 0
|
||||
char split_filename[GGML_MAX_NAME];
|
||||
snprintf(split_filename, GGML_MAX_NAME, "split_%i_%s.dot", i, ggml_backend_name(split_backend));
|
||||
ggml_graph_dump_dot(split->graph, NULL, split_filename);
|
||||
#endif
|
||||
|
||||
|
||||
uint64_t compute_start_us = ggml_time_us();
|
||||
if (!sched->callback_eval) {
|
||||
enum ggml_status ec = ggml_backend_graph_compute(split_backend, &split->graph);
|
||||
enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &split->graph);
|
||||
if (ec != GGML_STATUS_SUCCESS) {
|
||||
return ec;
|
||||
}
|
||||
//ggml_backend_synchronize(split_backend); // necessary to measure compute time
|
||||
} else {
|
||||
// similar to ggml_backend_compare_graph_backend
|
||||
for (int j0 = 0; j0 < split->graph.n_nodes; j0++) {
|
||||
@@ -1494,11 +1659,14 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
||||
|
||||
struct ggml_cgraph gv = ggml_graph_view(&split->graph, j0, j1 + 1);
|
||||
|
||||
enum ggml_status ec = ggml_backend_graph_compute(split_backend, &gv);
|
||||
enum ggml_status ec = ggml_backend_graph_compute_async(split_backend, &gv);
|
||||
if (ec != GGML_STATUS_SUCCESS) {
|
||||
return ec;
|
||||
}
|
||||
|
||||
// TODO: pass backend to the callback, then the user can decide if they want to synchronize
|
||||
ggml_backend_synchronize(split_backend);
|
||||
|
||||
if (need && !sched->callback_eval(t, false, sched->callback_eval_user_data)) {
|
||||
break;
|
||||
}
|
||||
@@ -1506,39 +1674,54 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
||||
j0 = j1;
|
||||
}
|
||||
}
|
||||
uint64_t compute_end_us = ggml_time_us();
|
||||
compute_us[split_backend_id] += compute_end_us - compute_start_us;
|
||||
}
|
||||
|
||||
#if 0
|
||||
// per-backend timings
|
||||
fprintf(stderr, "sched_compute_splits times (%d splits):\n", sched->n_splits);
|
||||
for (int i = 0; i < sched->n_backends; i++) {
|
||||
if (copy_us[i] > 0 || compute_us[i] > 0) {
|
||||
fprintf(stderr, "\t%5.5s: %lu us copy, %lu us compute\n", ggml_backend_name(sched->backends[i]), copy_us[i], compute_us[i]);
|
||||
// record the event of this copy
|
||||
if (split->n_inputs > 0) {
|
||||
if (sched->events[split_backend_id][sched->cur_copy] != NULL) {
|
||||
ggml_backend_event_record(sched->events[split_backend_id][sched->cur_copy]);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
sched->cur_copy = (sched->cur_copy + 1) % sched->n_copies;
|
||||
|
||||
return GGML_STATUS_SUCCESS;
|
||||
}
|
||||
|
||||
ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size) {
|
||||
ggml_backend_sched_t ggml_backend_sched_new(
|
||||
ggml_backend_t * backends,
|
||||
ggml_backend_buffer_type_t * bufts,
|
||||
int n_backends,
|
||||
size_t graph_size,
|
||||
bool parallel) {
|
||||
GGML_ASSERT(n_backends > 0);
|
||||
GGML_ASSERT(n_backends <= GGML_MAX_BACKENDS);
|
||||
GGML_ASSERT(n_backends <= GGML_SCHED_MAX_BACKENDS);
|
||||
GGML_ASSERT(ggml_backend_is_cpu(backends[n_backends - 1])); // last backend must be CPU
|
||||
|
||||
struct ggml_backend_sched * sched = calloc(sizeof(struct ggml_backend_sched), 1);
|
||||
|
||||
// initialize hash table
|
||||
sched->hash_set = ggml_hash_set_new(graph_size + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS);
|
||||
sched->hash_set = ggml_hash_set_new(graph_size + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS);
|
||||
sched->tensor_backend_id = calloc(sizeof(sched->tensor_backend_id[0]), sched->hash_set.size);
|
||||
sched->tensor_copies = calloc(sizeof(sched->tensor_copies[0]), sched->hash_set.size);
|
||||
sched->node_backend_ids = calloc(sizeof(sched->node_backend_ids[0]), graph_size);
|
||||
sched->leaf_backend_ids = calloc(sizeof(sched->leaf_backend_ids[0]), graph_size);
|
||||
|
||||
sched->n_backends = n_backends;
|
||||
for (int i = 0; i < n_backends; i++) {
|
||||
sched->backends[i] = backends[i];
|
||||
sched->bufts[i] = bufts ? bufts[i] : ggml_backend_get_default_buffer_type(backends[i]);
|
||||
|
||||
sched->n_copies = parallel ? GGML_SCHED_MAX_COPIES : 1;
|
||||
|
||||
GGML_ASSERT(sched->n_copies <= GGML_SCHED_MAX_COPIES);
|
||||
|
||||
for (int b = 0; b < n_backends; b++) {
|
||||
sched->backends[b] = backends[b];
|
||||
sched->bufts[b] = bufts ? bufts[b] : ggml_backend_get_default_buffer_type(backends[b]);
|
||||
GGML_ASSERT(ggml_backend_buft_supports_backend(sched->bufts[b], backends[b]));
|
||||
if (sched->n_copies > 1) {
|
||||
for (int c = 0; c < sched->n_copies; c++) {
|
||||
sched->events[b][c] = ggml_backend_event_new(backends[b]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sched->galloc = ggml_gallocr_new_n(sched->bufts, n_backends);
|
||||
@@ -1552,12 +1735,18 @@ void ggml_backend_sched_free(ggml_backend_sched_t sched) {
|
||||
if (sched == NULL) {
|
||||
return;
|
||||
}
|
||||
for (int b = 0; b < sched->n_backends; b++) {
|
||||
for (int c = 0; c < sched->n_copies; c++) {
|
||||
ggml_backend_event_free(sched->events[b][c]);
|
||||
}
|
||||
}
|
||||
ggml_gallocr_free(sched->galloc);
|
||||
ggml_free(sched->ctx);
|
||||
free(sched->hash_set.keys);
|
||||
free(sched->tensor_backend_id);
|
||||
free(sched->tensor_copies);
|
||||
free(sched->node_backend_ids);
|
||||
free(sched->leaf_backend_ids);
|
||||
free(sched);
|
||||
}
|
||||
|
||||
@@ -1569,34 +1758,63 @@ void ggml_backend_sched_reset(ggml_backend_sched_t sched) {
|
||||
memset(sched->tensor_copies, 0, sizeof(sched->tensor_copies[0]) * hash_size);
|
||||
|
||||
sched->is_reset = true;
|
||||
sched->is_alloc = false;
|
||||
}
|
||||
|
||||
bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph) {
|
||||
ggml_backend_sched_split_graph(sched, measure_graph);
|
||||
|
||||
if (!ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids)) {
|
||||
// TODO: extract this to a separate function
|
||||
if (!ggml_gallocr_reserve_n(sched->galloc, sched->graph, sched->node_backend_ids, sched->leaf_backend_ids)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
ggml_backend_sched_reset(sched);
|
||||
ggml_backend_sched_synchronize(sched);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
||||
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + GGML_SCHED_MAX_SPLITS*GGML_SCHED_MAX_SPLIT_INPUTS);
|
||||
|
||||
ggml_backend_sched_split_graph(sched, graph);
|
||||
|
||||
if (!ggml_backend_sched_alloc_splits(sched)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
sched->is_alloc = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
||||
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + GGML_MAX_SPLITS*GGML_MAX_SPLIT_INPUTS);
|
||||
enum ggml_status err = ggml_backend_sched_graph_compute_async(sched, graph);
|
||||
ggml_backend_sched_synchronize(sched);
|
||||
return err;
|
||||
}
|
||||
|
||||
if (!sched->is_reset) {
|
||||
enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
||||
if (!sched->is_reset && !sched->is_alloc) {
|
||||
ggml_backend_sched_reset(sched);
|
||||
}
|
||||
|
||||
ggml_backend_sched_split_graph(sched, graph);
|
||||
if (!ggml_backend_sched_alloc_splits(sched)) {
|
||||
return GGML_STATUS_ALLOC_FAILED;
|
||||
if (!sched->is_alloc) {
|
||||
if (!ggml_backend_sched_alloc_graph(sched, graph)) {
|
||||
return GGML_STATUS_ALLOC_FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
return ggml_backend_sched_compute_splits(sched);
|
||||
}
|
||||
|
||||
void ggml_backend_sched_synchronize(ggml_backend_sched_t sched) {
|
||||
for (int i = 0; i < sched->n_backends; i++) {
|
||||
ggml_backend_synchronize(sched->backends[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void ggml_backend_sched_set_eval_callback(ggml_backend_sched_t sched, ggml_backend_sched_eval_callback callback, void * user_data) {
|
||||
sched->callback_eval = callback;
|
||||
sched->callback_eval_user_data = user_data;
|
||||
@@ -1606,19 +1824,24 @@ int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched) {
|
||||
return sched->n_splits;
|
||||
}
|
||||
|
||||
int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched) {
|
||||
return sched->n_copies;
|
||||
}
|
||||
|
||||
size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend) {
|
||||
int backend_index = ggml_backend_sched_backend_id(sched, backend);
|
||||
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
|
||||
|
||||
return ggml_gallocr_get_buffer_size(sched->galloc, backend_index);
|
||||
}
|
||||
|
||||
void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
|
||||
void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend) {
|
||||
int backend_index = ggml_backend_sched_backend_id(sched, backend);
|
||||
GGML_ASSERT(backend_index >= 0 && backend_index < sched->n_backends);
|
||||
tensor_backend_id(node) = backend_index;
|
||||
}
|
||||
|
||||
ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) {
|
||||
ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node) {
|
||||
int backend_index = tensor_backend_id(node);
|
||||
if (backend_index == -1) {
|
||||
return NULL;
|
||||
|
||||
+41
-17
@@ -9,6 +9,7 @@ extern "C" {
|
||||
|
||||
typedef struct ggml_backend_buffer_type * ggml_backend_buffer_type_t;
|
||||
typedef struct ggml_backend_buffer * ggml_backend_buffer_t;
|
||||
typedef struct ggml_backend_event * ggml_backend_event_t;
|
||||
typedef struct ggml_backend * ggml_backend_t;
|
||||
typedef void * ggml_backend_graph_plan_t;
|
||||
|
||||
@@ -72,11 +73,24 @@ extern "C" {
|
||||
GGML_API enum ggml_status ggml_backend_graph_plan_compute(ggml_backend_t backend, ggml_backend_graph_plan_t plan);
|
||||
GGML_API enum ggml_status ggml_backend_graph_compute (ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
|
||||
GGML_API bool ggml_backend_graph_compute_async(ggml_backend_t backend, struct ggml_cgraph * cgraph);
|
||||
GGML_API bool ggml_backend_supports_op(ggml_backend_t backend, const struct ggml_tensor * op);
|
||||
|
||||
// tensor copy between different backends
|
||||
GGML_API void ggml_backend_tensor_copy(struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend, struct ggml_tensor * src, struct ggml_tensor * dst); // automatic fallback to sync copy
|
||||
|
||||
// asynchronous copy
|
||||
// the copy is performed after all the currently queued operations in backend_src
|
||||
// backend_dst will wait for the copy to complete before performing other operations
|
||||
// automatic fallback to sync copy if async is not supported
|
||||
GGML_API void ggml_backend_tensor_copy_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, struct ggml_tensor * src, struct ggml_tensor * dst);
|
||||
|
||||
// events
|
||||
GGML_API ggml_backend_event_t ggml_backend_event_new (ggml_backend_t backend);
|
||||
GGML_API void ggml_backend_event_free (ggml_backend_event_t event);
|
||||
GGML_API void ggml_backend_event_record (ggml_backend_event_t event);
|
||||
GGML_API void ggml_backend_event_synchronize(ggml_backend_event_t event);
|
||||
GGML_API void ggml_backend_event_wait (ggml_backend_t backend, ggml_backend_event_t event); // wait async on event
|
||||
|
||||
//
|
||||
// CPU backend
|
||||
@@ -123,27 +137,31 @@ extern "C" {
|
||||
/*
|
||||
Example usage:
|
||||
|
||||
sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, num_backends);
|
||||
// sched is initialized with measure allocators and cannot be used until allocated with a measure graph
|
||||
// operations that use tensors allocated in a buffer with USAGE_WEIGHTS will be assigned
|
||||
// preferably to run on the same backend as the buffer
|
||||
ggml_backend_buffer_set_usage(buf_weights, GGML_BACKEND_BUFFER_USAGE_WEIGHTS);
|
||||
|
||||
// initialize buffers from a measure graph
|
||||
measure_graph = build_graph(sched); // use the allocr to allocate inputs as needed
|
||||
sched = ggml_backend_sched_new({backend_gpu, backend_gpu2, backend_cpu}, NULL, num_backends, GGML_DEFAULT_GRAPH_SIZE, false);
|
||||
|
||||
// in build_graph:
|
||||
build_graph(...) {
|
||||
// manually assign nodes to a backend (optional, should not be needed in most cases)
|
||||
struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
|
||||
ggml_backend_sched_set_node_backend(sched, node, backend_gpu);
|
||||
}
|
||||
// initialize buffers from a max size graph (optional)
|
||||
reserve_graph = build_graph(sched, max_batch_size);
|
||||
|
||||
// allocate backend buffers from measure graph
|
||||
ggml_backend_sched_init_measure(sched, measure_graph);
|
||||
// manually assign nodes to a backend (optional, should not be needed in most cases)
|
||||
struct ggml_tensor * node = ggml_mul_mat(ctx, ...);
|
||||
ggml_backend_sched_set_tensor_backend(sched, node, backend_gpu);
|
||||
|
||||
// the scheduler is now ready to compute graphs
|
||||
ggml_backend_sched_reserve(sched, reserve_graph);
|
||||
|
||||
// compute
|
||||
graph = build_graph(sched);
|
||||
ggml_backend_sched_graph_compute(sched, graph);
|
||||
|
||||
// if there are graph inputs:
|
||||
ggml_backend_sched_reset(sched);
|
||||
ggml_backend_sched_alloc_graph(sched, graph);
|
||||
ggml_backend_tensor_set(input_tensor, ...);
|
||||
ggml_backend_sched_graph_compute(sched, graph);
|
||||
}
|
||||
*/
|
||||
|
||||
struct ggml_backend_sched;
|
||||
@@ -158,20 +176,26 @@ extern "C" {
|
||||
typedef bool (*ggml_backend_sched_eval_callback)(struct ggml_tensor * t, bool ask, void * user_data);
|
||||
|
||||
// Initialize a backend scheduler
|
||||
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size);
|
||||
GGML_API ggml_backend_sched_t ggml_backend_sched_new(ggml_backend_t * backends, ggml_backend_buffer_type_t * bufts, int n_backends, size_t graph_size, bool parallel);
|
||||
GGML_API void ggml_backend_sched_free(ggml_backend_sched_t sched);
|
||||
|
||||
// Initialize backend buffers from a measure graph
|
||||
GGML_API bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph * measure_graph);
|
||||
|
||||
// Get the number of splits of the last graph
|
||||
GGML_API int ggml_backend_sched_get_n_splits(ggml_backend_sched_t sched);
|
||||
GGML_API int ggml_backend_sched_get_n_copies(ggml_backend_sched_t sched);
|
||||
|
||||
GGML_API size_t ggml_backend_sched_get_buffer_size(ggml_backend_sched_t sched, ggml_backend_t backend);
|
||||
|
||||
GGML_API void ggml_backend_sched_set_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
|
||||
GGML_API ggml_backend_t ggml_backend_sched_get_node_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
||||
GGML_API void ggml_backend_sched_set_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node, ggml_backend_t backend);
|
||||
GGML_API ggml_backend_t ggml_backend_sched_get_tensor_backend(ggml_backend_sched_t sched, struct ggml_tensor * node);
|
||||
|
||||
// Allocate and compute graph on the backend scheduler
|
||||
GGML_API bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||
GGML_API enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||
GGML_API enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph);
|
||||
GGML_API void ggml_backend_sched_synchronize(ggml_backend_sched_t sched);
|
||||
|
||||
// Reset all assignments and allocators - must be called before changing the node backends
|
||||
GGML_API void ggml_backend_sched_reset(ggml_backend_sched_t sched);
|
||||
|
||||
+408
-2
@@ -1,4 +1,408 @@
|
||||
#pragma once
|
||||
#ifndef GGML_COMMON_DECL
|
||||
|
||||
#if defined(GGML_COMMON_DECL_C)
|
||||
#include <stdint.h>
|
||||
|
||||
typedef uint16_t ggml_half;
|
||||
typedef uint32_t ggml_half2;
|
||||
|
||||
#define GGML_COMMON_AGGR
|
||||
|
||||
#define GGML_COMMON_DECL
|
||||
#elif defined(GGML_COMMON_DECL_METAL)
|
||||
#include <metal_stdlib>
|
||||
|
||||
typedef half ggml_half;
|
||||
typedef half2 ggml_half2;
|
||||
|
||||
#define GGML_COMMON_AGGR
|
||||
|
||||
#define GGML_COMMON_DECL
|
||||
#elif defined(GGML_COMMON_DECL_CUDA)
|
||||
#include <cuda_fp16.h>
|
||||
#include <cstdint>
|
||||
|
||||
typedef half ggml_half;
|
||||
typedef half2 ggml_half2;
|
||||
|
||||
#define GGML_COMMON_AGGR data
|
||||
|
||||
#define GGML_COMMON_DECL
|
||||
#elif defined(GGML_COMMON_DECL_HIP)
|
||||
#include <hip/hip_fp16.h>
|
||||
#include <cstdint>
|
||||
|
||||
typedef half ggml_half;
|
||||
typedef half2 ggml_half2;
|
||||
|
||||
#define GGML_COMMON_AGGR data
|
||||
|
||||
#define GGML_COMMON_DECL
|
||||
#elif defined(GGML_COMMON_DECL_SYCL)
|
||||
#include <sycl/half_type.hpp>
|
||||
#include <cstdint>
|
||||
|
||||
typedef sycl::half ggml_half;
|
||||
typedef sycl::half2 ggml_half2;
|
||||
|
||||
#define GGML_COMMON_AGGR data
|
||||
|
||||
#define GGML_COMMON_DECL
|
||||
#endif
|
||||
|
||||
#if defined(GGML_COMMON_DECL)
|
||||
|
||||
#ifndef __cplusplus
|
||||
#ifndef static_assert
|
||||
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201100L)
|
||||
#define static_assert(cond, msg) _Static_assert(cond, msg)
|
||||
#else
|
||||
#define static_assert(cond, msg) struct global_scope_noop_trick
|
||||
#endif
|
||||
#endif
|
||||
#endif // __cplusplus
|
||||
|
||||
// QK = number of values after dequantization
|
||||
// QK_K = super-block size
|
||||
|
||||
#ifdef GGML_QKK_64
|
||||
#define QK_K 64
|
||||
#define K_SCALE_SIZE 4
|
||||
#else
|
||||
#define QK_K 256
|
||||
#define K_SCALE_SIZE 12
|
||||
#endif // GGML_QKK_64
|
||||
|
||||
#if defined(GGML_COMMON_DECL_CUDA) || defined(GGML_COMMON_DECL_HIP) || defined(GGML_COMMON_DECL_SYCL)
|
||||
// QR = QK / number of values before dequantization
|
||||
// QI = number of 32 bit integers before dequantization
|
||||
|
||||
#define QI4_0 (QK4_0 / (4 * QR4_0))
|
||||
#define QR4_0 2
|
||||
|
||||
#define QI4_1 (QK4_1 / (4 * QR4_1))
|
||||
#define QR4_1 2
|
||||
|
||||
#define QI5_0 (QK5_0 / (4 * QR5_0))
|
||||
#define QR5_0 2
|
||||
|
||||
#define QI5_1 (QK5_1 / (4 * QR5_1))
|
||||
#define QR5_1 2
|
||||
|
||||
#define QI8_0 (QK8_0 / (4 * QR8_0))
|
||||
#define QR8_0 1
|
||||
|
||||
#define QI8_1 (QK8_1 / (4 * QR8_1))
|
||||
#define QR8_1 1
|
||||
|
||||
#define QI2_K (QK_K / (4*QR2_K))
|
||||
#define QR2_K 4
|
||||
|
||||
#define QI3_K (QK_K / (4*QR3_K))
|
||||
#define QR3_K 4
|
||||
|
||||
#define QI4_K (QK_K / (4*QR4_K))
|
||||
#define QR4_K 2
|
||||
|
||||
#define QI5_K (QK_K / (4*QR5_K))
|
||||
#define QR5_K 2
|
||||
|
||||
#define QI6_K (QK_K / (4*QR6_K))
|
||||
#define QR6_K 2
|
||||
|
||||
#define QI2_XXS (QK_K / (4*QR2_XXS))
|
||||
#define QR2_XXS 8
|
||||
|
||||
#define QI2_XS (QK_K / (4*QR2_XS))
|
||||
#define QR2_XS 8
|
||||
|
||||
#define QI2_S (QK_K / (4*QR2_S))
|
||||
#define QR2_S 8
|
||||
|
||||
#define QI3_XXS (QK_K / (4*QR3_XXS))
|
||||
#define QR3_XXS 8
|
||||
|
||||
#define QI3_XS (QK_K / (4*QR3_XS))
|
||||
#define QR3_XS 8
|
||||
|
||||
#define QI1_S (QK_K / (4*QR1_S))
|
||||
#define QR1_S 8
|
||||
|
||||
#define QI4_NL (QK4_NL / (4*QR4_NL))
|
||||
#define QR4_NL 2
|
||||
|
||||
#if QK_K == 64
|
||||
#define QI4_XS QI4_NL
|
||||
#define QR4_XS QR4_NL
|
||||
#else
|
||||
#define QI4_XS (QK_K / (4*QR4_XS))
|
||||
#define QR4_XS 8
|
||||
#endif
|
||||
|
||||
#endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP || GGML_COMMON_DECL_SYCL
|
||||
|
||||
#define QK4_0 32
|
||||
typedef struct {
|
||||
ggml_half d; // delta
|
||||
uint8_t qs[QK4_0 / 2]; // nibbles / quants
|
||||
} block_q4_0;
|
||||
static_assert(sizeof(block_q4_0) == sizeof(ggml_half) + QK4_0 / 2, "wrong q4_0 block size/padding");
|
||||
|
||||
#define QK4_1 32
|
||||
typedef struct {
|
||||
union {
|
||||
struct {
|
||||
ggml_half d; // delta
|
||||
ggml_half m; // min
|
||||
} GGML_COMMON_AGGR;
|
||||
ggml_half2 dm;
|
||||
};
|
||||
uint8_t qs[QK4_1 / 2]; // nibbles / quants
|
||||
} block_q4_1;
|
||||
static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_half) + QK4_1 / 2, "wrong q4_1 block size/padding");
|
||||
|
||||
#define QK5_0 32
|
||||
typedef struct {
|
||||
ggml_half d; // delta
|
||||
uint8_t qh[4]; // 5-th bit of quants
|
||||
uint8_t qs[QK5_0 / 2]; // nibbles / quants
|
||||
} block_q5_0;
|
||||
static_assert(sizeof(block_q5_0) == sizeof(ggml_half) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding");
|
||||
|
||||
#define QK5_1 32
|
||||
typedef struct {
|
||||
union {
|
||||
struct {
|
||||
ggml_half d; // delta
|
||||
ggml_half m; // min
|
||||
} GGML_COMMON_AGGR;
|
||||
ggml_half2 dm;
|
||||
};
|
||||
uint8_t qh[4]; // 5-th bit of quants
|
||||
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
||||
} block_q5_1;
|
||||
static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_half) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
|
||||
|
||||
#define QK8_0 32
|
||||
typedef struct {
|
||||
ggml_half d; // delta
|
||||
int8_t qs[QK8_0]; // quants
|
||||
} block_q8_0;
|
||||
static_assert(sizeof(block_q8_0) == sizeof(ggml_half) + QK8_0, "wrong q8_0 block size/padding");
|
||||
|
||||
#define QK8_1 32
|
||||
typedef struct {
|
||||
union {
|
||||
struct {
|
||||
ggml_half d; // delta
|
||||
ggml_half s; // d * sum(qs[i])
|
||||
} GGML_COMMON_AGGR;
|
||||
ggml_half2 ds;
|
||||
};
|
||||
int8_t qs[QK8_1]; // quants
|
||||
} block_q8_1;
|
||||
static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_half) + QK8_1, "wrong q8_1 block size/padding");
|
||||
|
||||
//
|
||||
// Super-block quantization structures
|
||||
//
|
||||
|
||||
// 2-bit quantization
|
||||
// weight is represented as x = a * q + b
|
||||
// 16 blocks of 16 elements each
|
||||
// Effectively 2.625 bits per weight
|
||||
typedef struct {
|
||||
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
||||
uint8_t qs[QK_K/4]; // quants
|
||||
union {
|
||||
struct {
|
||||
ggml_half d; // super-block scale for quantized scales
|
||||
ggml_half dmin; // super-block scale for quantized mins
|
||||
} GGML_COMMON_AGGR;
|
||||
ggml_half2 dm;
|
||||
};
|
||||
} block_q2_K;
|
||||
static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_half) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
|
||||
|
||||
// 3-bit quantization
|
||||
// weight is represented as x = a * q
|
||||
// 16 blocks of 16 elements each
|
||||
// Effectively 3.4375 bits per weight
|
||||
#ifdef GGML_QKK_64
|
||||
typedef struct {
|
||||
uint8_t hmask[QK_K/8]; // quants - high bit
|
||||
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
||||
uint8_t scales[2];
|
||||
ggml_half d; // super-block scale
|
||||
} block_q3_K;
|
||||
static_assert(sizeof(block_q3_K) == sizeof(ggml_half) + QK_K / 4 + QK_K / 8 + 2, "wrong q3_K block size/padding");
|
||||
#else
|
||||
typedef struct {
|
||||
uint8_t hmask[QK_K/8]; // quants - high bit
|
||||
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
||||
uint8_t scales[12]; // scales, quantized with 6 bits
|
||||
ggml_half d; // super-block scale
|
||||
} block_q3_K;
|
||||
static_assert(sizeof(block_q3_K) == sizeof(ggml_half) + QK_K / 4 + QK_K / 8 + 12, "wrong q3_K block size/padding");
|
||||
#endif
|
||||
|
||||
// 4-bit quantization
|
||||
// 8 blocks of 32 elements each
|
||||
// weight is represented as x = a * q + b
|
||||
// Effectively 4.5 bits per weight
|
||||
#ifdef GGML_QKK_64
|
||||
typedef struct {
|
||||
ggml_half d[2]; // super-block scales/mins
|
||||
uint8_t scales[2]; // 4-bit block scales/mins
|
||||
uint8_t qs[QK_K/2]; // 4-bit quants
|
||||
} block_q4_K;
|
||||
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_half) + QK_K/2 + 2, "wrong q4_K block size/padding");
|
||||
#else
|
||||
typedef struct {
|
||||
union {
|
||||
struct {
|
||||
ggml_half d; // super-block scale for quantized scales
|
||||
ggml_half dmin; // super-block scale for quantized mins
|
||||
} GGML_COMMON_AGGR;
|
||||
ggml_half2 dm;
|
||||
};
|
||||
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
||||
uint8_t qs[QK_K/2]; // 4-bit quants
|
||||
} block_q4_K;
|
||||
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_half) + K_SCALE_SIZE + QK_K/2, "wrong q4_K block size/padding");
|
||||
#endif
|
||||
|
||||
// 5-bit quantization
|
||||
// 8 blocks of 32 elements each
|
||||
// weight is represented as x = a * q + b
|
||||
// Effectively 5.5 bits per weight
|
||||
#ifdef GGML_QKK_64
|
||||
typedef struct {
|
||||
ggml_half d; // super-block scale
|
||||
int8_t scales[QK_K/16]; // 8-bit block scales
|
||||
uint8_t qh[QK_K/8]; // quants, high bit
|
||||
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
||||
} block_q5_K;
|
||||
static_assert(sizeof(block_q5_K) == sizeof(ggml_half) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding");
|
||||
#else
|
||||
typedef struct {
|
||||
union {
|
||||
struct {
|
||||
ggml_half d; // super-block scale for quantized scales
|
||||
ggml_half dmin; // super-block scale for quantized mins
|
||||
} GGML_COMMON_AGGR;
|
||||
ggml_half2 dm;
|
||||
};
|
||||
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
||||
uint8_t qh[QK_K/8]; // quants, high bit
|
||||
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
||||
} block_q5_K;
|
||||
static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_half) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding");
|
||||
#endif
|
||||
|
||||
// 6-bit quantization
|
||||
// weight is represented as x = a * q
|
||||
// 16 blocks of 16 elements each
|
||||
// Effectively 6.5625 bits per weight
|
||||
typedef struct {
|
||||
uint8_t ql[QK_K/2]; // quants, lower 4 bits
|
||||
uint8_t qh[QK_K/4]; // quants, upper 2 bits
|
||||
int8_t scales[QK_K/16]; // scales, quantized with 8 bits
|
||||
ggml_half d; // super-block scale
|
||||
} block_q6_K;
|
||||
static_assert(sizeof(block_q6_K) == sizeof(ggml_half) + QK_K / 16 + 3*QK_K/4, "wrong q6_K block size/padding");
|
||||
|
||||
// This is only used for intermediate quantization and dot products
|
||||
typedef struct {
|
||||
float d; // delta
|
||||
int8_t qs[QK_K]; // quants
|
||||
int16_t bsums[QK_K/16]; // sum of quants in groups of 16
|
||||
} block_q8_K;
|
||||
static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_t), "wrong q8_K block size/padding");
|
||||
|
||||
// (Almost) "true" 2-bit quantization.
|
||||
// Due to the need to use blocks as per ggml design, it ends up using
|
||||
// 2.0625 bpw because of the 16-bit scale for each block of 256.
|
||||
typedef struct {
|
||||
ggml_half d;
|
||||
uint16_t qs[QK_K/8];
|
||||
} block_iq2_xxs;
|
||||
static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_half) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding");
|
||||
|
||||
// 2.3125 bpw quants
|
||||
typedef struct {
|
||||
ggml_half d;
|
||||
uint16_t qs[QK_K/8];
|
||||
uint8_t scales[QK_K/32];
|
||||
} block_iq2_xs;
|
||||
static_assert(sizeof(block_iq2_xs) == sizeof(ggml_half) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding");
|
||||
|
||||
// 2.5625 bpw quants
|
||||
typedef struct {
|
||||
ggml_half d;
|
||||
uint8_t qs[QK_K/4];
|
||||
uint8_t qh[QK_K/32];
|
||||
uint8_t scales[QK_K/32];
|
||||
} block_iq2_s;
|
||||
static_assert(sizeof(block_iq2_s) == sizeof(ggml_half) + QK_K/4 + QK_K/16, "wrong iq2_s block size/padding");
|
||||
|
||||
// (Almost) "true" 3-bit quantization.
|
||||
// Due to the need to use blocks as per ggml design, it ends up using
|
||||
// 3.0625 bpw because of the 16-bit scale for each block of 256.
|
||||
typedef struct {
|
||||
ggml_half d;
|
||||
uint8_t qs[3*QK_K/8];
|
||||
} block_iq3_xxs;
|
||||
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_half) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
|
||||
|
||||
// 3.4375 bpw
|
||||
#if QK_K == 64
|
||||
#define IQ3S_N_SCALE 2
|
||||
#else
|
||||
#define IQ3S_N_SCALE QK_K/64
|
||||
#endif
|
||||
typedef struct {
|
||||
ggml_half d;
|
||||
uint8_t qs[QK_K/4];
|
||||
uint8_t qh[QK_K/32];
|
||||
uint8_t signs[QK_K/8];
|
||||
uint8_t scales[IQ3S_N_SCALE];
|
||||
} block_iq3_s;
|
||||
static_assert(sizeof(block_iq3_s) == sizeof(ggml_half) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding");
|
||||
|
||||
typedef struct {
|
||||
ggml_half d;
|
||||
uint8_t qs[QK_K/8];
|
||||
uint16_t qh[QK_K/32];
|
||||
} block_iq1_s;
|
||||
static_assert(sizeof(block_iq1_s) == sizeof(ggml_half) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding");
|
||||
|
||||
// Non-linear quants
|
||||
#define QK4_NL 32
|
||||
typedef struct {
|
||||
ggml_half d;
|
||||
uint8_t qs[QK4_NL/2];
|
||||
} block_iq4_nl;
|
||||
static_assert(sizeof(block_iq4_nl) == sizeof(ggml_half) + QK4_NL/2, "wrong iq4_nl block size/padding");
|
||||
|
||||
#if QK_K == 64
|
||||
#define block_iq4_xs block_iq4_nl
|
||||
#else
|
||||
typedef struct {
|
||||
ggml_half d;
|
||||
uint16_t scales_h;
|
||||
uint8_t scales_l[QK_K/64];
|
||||
uint8_t qs[QK_K/2];
|
||||
} block_iq4_xs;
|
||||
static_assert(sizeof(block_iq4_xs) == sizeof(ggml_half) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
|
||||
#endif
|
||||
|
||||
#endif // GGML_COMMON_DECL
|
||||
#endif // GGML_COMMON_DECL
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef GGML_COMMON_IMPL
|
||||
|
||||
#if defined(GGML_COMMON_IMPL_C)
|
||||
#include <stdint.h>
|
||||
@@ -14,7 +418,7 @@
|
||||
#define GGML_TABLE_END() };
|
||||
|
||||
#define GGML_COMMON_IMPL
|
||||
#elif defined(GGML_COMMON_IMPL_CUDA)
|
||||
#elif defined(GGML_COMMON_IMPL_CUDA) || defined(GGML_COMMON_IMPL_HIP)
|
||||
#include <cstdint>
|
||||
|
||||
#define GGML_TABLE_BEGIN(type, name, size) static const __device__ type name[size] = {
|
||||
@@ -645,6 +1049,7 @@ GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
|
||||
GGML_TABLE_END()
|
||||
|
||||
#define NGRID_IQ1S 2048
|
||||
#define IQ1S_DELTA 0.125f
|
||||
#if defined(GGML_COMMON_IMPL_C)
|
||||
GGML_TABLE_BEGIN(uint64_t, iq1s_grid, NGRID_IQ1S)
|
||||
0xffffffffffffffff, 0xffffffffffffff01, 0xffffffffffff0000, 0xffffffffffff01ff,
|
||||
@@ -1422,3 +1827,4 @@ GGML_TABLE_END()
|
||||
#endif
|
||||
|
||||
#endif // GGML_COMMON_IMPL
|
||||
#endif // GGML_COMMON_IMPL
|
||||
|
||||
+173
-282
@@ -2,7 +2,13 @@
|
||||
#include "ggml.h"
|
||||
#include "ggml-backend-impl.h"
|
||||
|
||||
#if defined(GGML_USE_HIPBLAS)
|
||||
#define GGML_COMMON_DECL_HIP
|
||||
#define GGML_COMMON_IMPL_HIP
|
||||
#else
|
||||
#define GGML_COMMON_DECL_CUDA
|
||||
#define GGML_COMMON_IMPL_CUDA
|
||||
#endif
|
||||
#include "ggml-common.h"
|
||||
|
||||
#include <algorithm>
|
||||
@@ -66,6 +72,7 @@
|
||||
#define cudaEventCreateWithFlags hipEventCreateWithFlags
|
||||
#define cudaEventDisableTiming hipEventDisableTiming
|
||||
#define cudaEventRecord hipEventRecord
|
||||
#define cudaEventSynchronize hipEventSynchronize
|
||||
#define cudaEvent_t hipEvent_t
|
||||
#define cudaEventDestroy hipEventDestroy
|
||||
#define cudaFree hipFree
|
||||
@@ -75,6 +82,7 @@
|
||||
#define cudaGetDeviceProperties hipGetDeviceProperties
|
||||
#define cudaGetErrorString hipGetErrorString
|
||||
#define cudaGetLastError hipGetLastError
|
||||
#define cudaLaunchHostFunc hipLaunchHostFunc
|
||||
#ifdef GGML_HIP_UMA
|
||||
#define cudaMalloc hipMallocManaged
|
||||
#define cudaMallocHost(ptr, size) hipHostMalloc(ptr, size)
|
||||
@@ -98,6 +106,7 @@
|
||||
#define cudaStreamCreateWithFlags hipStreamCreateWithFlags
|
||||
#define cudaStreamFireAndForget hipStreamFireAndForget
|
||||
#define cudaStreamNonBlocking hipStreamNonBlocking
|
||||
#define cudaStreamPerThread hipStreamPerThread
|
||||
#define cudaStreamSynchronize hipStreamSynchronize
|
||||
#define cudaStreamWaitEvent(stream, event, flags) hipStreamWaitEvent(stream, event, flags)
|
||||
#define cudaStream_t hipStream_t
|
||||
@@ -359,66 +368,6 @@ typedef void (*ggml_cuda_op_flatten_t)(
|
||||
const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst,
|
||||
const float * src0_dd, const float * src1_dd, float * dst_dd, cudaStream_t main_stream);
|
||||
|
||||
// QK = number of values after dequantization
|
||||
// QR = QK / number of values before dequantization
|
||||
// QI = number of 32 bit integers before dequantization
|
||||
|
||||
#define QK4_0 32
|
||||
#define QR4_0 2
|
||||
#define QI4_0 (QK4_0 / (4 * QR4_0))
|
||||
typedef struct {
|
||||
half d; // delta
|
||||
uint8_t qs[QK4_0 / 2]; // nibbles / quants
|
||||
} block_q4_0;
|
||||
static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding");
|
||||
|
||||
#define QK4_1 32
|
||||
#define QR4_1 2
|
||||
#define QI4_1 (QK4_1 / (4 * QR4_1))
|
||||
typedef struct {
|
||||
half2 dm; // dm.x = delta, dm.y = min
|
||||
uint8_t qs[QK4_1 / 2]; // nibbles / quants
|
||||
} block_q4_1;
|
||||
static_assert(sizeof(block_q4_1) == sizeof(ggml_fp16_t) * 2 + QK4_1 / 2, "wrong q4_1 block size/padding");
|
||||
|
||||
#define QK5_0 32
|
||||
#define QR5_0 2
|
||||
#define QI5_0 (QK5_0 / (4 * QR5_0))
|
||||
typedef struct {
|
||||
half d; // delta
|
||||
uint8_t qh[4]; // 5-th bit of quants
|
||||
uint8_t qs[QK5_0 / 2]; // nibbles / quants
|
||||
} block_q5_0;
|
||||
static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding");
|
||||
|
||||
#define QK5_1 32
|
||||
#define QR5_1 2
|
||||
#define QI5_1 (QK5_1 / (4 * QR5_1))
|
||||
typedef struct {
|
||||
half2 dm; // dm.x = delta, dm.y = min
|
||||
uint8_t qh[4]; // 5-th bit of quants
|
||||
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
||||
} block_q5_1;
|
||||
static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
|
||||
|
||||
#define QK8_0 32
|
||||
#define QR8_0 1
|
||||
#define QI8_0 (QK8_0 / (4 * QR8_0))
|
||||
typedef struct {
|
||||
half d; // delta
|
||||
int8_t qs[QK8_0]; // quants
|
||||
} block_q8_0;
|
||||
static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding");
|
||||
|
||||
#define QK8_1 32
|
||||
#define QR8_1 1
|
||||
#define QI8_1 (QK8_1 / (4 * QR8_1))
|
||||
typedef struct {
|
||||
half2 ds; // ds.x = delta, ds.y = sum
|
||||
int8_t qs[QK8_0]; // quants
|
||||
} block_q8_1;
|
||||
static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_fp16_t) + QK8_0, "wrong q8_1 block size/padding");
|
||||
|
||||
typedef float (*vec_dot_q_cuda_t)(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs);
|
||||
typedef void (*allocate_tiles_cuda_t)(int ** x_ql, half2 ** x_dm, int ** x_qh, int ** x_sc);
|
||||
typedef void (*load_tiles_cuda_t)(
|
||||
@@ -428,174 +377,6 @@ typedef float (*vec_dot_q_mul_mat_cuda_t)(
|
||||
const int * __restrict__ x_ql, const half2 * __restrict__ x_dm, const int * __restrict__ x_qh, const int * __restrict__ x_sc,
|
||||
const int * __restrict__ y_qs, const half2 * __restrict__ y_ms, const int & i, const int & j, const int & k);
|
||||
|
||||
//================================= k-quants
|
||||
|
||||
#ifdef GGML_QKK_64
|
||||
#define QK_K 64
|
||||
#define K_SCALE_SIZE 4
|
||||
#else
|
||||
#define QK_K 256
|
||||
#define K_SCALE_SIZE 12
|
||||
#endif
|
||||
|
||||
#define QR2_K 4
|
||||
#define QI2_K (QK_K / (4*QR2_K))
|
||||
typedef struct {
|
||||
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
||||
uint8_t qs[QK_K/4]; // quants
|
||||
half2 dm; // super-block scale for quantized scales/mins
|
||||
} block_q2_K;
|
||||
static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
|
||||
|
||||
#define QR3_K 4
|
||||
#define QI3_K (QK_K / (4*QR3_K))
|
||||
typedef struct {
|
||||
uint8_t hmask[QK_K/8]; // quants - high bit
|
||||
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
||||
#ifdef GGML_QKK_64
|
||||
uint8_t scales[2]; // scales, quantized with 8 bits
|
||||
#else
|
||||
uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
|
||||
#endif
|
||||
half d; // super-block scale
|
||||
} block_q3_K;
|
||||
//static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + K_SCALE_SIZE, "wrong q3_K block size/padding");
|
||||
|
||||
#define QR4_K 2
|
||||
#define QI4_K (QK_K / (4*QR4_K))
|
||||
#ifdef GGML_QKK_64
|
||||
typedef struct {
|
||||
half dm[2]; // super-block scales/mins
|
||||
uint8_t scales[2]; // 4-bit block scales/mins
|
||||
uint8_t qs[QK_K/2]; // 4--bit quants
|
||||
} block_q4_K;
|
||||
static_assert(sizeof(block_q4_K) == sizeof(half2) + QK_K/2 + 2, "wrong q4_K block size/padding");
|
||||
#else
|
||||
typedef struct {
|
||||
half2 dm; // super-block scale for quantized scales/mins
|
||||
uint8_t scales[3*QK_K/64]; // scales, quantized with 6 bits
|
||||
uint8_t qs[QK_K/2]; // 4--bit quants
|
||||
} block_q4_K;
|
||||
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + 3*QK_K/64 + QK_K/2, "wrong q4_K block size/padding");
|
||||
#endif
|
||||
|
||||
#define QR5_K 2
|
||||
#define QI5_K (QK_K / (4*QR5_K))
|
||||
#ifdef GGML_QKK_64
|
||||
typedef struct {
|
||||
half d; // super-block scale
|
||||
int8_t scales[QK_K/16]; // block scales
|
||||
uint8_t qh[QK_K/8]; // quants, high bit
|
||||
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
||||
} block_q5_K;
|
||||
static_assert(sizeof(block_q5_K) == sizeof(ggml_fp16_t) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding");
|
||||
#else
|
||||
typedef struct {
|
||||
half2 dm; // super-block scale for quantized scales/mins
|
||||
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
||||
uint8_t qh[QK_K/8]; // quants, high bit
|
||||
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
||||
} block_q5_K;
|
||||
static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding");
|
||||
#endif
|
||||
|
||||
#define QR6_K 2
|
||||
#define QI6_K (QK_K / (4*QR6_K))
|
||||
typedef struct {
|
||||
uint8_t ql[QK_K/2]; // quants, lower 4 bits
|
||||
uint8_t qh[QK_K/4]; // quants, upper 2 bits
|
||||
int8_t scales[QK_K/16]; // scales
|
||||
half d; // delta
|
||||
} block_q6_K;
|
||||
static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_K block size/padding");
|
||||
|
||||
#define QR2_XXS 8
|
||||
#define QI2_XXS (QK_K / (4*QR2_XXS))
|
||||
typedef struct {
|
||||
half d;
|
||||
uint16_t qs[QK_K/8];
|
||||
} block_iq2_xxs;
|
||||
static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding");
|
||||
|
||||
#define QR2_XS 8
|
||||
#define QI2_XS (QK_K / (4*QR2_XS))
|
||||
typedef struct {
|
||||
half d;
|
||||
uint16_t qs[QK_K/8];
|
||||
uint8_t scales[QK_K/32];
|
||||
} block_iq2_xs;
|
||||
static_assert(sizeof(block_iq2_xs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding");
|
||||
|
||||
// 2.5625 bpw quants
|
||||
#define QR2_S 8
|
||||
#define QI2_S (QK_K / (4*QR2_S))
|
||||
typedef struct {
|
||||
half d;
|
||||
uint8_t qs[QK_K/4];
|
||||
uint8_t qh[QK_K/32];
|
||||
uint8_t scales[QK_K/32];
|
||||
} block_iq2_s;
|
||||
static_assert(sizeof(block_iq2_s) == sizeof(ggml_fp16_t) + QK_K/4 + QK_K/16, "wrong iq2_s block size/padding");
|
||||
|
||||
#define QR3_XXS 8
|
||||
#define QI3_XXS (QK_K / (4*QR3_XXS))
|
||||
typedef struct {
|
||||
half d;
|
||||
uint8_t qs[3*(QK_K/8)];
|
||||
} block_iq3_xxs;
|
||||
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
|
||||
|
||||
#define QR3_XS 8
|
||||
#define QI3_XS (QK_K / (4*QR3_XS))
|
||||
#if QK_K == 64
|
||||
#define IQ3S_N_SCALE 2
|
||||
#else
|
||||
#define IQ3S_N_SCALE QK_K/64
|
||||
#endif
|
||||
typedef struct {
|
||||
half d;
|
||||
uint8_t qs[QK_K/4];
|
||||
uint8_t qh[QK_K/32];
|
||||
uint8_t signs[QK_K/8];
|
||||
uint8_t scales[IQ3S_N_SCALE];
|
||||
} block_iq3_s;
|
||||
static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding");
|
||||
|
||||
#define QR1_S 8
|
||||
#define QI1_S (QK_K / (4*QR1_S))
|
||||
typedef struct {
|
||||
half d;
|
||||
uint8_t qs[QK_K/8];
|
||||
uint16_t qh[QK_K/32];
|
||||
} block_iq1_s;
|
||||
static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding");
|
||||
|
||||
#define QK4_NL 32
|
||||
#define QR4_NL 2
|
||||
#define QI4_NL (QK4_NL / (4*QR4_NL))
|
||||
typedef struct {
|
||||
half d;
|
||||
uint8_t qs[QK4_NL/2];
|
||||
} block_iq4_nl;
|
||||
static_assert(sizeof(block_iq4_nl) == sizeof(ggml_fp16_t) + QK4_NL/2, "wrong iq4_nl block size/padding");
|
||||
|
||||
#if QK_K == 64
|
||||
#define block_iq4_xs block_iq4_nl
|
||||
#define QR4_XS QR4_NL
|
||||
#define QI4_XS QI4_NL
|
||||
#else
|
||||
// QR4_XS = 8 is very slightly faster than QR4_XS = 4
|
||||
#define QR4_XS 8
|
||||
#define QI4_XS (QK_K / (4*QR4_XS))
|
||||
typedef struct {
|
||||
half d;
|
||||
uint16_t scales_h;
|
||||
uint8_t scales_l[QK_K/64];
|
||||
uint8_t qs[QK_K/2];
|
||||
} block_iq4_xs;
|
||||
static_assert(sizeof(block_iq4_xs) == sizeof(ggml_fp16_t) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
|
||||
#endif
|
||||
|
||||
#define WARP_SIZE 32
|
||||
#define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
|
||||
|
||||
@@ -1722,22 +1503,15 @@ static __global__ void dequantize_block_iq1_s(const void * __restrict__ vx, dst_
|
||||
const int il = tid/8; // 0...3
|
||||
const int ib = tid%8; // 0...7
|
||||
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
|
||||
const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 0xf) + 1);
|
||||
#if __CUDA_ARCH__ >= MIN_CC_DP4A // lowest compute capability for integer intrinsics
|
||||
int grid32[2]; const int8_t * q = (const int8_t *)grid32;
|
||||
grid32[0] = *((const int *)(iq1s_grid_gpu + (x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8))));
|
||||
grid32[1] = __vsub4((grid32[0] >> 4) & 0x0f0f0f0f, 0x01010101);
|
||||
grid32[0] = __vsub4(grid32[0] & 0x0f0f0f0f, 0x01010101);
|
||||
const float delta = x[i].qh[ib] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA;
|
||||
const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 7) + 1);
|
||||
uint32_t grid32[2]; const int8_t * q = (const int8_t *)grid32;
|
||||
grid32[0] = iq1s_grid_gpu[x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8)];
|
||||
grid32[1] = (grid32[0] >> 4) & 0x0f0f0f0f;
|
||||
grid32[0] &= 0x0f0f0f0f;
|
||||
for (int j = 0; j < 8; ++j) {
|
||||
y[j] = d * q[j];
|
||||
y[j] = d * (q[j] + delta);
|
||||
}
|
||||
#else
|
||||
const uint8_t * grid = (const uint8_t *)(iq1s_grid_gpu + (x[i].qs[4*ib+il] | (((x[i].qh[ib] >> 3*il) & 7) << 8)));
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
y[j+0] = d * ((grid[j] & 0xf) - 1);
|
||||
y[j+4] = d * ((grid[j] >> 4) - 1);
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
assert(false);
|
||||
#endif
|
||||
@@ -3577,7 +3351,7 @@ static __device__ __forceinline__ float vec_dot_q2_K_q8_1(
|
||||
#pragma unroll
|
||||
for (int i = 0; i < QR2_K; ++ i) {
|
||||
u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + i].qs, iqs % QI8_1);
|
||||
d8[i] = __low2half(bq8_1[bq8_offset + i].ds);
|
||||
d8[i] = __low2float(bq8_1[bq8_offset + i].ds);
|
||||
}
|
||||
|
||||
return vec_dot_q2_K_q8_1_impl_mmvq(v, u, scales, bq2_K->dm, d8);
|
||||
@@ -3699,7 +3473,7 @@ static __device__ __forceinline__ float vec_dot_q3_K_q8_1(
|
||||
#pragma unroll
|
||||
for (int i = 0; i < QR3_K; ++i) {
|
||||
u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + i].qs, iqs % QI8_1);
|
||||
d8[i] = __low2half(bq8_1[bq8_offset + i].ds);
|
||||
d8[i] = __low2float(bq8_1[bq8_offset + i].ds);
|
||||
}
|
||||
|
||||
return vec_dot_q3_K_q8_1_impl_mmvq(vl, vh, u, bq3_K->scales, scale_offset, d, d8);
|
||||
@@ -3868,7 +3642,7 @@ static __device__ __forceinline__ float vec_dot_q4_K_q8_1(
|
||||
|
||||
for (int i = 0; i < QR4_K; ++i) {
|
||||
const block_q8_1 * bq8i = bq8_1 + bq8_offset + i;
|
||||
d8[i] = __low2half(bq8i->ds);
|
||||
d8[i] = __low2float(bq8i->ds);
|
||||
|
||||
const int * q8 = (const int *)bq8i->qs + ((iqs/2)%4);
|
||||
u[2*i+0] = q8[0];
|
||||
@@ -4233,7 +4007,7 @@ static __device__ __forceinline__ float vec_dot_q6_K_q8_1(
|
||||
#pragma unroll
|
||||
for (int i = 0; i < QR6_K; ++i) {
|
||||
u[i] = get_int_from_int8_aligned(bq8_1[bq8_offset + 2*i].qs, iqs % QI8_1);
|
||||
d8[i] = __low2half(bq8_1[bq8_offset + 2*i].ds);
|
||||
d8[i] = __low2float(bq8_1[bq8_offset + 2*i].ds);
|
||||
}
|
||||
|
||||
return vec_dot_q6_K_q8_1_impl_mmvq(vl, vh, u, scales, bq6_K->d, d8);
|
||||
@@ -4560,22 +4334,25 @@ static __device__ __forceinline__ float vec_dot_iq1_s_q8_1(
|
||||
const int * q8 = (const int *)bq8_1[ib32].qs;
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
const int * grid = (const int *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8)));
|
||||
int grid0 = __vsub4(grid[0] & 0x0f0f0f0f, 0x01010101);
|
||||
int grid1 = __vsub4((grid[0] >> 4) & 0x0f0f0f0f, 0x01010101);
|
||||
int grid0 = grid[0] & 0x0f0f0f0f;
|
||||
int grid1 = (grid[0] >> 4) & 0x0f0f0f0f;
|
||||
sumi = __dp4a(q8[2*l+1], grid1, __dp4a(q8[2*l+0], grid0, sumi));
|
||||
}
|
||||
#else
|
||||
const int8_t * q8 = bq8_1[ib32].qs;
|
||||
const int8_t * q8 = bq8_1[ib32].qs;
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
const uint8_t * grid = (const uint8_t *)(iq1s_grid_gpu + (bq1->qs[4*ib32+l] | (((bq1->qh[ib32] >> 3*l) & 7) << 8)));
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
sumi += q8[j] * ((grid[j] & 0xf) - 1) + q8[j+4] * ((grid[j] >> 4) - 1);
|
||||
sumi += q8[j] * (grid[j] & 0xf) + q8[j+4] * (grid[j] >> 4);
|
||||
}
|
||||
q8 += 8;
|
||||
}
|
||||
#endif
|
||||
const float d = (float)bq1->d * __low2float(bq8_1[ib32].ds);
|
||||
return d * sumi * (2*(bq1->qh[ib32] >> 12) + 1);
|
||||
const float delta = bq1->qh[ib32] & 0x8000 ? -1-IQ1S_DELTA : -1+IQ1S_DELTA;
|
||||
const float d1q = (float)bq1->d * (2*((bq1->qh[ib32] >> 12) & 7) + 1);
|
||||
const float d = d1q * __low2float (bq8_1[ib32].ds);
|
||||
const float m = d1q * __high2float(bq8_1[ib32].ds);
|
||||
return d * sumi + m * delta;
|
||||
#else
|
||||
assert(false);
|
||||
return 0.f;
|
||||
@@ -4767,7 +4544,7 @@ static __device__ __forceinline__ void mul_mat_q(
|
||||
*dsi_dst = *dsi_src;
|
||||
} else {
|
||||
float * dfi_dst = (float *) dsi_dst;
|
||||
*dfi_dst = __low2half(*dsi_src);
|
||||
*dfi_dst = __low2float(*dsi_src);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10867,8 +10644,20 @@ GGML_CALL void ggml_cuda_get_device_description(int device, char * description,
|
||||
#define UNUSED GGML_UNUSED
|
||||
|
||||
struct ggml_backend_cuda_context {
|
||||
explicit ggml_backend_cuda_context(int device) :
|
||||
device(device),
|
||||
name(GGML_CUDA_NAME + std::to_string(device)) {
|
||||
}
|
||||
|
||||
~ggml_backend_cuda_context() {
|
||||
if (copy_event != nullptr) {
|
||||
CUDA_CHECK(cudaEventDestroy(copy_event));
|
||||
}
|
||||
}
|
||||
|
||||
int device;
|
||||
std::string name;
|
||||
cudaEvent_t copy_event = nullptr;
|
||||
};
|
||||
|
||||
// cuda buffer
|
||||
@@ -10958,9 +10747,8 @@ GGML_CALL static void ggml_backend_cuda_buffer_set_tensor(ggml_backend_buffer_t
|
||||
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
||||
|
||||
ggml_cuda_set_device(ctx->device);
|
||||
CUDA_CHECK(cudaDeviceSynchronize());
|
||||
CUDA_CHECK(cudaMemcpy((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice));
|
||||
CUDA_CHECK(cudaDeviceSynchronize());
|
||||
CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, cudaStreamPerThread));
|
||||
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
||||
}
|
||||
|
||||
GGML_CALL static void ggml_backend_cuda_buffer_get_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
||||
@@ -10969,26 +10757,25 @@ GGML_CALL static void ggml_backend_cuda_buffer_get_tensor(ggml_backend_buffer_t
|
||||
ggml_backend_cuda_buffer_context * ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
||||
|
||||
ggml_cuda_set_device(ctx->device);
|
||||
CUDA_CHECK(cudaDeviceSynchronize());
|
||||
CUDA_CHECK(cudaMemcpy(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost));
|
||||
CUDA_CHECK(cudaDeviceSynchronize());
|
||||
CUDA_CHECK(cudaMemcpyAsync(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost, cudaStreamPerThread));
|
||||
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
||||
}
|
||||
|
||||
GGML_CALL static bool ggml_backend_cuda_buffer_cpy_tensor(ggml_backend_buffer_t buffer, const ggml_tensor * src, ggml_tensor * dst) {
|
||||
if (ggml_backend_buffer_is_cuda(src->buffer)) {
|
||||
ggml_backend_cuda_buffer_context * src_ctx = (ggml_backend_cuda_buffer_context *)src->buffer->context;
|
||||
ggml_backend_cuda_buffer_context * dst_ctx = (ggml_backend_cuda_buffer_context *)buffer->context;
|
||||
|
||||
ggml_cuda_set_device(src_ctx->device);
|
||||
CUDA_CHECK(cudaDeviceSynchronize());
|
||||
ggml_cuda_set_device(dst_ctx->device);
|
||||
CUDA_CHECK(cudaDeviceSynchronize());
|
||||
CUDA_CHECK(cudaMemcpy((char *)dst->data, (const char *)src->data, ggml_nbytes(src), cudaMemcpyDeviceToDevice));
|
||||
CUDA_CHECK(cudaDeviceSynchronize());
|
||||
|
||||
ggml_backend_cuda_buffer_context * dst_ctx = (ggml_backend_cuda_buffer_context *)dst->buffer->context;
|
||||
if (src_ctx->device == dst_ctx->device) {
|
||||
CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(src), cudaMemcpyDeviceToDevice, cudaStreamPerThread));
|
||||
} else {
|
||||
CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, dst_ctx->device, src->data, src_ctx->device, ggml_nbytes(src), cudaStreamPerThread));
|
||||
}
|
||||
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
UNUSED(buffer);
|
||||
}
|
||||
|
||||
GGML_CALL static void ggml_backend_cuda_buffer_clear(ggml_backend_buffer_t buffer, uint8_t value) {
|
||||
@@ -11233,7 +11020,11 @@ GGML_CALL static void ggml_backend_cuda_split_buffer_set_tensor(ggml_backend_buf
|
||||
}
|
||||
|
||||
const char * buf_host = (const char *)data + offset_split;
|
||||
CUDA_CHECK(cudaMemcpy(extra->data_device[id], buf_host, original_size, cudaMemcpyHostToDevice));
|
||||
CUDA_CHECK(cudaMemcpyAsync(extra->data_device[id], buf_host, original_size, cudaMemcpyHostToDevice, cudaStreamPerThread));
|
||||
}
|
||||
|
||||
for (int id = 0; id < g_device_count; ++id) {
|
||||
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11267,7 +11058,11 @@ GGML_CALL static void ggml_backend_cuda_split_buffer_get_tensor(ggml_backend_buf
|
||||
}
|
||||
|
||||
char * buf_host = (char *)data + offset_split;
|
||||
CUDA_CHECK(cudaMemcpy(buf_host, extra->data_device[id], original_size, cudaMemcpyDeviceToHost));
|
||||
CUDA_CHECK(cudaMemcpyAsync(buf_host, extra->data_device[id], original_size, cudaMemcpyDeviceToHost, cudaStreamPerThread));
|
||||
}
|
||||
|
||||
for (int id = 0; id < g_device_count; ++id) {
|
||||
CUDA_CHECK(cudaStreamSynchronize(cudaStreamPerThread));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11446,6 +11241,10 @@ GGML_CALL ggml_backend_buffer_type_t ggml_backend_cuda_host_buffer_type() {
|
||||
return &ggml_backend_cuda_buffer_type_host;
|
||||
}
|
||||
|
||||
//static bool ggml_backend_buffer_is_cuda_host(ggml_backend_buffer_t buffer) {
|
||||
// return buffer->buft->iface.get_name == ggml_backend_cuda_host_buffer_type_name;
|
||||
//}
|
||||
|
||||
// backend
|
||||
|
||||
GGML_CALL static const char * ggml_backend_cuda_name(ggml_backend_t backend) {
|
||||
@@ -11469,8 +11268,9 @@ GGML_CALL static ggml_backend_buffer_type_t ggml_backend_cuda_get_default_buffer
|
||||
|
||||
GGML_CALL static void ggml_backend_cuda_set_tensor_async(ggml_backend_t backend, ggml_tensor * tensor, const void * data, size_t offset, size_t size) {
|
||||
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
|
||||
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
||||
|
||||
GGML_ASSERT(tensor->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type");
|
||||
GGML_ASSERT(buf->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type");
|
||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
||||
|
||||
CUDA_CHECK(cudaMemcpyAsync((char *)tensor->data + offset, data, size, cudaMemcpyHostToDevice, g_cudaStreams[cuda_ctx->device][0]));
|
||||
@@ -11478,22 +11278,61 @@ GGML_CALL static void ggml_backend_cuda_set_tensor_async(ggml_backend_t backend,
|
||||
|
||||
GGML_CALL static void ggml_backend_cuda_get_tensor_async(ggml_backend_t backend, const ggml_tensor * tensor, void * data, size_t offset, size_t size) {
|
||||
ggml_backend_cuda_context * cuda_ctx = (ggml_backend_cuda_context *)backend->context;
|
||||
ggml_backend_buffer_t buf = tensor->view_src ? tensor->view_src->buffer : tensor->buffer;
|
||||
|
||||
GGML_ASSERT(tensor->buffer->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type");
|
||||
GGML_ASSERT(buf->buft == ggml_backend_cuda_buffer_type(cuda_ctx->device) && "unsupported buffer type");
|
||||
GGML_ASSERT(tensor->backend == GGML_BACKEND_TYPE_GPU);
|
||||
|
||||
CUDA_CHECK(cudaMemcpyAsync(data, (const char *)tensor->data + offset, size, cudaMemcpyDeviceToHost, g_cudaStreams[cuda_ctx->device][0]));
|
||||
}
|
||||
|
||||
// Enqueues an asynchronous device-to-device copy of `src` into `dst`.
//
// backend_src / backend_dst: the backends that own `src` and `dst`.
// Returns true when a copy was enqueued. Returns false, without enqueuing
// anything, when either backend is not a CUDA backend or when either tensor's
// buffer is not a CUDA buffer.
//
// When the two backends differ, the copy is ordered with an event: it is
// recorded on the source device's stream after the copy, and the destination
// device's stream is made to wait on it.
GGML_CALL static bool ggml_backend_cuda_cpy_tensor_async(ggml_backend_t backend_src, ggml_backend_t backend_dst, const ggml_tensor * src, ggml_tensor * dst) {
    GGML_ASSERT(ggml_backend_is_cuda(backend_src) || ggml_backend_is_cuda(backend_dst));

    // both backend contexts are reinterpreted as CUDA contexts below,
    // so refuse the copy unless both backends really are CUDA backends
    if (!ggml_backend_is_cuda(backend_src) || !ggml_backend_is_cuda(backend_dst)) {
        return false;
    }

    if (!ggml_backend_buffer_is_cuda(src->buffer)) {
        return false;
    }

    if (!ggml_backend_buffer_is_cuda(dst->buffer)) {
        return false;
    }

    // device -> device
    ggml_backend_cuda_context * cuda_ctx_src = (ggml_backend_cuda_context *)backend_src->context;
    ggml_backend_cuda_context * cuda_ctx_dst = (ggml_backend_cuda_context *)backend_dst->context;

    if (backend_src != backend_dst) {
        // for views, the buffer of the viewed tensor is the one that identifies the device
        ggml_backend_buffer_t buf_src = src->view_src ? src->view_src->buffer : src->buffer;
        ggml_backend_buffer_t buf_dst = dst->view_src ? dst->view_src->buffer : dst->buffer;

        ggml_backend_cuda_buffer_context * buf_ctx_src = (ggml_backend_cuda_buffer_context *)buf_src->context;
        ggml_backend_cuda_buffer_context * buf_ctx_dst = (ggml_backend_cuda_buffer_context *)buf_dst->context;

        GGML_ASSERT(cuda_ctx_src->device == buf_ctx_src->device);
        GGML_ASSERT(cuda_ctx_dst->device == buf_ctx_dst->device);

        // the event is created on first use, with the source device selected
        if (!cuda_ctx_src->copy_event) {
            ggml_cuda_set_device(cuda_ctx_src->device);
            CUDA_CHECK(cudaEventCreateWithFlags(&cuda_ctx_src->copy_event, cudaEventDisableTiming));
        }

        // copy on src stream
        if (cuda_ctx_src->device == cuda_ctx_dst->device) {
            CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, g_cudaStreams[cuda_ctx_dst->device][0]));
        } else {
            CUDA_CHECK(cudaMemcpyPeerAsync(dst->data, cuda_ctx_dst->device, src->data, cuda_ctx_src->device, ggml_nbytes(dst), g_cudaStreams[cuda_ctx_src->device][0]));
        }

        // record event on src stream
        CUDA_CHECK(cudaEventRecord(cuda_ctx_src->copy_event, g_cudaStreams[cuda_ctx_src->device][0]));

        // wait on dst stream for the copy to complete
        CUDA_CHECK(cudaStreamWaitEvent(g_cudaStreams[cuda_ctx_dst->device][0], cuda_ctx_src->copy_event, 0));
    } else {
        // src and dst are on the same backend
        CUDA_CHECK(cudaMemcpyAsync(dst->data, src->data, ggml_nbytes(dst), cudaMemcpyDeviceToDevice, g_cudaStreams[cuda_ctx_dst->device][0]));
    }
    return true;
}
|
||||
|
||||
GGML_CALL static void ggml_backend_cuda_synchronize(ggml_backend_t backend) {
|
||||
@@ -11670,6 +11509,52 @@ GGML_CALL static bool ggml_backend_cuda_supports_op(ggml_backend_t backend, cons
|
||||
UNUSED(backend);
|
||||
}
|
||||
|
||||
// Allocates a backend event for `backend`.
// The underlying CUDA event is created with timing disabled, after the
// backend's device has been selected. The returned wrapper is heap-allocated
// and stores the CUDA event handle in its `context` field.
static ggml_backend_event_t ggml_backend_cuda_event_new(ggml_backend_t backend) {
    const int device = ((ggml_backend_cuda_context *)backend->context)->device;
    ggml_cuda_set_device(device);

    cudaEvent_t cuda_event;
    CUDA_CHECK(cudaEventCreateWithFlags(&cuda_event, cudaEventDisableTiming));

    ggml_backend_event_t wrapper = new ggml_backend_event {
        /* .backend = */ backend,
        /* .context = */ cuda_event,
    };
    return wrapper;
}
|
||||
|
||||
// Destroys the CUDA event held in event->context, then deletes the wrapper itself.
static void ggml_backend_cuda_event_free(ggml_backend_event_t event) {
    cudaEvent_t cuda_event = (cudaEvent_t)event->context;
    CUDA_CHECK(cudaEventDestroy(cuda_event));

    delete event;
}
|
||||
|
||||
// Records the event on stream index 0 of the device that belongs to the
// backend the event was created for.
static void ggml_backend_cuda_event_record(ggml_backend_event_t event) {
    const int device = ((ggml_backend_cuda_context *)event->backend->context)->device;
    cudaEvent_t cuda_event = (cudaEvent_t)event->context;

    CUDA_CHECK(cudaEventRecord(cuda_event, g_cudaStreams[device][0]));
}
|
||||
|
||||
// Makes stream index 0 of `backend`'s device wait for `event`.
// An event that belongs to a CUDA backend is waited on with a stream-side
// wait. Any other event is waited on from a host callback enqueued on the
// stream, which calls ggml_backend_event_synchronize on it.
static void ggml_backend_cuda_event_wait(ggml_backend_t backend, ggml_backend_event_t event) {
    const int device = ((ggml_backend_cuda_context *)backend->context)->device;
    cudaStream_t stream = g_cudaStreams[device][0];

    if (!ggml_backend_is_cuda(event->backend)) {
        // untested
        auto host_wait = [](void * user_data) {
            ggml_backend_event_synchronize((ggml_backend_event_t)user_data);
        };

        CUDA_CHECK(cudaLaunchHostFunc(stream, host_wait, event));
        return;
    }

    CUDA_CHECK(cudaStreamWaitEvent(stream, (cudaEvent_t)event->context, 0));
}
|
||||
|
||||
// Synchronizes the calling host thread with the CUDA event held in event->context.
static void ggml_backend_cuda_event_synchronize(ggml_backend_event_t event) {
    cudaEvent_t cuda_event = (cudaEvent_t)event->context;
    CUDA_CHECK(cudaEventSynchronize(cuda_event));
}
|
||||
|
||||
static ggml_backend_i ggml_backend_cuda_interface = {
|
||||
/* .get_name = */ ggml_backend_cuda_name,
|
||||
/* .free = */ ggml_backend_cuda_free,
|
||||
@@ -11683,6 +11568,11 @@ static ggml_backend_i ggml_backend_cuda_interface = {
|
||||
/* .graph_plan_compute = */ NULL,
|
||||
/* .graph_compute = */ ggml_backend_cuda_graph_compute,
|
||||
/* .supports_op = */ ggml_backend_cuda_supports_op,
|
||||
/* .event_new = */ ggml_backend_cuda_event_new,
|
||||
/* .event_free = */ ggml_backend_cuda_event_free,
|
||||
/* .event_record = */ ggml_backend_cuda_event_record,
|
||||
/* .event_wait = */ ggml_backend_cuda_event_wait,
|
||||
/* .event_synchronize = */ ggml_backend_cuda_event_synchronize,
|
||||
};
|
||||
|
||||
static ggml_guid_t ggml_backend_cuda_guid() {
|
||||
@@ -11701,10 +11591,11 @@ GGML_CALL ggml_backend_t ggml_backend_cuda_init(int device) {
|
||||
// not strictly necessary, but it may reduce the overhead of the first graph_compute
|
||||
ggml_cuda_set_main_device(device);
|
||||
|
||||
ggml_backend_cuda_context * ctx = new ggml_backend_cuda_context {
|
||||
/* .device = */ device,
|
||||
/* .name = */ GGML_CUDA_NAME + std::to_string(device),
|
||||
};
|
||||
ggml_backend_cuda_context * ctx = new ggml_backend_cuda_context(device);
|
||||
if (ctx == nullptr) {
|
||||
fprintf(stderr, "%s: error: failed to allocate context\n", __func__);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ggml_backend_t cuda_backend = new ggml_backend {
|
||||
/* .guid = */ ggml_backend_cuda_guid(),
|
||||
|
||||
+6
-2
@@ -53,26 +53,30 @@ extern "C" {
|
||||
//
|
||||
#include <arm_neon.h>
|
||||
|
||||
typedef __fp16 ggml_fp16_internal_t;
|
||||
|
||||
#define GGML_COMPUTE_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||
#define GGML_COMPUTE_FP32_TO_FP16(x) ggml_compute_fp32_to_fp16(x)
|
||||
|
||||
#define GGML_FP16_TO_FP32(x) ggml_compute_fp16_to_fp32(x)
|
||||
|
||||
static inline float ggml_compute_fp16_to_fp32(ggml_fp16_t h) {
|
||||
__fp16 tmp;
|
||||
ggml_fp16_internal_t tmp;
|
||||
memcpy(&tmp, &h, sizeof(ggml_fp16_t));
|
||||
return (float)tmp;
|
||||
}
|
||||
|
||||
static inline ggml_fp16_t ggml_compute_fp32_to_fp16(float f) {
|
||||
ggml_fp16_t res;
|
||||
__fp16 tmp = f;
|
||||
ggml_fp16_internal_t tmp = f;
|
||||
memcpy(&res, &tmp, sizeof(ggml_fp16_t));
|
||||
return res;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
typedef uint16_t ggml_fp16_internal_t;
|
||||
|
||||
#ifdef __wasm_simd128__
|
||||
#include <wasm_simd128.h>
|
||||
#else
|
||||
|
||||
@@ -1951,6 +1951,11 @@ static struct ggml_backend_i kompute_backend_i = {
|
||||
/* .graph_plan_compute = */ NULL,
|
||||
/* .graph_compute = */ ggml_backend_kompute_graph_compute,
|
||||
/* .supports_op = */ ggml_backend_kompute_supports_op,
|
||||
/* .event_new = */ NULL,
|
||||
/* .event_free = */ NULL,
|
||||
/* .event_record = */ NULL,
|
||||
/* .event_wait = */ NULL,
|
||||
/* .event_synchronize = */ NULL,
|
||||
};
|
||||
|
||||
static ggml_guid_t ggml_backend_kompute_guid() {
|
||||
|
||||
+39
-17
@@ -280,6 +280,11 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
||||
id<MTLLibrary> metal_library;
|
||||
|
||||
// load library
|
||||
//
|
||||
// - first check if the library is embedded
|
||||
// - then check if the library is in the bundle
|
||||
// - if not found, load the source and compile it
|
||||
// - if that fails, return NULL
|
||||
{
|
||||
NSBundle * bundle = nil;
|
||||
#ifdef SWIFT_PACKAGE
|
||||
@@ -287,12 +292,21 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
||||
#else
|
||||
bundle = [NSBundle bundleForClass:[GGMLMetalClass class]];
|
||||
#endif
|
||||
|
||||
NSError * error = nil;
|
||||
NSString * libPath = [bundle pathForResource:@"default" ofType:@"metallib"];
|
||||
if (libPath != nil) {
|
||||
|
||||
#if GGML_METAL_EMBED_LIBRARY
|
||||
const bool try_metallib = false;
|
||||
#else
|
||||
const bool try_metallib = true;
|
||||
#endif
|
||||
|
||||
NSString * path_lib = [bundle pathForResource:@"default" ofType:@"metallib"];
|
||||
if (try_metallib && path_lib != nil) {
|
||||
// pre-compiled library found
|
||||
NSURL * libURL = [NSURL fileURLWithPath:libPath];
|
||||
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [libPath UTF8String]);
|
||||
NSURL * libURL = [NSURL fileURLWithPath:path_lib];
|
||||
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path_lib UTF8String]);
|
||||
|
||||
metal_library = [ctx->device newLibraryWithURL:libURL error:&error];
|
||||
if (error) {
|
||||
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
||||
@@ -305,38 +319,41 @@ static struct ggml_metal_context * ggml_metal_init(int n_cb) {
|
||||
extern const char ggml_metallib_start[];
|
||||
extern const char ggml_metallib_end[];
|
||||
|
||||
NSString * src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding];
|
||||
NSString * src = [[NSString alloc] initWithBytes:ggml_metallib_start length:(ggml_metallib_end-ggml_metallib_start) encoding:NSUTF8StringEncoding];
|
||||
#else
|
||||
GGML_METAL_LOG_INFO("%s: default.metallib not found, loading from source\n", __func__);
|
||||
|
||||
NSString * sourcePath;
|
||||
NSString * ggmlMetalPathResources = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"];
|
||||
NSString * path_source;
|
||||
NSString * path_resource = [[NSProcessInfo processInfo].environment objectForKey:@"GGML_METAL_PATH_RESOURCES"];
|
||||
|
||||
GGML_METAL_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, ggmlMetalPathResources ? [ggmlMetalPathResources UTF8String] : "nil");
|
||||
GGML_METAL_LOG_INFO("%s: GGML_METAL_PATH_RESOURCES = %s\n", __func__, path_resource ? [path_resource UTF8String] : "nil");
|
||||
|
||||
if (ggmlMetalPathResources) {
|
||||
sourcePath = [ggmlMetalPathResources stringByAppendingPathComponent:@"ggml-metal.metal"];
|
||||
if (path_resource) {
|
||||
path_source = [path_resource stringByAppendingPathComponent:@"ggml-metal.metal"];
|
||||
} else {
|
||||
sourcePath = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
|
||||
path_source = [bundle pathForResource:@"ggml-metal" ofType:@"metal"];
|
||||
}
|
||||
if (sourcePath == nil) {
|
||||
|
||||
if (path_source == nil) {
|
||||
GGML_METAL_LOG_WARN("%s: error: could not use bundle path to find ggml-metal.metal, falling back to trying cwd\n", __func__);
|
||||
sourcePath = @"ggml-metal.metal";
|
||||
path_source = @"ggml-metal.metal";
|
||||
}
|
||||
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [sourcePath UTF8String]);
|
||||
NSString * src = [NSString stringWithContentsOfFile:sourcePath encoding:NSUTF8StringEncoding error:&error];
|
||||
|
||||
GGML_METAL_LOG_INFO("%s: loading '%s'\n", __func__, [path_source UTF8String]);
|
||||
|
||||
NSString * src = [NSString stringWithContentsOfFile:path_source encoding:NSUTF8StringEncoding error:&error];
|
||||
if (error) {
|
||||
GGML_METAL_LOG_ERROR("%s: error: %s\n", __func__, [[error description] UTF8String]);
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
#endif // GGML_METAL_EMBED_LIBRARY
|
||||
|
||||
@autoreleasepool {
|
||||
// dictionary of preprocessor macros
|
||||
NSMutableDictionary * prep = [NSMutableDictionary dictionary];
|
||||
|
||||
#ifdef GGML_QKK_64
|
||||
prep[@"QK_K"] = @(64);
|
||||
prep[@"GGML_QKK_64"] = @(1);
|
||||
#endif
|
||||
|
||||
MTLCompileOptions* options = [MTLCompileOptions new];
|
||||
@@ -2820,6 +2837,11 @@ static struct ggml_backend_i ggml_backend_metal_i = {
|
||||
/* .graph_plan_compute = */ NULL,
|
||||
/* .graph_compute = */ ggml_backend_metal_graph_compute,
|
||||
/* .supports_op = */ ggml_backend_metal_supports_op,
|
||||
/* .event_new = */ NULL,
|
||||
/* .event_free = */ NULL,
|
||||
/* .event_record = */ NULL,
|
||||
/* .event_wait = */ NULL,
|
||||
/* .event_synchronize = */ NULL,
|
||||
};
|
||||
|
||||
void ggml_backend_metal_log_set_callback(ggml_log_callback log_callback, void * user_data) {
|
||||
|
||||
+13
-186
@@ -1,49 +1,15 @@
|
||||
#include <metal_stdlib>
|
||||
|
||||
#define GGML_COMMON_DECL_METAL
|
||||
#define GGML_COMMON_IMPL_METAL
|
||||
#include "ggml-common.h"
|
||||
|
||||
#include <metal_stdlib>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
// Function-like helper macros.
// MAX/MIN expand each argument more than once, so an argument with side
// effects (e.g. i++) is evaluated repeatedly; pass plain values only.
#define MAX(x, y) ((x) > (y) ? (x) : (y))
|
||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
// SWAP exchanges x and y through a temporary of deduced type. It expands to a
// bare braced block (not do { } while (0)), so `SWAP(a, b);` leaves an extra
// empty statement behind; take care when using it in an un-braced if/else.
#define SWAP(x, y) { auto tmp = (x); (x) = (y); (y) = tmp; }
|
||||
|
||||
// Storage layout of one q4_0 block: a half-precision delta followed by
// QK4_0 / 2 = 16 bytes of packed nibbles (18 bytes if the struct has no padding).
// NOTE(review): QK4_0 is presumably the number of quants per block (two per byte);
// the role of QR4_0 is not visible in this chunk — confirm against its users.
#define QK4_0 32
|
||||
#define QR4_0 2
|
||||
typedef struct {
|
||||
half d; // delta
|
||||
uint8_t qs[QK4_0 / 2]; // nibbles / quants
|
||||
} block_q4_0;
|
||||
|
||||
// Storage layout of one q4_1 block: like q4_0 but with an additional
// half-precision min, i.e. 2 + 2 + QK4_1 / 2 = 20 bytes if the struct has no padding.
#define QK4_1 32
|
||||
typedef struct {
|
||||
half d; // delta
|
||||
half m; // min
|
||||
uint8_t qs[QK4_1 / 2]; // nibbles / quants
|
||||
} block_q4_1;
|
||||
|
||||
// Storage layout of one q5_0 block: a half-precision delta, 4 bytes (32 bits)
// holding the 5th bit of each quant, and QK5_0 / 2 = 16 bytes of packed low
// nibbles — 22 bytes if the struct has no padding.
#define QK5_0 32
|
||||
typedef struct {
|
||||
half d; // delta
|
||||
uint8_t qh[4]; // 5-th bit of quants
|
||||
uint8_t qs[QK5_0 / 2]; // nibbles / quants
|
||||
} block_q5_0;
|
||||
|
||||
// Storage layout of one q5_1 block: q5_0 plus a half-precision min, i.e.
// 2 + 2 + 4 + QK5_1 / 2 = 24 bytes if the struct has no padding.
#define QK5_1 32
|
||||
typedef struct {
|
||||
half d; // delta
|
||||
half m; // min
|
||||
uint8_t qh[4]; // 5-th bit of quants
|
||||
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
||||
} block_q5_1;
|
||||
|
||||
// Storage layout of one q8_0 block: a half-precision delta followed by
// QK8_0 = 32 signed 8-bit quants (34 bytes if the struct has no padding).
#define QK8_0 32
|
||||
typedef struct {
|
||||
half d; // delta
|
||||
int8_t qs[QK8_0]; // quants
|
||||
} block_q8_0;
|
||||
|
||||
#define N_SIMDWIDTH 32 // assuming SIMD group size is 32
|
||||
|
||||
enum ggml_sort_order {
|
||||
@@ -2478,147 +2444,6 @@ kernel void kernel_concat(
|
||||
}
|
||||
}
|
||||
|
||||
//============================================ k-quants ======================================================
|
||||
|
||||
// QK_K sizes every k-quant / iq block layout below. It defaults to 256 when
// not predefined; a predefined value must be 256 or 64, which the
// static_assert enforces (it is only compiled in the predefined case).
#ifndef QK_K
|
||||
#define QK_K 256
|
||||
#else
|
||||
static_assert(QK_K == 256 || QK_K == 64, "QK_K must be 256 or 64");
|
||||
#endif
|
||||
|
|
||||
// Byte length of the packed `scales` array used by the block layouts that
// reference K_SCALE_SIZE: 12 for QK_K == 256, 4 otherwise.
#if QK_K == 256
|
||||
#define K_SCALE_SIZE 12
|
||||
#else
|
||||
#define K_SCALE_SIZE 4
|
||||
#endif
|
||||
|
||||
// Storage layout of one q2_K super-block; array lengths scale with QK_K.
typedef struct {
|
||||
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
||||
uint8_t qs[QK_K/4]; // quants
|
||||
half d; // super-block scale for quantized scales
|
||||
half dmin; // super-block scale for quantized mins
|
||||
} block_q2_K;
|
||||
// 84 bytes / block for QK_K = 256 (16 + 64 + 2 + 2)
|
||||
|
||||
// Storage layout of one q3_K super-block. The length of `scales` depends on
// QK_K: 2 bytes when QK_K == 64, K_SCALE_SIZE bytes otherwise.
// With QK_K = 256 the fields add up to 32 + 64 + 12 + 2 = 110 bytes
// (assuming no padding).
typedef struct {
|
||||
uint8_t hmask[QK_K/8]; // quants - high bit
|
||||
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
||||
#if QK_K == 64
|
||||
uint8_t scales[2];
|
||||
#else
|
||||
uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
|
||||
#endif
|
||||
half d; // super-block scale
|
||||
} block_q3_K;
|
||||
|
||||
// Storage layout of one q4_K super-block. Two alternative definitions are
// selected by QK_K:
//  - QK_K == 64: the scale and min share a two-element half array and only
//    2 bytes of block scales are stored.
//  - otherwise:  separate d / dmin plus K_SCALE_SIZE bytes of packed scales
//    (2 + 2 + 12 + 128 = 144 bytes for QK_K = 256, assuming no padding).
#if QK_K == 64
|
||||
typedef struct {
|
||||
half d[2]; // super-block scales/mins
|
||||
uint8_t scales[2];
|
||||
uint8_t qs[QK_K/2]; // 4-bit quants
|
||||
} block_q4_K;
|
||||
#else
|
||||
typedef struct {
|
||||
half d; // super-block scale for quantized scales
|
||||
half dmin; // super-block scale for quantized mins
|
||||
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
||||
uint8_t qs[QK_K/2]; // 4-bit quants
|
||||
} block_q4_K;
|
||||
#endif
|
||||
|
||||
// Storage layout of one q5_K super-block. Two alternative definitions are
// selected by QK_K:
//  - QK_K == 64: a single half scale plus signed 8-bit block scales.
//  - otherwise:  separate d / dmin plus 3*QK_K/64 bytes of packed scales/mins.
// Both variants store one high bit per quant in qh and the low 4 bits in qs.
#if QK_K == 64
|
||||
typedef struct {
|
||||
half d; // super-block scales/mins
|
||||
int8_t scales[QK_K/16]; // 8-bit block scales
|
||||
uint8_t qh[QK_K/8]; // quants, high bit
|
||||
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
||||
} block_q5_K;
|
||||
#else
|
||||
typedef struct {
|
||||
half d; // super-block scale for quantized scales
|
||||
half dmin; // super-block scale for quantized mins
|
||||
uint8_t scales[3*QK_K/64]; // scales and mins, quantized with 6 bits
|
||||
uint8_t qh[QK_K/8]; // quants, high bit
|
||||
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
||||
} block_q5_K;
|
||||
// 176 bytes / block for QK_K = 256 (2 + 2 + 12 + 32 + 128)
|
||||
#endif
|
||||
|
||||
// Storage layout of one q6_K super-block: each 6-bit quant is split into a
// low 4-bit part (ql) and an upper 2-bit part (qh).
typedef struct {
|
||||
uint8_t ql[QK_K/2]; // quants, lower 4 bits
|
||||
uint8_t qh[QK_K/4]; // quants, upper 2 bits
|
||||
int8_t scales[QK_K/16]; // scales, quantized with 8 bits
|
||||
half d; // super-block scale
|
||||
} block_q6_K;
|
||||
// 210 bytes / block for QK_K = 256 (128 + 64 + 16 + 2)
|
||||
|
||||
// Storage layout of one iq2_xxs block: a half-precision scale followed by
// QK_K/8 16-bit words of packed quant data.
// NOTE(review): the bit-level meaning of qs is not shown in this chunk.
typedef struct {
|
||||
half d;
|
||||
uint16_t qs[QK_K/8];
|
||||
} block_iq2_xxs;
|
||||
// 66 bytes / block for QK_K = 256, so 2.0625 bpw
|
||||
|
||||
// Storage layout of one iq2_xs block: the iq2_xxs fields plus QK_K/32 bytes
// of per-group scales.
// NOTE(review): the bit-level meaning of qs / scales is not shown in this chunk.
typedef struct {
|
||||
half d;
|
||||
uint16_t qs[QK_K/8];
|
||||
uint8_t scales[QK_K/32];
|
||||
} block_iq2_xs;
|
||||
// 74 bytes / block for QK_K = 256, so 2.3125 bpw
|
||||
|
||||
// 2.5625 bpw quants
// Storage layout of one iq2_s block. For QK_K = 256 the fields add up to
// 2 + 64 + 8 + 8 = 82 bytes (assuming no padding), which is where the
// 2.5625 bits-per-weight figure comes from.
|
||||
typedef struct {
|
||||
half d;
|
||||
uint8_t qs[QK_K/4];
|
||||
uint8_t qh[QK_K/32];
|
||||
uint8_t scales[QK_K/32];
|
||||
} block_iq2_s;
|
||||
|
||||
// Storage layout of one iq3_xxs block: a half-precision scale followed by
// 3*QK_K/8 bytes of packed quant data.
// NOTE(review): the bit-level meaning of qs is not shown in this chunk.
typedef struct {
|
||||
half d;
|
||||
uint8_t qs[3*QK_K/8];
|
||||
} block_iq3_xxs;
|
||||
// 98 bytes / block for QK_K = 256, so 3.0625 bpw
|
||||
|
||||
// 3.4375 bpw
// (for QK_K = 256: 2 + 64 + 8 + 32 + 4 = 110 bytes per block, assuming no padding)
|
||||
// Number of scale bytes stored in block_iq3_s.
// NOTE(review): in the #else branch the macro expands to the unparenthesized
// expression QK_K/64. That is fine as an array bound (its only use visible
// here) but would associate unexpectedly inside a larger expression such as
// `x / IQ3S_N_SCALE`; consider wrapping it in parentheses.
#if QK_K == 64
|
||||
#define IQ3S_N_SCALE 2
|
||||
#else
|
||||
#define IQ3S_N_SCALE QK_K/64
|
||||
#endif
|
||||
// Storage layout of one iq3_s block.
typedef struct {
|
||||
half d;
|
||||
uint8_t qs[QK_K/4];
|
||||
uint8_t qh[QK_K/32];
|
||||
uint8_t signs[QK_K/8];
|
||||
uint8_t scales[IQ3S_N_SCALE];
|
||||
} block_iq3_s;
|
||||
|
||||
// Storage layout of one iq1_s block: a half-precision scale, QK_K/8 bytes of
// qs and QK_K/32 16-bit qh words (2 + 32 + 16 = 50 bytes for QK_K = 256,
// assuming no padding).
// NOTE(review): judging by the iq1_s dequantization code elsewhere in this
// diff, each qh word appears to carry the extra high bits of the grid indices
// in its low 12 bits, a 3-bit scale in bits 12-14 and a delta sign in bit 15
// — confirm against the kernels that read this struct.
typedef struct {
|
||||
half d;
|
||||
uint8_t qs[QK_K/8];
|
||||
uint16_t qh[QK_K/32];
|
||||
} block_iq1_s;
|
||||
|
||||
// Non-linear quants
// Storage layout of one iq4_nl block: a half-precision scale followed by
// QK4_NL / 2 = 16 bytes of packed data (18 bytes if the struct has no padding).
// NOTE(review): QK4_NL is presumably the number of quants per block, stored
// two per byte — confirm against the dequantization code.
|
||||
#define QK4_NL 32
|
||||
typedef struct {
|
||||
half d;
|
||||
uint8_t qs[QK4_NL/2];
|
||||
} block_iq4_nl;
|
||||
|
||||
// block_iq4_xs: when QK_K == 64 this is just an alias (via macro) for
// block_iq4_nl; otherwise it is a dedicated super-block layout with scale
// bits split between scales_h and scales_l
// (2 + 2 + 4 + 128 = 136 bytes for QK_K = 256, assuming no padding).
// NOTE(review): how the scale bits are divided between scales_h and scales_l
// is not shown in this chunk.
#if QK_K == 64
|
||||
#define block_iq4_xs block_iq4_nl
|
||||
#else
|
||||
typedef struct {
|
||||
half d;
|
||||
uint16_t scales_h;
|
||||
uint8_t scales_l[QK_K/64];
|
||||
uint8_t qs[QK_K/2];
|
||||
} block_iq4_xs;
|
||||
#endif
|
||||
|
||||
//====================================== dot products =========================
|
||||
|
||||
void kernel_mul_mv_q2_K_f32_impl(
|
||||
device const void * src0,
|
||||
device const float * src1,
|
||||
@@ -4377,7 +4202,7 @@ void kernel_mul_mv_iq1_s_f32_impl(
|
||||
+ yl[j+16] * (grid3[j] & 0xf) + yl[j+20] * (grid3[j] >> 4)
|
||||
+ yl[j+24] * (grid4[j] & 0xf) + yl[j+28] * (grid4[j] >> 4);
|
||||
}
|
||||
sumf[row] += (float)dh[0] * (sum - sumy) * (2*(qh[0] >> 12) + 1);
|
||||
sumf[row] += (float)dh[0] * (sum + sumy * (qh[0] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA)) * (2*((qh[0] >> 12) & 7) + 1);
|
||||
|
||||
dh += nb*sizeof(block_iq1_s)/2;
|
||||
qs += nb*sizeof(block_iq1_s);
|
||||
@@ -5076,14 +4901,16 @@ void dequantize_iq1_s(device const block_iq1_s * xb, short il, thread type4x4 &
|
||||
const float d = xb->d;
|
||||
device const uint8_t * qs = xb->qs + 4*ib32 + 2*il;
|
||||
device const uint16_t * qh = xb->qh;
|
||||
const float dl = d * (2*(qh[ib32] >> 12) + 1);
|
||||
constant uint8_t * grid1 = (constant uint8_t *)(iq1s_grid_gpu + (qs[0] | (((qh[ib32] >> (6*il+0)) & 7) << 8)));
|
||||
constant uint8_t * grid2 = (constant uint8_t *)(iq1s_grid_gpu + (qs[1] | (((qh[ib32] >> (6*il+3)) & 7) << 8)));
|
||||
const float dl = d * (2*((qh[ib32] >> 12) & 7) + 1);
|
||||
const float ml = dl * (qh[ib32] & 0x8000 ? -1 - IQ1S_DELTA : -1 + IQ1S_DELTA);
|
||||
const uint16_t h = qh[ib32] >> 6*il;
|
||||
constant uint8_t * grid1 = (constant uint8_t *)(iq1s_grid_gpu + (qs[0] | ((h << 8) & 0x700)));
|
||||
constant uint8_t * grid2 = (constant uint8_t *)(iq1s_grid_gpu + (qs[1] | ((h << 5) & 0x700)));
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
reg[0][i] = dl * (grid1[i] & 0xf) - dl;
|
||||
reg[1][i] = dl * (grid1[i] >> 4) - dl;
|
||||
reg[2][i] = dl * (grid2[i] & 0xf) - dl;
|
||||
reg[3][i] = dl * (grid2[i] >> 4) - dl;
|
||||
reg[0][i] = dl * (grid1[i] & 0xf) + ml;
|
||||
reg[1][i] = dl * (grid1[i] >> 4) + ml;
|
||||
reg[2][i] = dl * (grid2[i] & 0xf) + ml;
|
||||
reg[3][i] = dl * (grid2[i] >> 4) + ml;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+111
-78
@@ -1,3 +1,6 @@
|
||||
#define GGML_COMMON_IMPL_C
|
||||
#include "ggml-common.h"
|
||||
|
||||
#include "ggml-quants.h"
|
||||
#include "ggml-impl.h"
|
||||
|
||||
@@ -951,7 +954,7 @@ void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict
|
||||
const float d = amax / ((1 << 7) - 1);
|
||||
const float id = d ? 1.0f/d : 0.0f;
|
||||
|
||||
y[i].d = d;
|
||||
y[i].d = GGML_FP32_TO_FP16(d);
|
||||
|
||||
int sum = 0;
|
||||
|
||||
@@ -966,7 +969,7 @@ void quantize_row_q8_1_reference(const float * restrict x, block_q8_1 * restrict
|
||||
sum += y[i].qs[QK8_1/2 + j];
|
||||
}
|
||||
|
||||
y[i].s = sum*d;
|
||||
y[i].s = GGML_FP32_TO_FP16(sum*d);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -994,7 +997,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
||||
const float d = amax / ((1 << 7) - 1);
|
||||
const float id = d ? 1.0f/d : 0.0f;
|
||||
|
||||
y[i].d = d;
|
||||
y[i].d = GGML_FP32_TO_FP16(d);
|
||||
|
||||
int32x4_t accv = vdupq_n_s32(0);
|
||||
|
||||
@@ -1010,7 +1013,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
||||
accv = vaddq_s32(accv, vi);
|
||||
}
|
||||
|
||||
y[i].s = d * vaddvq_s32(accv);
|
||||
y[i].s = GGML_FP32_TO_FP16(d * vaddvq_s32(accv));
|
||||
}
|
||||
#elif defined(__wasm_simd128__)
|
||||
for (int i = 0; i < nb; i++) {
|
||||
@@ -1033,7 +1036,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
||||
const float d = amax / ((1 << 7) - 1);
|
||||
const float id = d ? 1.0f/d : 0.0f;
|
||||
|
||||
y[i].d = d;
|
||||
y[i].d = GGML_FP32_TO_FP16(d);
|
||||
|
||||
v128_t accv = wasm_i32x4_splat(0);
|
||||
|
||||
@@ -1049,10 +1052,11 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
||||
accv = wasm_i32x4_add(accv, vi);
|
||||
}
|
||||
|
||||
y[i].s = d * (wasm_i32x4_extract_lane(accv, 0) +
|
||||
wasm_i32x4_extract_lane(accv, 1) +
|
||||
wasm_i32x4_extract_lane(accv, 2) +
|
||||
wasm_i32x4_extract_lane(accv, 3));
|
||||
y[i].s = GGML_FP32_TO_FP16(
|
||||
d * (wasm_i32x4_extract_lane(accv, 0) +
|
||||
wasm_i32x4_extract_lane(accv, 1) +
|
||||
wasm_i32x4_extract_lane(accv, 2) +
|
||||
wasm_i32x4_extract_lane(accv, 3)));
|
||||
}
|
||||
#elif defined(__AVX2__) || defined(__AVX__)
|
||||
for (int i = 0; i < nb; i++) {
|
||||
@@ -1077,7 +1081,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
||||
|
||||
// Quantize these floats
|
||||
const float d = maxScalar / 127.f;
|
||||
y[i].d = d;
|
||||
y[i].d = GGML_FP32_TO_FP16(d);
|
||||
const float id = ( maxScalar != 0.0f ) ? 127.f / maxScalar : 0.0f;
|
||||
const __m256 mul = _mm256_set1_ps( id );
|
||||
|
||||
@@ -1101,7 +1105,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
||||
|
||||
#if defined(__AVX2__)
|
||||
// Compute the sum of the quants and set y[i].s
|
||||
y[i].s = d * hsum_i32_8(_mm256_add_epi32(_mm256_add_epi32(i0, i1), _mm256_add_epi32(i2, i3)));
|
||||
y[i].s = GGML_FP32_TO_FP16(d * hsum_i32_8(_mm256_add_epi32(_mm256_add_epi32(i0, i1), _mm256_add_epi32(i2, i3))));
|
||||
|
||||
// Convert int32 to int16
|
||||
i0 = _mm256_packs_epi32( i0, i1 ); // 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15
|
||||
@@ -1131,7 +1135,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
||||
// Compute the sum of the quants and set y[i].s
|
||||
const __m128i s0 = _mm_add_epi32(_mm_add_epi32(ni0, ni1), _mm_add_epi32(ni2, ni3));
|
||||
const __m128i s1 = _mm_add_epi32(_mm_add_epi32(ni4, ni5), _mm_add_epi32(ni6, ni7));
|
||||
y[i].s = d * hsum_i32_4(_mm_add_epi32(s0, s1));
|
||||
y[i].s = GGML_FP32_TO_FP16(d * hsum_i32_4(_mm_add_epi32(s0, s1)));
|
||||
|
||||
// Convert int32 to int16
|
||||
ni0 = _mm_packs_epi32( ni0, ni1 );
|
||||
@@ -1162,7 +1166,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
||||
const float d = amax / ((1 << 7) - 1);
|
||||
const float id = d ? 1.0f/d : 0.0f;
|
||||
|
||||
y[i].d = d;
|
||||
y[i].d = GGML_FP32_TO_FP16(d);
|
||||
|
||||
vfloat32m4_t x0 = __riscv_vfmul_vf_f32m4(v_x, id, vl);
|
||||
|
||||
@@ -1179,7 +1183,7 @@ void quantize_row_q8_1(const float * restrict x, void * restrict vy, int k) {
|
||||
|
||||
// set y[i].s
|
||||
int sum = __riscv_vmv_x_s_i16m1_i16(vwrs);
|
||||
y[i].s = sum*d;
|
||||
y[i].s = GGML_FP32_TO_FP16(sum*d);
|
||||
}
|
||||
#else
|
||||
GGML_UNUSED(nb);
|
||||
@@ -3456,11 +3460,12 @@ void dequantize_row_iq1_s(const block_iq1_s * restrict x, float * restrict y, in
|
||||
const uint16_t * qh = x[i].qh;
|
||||
|
||||
for (int ib = 0; ib < QK_K/32; ++ib) {
|
||||
const float dl = d * (2*(qh[ib] >> 12) + 1);
|
||||
const float dl = d * (2*((qh[ib] >> 12) & 7) + 1);
|
||||
const float delta = qh[ib] & 0x8000 ? -IQ1S_DELTA : IQ1S_DELTA;
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
const int8_t * grid = (const int8_t *)(iq1s_grid + (qs[l] | (((qh[ib] >> 3*l) & 7) << 8)));
|
||||
for (int j = 0; j < 8; ++j) {
|
||||
y[j] = dl * grid[j];
|
||||
y[j] = dl * (grid[j] + delta);
|
||||
}
|
||||
y += 8;
|
||||
}
|
||||
@@ -4018,10 +4023,10 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
const block_q8_1 * restrict b_y0 = &vy0[i];
|
||||
const block_q8_1 * restrict b_y1 = &vy1[i];
|
||||
|
||||
float32x4_t summs_t = {GGML_FP16_TO_FP32(b_x0->m) * b_y0->s,
|
||||
GGML_FP16_TO_FP32(b_x1->m) * b_y0->s,
|
||||
GGML_FP16_TO_FP32(b_x0->m) * b_y1->s,
|
||||
GGML_FP16_TO_FP32(b_x1->m) * b_y1->s};
|
||||
float32x4_t summs_t = {GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y0->s),
|
||||
GGML_FP16_TO_FP32(b_x1->m) * GGML_FP16_TO_FP32(b_y0->s),
|
||||
GGML_FP16_TO_FP32(b_x0->m) * GGML_FP16_TO_FP32(b_y1->s),
|
||||
GGML_FP16_TO_FP32(b_x1->m) * GGML_FP16_TO_FP32(b_y1->s)};
|
||||
summs0 += summs_t;
|
||||
|
||||
const uint8x16_t m4b = vdupq_n_u8(0x0F);
|
||||
@@ -4086,7 +4091,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
const block_q8_1 * restrict y0 = &y[i + 0];
|
||||
const block_q8_1 * restrict y1 = &y[i + 1];
|
||||
|
||||
summs += GGML_FP16_TO_FP32(x0->m) * y0->s + GGML_FP16_TO_FP32(x1->m) * y1->s;
|
||||
summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s) + GGML_FP16_TO_FP32(x1->m) * GGML_FP16_TO_FP32(y1->s);
|
||||
|
||||
const uint8x16_t m4b = vdupq_n_u8(0x0F);
|
||||
|
||||
@@ -4109,8 +4114,8 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
const int32x4_t p_0 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), v0_0l, v1_0l), v0_0h, v1_0h);
|
||||
const int32x4_t p_1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), v0_1l, v1_1l), v0_1h, v1_1h);
|
||||
|
||||
sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_FP16_TO_FP32(x0->d)*y0->d);
|
||||
sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_FP16_TO_FP32(x1->d)*y1->d);
|
||||
sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(p_0), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d));
|
||||
sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(p_1), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d));
|
||||
}
|
||||
|
||||
*s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs;
|
||||
@@ -4123,9 +4128,9 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
// Main loop
|
||||
for (int i = 0; i < nb; ++i) {
|
||||
const float d0 = GGML_FP16_TO_FP32(x[i].d);
|
||||
const float d1 = y[i].d;
|
||||
const float d1 = GGML_FP16_TO_FP32(y[i].d);
|
||||
|
||||
summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s;
|
||||
summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s);
|
||||
|
||||
const __m256 d0v = _mm256_set1_ps( d0 );
|
||||
const __m256 d1v = _mm256_set1_ps( d1 );
|
||||
@@ -4177,7 +4182,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
|
||||
int sumi = __riscv_vmv_x_s_i32m1_i32(vs2);
|
||||
|
||||
sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
|
||||
sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s);
|
||||
}
|
||||
|
||||
*s = sumf;
|
||||
@@ -4195,7 +4200,7 @@ void ggml_vec_dot_q4_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
sumi += (v0 * y[i].qs[j]) + (v1 * y[i].qs[j + qk/2]);
|
||||
}
|
||||
|
||||
sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
|
||||
sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s);
|
||||
}
|
||||
|
||||
*s = sumf;
|
||||
@@ -4531,8 +4536,8 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
|
||||
const uint8x16_t m4b = vdupq_n_u8(0x0F);
|
||||
|
||||
summs0 += GGML_FP16_TO_FP32(x0->m) * y0->s;
|
||||
summs1 += GGML_FP16_TO_FP32(x1->m) * y1->s;
|
||||
summs0 += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s);
|
||||
summs1 += GGML_FP16_TO_FP32(x1->m) * GGML_FP16_TO_FP32(y1->s);
|
||||
|
||||
// extract the 5th bit via lookup table ((b) << 4)
|
||||
memcpy(&qh0, x0->qh, sizeof(qh0));
|
||||
@@ -4576,10 +4581,10 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
|
||||
sumv0 = vmlaq_n_f32(sumv0, vcvtq_f32_s32(vaddq_s32(
|
||||
ggml_vdotq_s32(vdupq_n_s32(0), v0_0lf, v1_0l),
|
||||
ggml_vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_FP16_TO_FP32(x0->d)*y0->d);
|
||||
ggml_vdotq_s32(vdupq_n_s32(0), v0_0hf, v1_0h))), GGML_FP16_TO_FP32(x0->d)*GGML_FP16_TO_FP32(y0->d));
|
||||
sumv1 = vmlaq_n_f32(sumv1, vcvtq_f32_s32(vaddq_s32(
|
||||
ggml_vdotq_s32(vdupq_n_s32(0), v0_1lf, v1_1l),
|
||||
ggml_vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*y1->d);
|
||||
ggml_vdotq_s32(vdupq_n_s32(0), v0_1hf, v1_1h))), GGML_FP16_TO_FP32(x1->d)*GGML_FP16_TO_FP32(y1->d));
|
||||
}
|
||||
|
||||
*s = vaddvq_f32(sumv0) + vaddvq_f32(sumv1) + summs0 + summs1;
|
||||
@@ -4596,7 +4601,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
const block_q5_1 * restrict x0 = &x[i];
|
||||
const block_q8_1 * restrict y0 = &y[i];
|
||||
|
||||
summs += GGML_FP16_TO_FP32(x0->m) * y0->s;
|
||||
summs += GGML_FP16_TO_FP32(x0->m) * GGML_FP16_TO_FP32(y0->s);
|
||||
|
||||
const v128_t m4b = wasm_i8x16_splat(0x0F);
|
||||
|
||||
@@ -4643,7 +4648,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
wasm_i32x4_dot_i16x8(v0lfh, v1lh)),
|
||||
wasm_i32x4_add(wasm_i32x4_dot_i16x8(v0hfl, v1hl),
|
||||
wasm_i32x4_dot_i16x8(v0hfh, v1hh)))),
|
||||
wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * y0->d)));
|
||||
wasm_f32x4_splat(GGML_FP16_TO_FP32(x0->d) * GGML_FP16_TO_FP32(y0->d))));
|
||||
}
|
||||
|
||||
*s = wasm_f32x4_extract_lane(sumv, 0) + wasm_f32x4_extract_lane(sumv, 1) +
|
||||
@@ -4658,14 +4663,14 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
for (int i = 0; i < nb; i++) {
|
||||
const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d));
|
||||
|
||||
summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s;
|
||||
summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s);
|
||||
|
||||
__m256i qx = bytes_from_nibbles_32(x[i].qs);
|
||||
__m256i bxhi = bytes_from_bits_32(x[i].qh);
|
||||
bxhi = _mm256_and_si256(bxhi, _mm256_set1_epi8(0x10));
|
||||
qx = _mm256_or_si256(qx, bxhi);
|
||||
|
||||
const __m256 dy = _mm256_set1_ps(y[i].d);
|
||||
const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[i].d));
|
||||
const __m256i qy = _mm256_loadu_si256((const __m256i *)y[i].qs);
|
||||
|
||||
const __m256 q = mul_sum_us8_pairs_float(qx, qy);
|
||||
@@ -4685,7 +4690,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
for (int i = 0; i < nb; i++) {
|
||||
const __m256 dx = _mm256_set1_ps(GGML_FP16_TO_FP32(x[i].d));
|
||||
|
||||
summs += GGML_FP16_TO_FP32(x[i].m) * y[i].s;
|
||||
summs += GGML_FP16_TO_FP32(x[i].m) * GGML_FP16_TO_FP32(y[i].s);
|
||||
|
||||
__m256i bx_0 = bytes_from_nibbles_32(x[i].qs);
|
||||
const __m256i bxhi = bytes_from_bits_32(x[i].qh);
|
||||
@@ -4699,7 +4704,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
bxh = _mm_or_si128(bxh, bxhih);
|
||||
bx_0 = MM256_SET_M128I(bxh, bxl);
|
||||
|
||||
const __m256 dy = _mm256_set1_ps(y[i].d);
|
||||
const __m256 dy = _mm256_set1_ps(GGML_FP16_TO_FP32(y[i].d));
|
||||
const __m256i by_0 = _mm256_loadu_si256((const __m256i *)y[i].qs);
|
||||
|
||||
const __m256 q = mul_sum_us8_pairs_float(bx_0, by_0);
|
||||
@@ -4766,7 +4771,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
|
||||
int sumi = __riscv_vmv_x_s_i32m1_i32(vs2);
|
||||
|
||||
sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
|
||||
sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s);
|
||||
}
|
||||
|
||||
*s = sumf;
|
||||
@@ -4790,7 +4795,7 @@ void ggml_vec_dot_q5_1_q8_1(int n, float * restrict s, size_t bs, const void * r
|
||||
sumi += (x0 * y[i].qs[j]) + (x1 * y[i].qs[j + qk/2]);
|
||||
}
|
||||
|
||||
sumf += (GGML_FP16_TO_FP32(x[i].d)*y[i].d)*sumi + GGML_FP16_TO_FP32(x[i].m)*y[i].s;
|
||||
sumf += (GGML_FP16_TO_FP32(x[i].d)*GGML_FP16_TO_FP32(y[i].d))*sumi + GGML_FP16_TO_FP32(x[i].m)*GGML_FP16_TO_FP32(y[i].s);
|
||||
}
|
||||
|
||||
*s = sumf;
|
||||
@@ -9024,7 +9029,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void *
|
||||
vld1_s8((const int8_t *)(iq2s_grid + (qs[7] | ((qh[ib32+1] << 2) & 0x300)))));
|
||||
qs += 8;
|
||||
|
||||
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | (signs[1] << 16)));
|
||||
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | ((uint32_t) signs[1] << 16)));
|
||||
vs.val[1] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2);
|
||||
vs.val[0] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2);
|
||||
vs.val[0] = vceqq_u8(vs.val[0], mask2);
|
||||
@@ -9033,7 +9038,7 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void *
|
||||
q2s.val[0] = vmulq_s8(vreinterpretq_s8_u8(vorrq_u8(vs.val[0], m1)), q2s.val[0]);
|
||||
q2s.val[1] = vmulq_s8(vreinterpretq_s8_u8(vorrq_u8(vs.val[1], m1)), q2s.val[1]);
|
||||
|
||||
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[2] | (signs[3] << 16)));
|
||||
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[2] | ((uint32_t) signs[3] << 16)));
|
||||
vs.val[1] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2);
|
||||
vs.val[0] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2);
|
||||
vs.val[0] = vceqq_u8(vs.val[0], mask2);
|
||||
@@ -9104,12 +9109,12 @@ void ggml_vec_dot_iq2_s_q8_K(int n, float * restrict s, size_t bs, const void *
|
||||
iq2s_grid[qs[4] | ((qh[ib32+1] << 8) & 0x300)]);
|
||||
qs += 8;
|
||||
|
||||
__m256i aux256 = _mm256_set1_epi32(signs[0] | (signs[1] << 16));
|
||||
__m256i aux256 = _mm256_set1_epi32(signs[0] | ((uint32_t) signs[1] << 16));
|
||||
aux256 = _mm256_and_si256(_mm256_shuffle_epi8(aux256,mask1), mask2);
|
||||
const __m256i s2_1 = _mm256_cmpeq_epi8(aux256, mask2);
|
||||
const __m256i q8s_1 = _mm256_sub_epi8(_mm256_xor_si256(s2_1, q8_1), s2_1);
|
||||
|
||||
aux256 = _mm256_set1_epi32(signs[2] | (signs[3] << 16));
|
||||
aux256 = _mm256_set1_epi32(signs[2] | ((uint32_t) signs[3] << 16));
|
||||
aux256 = _mm256_and_si256(_mm256_shuffle_epi8(aux256,mask1), mask2);
|
||||
const __m256i s2_2 = _mm256_cmpeq_epi8(aux256, mask2);
|
||||
const __m256i q8s_2 = _mm256_sub_epi8(_mm256_xor_si256(s2_2, q8_2), s2_2);
|
||||
@@ -9374,18 +9379,18 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * restrict s, size_t bs, const void *
|
||||
|
||||
const uint8x16_t idx_l = vld1q_u8(qs); qs += 16;
|
||||
idx.vec_index = vorrq_u16(vmovl_u8(vget_low_u8 (idx_l)), vandq_u16(vshlq_u16(vdupq_n_u16(qh[ib32+0]), hshift), m256));
|
||||
const uint32x4_t aux32x4_0 = {iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
|
||||
iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]};
|
||||
const uint32x4_t aux32x4_1 = {iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
|
||||
iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]};
|
||||
const uint32x4_t aux32x4_0 = ggml_vld1q_u32(iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
|
||||
iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]);
|
||||
const uint32x4_t aux32x4_1 = ggml_vld1q_u32(iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
|
||||
iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]);
|
||||
idx.vec_index = vorrq_u16(vmovl_u8(vget_high_u8(idx_l)), vandq_u16(vshlq_u16(vdupq_n_u16(qh[ib32+1]), hshift), m256));
|
||||
const uint32x4_t aux32x4_2 = {iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
|
||||
iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]};
|
||||
const uint32x4_t aux32x4_3 = {iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
|
||||
iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]};
|
||||
const uint32x4_t aux32x4_2 = ggml_vld1q_u32(iq3s_grid[idx.index[0]], iq3s_grid[idx.index[1]],
|
||||
iq3s_grid[idx.index[2]], iq3s_grid[idx.index[3]]);
|
||||
const uint32x4_t aux32x4_3 = ggml_vld1q_u32(iq3s_grid[idx.index[4]], iq3s_grid[idx.index[5]],
|
||||
iq3s_grid[idx.index[6]], iq3s_grid[idx.index[7]]);
|
||||
|
||||
|
||||
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | (signs[1] << 16)));
|
||||
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[0] | ((uint32_t) signs[1] << 16)));
|
||||
vs.val[1] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2);
|
||||
vs.val[0] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2);
|
||||
vs.val[0] = vorrq_u8(vceqq_u8(vs.val[0], mask2), m1);
|
||||
@@ -9394,7 +9399,7 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * restrict s, size_t bs, const void *
|
||||
q3s.val[0] = vmulq_s8(vreinterpretq_s8_u8(vs.val[0]), vreinterpretq_s8_u32(aux32x4_0));
|
||||
q3s.val[1] = vmulq_s8(vreinterpretq_s8_u8(vs.val[1]), vreinterpretq_s8_u32(aux32x4_1));
|
||||
|
||||
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[2] | (signs[3] << 16)));
|
||||
vs.val[0] = vreinterpretq_u8_u32(vdupq_n_u32(signs[2] | ((uint32_t) signs[3] << 16)));
|
||||
vs.val[1] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[1]), mask2);
|
||||
vs.val[0] = vandq_u8(ggml_vqtbl1q_u8(vs.val[0], mask1.val[0]), mask2);
|
||||
vs.val[0] = vorrq_u8(vceqq_u8(vs.val[0], mask2), m1);
|
||||
@@ -9582,7 +9587,7 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void
|
||||
const uint8_t * qs = x[i].qs;
|
||||
const uint16_t * qh = x[i].qh;
|
||||
|
||||
int sumi1 = 0, sumi2 = 0;
|
||||
int sumi1 = 0, sumi2 = 0, sumi3 = 0;
|
||||
|
||||
for (int ib = 0; ib < QK_K/32; ib += 2) {
|
||||
|
||||
@@ -9601,12 +9606,16 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void
|
||||
const int32x4_t p1 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q1b.val[0], q8b.val[0]), q1b.val[1], q8b.val[1]);
|
||||
const int32x4_t p2 = ggml_vdotq_s32(ggml_vdotq_s32(vdupq_n_s32(0), q1b.val[2], q8b.val[2]), q1b.val[3], q8b.val[3]);
|
||||
|
||||
sumi1 += vaddvq_s32(p1) * (2*(qh[ib+0] >> 12) + 1);
|
||||
sumi2 += vaddvq_s32(p2) * (2*(qh[ib+1] >> 12) + 1);
|
||||
const int ls1 = 2*((qh[ib+0] >> 12) & 7) + 1;
|
||||
const int ls2 = 2*((qh[ib+1] >> 12) & 7) + 1;
|
||||
sumi1 += vaddvq_s32(p1) * ls1;
|
||||
sumi2 += vaddvq_s32(p2) * ls2;
|
||||
sumi3 += (y[i].bsums[2*ib+0] + y[i].bsums[2*ib+1]) * ls1 * (qh[ib+0] & 0x8000 ? -1 : 1)
|
||||
+ (y[i].bsums[2*ib+2] + y[i].bsums[2*ib+3]) * ls2 * (qh[ib+1] & 0x8000 ? -1 : 1);
|
||||
|
||||
}
|
||||
|
||||
sumf += y[i].d * GGML_FP16_TO_FP32(x[i].d) * (sumi1 + sumi2);
|
||||
sumf += y[i].d * GGML_FP16_TO_FP32(x[i].d) * (sumi1 + sumi2 + IQ1S_DELTA * sumi3);
|
||||
}
|
||||
|
||||
*s = sumf;
|
||||
@@ -9614,6 +9623,7 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void
|
||||
#elif defined __AVX2__
|
||||
|
||||
__m256 accum = _mm256_setzero_ps();
|
||||
float accum1 = 0;
|
||||
for (int i = 0; i < nb; ++i) {
|
||||
|
||||
const int8_t * q8 = y[i].qs;
|
||||
@@ -9621,6 +9631,7 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void
|
||||
const uint16_t * qh = x[i].qh;
|
||||
|
||||
__m256i sumi = _mm256_setzero_si256();
|
||||
int sumi1 = 0;
|
||||
for (int ib = 0; ib < QK_K/32; ib += 2) {
|
||||
const __m256i q1b_1 = _mm256_set_epi64x(iq1s_grid[qs[3] | ((qh[ib+0] >> 1) & 0x700)], iq1s_grid[qs[2] | ((qh[ib+0] << 2) & 0x700)],
|
||||
iq1s_grid[qs[1] | ((qh[ib+0] << 5) & 0x700)], iq1s_grid[qs[0] | ((qh[ib+0] << 8) & 0x700)]);
|
||||
@@ -9632,17 +9643,23 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void
|
||||
|
||||
const __m256i dot1 = mul_add_epi8(q1b_1, q8b_1);
|
||||
const __m256i dot2 = mul_add_epi8(q1b_2, q8b_2);
|
||||
const __m256i p1 = _mm256_madd_epi16(dot1, _mm256_set1_epi16(2*(qh[ib+0] >> 12) + 1));
|
||||
const __m256i p2 = _mm256_madd_epi16(dot2, _mm256_set1_epi16(2*(qh[ib+1] >> 12) + 1));
|
||||
const int16_t ls1 = 2*((qh[ib+0] >> 12) & 7) + 1;
|
||||
const int16_t ls2 = 2*((qh[ib+1] >> 12) & 7) + 1;
|
||||
const __m256i p1 = _mm256_madd_epi16(dot1, _mm256_set1_epi16(ls1));
|
||||
const __m256i p2 = _mm256_madd_epi16(dot2, _mm256_set1_epi16(ls2));
|
||||
|
||||
sumi = _mm256_add_epi32(sumi, _mm256_add_epi32(p1, p2));
|
||||
sumi1 += (y[i].bsums[2*ib+0] + y[i].bsums[2*ib+1]) * (qh[ib+0] & 0x8000 ? -1 : 1) * ls1
|
||||
+ (y[i].bsums[2*ib+2] + y[i].bsums[2*ib+3]) * (qh[ib+1] & 0x8000 ? -1 : 1) * ls2;
|
||||
}
|
||||
|
||||
accum = _mm256_fmadd_ps(_mm256_set1_ps(y[i].d * GGML_FP16_TO_FP32(x[i].d)), _mm256_cvtepi32_ps(sumi), accum);
|
||||
const float d = y[i].d * GGML_FP16_TO_FP32(x[i].d);
|
||||
accum = _mm256_fmadd_ps(_mm256_set1_ps(d), _mm256_cvtepi32_ps(sumi), accum);
|
||||
accum1 += d * sumi1;
|
||||
|
||||
}
|
||||
|
||||
*s = hsum_float_8(accum);
|
||||
*s = hsum_float_8(accum) + IQ1S_DELTA * accum1;
|
||||
|
||||
#else
|
||||
|
||||
@@ -9653,9 +9670,10 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void
|
||||
const uint8_t * qs = x[i].qs;
|
||||
const uint16_t * qh = x[i].qh;
|
||||
|
||||
int sumi = 0;
|
||||
int sumi = 0, sumi1 = 0;
|
||||
for (int ib = 0; ib < QK_K/32; ++ib) {
|
||||
const int ls = 2*(qh[ib] >> 12) + 1;
|
||||
const int ls = 2*((qh[ib] >> 12) & 7) + 1;
|
||||
const int delta = qh[ib] & 0x8000 ? -1 : 1;
|
||||
int lsum = 0;
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
const int8_t * grid = (const int8_t *)(iq1s_grid + (qs[l] | (((qh[ib] >> 3*l) & 7) << 8)));
|
||||
@@ -9664,11 +9682,12 @@ void ggml_vec_dot_iq1_s_q8_K (int n, float * restrict s, size_t bs, const void
|
||||
}
|
||||
q8 += 8;
|
||||
}
|
||||
sumi += ls * lsum;
|
||||
sumi += ls * lsum;
|
||||
sumi1 += ls * delta * (y[i].bsums[2*ib+0] + y[i].bsums[2*ib+1]);
|
||||
qs += 4;
|
||||
}
|
||||
|
||||
sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * sumi;
|
||||
sumf += GGML_FP16_TO_FP32(x[i].d) * y[i].d * (sumi + IQ1S_DELTA * sumi1);
|
||||
}
|
||||
|
||||
*s = sumf;
|
||||
@@ -11438,7 +11457,7 @@ static int iq1_find_best_neighbour(const uint16_t * restrict neighbours, const u
|
||||
}
|
||||
|
||||
static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const uint64_t * restrict grid,
|
||||
const float * restrict xval, const float * restrict weight, float scale, int8_t * restrict L, int ngrid) {
|
||||
const float * restrict xval, const float * restrict weight, float scale, const float * restrict xg, int8_t * restrict L, int ngrid) {
|
||||
int num_neighbors = neighbours[0];
|
||||
GGML_ASSERT(num_neighbors > 0);
|
||||
float best_score = FLT_MAX;
|
||||
@@ -11447,7 +11466,7 @@ static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const
|
||||
const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
|
||||
float d2 = 0;
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
float q = (pg[i] - 3)/2;
|
||||
float q = xg[(pg[i] - 1)/2];
|
||||
float w = weight[i];
|
||||
float diff = scale*q - xval[i];
|
||||
d2 += w*diff*diff;
|
||||
@@ -11463,7 +11482,7 @@ static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const
|
||||
float d2 = 0;
|
||||
for (int j = 0; j < 8; ++j) {
|
||||
float w = weight[j];
|
||||
float q = (grid_i[j] - 3)/2;
|
||||
float q = xg[(grid_i[j] - 1)/2];
|
||||
float diff = scale*q - xval[i];
|
||||
d2 += w*diff*diff;
|
||||
}
|
||||
@@ -11480,7 +11499,7 @@ static int iq1_find_best_neighbour2(const uint16_t * restrict neighbours, const
|
||||
const int8_t * pg = (const int8_t *)(grid + neighbours[j]);
|
||||
float sumqx = 0, sumq2 = 0;
|
||||
for (int i = 0; i < 8; ++i) {
|
||||
float q = (pg[i] - 3)/2;
|
||||
float q = xg[(pg[i] - 1)/2];
|
||||
float w = weight[i];
|
||||
sumqx += w*q*xval[i];
|
||||
sumq2 += w*q*q;
|
||||
@@ -11519,6 +11538,9 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
|
||||
|
||||
block_iq1_s * y = vy;
|
||||
|
||||
const float x_p[3] = {-1 + IQ1S_DELTA, IQ1S_DELTA, 1 + IQ1S_DELTA};
|
||||
const float x_m[3] = {-1 - IQ1S_DELTA, -IQ1S_DELTA, 1 - IQ1S_DELTA};
|
||||
|
||||
float scales[QK_K/IQ1S_BLOCK_SIZE];
|
||||
float weight[IQ1S_BLOCK_SIZE];
|
||||
int8_t L[IQ1S_BLOCK_SIZE];
|
||||
@@ -11527,6 +11549,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
|
||||
float pairs[2*IQ1S_BLOCK_SIZE];
|
||||
int * idx = (int *)(pairs + 1);
|
||||
uint16_t index[IQ1S_BLOCK_SIZE/8];
|
||||
int8_t shifts[QK_K/IQ1S_BLOCK_SIZE];
|
||||
|
||||
for (int ibl = 0; ibl < nbl; ++ibl) {
|
||||
|
||||
@@ -11572,25 +11595,33 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
|
||||
}
|
||||
}
|
||||
float best_score = 0, scale = max;
|
||||
int besti1 = 0, besti2 = 0;
|
||||
int besti1 = -1, besti2 = -1, best_shift = 0;
|
||||
for (int i1 = 0; i1 <= IQ1S_BLOCK_SIZE; ++i1) {
|
||||
for (int i2 = i1; i2 <= IQ1S_BLOCK_SIZE; ++i2) {
|
||||
float sumqx = -(sumx[i1] - sumx[0]) + (sumx[IQ1S_BLOCK_SIZE] - sumx[i2]);
|
||||
float sumq2 = (sumw[i1] - sumw[0]) + (sumw[IQ1S_BLOCK_SIZE] - sumw[i2]);
|
||||
float sumqx = (sumx[i1] - sumx[0])*x_p[0] + (sumx[i2] - sumx[i1])*x_p[1] + (sumx[IQ1S_BLOCK_SIZE] - sumx[i2])*x_p[2];
|
||||
float sumq2 = (sumw[i1] - sumw[0])*x_p[0]*x_p[0] + (sumw[i2] - sumw[i1])*x_p[1]*x_p[1] + (sumw[IQ1S_BLOCK_SIZE] - sumw[i2])*x_p[2]*x_p[2];
|
||||
if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
|
||||
scale = sumqx/sumq2; best_score = scale*sumqx;
|
||||
besti1 = i1; besti2 = i2;
|
||||
besti1 = i1; besti2 = i2; best_shift = 1;
|
||||
}
|
||||
sumqx = (sumx[i1] - sumx[0])*x_m[0] + (sumx[i2] - sumx[i1])*x_m[1] + (sumx[IQ1S_BLOCK_SIZE] - sumx[i2])*x_m[2];
|
||||
sumq2 = (sumw[i1] - sumw[0])*x_m[0]*x_m[0] + (sumw[i2] - sumw[i1])*x_m[1]*x_m[1] + (sumw[IQ1S_BLOCK_SIZE] - sumw[i2])*x_m[2]*x_m[2];
|
||||
if (sumq2 > 0 && sumqx*sumqx > best_score*sumq2) {
|
||||
scale = sumqx/sumq2; best_score = scale*sumqx;
|
||||
besti1 = i1; besti2 = i2; best_shift = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
GGML_ASSERT(besti1 >= 0 && besti2 >= 0 && best_shift != 0);
|
||||
for (int j = 0; j < besti1; ++j) L[idx[2*j]] = 0;
|
||||
for (int j = besti1; j < besti2; ++j) L[idx[2*j]] = 1;
|
||||
for (int j = besti2; j < IQ1S_BLOCK_SIZE; ++j) L[idx[2*j]] = 2;
|
||||
if (scale < 0) {
|
||||
for (int j = 0; j < IQ1S_BLOCK_SIZE; ++j) L[j] = 2 - L[j];
|
||||
scale = -scale;
|
||||
scale = -scale; best_shift = -best_shift;
|
||||
}
|
||||
bool all_on_grid = true;
|
||||
const float * xx = best_shift == 1 ? x_p : x_m;
|
||||
for (int k = 0; k < IQ1S_BLOCK_SIZE/8; ++k) {
|
||||
uint16_t u = 0;
|
||||
for (int j = 0; j < 8; ++j) u |= (L[8*k+j] << 2*j);
|
||||
@@ -11598,7 +11629,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
|
||||
if (grid_index < 0) {
|
||||
all_on_grid = false;
|
||||
const uint16_t * neighbours = kneighbors_q2xs - kmap_q2xs[u] - 1;
|
||||
grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, L + 8*k, NGRID_IQ1S);
|
||||
grid_index = iq1_find_best_neighbour2(neighbours, kgrid_q2xs, xb + 8*k, weight + 8*k, scale, xx, L + 8*k, NGRID_IQ1S);
|
||||
GGML_ASSERT(grid_index >= 0);
|
||||
}
|
||||
index[k] = grid_index;
|
||||
@@ -11609,7 +11640,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
|
||||
const int8_t * pg = (const int8_t *)(kgrid_q2xs + index[k]);
|
||||
for (int j = 0; j < 8; ++j) {
|
||||
float w = weight[8*k + j];
|
||||
float q = (pg[j] - 3)/2;
|
||||
float q = xx[(pg[j] - 1)/2];
|
||||
sumqx += w*q*xb[8*k+j];
|
||||
sumq2 += w*q*q;
|
||||
}
|
||||
@@ -11624,6 +11655,7 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
|
||||
y[ibl].qh[ib] = h;
|
||||
GGML_ASSERT(scale >= 0);
|
||||
scales[ib] = scale;
|
||||
shifts[ib] = best_shift;
|
||||
max_scale = MAX(max_scale, scale);
|
||||
}
|
||||
|
||||
@@ -11632,12 +11664,13 @@ static void quantize_row_iq1_s_impl(const float * restrict x, void * restrict vy
|
||||
continue;
|
||||
}
|
||||
|
||||
float d = max_scale/31;
|
||||
float d = max_scale/15;
|
||||
y[ibl].d = GGML_FP32_TO_FP16(d*1.125f); // 1.085f is another fudge factor. Don't ask me why it is needed.
|
||||
float id = 1/d;
|
||||
for (int ib = 0; ib < QK_K/IQ1S_BLOCK_SIZE; ++ib) {
|
||||
int l = nearest_int(0.5f*(id*scales[ib]-1));
|
||||
l = MAX(0, MIN(15, l));
|
||||
l = MAX(0, MIN(7, l));
|
||||
if (shifts[ib] == -1) l |= 8;
|
||||
y[ibl].qh[ib] |= (l << 12);
|
||||
}
|
||||
}
|
||||
|
||||
+5
-241
@@ -1,248 +1,12 @@
|
||||
#pragma once
|
||||
|
||||
#define GGML_COMMON_DECL_C
|
||||
#include "ggml-common.h"
|
||||
|
||||
#include "ggml.h"
|
||||
|
||||
// GGML internal header
|
||||
|
||||
#include "ggml-impl.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#define QK4_0 32
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // delta
|
||||
uint8_t qs[QK4_0 / 2]; // nibbles / quants
|
||||
} block_q4_0;
|
||||
static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding");
|
||||
|
||||
#define QK4_1 32
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // delta
|
||||
ggml_fp16_t m; // min
|
||||
uint8_t qs[QK4_1 / 2]; // nibbles / quants
|
||||
} block_q4_1;
|
||||
static_assert(sizeof(block_q4_1) == 2 * sizeof(ggml_fp16_t) + QK4_1 / 2, "wrong q4_1 block size/padding");
|
||||
|
||||
#define QK5_0 32
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // delta
|
||||
uint8_t qh[4]; // 5-th bit of quants
|
||||
uint8_t qs[QK5_0 / 2]; // nibbles / quants
|
||||
} block_q5_0;
|
||||
static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding");
|
||||
|
||||
#define QK5_1 32
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // delta
|
||||
ggml_fp16_t m; // min
|
||||
uint8_t qh[4]; // 5-th bit of quants
|
||||
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
||||
} block_q5_1;
|
||||
static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
|
||||
|
||||
#define QK8_0 32
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // delta
|
||||
int8_t qs[QK8_0]; // quants
|
||||
} block_q8_0;
|
||||
static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding");
|
||||
|
||||
#define QK8_1 32
|
||||
typedef struct {
|
||||
float d; // delta
|
||||
float s; // d * sum(qs[i])
|
||||
int8_t qs[QK8_1]; // quants
|
||||
} block_q8_1;
|
||||
static_assert(sizeof(block_q8_1) == 2*sizeof(float) + QK8_1, "wrong q8_1 block size/padding");
|
||||
|
||||
//
|
||||
// Super-block quantization structures
|
||||
//
|
||||
|
||||
// Super-block size
|
||||
#ifdef GGML_QKK_64
|
||||
#define QK_K 64
|
||||
#define K_SCALE_SIZE 4
|
||||
#else
|
||||
#define QK_K 256
|
||||
#define K_SCALE_SIZE 12
|
||||
#endif
|
||||
|
||||
// 2-bit quantization
|
||||
// weight is represented as x = a * q + b
|
||||
// 16 blocks of 16 elements each
|
||||
// Effectively 2.625 bits per weight
|
||||
typedef struct {
|
||||
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
||||
uint8_t qs[QK_K/4]; // quants
|
||||
ggml_fp16_t d; // super-block scale for quantized scales
|
||||
ggml_fp16_t dmin; // super-block scale for quantized mins
|
||||
} block_q2_K;
|
||||
static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
|
||||
|
||||
// 3-bit quantization
|
||||
// weight is represented as x = a * q
|
||||
// 16 blocks of 16 elements each
|
||||
// Effectively 3.4375 bits per weight
|
||||
#ifdef GGML_QKK_64
|
||||
typedef struct {
|
||||
uint8_t hmask[QK_K/8]; // quants - high bit
|
||||
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
||||
uint8_t scales[2];
|
||||
ggml_fp16_t d; // super-block scale
|
||||
} block_q3_K;
|
||||
static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + 2, "wrong q3_K block size/padding");
|
||||
#else
|
||||
typedef struct {
|
||||
uint8_t hmask[QK_K/8]; // quants - high bit
|
||||
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
||||
uint8_t scales[12]; // scales, quantized with 6 bits
|
||||
ggml_fp16_t d; // super-block scale
|
||||
} block_q3_K;
|
||||
static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + 12, "wrong q3_K block size/padding");
|
||||
#endif
|
||||
|
||||
// 4-bit quantization
|
||||
// 8 blocks of 32 elements each
|
||||
// weight is represented as x = a * q + b
|
||||
// Effectively 4.5 bits per weight
|
||||
#ifdef GGML_QKK_64
|
||||
typedef struct {
|
||||
ggml_fp16_t d[2]; // super-block scales/mins
|
||||
uint8_t scales[2]; // 4-bit block scales/mins
|
||||
uint8_t qs[QK_K/2]; // 4--bit quants
|
||||
} block_q4_K;
|
||||
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + QK_K/2 + 2, "wrong q4_K block size/padding");
|
||||
#else
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // super-block scale for quantized scales
|
||||
ggml_fp16_t dmin; // super-block scale for quantized mins
|
||||
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
||||
uint8_t qs[QK_K/2]; // 4--bit quants
|
||||
} block_q4_K;
|
||||
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2, "wrong q4_K block size/padding");
|
||||
#endif
|
||||
|
||||
// 5-bit quantization
|
||||
// 8 blocks of 32 elements each
|
||||
// weight is represented as x = a * q + b
|
||||
// Effectively 5.5 bits per weight
|
||||
#ifdef GGML_QKK_64
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // super-block scale
|
||||
int8_t scales[QK_K/16]; // 8-bit block scales
|
||||
uint8_t qh[QK_K/8]; // quants, high bit
|
||||
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
||||
} block_q5_K;
|
||||
static_assert(sizeof(block_q5_K) == sizeof(ggml_fp16_t) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding");
|
||||
#else
|
||||
typedef struct {
|
||||
ggml_fp16_t d; // super-block scale for quantized scales
|
||||
ggml_fp16_t dmin; // super-block scale for quantized mins
|
||||
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
||||
uint8_t qh[QK_K/8]; // quants, high bit
|
||||
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
||||
} block_q5_K;
|
||||
static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding");
|
||||
#endif
|
||||
|
||||
// 6-bit quantization
|
||||
// weight is represented as x = a * q
|
||||
// 16 blocks of 16 elements each
|
||||
// Effectively 6.5625 bits per weight
|
||||
typedef struct {
|
||||
uint8_t ql[QK_K/2]; // quants, lower 4 bits
|
||||
uint8_t qh[QK_K/4]; // quants, upper 2 bits
|
||||
int8_t scales[QK_K/16]; // scales, quantized with 8 bits
|
||||
ggml_fp16_t d; // super-block scale
|
||||
} block_q6_K;
|
||||
static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + QK_K / 16 + 3*QK_K/4, "wrong q6_K block size/padding");
|
||||
|
||||
// This is only used for intermediate quantization and dot products
|
||||
typedef struct {
|
||||
float d; // delta
|
||||
int8_t qs[QK_K]; // quants
|
||||
int16_t bsums[QK_K/16]; // sum of quants in groups of 16
|
||||
} block_q8_K;
|
||||
static_assert(sizeof(block_q8_K) == sizeof(float) + QK_K + QK_K/16*sizeof(int16_t), "wrong q8_K block size/padding");
|
||||
|
||||
// (Almost) "true" 2-bit quantization.
|
||||
// Due to the need to use blocks as per ggml design, it ends up using
|
||||
// 2.0625 bpw because of the 16-bit scale for each block of 256.
|
||||
typedef struct {
|
||||
ggml_fp16_t d;
|
||||
uint16_t qs[QK_K/8];
|
||||
} block_iq2_xxs;
|
||||
static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding");
|
||||
|
||||
// 2.3125 bpw quants
|
||||
typedef struct {
|
||||
ggml_fp16_t d;
|
||||
uint16_t qs[QK_K/8];
|
||||
uint8_t scales[QK_K/32];
|
||||
} block_iq2_xs;
|
||||
static_assert(sizeof(block_iq2_xs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding");
|
||||
|
||||
// 2.5625 bpw quants
|
||||
typedef struct {
|
||||
ggml_fp16_t d;
|
||||
uint8_t qs[QK_K/4];
|
||||
uint8_t qh[QK_K/32];
|
||||
uint8_t scales[QK_K/32];
|
||||
} block_iq2_s;
|
||||
static_assert(sizeof(block_iq2_s) == sizeof(ggml_fp16_t) + QK_K/4 + QK_K/16, "wrong iq2_s block size/padding");
|
||||
|
||||
// (Almost) "true" 3-bit quantization.
|
||||
// Due to the need to use blocks as per ggml design, it ends up using
|
||||
// 3.0625 bpw because of the 16-bit scale for each block of 256.
|
||||
typedef struct {
|
||||
ggml_fp16_t d;
|
||||
uint8_t qs[3*QK_K/8];
|
||||
} block_iq3_xxs;
|
||||
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
|
||||
|
||||
// 3.4375 bpw
|
||||
#if QK_K == 64
|
||||
#define IQ3S_N_SCALE 2
|
||||
#else
|
||||
#define IQ3S_N_SCALE QK_K/64
|
||||
#endif
|
||||
typedef struct {
|
||||
ggml_fp16_t d;
|
||||
uint8_t qs[QK_K/4];
|
||||
uint8_t qh[QK_K/32];
|
||||
uint8_t signs[QK_K/8];
|
||||
uint8_t scales[IQ3S_N_SCALE];
|
||||
} block_iq3_s;
|
||||
static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding");
|
||||
|
||||
typedef struct {
|
||||
ggml_fp16_t d;
|
||||
uint8_t qs[QK_K/8];
|
||||
uint16_t qh[QK_K/32];
|
||||
} block_iq1_s;
|
||||
static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding");
|
||||
|
||||
// Non-linear quants
|
||||
#define QK4_NL 32
|
||||
typedef struct {
|
||||
ggml_fp16_t d;
|
||||
uint8_t qs[QK4_NL/2];
|
||||
} block_iq4_nl;
|
||||
static_assert(sizeof(block_iq4_nl) == sizeof(ggml_fp16_t) + QK4_NL/2, "wrong iq4_nl block size/padding");
|
||||
|
||||
#if QK_K == 64
|
||||
#define block_iq4_xs block_iq4_nl
|
||||
//typedef struct block_iq4_nl block_iq4_xs;
|
||||
#else
|
||||
typedef struct {
|
||||
ggml_fp16_t d;
|
||||
uint16_t scales_h;
|
||||
uint8_t scales_l[QK_K/64];
|
||||
uint8_t qs[QK_K/2];
|
||||
} block_iq4_xs;
|
||||
static_assert(sizeof(block_iq4_xs) == sizeof(ggml_fp16_t) + sizeof(uint16_t) + QK_K/64 + QK_K/2, "wrong iq4_xs block size/padding");
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
+309
-322
@@ -16,6 +16,7 @@
|
||||
#include <cinttypes>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <cstdlib>
|
||||
#include <float.h>
|
||||
#include <limits>
|
||||
#include <stdint.h>
|
||||
@@ -24,10 +25,9 @@
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include <regex>
|
||||
|
||||
#include <sycl/sycl.hpp>
|
||||
#include <sycl/half_type.hpp>
|
||||
@@ -82,6 +82,30 @@ Following definition copied from DPCT head files, which are used by ggml-sycl.cp
|
||||
#define __dpct_noinline__ __attribute__((noinline))
|
||||
#endif
|
||||
|
||||
|
||||
// Translates the device_type reported by a SYCL device into a short tag:
// "cpu", "gpu", "host", "acc" (accelerator), or "unknown" for anything else.
std::string get_device_type_name(const sycl::device &Device) {
    using sycl::info::device_type;

    const device_type kind = Device.get_info<sycl::info::device::device_type>();

    if (kind == device_type::cpu) {
        return "cpu";
    }
    if (kind == device_type::gpu) {
        return "gpu";
    }
    if (kind == device_type::host) {
        return "host";
    }
    if (kind == device_type::accelerator) {
        return "acc";
    }
    // Every other enumerator (e.g. custom/automatic/all) gets the catch-all tag.
    return "unknown";
}
|
||||
|
||||
// Builds the "<backend>:<type>" label of a device, where <backend> is the
// streamed form of device.get_backend() and <type> comes from
// get_device_type_name().
std::string get_device_backend_and_type(const sycl::device &device) {
    const sycl::backend device_backend = device.get_backend();

    std::stringstream label;
    label << device_backend;
    label << ":";
    label << get_device_type_name(device);
    return label.str();
}
|
||||
|
||||
namespace dpct
|
||||
{
|
||||
typedef sycl::queue *queue_ptr;
|
||||
@@ -202,24 +226,29 @@ namespace dpct
|
||||
// Version string has the following format:
|
||||
// a. OpenCL<space><major.minor><space><vendor-specific-information>
|
||||
// b. <major.minor>
|
||||
// c. <AmdGcnArchName> e.g gfx1030
|
||||
std::string ver;
|
||||
ver = dev.get_info<sycl::info::device::version>();
|
||||
std::string::size_type i = 0;
|
||||
while (i < ver.size())
|
||||
{
|
||||
if (isdigit(ver[i]))
|
||||
break;
|
||||
i++;
|
||||
while (i < ver.size()) {
|
||||
if (isdigit(ver[i]))
|
||||
break;
|
||||
i++;
|
||||
}
|
||||
major = std::stoi(&(ver[i]));
|
||||
while (i < ver.size())
|
||||
{
|
||||
if (ver[i] == '.')
|
||||
break;
|
||||
i++;
|
||||
while (i < ver.size()) {
|
||||
if (ver[i] == '.')
|
||||
break;
|
||||
i++;
|
||||
}
|
||||
if (i < ver.size()) {
|
||||
// a. and b.
|
||||
i++;
|
||||
minor = std::stoi(&(ver[i]));
|
||||
} else {
|
||||
// c.
|
||||
minor = 0;
|
||||
}
|
||||
i++;
|
||||
minor = std::stoi(&(ver[i]));
|
||||
}
|
||||
|
||||
template <typename tag, typename T>
|
||||
@@ -937,17 +966,65 @@ namespace dpct
|
||||
|
||||
private:
|
||||
mutable std::recursive_mutex m_mutex;
|
||||
// Ordering predicate for sorting devices: device1 goes before device2 when it
// reports strictly more compute units (i.e. descending by max compute units).
// NOTE(review): the parameters stay non-const references because the exact
// signature of dpct::get_device_info is not visible from here.
static bool compare_dev(sycl::device &device1, sycl::device &device2)
{
    dpct::device_info first_info;
    dpct::get_device_info(first_info, device1);

    dpct::device_info second_info;
    dpct::get_device_info(second_info, device2);

    const auto first_units  = first_info.get_max_compute_units();
    const auto second_units = second_info.get_max_compute_units();
    return first_units > second_units;
}
|
||||
// Maps a "<backend>:<type>" label (as built by get_device_backend_and_type)
// to its priority rank; a lower rank is listed earlier.
//
// Only the four labels below are recognised. Any other label is reported on
// stdout and then trips GGML_ASSERT(false).
//
// The parameter is a const reference: the label is only read, and this lets
// the function be called from a const-correct comparator.
static int convert_backend_index(const std::string & backend) {
    if (backend == "ext_oneapi_level_zero:gpu") return 0;
    if (backend == "opencl:gpu") return 1;
    if (backend == "opencl:cpu") return 2;
    if (backend == "opencl:acc") return 3;
    printf("convert_backend_index: can't handle backend=%s\n", backend.c_str());
    GGML_ASSERT(false);
}

// Strict-weak-ordering predicate over backend labels, used to sort the
// backend keys: backend1 precedes backend2 when its rank is smaller.
//
// Takes const references so it satisfies the standard Compare requirements
// (a comparator handed to std::sort must accept const arguments and must not
// modify them).
static bool compare_backend(const std::string & backend1, const std::string & backend2) {
    return convert_backend_index(backend1) < convert_backend_index(backend2);
}
|
||||
dev_mgr()
|
||||
{
|
||||
sycl::device default_device =
|
||||
sycl::device(sycl::default_selector_v);
|
||||
_devs.push_back(std::make_shared<device_ext>(default_device));
|
||||
|
||||
std::vector<sycl::device> sycl_all_devs =
|
||||
sycl::device::get_devices(sycl::info::device_type::all);
|
||||
std::vector<sycl::device> sycl_all_devs;
|
||||
// Collect other devices except for the default device.
|
||||
if (default_device.is_cpu())
|
||||
_cpu_device = 0;
|
||||
|
||||
auto Platforms = sycl::platform::get_platforms();
|
||||
// Keep track of the number of devices per backend
|
||||
std::map<sycl::backend, size_t> DeviceNums;
|
||||
std::map<std::string, std::vector<sycl::device>> backend_devices;
|
||||
|
||||
while (!Platforms.empty()) {
|
||||
auto Platform = Platforms.back();
|
||||
Platforms.pop_back();
|
||||
auto devices = Platform.get_devices();
|
||||
std::string backend_type = get_device_backend_and_type(devices[0]);
|
||||
for (const auto &device : devices) {
|
||||
backend_devices[backend_type].push_back(device);
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> keys;
|
||||
for(auto it = backend_devices.begin(); it != backend_devices.end(); ++it) {
|
||||
keys.push_back(it->first);
|
||||
}
|
||||
std::sort(keys.begin(), keys.end(), compare_backend);
|
||||
|
||||
for (auto &key : keys) {
|
||||
std::vector<sycl::device> devs = backend_devices[key];
|
||||
std::sort(devs.begin(), devs.end(), compare_dev);
|
||||
for (const auto &dev : devs) {
|
||||
sycl_all_devs.push_back(dev);
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &dev : sycl_all_devs)
|
||||
{
|
||||
if (dev == default_device)
|
||||
@@ -3144,6 +3221,7 @@ namespace dpct
|
||||
|
||||
} // COPY from DPCT head files
|
||||
|
||||
#define GGML_COMMON_DECL_SYCL
|
||||
#define GGML_COMMON_IMPL_SYCL
|
||||
#include "ggml-common.h"
|
||||
|
||||
@@ -3196,6 +3274,11 @@ static int g_work_group_size = 0;
|
||||
#define GGML_SYCL_MMV_Y 1
|
||||
#endif
|
||||
|
||||
// GPU usage mode of the SYCL backend.
// NOTE(review): going by the enumerator names this is "not set yet" /
// "single GPU" / "multiple GPUs"; the code that consumes it is not visible here.
enum ggml_sycl_backend_gpu_mode {
    SYCL_UNSET_GPU_MODE  = -1,
    SYCL_SINGLE_GPU_MODE =  0,
    SYCL_MUL_GPU_MODE    =  1
};
|
||||
|
||||
static_assert(sizeof(sycl::half) == sizeof(ggml_fp16_t), "wrong fp16 size");
|
||||
|
||||
@@ -3312,66 +3395,6 @@ typedef void (*ggml_sycl_op_flatten_t)(const ggml_tensor *src0,
|
||||
const float *src1_dd, float *dst_dd,
|
||||
const dpct::queue_ptr &main_stream);
|
||||
|
||||
// QK = number of values after dequantization
|
||||
// QR = QK / number of values before dequantization
|
||||
// QI = number of 32 bit integers before dequantization
|
||||
|
||||
#define QK4_0 32
|
||||
#define QR4_0 2
|
||||
#define QI4_0 (QK4_0 / (4 * QR4_0))
|
||||
typedef struct dpct_type_block_q4_0 {
|
||||
sycl::half d; // delta
|
||||
uint8_t qs[QK4_0 / 2]; // nibbles / quants
|
||||
} block_q4_0;
|
||||
static_assert(sizeof(block_q4_0) == sizeof(ggml_fp16_t) + QK4_0 / 2, "wrong q4_0 block size/padding");
|
||||
|
||||
#define QK4_1 32
|
||||
#define QR4_1 2
|
||||
#define QI4_1 (QK4_1 / (4 * QR4_1))
|
||||
typedef struct dpct_type_block_q4_1 {
|
||||
sycl::half2 dm; // dm.x = delta, dm.y = min
|
||||
uint8_t qs[QK4_1 / 2]; // nibbles / quants
|
||||
} block_q4_1;
|
||||
static_assert(sizeof(block_q4_1) == sizeof(ggml_fp16_t) * 2 + QK4_1 / 2, "wrong q4_1 block size/padding");
|
||||
|
||||
#define QK5_0 32
|
||||
#define QR5_0 2
|
||||
#define QI5_0 (QK5_0 / (4 * QR5_0))
|
||||
typedef struct dpct_type_block_q5_0 {
|
||||
sycl::half d; // delta
|
||||
uint8_t qh[4]; // 5-th bit of quants
|
||||
uint8_t qs[QK5_0 / 2]; // nibbles / quants
|
||||
} block_q5_0;
|
||||
static_assert(sizeof(block_q5_0) == sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_0 / 2, "wrong q5_0 block size/padding");
|
||||
|
||||
#define QK5_1 32
|
||||
#define QR5_1 2
|
||||
#define QI5_1 (QK5_1 / (4 * QR5_1))
|
||||
typedef struct dpct_type_block_q5_1 {
|
||||
sycl::half2 dm; // dm.x = delta, dm.y = min
|
||||
uint8_t qh[4]; // 5-th bit of quants
|
||||
uint8_t qs[QK5_1 / 2]; // nibbles / quants
|
||||
} block_q5_1;
|
||||
static_assert(sizeof(block_q5_1) == 2 * sizeof(ggml_fp16_t) + sizeof(uint32_t) + QK5_1 / 2, "wrong q5_1 block size/padding");
|
||||
|
||||
#define QK8_0 32
|
||||
#define QR8_0 1
|
||||
#define QI8_0 (QK8_0 / (4 * QR8_0))
|
||||
typedef struct dpct_type_block_q8_0 {
|
||||
sycl::half d; // delta
|
||||
int8_t qs[QK8_0]; // quants
|
||||
} block_q8_0;
|
||||
static_assert(sizeof(block_q8_0) == sizeof(ggml_fp16_t) + QK8_0, "wrong q8_0 block size/padding");
|
||||
|
||||
#define QK8_1 32
|
||||
#define QR8_1 1
|
||||
#define QI8_1 (QK8_1 / (4 * QR8_1))
|
||||
typedef struct dpct_type_block_q8_1 {
|
||||
sycl::half2 ds; // ds.x = delta, ds.y = sum
|
||||
int8_t qs[QK8_0]; // quants
|
||||
} block_q8_1;
|
||||
static_assert(sizeof(block_q8_1) == 2*sizeof(ggml_fp16_t) + QK8_0, "wrong q8_1 block size/padding");
|
||||
|
||||
typedef float (*vec_dot_q_sycl_t)(const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & iqs);
|
||||
typedef void (*allocate_tiles_sycl_t)(int **x_ql, sycl::half2 **x_dm,
|
||||
int **x_qh, int **x_sc);
|
||||
@@ -3388,137 +3411,6 @@ typedef float (*vec_dot_q_mul_mat_sycl_t)(
|
||||
const int *__restrict__ y_qs, const sycl::half2 *__restrict__ y_ms,
|
||||
const int &i, const int &j, const int &k);
|
||||
|
||||
//================================= k-quants
|
||||
|
||||
#ifdef GGML_QKK_64
|
||||
#define QK_K 64
|
||||
#define K_SCALE_SIZE 4
|
||||
#else
|
||||
#define QK_K 256
|
||||
#define K_SCALE_SIZE 12
|
||||
#endif
|
||||
|
||||
#define QR2_K 4
|
||||
#define QI2_K (QK_K / (4*QR2_K))
|
||||
typedef struct dpct_type_block_q2_K {
|
||||
uint8_t scales[QK_K/16]; // scales and mins, quantized with 4 bits
|
||||
uint8_t qs[QK_K/4]; // quants
|
||||
sycl::half2 dm; // super-block scale for quantized scales/mins
|
||||
} block_q2_K;
|
||||
static_assert(sizeof(block_q2_K) == 2*sizeof(ggml_fp16_t) + QK_K/16 + QK_K/4, "wrong q2_K block size/padding");
|
||||
|
||||
#define QR3_K 4
|
||||
#define QI3_K (QK_K / (4*QR3_K))
|
||||
typedef struct dpct_type_block_q3_K {
|
||||
uint8_t hmask[QK_K/8]; // quants - high bit
|
||||
uint8_t qs[QK_K/4]; // quants - low 2 bits
|
||||
#ifdef GGML_QKK_64
|
||||
uint8_t scales[2]; // scales, quantized with 8 bits
|
||||
#else
|
||||
uint8_t scales[K_SCALE_SIZE]; // scales, quantized with 6 bits
|
||||
#endif
|
||||
sycl::half d; // super-block scale
|
||||
} block_q3_K;
|
||||
//static_assert(sizeof(block_q3_K) == sizeof(ggml_fp16_t) + QK_K / 4 + QK_K / 8 + K_SCALE_SIZE, "wrong q3_K block size/padding");
|
||||
|
||||
#define QR4_K 2
|
||||
#define QI4_K (QK_K / (4*QR4_K))
|
||||
#ifdef GGML_QKK_64
|
||||
typedef struct {
|
||||
sycl::half dm[2]; // super-block scales/mins
|
||||
uint8_t scales[2]; // 4-bit block scales/mins
|
||||
uint8_t qs[QK_K/2]; // 4--bit quants
|
||||
} block_q4_K;
|
||||
static_assert(sizeof(block_q4_K) == sizeof(sycl::half2) + QK_K/2 + 2, "wrong q4_K block size/padding");
|
||||
#else
|
||||
typedef struct dpct_type_block_q4_K {
|
||||
sycl::half2 dm; // super-block scale for quantized scales/mins
|
||||
uint8_t scales[3*QK_K/64]; // scales, quantized with 6 bits
|
||||
uint8_t qs[QK_K/2]; // 4--bit quants
|
||||
} block_q4_K;
|
||||
static_assert(sizeof(block_q4_K) == 2*sizeof(ggml_fp16_t) + 3*QK_K/64 + QK_K/2, "wrong q4_K block size/padding");
|
||||
#endif
|
||||
|
||||
#define QR5_K 2
|
||||
#define QI5_K (QK_K / (4*QR5_K))
|
||||
#ifdef GGML_QKK_64
|
||||
typedef struct {
|
||||
sycl::half d; // super-block scale
|
||||
int8_t scales[QK_K/16]; // block scales
|
||||
uint8_t qh[QK_K/8]; // quants, high bit
|
||||
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
||||
} block_q5_K;
|
||||
static_assert(sizeof(block_q5_K) == sizeof(ggml_fp16_t) + QK_K/2 + QK_K/8 + QK_K/16, "wrong q5_K block size/padding");
|
||||
#else
|
||||
typedef struct dpct_type_block_q5_K {
|
||||
sycl::half2 dm; // super-block scale for quantized scales/mins
|
||||
uint8_t scales[K_SCALE_SIZE]; // scales and mins, quantized with 6 bits
|
||||
uint8_t qh[QK_K/8]; // quants, high bit
|
||||
uint8_t qs[QK_K/2]; // quants, low 4 bits
|
||||
} block_q5_K;
|
||||
static_assert(sizeof(block_q5_K) == 2*sizeof(ggml_fp16_t) + K_SCALE_SIZE + QK_K/2 + QK_K/8, "wrong q5_K block size/padding");
|
||||
#endif
|
||||
|
||||
#define QR6_K 2
|
||||
#define QI6_K (QK_K / (4*QR6_K))
|
||||
typedef struct dpct_type_block_q6_K {
|
||||
uint8_t ql[QK_K/2]; // quants, lower 4 bits
|
||||
uint8_t qh[QK_K/4]; // quants, upper 2 bits
|
||||
int8_t scales[QK_K/16]; // scales
|
||||
sycl::half d; // delta
|
||||
} block_q6_K;
|
||||
static_assert(sizeof(block_q6_K) == sizeof(ggml_fp16_t) + 13*QK_K/16, "wrong q6_K block size/padding");
|
||||
|
||||
#define QR2_XXS 8
|
||||
#define QI2_XXS (QK_K / (4*QR2_XXS))
|
||||
typedef struct dpct_type_block_iq2_xxs {
|
||||
sycl::half d;
|
||||
uint16_t qs[QK_K/8];
|
||||
} block_iq2_xxs;
|
||||
static_assert(sizeof(block_iq2_xxs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t), "wrong iq2_xxs block size/padding");
|
||||
|
||||
#define QR2_XS 8
|
||||
#define QI2_XS (QK_K / (4*QR2_XS))
|
||||
typedef struct dpct_type_block_iq2_xs {
|
||||
sycl::half d;
|
||||
uint16_t qs[QK_K/8];
|
||||
uint8_t scales[QK_K/32];
|
||||
} block_iq2_xs;
|
||||
static_assert(sizeof(block_iq2_xs) == sizeof(ggml_fp16_t) + QK_K/8*sizeof(uint16_t) + QK_K/32, "wrong iq2_xs block size/padding");
|
||||
|
||||
#define QR3_XXS 8
|
||||
#define QI3_XXS (QK_K / (4*QR3_XXS))
|
||||
typedef struct dpct_type_block_iq3_xxs {
|
||||
sycl::half d;
|
||||
uint8_t qs[3*(QK_K/8)];
|
||||
} block_iq3_xxs;
|
||||
static_assert(sizeof(block_iq3_xxs) == sizeof(ggml_fp16_t) + 3*(QK_K/8), "wrong iq3_xxs block size/padding");
|
||||
|
||||
#define QR3_XS 8
|
||||
#define QI3_XS (QK_K / (4*QR3_XS))
|
||||
#if QK_K == 64
|
||||
#define IQ3S_N_SCALE 2
|
||||
#else
|
||||
#define IQ3S_N_SCALE QK_K/64
|
||||
#endif
|
||||
typedef struct {
|
||||
sycl::half d;
|
||||
uint8_t qs[QK_K/4];
|
||||
uint8_t qh[QK_K/32];
|
||||
uint8_t signs[QK_K/8];
|
||||
uint8_t scales[IQ3S_N_SCALE];
|
||||
} block_iq3_s;
|
||||
static_assert(sizeof(block_iq3_s) == sizeof(ggml_fp16_t) + 13*(QK_K/32) + IQ3S_N_SCALE, "wrong iq3_s block size/padding");
|
||||
|
||||
#define QR1_S 8
|
||||
#define QI1_S (QK_K / (4*QR1_S))
|
||||
typedef struct {
|
||||
sycl::half d;
|
||||
uint8_t qs[QK_K/8];
|
||||
uint8_t scales[QK_K/16];
|
||||
} block_iq1_s;
|
||||
static_assert(sizeof(block_iq1_s) == sizeof(ggml_fp16_t) + QK_K/8 + QK_K/16, "wrong iq1_s block size/padding");
|
||||
|
||||
#define WARP_SIZE 32
|
||||
#define MATRIX_ROW_PADDING 512 // last row of quant. matrices is a multiple of this to avoid out-of-bounds memory accesses
|
||||
|
||||
@@ -3586,12 +3478,31 @@ class sycl_gpu_mgr {
|
||||
int work_group_size = 0;
|
||||
std::string gpus_list = "";
|
||||
|
||||
/*
|
||||
Multi-GPU constructor: use every GPU whose max compute unit count equals the highest one detected.
|
||||
*/
|
||||
sycl_gpu_mgr() {
|
||||
detect_sycl_gpu_list_with_max_cu(); // fills the gpus/devices lists
|
||||
get_allow_gpus(); // presumably builds the printable gpus_list string - TODO confirm
|
||||
create_context_with_gpus(); // builds a sycl::context from `devices`
|
||||
}
|
||||
|
||||
/*
|
||||
Single-GPU constructor: manage only the device with id main_gpu_id (no detection pass is run).
|
||||
*/
|
||||
sycl_gpu_mgr(int main_gpu_id) {
|
||||
sycl::device device = dpct::dev_mgr::instance().get_device(main_gpu_id);
|
||||
dpct::device_info prop;
|
||||
dpct::get_device_info(prop, device);
|
||||
gpus.push_back(main_gpu_id); // the requested id becomes the only entry (index 0)
|
||||
devices.push_back(device);
|
||||
work_group_size = prop.get_max_work_group_size();
|
||||
max_compute_units = prop.get_max_compute_units();
|
||||
|
||||
get_allow_gpus(); // presumably builds the printable gpus_list string - TODO confirm
|
||||
create_context_with_gpus(); // builds a sycl::context from `devices`
|
||||
}
|
||||
|
||||
void create_context_with_gpus() {
|
||||
sycl::context ctx = sycl::context(devices);
|
||||
assert(gpus.size() > 0);
|
||||
@@ -3607,7 +3518,7 @@ class sycl_gpu_mgr {
|
||||
gpus_list += std::to_string(gpus[i]);
|
||||
gpus_list += ",";
|
||||
}
|
||||
if (gpus_list.length() > 2) {
|
||||
if (gpus_list.length() > 1) {
|
||||
gpus_list.pop_back();
|
||||
}
|
||||
}
|
||||
@@ -3636,7 +3547,7 @@ class sycl_gpu_mgr {
|
||||
dpct::device_info prop;
|
||||
dpct::get_device_info(prop, device);
|
||||
if (max_compute_units == prop.get_max_compute_units() &&
|
||||
prop.get_major_version() == 1) {
|
||||
is_ext_oneapi_device(device)) {
|
||||
gpus.push_back(id);
|
||||
devices.push_back(device);
|
||||
work_group_size = prop.get_max_work_group_size();
|
||||
@@ -3656,8 +3567,8 @@ class sycl_gpu_mgr {
|
||||
if (gpus[i] == id)
|
||||
return i;
|
||||
}
|
||||
assert(false);
|
||||
return -1;
|
||||
printf("miss to get device index by id=%d\n", id);
|
||||
GGML_ASSERT(false);
|
||||
}
|
||||
|
||||
int get_next_index(int id) {
|
||||
@@ -3666,8 +3577,16 @@ class sycl_gpu_mgr {
|
||||
if (gpus[i] == id)
|
||||
return i;
|
||||
}
|
||||
assert(false);
|
||||
return -1;
|
||||
GGML_ASSERT(false);
|
||||
}
|
||||
|
||||
// Returns true when `dev` is exposed through one of the oneAPI extension
// backends checked here (Level Zero, CUDA or HIP); any other backend
// yields false.
bool is_ext_oneapi_device(const sycl::device &dev) {
    switch (dev.get_backend()) {
        case sycl::backend::ext_oneapi_level_zero:
        case sycl::backend::ext_oneapi_cuda:
        case sycl::backend::ext_oneapi_hip:
            return true;
        default:
            return false;
    }
}
|
||||
};
|
||||
|
||||
@@ -3676,11 +3595,14 @@ static int g_device_count = -1;
|
||||
static int g_all_sycl_device_count = -1; // number of SYCL devices reported by dpct::dev_mgr; -1 until initialization stores it
|
||||
static int g_main_device = -1; // presumably the index of the main device in the selected GPU list - TODO confirm
|
||||
static int g_main_device_id = -1; // presumably the SYCL device id of the main device - TODO confirm
|
||||
static bool g_ggml_backend_sycl_buffer_type_initialized = false; // guards the lazy build of the per-device buffer types; the device-mode setters reset it to false
|
||||
|
||||
static std::array<float, GGML_SYCL_MAX_DEVICES> g_default_tensor_split = {}; // per device: fraction of total global memory that precedes this device (filled during init)
|
||||
|
||||
static float g_tensor_split[GGML_SYCL_MAX_DEVICES] = {0}; // zeroed during init; presumably a user-provided split - TODO confirm
|
||||
|
||||
static ggml_sycl_backend_gpu_mode g_ggml_sycl_backend_gpu_mode = SYCL_UNSET_GPU_MODE; // set to SYCL_SINGLE_GPU_MODE / SYCL_MUL_GPU_MODE by the device-mode setters
|
||||
|
||||
struct sycl_device_capabilities {
|
||||
int cc; // compute capability
|
||||
bool vmm; // virtual memory support
|
||||
@@ -4891,10 +4813,9 @@ static void dequantize_block_iq3_s(const void * __restrict__ vx, dst_t * __restr
|
||||
template<typename dst_t>
|
||||
static void dequantize_block_iq1_s(const void * __restrict__ vx, dst_t * __restrict__ yy,
|
||||
const sycl::nd_item<3> &item_ct1,
|
||||
const uint64_t *iq1s_grid,
|
||||
const uint32_t *iq1s_grid,
|
||||
const uint8_t *ksigns_iq2xs,
|
||||
const uint8_t *kmask_iq2xs) {
|
||||
|
||||
const int i = item_ct1.get_group(2);
|
||||
const block_iq1_s * x = (const block_iq1_s *) vx;
|
||||
|
||||
@@ -4903,11 +4824,15 @@ static void dequantize_block_iq1_s(const void * __restrict__ vx, dst_t * __restr
|
||||
const int il = tid/8; // 0...3
|
||||
const int ib = tid%8; // 0...7
|
||||
dst_t * y = yy + i*QK_K + 32*ib + 8*il;
|
||||
const int i8 = 4*ib+il;
|
||||
uint8_t h = x[i].scales[i8/2] >> 4*(i8%2);
|
||||
const int8_t * grid = (const int8_t *)(iq1s_grid + (x[i].qs[i8] | ((h & 8) << 5)));
|
||||
const float d = (float)x[i].d * (2*(h & 7) + 1);
|
||||
for (int j = 0; j < 8; ++j) y[j] = d * grid[j];
|
||||
const uint8_t * qs = x[i].qs + 8*ib;
|
||||
const uint8_t * grid1 = (const uint8_t *)(iq1s_grid + qs[2*il+0]);
|
||||
const uint8_t * grid2 = (const uint8_t *)(iq1s_grid + qs[2*il+1]);
|
||||
const float d = (float)x[i].d * (2*((x[i].qh[ib] >> 12) & 0xf) + 1);
|
||||
const uint8_t signs = ksigns_iq2xs[(x[i].qh[ib] >> 3*il) & 7];
|
||||
for (int j = 0; j < 4; ++j) {
|
||||
y[j+0] = d * grid1[j] * (signs & kmask_iq2xs[j+0] ? -1.f : 1.f);
|
||||
y[j+4] = d * grid2[j] * (signs & kmask_iq2xs[j+4] ? -1.f : 1.f);
|
||||
}
|
||||
#else
|
||||
assert(false);
|
||||
#endif
|
||||
@@ -7803,28 +7728,27 @@ vec_dot_iq3_s_q8_1(const void *__restrict__ vbq,
|
||||
static __dpct_inline__ float
|
||||
vec_dot_iq1_s_q8_1(const void *__restrict__ vbq,
|
||||
const block_q8_1 *__restrict__ bq8_1, const int &iqs,
|
||||
const uint64_t *iq1s_grid, const uint64_t *ksigns64) {
|
||||
const uint32_t *iq1s_grid, const uint64_t *ksigns64) {
|
||||
#if QK_K == 256
|
||||
const block_iq1_s * bq1 = (const block_iq1_s *) vbq;
|
||||
|
||||
const int ib32 = iqs;
|
||||
int sumi1 = 0, sumi2 = 0, sumi3 = 0, sumi4 = 0;
|
||||
const uint8_t h1 = bq1->scales[2*ib32+0];
|
||||
const uint8_t h2 = bq1->scales[2*ib32+1];
|
||||
const int * q8 = (const int *)bq8_1[ib32].qs;
|
||||
const int * grid1 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+0] | ((h1 & 0x08) << 5)));
|
||||
const int * grid2 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+1] | ((h1 & 0x80) << 1)));
|
||||
const int * grid3 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+2] | ((h2 & 0x08) << 5)));
|
||||
const int * grid4 = (const int *)(iq1s_grid + (bq1->qs[4*ib32+3] | ((h2 & 0x80) << 1)));
|
||||
for (int j = 0; j < 2; ++j) {
|
||||
sumi1 = dpct::dp4a(q8[j+0], grid1[j], sumi1);
|
||||
sumi2 = dpct::dp4a(q8[j+2], grid2[j], sumi2);
|
||||
sumi3 = dpct::dp4a(q8[j+4], grid3[j], sumi3);
|
||||
sumi4 = dpct::dp4a(q8[j+6], grid4[j], sumi4);
|
||||
const uint8_t * qs = bq1->qs + 4*ib32;
|
||||
const int8_t * q8 = bq8_1[ib32].qs;
|
||||
int sumi = 0;
|
||||
for (int l = 0; l < 4; ++l) {
|
||||
const uint32_t * grid = (const uint32_t *)(iq1s_grid + qs[l]);
|
||||
const uint32_t * signs = (const uint32_t *)(ksigns64 + (qs[l] >> 8));
|
||||
const int grid_l = dpct::vectorized_binary<sycl::uchar4>(
|
||||
grid[0] ^ signs[0], signs[0], std::minus<>());
|
||||
const int grid_h = dpct::vectorized_binary<sycl::uchar4>(
|
||||
grid[1] ^ signs[1], signs[1], std::minus<>());
|
||||
sumi = dpct::dp4a(grid_l, *((int *)q8 + 0), sumi);
|
||||
sumi = dpct::dp4a(grid_h, *((int *)q8 + 1), sumi);
|
||||
q8 += 8;
|
||||
}
|
||||
const float d = (float)bq1->d * bq8_1[ib32].ds[0];
|
||||
return d * (sumi1 * (2*(h1 & 7) + 1) + sumi2 * (2*((h1 >> 4) & 7) + 1) +
|
||||
sumi3 * (2*(h2 & 7) + 1) + sumi4 * (2*((h2 >> 4) & 7) + 1));
|
||||
const float d = (float)bq1->d * bq8_1[ib32].ds[0] * 0.25f;
|
||||
return d * sumi;
|
||||
#else
|
||||
assert(false);
|
||||
return 0.f;
|
||||
@@ -8644,7 +8568,7 @@ static void mul_mat_vec_q_iq3_s_q8_1(const void * __restrict__ vx, const void *
|
||||
template <int qk, int qi, typename block_q_t, int vdr>
|
||||
static void mul_mat_vec_q_iq1_s_q8_1(const void * __restrict__ vx, const void * __restrict__ vy, float * __restrict__ dst, const int ncols, const int nrows,
|
||||
const sycl::nd_item<3> &item_ct1,
|
||||
const uint64_t *iq1s_grid_ptr, const uint64_t *ksigns64_ptr ) {
|
||||
const uint32_t *iq1s_grid_ptr, const uint64_t *ksigns64_ptr ) {
|
||||
const int row = item_ct1.get_group(2) * item_ct1.get_local_range(1) +
|
||||
item_ct1.get_local_id(1);
|
||||
|
||||
@@ -10406,7 +10330,7 @@ static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int k,
|
||||
dpct::queue_ptr stream) {
|
||||
const int nb = k / QK_K;
|
||||
{
|
||||
iq1s_grid.init(*stream);
|
||||
iq1s_grid_gpu.init(*stream);
|
||||
ksigns_iq2xs.init(*stream);
|
||||
kmask_iq2xs.init(*stream);
|
||||
|
||||
@@ -10414,7 +10338,7 @@ static void dequantize_row_iq1_s_sycl(const void *vx, dst_t *y, const int k,
|
||||
{sycl::aspect::fp16});
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq1s_grid_ptr_ct1 = iq1s_grid.get_ptr();
|
||||
auto iq1s_grid_ptr_ct1 = iq1s_grid_gpu.get_ptr();
|
||||
auto ksigns_iq2xs_ptr_ct1 = ksigns_iq2xs.get_ptr();
|
||||
auto kmask_iq2xs_ptr_ct1 = kmask_iq2xs.get_ptr();
|
||||
|
||||
@@ -11154,11 +11078,11 @@ static void mul_mat_vec_iq1_s_q8_1_sycl(const void *vx, const void *vy,
|
||||
const sycl::range<3> block_nums(1, 1, block_num_y);
|
||||
const sycl::range<3> block_dims(1, GGML_SYCL_MMV_Y, WARP_SIZE);
|
||||
{
|
||||
iq1s_grid.init(*stream);
|
||||
iq1s_grid_gpu.init(*stream);
|
||||
ksigns64.init(*stream);
|
||||
|
||||
stream->submit([&](sycl::handler &cgh) {
|
||||
auto iq1s_grid_ptr_ct1 = iq1s_grid.get_ptr();
|
||||
auto iq1s_grid_ptr_ct1 = iq1s_grid_gpu.get_ptr();
|
||||
auto ksigns64_ptr_ct1 = ksigns64.get_ptr();
|
||||
|
||||
cgh.parallel_for(
|
||||
@@ -13182,17 +13106,20 @@ bool ggml_sycl_loaded(void) {
|
||||
return g_sycl_loaded;
|
||||
}
|
||||
|
||||
void print_device_detail(int id) {
|
||||
void print_device_detail(int id, sycl::device &device, std::string device_type) {
|
||||
|
||||
dpct::device_info prop;
|
||||
SYCL_CHECK(CHECK_TRY_ERROR(
|
||||
dpct::get_device_info(prop, dpct::dev_mgr::instance().get_device(id))));
|
||||
sycl::device cur_device = dpct::dev_mgr::instance().get_device(id);
|
||||
dpct::get_device_info(prop, device)));
|
||||
|
||||
std::string version;
|
||||
version += std::to_string(prop.get_major_version());
|
||||
version += ".";
|
||||
version += std::to_string(prop.get_minor_version());
|
||||
|
||||
fprintf(stderr, "|%2d|%45s|%18s|%17d|%14d|%13d|%15lu|\n", id,
|
||||
device_type = std::regex_replace(device_type, std::regex("ext_oneapi_"), "");
|
||||
|
||||
fprintf(stderr, "|%2d|%18s|%45s|%10s|%11d|%8d|%7d|%15lu|\n", id, device_type.c_str(),
|
||||
prop.get_name(), version.c_str(), prop.get_max_compute_units(),
|
||||
prop.get_max_work_group_size(), prop.get_max_sub_group_size(),
|
||||
prop.get_global_mem_size());
|
||||
@@ -13200,19 +13127,35 @@ void print_device_detail(int id) {
|
||||
|
||||
void ggml_backend_sycl_print_sycl_devices() {
|
||||
int device_count = dpct::dev_mgr::instance().device_count();
|
||||
std::map<std::string, size_t> DeviceNums;
|
||||
fprintf(stderr, "found %d SYCL devices:\n", device_count);
|
||||
fprintf(stderr, "|ID| Name |compute capability|Max compute units|Max work group|Max sub group|Global mem size|\n");
|
||||
fprintf(stderr, "|--|---------------------------------------------|------------------|-----------------|--------------|-------------|---------------|\n");
|
||||
fprintf(stderr, "| | | |Compute |Max compute|Max work|Max sub| |\n");
|
||||
fprintf(stderr, "|ID| Device Type| Name|capability|units |group |group |Global mem size|\n");
|
||||
fprintf(stderr, "|--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------|\n");
|
||||
for (int id = 0; id < device_count; ++id) {
|
||||
print_device_detail(id);
|
||||
sycl::device device = dpct::dev_mgr::instance().get_device(id);
|
||||
sycl::backend backend = device.get_backend();
|
||||
std::string backend_type = get_device_backend_and_type(device);
|
||||
int type_id=DeviceNums[backend_type]++;
|
||||
std::stringstream device_type;
|
||||
device_type << "[" << backend_type << ":" << std::to_string(type_id) << "]";
|
||||
print_device_detail(id, device, device_type.str());
|
||||
}
|
||||
}
|
||||
|
||||
void print_gpu_device_list() {
|
||||
fprintf(stderr, "detect %d SYCL GPUs: [%s] with Max compute units:%d\n",
|
||||
g_sycl_gpu_mgr->get_gpu_count(),
|
||||
g_sycl_gpu_mgr->gpus_list.c_str(),
|
||||
g_sycl_gpu_mgr->max_compute_units);
|
||||
GGML_ASSERT(g_sycl_gpu_mgr);
|
||||
|
||||
char* hint=NULL;
|
||||
if (g_ggml_sycl_backend_gpu_mode == SYCL_SINGLE_GPU_MODE) {
|
||||
hint = "use %d SYCL GPUs: [%s] with Max compute units:%d\n";
|
||||
} else {
|
||||
hint = "detect %d SYCL GPUs: [%s] with top Max compute units:%d\n";
|
||||
}
|
||||
fprintf(stderr, hint,
|
||||
g_sycl_gpu_mgr->get_gpu_count(),
|
||||
g_sycl_gpu_mgr->gpus_list.c_str(),
|
||||
g_sycl_gpu_mgr->max_compute_units);
|
||||
}
|
||||
|
||||
int get_sycl_env(const char *env_name, int default_val) {
|
||||
@@ -13248,23 +13191,6 @@ void ggml_init_sycl() try {
|
||||
#else
|
||||
fprintf(stderr, "%s: GGML_SYCL_F16: no\n", __func__);
|
||||
#endif
|
||||
if (CHECK_TRY_ERROR(g_all_sycl_device_count =
|
||||
dpct::dev_mgr::instance().device_count()) != 0) {
|
||||
initialized = true;
|
||||
g_sycl_loaded = false;
|
||||
return;
|
||||
}
|
||||
GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES);
|
||||
ggml_backend_sycl_print_sycl_devices();
|
||||
|
||||
if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
|
||||
|
||||
g_device_count = g_sycl_gpu_mgr->get_gpu_count();
|
||||
g_work_group_size = g_sycl_gpu_mgr->work_group_size;
|
||||
|
||||
print_gpu_device_list();
|
||||
|
||||
int64_t total_vram = 0;
|
||||
|
||||
/* NOT REMOVE, keep it for next optimize for XMX.
|
||||
#if defined(SYCL_USE_XMX)
|
||||
@@ -13273,49 +13199,15 @@ void ggml_init_sycl() try {
|
||||
fprintf(stderr, "%s: SYCL_USE_XMX: no\n", __func__);
|
||||
#endif
|
||||
*/
|
||||
for (int id = 0; id < GGML_SYCL_MAX_DEVICES; ++id) {
|
||||
g_device_caps[id].vmm = 0;
|
||||
g_device_caps[id].device_id = -1;
|
||||
g_device_caps[id].cc = 0;
|
||||
g_tensor_split[id] = 0;
|
||||
g_default_tensor_split[id] = 0;
|
||||
|
||||
if (CHECK_TRY_ERROR(g_all_sycl_device_count =
|
||||
dpct::dev_mgr::instance().device_count()) != 0) {
|
||||
initialized = true;
|
||||
g_sycl_loaded = false;
|
||||
return;
|
||||
}
|
||||
|
||||
for (int i = 0; i < g_device_count; ++i) {
|
||||
int device_id = g_sycl_gpu_mgr->gpus[i];
|
||||
g_device_caps[i].vmm = 0;
|
||||
|
||||
dpct::device_info prop;
|
||||
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
|
||||
prop, dpct::dev_mgr::instance().get_device(device_id))));
|
||||
|
||||
g_default_tensor_split[i] = total_vram;
|
||||
total_vram += prop.get_global_mem_size();
|
||||
|
||||
g_device_caps[i].cc =
|
||||
100 * prop.get_major_version() + 10 * prop.get_minor_version();
|
||||
}
|
||||
|
||||
for (int i = 0; i < g_device_count; ++i) {
|
||||
g_default_tensor_split[i] /= total_vram;
|
||||
}
|
||||
|
||||
for (int i = 0; i < g_device_count; ++i) {
|
||||
SYCL_CHECK(ggml_sycl_set_device(i));
|
||||
|
||||
// create sycl streams
|
||||
for (int is = 0; is < MAX_STREAMS; ++is) {
|
||||
SYCL_CHECK(CHECK_TRY_ERROR(
|
||||
g_syclStreams[i][is] =
|
||||
dpct::get_current_device().create_queue(
|
||||
g_sycl_gpu_mgr->get_co_ctx(), dpct::get_current_device())));
|
||||
}
|
||||
|
||||
const dpct::queue_ptr stream = g_syclStreams[i][0];
|
||||
// create sycl handle
|
||||
SYCL_CHECK(CHECK_TRY_ERROR(g_sycl_handles[i] = stream));
|
||||
}
|
||||
|
||||
GGML_ASSERT(g_all_sycl_device_count <= GGML_SYCL_MAX_DEVICES);
|
||||
ggml_backend_sycl_print_sycl_devices();
|
||||
initialized = true;
|
||||
g_sycl_loaded = true;
|
||||
}
|
||||
@@ -13326,6 +13218,63 @@ catch (sycl::exception const &exc) {
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
void ggml_init_by_gpus(int device_count) try {
|
||||
g_device_count = device_count;
|
||||
g_work_group_size = g_sycl_gpu_mgr->work_group_size;
|
||||
|
||||
int64_t total_vram = 0;
|
||||
|
||||
print_gpu_device_list();
|
||||
|
||||
for (int id = 0; id < GGML_SYCL_MAX_DEVICES; ++id) {
|
||||
g_device_caps[id].vmm = 0;
|
||||
g_device_caps[id].device_id = -1;
|
||||
g_device_caps[id].cc = 0;
|
||||
g_tensor_split[id] = 0;
|
||||
g_default_tensor_split[id] = 0;
|
||||
}
|
||||
|
||||
for (int i = 0; i < g_device_count; ++i) {
|
||||
int device_id = g_sycl_gpu_mgr->gpus[i];
|
||||
g_device_caps[i].vmm = 0;
|
||||
|
||||
dpct::device_info prop;
|
||||
SYCL_CHECK(CHECK_TRY_ERROR(dpct::get_device_info(
|
||||
prop, dpct::dev_mgr::instance().get_device(device_id))));
|
||||
|
||||
g_default_tensor_split[i] = total_vram;
|
||||
total_vram += prop.get_global_mem_size();
|
||||
|
||||
g_device_caps[i].cc =
|
||||
100 * prop.get_major_version() + 10 * prop.get_minor_version();
|
||||
}
|
||||
|
||||
for (int i = 0; i < g_device_count; ++i) {
|
||||
g_default_tensor_split[i] /= total_vram;
|
||||
}
|
||||
|
||||
for (int i = 0; i < g_device_count; ++i) {
|
||||
SYCL_CHECK(ggml_sycl_set_device(i));
|
||||
|
||||
// create sycl streams
|
||||
for (int is = 0; is < MAX_STREAMS; ++is) {
|
||||
SYCL_CHECK(CHECK_TRY_ERROR(
|
||||
g_syclStreams[i][is] =
|
||||
dpct::get_current_device().create_queue(
|
||||
g_sycl_gpu_mgr->get_co_ctx(), dpct::get_current_device())));
|
||||
}
|
||||
|
||||
const dpct::queue_ptr stream = g_syclStreams[i][0];
|
||||
// create sycl handle
|
||||
SYCL_CHECK(CHECK_TRY_ERROR(g_sycl_handles[i] = stream));
|
||||
}
|
||||
}
|
||||
catch (sycl::exception const &exc) {
|
||||
std::cerr << exc.what() << "Exception caught at file:" << __FILE__
|
||||
<< ", line:" << __LINE__ << std::endl;
|
||||
std::exit(1);
|
||||
}
|
||||
|
||||
void *ggml_sycl_host_malloc(size_t size) try {
|
||||
if (getenv("GGML_SYCL_NO_PINNED") != nullptr) {
|
||||
return nullptr;
|
||||
@@ -16725,22 +16674,24 @@ static ggml_backend_buffer_type_i ggml_backend_sycl_buffer_type_interface = {
|
||||
/* .is_host = */ nullptr,
|
||||
};
|
||||
|
||||
// Returns the buffer type object for the GPU at position `device_index` in
// the currently selected GPU list (0 <= device_index < g_device_count).
//
// The per-device table is built lazily on first use and rebuilt whenever
// g_ggml_backend_sycl_buffer_type_initialized has been reset to false.
// An out-of-range index is reported on stderr and aborts via GGML_ASSERT.
ggml_backend_buffer_type_t ggml_backend_sycl_buffer_type(int device_index) {
    if (device_index < 0 || device_index >= g_device_count) {
        // stderr is used so the message is not lost in a buffered stdout when
        // the assert below aborts
        fprintf(stderr, "ggml_backend_sycl_buffer_type error: device_index:%d is out of range [0, %d], miss to call ggml_backend_sycl_set_single_device()\n",
            device_index, g_device_count-1);
        // unconditional: a negative index must fail too, not only an index
        // that is too large
        GGML_ASSERT(false);
    }

    static struct ggml_backend_buffer_type ggml_backend_sycl_buffer_types[GGML_SYCL_MAX_DEVICES];

    if (!g_ggml_backend_sycl_buffer_type_initialized) {
        // NOTE(review): when the table is rebuilt after the flag was reset,
        // the contexts allocated by an earlier build are not freed here.
        for (int i = 0; i < g_device_count; i++) {
            ggml_backend_sycl_buffer_types[i] = {
                /* .iface    = */ ggml_backend_sycl_buffer_type_interface,
                /* .context  = */ new ggml_backend_sycl_buffer_type_context{i, GGML_SYCL_NAME + std::to_string(g_sycl_gpu_mgr->gpus[i])},
            };
        }
        g_ggml_backend_sycl_buffer_type_initialized = true;
    }

    return &ggml_backend_sycl_buffer_types[device_index];
}
|
||||
|
||||
// sycl split buffer type
|
||||
@@ -17432,13 +17383,18 @@ static ggml_backend_i ggml_backend_sycl_interface = {
|
||||
/* .get_default_buffer_type = */ ggml_backend_sycl_get_default_buffer_type,
|
||||
/* .set_tensor_async = */ ggml_backend_sycl_set_tensor_async,
|
||||
/* .get_tensor_async = */ ggml_backend_sycl_get_tensor_async,
|
||||
/* .cpy_tensor_async = */ ggml_backend_sycl_cpy_tensor_async,
|
||||
/* .cpy_tensor_async = */ NULL, //ggml_backend_sycl_cpy_tensor_async, // TODO: update for the new interface
|
||||
/* .synchronize = */ ggml_backend_sycl_synchronize,
|
||||
/* .graph_plan_create = */ NULL,
|
||||
/* .graph_plan_free = */ NULL,
|
||||
/* .graph_plan_compute = */ NULL,
|
||||
/* .graph_compute = */ ggml_backend_sycl_graph_compute,
|
||||
/* .supports_op = */ ggml_backend_sycl_supports_op,
|
||||
/* .event_new = */ NULL,
|
||||
/* .event_free = */ NULL,
|
||||
/* .event_record = */ NULL,
|
||||
/* .event_wait = */ NULL,
|
||||
/* .event_synchronize = */ NULL,
|
||||
};
|
||||
|
||||
static ggml_guid_t ggml_backend_sycl_guid() {
|
||||
@@ -17488,11 +17444,42 @@ GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id) {
|
||||
return g_sycl_gpu_mgr->get_index(device_id);
|
||||
}
|
||||
|
||||
// Maps a position in the selected GPU list to the SYCL device id stored there.
// Aborts via GGML_ASSERT when no GPU manager exists or the index is outside
// the list, instead of indexing the vector out of bounds.
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index) {
    GGML_ASSERT(g_sycl_gpu_mgr);
    GGML_ASSERT(device_index >= 0 &&
                static_cast<size_t>(device_index) < g_sycl_gpu_mgr->gpus.size());
    return g_sycl_gpu_mgr->gpus[device_index];
}
|
||||
|
||||
// Switches the backend to single-GPU mode on the device with id main_gpu_id.
//
// Replaces the global GPU manager with one that holds only that device,
// re-runs the per-GPU initialization and marks the buffer-type table for
// rebuild. main_gpu_id must be in [0, g_all_sycl_device_count).
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id) {
    // check both bounds: a negative id would otherwise reach get_device()
    GGML_ASSERT(main_gpu_id >= 0 && main_gpu_id < g_all_sycl_device_count);
    fprintf(stderr, "ggml_backend_sycl_set_single_device: use single device: [%d]\n", main_gpu_id);

    delete g_sycl_gpu_mgr; // deleting a null pointer is a no-op
    g_sycl_gpu_mgr = new sycl_gpu_mgr(main_gpu_id);

    g_ggml_sycl_backend_gpu_mode = SYCL_SINGLE_GPU_MODE;
    ggml_init_by_gpus(g_sycl_gpu_mgr->get_gpu_count());
    g_ggml_backend_sycl_buffer_type_initialized = false;
}
|
||||
|
||||
// Switches the backend to multi-GPU mode; does nothing when that mode is
// already active. Otherwise the global GPU manager is replaced by a freshly
// detected one, the per-GPU initialization is re-run and the buffer-type
// table is marked for rebuild.
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode() {
    const bool already_multi = (g_ggml_sycl_backend_gpu_mode == SYCL_MUL_GPU_MODE);

    if (!already_multi) {
        fprintf(stderr, "ggml_backend_sycl_set_mul_device_mode: true\n");

        delete g_sycl_gpu_mgr; // no-op when the pointer is null
        g_sycl_gpu_mgr = new sycl_gpu_mgr();

        g_ggml_sycl_backend_gpu_mode = SYCL_MUL_GPU_MODE;
        ggml_init_by_gpus(g_sycl_gpu_mgr->get_gpu_count());
        g_ggml_backend_sycl_buffer_type_initialized = false;
    }
}
|
||||
|
||||
extern "C" int ggml_backend_sycl_reg_devices();
|
||||
|
||||
int ggml_backend_sycl_reg_devices() {
|
||||
if (!g_sycl_gpu_mgr) g_sycl_gpu_mgr = new sycl_gpu_mgr();
|
||||
g_device_count = g_sycl_gpu_mgr->get_gpu_count();
|
||||
ggml_backend_sycl_set_mul_device_mode();
|
||||
assert(g_device_count>0);
|
||||
for (int i = 0; i < g_device_count; i++) {
|
||||
int id = g_sycl_gpu_mgr->gpus[i];
|
||||
|
||||
@@ -29,6 +29,11 @@ GGML_API GGML_CALL ggml_backend_buffer_type_t ggml_backend_sycl_split_buffer_typ
|
||||
GGML_API GGML_CALL void ggml_backend_sycl_get_device_memory(int device, size_t *free, size_t *total);
|
||||
GGML_API GGML_CALL int ggml_backend_sycl_get_device_index(int device_id);
|
||||
|
||||
// TODO: these are temporary
|
||||
// ref: https://github.com/ggerganov/llama.cpp/pull/6022#issuecomment-1992615670
|
||||
GGML_API GGML_CALL int ggml_backend_sycl_get_device_id(int device_index);
|
||||
GGML_API GGML_CALL void ggml_backend_sycl_set_single_device_mode(int main_gpu_id);
|
||||
GGML_API GGML_CALL void ggml_backend_sycl_set_mul_device_mode();
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -5693,6 +5693,11 @@ static ggml_backend_i ggml_backend_vk_interface = {
|
||||
/* .graph_plan_compute = */ NULL,
|
||||
/* .graph_compute = */ ggml_backend_vk_graph_compute,
|
||||
/* .supports_op = */ ggml_backend_vk_supports_op,
|
||||
/* .event_new = */ NULL,
|
||||
/* .event_free = */ NULL,
|
||||
/* .event_record = */ NULL,
|
||||
/* .event_wait = */ NULL,
|
||||
/* .event_synchronize = */ NULL,
|
||||
};
|
||||
|
||||
static ggml_guid_t ggml_backend_vk_guid() {
|
||||
|
||||
@@ -470,6 +470,19 @@ static const ggml_type_traits_t type_traits[GGML_TYPE_COUNT] = {
|
||||
.type_size = sizeof(int32_t),
|
||||
.is_quantized = false,
|
||||
},
|
||||
[GGML_TYPE_I64] = {
|
||||
.type_name = "i64",
|
||||
.blck_size = 1,
|
||||
.type_size = sizeof(int64_t),
|
||||
.is_quantized = false,
|
||||
},
|
||||
[GGML_TYPE_F64] = {
|
||||
.type_name = "f64",
|
||||
.blck_size = 1,
|
||||
.type_size = sizeof(double),
|
||||
.is_quantized = false,
|
||||
.nrows = 1,
|
||||
},
|
||||
[GGML_TYPE_F32] = {
|
||||
.type_name = "f32",
|
||||
.blck_size = 1,
|
||||
@@ -857,7 +870,7 @@ inline static float vaddvq_f32(float32x4_t v) {
|
||||
#define GGML_F16x8 float16x8_t
|
||||
#define GGML_F16x8_ZERO vdupq_n_f16(0.0f)
|
||||
#define GGML_F16x8_SET1(x) vdupq_n_f16(x)
|
||||
#define GGML_F16x8_LOAD(x) vld1q_f16((const __fp16 *)(x))
|
||||
#define GGML_F16x8_LOAD(x) vld1q_f16((const ggml_fp16_internal_t *)(x))
|
||||
#define GGML_F16x8_STORE vst1q_f16
|
||||
#define GGML_F16x8_FMA(a, b, c) vfmaq_f16(a, b, c)
|
||||
#define GGML_F16x8_ADD vaddq_f16
|
||||
@@ -900,7 +913,7 @@ inline static float vaddvq_f32(float32x4_t v) {
|
||||
#define GGML_F32Cx4 float32x4_t
|
||||
#define GGML_F32Cx4_ZERO vdupq_n_f32(0.0f)
|
||||
#define GGML_F32Cx4_SET1(x) vdupq_n_f32(x)
|
||||
#define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const __fp16 *)(x)))
|
||||
#define GGML_F32Cx4_LOAD(x) vcvt_f32_f16(vld1_f16((const ggml_fp16_internal_t *)(x)))
|
||||
#define GGML_F32Cx4_STORE(x, y) vst1_f16(x, vcvt_f16_f32(y))
|
||||
#define GGML_F32Cx4_FMA(a, b, c) vfmaq_f32(a, b, c)
|
||||
#define GGML_F32Cx4_ADD vaddq_f32
|
||||
@@ -11560,8 +11573,6 @@ static void ggml_compute_forward_get_rows_q(
|
||||
const struct ggml_tensor * src0 = dst->src[0];
|
||||
const struct ggml_tensor * src1 = dst->src[1];
|
||||
|
||||
assert(params->ith == 0);
|
||||
|
||||
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
@@ -11569,7 +11580,7 @@ static void ggml_compute_forward_get_rows_q(
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int64_t nc = ne00;
|
||||
const int64_t nr = ggml_nelements(src1); GGML_UNUSED(nr);
|
||||
const int64_t nr = ggml_nelements(src1);
|
||||
|
||||
const enum ggml_type type = src0->type;
|
||||
ggml_to_float_t const dequantize_row_q = type_traits[type].to_float;
|
||||
@@ -11579,17 +11590,25 @@ static void ggml_compute_forward_get_rows_q(
|
||||
assert(nb00 == ggml_type_size(type));
|
||||
assert(ggml_nrows(dst) == nr);
|
||||
|
||||
// TODO: multi-thread
|
||||
for (int64_t i12 = 0; i12 < ne12; ++i12) {
|
||||
for (int64_t i11 = 0; i11 < ne11; ++i11) {
|
||||
for (int64_t i10 = 0; i10 < ne10; ++i10) {
|
||||
const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
dequantize_row_q(
|
||||
(const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
|
||||
(float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
|
||||
}
|
||||
}
|
||||
// rows per thread
|
||||
const int dr = (nr + nth - 1)/nth;
|
||||
|
||||
// row range for this thread
|
||||
const int ir0 = dr*ith;
|
||||
const int ir1 = MIN(ir0 + dr, nr);
|
||||
|
||||
for (int64_t i = ir0; i < ir1; ++i) {
|
||||
const int64_t i12 = i/(ne11*ne10);
|
||||
const int64_t i11 = (i - i12*ne11*ne10)/ne10;
|
||||
const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10);
|
||||
const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
|
||||
|
||||
dequantize_row_q(
|
||||
(const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
|
||||
(float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11600,8 +11619,6 @@ static void ggml_compute_forward_get_rows_f16(
|
||||
const struct ggml_tensor * src0 = dst->src[0];
|
||||
const struct ggml_tensor * src1 = dst->src[1];
|
||||
|
||||
assert(params->ith == 0);
|
||||
|
||||
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
@@ -11609,24 +11626,32 @@ static void ggml_compute_forward_get_rows_f16(
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int64_t nc = ne00;
|
||||
const int64_t nr = ggml_nelements(src1); GGML_UNUSED(nr);
|
||||
const int64_t nr = ggml_nelements(src1);
|
||||
|
||||
assert(ne0 == nc);
|
||||
assert(ne02 == ne11);
|
||||
assert(nb00 == sizeof(ggml_fp16_t));
|
||||
assert(ggml_nrows(dst) == nr);
|
||||
|
||||
// TODO: multi-thread
|
||||
for (int64_t i12 = 0; i12 < ne12; ++i12) {
|
||||
for (int64_t i11 = 0; i11 < ne11; ++i11) {
|
||||
for (int64_t i10 = 0; i10 < ne10; ++i10) {
|
||||
const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
ggml_fp16_to_fp32_row(
|
||||
(const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
|
||||
(float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
|
||||
}
|
||||
}
|
||||
// rows per thread
|
||||
const int dr = (nr + nth - 1)/nth;
|
||||
|
||||
// row range for this thread
|
||||
const int ir0 = dr*ith;
|
||||
const int ir1 = MIN(ir0 + dr, nr);
|
||||
|
||||
for (int64_t i = ir0; i < ir1; ++i) {
|
||||
const int64_t i12 = i/(ne11*ne10);
|
||||
const int64_t i11 = (i - i12*ne11*ne10)/ne10;
|
||||
const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10);
|
||||
const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
|
||||
|
||||
ggml_fp16_to_fp32_row(
|
||||
(const void *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03),
|
||||
(float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3), nc);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11637,8 +11662,6 @@ static void ggml_compute_forward_get_rows_f32(
|
||||
const struct ggml_tensor * src0 = dst->src[0];
|
||||
const struct ggml_tensor * src1 = dst->src[1];
|
||||
|
||||
assert(params->ith == 0);
|
||||
|
||||
if (params->type == GGML_TASK_TYPE_INIT || params->type == GGML_TASK_TYPE_FINALIZE) {
|
||||
return;
|
||||
}
|
||||
@@ -11646,24 +11669,32 @@ static void ggml_compute_forward_get_rows_f32(
|
||||
GGML_TENSOR_BINARY_OP_LOCALS
|
||||
|
||||
const int64_t nc = ne00;
|
||||
const int64_t nr = ggml_nelements(src1); GGML_UNUSED(nr);
|
||||
const int64_t nr = ggml_nelements(src1);
|
||||
|
||||
assert(ne0 == nc);
|
||||
assert(ne02 == ne11);
|
||||
assert(nb00 == sizeof(float));
|
||||
assert(ggml_nrows(dst) == nr);
|
||||
|
||||
// TODO: multi-thread
|
||||
for (int64_t i12 = 0; i12 < ne12; ++i12) {
|
||||
for (int64_t i11 = 0; i11 < ne11; ++i11) {
|
||||
for (int64_t i10 = 0; i10 < ne10; ++i10) {
|
||||
const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
|
||||
const int ith = params->ith;
|
||||
const int nth = params->nth;
|
||||
|
||||
ggml_vec_cpy_f32(nc,
|
||||
(float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3),
|
||||
(float *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03));
|
||||
}
|
||||
}
|
||||
// rows per thread
|
||||
const int dr = (nr + nth - 1)/nth;
|
||||
|
||||
// row range for this thread
|
||||
const int ir0 = dr*ith;
|
||||
const int ir1 = MIN(ir0 + dr, nr);
|
||||
|
||||
for (int64_t i = ir0; i < ir1; ++i) {
|
||||
const int64_t i12 = i/(ne11*ne10);
|
||||
const int64_t i11 = (i - i12*ne11*ne10)/ne10;
|
||||
const int64_t i10 = (i - i12*ne11*ne10 - i11*ne10);
|
||||
const int64_t i01 = *(int32_t *) ((char *) src1->data + i10*nb10 + i11*nb11 + i12*nb12);
|
||||
|
||||
ggml_vec_cpy_f32(nc,
|
||||
(float *) ((char *) dst->data + i10*nb1 + i11*nb2 + i12*nb3),
|
||||
(float *) ((char *) src0->data + i01*nb01 + i11*nb02 + i12*nb03));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -12400,6 +12431,8 @@ static void ggml_compute_forward_alibi(
|
||||
case GGML_TYPE_I8:
|
||||
case GGML_TYPE_I16:
|
||||
case GGML_TYPE_I32:
|
||||
case GGML_TYPE_I64:
|
||||
case GGML_TYPE_F64:
|
||||
case GGML_TYPE_COUNT:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
@@ -12486,6 +12519,8 @@ static void ggml_compute_forward_clamp(
|
||||
case GGML_TYPE_I8:
|
||||
case GGML_TYPE_I16:
|
||||
case GGML_TYPE_I32:
|
||||
case GGML_TYPE_I64:
|
||||
case GGML_TYPE_F64:
|
||||
case GGML_TYPE_COUNT:
|
||||
{
|
||||
GGML_ASSERT(false);
|
||||
@@ -17796,7 +17831,7 @@ static void ggml_graph_compute_perf_stats_node(struct ggml_tensor * node, const
|
||||
node->perf_time_us += time_us_cur;
|
||||
}
|
||||
|
||||
static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
||||
static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads, int n_cur_threads) {
|
||||
int n_tasks = 0;
|
||||
|
||||
switch (node->op) {
|
||||
@@ -17877,6 +17912,12 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
||||
{
|
||||
n_tasks = n_threads;
|
||||
} break;
|
||||
case GGML_OP_GET_ROWS:
|
||||
{
|
||||
// FIXME: the cost of launching additional threads decreases performance with GPU offloading
|
||||
//n_tasks = MIN(n_threads, ggml_nelements(node->src[1]));
|
||||
n_tasks = MIN(n_cur_threads, ggml_nelements(node->src[1]));
|
||||
} break;
|
||||
case GGML_OP_SCALE:
|
||||
case GGML_OP_SET:
|
||||
case GGML_OP_CONT:
|
||||
@@ -17884,7 +17925,6 @@ static int ggml_get_n_tasks(struct ggml_tensor * node, int n_threads) {
|
||||
case GGML_OP_VIEW:
|
||||
case GGML_OP_PERMUTE:
|
||||
case GGML_OP_TRANSPOSE:
|
||||
case GGML_OP_GET_ROWS:
|
||||
case GGML_OP_GET_ROWS_BACK:
|
||||
case GGML_OP_DIAG:
|
||||
{
|
||||
@@ -18102,7 +18142,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
||||
/* FINALIZE */
|
||||
struct ggml_tensor * node = cgraph->nodes[node_n];
|
||||
if (GGML_OP_HAS_FINALIZE[node->op]) {
|
||||
params.nth = ggml_get_n_tasks(node, n_threads);
|
||||
params.nth = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
|
||||
ggml_compute_forward(¶ms, node);
|
||||
}
|
||||
ggml_graph_compute_perf_stats_node(node, state->shared);
|
||||
@@ -18112,7 +18152,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
||||
while (++node_n < cgraph->n_nodes) {
|
||||
GGML_PRINT_DEBUG_5("%s: %d/%d\n", __func__, node_n, cgraph->n_nodes);
|
||||
struct ggml_tensor * node = cgraph->nodes[node_n];
|
||||
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
||||
const int n_tasks = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
|
||||
|
||||
state->shared->perf_node_start_cycles = ggml_perf_cycles();
|
||||
state->shared->perf_node_start_time_us = ggml_perf_time_us();
|
||||
@@ -18160,7 +18200,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
|
||||
|
||||
/* INIT & COMPUTE */
|
||||
struct ggml_tensor * node = cgraph->nodes[node_n];
|
||||
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
||||
const int n_tasks = ggml_get_n_tasks(node, n_threads, state->shared->n_threads);
|
||||
|
||||
struct ggml_compute_params params = {
|
||||
/*.type =*/ GGML_TASK_TYPE_INIT,
|
||||
@@ -18225,7 +18265,7 @@ struct ggml_cplan ggml_graph_plan(const struct ggml_cgraph * cgraph, int n_threa
|
||||
for (int i = 0; i < cgraph->n_nodes; i++) {
|
||||
struct ggml_tensor * node = cgraph->nodes[i];
|
||||
|
||||
const int n_tasks = ggml_get_n_tasks(node, n_threads);
|
||||
const int n_tasks = ggml_get_n_tasks(node, n_threads, 1);
|
||||
|
||||
max_tasks = MAX(max_tasks, n_tasks);
|
||||
|
||||
|
||||
@@ -337,24 +337,24 @@ extern "C" {
|
||||
struct ggml_object;
|
||||
struct ggml_context;
|
||||
|
||||
// NOTE: always add types at the end of the enum to keep backward compatibility
|
||||
enum ggml_type {
|
||||
GGML_TYPE_F32 = 0,
|
||||
GGML_TYPE_F16 = 1,
|
||||
GGML_TYPE_Q4_0 = 2,
|
||||
GGML_TYPE_Q4_1 = 3,
|
||||
GGML_TYPE_F32 = 0,
|
||||
GGML_TYPE_F16 = 1,
|
||||
GGML_TYPE_Q4_0 = 2,
|
||||
GGML_TYPE_Q4_1 = 3,
|
||||
// GGML_TYPE_Q4_2 = 4, support has been removed
|
||||
// GGML_TYPE_Q4_3 (5) support has been removed
|
||||
GGML_TYPE_Q5_0 = 6,
|
||||
GGML_TYPE_Q5_1 = 7,
|
||||
GGML_TYPE_Q8_0 = 8,
|
||||
GGML_TYPE_Q8_1 = 9,
|
||||
// k-quantizations
|
||||
GGML_TYPE_Q2_K = 10,
|
||||
GGML_TYPE_Q3_K = 11,
|
||||
GGML_TYPE_Q4_K = 12,
|
||||
GGML_TYPE_Q5_K = 13,
|
||||
GGML_TYPE_Q6_K = 14,
|
||||
GGML_TYPE_Q8_K = 15,
|
||||
// GGML_TYPE_Q4_3 = 5, support has been removed
|
||||
GGML_TYPE_Q5_0 = 6,
|
||||
GGML_TYPE_Q5_1 = 7,
|
||||
GGML_TYPE_Q8_0 = 8,
|
||||
GGML_TYPE_Q8_1 = 9,
|
||||
GGML_TYPE_Q2_K = 10,
|
||||
GGML_TYPE_Q3_K = 11,
|
||||
GGML_TYPE_Q4_K = 12,
|
||||
GGML_TYPE_Q5_K = 13,
|
||||
GGML_TYPE_Q6_K = 14,
|
||||
GGML_TYPE_Q8_K = 15,
|
||||
GGML_TYPE_IQ2_XXS = 16,
|
||||
GGML_TYPE_IQ2_XS = 17,
|
||||
GGML_TYPE_IQ3_XXS = 18,
|
||||
@@ -363,9 +363,11 @@ extern "C" {
|
||||
GGML_TYPE_IQ3_S = 21,
|
||||
GGML_TYPE_IQ2_S = 22,
|
||||
GGML_TYPE_IQ4_XS = 23,
|
||||
GGML_TYPE_I8,
|
||||
GGML_TYPE_I16,
|
||||
GGML_TYPE_I32,
|
||||
GGML_TYPE_I8 = 24,
|
||||
GGML_TYPE_I16 = 25,
|
||||
GGML_TYPE_I32 = 26,
|
||||
GGML_TYPE_I64 = 27,
|
||||
GGML_TYPE_F64 = 28,
|
||||
GGML_TYPE_COUNT,
|
||||
};
|
||||
|
||||
@@ -383,20 +385,20 @@ extern "C" {
|
||||
|
||||
// model file types
|
||||
enum ggml_ftype {
|
||||
GGML_FTYPE_UNKNOWN = -1,
|
||||
GGML_FTYPE_ALL_F32 = 0,
|
||||
GGML_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
|
||||
GGML_FTYPE_UNKNOWN = -1,
|
||||
GGML_FTYPE_ALL_F32 = 0,
|
||||
GGML_FTYPE_MOSTLY_F16 = 1, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q4_0 = 2, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q4_1 = 3, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q4_1_SOME_F16 = 4, // tok_embeddings.weight and output.weight are F16
|
||||
GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q8_0 = 7, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q5_0 = 8, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q5_1 = 9, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q2_K = 10, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q3_K = 11, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q4_K = 12, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q5_K = 13, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q6_K = 14, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_IQ2_XXS = 15, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_IQ2_XS = 16, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_IQ3_XXS = 17, // except 1d tensors
|
||||
|
||||
@@ -32,6 +32,7 @@ class Keys:
|
||||
FILE_TYPE = "general.file_type"
|
||||
|
||||
class LLM:
|
||||
VOCAB_SIZE = "{arch}.vocab_size"
|
||||
CONTEXT_LENGTH = "{arch}.context_length"
|
||||
EMBEDDING_LENGTH = "{arch}.embedding_length"
|
||||
BLOCK_COUNT = "{arch}.block_count"
|
||||
@@ -661,6 +662,11 @@ class GGMLQuantizationType(IntEnum):
|
||||
IQ3_S = 21
|
||||
IQ2_S = 22
|
||||
IQ4_XS = 23
|
||||
I8 = 24
|
||||
I16 = 25
|
||||
I32 = 26
|
||||
I64 = 27
|
||||
F64 = 28
|
||||
|
||||
|
||||
class GGUFEndian(IntEnum):
|
||||
@@ -727,6 +733,11 @@ GGML_QUANT_SIZES = {
|
||||
GGMLQuantizationType.IQ3_S: (256, 2 + QK_K // 4 + QK_K // 8 + QK_K // 32 + 4),
|
||||
GGMLQuantizationType.IQ2_S: (256, 2 + QK_K // 4 + QK_K // 16),
|
||||
GGMLQuantizationType.IQ4_XS: (256, 2 + 2 + QK_K // 2 + QK_K // 64),
|
||||
GGMLQuantizationType.I8: (1, 1),
|
||||
GGMLQuantizationType.I16: (1, 2),
|
||||
GGMLQuantizationType.I32: (1, 4),
|
||||
GGMLQuantizationType.I64: (1, 8),
|
||||
GGMLQuantizationType.F64: (1, 8),
|
||||
}
|
||||
|
||||
|
||||
@@ -746,6 +757,7 @@ KEY_GENERAL_SOURCE_HF_REPO = Keys.General.SOURCE_HF_REPO
|
||||
KEY_GENERAL_FILE_TYPE = Keys.General.FILE_TYPE
|
||||
|
||||
# LLM
|
||||
KEY_VOCAB_SIZE = Keys.LLM.VOCAB_SIZE
|
||||
KEY_CONTEXT_LENGTH = Keys.LLM.CONTEXT_LENGTH
|
||||
KEY_EMBEDDING_LENGTH = Keys.LLM.EMBEDDING_LENGTH
|
||||
KEY_BLOCK_COUNT = Keys.LLM.BLOCK_COUNT
|
||||
|
||||
@@ -242,12 +242,27 @@ class GGUFReader:
|
||||
n_bytes = n_elems * type_size // block_size
|
||||
data_offs = int(start_offs + offset_tensor[0])
|
||||
item_type: npt.DTypeLike
|
||||
if ggml_type == GGMLQuantizationType.F32:
|
||||
item_count = n_elems
|
||||
item_type = np.float32
|
||||
elif ggml_type == GGMLQuantizationType.F16:
|
||||
if ggml_type == GGMLQuantizationType.F16:
|
||||
item_count = n_elems
|
||||
item_type = np.float16
|
||||
elif ggml_type == GGMLQuantizationType.F32:
|
||||
item_count = n_elems
|
||||
item_type = np.float32
|
||||
elif ggml_type == GGMLQuantizationType.F64:
|
||||
item_count = n_elems
|
||||
item_type = np.float64
|
||||
elif ggml_type == GGMLQuantizationType.I8:
|
||||
item_count = n_elems
|
||||
item_type = np.int8
|
||||
elif ggml_type == GGMLQuantizationType.I16:
|
||||
item_count = n_elems
|
||||
item_type = np.int16
|
||||
elif ggml_type == GGMLQuantizationType.I32:
|
||||
item_count = n_elems
|
||||
item_type = np.int32
|
||||
elif ggml_type == GGMLQuantizationType.I64:
|
||||
item_count = n_elems
|
||||
item_type = np.int64
|
||||
else:
|
||||
item_count = n_bytes
|
||||
item_type = np.uint8
|
||||
|
||||
@@ -196,9 +196,6 @@ class GGUFWriter:
|
||||
if self.state is not WriterState.EMPTY:
|
||||
raise ValueError(f'Expected output file to be empty, got {self.state}')
|
||||
|
||||
if raw_dtype is None and tensor_dtype not in (np.float32, np.float16):
|
||||
raise ValueError("Only F32 and F16 tensors are supported for now")
|
||||
|
||||
encoded_name = name.encode("utf8")
|
||||
self.ti_data += self._pack("Q", len(encoded_name))
|
||||
self.ti_data += encoded_name
|
||||
@@ -207,7 +204,22 @@ class GGUFWriter:
|
||||
for i in range(n_dims):
|
||||
self.ti_data += self._pack("Q", tensor_shape[n_dims - 1 - i])
|
||||
if raw_dtype is None:
|
||||
dtype = GGMLQuantizationType.F32 if tensor_dtype == np.float32 else GGMLQuantizationType.F16
|
||||
if tensor_dtype == np.float16:
|
||||
dtype = GGMLQuantizationType.F16
|
||||
elif tensor_dtype == np.float32:
|
||||
dtype = GGMLQuantizationType.F32
|
||||
elif tensor_dtype == np.float64:
|
||||
dtype = GGMLQuantizationType.F64
|
||||
elif tensor_dtype == np.int8:
|
||||
dtype = GGMLQuantizationType.I8
|
||||
elif tensor_dtype == np.int16:
|
||||
dtype = GGMLQuantizationType.I16
|
||||
elif tensor_dtype == np.int32:
|
||||
dtype = GGMLQuantizationType.I32
|
||||
elif tensor_dtype == np.int64:
|
||||
dtype = GGMLQuantizationType.I64
|
||||
else:
|
||||
raise ValueError("Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now")
|
||||
else:
|
||||
dtype = raw_dtype
|
||||
self.ti_data += self._pack("I", dtype)
|
||||
@@ -313,6 +325,9 @@ class GGUFWriter:
|
||||
self.data_alignment = alignment
|
||||
self.add_uint32(Keys.General.ALIGNMENT, alignment)
|
||||
|
||||
def add_vocab_size(self, size: int) -> None:
|
||||
self.add_uint32(Keys.LLM.VOCAB_SIZE.format(arch=self.arch), size)
|
||||
|
||||
def add_context_length(self, length: int) -> None:
|
||||
self.add_uint32(Keys.LLM.CONTEXT_LENGTH.format(arch=self.arch), length)
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "gguf"
|
||||
version = "0.7.0"
|
||||
version = "0.8.0"
|
||||
description = "Read and write ML models in GGUF for GGML"
|
||||
authors = ["GGML <ggml@ggml.ai>"]
|
||||
packages = [
|
||||
|
||||
@@ -59,9 +59,10 @@ extern "C" {
|
||||
typedef int32_t llama_seq_id;
|
||||
|
||||
enum llama_vocab_type {
|
||||
LLAMA_VOCAB_TYPE_SPM = 0, // SentencePiece
|
||||
LLAMA_VOCAB_TYPE_BPE = 1, // Byte Pair Encoding
|
||||
LLAMA_VOCAB_TYPE_WPM = 2, // WordPiece
|
||||
LLAMA_VOCAB_TYPE_NONE = 0, // For models without vocab
|
||||
LLAMA_VOCAB_TYPE_SPM = 1, // SentencePiece
|
||||
LLAMA_VOCAB_TYPE_BPE = 2, // Byte Pair Encoding
|
||||
LLAMA_VOCAB_TYPE_WPM = 3, // WordPiece
|
||||
};
|
||||
|
||||
// note: these values should be synchronized with ggml_rope
|
||||
@@ -234,8 +235,9 @@ extern "C" {
|
||||
struct llama_context_params {
|
||||
uint32_t seed; // RNG seed, -1 for random
|
||||
uint32_t n_ctx; // text context, 0 = from model
|
||||
uint32_t n_batch; // prompt processing maximum batch size
|
||||
uint32_t n_parallel; // number of parallel sequences (i.e. distinct states for recurrent models)
|
||||
uint32_t n_batch; // logical maximum batch size that can be submitted to llama_decode
|
||||
uint32_t n_ubatch; // physical maximum batch size
|
||||
uint32_t n_seq_max; // max number of sequences (i.e. distinct states for recurrent models)
|
||||
uint32_t n_threads; // number of threads to use for generation
|
||||
uint32_t n_threads_batch; // number of threads to use for batch processing
|
||||
|
||||
@@ -377,7 +379,8 @@ extern "C" {
|
||||
|
||||
LLAMA_API uint32_t llama_n_ctx (const struct llama_context * ctx);
|
||||
LLAMA_API uint32_t llama_n_batch (const struct llama_context * ctx);
|
||||
LLAMA_API uint32_t llama_n_max_seq (const struct llama_context * ctx);
|
||||
LLAMA_API uint32_t llama_n_ubatch (const struct llama_context * ctx);
|
||||
LLAMA_API uint32_t llama_n_seq_max (const struct llama_context * ctx);
|
||||
|
||||
LLAMA_API enum llama_vocab_type llama_vocab_type(const struct llama_model * model);
|
||||
LLAMA_API enum llama_rope_type llama_rope_type (const struct llama_model * model);
|
||||
@@ -456,7 +459,7 @@ extern "C" {
|
||||
// Maximum number of sequences that can exist in a cell. It's not an error
|
||||
// if there are more sequences in a cell than this value, however they will
|
||||
// not be visible in the view cells_sequences.
|
||||
int32_t n_max_seq;
|
||||
int32_t n_seq_max;
|
||||
|
||||
// Number of tokens in the cache. For example, if there are two populated
|
||||
// cells, the first with 1 sequence id in it and the second with 2 sequence
|
||||
@@ -476,12 +479,12 @@ extern "C" {
|
||||
// Information for an individual cell.
|
||||
struct llama_kv_cache_view_cell * cells;
|
||||
|
||||
// The sequences for each cell. There will be n_max_seq items per cell.
|
||||
// The sequences for each cell. There will be n_seq_max items per cell.
|
||||
llama_seq_id * cells_sequences;
|
||||
};
|
||||
|
||||
// Create an empty KV cache view. (use only for debugging purposes)
|
||||
LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_max_seq);
|
||||
LLAMA_API struct llama_kv_cache_view llama_kv_cache_view_init(const struct llama_context * ctx, int32_t n_seq_max);
|
||||
|
||||
// Free a KV cache view. (use only for debugging purposes)
|
||||
LLAMA_API void llama_kv_cache_view_free(struct llama_kv_cache_view * view);
|
||||
@@ -650,6 +653,11 @@ extern "C" {
|
||||
// Set abort callback
|
||||
LLAMA_API void llama_set_abort_callback(struct llama_context * ctx, ggml_abort_callback abort_callback, void * abort_callback_data);
|
||||
|
||||
// Wait until all computations are finished
|
||||
// This is automatically done when using one of the functions below to obtain the computation results
|
||||
// and is not necessary to call it explicitly in most cases
|
||||
LLAMA_API void llama_synchronize(struct llama_context * ctx);
|
||||
|
||||
// Token logits obtained from the last call to llama_decode()
|
||||
// The logits for the last token are stored in the last row
|
||||
// Logits for which llama_batch.logits[i] == 0 are undefined
|
||||
@@ -708,7 +716,7 @@ extern "C" {
|
||||
|
||||
/// @details Convert the provided text into tokens.
|
||||
/// @param tokens The tokens pointer must be large enough to hold the resulting tokens.
|
||||
/// @return Returns the number of tokens on success, no more than n_max_tokens
|
||||
/// @return Returns the number of tokens on success, no more than n_tokens_max
|
||||
/// @return Returns a negative number on failure - the number of tokens that would have been returned
|
||||
/// @param special Allow tokenizing special and/or control tokens which otherwise are not exposed and treated as plaintext.
|
||||
/// Does not insert a leading space.
|
||||
@@ -717,7 +725,7 @@ extern "C" {
|
||||
const char * text,
|
||||
int32_t text_len,
|
||||
llama_token * tokens,
|
||||
int32_t n_max_tokens,
|
||||
int32_t n_tokens_max,
|
||||
bool add_bos,
|
||||
bool special);
|
||||
|
||||
|
||||
@@ -2222,8 +2222,8 @@ static void usage(char ** argv) {
|
||||
|
||||
int main(int argc, char ** argv) {
|
||||
test_mode mode = MODE_TEST;
|
||||
const char * op_name = NULL;
|
||||
const char * backend = NULL;
|
||||
const char * op_name_filter = NULL;
|
||||
const char * backend_filter = NULL;
|
||||
|
||||
for (int i = 1; i < argc; i++) {
|
||||
if (strcmp(argv[i], "test") == 0) {
|
||||
@@ -2232,14 +2232,14 @@ int main(int argc, char ** argv) {
|
||||
mode = MODE_PERF;
|
||||
} else if (strcmp(argv[i], "-o") == 0) {
|
||||
if (i + 1 < argc) {
|
||||
op_name = argv[++i];
|
||||
op_name_filter = argv[++i];
|
||||
} else {
|
||||
usage(argv);
|
||||
return 1;
|
||||
}
|
||||
} else if (strcmp(argv[i], "-b") == 0) {
|
||||
if (i + 1 < argc) {
|
||||
backend = argv[++i];
|
||||
backend_filter = argv[++i];
|
||||
} else {
|
||||
usage(argv);
|
||||
return 1;
|
||||
@@ -2258,7 +2258,7 @@ int main(int argc, char ** argv) {
|
||||
for (size_t i = 0; i < ggml_backend_reg_get_count(); i++) {
|
||||
printf("Backend %zu/%zu (%s)\n", i + 1, ggml_backend_reg_get_count(), ggml_backend_reg_get_name(i));
|
||||
|
||||
if (backend != NULL && strcmp(backend, ggml_backend_reg_get_name(i)) != 0) {
|
||||
if (backend_filter != NULL && strcmp(backend_filter, ggml_backend_reg_get_name(i)) != 0) {
|
||||
printf(" Skipping\n");
|
||||
n_ok++;
|
||||
continue;
|
||||
@@ -2266,9 +2266,17 @@ int main(int argc, char ** argv) {
|
||||
|
||||
ggml_backend_t backend = ggml_backend_reg_init_backend(i, NULL);
|
||||
GGML_ASSERT(backend != NULL);
|
||||
|
||||
if (backend_filter == NULL && ggml_backend_is_cpu(backend)) {
|
||||
printf(" Skipping CPU backend\n");
|
||||
ggml_backend_free(backend);
|
||||
n_ok++;
|
||||
continue;
|
||||
}
|
||||
|
||||
printf(" Backend name: %s\n", ggml_backend_name(backend));
|
||||
|
||||
bool ok = test_backend(backend, mode, op_name);
|
||||
bool ok = test_backend(backend, mode, op_name_filter);
|
||||
|
||||
printf(" Backend %s: ", ggml_backend_name(backend));
|
||||
if (ok) {
|
||||
|
||||
@@ -31,6 +31,8 @@ int main(void) {
|
||||
"{% for message in messages %}{{bos_token + message['role'] + '\\n' + message['content'] + eos_token + '\\n'}}{% endfor %}{% if add_generation_prompt %}{{ bos_token + 'assistant\\n' }}{% endif %}",
|
||||
// google/gemma-7b-it
|
||||
"{% if messages[0]['role'] == 'system' %}{{ raise_exception('System role not supported') }}{% endif %}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if (message['role'] == 'assistant') %}{% set role = 'model' %}{% else %}{% set role = message['role'] %}{% endif %}{{ '<start_of_turn>' + role + '\\n' + message['content'] | trim + '<end_of_turn>\\n' }}{% endfor %}{% if add_generation_prompt %}{{'<start_of_turn>model\\n'}}{% endif %}",
|
||||
// OrionStarAI/Orion-14B-Chat
|
||||
"{% for message in messages %}{% if loop.first %}{{ bos_token }}{% endif %}{% if message['role'] == 'user' %}{{ 'Human: ' + message['content'] + '\\n\\nAssistant: ' + eos_token }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token }}{% endif %}{% endfor %}",
|
||||
};
|
||||
std::vector<std::string> expected_output = {
|
||||
// teknium/OpenHermes-2.5-Mistral-7B
|
||||
@@ -45,6 +47,8 @@ int main(void) {
|
||||
"system\nYou are a helpful assistant</s>\n<s>user\nHello</s>\n<s>assistant\nHi there</s>\n<s>user\nWho are you</s>\n<s>assistant\n I am an assistant </s>\n<s>user\nAnother question</s>\n<s>assistant\n",
|
||||
// google/gemma-7b-it
|
||||
"<start_of_turn>user\nYou are a helpful assistant\n\nHello<end_of_turn>\n<start_of_turn>model\nHi there<end_of_turn>\n<start_of_turn>user\nWho are you<end_of_turn>\n<start_of_turn>model\nI am an assistant<end_of_turn>\n<start_of_turn>user\nAnother question<end_of_turn>\n<start_of_turn>model\n",
|
||||
// OrionStarAI/Orion-14B-Chat
|
||||
"Human: You are a helpful assistant\n\nHello\n\nAssistant: </s>Hi there</s>Human: Who are you\n\nAssistant: </s> I am an assistant </s>Human: Another question\n\nAssistant: </s>",
|
||||
};
|
||||
std::vector<char> formatted_chat(1024);
|
||||
int32_t res;
|
||||
|
||||
@@ -64,7 +64,7 @@ int main(int argc, char **argv) {
|
||||
for (int i = 0; i < n_vocab; ++i) {
|
||||
std::string str = llama_detokenize_bpe(ctx, std::vector<int>(1, i));
|
||||
try {
|
||||
auto cps = codepoints_from_utf8(str);
|
||||
auto cps = unicode_cpts_from_utf8(str);
|
||||
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
|
||||
std::string check = llama_detokenize_bpe(ctx, tokens);
|
||||
if (check != str) {
|
||||
@@ -97,7 +97,7 @@ int main(int argc, char **argv) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string str = codepoint_to_utf8(cp);
|
||||
std::string str = unicode_cpt_to_utf8(cp);
|
||||
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
|
||||
std::string check = llama_detokenize_bpe(ctx, tokens);
|
||||
if (cp != 9601 && str != check) {
|
||||
|
||||
@@ -85,7 +85,7 @@ int main(int argc, char **argv) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::string str = codepoint_to_utf8(cp);
|
||||
std::string str = unicode_cpt_to_utf8(cp);
|
||||
std::vector<llama_token> tokens = llama_tokenize(ctx, str, false);
|
||||
std::string check = llama_detokenize_spm(ctx, tokens);
|
||||
if (cp != 9601 && str != check) {
|
||||
|
||||
+1672
File diff suppressed because it is too large
Load Diff
@@ -1,784 +1,26 @@
|
||||
#pragma once
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <map>
|
||||
#include <stdexcept>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
|
||||
static const std::vector<std::pair<uint32_t, uint32_t>> digit_ranges = {
|
||||
{0x30, 0x39}, {0xB2, 0xB3}, {0xB9, 0xB9}, {0x660, 0x669}, {0x6F0, 0x6F9}, {0x7C0, 0x7C9}, {0x966, 0x96F}, {0x9E6, 0x9EF}, {0xA66, 0xA6F}, {0xAE6, 0xAEF}, {0xB66, 0xB6F}, {0xBE6, 0xBEF}, {0xC66, 0xC6F},
|
||||
{0xCE6, 0xCEF}, {0xD66, 0xD6F}, {0xDE6, 0xDEF}, {0xE50, 0xE59}, {0xED0, 0xED9}, {0xF20, 0xF29}, {0x1040, 0x1049}, {0x1090, 0x1099}, {0x1369, 0x1371}, {0x17E0, 0x17E9}, {0x1810, 0x1819}, {0x1946, 0x194F},
|
||||
{0x19D0, 0x19DA}, {0x1A80, 0x1A89}, {0x1A90, 0x1A99}, {0x1B50, 0x1B59}, {0x1BB0, 0x1BB9}, {0x1C40, 0x1C49}, {0x1C50, 0x1C59}, {0x2070, 0x2070}, {0x2074, 0x2079}, {0x2080, 0x2089}, {0x2460, 0x2468},
|
||||
{0x2474, 0x247C}, {0x2488, 0x2490}, {0x24EA, 0x24EA}, {0x24F5, 0x24FD}, {0x24FF, 0x24FF}, {0x2776, 0x277E}, {0x2780, 0x2788}, {0x278A, 0x2792}, {0xA620, 0xA629}, {0xA8D0, 0xA8D9}, {0xA900, 0xA909},
|
||||
{0xA9D0, 0xA9D9}, {0xA9F0, 0xA9F9}, {0xAA50, 0xAA59}, {0xABF0, 0xABF9}, {0xFF10, 0xFF19}, {0x104A0, 0x104A9}, {0x10A40, 0x10A43}, {0x10D30, 0x10D39}, {0x10E60, 0x10E68}, {0x11052, 0x1105A},
|
||||
{0x11066, 0x1106F}, {0x110F0, 0x110F9}, {0x11136, 0x1113F}, {0x111D0, 0x111D9}, {0x112F0, 0x112F9}, {0x11450, 0x11459}, {0x114D0, 0x114D9}, {0x11650, 0x11659}, {0x116C0, 0x116C9}, {0x11730, 0x11739},
|
||||
{0x118E0, 0x118E9}, {0x11950, 0x11959}, {0x11C50, 0x11C59}, {0x11D50, 0x11D59}, {0x11DA0, 0x11DA9}, {0x16A60, 0x16A69}, {0x16B50, 0x16B59}, {0x1D7CE, 0x1D7FF}, {0x1E140, 0x1E149}, {0x1E2F0, 0x1E2F9},
|
||||
{0x1E950, 0x1E959}, {0x1F100, 0x1F10A}, {0x1FBF0, 0x1FBF9},
|
||||
};
|
||||
|
||||
static const std::vector<std::pair<uint32_t, uint32_t>> letter_ranges = {
|
||||
{0x41, 0x5A}, {0x61, 0x7A}, {0xAA, 0xAA}, {0xB5, 0xB5}, {0xBA, 0xBA}, {0xC0, 0xD6}, {0xD8, 0xF6}, {0xF8, 0x2C1}, {0x2C6, 0x2D1}, {0x2E0, 0x2E4}, {0x2EC, 0x2EC}, {0x2EE, 0x2EE}, {0x370, 0x374},
|
||||
{0x376, 0x377}, {0x37A, 0x37D}, {0x37F, 0x37F}, {0x386, 0x386}, {0x388, 0x38A}, {0x38C, 0x38C}, {0x38E, 0x3A1}, {0x3A3, 0x3F5}, {0x3F7, 0x481}, {0x48A, 0x52F}, {0x531, 0x556}, {0x559, 0x559},
|
||||
{0x560, 0x588}, {0x5D0, 0x5EA}, {0x5EF, 0x5F2}, {0x620, 0x64A}, {0x66E, 0x66F}, {0x671, 0x6D3}, {0x6D5, 0x6D5}, {0x6E5, 0x6E6}, {0x6EE, 0x6EF}, {0x6FA, 0x6FC}, {0x6FF, 0x6FF}, {0x710, 0x710},
|
||||
{0x712, 0x72F}, {0x74D, 0x7A5}, {0x7B1, 0x7B1}, {0x7CA, 0x7EA}, {0x7F4, 0x7F5}, {0x7FA, 0x7FA}, {0x800, 0x815}, {0x81A, 0x81A}, {0x824, 0x824}, {0x828, 0x828}, {0x840, 0x858}, {0x860, 0x86A},
|
||||
{0x8A0, 0x8B4}, {0x8B6, 0x8C7}, {0x904, 0x939}, {0x93D, 0x93D}, {0x950, 0x950}, {0x958, 0x961}, {0x971, 0x980}, {0x985, 0x98C}, {0x98F, 0x990}, {0x993, 0x9A8}, {0x9AA, 0x9B0}, {0x9B2, 0x9B2},
|
||||
{0x9B6, 0x9B9}, {0x9BD, 0x9BD}, {0x9CE, 0x9CE}, {0x9DC, 0x9DD}, {0x9DF, 0x9E1}, {0x9F0, 0x9F1}, {0x9FC, 0x9FC}, {0xA05, 0xA0A}, {0xA0F, 0xA10}, {0xA13, 0xA28}, {0xA2A, 0xA30}, {0xA32, 0xA33},
|
||||
{0xA35, 0xA36}, {0xA38, 0xA39}, {0xA59, 0xA5C}, {0xA5E, 0xA5E}, {0xA72, 0xA74}, {0xA85, 0xA8D}, {0xA8F, 0xA91}, {0xA93, 0xAA8}, {0xAAA, 0xAB0}, {0xAB2, 0xAB3}, {0xAB5, 0xAB9}, {0xABD, 0xABD},
|
||||
{0xAD0, 0xAD0}, {0xAE0, 0xAE1}, {0xAF9, 0xAF9}, {0xB05, 0xB0C}, {0xB0F, 0xB10}, {0xB13, 0xB28}, {0xB2A, 0xB30}, {0xB32, 0xB33}, {0xB35, 0xB39}, {0xB3D, 0xB3D}, {0xB5C, 0xB5D}, {0xB5F, 0xB61},
|
||||
{0xB71, 0xB71}, {0xB83, 0xB83}, {0xB85, 0xB8A}, {0xB8E, 0xB90}, {0xB92, 0xB95}, {0xB99, 0xB9A}, {0xB9C, 0xB9C}, {0xB9E, 0xB9F}, {0xBA3, 0xBA4}, {0xBA8, 0xBAA}, {0xBAE, 0xBB9}, {0xBD0, 0xBD0},
|
||||
{0xC05, 0xC0C}, {0xC0E, 0xC10}, {0xC12, 0xC28}, {0xC2A, 0xC39}, {0xC3D, 0xC3D}, {0xC58, 0xC5A}, {0xC60, 0xC61}, {0xC80, 0xC80}, {0xC85, 0xC8C}, {0xC8E, 0xC90}, {0xC92, 0xCA8}, {0xCAA, 0xCB3},
|
||||
{0xCB5, 0xCB9}, {0xCBD, 0xCBD}, {0xCDE, 0xCDE}, {0xCE0, 0xCE1}, {0xCF1, 0xCF2}, {0xD04, 0xD0C}, {0xD0E, 0xD10}, {0xD12, 0xD3A}, {0xD3D, 0xD3D}, {0xD4E, 0xD4E}, {0xD54, 0xD56}, {0xD5F, 0xD61},
|
||||
{0xD7A, 0xD7F}, {0xD85, 0xD96}, {0xD9A, 0xDB1}, {0xDB3, 0xDBB}, {0xDBD, 0xDBD}, {0xDC0, 0xDC6}, {0xE01, 0xE30}, {0xE32, 0xE33}, {0xE40, 0xE46}, {0xE81, 0xE82}, {0xE84, 0xE84}, {0xE86, 0xE8A},
|
||||
{0xE8C, 0xEA3}, {0xEA5, 0xEA5}, {0xEA7, 0xEB0}, {0xEB2, 0xEB3}, {0xEBD, 0xEBD}, {0xEC0, 0xEC4}, {0xEC6, 0xEC6}, {0xEDC, 0xEDF}, {0xF00, 0xF00}, {0xF40, 0xF47}, {0xF49, 0xF6C}, {0xF88, 0xF8C},
|
||||
{0x1000, 0x102A}, {0x103F, 0x103F}, {0x1050, 0x1055}, {0x105A, 0x105D}, {0x1061, 0x1061}, {0x1065, 0x1066}, {0x106E, 0x1070}, {0x1075, 0x1081}, {0x108E, 0x108E}, {0x10A0, 0x10C5}, {0x10C7, 0x10C7},
|
||||
{0x10CD, 0x10CD}, {0x10D0, 0x10FA}, {0x10FC, 0x1248}, {0x124A, 0x124D}, {0x1250, 0x1256}, {0x1258, 0x1258}, {0x125A, 0x125D}, {0x1260, 0x1288}, {0x128A, 0x128D}, {0x1290, 0x12B0}, {0x12B2, 0x12B5},
|
||||
{0x12B8, 0x12BE}, {0x12C0, 0x12C0}, {0x12C2, 0x12C5}, {0x12C8, 0x12D6}, {0x12D8, 0x1310}, {0x1312, 0x1315}, {0x1318, 0x135A}, {0x1380, 0x138F}, {0x13A0, 0x13F5}, {0x13F8, 0x13FD}, {0x1401, 0x166C},
|
||||
{0x166F, 0x167F}, {0x1681, 0x169A}, {0x16A0, 0x16EA}, {0x16F1, 0x16F8}, {0x1700, 0x170C}, {0x170E, 0x1711}, {0x1720, 0x1731}, {0x1740, 0x1751}, {0x1760, 0x176C}, {0x176E, 0x1770}, {0x1780, 0x17B3},
|
||||
{0x17D7, 0x17D7}, {0x17DC, 0x17DC}, {0x1820, 0x1878}, {0x1880, 0x1884}, {0x1887, 0x18A8}, {0x18AA, 0x18AA}, {0x18B0, 0x18F5}, {0x1900, 0x191E}, {0x1950, 0x196D}, {0x1970, 0x1974}, {0x1980, 0x19AB},
|
||||
{0x19B0, 0x19C9}, {0x1A00, 0x1A16}, {0x1A20, 0x1A54}, {0x1AA7, 0x1AA7}, {0x1B05, 0x1B33}, {0x1B45, 0x1B4B}, {0x1B83, 0x1BA0}, {0x1BAE, 0x1BAF}, {0x1BBA, 0x1BE5}, {0x1C00, 0x1C23}, {0x1C4D, 0x1C4F},
|
||||
{0x1C5A, 0x1C7D}, {0x1C80, 0x1C88}, {0x1C90, 0x1CBA}, {0x1CBD, 0x1CBF}, {0x1CE9, 0x1CEC}, {0x1CEE, 0x1CF3}, {0x1CF5, 0x1CF6}, {0x1CFA, 0x1CFA}, {0x1D00, 0x1DBF}, {0x1E00, 0x1F15}, {0x1F18, 0x1F1D},
|
||||
{0x1F20, 0x1F45}, {0x1F48, 0x1F4D}, {0x1F50, 0x1F57}, {0x1F59, 0x1F59}, {0x1F5B, 0x1F5B}, {0x1F5D, 0x1F5D}, {0x1F5F, 0x1F7D}, {0x1F80, 0x1FB4}, {0x1FB6, 0x1FBC}, {0x1FBE, 0x1FBE}, {0x1FC2, 0x1FC4},
|
||||
{0x1FC6, 0x1FCC}, {0x1FD0, 0x1FD3}, {0x1FD6, 0x1FDB}, {0x1FE0, 0x1FEC}, {0x1FF2, 0x1FF4}, {0x1FF6, 0x1FFC}, {0x2071, 0x2071}, {0x207F, 0x207F}, {0x2090, 0x209C}, {0x2102, 0x2102}, {0x2107, 0x2107},
|
||||
{0x210A, 0x2113}, {0x2115, 0x2115}, {0x2119, 0x211D}, {0x2124, 0x2124}, {0x2126, 0x2126}, {0x2128, 0x2128}, {0x212A, 0x212D}, {0x212F, 0x2139}, {0x213C, 0x213F}, {0x2145, 0x2149}, {0x214E, 0x214E},
|
||||
{0x2183, 0x2184}, {0x2C00, 0x2C2E}, {0x2C30, 0x2C5E}, {0x2C60, 0x2CE4}, {0x2CEB, 0x2CEE}, {0x2CF2, 0x2CF3}, {0x2D00, 0x2D25}, {0x2D27, 0x2D27}, {0x2D2D, 0x2D2D}, {0x2D30, 0x2D67}, {0x2D6F, 0x2D6F},
|
||||
{0x2D80, 0x2D96}, {0x2DA0, 0x2DA6}, {0x2DA8, 0x2DAE}, {0x2DB0, 0x2DB6}, {0x2DB8, 0x2DBE}, {0x2DC0, 0x2DC6}, {0x2DC8, 0x2DCE}, {0x2DD0, 0x2DD6}, {0x2DD8, 0x2DDE}, {0x2E2F, 0x2E2F}, {0x3005, 0x3006},
|
||||
{0x3031, 0x3035}, {0x303B, 0x303C}, {0x3041, 0x3096}, {0x309D, 0x309F}, {0x30A1, 0x30FA}, {0x30FC, 0x30FF}, {0x3105, 0x312F}, {0x3131, 0x318E}, {0x31A0, 0x31BF}, {0x31F0, 0x31FF}, {0x3400, 0x4DBF},
|
||||
{0x4E00, 0x9FFC}, {0xA000, 0xA48C}, {0xA4D0, 0xA4FD}, {0xA500, 0xA60C}, {0xA610, 0xA61F}, {0xA62A, 0xA62B}, {0xA640, 0xA66E}, {0xA67F, 0xA69D}, {0xA6A0, 0xA6E5}, {0xA717, 0xA71F}, {0xA722, 0xA788},
|
||||
{0xA78B, 0xA7BF}, {0xA7C2, 0xA7CA}, {0xA7F5, 0xA801}, {0xA803, 0xA805}, {0xA807, 0xA80A}, {0xA80C, 0xA822}, {0xA840, 0xA873}, {0xA882, 0xA8B3}, {0xA8F2, 0xA8F7}, {0xA8FB, 0xA8FB}, {0xA8FD, 0xA8FE},
|
||||
{0xA90A, 0xA925}, {0xA930, 0xA946}, {0xA960, 0xA97C}, {0xA984, 0xA9B2}, {0xA9CF, 0xA9CF}, {0xA9E0, 0xA9E4}, {0xA9E6, 0xA9EF}, {0xA9FA, 0xA9FE}, {0xAA00, 0xAA28}, {0xAA40, 0xAA42}, {0xAA44, 0xAA4B},
|
||||
{0xAA60, 0xAA76}, {0xAA7A, 0xAA7A}, {0xAA7E, 0xAAAF}, {0xAAB1, 0xAAB1}, {0xAAB5, 0xAAB6}, {0xAAB9, 0xAABD}, {0xAAC0, 0xAAC0}, {0xAAC2, 0xAAC2}, {0xAADB, 0xAADD}, {0xAAE0, 0xAAEA}, {0xAAF2, 0xAAF4},
|
||||
{0xAB01, 0xAB06}, {0xAB09, 0xAB0E}, {0xAB11, 0xAB16}, {0xAB20, 0xAB26}, {0xAB28, 0xAB2E}, {0xAB30, 0xAB5A}, {0xAB5C, 0xAB69}, {0xAB70, 0xABE2}, {0xAC00, 0xD7A3}, {0xD7B0, 0xD7C6}, {0xD7CB, 0xD7FB},
|
||||
{0xF900, 0xFA6D}, {0xFA70, 0xFAD9}, {0xFB00, 0xFB06}, {0xFB13, 0xFB17}, {0xFB1D, 0xFB1D}, {0xFB1F, 0xFB28}, {0xFB2A, 0xFB36}, {0xFB38, 0xFB3C}, {0xFB3E, 0xFB3E}, {0xFB40, 0xFB41}, {0xFB43, 0xFB44},
|
||||
{0xFB46, 0xFBB1}, {0xFBD3, 0xFD3D}, {0xFD50, 0xFD8F}, {0xFD92, 0xFDC7}, {0xFDF0, 0xFDFB}, {0xFE70, 0xFE74}, {0xFE76, 0xFEFC}, {0xFF21, 0xFF3A}, {0xFF41, 0xFF5A}, {0xFF66, 0xFFBE}, {0xFFC2, 0xFFC7},
|
||||
{0xFFCA, 0xFFCF}, {0xFFD2, 0xFFD7}, {0xFFDA, 0xFFDC}, {0x10000, 0x1000B}, {0x1000D, 0x10026}, {0x10028, 0x1003A}, {0x1003C, 0x1003D}, {0x1003F, 0x1004D}, {0x10050, 0x1005D}, {0x10080, 0x100FA},
|
||||
{0x10280, 0x1029C}, {0x102A0, 0x102D0}, {0x10300, 0x1031F}, {0x1032D, 0x10340}, {0x10342, 0x10349}, {0x10350, 0x10375}, {0x10380, 0x1039D}, {0x103A0, 0x103C3}, {0x103C8, 0x103CF}, {0x10400, 0x1049D},
|
||||
{0x104B0, 0x104D3}, {0x104D8, 0x104FB}, {0x10500, 0x10527}, {0x10530, 0x10563}, {0x10600, 0x10736}, {0x10740, 0x10755}, {0x10760, 0x10767}, {0x10800, 0x10805}, {0x10808, 0x10808}, {0x1080A, 0x10835},
|
||||
{0x10837, 0x10838}, {0x1083C, 0x1083C}, {0x1083F, 0x10855}, {0x10860, 0x10876}, {0x10880, 0x1089E}, {0x108E0, 0x108F2}, {0x108F4, 0x108F5}, {0x10900, 0x10915}, {0x10920, 0x10939}, {0x10980, 0x109B7},
|
||||
{0x109BE, 0x109BF}, {0x10A00, 0x10A00}, {0x10A10, 0x10A13}, {0x10A15, 0x10A17}, {0x10A19, 0x10A35}, {0x10A60, 0x10A7C}, {0x10A80, 0x10A9C}, {0x10AC0, 0x10AC7}, {0x10AC9, 0x10AE4}, {0x10B00, 0x10B35},
|
||||
{0x10B40, 0x10B55}, {0x10B60, 0x10B72}, {0x10B80, 0x10B91}, {0x10C00, 0x10C48}, {0x10C80, 0x10CB2}, {0x10CC0, 0x10CF2}, {0x10D00, 0x10D23}, {0x10E80, 0x10EA9}, {0x10EB0, 0x10EB1}, {0x10F00, 0x10F1C},
|
||||
{0x10F27, 0x10F27}, {0x10F30, 0x10F45}, {0x10FB0, 0x10FC4}, {0x10FE0, 0x10FF6}, {0x11003, 0x11037}, {0x11083, 0x110AF}, {0x110D0, 0x110E8}, {0x11103, 0x11126}, {0x11144, 0x11144}, {0x11147, 0x11147},
|
||||
{0x11150, 0x11172}, {0x11176, 0x11176}, {0x11183, 0x111B2}, {0x111C1, 0x111C4}, {0x111DA, 0x111DA}, {0x111DC, 0x111DC}, {0x11200, 0x11211}, {0x11213, 0x1122B}, {0x11280, 0x11286}, {0x11288, 0x11288},
|
||||
{0x1128A, 0x1128D}, {0x1128F, 0x1129D}, {0x1129F, 0x112A8}, {0x112B0, 0x112DE}, {0x11305, 0x1130C}, {0x1130F, 0x11310}, {0x11313, 0x11328}, {0x1132A, 0x11330}, {0x11332, 0x11333}, {0x11335, 0x11339},
|
||||
{0x1133D, 0x1133D}, {0x11350, 0x11350}, {0x1135D, 0x11361}, {0x11400, 0x11434}, {0x11447, 0x1144A}, {0x1145F, 0x11461}, {0x11480, 0x114AF}, {0x114C4, 0x114C5}, {0x114C7, 0x114C7}, {0x11580, 0x115AE},
|
||||
{0x115D8, 0x115DB}, {0x11600, 0x1162F}, {0x11644, 0x11644}, {0x11680, 0x116AA}, {0x116B8, 0x116B8}, {0x11700, 0x1171A}, {0x11800, 0x1182B}, {0x118A0, 0x118DF}, {0x118FF, 0x11906}, {0x11909, 0x11909},
|
||||
{0x1190C, 0x11913}, {0x11915, 0x11916}, {0x11918, 0x1192F}, {0x1193F, 0x1193F}, {0x11941, 0x11941}, {0x119A0, 0x119A7}, {0x119AA, 0x119D0}, {0x119E1, 0x119E1}, {0x119E3, 0x119E3}, {0x11A00, 0x11A00},
|
||||
{0x11A0B, 0x11A32}, {0x11A3A, 0x11A3A}, {0x11A50, 0x11A50}, {0x11A5C, 0x11A89}, {0x11A9D, 0x11A9D}, {0x11AC0, 0x11AF8}, {0x11C00, 0x11C08}, {0x11C0A, 0x11C2E}, {0x11C40, 0x11C40}, {0x11C72, 0x11C8F},
|
||||
{0x11D00, 0x11D06}, {0x11D08, 0x11D09}, {0x11D0B, 0x11D30}, {0x11D46, 0x11D46}, {0x11D60, 0x11D65}, {0x11D67, 0x11D68}, {0x11D6A, 0x11D89}, {0x11D98, 0x11D98}, {0x11EE0, 0x11EF2}, {0x11FB0, 0x11FB0},
|
||||
{0x12000, 0x12399}, {0x12480, 0x12543}, {0x13000, 0x1342E}, {0x14400, 0x14646}, {0x16800, 0x16A38}, {0x16A40, 0x16A5E}, {0x16AD0, 0x16AED}, {0x16B00, 0x16B2F}, {0x16B40, 0x16B43}, {0x16B63, 0x16B77},
|
||||
{0x16B7D, 0x16B8F}, {0x16E40, 0x16E7F}, {0x16F00, 0x16F4A}, {0x16F50, 0x16F50}, {0x16F93, 0x16F9F}, {0x16FE0, 0x16FE1}, {0x16FE3, 0x16FE3}, {0x17000, 0x187F7}, {0x18800, 0x18CD5}, {0x18D00, 0x18D08},
|
||||
{0x1B000, 0x1B11E}, {0x1B150, 0x1B152}, {0x1B164, 0x1B167}, {0x1B170, 0x1B2FB}, {0x1BC00, 0x1BC6A}, {0x1BC70, 0x1BC7C}, {0x1BC80, 0x1BC88}, {0x1BC90, 0x1BC99}, {0x1D400, 0x1D454}, {0x1D456, 0x1D49C},
|
||||
{0x1D49E, 0x1D49F}, {0x1D4A2, 0x1D4A2}, {0x1D4A5, 0x1D4A6}, {0x1D4A9, 0x1D4AC}, {0x1D4AE, 0x1D4B9}, {0x1D4BB, 0x1D4BB}, {0x1D4BD, 0x1D4C3}, {0x1D4C5, 0x1D505}, {0x1D507, 0x1D50A}, {0x1D50D, 0x1D514},
|
||||
{0x1D516, 0x1D51C}, {0x1D51E, 0x1D539}, {0x1D53B, 0x1D53E}, {0x1D540, 0x1D544}, {0x1D546, 0x1D546}, {0x1D54A, 0x1D550}, {0x1D552, 0x1D6A5}, {0x1D6A8, 0x1D6C0}, {0x1D6C2, 0x1D6DA}, {0x1D6DC, 0x1D6FA},
|
||||
{0x1D6FC, 0x1D714}, {0x1D716, 0x1D734}, {0x1D736, 0x1D74E}, {0x1D750, 0x1D76E}, {0x1D770, 0x1D788}, {0x1D78A, 0x1D7A8}, {0x1D7AA, 0x1D7C2}, {0x1D7C4, 0x1D7CB}, {0x1E100, 0x1E12C}, {0x1E137, 0x1E13D},
|
||||
{0x1E14E, 0x1E14E}, {0x1E2C0, 0x1E2EB}, {0x1E800, 0x1E8C4}, {0x1E900, 0x1E943}, {0x1E94B, 0x1E94B}, {0x1EE00, 0x1EE03}, {0x1EE05, 0x1EE1F}, {0x1EE21, 0x1EE22}, {0x1EE24, 0x1EE24}, {0x1EE27, 0x1EE27},
|
||||
{0x1EE29, 0x1EE32}, {0x1EE34, 0x1EE37}, {0x1EE39, 0x1EE39}, {0x1EE3B, 0x1EE3B}, {0x1EE42, 0x1EE42}, {0x1EE47, 0x1EE47}, {0x1EE49, 0x1EE49}, {0x1EE4B, 0x1EE4B}, {0x1EE4D, 0x1EE4F}, {0x1EE51, 0x1EE52},
|
||||
{0x1EE54, 0x1EE54}, {0x1EE57, 0x1EE57}, {0x1EE59, 0x1EE59}, {0x1EE5B, 0x1EE5B}, {0x1EE5D, 0x1EE5D}, {0x1EE5F, 0x1EE5F}, {0x1EE61, 0x1EE62}, {0x1EE64, 0x1EE64}, {0x1EE67, 0x1EE6A}, {0x1EE6C, 0x1EE72},
|
||||
{0x1EE74, 0x1EE77}, {0x1EE79, 0x1EE7C}, {0x1EE7E, 0x1EE7E}, {0x1EE80, 0x1EE89}, {0x1EE8B, 0x1EE9B}, {0x1EEA1, 0x1EEA3}, {0x1EEA5, 0x1EEA9}, {0x1EEAB, 0x1EEBB}, {0x20000, 0x2A6DD}, {0x2A700, 0x2B734},
|
||||
{0x2B740, 0x2B81D}, {0x2B820, 0x2CEA1}, {0x2CEB0, 0x2EBE0}, {0x2F800, 0x2FA1D}, {0x30000, 0x3134A},
|
||||
};
|
||||
|
||||
static const std::vector<std::pair<uint32_t, uint32_t>> whitespace_ranges = {
|
||||
{0x9, 0xD}, {0x1C, 0x20}, {0x85, 0x85}, {0xA0, 0xA0}, {0x1680, 0x1680}, {0x2000, 0x200A}, {0x2028, 0x2029}, {0x202F, 0x202F}, {0x205F, 0x205F}, {0x3000, 0x3000},
|
||||
};
|
||||
|
||||
static const std::vector<std::pair<uint32_t, uint32_t>> accent_mark_ranges = {
|
||||
{0x300, 0x36F}, {0x483, 0x489}, {0x591, 0x5BD}, {0x5BF, 0x5BF}, {0x5C1, 0x5C2}, {0x5C4, 0x5C5}, {0x5C7, 0x5C7}, {0x610, 0x61A}, {0x64B, 0x65F}, {0x670, 0x670}, {0x6D6, 0x6DC}, {0x6DF, 0x6E4},
|
||||
{0x6E7, 0x6E8}, {0x6EA, 0x6ED}, {0x711, 0x711}, {0x730, 0x74A}, {0x7A6, 0x7B0}, {0x7EB, 0x7F3}, {0x7FD, 0x7FD}, {0x816, 0x819}, {0x81B, 0x823}, {0x825, 0x827}, {0x829, 0x82D}, {0x859, 0x85B},
|
||||
{0x8D3, 0x8E1}, {0x8E3, 0x903}, {0x93A, 0x93C}, {0x93E, 0x94F}, {0x951, 0x957}, {0x962, 0x963}, {0x981, 0x983}, {0x9BC, 0x9BC}, {0x9BE, 0x9C4}, {0x9C7, 0x9C8}, {0x9CB, 0x9CD}, {0x9D7, 0x9D7},
|
||||
{0x9E2, 0x9E3}, {0x9FE, 0x9FE}, {0xA01, 0xA03}, {0xA3C, 0xA3C}, {0xA3E, 0xA42}, {0xA47, 0xA48}, {0xA4B, 0xA4D}, {0xA51, 0xA51}, {0xA70, 0xA71}, {0xA75, 0xA75}, {0xA81, 0xA83}, {0xABC, 0xABC},
|
||||
{0xABE, 0xAC5}, {0xAC7, 0xAC9}, {0xACB, 0xACD}, {0xAE2, 0xAE3}, {0xAFA, 0xAFF}, {0xB01, 0xB03}, {0xB3C, 0xB3C}, {0xB3E, 0xB44}, {0xB47, 0xB48}, {0xB4B, 0xB4D}, {0xB55, 0xB57}, {0xB62, 0xB63},
|
||||
{0xB82, 0xB82}, {0xBBE, 0xBC2}, {0xBC6, 0xBC8}, {0xBCA, 0xBCD}, {0xBD7, 0xBD7}, {0xC00, 0xC04}, {0xC3E, 0xC44}, {0xC46, 0xC48}, {0xC4A, 0xC4D}, {0xC55, 0xC56}, {0xC62, 0xC63}, {0xC81, 0xC83},
|
||||
{0xCBC, 0xCBC}, {0xCBE, 0xCC4}, {0xCC6, 0xCC8}, {0xCCA, 0xCCD}, {0xCD5, 0xCD6}, {0xCE2, 0xCE3}, {0xD00, 0xD03}, {0xD3B, 0xD3C}, {0xD3E, 0xD44}, {0xD46, 0xD48}, {0xD4A, 0xD4D}, {0xD57, 0xD57},
|
||||
{0xD62, 0xD63}, {0xD81, 0xD83}, {0xDCA, 0xDCA}, {0xDCF, 0xDD4}, {0xDD6, 0xDD6}, {0xDD8, 0xDDF}, {0xDF2, 0xDF3}, {0xE31, 0xE31}, {0xE34, 0xE3A}, {0xE47, 0xE4E}, {0xEB1, 0xEB1}, {0xEB4, 0xEBC},
|
||||
{0xEC8, 0xECD}, {0xF18, 0xF19}, {0xF35, 0xF35}, {0xF37, 0xF37}, {0xF39, 0xF39}, {0xF3E, 0xF3F}, {0xF71, 0xF84}, {0xF86, 0xF87}, {0xF8D, 0xF97}, {0xF99, 0xFBC}, {0xFC6, 0xFC6}, {0x102B, 0x103E},
|
||||
{0x1056, 0x1059}, {0x105E, 0x1060}, {0x1062, 0x1064}, {0x1067, 0x106D}, {0x1071, 0x1074}, {0x1082, 0x108D}, {0x108F, 0x108F}, {0x109A, 0x109D}, {0x135D, 0x135F}, {0x1712, 0x1714}, {0x1732, 0x1734},
|
||||
{0x1752, 0x1753}, {0x1772, 0x1773}, {0x17B4, 0x17D3}, {0x17DD, 0x17DD}, {0x180B, 0x180D}, {0x1885, 0x1886}, {0x18A9, 0x18A9}, {0x1920, 0x192B}, {0x1930, 0x193B}, {0x1A17, 0x1A1B}, {0x1A55, 0x1A5E},
|
||||
{0x1A60, 0x1A7C}, {0x1A7F, 0x1A7F}, {0x1AB0, 0x1AC0}, {0x1B00, 0x1B04}, {0x1B34, 0x1B44}, {0x1B6B, 0x1B73}, {0x1B80, 0x1B82}, {0x1BA1, 0x1BAD}, {0x1BE6, 0x1BF3}, {0x1C24, 0x1C37}, {0x1CD0, 0x1CD2},
|
||||
{0x1CD4, 0x1CE8}, {0x1CED, 0x1CED}, {0x1CF4, 0x1CF4}, {0x1CF7, 0x1CF9}, {0x1DC0, 0x1DF9}, {0x1DFB, 0x1DFF}, {0x20D0, 0x20F0}, {0x2CEF, 0x2CF1}, {0x2D7F, 0x2D7F}, {0x2DE0, 0x2DFF}, {0x302A, 0x302F},
|
||||
{0x3099, 0x309A}, {0xA66F, 0xA672}, {0xA674, 0xA67D}, {0xA69E, 0xA69F}, {0xA6F0, 0xA6F1}, {0xA802, 0xA802}, {0xA806, 0xA806}, {0xA80B, 0xA80B}, {0xA823, 0xA827}, {0xA82C, 0xA82C}, {0xA880, 0xA881},
|
||||
{0xA8B4, 0xA8C5}, {0xA8E0, 0xA8F1}, {0xA8FF, 0xA8FF}, {0xA926, 0xA92D}, {0xA947, 0xA953}, {0xA980, 0xA983}, {0xA9B3, 0xA9C0}, {0xA9E5, 0xA9E5}, {0xAA29, 0xAA36}, {0xAA43, 0xAA43}, {0xAA4C, 0xAA4D},
|
||||
{0xAA7B, 0xAA7D}, {0xAAB0, 0xAAB0}, {0xAAB2, 0xAAB4}, {0xAAB7, 0xAAB8}, {0xAABE, 0xAABF}, {0xAAC1, 0xAAC1}, {0xAAEB, 0xAAEF}, {0xAAF5, 0xAAF6}, {0xABE3, 0xABEA}, {0xABEC, 0xABED}, {0xFB1E, 0xFB1E},
|
||||
{0xFE00, 0xFE0F}, {0xFE20, 0xFE2F}, {0x101FD, 0x101FD}, {0x102E0, 0x102E0}, {0x10376, 0x1037A}, {0x10A01, 0x10A03}, {0x10A05, 0x10A06}, {0x10A0C, 0x10A0F}, {0x10A38, 0x10A3A}, {0x10A3F, 0x10A3F},
|
||||
{0x10AE5, 0x10AE6}, {0x10D24, 0x10D27}, {0x10EAB, 0x10EAC}, {0x10F46, 0x10F50}, {0x11000, 0x11002}, {0x11038, 0x11046}, {0x1107F, 0x11082}, {0x110B0, 0x110BA}, {0x11100, 0x11102}, {0x11127, 0x11134},
|
||||
{0x11145, 0x11146}, {0x11173, 0x11173}, {0x11180, 0x11182}, {0x111B3, 0x111C0}, {0x111C9, 0x111CC}, {0x111CE, 0x111CF}, {0x1122C, 0x11237}, {0x1123E, 0x1123E}, {0x112DF, 0x112EA}, {0x11300, 0x11303},
|
||||
{0x1133B, 0x1133C}, {0x1133E, 0x11344}, {0x11347, 0x11348}, {0x1134B, 0x1134D}, {0x11357, 0x11357}, {0x11362, 0x11363}, {0x11366, 0x1136C}, {0x11370, 0x11374}, {0x11435, 0x11446}, {0x1145E, 0x1145E},
|
||||
{0x114B0, 0x114C3}, {0x115AF, 0x115B5}, {0x115B8, 0x115C0}, {0x115DC, 0x115DD}, {0x11630, 0x11640}, {0x116AB, 0x116B7}, {0x1171D, 0x1172B}, {0x1182C, 0x1183A}, {0x11930, 0x11935}, {0x11937, 0x11938},
|
||||
{0x1193B, 0x1193E}, {0x11940, 0x11940}, {0x11942, 0x11943}, {0x119D1, 0x119D7}, {0x119DA, 0x119E0}, {0x119E4, 0x119E4}, {0x11A01, 0x11A0A}, {0x11A33, 0x11A39}, {0x11A3B, 0x11A3E}, {0x11A47, 0x11A47},
|
||||
{0x11A51, 0x11A5B}, {0x11A8A, 0x11A99}, {0x11C2F, 0x11C36}, {0x11C38, 0x11C3F}, {0x11C92, 0x11CA7}, {0x11CA9, 0x11CB6}, {0x11D31, 0x11D36}, {0x11D3A, 0x11D3A}, {0x11D3C, 0x11D3D}, {0x11D3F, 0x11D45},
|
||||
{0x11D47, 0x11D47}, {0x11D8A, 0x11D8E}, {0x11D90, 0x11D91}, {0x11D93, 0x11D97}, {0x11EF3, 0x11EF6}, {0x16AF0, 0x16AF4}, {0x16B30, 0x16B36}, {0x16F4F, 0x16F4F}, {0x16F51, 0x16F87}, {0x16F8F, 0x16F92},
|
||||
{0x16FE4, 0x16FE4}, {0x16FF0, 0x16FF1}, {0x1BC9D, 0x1BC9E}, {0x1D165, 0x1D169}, {0x1D16D, 0x1D172}, {0x1D17B, 0x1D182}, {0x1D185, 0x1D18B}, {0x1D1AA, 0x1D1AD}, {0x1D242, 0x1D244}, {0x1DA00, 0x1DA36},
|
||||
{0x1DA3B, 0x1DA6C}, {0x1DA75, 0x1DA75}, {0x1DA84, 0x1DA84}, {0x1DA9B, 0x1DA9F}, {0x1DAA1, 0x1DAAF}, {0x1E000, 0x1E006}, {0x1E008, 0x1E018}, {0x1E01B, 0x1E021}, {0x1E023, 0x1E024}, {0x1E026, 0x1E02A},
|
||||
{0x1E130, 0x1E136}, {0x1E2EC, 0x1E2EF}, {0x1E8D0, 0x1E8D6}, {0x1E944, 0x1E94A}, {0xE0100, 0xE01EF},
|
||||
};
|
||||
|
||||
static const std::vector<std::pair<uint32_t, uint32_t>> punctuation_ranges = {
|
||||
{0x21, 0x23}, {0x25, 0x2A}, {0x2C, 0x2F}, {0x3A, 0x3B}, {0x3F, 0x40}, {0x5B, 0x5D}, {0x5F, 0x5F}, {0x7B, 0x7B}, {0x7D, 0x7D}, {0xA1, 0xA1}, {0xA7, 0xA7}, {0xAB, 0xAB}, {0xB6, 0xB7}, {0xBB, 0xBB},
|
||||
{0xBF, 0xBF}, {0x37E, 0x37E}, {0x387, 0x387}, {0x55A, 0x55F}, {0x589, 0x58A}, {0x5BE, 0x5BE}, {0x5C0, 0x5C0}, {0x5C3, 0x5C3}, {0x5C6, 0x5C6}, {0x5F3, 0x5F4}, {0x609, 0x60A}, {0x60C, 0x60D},
|
||||
{0x61B, 0x61B}, {0x61E, 0x61F}, {0x66A, 0x66D}, {0x6D4, 0x6D4}, {0x700, 0x70D}, {0x7F7, 0x7F9}, {0x830, 0x83E}, {0x85E, 0x85E}, {0x964, 0x965}, {0x970, 0x970}, {0x9FD, 0x9FD}, {0xA76, 0xA76},
|
||||
{0xAF0, 0xAF0}, {0xC77, 0xC77}, {0xC84, 0xC84}, {0xDF4, 0xDF4}, {0xE4F, 0xE4F}, {0xE5A, 0xE5B}, {0xF04, 0xF12}, {0xF14, 0xF14}, {0xF3A, 0xF3D}, {0xF85, 0xF85}, {0xFD0, 0xFD4}, {0xFD9, 0xFDA},
|
||||
{0x104A, 0x104F}, {0x10FB, 0x10FB}, {0x1360, 0x1368}, {0x1400, 0x1400}, {0x166E, 0x166E}, {0x169B, 0x169C}, {0x16EB, 0x16ED}, {0x1735, 0x1736}, {0x17D4, 0x17D6}, {0x17D8, 0x17DA}, {0x1800, 0x180A},
|
||||
{0x1944, 0x1945}, {0x1A1E, 0x1A1F}, {0x1AA0, 0x1AA6}, {0x1AA8, 0x1AAD}, {0x1B5A, 0x1B60}, {0x1BFC, 0x1BFF}, {0x1C3B, 0x1C3F}, {0x1C7E, 0x1C7F}, {0x1CC0, 0x1CC7}, {0x1CD3, 0x1CD3}, {0x2010, 0x2027},
|
||||
{0x2030, 0x2043}, {0x2045, 0x2051}, {0x2053, 0x205E}, {0x207D, 0x207E}, {0x208D, 0x208E}, {0x2308, 0x230B}, {0x2329, 0x232A}, {0x2768, 0x2775}, {0x27C5, 0x27C6}, {0x27E6, 0x27EF}, {0x2983, 0x2998},
|
||||
{0x29D8, 0x29DB}, {0x29FC, 0x29FD}, {0x2CF9, 0x2CFC}, {0x2CFE, 0x2CFF}, {0x2D70, 0x2D70}, {0x2E00, 0x2E2E}, {0x2E30, 0x2E4F}, {0x2E52, 0x2E52}, {0x3001, 0x3003}, {0x3008, 0x3011}, {0x3014, 0x301F},
|
||||
{0x3030, 0x3030}, {0x303D, 0x303D}, {0x30A0, 0x30A0}, {0x30FB, 0x30FB}, {0xA4FE, 0xA4FF}, {0xA60D, 0xA60F}, {0xA673, 0xA673}, {0xA67E, 0xA67E}, {0xA6F2, 0xA6F7}, {0xA874, 0xA877}, {0xA8CE, 0xA8CF},
|
||||
{0xA8F8, 0xA8FA}, {0xA8FC, 0xA8FC}, {0xA92E, 0xA92F}, {0xA95F, 0xA95F}, {0xA9C1, 0xA9CD}, {0xA9DE, 0xA9DF}, {0xAA5C, 0xAA5F}, {0xAADE, 0xAADF}, {0xAAF0, 0xAAF1}, {0xABEB, 0xABEB}, {0xFD3E, 0xFD3F},
|
||||
{0xFE10, 0xFE19}, {0xFE30, 0xFE52}, {0xFE54, 0xFE61}, {0xFE63, 0xFE63}, {0xFE68, 0xFE68}, {0xFE6A, 0xFE6B}, {0xFF01, 0xFF03}, {0xFF05, 0xFF0A}, {0xFF0C, 0xFF0F}, {0xFF1A, 0xFF1B}, {0xFF1F, 0xFF20},
|
||||
{0xFF3B, 0xFF3D}, {0xFF3F, 0xFF3F}, {0xFF5B, 0xFF5B}, {0xFF5D, 0xFF5D}, {0xFF5F, 0xFF65}, {0x10100, 0x10102}, {0x1039F, 0x1039F}, {0x103D0, 0x103D0}, {0x1056F, 0x1056F}, {0x10857, 0x10857},
|
||||
{0x1091F, 0x1091F}, {0x1093F, 0x1093F}, {0x10A50, 0x10A58}, {0x10A7F, 0x10A7F}, {0x10AF0, 0x10AF6}, {0x10B39, 0x10B3F}, {0x10B99, 0x10B9C}, {0x10EAD, 0x10EAD}, {0x10F55, 0x10F59}, {0x11047, 0x1104D},
|
||||
{0x110BB, 0x110BC}, {0x110BE, 0x110C1}, {0x11140, 0x11143}, {0x11174, 0x11175}, {0x111C5, 0x111C8}, {0x111CD, 0x111CD}, {0x111DB, 0x111DB}, {0x111DD, 0x111DF}, {0x11238, 0x1123D}, {0x112A9, 0x112A9},
|
||||
{0x1144B, 0x1144F}, {0x1145A, 0x1145B}, {0x1145D, 0x1145D}, {0x114C6, 0x114C6}, {0x115C1, 0x115D7}, {0x11641, 0x11643}, {0x11660, 0x1166C}, {0x1173C, 0x1173E}, {0x1183B, 0x1183B}, {0x11944, 0x11946},
|
||||
{0x119E2, 0x119E2}, {0x11A3F, 0x11A46}, {0x11A9A, 0x11A9C}, {0x11A9E, 0x11AA2}, {0x11C41, 0x11C45}, {0x11C70, 0x11C71}, {0x11EF7, 0x11EF8}, {0x11FFF, 0x11FFF}, {0x12470, 0x12474}, {0x16A6E, 0x16A6F},
|
||||
{0x16AF5, 0x16AF5}, {0x16B37, 0x16B3B}, {0x16B44, 0x16B44}, {0x16E97, 0x16E9A}, {0x16FE2, 0x16FE2}, {0x1BC9F, 0x1BC9F}, {0x1DA87, 0x1DA8B}, {0x1E95E, 0x1E95F},
|
||||
};
|
||||
|
||||
static const std::vector<std::pair<uint32_t, uint32_t>> symbol_ranges = {
|
||||
{0x24, 0x24}, {0x2B, 0x2B}, {0x3C, 0x3E}, {0x5E, 0x5E}, {0x60, 0x60}, {0x7C, 0x7C}, {0x7E, 0x7E}, {0xA2, 0xA6}, {0xA8, 0xA9}, {0xAC, 0xAC}, {0xAE, 0xB1}, {0xB4, 0xB4}, {0xB8, 0xB8}, {0xD7, 0xD7},
|
||||
{0xF7, 0xF7}, {0x2C2, 0x2C5}, {0x2D2, 0x2DF}, {0x2E5, 0x2EB}, {0x2ED, 0x2ED}, {0x2EF, 0x2FF}, {0x375, 0x375}, {0x384, 0x385}, {0x3F6, 0x3F6}, {0x482, 0x482}, {0x58D, 0x58F}, {0x606, 0x608},
|
||||
{0x60B, 0x60B}, {0x60E, 0x60F}, {0x6DE, 0x6DE}, {0x6E9, 0x6E9}, {0x6FD, 0x6FE}, {0x7F6, 0x7F6}, {0x7FE, 0x7FF}, {0x9F2, 0x9F3}, {0x9FA, 0x9FB}, {0xAF1, 0xAF1}, {0xB70, 0xB70}, {0xBF3, 0xBFA},
|
||||
{0xC7F, 0xC7F}, {0xD4F, 0xD4F}, {0xD79, 0xD79}, {0xE3F, 0xE3F}, {0xF01, 0xF03}, {0xF13, 0xF13}, {0xF15, 0xF17}, {0xF1A, 0xF1F}, {0xF34, 0xF34}, {0xF36, 0xF36}, {0xF38, 0xF38}, {0xFBE, 0xFC5},
|
||||
{0xFC7, 0xFCC}, {0xFCE, 0xFCF}, {0xFD5, 0xFD8}, {0x109E, 0x109F}, {0x1390, 0x1399}, {0x166D, 0x166D}, {0x17DB, 0x17DB}, {0x1940, 0x1940}, {0x19DE, 0x19FF}, {0x1B61, 0x1B6A}, {0x1B74, 0x1B7C},
|
||||
{0x1FBD, 0x1FBD}, {0x1FBF, 0x1FC1}, {0x1FCD, 0x1FCF}, {0x1FDD, 0x1FDF}, {0x1FED, 0x1FEF}, {0x1FFD, 0x1FFE}, {0x2044, 0x2044}, {0x2052, 0x2052}, {0x207A, 0x207C}, {0x208A, 0x208C}, {0x20A0, 0x20BF},
|
||||
{0x2100, 0x2101}, {0x2103, 0x2106}, {0x2108, 0x2109}, {0x2114, 0x2114}, {0x2116, 0x2118}, {0x211E, 0x2123}, {0x2125, 0x2125}, {0x2127, 0x2127}, {0x2129, 0x2129}, {0x212E, 0x212E}, {0x213A, 0x213B},
|
||||
{0x2140, 0x2144}, {0x214A, 0x214D}, {0x214F, 0x214F}, {0x218A, 0x218B}, {0x2190, 0x2307}, {0x230C, 0x2328}, {0x232B, 0x2426}, {0x2440, 0x244A}, {0x249C, 0x24E9}, {0x2500, 0x2767}, {0x2794, 0x27C4},
|
||||
{0x27C7, 0x27E5}, {0x27F0, 0x2982}, {0x2999, 0x29D7}, {0x29DC, 0x29FB}, {0x29FE, 0x2B73}, {0x2B76, 0x2B95}, {0x2B97, 0x2BFF}, {0x2CE5, 0x2CEA}, {0x2E50, 0x2E51}, {0x2E80, 0x2E99}, {0x2E9B, 0x2EF3},
|
||||
{0x2F00, 0x2FD5}, {0x2FF0, 0x2FFB}, {0x3004, 0x3004}, {0x3012, 0x3013}, {0x3020, 0x3020}, {0x3036, 0x3037}, {0x303E, 0x303F}, {0x309B, 0x309C}, {0x3190, 0x3191}, {0x3196, 0x319F}, {0x31C0, 0x31E3},
|
||||
{0x3200, 0x321E}, {0x322A, 0x3247}, {0x3250, 0x3250}, {0x3260, 0x327F}, {0x328A, 0x32B0}, {0x32C0, 0x33FF}, {0x4DC0, 0x4DFF}, {0xA490, 0xA4C6}, {0xA700, 0xA716}, {0xA720, 0xA721}, {0xA789, 0xA78A},
|
||||
{0xA828, 0xA82B}, {0xA836, 0xA839}, {0xAA77, 0xAA79}, {0xAB5B, 0xAB5B}, {0xAB6A, 0xAB6B}, {0xFB29, 0xFB29}, {0xFBB2, 0xFBC1}, {0xFDFC, 0xFDFD}, {0xFE62, 0xFE62}, {0xFE64, 0xFE66}, {0xFE69, 0xFE69},
|
||||
{0xFF04, 0xFF04}, {0xFF0B, 0xFF0B}, {0xFF1C, 0xFF1E}, {0xFF3E, 0xFF3E}, {0xFF40, 0xFF40}, {0xFF5C, 0xFF5C}, {0xFF5E, 0xFF5E}, {0xFFE0, 0xFFE6}, {0xFFE8, 0xFFEE}, {0xFFFC, 0xFFFD}, {0x10137, 0x1013F},
|
||||
{0x10179, 0x10189}, {0x1018C, 0x1018E}, {0x10190, 0x1019C}, {0x101A0, 0x101A0}, {0x101D0, 0x101FC}, {0x10877, 0x10878}, {0x10AC8, 0x10AC8}, {0x1173F, 0x1173F}, {0x11FD5, 0x11FF1}, {0x16B3C, 0x16B3F},
|
||||
{0x16B45, 0x16B45}, {0x1BC9C, 0x1BC9C}, {0x1D000, 0x1D0F5}, {0x1D100, 0x1D126}, {0x1D129, 0x1D164}, {0x1D16A, 0x1D16C}, {0x1D183, 0x1D184}, {0x1D18C, 0x1D1A9}, {0x1D1AE, 0x1D1E8}, {0x1D200, 0x1D241},
|
||||
{0x1D245, 0x1D245}, {0x1D300, 0x1D356}, {0x1D6C1, 0x1D6C1}, {0x1D6DB, 0x1D6DB}, {0x1D6FB, 0x1D6FB}, {0x1D715, 0x1D715}, {0x1D735, 0x1D735}, {0x1D74F, 0x1D74F}, {0x1D76F, 0x1D76F}, {0x1D789, 0x1D789},
|
||||
{0x1D7A9, 0x1D7A9}, {0x1D7C3, 0x1D7C3}, {0x1D800, 0x1D9FF}, {0x1DA37, 0x1DA3A}, {0x1DA6D, 0x1DA74}, {0x1DA76, 0x1DA83}, {0x1DA85, 0x1DA86}, {0x1E14F, 0x1E14F}, {0x1E2FF, 0x1E2FF}, {0x1ECAC, 0x1ECAC},
|
||||
{0x1ECB0, 0x1ECB0}, {0x1ED2E, 0x1ED2E}, {0x1EEF0, 0x1EEF1}, {0x1F000, 0x1F02B}, {0x1F030, 0x1F093}, {0x1F0A0, 0x1F0AE}, {0x1F0B1, 0x1F0BF}, {0x1F0C1, 0x1F0CF}, {0x1F0D1, 0x1F0F5}, {0x1F10D, 0x1F1AD},
|
||||
{0x1F1E6, 0x1F202}, {0x1F210, 0x1F23B}, {0x1F240, 0x1F248}, {0x1F250, 0x1F251}, {0x1F260, 0x1F265}, {0x1F300, 0x1F6D7}, {0x1F6E0, 0x1F6EC}, {0x1F6F0, 0x1F6FC}, {0x1F700, 0x1F773}, {0x1F780, 0x1F7D8},
|
||||
{0x1F7E0, 0x1F7EB}, {0x1F800, 0x1F80B}, {0x1F810, 0x1F847}, {0x1F850, 0x1F859}, {0x1F860, 0x1F887}, {0x1F890, 0x1F8AD}, {0x1F8B0, 0x1F8B1}, {0x1F900, 0x1F978}, {0x1F97A, 0x1F9CB}, {0x1F9CD, 0x1FA53},
|
||||
{0x1FA60, 0x1FA6D}, {0x1FA70, 0x1FA74}, {0x1FA78, 0x1FA7A}, {0x1FA80, 0x1FA86}, {0x1FA90, 0x1FAA8}, {0x1FAB0, 0x1FAB6}, {0x1FAC0, 0x1FAC2}, {0x1FAD0, 0x1FAD6}, {0x1FB00, 0x1FB92}, {0x1FB94, 0x1FBCA},
|
||||
};
|
||||
|
||||
static const std::vector<std::pair<uint32_t, uint32_t>> control_ranges = {
|
||||
{0x0, 0x8}, {0xE, 0x1B}, {0x7F, 0x84}, {0x86, 0x9F}, {0xAD, 0xAD}, {0x378, 0x379}, {0x380, 0x383}, {0x38B, 0x38B}, {0x38D, 0x38D}, {0x3A2, 0x3A2}, {0x530, 0x530}, {0x557, 0x558}, {0x58B, 0x58C},
|
||||
{0x590, 0x590}, {0x5C8, 0x5CF}, {0x5EB, 0x5EE}, {0x5F5, 0x605}, {0x61C, 0x61D}, {0x6DD, 0x6DD}, {0x70E, 0x70F}, {0x74B, 0x74C}, {0x7B2, 0x7BF}, {0x7FB, 0x7FC}, {0x82E, 0x82F}, {0x83F, 0x83F},
|
||||
{0x85C, 0x85D}, {0x85F, 0x85F}, {0x86B, 0x89F}, {0x8B5, 0x8B5}, {0x8C8, 0x8D2}, {0x8E2, 0x8E2}, {0x984, 0x984}, {0x98D, 0x98E}, {0x991, 0x992}, {0x9A9, 0x9A9}, {0x9B1, 0x9B1}, {0x9B3, 0x9B5},
|
||||
{0x9BA, 0x9BB}, {0x9C5, 0x9C6}, {0x9C9, 0x9CA}, {0x9CF, 0x9D6}, {0x9D8, 0x9DB}, {0x9DE, 0x9DE}, {0x9E4, 0x9E5}, {0x9FF, 0xA00}, {0xA04, 0xA04}, {0xA0B, 0xA0E}, {0xA11, 0xA12}, {0xA29, 0xA29},
|
||||
{0xA31, 0xA31}, {0xA34, 0xA34}, {0xA37, 0xA37}, {0xA3A, 0xA3B}, {0xA3D, 0xA3D}, {0xA43, 0xA46}, {0xA49, 0xA4A}, {0xA4E, 0xA50}, {0xA52, 0xA58}, {0xA5D, 0xA5D}, {0xA5F, 0xA65}, {0xA77, 0xA80},
|
||||
{0xA84, 0xA84}, {0xA8E, 0xA8E}, {0xA92, 0xA92}, {0xAA9, 0xAA9}, {0xAB1, 0xAB1}, {0xAB4, 0xAB4}, {0xABA, 0xABB}, {0xAC6, 0xAC6}, {0xACA, 0xACA}, {0xACE, 0xACF}, {0xAD1, 0xADF}, {0xAE4, 0xAE5},
|
||||
{0xAF2, 0xAF8}, {0xB00, 0xB00}, {0xB04, 0xB04}, {0xB0D, 0xB0E}, {0xB11, 0xB12}, {0xB29, 0xB29}, {0xB31, 0xB31}, {0xB34, 0xB34}, {0xB3A, 0xB3B}, {0xB45, 0xB46}, {0xB49, 0xB4A}, {0xB4E, 0xB54},
|
||||
{0xB58, 0xB5B}, {0xB5E, 0xB5E}, {0xB64, 0xB65}, {0xB78, 0xB81}, {0xB84, 0xB84}, {0xB8B, 0xB8D}, {0xB91, 0xB91}, {0xB96, 0xB98}, {0xB9B, 0xB9B}, {0xB9D, 0xB9D}, {0xBA0, 0xBA2}, {0xBA5, 0xBA7},
|
||||
{0xBAB, 0xBAD}, {0xBBA, 0xBBD}, {0xBC3, 0xBC5}, {0xBC9, 0xBC9}, {0xBCE, 0xBCF}, {0xBD1, 0xBD6}, {0xBD8, 0xBE5}, {0xBFB, 0xBFF}, {0xC0D, 0xC0D}, {0xC11, 0xC11}, {0xC29, 0xC29}, {0xC3A, 0xC3C},
|
||||
{0xC45, 0xC45}, {0xC49, 0xC49}, {0xC4E, 0xC54}, {0xC57, 0xC57}, {0xC5B, 0xC5F}, {0xC64, 0xC65}, {0xC70, 0xC76}, {0xC8D, 0xC8D}, {0xC91, 0xC91}, {0xCA9, 0xCA9}, {0xCB4, 0xCB4}, {0xCBA, 0xCBB},
|
||||
{0xCC5, 0xCC5}, {0xCC9, 0xCC9}, {0xCCE, 0xCD4}, {0xCD7, 0xCDD}, {0xCDF, 0xCDF}, {0xCE4, 0xCE5}, {0xCF0, 0xCF0}, {0xCF3, 0xCFF}, {0xD0D, 0xD0D}, {0xD11, 0xD11}, {0xD45, 0xD45}, {0xD49, 0xD49},
|
||||
{0xD50, 0xD53}, {0xD64, 0xD65}, {0xD80, 0xD80}, {0xD84, 0xD84}, {0xD97, 0xD99}, {0xDB2, 0xDB2}, {0xDBC, 0xDBC}, {0xDBE, 0xDBF}, {0xDC7, 0xDC9}, {0xDCB, 0xDCE}, {0xDD5, 0xDD5}, {0xDD7, 0xDD7},
|
||||
{0xDE0, 0xDE5}, {0xDF0, 0xDF1}, {0xDF5, 0xE00}, {0xE3B, 0xE3E}, {0xE5C, 0xE80}, {0xE83, 0xE83}, {0xE85, 0xE85}, {0xE8B, 0xE8B}, {0xEA4, 0xEA4}, {0xEA6, 0xEA6}, {0xEBE, 0xEBF}, {0xEC5, 0xEC5},
|
||||
{0xEC7, 0xEC7}, {0xECE, 0xECF}, {0xEDA, 0xEDB}, {0xEE0, 0xEFF}, {0xF48, 0xF48}, {0xF6D, 0xF70}, {0xF98, 0xF98}, {0xFBD, 0xFBD}, {0xFCD, 0xFCD}, {0xFDB, 0xFFF}, {0x10C6, 0x10C6}, {0x10C8, 0x10CC},
|
||||
{0x10CE, 0x10CF}, {0x1249, 0x1249}, {0x124E, 0x124F}, {0x1257, 0x1257}, {0x1259, 0x1259}, {0x125E, 0x125F}, {0x1289, 0x1289}, {0x128E, 0x128F}, {0x12B1, 0x12B1}, {0x12B6, 0x12B7}, {0x12BF, 0x12BF},
|
||||
{0x12C1, 0x12C1}, {0x12C6, 0x12C7}, {0x12D7, 0x12D7}, {0x1311, 0x1311}, {0x1316, 0x1317}, {0x135B, 0x135C}, {0x137D, 0x137F}, {0x139A, 0x139F}, {0x13F6, 0x13F7}, {0x13FE, 0x13FF}, {0x169D, 0x169F},
|
||||
{0x16F9, 0x16FF}, {0x170D, 0x170D}, {0x1715, 0x171F}, {0x1737, 0x173F}, {0x1754, 0x175F}, {0x176D, 0x176D}, {0x1771, 0x1771}, {0x1774, 0x177F}, {0x17DE, 0x17DF}, {0x17EA, 0x17EF}, {0x17FA, 0x17FF},
|
||||
{0x180E, 0x180F}, {0x181A, 0x181F}, {0x1879, 0x187F}, {0x18AB, 0x18AF}, {0x18F6, 0x18FF}, {0x191F, 0x191F}, {0x192C, 0x192F}, {0x193C, 0x193F}, {0x1941, 0x1943}, {0x196E, 0x196F}, {0x1975, 0x197F},
|
||||
{0x19AC, 0x19AF}, {0x19CA, 0x19CF}, {0x19DB, 0x19DD}, {0x1A1C, 0x1A1D}, {0x1A5F, 0x1A5F}, {0x1A7D, 0x1A7E}, {0x1A8A, 0x1A8F}, {0x1A9A, 0x1A9F}, {0x1AAE, 0x1AAF}, {0x1AC1, 0x1AFF}, {0x1B4C, 0x1B4F},
|
||||
{0x1B7D, 0x1B7F}, {0x1BF4, 0x1BFB}, {0x1C38, 0x1C3A}, {0x1C4A, 0x1C4C}, {0x1C89, 0x1C8F}, {0x1CBB, 0x1CBC}, {0x1CC8, 0x1CCF}, {0x1CFB, 0x1CFF}, {0x1DFA, 0x1DFA}, {0x1F16, 0x1F17}, {0x1F1E, 0x1F1F},
|
||||
{0x1F46, 0x1F47}, {0x1F4E, 0x1F4F}, {0x1F58, 0x1F58}, {0x1F5A, 0x1F5A}, {0x1F5C, 0x1F5C}, {0x1F5E, 0x1F5E}, {0x1F7E, 0x1F7F}, {0x1FB5, 0x1FB5}, {0x1FC5, 0x1FC5}, {0x1FD4, 0x1FD5}, {0x1FDC, 0x1FDC},
|
||||
{0x1FF0, 0x1FF1}, {0x1FF5, 0x1FF5}, {0x1FFF, 0x1FFF}, {0x200B, 0x200F}, {0x202A, 0x202E}, {0x2060, 0x206F}, {0x2072, 0x2073}, {0x208F, 0x208F}, {0x209D, 0x209F}, {0x20C0, 0x20CF}, {0x20F1, 0x20FF},
|
||||
{0x218C, 0x218F}, {0x2427, 0x243F}, {0x244B, 0x245F}, {0x2B74, 0x2B75}, {0x2B96, 0x2B96}, {0x2C2F, 0x2C2F}, {0x2C5F, 0x2C5F}, {0x2CF4, 0x2CF8}, {0x2D26, 0x2D26}, {0x2D28, 0x2D2C}, {0x2D2E, 0x2D2F},
|
||||
{0x2D68, 0x2D6E}, {0x2D71, 0x2D7E}, {0x2D97, 0x2D9F}, {0x2DA7, 0x2DA7}, {0x2DAF, 0x2DAF}, {0x2DB7, 0x2DB7}, {0x2DBF, 0x2DBF}, {0x2DC7, 0x2DC7}, {0x2DCF, 0x2DCF}, {0x2DD7, 0x2DD7}, {0x2DDF, 0x2DDF},
|
||||
{0x2E53, 0x2E7F}, {0x2E9A, 0x2E9A}, {0x2EF4, 0x2EFF}, {0x2FD6, 0x2FEF}, {0x2FFC, 0x2FFF}, {0x3040, 0x3040}, {0x3097, 0x3098}, {0x3100, 0x3104}, {0x3130, 0x3130}, {0x318F, 0x318F}, {0x31E4, 0x31EF},
|
||||
{0x321F, 0x321F}, {0x9FFD, 0x9FFF}, {0xA48D, 0xA48F}, {0xA4C7, 0xA4CF}, {0xA62C, 0xA63F}, {0xA6F8, 0xA6FF}, {0xA7C0, 0xA7C1}, {0xA7CB, 0xA7F4}, {0xA82D, 0xA82F}, {0xA83A, 0xA83F}, {0xA878, 0xA87F},
|
||||
{0xA8C6, 0xA8CD}, {0xA8DA, 0xA8DF}, {0xA954, 0xA95E}, {0xA97D, 0xA97F}, {0xA9CE, 0xA9CE}, {0xA9DA, 0xA9DD}, {0xA9FF, 0xA9FF}, {0xAA37, 0xAA3F}, {0xAA4E, 0xAA4F}, {0xAA5A, 0xAA5B}, {0xAAC3, 0xAADA},
|
||||
{0xAAF7, 0xAB00}, {0xAB07, 0xAB08}, {0xAB0F, 0xAB10}, {0xAB17, 0xAB1F}, {0xAB27, 0xAB27}, {0xAB2F, 0xAB2F}, {0xAB6C, 0xAB6F}, {0xABEE, 0xABEF}, {0xABFA, 0xABFF}, {0xD7A4, 0xD7AF}, {0xD7C7, 0xD7CA},
|
||||
{0xD7FC, 0xF8FF}, {0xFA6E, 0xFA6F}, {0xFADA, 0xFAFF}, {0xFB07, 0xFB12}, {0xFB18, 0xFB1C}, {0xFB37, 0xFB37}, {0xFB3D, 0xFB3D}, {0xFB3F, 0xFB3F}, {0xFB42, 0xFB42}, {0xFB45, 0xFB45}, {0xFBC2, 0xFBD2},
|
||||
{0xFD40, 0xFD4F}, {0xFD90, 0xFD91}, {0xFDC8, 0xFDEF}, {0xFDFE, 0xFDFF}, {0xFE1A, 0xFE1F}, {0xFE53, 0xFE53}, {0xFE67, 0xFE67}, {0xFE6C, 0xFE6F}, {0xFE75, 0xFE75}, {0xFEFD, 0xFF00}, {0xFFBF, 0xFFC1},
|
||||
{0xFFC8, 0xFFC9}, {0xFFD0, 0xFFD1}, {0xFFD8, 0xFFD9}, {0xFFDD, 0xFFDF}, {0xFFE7, 0xFFE7}, {0xFFEF, 0xFFFB}, {0xFFFE, 0xFFFF}, {0x1000C, 0x1000C}, {0x10027, 0x10027}, {0x1003B, 0x1003B},
|
||||
{0x1003E, 0x1003E}, {0x1004E, 0x1004F}, {0x1005E, 0x1007F}, {0x100FB, 0x100FF}, {0x10103, 0x10106}, {0x10134, 0x10136}, {0x1018F, 0x1018F}, {0x1019D, 0x1019F}, {0x101A1, 0x101CF}, {0x101FE, 0x1027F},
|
||||
{0x1029D, 0x1029F}, {0x102D1, 0x102DF}, {0x102FC, 0x102FF}, {0x10324, 0x1032C}, {0x1034B, 0x1034F}, {0x1037B, 0x1037F}, {0x1039E, 0x1039E}, {0x103C4, 0x103C7}, {0x103D6, 0x103FF}, {0x1049E, 0x1049F},
|
||||
{0x104AA, 0x104AF}, {0x104D4, 0x104D7}, {0x104FC, 0x104FF}, {0x10528, 0x1052F}, {0x10564, 0x1056E}, {0x10570, 0x105FF}, {0x10737, 0x1073F}, {0x10756, 0x1075F}, {0x10768, 0x107FF}, {0x10806, 0x10807},
|
||||
{0x10809, 0x10809}, {0x10836, 0x10836}, {0x10839, 0x1083B}, {0x1083D, 0x1083E}, {0x10856, 0x10856}, {0x1089F, 0x108A6}, {0x108B0, 0x108DF}, {0x108F3, 0x108F3}, {0x108F6, 0x108FA}, {0x1091C, 0x1091E},
|
||||
{0x1093A, 0x1093E}, {0x10940, 0x1097F}, {0x109B8, 0x109BB}, {0x109D0, 0x109D1}, {0x10A04, 0x10A04}, {0x10A07, 0x10A0B}, {0x10A14, 0x10A14}, {0x10A18, 0x10A18}, {0x10A36, 0x10A37}, {0x10A3B, 0x10A3E},
|
||||
{0x10A49, 0x10A4F}, {0x10A59, 0x10A5F}, {0x10AA0, 0x10ABF}, {0x10AE7, 0x10AEA}, {0x10AF7, 0x10AFF}, {0x10B36, 0x10B38}, {0x10B56, 0x10B57}, {0x10B73, 0x10B77}, {0x10B92, 0x10B98}, {0x10B9D, 0x10BA8},
|
||||
{0x10BB0, 0x10BFF}, {0x10C49, 0x10C7F}, {0x10CB3, 0x10CBF}, {0x10CF3, 0x10CF9}, {0x10D28, 0x10D2F}, {0x10D3A, 0x10E5F}, {0x10E7F, 0x10E7F}, {0x10EAA, 0x10EAA}, {0x10EAE, 0x10EAF}, {0x10EB2, 0x10EFF},
|
||||
{0x10F28, 0x10F2F}, {0x10F5A, 0x10FAF}, {0x10FCC, 0x10FDF}, {0x10FF7, 0x10FFF}, {0x1104E, 0x11051}, {0x11070, 0x1107E}, {0x110BD, 0x110BD}, {0x110C2, 0x110CF}, {0x110E9, 0x110EF}, {0x110FA, 0x110FF},
|
||||
{0x11135, 0x11135}, {0x11148, 0x1114F}, {0x11177, 0x1117F}, {0x111E0, 0x111E0}, {0x111F5, 0x111FF}, {0x11212, 0x11212}, {0x1123F, 0x1127F}, {0x11287, 0x11287}, {0x11289, 0x11289}, {0x1128E, 0x1128E},
|
||||
{0x1129E, 0x1129E}, {0x112AA, 0x112AF}, {0x112EB, 0x112EF}, {0x112FA, 0x112FF}, {0x11304, 0x11304}, {0x1130D, 0x1130E}, {0x11311, 0x11312}, {0x11329, 0x11329}, {0x11331, 0x11331}, {0x11334, 0x11334},
|
||||
{0x1133A, 0x1133A}, {0x11345, 0x11346}, {0x11349, 0x1134A}, {0x1134E, 0x1134F}, {0x11351, 0x11356}, {0x11358, 0x1135C}, {0x11364, 0x11365}, {0x1136D, 0x1136F}, {0x11375, 0x113FF}, {0x1145C, 0x1145C},
|
||||
{0x11462, 0x1147F}, {0x114C8, 0x114CF}, {0x114DA, 0x1157F}, {0x115B6, 0x115B7}, {0x115DE, 0x115FF}, {0x11645, 0x1164F}, {0x1165A, 0x1165F}, {0x1166D, 0x1167F}, {0x116B9, 0x116BF}, {0x116CA, 0x116FF},
|
||||
{0x1171B, 0x1171C}, {0x1172C, 0x1172F}, {0x11740, 0x117FF}, {0x1183C, 0x1189F}, {0x118F3, 0x118FE}, {0x11907, 0x11908}, {0x1190A, 0x1190B}, {0x11914, 0x11914}, {0x11917, 0x11917}, {0x11936, 0x11936},
|
||||
{0x11939, 0x1193A}, {0x11947, 0x1194F}, {0x1195A, 0x1199F}, {0x119A8, 0x119A9}, {0x119D8, 0x119D9}, {0x119E5, 0x119FF}, {0x11A48, 0x11A4F}, {0x11AA3, 0x11ABF}, {0x11AF9, 0x11BFF}, {0x11C09, 0x11C09},
|
||||
{0x11C37, 0x11C37}, {0x11C46, 0x11C4F}, {0x11C6D, 0x11C6F}, {0x11C90, 0x11C91}, {0x11CA8, 0x11CA8}, {0x11CB7, 0x11CFF}, {0x11D07, 0x11D07}, {0x11D0A, 0x11D0A}, {0x11D37, 0x11D39}, {0x11D3B, 0x11D3B},
|
||||
{0x11D3E, 0x11D3E}, {0x11D48, 0x11D4F}, {0x11D5A, 0x11D5F}, {0x11D66, 0x11D66}, {0x11D69, 0x11D69}, {0x11D8F, 0x11D8F}, {0x11D92, 0x11D92}, {0x11D99, 0x11D9F}, {0x11DAA, 0x11EDF}, {0x11EF9, 0x11FAF},
|
||||
{0x11FB1, 0x11FBF}, {0x11FF2, 0x11FFE}, {0x1239A, 0x123FF}, {0x1246F, 0x1246F}, {0x12475, 0x1247F}, {0x12544, 0x12FFF}, {0x1342F, 0x143FF}, {0x14647, 0x167FF}, {0x16A39, 0x16A3F}, {0x16A5F, 0x16A5F},
|
||||
{0x16A6A, 0x16A6D}, {0x16A70, 0x16ACF}, {0x16AEE, 0x16AEF}, {0x16AF6, 0x16AFF}, {0x16B46, 0x16B4F}, {0x16B5A, 0x16B5A}, {0x16B62, 0x16B62}, {0x16B78, 0x16B7C}, {0x16B90, 0x16E3F}, {0x16E9B, 0x16EFF},
|
||||
{0x16F4B, 0x16F4E}, {0x16F88, 0x16F8E}, {0x16FA0, 0x16FDF}, {0x16FE5, 0x16FEF}, {0x16FF2, 0x16FFF}, {0x187F8, 0x187FF}, {0x18CD6, 0x18CFF}, {0x18D09, 0x1AFFF}, {0x1B11F, 0x1B14F}, {0x1B153, 0x1B163},
|
||||
{0x1B168, 0x1B16F}, {0x1B2FC, 0x1BBFF}, {0x1BC6B, 0x1BC6F}, {0x1BC7D, 0x1BC7F}, {0x1BC89, 0x1BC8F}, {0x1BC9A, 0x1BC9B}, {0x1BCA0, 0x1CFFF}, {0x1D0F6, 0x1D0FF}, {0x1D127, 0x1D128}, {0x1D173, 0x1D17A},
|
||||
{0x1D1E9, 0x1D1FF}, {0x1D246, 0x1D2DF}, {0x1D2F4, 0x1D2FF}, {0x1D357, 0x1D35F}, {0x1D379, 0x1D3FF}, {0x1D455, 0x1D455}, {0x1D49D, 0x1D49D}, {0x1D4A0, 0x1D4A1}, {0x1D4A3, 0x1D4A4}, {0x1D4A7, 0x1D4A8},
|
||||
{0x1D4AD, 0x1D4AD}, {0x1D4BA, 0x1D4BA}, {0x1D4BC, 0x1D4BC}, {0x1D4C4, 0x1D4C4}, {0x1D506, 0x1D506}, {0x1D50B, 0x1D50C}, {0x1D515, 0x1D515}, {0x1D51D, 0x1D51D}, {0x1D53A, 0x1D53A}, {0x1D53F, 0x1D53F},
|
||||
{0x1D545, 0x1D545}, {0x1D547, 0x1D549}, {0x1D551, 0x1D551}, {0x1D6A6, 0x1D6A7}, {0x1D7CC, 0x1D7CD}, {0x1DA8C, 0x1DA9A}, {0x1DAA0, 0x1DAA0}, {0x1DAB0, 0x1DFFF}, {0x1E007, 0x1E007}, {0x1E019, 0x1E01A},
|
||||
{0x1E022, 0x1E022}, {0x1E025, 0x1E025}, {0x1E02B, 0x1E0FF}, {0x1E12D, 0x1E12F}, {0x1E13E, 0x1E13F}, {0x1E14A, 0x1E14D}, {0x1E150, 0x1E2BF}, {0x1E2FA, 0x1E2FE}, {0x1E300, 0x1E7FF}, {0x1E8C5, 0x1E8C6},
|
||||
{0x1E8D7, 0x1E8FF}, {0x1E94C, 0x1E94F}, {0x1E95A, 0x1E95D}, {0x1E960, 0x1EC70}, {0x1ECB5, 0x1ED00}, {0x1ED3E, 0x1EDFF}, {0x1EE04, 0x1EE04}, {0x1EE20, 0x1EE20}, {0x1EE23, 0x1EE23}, {0x1EE25, 0x1EE26},
|
||||
{0x1EE28, 0x1EE28}, {0x1EE33, 0x1EE33}, {0x1EE38, 0x1EE38}, {0x1EE3A, 0x1EE3A}, {0x1EE3C, 0x1EE41}, {0x1EE43, 0x1EE46}, {0x1EE48, 0x1EE48}, {0x1EE4A, 0x1EE4A}, {0x1EE4C, 0x1EE4C}, {0x1EE50, 0x1EE50},
|
||||
{0x1EE53, 0x1EE53}, {0x1EE55, 0x1EE56}, {0x1EE58, 0x1EE58}, {0x1EE5A, 0x1EE5A}, {0x1EE5C, 0x1EE5C}, {0x1EE5E, 0x1EE5E}, {0x1EE60, 0x1EE60}, {0x1EE63, 0x1EE63}, {0x1EE65, 0x1EE66}, {0x1EE6B, 0x1EE6B},
|
||||
{0x1EE73, 0x1EE73}, {0x1EE78, 0x1EE78}, {0x1EE7D, 0x1EE7D}, {0x1EE7F, 0x1EE7F}, {0x1EE8A, 0x1EE8A}, {0x1EE9C, 0x1EEA0}, {0x1EEA4, 0x1EEA4}, {0x1EEAA, 0x1EEAA}, {0x1EEBC, 0x1EEEF}, {0x1EEF2, 0x1EFFF},
|
||||
{0x1F02C, 0x1F02F}, {0x1F094, 0x1F09F}, {0x1F0AF, 0x1F0B0}, {0x1F0C0, 0x1F0C0}, {0x1F0D0, 0x1F0D0}, {0x1F0F6, 0x1F0FF}, {0x1F1AE, 0x1F1E5}, {0x1F203, 0x1F20F}, {0x1F23C, 0x1F23F}, {0x1F249, 0x1F24F},
|
||||
{0x1F252, 0x1F25F}, {0x1F266, 0x1F2FF}, {0x1F6D8, 0x1F6DF}, {0x1F6ED, 0x1F6EF}, {0x1F6FD, 0x1F6FF}, {0x1F774, 0x1F77F}, {0x1F7D9, 0x1F7DF}, {0x1F7EC, 0x1F7FF}, {0x1F80C, 0x1F80F}, {0x1F848, 0x1F84F},
|
||||
{0x1F85A, 0x1F85F}, {0x1F888, 0x1F88F}, {0x1F8AE, 0x1F8AF}, {0x1F8B2, 0x1F8FF}, {0x1F979, 0x1F979}, {0x1F9CC, 0x1F9CC}, {0x1FA54, 0x1FA5F}, {0x1FA6E, 0x1FA6F}, {0x1FA75, 0x1FA77}, {0x1FA7B, 0x1FA7F},
|
||||
{0x1FA87, 0x1FA8F}, {0x1FAA9, 0x1FAAF}, {0x1FAB7, 0x1FABF}, {0x1FAC3, 0x1FACF}, {0x1FAD7, 0x1FAFF}, {0x1FB93, 0x1FB93}, {0x1FBCB, 0x1FBEF}, {0x1FBFA, 0x1FFFF}, {0x2A6DE, 0x2A6FF}, {0x2B735, 0x2B73F},
|
||||
{0x2B81E, 0x2B81F}, {0x2CEA2, 0x2CEAF}, {0x2EBE1, 0x2F7FF}, {0x2FA1E, 0x2FFFF}, {0x3134B, 0xE00FF}, {0xE01F0, 0x10FFFF},
|
||||
};
|
||||
|
||||
static const std::multimap<uint32_t, uint32_t> nfd_map = {
|
||||
{0xC0, 0x41}, {0xC0, 0x300}, {0xC1, 0x41}, {0xC1, 0x301}, {0xC2, 0x41}, {0xC2, 0x302}, {0xC3, 0x41}, {0xC3, 0x303}, {0xC4, 0x41}, {0xC4, 0x308}, {0xC5, 0x41}, {0xC5, 0x30A}, {0xC7, 0x43},
|
||||
{0xC7, 0x327}, {0xC8, 0x45}, {0xC8, 0x300}, {0xC9, 0x45}, {0xC9, 0x301}, {0xCA, 0x45}, {0xCA, 0x302}, {0xCB, 0x45}, {0xCB, 0x308}, {0xCC, 0x49}, {0xCC, 0x300}, {0xCD, 0x49}, {0xCD, 0x301},
|
||||
{0xCE, 0x49}, {0xCE, 0x302}, {0xCF, 0x49}, {0xCF, 0x308}, {0xD1, 0x4E}, {0xD1, 0x303}, {0xD2, 0x4F}, {0xD2, 0x300}, {0xD3, 0x4F}, {0xD3, 0x301}, {0xD4, 0x4F}, {0xD4, 0x302}, {0xD5, 0x4F},
|
||||
{0xD5, 0x303}, {0xD6, 0x4F}, {0xD6, 0x308}, {0xD9, 0x55}, {0xD9, 0x300}, {0xDA, 0x55}, {0xDA, 0x301}, {0xDB, 0x55}, {0xDB, 0x302}, {0xDC, 0x55}, {0xDC, 0x308}, {0xDD, 0x59}, {0xDD, 0x301},
|
||||
{0xE0, 0x61}, {0xE0, 0x300}, {0xE1, 0x61}, {0xE1, 0x301}, {0xE2, 0x61}, {0xE2, 0x302}, {0xE3, 0x61}, {0xE3, 0x303}, {0xE4, 0x61}, {0xE4, 0x308}, {0xE5, 0x61}, {0xE5, 0x30A}, {0xE7, 0x63},
|
||||
{0xE7, 0x327}, {0xE8, 0x65}, {0xE8, 0x300}, {0xE9, 0x65}, {0xE9, 0x301}, {0xEA, 0x65}, {0xEA, 0x302}, {0xEB, 0x65}, {0xEB, 0x308}, {0xEC, 0x69}, {0xEC, 0x300}, {0xED, 0x69}, {0xED, 0x301},
|
||||
{0xEE, 0x69}, {0xEE, 0x302}, {0xEF, 0x69}, {0xEF, 0x308}, {0xF1, 0x6E}, {0xF1, 0x303}, {0xF2, 0x6F}, {0xF2, 0x300}, {0xF3, 0x6F}, {0xF3, 0x301}, {0xF4, 0x6F}, {0xF4, 0x302}, {0xF5, 0x6F},
|
||||
{0xF5, 0x303}, {0xF6, 0x6F}, {0xF6, 0x308}, {0xF9, 0x75}, {0xF9, 0x300}, {0xFA, 0x75}, {0xFA, 0x301}, {0xFB, 0x75}, {0xFB, 0x302}, {0xFC, 0x75}, {0xFC, 0x308}, {0xFD, 0x79}, {0xFD, 0x301},
|
||||
{0xFF, 0x79}, {0xFF, 0x308}, {0x100, 0x41}, {0x100, 0x304}, {0x101, 0x61}, {0x101, 0x304}, {0x102, 0x41}, {0x102, 0x306}, {0x103, 0x61}, {0x103, 0x306}, {0x104, 0x41}, {0x104, 0x328}, {0x105, 0x61},
|
||||
{0x105, 0x328}, {0x106, 0x43}, {0x106, 0x301}, {0x107, 0x63}, {0x107, 0x301}, {0x108, 0x43}, {0x108, 0x302}, {0x109, 0x63}, {0x109, 0x302}, {0x10A, 0x43}, {0x10A, 0x307}, {0x10B, 0x63},
|
||||
{0x10B, 0x307}, {0x10C, 0x43}, {0x10C, 0x30C}, {0x10D, 0x63}, {0x10D, 0x30C}, {0x10E, 0x44}, {0x10E, 0x30C}, {0x10F, 0x64}, {0x10F, 0x30C}, {0x112, 0x45}, {0x112, 0x304}, {0x113, 0x65},
|
||||
{0x113, 0x304}, {0x114, 0x45}, {0x114, 0x306}, {0x115, 0x65}, {0x115, 0x306}, {0x116, 0x45}, {0x116, 0x307}, {0x117, 0x65}, {0x117, 0x307}, {0x118, 0x45}, {0x118, 0x328}, {0x119, 0x65},
|
||||
{0x119, 0x328}, {0x11A, 0x45}, {0x11A, 0x30C}, {0x11B, 0x65}, {0x11B, 0x30C}, {0x11C, 0x47}, {0x11C, 0x302}, {0x11D, 0x67}, {0x11D, 0x302}, {0x11E, 0x47}, {0x11E, 0x306}, {0x11F, 0x67},
|
||||
{0x11F, 0x306}, {0x120, 0x47}, {0x120, 0x307}, {0x121, 0x67}, {0x121, 0x307}, {0x122, 0x47}, {0x122, 0x327}, {0x123, 0x67}, {0x123, 0x327}, {0x124, 0x48}, {0x124, 0x302}, {0x125, 0x68},
|
||||
{0x125, 0x302}, {0x128, 0x49}, {0x128, 0x303}, {0x129, 0x69}, {0x129, 0x303}, {0x12A, 0x49}, {0x12A, 0x304}, {0x12B, 0x69}, {0x12B, 0x304}, {0x12C, 0x49}, {0x12C, 0x306}, {0x12D, 0x69},
|
||||
{0x12D, 0x306}, {0x12E, 0x49}, {0x12E, 0x328}, {0x12F, 0x69}, {0x12F, 0x328}, {0x130, 0x49}, {0x130, 0x307}, {0x134, 0x4A}, {0x134, 0x302}, {0x135, 0x6A}, {0x135, 0x302}, {0x136, 0x4B},
|
||||
{0x136, 0x327}, {0x137, 0x6B}, {0x137, 0x327}, {0x139, 0x4C}, {0x139, 0x301}, {0x13A, 0x6C}, {0x13A, 0x301}, {0x13B, 0x4C}, {0x13B, 0x327}, {0x13C, 0x6C}, {0x13C, 0x327}, {0x13D, 0x4C},
|
||||
{0x13D, 0x30C}, {0x13E, 0x6C}, {0x13E, 0x30C}, {0x143, 0x4E}, {0x143, 0x301}, {0x144, 0x6E}, {0x144, 0x301}, {0x145, 0x4E}, {0x145, 0x327}, {0x146, 0x6E}, {0x146, 0x327}, {0x147, 0x4E},
|
||||
{0x147, 0x30C}, {0x148, 0x6E}, {0x148, 0x30C}, {0x14C, 0x4F}, {0x14C, 0x304}, {0x14D, 0x6F}, {0x14D, 0x304}, {0x14E, 0x4F}, {0x14E, 0x306}, {0x14F, 0x6F}, {0x14F, 0x306}, {0x150, 0x4F},
|
||||
{0x150, 0x30B}, {0x151, 0x6F}, {0x151, 0x30B}, {0x154, 0x52}, {0x154, 0x301}, {0x155, 0x72}, {0x155, 0x301}, {0x156, 0x52}, {0x156, 0x327}, {0x157, 0x72}, {0x157, 0x327}, {0x158, 0x52},
|
||||
{0x158, 0x30C}, {0x159, 0x72}, {0x159, 0x30C}, {0x15A, 0x53}, {0x15A, 0x301}, {0x15B, 0x73}, {0x15B, 0x301}, {0x15C, 0x53}, {0x15C, 0x302}, {0x15D, 0x73}, {0x15D, 0x302}, {0x15E, 0x53},
|
||||
{0x15E, 0x327}, {0x15F, 0x73}, {0x15F, 0x327}, {0x160, 0x53}, {0x160, 0x30C}, {0x161, 0x73}, {0x161, 0x30C}, {0x162, 0x54}, {0x162, 0x327}, {0x163, 0x74}, {0x163, 0x327}, {0x164, 0x54},
|
||||
{0x164, 0x30C}, {0x165, 0x74}, {0x165, 0x30C}, {0x168, 0x55}, {0x168, 0x303}, {0x169, 0x75}, {0x169, 0x303}, {0x16A, 0x55}, {0x16A, 0x304}, {0x16B, 0x75}, {0x16B, 0x304}, {0x16C, 0x55},
|
||||
{0x16C, 0x306}, {0x16D, 0x75}, {0x16D, 0x306}, {0x16E, 0x55}, {0x16E, 0x30A}, {0x16F, 0x75}, {0x16F, 0x30A}, {0x170, 0x55}, {0x170, 0x30B}, {0x171, 0x75}, {0x171, 0x30B}, {0x172, 0x55},
|
||||
{0x172, 0x328}, {0x173, 0x75}, {0x173, 0x328}, {0x174, 0x57}, {0x174, 0x302}, {0x175, 0x77}, {0x175, 0x302}, {0x176, 0x59}, {0x176, 0x302}, {0x177, 0x79}, {0x177, 0x302}, {0x178, 0x59},
|
||||
{0x178, 0x308}, {0x179, 0x5A}, {0x179, 0x301}, {0x17A, 0x7A}, {0x17A, 0x301}, {0x17B, 0x5A}, {0x17B, 0x307}, {0x17C, 0x7A}, {0x17C, 0x307}, {0x17D, 0x5A}, {0x17D, 0x30C}, {0x17E, 0x7A},
|
||||
{0x17E, 0x30C}, {0x1A0, 0x4F}, {0x1A0, 0x31B}, {0x1A1, 0x6F}, {0x1A1, 0x31B}, {0x1AF, 0x55}, {0x1AF, 0x31B}, {0x1B0, 0x75}, {0x1B0, 0x31B}, {0x1CD, 0x41}, {0x1CD, 0x30C}, {0x1CE, 0x61},
|
||||
{0x1CE, 0x30C}, {0x1CF, 0x49}, {0x1CF, 0x30C}, {0x1D0, 0x69}, {0x1D0, 0x30C}, {0x1D1, 0x4F}, {0x1D1, 0x30C}, {0x1D2, 0x6F}, {0x1D2, 0x30C}, {0x1D3, 0x55}, {0x1D3, 0x30C}, {0x1D4, 0x75},
|
||||
{0x1D4, 0x30C}, {0x1D5, 0x55}, {0x1D5, 0x308}, {0x1D5, 0x304}, {0x1D6, 0x75}, {0x1D6, 0x308}, {0x1D6, 0x304}, {0x1D7, 0x55}, {0x1D7, 0x308}, {0x1D7, 0x301}, {0x1D8, 0x75}, {0x1D8, 0x308},
|
||||
{0x1D8, 0x301}, {0x1D9, 0x55}, {0x1D9, 0x308}, {0x1D9, 0x30C}, {0x1DA, 0x75}, {0x1DA, 0x308}, {0x1DA, 0x30C}, {0x1DB, 0x55}, {0x1DB, 0x308}, {0x1DB, 0x300}, {0x1DC, 0x75}, {0x1DC, 0x308},
|
||||
{0x1DC, 0x300}, {0x1DE, 0x41}, {0x1DE, 0x308}, {0x1DE, 0x304}, {0x1DF, 0x61}, {0x1DF, 0x308}, {0x1DF, 0x304}, {0x1E0, 0x41}, {0x1E0, 0x307}, {0x1E0, 0x304}, {0x1E1, 0x61}, {0x1E1, 0x307},
|
||||
{0x1E1, 0x304}, {0x1E2, 0xC6}, {0x1E2, 0x304}, {0x1E3, 0xE6}, {0x1E3, 0x304}, {0x1E6, 0x47}, {0x1E6, 0x30C}, {0x1E7, 0x67}, {0x1E7, 0x30C}, {0x1E8, 0x4B}, {0x1E8, 0x30C}, {0x1E9, 0x6B},
|
||||
{0x1E9, 0x30C}, {0x1EA, 0x4F}, {0x1EA, 0x328}, {0x1EB, 0x6F}, {0x1EB, 0x328}, {0x1EC, 0x4F}, {0x1EC, 0x328}, {0x1EC, 0x304}, {0x1ED, 0x6F}, {0x1ED, 0x328}, {0x1ED, 0x304}, {0x1EE, 0x1B7},
|
||||
{0x1EE, 0x30C}, {0x1EF, 0x292}, {0x1EF, 0x30C}, {0x1F0, 0x6A}, {0x1F0, 0x30C}, {0x1F4, 0x47}, {0x1F4, 0x301}, {0x1F5, 0x67}, {0x1F5, 0x301}, {0x1F8, 0x4E}, {0x1F8, 0x300}, {0x1F9, 0x6E},
|
||||
{0x1F9, 0x300}, {0x1FA, 0x41}, {0x1FA, 0x30A}, {0x1FA, 0x301}, {0x1FB, 0x61}, {0x1FB, 0x30A}, {0x1FB, 0x301}, {0x1FC, 0xC6}, {0x1FC, 0x301}, {0x1FD, 0xE6}, {0x1FD, 0x301}, {0x1FE, 0xD8},
|
||||
{0x1FE, 0x301}, {0x1FF, 0xF8}, {0x1FF, 0x301}, {0x200, 0x41}, {0x200, 0x30F}, {0x201, 0x61}, {0x201, 0x30F}, {0x202, 0x41}, {0x202, 0x311}, {0x203, 0x61}, {0x203, 0x311}, {0x204, 0x45},
|
||||
{0x204, 0x30F}, {0x205, 0x65}, {0x205, 0x30F}, {0x206, 0x45}, {0x206, 0x311}, {0x207, 0x65}, {0x207, 0x311}, {0x208, 0x49}, {0x208, 0x30F}, {0x209, 0x69}, {0x209, 0x30F}, {0x20A, 0x49},
|
||||
{0x20A, 0x311}, {0x20B, 0x69}, {0x20B, 0x311}, {0x20C, 0x4F}, {0x20C, 0x30F}, {0x20D, 0x6F}, {0x20D, 0x30F}, {0x20E, 0x4F}, {0x20E, 0x311}, {0x20F, 0x6F}, {0x20F, 0x311}, {0x210, 0x52},
|
||||
{0x210, 0x30F}, {0x211, 0x72}, {0x211, 0x30F}, {0x212, 0x52}, {0x212, 0x311}, {0x213, 0x72}, {0x213, 0x311}, {0x214, 0x55}, {0x214, 0x30F}, {0x215, 0x75}, {0x215, 0x30F}, {0x216, 0x55},
|
||||
{0x216, 0x311}, {0x217, 0x75}, {0x217, 0x311}, {0x218, 0x53}, {0x218, 0x326}, {0x219, 0x73}, {0x219, 0x326}, {0x21A, 0x54}, {0x21A, 0x326}, {0x21B, 0x74}, {0x21B, 0x326}, {0x21E, 0x48},
|
||||
{0x21E, 0x30C}, {0x21F, 0x68}, {0x21F, 0x30C}, {0x226, 0x41}, {0x226, 0x307}, {0x227, 0x61}, {0x227, 0x307}, {0x228, 0x45}, {0x228, 0x327}, {0x229, 0x65}, {0x229, 0x327}, {0x22A, 0x4F},
|
||||
{0x22A, 0x308}, {0x22A, 0x304}, {0x22B, 0x6F}, {0x22B, 0x308}, {0x22B, 0x304}, {0x22C, 0x4F}, {0x22C, 0x303}, {0x22C, 0x304}, {0x22D, 0x6F}, {0x22D, 0x303}, {0x22D, 0x304}, {0x22E, 0x4F},
|
||||
{0x22E, 0x307}, {0x22F, 0x6F}, {0x22F, 0x307}, {0x230, 0x4F}, {0x230, 0x307}, {0x230, 0x304}, {0x231, 0x6F}, {0x231, 0x307}, {0x231, 0x304}, {0x232, 0x59}, {0x232, 0x304}, {0x233, 0x79},
|
||||
{0x233, 0x304}, {0x340, 0x300}, {0x341, 0x301}, {0x343, 0x313}, {0x344, 0x308}, {0x344, 0x301}, {0x374, 0x2B9}, {0x37E, 0x3B}, {0x385, 0xA8}, {0x385, 0x301}, {0x386, 0x391}, {0x386, 0x301},
|
||||
{0x387, 0xB7}, {0x388, 0x395}, {0x388, 0x301}, {0x389, 0x397}, {0x389, 0x301}, {0x38A, 0x399}, {0x38A, 0x301}, {0x38C, 0x39F}, {0x38C, 0x301}, {0x38E, 0x3A5}, {0x38E, 0x301}, {0x38F, 0x3A9},
|
||||
{0x38F, 0x301}, {0x390, 0x3B9}, {0x390, 0x308}, {0x390, 0x301}, {0x3AA, 0x399}, {0x3AA, 0x308}, {0x3AB, 0x3A5}, {0x3AB, 0x308}, {0x3AC, 0x3B1}, {0x3AC, 0x301}, {0x3AD, 0x3B5}, {0x3AD, 0x301},
|
||||
{0x3AE, 0x3B7}, {0x3AE, 0x301}, {0x3AF, 0x3B9}, {0x3AF, 0x301}, {0x3B0, 0x3C5}, {0x3B0, 0x308}, {0x3B0, 0x301}, {0x3CA, 0x3B9}, {0x3CA, 0x308}, {0x3CB, 0x3C5}, {0x3CB, 0x308}, {0x3CC, 0x3BF},
|
||||
{0x3CC, 0x301}, {0x3CD, 0x3C5}, {0x3CD, 0x301}, {0x3CE, 0x3C9}, {0x3CE, 0x301}, {0x3D3, 0x3D2}, {0x3D3, 0x301}, {0x3D4, 0x3D2}, {0x3D4, 0x308}, {0x400, 0x415}, {0x400, 0x300}, {0x401, 0x415},
|
||||
{0x401, 0x308}, {0x403, 0x413}, {0x403, 0x301}, {0x407, 0x406}, {0x407, 0x308}, {0x40C, 0x41A}, {0x40C, 0x301}, {0x40D, 0x418}, {0x40D, 0x300}, {0x40E, 0x423}, {0x40E, 0x306}, {0x419, 0x418},
|
||||
{0x419, 0x306}, {0x439, 0x438}, {0x439, 0x306}, {0x450, 0x435}, {0x450, 0x300}, {0x451, 0x435}, {0x451, 0x308}, {0x453, 0x433}, {0x453, 0x301}, {0x457, 0x456}, {0x457, 0x308}, {0x45C, 0x43A},
|
||||
{0x45C, 0x301}, {0x45D, 0x438}, {0x45D, 0x300}, {0x45E, 0x443}, {0x45E, 0x306}, {0x476, 0x474}, {0x476, 0x30F}, {0x477, 0x475}, {0x477, 0x30F}, {0x4C1, 0x416}, {0x4C1, 0x306}, {0x4C2, 0x436},
|
||||
{0x4C2, 0x306}, {0x4D0, 0x410}, {0x4D0, 0x306}, {0x4D1, 0x430}, {0x4D1, 0x306}, {0x4D2, 0x410}, {0x4D2, 0x308}, {0x4D3, 0x430}, {0x4D3, 0x308}, {0x4D6, 0x415}, {0x4D6, 0x306}, {0x4D7, 0x435},
|
||||
{0x4D7, 0x306}, {0x4DA, 0x4D8}, {0x4DA, 0x308}, {0x4DB, 0x4D9}, {0x4DB, 0x308}, {0x4DC, 0x416}, {0x4DC, 0x308}, {0x4DD, 0x436}, {0x4DD, 0x308}, {0x4DE, 0x417}, {0x4DE, 0x308}, {0x4DF, 0x437},
|
||||
{0x4DF, 0x308}, {0x4E2, 0x418}, {0x4E2, 0x304}, {0x4E3, 0x438}, {0x4E3, 0x304}, {0x4E4, 0x418}, {0x4E4, 0x308}, {0x4E5, 0x438}, {0x4E5, 0x308}, {0x4E6, 0x41E}, {0x4E6, 0x308}, {0x4E7, 0x43E},
|
||||
{0x4E7, 0x308}, {0x4EA, 0x4E8}, {0x4EA, 0x308}, {0x4EB, 0x4E9}, {0x4EB, 0x308}, {0x4EC, 0x42D}, {0x4EC, 0x308}, {0x4ED, 0x44D}, {0x4ED, 0x308}, {0x4EE, 0x423}, {0x4EE, 0x304}, {0x4EF, 0x443},
|
||||
{0x4EF, 0x304}, {0x4F0, 0x423}, {0x4F0, 0x308}, {0x4F1, 0x443}, {0x4F1, 0x308}, {0x4F2, 0x423}, {0x4F2, 0x30B}, {0x4F3, 0x443}, {0x4F3, 0x30B}, {0x4F4, 0x427}, {0x4F4, 0x308}, {0x4F5, 0x447},
|
||||
{0x4F5, 0x308}, {0x4F8, 0x42B}, {0x4F8, 0x308}, {0x4F9, 0x44B}, {0x4F9, 0x308}, {0x622, 0x627}, {0x622, 0x653}, {0x623, 0x627}, {0x623, 0x654}, {0x624, 0x648}, {0x624, 0x654}, {0x625, 0x627},
|
||||
{0x625, 0x655}, {0x626, 0x64A}, {0x626, 0x654}, {0x6C0, 0x6D5}, {0x6C0, 0x654}, {0x6C2, 0x6C1}, {0x6C2, 0x654}, {0x6D3, 0x6D2}, {0x6D3, 0x654}, {0x929, 0x928}, {0x929, 0x93C}, {0x931, 0x930},
|
||||
{0x931, 0x93C}, {0x934, 0x933}, {0x934, 0x93C}, {0x958, 0x915}, {0x958, 0x93C}, {0x959, 0x916}, {0x959, 0x93C}, {0x95A, 0x917}, {0x95A, 0x93C}, {0x95B, 0x91C}, {0x95B, 0x93C}, {0x95C, 0x921},
|
||||
{0x95C, 0x93C}, {0x95D, 0x922}, {0x95D, 0x93C}, {0x95E, 0x92B}, {0x95E, 0x93C}, {0x95F, 0x92F}, {0x95F, 0x93C}, {0x9CB, 0x9C7}, {0x9CB, 0x9BE}, {0x9CC, 0x9C7}, {0x9CC, 0x9D7}, {0x9DC, 0x9A1},
|
||||
{0x9DC, 0x9BC}, {0x9DD, 0x9A2}, {0x9DD, 0x9BC}, {0x9DF, 0x9AF}, {0x9DF, 0x9BC}, {0xA33, 0xA32}, {0xA33, 0xA3C}, {0xA36, 0xA38}, {0xA36, 0xA3C}, {0xA59, 0xA16}, {0xA59, 0xA3C}, {0xA5A, 0xA17},
|
||||
{0xA5A, 0xA3C}, {0xA5B, 0xA1C}, {0xA5B, 0xA3C}, {0xA5E, 0xA2B}, {0xA5E, 0xA3C}, {0xB48, 0xB47}, {0xB48, 0xB56}, {0xB4B, 0xB47}, {0xB4B, 0xB3E}, {0xB4C, 0xB47}, {0xB4C, 0xB57}, {0xB5C, 0xB21},
|
||||
{0xB5C, 0xB3C}, {0xB5D, 0xB22}, {0xB5D, 0xB3C}, {0xB94, 0xB92}, {0xB94, 0xBD7}, {0xBCA, 0xBC6}, {0xBCA, 0xBBE}, {0xBCB, 0xBC7}, {0xBCB, 0xBBE}, {0xBCC, 0xBC6}, {0xBCC, 0xBD7}, {0xC48, 0xC46},
|
||||
{0xC48, 0xC56}, {0xCC0, 0xCBF}, {0xCC0, 0xCD5}, {0xCC7, 0xCC6}, {0xCC7, 0xCD5}, {0xCC8, 0xCC6}, {0xCC8, 0xCD6}, {0xCCA, 0xCC6}, {0xCCA, 0xCC2}, {0xCCB, 0xCC6}, {0xCCB, 0xCC2}, {0xCCB, 0xCD5},
|
||||
{0xD4A, 0xD46}, {0xD4A, 0xD3E}, {0xD4B, 0xD47}, {0xD4B, 0xD3E}, {0xD4C, 0xD46}, {0xD4C, 0xD57}, {0xDDA, 0xDD9}, {0xDDA, 0xDCA}, {0xDDC, 0xDD9}, {0xDDC, 0xDCF}, {0xDDD, 0xDD9}, {0xDDD, 0xDCF},
|
||||
{0xDDD, 0xDCA}, {0xDDE, 0xDD9}, {0xDDE, 0xDDF}, {0xF43, 0xF42}, {0xF43, 0xFB7}, {0xF4D, 0xF4C}, {0xF4D, 0xFB7}, {0xF52, 0xF51}, {0xF52, 0xFB7}, {0xF57, 0xF56}, {0xF57, 0xFB7}, {0xF5C, 0xF5B},
|
||||
{0xF5C, 0xFB7}, {0xF69, 0xF40}, {0xF69, 0xFB5}, {0xF73, 0xF71}, {0xF73, 0xF72}, {0xF75, 0xF71}, {0xF75, 0xF74}, {0xF76, 0xFB2}, {0xF76, 0xF80}, {0xF78, 0xFB3}, {0xF78, 0xF80}, {0xF81, 0xF71},
|
||||
{0xF81, 0xF80}, {0xF93, 0xF92}, {0xF93, 0xFB7}, {0xF9D, 0xF9C}, {0xF9D, 0xFB7}, {0xFA2, 0xFA1}, {0xFA2, 0xFB7}, {0xFA7, 0xFA6}, {0xFA7, 0xFB7}, {0xFAC, 0xFAB}, {0xFAC, 0xFB7}, {0xFB9, 0xF90},
|
||||
{0xFB9, 0xFB5}, {0x1026, 0x1025}, {0x1026, 0x102E}, {0x1B06, 0x1B05}, {0x1B06, 0x1B35}, {0x1B08, 0x1B07}, {0x1B08, 0x1B35}, {0x1B0A, 0x1B09}, {0x1B0A, 0x1B35}, {0x1B0C, 0x1B0B}, {0x1B0C, 0x1B35},
|
||||
{0x1B0E, 0x1B0D}, {0x1B0E, 0x1B35}, {0x1B12, 0x1B11}, {0x1B12, 0x1B35}, {0x1B3B, 0x1B3A}, {0x1B3B, 0x1B35}, {0x1B3D, 0x1B3C}, {0x1B3D, 0x1B35}, {0x1B40, 0x1B3E}, {0x1B40, 0x1B35}, {0x1B41, 0x1B3F},
|
||||
{0x1B41, 0x1B35}, {0x1B43, 0x1B42}, {0x1B43, 0x1B35}, {0x1E00, 0x41}, {0x1E00, 0x325}, {0x1E01, 0x61}, {0x1E01, 0x325}, {0x1E02, 0x42}, {0x1E02, 0x307}, {0x1E03, 0x62}, {0x1E03, 0x307},
|
||||
{0x1E04, 0x42}, {0x1E04, 0x323}, {0x1E05, 0x62}, {0x1E05, 0x323}, {0x1E06, 0x42}, {0x1E06, 0x331}, {0x1E07, 0x62}, {0x1E07, 0x331}, {0x1E08, 0x43}, {0x1E08, 0x327}, {0x1E08, 0x301}, {0x1E09, 0x63},
|
||||
{0x1E09, 0x327}, {0x1E09, 0x301}, {0x1E0A, 0x44}, {0x1E0A, 0x307}, {0x1E0B, 0x64}, {0x1E0B, 0x307}, {0x1E0C, 0x44}, {0x1E0C, 0x323}, {0x1E0D, 0x64}, {0x1E0D, 0x323}, {0x1E0E, 0x44}, {0x1E0E, 0x331},
|
||||
{0x1E0F, 0x64}, {0x1E0F, 0x331}, {0x1E10, 0x44}, {0x1E10, 0x327}, {0x1E11, 0x64}, {0x1E11, 0x327}, {0x1E12, 0x44}, {0x1E12, 0x32D}, {0x1E13, 0x64}, {0x1E13, 0x32D}, {0x1E14, 0x45}, {0x1E14, 0x304},
|
||||
{0x1E14, 0x300}, {0x1E15, 0x65}, {0x1E15, 0x304}, {0x1E15, 0x300}, {0x1E16, 0x45}, {0x1E16, 0x304}, {0x1E16, 0x301}, {0x1E17, 0x65}, {0x1E17, 0x304}, {0x1E17, 0x301}, {0x1E18, 0x45}, {0x1E18, 0x32D},
|
||||
{0x1E19, 0x65}, {0x1E19, 0x32D}, {0x1E1A, 0x45}, {0x1E1A, 0x330}, {0x1E1B, 0x65}, {0x1E1B, 0x330}, {0x1E1C, 0x45}, {0x1E1C, 0x327}, {0x1E1C, 0x306}, {0x1E1D, 0x65}, {0x1E1D, 0x327}, {0x1E1D, 0x306},
|
||||
{0x1E1E, 0x46}, {0x1E1E, 0x307}, {0x1E1F, 0x66}, {0x1E1F, 0x307}, {0x1E20, 0x47}, {0x1E20, 0x304}, {0x1E21, 0x67}, {0x1E21, 0x304}, {0x1E22, 0x48}, {0x1E22, 0x307}, {0x1E23, 0x68}, {0x1E23, 0x307},
|
||||
{0x1E24, 0x48}, {0x1E24, 0x323}, {0x1E25, 0x68}, {0x1E25, 0x323}, {0x1E26, 0x48}, {0x1E26, 0x308}, {0x1E27, 0x68}, {0x1E27, 0x308}, {0x1E28, 0x48}, {0x1E28, 0x327}, {0x1E29, 0x68}, {0x1E29, 0x327},
|
||||
{0x1E2A, 0x48}, {0x1E2A, 0x32E}, {0x1E2B, 0x68}, {0x1E2B, 0x32E}, {0x1E2C, 0x49}, {0x1E2C, 0x330}, {0x1E2D, 0x69}, {0x1E2D, 0x330}, {0x1E2E, 0x49}, {0x1E2E, 0x308}, {0x1E2E, 0x301}, {0x1E2F, 0x69},
|
||||
{0x1E2F, 0x308}, {0x1E2F, 0x301}, {0x1E30, 0x4B}, {0x1E30, 0x301}, {0x1E31, 0x6B}, {0x1E31, 0x301}, {0x1E32, 0x4B}, {0x1E32, 0x323}, {0x1E33, 0x6B}, {0x1E33, 0x323}, {0x1E34, 0x4B}, {0x1E34, 0x331},
|
||||
{0x1E35, 0x6B}, {0x1E35, 0x331}, {0x1E36, 0x4C}, {0x1E36, 0x323}, {0x1E37, 0x6C}, {0x1E37, 0x323}, {0x1E38, 0x4C}, {0x1E38, 0x323}, {0x1E38, 0x304}, {0x1E39, 0x6C}, {0x1E39, 0x323}, {0x1E39, 0x304},
|
||||
{0x1E3A, 0x4C}, {0x1E3A, 0x331}, {0x1E3B, 0x6C}, {0x1E3B, 0x331}, {0x1E3C, 0x4C}, {0x1E3C, 0x32D}, {0x1E3D, 0x6C}, {0x1E3D, 0x32D}, {0x1E3E, 0x4D}, {0x1E3E, 0x301}, {0x1E3F, 0x6D}, {0x1E3F, 0x301},
|
||||
{0x1E40, 0x4D}, {0x1E40, 0x307}, {0x1E41, 0x6D}, {0x1E41, 0x307}, {0x1E42, 0x4D}, {0x1E42, 0x323}, {0x1E43, 0x6D}, {0x1E43, 0x323}, {0x1E44, 0x4E}, {0x1E44, 0x307}, {0x1E45, 0x6E}, {0x1E45, 0x307},
|
||||
{0x1E46, 0x4E}, {0x1E46, 0x323}, {0x1E47, 0x6E}, {0x1E47, 0x323}, {0x1E48, 0x4E}, {0x1E48, 0x331}, {0x1E49, 0x6E}, {0x1E49, 0x331}, {0x1E4A, 0x4E}, {0x1E4A, 0x32D}, {0x1E4B, 0x6E}, {0x1E4B, 0x32D},
|
||||
{0x1E4C, 0x4F}, {0x1E4C, 0x303}, {0x1E4C, 0x301}, {0x1E4D, 0x6F}, {0x1E4D, 0x303}, {0x1E4D, 0x301}, {0x1E4E, 0x4F}, {0x1E4E, 0x303}, {0x1E4E, 0x308}, {0x1E4F, 0x6F}, {0x1E4F, 0x303}, {0x1E4F, 0x308},
|
||||
{0x1E50, 0x4F}, {0x1E50, 0x304}, {0x1E50, 0x300}, {0x1E51, 0x6F}, {0x1E51, 0x304}, {0x1E51, 0x300}, {0x1E52, 0x4F}, {0x1E52, 0x304}, {0x1E52, 0x301}, {0x1E53, 0x6F}, {0x1E53, 0x304}, {0x1E53, 0x301},
|
||||
{0x1E54, 0x50}, {0x1E54, 0x301}, {0x1E55, 0x70}, {0x1E55, 0x301}, {0x1E56, 0x50}, {0x1E56, 0x307}, {0x1E57, 0x70}, {0x1E57, 0x307}, {0x1E58, 0x52}, {0x1E58, 0x307}, {0x1E59, 0x72}, {0x1E59, 0x307},
|
||||
{0x1E5A, 0x52}, {0x1E5A, 0x323}, {0x1E5B, 0x72}, {0x1E5B, 0x323}, {0x1E5C, 0x52}, {0x1E5C, 0x323}, {0x1E5C, 0x304}, {0x1E5D, 0x72}, {0x1E5D, 0x323}, {0x1E5D, 0x304}, {0x1E5E, 0x52}, {0x1E5E, 0x331},
|
||||
{0x1E5F, 0x72}, {0x1E5F, 0x331}, {0x1E60, 0x53}, {0x1E60, 0x307}, {0x1E61, 0x73}, {0x1E61, 0x307}, {0x1E62, 0x53}, {0x1E62, 0x323}, {0x1E63, 0x73}, {0x1E63, 0x323}, {0x1E64, 0x53}, {0x1E64, 0x301},
|
||||
{0x1E64, 0x307}, {0x1E65, 0x73}, {0x1E65, 0x301}, {0x1E65, 0x307}, {0x1E66, 0x53}, {0x1E66, 0x30C}, {0x1E66, 0x307}, {0x1E67, 0x73}, {0x1E67, 0x30C}, {0x1E67, 0x307}, {0x1E68, 0x53}, {0x1E68, 0x323},
|
||||
{0x1E68, 0x307}, {0x1E69, 0x73}, {0x1E69, 0x323}, {0x1E69, 0x307}, {0x1E6A, 0x54}, {0x1E6A, 0x307}, {0x1E6B, 0x74}, {0x1E6B, 0x307}, {0x1E6C, 0x54}, {0x1E6C, 0x323}, {0x1E6D, 0x74}, {0x1E6D, 0x323},
|
||||
{0x1E6E, 0x54}, {0x1E6E, 0x331}, {0x1E6F, 0x74}, {0x1E6F, 0x331}, {0x1E70, 0x54}, {0x1E70, 0x32D}, {0x1E71, 0x74}, {0x1E71, 0x32D}, {0x1E72, 0x55}, {0x1E72, 0x324}, {0x1E73, 0x75}, {0x1E73, 0x324},
|
||||
{0x1E74, 0x55}, {0x1E74, 0x330}, {0x1E75, 0x75}, {0x1E75, 0x330}, {0x1E76, 0x55}, {0x1E76, 0x32D}, {0x1E77, 0x75}, {0x1E77, 0x32D}, {0x1E78, 0x55}, {0x1E78, 0x303}, {0x1E78, 0x301}, {0x1E79, 0x75},
|
||||
{0x1E79, 0x303}, {0x1E79, 0x301}, {0x1E7A, 0x55}, {0x1E7A, 0x304}, {0x1E7A, 0x308}, {0x1E7B, 0x75}, {0x1E7B, 0x304}, {0x1E7B, 0x308}, {0x1E7C, 0x56}, {0x1E7C, 0x303}, {0x1E7D, 0x76}, {0x1E7D, 0x303},
|
||||
{0x1E7E, 0x56}, {0x1E7E, 0x323}, {0x1E7F, 0x76}, {0x1E7F, 0x323}, {0x1E80, 0x57}, {0x1E80, 0x300}, {0x1E81, 0x77}, {0x1E81, 0x300}, {0x1E82, 0x57}, {0x1E82, 0x301}, {0x1E83, 0x77}, {0x1E83, 0x301},
|
||||
{0x1E84, 0x57}, {0x1E84, 0x308}, {0x1E85, 0x77}, {0x1E85, 0x308}, {0x1E86, 0x57}, {0x1E86, 0x307}, {0x1E87, 0x77}, {0x1E87, 0x307}, {0x1E88, 0x57}, {0x1E88, 0x323}, {0x1E89, 0x77}, {0x1E89, 0x323},
|
||||
{0x1E8A, 0x58}, {0x1E8A, 0x307}, {0x1E8B, 0x78}, {0x1E8B, 0x307}, {0x1E8C, 0x58}, {0x1E8C, 0x308}, {0x1E8D, 0x78}, {0x1E8D, 0x308}, {0x1E8E, 0x59}, {0x1E8E, 0x307}, {0x1E8F, 0x79}, {0x1E8F, 0x307},
|
||||
{0x1E90, 0x5A}, {0x1E90, 0x302}, {0x1E91, 0x7A}, {0x1E91, 0x302}, {0x1E92, 0x5A}, {0x1E92, 0x323}, {0x1E93, 0x7A}, {0x1E93, 0x323}, {0x1E94, 0x5A}, {0x1E94, 0x331}, {0x1E95, 0x7A}, {0x1E95, 0x331},
|
||||
{0x1E96, 0x68}, {0x1E96, 0x331}, {0x1E97, 0x74}, {0x1E97, 0x308}, {0x1E98, 0x77}, {0x1E98, 0x30A}, {0x1E99, 0x79}, {0x1E99, 0x30A}, {0x1E9B, 0x17F}, {0x1E9B, 0x307}, {0x1EA0, 0x41}, {0x1EA0, 0x323},
|
||||
{0x1EA1, 0x61}, {0x1EA1, 0x323}, {0x1EA2, 0x41}, {0x1EA2, 0x309}, {0x1EA3, 0x61}, {0x1EA3, 0x309}, {0x1EA4, 0x41}, {0x1EA4, 0x302}, {0x1EA4, 0x301}, {0x1EA5, 0x61}, {0x1EA5, 0x302}, {0x1EA5, 0x301},
|
||||
{0x1EA6, 0x41}, {0x1EA6, 0x302}, {0x1EA6, 0x300}, {0x1EA7, 0x61}, {0x1EA7, 0x302}, {0x1EA7, 0x300}, {0x1EA8, 0x41}, {0x1EA8, 0x302}, {0x1EA8, 0x309}, {0x1EA9, 0x61}, {0x1EA9, 0x302}, {0x1EA9, 0x309},
|
||||
{0x1EAA, 0x41}, {0x1EAA, 0x302}, {0x1EAA, 0x303}, {0x1EAB, 0x61}, {0x1EAB, 0x302}, {0x1EAB, 0x303}, {0x1EAC, 0x41}, {0x1EAC, 0x323}, {0x1EAC, 0x302}, {0x1EAD, 0x61}, {0x1EAD, 0x323}, {0x1EAD, 0x302},
|
||||
{0x1EAE, 0x41}, {0x1EAE, 0x306}, {0x1EAE, 0x301}, {0x1EAF, 0x61}, {0x1EAF, 0x306}, {0x1EAF, 0x301}, {0x1EB0, 0x41}, {0x1EB0, 0x306}, {0x1EB0, 0x300}, {0x1EB1, 0x61}, {0x1EB1, 0x306}, {0x1EB1, 0x300},
|
||||
{0x1EB2, 0x41}, {0x1EB2, 0x306}, {0x1EB2, 0x309}, {0x1EB3, 0x61}, {0x1EB3, 0x306}, {0x1EB3, 0x309}, {0x1EB4, 0x41}, {0x1EB4, 0x306}, {0x1EB4, 0x303}, {0x1EB5, 0x61}, {0x1EB5, 0x306}, {0x1EB5, 0x303},
|
||||
{0x1EB6, 0x41}, {0x1EB6, 0x323}, {0x1EB6, 0x306}, {0x1EB7, 0x61}, {0x1EB7, 0x323}, {0x1EB7, 0x306}, {0x1EB8, 0x45}, {0x1EB8, 0x323}, {0x1EB9, 0x65}, {0x1EB9, 0x323}, {0x1EBA, 0x45}, {0x1EBA, 0x309},
|
||||
{0x1EBB, 0x65}, {0x1EBB, 0x309}, {0x1EBC, 0x45}, {0x1EBC, 0x303}, {0x1EBD, 0x65}, {0x1EBD, 0x303}, {0x1EBE, 0x45}, {0x1EBE, 0x302}, {0x1EBE, 0x301}, {0x1EBF, 0x65}, {0x1EBF, 0x302}, {0x1EBF, 0x301},
|
||||
{0x1EC0, 0x45}, {0x1EC0, 0x302}, {0x1EC0, 0x300}, {0x1EC1, 0x65}, {0x1EC1, 0x302}, {0x1EC1, 0x300}, {0x1EC2, 0x45}, {0x1EC2, 0x302}, {0x1EC2, 0x309}, {0x1EC3, 0x65}, {0x1EC3, 0x302}, {0x1EC3, 0x309},
|
||||
{0x1EC4, 0x45}, {0x1EC4, 0x302}, {0x1EC4, 0x303}, {0x1EC5, 0x65}, {0x1EC5, 0x302}, {0x1EC5, 0x303}, {0x1EC6, 0x45}, {0x1EC6, 0x323}, {0x1EC6, 0x302}, {0x1EC7, 0x65}, {0x1EC7, 0x323}, {0x1EC7, 0x302},
|
||||
{0x1EC8, 0x49}, {0x1EC8, 0x309}, {0x1EC9, 0x69}, {0x1EC9, 0x309}, {0x1ECA, 0x49}, {0x1ECA, 0x323}, {0x1ECB, 0x69}, {0x1ECB, 0x323}, {0x1ECC, 0x4F}, {0x1ECC, 0x323}, {0x1ECD, 0x6F}, {0x1ECD, 0x323},
|
||||
{0x1ECE, 0x4F}, {0x1ECE, 0x309}, {0x1ECF, 0x6F}, {0x1ECF, 0x309}, {0x1ED0, 0x4F}, {0x1ED0, 0x302}, {0x1ED0, 0x301}, {0x1ED1, 0x6F}, {0x1ED1, 0x302}, {0x1ED1, 0x301}, {0x1ED2, 0x4F}, {0x1ED2, 0x302},
|
||||
{0x1ED2, 0x300}, {0x1ED3, 0x6F}, {0x1ED3, 0x302}, {0x1ED3, 0x300}, {0x1ED4, 0x4F}, {0x1ED4, 0x302}, {0x1ED4, 0x309}, {0x1ED5, 0x6F}, {0x1ED5, 0x302}, {0x1ED5, 0x309}, {0x1ED6, 0x4F}, {0x1ED6, 0x302},
|
||||
{0x1ED6, 0x303}, {0x1ED7, 0x6F}, {0x1ED7, 0x302}, {0x1ED7, 0x303}, {0x1ED8, 0x4F}, {0x1ED8, 0x323}, {0x1ED8, 0x302}, {0x1ED9, 0x6F}, {0x1ED9, 0x323}, {0x1ED9, 0x302}, {0x1EDA, 0x4F}, {0x1EDA, 0x31B},
|
||||
{0x1EDA, 0x301}, {0x1EDB, 0x6F}, {0x1EDB, 0x31B}, {0x1EDB, 0x301}, {0x1EDC, 0x4F}, {0x1EDC, 0x31B}, {0x1EDC, 0x300}, {0x1EDD, 0x6F}, {0x1EDD, 0x31B}, {0x1EDD, 0x300}, {0x1EDE, 0x4F}, {0x1EDE, 0x31B},
|
||||
{0x1EDE, 0x309}, {0x1EDF, 0x6F}, {0x1EDF, 0x31B}, {0x1EDF, 0x309}, {0x1EE0, 0x4F}, {0x1EE0, 0x31B}, {0x1EE0, 0x303}, {0x1EE1, 0x6F}, {0x1EE1, 0x31B}, {0x1EE1, 0x303}, {0x1EE2, 0x4F}, {0x1EE2, 0x31B},
|
||||
{0x1EE2, 0x323}, {0x1EE3, 0x6F}, {0x1EE3, 0x31B}, {0x1EE3, 0x323}, {0x1EE4, 0x55}, {0x1EE4, 0x323}, {0x1EE5, 0x75}, {0x1EE5, 0x323}, {0x1EE6, 0x55}, {0x1EE6, 0x309}, {0x1EE7, 0x75}, {0x1EE7, 0x309},
|
||||
{0x1EE8, 0x55}, {0x1EE8, 0x31B}, {0x1EE8, 0x301}, {0x1EE9, 0x75}, {0x1EE9, 0x31B}, {0x1EE9, 0x301}, {0x1EEA, 0x55}, {0x1EEA, 0x31B}, {0x1EEA, 0x300}, {0x1EEB, 0x75}, {0x1EEB, 0x31B}, {0x1EEB, 0x300},
|
||||
{0x1EEC, 0x55}, {0x1EEC, 0x31B}, {0x1EEC, 0x309}, {0x1EED, 0x75}, {0x1EED, 0x31B}, {0x1EED, 0x309}, {0x1EEE, 0x55}, {0x1EEE, 0x31B}, {0x1EEE, 0x303}, {0x1EEF, 0x75}, {0x1EEF, 0x31B}, {0x1EEF, 0x303},
|
||||
{0x1EF0, 0x55}, {0x1EF0, 0x31B}, {0x1EF0, 0x323}, {0x1EF1, 0x75}, {0x1EF1, 0x31B}, {0x1EF1, 0x323}, {0x1EF2, 0x59}, {0x1EF2, 0x300}, {0x1EF3, 0x79}, {0x1EF3, 0x300}, {0x1EF4, 0x59}, {0x1EF4, 0x323},
|
||||
{0x1EF5, 0x79}, {0x1EF5, 0x323}, {0x1EF6, 0x59}, {0x1EF6, 0x309}, {0x1EF7, 0x79}, {0x1EF7, 0x309}, {0x1EF8, 0x59}, {0x1EF8, 0x303}, {0x1EF9, 0x79}, {0x1EF9, 0x303}, {0x1F00, 0x3B1}, {0x1F00, 0x313},
|
||||
{0x1F01, 0x3B1}, {0x1F01, 0x314}, {0x1F02, 0x3B1}, {0x1F02, 0x313}, {0x1F02, 0x300}, {0x1F03, 0x3B1}, {0x1F03, 0x314}, {0x1F03, 0x300}, {0x1F04, 0x3B1}, {0x1F04, 0x313}, {0x1F04, 0x301},
|
||||
{0x1F05, 0x3B1}, {0x1F05, 0x314}, {0x1F05, 0x301}, {0x1F06, 0x3B1}, {0x1F06, 0x313}, {0x1F06, 0x342}, {0x1F07, 0x3B1}, {0x1F07, 0x314}, {0x1F07, 0x342}, {0x1F08, 0x391}, {0x1F08, 0x313},
|
||||
{0x1F09, 0x391}, {0x1F09, 0x314}, {0x1F0A, 0x391}, {0x1F0A, 0x313}, {0x1F0A, 0x300}, {0x1F0B, 0x391}, {0x1F0B, 0x314}, {0x1F0B, 0x300}, {0x1F0C, 0x391}, {0x1F0C, 0x313}, {0x1F0C, 0x301},
|
||||
{0x1F0D, 0x391}, {0x1F0D, 0x314}, {0x1F0D, 0x301}, {0x1F0E, 0x391}, {0x1F0E, 0x313}, {0x1F0E, 0x342}, {0x1F0F, 0x391}, {0x1F0F, 0x314}, {0x1F0F, 0x342}, {0x1F10, 0x3B5}, {0x1F10, 0x313},
|
||||
{0x1F11, 0x3B5}, {0x1F11, 0x314}, {0x1F12, 0x3B5}, {0x1F12, 0x313}, {0x1F12, 0x300}, {0x1F13, 0x3B5}, {0x1F13, 0x314}, {0x1F13, 0x300}, {0x1F14, 0x3B5}, {0x1F14, 0x313}, {0x1F14, 0x301},
|
||||
{0x1F15, 0x3B5}, {0x1F15, 0x314}, {0x1F15, 0x301}, {0x1F18, 0x395}, {0x1F18, 0x313}, {0x1F19, 0x395}, {0x1F19, 0x314}, {0x1F1A, 0x395}, {0x1F1A, 0x313}, {0x1F1A, 0x300}, {0x1F1B, 0x395},
|
||||
{0x1F1B, 0x314}, {0x1F1B, 0x300}, {0x1F1C, 0x395}, {0x1F1C, 0x313}, {0x1F1C, 0x301}, {0x1F1D, 0x395}, {0x1F1D, 0x314}, {0x1F1D, 0x301}, {0x1F20, 0x3B7}, {0x1F20, 0x313}, {0x1F21, 0x3B7},
|
||||
{0x1F21, 0x314}, {0x1F22, 0x3B7}, {0x1F22, 0x313}, {0x1F22, 0x300}, {0x1F23, 0x3B7}, {0x1F23, 0x314}, {0x1F23, 0x300}, {0x1F24, 0x3B7}, {0x1F24, 0x313}, {0x1F24, 0x301}, {0x1F25, 0x3B7},
|
||||
{0x1F25, 0x314}, {0x1F25, 0x301}, {0x1F26, 0x3B7}, {0x1F26, 0x313}, {0x1F26, 0x342}, {0x1F27, 0x3B7}, {0x1F27, 0x314}, {0x1F27, 0x342}, {0x1F28, 0x397}, {0x1F28, 0x313}, {0x1F29, 0x397},
|
||||
{0x1F29, 0x314}, {0x1F2A, 0x397}, {0x1F2A, 0x313}, {0x1F2A, 0x300}, {0x1F2B, 0x397}, {0x1F2B, 0x314}, {0x1F2B, 0x300}, {0x1F2C, 0x397}, {0x1F2C, 0x313}, {0x1F2C, 0x301}, {0x1F2D, 0x397},
|
||||
{0x1F2D, 0x314}, {0x1F2D, 0x301}, {0x1F2E, 0x397}, {0x1F2E, 0x313}, {0x1F2E, 0x342}, {0x1F2F, 0x397}, {0x1F2F, 0x314}, {0x1F2F, 0x342}, {0x1F30, 0x3B9}, {0x1F30, 0x313}, {0x1F31, 0x3B9},
|
||||
{0x1F31, 0x314}, {0x1F32, 0x3B9}, {0x1F32, 0x313}, {0x1F32, 0x300}, {0x1F33, 0x3B9}, {0x1F33, 0x314}, {0x1F33, 0x300}, {0x1F34, 0x3B9}, {0x1F34, 0x313}, {0x1F34, 0x301}, {0x1F35, 0x3B9},
|
||||
{0x1F35, 0x314}, {0x1F35, 0x301}, {0x1F36, 0x3B9}, {0x1F36, 0x313}, {0x1F36, 0x342}, {0x1F37, 0x3B9}, {0x1F37, 0x314}, {0x1F37, 0x342}, {0x1F38, 0x399}, {0x1F38, 0x313}, {0x1F39, 0x399},
|
||||
{0x1F39, 0x314}, {0x1F3A, 0x399}, {0x1F3A, 0x313}, {0x1F3A, 0x300}, {0x1F3B, 0x399}, {0x1F3B, 0x314}, {0x1F3B, 0x300}, {0x1F3C, 0x399}, {0x1F3C, 0x313}, {0x1F3C, 0x301}, {0x1F3D, 0x399},
|
||||
{0x1F3D, 0x314}, {0x1F3D, 0x301}, {0x1F3E, 0x399}, {0x1F3E, 0x313}, {0x1F3E, 0x342}, {0x1F3F, 0x399}, {0x1F3F, 0x314}, {0x1F3F, 0x342}, {0x1F40, 0x3BF}, {0x1F40, 0x313}, {0x1F41, 0x3BF},
|
||||
{0x1F41, 0x314}, {0x1F42, 0x3BF}, {0x1F42, 0x313}, {0x1F42, 0x300}, {0x1F43, 0x3BF}, {0x1F43, 0x314}, {0x1F43, 0x300}, {0x1F44, 0x3BF}, {0x1F44, 0x313}, {0x1F44, 0x301}, {0x1F45, 0x3BF},
|
||||
{0x1F45, 0x314}, {0x1F45, 0x301}, {0x1F48, 0x39F}, {0x1F48, 0x313}, {0x1F49, 0x39F}, {0x1F49, 0x314}, {0x1F4A, 0x39F}, {0x1F4A, 0x313}, {0x1F4A, 0x300}, {0x1F4B, 0x39F}, {0x1F4B, 0x314},
|
||||
{0x1F4B, 0x300}, {0x1F4C, 0x39F}, {0x1F4C, 0x313}, {0x1F4C, 0x301}, {0x1F4D, 0x39F}, {0x1F4D, 0x314}, {0x1F4D, 0x301}, {0x1F50, 0x3C5}, {0x1F50, 0x313}, {0x1F51, 0x3C5}, {0x1F51, 0x314},
|
||||
{0x1F52, 0x3C5}, {0x1F52, 0x313}, {0x1F52, 0x300}, {0x1F53, 0x3C5}, {0x1F53, 0x314}, {0x1F53, 0x300}, {0x1F54, 0x3C5}, {0x1F54, 0x313}, {0x1F54, 0x301}, {0x1F55, 0x3C5}, {0x1F55, 0x314},
|
||||
{0x1F55, 0x301}, {0x1F56, 0x3C5}, {0x1F56, 0x313}, {0x1F56, 0x342}, {0x1F57, 0x3C5}, {0x1F57, 0x314}, {0x1F57, 0x342}, {0x1F59, 0x3A5}, {0x1F59, 0x314}, {0x1F5B, 0x3A5}, {0x1F5B, 0x314},
|
||||
{0x1F5B, 0x300}, {0x1F5D, 0x3A5}, {0x1F5D, 0x314}, {0x1F5D, 0x301}, {0x1F5F, 0x3A5}, {0x1F5F, 0x314}, {0x1F5F, 0x342}, {0x1F60, 0x3C9}, {0x1F60, 0x313}, {0x1F61, 0x3C9}, {0x1F61, 0x314},
|
||||
{0x1F62, 0x3C9}, {0x1F62, 0x313}, {0x1F62, 0x300}, {0x1F63, 0x3C9}, {0x1F63, 0x314}, {0x1F63, 0x300}, {0x1F64, 0x3C9}, {0x1F64, 0x313}, {0x1F64, 0x301}, {0x1F65, 0x3C9}, {0x1F65, 0x314},
|
||||
{0x1F65, 0x301}, {0x1F66, 0x3C9}, {0x1F66, 0x313}, {0x1F66, 0x342}, {0x1F67, 0x3C9}, {0x1F67, 0x314}, {0x1F67, 0x342}, {0x1F68, 0x3A9}, {0x1F68, 0x313}, {0x1F69, 0x3A9}, {0x1F69, 0x314},
|
||||
{0x1F6A, 0x3A9}, {0x1F6A, 0x313}, {0x1F6A, 0x300}, {0x1F6B, 0x3A9}, {0x1F6B, 0x314}, {0x1F6B, 0x300}, {0x1F6C, 0x3A9}, {0x1F6C, 0x313}, {0x1F6C, 0x301}, {0x1F6D, 0x3A9}, {0x1F6D, 0x314},
|
||||
{0x1F6D, 0x301}, {0x1F6E, 0x3A9}, {0x1F6E, 0x313}, {0x1F6E, 0x342}, {0x1F6F, 0x3A9}, {0x1F6F, 0x314}, {0x1F6F, 0x342}, {0x1F70, 0x3B1}, {0x1F70, 0x300}, {0x1F71, 0x3B1}, {0x1F71, 0x301},
|
||||
{0x1F72, 0x3B5}, {0x1F72, 0x300}, {0x1F73, 0x3B5}, {0x1F73, 0x301}, {0x1F74, 0x3B7}, {0x1F74, 0x300}, {0x1F75, 0x3B7}, {0x1F75, 0x301}, {0x1F76, 0x3B9}, {0x1F76, 0x300}, {0x1F77, 0x3B9},
|
||||
{0x1F77, 0x301}, {0x1F78, 0x3BF}, {0x1F78, 0x300}, {0x1F79, 0x3BF}, {0x1F79, 0x301}, {0x1F7A, 0x3C5}, {0x1F7A, 0x300}, {0x1F7B, 0x3C5}, {0x1F7B, 0x301}, {0x1F7C, 0x3C9}, {0x1F7C, 0x300},
|
||||
{0x1F7D, 0x3C9}, {0x1F7D, 0x301}, {0x1F80, 0x3B1}, {0x1F80, 0x313}, {0x1F80, 0x345}, {0x1F81, 0x3B1}, {0x1F81, 0x314}, {0x1F81, 0x345}, {0x1F82, 0x3B1}, {0x1F82, 0x313}, {0x1F82, 0x300},
|
||||
{0x1F82, 0x345}, {0x1F83, 0x3B1}, {0x1F83, 0x314}, {0x1F83, 0x300}, {0x1F83, 0x345}, {0x1F84, 0x3B1}, {0x1F84, 0x313}, {0x1F84, 0x301}, {0x1F84, 0x345}, {0x1F85, 0x3B1}, {0x1F85, 0x314},
|
||||
{0x1F85, 0x301}, {0x1F85, 0x345}, {0x1F86, 0x3B1}, {0x1F86, 0x313}, {0x1F86, 0x342}, {0x1F86, 0x345}, {0x1F87, 0x3B1}, {0x1F87, 0x314}, {0x1F87, 0x342}, {0x1F87, 0x345}, {0x1F88, 0x391},
|
||||
{0x1F88, 0x313}, {0x1F88, 0x345}, {0x1F89, 0x391}, {0x1F89, 0x314}, {0x1F89, 0x345}, {0x1F8A, 0x391}, {0x1F8A, 0x313}, {0x1F8A, 0x300}, {0x1F8A, 0x345}, {0x1F8B, 0x391}, {0x1F8B, 0x314},
|
||||
{0x1F8B, 0x300}, {0x1F8B, 0x345}, {0x1F8C, 0x391}, {0x1F8C, 0x313}, {0x1F8C, 0x301}, {0x1F8C, 0x345}, {0x1F8D, 0x391}, {0x1F8D, 0x314}, {0x1F8D, 0x301}, {0x1F8D, 0x345}, {0x1F8E, 0x391},
|
||||
{0x1F8E, 0x313}, {0x1F8E, 0x342}, {0x1F8E, 0x345}, {0x1F8F, 0x391}, {0x1F8F, 0x314}, {0x1F8F, 0x342}, {0x1F8F, 0x345}, {0x1F90, 0x3B7}, {0x1F90, 0x313}, {0x1F90, 0x345}, {0x1F91, 0x3B7},
|
||||
{0x1F91, 0x314}, {0x1F91, 0x345}, {0x1F92, 0x3B7}, {0x1F92, 0x313}, {0x1F92, 0x300}, {0x1F92, 0x345}, {0x1F93, 0x3B7}, {0x1F93, 0x314}, {0x1F93, 0x300}, {0x1F93, 0x345}, {0x1F94, 0x3B7},
|
||||
{0x1F94, 0x313}, {0x1F94, 0x301}, {0x1F94, 0x345}, {0x1F95, 0x3B7}, {0x1F95, 0x314}, {0x1F95, 0x301}, {0x1F95, 0x345}, {0x1F96, 0x3B7}, {0x1F96, 0x313}, {0x1F96, 0x342}, {0x1F96, 0x345},
|
||||
{0x1F97, 0x3B7}, {0x1F97, 0x314}, {0x1F97, 0x342}, {0x1F97, 0x345}, {0x1F98, 0x397}, {0x1F98, 0x313}, {0x1F98, 0x345}, {0x1F99, 0x397}, {0x1F99, 0x314}, {0x1F99, 0x345}, {0x1F9A, 0x397},
|
||||
{0x1F9A, 0x313}, {0x1F9A, 0x300}, {0x1F9A, 0x345}, {0x1F9B, 0x397}, {0x1F9B, 0x314}, {0x1F9B, 0x300}, {0x1F9B, 0x345}, {0x1F9C, 0x397}, {0x1F9C, 0x313}, {0x1F9C, 0x301}, {0x1F9C, 0x345},
|
||||
{0x1F9D, 0x397}, {0x1F9D, 0x314}, {0x1F9D, 0x301}, {0x1F9D, 0x345}, {0x1F9E, 0x397}, {0x1F9E, 0x313}, {0x1F9E, 0x342}, {0x1F9E, 0x345}, {0x1F9F, 0x397}, {0x1F9F, 0x314}, {0x1F9F, 0x342},
|
||||
{0x1F9F, 0x345}, {0x1FA0, 0x3C9}, {0x1FA0, 0x313}, {0x1FA0, 0x345}, {0x1FA1, 0x3C9}, {0x1FA1, 0x314}, {0x1FA1, 0x345}, {0x1FA2, 0x3C9}, {0x1FA2, 0x313}, {0x1FA2, 0x300}, {0x1FA2, 0x345},
|
||||
{0x1FA3, 0x3C9}, {0x1FA3, 0x314}, {0x1FA3, 0x300}, {0x1FA3, 0x345}, {0x1FA4, 0x3C9}, {0x1FA4, 0x313}, {0x1FA4, 0x301}, {0x1FA4, 0x345}, {0x1FA5, 0x3C9}, {0x1FA5, 0x314}, {0x1FA5, 0x301},
|
||||
{0x1FA5, 0x345}, {0x1FA6, 0x3C9}, {0x1FA6, 0x313}, {0x1FA6, 0x342}, {0x1FA6, 0x345}, {0x1FA7, 0x3C9}, {0x1FA7, 0x314}, {0x1FA7, 0x342}, {0x1FA7, 0x345}, {0x1FA8, 0x3A9}, {0x1FA8, 0x313},
|
||||
{0x1FA8, 0x345}, {0x1FA9, 0x3A9}, {0x1FA9, 0x314}, {0x1FA9, 0x345}, {0x1FAA, 0x3A9}, {0x1FAA, 0x313}, {0x1FAA, 0x300}, {0x1FAA, 0x345}, {0x1FAB, 0x3A9}, {0x1FAB, 0x314}, {0x1FAB, 0x300},
|
||||
{0x1FAB, 0x345}, {0x1FAC, 0x3A9}, {0x1FAC, 0x313}, {0x1FAC, 0x301}, {0x1FAC, 0x345}, {0x1FAD, 0x3A9}, {0x1FAD, 0x314}, {0x1FAD, 0x301}, {0x1FAD, 0x345}, {0x1FAE, 0x3A9}, {0x1FAE, 0x313},
|
||||
{0x1FAE, 0x342}, {0x1FAE, 0x345}, {0x1FAF, 0x3A9}, {0x1FAF, 0x314}, {0x1FAF, 0x342}, {0x1FAF, 0x345}, {0x1FB0, 0x3B1}, {0x1FB0, 0x306}, {0x1FB1, 0x3B1}, {0x1FB1, 0x304}, {0x1FB2, 0x3B1},
|
||||
{0x1FB2, 0x300}, {0x1FB2, 0x345}, {0x1FB3, 0x3B1}, {0x1FB3, 0x345}, {0x1FB4, 0x3B1}, {0x1FB4, 0x301}, {0x1FB4, 0x345}, {0x1FB6, 0x3B1}, {0x1FB6, 0x342}, {0x1FB7, 0x3B1}, {0x1FB7, 0x342},
|
||||
{0x1FB7, 0x345}, {0x1FB8, 0x391}, {0x1FB8, 0x306}, {0x1FB9, 0x391}, {0x1FB9, 0x304}, {0x1FBA, 0x391}, {0x1FBA, 0x300}, {0x1FBB, 0x391}, {0x1FBB, 0x301}, {0x1FBC, 0x391}, {0x1FBC, 0x345},
|
||||
{0x1FBE, 0x3B9}, {0x1FC1, 0xA8}, {0x1FC1, 0x342}, {0x1FC2, 0x3B7}, {0x1FC2, 0x300}, {0x1FC2, 0x345}, {0x1FC3, 0x3B7}, {0x1FC3, 0x345}, {0x1FC4, 0x3B7}, {0x1FC4, 0x301}, {0x1FC4, 0x345},
|
||||
{0x1FC6, 0x3B7}, {0x1FC6, 0x342}, {0x1FC7, 0x3B7}, {0x1FC7, 0x342}, {0x1FC7, 0x345}, {0x1FC8, 0x395}, {0x1FC8, 0x300}, {0x1FC9, 0x395}, {0x1FC9, 0x301}, {0x1FCA, 0x397}, {0x1FCA, 0x300},
|
||||
{0x1FCB, 0x397}, {0x1FCB, 0x301}, {0x1FCC, 0x397}, {0x1FCC, 0x345}, {0x1FCD, 0x1FBF}, {0x1FCD, 0x300}, {0x1FCE, 0x1FBF}, {0x1FCE, 0x301}, {0x1FCF, 0x1FBF}, {0x1FCF, 0x342}, {0x1FD0, 0x3B9},
|
||||
{0x1FD0, 0x306}, {0x1FD1, 0x3B9}, {0x1FD1, 0x304}, {0x1FD2, 0x3B9}, {0x1FD2, 0x308}, {0x1FD2, 0x300}, {0x1FD3, 0x3B9}, {0x1FD3, 0x308}, {0x1FD3, 0x301}, {0x1FD6, 0x3B9}, {0x1FD6, 0x342},
|
||||
{0x1FD7, 0x3B9}, {0x1FD7, 0x308}, {0x1FD7, 0x342}, {0x1FD8, 0x399}, {0x1FD8, 0x306}, {0x1FD9, 0x399}, {0x1FD9, 0x304}, {0x1FDA, 0x399}, {0x1FDA, 0x300}, {0x1FDB, 0x399}, {0x1FDB, 0x301},
|
||||
{0x1FDD, 0x1FFE}, {0x1FDD, 0x300}, {0x1FDE, 0x1FFE}, {0x1FDE, 0x301}, {0x1FDF, 0x1FFE}, {0x1FDF, 0x342}, {0x1FE0, 0x3C5}, {0x1FE0, 0x306}, {0x1FE1, 0x3C5}, {0x1FE1, 0x304}, {0x1FE2, 0x3C5},
|
||||
{0x1FE2, 0x308}, {0x1FE2, 0x300}, {0x1FE3, 0x3C5}, {0x1FE3, 0x308}, {0x1FE3, 0x301}, {0x1FE4, 0x3C1}, {0x1FE4, 0x313}, {0x1FE5, 0x3C1}, {0x1FE5, 0x314}, {0x1FE6, 0x3C5}, {0x1FE6, 0x342},
|
||||
{0x1FE7, 0x3C5}, {0x1FE7, 0x308}, {0x1FE7, 0x342}, {0x1FE8, 0x3A5}, {0x1FE8, 0x306}, {0x1FE9, 0x3A5}, {0x1FE9, 0x304}, {0x1FEA, 0x3A5}, {0x1FEA, 0x300}, {0x1FEB, 0x3A5}, {0x1FEB, 0x301},
|
||||
{0x1FEC, 0x3A1}, {0x1FEC, 0x314}, {0x1FED, 0xA8}, {0x1FED, 0x300}, {0x1FEE, 0xA8}, {0x1FEE, 0x301}, {0x1FEF, 0x60}, {0x1FF2, 0x3C9}, {0x1FF2, 0x300}, {0x1FF2, 0x345}, {0x1FF3, 0x3C9}, {0x1FF3, 0x345},
|
||||
{0x1FF4, 0x3C9}, {0x1FF4, 0x301}, {0x1FF4, 0x345}, {0x1FF6, 0x3C9}, {0x1FF6, 0x342}, {0x1FF7, 0x3C9}, {0x1FF7, 0x342}, {0x1FF7, 0x345}, {0x1FF8, 0x39F}, {0x1FF8, 0x300}, {0x1FF9, 0x39F},
|
||||
{0x1FF9, 0x301}, {0x1FFA, 0x3A9}, {0x1FFA, 0x300}, {0x1FFB, 0x3A9}, {0x1FFB, 0x301}, {0x1FFC, 0x3A9}, {0x1FFC, 0x345}, {0x1FFD, 0xB4}, {0x2000, 0x2002}, {0x2001, 0x2003}, {0x2126, 0x3A9},
|
||||
{0x212A, 0x4B}, {0x212B, 0x41}, {0x212B, 0x30A}, {0x219A, 0x2190}, {0x219A, 0x338}, {0x219B, 0x2192}, {0x219B, 0x338}, {0x21AE, 0x2194}, {0x21AE, 0x338}, {0x21CD, 0x21D0}, {0x21CD, 0x338},
|
||||
{0x21CE, 0x21D4}, {0x21CE, 0x338}, {0x21CF, 0x21D2}, {0x21CF, 0x338}, {0x2204, 0x2203}, {0x2204, 0x338}, {0x2209, 0x2208}, {0x2209, 0x338}, {0x220C, 0x220B}, {0x220C, 0x338}, {0x2224, 0x2223},
|
||||
{0x2224, 0x338}, {0x2226, 0x2225}, {0x2226, 0x338}, {0x2241, 0x223C}, {0x2241, 0x338}, {0x2244, 0x2243}, {0x2244, 0x338}, {0x2247, 0x2245}, {0x2247, 0x338}, {0x2249, 0x2248}, {0x2249, 0x338},
|
||||
{0x2260, 0x3D}, {0x2260, 0x338}, {0x2262, 0x2261}, {0x2262, 0x338}, {0x226D, 0x224D}, {0x226D, 0x338}, {0x226E, 0x3C}, {0x226E, 0x338}, {0x226F, 0x3E}, {0x226F, 0x338}, {0x2270, 0x2264},
|
||||
{0x2270, 0x338}, {0x2271, 0x2265}, {0x2271, 0x338}, {0x2274, 0x2272}, {0x2274, 0x338}, {0x2275, 0x2273}, {0x2275, 0x338}, {0x2278, 0x2276}, {0x2278, 0x338}, {0x2279, 0x2277}, {0x2279, 0x338},
|
||||
{0x2280, 0x227A}, {0x2280, 0x338}, {0x2281, 0x227B}, {0x2281, 0x338}, {0x2284, 0x2282}, {0x2284, 0x338}, {0x2285, 0x2283}, {0x2285, 0x338}, {0x2288, 0x2286}, {0x2288, 0x338}, {0x2289, 0x2287},
|
||||
{0x2289, 0x338}, {0x22AC, 0x22A2}, {0x22AC, 0x338}, {0x22AD, 0x22A8}, {0x22AD, 0x338}, {0x22AE, 0x22A9}, {0x22AE, 0x338}, {0x22AF, 0x22AB}, {0x22AF, 0x338}, {0x22E0, 0x227C}, {0x22E0, 0x338},
|
||||
{0x22E1, 0x227D}, {0x22E1, 0x338}, {0x22E2, 0x2291}, {0x22E2, 0x338}, {0x22E3, 0x2292}, {0x22E3, 0x338}, {0x22EA, 0x22B2}, {0x22EA, 0x338}, {0x22EB, 0x22B3}, {0x22EB, 0x338}, {0x22EC, 0x22B4},
|
||||
{0x22EC, 0x338}, {0x22ED, 0x22B5}, {0x22ED, 0x338}, {0x2329, 0x3008}, {0x232A, 0x3009}, {0x2ADC, 0x2ADD}, {0x2ADC, 0x338}, {0x304C, 0x304B}, {0x304C, 0x3099}, {0x304E, 0x304D}, {0x304E, 0x3099},
|
||||
{0x3050, 0x304F}, {0x3050, 0x3099}, {0x3052, 0x3051}, {0x3052, 0x3099}, {0x3054, 0x3053}, {0x3054, 0x3099}, {0x3056, 0x3055}, {0x3056, 0x3099}, {0x3058, 0x3057}, {0x3058, 0x3099}, {0x305A, 0x3059},
|
||||
{0x305A, 0x3099}, {0x305C, 0x305B}, {0x305C, 0x3099}, {0x305E, 0x305D}, {0x305E, 0x3099}, {0x3060, 0x305F}, {0x3060, 0x3099}, {0x3062, 0x3061}, {0x3062, 0x3099}, {0x3065, 0x3064}, {0x3065, 0x3099},
|
||||
{0x3067, 0x3066}, {0x3067, 0x3099}, {0x3069, 0x3068}, {0x3069, 0x3099}, {0x3070, 0x306F}, {0x3070, 0x3099}, {0x3071, 0x306F}, {0x3071, 0x309A}, {0x3073, 0x3072}, {0x3073, 0x3099}, {0x3074, 0x3072},
|
||||
{0x3074, 0x309A}, {0x3076, 0x3075}, {0x3076, 0x3099}, {0x3077, 0x3075}, {0x3077, 0x309A}, {0x3079, 0x3078}, {0x3079, 0x3099}, {0x307A, 0x3078}, {0x307A, 0x309A}, {0x307C, 0x307B}, {0x307C, 0x3099},
|
||||
{0x307D, 0x307B}, {0x307D, 0x309A}, {0x3094, 0x3046}, {0x3094, 0x3099}, {0x309E, 0x309D}, {0x309E, 0x3099}, {0x30AC, 0x30AB}, {0x30AC, 0x3099}, {0x30AE, 0x30AD}, {0x30AE, 0x3099}, {0x30B0, 0x30AF},
|
||||
{0x30B0, 0x3099}, {0x30B2, 0x30B1}, {0x30B2, 0x3099}, {0x30B4, 0x30B3}, {0x30B4, 0x3099}, {0x30B6, 0x30B5}, {0x30B6, 0x3099}, {0x30B8, 0x30B7}, {0x30B8, 0x3099}, {0x30BA, 0x30B9}, {0x30BA, 0x3099},
|
||||
{0x30BC, 0x30BB}, {0x30BC, 0x3099}, {0x30BE, 0x30BD}, {0x30BE, 0x3099}, {0x30C0, 0x30BF}, {0x30C0, 0x3099}, {0x30C2, 0x30C1}, {0x30C2, 0x3099}, {0x30C5, 0x30C4}, {0x30C5, 0x3099}, {0x30C7, 0x30C6},
|
||||
{0x30C7, 0x3099}, {0x30C9, 0x30C8}, {0x30C9, 0x3099}, {0x30D0, 0x30CF}, {0x30D0, 0x3099}, {0x30D1, 0x30CF}, {0x30D1, 0x309A}, {0x30D3, 0x30D2}, {0x30D3, 0x3099}, {0x30D4, 0x30D2}, {0x30D4, 0x309A},
|
||||
{0x30D6, 0x30D5}, {0x30D6, 0x3099}, {0x30D7, 0x30D5}, {0x30D7, 0x309A}, {0x30D9, 0x30D8}, {0x30D9, 0x3099}, {0x30DA, 0x30D8}, {0x30DA, 0x309A}, {0x30DC, 0x30DB}, {0x30DC, 0x3099}, {0x30DD, 0x30DB},
|
||||
{0x30DD, 0x309A}, {0x30F4, 0x30A6}, {0x30F4, 0x3099}, {0x30F7, 0x30EF}, {0x30F7, 0x3099}, {0x30F8, 0x30F0}, {0x30F8, 0x3099}, {0x30F9, 0x30F1}, {0x30F9, 0x3099}, {0x30FA, 0x30F2}, {0x30FA, 0x3099},
|
||||
{0x30FE, 0x30FD}, {0x30FE, 0x3099}, {0xF900, 0x8C48}, {0xF901, 0x66F4}, {0xF902, 0x8ECA}, {0xF903, 0x8CC8}, {0xF904, 0x6ED1}, {0xF905, 0x4E32}, {0xF906, 0x53E5}, {0xF907, 0x9F9C}, {0xF908, 0x9F9C},
|
||||
{0xF909, 0x5951}, {0xF90A, 0x91D1}, {0xF90B, 0x5587}, {0xF90C, 0x5948}, {0xF90D, 0x61F6}, {0xF90E, 0x7669}, {0xF90F, 0x7F85}, {0xF910, 0x863F}, {0xF911, 0x87BA}, {0xF912, 0x88F8}, {0xF913, 0x908F},
|
||||
{0xF914, 0x6A02}, {0xF915, 0x6D1B}, {0xF916, 0x70D9}, {0xF917, 0x73DE}, {0xF918, 0x843D}, {0xF919, 0x916A}, {0xF91A, 0x99F1}, {0xF91B, 0x4E82}, {0xF91C, 0x5375}, {0xF91D, 0x6B04}, {0xF91E, 0x721B},
|
||||
{0xF91F, 0x862D}, {0xF920, 0x9E1E}, {0xF921, 0x5D50}, {0xF922, 0x6FEB}, {0xF923, 0x85CD}, {0xF924, 0x8964}, {0xF925, 0x62C9}, {0xF926, 0x81D8}, {0xF927, 0x881F}, {0xF928, 0x5ECA}, {0xF929, 0x6717},
|
||||
{0xF92A, 0x6D6A}, {0xF92B, 0x72FC}, {0xF92C, 0x90CE}, {0xF92D, 0x4F86}, {0xF92E, 0x51B7}, {0xF92F, 0x52DE}, {0xF930, 0x64C4}, {0xF931, 0x6AD3}, {0xF932, 0x7210}, {0xF933, 0x76E7}, {0xF934, 0x8001},
|
||||
{0xF935, 0x8606}, {0xF936, 0x865C}, {0xF937, 0x8DEF}, {0xF938, 0x9732}, {0xF939, 0x9B6F}, {0xF93A, 0x9DFA}, {0xF93B, 0x788C}, {0xF93C, 0x797F}, {0xF93D, 0x7DA0}, {0xF93E, 0x83C9}, {0xF93F, 0x9304},
|
||||
{0xF940, 0x9E7F}, {0xF941, 0x8AD6}, {0xF942, 0x58DF}, {0xF943, 0x5F04}, {0xF944, 0x7C60}, {0xF945, 0x807E}, {0xF946, 0x7262}, {0xF947, 0x78CA}, {0xF948, 0x8CC2}, {0xF949, 0x96F7}, {0xF94A, 0x58D8},
|
||||
{0xF94B, 0x5C62}, {0xF94C, 0x6A13}, {0xF94D, 0x6DDA}, {0xF94E, 0x6F0F}, {0xF94F, 0x7D2F}, {0xF950, 0x7E37}, {0xF951, 0x964B}, {0xF952, 0x52D2}, {0xF953, 0x808B}, {0xF954, 0x51DC}, {0xF955, 0x51CC},
|
||||
{0xF956, 0x7A1C}, {0xF957, 0x7DBE}, {0xF958, 0x83F1}, {0xF959, 0x9675}, {0xF95A, 0x8B80}, {0xF95B, 0x62CF}, {0xF95C, 0x6A02}, {0xF95D, 0x8AFE}, {0xF95E, 0x4E39}, {0xF95F, 0x5BE7}, {0xF960, 0x6012},
|
||||
{0xF961, 0x7387}, {0xF962, 0x7570}, {0xF963, 0x5317}, {0xF964, 0x78FB}, {0xF965, 0x4FBF}, {0xF966, 0x5FA9}, {0xF967, 0x4E0D}, {0xF968, 0x6CCC}, {0xF969, 0x6578}, {0xF96A, 0x7D22}, {0xF96B, 0x53C3},
|
||||
{0xF96C, 0x585E}, {0xF96D, 0x7701}, {0xF96E, 0x8449}, {0xF96F, 0x8AAA}, {0xF970, 0x6BBA}, {0xF971, 0x8FB0}, {0xF972, 0x6C88}, {0xF973, 0x62FE}, {0xF974, 0x82E5}, {0xF975, 0x63A0}, {0xF976, 0x7565},
|
||||
{0xF977, 0x4EAE}, {0xF978, 0x5169}, {0xF979, 0x51C9}, {0xF97A, 0x6881}, {0xF97B, 0x7CE7}, {0xF97C, 0x826F}, {0xF97D, 0x8AD2}, {0xF97E, 0x91CF}, {0xF97F, 0x52F5}, {0xF980, 0x5442}, {0xF981, 0x5973},
|
||||
{0xF982, 0x5EEC}, {0xF983, 0x65C5}, {0xF984, 0x6FFE}, {0xF985, 0x792A}, {0xF986, 0x95AD}, {0xF987, 0x9A6A}, {0xF988, 0x9E97}, {0xF989, 0x9ECE}, {0xF98A, 0x529B}, {0xF98B, 0x66C6}, {0xF98C, 0x6B77},
|
||||
{0xF98D, 0x8F62}, {0xF98E, 0x5E74}, {0xF98F, 0x6190}, {0xF990, 0x6200}, {0xF991, 0x649A}, {0xF992, 0x6F23}, {0xF993, 0x7149}, {0xF994, 0x7489}, {0xF995, 0x79CA}, {0xF996, 0x7DF4}, {0xF997, 0x806F},
|
||||
{0xF998, 0x8F26}, {0xF999, 0x84EE}, {0xF99A, 0x9023}, {0xF99B, 0x934A}, {0xF99C, 0x5217}, {0xF99D, 0x52A3}, {0xF99E, 0x54BD}, {0xF99F, 0x70C8}, {0xF9A0, 0x88C2}, {0xF9A1, 0x8AAA}, {0xF9A2, 0x5EC9},
|
||||
{0xF9A3, 0x5FF5}, {0xF9A4, 0x637B}, {0xF9A5, 0x6BAE}, {0xF9A6, 0x7C3E}, {0xF9A7, 0x7375}, {0xF9A8, 0x4EE4}, {0xF9A9, 0x56F9}, {0xF9AA, 0x5BE7}, {0xF9AB, 0x5DBA}, {0xF9AC, 0x601C}, {0xF9AD, 0x73B2},
|
||||
{0xF9AE, 0x7469}, {0xF9AF, 0x7F9A}, {0xF9B0, 0x8046}, {0xF9B1, 0x9234}, {0xF9B2, 0x96F6}, {0xF9B3, 0x9748}, {0xF9B4, 0x9818}, {0xF9B5, 0x4F8B}, {0xF9B6, 0x79AE}, {0xF9B7, 0x91B4}, {0xF9B8, 0x96B8},
|
||||
{0xF9B9, 0x60E1}, {0xF9BA, 0x4E86}, {0xF9BB, 0x50DA}, {0xF9BC, 0x5BEE}, {0xF9BD, 0x5C3F}, {0xF9BE, 0x6599}, {0xF9BF, 0x6A02}, {0xF9C0, 0x71CE}, {0xF9C1, 0x7642}, {0xF9C2, 0x84FC}, {0xF9C3, 0x907C},
|
||||
{0xF9C4, 0x9F8D}, {0xF9C5, 0x6688}, {0xF9C6, 0x962E}, {0xF9C7, 0x5289}, {0xF9C8, 0x677B}, {0xF9C9, 0x67F3}, {0xF9CA, 0x6D41}, {0xF9CB, 0x6E9C}, {0xF9CC, 0x7409}, {0xF9CD, 0x7559}, {0xF9CE, 0x786B},
|
||||
{0xF9CF, 0x7D10}, {0xF9D0, 0x985E}, {0xF9D1, 0x516D}, {0xF9D2, 0x622E}, {0xF9D3, 0x9678}, {0xF9D4, 0x502B}, {0xF9D5, 0x5D19}, {0xF9D6, 0x6DEA}, {0xF9D7, 0x8F2A}, {0xF9D8, 0x5F8B}, {0xF9D9, 0x6144},
|
||||
{0xF9DA, 0x6817}, {0xF9DB, 0x7387}, {0xF9DC, 0x9686}, {0xF9DD, 0x5229}, {0xF9DE, 0x540F}, {0xF9DF, 0x5C65}, {0xF9E0, 0x6613}, {0xF9E1, 0x674E}, {0xF9E2, 0x68A8}, {0xF9E3, 0x6CE5}, {0xF9E4, 0x7406},
|
||||
{0xF9E5, 0x75E2}, {0xF9E6, 0x7F79}, {0xF9E7, 0x88CF}, {0xF9E8, 0x88E1}, {0xF9E9, 0x91CC}, {0xF9EA, 0x96E2}, {0xF9EB, 0x533F}, {0xF9EC, 0x6EBA}, {0xF9ED, 0x541D}, {0xF9EE, 0x71D0}, {0xF9EF, 0x7498},
|
||||
{0xF9F0, 0x85FA}, {0xF9F1, 0x96A3}, {0xF9F2, 0x9C57}, {0xF9F3, 0x9E9F}, {0xF9F4, 0x6797}, {0xF9F5, 0x6DCB}, {0xF9F6, 0x81E8}, {0xF9F7, 0x7ACB}, {0xF9F8, 0x7B20}, {0xF9F9, 0x7C92}, {0xF9FA, 0x72C0},
|
||||
{0xF9FB, 0x7099}, {0xF9FC, 0x8B58}, {0xF9FD, 0x4EC0}, {0xF9FE, 0x8336}, {0xF9FF, 0x523A}, {0xFA00, 0x5207}, {0xFA01, 0x5EA6}, {0xFA02, 0x62D3}, {0xFA03, 0x7CD6}, {0xFA04, 0x5B85}, {0xFA05, 0x6D1E},
|
||||
{0xFA06, 0x66B4}, {0xFA07, 0x8F3B}, {0xFA08, 0x884C}, {0xFA09, 0x964D}, {0xFA0A, 0x898B}, {0xFA0B, 0x5ED3}, {0xFA0C, 0x5140}, {0xFA0D, 0x55C0}, {0xFA10, 0x585A}, {0xFA12, 0x6674}, {0xFA15, 0x51DE},
|
||||
{0xFA16, 0x732A}, {0xFA17, 0x76CA}, {0xFA18, 0x793C}, {0xFA19, 0x795E}, {0xFA1A, 0x7965}, {0xFA1B, 0x798F}, {0xFA1C, 0x9756}, {0xFA1D, 0x7CBE}, {0xFA1E, 0x7FBD}, {0xFA20, 0x8612}, {0xFA22, 0x8AF8},
|
||||
{0xFA25, 0x9038}, {0xFA26, 0x90FD}, {0xFA2A, 0x98EF}, {0xFA2B, 0x98FC}, {0xFA2C, 0x9928}, {0xFA2D, 0x9DB4}, {0xFA2E, 0x90DE}, {0xFA2F, 0x96B7}, {0xFA30, 0x4FAE}, {0xFA31, 0x50E7}, {0xFA32, 0x514D},
|
||||
{0xFA33, 0x52C9}, {0xFA34, 0x52E4}, {0xFA35, 0x5351}, {0xFA36, 0x559D}, {0xFA37, 0x5606}, {0xFA38, 0x5668}, {0xFA39, 0x5840}, {0xFA3A, 0x58A8}, {0xFA3B, 0x5C64}, {0xFA3C, 0x5C6E}, {0xFA3D, 0x6094},
|
||||
{0xFA3E, 0x6168}, {0xFA3F, 0x618E}, {0xFA40, 0x61F2}, {0xFA41, 0x654F}, {0xFA42, 0x65E2}, {0xFA43, 0x6691}, {0xFA44, 0x6885}, {0xFA45, 0x6D77}, {0xFA46, 0x6E1A}, {0xFA47, 0x6F22}, {0xFA48, 0x716E},
|
||||
{0xFA49, 0x722B}, {0xFA4A, 0x7422}, {0xFA4B, 0x7891}, {0xFA4C, 0x793E}, {0xFA4D, 0x7949}, {0xFA4E, 0x7948}, {0xFA4F, 0x7950}, {0xFA50, 0x7956}, {0xFA51, 0x795D}, {0xFA52, 0x798D}, {0xFA53, 0x798E},
|
||||
{0xFA54, 0x7A40}, {0xFA55, 0x7A81}, {0xFA56, 0x7BC0}, {0xFA57, 0x7DF4}, {0xFA58, 0x7E09}, {0xFA59, 0x7E41}, {0xFA5A, 0x7F72}, {0xFA5B, 0x8005}, {0xFA5C, 0x81ED}, {0xFA5D, 0x8279}, {0xFA5E, 0x8279},
|
||||
{0xFA5F, 0x8457}, {0xFA60, 0x8910}, {0xFA61, 0x8996}, {0xFA62, 0x8B01}, {0xFA63, 0x8B39}, {0xFA64, 0x8CD3}, {0xFA65, 0x8D08}, {0xFA66, 0x8FB6}, {0xFA67, 0x9038}, {0xFA68, 0x96E3}, {0xFA69, 0x97FF},
|
||||
{0xFA6A, 0x983B}, {0xFA6B, 0x6075}, {0xFA6C, 0x242EE}, {0xFA6D, 0x8218}, {0xFA70, 0x4E26}, {0xFA71, 0x51B5}, {0xFA72, 0x5168}, {0xFA73, 0x4F80}, {0xFA74, 0x5145}, {0xFA75, 0x5180}, {0xFA76, 0x52C7},
|
||||
{0xFA77, 0x52FA}, {0xFA78, 0x559D}, {0xFA79, 0x5555}, {0xFA7A, 0x5599}, {0xFA7B, 0x55E2}, {0xFA7C, 0x585A}, {0xFA7D, 0x58B3}, {0xFA7E, 0x5944}, {0xFA7F, 0x5954}, {0xFA80, 0x5A62}, {0xFA81, 0x5B28},
|
||||
{0xFA82, 0x5ED2}, {0xFA83, 0x5ED9}, {0xFA84, 0x5F69}, {0xFA85, 0x5FAD}, {0xFA86, 0x60D8}, {0xFA87, 0x614E}, {0xFA88, 0x6108}, {0xFA89, 0x618E}, {0xFA8A, 0x6160}, {0xFA8B, 0x61F2}, {0xFA8C, 0x6234},
|
||||
{0xFA8D, 0x63C4}, {0xFA8E, 0x641C}, {0xFA8F, 0x6452}, {0xFA90, 0x6556}, {0xFA91, 0x6674}, {0xFA92, 0x6717}, {0xFA93, 0x671B}, {0xFA94, 0x6756}, {0xFA95, 0x6B79}, {0xFA96, 0x6BBA}, {0xFA97, 0x6D41},
|
||||
{0xFA98, 0x6EDB}, {0xFA99, 0x6ECB}, {0xFA9A, 0x6F22}, {0xFA9B, 0x701E}, {0xFA9C, 0x716E}, {0xFA9D, 0x77A7}, {0xFA9E, 0x7235}, {0xFA9F, 0x72AF}, {0xFAA0, 0x732A}, {0xFAA1, 0x7471}, {0xFAA2, 0x7506},
|
||||
{0xFAA3, 0x753B}, {0xFAA4, 0x761D}, {0xFAA5, 0x761F}, {0xFAA6, 0x76CA}, {0xFAA7, 0x76DB}, {0xFAA8, 0x76F4}, {0xFAA9, 0x774A}, {0xFAAA, 0x7740}, {0xFAAB, 0x78CC}, {0xFAAC, 0x7AB1}, {0xFAAD, 0x7BC0},
|
||||
{0xFAAE, 0x7C7B}, {0xFAAF, 0x7D5B}, {0xFAB0, 0x7DF4}, {0xFAB1, 0x7F3E}, {0xFAB2, 0x8005}, {0xFAB3, 0x8352}, {0xFAB4, 0x83EF}, {0xFAB5, 0x8779}, {0xFAB6, 0x8941}, {0xFAB7, 0x8986}, {0xFAB8, 0x8996},
|
||||
{0xFAB9, 0x8ABF}, {0xFABA, 0x8AF8}, {0xFABB, 0x8ACB}, {0xFABC, 0x8B01}, {0xFABD, 0x8AFE}, {0xFABE, 0x8AED}, {0xFABF, 0x8B39}, {0xFAC0, 0x8B8A}, {0xFAC1, 0x8D08}, {0xFAC2, 0x8F38}, {0xFAC3, 0x9072},
|
||||
{0xFAC4, 0x9199}, {0xFAC5, 0x9276}, {0xFAC6, 0x967C}, {0xFAC7, 0x96E3}, {0xFAC8, 0x9756}, {0xFAC9, 0x97DB}, {0xFACA, 0x97FF}, {0xFACB, 0x980B}, {0xFACC, 0x983B}, {0xFACD, 0x9B12}, {0xFACE, 0x9F9C},
|
||||
{0xFACF, 0x2284A}, {0xFAD0, 0x22844}, {0xFAD1, 0x233D5}, {0xFAD2, 0x3B9D}, {0xFAD3, 0x4018}, {0xFAD4, 0x4039}, {0xFAD5, 0x25249}, {0xFAD6, 0x25CD0}, {0xFAD7, 0x27ED3}, {0xFAD8, 0x9F43},
|
||||
{0xFAD9, 0x9F8E}, {0xFB1D, 0x5D9}, {0xFB1D, 0x5B4}, {0xFB1F, 0x5F2}, {0xFB1F, 0x5B7}, {0xFB2A, 0x5E9}, {0xFB2A, 0x5C1}, {0xFB2B, 0x5E9}, {0xFB2B, 0x5C2}, {0xFB2C, 0x5E9}, {0xFB2C, 0x5BC},
|
||||
{0xFB2C, 0x5C1}, {0xFB2D, 0x5E9}, {0xFB2D, 0x5BC}, {0xFB2D, 0x5C2}, {0xFB2E, 0x5D0}, {0xFB2E, 0x5B7}, {0xFB2F, 0x5D0}, {0xFB2F, 0x5B8}, {0xFB30, 0x5D0}, {0xFB30, 0x5BC}, {0xFB31, 0x5D1},
|
||||
{0xFB31, 0x5BC}, {0xFB32, 0x5D2}, {0xFB32, 0x5BC}, {0xFB33, 0x5D3}, {0xFB33, 0x5BC}, {0xFB34, 0x5D4}, {0xFB34, 0x5BC}, {0xFB35, 0x5D5}, {0xFB35, 0x5BC}, {0xFB36, 0x5D6}, {0xFB36, 0x5BC},
|
||||
{0xFB38, 0x5D8}, {0xFB38, 0x5BC}, {0xFB39, 0x5D9}, {0xFB39, 0x5BC}, {0xFB3A, 0x5DA}, {0xFB3A, 0x5BC}, {0xFB3B, 0x5DB}, {0xFB3B, 0x5BC}, {0xFB3C, 0x5DC}, {0xFB3C, 0x5BC}, {0xFB3E, 0x5DE},
|
||||
{0xFB3E, 0x5BC}, {0xFB40, 0x5E0}, {0xFB40, 0x5BC}, {0xFB41, 0x5E1}, {0xFB41, 0x5BC}, {0xFB43, 0x5E3}, {0xFB43, 0x5BC}, {0xFB44, 0x5E4}, {0xFB44, 0x5BC}, {0xFB46, 0x5E6}, {0xFB46, 0x5BC},
|
||||
{0xFB47, 0x5E7}, {0xFB47, 0x5BC}, {0xFB48, 0x5E8}, {0xFB48, 0x5BC}, {0xFB49, 0x5E9}, {0xFB49, 0x5BC}, {0xFB4A, 0x5EA}, {0xFB4A, 0x5BC}, {0xFB4B, 0x5D5}, {0xFB4B, 0x5B9}, {0xFB4C, 0x5D1},
|
||||
{0xFB4C, 0x5BF}, {0xFB4D, 0x5DB}, {0xFB4D, 0x5BF}, {0xFB4E, 0x5E4}, {0xFB4E, 0x5BF}, {0x1109A, 0x11099}, {0x1109A, 0x110BA}, {0x1109C, 0x1109B}, {0x1109C, 0x110BA}, {0x110AB, 0x110A5},
|
||||
{0x110AB, 0x110BA}, {0x1112E, 0x11131}, {0x1112E, 0x11127}, {0x1112F, 0x11132}, {0x1112F, 0x11127}, {0x1134B, 0x11347}, {0x1134B, 0x1133E}, {0x1134C, 0x11347}, {0x1134C, 0x11357}, {0x114BB, 0x114B9},
|
||||
{0x114BB, 0x114BA}, {0x114BC, 0x114B9}, {0x114BC, 0x114B0}, {0x114BE, 0x114B9}, {0x114BE, 0x114BD}, {0x115BA, 0x115B8}, {0x115BA, 0x115AF}, {0x115BB, 0x115B9}, {0x115BB, 0x115AF}, {0x1D15E, 0x1D157},
|
||||
{0x1D15E, 0x1D165}, {0x1D15F, 0x1D158}, {0x1D15F, 0x1D165}, {0x1D160, 0x1D158}, {0x1D160, 0x1D165}, {0x1D160, 0x1D16E}, {0x1D161, 0x1D158}, {0x1D161, 0x1D165}, {0x1D161, 0x1D16F}, {0x1D162, 0x1D158},
|
||||
{0x1D162, 0x1D165}, {0x1D162, 0x1D170}, {0x1D163, 0x1D158}, {0x1D163, 0x1D165}, {0x1D163, 0x1D171}, {0x1D164, 0x1D158}, {0x1D164, 0x1D165}, {0x1D164, 0x1D172}, {0x1D1BB, 0x1D1B9}, {0x1D1BB, 0x1D165},
|
||||
{0x1D1BC, 0x1D1BA}, {0x1D1BC, 0x1D165}, {0x1D1BD, 0x1D1B9}, {0x1D1BD, 0x1D165}, {0x1D1BD, 0x1D16E}, {0x1D1BE, 0x1D1BA}, {0x1D1BE, 0x1D165}, {0x1D1BE, 0x1D16E}, {0x1D1BF, 0x1D1B9}, {0x1D1BF, 0x1D165},
|
||||
{0x1D1BF, 0x1D16F}, {0x1D1C0, 0x1D1BA}, {0x1D1C0, 0x1D165}, {0x1D1C0, 0x1D16F}, {0x2F800, 0x4E3D}, {0x2F801, 0x4E38}, {0x2F802, 0x4E41}, {0x2F803, 0x20122}, {0x2F804, 0x4F60}, {0x2F805, 0x4FAE},
|
||||
{0x2F806, 0x4FBB}, {0x2F807, 0x5002}, {0x2F808, 0x507A}, {0x2F809, 0x5099}, {0x2F80A, 0x50E7}, {0x2F80B, 0x50CF}, {0x2F80C, 0x349E}, {0x2F80D, 0x2063A}, {0x2F80E, 0x514D}, {0x2F80F, 0x5154},
|
||||
{0x2F810, 0x5164}, {0x2F811, 0x5177}, {0x2F812, 0x2051C}, {0x2F813, 0x34B9}, {0x2F814, 0x5167}, {0x2F815, 0x518D}, {0x2F816, 0x2054B}, {0x2F817, 0x5197}, {0x2F818, 0x51A4}, {0x2F819, 0x4ECC},
|
||||
{0x2F81A, 0x51AC}, {0x2F81B, 0x51B5}, {0x2F81C, 0x291DF}, {0x2F81D, 0x51F5}, {0x2F81E, 0x5203}, {0x2F81F, 0x34DF}, {0x2F820, 0x523B}, {0x2F821, 0x5246}, {0x2F822, 0x5272}, {0x2F823, 0x5277},
|
||||
{0x2F824, 0x3515}, {0x2F825, 0x52C7}, {0x2F826, 0x52C9}, {0x2F827, 0x52E4}, {0x2F828, 0x52FA}, {0x2F829, 0x5305}, {0x2F82A, 0x5306}, {0x2F82B, 0x5317}, {0x2F82C, 0x5349}, {0x2F82D, 0x5351},
|
||||
{0x2F82E, 0x535A}, {0x2F82F, 0x5373}, {0x2F830, 0x537D}, {0x2F831, 0x537F}, {0x2F832, 0x537F}, {0x2F833, 0x537F}, {0x2F834, 0x20A2C}, {0x2F835, 0x7070}, {0x2F836, 0x53CA}, {0x2F837, 0x53DF},
|
||||
{0x2F838, 0x20B63}, {0x2F839, 0x53EB}, {0x2F83A, 0x53F1}, {0x2F83B, 0x5406}, {0x2F83C, 0x549E}, {0x2F83D, 0x5438}, {0x2F83E, 0x5448}, {0x2F83F, 0x5468}, {0x2F840, 0x54A2}, {0x2F841, 0x54F6},
|
||||
{0x2F842, 0x5510}, {0x2F843, 0x5553}, {0x2F844, 0x5563}, {0x2F845, 0x5584}, {0x2F846, 0x5584}, {0x2F847, 0x5599}, {0x2F848, 0x55AB}, {0x2F849, 0x55B3}, {0x2F84A, 0x55C2}, {0x2F84B, 0x5716},
|
||||
{0x2F84C, 0x5606}, {0x2F84D, 0x5717}, {0x2F84E, 0x5651}, {0x2F84F, 0x5674}, {0x2F850, 0x5207}, {0x2F851, 0x58EE}, {0x2F852, 0x57CE}, {0x2F853, 0x57F4}, {0x2F854, 0x580D}, {0x2F855, 0x578B},
|
||||
{0x2F856, 0x5832}, {0x2F857, 0x5831}, {0x2F858, 0x58AC}, {0x2F859, 0x214E4}, {0x2F85A, 0x58F2}, {0x2F85B, 0x58F7}, {0x2F85C, 0x5906}, {0x2F85D, 0x591A}, {0x2F85E, 0x5922}, {0x2F85F, 0x5962},
|
||||
{0x2F860, 0x216A8}, {0x2F861, 0x216EA}, {0x2F862, 0x59EC}, {0x2F863, 0x5A1B}, {0x2F864, 0x5A27}, {0x2F865, 0x59D8}, {0x2F866, 0x5A66}, {0x2F867, 0x36EE}, {0x2F868, 0x36FC}, {0x2F869, 0x5B08},
|
||||
{0x2F86A, 0x5B3E}, {0x2F86B, 0x5B3E}, {0x2F86C, 0x219C8}, {0x2F86D, 0x5BC3}, {0x2F86E, 0x5BD8}, {0x2F86F, 0x5BE7}, {0x2F870, 0x5BF3}, {0x2F871, 0x21B18}, {0x2F872, 0x5BFF}, {0x2F873, 0x5C06},
|
||||
{0x2F874, 0x5F53}, {0x2F875, 0x5C22}, {0x2F876, 0x3781}, {0x2F877, 0x5C60}, {0x2F878, 0x5C6E}, {0x2F879, 0x5CC0}, {0x2F87A, 0x5C8D}, {0x2F87B, 0x21DE4}, {0x2F87C, 0x5D43}, {0x2F87D, 0x21DE6},
|
||||
{0x2F87E, 0x5D6E}, {0x2F87F, 0x5D6B}, {0x2F880, 0x5D7C}, {0x2F881, 0x5DE1}, {0x2F882, 0x5DE2}, {0x2F883, 0x382F}, {0x2F884, 0x5DFD}, {0x2F885, 0x5E28}, {0x2F886, 0x5E3D}, {0x2F887, 0x5E69},
|
||||
{0x2F888, 0x3862}, {0x2F889, 0x22183}, {0x2F88A, 0x387C}, {0x2F88B, 0x5EB0}, {0x2F88C, 0x5EB3}, {0x2F88D, 0x5EB6}, {0x2F88E, 0x5ECA}, {0x2F88F, 0x2A392}, {0x2F890, 0x5EFE}, {0x2F891, 0x22331},
|
||||
{0x2F892, 0x22331}, {0x2F893, 0x8201}, {0x2F894, 0x5F22}, {0x2F895, 0x5F22}, {0x2F896, 0x38C7}, {0x2F897, 0x232B8}, {0x2F898, 0x261DA}, {0x2F899, 0x5F62}, {0x2F89A, 0x5F6B}, {0x2F89B, 0x38E3},
|
||||
{0x2F89C, 0x5F9A}, {0x2F89D, 0x5FCD}, {0x2F89E, 0x5FD7}, {0x2F89F, 0x5FF9}, {0x2F8A0, 0x6081}, {0x2F8A1, 0x393A}, {0x2F8A2, 0x391C}, {0x2F8A3, 0x6094}, {0x2F8A4, 0x226D4}, {0x2F8A5, 0x60C7},
|
||||
{0x2F8A6, 0x6148}, {0x2F8A7, 0x614C}, {0x2F8A8, 0x614E}, {0x2F8A9, 0x614C}, {0x2F8AA, 0x617A}, {0x2F8AB, 0x618E}, {0x2F8AC, 0x61B2}, {0x2F8AD, 0x61A4}, {0x2F8AE, 0x61AF}, {0x2F8AF, 0x61DE},
|
||||
{0x2F8B0, 0x61F2}, {0x2F8B1, 0x61F6}, {0x2F8B2, 0x6210}, {0x2F8B3, 0x621B}, {0x2F8B4, 0x625D}, {0x2F8B5, 0x62B1}, {0x2F8B6, 0x62D4}, {0x2F8B7, 0x6350}, {0x2F8B8, 0x22B0C}, {0x2F8B9, 0x633D},
|
||||
{0x2F8BA, 0x62FC}, {0x2F8BB, 0x6368}, {0x2F8BC, 0x6383}, {0x2F8BD, 0x63E4}, {0x2F8BE, 0x22BF1}, {0x2F8BF, 0x6422}, {0x2F8C0, 0x63C5}, {0x2F8C1, 0x63A9}, {0x2F8C2, 0x3A2E}, {0x2F8C3, 0x6469},
|
||||
{0x2F8C4, 0x647E}, {0x2F8C5, 0x649D}, {0x2F8C6, 0x6477}, {0x2F8C7, 0x3A6C}, {0x2F8C8, 0x654F}, {0x2F8C9, 0x656C}, {0x2F8CA, 0x2300A}, {0x2F8CB, 0x65E3}, {0x2F8CC, 0x66F8}, {0x2F8CD, 0x6649},
|
||||
{0x2F8CE, 0x3B19}, {0x2F8CF, 0x6691}, {0x2F8D0, 0x3B08}, {0x2F8D1, 0x3AE4}, {0x2F8D2, 0x5192}, {0x2F8D3, 0x5195}, {0x2F8D4, 0x6700}, {0x2F8D5, 0x669C}, {0x2F8D6, 0x80AD}, {0x2F8D7, 0x43D9},
|
||||
{0x2F8D8, 0x6717}, {0x2F8D9, 0x671B}, {0x2F8DA, 0x6721}, {0x2F8DB, 0x675E}, {0x2F8DC, 0x6753}, {0x2F8DD, 0x233C3}, {0x2F8DE, 0x3B49}, {0x2F8DF, 0x67FA}, {0x2F8E0, 0x6785}, {0x2F8E1, 0x6852},
|
||||
{0x2F8E2, 0x6885}, {0x2F8E3, 0x2346D}, {0x2F8E4, 0x688E}, {0x2F8E5, 0x681F}, {0x2F8E6, 0x6914}, {0x2F8E7, 0x3B9D}, {0x2F8E8, 0x6942}, {0x2F8E9, 0x69A3}, {0x2F8EA, 0x69EA}, {0x2F8EB, 0x6AA8},
|
||||
{0x2F8EC, 0x236A3}, {0x2F8ED, 0x6ADB}, {0x2F8EE, 0x3C18}, {0x2F8EF, 0x6B21}, {0x2F8F0, 0x238A7}, {0x2F8F1, 0x6B54}, {0x2F8F2, 0x3C4E}, {0x2F8F3, 0x6B72}, {0x2F8F4, 0x6B9F}, {0x2F8F5, 0x6BBA},
|
||||
{0x2F8F6, 0x6BBB}, {0x2F8F7, 0x23A8D}, {0x2F8F8, 0x21D0B}, {0x2F8F9, 0x23AFA}, {0x2F8FA, 0x6C4E}, {0x2F8FB, 0x23CBC}, {0x2F8FC, 0x6CBF}, {0x2F8FD, 0x6CCD}, {0x2F8FE, 0x6C67}, {0x2F8FF, 0x6D16},
|
||||
{0x2F900, 0x6D3E}, {0x2F901, 0x6D77}, {0x2F902, 0x6D41}, {0x2F903, 0x6D69}, {0x2F904, 0x6D78}, {0x2F905, 0x6D85}, {0x2F906, 0x23D1E}, {0x2F907, 0x6D34}, {0x2F908, 0x6E2F}, {0x2F909, 0x6E6E},
|
||||
{0x2F90A, 0x3D33}, {0x2F90B, 0x6ECB}, {0x2F90C, 0x6EC7}, {0x2F90D, 0x23ED1}, {0x2F90E, 0x6DF9}, {0x2F90F, 0x6F6E}, {0x2F910, 0x23F5E}, {0x2F911, 0x23F8E}, {0x2F912, 0x6FC6}, {0x2F913, 0x7039},
|
||||
{0x2F914, 0x701E}, {0x2F915, 0x701B}, {0x2F916, 0x3D96}, {0x2F917, 0x704A}, {0x2F918, 0x707D}, {0x2F919, 0x7077}, {0x2F91A, 0x70AD}, {0x2F91B, 0x20525}, {0x2F91C, 0x7145}, {0x2F91D, 0x24263},
|
||||
{0x2F91E, 0x719C}, {0x2F91F, 0x243AB}, {0x2F920, 0x7228}, {0x2F921, 0x7235}, {0x2F922, 0x7250}, {0x2F923, 0x24608}, {0x2F924, 0x7280}, {0x2F925, 0x7295}, {0x2F926, 0x24735}, {0x2F927, 0x24814},
|
||||
{0x2F928, 0x737A}, {0x2F929, 0x738B}, {0x2F92A, 0x3EAC}, {0x2F92B, 0x73A5}, {0x2F92C, 0x3EB8}, {0x2F92D, 0x3EB8}, {0x2F92E, 0x7447}, {0x2F92F, 0x745C}, {0x2F930, 0x7471}, {0x2F931, 0x7485},
|
||||
{0x2F932, 0x74CA}, {0x2F933, 0x3F1B}, {0x2F934, 0x7524}, {0x2F935, 0x24C36}, {0x2F936, 0x753E}, {0x2F937, 0x24C92}, {0x2F938, 0x7570}, {0x2F939, 0x2219F}, {0x2F93A, 0x7610}, {0x2F93B, 0x24FA1},
|
||||
{0x2F93C, 0x24FB8}, {0x2F93D, 0x25044}, {0x2F93E, 0x3FFC}, {0x2F93F, 0x4008}, {0x2F940, 0x76F4}, {0x2F941, 0x250F3}, {0x2F942, 0x250F2}, {0x2F943, 0x25119}, {0x2F944, 0x25133}, {0x2F945, 0x771E},
|
||||
{0x2F946, 0x771F}, {0x2F947, 0x771F}, {0x2F948, 0x774A}, {0x2F949, 0x4039}, {0x2F94A, 0x778B}, {0x2F94B, 0x4046}, {0x2F94C, 0x4096}, {0x2F94D, 0x2541D}, {0x2F94E, 0x784E}, {0x2F94F, 0x788C},
|
||||
{0x2F950, 0x78CC}, {0x2F951, 0x40E3}, {0x2F952, 0x25626}, {0x2F953, 0x7956}, {0x2F954, 0x2569A}, {0x2F955, 0x256C5}, {0x2F956, 0x798F}, {0x2F957, 0x79EB}, {0x2F958, 0x412F}, {0x2F959, 0x7A40},
|
||||
{0x2F95A, 0x7A4A}, {0x2F95B, 0x7A4F}, {0x2F95C, 0x2597C}, {0x2F95D, 0x25AA7}, {0x2F95E, 0x25AA7}, {0x2F95F, 0x7AEE}, {0x2F960, 0x4202}, {0x2F961, 0x25BAB}, {0x2F962, 0x7BC6}, {0x2F963, 0x7BC9},
|
||||
{0x2F964, 0x4227}, {0x2F965, 0x25C80}, {0x2F966, 0x7CD2}, {0x2F967, 0x42A0}, {0x2F968, 0x7CE8}, {0x2F969, 0x7CE3}, {0x2F96A, 0x7D00}, {0x2F96B, 0x25F86}, {0x2F96C, 0x7D63}, {0x2F96D, 0x4301},
|
||||
{0x2F96E, 0x7DC7}, {0x2F96F, 0x7E02}, {0x2F970, 0x7E45}, {0x2F971, 0x4334}, {0x2F972, 0x26228}, {0x2F973, 0x26247}, {0x2F974, 0x4359}, {0x2F975, 0x262D9}, {0x2F976, 0x7F7A}, {0x2F977, 0x2633E},
|
||||
{0x2F978, 0x7F95}, {0x2F979, 0x7FFA}, {0x2F97A, 0x8005}, {0x2F97B, 0x264DA}, {0x2F97C, 0x26523}, {0x2F97D, 0x8060}, {0x2F97E, 0x265A8}, {0x2F97F, 0x8070}, {0x2F980, 0x2335F}, {0x2F981, 0x43D5},
|
||||
{0x2F982, 0x80B2}, {0x2F983, 0x8103}, {0x2F984, 0x440B}, {0x2F985, 0x813E}, {0x2F986, 0x5AB5}, {0x2F987, 0x267A7}, {0x2F988, 0x267B5}, {0x2F989, 0x23393}, {0x2F98A, 0x2339C}, {0x2F98B, 0x8201},
|
||||
{0x2F98C, 0x8204}, {0x2F98D, 0x8F9E}, {0x2F98E, 0x446B}, {0x2F98F, 0x8291}, {0x2F990, 0x828B}, {0x2F991, 0x829D}, {0x2F992, 0x52B3}, {0x2F993, 0x82B1}, {0x2F994, 0x82B3}, {0x2F995, 0x82BD},
|
||||
{0x2F996, 0x82E6}, {0x2F997, 0x26B3C}, {0x2F998, 0x82E5}, {0x2F999, 0x831D}, {0x2F99A, 0x8363}, {0x2F99B, 0x83AD}, {0x2F99C, 0x8323}, {0x2F99D, 0x83BD}, {0x2F99E, 0x83E7}, {0x2F99F, 0x8457},
|
||||
{0x2F9A0, 0x8353}, {0x2F9A1, 0x83CA}, {0x2F9A2, 0x83CC}, {0x2F9A3, 0x83DC}, {0x2F9A4, 0x26C36}, {0x2F9A5, 0x26D6B}, {0x2F9A6, 0x26CD5}, {0x2F9A7, 0x452B}, {0x2F9A8, 0x84F1}, {0x2F9A9, 0x84F3},
|
||||
{0x2F9AA, 0x8516}, {0x2F9AB, 0x273CA}, {0x2F9AC, 0x8564}, {0x2F9AD, 0x26F2C}, {0x2F9AE, 0x455D}, {0x2F9AF, 0x4561}, {0x2F9B0, 0x26FB1}, {0x2F9B1, 0x270D2}, {0x2F9B2, 0x456B}, {0x2F9B3, 0x8650},
|
||||
{0x2F9B4, 0x865C}, {0x2F9B5, 0x8667}, {0x2F9B6, 0x8669}, {0x2F9B7, 0x86A9}, {0x2F9B8, 0x8688}, {0x2F9B9, 0x870E}, {0x2F9BA, 0x86E2}, {0x2F9BB, 0x8779}, {0x2F9BC, 0x8728}, {0x2F9BD, 0x876B},
|
||||
{0x2F9BE, 0x8786}, {0x2F9BF, 0x45D7}, {0x2F9C0, 0x87E1}, {0x2F9C1, 0x8801}, {0x2F9C2, 0x45F9}, {0x2F9C3, 0x8860}, {0x2F9C4, 0x8863}, {0x2F9C5, 0x27667}, {0x2F9C6, 0x88D7}, {0x2F9C7, 0x88DE},
|
||||
{0x2F9C8, 0x4635}, {0x2F9C9, 0x88FA}, {0x2F9CA, 0x34BB}, {0x2F9CB, 0x278AE}, {0x2F9CC, 0x27966}, {0x2F9CD, 0x46BE}, {0x2F9CE, 0x46C7}, {0x2F9CF, 0x8AA0}, {0x2F9D0, 0x8AED}, {0x2F9D1, 0x8B8A},
|
||||
{0x2F9D2, 0x8C55}, {0x2F9D3, 0x27CA8}, {0x2F9D4, 0x8CAB}, {0x2F9D5, 0x8CC1}, {0x2F9D6, 0x8D1B}, {0x2F9D7, 0x8D77}, {0x2F9D8, 0x27F2F}, {0x2F9D9, 0x20804}, {0x2F9DA, 0x8DCB}, {0x2F9DB, 0x8DBC},
|
||||
{0x2F9DC, 0x8DF0}, {0x2F9DD, 0x208DE}, {0x2F9DE, 0x8ED4}, {0x2F9DF, 0x8F38}, {0x2F9E0, 0x285D2}, {0x2F9E1, 0x285ED}, {0x2F9E2, 0x9094}, {0x2F9E3, 0x90F1}, {0x2F9E4, 0x9111}, {0x2F9E5, 0x2872E},
|
||||
{0x2F9E6, 0x911B}, {0x2F9E7, 0x9238}, {0x2F9E8, 0x92D7}, {0x2F9E9, 0x92D8}, {0x2F9EA, 0x927C}, {0x2F9EB, 0x93F9}, {0x2F9EC, 0x9415}, {0x2F9ED, 0x28BFA}, {0x2F9EE, 0x958B}, {0x2F9EF, 0x4995},
|
||||
{0x2F9F0, 0x95B7}, {0x2F9F1, 0x28D77}, {0x2F9F2, 0x49E6}, {0x2F9F3, 0x96C3}, {0x2F9F4, 0x5DB2}, {0x2F9F5, 0x9723}, {0x2F9F6, 0x29145}, {0x2F9F7, 0x2921A}, {0x2F9F8, 0x4A6E}, {0x2F9F9, 0x4A76},
|
||||
{0x2F9FA, 0x97E0}, {0x2F9FB, 0x2940A}, {0x2F9FC, 0x4AB2}, {0x2F9FD, 0x29496}, {0x2F9FE, 0x980B}, {0x2F9FF, 0x980B}, {0x2FA00, 0x9829}, {0x2FA01, 0x295B6}, {0x2FA02, 0x98E2}, {0x2FA03, 0x4B33},
|
||||
{0x2FA04, 0x9929}, {0x2FA05, 0x99A7}, {0x2FA06, 0x99C2}, {0x2FA07, 0x99FE}, {0x2FA08, 0x4BCE}, {0x2FA09, 0x29B30}, {0x2FA0A, 0x9B12}, {0x2FA0B, 0x9C40}, {0x2FA0C, 0x9CFD}, {0x2FA0D, 0x4CCE},
|
||||
{0x2FA0E, 0x4CED}, {0x2FA0F, 0x9D67}, {0x2FA10, 0x2A0CE}, {0x2FA11, 0x4CF8}, {0x2FA12, 0x2A105}, {0x2FA13, 0x2A20E}, {0x2FA14, 0x2A291}, {0x2FA15, 0x9EBB}, {0x2FA16, 0x4D56}, {0x2FA17, 0x9EF9},
|
||||
{0x2FA18, 0x9EFE}, {0x2FA19, 0x9F05}, {0x2FA1A, 0x9F0F}, {0x2FA1B, 0x9F16}, {0x2FA1D, 0x2A600},
|
||||
};
|
||||
|
||||
// Encode a single Unicode codepoint as a UTF-8 byte sequence (1 to 4 bytes).
// Throws std::invalid_argument when cp is greater than 0x10ffff.
static std::string codepoint_to_utf8(uint32_t cp) {
    // Reject out-of-range values before building anything.
    if (cp > 0x10ffff) {
        throw std::invalid_argument("invalid codepoint");
    }

    // Every continuation byte has the form 10xxxxxx and carries six payload bits.
    const auto tail = [cp](int shift) {
        return static_cast<char>(0x80 | ((cp >> shift) & 0x3f));
    };

    std::string encoded;
    if (cp < 0x80) {
        // 0xxxxxxx
        encoded += static_cast<char>(cp);
    } else if (cp < 0x800) {
        // 110xxxxx 10xxxxxx
        encoded += static_cast<char>(0xc0 | ((cp >> 6) & 0x1f));
        encoded += tail(0);
    } else if (cp < 0x10000) {
        // 1110xxxx 10xxxxxx 10xxxxxx
        encoded += static_cast<char>(0xe0 | ((cp >> 12) & 0x0f));
        encoded += tail(6);
        encoded += tail(0);
    } else {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        encoded += static_cast<char>(0xf0 | ((cp >> 18) & 0x07));
        encoded += tail(12);
        encoded += tail(6);
        encoded += tail(0);
    }
    return encoded;
}
|
||||
|
||||
static std::string codepoints_to_utf8(const std::vector<uint32_t> & cps) {
|
||||
std::string result;
|
||||
for (size_t i = 0; i < cps.size(); ++i) {
|
||||
result.append(codepoint_to_utf8(cps[i]));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Decode the UTF-8 sequence that starts at utf8[offset] and return its codepoint.
// On success, offset is advanced past the bytes that were consumed (1 to 4).
// Throws std::invalid_argument("invalid character") when the lead byte is a
// stray continuation byte, when the sequence is truncated, or when a trailing
// byte is not of the form 10xxxxxx; throws std::invalid_argument("invalid string")
// when the lead byte has its five top bits set. offset is left untouched on throw.
static uint32_t codepoint_from_utf8(const std::string & utf8, size_t & offset) {
    assert(offset < utf8.size());

    const size_t start = offset;
    const auto byte_at = [&utf8, start](size_t k) -> uint32_t {
        return static_cast<unsigned char>(utf8[start + k]);
    };

    const uint32_t lead = byte_at(0);

    // 0xxxxxxx: plain ASCII, a single byte.
    if (lead < 0x80) {
        offset = start + 1;
        return lead;
    }
    // 10xxxxxx: a continuation byte cannot start a sequence.
    if (lead < 0xc0) {
        throw std::invalid_argument("invalid character");
    }

    // Work out how many continuation bytes follow and which lead bits are payload.
    size_t   n_tail = 0;
    uint32_t value  = 0;
    if (lead < 0xe0) {          // 110xxxxx
        n_tail = 1;
        value  = lead & 0x1f;
    } else if (lead < 0xf0) {   // 1110xxxx
        n_tail = 2;
        value  = lead & 0x0f;
    } else if (lead < 0xf8) {   // 11110xxx
        n_tail = 3;
        value  = lead & 0x07;
    } else {
        throw std::invalid_argument("invalid string");
    }

    // The whole sequence must fit inside the string.
    if (start + n_tail >= utf8.size()) {
        throw std::invalid_argument("invalid character");
    }

    for (size_t k = 1; k <= n_tail; ++k) {
        const uint32_t cont = byte_at(k);
        if ((cont & 0xc0) != 0x80) {
            throw std::invalid_argument("invalid character");
        }
        value = (value << 6) | (cont & 0x3f);
    }

    offset = start + n_tail + 1;
    return value;
}
|
||||
|
||||
static std::vector<uint32_t> codepoints_from_utf8(const std::string & utf8) {
|
||||
std::vector<uint32_t> result;
|
||||
size_t offset = 0;
|
||||
while (offset < utf8.size()) {
|
||||
result.push_back(codepoint_from_utf8(utf8, offset));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Encodes one code point as UTF-16 code units.
// Values up to 0xffff are emitted as a single unit; values in 0x10000..0x10ffff
// become a high/low surrogate pair. Anything larger throws std::invalid_argument.
static std::vector<uint16_t> codepoint_to_utf16(uint32_t cp) {
    if (cp > 0x10ffff) {
        throw std::invalid_argument("invalid codepoint");
    }

    if (cp <= 0xffff) {
        return std::vector<uint16_t>(1, static_cast<uint16_t>(cp));
    }

    // Supplementary plane: split the 20-bit remainder into two 10-bit halves.
    const uint32_t rest = cp - 0x10000;
    std::vector<uint16_t> units;
    units.push_back(static_cast<uint16_t>(0xd800 | (rest >> 10)));
    units.push_back(static_cast<uint16_t>(0xdc00 | (rest & 0x03ff)));
    return units;
}
|
||||
|
||||
static std::vector<uint16_t> codepoints_to_utf16(const std::vector<uint32_t> & cps) {
|
||||
std::vector<uint16_t> result;
|
||||
for (size_t i = 0; i < cps.size(); ++i) {
|
||||
auto temp = codepoint_to_utf16(cps[i]);
|
||||
result.insert(result.end(), temp.begin(), temp.end());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Decodes the UTF-16 code point that starts at utf16[offset] and advances `offset`
// past the one or two code units consumed.
//
// A unit that is not a high surrogate (0xd800..0xdbff) is returned as-is. A high
// surrogate must be followed by a low surrogate (0xdc00..0xdfff); otherwise
// std::invalid_argument is thrown and `offset` is left unchanged.
static uint32_t codepoint_from_utf16(const std::vector<uint16_t> & utf16, size_t & offset) {
    assert(offset < utf16.size());

    // Read relative to `offset`, not from the start of the buffer, so that decoding
    // works for every position in the sequence.
    const uint16_t lead = utf16[offset];

    if ((lead & 0xfc00) != 0xd800) {
        offset += 1;
        return lead;
    }

    // The top six bits identify a low surrogate; masking with 0xfc00 rejects
    // 0xfc00..0xffff, which a 0xdc00 mask would wrongly accept.
    if (offset + 1 >= utf16.size() || (utf16[offset + 1] & 0xfc00) != 0xdc00) {
        throw std::invalid_argument("invalid character");
    }

    const uint16_t trail = utf16[offset + 1];
    const uint32_t result = 0x10000 + ((static_cast<uint32_t>(lead & 0x03ff) << 10) | (trail & 0x03ff));
    offset += 2;
    return result;
}
|
||||
|
||||
static std::vector<uint32_t> codepoints_from_utf16(const std::vector<uint16_t> & utf16) {
|
||||
std::vector<uint32_t> result;
|
||||
size_t offset = 0;
|
||||
while (offset < utf16.size()) {
|
||||
result.push_back(codepoint_from_utf16(utf16, offset));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Character classes stored in the table built by codepoint_type_map().
// codepoint_type() returns CODEPOINT_TYPE_UNIDENTIFIED for code points that are
// absent from that table.
#define CODEPOINT_TYPE_UNIDENTIFIED 0
#define CODEPOINT_TYPE_DIGIT        1
#define CODEPOINT_TYPE_LETTER       2
#define CODEPOINT_TYPE_WHITESPACE   3
#define CODEPOINT_TYPE_ACCENT_MARK  4
#define CODEPOINT_TYPE_PUNCTUATION  5
#define CODEPOINT_TYPE_SYMBOL       6
#define CODEPOINT_TYPE_CONTROL      7
|
||||
|
||||
static std::unordered_map<uint32_t, int> codepoint_type_map() {
|
||||
std::unordered_map<uint32_t, int> codepoint_types;
|
||||
for (auto p : digit_ranges) {
|
||||
for (auto i = p.first; i <= p.second; ++ i) {
|
||||
codepoint_types[i] = CODEPOINT_TYPE_DIGIT;
|
||||
}
|
||||
}
|
||||
for (auto p : letter_ranges) {
|
||||
for (auto i = p.first; i <= p.second; ++ i) {
|
||||
codepoint_types[i] = CODEPOINT_TYPE_LETTER;
|
||||
}
|
||||
}
|
||||
for (auto p : whitespace_ranges) {
|
||||
for (auto i = p.first; i <= p.second; ++ i) {
|
||||
codepoint_types[i] = CODEPOINT_TYPE_WHITESPACE;
|
||||
}
|
||||
}
|
||||
for (auto p : accent_mark_ranges) {
|
||||
for (auto i = p.first; i <= p.second; ++ i) {
|
||||
codepoint_types[i] = CODEPOINT_TYPE_ACCENT_MARK;
|
||||
}
|
||||
}
|
||||
for (auto p : punctuation_ranges) {
|
||||
for (auto i = p.first; i <= p.second; ++ i) {
|
||||
codepoint_types[i] = CODEPOINT_TYPE_PUNCTUATION;
|
||||
}
|
||||
}
|
||||
for (auto p : symbol_ranges) {
|
||||
for (auto i = p.first; i <= p.second; ++i) {
|
||||
codepoint_types[i] = CODEPOINT_TYPE_SYMBOL;
|
||||
}
|
||||
}
|
||||
for (auto p : control_ranges) {
|
||||
for (auto i = p.first; i <= p.second; ++ i) {
|
||||
codepoint_types[i] = CODEPOINT_TYPE_CONTROL;
|
||||
}
|
||||
}
|
||||
return codepoint_types;
|
||||
}
|
||||
// Declaration only: takes one code point and returns a std::string. The definition is
// not visible in this chunk — presumably the UTF-8 encoding of `cp`; confirm at the definition.
std::string unicode_cpt_to_utf8(uint32_t cp);
|
||||
// Declaration only: takes a string and returns a vector of 32-bit values. The definition is
// not visible in this chunk — presumably the decoded code points of `utf8`; confirm at the definition.
std::vector<uint32_t> unicode_cpts_from_utf8(const std::string & utf8);
|
||||
|
||||
static int codepoint_type(uint32_t cp) {
|
||||
static std::unordered_map<uint32_t, int> codepoint_types = codepoint_type_map();
|
||||
const auto it = codepoint_types.find(cp);
|
||||
return it == codepoint_types.end() ? CODEPOINT_TYPE_UNIDENTIFIED : it->second;
|
||||
}
|
||||
// Declaration only: takes a vector of code points and returns a new vector. The definition is
// not visible in this chunk — the name suggests Unicode NFD normalization; confirm at the definition.
std::vector<uint32_t> unicode_cpts_normalize_nfd(const std::vector<uint32_t> & cpts);
|
||||
|
||||
static int codepoint_type(const std::string & utf8) {
|
||||
if (utf8.length() == 0) {
|
||||
return CODEPOINT_TYPE_UNIDENTIFIED;
|
||||
}
|
||||
size_t offset = 0;
|
||||
return codepoint_type(codepoint_from_utf8(utf8, offset));
|
||||
}
|
||||
// Declaration only: takes a code point and returns an int. The definition is not visible in
// this chunk — presumably one of the CODEPOINT_TYPE_* values; confirm at the definition.
int unicode_cpt_type(uint32_t cp);
|
||||
// Declaration only: string overload of unicode_cpt_type. The definition is not visible in
// this chunk — presumably classifies the first code point of `utf8`; confirm at the definition.
int unicode_cpt_type(const std::string & utf8);
|
||||
|
||||
static std::unordered_map<uint8_t, std::string> bytes_to_unicode_map_bpe() {
|
||||
std::unordered_map<uint8_t, std::string> map;
|
||||
for (int ch = u'!'; ch <= u'~'; ++ch) {
|
||||
assert(0 <= ch && ch < 256);
|
||||
map[ch] = codepoint_to_utf8(ch);
|
||||
}
|
||||
for (int ch = u'¡'; ch <= u'¬'; ++ch) {
|
||||
assert(0 <= ch && ch < 256);
|
||||
map[ch] = codepoint_to_utf8(ch);
|
||||
}
|
||||
for (int ch = u'®'; ch <= u'ÿ'; ++ch) {
|
||||
assert(0 <= ch && ch < 256);
|
||||
map[ch] = codepoint_to_utf8(ch);
|
||||
}
|
||||
auto n = 0;
|
||||
for (int ch = 0; ch < 256; ++ch) {
|
||||
if (map.find(ch) == map.end()) {
|
||||
map[ch] = codepoint_to_utf8(256 + n);
|
||||
++n;
|
||||
}
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
static std::string bytes_to_unicode_bpe(uint8_t byte) {
|
||||
static std::unordered_map<uint8_t, std::string> map = bytes_to_unicode_map_bpe();
|
||||
return map.at(byte);
|
||||
}
|
||||
|
||||
static std::unordered_map<std::string, uint8_t> unicode_to_bytes_map_bpe() {
|
||||
std::unordered_map<std::string, uint8_t> map;
|
||||
for (int ch = u'!'; ch <= u'~'; ++ch) {
|
||||
assert(0 <= ch && ch < 256);
|
||||
map[codepoint_to_utf8(ch)] = ch;
|
||||
}
|
||||
for (int ch = u'¡'; ch <= u'¬'; ++ch) {
|
||||
assert(0 <= ch && ch < 256);
|
||||
map[codepoint_to_utf8(ch)] = ch;
|
||||
}
|
||||
for (int ch = u'®'; ch <= u'ÿ'; ++ch) {
|
||||
assert(0 <= ch && ch < 256);
|
||||
map[codepoint_to_utf8(ch)] = ch;
|
||||
}
|
||||
auto n = 0;
|
||||
for (int ch = 0; ch < 256; ++ch) {
|
||||
if (map.find(codepoint_to_utf8(ch)) == map.end()) {
|
||||
map[codepoint_to_utf8(256 + n)] = ch;
|
||||
++n;
|
||||
}
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
static uint8_t unicode_to_bytes_bpe(const std::string & utf8) {
|
||||
static std::unordered_map<std::string, uint8_t> map = unicode_to_bytes_map_bpe();
|
||||
return map.at(utf8);
|
||||
}
|
||||
// Declaration only: takes a byte and returns a std::string. The definition is not visible in
// this chunk — presumably the public counterpart of bytes_to_unicode_bpe; confirm at the definition.
std::string unicode_byte_to_utf8(uint8_t byte);
|
||||
// Declaration only: takes a string and returns a byte. The definition is not visible in
// this chunk — presumably the public counterpart of unicode_to_bytes_bpe; confirm at the definition.
uint8_t unicode_utf8_to_byte(const std::string & utf8);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user