Compare commits

..

6 Commits

Author SHA1 Message Date
Xuan-Son Nguyen e37abd6b5f mtmd: add batching API (#24384)
* mtmd: add batching API

* wip

* first working version (gemma4v)

* add arg

* nits

* wire up support_batch()

* fix 0.0 output embd

* fix audio

* nits

* refactor a bit

* nits

* fix non-batching case

* fix comment
2026-06-13 00:10:29 +02:00
Sigbjørn Skjæret f58bad4137 ci : unbreak release harder (#24545)
* unbreak release harder

* missed one

* remove missing test for now
2026-06-12 23:49:36 +02:00
Sigbjørn Skjæret cd5044661c ci : unbreak release (#24544) 2026-06-12 23:29:49 +03:00
Georgi Gerganov ebc10770ac server : fix reasoning budget WebUI precedence over model.ini (#24517)
When reasoning-budget is set in model.ini, the per-request
thinking_budget_tokens from the WebUI was ignored because the
model.ini value took unconditional precedence.

Swap the precedence so the WebUI per-request value is checked
first, with the model.ini value serving as a fallback default.

Assisted-by: pi:llama.cpp/Qwen3.6-27B
2026-06-12 17:59:56 +03:00
Ruben Ortlam 3e7bd4f39a vulkan: add pipeline barriers for memcpy read operations (#23770)
* vulkan: add pipeline barriers for memcpy read/write operations

* remove unnecessary host write pipeline barriers
2026-06-12 16:43:50 +02:00
Aleksander Grygier f7ca93d12c ui: PWA support (#23871)
* feat: Add basic PWA support and service worker for offline caching

* feat: Vite PWA implementation WIP

* feat: Improve PWA icons generation

* feat: Add PWA workbox to server routes

* feat: Include `version.json` in static assets

* feat: Add HTTP cache headers for PWA static assets

* feat: Update app name for `apple-mobile-web-app-title`

* feat: Implement PWA versioning and automatic update detection

* chore: Update `.gitignore` files

* feat: Splash Screens

* feat: Add dark mode favicon support

* refactor: Cleanup

* fix: Use dark logo for dark splash screens

* refactor: Simplify favicons SVG code

* fix: Adjust caching and polling for reliable service worker updates

* fix: Add missing favicon entry

* fix: Align PWA service worker configuration with SvelteKit build structure

* fix: Replace hashed bundle paths with versioned static paths

* test: Add PWA tests

* ci: Add build output for unit tests

* refactor: Cleanup

* fix: Server build & release versioning

* chore: Update package-lock.json

* chore: Increase PWA cache size

* chore: Update packages

* feat: Update favicons

* refactor: Post-merge fix

* feat: support explicit build version for PWA cache busting

* fix: CI

* feat: Improve PWA Refresh Alert UI

* feat: Add toggleable build version display

* refactor: Cleanup

* feat: Add version mismatch detection and manual app reload

* refactor: replace dynamic imports with static

* refactor: Cleanup

* feat: Add safe space for `pwa-<size>.png` rendered icons

* fix: use relative paths for PWA assets to support base path deployment

* feat: add PWA mode detection via URL query parameter

* feat: Use ?cache=true for SW-cached PWA assets

* refactor: Build process cleanup

* refactor: Decouple PWA versioning and remove ?cache=true workaround

* chore: Update README logo

* feat: Include PWA Assets generation in build script

* refactor: `usePwa` hook for core layout

* fix: Relativize base vite plugin

* fix: remove unnecessary backslash escapes in test regexes

* test: update static asset paths for API Key test

* refactor: Move SvelteKit PWA Options config to constants

* ui: fix update notification never appearing

Keep the PWA hook object intact instead of destructuring needRefreshByStorage,
which freezes the reactive getter. Also exclude loading.html from PWA
precache to prevent 404 errors and broken SW installation.
2026-06-12 15:53:26 +02:00
68 changed files with 9417 additions and 1598 deletions
+47 -13
View File
@@ -59,8 +59,31 @@ jobs:
echo "should_release=false" >> $GITHUB_OUTPUT
fi
# Computes the UI version string once per workflow run and exposes it as the
# `ui_version` job output. The build jobs list this job in `needs` and pass the
# value to CMake as -DHF_UI_VERSION; the ui-build job passes it as `hf_ui_version`.
get-version:
runs-on: ubuntu-slim
outputs:
ui_version: ${{ steps.version.outputs.ui_version }}
steps:
- uses: actions/checkout@v6
with:
fetch-depth: 0
- id: version
run: |
# Resolve UI version, in order of preference:
#   1. "b<BUILD_NUMBER>" when cmake/build-info.cmake sets a positive BUILD_NUMBER
#   2. "<short git hash>-<unix epoch seconds>" otherwise
version=""
if grep -q "BUILD_NUMBER" cmake/build-info.cmake; then
build_number=$(grep "set(BUILD_NUMBER" cmake/build-info.cmake | grep -oP '\d+')
if [ -n "$build_number" ] && [ "$build_number" -gt 0 ]; then
version="b${build_number}"
fi
fi
# No usable build number was found: derive the version from the current commit and time
if [ -z "$version" ]; then
version=$(git rev-parse --short HEAD)-$(date +%s)
fi
echo "ui_version=${version}" >> $GITHUB_OUTPUT
macos-cpu:
needs: [check-release]
needs: [check-release, get-version]
if: ${{ needs.check-release.outputs.should_release == 'true' }}
strategy:
matrix:
@@ -116,6 +139,7 @@ jobs:
-DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
-DLLAMA_FATAL_WARNINGS=ON \
-DLLAMA_BUILD_BORINGSSL=ON \
-DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }} \
${{ env.CMAKE_ARGS }}
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
@@ -141,7 +165,7 @@ jobs:
name: llama-bin-macos-${{ matrix.build }}.tar.gz
ubuntu-cpu:
needs: [check-release]
needs: [check-release, get-version]
if: ${{ needs.check-release.outputs.should_release == 'true' }}
strategy:
matrix:
@@ -201,6 +225,7 @@ jobs:
-DGGML_NATIVE=OFF \
-DGGML_CPU_ALL_VARIANTS=ON \
-DLLAMA_FATAL_WARNINGS=ON \
-DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }} \
${{ env.CMAKE_ARGS }}
cmake --build build --config Release -j $(nproc)
@@ -227,7 +252,7 @@ jobs:
name: llama-bin-ubuntu-${{ matrix.build }}.tar.gz
ubuntu-vulkan:
needs: [check-release]
needs: [check-release, get-version]
if: ${{ needs.check-release.outputs.should_release == 'true' }}
strategy:
@@ -287,6 +312,7 @@ jobs:
-DGGML_NATIVE=OFF \
-DGGML_CPU_ALL_VARIANTS=ON \
-DGGML_VULKAN=ON \
-DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }} \
${{ env.CMAKE_ARGS }}
cmake --build build --config Release -j $(nproc)
@@ -312,7 +338,7 @@ jobs:
name: llama-bin-ubuntu-vulkan-${{ matrix.build }}.tar.gz
android-arm64:
needs: [check-release]
needs: [check-release, get-version]
if: ${{ needs.check-release.outputs.should_release == 'true' }}
runs-on: ubuntu-latest
@@ -379,6 +405,7 @@ jobs:
-DLLAMA_FATAL_WARNINGS=ON \
-DGGML_OPENMP=OFF \
-DLLAMA_BUILD_BORINGSSL=ON \
-DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }} \
${{ env.CMAKE_ARGS }}
cmake --build build --config Release -j $(nproc)
@@ -404,7 +431,7 @@ jobs:
name: llama-bin-android-arm64.tar.gz
ubuntu-24-openvino:
needs: [check-release]
needs: [check-release, get-version]
if: ${{ needs.check-release.outputs.should_release == 'true' }}
runs-on: ubuntu-24.04
@@ -476,7 +503,8 @@ jobs:
source ./openvino_toolkit/setupvars.sh
cmake -B build/ReleaseOV -G Ninja \
-DCMAKE_BUILD_TYPE=Release \
-DGGML_OPENVINO=ON
-DGGML_OPENVINO=ON \
-DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }}
cmake --build build/ReleaseOV --config Release -j $(nproc)
- name: ccache-clear
@@ -952,7 +980,7 @@ jobs:
name: llama-bin-ubuntu-sycl-${{ matrix.build }}-x64.tar.gz
ubuntu-22-rocm:
needs: [check-release]
needs: [check-release, get-version]
if: ${{ needs.check-release.outputs.should_release == 'true' }}
runs-on: ubuntu-22.04
@@ -1044,6 +1072,7 @@ jobs:
-DGGML_HIP=ON \
-DHIP_PLATFORM=amd \
-DGGML_HIP_ROCWMMA_FATTN=ON \
-DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }} \
${{ env.CMAKE_ARGS }}
cmake --build build --config Release -j $(nproc)
@@ -1072,7 +1101,7 @@ jobs:
name: llama-bin-ubuntu-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.tar.gz
windows-hip:
needs: [check-release]
needs: [check-release, get-version]
if: ${{ needs.check-release.outputs.should_release == 'true' }}
runs-on: windows-2022
@@ -1168,6 +1197,7 @@ jobs:
-DGPU_TARGETS="${{ matrix.gpu_targets }}" `
-DGGML_HIP_ROCWMMA_FATTN=ON `
-DGGML_HIP=ON `
-DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }} `
-DLLAMA_BUILD_BORINGSSL=ON
cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS}
md "build\bin\rocblas\library\"
@@ -1195,7 +1225,7 @@ jobs:
name: llama-bin-win-hip-${{ matrix.name }}-x64.zip
ios-xcode:
needs: [check-release]
needs: [check-release, get-version]
if: ${{ needs.check-release.outputs.should_release == 'true' }}
runs-on: macos-26
@@ -1224,7 +1254,8 @@ jobs:
-DLLAMA_BUILD_SERVER=OFF \
-DCMAKE_SYSTEM_NAME=iOS \
-DCMAKE_OSX_DEPLOYMENT_TARGET=16.0 \
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml \
-DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }}
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
- name: xcodebuild for swift package
@@ -1344,10 +1375,12 @@ jobs:
# path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz
# name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz
ui:
needs: [check-release]
ui-build:
needs: [check-release, get-version]
if: ${{ needs.check-release.outputs.should_release == 'true' }}
uses: ./.github/workflows/ui-build.yml
with:
hf_ui_version: ${{ needs.get-version.outputs.ui_version }}
release:
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
@@ -1360,6 +1393,7 @@ jobs:
runs-on: ubuntu-slim
needs:
- get-version
- windows
- windows-cpu
- windows-cuda
@@ -1374,7 +1408,7 @@ jobs:
- macos-cpu
- ios-xcode
#- openEuler-cann
- ui
- ui-build
outputs:
tag_name: ${{ steps.tag.outputs.name }}
+12
View File
@@ -2,6 +2,11 @@ name: UI Build
on:
workflow_call:
inputs:
hf_ui_version:
description: 'Version string for version.json (e.g. 12345)'
required: false
type: string
jobs:
build:
@@ -25,9 +30,16 @@ jobs:
working-directory: tools/ui
- name: Build application
env:
HF_UI_VERSION: ${{ inputs.hf_ui_version || '' }}
LLAMA_UI_VERSION: ${{ inputs.hf_ui_version || 'b0000' }}
run: npm run build
working-directory: tools/ui
- name: Run PWA unit tests (versioned build output)
run: npx vitest --project=unit --run tests/unit/pwa.spec.ts
working-directory: tools/ui
- name: Generate checksums
run: |
cd tools/ui/dist
+18 -11
View File
@@ -1,8 +1,8 @@
name: UI (self-hosted)
# these are the same as ui.yml, but with self-hosted runners
# the runners come with pre-installed Playwright browsers version: 1.56.1
# the jobs are much lighter because they don't need to install node and playwright browsers
# the jobs are lighter because they don't need to install Node.js or Playwright browsers
# the runner has pre-installed Playwright browsers for @playwright/test (1.56.1) at /ms-playwright/
on:
workflow_dispatch:
@@ -61,6 +61,12 @@ jobs:
run: npm ci
working-directory: tools/ui
- name: Download built UI artifacts
uses: actions/download-artifact@v6
with:
name: ui-build
path: tools/ui/dist/
- name: Run type checking
if: ${{ always() && steps.setup.conclusion == 'success' }}
run: npm run check
@@ -72,12 +78,12 @@ jobs:
working-directory: tools/ui
- name: Run Client tests
if: ${{ always() }}
if: ${{ always() && steps.setup.conclusion == 'success' }}
run: npm run test:client
working-directory: tools/ui
- name: Run Unit tests
if: ${{ always() }}
if: ${{ always() && steps.setup.conclusion == 'success' }}
run: npm run test:unit
working-directory: tools/ui
@@ -97,22 +103,23 @@ jobs:
run: npm ci
working-directory: tools/ui
- name: Build application
if: ${{ always() && steps.setup.conclusion == 'success' }}
run: npm run build
working-directory: tools/ui
- name: Download built UI artifacts
uses: actions/download-artifact@v6
with:
name: ui-build
path: tools/ui/dist/
- name: Build Storybook
if: ${{ always() }}
if: ${{ always() && steps.setup.conclusion == 'success' }}
run: npm run build-storybook
working-directory: tools/ui
- name: Run UI tests
if: ${{ always() }}
if: ${{ always() && steps.setup.conclusion == 'success' }}
run: npm run test:ui -- --testTimeout=60000
working-directory: tools/ui
- name: Run E2E tests
if: ${{ always() }}
if: ${{ always() && steps.setup.conclusion == 'success' }}
run: npm run test:e2e
working-directory: tools/ui
+15 -8
View File
@@ -43,7 +43,7 @@ jobs:
ui-checks:
name: Checks
needs: ui-build
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
continue-on-error: true
steps:
- name: Checkout code
@@ -60,6 +60,12 @@ jobs:
cache: "npm"
cache-dependency-path: "tools/ui/package-lock.json"
- name: Download built UI artifacts
uses: actions/download-artifact@v6
with:
name: ui-build
path: tools/ui/dist/
- name: Install dependencies
id: setup
if: ${{ steps.node.conclusion == 'success' }}
@@ -87,7 +93,7 @@ jobs:
run: npm run test:client
working-directory: tools/ui
- name: Run Unit tests
- name: Run Unit tests (uses pre-built dist/ from ui-build)
if: ${{ always() && steps.playwright.conclusion == 'success' }}
run: npm run test:unit
working-directory: tools/ui
@@ -95,7 +101,7 @@ jobs:
e2e-tests:
name: E2E Tests
needs: ui-build
runs-on: ubuntu-latest
runs-on: ubuntu-24.04
steps:
- name: Checkout code
uses: actions/checkout@v6
@@ -117,10 +123,11 @@ jobs:
run: npm ci
working-directory: tools/ui
- name: Build application
if: ${{ always() && steps.setup.conclusion == 'success' }}
run: npm run build
working-directory: tools/ui
- name: Download built UI artifacts (reuses ui-build)
uses: actions/download-artifact@v6
with:
name: ui-build
path: tools/ui/dist/
- name: Install Playwright browsers
id: playwright
@@ -138,7 +145,7 @@ jobs:
run: npm run test:ui -- --testTimeout=60000
working-directory: tools/ui
- name: Run E2E tests
- name: Run E2E tests (uses pre-built dist/ from ui-build)
if: ${{ always() && steps.playwright.conclusion == 'success' }}
run: npm run test:e2e
working-directory: tools/ui
-7
View File
@@ -92,13 +92,6 @@
!/examples/sycl/*.bat
!/examples/sycl/*.sh
# Server Web UI temporary files (+ legacy directory)
/tools/server/webui/node_modules
/tools/server/webui/dist
/tools/ui/node_modules
/tools/ui/dist
# Python
/.venv
+1 -1
View File
@@ -1,6 +1,6 @@
# llama.cpp
![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png)
![llama](https://raw.githubusercontent.com/ggml-org/llama.brand/refs/heads/master/cover/llama-cpp/cover-llama-cpp-dark.svg)
[![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![Release](https://img.shields.io/github/v/release/ggml-org/llama.cpp)](https://github.com/ggml-org/llama.cpp/releases)
+7
View File
@@ -2243,6 +2243,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
params.image_max_tokens = value;
}
).set_examples(mmproj_examples).set_env("LLAMA_ARG_IMAGE_MAX_TOKENS"));
add_opt(common_arg(
{"--mtmd-batch-max-tokens"}, "N",
string_format("maximum number of image tokens per batch when encoding images (default: %d)", params.mtmd_batch_max_tokens),
[](common_params & params, int value) {
params.mtmd_batch_max_tokens = value;
}
).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MTMD_BATCH_MAX_TOKENS"));
if (llama_supports_rpc()) {
add_opt(common_arg(
{"--rpc"}, "SERVERS",
+1
View File
@@ -575,6 +575,7 @@ struct common_params {
std::vector<std::string> image; // path to image file(s) ; TODO: change the name to "media"
int image_min_tokens = -1;
int image_max_tokens = -1;
int mtmd_batch_max_tokens = 1024;
// finetune
struct lr_opt lr;
+17
View File
@@ -7741,6 +7741,23 @@ static void ggml_vk_buffer_read_2d(vk_buffer& src, size_t offset, void * dst, si
if(src->memory_property_flags & vk::MemoryPropertyFlagBits::eHostVisible && src->device->uma) {
GGML_ASSERT(src->memory_property_flags & vk::MemoryPropertyFlagBits::eHostCoherent);
std::lock_guard<std::recursive_mutex> guard(src->device->mutex);
vk_context subctx = ggml_vk_create_temporary_context(src->device->compute_queue.cmd_pool);
ggml_vk_ctx_begin(src->device, subctx);
subctx->s->buffer->buf.pipelineBarrier(
vk::PipelineStageFlagBits::eComputeShader | vk::PipelineStageFlagBits::eTransfer,
vk::PipelineStageFlagBits::eHost,
{},
{ { vk::AccessFlagBits::eShaderWrite | vk::AccessFlagBits::eTransferWrite,
vk::AccessFlagBits::eHostRead } },
{}, {});
ggml_vk_ctx_end(subctx);
ggml_vk_submit(subctx, src->device->fence);
VK_CHECK(src->device->device.waitForFences({ src->device->fence }, true, UINT64_MAX),
"vk_buffer_read_2d uma waitForFences");
src->device->device.resetFences({ src->device->fence });
ggml_vk_queue_command_pools_cleanup(src->device);
if (width == spitch && width == dpitch) {
memcpy(dst, (const uint8_t *) src->ptr + offset, width * height);
} else {
+101 -3
View File
@@ -16,11 +16,80 @@ set(HF_ENABLED "" CACHE STRING "Whether to allow HF Bucket download (ON/O
set(BUILD_UI "" CACHE STRING "Build UI via npm (ON/OFF)")
set(LLAMA_UI_EMBED "" CACHE STRING "Path to llama-ui-embed helper")
# IMPORTANT: PWA asset names are listed in 3 places that must be kept in sync:
# 1. tools/ui/src/lib/constants/pwa.ts (TypeScript: APPLE_DEVICES, PWA_MANIFEST, PUBLIC_ENDPOINTS)
# 2. tools/server/server-http.cpp (C++: public_endpoints; splash screens generated via helper)
# 3. scripts/ui-assets.cmake (ASSETS list)
#
# When adding/changing PWA assets, update tools/ui/src/lib/constants/pwa.ts first,
# then sync any new file names here and in server-http.cpp.
set(ASSETS
bundle.css
bundle.js
index.html
loading.html
# PWA assets
favicon.ico
favicon-dark.ico
favicon.svg
favicon-dark.svg
pwa-64x64.png
pwa-192x192.png
pwa-512x512.png
maskable-icon-512x512.png
apple-touch-icon-180x180.png
# iOS splash screens
apple-splash-portrait-640x1136.png
apple-splash-landscape-1136x640.png
apple-splash-portrait-750x1334.png
apple-splash-landscape-1334x750.png
apple-splash-portrait-1170x2532.png
apple-splash-landscape-2532x1170.png
apple-splash-portrait-1179x2556.png
apple-splash-landscape-2556x1179.png
apple-splash-portrait-1206x2622.png
apple-splash-landscape-2622x1206.png
apple-splash-portrait-1284x2778.png
apple-splash-landscape-2778x1284.png
apple-splash-portrait-1290x2796.png
apple-splash-landscape-2796x1290.png
apple-splash-portrait-1320x2868.png
apple-splash-landscape-2868x1320.png
apple-splash-portrait-1488x2266.png
apple-splash-landscape-2266x1488.png
apple-splash-portrait-1640x2360.png
apple-splash-landscape-2360x1640.png
apple-splash-portrait-1668x2388.png
apple-splash-landscape-2388x1668.png
apple-splash-portrait-2048x2732.png
apple-splash-landscape-2732x2048.png
# iOS dark splash screens
apple-splash-portrait-dark-640x1136.png
apple-splash-landscape-dark-1136x640.png
apple-splash-portrait-dark-750x1334.png
apple-splash-landscape-dark-1334x750.png
apple-splash-portrait-dark-1170x2532.png
apple-splash-landscape-dark-2532x1170.png
apple-splash-portrait-dark-1179x2556.png
apple-splash-landscape-dark-2556x1179.png
apple-splash-portrait-dark-1206x2622.png
apple-splash-landscape-dark-2622x1206.png
apple-splash-portrait-dark-1284x2778.png
apple-splash-landscape-dark-2778x1284.png
apple-splash-portrait-dark-1290x2796.png
apple-splash-landscape-dark-2796x1290.png
apple-splash-portrait-dark-1320x2868.png
apple-splash-landscape-dark-2868x1320.png
apple-splash-portrait-dark-1640x2360.png
apple-splash-landscape-dark-2360x1640.png
apple-splash-portrait-dark-1668x2388.png
apple-splash-landscape-dark-2388x1668.png
apple-splash-portrait-dark-2048x2732.png
apple-splash-landscape-dark-2732x2048.png
manifest.webmanifest
sw.js
_app/version.json
build.json
)
set(DIST_DIR "${UI_BINARY_DIR}/dist")
@@ -159,7 +228,7 @@ function(npm_build out_var)
message(STATUS "UI: running npm run build, output -> ${DIST_DIR}")
execute_process(
COMMAND ${CMAKE_COMMAND} -E env "LLAMA_UI_OUT_DIR=${DIST_DIR}"
COMMAND ${CMAKE_COMMAND} -E env "LLAMA_UI_OUT_DIR=${DIST_DIR}" "LLAMA_UI_VERSION=${HF_VERSION}" "LLAMA_BUILD_NUMBER=${LLAMA_BUILD_NUMBER}"
${NPM_EXECUTABLE} run build
WORKING_DIRECTORY "${UI_SOURCE_DIR}"
RESULT_VARIABLE rc
@@ -274,8 +343,35 @@ function(emit_files)
foreach(asset ${ASSETS})
list(APPEND args "${asset}" "${DIST_DIR}/${asset}")
endforeach()
# Bundle files live in _app/immutable/ — vanilla SvelteKit output, no plugin
# rewriting. Embedded names must match the exact _app/ paths that index.html
# and sw.js reference.
file(GLOB_RECURSE detected_bundle_js "${DIST_DIR}/_app/immutable/bundle.*.js")
file(GLOB_RECURSE detected_bundle_css "${DIST_DIR}/_app/immutable/assets/bundle.*.css")
file(GLOB_RECURSE detected_workbox "${DIST_DIR}/workbox-*.js")
# Compute relative path from DIST_DIR to each found file.
# e.g. /path/to/build/tools/ui/dist/_app/immutable/bundle.XXX.js
# -> _app/immutable/bundle.XXX.js
foreach(f ${detected_bundle_js})
string(REPLACE "${DIST_DIR}/" "" rel "${f}")
list(APPEND args "${rel}" "${f}")
endforeach()
foreach(f ${detected_bundle_css})
string(REPLACE "${DIST_DIR}/" "" rel "${f}")
list(APPEND args "${rel}" "${f}")
endforeach()
foreach(f ${detected_workbox})
string(REPLACE "${DIST_DIR}/" "" rel "${f}")
list(APPEND args "${rel}" "${f}")
endforeach()
endif()
# build.json carries the llama.cpp build number for UI version display.
# It is separate from SvelteKit's _app/version.json (used for SW cache invalidation).
# build.json is generated by the vite plugin (buildInfoPlugin) during npm build;
# CMake does not create it, it just embeds the copy from the dist that npm produced.
execute_process(
COMMAND "${LLAMA_UI_EMBED}" ${args}
RESULT_VARIABLE rc
@@ -300,6 +396,8 @@ endif()
set(provisioned FALSE)
if(BUILD_UI)
# Resolve version from git build-info if not explicitly set
resolve_version(HF_VERSION)
npm_build(NPM_OK)
if(NPM_OK)
set(provisioned TRUE)
+4
View File
@@ -54,6 +54,10 @@ struct clip_graph {
virtual ggml_tensor * build_mm(ggml_tensor * w, ggml_tensor * x) const;
// TODO: build_mm(w, b, x) to support bias
// Reports whether this graph builder supports batched encoding.
// The base implementation returns false; a graph type opts in by overriding
// this to return true (clip_graph_gemma4v does so).
// The result is cached in clip_ctx::support_batch during init_ctx(); when it is
// false, clip_image_batch_encode() rejects batches larger than
// clip_model_n_temporal_merge().
virtual bool support_batch() const {
return false;
}
//
// utility functions
//
+54 -23
View File
@@ -171,6 +171,8 @@ struct clip_ctx {
std::map<ggml_backend_dev_t, size_t> mem_usage;
std::map<ggml_backend_dev_t, size_t> mem_compute;
bool support_batch = false;
clip_ctx(clip_context_params & ctx_params) {
flash_attn_type = ctx_params.flash_attn_type;
no_alloc = ctx_params.no_alloc;
@@ -314,7 +316,7 @@ ggml_tensor * clip_graph::build_vit(
std::function<ggml_tensor *(ggml_tensor *, const clip_layer &)> add_pos,
const build_vit_opts & opts
) {
// batch dim: inp is [n_embd, n_pos] (B==1) or [n_embd, n_pos, B] (multi-tile encode)
// batch dim: inp is [n_embd, n_pos, B]
const int64_t B = inp->ne[2];
if (learned_pos_embd) {
@@ -862,7 +864,7 @@ ggml_tensor * clip_graph::build_patch_merge_permute(ggml_tensor * cur, int scale
return cur;
}
static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32_batch & imgs) {
static std::unique_ptr<clip_graph> clip_get_graph_builder(clip_ctx * ctx, const clip_image_f32_batch & imgs) {
const clip_image_f32 & img = *imgs.entries[0];
std::unique_ptr<clip_graph> builder;
@@ -1025,7 +1027,7 @@ static ggml_cgraph * clip_image_build_graph(clip_ctx * ctx, const clip_image_f32
// TODO [QWEN_VIDEO]: improve this in the future
builder->n_batch = imgs.entries.size();
return builder->build();
return builder;
}
//
@@ -2819,7 +2821,7 @@ struct clip_model_loader {
std::vector<support_info_op> ops;
};
static void warmup(clip_ctx & ctx_clip) {
static clip_image_f32_batch get_dummy_batch(clip_ctx & ctx_clip) {
// create a fake batch
const auto & hparams = ctx_clip.model.hparams;
clip_image_f32_batch batch;
@@ -2833,6 +2835,20 @@ struct clip_model_loader {
LOG_INF("%s: warmup with audio size = %d\n", __func__, hparams.warmup_audio_size);
}
batch.entries.push_back(std::move(img));
return batch;
}
// One-time setup of a freshly loaded clip context.
// clip_init() calls this after load_tensors() and before the optional warmup.
//  - sizes buf_compute_meta to hold max_nodes tensor headers plus one graph header
//  - records in ctx_clip.support_batch whether the model's graph builder supports batching
static void init_ctx(clip_ctx & ctx_clip) {
// the metadata buffer is sized here, before the first graph builder is created below
ctx_clip.buf_compute_meta.resize(ctx_clip.max_nodes * ggml_tensor_overhead() + ggml_graph_overhead());
// check batching support
// a dummy batch is used only to select the graph builder; build() is not called here
auto batch = get_dummy_batch(ctx_clip);
auto builder = clip_get_graph_builder(&ctx_clip, batch);
ctx_clip.support_batch = builder->support_batch();
}
// Convenience overload: builds the dummy batch via get_dummy_batch() and
// forwards to warmup(ctx_clip, batch).
static void warmup(clip_ctx & ctx_clip) {
auto batch = get_dummy_batch(ctx_clip);
warmup(ctx_clip, batch);
}
@@ -2905,9 +2921,7 @@ struct clip_model_loader {
// only initialize backend buffers, but do not allocate them yet
static support_info_graph reserve_compute_meta(clip_ctx & ctx_clip, const clip_image_f32_batch & batch) {
ctx_clip.buf_compute_meta.resize(ctx_clip.max_nodes * ggml_tensor_overhead() + ggml_graph_overhead());
ggml_cgraph * gf = clip_image_build_graph(&ctx_clip, batch);
ggml_cgraph * gf = clip_get_graph_builder(&ctx_clip, batch)->build();
ggml_backend_sched_reserve(ctx_clip.sched.get(), gf);
ctx_clip.mem_compute.clear();
@@ -3070,6 +3084,7 @@ struct clip_init_result clip_init(const char * fname, struct clip_context_params
ctx_vision = new clip_ctx(ctx_params);
loader.load_hparams(ctx_vision->model, CLIP_MODALITY_VISION);
loader.load_tensors(*ctx_vision);
loader.init_ctx(*ctx_vision);
if (ctx_params.warmup) {
loader.warmup(*ctx_vision);
}
@@ -3083,6 +3098,7 @@ struct clip_init_result clip_init(const char * fname, struct clip_context_params
ctx_audio = new clip_ctx(ctx_params);
loader.load_hparams(ctx_audio->model, CLIP_MODALITY_AUDIO);
loader.load_tensors(*ctx_audio);
loader.init_ctx(*ctx_audio);
if (ctx_params.warmup) {
loader.warmup(*ctx_audio);
}
@@ -3484,25 +3500,22 @@ int clip_n_output_tokens(const struct clip_ctx * ctx, struct clip_image_f32 * im
return n_patches;
}
bool clip_image_encode(struct clip_ctx * ctx, const int n_threads, clip_image_f32 * img, float * vec) {
bool clip_image_encode(struct clip_ctx * ctx, const int n_threads, clip_image_f32 * img, std::vector<float> & out_vec) {
clip_image_f32_batch imgs;
clip_image_f32_ptr img_copy(clip_image_f32_init());
*img_copy = *img;
imgs.entries.push_back(std::move(img_copy));
return clip_image_batch_encode(ctx, n_threads, &imgs, vec);
return clip_image_batch_encode(ctx, n_threads, &imgs, out_vec);
}
bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs_c_ptr, float * vec) {
bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_image_f32_batch * imgs_c_ptr, std::vector<float> & out_batch_embd) {
const clip_image_f32_batch & imgs = *imgs_c_ptr;
int n_batch_cur = imgs.entries.size();
// maximum supported batch size, usually == 2 for qwen-vl-based models
int n_batch_max = clip_model_n_batch_max(ctx);
// TODO @ngxson : implement batch size > 1 as a loop
// we don't need true batching support because the cgraph will gonna be big anyway
if (n_batch_cur > n_batch_max) {
// [QWEN_VIDEO] for video models, the batch dimension is used as temporal dimension for merged frames
if (!ctx->support_batch && n_batch_cur > clip_model_n_temporal_merge(ctx)) {
LOG_ERR("%s: batch size %d exceeds maximum supported batch/temporal-merge size %d\n", __func__, n_batch_cur, clip_model_n_temporal_merge(ctx));
return false;
}
@@ -3513,7 +3526,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
// build the inference graph
ggml_backend_sched_reset(ctx->sched.get());
ggml_cgraph * gf = clip_image_build_graph(ctx, imgs);
ggml_cgraph * gf = clip_get_graph_builder(ctx, imgs)->build();
ggml_backend_sched_alloc_graph(ctx->sched.get(), gf);
// set inputs
@@ -3582,6 +3595,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
const int n = nx * ny;
for (int b = 0; b < n_batch_cur; b++) {
LOG_DBG("%s: copying image %d/%d to input buffer (nx=%d, ny=%d)\n", __func__, b+1, n_batch_cur, nx, ny);
const auto & buf = imgs.entries[b]->get_ro_buf();
float * batch_entry = inp_raw.data() + b * (3*n);
for (int y = 0; y < ny; y++) {
@@ -4416,7 +4430,7 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
// the last node is the embedding tensor
ggml_tensor * embeddings = ggml_graph_node(gf, -1);
// sanity check (only support batch size of 1 for now)
// sanity check (assuming that all images in batch have the same number of tokens, so we only check the first one)
const int n_tokens_out = embeddings->ne[1];
const int expected_n_tokens_out = clip_n_output_tokens(ctx, imgs.entries[0].get());
if (n_tokens_out != expected_n_tokens_out) {
@@ -4424,16 +4438,26 @@ bool clip_image_batch_encode(clip_ctx * ctx, const int n_threads, const clip_ima
GGML_ABORT("Invalid number of output tokens");
}
// copy the embeddings to the location passed by the user
if (vec != nullptr) {
ggml_backend_tensor_get(embeddings, vec, 0, ggml_nbytes(embeddings));
LOG_DBG("%s: output embedding shape [%d, %d, %d]\n", __func__,
(int)embeddings->ne[0], (int)embeddings->ne[1], (int)embeddings->ne[2]);
// copy output to user buffer if provided
// if output is empty, skip the copy
if (!out_batch_embd.empty()) {
if (out_batch_embd.size() != (size_t)ggml_nelements(embeddings)) {
LOG_ERR("%s: output buffer has %zu elements but expected %zu\n", __func__, out_batch_embd.size(), (size_t)ggml_nelements(embeddings));
GGML_ABORT("Output buffer size mismatch");
}
ggml_backend_tensor_get(embeddings, out_batch_embd.data(), 0, ggml_nbytes(embeddings));
} else {
LOG_WRN("%s: output buffer is empty, skipping copy\n", __func__);
}
// Debug: dump final embeddings if MTMD_DEBUG_EMBEDDINGS is set
if (ctx->debug_output_embeddings) {
const int64_t n_embd = embeddings->ne[0];
const int64_t n_tokens = embeddings->ne[1];
std::vector<float> emb_data(n_embd * n_tokens);
std::vector<float> emb_data(ggml_nelements(embeddings));
ggml_backend_tensor_get(embeddings, emb_data.data(), 0, ggml_nbytes(embeddings));
LOG_INF("\n=== MTMD_DEBUG_EMBEDDINGS ===\n");
@@ -4570,7 +4594,14 @@ bool clip_has_audio_encoder(const struct clip_ctx * ctx) {
return ctx->model.modality == CLIP_MODALITY_AUDIO;
}
int clip_model_n_batch_max(const struct clip_ctx * ctx) {
// Returns the batching flag stored on the context.
// The flag is filled in by clip_model_loader::init_ctx() from the graph builder's
// support_batch(); it defaults to false.
bool clip_support_batch(const struct clip_ctx * ctx) {
return ctx->support_batch;
}
// TODO @ngxson : this is no longer correct with mtmd_batch API
// this was only meant to be used by qwen-vl-based models, to fuse 2 input images into one (qwen-vl video support)
// this logic should be refactored in near future to distinctly handle "merge frames" and "batching"
int clip_model_n_temporal_merge(const struct clip_ctx * ctx) {
switch (ctx->proj_type()) {
case PROJECTOR_TYPE_QWEN2VL:
case PROJECTOR_TYPE_QWEN25VL:
+5 -3
View File
@@ -97,8 +97,8 @@ size_t clip_image_f32_batch_nx(const struct clip_image_f32_batch * batch, int id
size_t clip_image_f32_batch_ny(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->ny
struct clip_image_f32 * clip_image_f32_get_img(const struct clip_image_f32_batch * batch, int idx); // equivalent to batch[idx]->data
bool clip_image_encode (struct clip_ctx * ctx, int n_threads, struct clip_image_f32 * img, float * vec);
bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, float * vec);
bool clip_image_encode (struct clip_ctx * ctx, int n_threads, struct clip_image_f32 * img, std::vector<float> & out_vec);
bool clip_image_batch_encode(struct clip_ctx * ctx, int n_threads, const struct clip_image_f32_batch * imgs, std::vector<float> & out_batch_embd);
bool clip_is_llava(const struct clip_ctx * ctx);
// note for contributor: this clip_is_(model) pattern is deprecated
@@ -107,7 +107,9 @@ bool clip_is_llava(const struct clip_ctx * ctx);
bool clip_has_vision_encoder(const struct clip_ctx * ctx);
bool clip_has_audio_encoder(const struct clip_ctx * ctx);
int clip_model_n_batch_max(const struct clip_ctx * ctx);
bool clip_support_batch(const struct clip_ctx * ctx);
int clip_model_n_temporal_merge(const struct clip_ctx * ctx); // TODO @ngxson : remove, refactor this
std::map<ggml_backend_dev_t, size_t> clip_get_mem_usage(const struct clip_ctx * ctx);
+11 -9
View File
@@ -10,7 +10,7 @@ ggml_cgraph * clip_graph_gemma4v::build() {
ggml_set_name(inp_raw, "inp_raw_scaled");
ggml_tensor * inp = ggml_conv_2d(ctx0, model.patch_embeddings_0, inp_raw, patch_size, patch_size, 0, 0, 1, 1);
inp = ggml_reshape_2d(ctx0, inp, n_patches, n_embd);
inp = ggml_reshape_3d(ctx0, inp, n_patches, n_embd, n_batch);
inp = ggml_cont(ctx0, ggml_transpose(ctx0, inp));
ggml_set_name(inp, "inp");
// note: no patch bias
@@ -51,10 +51,11 @@ ggml_cgraph * clip_graph_gemma4v::build() {
// first half
ggml_tensor * first;
{
first = ggml_view_3d(ctx0, cur,
n_dim/2, n_head, n_pos,
first = ggml_view_4d(ctx0, cur,
n_dim/2, n_head, n_pos, n_batch,
cur->nb[1],
cur->nb[2],
cur->nb[3],
0);
first = ggml_rope_ext(
ctx0,
@@ -70,10 +71,11 @@ ggml_cgraph * clip_graph_gemma4v::build() {
// second half
ggml_tensor * second;
{
second = ggml_view_3d(ctx0, cur,
n_dim/2, n_head, n_pos,
second = ggml_view_4d(ctx0, cur,
n_dim/2, n_head, n_pos, n_batch,
cur->nb[1],
cur->nb[2],
cur->nb[3],
n_dim/2 * ggml_element_size(cur));
second = ggml_rope_ext(
ctx0,
@@ -103,14 +105,14 @@ ggml_cgraph * clip_graph_gemma4v::build() {
const int kernel_size = hparams.n_merge;
GGML_ASSERT(kernel_size > 0);
// [n_embd, n_patches] -> [n_patches_x, n_patches_y, n_embd, 1]
cur = ggml_cont_4d(ctx0, ggml_transpose(ctx0, cur), n_patches_x, n_patches_y, n_embd, 1);
// [n_embd, n_patches] -> [n_patches_x, n_patches_y, n_embd, n_batch]
cur = ggml_cont_4d(ctx0, ggml_transpose(ctx0, cur), n_patches_x, n_patches_y, n_embd, n_batch);
cur = ggml_pool_2d(ctx0, cur, GGML_OP_POOL_AVG,
kernel_size, kernel_size, kernel_size, kernel_size, 0, 0);
const int out_x = n_patches_x / kernel_size;
const int out_y = n_patches_y / kernel_size;
// [out_x, out_y, n_embd, 1] -> [n_embd, out_x * out_y]
cur = ggml_reshape_3d(ctx0, cur, out_x * out_y, n_embd, 1);
// [out_x, out_y, n_embd, n_batch] -> [n_embd, out_x * out_y, n_batch]
cur = ggml_reshape_3d(ctx0, cur, out_x * out_y, n_embd, n_batch);
cur = ggml_cont(ctx0, ggml_transpose(ctx0, cur));
cur = ggml_scale(ctx0, cur, sqrtf((float)n_embd));
cb(cur, "pooled", -1);
+1
View File
@@ -16,6 +16,7 @@ struct clip_graph_gemma4v : clip_graph {
clip_graph_gemma4v(clip_ctx * ctx, const clip_image_f32 & img) : clip_graph(ctx, img) {}
ggml_cgraph * build() override;
ggml_tensor * build_mm(ggml_tensor * w, ggml_tensor * x) const override;
bool support_batch() const override { return true; }
};
struct clip_graph_gemma4uv : clip_graph {
+8 -5
View File
@@ -67,8 +67,8 @@ MTMD_API void mtmd_helper_image_get_decoder_pos(const mtmd_image_tokens * image,
// helper function that automatically:
// 1. run llama_decode() on text chunks
// 2. run mtmd_encode() on image chunks, then mtmd_get_output_embd() and then llama_decode()
// if any of the mtmd_encode() or llama_decode() calls return non-zero, stop and forward the error
// 2. run mtmd_encode_chunk() on image chunks, then mtmd_get_output_embd() and then llama_decode()
// if any of the mtmd_encode_chunk() or llama_decode() calls return non-zero, stop and forward the error
// otherwise, returns 0 on success
// this function is NOT thread-safe
MTMD_API int32_t mtmd_helper_eval_chunks(mtmd_context * ctx,
@@ -157,13 +157,16 @@ MTMD_API int32_t mtmd_helper_video_read_next(mtmd_helper_video * ctx,
} // extern "C"
#endif
#ifdef __cplusplus
#include <set>
#include <memory>
namespace mtmd_helper {
//
// C++ wrappers
//
#ifdef __cplusplus
namespace mtmd_helper {
// video-related C++ wrappers
struct mtmd_helper_video_deleter {
void operator()(mtmd_helper_video * val) { mtmd_helper_video_free(val); }
+296 -66
View File
@@ -69,8 +69,8 @@ struct mtmd_bitmap {
return data.size();
}
bool can_batch_with(const mtmd_bitmap & other) const {
// [QWEN_VIDEO] can batch if both are images with same size
bool can_merge_with(const mtmd_bitmap & other) const {
// [QWEN_VIDEO] can (temporal) merge if both are images with same size
return !is_audio && !other.is_audio && nx == other.nx && ny == other.ny;
}
@@ -90,12 +90,24 @@ struct mtmd_image_tokens {
uint32_t ny = 0; // number of tokens in y direction
mtmd_pos_type pos = MTMD_POS_TYPE_NORMAL;
uint32_t image_idx = 0; // 0-based position of this image among image chunks in the prompt(used by pos == MTMD_POS_TYPE_HUNYUANVL)
uint32_t n_temporal_merge = 1; // for qwen-vl style temporal merge
uint32_t n_tokens() const {
if (pos == MTMD_POS_TYPE_HUNYUANVL) {
// [BOI] [row0 tokens + newline] ... [row(ny-1) tokens + newline] [EOI]
return (nx + 1) * ny + 2;
}
return nx * ny;
// [QWEN_VIDEO] this logic is quite ugly, it's mostly to make qwen-vl temporal merge work, can be improved in the future
if (batch_f32.entries.size() == 1 || n_temporal_merge == 1) {
return nx * ny;
}
uint32_t nz = batch_f32.entries.size();
// TODO: simplify this by repeating the last frame until it fits the temporal merge
if (nz % n_temporal_merge != 0) {
nz = nz / n_temporal_merge + 1;
} else {
nz = nz / n_temporal_merge;
}
return nx * ny * nz;
}
clip_image_f32_batch batch_f32; // preprocessed image patches
std::string id; // optional user-defined ID, useful for KV cache tracking
@@ -110,12 +122,17 @@ struct mtmd_image_tokens {
return false;
}
bool can_batch_with(const mtmd_image_tokens & other) {
return nx == other.nx && ny == other.ny && pos == other.pos;
}
mtmd_image_tokens clone() {
return mtmd_image_tokens{
nx,
ny,
pos,
image_idx,
n_temporal_merge,
batch_f32.clone(),
id
};
@@ -153,12 +170,49 @@ struct mtmd_input_chunk {
std::vector<llama_token> tokens_text;
mtmd_image_tokens_ptr tokens_image;
mtmd_audio_tokens_ptr tokens_audio;
// Whether `other` may be placed in the same encode batch as this chunk.
// Chunks of different types never batch; two image chunks defer to
// mtmd_image_tokens::can_batch_with(); every other combination is rejected.
bool can_batch_with(const mtmd_input_chunk & other) const {
    const bool both_images = tokens_image && other.tokens_image;
    if (type == other.type && both_images) {
        return tokens_image->can_batch_with(*other.tokens_image);
    }
    // TODO: allow batching audio chunks of the same size
    return false;
}
// True when this is an image/audio chunk whose token object exists and reports itself as a placeholder.
// Any other chunk type is never a placeholder.
bool is_placeholder() const {
    switch (type) {
        case MTMD_INPUT_CHUNK_TYPE_IMAGE:
            return tokens_image && tokens_image->is_placeholder();
        case MTMD_INPUT_CHUNK_TYPE_AUDIO:
            return tokens_audio && tokens_audio->is_placeholder();
        default:
            return false;
    }
}
};
struct mtmd_input_chunks {
std::vector<mtmd_input_chunk> entries;
};
struct mtmd_batch {
mtmd_context * ctx;
std::vector<const mtmd_input_chunk *> entries;
std::vector<float> output_embd; // aggregated output embedding for the whole batch
mtmd_batch(mtmd_context * ctx): ctx(ctx) {}
int32_t n_tokens() const {
int32_t n = 0;
for (const auto * chunk : entries) {
n += mtmd_input_chunk_get_n_tokens(chunk);
}
return n;
}
};
// slice template, used by some llava-uhd models to correctly place the special tokens around image embeddings
// models not having it (llava-1.6) will process embeddings without any special tokens in-between
enum mtmd_slice_tmpl {
@@ -197,6 +251,7 @@ mtmd_context_params mtmd_context_params_default() {
/* image_max_tokens */ -1,
/* cb_eval */ nullptr,
/* cb_eval_user_data */ nullptr,
/* batch_max_tokens */ 1024,
};
return params;
}
@@ -204,7 +259,7 @@ mtmd_context_params mtmd_context_params_default() {
struct mtmd_context {
struct clip_ctx * ctx_v; // vision
struct clip_ctx * ctx_a; // audio
std::vector<float> image_embd_v; // image embedding vector
std::vector<float> out_embd; // image embedding vector
bool print_timings;
int n_threads;
@@ -239,17 +294,21 @@ struct mtmd_context {
std::unique_ptr<mtmd_audio_preprocessor> audio_preproc;
std::unique_ptr<mtmd_image_preprocessor> image_preproc;
// batching
int32_t batch_max_tokens;
// TODO @ngxson : add timings
mtmd_context(const char * mmproj_fname,
const llama_model * text_model,
const mtmd_context_params & ctx_params,
bool no_alloc = false) :
print_timings(ctx_params.print_timings),
n_threads (ctx_params.n_threads),
media_marker (ctx_params.media_marker),
n_embd_text (text_model ? llama_model_n_embd_inp(text_model) : -1),
vocab (text_model ? llama_model_get_vocab(text_model) : nullptr)
print_timings (ctx_params.print_timings),
n_threads (ctx_params.n_threads),
media_marker (ctx_params.media_marker),
n_embd_text (text_model ? llama_model_n_embd_inp(text_model) : -1),
vocab (text_model ? llama_model_get_vocab(text_model) : nullptr),
batch_max_tokens(ctx_params.batch_max_tokens)
{
if (ctx_params.image_marker != nullptr) {
throw std::runtime_error("custom image_marker is not supported anymore, use media_marker instead");
@@ -680,6 +739,16 @@ struct mtmd_context {
return ctx_a ? clip_get_projector_type(ctx_a) : PROJECTOR_TYPE_UNKNOWN;
}
// Embedding width reported by clip_n_mmproj_embd() for the loaded encoder.
// The vision context is preferred; the audio context is used only when there is no vision context.
// Throws std::runtime_error when neither context is loaded.
int64_t n_embd_out() const {
    struct clip_ctx * active = ctx_v ? ctx_v : ctx_a;
    if (!active) {
        throw std::runtime_error("no CLIP model loaded");
    }
    return clip_n_mmproj_embd(active);
}
~mtmd_context() {
clip_free(ctx_a);
clip_free(ctx_v);
@@ -845,7 +914,7 @@ struct mtmd_tokenizer {
// [QWEN_VIDEO] handle frame merging for models that support it (i.e. qwen-vl)
int n_merge_frames = 1;
if (ctx->ctx_v) {
n_merge_frames = clip_model_n_batch_max(ctx->ctx_v);
n_merge_frames = clip_model_n_temporal_merge(ctx->ctx_v);
GGML_ASSERT(n_merge_frames <= 2 && "we only support merging maximum 2 images for now; open an issue if this model supports merging more");
}
@@ -860,7 +929,7 @@ struct mtmd_tokenizer {
if (i + 1 < parts.size() && parts[i + 1].bitmap != nullptr) {
const mtmd_bitmap * bm_a = parts[i].bitmap;
const mtmd_bitmap * bm_b = parts[i + 1].bitmap;
if (bm_a->can_batch_with(*bm_b)) {
if (bm_a->can_merge_with(*bm_b)) {
LOG_DBG("%s: merging 2 frames at part index %zu and %zu\n", __func__, i, i + 1);
merged_bitmaps.push_back({bm_a, bm_b});
parts.erase(parts.begin() + i + 1); // collapse the second bitmap part
@@ -1103,13 +1172,17 @@ struct mtmd_tokenizer {
size_t n_tokens = 0;
for (const auto & e : batch_f32.entries) {
n_tokens += clip_n_output_tokens(ctx->ctx_v, e.get());
if (clip_model_n_batch_max(ctx->ctx_v) == 2) {
if (clip_model_n_temporal_merge(ctx->ctx_v) == 2) {
// [QWEN_VIDEO] pair input is merged to the same embd, so only count as one image
break;
}
}
mtmd_image_tokens_ptr image_tokens(new mtmd_image_tokens);
// [QWEN_VIDEO] improve this in the future
image_tokens->n_temporal_merge = clip_model_n_temporal_merge(ctx->ctx_v);
if (mtmd_decode_use_mrope(ctx)) {
// for Qwen2VL, we need this information for M-RoPE decoding positions
image_tokens->nx = clip_n_output_tokens_x(ctx->ctx_v, batch_f32.entries[0].get());
@@ -1327,60 +1400,18 @@ int32_t mtmd_tokenize(mtmd_context * ctx,
}
}
int32_t mtmd_encode_chunk(mtmd_context * ctx, const mtmd_input_chunk * chunk) {
if (chunk->type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
LOG_WRN("mtmd_encode_chunk has no effect for text chunks\n");
return 0;
} else if (chunk->type == MTMD_INPUT_CHUNK_TYPE_IMAGE) {
if (!ctx->ctx_v) {
LOG_ERR("%s: model does not support vision input\n", __func__);
return 1;
}
if (chunk->tokens_image == nullptr) {
LOG_ERR("%s: image tokens are null\n", __func__);
return 1;
}
if (chunk->tokens_image->is_placeholder()) {
LOG_ERR("%s: image tokens batch is placeholder\n", __func__);
return 1;
}
return mtmd_encode(ctx, chunk->tokens_image.get());
} else if (chunk->type == MTMD_INPUT_CHUNK_TYPE_AUDIO) {
if (!ctx->ctx_a) {
LOG_ERR("%s: model does not support audio input\n", __func__);
return 1;
}
if (chunk->tokens_audio == nullptr) {
LOG_ERR("%s: audio tokens are null\n", __func__);
return 1;
}
if (chunk->tokens_audio->is_placeholder()) {
LOG_ERR("%s: audio tokens batch is placeholder\n", __func__);
return 1;
}
int n_mmproj_embd = ctx->n_embd_text;
ctx->image_embd_v.resize(chunk->tokens_audio->n_tokens * n_mmproj_embd);
bool ok = clip_image_batch_encode(
ctx->ctx_a,
ctx->n_threads,
&chunk->tokens_audio->batch_f32,
ctx->image_embd_v.data());
return ok ? 0 : 1;
}
LOG_ERR("%s: unknown chunk type %d\n", __func__, (int)chunk->type);
return 1;
}
int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens) {
static int32_t mtmd_encode_impl(mtmd_context * ctx, const mtmd_image_tokens * image_tokens, std::vector<float> & out_embd) {
clip_ctx * ctx_clip = ctx->ctx_v;
if (!ctx_clip) {
LOG_ERR("%s: this API does not support non-vision input, please use mtmd_encode_chunk instead\n", __func__);
return 1;
}
auto proj_type = clip_get_projector_type(ctx_clip);
int n_mmproj_embd = clip_n_mmproj_embd(ctx_clip);
ctx->image_embd_v.resize(image_tokens->n_tokens() * n_mmproj_embd);
int n_embd_out = ctx->n_embd_out();
auto n_tokens_out = image_tokens->n_tokens();
out_embd.resize((size_t)n_embd_out * n_tokens_out);
bool ok = false;
if (clip_is_llava(ctx_clip)
@@ -1400,12 +1431,19 @@ int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens)
return 1;
}
int n_tokens_per_image = clip_n_output_tokens(ctx_clip, entries[i].get());
ok = clip_image_encode(
std::vector<float> tmp_embd((size_t)n_tokens_per_image * n_embd_out);
bool ok_i = clip_image_encode(
ctx_clip,
ctx->n_threads,
entries[i].get(),
ctx->image_embd_v.data() + offset);
offset += static_cast<size_t>(n_mmproj_embd) * n_tokens_per_image;
tmp_embd);
if (!ok_i) {
LOG_ERR("%s: failed to encode image %zu\n", __func__, i);
return 1;
}
ok = true;
std::copy(tmp_embd.begin(), tmp_embd.end(), out_embd.begin() + offset);
offset += static_cast<size_t>(n_embd_out) * n_tokens_per_image;
}
} else {
if (image_tokens->is_placeholder()) {
@@ -1416,14 +1454,206 @@ int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens)
ctx_clip,
ctx->n_threads,
&image_tokens->batch_f32,
ctx->image_embd_v.data());
out_embd);
}
return ok ? 0 : 1;
}
// Encodes one media chunk and stores the resulting embeddings in `out_embd`.
//   - text chunk : nothing to encode, logs a warning and returns 0
//   - image chunk: validated, then forwarded to mtmd_encode_impl()
//   - audio chunk: validated, then encoded through clip_image_batch_encode() on the audio context
// Returns 0 on success and 1 on any validation or encoding failure (including an unknown chunk type).
static int32_t mtmd_encode_chunk_impl(mtmd_context * ctx, const mtmd_input_chunk * chunk, std::vector<float> & out_embd) {
    switch (chunk->type) {
        case MTMD_INPUT_CHUNK_TYPE_TEXT:
            {
                LOG_WRN("mtmd_encode_chunk has no effect for text chunks\n");
                return 0;
            }
        case MTMD_INPUT_CHUNK_TYPE_IMAGE:
            {
                if (!ctx->ctx_v) {
                    LOG_ERR("%s: model does not support vision input\n", __func__);
                    return 1;
                }
                if (chunk->tokens_image == nullptr) {
                    LOG_ERR("%s: image tokens are null\n", __func__);
                    return 1;
                }
                if (chunk->tokens_image->is_placeholder()) {
                    LOG_ERR("%s: image tokens batch is placeholder\n", __func__);
                    return 1;
                }
                return mtmd_encode_impl(ctx, chunk->tokens_image.get(), out_embd);
            }
        case MTMD_INPUT_CHUNK_TYPE_AUDIO:
            {
                if (!ctx->ctx_a) {
                    LOG_ERR("%s: model does not support audio input\n", __func__);
                    return 1;
                }
                if (chunk->tokens_audio == nullptr) {
                    LOG_ERR("%s: audio tokens are null\n", __func__);
                    return 1;
                }
                if (chunk->tokens_audio->is_placeholder()) {
                    LOG_ERR("%s: audio tokens batch is placeholder\n", __func__);
                    return 1;
                }
                // size the output for (number of audio tokens) x (embedding width) before encoding
                int n_mmproj_embd = ctx->n_embd_out();
                out_embd.resize((size_t)chunk->tokens_audio->n_tokens * n_mmproj_embd);
                const bool encoded = clip_image_batch_encode(
                    ctx->ctx_a,
                    ctx->n_threads,
                    &chunk->tokens_audio->batch_f32,
                    out_embd);
                if (encoded) {
                    return 0;
                }
                return 1;
            }
        default:
            break;
    }

    LOG_ERR("%s: unknown chunk type %d\n", __func__, (int)chunk->type);
    return 1;
}
// Non-batching entry point: encodes a single chunk into the context-owned output buffer (ctx->out_embd).
// Exceptions derived from std::exception are logged and reported as error code 1.
int32_t mtmd_encode_chunk(mtmd_context * ctx, const mtmd_input_chunk * chunk) {
    int32_t status = 1; // stays 1 if the implementation throws
    try {
        status = mtmd_encode_chunk_impl(ctx, chunk, ctx->out_embd);
    } catch (const std::exception & e) {
        LOG_ERR("%s: error: %s\n", __func__, e.what());
    }
    return status;
}
// Encodes image tokens into the context-owned output buffer (ctx->out_embd).
// Exceptions derived from std::exception are logged and reported as error code 1.
int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens) {
    int32_t status = 1; // stays 1 if the implementation throws
    try {
        status = mtmd_encode_impl(ctx, image_tokens, ctx->out_embd);
    } catch (const std::exception & e) {
        LOG_ERR("%s: error: %s\n", __func__, e.what());
    }
    return status;
}
float * mtmd_get_output_embd(mtmd_context * ctx) {
return ctx->image_embd_v.data();
return ctx->out_embd.data();
}
// Allocates an empty batch bound to `ctx`; release it with mtmd_batch_free().
mtmd_batch * mtmd_batch_init(mtmd_context * ctx) {
    mtmd_batch * batch = new mtmd_batch(ctx);
    return batch;
}
// Destroys a batch created by mtmd_batch_init(). Passing nullptr is allowed.
void mtmd_batch_free(mtmd_batch * batch) {
    // `delete` on a null pointer is a no-op, so no explicit check is needed
    delete batch;
}
// Tries to append `chunk` to `batch`.
// Return codes:
//   0 - chunk was added
//   1 - generic error (text chunk, or no encoder available for this chunk type)
//   2 - batch too large (encoder cannot batch, or the token budget would be exceeded)
//   3 - chunk is not compatible with the chunks already in the batch
// The first chunk of a batch is always accepted, regardless of the token budget.
int32_t mtmd_batch_add_chunk(mtmd_batch * batch, const mtmd_input_chunk * chunk) {
    if (chunk->type == MTMD_INPUT_CHUNK_TYPE_TEXT) {
        LOG_ERR("%s: text chunk is not supported in batch\n", __func__);
        return 1;
    }

    auto * clip = batch->ctx->get_clip_ctx(chunk);
    if (!clip) {
        LOG_ERR("%s: model does not support input chunk type %d\n", __func__, (int)chunk->type);
        return 1;
    }

    auto & entries = batch->entries;

    // the checks below only apply once the batch already holds a chunk
    if (!entries.empty()) {
        if (!clip_support_batch(clip)) {
            // if no batching support, batch can only have one single chunk
            return 2; // "batch too large" error code
        }

        const int32_t n_tokens_after = batch->n_tokens() + (int32_t)mtmd_input_chunk_get_n_tokens(chunk);
        if (n_tokens_after > batch->ctx->batch_max_tokens) {
            return 2; // "batch too large" error code
        }

        // compatibility is judged against the first chunk of the batch
        if (!entries[0]->can_batch_with(*chunk)) {
            return 3; // "cannot batch" error code
        }
    }

    entries.push_back(chunk);
    return 0;
}
// Encodes every chunk of the batch in one call.
// The batch is represented as a single chunk: a copy of the first entry whose batch_f32
// additionally receives cloned batch_f32 entries of all following chunks. The result is
// written to batch->output_embd.
// Returns 0 on success, 1 on error (empty batch, placeholder chunk, unsupported chunk type, encode failure).
static int32_t mtmd_batch_encode_impl(mtmd_batch * batch) {
    const auto & chunks = batch->entries;

    if (chunks.empty()) {
        LOG_ERR("%s: batch is empty\n", __func__);
        return 1;
    }

    for (size_t i = 0; i < chunks.size(); i++) {
        if (chunks[i]->is_placeholder()) {
            LOG_ERR("%s: chunk is placeholder\n", __func__);
            return 1;
        }
    }

    // represent the whole batch as one single chunk
    mtmd::input_chunk_ptr batch_chunk(mtmd_input_chunk_copy(chunks[0]));

    // clones `src` and moves each cloned entry to the end of `dst`
    const auto append_cloned = [](auto & dst, auto & src) {
        auto cloned = src.clone();
        for (auto & entry : cloned.entries) {
            dst.entries.push_back(std::move(entry));
        }
    };

    // note: the loops start at 1 because the first entry is already part of batch_chunk
    if (batch_chunk->tokens_image) {
        for (size_t ic = 1; ic < chunks.size(); ic++) {
            auto & chunk = chunks[ic];
            GGML_ASSERT(chunk->tokens_image);
            append_cloned(batch_chunk->tokens_image->batch_f32, chunk->tokens_image->batch_f32);
        }
    } else if (batch_chunk->tokens_audio) {
        for (size_t ic = 1; ic < chunks.size(); ic++) {
            auto & chunk = chunks[ic];
            GGML_ASSERT(chunk->tokens_audio);
            append_cloned(batch_chunk->tokens_audio->batch_f32, chunk->tokens_audio->batch_f32);
        }
    } else {
        LOG_ERR("%s: unsupported chunk type\n", __func__);
        return 1;
    }

    LOG_DBG("%s: encoding batch with %zu entries and total %zu tokens\n",
        __func__, batch->entries.size(), mtmd_input_chunk_get_n_tokens(batch_chunk.get()));

    return mtmd_encode_chunk_impl(batch->ctx, batch_chunk.get(), batch->output_embd);
}
// Public entry point for batch encoding.
// Exceptions derived from std::exception are logged and reported as error code 1.
int32_t mtmd_batch_encode(mtmd_batch * batch) {
    int32_t status = 1; // stays 1 if the implementation throws
    try {
        status = mtmd_batch_encode_impl(batch);
    } catch (const std::exception & e) {
        LOG_ERR("%s: error: %s\n", __func__, e.what());
    }
    return status;
}
// Returns a pointer to the embeddings of `chunk` inside the batch output buffer.
// Chunks are matched by pointer identity; each entry occupies n_tokens * n_embd floats,
// laid out in the order the chunks were added.
// Returns nullptr when the batch has no output yet or when `chunk` is not part of the batch.
float * mtmd_batch_get_output_embd(mtmd_batch * batch, const mtmd_input_chunk * chunk) {
    if (batch->output_embd.empty()) {
        LOG_ERR("%s: batch has not been encoded yet\n", __func__);
        return nullptr;
    }

    const size_t n_embd = batch->ctx->n_embd_out();
    size_t offset = 0; // running end offset (in floats) of the entries visited so far

    for (size_t i = 0; i < batch->entries.size(); i++) {
        const mtmd_input_chunk * c = batch->entries[i];

        const size_t offset_prev = offset;
        offset += mtmd_input_chunk_get_n_tokens(c) * n_embd;

        // the slice [offset_prev, offset) must lie inside the output buffer
        GGML_ASSERT(offset_prev < batch->output_embd.size());
        GGML_ASSERT(offset <= batch->output_embd.size());

        if (c == chunk) {
            return batch->output_embd.data() + offset_prev;
        }
    }

    return nullptr; // not found
}
bool mtmd_decode_use_non_causal(const mtmd_context * ctx, const mtmd_input_chunk * chunk) {
@@ -1801,7 +2031,7 @@ static void mtmd_debug_encode_impl(mtmd_context * ctx, clip_ctx * ctx_clip, clip
ctx_clip,
ctx->n_threads,
&image,
embd_output.data());
embd_output);
if (!ok) {
LOG_ERR("%s: failed to encode image\n", __func__);
}
+36 -4
View File
@@ -63,6 +63,7 @@ struct mtmd_bitmap;
struct mtmd_image_tokens;
struct mtmd_input_chunk;
struct mtmd_input_chunks;
struct mtmd_batch;
struct mtmd_input_text {
const char * text;
@@ -80,6 +81,7 @@ typedef struct mtmd_image_tokens mtmd_image_tokens;
typedef struct mtmd_input_chunk mtmd_input_chunk;
typedef struct mtmd_input_chunks mtmd_input_chunks;
typedef struct mtmd_input_text mtmd_input_text;
typedef struct mtmd_batch mtmd_batch;
struct mtmd_context_params {
bool use_gpu;
@@ -97,6 +99,11 @@ struct mtmd_context_params {
// callback function passed over to mtmd proper
ggml_backend_sched_eval_callback cb_eval;
void * cb_eval_user_data;
// batching params
int32_t batch_max_tokens; // maximum number of output tokens in a batch
// (note: this is not a hard-limit, the first image will always be added even if it exceeds this limit)
// (default: 1024)
};
MTMD_API const char * mtmd_default_marker(void);
@@ -265,12 +272,12 @@ MTMD_API int32_t mtmd_tokenize(mtmd_context * ctx,
const mtmd_bitmap ** bitmaps,
size_t n_bitmaps);
// returns 0 on success
// TODO: deprecate
MTMD_API int32_t mtmd_encode(mtmd_context * ctx,
const mtmd_image_tokens * image_tokens);
DEPRECATED(MTMD_API int32_t mtmd_encode(mtmd_context * ctx, const mtmd_image_tokens * image_tokens),
"use mtmd_encode_chunk() instead");
// text chunk will be ignored silently, only media chunk will be encoded
// returns 0 on success
// returns 1 on generic error
MTMD_API int32_t mtmd_encode_chunk(mtmd_context * ctx,
const mtmd_input_chunk * chunk);
@@ -279,6 +286,26 @@ MTMD_API int32_t mtmd_encode_chunk(mtmd_context * ctx,
// llama_model_n_embd_inp(model) * mtmd_input_chunk_get_n_tokens(chunk) * sizeof(float)
MTMD_API float * mtmd_get_output_embd(mtmd_context * ctx);
// batch encoding API
// chunks are not owned by the batch, they will not be freed by mtmd_batch_free()
// batch is valid for a given context, cannot be shared across contexts
MTMD_API mtmd_batch * mtmd_batch_init(mtmd_context * ctx);
MTMD_API void mtmd_batch_free(mtmd_batch * batch);
// only media chunks are allowed, text chunks will be rejected
// returns 0 on success
// returns 1 on generic error
// returns 2 if the batch is too large (chunk won't be added)
// returns 3 if it cannot be batched with the existing chunks in the batch
MTMD_API int32_t mtmd_batch_add_chunk(mtmd_batch * batch, const mtmd_input_chunk * chunk);
// returns 0 on success
// returns 1 on generic error
MTMD_API int32_t mtmd_batch_encode(mtmd_batch * batch);
MTMD_API float * mtmd_batch_get_output_embd(mtmd_batch * batch, const mtmd_input_chunk * chunk);
// Set callback for all future logging events.
// If this is not called, or NULL is supplied, everything is output on stderr.
MTMD_API void mtmd_log_set(ggml_log_callback log_callback, void * user_data);
@@ -336,6 +363,11 @@ struct mtmd_input_chunk_deleter {
};
using input_chunk_ptr = std::unique_ptr<mtmd_input_chunk, mtmd_input_chunk_deleter>;
// Deleter that releases a mtmd_batch through mtmd_batch_free().
struct mtmd_batch_deleter {
void operator()(mtmd_batch * val) { mtmd_batch_free(val); }
};
// Owning smart pointer for mtmd_batch; frees the batch with mtmd_batch_deleter.
using batch_ptr = std::unique_ptr<mtmd_batch, mtmd_batch_deleter>;
struct bitmap {
bitmap_ptr ptr;
bitmap() : ptr(nullptr) {}
+11 -3
View File
@@ -344,6 +344,14 @@ const mtmd::input_chunk_ptr & server_tokens::find_chunk(size_t idx) const {
throw std::runtime_error("Chunk not found");
}
// Looks up the first media chunk whose start index is greater than `idx` (via upper_bound).
// Returns { pointer to the chunk, its start index }, or { nullptr, 0 } when there is none.
std::pair<const mtmd::input_chunk_ptr *, size_t> server_tokens::find_next_media_chunk(size_t idx) const {
    const auto next = map_idx_to_media.upper_bound(idx);
    if (next == map_idx_to_media.end()) {
        return { nullptr, 0 };
    }
    return { &next->second, next->first };
}
void server_tokens::push_back(llama_token tok) {
if (tok == LLAMA_TOKEN_NULL) {
throw std::runtime_error("Invalid token");
@@ -1126,9 +1134,9 @@ json oaicompat_chat_params_parse(
// Reasoning budget: pass parameters through to sampling layer
{
int reasoning_budget = opt.reasoning_budget;
if (reasoning_budget == -1 && body.contains("thinking_budget_tokens")) {
reasoning_budget = json_value(body, "thinking_budget_tokens", -1);
int reasoning_budget = json_value(body, "thinking_budget_tokens", -1);
if (reasoning_budget == -1) {
reasoning_budget = opt.reasoning_budget;
}
if (!chat_params.thinking_end_tag.empty()) {
+4
View File
@@ -180,6 +180,10 @@ public:
const mtmd::input_chunk_ptr & find_chunk(size_t idx) const;
// find next media chunk after idx
// returns a pair of pointer to the chunk (nullptr if not found) and its start index in tokens
std::pair<const mtmd::input_chunk_ptr *, size_t> find_next_media_chunk(size_t idx) const;
void push_back(llama_token tok);
// will create a copy of the chunk if it contains non-text data
+108 -15
View File
@@ -80,6 +80,8 @@ struct server_slot {
// multimodal
mtmd_context * mctx = nullptr;
mtmd::batch_ptr mbatch = nullptr;
std::array<llama_context *, 2> mtgt = {nullptr, nullptr}; // [0] for main context, [1] for optional draft context
// speculative decoding
common_speculative * spec;
@@ -239,6 +241,18 @@ struct server_slot {
// clear alora start
alora_invocation_start = -1;
// clear multimodal state
mbatch.reset();
mtgt[0] = ctx_tgt;
mtgt[1] = nullptr;
if (ctx_dft && llama_get_ctx_other(ctx_dft) != ctx_tgt) {
// TODO: in the future, figure out how to infuse target embeddings to the images
// for now, we re-decode the same chunk in both ctx_tgt and ctx_dft
// maybe we simply need to call `common_speculative_process()` ?
// [TAG_MTMD_DRAFT_PROCESSING]
mtgt[1] = ctx_dft;
}
}
void init_sampler() const {
@@ -578,6 +592,87 @@ struct server_slot {
other.prompt = prompt.clone();
other.init_sampler();
}
// Decodes the media chunk that starts at token index `idx`, encoding a new mtmd batch first when needed.
// On success, `n_tokens_out` is set to the number of tokens of the chunk.
// returns 0 on success
// returns -1 when decoding the chunk or encoding the batch fails
// returns 1 if the chunk's embeddings cannot be found even in the freshly encoded batch
// the caller needs to update prompt.tokens after a successful call to keep track of the processing progress
int process_mtmd_chunk(size_t idx, size_t & n_tokens_out) {
GGML_ASSERT(mctx);
const auto & input_tokens = task->tokens;
auto & chunk = input_tokens.find_chunk(idx);
int32_t res = 0;
// Looks up the chunk's embeddings in the current batch and decodes them on every non-null context in mtgt.
// returns 0 on success, -1 on decode failure, 1 when there is no batch or the chunk is not in it
auto try_decode = [&]() -> int32_t {
if (mbatch) {
float * embd = mtmd_batch_get_output_embd(mbatch.get(), chunk.get());
if (embd) {
// the same embeddings and the same start position are used for each context
for (auto * lctx : mtgt) {
if (lctx == nullptr) {
continue;
}
llama_pos new_n_past; // unused for now
res = mtmd_helper_decode_image_chunk(
mctx,
lctx,
chunk.get(),
embd,
prompt.tokens.pos_next(),
id,
llama_n_batch(lctx),
&new_n_past
);
if (res != 0) {
SLT_ERR(*this, "failed to decode mtmd chunk, idx = %zu, res = %d\n", idx, res);
return -1;
}
}
n_tokens_out = mtmd_input_chunk_get_n_tokens(chunk.get());
return 0; // success
}
}
return 1; // (non-error) need to create & encode batch
};
// if a batch already exists, try to find the chunk in it and decode
res = try_decode();
if (res == 0) {
return 0;
} else if (res < 0) {
// fatal error
return res;
}
// otherwise, the batch is either uninitialized or is used up
// we need to create & encode a new batch
mbatch.reset(mtmd_batch_init(mctx));
res = mtmd_batch_add_chunk(mbatch.get(), chunk.get());
GGML_ASSERT(res == 0); // we should never have an empty batch
// try batching as much as possible
int n_added = 1;
size_t idx_cur = idx;
while (res == 0) {
// walk forward through the following media chunks, starting after the current one
auto [next_chunk, next_idx] = input_tokens.find_next_media_chunk(idx_cur);
if (next_chunk == nullptr) {
break;
}
res = mtmd_batch_add_chunk(mbatch.get(), next_chunk->get());
n_added += (res == 0 ? 1 : 0);
idx_cur = next_idx;
SLT_DBG(*this, "try adding media chunk idx = %zu to batch, res = %d\n", next_idx, res);
// if res != 0, batch is full or chunk is not compatible -> this loop breaks
}
// TODO @ngxson : move this log line to debug when it becomes more stable
SLT_INF(*this, "encoding mtmd batch from idx = %zu, n_chunks = %d\n", idx, n_added);
res = mtmd_batch_encode(mbatch.get());
if (res != 0) {
SLT_ERR(*this, "failed to encode mtmd batch for chunk idx = %zu, res = %d\n", idx, res);
return -1;
}
// the batch now contains the requested chunk; decode it
return try_decode();
}
};
@@ -781,6 +876,7 @@ private:
mparams.warmup = params_base.warmup;
mparams.image_min_tokens = params_base.image_min_tokens;
mparams.image_max_tokens = params_base.image_max_tokens;
mparams.batch_max_tokens = params_base.mtmd_batch_max_tokens;
mparams.media_marker = get_media_marker();
}
@@ -2928,7 +3024,7 @@ private:
send_partial_response(slot, {}, false, true);
}
}
}
} // end of SLOT_STATE_STARTED
if (!slot.can_split()) {
// cannot fit the prompt in the current batch - will try next iter
@@ -2983,10 +3079,18 @@ private:
bool has_mtmd = false;
// check if we should process the image
while (slot.prompt.n_tokens() < slot.task->n_tokens() && input_tokens[slot.prompt.n_tokens()] == LLAMA_TOKEN_NULL) {
while (true) {
auto cur_token_idx = slot.prompt.n_tokens();
if (
cur_token_idx >= slot.task->n_tokens() ||
input_tokens[cur_token_idx] != LLAMA_TOKEN_NULL // encountered a text token
) {
break;
}
// process the image
size_t n_tokens_out = 0;
int32_t res = input_tokens.process_chunk(ctx_tgt, mctx, slot.prompt.n_tokens(), slot.prompt.tokens.pos_next(), slot.id, n_tokens_out);
int32_t res = slot.process_mtmd_chunk(cur_token_idx, n_tokens_out);
if (res != 0) {
SLT_ERR(slot, "failed to process image, res = %d\n", res);
send_error(slot, "failed to process image", ERROR_TYPE_SERVER);
@@ -2994,22 +3098,11 @@ private:
continue;
}
if (ctx_dft && llama_get_ctx_other(ctx_dft.get()) != ctx_tgt) {
// TODO: in the future, figure out how to infuse target embeddings to the images
// for now, we skip this for simplicity
// maybe we simply need to call `common_speculative_process()` on the mtmd batches in the `process_chunk` above?
// [TAG_MTMD_DRAFT_PROCESSING]
res = input_tokens.process_chunk(ctx_dft.get(), mctx, slot.prompt.n_tokens(), slot.prompt.tokens.pos_next(), slot.id, n_tokens_out);
if (res != 0) {
GGML_ABORT("failed to process multi-modal data on draft context\n");
}
}
slot.n_prompt_tokens_processed += n_tokens_out;
// add the image chunk to cache
{
const auto & chunk = input_tokens.find_chunk(slot.prompt.n_tokens());
const auto & chunk = input_tokens.find_chunk(cur_token_idx);
slot.prompt.tokens.push_back(chunk.get()); // copy
}
+208 -10
View File
@@ -113,7 +113,7 @@ bool server_http_context::init(const common_params & params) {
#endif
srv->set_default_headers({{"Server", "llama.cpp"}});
srv->set_logger(log_server_request);
// srv->set_logger(log_server_request); // TODO @ngxson : this is too spammy and not very useful; improve it in the future
srv->set_exception_handler([](const httplib::Request &, httplib::Response & res, const std::exception_ptr & ep) {
// this is fail-safe; exceptions should already handled by `ex_wrapper`
@@ -169,29 +169,108 @@ bool server_http_context::init(const common_params & params) {
SRV_INF("api_keys: %zu keys loaded\n", params.api_keys.size());
}
//
// Helper: Generate iOS splash screen paths from device dimensions
// This centralizes PWA asset paths to avoid duplication across CMake, C++, and TypeScript.
// Source of truth: tools/ui/src/lib/constants/pwa.ts (APPLE_DEVICES)
//
auto generate_splash_endpoints = []() -> std::vector<std::string> {
    // Apple device screen sizes as {width, height} in portrait orientation.
    // Based on https://developer.apple.com/design/human-interface-guidelines/app-icons
    static const std::vector<std::pair<std::string, std::string>> portrait_sizes = {
        {"640",  "1136"}, {"750",  "1334"},
        {"1170", "2532"}, {"1179", "2556"},
        {"1206", "2622"}, {"1284", "2778"},
        {"1290", "2796"}, {"1320", "2868"},
        {"1488", "2266"}, {"1640", "2360"},
        {"1668", "2388"}, {"2048", "2732"}
    };

    // Every size is emitted for 4 variants, in this order:
    // portrait, landscape, portrait-dark, landscape-dark.
    // Path format: "/apple-splash-<orientation>[-dark]-<dim1>x<dim2>.png"
    std::vector<std::string> endpoints;
    endpoints.reserve(4 * portrait_sizes.size());

    for (const bool is_dark : {false, true}) {
        for (const bool is_landscape : {false, true}) {
            std::string orientation = is_landscape ? "landscape" : "portrait";
            if (is_dark) {
                orientation += "-dark";
            }
            for (const auto & size : portrait_sizes) {
                // landscape uses the same screens with the dimensions swapped
                const std::string dimensions = is_landscape
                    ? size.second + "x" + size.first
                    : size.first  + "x" + size.second;
                endpoints.push_back("/apple-splash-" + orientation + "-" + dimensions + ".png");
            }
        }
    }

    return endpoints;
};
//
// Middlewares
//
auto middleware_validate_api_key = [api_keys = params.api_keys](const httplib::Request & req, httplib::Response & res) {
static const std::unordered_set<std::string> public_endpoints = {
// Public endpoints list - includes health, UI, and PWA assets
// Source of truth for splash screen paths: tools/ui/src/lib/constants/pwa.ts (APPLE_DEVICES)
// Set of request paths that bypass API-key validation. Despite the "get_" prefix this is
// a set object, not a function: it is built by an immediately-invoked lambda, and lookups
// against it are exact string matches on the request path.
static const std::unordered_set<std::string> get_public_endpoints = [generate_splash_endpoints]() {
std::unordered_set<std::string> endpoints {
"/health",
"/v1/health",
"/models",
"/v1/models",
"/",
"/index.html",
"/bundle.js",
"/bundle.css",
// PWA icons and favicons
"/favicon.ico",
"/favicon-dark.ico",
"/favicon.svg",
"/favicon-dark.svg",
"/pwa-64x64.png",
"/pwa-192x192.png",
"/pwa-512x512.png",
"/maskable-icon-512x512.png",
"/apple-touch-icon-180x180.png",
// iOS splash screens are not listed here; they are appended below from generate_splash_endpoints()
// PWA runtime files
"/manifest.webmanifest",
"/sw.js",
"/version.json",
// NOTE(review): "<hash>" is a literal placeholder, so this entry only matches a request for
// that exact string; presumably real hashed workbox files are let through elsewhere — confirm
"/workbox-<hash>.js",
"/_app/version.json",
"/build.json"
};
// Append every generated "/apple-splash-*.png" path to the set
auto splash = generate_splash_endpoints();
for (const auto & path : splash) {
endpoints.insert(path);
}
return endpoints;
}();
auto middleware_validate_api_key = [api_keys = params.api_keys](const httplib::Request & req, httplib::Response & res) {
// If API key is not set, skip validation
if (api_keys.empty()) {
return true;
}
// If path is public or static file, skip validation
if (public_endpoints.find(req.path) != public_endpoints.end()) {
if (get_public_endpoints.find(req.path) != get_public_endpoints.end()) {
return true;
}
// Static assets (_app/ files, workbox runtime). These are embedded at build time
// so no API key is needed — browsers fetch them directly.
if (req.path.find("/_app/") == 0 || req.path.find("/workbox-") == 0) {
return true;
}
@@ -315,7 +394,11 @@ bool server_http_context::init(const common_params & params) {
}
} else {
#if defined(LLAMA_UI_HAS_ASSETS)
auto serve_asset = [](const std::string & name, const char * mime, bool with_isolation_headers) {
// Embedded assets are immutable — cache aggressively for PWA/sw offline support.
// PWA runtime files (sw.js, manifest, version.json) use no-cache for revalidation.
// Bundle files use Vite content hashes (bundle.<hash>.js/css) so each build
// produces a different filename — browsers naturally get a fresh copy on upgrade.
auto serve_asset_cached = [](const std::string & name, const char * mime, bool with_isolation_headers) {
return [name, mime, with_isolation_headers](const httplib::Request & req, httplib::Response & res) {
const llama_ui_asset * a = llama_ui_find_asset(name.c_str());
if (!a) {
@@ -334,14 +417,129 @@ bool server_http_context::init(const common_params & params) {
res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
res.set_header("Cross-Origin-Opener-Policy", "same-origin");
}
res.set_header("Cache-Control", "public, max-age=31536000, immutable");
res.set_content(reinterpret_cast<const char*>(a->data), a->size, mime);
return false;
};
};
srv->Get(params.api_prefix + "/", serve_asset("index.html", "text/html; charset=utf-8", true));
srv->Get(params.api_prefix + "/bundle.js", serve_asset("bundle.js", "application/javascript; charset=utf-8", false));
srv->Get(params.api_prefix + "/bundle.css", serve_asset("bundle.css", "text/css; charset=utf-8", false));
// Handler factory for embedded UI assets that must be revalidated on every request.
//   name                   - key passed to llama_ui_find_asset()
//   mime                   - Content-Type sent with the asset
//   with_isolation_headers - also emit the COEP/COOP cross-origin isolation headers
// The returned handler answers 404 when the asset is not found and always returns false.
auto serve_asset_nocache = [](const std::string & name, const char * mime, bool with_isolation_headers) {
    return [name, mime, with_isolation_headers](const httplib::Request &, httplib::Response & res) {
        const llama_ui_asset * asset = llama_ui_find_asset(name.c_str());
        if (asset == nullptr) {
            res.status = 404;
        } else {
            if (with_isolation_headers) {
                res.set_header("Cross-Origin-Embedder-Policy", "require-corp");
                res.set_header("Cross-Origin-Opener-Policy", "same-origin");
            }
            // "no-cache" still allows storing the response but forces revalidation before reuse
            res.set_header("Cache-Control", "no-cache");
            res.set_content(reinterpret_cast<const char *>(asset->data), asset->size, mime);
        }
        return false;
    };
};
// Bundle files in _app/immutable/ — SvelteKit outputs them here and index.html
// and sw.js reference them via these paths (vanilla build, no plugin).
auto serve_bundle = [serve_asset_cached](const httplib::Request & req, httplib::Response & res) {
std::string path = req.path;
std::string name;
const char * mime;
if (path.rfind("/_app/immutable/bundle.", 0) == 0 && path.size() > 22) {
name = path.substr(1); // strip leading /
mime = "application/javascript; charset=utf-8";
} else if (path.rfind("/_app/immutable/assets/bundle.", 0) == 0 && path.size() > 30) {
name = path.substr(1); // strip leading /
mime = "text/css; charset=utf-8";
} else {
res.status = 404;
return false;
}
return serve_asset_cached(name, mime, false)(req, res);
};
// _app/ paths — vanilla SvelteKit output, index.html and sw.js reference
// bundles and version.json here directly.
// Bundle file names contain a content hash, so they are safe to cache as immutable.
srv->Get(params.api_prefix + R"(/_app/immutable/bundle\.[^/]+\.js)", serve_bundle);
srv->Get(params.api_prefix + R"(/_app/immutable/assets/bundle\.[^/]+\.css)", serve_bundle);
// version.json keeps the same URL across builds, so it must be revalidated on every
// request; serving it as immutable would hide new versions from update detection.
srv->Get(params.api_prefix + "/_app/version.json", serve_asset_nocache("_app/version.json", "application/json; charset=utf-8", false));
auto serve_workbox = [serve_asset_cached](const httplib::Request & req, httplib::Response & res) {
std::string name = req.path.substr(1);
if (name.rfind("workbox-", 0) == 0 && name.size() > 10) {
return serve_asset_cached(name, "application/javascript; charset=utf-8", false)(req, res);
}
res.status = 404;
return false;
};
srv->Get(params.api_prefix + R"(/workbox-[^/]+\.js)", serve_workbox);
srv->Get(params.api_prefix + R"(/sw\.js)", serve_asset_cached("sw.js", "application/javascript; charset=utf-8", false));
srv->Get(params.api_prefix + "/manifest.webmanifest", serve_asset_cached("manifest.webmanifest", "application/manifest+json; charset=utf-8", false));
srv->Get(params.api_prefix + "/version.json", serve_asset_cached("_app/version.json", "application/json; charset=utf-8", false));
srv->Get(params.api_prefix + "/build.json", serve_asset_cached("build.json", "application/json; charset=utf-8", false));
// Finally serve index.html for all other routes (SPA fallback)
srv->Get(params.api_prefix + "/", serve_asset_cached("index.html", "text/html; charset=utf-8", true));
srv->Get(params.api_prefix + "/favicon.ico", serve_asset_cached("favicon.ico", "image/x-icon", false));
srv->Get(params.api_prefix + "/favicon-dark.ico", serve_asset_cached("favicon-dark.ico", "image/x-icon", false));
srv->Get(params.api_prefix + "/favicon.svg", serve_asset_cached("favicon.svg", "image/svg+xml", false));
srv->Get(params.api_prefix + "/favicon-dark.svg", serve_asset_cached("favicon-dark.svg", "image/svg+xml", false));
srv->Get(params.api_prefix + "/pwa-64x64.png", serve_asset_cached("pwa-64x64.png", "image/png", false));
srv->Get(params.api_prefix + "/pwa-192x192.png", serve_asset_cached("pwa-192x192.png", "image/png", false));
srv->Get(params.api_prefix + "/pwa-512x512.png", serve_asset_cached("pwa-512x512.png", "image/png", false));
srv->Get(params.api_prefix + "/maskable-icon-512x512.png", serve_asset_cached("maskable-icon-512x512.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-touch-icon-180x180.png", serve_asset_cached("apple-touch-icon-180x180.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-640x1136.png", serve_asset_cached("apple-splash-portrait-640x1136.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-1136x640.png", serve_asset_cached("apple-splash-landscape-1136x640.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-750x1334.png", serve_asset_cached("apple-splash-portrait-750x1334.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-1334x750.png", serve_asset_cached("apple-splash-landscape-1334x750.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-1170x2532.png", serve_asset_cached("apple-splash-portrait-1170x2532.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-2532x1170.png", serve_asset_cached("apple-splash-landscape-2532x1170.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-1179x2556.png", serve_asset_cached("apple-splash-portrait-1179x2556.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-2556x1179.png", serve_asset_cached("apple-splash-landscape-2556x1179.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-1206x2622.png", serve_asset_cached("apple-splash-portrait-1206x2622.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-2622x1206.png", serve_asset_cached("apple-splash-landscape-2622x1206.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-1284x2778.png", serve_asset_cached("apple-splash-portrait-1284x2778.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-2778x1284.png", serve_asset_cached("apple-splash-landscape-2778x1284.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-1290x2796.png", serve_asset_cached("apple-splash-portrait-1290x2796.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-2796x1290.png", serve_asset_cached("apple-splash-landscape-2796x1290.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-1320x2868.png", serve_asset_cached("apple-splash-portrait-1320x2868.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-2868x1320.png", serve_asset_cached("apple-splash-landscape-2868x1320.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-1488x2266.png", serve_asset_cached("apple-splash-portrait-1488x2266.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-2266x1488.png", serve_asset_cached("apple-splash-landscape-2266x1488.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-1640x2360.png", serve_asset_cached("apple-splash-portrait-1640x2360.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-2360x1640.png", serve_asset_cached("apple-splash-landscape-2360x1640.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-1668x2388.png", serve_asset_cached("apple-splash-portrait-1668x2388.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-2388x1668.png", serve_asset_cached("apple-splash-landscape-2388x1668.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-2048x2732.png", serve_asset_cached("apple-splash-portrait-2048x2732.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-2732x2048.png", serve_asset_cached("apple-splash-landscape-2732x2048.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-640x1136.png", serve_asset_cached("apple-splash-portrait-dark-640x1136.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-1136x640.png", serve_asset_cached("apple-splash-landscape-dark-1136x640.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-750x1334.png", serve_asset_cached("apple-splash-portrait-dark-750x1334.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-1334x750.png", serve_asset_cached("apple-splash-landscape-dark-1334x750.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1170x2532.png", serve_asset_cached("apple-splash-portrait-dark-1170x2532.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2532x1170.png", serve_asset_cached("apple-splash-landscape-dark-2532x1170.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1179x2556.png", serve_asset_cached("apple-splash-portrait-dark-1179x2556.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2556x1179.png", serve_asset_cached("apple-splash-landscape-dark-2556x1179.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1206x2622.png", serve_asset_cached("apple-splash-portrait-dark-1206x2622.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2622x1206.png", serve_asset_cached("apple-splash-landscape-dark-2622x1206.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1284x2778.png", serve_asset_cached("apple-splash-portrait-dark-1284x2778.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2778x1284.png", serve_asset_cached("apple-splash-landscape-dark-2778x1284.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1290x2796.png", serve_asset_cached("apple-splash-portrait-dark-1290x2796.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2796x1290.png", serve_asset_cached("apple-splash-landscape-dark-2796x1290.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1320x2868.png", serve_asset_cached("apple-splash-portrait-dark-1320x2868.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2868x1320.png", serve_asset_cached("apple-splash-landscape-dark-2868x1320.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1640x2360.png", serve_asset_cached("apple-splash-portrait-dark-1640x2360.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2360x1640.png", serve_asset_cached("apple-splash-landscape-dark-2360x1640.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-1668x2388.png", serve_asset_cached("apple-splash-portrait-dark-1668x2388.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2388x1668.png", serve_asset_cached("apple-splash-landscape-dark-2388x1668.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-portrait-dark-2048x2732.png", serve_asset_cached("apple-splash-portrait-dark-2048x2732.png", "image/png", false));
srv->Get(params.api_prefix + "/apple-splash-landscape-dark-2732x2048.png", serve_asset_cached("apple-splash-landscape-dark-2732x2048.png", "image/png", false));
srv->Get(params.api_prefix + "/manifest.webmanifest", serve_asset_nocache("manifest.webmanifest", "application/manifest+json", false));
srv->Get(params.api_prefix + "/sw.js", serve_asset_nocache("sw.js", "application/javascript; charset=utf-8", false));
srv->Get(params.api_prefix + "/version.json", serve_asset_nocache("version.json", "application/json", false));
#endif
}
}
+1 -1
View File
@@ -26,7 +26,7 @@ def test_access_static_assets_without_api_key():
"""Static web UI assets should not require API key authentication (issue #21229)"""
global server
server.start()
for path in ["/", "/bundle.js", "/bundle.css"]:
for path in ["/", "/sw.js", "/manifest.webmanifest", "/_app/version.json"]:
res = server.make_request("GET", path)
assert res.status_code == 200, f"Expected 200 for {path}, got {res.status_code}"
+11
View File
@@ -8,6 +8,8 @@ node_modules
.wrangler
/.svelte-kit
/build
dev-dist
dist
# OS
.DS_Store
@@ -23,6 +25,15 @@ Thumbs.db
vite.config.js.timestamp-*
vite.config.ts.timestamp-*
# PWA Artifacts
apple-splash-*.png
apple-touch-icon-*.png
favicon.ico
favicon-dark.ico
maskable-icon-*.png
pwa-*.png
# Storybook
*storybook.log
storybook-static
*.code-workspace
+1
View File
@@ -77,6 +77,7 @@ add_custom_target(llama-ui-assets ALL
"-DUI_SOURCE_DIR=${CMAKE_CURRENT_SOURCE_DIR}"
"-DUI_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}"
"-DLLAMA_SOURCE_DIR=${PROJECT_SOURCE_DIR}"
"-DLLAMA_BUILD_NUMBER=${LLAMA_BUILD_NUMBER}"
"-DHF_BUCKET=${LLAMA_UI_HF_BUCKET}"
"-DHF_VERSION=${HF_UI_VERSION}"
"-DHF_ENABLED=${LLAMA_USE_PREBUILT_UI}"
+6854 -1259
View File
File diff suppressed because it is too large Load Diff
+31 -25
View File
@@ -4,8 +4,9 @@
"version": "1.0.0",
"type": "module",
"scripts": {
"build": "npm run build-pwa-assets && vite build",
"build-pwa-assets": "npx @vite-pwa/assets-generator --root . --config pwa-assets.config.ts && npx @vite-pwa/assets-generator --root . --config pwa-assets-dark.config.ts && node scripts/make-icons-circular.js",
"dev": "bash scripts/dev.sh",
"build": "vite build",
"preview": "vite preview",
"prepare": "svelte-kit sync || echo ''",
"check": "svelte-kit sync && svelte-check --tsconfig ./tsconfig.json",
@@ -15,12 +16,15 @@
"lint": "prettier --check . && eslint .",
"test": "npm run test:ui -- --run && npm run test:client -- --run && npm run test:unit -- --run && npm run test:e2e",
"test:e2e": "playwright test",
"test:e2e:pwa": "playwright test tests/e2e/pwa.e2e.ts",
"test:client": "vitest --project=client",
"test:unit": "vitest --project=unit",
"test:unit:pwa": "vitest --project=unit --run tests/unit/pwa.spec.ts",
"test:pwa": "npm run test:unit:pwa && npm run test:e2e:pwa",
"test:ui": "vitest --project=ui",
"storybook": "storybook dev -p 6006",
"build-storybook": "storybook build",
"cleanup": "rm -rf .svelte-kit build node_modules test-results"
"cleanup": "rm -rf .svelte-kit build node_modules test-results dist dev-dist debug-storybook.log static/pwa-*.png static/maskable-icon-*.png static/apple-touch-icon-*.png static/apple-splash-*.png static/favicon*.ico"
},
"devDependencies": {
"@chromatic-com/storybook": "5.0.0",
@@ -41,29 +45,31 @@
"@tailwindcss/forms": "0.5.10",
"@tailwindcss/typography": "0.5.16",
"@tailwindcss/vite": "4.1.11",
"@types/node": "^24",
"@types/node": "24.13.0",
"@vite-pwa/assets-generator": "1.0.2",
"@vite-pwa/sveltekit": "1.1.0",
"@vitest/browser": "4.1.8",
"@vitest/browser-playwright": "4.1.8",
"@vitest/coverage-v8": "4.1.8",
"bits-ui": "2.18.1",
"clsx": "2.1.1",
"dexie": "4.0.11",
"eslint": "9.39.2",
"dexie": "4.4.3",
"eslint": "9.39.4",
"eslint-config-prettier": "10.1.8",
"eslint-plugin-storybook": "10.2.4",
"eslint-plugin-svelte": "3.15.0",
"globals": "16.3.0",
"eslint-plugin-storybook": "10.4.2",
"eslint-plugin-svelte": "3.19.0",
"globals": "16.5.0",
"highlight.js": "11.11.1",
"http-server": "14.1.1",
"mdast": "3.0.0",
"mdsvex": "0.12.6",
"mdsvex": "0.12.7",
"mermaid": "11.15.0",
"mode-watcher": "1.1.0",
"pdfjs-dist": "5.4.54",
"playwright": "1.56.1",
"prettier": "3.6.2",
"prettier-plugin-svelte": "3.4.0",
"prettier-plugin-tailwindcss": "0.6.14",
"prettier": "3.8.3",
"prettier-plugin-svelte": "4.1.0",
"prettier-plugin-tailwindcss": "0.8.0",
"rehype-highlight": "7.0.2",
"rehype-katex": "7.0.1",
"rehype-stringify": "10.0.1",
@@ -73,25 +79,25 @@
"remark-html": "16.0.1",
"remark-math": "6.0.0",
"remark-rehype": "11.1.2",
"sass": "1.93.3",
"storybook": "10.3.3",
"svelte": "5.55.7",
"svelte-check": "4.3.0",
"svelte-sonner": "1.0.5",
"tailwind-merge": "3.3.1",
"sass": "1.100.0",
"storybook": "10.4.2",
"svelte": "5.56.1",
"svelte-check": "4.6.0",
"svelte-sonner": "1.1.1",
"tailwind-merge": "3.6.0",
"tailwind-variants": "3.2.2",
"tailwindcss": "4.1.11",
"tw-animate-css": "1.3.5",
"typescript": "5.8.3",
"typescript-eslint": "8.56.0",
"tailwindcss": "4.3.0",
"tw-animate-css": "1.4.0",
"typescript": "5.9.3",
"typescript-eslint": "8.60.1",
"unified": "11.0.5",
"unist-util-visit": "5.0.0",
"unist-util-visit": "5.1.0",
"uuid": "13.0.2",
"vite": "7.3.2",
"vite": "7.3.5",
"vite-plugin-devtools-json": "0.2.1",
"vitest": "4.1.8",
"vitest-browser-svelte": "2.1.1",
"zod": "4.2.1"
"workbox-window": "7.4.1"
},
"overrides": {
"cookie": "1.1.1"
+24 -4
View File
@@ -1,11 +1,31 @@
import { defineConfig } from '@playwright/test';
import { defineConfig, devices } from '@playwright/test';
export default defineConfig({
testDir: 'tests/e2e',
testMatch: ['**/*.e2e.ts'],
timeout: 30000,
expect: {
timeout: 5000
},
fullyParallel: true,
forbidOnly: !!process.env.CI,
retries: process.env.CI ? 2 : 0,
workers: process.env.CI ? 1 : undefined,
reporter: 'line',
use: {
baseURL: 'http://localhost:8181',
trace: 'on-first-retry'
},
projects: [
{
name: 'chromium',
use: { ...devices['Desktop Chrome'] }
}
],
webServer: {
command: 'npm run build && npx http-server ./dist -p 8181',
port: 8181,
timeout: 120000,
reuseExistingServer: false
},
testDir: 'tests/e2e'
reuseExistingServer: !process.env.CI
}
});
+20
View File
@@ -0,0 +1,20 @@
import { defineConfig } from '@vite-pwa/assets-generator/config';
// Asset-generator config for the dark favicon only: the single source image is
// static/favicon-dark.svg and the only requested output is a 48px favicon-dark.ico.
export default defineConfig({
headLinkOptions: {
preset: '2023'
},
preset: {
transparent: {
// no transparent PNG sizes; only the .ico favicon below is listed
sizes: [],
favicons: [[48, 'favicon-dark.ico']]
},
maskable: {
// no maskable icon sizes requested from this config
sizes: []
},
apple: {
// no apple-touch icon sizes requested from this config
sizes: []
}
},
images: ['static/favicon-dark.svg']
});
+51
View File
@@ -0,0 +1,51 @@
import {
combinePresetAndAppleSplashScreens,
defineConfig,
minimal2023Preset
} from '@vite-pwa/assets-generator/config';
import { readFileSync } from 'node:fs';
import { resolve } from 'node:path';
import { THEME_COLORS, PWA_GENERATOR_DEVICES, PWA_ASSET_GENERATOR } from './src/lib/constants/pwa';
import { SplashOrientation } from './src/lib/enums/splash.enums';
// Icon preset (minimal 2023) combined with Apple splash screens for every device listed
// in PWA_GENERATOR_DEVICES. Light and dark splash variants share the same fit mode and
// differ only in background colour and source image.
const preset = combinePresetAndAppleSplashScreens(
	minimal2023Preset,
	{
		padding: PWA_ASSET_GENERATOR.SPLASH_PADDING,
		resizeOptions: {
			background: THEME_COLORS.BACKGROUND_LIGHT,
			fit: PWA_ASSET_GENERATOR.FIT_MODE
		},
		darkResizeOptions: {
			background: THEME_COLORS.BACKGROUND_DARK,
			fit: PWA_ASSET_GENERATOR.FIT_MODE
		},
		// Swap in the dark logo when the light favicon is the source image;
		// for any other image the resolver yields undefined.
		darkImageResolver: async (imageName: string) => {
			const isLightFavicon = imageName.endsWith('favicon.svg');
			return isLightFavicon ? readFileSync(resolve('static/favicon-dark.svg')) : undefined;
		},
		linkMediaOptions: {
			log: true,
			addMediaScreen: PWA_ASSET_GENERATOR.ADD_MEDIA_SCREEN,
			basePath: PWA_ASSET_GENERATOR.BASE_PATH,
			xhtml: PWA_ASSET_GENERATOR.XHTML
		},
		png: {
			compressionLevel: PWA_ASSET_GENERATOR.PNG_COMPRESSION_LEVEL,
			quality: PWA_ASSET_GENERATOR.PNG_QUALITY
		},
		// File name pattern: apple-splash-<orientation>-[<dark prefix>]<width>x<height>.png
		name: (landscape, size, dark) => {
			let orientation = SplashOrientation.PORTRAIT;
			if (landscape) {
				orientation = SplashOrientation.LANDSCAPE;
			}
			let darkPrefix = '';
			if (dark) {
				darkPrefix = PWA_ASSET_GENERATOR.DARK_PREFIX;
			}
			const dimensions = `${size.width}x${size.height}`;
			return `apple-splash-${orientation}-${darkPrefix}${dimensions}.png`;
		}
	},
	PWA_GENERATOR_DEVICES
);

export default defineConfig({
	headLinkOptions: {
		preset: PWA_ASSET_GENERATOR.LINK_PRESET
	},
	preset,
	images: ['static/favicon.svg']
});
+137
View File
@@ -0,0 +1,137 @@
#!/usr/bin/env node
/**
* Apply circular mask to pwa-*.png icons.
* Uses the maskable icon as source (white bg, full logo) to avoid
* the small-colormap pwa icons looking bad when cropped to a circle.
*
* Usage: node scripts/make-icons-circular.js [--padding-pct <0-50>] [--scale-pct <50-100>]
*
* - padding-pct: percentage of icon size kept as padding around the circle (default: 0)
* - scale-pct: scale down the source image before cropping (default: 85)
*
* maskable-icon and apple-touch-icon are left untouched.
*/
import sharp from 'sharp';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Generated icons live in <package root>/static, one level above this script's directory.
const STATIC_DIR = path.resolve(__dirname, '..', 'static');

/**
 * Read a numeric command-line flag of the form `--flag <value>`.
 *
 * When the flag appears several times the last occurrence wins; when it is absent
 * (or has no value after it) the fallback is returned.
 *
 * @param {string[]} argv - argument vector to scan (normally `process.argv`)
 * @param {string} flag - flag name including the leading dashes
 * @param {number} fallback - value used when the flag is not given
 * @returns {number} the parsed value or the fallback
 * @throws {Error} when the value following the flag is not a finite number
 */
function readNumberFlag(argv, flag, fallback) {
	let value = fallback;
	for (let i = 0; i < argv.length - 1; i++) {
		const raw = argv[i + 1];
		if (argv[i] !== flag || !raw) {
			continue;
		}
		const parsed = Number.parseFloat(raw);
		// A NaN here would silently propagate into every size computation below.
		if (!Number.isFinite(parsed)) {
			throw new Error(`Invalid value for ${flag}: "${raw}" is not a number`);
		}
		value = parsed;
	}
	return value;
}

// Percentage of the icon size kept as padding around the circle (default: no padding)
const paddingPct = readNumberFlag(process.argv, '--padding-pct', 0);

// Scale down the source image before cropping to circle
// (default 85% - icon fills 85% of the circular area)
const scalePct = readNumberFlag(process.argv, '--scale-pct', 85);

// Source for circular icons: the maskable icon (white bg, full logo)
const sourceIcon = 'maskable-icon-512x512.png';

// Icons that are overwritten with a circular version
const targetIcons = ['pwa-64x64.png', 'pwa-192x192.png', 'pwa-512x512.png'];

// maskable-icon and apple-touch-icon stay square
const untouchedIcons = ['maskable-icon-512x512.png', 'apple-touch-icon-180x180.png'];
const targetPath = path.join(STATIC_DIR, targetFilename);
const sourcePath = path.join(STATIC_DIR, sourceIcon);
if (!fs.existsSync(sourcePath)) {
console.log(`⏭️ ${sourceIcon} not found, skipping`);
return;
}
if (!fs.existsSync(targetPath)) {
console.log(`⏭️ ${targetFilename} not found, skipping`);
return;
}
const metadata = await sharp(targetPath).metadata();
const size = Math.max(metadata.width, metadata.height);
const radius = Math.floor((size * (1 - paddingPct / 100)) / 2);
const center = Math.floor(size / 2);
// Build circular mask as RGBA buffer: white opaque circle on transparent bg
const maskBuf = Buffer.alloc(size * size * 4, 0);
for (let y = 0; y < size; y++) {
for (let x = 0; x < size; x++) {
const dx = x - center;
const dy = y - center;
const dist = Math.sqrt(dx * dx + dy * dy);
if (dist < radius) {
const i = (y * size + x) * 4;
maskBuf[i] = 255;
maskBuf[i + 1] = 255;
maskBuf[i + 2] = 255;
maskBuf[i + 3] = 255;
}
}
}
const tmpMask = path.join(STATIC_DIR, '.mask-tmp.png');
await sharp(maskBuf, {
raw: { width: size, height: size, channels: 4 }
})
.png()
.toFile(tmpMask);
// Step 1: Scale source relative to circle diameter (not full icon), composite centered onto white canvas of full size
const circleDiameter = Math.floor(size * (1 - paddingPct / 100));
const scaledSize = Math.floor((circleDiameter * scalePct) / 100);
const offset = Math.floor((size - scaledSize) / 2);
const scaledBuf = await sharp(sourcePath)
.resize(scaledSize, scaledSize, {
fit: 'cover',
background: { r: 255, g: 255, b: 255, alpha: 1 }
})
.ensureAlpha()
.png()
.toBuffer();
// Step 2: Composite scaled image onto white background, then apply circular mask
const output = await sharp({
create: {
width: size,
height: size,
channels: 4,
background: { r: 255, g: 255, b: 255, alpha: 1 }
}
})
.composite([
{ input: scaledBuf, top: offset, left: offset },
{ input: tmpMask, top: 0, left: 0, blend: 'dest-in' }
])
.png()
.toBuffer();
fs.writeFileSync(targetPath, output);
fs.unlinkSync(tmpMask);
console.log(
`${targetFilename} → circle from ${sourceIcon}, ${paddingPct}% padding (size=${size}, r=${radius}, scale=${scalePct}%, circleDiameter=${circleDiameter})`
);
}
/**
 * Script entry point: make every icon in `targetIcons` circular (one after another),
 * then print the size of the icons that are intentionally left square.
 *
 * @returns {Promise<void>}
 */
async function main() {
	console.log(`Circular mask: ${paddingPct}% padding, ${scalePct}% scale, source=${sourceIcon}\n`);

	for (const icon of targetIcons) {
		await makeCircle(icon);
	}

	console.log('\nUnchanged:');
	for (const icon of untouchedIcons) {
		const fp = path.join(STATIC_DIR, icon);
		console.log(` ${icon} (${fs.existsSync(fp) ? fs.statSync(fp).size + ' bytes' : 'missing'})`);
	}
}

// Do not leave the promise floating: report the failure and exit non-zero so the
// surrounding build step fails visibly.
main().catch((error) => {
	console.error('Failed to generate circular icons:', error);
	process.exitCode = 1;
});
@@ -0,0 +1,42 @@
import { writeFileSync, existsSync } from 'node:fs';
import { resolve } from 'path';
import type { Plugin } from 'vite';
import { BUILD_CONFIG } from '../src/lib/constants/pwa';
let processed = false;
const OUTPUT_DIR = process.env.LLAMA_UI_OUT_DIR ?? BUILD_CONFIG.OUTPUT_DIR;
/**
 * Write build.json with the llama.cpp release build number.
 *
 * LLAMA_BUILD_NUMBER is passed from CMake -> npm -> vite via env var.
 * Used for display of the current llama-server release (e.g. "b1234").
 *
 * The file is written at most once per process. The "done" flag is only set after a
 * successful write, so a closeBundle pass that runs before index.html exists does not
 * permanently disable the plugin; a later pass simply tries again.
 *
 * @returns a Vite plugin that is active for `build` only
 */
export function buildInfoPlugin(): Plugin {
	return {
		name: 'llamacpp:build-info',
		apply: 'build',
		closeBundle() {
			// Deferred by 100 ms; presumably so the static adapter has finished writing
			// index.html by the time we look for it — confirm.
			setTimeout(() => {
				try {
					if (processed) return;

					const buildNumber = process.env.LLAMA_BUILD_NUMBER;
					if (!buildNumber) return;

					const outDir = resolve(OUTPUT_DIR);
					// index.html marks a finished output directory; without it there is
					// nothing to annotate yet.
					if (!existsSync(resolve(outDir, 'index.html'))) return;

					const buildJsonPath = resolve(outDir, 'build.json');
					writeFileSync(buildJsonPath, JSON.stringify({ version: buildNumber }), 'utf-8');
					processed = true;
					console.log(`Created build.json (version: ${buildNumber})`);
				} catch (error) {
					console.error('Failed to write build.json:', error);
				}
			}, 100);
		}
	};
}
@@ -1,105 +0,0 @@
import {
readFileSync,
writeFileSync,
existsSync,
readdirSync,
copyFileSync,
rmSync,
unlinkSync
} from 'fs';
import { resolve } from 'path';
import type { Plugin } from 'vite';
const GUIDE_FOR_FRONTEND = `
<!--
This is a static build of the frontend.
It is automatically generated by the build process.
Do not edit this file directly.
To make changes, refer to the "Web UI" section in the README.
-->
`.trim();
const OUTPUT_DIR = process.env.LLAMA_UI_OUT_DIR ?? './dist';
export function llamaCppBuildPlugin(): Plugin {
return {
name: 'llamacpp:build',
apply: 'build',
closeBundle() {
setTimeout(() => {
try {
const outDir = resolve(OUTPUT_DIR);
const indexPath = resolve(outDir, 'index.html');
if (!existsSync(indexPath)) return;
let content = readFileSync(indexPath, 'utf-8');
// Inline favicon as base64 data URL
const faviconPath = resolve('static/favicon.svg');
if (existsSync(faviconPath)) {
const faviconContent = readFileSync(faviconPath, 'utf-8');
const faviconBase64 = Buffer.from(faviconContent).toString('base64');
const faviconDataUrl = `data:image/svg+xml;base64,${faviconBase64}`;
content = content.replace(/href="[^"]*favicon\.svg"/g, `href="${faviconDataUrl}"`);
console.log('✓ Inlined favicon.svg as base64 data URL');
}
content = content.replace(/\r/g, '');
content = GUIDE_FOR_FRONTEND + '\n' + content;
// Keep the Vite hash as a query string so each build busts the browser cache
content = content.replace(/\/_app\/immutable\/bundle\.([^".]+)\.js/g, './bundle.js?$1');
content = content.replace(
/\/_app\/immutable\/assets\/bundle\.([^".]+)\.css/g,
'./bundle.css?$1'
);
content = content.replace(/__sveltekit_[a-z0-9]+/g, '__sveltekit__');
writeFileSync(indexPath, content, 'utf-8');
console.log('✓ Updated index.html');
// Copy bundle.*.js -> bundle.js at output root
const immutableDir = resolve(outDir, '_app/immutable');
const bundleDir = resolve(outDir, '_app/immutable/assets');
if (existsSync(immutableDir)) {
const jsFiles = readdirSync(immutableDir).filter((f) => f.match(/^bundle\..+\.js$/));
if (jsFiles.length > 0) {
copyFileSync(resolve(immutableDir, jsFiles[0]), resolve(outDir, 'bundle.js'));
// Normalize __sveltekit_<hash> to __sveltekit__ in bundle.js
const bundleJsPath = resolve(outDir, 'bundle.js');
let bundleJs = readFileSync(bundleJsPath, 'utf-8');
bundleJs = bundleJs.replace(/__sveltekit_[a-z0-9]+/g, '__sveltekit__');
writeFileSync(bundleJsPath, bundleJs, 'utf-8');
console.log(`✓ Copied ${jsFiles[0]} -> bundle.js`);
}
}
// Copy bundle.*.css -> bundle.css at output root
if (existsSync(bundleDir)) {
const cssFiles = readdirSync(bundleDir).filter((f) => f.match(/^bundle\..+\.css$/));
if (cssFiles.length > 0) {
copyFileSync(resolve(bundleDir, cssFiles[0]), resolve(outDir, 'bundle.css'));
console.log(`✓ Copied ${cssFiles[0]} -> bundle.css`);
}
}
// Cleanup: remove _app directory, favicon.svg, and legacy index.html.gz
const appDir = resolve(outDir, '_app');
if (existsSync(appDir)) {
rmSync(appDir, { recursive: true, force: true });
console.log('✓ Removed _app directory');
}
const faviconOut = resolve(outDir, 'favicon.svg');
if (existsSync(faviconOut)) {
unlinkSync(faviconOut);
console.log('✓ Removed favicon.svg');
}
} catch (error) {
console.error('Failed to process build output:', error);
}
}, 100);
}
};
}
@@ -0,0 +1,61 @@
import { readFileSync, writeFileSync, existsSync } from 'node:fs';
import { resolve } from 'path';
import type { Plugin } from 'vite';
import { BUILD_CONFIG } from '../src/lib/constants/pwa';
let processed = false;
const OUTPUT_DIR = process.env.LLAMA_UI_OUT_DIR ?? BUILD_CONFIG.OUTPUT_DIR;
/**
 * Apply literal search/replace pairs to a UTF-8 text file, in order.
 *
 * A missing file is ignored, and the file is only written back when at least one
 * replacement actually changed its content.
 *
 * @param path  file to rewrite in place
 * @param pairs list of [search, replacement] literals (no regex, no `$` patterns)
 */
function rewrite(path: string, pairs: [string, string][]): void {
	if (existsSync(path)) {
		const original = readFileSync(path, 'utf-8');
		// split/join replaces every occurrence and treats both strings literally
		const updated = pairs.reduce(
			(current, [needle, replacement]) => current.split(needle).join(replacement),
			original
		);
		if (updated !== original) {
			writeFileSync(path, updated, 'utf-8');
		}
	}
}
/**
 * Relativize SvelteKit absolute base refs so the build is relocatable under any subpath.
 *
 * SvelteKit bakes root absolute /_app/ paths into the SPA fallback because paths.relative
 * does not apply to a depth agnostic fallback page. Rewriting to ./_app/ lets a plain
 * recursive copy of the output into /any/subdir/ resolve assets against the document URL.
 * Runs after adapter-static writes index.html and the PWA plugin writes sw.js, deferred the
 * same way as buildInfoPlugin so the emitted files exist.
 *
 * The rewrite runs at most once per process, but the "done" flag is only set once at
 * least one of the two files exists: a closeBundle pass that fires before anything has
 * been emitted neither disables the plugin nor reports a success that did not happen.
 *
 * @returns a Vite plugin that is active for `build` only
 */
export function relativizeBasePlugin(): Plugin {
	return {
		name: 'llamacpp:relativize-base',
		apply: 'build',
		closeBundle() {
			setTimeout(() => {
				try {
					if (processed) return;

					const outDir = resolve(OUTPUT_DIR);
					const indexPath = resolve(outDir, 'index.html');
					const swPath = resolve(outDir, 'sw.js');

					// Nothing emitted yet: leave the flag unset so a later pass can retry.
					if (!existsSync(indexPath) && !existsSync(swPath)) return;
					processed = true;

					// index.html: modulepreload, stylesheet and bootstrap import reference "/_app/
					rewrite(indexPath, [['"/_app/', '"./_app/']]);

					// sw.js: the only absolute entries are the navigate fallback precache key and handler
					rewrite(swPath, [
						['{url:"/"', '{url:"./"'],
						['createHandlerBoundToURL("/"', 'createHandlerBoundToURL("./"']
					]);

					console.log('Relativized base refs in index.html and sw.js');
				} catch (error) {
					console.error('Failed to relativize base refs:', error);
				}
			}, 100);
		}
	};
}
@@ -0,0 +1,115 @@
import { readdirSync, readFileSync, writeFileSync, existsSync } from 'node:fs';
import { resolve } from 'path';
import type { Plugin } from 'vite';
import { TAB, NEWLINE } from '../src/lib/constants/code';
import { APPLE_DEVICES, BUILD_CONFIG, REGEX_PATTERNS, SPLASH_LINK } from '../src/lib/constants/pwa';
import type { SplashDimensions } from '../src/lib/types';
import { SplashOrientation } from '../src/lib/enums/splash.enums';
let processed = false;
const OUTPUT_DIR = process.env.LLAMA_UI_OUT_DIR ?? BUILD_CONFIG.OUTPUT_DIR;
/**
 * Build the iOS `apple-touch-startup-image` <link> tags for the splash PNGs found in
 * `outDir`.
 *
 * Files are recognised by REGEX_PATTERNS.SPLASH_FILE. The dimensions in each file name
 * are looked up in a table derived from APPLE_DEVICES; files with unknown dimensions are
 * skipped with a warning. Light-mode links come first in the result, dark-mode links
 * (with the dark colour-scheme media suffix) after them.
 *
 * @param outDir directory that contains the generated apple-splash-*.png files
 * @returns HTML link strings, or an empty array when no splash file is present
 */
export function generateSplashScreenLinks(outDir: string): string[] {
	const splashFiles = readdirSync(outDir).filter((name) =>
		name.match(REGEX_PATTERNS.SPLASH_FILE)
	);
	if (splashFiles.length === 0) return [];

	// Each APPLE_DEVICES key "WxH" is registered as-is and multiplied by the device's
	// dpr, in both orientations, so a file name can match either form of the key.
	const specByDims = new Map<string, SplashDimensions>();
	for (const [dims, device] of Object.entries(APPLE_DEVICES)) {
		const [w, h] = dims.split('x').map(Number);
		const spec: SplashDimensions = {
			deviceW: device.width,
			deviceH: device.height,
			dpr: device.dpr
		};
		for (const scale of [1, device.dpr]) {
			specByDims.set(`${w * scale}x${h * scale}`, spec);
			specByDims.set(`${h * scale}x${w * scale}`, spec);
		}
	}

	const light: string[] = [];
	const dark: string[] = [];

	for (const name of splashFiles) {
		const parts = name.match(REGEX_PATTERNS.SPLASH_FILE);
		if (!parts) continue;

		const [, orientationRaw, darkFlag, widthStr, heightStr] = parts;
		const orientation = orientationRaw as SplashOrientation;
		const key = `${Number.parseInt(widthStr, 10)}x${Number.parseInt(heightStr, 10)}`;

		const spec = specByDims.get(key);
		if (!spec) {
			console.warn(`Unknown splash screen dimensions: ${key} (${name})`);
			continue;
		}

		const media = `screen and (device-width: ${spec.deviceW}px) and (device-height: ${spec.deviceH}px) and (-webkit-device-pixel-ratio: ${spec.dpr}) and (orientation: ${orientation})`;
		const href = `./${name}`;

		if (darkFlag) {
			dark.push(
				`${SPLASH_LINK.HTML} media="${media}${SPLASH_LINK.DARK_MEDIA_SUFFIX}" href="${href}">`
			);
			continue;
		}
		light.push(`${SPLASH_LINK.HTML} media="${media}" href="${href}">`);
	}

	return light.concat(dark);
}
/**
 * Vite build plugin that post-processes the emitted index.html:
 * injects the iOS splash screen <link> tags before </head>, strips `\r` characters and
 * prepends BUILD_CONFIG.GUIDE_COMMENT.
 *
 * The work is deferred by 100 ms from `closeBundle`, is skipped when index.html does not
 * exist, and runs only once thanks to the module-level `processed` flag.
 */
export function splashScreenPlugin(): Plugin {
	const processIndexHtml = (): void => {
		if (processed) return;
		processed = true;

		const outDir = resolve(OUTPUT_DIR);
		const indexPath = resolve(outDir, 'index.html');
		if (!existsSync(indexPath)) return;

		let html = readFileSync(indexPath, 'utf-8');

		// The @vite-pwa/assets-generator generates apple-splash-*.png files;
		// scan them and add the matching <link> tags to <head>.
		const links = generateSplashScreenLinks(outDir);
		if (links.length > 0) {
			console.log(`Generated ${links.length} apple-splash link tags`);
			const indent = TAB + TAB;
			const linkBlock = links.map((link) => indent + link).join(NEWLINE);
			html = html.replace(REGEX_PATTERNS.HEAD_CLOSE, linkBlock + NEWLINE + indent + '</head>');
		}

		// Drop carriage returns left by Windows line endings, then add the guide comment
		html = BUILD_CONFIG.GUIDE_COMMENT + NEWLINE + html.replace(/\r/g, '');

		writeFileSync(indexPath, html, 'utf-8');
		console.log('Updated index.html');
	};

	return {
		name: 'llamacpp:splash-screen',
		apply: 'build',
		closeBundle() {
			setTimeout(() => {
				try {
					processIndexHtml();
				} catch (error) {
					console.error('Failed to process build output:', error);
				}
			}, 100);
		}
	};
}
+3
View File
@@ -1,6 +1,9 @@
// See https://svelte.dev/docs/kit/types#app.d.ts
// for information about these interfaces
import 'vite-plugin-pwa/pwa-assets';
import 'vite-plugin-pwa/svelte';
// Import chat types from dedicated module
import type {
+8 -1
View File
@@ -2,10 +2,17 @@
<html lang="en">
<head>
<meta charset="utf-8" />
<link rel="icon" href="%sveltekit.assets%/favicon.svg" />
<link rel="icon" href="favicon.ico" sizes="48x48" />
<link rel="icon" href="favicon.svg" sizes="any" type="image/svg+xml" />
<link rel="apple-touch-icon" href="apple-touch-icon-180x180.png" />
<link rel="manifest" href="./manifest.webmanifest" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
%sveltekit.head%
</head>
<body data-sveltekit-preload-data="hover">
<div style="display: contents">%sveltekit.body%</div>
</body>
@@ -20,6 +20,8 @@
import { ColorMode } from '$lib/enums/ui.enums';
import { fade } from 'svelte/transition';
import { goto } from '$app/navigation';
import { Button } from '$lib/components/ui/button';
import { RefreshCw } from '@lucide/svelte';
import { page } from '$app/state';
import { setChatSettingsConfigContext } from '$lib/contexts';
import { settingsReferrer } from '$lib/stores/settings-referrer.svelte';
@@ -164,6 +166,15 @@
onConfigChange={handleConfigChange}
onThemeChange={handleThemeChange}
/>
{#if currentSection.title === SETTINGS_SECTION_TITLES.GENERAL}
<div class="flex justify-end">
<Button variant="outline" onclick={() => window.location.reload()}>
<RefreshCw class="h-3 w-3" />
Reload app
</Button>
</div>
{/if}
</div>
{/if}
</div>
@@ -0,0 +1,23 @@
<!--
@component
Adds PWA-related <meta> tags to the document head: a light and a dark `theme-color`
(each scoped by a prefers-color-scheme media query) and the Apple mobile web app tags.

Props:
- `appName` (defaults to APP_NAME): content of the `apple-mobile-web-app-title` tag.
-->
<script lang="ts">
import { APPLE_META_TAGS, MEDIA_QUERIES, THEME_COLORS } from '$lib/constants/pwa';
import { APP_NAME } from '$lib/constants';
// Title shown for the installed web app; callers may override the default app name
let { appName = APP_NAME } = $props();
</script>
<svelte:head>
<!-- Theme color for light/dark modes -->
<meta name="theme-color" content={THEME_COLORS.LIGHT} media={MEDIA_QUERIES.PREFERS_LIGHT} />
<meta name="theme-color" content={THEME_COLORS.DARK} media={MEDIA_QUERIES.PREFERS_DARK} />
<!-- Apple mobile web app meta tags -->
<meta
name={APPLE_META_TAGS.MOBILE_WEB_APP_CAPABLE.name}
content={APPLE_META_TAGS.MOBILE_WEB_APP_CAPABLE.content}
/>
<meta
name={APPLE_META_TAGS.STATUS_BAR_STYLE.name}
content={APPLE_META_TAGS.STATUS_BAR_STYLE.content}
/>
<!-- The title tag has no fixed content in APPLE_META_TAGS; it comes from the prop -->
<meta name={APPLE_META_TAGS.MOBILE_WEB_APP_TITLE.name} content={appName} />
</svelte:head>
@@ -0,0 +1,35 @@
<!--
@component
"Update available" card with a Reload button. Rendered only while `needRefresh` is truthy.

Props:
- `needRefresh`: whether an update is pending (a missing value is treated as false).
- `updateServiceWorker`: callback invoked when Reload is clicked.
- `forceReload`: when truthy, the page is also reloaded via `window.location.reload()`.
-->
<script lang="ts">
import * as Card from '$lib/components/ui/card';
import { Button } from '$lib/components/ui/button';
let { needRefresh: needRefreshProp, updateServiceWorker, forceReload } = $props();
// Local mirror of the prop so the click handler can hide the card right away.
// NOTE(review): assigning to a $derived below relies on the installed Svelte version
// allowing derived values to be overridden - confirm.
let needRefresh = $derived(needRefreshProp ?? false);
</script>
{#if needRefresh}
<Card.Root class="overflow-hidden gap-1 py-5">
<Card.Header class="px-5">
<Card.Title class="text-sm font-medium">Update available</Card.Title>
</Card.Header>
<Card.Content class="gap-6 grid px-5">
<p class="text-xs text-muted-foreground">A new version is available. Reload to update.</p>
<Button
class="justify-self-end-safe"
size="sm"
onclick={() => {
// The result of updateServiceWorker() is not awaited before the optional reload
updateServiceWorker();
if (forceReload) {
window.location.reload();
}
// Hide the card locally
needRefresh = false;
}}
>
Reload
</Button>
</Card.Content>
</Card.Root>
{/if}
+2
View File
@@ -0,0 +1,2 @@
export { default as PwaMetaTags } from './PwaMetaTags.svelte';
export { default as PwaRefreshAlert } from './PwaRefreshAlert.svelte';
+1
View File
@@ -0,0 +1 @@
/**
 * Display name of the app: `VITE_PUBLIC_APP_NAME` when it is set to a non-empty value,
 * otherwise `'llama-ui'`. The optional chain tolerates `import.meta.env` being undefined
 * (presumably when this module is loaded outside Vite, e.g. by build scripts - confirm).
 */
export const APP_NAME = import.meta.env?.VITE_PUBLIC_APP_NAME || 'llama-ui';
+1
View File
@@ -1,4 +1,5 @@
export const NEWLINE = '\n';
export const TAB = '\t';
export const DEFAULT_LANGUAGE = 'text';
export const LANG_PATTERN = /^(\w*)\n?/;
export const AMPERSAND_REGEX = /&/g;
+2
View File
@@ -3,6 +3,7 @@
export * from './agentic';
export * from './api-endpoints';
export * from './app';
export * from './attachment-labels';
export * from './database';
export * from './reasoning-effort';
@@ -36,6 +37,7 @@ export * from './message-export';
export * from './model-id';
export * from './precision';
export * from './processing-info';
export * from './pwa';
export * from './routes';
export * from './sandbox';
export * from './settings-keys';
+352
View File
@@ -0,0 +1,352 @@
/**
* Centralized PWA constants to avoid magic strings, regexes, and duplicated
* definitions across the codebase.
*/
import { APP_NAME } from './app';
/** CSS media queries for the user's colour-scheme preference. */
export const MEDIA_QUERIES = {
PREFERS_DARK: '(prefers-color-scheme: dark)',
PREFERS_LIGHT: '(prefers-color-scheme: light)'
} as const;
/**
 * Colour values shared by the PWA pieces. LIGHT/DARK feed the `theme-color` meta tags and
 * BACKGROUND_LIGHT feeds the web manifest.
 * NOTE(review): the TITLE_UPDATE_ALERT values look like Tailwind palette names rather
 * than CSS colours - confirm how they are consumed.
 */
export const THEME_COLORS = {
LIGHT: '#ffffff',
DARK: '#0d0d0d',
ACCENT_BLUE: '#2563eb',
ACCENT_BLUE_HOVER: '#1d4ed8',
BACKGROUND_LIGHT: 'white',
BACKGROUND_DARK: '#111111',
TITLE_UPDATE_ALERT: {
BORDER_LIGHT: 'zinc-200',
BORDER_DARK: 'zinc-700',
BG_LIGHT: 'white',
BG_DARK: 'zinc-800',
TEXT_LIGHT: 'zinc-500',
TEXT_DARK: 'zinc-400'
}
} as const;
/** Favicon file names (relative, no leading slash) for the light and dark variants. */
export const FAVICON_PATHS = {
ICO_LIGHT: 'favicon.ico',
ICO_DARK: 'favicon-dark.ico',
SVG_LIGHT: 'favicon.svg',
SVG_DARK: 'favicon-dark.svg'
} as const;
/** CSS selectors that locate the favicon <link> elements in the document. */
export const FAVICON_SELECTORS = {
ICO_48X48: 'link[rel="icon"][sizes="48x48"]',
SVG_ANY: 'link[rel="icon"][type="image/svg+xml"]'
} as const;
/** File name of the Apple touch icon. */
export const APPLE_ASSETS = {
TOUCH_ICON: 'apple-touch-icon-180x180.png'
} as const;
/**
 * Web app manifest content, passed as `manifest` in SVELTEKIT_PWA_OPTIONS.
 * `start_url` and the icon `src` values are relative so they resolve against the
 * manifest location. `as const` on `display` / `purpose` keeps those fields as string
 * literal types instead of widening them to `string`.
 */
export const PWA_MANIFEST = {
name: APP_NAME,
short_name: APP_NAME,
description: 'Local AI chat interface powered by llama.cpp',
start_url: './',
display: 'standalone' as const,
background_color: THEME_COLORS.BACKGROUND_LIGHT,
theme_color: THEME_COLORS.BACKGROUND_LIGHT,
icons: [
{ src: 'pwa-64x64.png', sizes: '64x64', type: 'image/png' },
{ src: 'pwa-192x192.png', sizes: '192x192', type: 'image/png' },
{ src: 'pwa-512x512.png', sizes: '512x512', type: 'image/png', purpose: 'any' as const },
{
src: 'maskable-icon-512x512.png',
sizes: '512x512',
type: 'image/png',
purpose: 'maskable' as const
}
]
};
/** Root-absolute paths of the same four icon files listed in PWA_MANIFEST.icons. */
export const PWA_ICON_PATHS = {
PWA_64: '/pwa-64x64.png',
PWA_192: '/pwa-192x192.png',
PWA_512: '/pwa-512x512.png',
MASKABLE_512: '/maskable-icon-512x512.png'
} as const;
/**
 * Apple device screen table used to build splash screen media queries.
 *
 * Each key is a portrait "WIDTHxHEIGHT" string. For most entries it is the native pixel
 * resolution; for the last two iPad entries it is the logical-point size (their pixel
 * resolution is key * dpr). Each value holds the logical-point `width` / `height` that
 * CSS `device-width` / `device-height` report, plus the device pixel ratio.
 */
export const APPLE_DEVICES = {
	// iPhones (DPR 3)
	'1170x2532': { width: 390, height: 844, dpr: 3 }, // iPhone 13, 13 Pro, 14, 16e
	'1179x2556': { width: 393, height: 852, dpr: 3 }, // iPhone 14 Pro, 15, 15 Pro, 16
	'1206x2622': { width: 402, height: 874, dpr: 3 }, // iPhone 16 Pro
	'1284x2778': { width: 428, height: 926, dpr: 3 }, // iPhone 13 Pro Max, 14 Plus
	'1290x2796': { width: 430, height: 932, dpr: 3 }, // iPhone 14 Pro Max, 15 Plus, 15 Pro Max, 16 Plus
	'1320x2868': { width: 440, height: 956, dpr: 3 }, // iPhone 16 Pro Max
	// iPhones (DPR 2)
	'750x1334': { width: 375, height: 667, dpr: 2 }, // iPhone SE 4.7"
	'640x1136': { width: 320, height: 568, dpr: 2 }, // iPhone SE 4"
	// iPads (DPR 2)
	'1668x2388': { width: 834, height: 1194, dpr: 2 }, // iPad Pro 11"
	'2048x2732': { width: 1024, height: 1366, dpr: 2 }, // iPad Pro 12.9"
	'1640x2360': { width: 820, height: 1180, dpr: 2 }, // iPad Air 10.9", iPad Air 11", iPad 11"
	// Keys below are logical points; the generated files are 2064x2752 and 1488x2266
	'1032x1376': { width: 1032, height: 1376, dpr: 2 }, // iPad Air 13"
	// Fixed: was 376x573, which is not this device's point size, so the generated
	// device-width/device-height media query could never match an iPad mini
	'744x1133': { width: 744, height: 1133, dpr: 2 } // iPad mini 8.3"
} as const;
export type AppleDeviceKey = keyof typeof APPLE_DEVICES;
/**
 * Root-absolute paths of the PWA support files.
 * NOTE(review): `'/workbox-<hash>.js'` appears to be a placeholder for a hashed file
 * name rather than a literal path - confirm how consumers match it.
 */
export const PWA_FILE_PATHS = {
MANIFEST: '/manifest.webmanifest',
SERVICE_WORKER: '/sw.js',
VERSION: '/version.json',
WORKBOX: '/workbox-<hash>.js'
} as const;
// Used by the server middleware to skip API key validation.
// Keep in sync with tools/server/server-http.cpp public_endpoints list.
// Layout: API endpoints, the page itself, favicons and icons, light splash screens,
// dark splash screens, then the PWA support files from PWA_FILE_PATHS.
// NOTE(review): there are no 2064x2752 entries, although PWA_GENERATOR_DEVICES lists
// 'iPad Air 13"' and APPLE_DEVICES has a matching entry - confirm whether those splash
// files are generated and need to be public too.
export const PUBLIC_ENDPOINTS = [
'/health',
'/v1/health',
'/models',
'/v1/models',
'/props',
'/metrics',
'/',
'/index.html',
'/favicon.ico',
'/favicon-dark.ico',
'/favicon.svg',
'/favicon-dark.svg',
'/pwa-64x64.png',
'/pwa-192x192.png',
'/pwa-512x512.png',
'/maskable-icon-512x512.png',
'/apple-touch-icon-180x180.png',
'/apple-splash-portrait-640x1136.png',
'/apple-splash-landscape-640x1136.png',
'/apple-splash-portrait-750x1334.png',
'/apple-splash-landscape-750x1334.png',
'/apple-splash-portrait-1170x2532.png',
'/apple-splash-landscape-1170x2532.png',
'/apple-splash-portrait-1179x2556.png',
'/apple-splash-landscape-1179x2556.png',
'/apple-splash-portrait-1206x2622.png',
'/apple-splash-landscape-1206x2622.png',
'/apple-splash-portrait-1284x2778.png',
'/apple-splash-landscape-1284x2778.png',
'/apple-splash-portrait-1290x2796.png',
'/apple-splash-landscape-1290x2796.png',
'/apple-splash-portrait-1320x2868.png',
'/apple-splash-landscape-1320x2868.png',
'/apple-splash-portrait-1488x2266.png',
'/apple-splash-landscape-1488x2266.png',
'/apple-splash-portrait-1640x2360.png',
'/apple-splash-landscape-1640x2360.png',
'/apple-splash-portrait-1668x2388.png',
'/apple-splash-landscape-1668x2388.png',
'/apple-splash-portrait-2048x2732.png',
'/apple-splash-landscape-2048x2732.png',
'/apple-splash-portrait-dark-640x1136.png',
'/apple-splash-landscape-dark-640x1136.png',
'/apple-splash-portrait-dark-750x1334.png',
'/apple-splash-landscape-dark-750x1334.png',
'/apple-splash-portrait-dark-1170x2532.png',
'/apple-splash-landscape-dark-1170x2532.png',
'/apple-splash-portrait-dark-1179x2556.png',
'/apple-splash-landscape-dark-1179x2556.png',
'/apple-splash-portrait-dark-1206x2622.png',
'/apple-splash-landscape-dark-1206x2622.png',
'/apple-splash-portrait-dark-1284x2778.png',
'/apple-splash-landscape-dark-1284x2778.png',
'/apple-splash-portrait-dark-1290x2796.png',
'/apple-splash-landscape-dark-1290x2796.png',
'/apple-splash-portrait-dark-1320x2868.png',
'/apple-splash-landscape-dark-1320x2868.png',
'/apple-splash-portrait-dark-1488x2266.png',
'/apple-splash-landscape-dark-1488x2266.png',
'/apple-splash-portrait-dark-1640x2360.png',
'/apple-splash-landscape-dark-1640x2360.png',
'/apple-splash-portrait-dark-1668x2388.png',
'/apple-splash-landscape-dark-1668x2388.png',
'/apple-splash-portrait-dark-2048x2732.png',
'/apple-splash-landscape-dark-2048x2732.png',
'/manifest.webmanifest',
'/sw.js',
'/version.json',
'/workbox-<hash>.js'
] as const;
/**
 * Build output settings used by the Vite post-processing plugins.
 * OUTPUT_DIR is the default output directory (the plugins let the LLAMA_UI_OUT_DIR
 * environment variable override it). GUIDE_COMMENT is the HTML comment prepended to the
 * generated index.html; `.trim()` removes the blank first and last lines of the template.
 */
export const BUILD_CONFIG = {
OUTPUT_DIR: './dist',
GUIDE_COMMENT: `
<!--
This is a static build of the frontend.
It is automatically generated by the build process.
Do not edit this file directly.
To make changes, refer to the "Web UI" section in the README.
-->
`.trim()
} as const;
/**
 * Regular expressions used when post-processing the build output.
 * SPLASH_FILE capture groups: 1 = orientation (`portrait` | `landscape`),
 * 2 = optional `dark-` marker, 3 = width, 4 = height.
 * HEAD_CLOSE matches the first `</head>` together with any tabs directly before it.
 */
export const REGEX_PATTERNS = {
SPLASH_FILE: /^apple-splash-(portrait|landscape)-(dark-)?(\d+)x(\d+)\.png$/,
HEAD_CLOSE: /\t*<\/head>/
} as const;
// Device names used by @vite-pwa/assets-generator for splash screen generation.
// Keep in sync with pwa-assets.config.ts.
// Several devices share one screen size, so this list is longer than APPLE_DEVICES.
export const PWA_GENERATOR_DEVICES = [
'iPhone 13',
'iPhone 13 Pro',
'iPhone 13 Pro Max',
'iPhone 14',
'iPhone 14 Plus',
'iPhone 14 Pro',
'iPhone 14 Pro Max',
'iPhone 15',
'iPhone 15 Plus',
'iPhone 15 Pro',
'iPhone 15 Pro Max',
'iPhone 16',
'iPhone 16 Plus',
'iPhone 16 Pro',
'iPhone 16 Pro Max',
'iPhone 16e',
'iPhone SE 4"',
'iPhone SE 4.7"',
'iPad 11"',
'iPad Air 10.9"',
'iPad Air 11"',
'iPad Air 13"',
'iPad Pro 11"',
'iPad Pro 12.9"',
'iPad mini 8.3"'
] as const;
// PWA assets generator configuration — used by pwa-assets.config.ts
// DARK_PREFIX matches the optional `dark-` group of REGEX_PATTERNS.SPLASH_FILE.
// NOTE(review): the exact meaning of each option is defined by the generator, which is
// not visible here - check pwa-assets.config.ts before changing values.
export const PWA_ASSET_GENERATOR = {
LINK_PRESET: '2023',
SPLASH_PADDING: 0.75,
FIT_MODE: 'contain',
ADD_MEDIA_SCREEN: true,
BASE_PATH: './',
XHTML: false,
PNG_COMPRESSION_LEVEL: 9,
PNG_QUALITY: 60,
DARK_PREFIX: 'dark-'
} as const;
/**
 * Cache sizing and lifetime values.
 * IMMUTABLE_MAX_AGE_SECONDS is 365 days, API_CACHE_MAX_AGE_SECONDS is one day and
 * MAX_FILE_SIZE_BYTES is 10 MiB (the largest file Workbox is allowed to precache).
 */
export const CACHE_SETTINGS = {
IMMUTABLE_MAX_AGE_SECONDS: 31536000,
API_CACHE_MAX_AGE_SECONDS: 60 * 60 * 24,
API_CACHE_MAX_ENTRIES: 50,
MAX_FILE_SIZE_BYTES: 10 * 1024 * 1024
} as const;
/** Glob of build output files that Workbox precaches (any directory depth). */
export const GLOB_PATTERNS: string[] = [
'**/*.{js,css,html,ico,svg,png,webp,woff,woff2,json,webmanifest}'
];
// loading.html is the model loading page served by llama-server itself.
// The SvelteKit PWA manifest transform strips the html extension from every
// precache entry to match clean URLs, but loading.html is a plain static asset
// with no clean URL, so static servers answer 404 and the SW install fails.
export const GLOB_IGNORES: string[] = ['**/loading.html'];
/**
 * Service worker update polling, consumed by the usePwa hook.
 * CHECK_INTERVAL_MS is the polling period (60 s). UPDATE_FETCH_OPTIONS holds the cache
 * mode and request headers for the probe request that precedes `registration.update()`.
 */
export const SW_CONFIG = {
CHECK_INTERVAL_MS: 60000,
UPDATE_FETCH_OPTIONS: {
CACHE: 'no-store',
HEADERS: {
CACHE: 'no-store',
CACHE_CONTROL: 'no-cache'
}
}
} as const;
// Runtime caching configuration for Workbox
export const RUNTIME_CACHING = {
HANDLER: 'NetworkFirst',
CACHE_NAME: 'api-cache'
} as const;
// Workbox runtime caching patterns
// NOTE(review): Workbox tests a RegExp `urlPattern` against the full request URL
// (scheme and host included), so patterns anchored with `^\/` may never match and the
// runtime caching rules may be inactive - confirm against the Workbox routing docs.
export const API_CACHING_PATTERNS = {
V1_API: /^\/v1\/.*/,
STATIC_API: /^\/(health|props|models|tools|slots|cors-proxy).*/
} as const;
// SvelteKit PWA plugin options
export const PWA_KIT_OPTIONS = {
NAVIGATE_FALLBACK: './'
} as const;
/**
 * Name/content pairs for the Apple mobile web app <meta> tags.
 * MOBILE_WEB_APP_TITLE carries only a name: its content is supplied where the tag is
 * rendered.
 */
export const APPLE_META_TAGS = {
MOBILE_WEB_APP_CAPABLE: { name: 'apple-mobile-web-app-capable', content: 'yes' },
STATUS_BAR_STYLE: { name: 'apple-mobile-web-app-status-bar-style', content: 'black-translucent' },
MOBILE_WEB_APP_TITLE: { name: 'apple-mobile-web-app-title' }
} as const;
// Splash screen HTML link tag prefix used by generateSplashScreenLinks
// HTML is an unterminated opening tag: the caller appends the media and href attributes
// and the closing `>`. DARK_MEDIA_SUFFIX is appended to the media query of dark variants.
export const SPLASH_LINK = {
HTML: '<link rel="apple-touch-startup-image"',
DARK_MEDIA_SUFFIX: ' and (prefers-color-scheme: dark)'
} as const;
// SvelteKit PWA plugin configuration — used by vite.config.ts
import type { SvelteKitPWAOptions } from '@vite-pwa/sveltekit';
/**
 * Options object for the @vite-pwa/sveltekit plugin, assembled from the constants in
 * this module: the web manifest, the Workbox precache globs and size limit, two runtime
 * caching rules that share one cache name and expiration policy, the dev-mode service
 * worker settings and the SvelteKit-specific `kit` options.
 */
export const SVELTEKIT_PWA_OPTIONS: SvelteKitPWAOptions = {
// Strategy: generateSW - the plugin generates a service worker automatically
// using Workbox. For a custom SW, use 'injectManifest' instead.
// Manifest configuration
manifest: PWA_MANIFEST,
// Workbox configuration for generateSW strategy
workbox: {
// Match all static assets in the build output.
// Uses '**/' because SvelteKit outputs files under _app/immutable/
// subdirectories.
globPatterns: GLOB_PATTERNS,
globIgnores: GLOB_IGNORES,
maximumFileSizeToCacheInBytes: CACHE_SETTINGS.MAX_FILE_SIZE_BYTES,
// Runtime caching for API calls - use NetworkFirst so APIs are always fresh
runtimeCaching: [
{
urlPattern: API_CACHING_PATTERNS.V1_API,
handler: RUNTIME_CACHING.HANDLER,
options: {
cacheName: RUNTIME_CACHING.CACHE_NAME,
expiration: {
maxEntries: CACHE_SETTINGS.API_CACHE_MAX_ENTRIES,
maxAgeSeconds: CACHE_SETTINGS.API_CACHE_MAX_AGE_SECONDS
}
}
},
{
urlPattern: API_CACHING_PATTERNS.STATIC_API,
handler: RUNTIME_CACHING.HANDLER,
options: {
cacheName: RUNTIME_CACHING.CACHE_NAME,
expiration: {
maxEntries: CACHE_SETTINGS.API_CACHE_MAX_ENTRIES,
maxAgeSeconds: CACHE_SETTINGS.API_CACHE_MAX_AGE_SECONDS
}
}
}
]
},
devOptions: {
enabled: true,
suppressWarnings: true,
// Use PWA_KIT_OPTIONS.NAVIGATE_FALLBACK to match production SW behaviour
// (navigateFallback defaults to the configured base path, which is '/' for this SPA).
// NOTE(review): NAVIGATE_FALLBACK is './' while the line above mentions '/' - confirm
// which value the production service worker actually ends up with.
navigateFallback: PWA_KIT_OPTIONS.NAVIGATE_FALLBACK
},
// SvelteKit-specific options
kit: {
// Include version file for proper cache invalidation
includeVersionFile: true
}
};
@@ -31,6 +31,7 @@ export const SETTINGS_KEYS = {
SHOW_RAW_MODEL_NAMES: 'showRawModelNames',
SHOW_MODEL_QUANTIZATION: 'showModelQuantization',
SHOW_MODEL_TAGS: 'showModelTags',
SHOW_BUILD_VERSION: 'showBuildVersion',
SHOW_SYSTEM_MESSAGE: 'showSystemMessage',
// Sampling
TEMPERATURE: 'temperature',
@@ -365,6 +365,14 @@ const SETTINGS_REGISTRY: Record<string, SettingsSectionEntry> = {
serverKey: SETTINGS_KEYS.ALWAYS_SHOW_AGENTIC_TURNS,
paramType: SyncableParameterType.BOOLEAN
}
},
{
key: SETTINGS_KEYS.SHOW_BUILD_VERSION,
label: 'Show build version information',
help: 'Display the current build version in the bottom-right corner of the interface.',
defaultValue: false,
type: SettingsFieldType.CHECKBOX,
section: SETTINGS_SECTION_SLUGS.DISPLAY
}
]
},
+3
View File
@@ -40,6 +40,9 @@ export const DEPRECATED_MCP_DEFAULT_ENABLED_LOCALSTORAGE_KEY = `${STORAGE_APP_NA
/** @deprecated Use {@link USER_OVERRIDES_LOCALSTORAGE_KEY} instead */
export const DEPRECATED_USER_OVERRIDES_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME_DEPRECATED}.userOverrides`;
/** Build version stored in localStorage for non-PWA update detection */
export const BUILD_VERSION_LOCALSTORAGE_KEY = `${STORAGE_APP_NAME}.buildVersion`;
/** Maps new keys to their deprecated fallback keys */
export const NEW_TO_DEPRECATED_MAP: Record<string, string> = {
[ALWAYS_ALLOWED_TOOLS_LOCALSTORAGE_KEY]: DEPRECATED_ALWAYS_ALLOWED_TOOLS_LOCALSTORAGE_KEY,
-1
View File
@@ -5,7 +5,6 @@ import { ROUTES } from './routes';
export const FORK_TREE_DEPTH_PADDING = 8;
export const SYSTEM_MESSAGE_PLACEHOLDER = 'System message';
export const APP_NAME = import.meta.env.VITE_PUBLIC_APP_NAME || 'llama-ui';
export const ICON_STRIP_TRANSITION_DURATION = 150;
export const ICON_STRIP_TRANSITION_DELAY_MULTIPLIER = 50;
+2
View File
@@ -63,3 +63,5 @@ export { ColorMode, HtmlInputType, McpPromptVariant, TooltipSide, UrlProtocol }
export { KeyboardKey } from './keyboard.enums';
export { ToolSource, ToolPermissionDecision, ToolResponseField } from './tools.enums';
export { SplashOrientation } from './splash.enums';
+7
View File
@@ -0,0 +1,7 @@
/**
 * Splash screen orientation for iOS apple-touch-startup-image.
 * The string values are the orientation tokens found in splash file names and are
 * inserted verbatim into the `(orientation: ...)` media feature of the link tags.
 */
export enum SplashOrientation {
PORTRAIT = 'portrait',
LANDSCAPE = 'landscape'
}
+80
View File
@@ -0,0 +1,80 @@
import { browser } from '$app/environment';
import { useRegisterSW } from 'virtual:pwa-register/svelte';
import { versionStore } from '$lib/stores/version.svelte';
import { BUILD_VERSION_LOCALSTORAGE_KEY } from '$lib/constants/storage';
import { SW_CONFIG } from '$lib/constants/pwa';
/**
 * PWA hook: registers the service worker, polls for a newer one and detects build
 * version changes through localStorage.
 *
 * Two mechanisms are combined because they always belong together:
 * 1. Service worker registration with a periodic update check.
 * 2. A stored-version comparison that covers users who are not served by the SW.
 *
 * @returns an object exposing
 *   - `needRefresh`: the `needRefresh` store returned by `useRegisterSW`,
 *   - `updateServiceWorker`: the updater returned by `useRegisterSW`,
 *   - `needRefreshByStorage`: true when the stored build version differs from the
 *     current one (exposed through a getter so reads stay reactive).
 */
export function usePwa() {
	let pollTimer: ReturnType<typeof setInterval> | null = null;
	let needRefreshByStorage = $state(false);

	// offlineReady is not consumed yet - to do: add installation banners for iOS
	const { needRefresh: swNeedRefresh, updateServiceWorker } = useRegisterSW({
		onRegisteredSW(swUrl: string, r: ServiceWorkerRegistration | undefined) {
			// A re-registration must not leave the previous timer running
			if (pollTimer) {
				clearInterval(pollTimer);
			}

			pollTimer = setInterval(async () => {
				// Nothing to do without a registration, while an install is in flight, or offline
				if (!r || r.installing) return;
				if (!navigator?.onLine) return;

				try {
					// Probe the SW script bypassing HTTP caches; only ask for an update when it is reachable
					const resp = await fetch(swUrl, {
						cache: SW_CONFIG.UPDATE_FETCH_OPTIONS.CACHE,
						headers: {
							cache: SW_CONFIG.UPDATE_FETCH_OPTIONS.HEADERS.CACHE,
							'cache-control': SW_CONFIG.UPDATE_FETCH_OPTIONS.HEADERS.CACHE_CONTROL
						}
					});
					if (resp?.status === 200) {
						await r.update();
					}
				} catch (e) {
					console.error(e);
				}
			}, SW_CONFIG.CHECK_INTERVAL_MS);
		},
		onRegisterError(error: unknown) {
			console.error('[PWA] SW registration error:', error);
		}
	});

	// Compare the current frontend version with the one remembered in localStorage, then
	// remember the current one. A mismatch means the server was upgraded since the last visit.
	$effect(() => {
		if (!browser) return;

		const current = versionStore.value;
		if (!current) return;

		try {
			const previous = localStorage.getItem(BUILD_VERSION_LOCALSTORAGE_KEY);
			needRefreshByStorage = Boolean(previous) && previous !== current;
			localStorage.setItem(BUILD_VERSION_LOCALSTORAGE_KEY, current);
		} catch {
			// localStorage unavailable: do not report a mismatch
			needRefreshByStorage = false;
		}
	});

	return {
		/** Writable that is true when a PWA service worker update is available */
		get needRefresh() {
			return swNeedRefresh;
		},
		updateServiceWorker,
		/** Version mismatch detected via localStorage (non-PWA users) */
		get needRefreshByStorage() {
			return needRefreshByStorage;
		}
	};
}
@@ -0,0 +1,42 @@
/**
* buildInfoStore - llama.cpp build information
*
* Reads the build version from `build.json` — embedded at llama.cpp build time
* with the llama.cpp build number (LLAMA_BUILD_NUMBER). Shown in the UI when
* `showBuildVersion` is enabled.
*
* In dev mode (`import.meta.env.DEV`, e.g. via `npm run dev`), the value is the literal
* string `'dev'`, since the artifact is not produced.
*/
import { browser } from '$app/environment';
import { base } from '$app/paths';
// Reactive holder for the build string; stays '' until loaded (or when loading fails)
let build = $state<string>('');

/**
 * Populate `build`: no-op outside the browser, `'dev'` in dev mode, otherwise the
 * `version` field of `<base>/build.json` fetched with HTTP caching disabled.
 * Any fetch or parse failure leaves the current value untouched.
 */
async function loadBuild() {
	if (!browser) return;

	if (import.meta.env.DEV) {
		build = 'dev';
		return;
	}

	try {
		const response = await fetch(`${base}/build.json`, { cache: 'no-store' });
		if (!response.ok) return;

		const payload = await response.json();
		build = payload.version ?? '';
	} catch {
		// build.json missing or unreachable - leave as empty string
	}
}

// Start loading as soon as the module is imported; errors are handled inside loadBuild
loadBuild();

/** Read-only view of the llama.cpp build string ('' while unknown). */
export const buildInfoStore = {
	get value(): string {
		return build;
	}
};
+1 -1
View File
@@ -489,7 +489,7 @@ class MCPStore {
if (!rootDomain) return null;
const origin = `${url.protocol}//${rootDomain}`;
const candidates = ['favicon.ico', 'favicon.svg', 'favicon.png'];
const candidates = ['favicon.ico', 'favicon.png'];
for (const path of candidates) {
const faviconUrl = `${origin}/${path}`;
+14
View File
@@ -0,0 +1,14 @@
import { browser } from '$app/environment';
import { MEDIA_QUERIES } from '$lib/constants';
/**
 * Reactive system colour-scheme state.
 * `isSystemDark` is false outside the browser; in the browser it starts as the current
 * result of the prefers-dark media query and is updated whenever that query changes.
 */
export const theme = $state({
	isSystemDark: browser && window.matchMedia(MEDIA_QUERIES.PREFERS_DARK).matches
});

if (browser) {
	// Follow OS-level theme switches for the lifetime of the page
	window.matchMedia(MEDIA_QUERIES.PREFERS_DARK).addEventListener('change', ({ matches }) => {
		theme.isSystemDark = matches;
	});
}
+41
View File
@@ -0,0 +1,41 @@
/**
* versionStore - Frontend build version
*
* Reads from `_app/version.json`, SvelteKit's native version file (the @vite-pwa/sveltekit
* plugin is configured with `includeVersionFile` to pick it up). The version string changes
* on every build, so comparing it against localStorage reliably detects server upgrades.
*
* In dev mode, falls back to `'dev'`.
*/
import { browser } from '$app/environment';
import { base } from '$app/paths';
// Reactive holder for the frontend version; stays '' until loaded (or when loading fails)
let version = $state<string>('');

/**
 * Populate `version`: no-op outside the browser, `'dev'` in dev mode, otherwise the
 * `version` field of `<base>/_app/version.json` fetched with HTTP caching disabled.
 * Any fetch or parse failure leaves the current value untouched.
 */
async function loadVersion() {
	if (!browser) return;

	if (import.meta.env.DEV) {
		version = 'dev';
		return;
	}

	try {
		const response = await fetch(`${base}/_app/version.json`, { cache: 'no-store' });
		if (!response.ok) return;

		const payload = await response.json();
		version = payload.version ?? '';
	} catch {
		// _app/version.json missing or unreachable - leave as empty string
	}
}

// Start loading as soon as the module is imported; errors are handled inside loadVersion
loadVersion();

/** Read-only view of the frontend build version ('' while unknown). */
export const versionStore = {
	get value(): string {
		return version;
	}
};
+3
View File
@@ -165,3 +165,6 @@ export type { ToolEntry, ToolGroup } from './tools';
// Reasoning
export type { ReasoningEffortLevel } from './reasoning';
// Splash
export type { SplashDimensions } from './splash';
+1
View File
@@ -0,0 +1 @@
/**
 * Screen description used to build a splash screen media query: `deviceW` / `deviceH`
 * fill `device-width` / `device-height` (in px) and `dpr` fills
 * `-webkit-device-pixel-ratio`.
 */
export type SplashDimensions = { deviceW: number; deviceH: number; dpr: number };
+2 -2
View File
@@ -57,7 +57,7 @@ export async function convertPDFToText(file: File): Promise<string> {
try {
const buffer = await getFileAsBuffer(file);
const pdf = await pdfjs.getDocument(buffer).promise;
const pdf = await pdfjs.getDocument({ data: buffer }).promise;
const numPages = pdf.numPages;
const textContentPromises: Promise<TextContent>[] = [];
@@ -94,7 +94,7 @@ export async function convertPDFToImage(file: File, scale: number = 1.5): Promis
try {
const buffer = await getFileAsBuffer(file);
const doc = await pdfjs.getDocument(buffer).promise;
const doc = await pdfjs.getDocument({ data: buffer }).promise;
const pages: Promise<string>[] = [];
for (let i = 1; i <= doc.numPages; i++) {
+62 -4
View File
@@ -13,6 +13,8 @@
DialogConversationTitleUpdate,
SidebarNavigation
} from '$lib/components/app';
import { PwaMetaTags, PwaRefreshAlert } from '$lib/components/pwa';
import { pwaAssetsHead } from 'virtual:pwa-assets/head';
import { conversationsStore } from '$lib/stores/conversations.svelte';
import * as Sidebar from '$lib/components/ui/sidebar/index.js';
@@ -26,10 +28,16 @@
import { modelsStore } from '$lib/stores/models.svelte';
import { mcpStore } from '$lib/stores/mcp.svelte';
import { TOOLTIP_DELAY_DURATION } from '$lib/constants';
import { FAVICON_PATHS, FAVICON_SELECTORS } from '$lib/constants/pwa';
import { useKeyboardShortcuts } from '$lib/hooks/use-keyboard-shortcuts.svelte';
import { usePwa } from '$lib/hooks/use-pwa.svelte';
import { useSettingsNavigation } from '$lib/hooks/use-settings-navigation.svelte';
import { conversations } from '$lib/stores/conversations.svelte';
import { isMobile } from '$lib/stores/viewport.svelte';
import { theme } from '$lib/stores/theme.svelte';
import { buildInfoStore } from '$lib/stores/build-info.svelte';
import { SETTINGS_KEYS } from '$lib/constants';
let { children } = $props();
let alwaysShowSidebarOnDesktop = $derived(config().alwaysShowSidebarOnDesktop);
@@ -46,11 +54,31 @@
}
| undefined = $state();
let showBuildVersion = $derived(config()[SETTINGS_KEYS.SHOW_BUILD_VERSION] as boolean);
let titleUpdateDialogOpen = $state(false);
let titleUpdateCurrentTitle = $state('');
let titleUpdateNewTitle = $state('');
let titleUpdateResolve: ((value: boolean) => void) | null = null;
const panelNav = useSettingsNavigation();
// Keep the hook object intact: destructuring needRefreshByStorage reads the getter once and freezes it
const pwa = usePwa();
const { needRefresh, updateServiceWorker } = pwa;
/**
 * Point the .ico and .svg favicon <link> elements at the variant matching the system
 * colour scheme. A link element that is not present in the document is skipped.
 */
function updateFavicon() {
	const useDark = theme.isSystemDark;

	const targets: [string, string][] = [
		[FAVICON_SELECTORS.ICO_48X48, useDark ? FAVICON_PATHS.ICO_DARK : FAVICON_PATHS.ICO_LIGHT],
		[FAVICON_SELECTORS.SVG_ANY, useDark ? FAVICON_PATHS.SVG_DARK : FAVICON_PATHS.SVG_LIGHT]
	];

	for (const [selector, href] of targets) {
		const link = document.querySelector(selector) as HTMLLinkElement | null;
		if (link) {
			link.href = href;
		}
	}
}
function navigateToConversation(direction: -1 | 1) {
const allConvs = conversations();
@@ -137,9 +165,16 @@
}
onMount(() => {
updateFavicon();
mounted = true;
});
$effect(() => {
void theme.isSystemDark;
updateFavicon();
});
$effect(() => {
if (alwaysShowSidebarOnDesktop && isDesktop) {
sidebarOpen = true;
@@ -236,13 +271,36 @@
</script>
<svelte:head>
{#if pwaAssetsHead.themeColor}
<meta name="theme-color" content={pwaAssetsHead.themeColor.content} />
{/if}
{#if config().customCss}
<style use:customCss></style>
{/if}
{#each pwaAssetsHead.links as link (link.href)}
<link {...link} />
{/each}
<PwaMetaTags />
</svelte:head>
<!-- PWA update prompt + version -->
<div class="fixed right-4 bottom-4 z-[9999] flex flex-col items-end gap-1">
{#if showBuildVersion && buildInfoStore.value}
<span class="text-[10px] tabular-nums text-muted-foreground">{buildInfoStore.value}</span>
{/if}
<PwaRefreshAlert
needRefresh={$needRefresh || pwa.needRefreshByStorage}
forceReload={pwa.needRefreshByStorage}
{updateServiceWorker}
/>
</div>
<Tooltip.Provider delayDuration={TOOLTIP_DELAY_DURATION}>
<ModeWatcher />
<Toaster richColors />
<DialogConversationTitleUpdate
@@ -254,7 +312,7 @@
/>
<Sidebar.Provider bind:open={sidebarOpen}>
<div class="flex h-dvh w-full">
<div class="flex h-screen w-full">
<Sidebar.Root variant="floating" class="h-full"
><SidebarNavigation bind:this={chatSidebar} /></Sidebar.Root
>
@@ -285,9 +343,9 @@
/>
{/if}
<Sidebar.Inset class="flex flex-1 flex-col overflow-hidden"
>{@render children?.()}</Sidebar.Inset
>
<Sidebar.Inset class="flex flex-1 flex-col overflow-hidden">
{@render children?.()}
</Sidebar.Inset>
</div>
</Sidebar.Provider>
</Tooltip.Provider>
+14
View File
@@ -0,0 +1,14 @@
<svg width="512" height="512" viewBox="0 0 512 512" fill="none" xmlns="http://www.w3.org/2000/svg">
<g clip-path="url(#clip0_29_291)">
<path d="M244.95 8C215.233 8 187.774 23.8591 172.923 49.5999L95.6009 183.625C60.2162 244.959 104.481 321.6 175.29 321.6H208L316.977 132.708C348.959 77.2719 308.95 8 244.95 8ZM208 321.6H351.947C415.982 321.6 456.013 390.91 424.013 446.377C409.155 472.132 381.681 488 351.947 488H271.29C200.481 488 156.216 411.359 191.601 350.026L208 321.6Z" fill="#FAFAFA"/>
<path d="M208 321.6H16L106.462 164.8L208 321.6Z" fill="#FAFAFA"/>
<path d="M388.923 8L208 321.6L253.6 8H388.923Z" fill="#FAFAFA"/>
<path d="M304 488H112L202.462 331.2L304 488Z" fill="#FAFAFA"/>
<path d="M496 321.6H208L419.399 454.4L496 321.6Z" fill="#FAFAFA"/>
</g>
<defs>
<clipPath id="clip0_29_291">
<rect width="512" height="512" fill="white"/>
</clipPath>
</defs>
</svg>

After

Width:  |  Height:  |  Size: 868 B

+14 -1
View File
@@ -1 +1,14 @@
<svg width="256" xmlns="http://www.w3.org/2000/svg" height="256" id="screenshot-ef94fbb0-dbab-80ed-8006-89429900edbf" viewBox="0 0 256 256" xmlns:xlink="http://www.w3.org/1999/xlink" fill="none" version="1.1"><g id="shape-ef94fbb0-dbab-80ed-8006-89429900edbf" rx="0" ry="0"><g id="shape-ef94fbb0-dbab-80ed-8006-894215755c3a"><g class="fills" id="fills-ef94fbb0-dbab-80ed-8006-894215755c3a"><rect rx="0" ry="0" x="0" y="0" transform="matrix(1.000000, 0.000000, 0.000000, 1.000000, 0.000000, 0.000000)" width="256" height="256" style="fill: rgb(27, 31, 32); fill-opacity: 1;"/></g></g><g id="shape-ef94fbb0-dbab-80ed-8006-89422363ef3f" rx="0" ry="0"><g id="shape-ef94fbb0-dbab-80ed-8006-89422363ef40"><g class="fills" id="fills-ef94fbb0-dbab-80ed-8006-89422363ef40"><path d="M171.66500854492188,99.5302505493164L159.79953002929688,120.62468719482422C144.15451049804688,108.58329010009766,120.9504165649414,106.8254165649414,105.3053970336914,119.7457504272461C80.0798110961914,140.57652282714844,81.8376235961914,188.7422637939453,121.1261978149414,189.00587463378906C132.11300659179688,189.00587463378906,141.42965698242188,183.8201141357422,151.44967651367188,180.39234924316406L156.72335815429688,201.3988494873047C147.84591674804688,205.52989196777344,138.79293823242188,209.7487335205078,129.03683471679688,211.06712341308594C40.08835220336914,223.1964569091797,45.18600845336914,94.78400421142578,125.6088638305664,88.10407257080078C142.48434448242188,86.69782257080078,157.33834838867188,91.09247589111328,171.75314331054688,99.5302505493164Z" class="st0" style="fill: rgb(255, 130, 54); fill-opacity: 1;"/></g></g><g id="shape-ef94fbb0-dbab-80ed-8006-89422363ef41"><g class="fills" id="fills-ef94fbb0-dbab-80ed-8006-89422363ef41"><path 
d="M110.2272720336914,79.31470489501953C96.6918716430664,83.35785675048828,84.1232681274414,90.8288345336914,74.6305923461914,101.28812408447266C72.8727798461914,80.01782989501953,77.6188735961914,37.03793716430664,101.2621841430664,28.6001033782959C104.7780532836914,27.36964988708496,116.8195571899414,24.293371200561523,116.4679946899414,30.533788681030273C116.1161880493164,36.77426528930664,107.7663345336914,47.49722671508789,105.7450942993164,53.29823684692383C102.2292251586914,63.49386978149414,105.4811782836914,70.52535247802734,110.3154067993164,79.40265655517578Z" class="st0" style="fill: rgb(255, 130, 54); fill-opacity: 1;"/></g></g><g id="shape-ef94fbb0-dbab-80ed-8006-89422363ef42"><g class="fills" id="fills-ef94fbb0-dbab-80ed-8006-89422363ef42"><path d="M143.62692260742188,127.65621185302734L143.62692260742188,143.47706604003906L157.68991088867188,143.47706604003906L157.68991088867188,155.7821807861328L143.62692260742188,155.7821807861328L143.62692260742188,170.7240753173828L130.44284057617188,170.7240753173828L130.44284057617188,155.7821807861328L115.5009536743164,155.7821807861328L115.5009536743164,143.47706604003906L129.12448120117188,143.47706604003906L130.44284057617188,142.15867614746094L130.44284057617188,127.65621185302734L143.62692260742188,127.65621185302734Z" class="st0" style="fill: rgb(255, 130, 54); fill-opacity: 1;"/></g></g><g id="shape-ef94fbb0-dbab-80ed-8006-89422363ef43"><g class="fills" id="fills-ef94fbb0-dbab-80ed-8006-89422363ef43"><path 
d="M191.96823120117188,127.65621185302734L191.96823120117188,142.15867614746094L193.28683471679688,143.47706604003906L206.91036987304688,143.47706604003906L206.91036987304688,155.7821807861328L191.96823120117188,155.7821807861328L191.96823120117188,170.7240753173828L178.78439331054688,170.7240753173828L178.78439331054688,155.7821807861328L164.72140502929688,155.7821807861328L164.72140502929688,143.47706604003906L178.78439331054688,143.47706604003906L178.78439331054688,127.65621185302734L191.96823120117188,127.65621185302734Z" class="st0" style="fill: rgb(255, 130, 54); fill-opacity: 1;"/></g></g><g id="shape-ef94fbb0-dbab-80ed-8006-89422363ef44"><g class="fills" id="fills-ef94fbb0-dbab-80ed-8006-89422363ef44"><path d="M153.20748901367188,38.092655181884766C154.96554565429688,40.72946548461914,145.03341674804688,52.06770706176758,143.45114135742188,54.96817398071289C138.88082885742188,63.581790924072266,141.95700073242188,68.50382232666016,145.38473510742188,76.67792510986328C135.45285034179688,75.18372344970703,126.2240982055664,76.41425323486328,116.3798599243164,77.55683135986328C118.5773696899414,58.659732818603516,129.21261596679688,31.1490535736084,153.20748901367188,38.092655181884766Z" class="st0" style="fill: rgb(255, 130, 54); fill-opacity: 1;"/></g></g></g></g></svg>
<svg width="512" height="512" viewBox="0 0 512 512" fill="none" xmlns="http://www.w3.org/2000/svg">
<g clip-path="url(#clip0_29_291)">
<path d="M244.95 8C215.233 8 187.774 23.8591 172.923 49.5999L95.6009 183.625C60.2162 244.959 104.481 321.6 175.29 321.6H208L316.977 132.708C348.959 77.2719 308.95 8 244.95 8ZM208 321.6H351.947C415.982 321.6 456.013 390.91 424.013 446.377C409.155 472.132 381.681 488 351.947 488H271.29C200.481 488 156.216 411.359 191.601 350.026L208 321.6Z" fill="#111111"/>
<path d="M208 321.6H16L106.462 164.8L208 321.6Z" fill="#111111"/>
<path d="M388.923 8L208 321.6L253.6 8H388.923Z" fill="#111111"/>
<path d="M304 488H112L202.462 331.2L304 488Z" fill="#111111"/>
<path d="M496 321.6H208L419.399 454.4L496 321.6Z" fill="#111111"/>
</g>
<defs>
<clipPath id="clip0_29_291">
<rect width="512" height="512" fill="white"/>
</clipPath>
</defs>
</svg>

Before

Width:  |  Height:  |  Size: 4.4 KiB

After

Width:  |  Height:  |  Size: 868 B

-3
View File
@@ -29,9 +29,6 @@ const config = {
},
alias: {
$styles: 'src/styles'
},
version: {
name: 'llama-ui'
}
},
-7
View File
@@ -1,7 +0,0 @@
import { expect, test } from '@playwright/test';
test('home page loads correctly', async ({ page }) => {
await page.goto('/');
// Wait for the greeting to become visible (stores need time to initialize)
await expect(page.locator('h1', { hasText: /Hello there/ })).toBeVisible();
});
+106
View File
@@ -0,0 +1,106 @@
import { expect, test } from '@playwright/test';
test.describe('PWA Service Worker', () => {
// Verifies that a service worker becomes active for the page and that its script URL points at `/sw.js`.
test('service worker is registered', async ({ page }) => {
await page.goto('/');
// The callback runs inside the browser page. It resolves with the ready registration,
// or rejects after 15 s if `ready` has not settled by then, so the failure carries an explicit message.
const swURL = await page.evaluate(async () => {
const registration = await Promise.race([
// eslint-disable-next-line @typescript-eslint/ban-ts-comment
// @ts-ignore - type inference differs from browser runtime
navigator.serviceWorker.ready,
new Promise((_, reject) =>
setTimeout(() => reject(new Error('Service worker registration failed: timeout')), 15000)
)
]);
// @ts-expect-error registration is of type unknown
return registration.active?.scriptURL;
});
// `scriptURL` is undefined when no worker is active, so check presence before the path.
expect(swURL).toBeTruthy();
expect(swURL).toContain('/sw.js');
});
// Downloads the active service worker script and checks that its precache manifest
// and routing setup reference the expected build artifacts.
test('service worker has precache configured', async ({ page }) => {
	await page.goto('/');

	// One `ready` await is sufficient; the previous extra evaluate only repeated the same wait.
	const swActive = await page.evaluate(async () => {
		const reg = await navigator.serviceWorker.ready;
		return reg.active?.scriptURL ?? null;
	});
	expect(swActive).toBeTruthy();

	const swResponse = await page.request.get(swActive!);
	// Fail on the HTTP status first so the regex checks below never run against an error body.
	expect(swResponse.ok()).toBeTruthy();
	const swContent = await swResponse.text();

	// Precache contains SvelteKit content-hashed bundle paths.
	// The hash class accepts `_` as well as `-`, matching the build-output suite:
	// bundler hashes may use the URL-safe base64 alphabet.
	expect(swContent).toMatch(/"_app\/immutable\/bundle\.[a-zA-Z0-9_-]+\.js"/);
	expect(swContent).toMatch(/"_app\/immutable\/assets\/bundle\.[a-zA-Z0-9_-]+\.css"/);
	expect(swContent).toMatch(/"manifest\.webmanifest"/);
	expect(swContent).toMatch(/"_app\/version\.json"/);
	// Routing: a navigation fallback and the runtime cache used for API requests.
	expect(swContent).toMatch(/NavigationRoute/);
	expect(swContent).toMatch(/api-cache/);
});
// Loads the app once online so the service worker can cache it, then switches the
// browser context offline and checks that a fresh navigation still renders content.
test('offline mode - page loads when offline after caching', async ({ browser }) => {
	// A dedicated context keeps the offline switch away from the shared `page` fixture.
	const context = await browser.newContext();
	try {
		const offlinePage = await context.newPage();
		await offlinePage.goto('/');
		await offlinePage.waitForLoadState('networkidle');
		await offlinePage.evaluate(async () => {
			await navigator.serviceWorker.ready;
		});
		// Grace period after the worker reports ready, before the network is cut.
		// NOTE(review): presumably this lets precaching finish; confirm whether a deterministic signal exists.
		await offlinePage.waitForTimeout(2000);

		await context.setOffline(true);
		await offlinePage.goto('/');

		const bodyText = await offlinePage.locator('body').textContent();
		expect(bodyText).toBeTruthy();
	} finally {
		// Always release the manually created context, including when a step above throws.
		await context.close();
	}
});
// Requests `/_app/version.json` and checks that it exposes a non-empty string `version`.
test('version.json is accessible and contains version', async ({ page }) => {
	const res = await page.request.get('/_app/version.json');
	expect(res.ok()).toBeTruthy();

	const payload = await res.json();
	expect(payload).toHaveProperty('version');

	// Safe to destructure: the property check above has already passed.
	const { version } = payload;
	expect(typeof version).toBe('string');
	expect(version.length).toBeGreaterThan(0);
});
// Requests the web app manifest and checks its identity, launch settings and icon list.
test('manifest.webmanifest is accessible and valid', async ({ page }) => {
	const res = await page.request.get('/manifest.webmanifest');
	expect(res.ok()).toBeTruthy();

	const webManifest = await res.json();

	// Exact field values the manifest must carry, asserted in declaration order.
	const requiredFields = {
		name: 'llama-ui',
		short_name: 'llama-ui',
		start_url: './',
		display: 'standalone'
	};
	for (const [field, value] of Object.entries(requiredFields)) {
		expect(webManifest).toHaveProperty(field, value);
	}

	// At least one icon entry has to be declared.
	const { icons } = webManifest;
	expect(icons).toBeTruthy();
	expect(icons.length).toBeGreaterThan(0);
});
// Checks that the served index.html references the content-hashed SvelteKit bundle
// through a link to the script, a stylesheet link, and a dynamic import.
test('index.html contains content-hashed bundle references', async ({ page }) => {
	const response = await page.request.get('/');
	expect(response.ok()).toBeTruthy();
	const html = await response.text();

	// SvelteKit outputs content-hashed bundle names in _app/immutable/.
	// Paths may be relative (`./`) or absolute (`/`). The hash class accepts `_` as well as `-`,
	// matching the build-output suite, because bundler hashes may use the URL-safe base64 alphabet.
	expect(html).toMatch(/href="(\.\/|\/)_app\/immutable\/bundle\.[a-zA-Z0-9_-]+\.js"/);
	expect(html).toMatch(/href="(\.\/|\/)_app\/immutable\/assets\/bundle\.[a-zA-Z0-9_-]+\.css"/);
	expect(html).toMatch(/import\("(\.\/|\/)_app\/immutable\/bundle\.[a-zA-Z0-9_-]+\.js"\)/);
});
});
@@ -0,0 +1,57 @@
<script module lang="ts">
// Storybook stories (Svelte CSF) for the PWA refresh alert component.
import { defineMeta } from '@storybook/addon-svelte-csf';
import PwaRefreshAlert from '$lib/components/pwa/PwaRefreshAlert.svelte';
import { expect } from 'storybook/test';
// Registers the component under "Components/PwaRefreshAlert" with the `centered` layout
// and yields the `Story` component used by the markup below.
const { Story } = defineMeta({
title: 'Components/PwaRefreshAlert',
component: PwaRefreshAlert,
parameters: {
layout: 'centered'
}
});
</script>
<Story
	name="Default"
	args={{ needRefresh: true, updateServiceWorker: () => console.log('reload') }}
	play={async ({ canvas }) => {
		// With `needRefresh` on, the heading, the explanatory text and the Reload button must all render.
		await expect(canvas.getByText('Update available')).toBeInTheDocument();
		await expect(canvas.getByText(/A new version is available/)).toBeInTheDocument();
		await expect(canvas.getByRole('button', { name: 'Reload' })).toBeInTheDocument();
	}}
/>
<Story
	name="Hidden"
	args={{ needRefresh: false, updateServiceWorker: () => console.log('reload') }}
	play={async ({ canvas }) => {
		// With `needRefresh` off, the alert heading must not be rendered at all.
		const heading = canvas.queryByText('Update available');
		await expect(heading).not.toBeInTheDocument();
	}}
/>
<Story
	name="ClickReload"
	args={{ needRefresh: true, updateServiceWorker: () => console.log('reload') }}
	play={async ({ canvas, userEvent }) => {
		// The Reload button has to be present before it can be clicked.
		const reloadButton = canvas.getByRole('button', { name: 'Reload' });
		await expect(reloadButton).toBeInTheDocument();

		await userEvent.click(reloadButton);

		// After the click, neither the heading nor the button may remain in the canvas.
		await expect(canvas.queryByText('Update available')).not.toBeInTheDocument();
		await expect(canvas.queryByRole('button', { name: 'Reload' })).not.toBeInTheDocument();
	}}
/>
+195
View File
@@ -0,0 +1,195 @@
import { existsSync, readFileSync, readdirSync } from 'node:fs';
import { resolve } from 'node:path';
import { describe, expect, it } from 'vitest';
// Absolute path of the build output directory, resolved two levels up from `__dirname`.
const DIST_DIR = resolve(__dirname, '../../dist');
// Evaluated once at module load; the suite below uses it to decide whether to register its tests.
const distExists = existsSync(DIST_DIR);
// PWA Build Output tests are integration tests that require a built dist/.
// CI builds first then runs these tests; local devs should run `npm run build` or use `npm run test:pwa`.
describe('PWA Build Output', () => {
// Without a build there is nothing to inspect: register one skipped placeholder and stop.
if (!distExists) {
	console.warn(`⚠ Skipping PWA Build Output tests - dist/ not found (run 'npm run build' first)`);
	// `it.skip` makes reporters show the placeholder as skipped instead of as a passing test.
	it.skip('skipped - dist/ not found', () => {});
	return;
}

// Load the two artifacts most tests inspect. A missing file yields '' instead of throwing
// while the suite is being collected, so the dedicated existence checks and the
// `toBeTruthy()` guards in each test report the problem as ordinary test failures.
const swPath = resolve(DIST_DIR, 'sw.js');
const indexPath = resolve(DIST_DIR, 'index.html');
const swContent = existsSync(swPath) ? readFileSync(swPath, 'utf-8') : '';
const indexContent = existsSync(indexPath) ? readFileSync(indexPath, 'utf-8') : '';
// Presence checks for the files a PWA build is expected to emit into dist/.
describe('Core files exist', () => {
	it('service worker (sw.js) exists', () => {
		const swFile = resolve(DIST_DIR, 'sw.js');
		expect(existsSync(swFile), 'sw.js not found').toBeTruthy();
	});

	it('workbox library exists (hashed filename)', () => {
		// The runtime is emitted as workbox-{hash}.js in the dist root.
		const workboxPattern = /^workbox-[^.]+\.js$/;
		const matches = readdirSync(DIST_DIR).filter((entry) => workboxPattern.test(entry));
		expect(matches.length).toBeGreaterThan(0);
	});

	it('manifest.webmanifest exists', () => {
		const manifestFile = resolve(DIST_DIR, 'manifest.webmanifest');
		expect(existsSync(manifestFile), 'manifest.webmanifest not found').toBeTruthy();
	});

	it('SvelteKit bundle.js exists in _app/immutable/', () => {
		// Hashed JS bundles live directly under _app/immutable/.
		const immutableDir = resolve(DIST_DIR, '_app', 'immutable');
		expect(existsSync(immutableDir), '_app/immutable/ not found').toBeTruthy();

		const bundles = readdirSync(immutableDir).filter(
			(entry) => entry.startsWith('bundle.') && entry.endsWith('.js')
		);
		expect(bundles.length).toBeGreaterThan(0);
	});

	it('SvelteKit bundle.css exists in _app/immutable/assets/', () => {
		// Hashed CSS bundles live under _app/immutable/assets/.
		const assetsDir = resolve(DIST_DIR, '_app', 'immutable', 'assets');
		expect(existsSync(assetsDir), '_app/immutable/assets/ not found').toBeTruthy();

		const stylesheets = readdirSync(assetsDir).filter(
			(entry) => entry.startsWith('bundle.') && entry.endsWith('.css')
		);
		expect(stylesheets.length).toBeGreaterThan(0);
	});

	it('version.json exists in _app/', () => {
		// version.json sits in the _app directory.
		const versionFile = resolve(DIST_DIR, '_app', 'version.json');
		expect(existsSync(versionFile), '_app/version.json not found').toBeTruthy();
	});
});
// Validates the shape of the generated _app/version.json.
describe('version.json content', () => {
	it('has valid JSON with version field', () => {
		const versionFile = resolve(DIST_DIR, '_app', 'version.json');
		const raw = readFileSync(versionFile, 'utf-8');

		// JSON.parse throws on malformed content, which fails the test.
		const data = JSON.parse(raw);
		expect(data).toHaveProperty('version');

		const { version } = data;
		expect(typeof version).toBe('string');
		expect(version.length).toBeGreaterThan(0);
	});
});
// Inspects the generated sw.js text for its loader shape, precache entries and routes.
// Every test first asserts that `swContent` is non-empty.
describe('Service worker content', () => {
	it('service worker has minified self.define format', () => {
		expect(swContent).toBeTruthy();
		// The minified worker starts with an `if(!self.define)` loader guard.
		// The dot is escaped so only a literal `self.define` satisfies the check.
		expect(swContent).toMatch(/if\(!self\.define\)/);
	});

	it('references hashed workbox file (SvelteKit build output)', () => {
		expect(swContent).toBeTruthy();
		// The worker pulls in the hashed workbox runtime through `define(["./workbox-<hash>"]`.
		expect(swContent).toMatch(/define\(\["\.\/workbox-[a-zA-Z0-9]+"\]/);
	});

	it('precache contains SvelteKit bundle.js with content hash', () => {
		expect(swContent).toBeTruthy();
		// Content-hashed JS bundle under _app/immutable/.
		expect(swContent).toMatch(/"_app\/immutable\/bundle\.[a-zA-Z0-9_-]+\.js"/);
	});

	it('precache contains SvelteKit bundle.css with content hash', () => {
		expect(swContent).toBeTruthy();
		// Content-hashed CSS bundle under _app/immutable/assets/.
		expect(swContent).toMatch(/"_app\/immutable\/assets\/bundle\.[a-zA-Z0-9_-]+\.css"/);
	});

	it('precache contains _app/version.json', () => {
		expect(swContent).toBeTruthy();
		// version.json lives in the _app directory.
		expect(swContent).toMatch(/"_app\/version\.json"/);
	});

	it('precache contains manifest.webmanifest', () => {
		expect(swContent).toBeTruthy();
		expect(swContent).toMatch(/"manifest\.webmanifest"/);
	});

	it('has navigation route registered', () => {
		expect(swContent).toBeTruthy();
		expect(swContent).toMatch(/NavigationRoute/);
	});

	it('has runtime caching for API routes', () => {
		expect(swContent).toBeTruthy();
		// Both the cache name and the strategy name must appear in the worker source.
		expect(swContent).toMatch(/api-cache/);
		expect(swContent).toMatch(/NetworkFirst/);
	});
});
// Pattern checks against the generated index.html. Every case asserts that the file
// content is non-empty and then that it matches a single regular expression.
describe('index.html content', () => {
	const cases = [
		{
			// Hashed bundle names live in _app/immutable/.
			title: 'has modulepreload link for SvelteKit bundle with content hash',
			pattern: /href="(\.\/|\/)_app\/immutable\/bundle\.[a-zA-Z0-9_-]+\.js"/
		},
		{
			title: 'has stylesheet link for SvelteKit bundle.css with content hash',
			pattern: /href="(\.\/|\/)_app\/immutable\/assets\/bundle\.[a-zA-Z0-9_-]+\.css"/
		},
		{
			title: 'has dynamic import for SvelteKit bundle with content hash',
			pattern: /import\("(\.\/|\/)_app\/immutable\/bundle\.[a-zA-Z0-9_-]+\.js"\)/
		},
		{
			// The global is emitted as `__sveltekit_` followed by a suffix.
			title: 'has __sveltekit__ variable (SvelteKit adds hash suffix)',
			pattern: /__sveltekit_[a-zA-Z0-9-]+/
		},
		{
			title: 'has PWA manifest link',
			pattern: /rel="manifest" href="(\.?\/)?manifest\.webmanifest"/
		},
		{
			title: 'has apple-touch-icon link',
			pattern: /rel="apple-touch-icon"/
		},
		{
			// Hashed assets are referenced through _app/ paths.
			title: 'has _app paths for SvelteKit bundles',
			pattern: /_app\//
		}
	];

	for (const { title, pattern } of cases) {
		it(title, () => {
			expect(indexContent).toBeTruthy();
			expect(indexContent).toMatch(pattern);
		});
	}
});
// Checks that the build produced the _app directory at all.
describe('SvelteKit _app directory', () => {
	it('_app directory exists (SvelteKit uses it for hashed assets)', () => {
		const appDir = resolve(DIST_DIR, '_app');
		expect(existsSync(appDir)).toBeTruthy();
	});
});
// Checks that at least one workbox-<hash>.js runtime file sits in the dist root.
describe('Hashed workbox files', () => {
	it('workbox-*.js files exist in dist root (SvelteKit build output)', () => {
		const workboxPattern = /^workbox-[^.]+\.js$/;
		const rootEntries = readdirSync(DIST_DIR);
		const workboxFiles = rootEntries.filter((entry) => workboxPattern.test(entry));
		expect(workboxFiles.length).toBeGreaterThan(0);
	});
});
// Presence checks for static files expected in the dist root.
describe('Static assets', () => {
	it('has favicon.ico', () => {
		const favicon = resolve(DIST_DIR, 'favicon.ico');
		expect(existsSync(favicon)).toBeTruthy();
	});

	it('has PWA icons', () => {
		// Checked in ascending size order; the first missing icon fails the test.
		for (const icon of ['pwa-64x64.png', 'pwa-192x192.png', 'pwa-512x512.png']) {
			expect(existsSync(resolve(DIST_DIR, icon))).toBeTruthy();
		}
	});

	it('has loading.html fallback page', () => {
		const loadingPage = resolve(DIST_DIR, 'loading.html');
		expect(existsSync(loadingPage)).toBeTruthy();
	});
});
});
+13 -3
View File
@@ -1,13 +1,16 @@
import tailwindcss from '@tailwindcss/vite';
import { sveltekit } from '@sveltejs/kit/vite';
import { SvelteKitPWA } from '@vite-pwa/sveltekit';
import { dirname, resolve } from 'path';
import { fileURLToPath } from 'url';
import { defineConfig, searchForWorkspaceRoot } from 'vite';
import devtoolsJson from 'vite-plugin-devtools-json';
import { storybookTest } from '@storybook/addon-vitest/vitest-plugin';
import { llamaCppBuildPlugin } from './scripts/vite-plugin-llama-cpp-build';
import { splashScreenPlugin } from './scripts/vite-plugin-splash-screen';
import { buildInfoPlugin } from './scripts/vite-plugin-build-info';
import { relativizeBasePlugin } from './scripts/vite-plugin-relativize-base';
import { playwright } from '@vitest/browser-playwright';
import { SVELTEKIT_PWA_OPTIONS } from './src/lib/constants/pwa';
const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -37,7 +40,14 @@ export default defineConfig({
minify: true
},
plugins: [tailwindcss(), sveltekit(), devtoolsJson(), llamaCppBuildPlugin()],
plugins: [
tailwindcss(),
sveltekit(),
SvelteKitPWA(SVELTEKIT_PWA_OPTIONS),
splashScreenPlugin(),
buildInfoPlugin(),
relativizeBasePlugin()
],
test: {
projects: [