Closed

Changes from all commits (23 commits)
d81e63d
CI : support IOT device (IQ9) (#22987)
zhiyuan8 May 14, 2026
3e037f3
HIP: RDNA3 mma FA, faster AMD transpose, tune AMD (#22880)
JohannesGaessler May 14, 2026
5c0e946
ggml-hexagon: cpy: add contiguous fast-path in reshape copy (#23076)
pdhinaka May 14, 2026
7155a49
readme : update bindings (#23063)
KitaitiMakoto May 15, 2026
91e84fe
Support for Codex CLI by skipping unsupported Responses tools (#23041)
SidShaytay May 15, 2026
d528444
webui: preserve partial response on streaming error (#23090)
ServeurpersoCom May 15, 2026
ac33f03
reasoning-budget: clone should do a deep-copy (#23095)
am17an May 15, 2026
d5dc2e0
llama-eval : add AIME 2026 dataset support (#23058)
ggerganov May 15, 2026
10710a2
webui: Move static build output from `tools/server/public` to `build/…
allozaur May 14, 2026
ad4913d
refactor: Move to `tools/ui`
allozaur May 14, 2026
356251b
refactor: rename CMake variables and preprocessor defines
allozaur May 14, 2026
a3c5df9
refactor: rename CLI flags (--webui -> --ui) with backward compat
allozaur May 14, 2026
d46c543
refactor: update C++ server internals with backward compat
allozaur May 14, 2026
245e2d1
refactor: rename CI/CD workflows, artifacts, and build script
allozaur May 14, 2026
41a5c85
docs: update CODEOWNERS and server README docs
allozaur May 14, 2026
5eed357
fix: Small fixes for UI build
allozaur May 14, 2026
b7df005
fix: CMake.txt syntax
allozaur May 15, 2026
b2f2886
chore: Formatting
allozaur May 15, 2026
3adae5a
fix: `.editorconfig` for llama-ui
allozaur May 15, 2026
eb5216b
chore: Formatting
allozaur May 15, 2026
bcd3694
refactor: Use `APP_NAME` in Error route
allozaur May 15, 2026
0f53553
refactor: Cleanup
allozaur May 15, 2026
7d25dd2
refactor: Single migration service
allozaur May 15, 2026
.editorconfig (2 changes: 1 addition & 1 deletion)

@@ -45,7 +45,7 @@ insert_final_newline = unset
 trim_trailing_whitespace = unset
 insert_final_newline = unset

-[tools/server/webui/**]
+[tools/ui/**]
 indent_style = unset
 indent_size = unset
 end_of_line = unset
.github/labeler.yml (4 changes: 2 additions & 2 deletions)

@@ -73,10 +73,10 @@ android:
   - changed-files:
       - any-glob-to-any-file:
           - examples/llama.android/**
-server/webui:
+server/ui:
   - changed-files:
       - any-glob-to-any-file:
-          - tools/server/webui/**
+          - tools/ui/**
 server:
   - changed-files:
       - any-glob-to-any-file:
.github/workflows/build-and-test-snapdragon.yml (46 changes: 39 additions & 7 deletions)

@@ -58,14 +58,45 @@ jobs:
           name: llama-cpp-android-arm64-snapdragon
           path: pkg-snapdragon/llama.cpp

+  linux-iot-snapdragon:
+    runs-on: ubuntu-latest
+    container:
+      image: 'ghcr.io/snapdragon-toolchain/arm64-linux:v0.1'
+    defaults:
+      run:
+        shell: bash
+
+    steps:
+      - name: Clone
+        uses: actions/checkout@v6
+        with:
+          fetch-depth: 0
+          lfs: false
+
+      - name: Build Llama.CPP for Snapdragon Linux IoT
+        id: build_llama_cpp_snapdragon_linux
+        run: |
+          cp docs/backend/snapdragon/CMakeUserPresets.json .
+          cmake --preset arm64-linux-snapdragon-release -B build-snapdragon -DGGML_OPENCL=ON
+          cmake --build build-snapdragon -j $(nproc)
+          cmake --install build-snapdragon --prefix pkg-snapdragon/llama.cpp
+
+      - name: Upload Llama.CPP Snapdragon Linux IoT Build Artifact
+        if: ${{ always() && steps.build_llama_cpp_snapdragon_linux.outcome == 'success' }}
+        uses: actions/upload-artifact@v6
+        with:
+          name: llama-cpp-linux-arm64-snapdragon
+          path: pkg-snapdragon/llama.cpp
+
   test-snapdragon-qdc:
-    name: Test on QDC Android Device (${{ matrix.device }})
-    needs: [android-ndk-snapdragon]
-    runs-on: ubuntu-slim
+    name: Test on QDC Device (${{ matrix.device }})
+    needs: [android-ndk-snapdragon, linux-iot-snapdragon]
+    runs-on: ubuntu-24.04-arm
     timeout-minutes: 90
     strategy:
       fail-fast: false
       matrix:
-        device: [SM8750, SM8650, SM8850]
+        device: [SM8750, SM8850, QCS9075M]

     steps:
       - name: Checkout
@@ -74,11 +105,11 @@ jobs:
       - name: Download build artifact
         uses: actions/download-artifact@v7
         with:
-          name: llama-cpp-android-arm64-snapdragon
+          name: ${{ startsWith(matrix.device, 'QCS') && 'llama-cpp-linux-arm64-snapdragon' || 'llama-cpp-android-arm64-snapdragon' }}
           path: pkg-snapdragon/llama.cpp

       - name: Set up Python
-        uses: actions/setup-python@v5
+        uses: actions/setup-python@v6
         with:
           python-version: '3.x'
           cache: pip
@@ -107,7 +138,8 @@ jobs:
             --test all \
             --pkg-dir pkg-snapdragon/llama.cpp \
             --model-url "https://huggingface.co/bartowski/Llama-3.2-1B-Instruct-GGUF/resolve/main/Llama-3.2-1B-Instruct-Q4_0.gguf" \
-            --device ${{ matrix.device }}
+            --device ${{ matrix.device }} \
+            ${{ startsWith(matrix.device, 'QCS') && '--retries 2 --retry-delay 300' || '' }}
         env:
           QDC_API_KEY: ${{ secrets.QDC_API_KEY }}
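Note on the expressions above: GitHub Actions expressions have no ternary operator, so both the artifact name and the retry flags are selected with the `&&`/`||` idiom. A minimal sketch of the pattern (the artifact names here are placeholders, not the ones in this workflow):

      # `cond && a || b` evaluates to b whenever a is falsy (e.g. ''), so the
      # truthy branch must be a non-empty string for this to act as a ternary
      - name: Download build artifact
        uses: actions/download-artifact@v7
        with:
          name: ${{ startsWith(matrix.device, 'QCS') && 'linux-pkg' || 'android-pkg' }}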
.github/workflows/build-self-hosted.yml (20 changes: 11 additions & 9 deletions)

@@ -68,6 +68,8 @@ jobs:
       - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name
+        env:
+          BRANCH_NAME: ${{ github.head_ref || github.ref_name }}

   ggml-ci-nvidia-cuda:
     needs: determine-tag
@@ -81,7 +83,7 @@ jobs:
       - name: Test
         id: ggml-ci
         env:
-          HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
+          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
         run: |
           nvidia-smi
           GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
@@ -98,7 +100,7 @@ jobs:
       - name: Test
         id: ggml-ci
         env:
-          HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
+          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
         run: |
           vulkaninfo --summary
           GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
@@ -115,7 +117,7 @@ jobs:
       - name: Test
         id: ggml-ci
         env:
-          HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
+          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
         run: |
           vulkaninfo --summary
           GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp /mnt/llama.cpp
@@ -205,7 +207,7 @@ jobs:
       - name: Test
         id: ggml-ci
         env:
-          HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
+          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
         run: |
           GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

@@ -234,7 +236,7 @@ jobs:
       - name: Test
         id: ggml-ci
         env:
-          HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
+          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
         run: |
           GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
             bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
@@ -251,7 +253,7 @@ jobs:
       - name: Test
         id: ggml-ci
         env:
-          HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
+          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
         run: |
           vulkaninfo --summary
           GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
@@ -270,7 +272,7 @@ jobs:
       - name: Test
         id: ggml-ci
         env:
-          HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
+          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
         run: |
           vulkaninfo --summary
           GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp
@@ -291,7 +293,7 @@ jobs:
           MSYSTEM: UCRT64
           CHERE_INVOKING: 1
           PATH: C:\msys64\ucrt64\bin;C:\msys64\usr\bin;C:\Windows\System32;${{ env.PATH }}
-          HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
+          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
         run: |
           vulkaninfo --summary
           # Skip python related tests with GG_BUILD_LOW_PERF=1 since Windows MSYS2 UCRT64 currently fails to create
@@ -332,7 +334,7 @@ jobs:
       - name: Test
         id: ggml-ci
         env:
-          HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
+          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
         run: |
           source ./openvino_toolkit/setupvars.sh
           GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
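Note on the renames above: every `HF_WEBUI_VERSION` becomes `HF_UI_VERSION`, and the diff itself carries no compatibility alias for the old name. If out-of-tree CI scripts still read `HF_WEBUI_VERSION`, a transitional shim could export both names; a hypothetical sketch, not part of this PR:

      - name: Test
        env:
          # assumption: keep the legacy variable alive until consumers migrate
          HF_UI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}
          HF_WEBUI_VERSION: ${{ needs.determine-tag.outputs.tag_name }}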