Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
4683901
[Core] Import 1Cat-vLLM-0.0.2 runtime and build system
yangzhuxinyzx Mar 21, 2026
92c6efb
[CI/Build] Vendor lmdeploy source for standalone builds
yangzhuxinyzx Mar 21, 2026
5262499
[Kernel] Add validation, examples, and benchmark assets
yangzhuxinyzx Mar 21, 2026
b3b1abd
[Doc] Publish 1Cat-vLLM-0.0.2 release snapshot
yangzhuxinyzx Mar 21, 2026
6fd0f8d
[Doc] Update rebuilt wheel download links
Pan-Shuhan-YMZX Mar 21, 2026
a8783b0
[Bugfix] Vendor runtime Python packages for source builds
Pan-Shuhan-YMZX Mar 21, 2026
1e6c257
[CI/Build][Doc] Add verified SM70 Docker runtime path
yangzhuxinyzx Mar 21, 2026
f29bd45
Add files via upload
Pan-Shuhan-YMZX Mar 30, 2026
d6c28dc
Change WeChat group QR code image
Pan-Shuhan-YMZX Mar 30, 2026
18e5223
Update README.md
TCat2003 Apr 2, 2026
3c7a8a3
Add files via upload
Pan-Shuhan-YMZX Apr 9, 2026
f5d2e15
Update Dockerfile.sm70-wheel
TCat2003 Apr 9, 2026
feb8402
Add files via upload
Pan-Shuhan-YMZX Apr 10, 2026
c1dce83
docs: update wechat group qr code
Apr 18, 2026
82f59c8
docs: update WeChat group QR code
Apr 26, 2026
92a785c
Release 1Cat-vLLM 0.0.3
yangzhuxinyzx May 1, 2026
eea9d81
Merge 1CatAI main history for 0.0.3
yangzhuxinyzx May 1, 2026
04bb4b7
Update README.md
1CatAIOfficial May 1, 2026
7a7549c
Update README.md
1CatAIOfficial May 1, 2026
6276450
Update README.md
1CatAIOfficial May 1, 2026
a1bf487
Update README.md
1CatAIOfficial May 1, 2026
197f1cc
docs: clarify wheel runtime directory
yangzhuxinyzx May 1, 2026
58ebaa6
[Kernel] Add V100 FA2 fp8 KV cache audits
yangzhuxinyzx May 8, 2026
3b539f9
[Core] Trim V100 startup memory defaults
yangzhuxinyzx May 8, 2026
437b358
QRcode-update
Pan-Shuhan-YMZX May 9, 2026
a4daad6
[Core] Prepare 1.0.0 V100 release
yangzhuxinyzx May 13, 2026
761ae33
[Doc] Update 1.0.0 wheel install and MTP launch
yangzhuxinyzx May 13, 2026
0741a30
[Doc] Simplify public launch commands
yangzhuxinyzx May 13, 2026
36536e5
[Doc] Restore validated MTP launch profile
yangzhuxinyzx May 13, 2026
29b73ec
[Doc] Add MTP throughput note
yangzhuxinyzx May 13, 2026
0ac0632
[Bugfix] Restore spec proposer compatibility
yangzhuxinyzx May 13, 2026
05ac1a4
[Doc] Add TP2 MTP launch profile
yangzhuxinyzx May 13, 2026
8b536c1
[Core] Archive FP8 MTP investigation state
yangzhuxinyzx May 14, 2026
bf37452
docs: update WeChat group QR code
yangzhuxinyzx May 14, 2026
69749dd
[Kernel] Add SM70 FP8 MoE fast path
yangzhuxinyzx May 14, 2026
d18b16c
[Doc] Credit flash-attention-v100
yangzhuxinyzx May 14, 2026
acd2a31
[Bugfix] Stabilize MTP state handling
yangzhuxinyzx May 17, 2026
06f7a38
docs: update WeChat group QR code
yangzhuxinyzx May 18, 2026
f1a64a7
docs: update WeChat group QR code to Group 3
yangzhuxinyzx May 20, 2026
42f23f6
[Build] Prepare 1Cat-vLLM 1.0.1 release
yangzhuxinyzx May 25, 2026
a645fcb
[Build] Prepare 1Cat-vLLM 1.1.0 beta release
yangzhuxinyzx May 27, 2026
530ac4d
[Doc] Refocus README on project overview
yangzhuxinyzx May 27, 2026
432f197
docs: update WeChat group QR code
May 28, 2026
6235d4d
fix: keep partial content when reasoning block is truncated
rivetphilbot May 19, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
The diff you're trying to view is too large. We only load the first 3000 changed files.
106 changes: 53 additions & 53 deletions .buildkite/check-wheel-size.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,53 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import os
import sys
import zipfile

# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 500 MiB
# Note that we have 800 MiB quota, please use it wisely.
# See https://github.com/pypi/support/issues/6326 .
# Please also sync the value with the one in Dockerfile.
VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 500))


def print_top_10_largest_files(zip_file):
    """Print the ten largest members of a zip archive, biggest first.

    Args:
        zip_file: Path to the archive, or a binary file object; it is passed
            straight to ``zipfile.ZipFile``.

    Each member is reported on its own line as
    ``<name>: <size> MBs uncompressed.`` where ``<size>`` is the member's
    uncompressed size divided by 1024 * 1024, with two decimals.
    """
    with zipfile.ZipFile(zip_file, "r") as archive:
        # infolist() already carries every member's size, so no per-name
        # lookup is needed.
        members = [(info.filename, info.file_size) for info in archive.infolist()]

    # Only plain (name, size) tuples are left, so the archive can be closed
    # before sorting and printing.
    members.sort(key=lambda member: member[1], reverse=True)
    for name, size in members[:10]:
        print(f"{name}: {size / (1024 * 1024):.2f} MBs uncompressed.")


def check_wheel_size(directory):
    """Check every ``.whl`` file under ``directory`` against the size limit.

    The tree is walked recursively with ``os.walk``. Each wheel's on-disk size
    (bytes divided by 1024 * 1024) is compared with the module-level
    ``VLLM_MAX_SIZE_MB``.

    Args:
        directory: Root directory to search for wheels.

    Returns:
        1 as soon as one wheel exceeds the limit (after printing that wheel's
        ten largest members); 0 otherwise, including when no wheel is found.
    """
    for root, _, files in os.walk(directory):
        for file_name in files:
            if not file_name.endswith(".whl"):
                continue

            wheel_path = os.path.join(root, file_name)
            wheel_size_mb = os.path.getsize(wheel_path) / (1024 * 1024)
            if wheel_size_mb > VLLM_MAX_SIZE_MB:
                print(
                    f"Not allowed: Wheel {wheel_path} is larger "
                    f"({wheel_size_mb:.2f} MB) than the limit "
                    f"({VLLM_MAX_SIZE_MB} MB)."
                )
                # Show what is taking up the space before failing the check.
                print_top_10_largest_files(wheel_path)
                return 1

            print(
                f"Wheel {wheel_path} is within the allowed size "
                f"({wheel_size_mb:.2f} MB)."
            )
    return 0


if __name__ == "__main__":
    # The first positional argument is the directory to scan; without it
    # there is nothing to check.
    if len(sys.argv) < 2:
        print("Usage: python check-wheel-size.py <directory>")
        sys.exit(1)

    directory = sys.argv[1]
    # The return value of check_wheel_size becomes the process exit status.
    sys.exit(check_wheel_size(directory))
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project

import os
import sys
import zipfile

# Read the VLLM_MAX_SIZE_MB environment variable, defaulting to 500 MiB
# Note that we have 800 MiB quota, please use it wisely.
# See https://github.com/pypi/support/issues/6326 .
# Please also sync the value with the one in Dockerfile.
VLLM_MAX_SIZE_MB = int(os.environ.get("VLLM_MAX_SIZE_MB", 500))


def print_top_10_largest_files(zip_file):
    """Print the ten largest members of a zip archive, biggest first.

    Args:
        zip_file: Path to the archive, or a binary file object; it is passed
            straight to ``zipfile.ZipFile``.

    Each member is reported on its own line as
    ``<name>: <size> MBs uncompressed.`` where ``<size>`` is the member's
    uncompressed size divided by 1024 * 1024, with two decimals.
    """
    with zipfile.ZipFile(zip_file, "r") as archive:
        # infolist() already carries every member's size, so no per-name
        # lookup is needed.
        members = [(info.filename, info.file_size) for info in archive.infolist()]

    # Only plain (name, size) tuples are left, so the archive can be closed
    # before sorting and printing.
    members.sort(key=lambda member: member[1], reverse=True)
    for name, size in members[:10]:
        print(f"{name}: {size / (1024 * 1024):.2f} MBs uncompressed.")


def check_wheel_size(directory):
    """Check every ``.whl`` file under ``directory`` against the size limit.

    The tree is walked recursively with ``os.walk``. Each wheel's on-disk size
    (bytes divided by 1024 * 1024) is compared with the module-level
    ``VLLM_MAX_SIZE_MB``.

    Args:
        directory: Root directory to search for wheels.

    Returns:
        1 as soon as one wheel exceeds the limit (after printing that wheel's
        ten largest members); 0 otherwise, including when no wheel is found.
    """
    for root, _, files in os.walk(directory):
        for file_name in files:
            if not file_name.endswith(".whl"):
                continue

            wheel_path = os.path.join(root, file_name)
            wheel_size_mb = os.path.getsize(wheel_path) / (1024 * 1024)
            if wheel_size_mb > VLLM_MAX_SIZE_MB:
                print(
                    f"Not allowed: Wheel {wheel_path} is larger "
                    f"({wheel_size_mb:.2f} MB) than the limit "
                    f"({VLLM_MAX_SIZE_MB} MB)."
                )
                # Show what is taking up the space before failing the check.
                print_top_10_largest_files(wheel_path)
                return 1

            print(
                f"Wheel {wheel_path} is within the allowed size "
                f"({wheel_size_mb:.2f} MB)."
            )
    return 0


if __name__ == "__main__":
    # The first positional argument is the directory to scan; without it
    # there is nothing to check.
    if len(sys.argv) < 2:
        print("Usage: python check-wheel-size.py <directory>")
        sys.exit(1)

    directory = sys.argv[1]
    # The return value of check_wheel_size becomes the process exit status.
    sys.exit(check_wheel_size(directory))
50 changes: 25 additions & 25 deletions .buildkite/ci_config.yaml
Original file line number Diff line number Diff line change
@@ -1,25 +1,25 @@
# CI configuration: job directories, path patterns and image repositories.
name: vllm_ci
job_dirs:
  - ".buildkite/image_build"
  - ".buildkite/test_areas"
  - ".buildkite/hardware_tests"
run_all_patterns:
  - "docker/Dockerfile"
  - "CMakeLists.txt"
  - "requirements/common.txt"
  - "requirements/cuda.txt"
  - "requirements/build.txt"
  - "requirements/test.txt"
  - "setup.py"
  - "csrc/"
  - "cmake/"
run_all_exclude_patterns:
  - "docker/Dockerfile."
  - "csrc/cpu/"
  - "csrc/rocm/"
  - "cmake/hipify.py"
  - "cmake/cpu_extension.cmake"
registries: public.ecr.aws/q9t5s3a7
# `main` and `premerge` must be nested under `repositories`.
repositories:
  main: "vllm-ci-postmerge-repo"
  premerge: "vllm-ci-test-repo"
# CI configuration: job directories, path patterns and image repositories.
name: vllm_ci
job_dirs:
  - ".buildkite/image_build"
  - ".buildkite/test_areas"
  - ".buildkite/hardware_tests"
run_all_patterns:
  - "docker/Dockerfile"
  - "CMakeLists.txt"
  - "requirements/common.txt"
  - "requirements/cuda.txt"
  - "requirements/build.txt"
  - "requirements/test.txt"
  - "setup.py"
  - "csrc/"
  - "cmake/"
run_all_exclude_patterns:
  - "docker/Dockerfile."
  - "csrc/cpu/"
  - "csrc/rocm/"
  - "cmake/hipify.py"
  - "cmake/cpu_extension.cmake"
registries: public.ecr.aws/q9t5s3a7
# `main` and `premerge` must be nested under `repositories`.
repositories:
  main: "vllm-ci-postmerge-repo"
  premerge: "vllm-ci-test-repo"
58 changes: 29 additions & 29 deletions .buildkite/hardware_tests/amd.yaml
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
group: Hardware
steps:
  - label: "AMD: :docker: build image"
    depends_on: []
    device: amd_cpu
    no_plugin: true
    commands:
      # Folded scalar: the lines below are joined into one `docker build`
      # command line.
      - >
        docker build
        --build-arg max_jobs=16
        --build-arg REMOTE_VLLM=1
        --build-arg ARG_PYTORCH_ROCM_ARCH='gfx90a;gfx942'
        --build-arg VLLM_BRANCH=$BUILDKITE_COMMIT
        --tag "rocm/vllm-ci:${BUILDKITE_COMMIT}"
        -f docker/Dockerfile.rocm
        --target test
        --no-cache
        --progress plain .
      - docker push "rocm/vllm-ci:${BUILDKITE_COMMIT}"
    env:
      DOCKER_BUILDKIT: "1"
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 1
        - exit_status: -10  # Agent was lost
          limit: 1
        - exit_status: 1  # Machine occasionally fails
          limit: 1
group: Hardware
steps:
  - label: "AMD: :docker: build image"
    depends_on: []
    device: amd_cpu
    no_plugin: true
    commands:
      # Folded scalar: the lines below are joined into one `docker build`
      # command line.
      - >
        docker build
        --build-arg max_jobs=16
        --build-arg REMOTE_VLLM=1
        --build-arg ARG_PYTORCH_ROCM_ARCH='gfx90a;gfx942'
        --build-arg VLLM_BRANCH=$BUILDKITE_COMMIT
        --tag "rocm/vllm-ci:${BUILDKITE_COMMIT}"
        -f docker/Dockerfile.rocm
        --target test
        --no-cache
        --progress plain .
      - docker push "rocm/vllm-ci:${BUILDKITE_COMMIT}"
    env:
      DOCKER_BUILDKIT: "1"
    retry:
      automatic:
        - exit_status: -1  # Agent was lost
          limit: 1
        - exit_status: -10  # Agent was lost
          limit: 1
        - exit_status: 1  # Machine occasionally fails
          limit: 1
16 changes: 8 additions & 8 deletions .buildkite/hardware_tests/arm.yaml
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
group: Hardware
steps:
  - label: "Arm CPU Test"
    soft_fail: true
    device: arm_cpu
    no_plugin: true
    commands:
      - bash .buildkite/scripts/hardware_ci/run-cpu-test-arm.sh
group: Hardware
steps:
  - label: "Arm CPU Test"
    soft_fail: true
    device: arm_cpu
    no_plugin: true
    commands:
      - bash .buildkite/scripts/hardware_ci/run-cpu-test-arm.sh
20 changes: 10 additions & 10 deletions .buildkite/hardware_tests/ascend_npu.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
group: Hardware
# `~` is YAML null.
depends_on: ~
steps:
  - label: "Ascend NPU Test"
    soft_fail: true
    timeout_in_minutes: 20
    no_plugin: true
    device: ascend_npu
    commands:
      - bash .buildkite/scripts/hardware_ci/run-npu-test.sh
group: Hardware
# `~` is YAML null.
depends_on: ~
steps:
  - label: "Ascend NPU Test"
    soft_fail: true
    timeout_in_minutes: 20
    no_plugin: true
    device: ascend_npu
    commands:
      - bash .buildkite/scripts/hardware_ci/run-npu-test.sh
20 changes: 10 additions & 10 deletions .buildkite/hardware_tests/gh200.yaml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
group: Hardware
steps:
  - label: "GH200 Test"
    soft_fail: true
    device: gh200
    no_plugin: true
    optional: true
    commands:
      - nvidia-smi
      - bash .buildkite/scripts/hardware_ci/run-gh200-test.sh
group: Hardware
steps:
  - label: "GH200 Test"
    soft_fail: true
    device: gh200
    no_plugin: true
    optional: true
    commands:
      - nvidia-smi
      - bash .buildkite/scripts/hardware_ci/run-gh200-test.sh
48 changes: 24 additions & 24 deletions .buildkite/hardware_tests/intel.yaml
Original file line number Diff line number Diff line change
@@ -1,24 +1,24 @@
group: Hardware
# `~` is YAML null.
depends_on: ~
steps:
  - label: "Intel CPU Test"
    soft_fail: true
    device: intel_cpu
    no_plugin: true
    commands:
      - bash .buildkite/scripts/hardware_ci/run-cpu-test.sh

  - label: "Intel HPU Test"
    soft_fail: true
    device: intel_hpu
    no_plugin: true
    commands:
      - bash .buildkite/scripts/hardware_ci/run-hpu-test.sh

  - label: "Intel GPU Test"
    depends_on: []
    soft_fail: true
    device: intel_gpu
    no_plugin: true
    commands:
      - bash .buildkite/scripts/hardware_ci/run-xpu-test.sh
group: Hardware
# `~` is YAML null.
depends_on: ~
steps:
  - label: "Intel CPU Test"
    soft_fail: true
    device: intel_cpu
    no_plugin: true
    commands:
      - bash .buildkite/scripts/hardware_ci/run-cpu-test.sh

  - label: "Intel HPU Test"
    soft_fail: true
    device: intel_hpu
    no_plugin: true
    commands:
      - bash .buildkite/scripts/hardware_ci/run-hpu-test.sh

  - label: "Intel GPU Test"
    depends_on: []
    soft_fail: true
    device: intel_gpu
    no_plugin: true
    commands:
      - bash .buildkite/scripts/hardware_ci/run-xpu-test.sh
Loading