Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
4953384
[build] update deps: transformers -> 5.2.0
CyCle1024 Feb 27, 2026
9ad6114
[ci] update ut and e2e image to trigger pt2.9+tf5.2 ci test
CyCle1024 Mar 18, 2026
014d1f5
chore(ci): fix some case on pt2.9.1 and tf5.2
CyCle1024 Mar 24, 2026
bb9a269
chore(build): update cudnn to 9.15.1.9 of torch 2.9.1 in dockerfile
CyCle1024 Mar 24, 2026
92fa286
chore(ci): update docker image for ut
CyCle1024 Mar 25, 2026
435bb76
chore(ci): fix ut cudnn version and use wider tolerance in test_qwen3…
CyCle1024 Mar 25, 2026
d54527e
fix test_rope
CyCle1024 Mar 27, 2026
e4e45c1
[refactor] add RopeParametersConfig due to transformers 5.2.0 bc
CyCle1024 Mar 30, 2026
321fbda
build: conditional path for lmdeploy and sglang in Dockerfile
CyCle1024 Mar 31, 2026
3a816e3
[build] update dockerfile for deepep, deep_gemm and ci proxy speed fix
CyCle1024 Apr 2, 2026
c788926
fixup! fix test_rope
CyCle1024 Apr 8, 2026
82a6e55
fix(ci): ep>1 clip_grad_norm fails due to pt2.9 check
CyCle1024 Apr 10, 2026
431bf76
fix qwen3_5 test
CyCle1024 Apr 13, 2026
1805b75
ci: update ut and e2e image
CyCle1024 Apr 13, 2026
ab9d3a9
ci: restore CI_ENV.sh PYTHONPATH and xtuner install
CyCle1024 Apr 14, 2026
f581fd4
build: update sglang deps in dockerfile
CyCle1024 Apr 14, 2026
734e038
ci(fix): ut and e2e image
CyCle1024 Apr 14, 2026
2aeaba0
ci(e2e): fix torch 2.9.1 cudnn memory issue in e2e test
CyCle1024 Apr 15, 2026
0eaa40e
test: clean hf dynamic modules before test setup
CyCle1024 Apr 15, 2026
07187d4
build(docker): update lmdeploy deps
CyCle1024 Apr 21, 2026
cef5930
[Refactor] Move compile config from FSDPConfig to model_cfg
CyCle1024 Apr 23, 2026
0745196
fixup e2e autotest qwen3_vl_8B_dense.py due to OOM
CyCle1024 Apr 24, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .dev_scripts/xtuner_rl_path.pth
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
import xtuner_rl_path
18 changes: 18 additions & 0 deletions .dev_scripts/xtuner_rl_path/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
"""Conditionally prepend vendored lmdeploy/sglang install dirs to ``sys.path``.

This module is executed at interpreter startup via the companion
``xtuner_rl_path.pth`` file (a ``.pth`` line starting with ``import`` is run
by the ``site`` module). When ``XTUNER_USE_LMDEPLOY`` (or, failing that,
``XTUNER_USE_SGLANG``) is set to a truthy value, the matching
``pip install --target`` directory is inserted just *before* the system
``dist-packages`` entry so the vendored wheels shadow the system ones.
"""

import os
import sys

# Accepted truthy spellings for the XTUNER_USE_* switches (case-insensitive).
TRUTHY_VALUES = ("1", "on", "true")


def env_flag_enabled(name):
    """Return True if the environment variable *name* holds a truthy value.

    An unset variable is treated as falsy.
    """
    return os.getenv(name, "").lower() in TRUTHY_VALUES


def insert_before_dist_packages(path):
    """Insert *path* into ``sys.path`` ahead of the first dist-packages entry.

    Falls back to index 0 (highest priority) when no entry ends with
    ``dist-packages``; no-op when *path* is already present.
    """
    index = 0
    for i, entry in enumerate(sys.path):
        if entry.endswith("dist-packages"):
            index = i
            break
    if path not in sys.path:
        sys.path.insert(index, path)


# lmdeploy takes precedence over sglang when both flags are enabled,
# matching the original if/elif ordering.
if env_flag_enabled("XTUNER_USE_LMDEPLOY"):
    insert_before_dist_packages(os.getenv("XTUNER_LMDEPLOY_ENVS_DIR", "/envs/lmdeploy"))
elif env_flag_enabled("XTUNER_USE_SGLANG"):
    insert_before_dist_packages(os.getenv("XTUNER_SGLANG_ENVS_DIR", "/envs/sglang"))
2 changes: 1 addition & 1 deletion .github/workflows/unit_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ on:
env:
WORKSPACE_PREFIX: $(echo $GITHUB_WORKSPACE |cut -d '/' -f 1-5)
WORKSPACE_PREFIX_SHORT: $(echo $GITHUB_WORKSPACE |cut -d '/' -f 1-3)
IMAGE: ailab-llmrazor/xtuner:pt28_20251216_d769950
IMAGE: ailab-llmrazor/xtuner_tmp:pt29_20260414_c8f6fa1

concurrency:
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
Expand Down
170 changes: 117 additions & 53 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,38 @@
# builder
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:25.03-py3

## build args
## build base env
FROM ${BASE_IMAGE} AS setup_env

ARG TORCH_VERSION
ARG PPA_SOURCE

RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
sed -i "s@http://.*.ubuntu.com@${PPA_SOURCE}@g" /etc/apt/sources.list.d/ubuntu.sources && \
# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
RUN sed -i "s@http://.*.ubuntu.com@${PPA_SOURCE}@g" /etc/apt/sources.list.d/ubuntu.sources && \
apt update && \
apt install --no-install-recommends ca-certificates -y && \
apt install --no-install-recommends bc wget -y && \
apt install --no-install-recommends build-essential sudo -y && \
apt install --no-install-recommends git curl pkg-config tree unzip tmux \
openssh-server openssh-client dnsutils iproute2 lsof net-tools zsh rclone \
iputils-ping telnet netcat-openbsd -y && \
iputils-ping telnet netcat-openbsd htop bubblewrap socat -y && \
apt clean && rm -rf /var/lib/apt/lists/*

RUN if [ -d /etc/pip ] && [ -f /etc/pip/constraint.txt ]; then echo > /etc/pip/constraint.txt; fi
RUN pip install pystack py-spy --no-cache-dir
RUN pip uninstall flash_attn opencv -y && rm -rf /usr/local/lib/python3.12/dist-packages/cv2
RUN git config --system --add safe.directory "*"

# torch
ARG TORCH_VERSION
ARG PYTORCH_WHEELS_URL
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
--mount=type=secret,id=NO_PROXY,env=no_proxy \
if [ -n "${TORCH_VERSION}" ]; then \
pip install torchvision torch==${TORCH_VERSION} \
--index-url https://download.pytorch.org/whl/cu128 \
--extra-index-url https://download.pytorch.org/whl/cu126 \
-i ${PYTORCH_WHEELS_URL}/cu128 \
--extra-index-url ${PYTORCH_WHEELS_URL}/cu126 \
--no-cache-dir; \
fi

# set reasonable default for CUDA architectures when building ngc image
ENV TORCH_CUDA_ARCH_LIST="7.5 8.0 8.6 9.0 10.0"

RUN pip uninstall flash_attn opencv -y && rm -rf /usr/local/lib/python3.12/dist-packages/cv2
ENV TORCH_CUDA_ARCH_LIST="9.0 10.0"

ARG FLASH_ATTN_DIR=/tmp/flash-attn
ARG CODESPACE=/root/codespace
Expand All @@ -56,6 +55,9 @@ ARG CODESPACE
ARG FLASH_ATTN_DIR
ARG FLASH_ATTN3_DIR
ARG FLASH_ATTN_URL
# force hopper for now, you change it throught build args
ARG FLASH_ATTN_CUDA_ARCHS="90"
ARG FLASH_ATTENTION_DISABLE_SM80="TRUE"

RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
git clone $(echo ${FLASH_ATTN_URL} | cut -d '@' -f 1) && \
Expand Down Expand Up @@ -119,42 +121,41 @@ WORKDIR ${CODESPACE}/causal-conv1d

RUN CAUSAL_CONV1D_FORCE_BUILD=TRUE pip wheel -w ${CAUSAL_CONV1D_DIR} -v --no-deps --no-build-isolation .

# pypi install nvshmem and compile deepep
# compile nvshmem and deepep
FROM setup_env AS deep_ep

ARG CODESPACE
ARG DEEP_EP_DIR
ARG DEEP_EP_URL
# build sm90 and sm100 for deep_ep for now
ARG TORCH_CUDA_ARCH_LIST="9.0 10.0"

# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
# curl -LO https://github.com/NVIDIA/nvshmem/releases/download/v3.4.5-0/nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
# tar -zxvf nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
# cd ${CODESPACE}/nvshmem_src && \
# NVSHMEM_SHMEM_SUPPORT=0 \
# NVSHMEM_UCX_SUPPORT=0 \
# NVSHMEM_USE_NCCL=0 \
# NVSHMEM_MPI_SUPPORT=0 \
# NVSHMEM_IBGDA_SUPPORT=1 \
# NVSHMEM_USE_GDRCOPY=0 \
# NVSHMEM_PMIX_SUPPORT=0 \
# NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
# NVSHMEM_BUILD_TESTS=0 \
# NVSHMEM_BUILD_EXAMPLES=0 \
# NVSHMEM_BUILD_HYDRA_LAUNCHER=0 \
# NVSHMEM_BUILD_TXZ_PACKAGE=0 \
# NVSHMEM_BUILD_PYTHON_LIB=OFF \
# cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_PREFIX} -DMLX5_lib=/lib/x86_64-linux-gnu/libmlx5.so.1 && \
# cmake --build build --target install --parallel 32 && \
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
curl -LO https://github.com/NVIDIA/nvshmem/releases/download/v3.4.5-0/nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
tar -zxvf nvshmem_src_cuda-all-all-3.4.5.tar.gz && \
cd ${CODESPACE}/nvshmem_src && \
NVSHMEM_SHMEM_SUPPORT=0 \
NVSHMEM_UCX_SUPPORT=0 \
NVSHMEM_USE_NCCL=0 \
NVSHMEM_MPI_SUPPORT=0 \
NVSHMEM_IBGDA_SUPPORT=1 \
NVSHMEM_USE_GDRCOPY=0 \
NVSHMEM_PMIX_SUPPORT=0 \
NVSHMEM_TIMEOUT_DEVICE_POLLING=0 \
NVSHMEM_BUILD_TESTS=0 \
NVSHMEM_BUILD_EXAMPLES=0 \
NVSHMEM_BUILD_HYDRA_LAUNCHER=0 \
NVSHMEM_BUILD_TXZ_PACKAGE=0 \
NVSHMEM_BUILD_PYTHON_LIB=OFF \
cmake -S . -B build/ -DCMAKE_INSTALL_PREFIX=${NVSHMEM_PREFIX} -DMLX5_lib=/lib/x86_64-linux-gnu/libmlx5.so.1 && \
cmake --build build --target install --parallel 32 && \
cd ${CODESPACE} && git clone $(echo ${DEEP_EP_URL} | cut -d '@' -f 1) && \
cd ${CODESPACE}/DeepEP && \
git checkout $(echo ${DEEP_EP_URL} | cut -d '@' -f 2) && \
git submodule update --init --recursive --force

WORKDIR ${CODESPACE}/DeepEP

RUN NVSHMEM_DIR=${NVSHMEM_PREFIX} pip wheel -w ${DEEP_EP_DIR} -v --no-deps .
RUN pip wheel -w ${DEEP_EP_DIR} -v --no-deps .

# compile deep_gemm
FROM setup_env AS deep_gemm
Expand Down Expand Up @@ -192,7 +193,7 @@ COPY --from=flash_attn ${FLASH_ATTN_DIR} ${FLASH_ATTN_DIR}
COPY --from=adaptive_gemm ${ADAPTIVE_GEMM_DIR} ${ADAPTIVE_GEMM_DIR}
COPY --from=grouped_gemm ${GROUPED_GEMM_DIR} ${GROUPED_GEMM_DIR}
COPY --from=deep_ep ${DEEP_EP_DIR} ${DEEP_EP_DIR}
COPY --from=deep_ep ${NVSHMEM_PREFIX} ${NVSHMEM_PREFIX}
# COPY --from=deep_ep ${NVSHMEM_PREFIX} ${NVSHMEM_PREFIX}
COPY --from=deep_gemm ${DEEP_GEMM_DIR} ${DEEP_GEMM_DIR}
COPY --from=causal_conv1d ${CAUSAL_CONV1D_DIR} ${CAUSAL_CONV1D_DIR}

Expand All @@ -204,51 +205,114 @@ RUN unzip ${DEEP_EP_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
RUN unzip ${DEEP_GEMM_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}
RUN unzip ${CAUSAL_CONV1D_DIR}/*.whl -d ${PYTHON_SITE_PACKAGE_PATH}

# install sglang and its runtime requirements
ARG SGLANG_VERSION
ARG DEFAULT_PYPI_URL

RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
pip install sglang==${SGLANG_VERSION} sgl-kernel==0.3.14.post1 pybase64 orjson uvloop setproctitle msgspec \
compressed_tensors python-multipart torch_memory_saver \
grpcio-tools==1.75.1 hf_transfer interegular llguidance==0.7.11 \
xgrammar==0.1.24 blobfile==3.0.0 flashinfer_python==0.4.0 --no-cache-dir --no-deps
# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
RUN pip install pystack py-spy --no-cache-dir -i ${DEFAULT_PYPI_URL}

# install sglang and its runtime requirements
ENV XTUNER_SGLANG_ENVS_DIR=/envs/sglang

# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
RUN \
pip install --target ${XTUNER_SGLANG_ENVS_DIR} \
sglang==0.5.9 sgl-kernel==0.3.21 \
apache-tvm-ffi==0.1.9 \
anthropic==0.86.0 \
build==1.4.0 \
cuda-python==12.9.0 \
decord2==3.2.0 \
flashinfer_python==0.6.3 \
flashinfer_cubin==0.6.3 \
gguf==0.18.0 \
modelscope==1.35.3 \
nvidia-cutlass-dsl==4.4.2 \
openai-harmony==0.0.4 \
openai==2.6.1 \
outlines==0.1.11 \
quack-kernels==0.2.4 \
timm==1.0.16 \
torchao==0.9.0 \
torchaudio==2.9.1 \
torchcodec==0.8.0 \
xgrammar==0.1.32 \
smg-grpc-proto==0.4.5 \
grpcio==1.78.1 \
grpcio-reflection==1.78.1 \
grpcio-health-checking==1.80.0 \
pycryptodomex==3.23.0 \
lxml==6.0.2 \
cuda-bindings==12.9.6 \
cuda-pathfinder==1.5.0 \
nvidia-cudnn-frontend==1.21.0 \
lark==1.3.1 \
pycountry==26.2.16 \
airportsdata==20260315 \
outlines_core==0.1.26 \
torch-c-dlpack-ext==0.1.5 \
pyproject_hooks==1.2.0 \
huggingface_hub==0.36.2 \
torch_memory_saver==0.0.9 \
diskcache==5.6.3 distro==1.9.0 jiter==0.13.0 \
llguidance==0.7.11 blobfile==3.0.0 \
pybase64 orjson uvloop setproctitle msgspec partial_json_parser \
compressed_tensors python-multipart \
hf_transfer interegular --no-cache-dir --no-deps -i ${DEFAULT_PYPI_URL}

# install lmdeploy and its missing runtime requirements
ARG LMDEPLOY_VERSION
ARG LMDEPLOY_URL
ENV XTUNER_LMDEPLOY_ENVS_DIR=/envs/lmdeploy

# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
ARG LMDEPLOY_WHEELS=https://github.com/InternLM/lmdeploy/releases/download/v${LMDEPLOY_VERSION}/lmdeploy-${LMDEPLOY_VERSION}+cu128-cp312-cp312-manylinux2014_x86_64.whl
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
--mount=type=secret,id=NO_PROXY,env=no_proxy \
pip install fastapi fire openai outlines \
partial_json_parser ray[default] shortuuid uvicorn \
'pydantic>2' openai_harmony dlblas --no-cache-dir && \
pyzmq aiohttp cloudpickle prometheus_client protobuf numpy pillow einops tiktoken sentencepiece \
partial_json_parser 'ray[default]<3' shortuuid uvicorn pybase64 \
'pydantic>2' openai_harmony dlblas --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-cache-dir -i ${DEFAULT_PYPI_URL} && \
pip install xgrammar==0.1.32 timm!=1.0.23 --no-cache-dir -i ${DEFAULT_PYPI_URL} --no-deps && \
if [ -n "${LMDEPLOY_VERSION}" ]; then \
pip install lmdeploy==${LMDEPLOY_VERSION} --no-deps --no-cache-dir; \
# pip install lmdeploy==${LMDEPLOY_VERSION} --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
echo pip install ${LMDEPLOY_WHEELS} --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
pip install ${LMDEPLOY_WHEELS} --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
else \
git clone $(echo ${LMDEPLOY_URL} | cut -d '@' -f 1) && \
cd ${CODESPACE}/lmdeploy && \
git checkout $(echo ${LMDEPLOY_URL} | cut -d '@' -f 2) && \
pip install . -v --no-deps --no-cache-dir; \
pip install . -v --target ${XTUNER_LMDEPLOY_ENVS_DIR} --no-deps --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
fi

## install xtuner
ARG XTUNER_URL
ARG XTUNER_COMMIT
#RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
# git clone $(echo ${XTUNER_URL} | cut -d '@' -f 1) && \
# cd ${CODESPACE}/xtuner && \
# git checkout $(echo ${XTUNER_URL} | cut -d '@' -f 2)
COPY . ${CODESPACE}/xtuner

WORKDIR ${CODESPACE}/xtuner
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
pip install .[all] -v --no-cache-dir

# Install custom .pth file for conditional lmdeploy and sglang path injection
RUN cp -r .dev_scripts/xtuner_rl_path* ${PYTHON_SITE_PACKAGE_PATH}/

# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
RUN pip install .[all] -v --no-cache-dir -i ${DEFAULT_PYPI_URL}

WORKDIR ${CODESPACE}

# nccl update for torch 2.6.0
RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
if [ "x${TORCH_VERSION}" = "x2.6.0" ]; then \
pip install nvidia-nccl-cu12==2.25.1 --no-cache-dir; \
# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
RUN if [ "x${TORCH_VERSION}" = "x2.6.0" ]; then \
pip install nvidia-nccl-cu12==2.25.1 --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
fi

# cudnn update for torch 2.9.1
# RUN --mount=type=secret,id=HTTPS_PROXY,env=https_proxy \
RUN if [ "x${TORCH_VERSION}" = "x2.9.1" ]; then \
pip install nvidia-cudnn-cu12==9.15.1.9 --no-cache-dir -i ${DEFAULT_PYPI_URL}; \
fi

# setup sysctl
Expand Down
2 changes: 1 addition & 1 deletion autotest/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ default_config:
gpus_per_task: 8
cpus_per_task: 120
memory_per_task: 512
image: ailab-llmrazor/xtuner:pt28_latest
image: ailab-llmrazor/xtuner_tmp:pt29_20260414_c8f6fa1
envs:
- HF_HUB_CACHE=/mnt/shared-storage-user/auto-eval-pipeline/opencompass/models/hf_hub
eval:
Expand Down
6 changes: 3 additions & 3 deletions autotest/config/gptoss.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,27 +18,27 @@


gptoss_cfg = GptOss21BA3P6Config(
compile_cfg=False,
rope_scaling_cfg=RopeScalingConfig(
type="yarn",
beta_fast=16.0,
beta_slow=1.05,
factor=16.0,
original_max_position_embeddings=4096,
truncate=True,
)
),
)
optim_cfg = AdamWConfig(lr=6e-05)
lr_cfg = LRConfig(lr_type="cosine", lr_min=1e-6)
fsdp_cfg = FSDPConfig(
torch_compile=False,
cpu_offload=False,
ep_size=gptoss_cfg.ep_size,
)

dataset_config = [
{
"dataset": DatasetConfig(name="alpaca", anno_path=ALPACA_PATH, sample_ratio=1.0),
"tokenize_fn": OpenaiTokenizeFunctionConfig(chat_template='gpt-oss', max_length=16384),
"tokenize_fn": OpenaiTokenizeFunctionConfig(chat_template="gpt-oss", max_length=16384),
},
]

Expand Down
5 changes: 2 additions & 3 deletions autotest/config/npu_qwen3.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@
ALPACA_PATH = os.environ["ALPACA_PATH"]


moe_cfg = Qwen3MoE30BA3Config()
moe_cfg = Qwen3MoE30BA3Config(compile_cfg=False)
optim_cfg = AdamWConfig(lr=6e-05)
lr_cfg = LRConfig(lr_type="cosine", lr_min=1e-6)
fsdp_cfg = FSDPConfig(
torch_compile=False,
cpu_offload=False,
ep_size=moe_cfg.ep_size,
)
Expand All @@ -34,7 +33,7 @@

dataloader_config = DataloaderConfig(pack_max_length=16384)

loss_cfg = CELossConfig(mode="chunk", chunk_size=1024) # CELossConfig()
loss_cfg = CELossConfig(mode="chunk", chunk_size=1024) # CELossConfig()


trainer = TrainerConfig(
Expand Down
5 changes: 2 additions & 3 deletions autotest/config/npu_qwen3_16nums.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@
ALPACA_PATH = os.environ["ALPACA_PATH"]


moe_cfg = Qwen3MoE30BA3Config()
moe_cfg = Qwen3MoE30BA3Config(compile_cfg=False)
optim_cfg = AdamWConfig(lr=6e-05)
lr_cfg = LRConfig(lr_type="cosine", lr_min=1e-6)
fsdp_cfg = FSDPConfig(
torch_compile=False,
cpu_offload=False,
ep_size=moe_cfg.ep_size,
)
Expand All @@ -34,7 +33,7 @@

dataloader_config = DataloaderConfig(pack_max_length=16384)

loss_cfg = CELossConfig(mode="chunk", chunk_size=1024) # CELossConfig()
loss_cfg = CELossConfig(mode="chunk", chunk_size=1024) # CELossConfig()


trainer = TrainerConfig(
Expand Down
3 changes: 1 addition & 2 deletions autotest/config/npu_qwen3_moe_30BA3_ep8.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@
ALPACA_PATH = os.environ["ALPACA_PATH"]


moe_cfg = Qwen3MoE30BA3Config(ep_size=8)
moe_cfg = Qwen3MoE30BA3Config(ep_size=8, compile_cfg=False)
optim_cfg = AdamWConfig(lr=6e-05)
lr_cfg = LRConfig(lr_type="cosine", lr_min=1e-6)
fsdp_cfg = FSDPConfig(
torch_compile=True,
cpu_offload=False,
ep_size=moe_cfg.ep_size,
)
Expand Down
Loading
Loading