Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,7 @@ save*
.log
*.pid
*.ipynb*
model/
output_*
HiFloat4/
datasets/
6 changes: 6 additions & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
我需要给wan2.2(https://huggingface.co/Wan-AI/Wan2.2-Animate-14B-Diffusers)进行hifp4的模拟量化,使用的方法是AWQ,但是目前生成的权重有一些问题
我的两个推测:一是代码存在real quant和fake quant的糅合导致结果错误,二是模型本身没有完全保存(只保存了transformer部分)
请参考配置文件configs/quantization/video_gen/wan2_2_t2v/awq_w_a.yaml和运行脚本scripts/run_llmc.sh,帮我解决这个问题
请注意,我现在的电脑是本地主机而不是服务器,所以需要你从代码本身的逻辑去寻找错误而不能真的运行
可以参考int4的real quant和llmc中本身的fake quant配置寻找原因
你有权限修改本文件夹下所有文件
57 changes: 57 additions & 0 deletions configs/quantization/video_gen/wan2_2_t2v/awq_w_a.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
base:
seed: &seed 42
model:
type: Wan2T2V
path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/model/Wan2.2-T2V-A14B-Diffusers
torch_dtype: auto
# 显存不足时开启:校准阶段捕获的激活存到 CPU,量化时再按 block 搬到 GPU
use_cpu_to_save_cuda_mem_for_catcher: True
calib:
name: t2v
download: False
path: ./assets/wan_t2v/calib/
sample_steps: 20 # OOM 时可减小,如 8 或 10
bs: 1
target_height: 480 # OOM 时可减小,如 320
target_width: 832 # OOM 时可减小,如 576
num_frames: 81 # OOM 时可减小,如 49 或 33
guidance_scale: 5.0
seed: *seed
eval:
eval_pos: [transformed, fake_quant]
type: video_gen
name: t2v
download: False
path: ./assets/wan_t2v/calib/
bs: 1
target_height: 480
target_width: 832
num_frames: 81
guidance_scale: 5.0
output_video_path: ./output_videos_awq/
quant:
video_gen:
method: Awq
weight:
# quant_type: int-quant
quant_type: hif4
bit: 4
symmetric: True
granularity: per_channel
group_size: -1
act:
# quant_type: int-quant
quant_type: hif4
bit: 4
symmetric: True
granularity: per_token
special:
trans: True
trans_version: v2
weight_clip: True
clip_sym: True
save:
# save_lightx2v: True
# save_path: ./save_for_lightx2v/wan2_2_t2v/awq_w_a/original/
save_fake: True
save_path: ./save_for_fake/wan2_2_t2v/awq_w_a/original/
8 changes: 4 additions & 4 deletions configs/quantization/video_gen/wan_i2v/awq_w_a.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ base:
seed: &seed 42
model:
type: WanI2V
path: /path/to/model
path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/models/Wan2.2-T2V-A14B/
torch_dtype: auto
calib:
name: i2v
Expand Down Expand Up @@ -31,12 +31,12 @@ quant:
video_gen:
method: Awq
weight:
bit: 8
bit: 4
symmetric: True
granularity: per_channel
group_size: -1
act:
bit: 8
bit: 4
symmetric: True
granularity: per_token
special:
Expand All @@ -46,4 +46,4 @@ quant:
clip_sym: True
save:
save_lightx2v: True
save_path: /path/to/x2v/
save_path: ../lightx2v/wan_i2v_awq_w_a/x2v/
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Wan2.1 I2V FP8 量化配置示例
# 这是一个快速开始的配置文件,请根据实际情况修改路径

base:
seed: &seed 42

model:
type: WanI2V
path: /path/to/wan2.1-i2v-model # 修改为你的 Wan2.1 I2V 模型路径
torch_dtype: auto

calib:
name: i2v
download: False
path: /path/to/calibration/data # 修改为你的校准数据路径
sample_steps: 40
bs: 1
target_height: 480
target_width: 832
num_frames: 81
guidance_scale: 5.0
seed: *seed

eval:
eval_pos: [fake_quant]
type: video_gen
name: i2v
download: False
path: /path/to/eval/data # 修改为你的评估数据路径
bs: 1
target_height: 480
target_width: 832
num_frames: 81
guidance_scale: 5.0
output_video_path: ./output_videos_fp8/

quant:
video_gen:
method: SmoothQuant
weight:
quant_type: float-quant
bit: e4m3 # FP8 E4M3 格式
symmetric: True
granularity: per_channel
use_qtorch: True
act:
quant_type: float-quant
bit: e4m3 # FP8 E4M3 格式
symmetric: True
granularity: per_token
use_qtorch: True
special:
alpha: 0.75 # SmoothQuant 平衡参数,范围 0.5-1.0

save:
save_lightx2v: True # 保存为 lightx2v 兼容格式
save_path: /path/to/save/quantized/model # 修改为你的保存路径
12 changes: 6 additions & 6 deletions configs/quantization/video_gen/wan_t2v/awq_w_a.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ base:
seed: &seed 42
model:
type: WanT2V
path: /path/to/wan_t2v
path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/models/Wan2.1-T2V-14B-Diffusers
torch_dtype: auto
calib:
name: t2v
download: False
path: ../assets/wan_t2v/calib/
path: ./assets/wan_t2v/calib/
sample_steps: 20
bs: 1
target_height: 480
Expand All @@ -20,7 +20,7 @@ eval:
type: video_gen
name: t2v
download: False
path: ../assets/wan_t2v/calib/
path: ./assets/wan_t2v/calib/
bs: 1
target_height: 480
target_width: 832
Expand All @@ -31,12 +31,12 @@ quant:
video_gen:
method: Awq
weight:
bit: 6
bit: 4
symmetric: True
granularity: per_channel
group_size: -1
act:
bit: 6
bit: 4
symmetric: True
granularity: per_token
special:
Expand All @@ -46,4 +46,4 @@ quant:
clip_sym: True
save:
save_lightx2v: True
save_path: /path/to/x2v/
save_path: ../lightx2v/wan_t2v_awq_w_a/x2v/
49 changes: 49 additions & 0 deletions configs/quantization/video_gen/wan_t2v/awq_w_a_s.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
base:
seed: &seed 42
model:
type: WanT2V
path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/models/Wan2.1-T2V-1.3B-Diffusers
torch_dtype: auto
calib:
name: t2v
download: False
path: ./assets/wan_t2v/calib/
sample_steps: 20
bs: 1
target_height: 480
target_width: 832
num_frames: 81
guidance_scale: 5.0
seed: *seed
eval:
eval_pos: [transformed, fake_quant]
type: video_gen
name: t2v
download: False
path: ./assets/wan_t2v/calib/
bs: 1
target_height: 480
target_width: 832
num_frames: 81
guidance_scale: 5.0
output_video_path: ./output_videos_awq/
quant:
video_gen:
method: Awq
weight:
bit: 4
symmetric: True
granularity: per_channel
group_size: -1
act:
bit: 4
symmetric: True
granularity: per_token
special:
trans: True
trans_version: v2
weight_clip: True
clip_sym: True
save:
save_lightx2v: True
save_path: ../lightx2v/wan_t2v_awq_w_a_s/x2v/
32 changes: 0 additions & 32 deletions configs/quantization/video_gen/wan_t2v/rtn_w_a.yaml

This file was deleted.

22 changes: 13 additions & 9 deletions configs/quantization/video_gen/wan_t2v/smoothquant_w_a.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ base:
seed: &seed 42
model:
type: WanT2V
path: /path/to/wan_t2v
path: /mnt/lm_data_afs/wangzining/charles/lab/llmc/models/Wan2.2-T2V-14B-Diffusers
torch_dtype: auto
calib:
name: t2v
download: False
path: ../assets/wan_t2v/calib/
path: ./assets/wan_t2v/calib/
sample_steps: 20
bs: 1
target_height: 480
Expand All @@ -20,26 +20,30 @@ eval:
type: video_gen
name: t2v
download: False
path: ../assets/wan_t2v/calib/
path: ./assets/wan_t2v/calib/
bs: 1
target_height: 480
target_width: 832
num_frames: 81
guidance_scale: 5.0
output_video_path: ./output_videos_sq/
output_video_path: ./output_videos_awq/
quant:
video_gen:
method: SmoothQuant
method: Awq
weight:
bit: 6
bit: 4
symmetric: True
granularity: per_channel
group_size: -1
act:
bit: 6
bit: 4
symmetric: True
granularity: per_token
special:
alpha: 0.7
trans: True
trans_version: v2
weight_clip: True
clip_sym: True
save:
save_lightx2v: True
save_path: /path/to/x2v/
save_path: ../lightx2v/wan_t2v_awq_w_a/x2v/
Loading