System Info
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/transformers/cache_utils.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.
self.keys = torch.tensor([], dtype=self.dtype, device=self.device)
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/transformers/cache_utils.py:93: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.
self.values = torch.tensor([], dtype=self.dtype, device=self.device)
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/transformers/cache_utils.py:130: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
if self.keys is None or self.keys.numel() == 0:
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/transformers/integrations/sdpa_attention.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
is_causal = query.shape[2] > 1 and attention_mask is None and getattr(module, "is_causal", True)
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/transformers/models/qwen3_moe/modeling_qwen3_moe.py:250: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results).
for expert_idx in expert_hit:
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/torch/onnx/symbolic_opset9.py:5383: UserWarning: Exporting aten::index operator of advanced indexing in opset 17 is achieved by combination of multiple ONNX operators, including Reshape, Transpose, Concat, and Gather. If indices include negative values, the exported graph will produce incorrect results.
warnings.warn(
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/torch/onnx/symbolic_opset9.py:6073: UserWarning: Warning: ONNX export does not support duplicated values in 'index' field, this will cause the ONNX model to be incorrect.
warnings.warn(
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/torch/onnx/utils.py:657: UserWarning: Casting complex values to real discards the imaginary part (Triggered internally at /opt/pytorch/aten/src/ATen/native/Copy.cpp:308.)
_C._jit_pass_onnx_graph_shape_type_inference(
Traceback (most recent call last):
File "/home/smartsens/sdz/export_onnx.py", line 124, in <module>
export_optiumm_onnx(0, 4)
File "/home/smartsens/sdz/export_onnx.py", line 108, in export_optiumm_onnx
inputs_name, outputs_name = onnx.export(
File "/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/optimum/exporters/onnx/convert.py", line 874, in export
export_output = export_pytorch(
File "/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/optimum/exporters/onnx/convert.py", line 567, in export_pytorch
onnx_export(
File "/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/torch/onnx/__init__.py", line 383, in export
export(
File "/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/torch/onnx/utils.py", line 495, in export
_export(
File "/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/torch/onnx/utils.py", line 1468, in _export
) = graph._export_onnx( # type: ignore[attr-defined]
RuntimeError: ScalarType ComplexDouble is an unexpected tensor scalar type
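For context, here is a small sketch (a hypothetical helper, not part of the failing script) that could be run on the sliced model right before the export call to check whether any of its parameters or buffers are complex-valued, since that is where the ComplexDouble scalar type would come from if it is baked into the weights rather than created during tracing:

import torch

def find_complex_tensors(model: torch.nn.Module):
    # List every parameter/buffer whose dtype is complex; the export above fails on
    # a ComplexDouble tensor during ONNX shape/type inference.
    for name, tensor in list(model.named_parameters()) + list(model.named_buffers()):
        if tensor.is_complex():
            print(name, tensor.dtype, tuple(tensor.shape))

Calling find_complex_tensors(slice_model) before onnx.export in the reproduction below would show whether the complex values live in the checkpoint itself or are only produced inside the traced graph.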
Who can help?
@echarlaix and @IlyasMoutawwakil
Information
- The official example scripts
- My own modified scripts
Tasks
- An officially supported task in the examples folder (such as GLUE/SQuAD, ...)
- My own task or dataset (give details below)
Reproduction (minimal, reproducible, runnable)
from pathlib import Path
from typing import Dict, OrderedDict
import torch
from transformers import AutoTokenizer, AutoConfig, Qwen3Config
from kv_cache import initialization_kv_cache
from part_model import Qwen3MoeForCausalLMSlice
from optimum.exporters.onnx import OnnxConfigWithPast
import optimum.exporters.onnx as onnx
from optimum.exporters.tasks import TasksManager
from optimum.utils import DummyPastKeyValuesGenerator, NormalizedConfigManager, DummyTextInputGenerator
class Qwen3MoeCausalLMOnnxConfig(OnnxConfigWithPast):
NORMALIZED_CONFIG_CLASS = NormalizedConfigManager.get_normalized_config_class("qwen3_moe")
DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, DummyPastKeyValuesGenerator)
def __init__(
self,
config,
node_rank,
use_past: bool = True,
):
super().__init__(
config=config
)
self.config = config
self.node_rank = node_rank
self.use_past = use_past
self.float_dtype = "bf16"
@property
def inputs(self) -> Dict[str, Dict[int, str]]:
inputs = OrderedDict()
inputs["input_ids"] = {0: "batch_size_size", 1: "seq_len"}
if self.use_past:
self.add_past_key_values(inputs, direction="inputs")
return inputs
@property
def outputs(self):
outputs = OrderedDict()
outputs["last_hidden_state"] = {0: "batch_size_size", 1: "seq_len"}
# outputs["logits"] = {0: "batch_size", 1: "seq_len"}
if self.use_past:
self.add_past_key_values(outputs, direction="outputs")
return outputs
def add_past_key_values(self, inputs_or_outputs: Dict[str, Dict[int, str]], direction: str):
if direction not in ["inputs", "outputs"]:
raise ValueError(f'direction must either be "inputs" or "outputs", but {direction} was given')
if direction == "inputs":
axis_name = "past_kv_len"
name = "past_key_values"
else:
axis_name = "present_kv_len"
name = "present_key_values"
start_idx = self.node_rank * self.config.num_hidden_layers
end_idx = start_idx + self.config.num_hidden_layers
for layer_index in range(start_idx, end_idx):
inputs_or_outputs[f"{name}.{layer_index}.key"] = {0: "batch_size", 2: axis_name}
inputs_or_outputs[f"{name}.{layer_index}.value"] = {0: "batch_size", 2: axis_name}
def generate_dummy_inputs(self, framework: str = "pt", **kwargs):
dummy_inputs_generators = self._create_dummy_input_generator_classes(**kwargs)
dummy_inputs = {}
input_names = [key for key in self.inputs.keys() if not key.startswith("past_key_values")]
if self.use_past:
input_names.append("past_key_values")
# import ipdb
# ipdb.set_trace()
for input_name in input_names:
for dummy_input_gen in dummy_inputs_generators:
if dummy_input_gen.supports_input(input_name):
dummy_inputs[input_name] = dummy_input_gen.generate(
input_name, framework=framework, int_dtype=self.int_dtype, float_dtype=self.float_dtype
)
return dummy_inputs
def export_optiumm_onnx(node_rank, num_nodes):
model_path = Path("/mnt/nvme0/models/Qwen3-Coder-30B-A3B-Instruct")
export_path = Path(f"/mnt/nvme0/models/onnx/")
    # Initialize the device (the Orin's GPU)
device = torch.device(f"cuda:{0}" if torch.cuda.is_available() else "cpu")
print(f"节点 {node_rank} 使用设备: {device}")
model_config = AutoConfig.from_pretrained(model_path)
model_config.num_hidden_layers = model_config.num_hidden_layers // num_nodes
slice_model = Qwen3MoeForCausalLMSlice.from_pretrained(
model_path, node_rank, num_nodes,
config = model_config,
dtype="auto", device_map=device,
)
onnx_config = Qwen3MoeCausalLMOnnxConfig(model_config, node_rank, use_past=True)
    # Step 1. Export the model to ONNX
inputs_name, outputs_name = onnx.export(
model=slice_model,
config=onnx_config,
output=export_path,
device="cuda",
opset=17,
)
print(inputs_name, outputs_name)
    # # Step 2. Save the tokenizer
# tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
# tokenizer.save_pretrained(onnx_dir)
if __name__ == "__main__":
export_optiumm_onnx(0, 4)
Just run this script.
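For reference, here is a minimal sketch of what the custom config asks the exporter to produce (illustrative values only; it assumes the same environment as the script above, i.e. that NormalizedConfigManager resolves "qwen3_moe"):

from transformers import Qwen3MoeConfig

# Hypothetical example: 48 hidden layers split across 4 nodes -> 12 layers per slice,
# so node_rank=1 owns layers 12..23 and its KV-cache names are offset accordingly.
cfg = Qwen3MoeConfig(num_hidden_layers=12)
onnx_cfg = Qwen3MoeCausalLMOnnxConfig(cfg, node_rank=1, use_past=True)
print(list(onnx_cfg.inputs))   # input_ids, past_key_values.12.key, ..., past_key_values.23.value
print(list(onnx_cfg.outputs))  # last_hidden_state, present_key_values.12.key, ..., present_key_values.23.value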
Expected behavior
I expect the Hugging Face model to be exported to ONNX successfully.