RuntimeError: ScalarType ComplexDouble is an unexpected tensor scalar type #2349

@capyun

Description

System Info

/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/transformers/cache_utils.py:92: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.
  self.keys = torch.tensor([], dtype=self.dtype, device=self.device)
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/transformers/cache_utils.py:93: TracerWarning: torch.tensor results are registered as constants in the trace. You can safely ignore this warning if you use this function to create tensors out of constant variables that would be the same every time you call this function. In any other case, this might cause the trace to be incorrect.
  self.values = torch.tensor([], dtype=self.dtype, device=self.device)
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/transformers/cache_utils.py:130: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  if self.keys is None or self.keys.numel() == 0:
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/transformers/integrations/sdpa_attention.py:76: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!
  is_causal = query.shape[2] > 1 and attention_mask is None and getattr(module, "is_causal", True)
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/transformers/models/qwen3_moe/modeling_qwen3_moe.py:250: TracerWarning: Iterating over a tensor might cause the trace to be incorrect. Passing a tensor of different shape won't change the number of iterations executed (and might lead to errors or silently give incorrect results).
  for expert_idx in expert_hit:
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/torch/onnx/symbolic_opset9.py:5383: UserWarning: Exporting aten::index operator of advanced indexing in opset 17 is achieved by combination of multiple ONNX operators, including Reshape, Transpose, Concat, and Gather. If indices include negative values, the exported graph will produce incorrect results.
  warnings.warn(
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/torch/onnx/symbolic_opset9.py:6073: UserWarning: Warning: ONNX export does not support duplicated values in 'index' field, this will cause the ONNX model to be incorrect.
  warnings.warn(
/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/torch/onnx/utils.py:657: UserWarning: Casting complex values to real discards the imaginary part (Triggered internally at /opt/pytorch/aten/src/ATen/native/Copy.cpp:308.)
  _C._jit_pass_onnx_graph_shape_type_inference(
Traceback (most recent call last):
  File "/home/smartsens/sdz/export_onnx.py", line 124, in <module>
    export_optiumm_onnx(0, 4)
  File "/home/smartsens/sdz/export_onnx.py", line 108, in export_optiumm_onnx
    inputs_name, outputs_name = onnx.export(
  File "/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/optimum/exporters/onnx/convert.py", line 874, in export
    export_output = export_pytorch(
  File "/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/optimum/exporters/onnx/convert.py", line 567, in export_pytorch
    onnx_export(
  File "/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/torch/onnx/__init__.py", line 383, in export
    export(
  File "/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/torch/onnx/utils.py", line 495, in export
    _export(
  File "/mnt/nvme0/conda_envs/tch/lib/python3.10/site-packages/torch/onnx/utils.py", line 1468, in _export
    ) = graph._export_onnx(  # type: ignore[attr-defined]
RuntimeError: ScalarType ComplexDouble is an unexpected tensor scalar type
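
The earlier UserWarning from torch/onnx/utils.py ("Casting complex values to real discards the imaginary part") suggests a complex-valued tensor reaches ONNX shape/type inference. As a minimal debugging sketch (not part of the failing script; the helper name is mine), this is how I scan the sliced model for complex parameters or buffers before export:

import torch

def find_complex_tensors(model: torch.nn.Module):
    # Collect every parameter/buffer with a complex dtype (torch.complex64/complex128),
    # since the ONNX exporter cannot represent ComplexDouble tensors.
    hits = []
    for kind, named in (("parameter", model.named_parameters()), ("buffer", model.named_buffers())):
        for name, tensor in named:
            if tensor.is_complex():
                hits.append((kind, name, tensor.dtype))
    return hits

# Usage with the model from the reproduction script below:
# for kind, name, dtype in find_complex_tensors(slice_model):
#     print(kind, name, dtype)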

Who can help?

@echarlaix and @IlyasMoutawwakil

Information

  • The official example scripts
  • My own modified scripts

Tasks

  • An officially supported task in the examples folder (such as GLUE/SQuAD, ...)
  • My own task or dataset (give details below)

Reproduction (minimal, reproducible, runnable)

from pathlib import Path
from collections import OrderedDict
from typing import Dict

import torch
from transformers import AutoConfig, AutoTokenizer, Qwen3Config
from kv_cache import initialization_kv_cache
from part_model import Qwen3MoeForCausalLMSlice
import optimum.exporters.onnx as onnx
from optimum.exporters.onnx import OnnxConfigWithPast
from optimum.exporters.tasks import TasksManager
from optimum.utils import DummyPastKeyValuesGenerator, DummyTextInputGenerator, NormalizedConfigManager
class Qwen3MoeCausalLMOnnxConfig(OnnxConfigWithPast):
    NORMALIZED_CONFIG_CLASS = NormalizedConfigManager.get_normalized_config_class("qwen3_moe")
    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, DummyPastKeyValuesGenerator)

    def __init__(
        self,
        config,
        node_rank,
        use_past: bool = True,
    ):
        super().__init__(config=config)
        self.config = config
        self.node_rank = node_rank
        self.use_past = use_past
        self.float_dtype = "bf16"

    @property
    def inputs(self) -> Dict[str, Dict[int, str]]:
        inputs = OrderedDict()
        inputs["input_ids"] = {0: "batch_size", 1: "seq_len"}

        if self.use_past:
            self.add_past_key_values(inputs, direction="inputs")

        return inputs

    @property
    def outputs(self):
        outputs = OrderedDict()
        outputs["last_hidden_state"] = {0: "batch_size", 1: "seq_len"}
        # outputs["logits"] = {0: "batch_size", 1: "seq_len"}
        if self.use_past:
            self.add_past_key_values(outputs, direction="outputs")

        return outputs

    def add_past_key_values(self, inputs_or_outputs: Dict[str, Dict[int, str]], direction: str):
        if direction not in ["inputs", "outputs"]:
            raise ValueError(f'direction must either be "inputs" or "outputs", but {direction} was given')

        if direction == "inputs":
            axis_name = "past_kv_len"
            name = "past_key_values"
        else:
            axis_name = "present_kv_len"
            name = "present_key_values"

        start_idx = self.node_rank * self.config.num_hidden_layers
        end_idx = start_idx + self.config.num_hidden_layers
        for layer_index in range(start_idx, end_idx):
            inputs_or_outputs[f"{name}.{layer_index}.key"] = {0: "batch_size", 2: axis_name}
            inputs_or_outputs[f"{name}.{layer_index}.value"] = {0: "batch_size", 2: axis_name}

    def generate_dummy_inputs(self, framework: str = "pt", **kwargs):
        dummy_inputs_generators = self._create_dummy_input_generator_classes(**kwargs)

        dummy_inputs = {}
        input_names = [key for key in self.inputs.keys() if not key.startswith("past_key_values")]
        if self.use_past:
            input_names.append("past_key_values")

        for input_name in input_names:
            for dummy_input_gen in dummy_inputs_generators:
                if dummy_input_gen.supports_input(input_name):
                    dummy_inputs[input_name] = dummy_input_gen.generate(
                        input_name, framework=framework, int_dtype=self.int_dtype, float_dtype=self.float_dtype
                    )

        return dummy_inputs

def export_optiumm_onnx(node_rank, num_nodes):
    model_path = Path("/mnt/nvme0/models/Qwen3-Coder-30B-A3B-Instruct")
    export_path = Path("/mnt/nvme0/models/onnx/")

    # Initialize the device (the Orin GPU)
    device = torch.device(f"cuda:{0}" if torch.cuda.is_available() else "cpu")
    print(f"Node {node_rank} uses device: {device}")

    model_config = AutoConfig.from_pretrained(model_path)
    model_config.num_hidden_layers = model_config.num_hidden_layers // num_nodes
    slice_model = Qwen3MoeForCausalLMSlice.from_pretrained(
        model_path, node_rank, num_nodes,
        config=model_config,
        dtype="auto", device_map=device,
    )
    onnx_config = Qwen3MoeCausalLMOnnxConfig(model_config, node_rank, use_past=True)

    # Step 1. Export the model to ONNX
    inputs_name, outputs_name = onnx.export(
        model=slice_model,
        config=onnx_config,
        output=export_path,
        device="cuda",
        opset=17,
    )

    print(inputs_name, outputs_name)

    # # Step 2. Save the tokenizer
    # tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    # tokenizer.save_pretrained(onnx_dir)


if __name__ == "__main__":
    export_optiumm_onnx(0, 4)

Just run this script; it fails with the RuntimeError shown above.

Expected behavior

The Hugging Face model should export to ONNX successfully.
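
For reference, if the export succeeded I would verify it roughly like this (a sketch; the exported file name under /mnt/nvme0/models/onnx/ is an assumption, since optimum chooses it):

import onnxruntime as ort

# Hypothetical post-export check: load the exported graph and list its input/output names.
session = ort.InferenceSession(
    "/mnt/nvme0/models/onnx/model.onnx",  # assumed file name
    providers=["CPUExecutionProvider"],
)
print([inp.name for inp in session.get_inputs()])
print([out.name for out in session.get_outputs()])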
