Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions backends/arm/MODELS.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
<!-- Copyright 2025-2026 Arm Limited and/or its affiliates. -->
# The following file contains all models that have been confirmed to be functional and tested for the Arm backend:
# Note: Deep AutoEncoder requires manual Linear+BatchNorm1d fusion as the quantizer does not yet support this pattern.
# Note: DS CNN requires AvgPool2d workaround for Ethos-U55 due to stride > 3 limitation.
- Conformer
- Deep AutoEncoder
- DeiT Tiny
- DeepLab v3 (DL3)
- DS CNN
- Inception v3 (IC3)
- Llama
- Gemma3n
- Long Short-Term Memory (LSTM)
- MobileNet V1 0.25
- MobileNet v2 (MV2)
- MobileNet v3 (MV3)
- Some popular torch.nn.functional models (NN functional)
Expand All @@ -16,6 +21,7 @@
- Neural Super Sampler (NSS)
- Phi-3
- ResNet 18
- ResNet-8
- Wav2Letter (W2L)
- Stable Diffusion:
* CLIP Text Encoder (CLIP Text with Projection)
Expand Down
123 changes: 123 additions & 0 deletions backends/arm/test/models/test_deep_autoencoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Copyright 2026 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Ethos-U FVP tests for the MLPerf Tiny anomaly detection Deep AutoEncoder."""

from typing import Tuple

import pytest
import torch
import torch.nn as nn
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
EthosU55PipelineINT,
EthosU85PipelineINT,
TosaPipelineFP,
TosaPipelineINT,
)

from executorch.examples.models.mlperf_tiny import DeepAutoEncoderModel
from torch.nn.utils.fusion import fuse_linear_bn_eval


def _fuse_linear_bn(mod: nn.Module) -> nn.Module:
"""Fuse Linear + BatchNorm1d pairs in the model.

The TOSA quantizer does not annotate linear+batch_norm patterns, so we fold
the BatchNorm1d into the preceding Linear before export.
TODO: Remove once the quantizer supports linear+bn.

"""
if not isinstance(mod, nn.Sequential):
for name, child in mod.named_children():
setattr(mod, name, _fuse_linear_bn(child))
return mod
new_layers = []
layers = list(mod)
i = 0
while i < len(layers):
if (
isinstance(layers[i], nn.Linear)
and i + 1 < len(layers)
and isinstance(layers[i + 1], nn.BatchNorm1d)
):
new_layers.append(fuse_linear_bn_eval(layers[i], layers[i + 1])) # type: ignore[type-var, arg-type]
i += 2
else:
new_layers.append(_fuse_linear_bn(layers[i]))
i += 1
return nn.Sequential(*new_layers)


# Build the eager model once at import time so all tests share it.
_wrapper = DeepAutoEncoderModel()
# Fold Linear+BatchNorm1d pairs before export (quantizer limitation, see above).
model = _fuse_linear_bn(_wrapper.get_eager_model())
model_inputs = _wrapper.get_example_inputs()
# Pipeline input signature: a single input tensor.
input_t = Tuple[torch.Tensor]

# Parametrization labels mapped to the per_channel_quantization flag.
quant_test_data = {
    "per_channel_quantization=true": True,
    "per_channel_quantization=false": False,
}


def test_deep_autoencoder_tosa_FP():
    """Run the FP (non-quantized) TOSA pipeline on the Deep AutoEncoder."""
    pipeline = TosaPipelineFP[input_t](
        model,
        model_inputs,
        aten_op=[],
        exir_op=[],
        use_to_edge_transform_and_lower=True,
    )
    pipeline.run()


@common.parametrize("per_channel_quantization", quant_test_data)
def test_deep_autoencoder_tosa_INT(per_channel_quantization):
    """Run the quantized (INT) TOSA pipeline on the Deep AutoEncoder."""
    pipeline_kwargs = {
        "aten_op": [],
        "exir_op": [],
        "use_to_edge_transform_and_lower": True,
        "per_channel_quantization": per_channel_quantization,
        "atol": 0.25,
        "qtol": 1,
        "frobenius_threshold": None,
        "cosine_threshold": None,
    }
    TosaPipelineINT[input_t](model, model_inputs, **pipeline_kwargs).run()


@pytest.mark.slow
@common.XfailIfNoCorstone300
@common.parametrize("per_channel_quantization", quant_test_data)
def test_deep_autoencoder_u55_INT(per_channel_quantization):
    """Run the quantized Deep AutoEncoder on the Ethos-U55 (Corstone-300) FVP."""
    pipeline_kwargs = {
        "aten_ops": [],
        "exir_ops": [],
        "use_to_edge_transform_and_lower": True,
        "per_channel_quantization": per_channel_quantization,
        "atol": 0.25,
        "qtol": 1,
    }
    EthosU55PipelineINT[input_t](model, model_inputs, **pipeline_kwargs).run()


@pytest.mark.slow
@common.XfailIfNoCorstone320
@common.parametrize("per_channel_quantization", quant_test_data)
def test_deep_autoencoder_u85_INT(per_channel_quantization):
    """Run the quantized Deep AutoEncoder on the Ethos-U85 (Corstone-320) FVP."""
    pipeline_kwargs = {
        "aten_ops": [],
        "exir_ops": [],
        "use_to_edge_transform_and_lower": True,
        "per_channel_quantization": per_channel_quantization,
        "atol": 0.25,
        "qtol": 1,
    }
    EthosU85PipelineINT[input_t](model, model_inputs, **pipeline_kwargs).run()
97 changes: 97 additions & 0 deletions backends/arm/test/models/test_ds_cnn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Copyright 2026 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Ethos-U FVP tests for the MLPerf Tiny Keyword Spotting DS-CNN model."""

from typing import Tuple

import pytest
import torch
import torch.nn as nn
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
EthosU55PipelineINT,
EthosU85PipelineINT,
TosaPipelineFP,
TosaPipelineINT,
)

from executorch.examples.models.mlperf_tiny import DSCNNKWSModel

# Build the eager model once at import time so all tests share it.
_wrapper = DSCNNKWSModel()
model = _wrapper.get_eager_model()
# TODO: Remove once a pass decomposes large-stride AvgPool2d.
# Replace AvgPool2d(24,5) with AdaptiveAvgPool2d(1) so the
# DecomposeAdaptiveAvgPool2dPass can break it into stride-1
# pools that satisfy the Ethos-U55 stride <= 3 constraint.
model.pool = nn.AdaptiveAvgPool2d(output_size=1)  # type: ignore[assignment]
model_inputs = _wrapper.get_example_inputs()
# Pipeline input signature: a single input tensor.
input_t = Tuple[torch.Tensor]

# Parametrization labels mapped to the per_channel_quantization flag.
quant_test_data = {
    "per_channel_quantization=true": True,
    "per_channel_quantization=false": False,
}


def test_ds_cnn_tosa_FP():
    """Run the FP (non-quantized) TOSA pipeline on the DS-CNN model."""
    pipeline = TosaPipelineFP[input_t](
        model,
        model_inputs,
        aten_op=[],
        exir_op=[],
        use_to_edge_transform_and_lower=True,
    )
    pipeline.run()


@common.parametrize("per_channel_quantization", quant_test_data)
def test_ds_cnn_tosa_INT(per_channel_quantization):
    """Run the quantized (INT) TOSA pipeline on the DS-CNN model."""
    pipeline_kwargs = {
        "aten_op": [],
        "exir_op": [],
        "use_to_edge_transform_and_lower": True,
        "per_channel_quantization": per_channel_quantization,
        "atol": 0.25,
        "qtol": 1,
        "frobenius_threshold": None,
        "cosine_threshold": None,
    }
    TosaPipelineINT[input_t](model, model_inputs, **pipeline_kwargs).run()


@pytest.mark.slow
@common.XfailIfNoCorstone300
@common.parametrize("per_channel_quantization", quant_test_data)
def test_ds_cnn_u55_INT(per_channel_quantization):
    """Run the quantized DS-CNN on the Ethos-U55 (Corstone-300) FVP."""
    pipeline_kwargs = {
        "aten_ops": [],
        "exir_ops": [],
        "use_to_edge_transform_and_lower": True,
        "per_channel_quantization": per_channel_quantization,
        "atol": 0.25,
        "qtol": 1,
    }
    EthosU55PipelineINT[input_t](model, model_inputs, **pipeline_kwargs).run()


@pytest.mark.slow
@common.XfailIfNoCorstone320
@common.parametrize("per_channel_quantization", quant_test_data)
def test_ds_cnn_u85_INT(per_channel_quantization):
    """Run the quantized DS-CNN on the Ethos-U85 (Corstone-320) FVP."""
    pipeline_kwargs = {
        "aten_ops": [],
        "exir_ops": [],
        "use_to_edge_transform_and_lower": True,
        "per_channel_quantization": per_channel_quantization,
        "atol": 0.25,
        "qtol": 1,
    }
    EthosU85PipelineINT[input_t](model, model_inputs, **pipeline_kwargs).run()
93 changes: 93 additions & 0 deletions backends/arm/test/models/test_mobilenet_v1_025.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
# Copyright 2026 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""Ethos-U FVP tests for the MLPerf Tiny Visual Wake Words MobileNetV1 (width
0.25).
"""

from typing import Tuple

import pytest
import torch
from executorch.backends.arm.test import common
from executorch.backends.arm.test.tester.test_pipeline import (
EthosU55PipelineINT,
EthosU85PipelineINT,
TosaPipelineFP,
TosaPipelineINT,
)

from executorch.examples.models.mlperf_tiny import MobileNetV1025Model

# Build the eager model once at import time so all tests share it.
_wrapper = MobileNetV1025Model()
model = _wrapper.get_eager_model()
model_inputs = _wrapper.get_example_inputs()
# Pipeline input signature: a single input tensor.
input_t = Tuple[torch.Tensor]

# Parametrization labels mapped to the per_channel_quantization flag.
quant_test_data = {
    "per_channel_quantization=true": True,
    "per_channel_quantization=false": False,
}


def test_mobilenet_v1_025_tosa_FP():
    """Run the FP (non-quantized) TOSA pipeline on MobileNetV1 (width 0.25)."""
    pipeline = TosaPipelineFP[input_t](
        model,
        model_inputs,
        aten_op=[],
        exir_op=[],
        use_to_edge_transform_and_lower=True,
    )
    pipeline.run()


@common.parametrize("per_channel_quantization", quant_test_data)
def test_mobilenet_v1_025_tosa_INT(per_channel_quantization):
    """Run the quantized (INT) TOSA pipeline on MobileNetV1 (width 0.25)."""
    pipeline_kwargs = {
        "aten_op": [],
        "exir_op": [],
        "use_to_edge_transform_and_lower": True,
        "per_channel_quantization": per_channel_quantization,
        "atol": 0.25,
        "qtol": 1,
        "frobenius_threshold": None,
        "cosine_threshold": None,
    }
    TosaPipelineINT[input_t](model, model_inputs, **pipeline_kwargs).run()


@pytest.mark.slow
@common.XfailIfNoCorstone300
@common.parametrize("per_channel_quantization", quant_test_data)
def test_mobilenet_v1_025_u55_INT(per_channel_quantization):
    """Run quantized MobileNetV1 0.25 on the Ethos-U55 (Corstone-300) FVP."""
    pipeline_kwargs = {
        "aten_ops": [],
        "exir_ops": [],
        "use_to_edge_transform_and_lower": True,
        "per_channel_quantization": per_channel_quantization,
        "atol": 0.25,
        "qtol": 1,
    }
    EthosU55PipelineINT[input_t](model, model_inputs, **pipeline_kwargs).run()


@pytest.mark.slow
@common.XfailIfNoCorstone320
@common.parametrize("per_channel_quantization", quant_test_data)
def test_mobilenet_v1_025_u85_INT(per_channel_quantization):
    """Run quantized MobileNetV1 0.25 on the Ethos-U85 (Corstone-320) FVP."""
    pipeline_kwargs = {
        "aten_ops": [],
        "exir_ops": [],
        "use_to_edge_transform_and_lower": True,
        "per_channel_quantization": per_channel_quantization,
        "atol": 0.25,
        "qtol": 1,
    }
    EthosU85PipelineINT[input_t](model, model_inputs, **pipeline_kwargs).run()
Loading
Loading