Skip to content

Commit 1925873

Browse files
committed
Arm backend: Add Ethos-U FVP tests for MLPerf Tiny models
Add model definitions and Arm backend tests for four MLPerf Tiny benchmark models: ResNet8, DS-CNN, Deep AutoEncoder, and MobileNetV1-0.25.

Model definitions are placed under examples/models/mlperf_tiny/. Each model has tests for the tosa_FP, tosa_INT, u55_INT and u85_INT pipelines in backends/arm/test/models/.

Notable model adaptations for Arm delegation:
- Deep AutoEncoder: Fuse Linear + BatchNorm1d pairs before export, since the TOSA quantizer only annotates conv + batch_norm patterns.
- DS-CNN: Replace AvgPool2d(24, 5) with AdaptiveAvgPool2d(1) to satisfy the Ethos-U55 stride <= 3 constraint; the DecomposeAdaptiveAvgPool2dPass decomposes it into stride-1 pools.

Change-Id: I8dbf5e8a4b80996faab9f850c21740899f6b36fd
Signed-off-by: Tirui Wu <tirui.wu@arm.com>
1 parent bf2243a commit 1925873

12 files changed

Lines changed: 761 additions & 4 deletions

File tree

backends/arm/MODELS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
<!-- Copyright 2025-2026 Arm Limited and/or its affiliates. -->
22
# The following file contains all models that have been confirmed to be functional and tested for the Arm backend:
3+
# Note: Deep AutoEncoder requires manual Linear+BatchNorm1d fusion as the quantizer does not yet support this pattern.
4+
# Note: DS CNN requires an AvgPool2d workaround on Ethos-U55: AvgPool2d(24, 5) is replaced with AdaptiveAvgPool2d(1) because the Ethos-U55 only supports pooling strides <= 3.
35
- Conformer
6+
- Deep AutoEncoder
47
- Deit Tiny
58
- DeepLab v3 (DL3)
9+
- DS CNN
610
- Inception v3 (IC3)
711
- Llama
812
- Gemma3n
913
- Long Short-Term Memory (LSTM)
14+
- MobileNet V1 0.25
1015
- MobileNet v2 (MV2)
1116
- MobileNet v3 (MV3)
1217
- Some popular torch.nn.functional models (NN functional)
@@ -16,6 +21,7 @@
1621
- Neural Super Sampler (NSS)
1722
- Phi-3
1823
- ResNet 18
24+
- ResNet-8
1925
- Wav2Letter (W2L)
2026
- Stable Diffusion:
2127
* CLIP Text Encoder (CLIP Text with Projection)
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
# Copyright 2026 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
"""Ethos-U FVP tests for the MLPerf Tiny anomaly detection Deep AutoEncoder."""
6+
7+
from typing import Tuple
8+
9+
import pytest
10+
import torch
11+
import torch.nn as nn
12+
from executorch.backends.arm.test import common
13+
from executorch.backends.arm.test.tester.test_pipeline import (
14+
EthosU55PipelineINT,
15+
EthosU85PipelineINT,
16+
TosaPipelineFP,
17+
TosaPipelineINT,
18+
)
19+
20+
from executorch.examples.models.mlperf_tiny import DeepAutoEncoderModel
21+
from torch.nn.utils.fusion import fuse_linear_bn_eval
22+
23+
24+
def _fuse_linear_bn(mod: nn.Module) -> nn.Module:
25+
"""Fuse Linear + BatchNorm1d pairs in the model.
26+
27+
The TOSA quantizer does not annotate linear+batch_norm patterns, so we fold
28+
the BatchNorm1d into the preceding Linear before export.
29+
TODO: Remove once the quantizer supports linear+bn.
30+
31+
"""
32+
if not isinstance(mod, nn.Sequential):
33+
for name, child in mod.named_children():
34+
setattr(mod, name, _fuse_linear_bn(child))
35+
return mod
36+
new_layers = []
37+
layers = list(mod)
38+
i = 0
39+
while i < len(layers):
40+
if (
41+
isinstance(layers[i], nn.Linear)
42+
and i + 1 < len(layers)
43+
and isinstance(layers[i + 1], nn.BatchNorm1d)
44+
):
45+
new_layers.append(fuse_linear_bn_eval(layers[i], layers[i + 1])) # type: ignore[type-var, arg-type]
46+
i += 2
47+
else:
48+
new_layers.append(_fuse_linear_bn(layers[i]))
49+
i += 1
50+
return nn.Sequential(*new_layers)
51+
52+
53+
# Shared fixtures for every test in this module.
_wrapper = DeepAutoEncoderModel()
# Linear + BatchNorm1d pairs are folded before export (see _fuse_linear_bn).
model = _fuse_linear_bn(_wrapper.get_eager_model())
model_inputs = _wrapper.get_example_inputs()
input_t = Tuple[torch.Tensor]

# Test-id -> value passed as ``per_channel_quantization`` to the INT pipelines.
quant_test_data = {
    "per_channel_quantization=true": True,
    "per_channel_quantization=false": False,
}
62+
63+
64+
def test_deep_autoencoder_tosa_FP():
    """Run the module-level Deep AutoEncoder ``model`` through TosaPipelineFP."""
    pipeline = TosaPipelineFP[input_t](
        model,
        model_inputs,
        aten_op=[],
        exir_op=[],
        use_to_edge_transform_and_lower=True,
    )
    pipeline.run()
73+
74+
75+
@common.parametrize("per_channel_quantization", quant_test_data)
def test_deep_autoencoder_tosa_INT(per_channel_quantization):
    """Run the Deep AutoEncoder ``model`` through TosaPipelineINT.

    Parametrized over per-channel and per-tensor quantization.
    """
    pipeline = TosaPipelineINT[input_t](
        model,
        model_inputs,
        aten_op=[],
        exir_op=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
        # NOTE(review): presumably None turns these two comparisons off -
        # confirm against TosaPipelineINT.
        frobenius_threshold=None,
        cosine_threshold=None,
    )
    pipeline.run()
90+
91+
92+
@pytest.mark.slow
@common.XfailIfNoCorstone300
@common.parametrize("per_channel_quantization", quant_test_data)
def test_deep_autoencoder_u55_INT(per_channel_quantization):
    """Run the Deep AutoEncoder ``model`` through EthosU55PipelineINT.

    Parametrized over per-channel and per-tensor quantization.
    """
    pipeline = EthosU55PipelineINT[input_t](
        model,
        model_inputs,
        aten_ops=[],
        exir_ops=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
    )
    pipeline.run()
107+
108+
109+
@pytest.mark.slow
@common.XfailIfNoCorstone320
@common.parametrize("per_channel_quantization", quant_test_data)
def test_deep_autoencoder_u85_INT(per_channel_quantization):
    """Run the Deep AutoEncoder ``model`` through EthosU85PipelineINT.

    Parametrized over per-channel and per-tensor quantization.
    """
    pipeline = EthosU85PipelineINT[input_t](
        model,
        model_inputs,
        aten_ops=[],
        exir_ops=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
    )
    pipeline.run()
Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
# Copyright 2026 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
"""Ethos-U FVP tests for the MLPerf Tiny Keyword Spotting DS-CNN model."""
6+
7+
from typing import Tuple
8+
9+
import pytest
10+
import torch
11+
import torch.nn as nn
12+
from executorch.backends.arm.test import common
13+
from executorch.backends.arm.test.tester.test_pipeline import (
14+
EthosU55PipelineINT,
15+
EthosU85PipelineINT,
16+
TosaPipelineFP,
17+
TosaPipelineINT,
18+
)
19+
20+
from executorch.examples.models.mlperf_tiny import DSCNNKWSModel
21+
22+
# Shared fixtures for every test in this module.
_wrapper = DSCNNKWSModel()
model = _wrapper.get_eager_model()
# TODO: Remove once a pass decomposes large-stride AvgPool2d.
# Replace AvgPool2d(24,5) with AdaptiveAvgPool2d(1) so the
# DecomposeAdaptiveAvgPool2dPass can break it into stride-1
# pools that satisfy the Ethos-U55 stride <= 3 constraint.
model.pool = nn.AdaptiveAvgPool2d(output_size=1)  # type: ignore[assignment]
model_inputs = _wrapper.get_example_inputs()
input_t = Tuple[torch.Tensor]

# Test-id -> value passed as ``per_channel_quantization`` to the INT pipelines.
quant_test_data = {
    "per_channel_quantization=true": True,
    "per_channel_quantization=false": False,
}
36+
37+
38+
def test_ds_cnn_tosa_FP():
    """Run the module-level DS-CNN ``model`` through TosaPipelineFP."""
    pipeline = TosaPipelineFP[input_t](
        model,
        model_inputs,
        aten_op=[],
        exir_op=[],
        use_to_edge_transform_and_lower=True,
    )
    pipeline.run()
47+
48+
49+
@common.parametrize("per_channel_quantization", quant_test_data)
def test_ds_cnn_tosa_INT(per_channel_quantization):
    """Run the DS-CNN ``model`` through TosaPipelineINT.

    Parametrized over per-channel and per-tensor quantization.
    """
    pipeline = TosaPipelineINT[input_t](
        model,
        model_inputs,
        aten_op=[],
        exir_op=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
        # NOTE(review): presumably None turns these two comparisons off -
        # confirm against TosaPipelineINT.
        frobenius_threshold=None,
        cosine_threshold=None,
    )
    pipeline.run()
64+
65+
66+
@pytest.mark.slow
@common.XfailIfNoCorstone300
@common.parametrize("per_channel_quantization", quant_test_data)
def test_ds_cnn_u55_INT(per_channel_quantization):
    """Run the DS-CNN ``model`` through EthosU55PipelineINT.

    Parametrized over per-channel and per-tensor quantization.
    """
    pipeline = EthosU55PipelineINT[input_t](
        model,
        model_inputs,
        aten_ops=[],
        exir_ops=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
    )
    pipeline.run()
81+
82+
83+
@pytest.mark.slow
@common.XfailIfNoCorstone320
@common.parametrize("per_channel_quantization", quant_test_data)
def test_ds_cnn_u85_INT(per_channel_quantization):
    """Run the DS-CNN ``model`` through EthosU85PipelineINT.

    Parametrized over per-channel and per-tensor quantization.
    """
    pipeline = EthosU85PipelineINT[input_t](
        model,
        model_inputs,
        aten_ops=[],
        exir_ops=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
    )
    pipeline.run()
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
# Copyright 2026 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
"""Ethos-U FVP tests for the MLPerf Tiny Visual Wake Words MobileNetV1 (width
6+
0.25).
7+
"""
8+
9+
from typing import Tuple
10+
11+
import pytest
12+
import torch
13+
from executorch.backends.arm.test import common
14+
from executorch.backends.arm.test.tester.test_pipeline import (
15+
EthosU55PipelineINT,
16+
EthosU85PipelineINT,
17+
TosaPipelineFP,
18+
TosaPipelineINT,
19+
)
20+
21+
from executorch.examples.models.mlperf_tiny import MobileNetV1025Model
22+
23+
# Shared fixtures for every test in this module.
_wrapper = MobileNetV1025Model()
model = _wrapper.get_eager_model()
model_inputs = _wrapper.get_example_inputs()
input_t = Tuple[torch.Tensor]

# Test-id -> value passed as ``per_channel_quantization`` to the INT pipelines.
quant_test_data = {
    "per_channel_quantization=true": True,
    "per_channel_quantization=false": False,
}
32+
33+
34+
def test_mobilenet_v1_025_tosa_FP():
    """Run the module-level MobileNetV1-0.25 ``model`` through TosaPipelineFP."""
    pipeline = TosaPipelineFP[input_t](
        model,
        model_inputs,
        aten_op=[],
        exir_op=[],
        use_to_edge_transform_and_lower=True,
    )
    pipeline.run()
43+
44+
45+
@common.parametrize("per_channel_quantization", quant_test_data)
def test_mobilenet_v1_025_tosa_INT(per_channel_quantization):
    """Run the MobileNetV1-0.25 ``model`` through TosaPipelineINT.

    Parametrized over per-channel and per-tensor quantization.
    """
    pipeline = TosaPipelineINT[input_t](
        model,
        model_inputs,
        aten_op=[],
        exir_op=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
        # NOTE(review): presumably None turns these two comparisons off -
        # confirm against TosaPipelineINT.
        frobenius_threshold=None,
        cosine_threshold=None,
    )
    pipeline.run()
60+
61+
62+
@pytest.mark.slow
@common.XfailIfNoCorstone300
@common.parametrize("per_channel_quantization", quant_test_data)
def test_mobilenet_v1_025_u55_INT(per_channel_quantization):
    """Run the MobileNetV1-0.25 ``model`` through EthosU55PipelineINT.

    Parametrized over per-channel and per-tensor quantization.
    """
    pipeline = EthosU55PipelineINT[input_t](
        model,
        model_inputs,
        aten_ops=[],
        exir_ops=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
    )
    pipeline.run()
77+
78+
79+
@pytest.mark.slow
@common.XfailIfNoCorstone320
@common.parametrize("per_channel_quantization", quant_test_data)
def test_mobilenet_v1_025_u85_INT(per_channel_quantization):
    """Run the MobileNetV1-0.25 ``model`` through EthosU85PipelineINT.

    Parametrized over per-channel and per-tensor quantization.
    """
    pipeline = EthosU85PipelineINT[input_t](
        model,
        model_inputs,
        aten_ops=[],
        exir_ops=[],
        use_to_edge_transform_and_lower=True,
        per_channel_quantization=per_channel_quantization,
        atol=0.25,
        qtol=1,
    )
    pipeline.run()

0 commit comments

Comments
 (0)