Skip to content

Commit 32a86b6

Browse files
authored
Arm backend: Add VGF Swin2SR example and OOTB smoke test (pytorch#19670)
Summary - Adds a VGF Swin2SR super-resolution example for Arm. - Adds FP and INT8 export/eval flows with deterministic demo assets. - Adds Arm OOTB smoke coverage and model tests. Validation - bash -n backends/arm/test/test_arm_ootb.sh - PYTHONPATH=. /Users/usazah01/src/executorch/env/bin/python -m pytest -q -p no:rerunfailures backends/arm/test/models/test_swin2sr_arm.py -s - PATH=/Users/usazah01/src/executorch/env/bin:$PATH backends/arm/scripts/pre-push cc @digantdesai @freddan80 @per @zingo @oscarandersson8218 @mansnils @Sebastian-Larsson @robell @rascani --------- Signed-off-by: Usamah Zaheer <usamah.zaheer@arm.com>
1 parent 82cf123 commit 32a86b6

11 files changed

Lines changed: 1678 additions & 2 deletions

File tree

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
# Copyright 2026 Arm Limited and/or its affiliates.
2+
#
3+
# This source code is licensed under the BSD-style license found in the
4+
# LICENSE file in the root directory of this source tree.
5+
6+
from typing import Tuple
7+
8+
import torch
9+
10+
from executorch.backends.arm.test import common
11+
from executorch.backends.arm.test.tester.test_pipeline import (
12+
TosaPipelineFP,
13+
TosaPipelineINT,
14+
VgfPipeline,
15+
)
16+
from transformers import Swin2SRConfig, Swin2SRForImageSuperResolution
17+
18+
input_t = Tuple[torch.Tensor]
19+
20+
# Edge-dialect operator names passed to every pipeline below as ``exir_op``.
# All entries share the same aten prefix, so only the suffixes are spelled out.
_EDGE_ATEN_PREFIX = "executorch_exir_dialects_edge__ops_aten_"
exir_ops = [
    _EDGE_ATEN_PREFIX + op_suffix
    for op_suffix in (
        "add_Tensor",
        "convolution_default",
        "layer_norm_default",
        "matmul_default",
        "mul_Tensor",
        "pixel_shuffle_default",
        "softmax_int",
    )
]
29+
30+
31+
class TinySwin2SR(torch.nn.Module):
    """Wrapper around a small ``Swin2SRForImageSuperResolution`` instance.

    The wrapped model is constructed directly from a ``Swin2SRConfig`` (no
    checkpoint is loaded here) and put in eval mode. ``forward`` exposes only
    the ``reconstruction`` field of the model output so the module returns a
    single tensor.
    """

    def __init__(self):
        super().__init__()
        # Deliberately tiny hyper-parameters: 8x8 images, two stages with one
        # block and one attention head each, x2 pixel-shuffle upsampling.
        tiny_config_kwargs = {
            "image_size": 8,
            "patch_size": 1,
            "num_channels": 3,
            "embed_dim": 16,
            "depths": [1, 1],
            "num_heads": [1, 1],
            "window_size": 4,
            "upscale": 2,
            "img_range": 1.0,
            "resi_connection": "1conv",
            "upsampler": "pixelshuffle",
        }
        tiny_config = Swin2SRConfig(**tiny_config_kwargs)
        self.model = Swin2SRForImageSuperResolution(tiny_config).eval()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the wrapped model on ``x`` and return its ``reconstruction`` output."""
        model_output = self.model(pixel_values=x, return_dict=True)
        return model_output.reconstruction
51+
52+
53+
def make_model_and_inputs() -> tuple[torch.nn.Module, input_t]:
    """Build the shared test fixture.

    Returns:
        A ``(module, inputs)`` pair: an eval-mode ``TinySwin2SR`` and a
        one-element tuple holding a ``torch.rand(1, 3, 8, 8)`` tensor.
    """
    # The module is created before the random input, so random-number
    # consumption happens in the same order on every call.
    module = TinySwin2SR().eval()
    example_input = torch.rand(1, 3, 8, 8)
    return module, (example_input,)
57+
58+
59+
def test_swin2sr_tosa_FP():
    """Run TinySwin2SR through ``TosaPipelineFP`` with two stages removed."""
    module, example_inputs = make_model_and_inputs()
    fp_pipeline = TosaPipelineFP[input_t](
        module,
        example_inputs,
        aten_op=[],
        exir_op=exir_ops,
        use_to_edge_transform_and_lower=True,
    )
    # The exir op-count check is dropped, and so is the output comparison.
    # TODO: MLETORCH-2134 re-enable once Swin2SR runs on the TOSA ref model.
    for stage_name in ("check_count.exir", "run_method_and_compare_outputs"):
        fp_pipeline.pop_stage(stage_name)
    fp_pipeline.run()
72+
73+
74+
def test_swin2sr_tosa_INT():
    """Run TinySwin2SR through ``TosaPipelineINT`` with two stages removed."""
    module, example_inputs = make_model_and_inputs()
    int_pipeline = TosaPipelineINT[input_t](
        module,
        example_inputs,
        aten_op=[],
        exir_op=exir_ops,
        use_to_edge_transform_and_lower=True,
    )
    # The exir op-count check is dropped, and so is the output comparison.
    # TODO: MLETORCH-2134 re-enable once Swin2SR runs on the TOSA ref model.
    for stage_name in ("check_count.exir", "run_method_and_compare_outputs"):
        int_pipeline.pop_stage(stage_name)
    int_pipeline.run()
87+
88+
89+
@common.SkipIfNoModelConverter
def test_swin2sr_vgf_quant():
    """Run TinySwin2SR through ``VgfPipeline`` with ``quantize=True``."""
    module, example_inputs = make_model_and_inputs()
    quant_pipeline = VgfPipeline[input_t](
        module,
        example_inputs,
        aten_op=[],
        exir_op=exir_ops,
        use_to_edge_transform_and_lower=True,
        quantize=True,
    )
    # The exir op-count check is dropped, and so is the output comparison.
    # TODO: MLETORCH-2134 re-enable once Swin2SR runs on the TOSA ref model.
    for stage_name in ("check_count.exir", "run_method_and_compare_outputs"):
        quant_pipeline.pop_stage(stage_name)
    quant_pipeline.run()
104+
105+
106+
@common.SkipIfNoModelConverter
def test_swin2sr_vgf_no_quant():
    """Run TinySwin2SR through ``VgfPipeline`` with ``quantize=False``.

    Only the exir op-count check is removed; every other pipeline stage is
    left in place.
    """
    module, example_inputs = make_model_and_inputs()
    fp_vgf_pipeline = VgfPipeline[input_t](
        module,
        example_inputs,
        aten_op=[],
        exir_op=exir_ops,
        use_to_edge_transform_and_lower=True,
        quantize=False,
    )
    fp_vgf_pipeline.pop_stage("check_count.exir")
    fp_vgf_pipeline.run()

backends/arm/test/test_arm_ootb.sh

Lines changed: 131 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ if [[ "$1" == "-h" || "$1" == "--help" ]]; then
2222
fi
2323

2424
if [[ $# -eq 0 ]]; then
25-
TEST_SUITES=(run_ootb_tests_ethos_u run_ootb_tests_tosa run_ootb_tests_vgf run_deit_e2e_ethos_u)
25+
TEST_SUITES=(run_ootb_tests_ethos_u run_ootb_tests_tosa run_ootb_tests_vgf run_deit_e2e_ethos_u run_swin2sr_e2e_vgf)
2626
else
2727
TEST_SUITES=("$1")
2828
fi
@@ -66,7 +66,7 @@ run_deit_e2e_ethos_u() {
6666
local image_path="${work_root}/dog.bmp"
6767
local pte_path="${export_dir}/deit_tiny_smoke.pte"
6868
local toolchain_file="${et_root_dir}/examples/arm/ethos-u-setup/arm-none-eabi-gcc.cmake"
69-
echo "${FUNCNAME}: Work root is ${work_root}; existing artifacts will be reused if present"
69+
echo "${FUNCNAME}: Work directory: ${work_root}; existing artifacts will be reused if present"
7070

7171
mkdir -p "${model_dir}" "${export_dir}" "${build_dir}"
7272

@@ -150,6 +150,135 @@ run_deit_e2e_ethos_u() {
150150
echo "${FUNCNAME}: PASS"
151151
}
152152

153+
#######################################
# End-to-end smoke test for the Swin2SR VGF super-resolution example.
# Steps, in order: source setup_path.sh, pip-install the example
# requirements, generate the demo assets, build the VKML executor_runner,
# export the FP and INT8 .pte files (checking the expected side artifacts),
# then run the exported models through run_super_resolution.py. The INT8
# runtime step only runs when `uname -s` reports Linux.
# Globals:   et_root_dir (written; intentionally left global because other
#            parts of this script may read it)
# Arguments: none
# Outputs:   progress messages on stdout, failure diagnostics on stderr
# Returns:   0 when every step and artifact check succeeds; otherwise the
#            failing command's status, or 1 for a missing artifact
#######################################
run_swin2sr_e2e_vgf() {
    echo "${FUNCNAME[0]}: Prepare demo assets, export FP/INT8, build, and run the Swin2SR VGF e2e test"

    local script_dir
    script_dir=$(cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd)
    et_root_dir=$(cd "${script_dir}/../../.." && pwd)
    local example_dir="${et_root_dir}/examples/arm/super_resolution_example_vgf"
    local work_root="${et_root_dir}/arm_test/swin2sr_vgf_ootb_smoke"
    local demo_dir="${work_root}/demo_assets"
    local runtime_dir="${demo_dir}/runtime"
    local runner_path="${work_root}/executor_runner"
    local input_image="${runtime_dir}/demo_lr_64.png"
    local fp_pte_path="${demo_dir}/swin2sr_x2_vgf_fp.pte"
    local int8_pte_path="${demo_dir}/swin2sr_x2_vgf_int8.pte"
    local fp_output_image="${runtime_dir}/demo_fp_128.png"
    local int8_output_image="${runtime_dir}/demo_int8_128.png"
    # Checkpoint id and pinned revision handed to the export script.
    local checkpoint_id="caidas/swin2SR-classical-sr-x2-64"
    local checkpoint_revision="cee1c923c6a37361c6e5650b65dcf4be821e5d52"
    local setup_path_script="${et_root_dir}/examples/arm/arm-scratch/setup_path.sh"
    local artifact
    echo "${FUNCNAME[0]}: Work directory: ${work_root}; existing artifacts will be reused if present"

    mkdir -p "${demo_dir}" "${runtime_dir}" || return

    # Quoted so a checkout path containing whitespace is not word-split.
    source "${setup_path_script}"

    # Each step below propagates its failure explicitly: artifacts from an
    # earlier run are reused, so a stale file could otherwise satisfy the
    # existence checks after a failed step.
    echo "${FUNCNAME[0]}: Installing example requirements"
    pip install -r "${example_dir}/requirements.txt" || return

    echo "${FUNCNAME[0]}: Preparing deterministic demo assets"
    python3 "${example_dir}/model_export/prepare_demo_assets.py" \
        --output-dir "${demo_dir}" || return

    echo "${FUNCNAME[0]}: Building VKML executor_runner"
    "${et_root_dir}/backends/arm/scripts/build_executor_runner_vkml.sh" \
        --output="${work_root}" || return

    # Fall back to searching the work directory when the runner is not at
    # the top level of the build output.
    if [[ ! -f "${runner_path}" ]]; then
        runner_path=$(find "${work_root}" -name executor_runner -type f | head -n 1)
    fi
    if [[ ! -f "${runner_path}" ]]; then
        echo "${FUNCNAME[0]}: Missing executor_runner under ${work_root}" >&2
        return 1
    fi

    echo "${FUNCNAME[0]}: Exporting FP Swin2SR model"
    python3 "${example_dir}/model_export/export_super_resolution.py" \
        --model-name swin2sr \
        --checkpoint "${checkpoint_id}" \
        --checkpoint-revision "${checkpoint_revision}" \
        --input-height 64 \
        --input-width 64 \
        --quantization-mode none \
        --eval-lr-dir "${demo_dir}/eval/lr" \
        --eval-hr-dir "${demo_dir}/eval/hr" \
        --num-eval-samples 2 \
        --output-path "${fp_pte_path}" || return

    for artifact in \
        "${fp_pte_path}" \
        "${demo_dir}/swin2sr_x2_vgf_fp.json" \
        "${demo_dir}/swin2sr_x2_vgf_fp_delegation.txt" \
        "${demo_dir}/swin2sr_x2_vgf_fp_metrics.json"; do
        if [[ ! -f "${artifact}" ]]; then
            echo "${FUNCNAME[0]}: Missing FP export artifact ${artifact}" >&2
            return 1
        fi
    done

    echo "${FUNCNAME[0]}: Exporting INT8 Swin2SR model"
    python3 "${example_dir}/model_export/export_super_resolution.py" \
        --model-name swin2sr \
        --checkpoint "${checkpoint_id}" \
        --checkpoint-revision "${checkpoint_revision}" \
        --input-height 64 \
        --input-width 64 \
        --quantization-mode int8 \
        --calibration-lr-dir "${demo_dir}/calibration/lr" \
        --eval-lr-dir "${demo_dir}/eval/lr" \
        --eval-hr-dir "${demo_dir}/eval/hr" \
        --num-calibration-samples 4 \
        --num-eval-samples 2 \
        --output-path "${int8_pte_path}" || return

    for artifact in \
        "${int8_pte_path}" \
        "${demo_dir}/swin2sr_x2_vgf_int8.json" \
        "${demo_dir}/swin2sr_x2_vgf_int8_delegation.txt" \
        "${demo_dir}/swin2sr_x2_vgf_int8_metrics.json"; do
        if [[ ! -f "${artifact}" ]]; then
            echo "${FUNCNAME[0]}: Missing INT8 export artifact ${artifact}" >&2
            return 1
        fi
    done

    echo "${FUNCNAME[0]}: Running FP runtime smoke"
    python3 "${example_dir}/runtime/run_super_resolution.py" \
        --model-path "${fp_pte_path}" \
        --runner "${runner_path}" \
        --input-image "${input_image}" \
        --output-image "${fp_output_image}" \
        --working-dir "${runtime_dir}/fp_work" || return

    if [[ ! -f "${fp_output_image}" ]]; then
        echo "${FUNCNAME[0]}: Missing FP runtime output ${fp_output_image}" >&2
        return 1
    fi

    if [[ "$(uname -s)" == "Linux" ]]; then
        echo "${FUNCNAME[0]}: Running INT8 runtime smoke"
        python3 "${example_dir}/runtime/run_super_resolution.py" \
            --model-path "${int8_pte_path}" \
            --runner "${runner_path}" \
            --input-image "${input_image}" \
            --output-image "${int8_output_image}" \
            --working-dir "${runtime_dir}/int8_work" || return

        if [[ ! -f "${int8_output_image}" ]]; then
            echo "${FUNCNAME[0]}: Missing INT8 runtime output ${int8_output_image}" >&2
            return 1
        fi
    else
        # TODO: MLETORCH-2105 remove this once the next ML SDK release supports
        # quantized VKML runtime validation on Darwin.
        echo "${FUNCNAME[0]}: Skipping INT8 runtime on $(uname -s); quantized VKML runtime validation is Linux-only"
    fi

    echo "${FUNCNAME[0]}: PASS"
}
281+
153282
for suite in "${TEST_SUITES[@]}"; do
154283
"${suite}"
155284
done

examples/arm/README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,16 @@ $ python3 -m backends.arm.scripts.aot_arm_compiler --model_name=mv2 --target=eth
6464

6565
`aot_arm_compiler.py` is called from the scripts below so you don't need to, but it can be useful to do by hand in some cases.
6666

67+
## Host VGF example applications
68+
69+
The Arm examples directory also contains host-side VGF reference flows for
70+
specific tasks:
71+
72+
- `examples/arm/image_classification_example_vgf` for DeiT image
73+
classification.
74+
- `examples/arm/super_resolution_example_vgf` for Swin2SR image
75+
super-resolution.
76+
6777

6878
## ExecuTorch on Arm Ethos-U55/U65 and U85
6979

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# Swin2SR Super-Resolution Example Application (VGF)
2+
3+
This example shows how to export a Swin2SR image super-resolution model for the
4+
Arm VGF backend and run it on host using the generic `executor_runner` binary.
5+
It is a host-only workflow; a device-specific VGF runtime application is out of
6+
scope here.
7+
8+
## Layout
9+
10+
- `model_export/prepare_demo_assets.py` — Creates a deterministic text-heavy
11+
demo input plus small LR/HR calibration and evaluation sets from a repo-local
12+
screenshot.
13+
- `model_export/README.md` — Dataset-backed FP/INT8 export, PTQ
14+
calibration and evaluation, and `.pte` generation.
15+
- `runtime/README.md` — Running the exported `.pte` on host using
16+
`executor_runner` and converting the output tensor back into an image.
17+
18+
Use `examples/arm/image_classification_example_vgf` for the image
19+
classification flow.

0 commit comments

Comments
 (0)