Prevent _safe_softmax decomposition in trace and rewire replaceSafeSoftmaxWithSoftmax

ethansfng · web-flow · commit 824cbff02f85 · 2026-05-16T03:27:05.000Z
Differential Revision: D105367634 Pull Request resolved: pytorch#19619
diff --git a/backends/cadence/aot/compiler_funcs.py b/backends/cadence/aot/compiler_funcs.py
@@ -35,6 +35,7 @@ def trace(
         model.eval()
 
     decomp_table = torch.export.default_decompositions()
+    ops_to_keep = [*(ops_to_keep or []), torch.ops.aten._safe_softmax.default]
     # pyre-fixme[6]: For 1st argument expected `Dict[typing.Callable[..., typing.Any
     remove_decompositions(decomp_table, ops_to_keep)
     program = torch.export.export(model, inputs, strict=strict).run_decompositions(
diff --git a/backends/cadence/aot/passes.py b/backends/cadence/aot/passes.py
@@ -33,6 +33,7 @@
 from executorch.backends.cadence.aot.replace_ops import (
     CadenceReplaceOpsInGraph,
     ReplaceMulTensorWithMulAndFullOpsPass,
+    ReplaceSafeSoftmaxWithSoftmax,
 )
 from executorch.backends.cadence.aot.simplify_ops import CadenceSimplifyOpsInGraph
 from executorch.backends.cadence.aot.type_dispatch import CompileTimeTypeDispatchPass
@@ -131,7 +132,8 @@ def apply_torch_ops_passes(expo_program: ExportedProgram) -> ExportedProgram:
     """
 
     aten_passes: List[Callable[[torch.fx.GraphModule], Optional[PassResult]]] = [
-        ReplaceMulTensorWithMulAndFullOpsPass()
+        ReplaceSafeSoftmaxWithSoftmax(),
+        ReplaceMulTensorWithMulAndFullOpsPass(),
     ]
     # TODO(T230417247): Use PassResult which is currently ignored.
     PassManager(aten_passes)(expo_program.graph_module)