pulp-platform · Xeratec · Nov 21, 2025 · Jul 29, 2025 · Aug 7, 2025 · Sep 18, 2025
@@ -47,11 +47,20 @@ jobs:
           {"name":"Hardswish","L1":[750]},
           {"name":"RQHardswish","L1":[750]},
           {"name":"testFloatGEMM","L1":[8000]},
-          {"name":"testFloat2DConvolution","L1":[8000]},
+
+          {"name":"testFloat2DConvolution","L1":[1600]},
+          {"name":"testFloat2DConvolutionBias","L1":[6600]},
+          {"name":"testFloat2DConvolutionZeroBias","L1":[6600]},
+
+          {"name":"testFloat2DDWConvolution","L1":[7200]},
+          {"name":"testFloat2DDWConvolutionBias","L1":[7200]},
+          {"name":"testFloat2DDWConvolutionZeroBias","L1":[7200]},
+
           {"name":"testFloatLayerNorm","L1":[2000]},
-          {"name":"testFloatRelu","L1":[2000]},
           {"name":"testFloatMaxPool","L1":[2000]},
           {"name":"testFloatMatmul","L1":[2000]},
+          {"name":"testFloatRelu","L1":[2000]},
+          {"name":"testFloatReshapeWithSkipConnection","L1":[1400]},
           {"name":"testFloatSoftmax","L1":[4000]},
           {"name":"testFloatTranspose","L1":[2000]},
           {"name":"testFloatMul","L1":[2000]},
@@ -78,11 +87,20 @@ jobs:
           {"name":"Hardswish","L1":[750]},
           {"name":"RQHardswish","L1":[800]},
           {"name":"testFloatGEMM","L1":[8000]},
-          {"name":"testFloat2DConvolution","L1":[15000]},
+
+          {"name":"testFloat2DConvolution","L1":[2000]},
+          {"name":"testFloat2DConvolutionBias","L1":[8800]},
+          {"name":"testFloat2DConvolutionZeroBias","L1":[8800]},
+
+          {"name":"testFloat2DDWConvolution","L1":[9800]},
+          {"name":"testFloat2DDWConvolutionBias","L1":[10000]},
+          {"name":"testFloat2DDWConvolutionZeroBias","L1":[9800]},
+
           {"name":"testFloatLayerNorm","L1":[2000]},
-          {"name":"testFloatRelu","L1":[2000]},
           {"name":"testFloatMaxPool","L1":[5000]},
           {"name":"testFloatMatmul","L1":[5000]},
+          {"name":"testFloatRelu","L1":[20]},
+          {"name":"testFloatReshapeWithSkipConnection","L1":[2600]},
           {"name":"testFloatSoftmax","L1":[8000]},
           {"name":"testFloatTranspose","L1":[2000]},
           {"name":"testFloatMul","L1":[2000]}
@@ -117,9 +135,11 @@ jobs:
           - name: "MLPerf/AnomalyDetection"
             L1: [64000]
           - name: "CCT/CCT_1_16_16_8"
-            L1: [64000]
+            L1: [2000, 64000]
           - name: "testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_8"
-            L1: [64000]
+            L1: [4000, 64000]
+          - name: "testFloatDemoTinyViT"
+            L1: [4000]
         num-cores: [8]
     uses: ./.github/workflows/_runner-siracusa-tiled.yml
     with:
@@ -148,9 +168,11 @@ jobs:
           - name: "microLlama/microLlama1"
             L1: [60000, 10000, 5000]
           - name: "CCT/CCT_2_32_32_128"
-            L1: [128000]
+            L1: [64000, 128000]
           - name: "testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128"
-            L1: [64000]
+            L1: [32000, 64000]
+          - name: "testFloatDemoTinyViT"
+            L1: [4000]
         num-cores: [8]
         default-memory-level: ["L3"]
     uses: ./.github/workflows/_runner-siracusa-tiled.yml
@@ -186,9 +208,11 @@ jobs:
           - name: "microLlama/microLlama8_parallel"
             L1: [60000, 20000, 10000]
           - name: "CCT/CCT_2_32_32_128"
-            L1: [128000]
+            L1: [64000, 128000]
           - name: "testTrainCCT/CCT1_Classifier_Training/CCT_1_16_16_128"
-            L1: [64000]
+            L1: [8000, 64000]
+          - name: "testFloatDemoTinyViT"
+            L1: [4000]
         num-cores: [8]
         double-buffer: [true]
         default-memory-level: ["L3"]

@@ -4,6 +4,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
 ## Unreleased (Planned Release Target: v0.2.1)
 
 ### List of Pull Requests
+- Demo TinyViT compatibility with tiled Siracusa [#124](https://github.com/pulp-platform/Deeploy/pull/124)
 - TinyViT on non-tiled Siracusa [#117](https://github.com/pulp-platform/Deeploy/pull/117)
 - Support Fully Asynchronous DMAs [#114](https://github.com/pulp-platform/Deeploy/pull/114)
 - Disallow shape inference [#128](https://github.com/pulp-platform/Deeploy/pull/128)
@@ -25,6 +26,10 @@ This file contains the changelog for the Deeploy project. The changelog is divid
 - Fix bias hoisting in generic GEMM with no bias [#126](https://github.com/pulp-platform/Deeploy/pull/126)
 
 ### Added
+- Support for input tiling for PULP FP regular and DW conv 2D.
+- CI tests for tiled Siracusa FP regular and DW conv 2D, with and without bias, for skip connections, and for the demo version of TinyViT.
+- Documentation for PULP FP regular and DW conv 2D and MatMul tile constraints.
+- PULP ReduceMean and Slice tile constraints.
 - PULP 2D FP DW conv Im2Col template and kernel, with bias support.
 - Bias support for PULP 2D FP regular conv Im2Col in template & kernel.
 - PULP FP DW conv 2D parser.
@@ -70,6 +75,7 @@ This file contains the changelog for the Deeploy project. The changelog is divid
 - annotateNCores method to PULPDeployer that adds an `n_cores` key to all PULPClusterEngine templates' operatorRepresentations
 
 ### Changed
+- Decreased the maximum L1 memory limit for CI pipeline tests where compatible, thanks to the implementation of Conv2D input tiling support.
 - Reduced size of reshape & skip connection test, for non-tiled Siracusa memory compatibility.
 - Replaced platform-specific tags (`*-amd64`, `*-arm64`) with direct digest references in `Noelware/docker-manifest-action`.
 - mchan HAL is now reduced to bare-bones
@@ -109,6 +115,10 @@ This file contains the changelog for the Deeploy project. The changelog is divid
 - changed `_mapNode` to `_selectEngine` which reduces the responsibility of that function to, as the name states, just engine selection
 
 ### Fixed
+- Fixed PULP FP32 regular and DW Conv2D, and MatMul tile constraints.
+- Fixed type casting for tiling code generation.
+- Fixed bug in buffer name identification in code generation for tests with L3 default memory level.
+- Fixed PULP GELU kernel to use tanh approximation.
 - Fixed bug for non-batched elements in the PULPOpen FP GEMM and matmul templates.
 - Added underscore to the beginning of closure names to avoid naming issues when they start with unsupported first characters (like numbers).
 - Data types in the PULPOpen FP add and mul templates.

@@ -480,7 +480,7 @@ class _ReferenceBuffer(VariableBuffer):
     % if offset is None:
     ${type.typeName} ${name} = (${type.typeName}) ${referenceName};\\
     % else:
-    ${type.typeName} ${name} = (${type.typeName}) ${referenceName} + ${offset};\\
+    ${type.typeName} ${name} = (${type.typeName})((char*) ${referenceName} + ${offset});\\
     % endif
     """)
     deallocTemplate = NodeTemplate("")

@@ -154,13 +154,17 @@
 
 PULPSliceBindings = [
     NodeBinding(
-        SliceChecker([
-            PointerClass(type),
-            PointerClass(uint8_t),
-            PointerClass(uint8_t),
-            PointerClass(uint8_t),
-            PointerClass(uint8_t)
-        ], [PointerClass(type)]), SliceTemplate.referenceTemplate, ForkTransformer) for type in FloatDataTypes
+        SliceChecker(
+            [
+                PointerClass(float_type),  # data_in
+                PointerClass(int_type),  # starts
+                PointerClass(int_type),  # ends
+                PointerClass(int_type),  # axes
+                PointerClass(int_type)  # steps
+            ],
+            [PointerClass(float_type)]),
+        SliceTemplate.referenceTemplate,
+        ForkTransformer) for float_type in FloatDataTypes for int_type in IntegerDataTypes
 ]
 
 PULPReshapeBindings = [

@@ -29,21 +29,22 @@
     MergeConstAddAndRequantPass, MergeTrueIntegerDivRequantShiftPass, QuantPatternPass, RQSSplitPass, \
     SkipEmptyConcatPass, SkipUnityRequantPass, iGELURequantMergePass, iHardswishRequantMergePass
 from Deeploy.Targets.PULPOpen.Bindings import BasicDequantBindings, BasicQuantBindings, PULPConv1DBinding, \
-    PULPDMASliceBindings, PULPDWConv1DBinding, PULPFloatDWConv2DBindings, PULPReduceMeanBindings, PULPSliceBindings
+    PULPDMASliceBindings, PULPDWConv1DBinding
 from Deeploy.Targets.PULPOpen.Layers import PULPRQSConvLayer, PULPRQSGEMMLayer
 from Deeploy.Targets.PULPOpen.Parsers import PULPConv1DParser, PULPConv2DParser, PULPDWConv1DParser, \
     PULPDWConv2DParser, PULPFPConv2DParser, PULPFPDWConv2DParser, PULPGEMMParser, PULPMatrixVecParser, \
     PULPTallGEMMParser
 from Deeploy.Targets.PULPOpen.Templates import AllocateTemplate, FreeTemplate
 from Deeploy.Targets.PULPOpen.Tiler import PULPAddTilingReadyBindings, PULPConcatTilingReadyBindings, \
-    PULPConv2DTilingReadyBindings, PULPFlattenTilingReadyBindings, PULPFPGELUTilingReadyBindings, \
-    PULPFPGEMMTilingReadyBindings, PULPGatherTilingReadyBindings, PULPiHardswishTilingReadyBindings, \
-    PULPiRMSNormTilingReadyBindings, PULPiRQSGELUTilingReadyBindings, PULPLayernormTilingReadyBindings, \
-    PULPMatMulTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, PULPMulTilingReadyBindings, \
-    PULPReduceSumTilingReadyBindings, PULPReluTilingReadyBindings, PULPRQAddTilingReadyBindings, \
-    PULPRQSConv2DTilingReadyBindings, PULPRQSDWConv2DTilingReadyBindings, PULPRQSGEMMTilingReadyBindings, \
-    PULPRQSiHardswishTilingReadyBindings, PULPRQSMatrixVecTilingReadyBindings, PULPRQSTallGEMMTilingReadyBindings, \
-    PULPRQSTilingReadyBindings, PULPSGDTilingReadyBindings, PULPSoftmaxCrossEntropyGradTilingReadyBindings, \
+    PULPConv2DTilingReadyBindings, PULPDWConv2DTilingReadyBindings, PULPFlattenTilingReadyBindings, \
+    PULPFPGELUTilingReadyBindings, PULPFPGEMMTilingReadyBindings, PULPGatherTilingReadyBindings, \
+    PULPiHardswishTilingReadyBindings, PULPiRMSNormTilingReadyBindings, PULPiRQSGELUTilingReadyBindings, \
+    PULPLayernormTilingReadyBindings, PULPMatMulTilingReadyBindings, PULPMaxPool2DTilingReadyBindings, \
+    PULPMulTilingReadyBindings, PULPReduceMeanTilingReadyBindings, PULPReduceSumTilingReadyBindings, \
+    PULPReluTilingReadyBindings, PULPRQAddTilingReadyBindings, PULPRQSConv2DTilingReadyBindings, \
+    PULPRQSDWConv2DTilingReadyBindings, PULPRQSGEMMTilingReadyBindings, PULPRQSiHardswishTilingReadyBindings, \
+    PULPRQSMatrixVecTilingReadyBindings, PULPRQSTallGEMMTilingReadyBindings, PULPRQSTilingReadyBindings, \
+    PULPSGDTilingReadyBindings, PULPSliceTilingReadyBindings, PULPSoftmaxCrossEntropyGradTilingReadyBindings, \
     PULPSoftmaxCrossEntropyTilingReadyBindings, PULPSoftmaxGradTilingReadyBindings, PULPSoftmaxTilingReadyBindings, \
     PULPTransposeTilingReadyBindings, PULPUniformRQSTilingReadyBindings
 from Deeploy.Targets.PULPOpen.TopologyOptimizationPasses.Passes import PULPAddRequantMergePass, \
@@ -64,7 +65,7 @@
 RequantShiftMapper = NodeMapper(RequantShiftParser(), PULPRQSTilingReadyBindings)
 UniformRequantShiftMapper = NodeMapper(UniformRequantShiftParser(), PULPUniformRQSTilingReadyBindings)
 
-ReduceMeanMapper = NodeMapper(ReduceMeanParser(), PULPReduceMeanBindings)
+ReduceMeanMapper = NodeMapper(ReduceMeanParser(), PULPReduceMeanTilingReadyBindings)
 ReduceSumMapper = NodeMapper(ReduceSumParser(), PULPReduceSumTilingReadyBindings)
 MatMulMapper = NodeMapper(MatMulParser(), PULPMatMulTilingReadyBindings)
 RQIntegerDivMapper = NodeMapper(RQIntegerDivParser(), [BasicRQIntegerDivBinding])
@@ -74,7 +75,7 @@
 DWConv1DMapper = NodeMapper(PULPDWConv1DParser(), [PULPDWConv1DBinding])
 FPConv2DMapper = NodeMapper(PULPFPConv2DParser(), PULPConv2DTilingReadyBindings)
 Conv2DMapper = NodeMapper(PULPConv2DParser(), PULPRQSConv2DTilingReadyBindings)
-FPDWConv2DMapper = NodeMapper(PULPFPDWConv2DParser(), PULPFloatDWConv2DBindings)
+FPDWConv2DMapper = NodeMapper(PULPFPDWConv2DParser(), PULPDWConv2DTilingReadyBindings)
 DWConv2DMapper = NodeMapper(PULPDWConv2DParser(), PULPRQSDWConv2DTilingReadyBindings)
 GEMMMapper = NodeMapper(PULPGEMMParser(), PULPRQSGEMMTilingReadyBindings)
 FloatGEMMMapper = NodeMapper(GEMMParser(), PULPFPGEMMTilingReadyBindings)
@@ -91,7 +92,7 @@
 
 DMASliceMapper = NodeMapper(SliceParser(), PULPDMASliceBindings)
 
-SliceMapper = NodeMapper(SliceParser(), PULPSliceBindings)
+SliceMapper = NodeMapper(SliceParser(), PULPSliceTilingReadyBindings)
 
 iRMSNormMapper = NodeMapper(iRMSNormParser(), PULPiRMSNormTilingReadyBindings)