|
| 1 | +; RUN: opt < %s -sroa -S | FileCheck %s |
| 2 | + |
| 3 | +; Regression test for SROA miscompilation of min-precision vector element access. |
| 4 | +; DXC's data layout (below) pads i16/f16 to 32 bits (i16:32, f16:32), so GEP offsets |
| 5 | +; between vector elements are 4 bytes apart. SROA must use the alloc size (4 bytes), |
| 6 | +; not the primitive size (2 bytes), as the element stride; otherwise stores get misplaced. |
| 7 | + |
| 8 | +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" |
| 9 | +target triple = "dxil-ms-dx" |
| 10 | + |
| 11 | +; Test 1: Element-wise write to <3 x i16> vector. |
| 12 | +; SROA must map GEP byte offsets to correct element indices using alloc size |
| 13 | +; (4 bytes per i16), not primitive size (2 bytes). All stores must survive |
| 14 | +; with correct indices, and the final vector load must be preserved. |
| 15 | + |
| | +; The label pattern ends in '(' so that it matches only this function and not |
| | +; @test_sroa_i16_vec3_elem1 / @test_sroa_i16_vec3_elem2, which share its prefix. |
| 16 | +; CHECK-LABEL: @test_sroa_i16_vec3( |
| 17 | +; CHECK: getelementptr inbounds <3 x i16>, <3 x i16>* %{{.*}}, i32 0, i32 0 |
| 18 | +; CHECK: store i16 %v0 |
| 19 | +; CHECK: getelementptr inbounds <3 x i16>, <3 x i16>* %{{.*}}, i32 0, i32 1 |
| 20 | +; CHECK: store i16 %v1 |
| 21 | +; CHECK: getelementptr inbounds <3 x i16>, <3 x i16>* %{{.*}}, i32 0, i32 2 |
| 22 | +; CHECK: store i16 %v2 |
| 23 | +; CHECK: load <3 x i16> |
| 24 | +; CHECK: ret <3 x i16> |
| 25 | +define <3 x i16> @test_sroa_i16_vec3(i16 %v0, i16 %v1, i16 %v2) { |
| 26 | +entry: |
| 27 | + %dst = alloca <3 x i16>, align 4 |
| 28 | + store <3 x i16> zeroinitializer, <3 x i16>* %dst, align 4 |
| 29 | + %e0 = getelementptr inbounds <3 x i16>, <3 x i16>* %dst, i32 0, i32 0 |
| 30 | + store i16 %v0, i16* %e0, align 4 |
| 31 | + %e1 = getelementptr inbounds <3 x i16>, <3 x i16>* %dst, i32 0, i32 1 |
| 32 | + store i16 %v1, i16* %e1, align 4 |
| 33 | + %e2 = getelementptr inbounds <3 x i16>, <3 x i16>* %dst, i32 0, i32 2 |
| 34 | + store i16 %v2, i16* %e2, align 4 |
| 35 | + %result = load <3 x i16>, <3 x i16>* %dst, align 4 |
| 36 | + ret <3 x i16> %result |
| 37 | +} |
| 38 | + |
| 39 | +; Test 2: Same element-wise write pattern as Test 1, applied to <3 x half> (f16:32 padding). |
| 40 | + |
| 41 | +; CHECK-LABEL: @test_sroa_f16_vec3 |
| 42 | +; CHECK: getelementptr inbounds <3 x half>, <3 x half>* %{{.*}}, i32 0, i32 0 |
| 43 | +; CHECK: store half %v0 |
| 44 | +; CHECK: getelementptr inbounds <3 x half>, <3 x half>* %{{.*}}, i32 0, i32 1 |
| 45 | +; CHECK: store half %v1 |
| 46 | +; CHECK: getelementptr inbounds <3 x half>, <3 x half>* %{{.*}}, i32 0, i32 2 |
| 47 | +; CHECK: store half %v2 |
| 48 | +; CHECK: load <3 x half> |
| 49 | +; CHECK: ret <3 x half> |
| 50 | +define <3 x half> @test_sroa_f16_vec3(half %v0, half %v1, half %v2) { |
| 51 | +entry: |
| 52 | + %dst = alloca <3 x half>, align 4 |
| 53 | + store <3 x half> zeroinitializer, <3 x half>* %dst, align 4 |
| 54 | + %e0 = getelementptr inbounds <3 x half>, <3 x half>* %dst, i32 0, i32 0 |
| 55 | + store half %v0, half* %e0, align 4 |
| 56 | + %e1 = getelementptr inbounds <3 x half>, <3 x half>* %dst, i32 0, i32 1 |
| 57 | + store half %v1, half* %e1, align 4 |
| 58 | + %e2 = getelementptr inbounds <3 x half>, <3 x half>* %dst, i32 0, i32 2 |
| 59 | + store half %v2, half* %e2, align 4 |
| 60 | + %result = load <3 x half>, <3 x half>* %dst, align 4 |
| 61 | + ret <3 x half> %result |
| 62 | +} |
| 63 | + |
| 64 | +; Test 3: Partial write — only element 1 is stored over the zero-initialized vector. SROA must index it correctly. |
| 65 | + |
| 66 | +; CHECK-LABEL: @test_sroa_i16_vec3_elem1 |
| 67 | +; The element 1 store must be placed at GEP index 1, not index 2. |
| 68 | +; Without the fix: byte offset 4 / primitive size 2 = index 2 (wrong). |
| 69 | +; With the fix: byte offset 4 / alloc size 4 = index 1 (correct). |
| 70 | +; CHECK: getelementptr inbounds <3 x i16>, <3 x i16>* %{{.*}}, i32 0, i32 1 |
| 71 | +; CHECK: store i16 %val |
| 72 | +; CHECK: load <3 x i16> |
| 73 | +; CHECK: ret <3 x i16> |
| 74 | +define <3 x i16> @test_sroa_i16_vec3_elem1(i16 %val) { |
| 75 | +entry: |
| 76 | + %dst = alloca <3 x i16>, align 4 |
| 77 | + store <3 x i16> zeroinitializer, <3 x i16>* %dst, align 4 |
| 78 | + %e1 = getelementptr inbounds <3 x i16>, <3 x i16>* %dst, i32 0, i32 1 |
| 79 | + store i16 %val, i16* %e1, align 4 |
| 80 | + %result = load <3 x i16>, <3 x i16>* %dst, align 4 |
| 81 | + ret <3 x i16> %result |
| 82 | +} |
| 83 | + |
| 84 | +; Test 4: Element 2 store — verifies the highest index is correct (byte offset 8 / alloc size 4 = index 2). |
| 85 | + |
| 86 | +; CHECK-LABEL: @test_sroa_i16_vec3_elem2 |
| 87 | +; CHECK: getelementptr inbounds <3 x i16>, <3 x i16>* %{{.*}}, i32 0, i32 2 |
| 88 | +; CHECK: store i16 %val |
| 89 | +; CHECK: load <3 x i16> |
| 90 | +; CHECK: ret <3 x i16> |
| 91 | +define <3 x i16> @test_sroa_i16_vec3_elem2(i16 %val) { |
| 92 | +entry: |
| 93 | + %dst = alloca <3 x i16>, align 4 |
| 94 | + store <3 x i16> zeroinitializer, <3 x i16>* %dst, align 4 |
| 95 | + %e2 = getelementptr inbounds <3 x i16>, <3 x i16>* %dst, i32 0, i32 2 |
| 96 | + store i16 %val, i16* %e2, align 4 |
| 97 | + %result = load <3 x i16>, <3 x i16>* %dst, align 4 |
| 98 | + ret <3 x i16> %result |
| 99 | +} |
| 100 | + |
| 101 | +; Test 5: Long vector — <5 x i16> (exceeds 4-element native size). |
| | +; Every element store is verified, including the middle indices 2 and 3, so a |
| | +; misplaced store anywhere in the vector is caught rather than skipped over. |
| 102 | + |
| 103 | +; CHECK-LABEL: @test_sroa_i16_vec5 |
| 104 | +; CHECK: getelementptr inbounds <5 x i16>, <5 x i16>* %{{.*}}, i32 0, i32 0 |
| 105 | +; CHECK: store i16 %v0 |
| 106 | +; CHECK: getelementptr inbounds <5 x i16>, <5 x i16>* %{{.*}}, i32 0, i32 1 |
| 107 | +; CHECK: store i16 %v1 |
| | +; CHECK: getelementptr inbounds <5 x i16>, <5 x i16>* %{{.*}}, i32 0, i32 2 |
| | +; CHECK: store i16 %v2 |
| | +; CHECK: getelementptr inbounds <5 x i16>, <5 x i16>* %{{.*}}, i32 0, i32 3 |
| | +; CHECK: store i16 %v3 |
| 108 | +; CHECK: getelementptr inbounds <5 x i16>, <5 x i16>* %{{.*}}, i32 0, i32 4 |
| 109 | +; CHECK: store i16 %v4 |
| 110 | +; CHECK: load <5 x i16> |
| 111 | +; CHECK: ret <5 x i16> |
| 112 | +define <5 x i16> @test_sroa_i16_vec5(i16 %v0, i16 %v1, i16 %v2, i16 %v3, i16 %v4) { |
| 113 | +entry: |
| 114 | + %dst = alloca <5 x i16>, align 4 |
| 115 | + store <5 x i16> zeroinitializer, <5 x i16>* %dst, align 4 |
| 116 | + %e0 = getelementptr inbounds <5 x i16>, <5 x i16>* %dst, i32 0, i32 0 |
| 117 | + store i16 %v0, i16* %e0, align 4 |
| 118 | + %e1 = getelementptr inbounds <5 x i16>, <5 x i16>* %dst, i32 0, i32 1 |
| 119 | + store i16 %v1, i16* %e1, align 4 |
| 120 | + %e2 = getelementptr inbounds <5 x i16>, <5 x i16>* %dst, i32 0, i32 2 |
| 121 | + store i16 %v2, i16* %e2, align 4 |
| 122 | + %e3 = getelementptr inbounds <5 x i16>, <5 x i16>* %dst, i32 0, i32 3 |
| 123 | + store i16 %v3, i16* %e3, align 4 |
| 124 | + %e4 = getelementptr inbounds <5 x i16>, <5 x i16>* %dst, i32 0, i32 4 |
| 125 | + store i16 %v4, i16* %e4, align 4 |
| 126 | + %result = load <5 x i16>, <5 x i16>* %dst, align 4 |
| 127 | + ret <5 x i16> %result |
| 128 | +} |
| 129 | + |
| 130 | +; Test 6: Long vector — <8 x half>, partial write: only the first (0) and last (7) elements are stored. |
| 131 | + |
| 132 | +; CHECK-LABEL: @test_sroa_f16_vec8_partial |
| 133 | +; CHECK: getelementptr inbounds <8 x half>, <8 x half>* %{{.*}}, i32 0, i32 0 |
| 134 | +; CHECK: store half %v0 |
| 135 | +; CHECK: getelementptr inbounds <8 x half>, <8 x half>* %{{.*}}, i32 0, i32 7 |
| 136 | +; CHECK: store half %v7 |
| 137 | +; CHECK: load <8 x half> |
| 138 | +; CHECK: ret <8 x half> |
| 139 | +define <8 x half> @test_sroa_f16_vec8_partial(half %v0, half %v7) { |
| 140 | +entry: |
| 141 | + %dst = alloca <8 x half>, align 4 |
| 142 | + store <8 x half> zeroinitializer, <8 x half>* %dst, align 4 |
| 143 | + %e0 = getelementptr inbounds <8 x half>, <8 x half>* %dst, i32 0, i32 0 |
| 144 | + store half %v0, half* %e0, align 4 |
| 145 | + %e7 = getelementptr inbounds <8 x half>, <8 x half>* %dst, i32 0, i32 7 |
| 146 | + store half %v7, half* %e7, align 4 |
| 147 | + %result = load <8 x half>, <8 x half>* %dst, align 4 |
| 148 | + ret <8 x half> %result |
| 149 | +} |
0 commit comments