Skip to content

Commit 4b7e99c

Browse files
ssjia SS-JIA
authored and committed
[ET-VK] Modernize constant_pad_nd
Modernize constant_pad_nd to support ANY_STORAGE (both buffer and texture). Migrate shaders to BufferMetadata/TextureMetadata with indexing.glslh and unify dispatch into a single add_constant_pad_nd_node function using DynamicDispatchNode.

Pull Request resolved: pytorch#18051
ghstack-source-id: 353546682
@exported-using-ghexport

Differential Revision: [D95970168](https://our.internmc.facebook.com/intern/diff/D95970168/)
1 parent 13a7c05 commit 4b7e99c

9 files changed

Lines changed: 193 additions & 185 deletions

File tree

backends/vulkan/op_registry.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1298,8 +1298,9 @@ def register_arange():
12981298
@update_features(exir_ops.edge.aten.constant_pad_nd.default)
12991299
def register_constant_pad_nd():
13001300
return OpFeatures(
1301-
inputs_storage=utils.CHANNELS_PACKED_TEXTURE,
1301+
inputs_storage=utils.ANY_STORAGE,
13021302
inputs_dtypes=utils.FP_INT_BOOL_T,
1303+
supports_resize=True,
13031304
)
13041305

13051306

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#version 450 core
10+
11+
${define_required_extensions("buffer", DTYPE)}
12+
13+
#define PRECISION ${PRECISION}
14+
15+
#define T ${buffer_scalar_type(DTYPE)}
16+
17+
${define_active_storage_type("buffer")}
18+
19+
#extension GL_EXT_control_flow_attributes : require
20+
21+
layout(std430) buffer;
22+
23+
#include "indexing.glslh"
24+
25+
${layout_declare_tensor(B, "w", "t_out", DTYPE, "buffer")}
26+
${layout_declare_tensor(B, "r", "t_in", DTYPE, "buffer")}
27+
28+
${layout_declare_ubo(B, "BufferMetadata", "outp")}
29+
${layout_declare_ubo(B, "BufferMetadata", "inp")}
30+
${layout_declare_ubo(B, "ivec4", "pad_per_dim")}
31+
${layout_declare_ubo(B, "float", "fill_value")}
32+
33+
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
34+
35+
${layout_declare_spec_const(C, "int", "out_layout", "CONTIG_LAYOUT_INT")}
36+
37+
// Buffer variant of constant padding. Each invocation handles the single
// output element whose linear buffer index is gl_GlobalInvocationID.x and
// writes either the matching input element or fill_value to t_out.
void main() {
38+
const uint out_bufi = gl_GlobalInvocationID.x;
39+
// Invocations that do not map to an output element do nothing.
if (out_of_bounds(out_bufi, outp)) {
40+
return;
41+
}
42+
43+
// Turn the linear output index into a per-dimension tensor index.
TensorIndex out_tidx = linear_idx_to_tensor_idx(outp, out_bufi, out_layout);
44+
45+
// Subtract the pad offset of each dimension to get the input tensor index.
46+
// The subtraction is unsigned: when the output index is smaller than the pad
47+
// offset the result wraps to a large value, which the out_of_bounds check
// below is relied on to reject (out_of_bounds comes from indexing.glslh).
// Only data[0][0..3] is shifted, so at most four dimensions are padded here.
48+
TensorIndex in_tidx = out_tidx;
49+
[[unroll]] for (int d = 0; d < 4; d++) {
50+
in_tidx.data[0][d] -= uint(pad_per_dim[d]);
51+
}
52+
53+
// No matching input element: this output element is in the padding region.
// fill_value is a float uniform and is converted to the element type T.
if (out_of_bounds(in_tidx, inp)) {
54+
t_out[out_bufi] = T(fill_value);
55+
return;
56+
}
57+
58+
// Otherwise copy the input element through unchanged.
const uint in_bufi = tensor_idx_to_linear_idx(inp, in_tidx);
59+
t_out[out_bufi] = t_in[in_bufi];
60+
}

backends/vulkan/runtime/graph/ops/glsl/pad_channel.yaml renamed to backends/vulkan/runtime/graph/ops/glsl/pad_buffer.yaml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
1-
pad_channel:
1+
pad_buffer:
22
parameter_names_with_default_values:
3-
NDIM: 3
43
DTYPE: float
5-
PACKING: C_packed
6-
STORAGE: texture3d
74
generate_variant_forall:
85
DTYPE:
96
- VALUE: float
107
- VALUE: half
118
- VALUE: int32
129
- VALUE: uint8
1310
shader_variants:
14-
- NAME: pad_channel
11+
- NAME: pad_buffer

backends/vulkan/runtime/graph/ops/glsl/pad_channel.glsl

Lines changed: 0 additions & 80 deletions
This file was deleted.

backends/vulkan/runtime/graph/ops/glsl/pad_height_width.glsl

Lines changed: 0 additions & 50 deletions
This file was deleted.
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#version 450 core
10+
11+
${define_required_extensions("texture3d", DTYPE)}
12+
13+
#define PRECISION ${PRECISION}
14+
15+
#define VEC4_T ${texel_load_type(DTYPE, "texture3d")}
16+
#define T ${texel_load_component_type(DTYPE, "texture3d")}
17+
18+
${define_active_storage_type("texture3d")}
19+
20+
#extension GL_EXT_control_flow_attributes : require
21+
22+
layout(std430) buffer;
23+
24+
#include "common.glslh"
25+
#include "indexing.glslh"
26+
27+
${layout_declare_tensor(B, "w", "t_out", DTYPE, "texture3d")}
28+
${layout_declare_tensor(B, "r", "t_in", DTYPE, "texture3d")}
29+
30+
${layout_declare_ubo(B, "TextureMetadata", "outp")}
31+
${layout_declare_ubo(B, "TextureMetadata", "inp")}
32+
${layout_declare_ubo(B, "ivec4", "pad_per_dim")}
33+
${layout_declare_ubo(B, "float", "fill_value")}
34+
35+
layout(local_size_x_id = 0, local_size_y_id = 1, local_size_z_id = 2) in;
36+
37+
${layout_declare_spec_const(C, "int", "out_layout", "CONTIG_LAYOUT_INT")}
38+
const int packed_dim = get_packed_dim(out_layout);
39+
40+
// Texture variant of constant padding. Each invocation builds and stores the
// output texel at position gl_GlobalInvocationID; each of its (up to four)
// components is either copied from the input texture or set to fill_value.
void main() {
41+
const ivec3 out_pos = ivec3(gl_GlobalInvocationID);
42+
43+
// Invocations that do not map to an output texel do nothing.
if (out_of_bounds(out_pos, outp)) {
44+
return;
45+
}
46+
47+
// Convert the thread position to output tensor indices in element space.
48+
// out_tidx.data[packed_dim] is the element index of the first component in
49+
// this texel; the remaining three dims are scalar element indices.
50+
TensorIndex4D out_tidx = texture_pos_to_tensor4d_idx_simple(outp, out_pos);
51+
52+
// Tail texels may have fewer than 4 valid elements; leave extras as 0.
53+
const int limit =
54+
min(4, outp.sizes[packed_dim] - out_tidx.data[packed_dim]);
55+
56+
// Start from an all-zero texel so components at or beyond limit stay 0.
VEC4_T out_texel = VEC4_T(0);
57+
58+
// Process each of the (up to 4) elements in this output texel independently.
59+
// For each element: subtract pad offsets to obtain the input element index,
60+
// then copy from the input if in-bounds or write fill_value if in the padding
61+
// region.
62+
[[unroll]] for (int comp = 0; comp < limit; comp++) {
63+
TensorIndex4D in_tidx = out_tidx;
64+
// Step along the packed dimension to reach component comp of this texel.
in_tidx.data[packed_dim] += comp;
65+
in_tidx.data[0] -= pad_per_dim[0];
66+
in_tidx.data[1] -= pad_per_dim[1];
67+
in_tidx.data[2] -= pad_per_dim[2];
68+
in_tidx.data[3] -= pad_per_dim[3];
69+
70+
// The indices are signed: when the output index is smaller than the pad
71+
// offset the result is negative and fails the >= 0 check, so the element
// is treated as padding. Indices at or past inp.sizes are padding as well.
72+
if (in_tidx.data[0] >= 0 && in_tidx.data[0] < inp.sizes[0] &&
73+
in_tidx.data[1] >= 0 && in_tidx.data[1] < inp.sizes[1] &&
74+
in_tidx.data[2] >= 0 && in_tidx.data[2] < inp.sizes[2] &&
75+
in_tidx.data[3] >= 0 && in_tidx.data[3] < inp.sizes[3]) {
76+
// Locate the input texel (elem.pos) and the component within it
// (elem.comp) that hold this element, then copy that one component.
TextureElementIndex elem =
77+
tensor4d_idx_to_texture_element_idx_simple(inp, in_tidx);
78+
VEC4_T in_texel = texelFetch(t_in, elem.pos, 0);
79+
out_texel[comp] = T(in_texel[elem.comp]);
80+
} else {
81+
// fill_value is a float uniform and is converted to the component type T.
out_texel[comp] = T(fill_value);
82+
}
83+
}
84+
85+
// Write the assembled texel once, after all components are resolved.
imageStore(t_out, out_pos, out_texel);
86+
}

backends/vulkan/runtime/graph/ops/glsl/pad_height_width.yaml renamed to backends/vulkan/runtime/graph/ops/glsl/pad_texture.yaml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,11 @@
1-
pad_height_width:
1+
pad_texture:
22
parameter_names_with_default_values:
3-
NDIM: 3
43
DTYPE: float
5-
PACKING: C_packed
6-
STORAGE: texture3d
74
generate_variant_forall:
85
DTYPE:
96
- VALUE: float
107
- VALUE: half
118
- VALUE: int32
129
- VALUE: uint8
1310
shader_variants:
14-
- NAME: pad_height_width
11+
- NAME: pad_texture3d

0 commit comments

Comments
 (0)