KhronosGroup · gpx1000 · Oct 29, 2025 · Oct 30, 2025 · Dec 31, 2025 · Dec 31, 2025
diff --git a/antora/modules/ROOT/nav.adoc b/antora/modules/ROOT/nav.adoc
@@ -1,6 +1,6 @@
 ////
-- Copyright (c) 2023-2025, Holochip Inc
-- Copyright (c) 2023-2025, Sascha Willems
+- Copyright (c) 2023-2026, Holochip Inc
+- Copyright (c) 2023-2026, Sascha Willems
 - Copyright (c) 2025, Arm Limited and Contributors
 -
 - SPDX-License-Identifier: Apache-2.0
@@ -84,6 +84,7 @@
 *** xref:samples/extensions/hpp_push_descriptors/README.adoc[Push descriptors (Vulkan-Hpp)]
 ** xref:samples/extensions/ray_tracing_basic/README.adoc[Raytracing basic]
 ** xref:samples/extensions/ray_tracing_extended/README.adoc[Raytracing extended]
+** xref:samples/extensions/ray_tracing_invocation_reorder/README.adoc[Ray tracing invocation reorder (SER)]
 ** xref:samples/extensions/ray_queries/README.adoc[Ray queries]
 ** xref:samples/extensions/ray_tracing_reflection/README.adoc[Ray tracing reflection]
 ** xref:samples/extensions/ray_tracing_position_fetch/README.adoc[Ray tracing position fetch]

diff --git a/framework/vulkan_type_mapping.h b/framework/vulkan_type_mapping.h
@@ -1,5 +1,5 @@
 /* Copyright (c) 2025, Arm Limited and Contributors
- * Copyright (c) 2024-2025, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2024-2026, NVIDIA CORPORATION. All rights reserved.
  *
  * SPDX-License-Identifier: Apache-2.0
  *
@@ -243,6 +243,20 @@ struct HPPType<VkPhysicalDeviceRayTracingPipelineFeaturesKHR>
 	using Type = vk::PhysicalDeviceRayTracingPipelineFeaturesKHR;
 };
 
+#ifdef VK_EXT_ray_tracing_invocation_reorder
+template <>
+struct HPPType<VkPhysicalDeviceRayTracingInvocationReorderFeaturesEXT>
+{
+	using Type = vk::PhysicalDeviceRayTracingInvocationReorderFeaturesEXT;
+};
+#endif
+
+template <>
+struct HPPType<VkPhysicalDeviceRayTracingInvocationReorderFeaturesNV>
+{
+	using Type = vk::PhysicalDeviceRayTracingInvocationReorderFeaturesNV;
+};
+
 template <>
 struct HPPType<VkPhysicalDeviceRayTracingPositionFetchFeaturesKHR>
 {

diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2025, Arm Limited and Contributors
+# Copyright (c) 2019-2026, Arm Limited and Contributors
 #
 # SPDX-License-Identifier: Apache-2.0
 #
@@ -75,6 +75,7 @@ set(ORDER_LIST
     "ray_tracing_basic"
     "ray_tracing_extended"
     "ray_tracing_reflection"
+    "ray_tracing_invocation_reorder"
     "timeline_semaphore"
     "shader_object"
     "shader_debugprintf"

diff --git a/samples/extensions/README.adoc b/samples/extensions/README.adoc
@@ -1,6 +1,6 @@
 ////
 - Copyright (c) 2025, Arm Limited and Contributors
-- Copyright (c) 2021-2025, The Khronos Group
+- Copyright (c) 2021-2026, The Khronos Group
 -
 - SPDX-License-Identifier: Apache-2.0
 -
@@ -312,3 +312,9 @@ Demonstrate how to build data graph pipelines and execute neural networks:
 
 * xref:./{extension_samplespath}tensor_and_data_graph/simple_tensor_and_data_graph/README.adoc[simple_tensor_and_data_graph]
 - Explains how to set up and execute a simple neural network using a data graph pipeline.
+
+=== xref:./{extension_samplespath}ray_tracing_invocation_reorder/README.adoc[Ray Tracing Invocation Reorder]
+
+*Extensions:* https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_ray_tracing_invocation_reorder.html[`VK_EXT_ray_tracing_invocation_reorder`]
+
+Demonstrate how to optimize ray tracing pipelines by reordering the invocation order.
diff --git a/samples/extensions/ray_tracing_invocation_reorder/CMakeLists.txt b/samples/extensions/ray_tracing_invocation_reorder/CMakeLists.txt
@@ -0,0 +1,46 @@
+# Copyright (c) 2025-2026, Holochip Inc.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 the "License";
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+get_filename_component(FOLDER_NAME ${CMAKE_CURRENT_LIST_DIR} NAME)
+get_filename_component(PARENT_DIR ${CMAKE_CURRENT_LIST_DIR} PATH)
+get_filename_component(CATEGORY_NAME ${PARENT_DIR} NAME)
+
+add_sample_with_tags(
+    ID ${FOLDER_NAME}
+    CATEGORY ${CATEGORY_NAME}
+    AUTHOR "Holochip Inc."
+    NAME "Ray tracing invocation reorder"
+    DESCRIPTION "Demonstrates Shader Execution Reordering (SER) using VK_EXT_ray_tracing_invocation_reorder to reduce divergence"
+    # Note: We do not compile GLSL here because glslc may lack GL_EXT_shader_invocation_reorder.
+    # load_shader() will look under shaders/<sample>/glsl, so we copy Slang SPV there (see custom target below).
+    # Keep Slang shaders available for alternative toolchains
+    SHADER_FILES_SLANG
+        "ray_tracing_invocation_reorder/slang/raygen.rgen.slang"
+        "ray_tracing_invocation_reorder/slang/miss.rmiss.slang"
+    )
+
+# Copy Slang-compiled SPIR-V to the GLSL directory so load_shader() can find them
+# This mirrors the pathing used by ray_tracing_extended (GLSL folder) while keeping Slang as the compiler.
+add_custom_target(${FOLDER_NAME}-copy-slang-to-glsl ALL
+    DEPENDS ${FOLDER_NAME}-SLANG
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${PROJECT_SOURCE_DIR}/shaders/${FOLDER_NAME}/glsl
+    COMMAND ${CMAKE_COMMAND} -E copy_if_different
+            ${PROJECT_SOURCE_DIR}/shaders/${FOLDER_NAME}/slang/raygen.rgen.spv
+            ${PROJECT_SOURCE_DIR}/shaders/${FOLDER_NAME}/glsl/raygen.rgen.spv
+)
+
+# Also copy the GLSL SPIR-V from ray_tracing_extended to ensure shader parity
+# Do not reuse `ray_tracing_extended` SPIR-V. Keep local shader copies for this sample only.
diff --git a/samples/extensions/ray_tracing_invocation_reorder/README.adoc b/samples/extensions/ray_tracing_invocation_reorder/README.adoc
@@ -0,0 +1,261 @@
+////
+- Copyright (c) 2025-2026, Holochip Inc.
+-
+- SPDX-License-Identifier: Apache-2.0
+-
+- Licensed under the Apache License, Version 2.0 the "License";
+- you may not use this file except in compliance with the License.
+- You may obtain a copy of the License at
+-
+-     http://www.apache.org/licenses/LICENSE-2.0
+-
+- Unless required by applicable law or agreed to in writing, software
+- distributed under the License is distributed on an "AS IS" BASIS,
+- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+- See the License for the specific language governing permissions and
+- limitations under the License.
+-
+////
+
+= Shader Execution Reordering (SER) for Ray Tracing
+
+ifdef::site-gen-antora[]
+TIP: The source for this sample can be found in the https://github.com/KhronosGroup/Vulkan-Samples/tree/main/samples/extensions/ray_tracing_invocation_reorder[Khronos Vulkan samples github repository].
+endif::[]
+
+*Extensions*: https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_ray_tracing_invocation_reorder.html[`VK_EXT_ray_tracing_invocation_reorder`]
+
+*GLSL Extensions*: https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GLSL_EXT_shader_invocation_reorder.txt[`GL_EXT_shader_invocation_reorder`]
+
+== Overview
+
+This sample demonstrates *Shader Execution Reordering (SER)*, a powerful optimization technique for ray tracing that reduces performance issues caused by divergence. SER allows you to reorganize shader invocations across the GPU to group similar work together, significantly improving coherency and performance.
+
+The sample shows how to use the `VK_EXT_ray_tracing_invocation_reorder` extension with hit objects and the `reorderThreadEXT()` function to achieve performance improvements of 20-50% or more in ray tracing workloads.
+
+NOTE: This sample requires a recent Vulkan SDK (1.4.304.0 or later) with support for `GL_EXT_shader_invocation_reorder` in the shader compiler. The extension was finalized in late 2024 and may not be available in older SDK versions.
+
+TIP: This sample includes both GLSL and **Slang** shader implementations. If you experience SDK compatibility issues with GLSL shaders, the Slang shaders provide a fully functional alternative and are recommended for the best compatibility and ease of use.
+
+== The Divergence Problem
+
+Ray tracing faces two major performance challenges:
+
+=== Control Flow Divergence
+
+GPUs execute shader code in parallel on groups of invocations (subgroups, typically 32 or 64 threads). When invocations in the same subgroup take different code paths—such as invoking different shaders or executing different branches—the GPU must serialize execution, with active invocations waiting for inactive ones to finish.
+
+In ray tracing, this commonly occurs when:
+
+* Adjacent rays hit different objects and invoke different closest-hit shaders
+* Some rays miss while others hit geometry
+* Rays terminate at different bounce depths
+
+=== Data Divergence
+
+When rays become incoherent, they access scattered memory locations for geometry data, textures, and acceleration structures. This leads to:
+
+* Poor cache utilization
+* Increased memory bandwidth requirements
+* Stalls waiting for memory subsystems
+
+== How Shader Execution Reordering Helps
+
+SER addresses these issues by introducing *hit objects* that separate ray traversal from shader invocation, allowing the GPU to pause execution and reorder invocations:
+
+[source,glsl]
+----
+// Traditional approach: traverse and invoke shaders in one call
+traceRayEXT(topLevelAS, rayFlags, cullMask, sbtOffset, sbtStride, 
+            missIndex, rayOrigin, rayTMin, rayDirection, rayTMax, payloadIndex);
+
+// SER approach: separate traversal from shader invocation
+hitObjectEXT hitObj;
+hitObjectRecordEmptyEXT(hitObj);
+
+// Step 1: Traverse acceleration structure
+hitObjectTraceRayEXT(hitObj, topLevelAS, rayFlags, cullMask,
+                     sbtOffset, sbtStride, missIndex,
+                     rayOrigin, rayTMin, rayDirection, rayTMax, payloadIndex);
+
+// Step 2: Reorder invocations for better coherency
+reorderThreadEXT(hitObj);
+
+// Step 3: Invoke the miss or closest-hit shader
+hitObjectExecuteShaderEXT(hitObj, payloadIndex);
+----
+
+The same concepts apply in **Slang** with HLSL-style syntax:
+
+[source,slang]
+----
+// Traditional approach: traverse and invoke shaders in one call
+TraceRay(topLevelAS, RAY_FLAG_NONE, 0xff, 0, 0, 0, ray, payload);
+
+// SER approach: separate traversal from shader invocation
+RayDesc ray;
+ray.Origin = origin.xyz;
+ray.Direction = direction.xyz;
+ray.TMin = tmin;
+ray.TMax = tmax;
+
+// Step 1: Trace ray and store hit information in hit object
+HitObject hitObj = HitObject::TraceRay(topLevelAS, RAY_FLAG_NONE, 0xff, 
+                                       0, 0, 0, ray, payload);
+
+// Step 2: Reorder invocations for better coherency
+ReorderThread(hitObj);
+
+// Step 3: Execute the miss or closest-hit shader
+HitObject::Invoke(topLevelAS, hitObj, payload);
+----
+
+By calling `reorderThreadEXT()` (GLSL) or `ReorderThread()` (Slang), the GPU can:
+
+* Group invocations that will execute the same shader
+* Organize invocations accessing similar data
+* Reduce overall divergence and improve cache efficiency
+
+== Using Coherence Hints
+
+For even better performance, you can provide hints to guide the reordering:
+
+[source,glsl]
+----
+// Reorder with a coherence hint
+uint hint = 0;
+if (hitObjectIsHitEXT(hitObj))
+{
+    hint = hitObjectGetInstanceIdEXT(hitObj);
+}
+reorderThreadEXT(hitObj, hint, 8);  // Use 8 bits for the hint
+----
+
+The GPU sorts invocations by:
+
+1. *Shader ID* (highest priority - which shader will execute)
+2. *Your hint* (middle priority - custom application-specific data)
+3. *Implementation-specific data* (lowest priority)
+
+Good coherence hints include:
+
+* Material IDs or flags that affect control flow
+* Texture binding indices for similar data access
+* Early-exit conditions (e.g., path length, Russian Roulette)
+
+== Hit Objects Without Reordering
+
+Even if you don't need reordering, hit objects provide valuable functionality:
+
+* *Shadow/AO rays*: Skip shader invocation entirely with `hitObjectIsHitEXT()` or `hitObjectIsMissEXT()`
+* *Flexible payloads*: Use different payload types for traversal vs. shader invocation
+* *Direct hit access*: Query hit information (positions, normals, matrices) at the ray generation level
+
+== Best Practices
+
+=== When to Use SER
+
+SER provides the biggest benefits when you have:
+
+* *Path tracing* with multiple bounces and material diversity
+* *Multiple closest-hit shaders* representing different materials
+* *Secondary, scattered rays* (e.g., rough reflections)
+* *Stochastic effects* creating natural divergence
+
+SER may not help as much with:
+
+* Highly coherent primary rays
+* Simple shaders with minimal divergence
+* Single übershaders with minimal branching
+
+=== Minimizing Live State
+
+When `reorderThreadEXT()` is called, the GPU must save and restore the invocation's local variables (live state). To maximize performance:
+
+* Avoid keeping variables live across the `reorderThreadEXT()` call
+* Use smaller data types (FP16 instead of FP32 where appropriate)
+* Pack flags and enums into bit fields
+* Audit your ray payloads to remove unnecessary fields
+
+=== Device Support
+
+The extension has backwards-compatibility built in:
+
+* On devices with hardware SER support, `reorderThreadEXT()` actively reorders invocations
+* On older devices, `reorderThreadEXT()` becomes a no-op, but hit objects still work
+* Query `VkPhysicalDeviceRayTracingInvocationReorderPropertiesEXT` to check support:
+
+[source,cpp]
+----
+VkPhysicalDeviceRayTracingInvocationReorderPropertiesEXT serProperties{};
+serProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_INVOCATION_REORDER_PROPERTIES_EXT;
+
+VkPhysicalDeviceProperties2 deviceProperties{};
+deviceProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
+deviceProperties.pNext = &serProperties;
+
+vkGetPhysicalDeviceProperties2(physicalDevice, &deviceProperties);
+
+bool canReorder = (serProperties.rayTracingInvocationReorderReorderingHint == 
+                   VK_RAY_TRACING_INVOCATION_REORDER_MODE_REORDER_EXT);
+----
+
+== This Sample
+
+This sample demonstrates SER with an interactive comparison:
+
+* *Three material types* (metal, diffuse, glass) that create control flow divergence
+* *Toggle SER on/off* to see the performance difference
+* *Coherence hints* based on instance ID (can be toggled)
+* *Real-time UI* showing whether the device supports reordering
+
+The scene is intentionally designed to maximize divergence when SER is disabled, showing the benefits of reordering when enabled.
+
+=== Key Features
+
+* Enable/disable SER dynamically via UI
+* Toggle coherence hints to see their impact
+* Compare traditional `traceRayEXT()` vs. hit objects + `reorderThreadEXT()`
+* Device capability detection and display
+
+== Enabling the Extension
+
+To use SER in your application:
+
+[source,cpp]
+----
+// Enable the extension
+add_device_extension(VK_EXT_RAY_TRACING_INVOCATION_REORDER_EXTENSION_NAME);
+
+// Request the feature
+REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceRayTracingInvocationReorderFeaturesEXT, 
+                         rayTracingInvocationReorder);
+----
+
+In GLSL shaders:
+
+[source,glsl]
+----
+#extension GL_EXT_shader_invocation_reorder : enable
+----
+
+== Performance Expectations
+
+Real-world applications have seen:
+
+* *11-24%* improvement in path tracing (with live state optimization)
+* *40-50%* in synthetic benchmarks with high divergence
+* *30-40%* when combined with other optimizations (e.g., Opacity Micromaps)
+
+The actual gain depends on:
+
+* Scene complexity and material diversity
+* Amount of control flow and data divergence
+* Quality of coherence hints
+* Live state size
+
+== Resources
+
+* https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_ray_tracing_invocation_reorder.html[VK_EXT_ray_tracing_invocation_reorder specification]
+* https://github.com/KhronosGroup/GLSL/blob/master/extensions/ext/GLSL_EXT_shader_invocation_reorder.txt[GL_EXT_shader_invocation_reorder specification]
+* https://github.com/microsoft/DirectX-Specs/blob/master/d3d/Raytracing.md#shader-execution-reordering[DirectX Shader Execution Reordering documentation]