Skip to content

KernelIntrinsics#403

Open
christiangnrd wants to merge 6 commits into JuliaGPU:master from
christiangnrd:intrinsics
Open

KernelIntrinsics#403
christiangnrd wants to merge 6 commits into JuliaGPU:master from
christiangnrd:intrinsics

Conversation

@christiangnrd
Copy link
Member

No description provided.

@github-actions
Copy link
Contributor

github-actions bot commented Nov 18, 2025

Your PR requires formatting changes to meet the project's style guidelines.
Please consider running Runic (git runic master) to apply these changes.

Click here to view the suggested changes.
diff --git a/lib/cl/device.jl b/lib/cl/device.jl
index 64bb1f9..cdfa4d5 100644
--- a/lib/cl/device.jl
+++ b/lib/cl/device.jl
@@ -315,7 +315,7 @@ function sub_group_size(d::Device)
     end
 end
 function sub_group_shuffle_supported_types(d::Device)
-    if "cl_khr_subgroup_shuffle" in d.extensions
+    return if "cl_khr_subgroup_shuffle" in d.extensions
         res = [Int8, UInt8, Int16, UInt16, Int32, UInt32, Int64, UInt64, Float32]
         "cl_khr_fp16" in d.extensions && push!(res, Float16)
         "cl_khr_fp64" in d.extensions && push!(res, Float64)
diff --git a/lib/intrinsics/src/synchronization.jl b/lib/intrinsics/src/synchronization.jl
index 7d88b69..c8abe6d 100644
--- a/lib/intrinsics/src/synchronization.jl
+++ b/lib/intrinsics/src/synchronization.jl
@@ -156,7 +156,9 @@ export barrier, work_group_barrier, sub_group_barrier
                     MemorySemantics.SequentiallyConsistent | mem_fence_flags_to_semantics(flags))
 
 @inline sub_group_barrier(flags, scope = memory_scope_sub_group) =
-    control_barrier(Scope.Subgroup, cl_scope_to_spirv(scope),
-                    MemorySemantics.SequentiallyConsistent | mem_fence_flags_to_semantics(flags))
+    control_barrier(
+    Scope.Subgroup, cl_scope_to_spirv(scope),
+    MemorySemantics.SequentiallyConsistent | mem_fence_flags_to_semantics(flags)
+)
 
 barrier(flags) = work_group_barrier(flags)
diff --git a/src/OpenCLKernels.jl b/src/OpenCLKernels.jl
index 626b7fa..4dcfbdc 100644
--- a/src/OpenCLKernels.jl
+++ b/src/OpenCLKernels.jl
@@ -129,9 +129,9 @@ end
 
 KI.argconvert(::OpenCLBackend, arg) = kernel_convert(arg)
 
-function KI.kernel_function(::OpenCLBackend, f::F, tt::TT=Tuple{}; name = nothing, kwargs...) where {F,TT}
+function KI.kernel_function(::OpenCLBackend, f::F, tt::TT = Tuple{}; name = nothing, kwargs...) where {F, TT}
     kern = clfunction(f, tt; name, kwargs...)
-    KI.Kernel{OpenCLBackend, typeof(kern)}(OpenCLBackend(), kern)
+    return KI.Kernel{OpenCLBackend, typeof(kern)}(OpenCLBackend(), kern)
 end
 
 function (obj::KI.Kernel{OpenCLBackend})(args...; numworkgroups = 1, workgroupsize = 1)
@@ -147,15 +147,15 @@ function (obj::KI.Kernel{OpenCLBackend})(args...; numworkgroups = 1, workgroupsi
 end
 
 
-function KI.kernel_max_work_group_size(kernel::KI.Kernel{<:OpenCLBackend}; max_work_items::Int=typemax(Int))::Int
+function KI.kernel_max_work_group_size(kernel::KI.Kernel{<:OpenCLBackend}; max_work_items::Int = typemax(Int))::Int
     wginfo = cl.work_group_info(kernel.kern.fun, cl.device())
-    Int(min(wginfo.size, max_work_items))
+    return Int(min(wginfo.size, max_work_items))
 end
 function KI.max_work_group_size(::OpenCLBackend)::Int
-    Int(cl.device().max_work_group_size)
+    return Int(cl.device().max_work_group_size)
 end
 function KI.multiprocessor_count(::OpenCLBackend)::Int
-    Int(cl.device().max_compute_units)
+    return Int(cl.device().max_compute_units)
 end
 
 ## Indexing Functions
diff --git a/test/intrinsics.jl b/test/intrinsics.jl
index 407faec..bac459c 100644
--- a/test/intrinsics.jl
+++ b/test/intrinsics.jl
@@ -166,82 +166,82 @@ end
     @test call_on_device(OpenCL.mad, x, y, z) ≈ x * y + z
 end
 
-if cl.sub_groups_supported(cl.device())
-
-struct SubgroupData
-    sub_group_size::UInt32
-    max_sub_group_size::UInt32
-    num_sub_groups::UInt32
-    sub_group_id::UInt32
-    sub_group_local_id::UInt32
-end
-function test_subgroup_kernel(results)
-    i = get_global_id(1)
-
-    if i <= length(results)
-        @inbounds results[i] = SubgroupData(
-            get_sub_group_size(),
-            get_max_sub_group_size(),
-            get_num_sub_groups(),
-            get_sub_group_id(),
-            get_sub_group_local_id()
-        )
-    end
-    return
-end
-
-@testset "Sub-groups" begin
-    sg_size = cl.sub_group_size(cl.device())
-
-    @testset "Indexing intrinsics" begin
-        # Test with small kernel
-        sg_n = 2
-        local_size = sg_size * sg_n
-        numworkgroups = 2
-        N = local_size * numworkgroups
-
-        results = CLVector{SubgroupData}(undef, N)
-        kernel = @opencl launch = false test_subgroup_kernel(results)
-
-        kernel(results; local_size, global_size=N)
-
-        host_results = Array(results)
-
-        # Verify results make sense
-        for (i, sg_data) in enumerate(host_results)
-            @test sg_data.sub_group_size == sg_size
-            @test sg_data.max_sub_group_size == sg_size
-            @test sg_data.num_sub_groups == sg_n
-
-            # Group ID should be 1-based
-            expected_sub_group = div(((i - 1) % local_size), sg_size) + 1
-            @test sg_data.sub_group_id == expected_sub_group
-
-            # Local ID should be 1-based within group
-            expected_sg_local = ((i - 1) % sg_size) + 1
-            @test sg_data.sub_group_local_id == expected_sg_local
-        end
-    end
-
-    @testset "shuffle idx" begin
-        function shfl_idx_kernel(d)
-            i = get_sub_group_local_id()
-            j = get_sub_group_size() - i + 1
-
-            d[i] = sub_group_shuffle(d[i], j)
-
-            return
-        end
-
-        @testset for T in cl.sub_group_shuffle_supported_types(cl.device())
-            a = rand(T, sg_size)
-            d_a = CLArray(a)
-            @opencl local_size = sg_size global_size = sg_size shfl_idx_kernel(d_a)
-            @test Array(d_a) == reverse(a)
-        end
-    end
-end
-end # if cl.sub_groups_supported(cl.device())
+        if cl.sub_groups_supported(cl.device())
+
+            struct SubgroupData
+                sub_group_size::UInt32
+                max_sub_group_size::UInt32
+                num_sub_groups::UInt32
+                sub_group_id::UInt32
+                sub_group_local_id::UInt32
+            end
+            function test_subgroup_kernel(results)
+                i = get_global_id(1)
+
+                if i <= length(results)
+                    @inbounds results[i] = SubgroupData(
+                        get_sub_group_size(),
+                        get_max_sub_group_size(),
+                        get_num_sub_groups(),
+                        get_sub_group_id(),
+                        get_sub_group_local_id()
+                    )
+                end
+                return
+            end
+
+            @testset "Sub-groups" begin
+                sg_size = cl.sub_group_size(cl.device())
+
+                @testset "Indexing intrinsics" begin
+                    # Test with small kernel
+                    sg_n = 2
+                    local_size = sg_size * sg_n
+                    numworkgroups = 2
+                    N = local_size * numworkgroups
+
+                    results = CLVector{SubgroupData}(undef, N)
+                    kernel = @opencl launch = false test_subgroup_kernel(results)
+
+                    kernel(results; local_size, global_size = N)
+
+                    host_results = Array(results)
+
+                    # Verify results make sense
+                    for (i, sg_data) in enumerate(host_results)
+                        @test sg_data.sub_group_size == sg_size
+                        @test sg_data.max_sub_group_size == sg_size
+                        @test sg_data.num_sub_groups == sg_n
+
+                        # Group ID should be 1-based
+                        expected_sub_group = div(((i - 1) % local_size), sg_size) + 1
+                        @test sg_data.sub_group_id == expected_sub_group
+
+                        # Local ID should be 1-based within group
+                        expected_sg_local = ((i - 1) % sg_size) + 1
+                        @test sg_data.sub_group_local_id == expected_sg_local
+                    end
+                end
+
+                @testset "shuffle idx" begin
+                    function shfl_idx_kernel(d)
+                        i = get_sub_group_local_id()
+                        j = get_sub_group_size() - i + 1
+
+                        d[i] = sub_group_shuffle(d[i], j)
+
+                        return
+                    end
+
+                    @testset for T in cl.sub_group_shuffle_supported_types(cl.device())
+                        a = rand(T, sg_size)
+                        d_a = CLArray(a)
+                        @opencl local_size = sg_size global_size = sg_size shfl_idx_kernel(d_a)
+                        @test Array(d_a) == reverse(a)
+                    end
+                end
+            end
+        end # if cl.sub_groups_supported(cl.device())
 
 @testset "SIMD - $N x $T" for N in simd_ns, T in float_types
     # codegen emits i48 here, which SPIR-V doesn't support

@codecov
Copy link

codecov bot commented Nov 18, 2025

Codecov Report

❌ Patch coverage is 90.00000% with 3 lines in your changes missing coverage. Please review.
✅ Project coverage is 82.46%. Comparing base (a1f79db) to head (de7c136).

Files with missing lines Patch % Lines
src/OpenCLKernels.jl 88.88% 2 Missing ⚠️
src/compiler/compilation.jl 91.66% 1 Missing ⚠️
Additional details and impacted files
@@            Coverage Diff             @@
##           master     #403      +/-   ##
==========================================
+ Coverage   80.84%   82.46%   +1.62%     
==========================================
  Files          12       11       -1     
  Lines         736      730       -6     
==========================================
+ Hits          595      602       +7     
+ Misses        141      128      -13     

☔ View full report in Codecov by Sentry.
📢 Have feedback on the report? Share it here.

🚀 New features to boost your workflow:
  • ❄️ Test Analytics: Detect flaky tests, report on failures, and find test suite problems.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant