PERF: Remove Numba union-find kernel in favor of SciPy fallback

sharifhsn · claude · sharifhsn · commit eed58ad2ddf8 · 2026-03-17T11:56:46.000-04:00
Remove the _union and _st_fused_ccl Numba functions. End-to-end benchmarks
show the Numba kernel is only ~10% faster than the SciPy
connected-components path, which doesn't justify ~100 lines of
Numba-specific code. The SciPy path alone provides a ~5x speedup over the
original BFS without requiring Numba.

A permalink to the removed Numba kernel is preserved in a code comment
for future reference.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/mne/stats/cluster_level.py b/mne/stats/cluster_level.py
@@ -124,98 +124,6 @@ def _sum_cluster_data(data, tstep):
     return np.sign(data) * tstep
 
 
-@jit(inline="always")
-def _union(a_pos, b_pos, parent, rank):
-    """Find roots with path compression and union by rank."""
-    ra = a_pos
-    while parent[ra] != ra:
-        parent[ra] = parent[parent[ra]]
-        ra = parent[ra]
-    rb = b_pos
-    while parent[rb] != rb:
-        parent[rb] = parent[parent[rb]]
-        rb = parent[rb]
-    if ra != rb:
-        if rank[ra] < rank[rb]:
-            parent[ra] = rb
-        elif rank[ra] > rank[rb]:
-            parent[rb] = ra
-        else:
-            parent[rb] = ra
-            rank[ra] += 1
-
-
-@jit()
-def _st_fused_ccl(
-    active_idx, n_active, flat_to_active, adj_indptr, adj_indices, n_src, max_step
-):
-    """Label connected components among supra-threshold vertices via union-find.
-
-    Replaces the Python BFS in ``_get_clusters_st`` with a single-pass
-    union-find (disjoint-set) algorithm over spatial and temporal neighbors.
-    Data is organized as ``n_times x n_src``; spatial adjacency is stored in
-    CSR format (``adj_indptr``/``adj_indices``), and temporal neighbors are
-    the same source vertex at up to ``max_step`` earlier time points.
-
-    Each active vertex starts as its own component. A linear scan unions each
-    vertex with its active spatial and temporal neighbors. Path compression
-    and union-by-rank keep the amortized cost per union nearly O(1), making
-    the full pass O(n * alpha(n)) where alpha is the inverse Ackermann
-    function. The main practical speedup comes from running entirely inside
-    a single Numba-compiled function, eliminating the per-vertex
-    Python/Numba boundary crossings of the BFS approach.
-    """
-    # Union-find / disjoint-set forest:
-    # https://en.wikipedia.org/wiki/Disjoint-set_data_structure
-    # build flat→active mapping
-    for i in range(n_active):
-        flat_to_active[active_idx[i]] = i
-
-    parent = np.arange(n_active)
-    rank = np.zeros(n_active, dtype=np.int32)
-
-    for a_pos in range(n_active):
-        flat_i = active_idx[a_pos]
-        t_i = flat_i // n_src
-        s_i = flat_i - t_i * n_src
-
-        # spatial neighbors
-        for j_ptr in range(adj_indptr[s_i], adj_indptr[s_i + 1]):
-            s_j = adj_indices[j_ptr]
-            flat_j = t_i * n_src + s_j
-            b_pos = flat_to_active[flat_j]
-            if b_pos >= 0:
-                _union(a_pos, b_pos, parent, rank)
-
-        # temporal neighbors (same vertex, previous time steps)
-        for step in range(1, max_step + 1):
-            if t_i >= step:
-                flat_j = (t_i - step) * n_src + s_i
-                b_pos = flat_to_active[flat_j]
-                if b_pos >= 0:
-                    _union(a_pos, b_pos, parent, rank)
-
-    # final path compression + relabel to 0..n_components-1
-    label_map = -np.ones(n_active, dtype=np.intp)
-    next_label = np.intp(0)
-    components = np.empty(n_active, dtype=np.intp)
-    for i in range(n_active):
-        a = i
-        while parent[a] != a:
-            a = parent[a]
-        parent[i] = a
-        if label_map[a] == -1:
-            label_map[a] = next_label
-            next_label += 1
-        components[i] = label_map[a]
-
-    # clean up flat_to_active for next call
-    for i in range(n_active):
-        flat_to_active[active_idx[i]] = -1
-
-    return components
-
-
 def _get_clusters_spatial(s, neighbors):
     """Form spatial clusters using neighbor lists.
 
@@ -747,21 +655,11 @@ def _find_clusters_1dir(
                     return [], np.atleast_1d(np.array([]))
                 clusters = []
             else:
-                if has_numba:
-                    _flat_map = -np.ones(len(x_in), dtype=np.intp)
-                    components = _st_fused_ccl(
-                        active_idx,
-                        n_active,
-                        _flat_map,
-                        _indptr,
-                        _indices,
-                        _n_src,
-                        max_step,
-                    )
-                else:
-                    components = _get_clusters_st_scipy(
-                        x_in, (_indptr, _indices, _n_src), max_step
-                    )
+                # SciPy connected-components; for a Numba union-find
+                # alternative see https://github.com/sharifhsn/mne-python/blob/999ea49d9f180cea87dc3d522e530b51fba0dcc5/mne/stats/cluster_level.py#L122-L220
+                components = _get_clusters_st_scipy(
+                    x_in, (_indptr, _indices, _n_src), max_step
+                )
                 if _sums_only:
                     if t_power == 1:
                         sums = np.bincount(components, weights=x[active_idx])