Fix categorical colors wrongly assigned to points with non-sequential index (#358)

timtreis · claude · timtreis · commit 0f0db9930638 · 2026-03-31T00:46:51.000+02:00
When points have a shuffled or non-sequential index (e.g. from .sample()
or .subset()), _reparse_points sorts rows by index while adata.X retains
the original row order. As a result, get_values returns colors in
index-sorted order, misaligned with the coordinates. Resetting the index
to a sequential one before constructing adata and reparsing ensures both
share the same positional order. Since the index is already reset at that
point, obs is now built from points[coords] directly, without a second
reset_index() call.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/src/spatialdata_plot/pl/render.py b/src/spatialdata_plot/pl/render.py
@@ -743,6 +743,9 @@ def _render_points(
         )
         added_color_from_table = True
 
+    # Reset to sequential index so row order matches after _reparse_points round-trip (#358).
+    points = points.reset_index(drop=True)
+
     n_points = len(points)
     points_pd_with_color = points
     # When we pull colors from a table, keep the raw points (with color) for later,
@@ -758,7 +761,7 @@ def _render_points(
     if table_name is None:
         adata = AnnData(
             X=points[["x", "y"]].values,
-            obs=points[coords].reset_index(),
+            obs=points[coords],
             dtype=points[["x", "y"]].values.dtype,
         )
     else:
diff --git a/tests/pl/test_render_points.py b/tests/pl/test_render_points.py
@@ -607,6 +607,46 @@ def test_plot_groups_na_color_none_filters_points_datashader(self, sdata_blobs:
         ).pl.show(ax=axs[1], title="default (filtered)")
 
 
+@pytest.mark.parametrize("method", ["matplotlib", "datashader"])
+def test_shuffled_index_categorical_color_alignment(method: str):
+    """Regression test for #358: categorical colors must follow the data, not the index order."""
+    # Two spatially separated clusters so correct/incorrect coloring is distinguishable.
+    n = 100
+    rng = get_standard_RNG()
+    x = np.concatenate([rng.uniform(0, 10, n // 2), rng.uniform(90, 100, n // 2)])
+    y = np.concatenate([rng.uniform(0, 10, n // 2), rng.uniform(90, 100, n // 2)])
+    df = pd.DataFrame(
+        {
+            "x": x,
+            "y": y,
+            "cluster": pd.Categorical(["A"] * (n // 2) + ["B"] * (n // 2)),
+        }
+    )
+    # Shuffle rows so the index is non-sequential (simulates .sample() / .subset()).
+    shuffled = df.sample(frac=1, random_state=42)
+    assert shuffled.index.tolist() != list(range(n)), "sanity: index should be shuffled"
+
+    pts = PointsModel.parse(shuffled)
+    sdata = SpatialData(points={"pts": pts})
+
+    _, ax = plt.subplots()
+    sdata.pl.render_points("pts", color="cluster", method=method, size=20).pl.show(ax=ax)
+
+    # For datashader we can only check it doesn't error; for matplotlib we can
+    # inspect the scatter colors directly.
+    if method == "matplotlib":
+        colls = [c for c in ax.collections if hasattr(c, "get_offsets") and len(c.get_offsets()) > 0]
+        assert colls, "expected scatter points"
+        offsets = colls[-1].get_offsets()
+        colors = colls[-1].get_facecolors()
+        left_colors = np.unique(colors[offsets[:, 0] < 50], axis=0)
+        right_colors = np.unique(colors[offsets[:, 0] > 50], axis=0)
+        assert len(left_colors) == 1, f"left cluster should have 1 color, got {len(left_colors)}"
+        assert len(right_colors) == 1, f"right cluster should have 1 color, got {len(right_colors)}"
+        assert not np.array_equal(left_colors[0], right_colors[0]), "clusters should have different colors"
+    plt.close("all")
+
+
 def test_groups_na_color_none_no_match_points(sdata_blobs: SpatialData):
     """When no elements match the groups, the plot should render without error."""
     sdata_blobs["blobs_points"]["cat_color"] = pd.Series(["a", "b", "c", "a"] * 50, dtype="category")