Fix categorical colors wrongly assigned to points with non-sequential index (#358)

timtreis · claude · timtreis · commit a7828519ea2f · 2026-03-31T00:53:00.000+02:00
When points have a shuffled or non-sequential index (e.g. from .sample()
or .subset()), _reparse_points sorts rows by index while adata.X retains
the original order. As a result, get_values returns colors in sorted
index order, misaligned with the coordinates. Resetting the index to a
sequential one before adata construction and reparsing ensures that the
reparsed points and adata.X share the same positional order.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
diff --git a/src/spatialdata_plot/pl/render.py b/src/spatialdata_plot/pl/render.py
@@ -743,6 +743,9 @@ def _render_points(
         )
         added_color_from_table = True
 
+    # Reset to sequential index so row order matches after _reparse_points round-trip (#358).
+    points = points.reset_index(drop=True)
+
     n_points = len(points)
     points_pd_with_color = points
     # When we pull colors from a table, keep the raw points (with color) for later,
@@ -758,7 +761,7 @@ def _render_points(
     if table_name is None:
         adata = AnnData(
             X=points[["x", "y"]].values,
-            obs=points[coords].reset_index(),
+            obs=points[coords],
             dtype=points[["x", "y"]].values.dtype,
         )
     else:
diff --git a/tests/pl/test_render_points.py b/tests/pl/test_render_points.py
@@ -607,6 +607,41 @@ def test_plot_groups_na_color_none_filters_points_datashader(self, sdata_blobs:
         ).pl.show(ax=axs[1], title="default (filtered)")
 
 
+def test_shuffled_index_categorical_color_alignment():
+    """Regression test for #358: categorical colors must follow the data, not the index order."""
+    n = 100
+    rng = get_standard_RNG()
+    x = np.concatenate([rng.uniform(0, 10, n // 2), rng.uniform(90, 100, n // 2)])
+    y = np.concatenate([rng.uniform(0, 10, n // 2), rng.uniform(90, 100, n // 2)])
+    df = pd.DataFrame(
+        {
+            "x": x,
+            "y": y,
+            "cluster": pd.Categorical(["A"] * (n // 2) + ["B"] * (n // 2)),
+        }
+    )
+    pts = PointsModel.parse(df)
+    sdata = SpatialData(points={"pts": pts})
+
+    # .sample() produces a non-sequential, shuffled index — the trigger for #358.
+    sampled = sdata.points["pts"].compute().sample(frac=0.8, random_state=42)
+    sdata.points["pts"] = PointsModel.parse(sampled)
+
+    _, ax = plt.subplots()
+    sdata.pl.render_points("pts", color="cluster", method="matplotlib", size=20).pl.show(ax=ax)
+
+    colls = [c for c in ax.collections if hasattr(c, "get_offsets") and len(c.get_offsets()) > 0]
+    assert colls, "expected scatter points"
+    offsets = colls[-1].get_offsets()
+    colors = colls[-1].get_facecolors()
+    left_colors = np.unique(colors[offsets[:, 0] < 50], axis=0)
+    right_colors = np.unique(colors[offsets[:, 0] > 50], axis=0)
+    assert len(left_colors) == 1, f"left cluster should have 1 color, got {len(left_colors)}"
+    assert len(right_colors) == 1, f"right cluster should have 1 color, got {len(right_colors)}"
+    assert not np.array_equal(left_colors[0], right_colors[0]), "clusters should have different colors"
+    plt.close("all")
+
+
 def test_groups_na_color_none_no_match_points(sdata_blobs: SpatialData):
     """When no elements match the groups, the plot should render without error."""
     sdata_blobs["blobs_points"]["cat_color"] = pd.Series(["a", "b", "c", "a"] * 50, dtype="category")