6464
6565_Normalize = Normalize | abc .Sequence [Normalize ]
6666
67+ # Sentinel category name used in datashader categorical paths to represent
68+ # missing (NaN) values. Must not collide with realistic user category names.
69+ _DS_NAN_CATEGORY = "ds_nan"
70+
6771
6872def _coerce_categorical_source (cat_source : Any ) -> pd .Categorical :
6973 """Return a pandas Categorical from known, concrete sources only."""
@@ -82,6 +86,26 @@ def _coerce_categorical_source(cat_source: Any) -> pd.Categorical:
8286 return pd .Categorical (pd .Series (cat_source ))
8387
8488
def _build_datashader_color_key(
    cat_series: pd.Categorical,
    color_vector: Any,
    na_color_hex: str,
) -> dict[str, str]:
    """Build a datashader ``color_key`` dict from a categorical series and its color vector.

    Each category is mapped to the color found in ``color_vector`` at the position
    of the first element of ``cat_series`` that belongs to that category.

    Parameters
    ----------
    cat_series
        Categorical values, positionally aligned with ``color_vector``.
    color_vector
        Per-element colors; converted to an object array and indexed by position.
    na_color_hex
        Color used for the ``_DS_NAN_CATEGORY`` sentinel category and for
        categories that have no elements in ``cat_series``.

    Returns
    -------
    Mapping from ``str(category)`` to its color. String colors starting with
    ``"#"`` are passed through ``_hex_no_alpha`` before being stored.
    """
    colors_arr = np.asarray(color_vector, dtype=object)

    # Locate the first occurrence of every category code in a single pass instead
    # of re-scanning the whole series once per category. Missing values carry the
    # code -1, which never matches an index into ``categories`` and is thus ignored.
    present_codes, first_positions = np.unique(np.asarray(cat_series.codes), return_index=True)
    first_position_by_code = dict(zip(present_codes.tolist(), first_positions.tolist()))

    color_key: dict[str, str] = {}
    for code, cat in enumerate(cat_series.categories):
        if cat == _DS_NAN_CATEGORY:
            key_color = na_color_hex
        else:
            position = first_position_by_code.get(code)
            # Categories without any element fall back to the NA color.
            key_color = na_color_hex if position is None else colors_arr[position]
        if isinstance(key_color, str) and key_color.startswith("#"):
            key_color = _hex_no_alpha(key_color)
        color_key[str(cat)] = key_color
    return color_key
107+
108+
85109def _split_colorbar_params (params : dict [str , object ] | None ) -> tuple [dict [str , object ], dict [str , object ], str | None ]:
86110 """Split colorbar params into layout hints, Matplotlib kwargs, and label override."""
87111 layout : dict [str , object ] = {}
@@ -185,6 +209,20 @@ def _render_shapes(
185209
186210 values_are_categorical = color_source_vector is not None
187211
212+ # When groups are specified and na_color is fully transparent (na_color=None),
213+ # filter out non-matching elements instead of showing them as invisible geometry.
214+ if groups is not None and values_are_categorical and render_params .cmap_params .na_color .alpha == "00" :
215+ csv_series = pd .Series (color_source_vector )
216+ keep = csv_series .isin (groups ).values
217+ shapes = shapes [keep ].reset_index (drop = True )
218+ sdata_filt [element ] = shapes
219+ color_source_vector = pd .Categorical (csv_series [keep ].reset_index (drop = True ))
220+ color_vector = (
221+ np .asarray (color_vector )[keep ]
222+ if not hasattr (color_vector , "reset_index" )
223+ else (color_vector [keep ].reset_index (drop = True ))
224+ )
225+
188226 # color_source_vector is None when the values aren't categorical
189227 if values_are_categorical and render_params .transfunc is not None :
190228 color_vector = render_params .transfunc (color_vector )
@@ -322,9 +360,9 @@ def _render_shapes(
322360 continuous_nan_shapes = None
323361 if col_for_color is not None and (render_params .groups is None or len (render_params .groups ) > 1 ):
324362 if color_by_categorical :
325- # add nan as a category so that shapes with nan value are colored in the nan color
363+ # add a sentinel category so that shapes with NaN value are colored in the na_color
326364 transformed_element [col_for_color ] = (
327- transformed_element [col_for_color ].cat .add_categories ("nan" ).fillna ("nan" )
365+ transformed_element [col_for_color ].cat .add_categories (_DS_NAN_CATEGORY ).fillna (_DS_NAN_CATEGORY )
328366 )
329367 agg = cvs .polygons (
330368 transformed_element ,
@@ -391,17 +429,9 @@ def _render_shapes(
391429 color_key : dict [str , str ] | None = None
392430 if color_by_categorical and col_for_color is not None :
393431 cat_series = _coerce_categorical_source (transformed_element [col_for_color ])
394- colors_arr = np .asarray (color_vector , dtype = object )
395- color_key = {}
396- for cat in cat_series .categories :
397- if cat == "nan" :
398- key_color = render_params .cmap_params .na_color .get_hex ()
399- else :
400- idx = np .flatnonzero (cat_series == cat )
401- key_color = colors_arr [idx [0 ]] if idx .size else render_params .cmap_params .na_color .get_hex ()
402- if isinstance (key_color , str ) and key_color .startswith ("#" ):
403- key_color = _hex_no_alpha (key_color )
404- color_key [str (cat )] = key_color
432+ color_key = _build_datashader_color_key (
433+ cat_series , color_vector , render_params .cmap_params .na_color .get_hex ()
434+ )
405435
406436 if color_by_categorical or col_for_color is None :
407437 ds_cmap = None
@@ -812,6 +842,27 @@ def _render_points(
812842 )
813843 points_dd = points_with_color_dd
814844
845+ # When groups are specified and na_color is fully transparent (na_color=None),
846+ # filter out non-matching points instead of rendering invisible geometry.
847+ if groups is not None and color_source_vector is not None and render_params .cmap_params .na_color .alpha == "00" :
848+ csv_series = pd .Series (color_source_vector )
849+ keep = csv_series .isin (groups ).values
850+ color_source_vector = pd .Categorical (csv_series [keep ].reset_index (drop = True ))
851+ color_vector = (
852+ np .asarray (color_vector )[keep ]
853+ if not hasattr (color_vector , "reset_index" )
854+ else (color_vector [keep ].reset_index (drop = True ))
855+ )
856+ # re-register filtered points in sdata_filt
857+ points_dd = dask .dataframe .from_pandas (points [keep ].reset_index (drop = True ), npartitions = 1 )
858+ sdata_filt .points [element ] = PointsModel .parse (points_dd , coordinates = {"x" : "x" , "y" : "y" })
859+ set_transformation (
860+ element = sdata_filt .points [element ],
861+ transformation = transformation_in_cs ,
862+ to_coordinate_system = coordinate_system ,
863+ )
864+ n_points = int (keep .sum ())
865+
815866 # color_source_vector is None when the values aren't categorical
816867 if color_source_vector is None and render_params .transfunc is not None :
817868 color_vector = render_params .transfunc (color_vector )
@@ -895,9 +946,9 @@ def _render_points(
895946 cat_series = cat_series .astype ("category" )
896947 if hasattr (cat_series .cat , "as_known" ):
897948 cat_series = cat_series .cat .as_known ()
898- if "nan" not in cat_series .cat .categories :
899- cat_series = cat_series .cat .add_categories ("nan" )
900- transformed_element [col_for_color ] = cat_series .fillna ("nan" )
949+ if _DS_NAN_CATEGORY not in cat_series .cat .categories :
950+ cat_series = cat_series .cat .add_categories (_DS_NAN_CATEGORY )
951+ transformed_element [col_for_color ] = cat_series .fillna (_DS_NAN_CATEGORY )
901952 agg = cvs .points (transformed_element , "x" , "y" , agg = ds .by (col_for_color , ds .count ()))
902953 else :
903954 reduction_name = render_params .ds_reduction if render_params .ds_reduction is not None else "sum"
@@ -942,17 +993,9 @@ def _render_points(
942993 color_key : dict [str , str ] | None = None
943994 if color_by_categorical and col_for_color is not None :
944995 cat_series = _coerce_categorical_source (transformed_element [col_for_color ])
945- colors_arr = np .asarray (color_vector , dtype = object )
946- color_key = {}
947- for cat in cat_series .categories :
948- if cat == "nan" :
949- key_color = render_params .cmap_params .na_color .get_hex ()
950- else :
951- idx = np .flatnonzero (cat_series == cat )
952- key_color = colors_arr [idx [0 ]] if idx .size else render_params .cmap_params .na_color .get_hex ()
953- if isinstance (key_color , str ) and key_color .startswith ("#" ):
954- key_color = _hex_no_alpha (key_color )
955- color_key [str (cat )] = key_color
996+ color_key = _build_datashader_color_key (
997+ cat_series , color_vector , render_params .cmap_params .na_color .get_hex ()
998+ )
956999
9571000 if (
9581001 color_vector is not None
0 commit comments