@@ -20,6 +20,47 @@ def df_draw_scatter(
2020 jitter = False ,
2121 show = True # if False, don't plt.show(); always return (fig, ax)
2222):
23+ """
24+ Create a scatter plot from a DataFrame with optional color, marker size, and jitter.
25+
26+ Parameters
27+ ----------
28+ df : pandas.DataFrame
29+ Input DataFrame containing the data.
30+ expr : str
31+ Expression in 'y:x' format specifying y-axis and x-axis columns (e.g., 'sigma:pTmin').
32+ selection : str, bool array, or callable, optional
33+ Filter to apply. Can be a pandas query string (engine='python'), a boolean mask,
34+ or a callable returning a mask (default: None, uses full df).
35+ color : str, optional
36+ Column name for color mapping (continuous or categorical, default: None).
37+ marker : str, optional
38+ Column name for marker size mapping (numeric, default: None).
39+ cmap : str, optional
40+ Colormap name (e.g., 'tab10', default: 'tab10').
41+ jitter : bool, optional
42+ Add small random jitter to x and y coordinates (default: False).
43+ show : bool, optional
44+ Display the plot if True (default: True); always returns (fig, ax).
45+
46+ Returns
47+ -------
48+ tuple
49+ (fig, ax) : matplotlib Figure and Axes objects for further customization.
50+
51+ Raises
52+ ------
53+ ValueError
54+ If expr is not in 'y:x' format or selection query fails.
55+ TypeError
56+ If selection is neither str, bool array, nor callable.
57+
58+ Notes
59+ -----
60+ - Filters NA values from x and y before plotting.
61+ - Jitter helps visualize quantized data (x: ±0.1, y: ±2e-4).
62+ - Colorbar is added for continuous color; categorical colors use the first color for NA.
63+ """
2364 # --- parse "y:x"
2465 try :
2566 y_col , x_col = expr .split (":" )
@@ -144,46 +185,69 @@ def df_draw_scatter_categorical(
144185 show : bool = False ,
145186):
146187 """
147- Scatter plot with categorical COLOR and MARKER SHAPE; flexible size control. Returns (fig, ax).
148-
149- Parameters
150- ----------
151- expr : str
152- ROOT-like "y:x" expression, e.g. "sigma:pTmin".
153- selection : str, optional
154- pandas query string evaluated with engine="python".
155- Example: "productionId.str.contains(r'(?:LHC25b8a|LHC24)', regex=True, na=False)".
156- color : str, optional
157- Categorical column used for colors (legend #1).
158- marker_style : str, optional
159- Categorical column used for marker shapes (legend #2).
160- marker_size : None | "" | number | str, optional
161- - None or "" → constant default size (150 pt^2).
162- - number → fixed size (pt^2) for all points.
163- - str (column):
164- * numeric → min–max normalize to [100, 400] pt^2
165- * non-numeric → map categories to sizes (150, 220, 290, …)
166- jitter : bool, default False
167- Add small uniform jitter to x and y.
168- top_k_color, other_label_color, order_color :
169- control color categories (reduce tail to 'Other', set order).
170- top_k_marker, other_label_marker, order_marker :
171- control marker-shape categories.
172- palette : list, optional
173- Colors to cycle through; defaults to repeating 'tab20'.
174- markers : list, optional
175- Marker shapes; defaults to ["o","s","^","D","P","X","v","<",">","h","H","*","p"].
176- legend_outside : bool, default True
177- Reserve right margin and place legends outside so they aren’t clipped.
178- legend_cols_color, legend_cols_marker : int
179- Number of columns for each legend block.
180- show : bool, default True
181- If True, plt.show() is called. Function always returns (fig, ax).
182-
183- Raises
184- ------
185- ValueError / TypeError on malformed expr or failed selection.
186- """
188+ Create a scatter plot with categorical colors and marker shapes from a DataFrame.
189+
190+ Parameters
191+ ----------
192+ df : pandas.DataFrame
193+ Input DataFrame containing the data.
194+ expr : str
195+ Expression in 'y:x' format specifying y-axis and x-axis columns (e.g., 'sigma:pTmin').
196+ selection : str, optional
197+ Pandas query string (engine='python') to filter data (e.g., "productionId.str.contains(...)").
198+ color : str, optional
199+ Column name for categorical color mapping (legend #1, default: None).
200+ marker_style : str, optional
201+ Column name for categorical marker shape mapping (legend #2, default: None).
202+ marker_size : None | "" | number | str, optional
203+ - None or "" : Constant size (150 pt²).
204+ - number : Fixed size (pt²) for all points.
205+ - str : Column name; numeric values are min–max normalized to [100, 400] pt², non-numeric categories are mapped to sizes (150, 220, 290, ...).
206+ jitter : bool, default False
207+ Add small uniform jitter to x and y coordinates.
208+ top_k_color : int, optional
209+ Keep top-K color categories, others mapped to `other_label_color` (default: None).
210+ other_label_color : str, default "Other"
211+ Label for non-top-K color categories.
212+ order_color : list, optional
213+ Explicit order for color legend categories (default: by frequency).
214+ top_k_marker : int, optional
215+ Keep top-K marker categories, others mapped to `other_label_marker` (default: None).
216+ other_label_marker : str, default "Other"
217+ Label for non-top-K marker categories.
218+ order_marker : list, optional
219+ Explicit order for marker legend categories (default: by frequency).
220+ palette : list, optional
221+ List of color specs to cycle (default: repeats 'tab20').
222+ markers : list, optional
223+ List of marker styles (default: ["o", "s", "^", ...]).
224+ legend_outside : bool, default True
225+ Place legends outside plot, reserving right margin.
226+ legend_cols_color : int, default 1
227+ Number of columns in color legend.
228+ legend_cols_marker : int, default 1
229+ Number of columns in marker legend.
229+ show : bool, default False
230+ Call plt.show() if True; the function always returns (fig, ax).
232+
233+ Returns
234+ -------
235+ tuple
236+ (fig, ax) : matplotlib Figure and Axes objects.
237+
238+ Raises
239+ ------
240+ ValueError
241+ If expr is not in 'y:x' format or the selection query fails.
242+ TypeError
243+ If selection is not a string or marker_size is invalid.
244+
245+ Notes
246+ -----
247+ - Designed for ALICE data visualization (e.g., D0 resolution plots).
248+ - Filters NA values and handles categorical data robustly.
249+ - Legends are added outside to avoid clipping; adjust `bbox_to_anchor` if needed.
250+ """
187251 # --- parse "y:x"
188252 try :
189253 y_col , x_col = expr .split (":" )
@@ -385,15 +449,15 @@ def drawExample():
385449 marker_size = 100 , # pt²
386450 )
387451 fig .savefig ("out.png" , dpi = 200 , bbox_inches = "tight" )
388-
452+ ##
389453 fig , ax = df_draw_scatter_categorical (
390454 df , "sigma:pTmin" ,
391455 selection = "productionId.str.contains(r'(?:LHC24|LHC25a5)', regex=True, na=False)" ,
392456 color = "productionId" ,
393457 marker_style = "centmin" ,
394458 marker_size = 100 , # pt²
395459 )
396- fig .savefig ("resol_LHC24_LHC25a5.png" , dpi = 200 , bbox_inches = "tight" )
460+ fig .savefig ("resol_LHC24_LHC25a5.png" , dpi = 200 )
397461
398462 fig , ax = df_draw_scatter_categorical (
399463 df , "sigma:pTmin" ,
0 commit comments