Skip to content

Commit 0ae7eac

Browse files
author
miranov25
committed
feat(dfextensions): add ND quantile fitting (Δq-centered) + tests & bench
- Introduces dfextensions/quantile_fit_nd:
  - quantile_fit_nd.py: per-channel ND fit, separable interpolation, evaluator, I/O
  - test_quantile_fit_nd.py: synthetic unit tests (uniform/poisson/gaussian, z nuisance)
  - bench_quantile_fit_nd.py: simple timing benchmark over N and distributions
- Uses Δq-centered model: X = a(q0,n) + b(q0,n)·(Q − q0)
- Enforces monotonicity with configurable b_min (auto/fixed)
- Outputs DataFrame (Parquet/Arrow/ROOT) with diagnostics and metadata
1 parent 53db0b8 commit 0ae7eac

File tree

3 files changed

+629
-0
lines changed

3 files changed

+629
-0
lines changed
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
# dfextensions/quantile_fit_nd/bench_quantile_fit_nd.py
2+
# Simple timing benchmark across N points, distributions, and grid sizes.
3+
import time
4+
import numpy as np
5+
import pandas as pd
6+
7+
from dfextensions.quantile_fit_nd.quantile_fit_nd import fit_quantile_linear_nd
8+
9+
RNG = np.random.default_rng(1234)
10+
11+
12+
def _normal_cdf(values):
    """Return the standard normal CDF of each element of *values* as an array."""
    # Local import: math.erf is not imported at module level in this file.
    from math import erf

    root_two = np.sqrt(2)
    return 0.5 * (1.0 + np.array([erf(v / root_two) for v in values]))


def gen_data(n: int, dist: str = "uniform", sigma_X: float = 0.5, rng=None):
    """Generate a synthetic single-channel benchmark sample.

    The quantile-like variable ``Q`` is drawn according to *dist*:

    * ``"uniform"``  -- uniform on [0, 1);
    * ``"poisson"``  -- Poisson(20) counts ``m`` mapped through the normal CDF
      of ``(m + 0.5 - lam) / sqrt(lam)`` and clipped to [0, 1];
    * ``"gaussian"`` -- standard normal draws mapped through the normal CDF
      and clipped to [0, 1].

    A nuisance variable ``z`` is drawn from N(0, 5) and clipped to [-10, 10].
    The observable is ``X = a + b * Q + noise`` with ``a = 10 + 0.5 * z``,
    ``b = max(50 + 0.2 * z, 5)`` and Gaussian noise of width *sigma_X*.

    Args:
        n: Number of rows to generate.
        dist: One of ``"uniform"``, ``"poisson"``, ``"gaussian"``.
        sigma_X: Standard deviation of the additive noise on ``X``.
        rng: Optional ``numpy.random.Generator``. When ``None`` the
            module-level ``RNG`` is used, which is the original behaviour.

    Returns:
        A DataFrame with columns ``channel_id`` (constant ``"bench"``), ``Q``,
        ``X``, ``z_vtx`` and ``is_outlier`` (constant ``False``).

    Raises:
        ValueError: If *dist* is not one of the supported names.
    """
    supported = ("uniform", "poisson", "gaussian")
    if dist not in supported:
        # Fail before consuming any random numbers, and say what went wrong.
        raise ValueError(
            f"unknown dist {dist!r}; expected one of {', '.join(supported)}"
        )

    rng = RNG if rng is None else rng

    # Draw order (Q, then z, then noise) is kept so that seeded runs reproduce.
    if dist == "uniform":
        Q = rng.uniform(0, 1, size=n)
    elif dist == "poisson":
        lam = 20.0
        counts = rng.poisson(lam, size=n)
        scaled = (counts + 0.5 - lam) / np.sqrt(max(lam, 1e-6))
        Q = np.clip(_normal_cdf(scaled), 0, 1)
    else:  # "gaussian"
        draws = rng.normal(0.0, 1.0, size=n)
        Q = np.clip(_normal_cdf(draws), 0, 1)

    z = np.clip(rng.normal(0.0, 5.0, size=n), -10, 10)
    a = 10.0 + 0.5 * z
    # Floor the slope at 5 so X stays strictly increasing in Q.
    b = (50.0 + 2.0 * z / 10.0).clip(min=5.0)
    X = a + b * Q + rng.normal(0.0, sigma_X, size=n)
    df = pd.DataFrame(
        {"channel_id": "bench", "Q": Q, "X": X, "z_vtx": z, "is_outlier": False}
    )
    return df
36+
37+
38+
def run_one(n, dist, q_bins=11, z_bins=10):
    """Time a single ``fit_quantile_linear_nd`` call on freshly generated data.

    Data generation is excluded from the measurement; only the fit call
    (including construction of its arguments) is timed.

    Args:
        n: Number of synthetic rows to generate.
        dist: Distribution name forwarded to ``gen_data``.
        q_bins: Number of evenly spaced quantile centers on [0, 1].
        z_bins: Number of bins requested for the ``z`` nuisance axis.

    Returns:
        Tuple ``(seconds, rows)``: elapsed wall-clock time of the fit and the
        length of the table it returned.
    """
    sample = gen_data(n, dist=dist)

    started = time.perf_counter()
    fitted = fit_quantile_linear_nd(
        sample,
        channel_key="channel_id",
        q_centers=np.linspace(0, 1, q_bins),
        dq=0.05,
        nuisance_axes={"z": "z_vtx"},
        n_bins_axes={"z": z_bins},
        mask_col="is_outlier",
        b_min_option="auto",
        fit_mode="ols",
        kappa_w=1.3,
    )
    elapsed = time.perf_counter() - started

    return elapsed, len(fitted)
55+
56+
57+
def main():
    """Run the benchmark grid and print one CSV-style line per configuration.

    Every combination of sample size and distribution is fitted once with a
    fixed 11 x 10 (q, z) grid; the elapsed seconds and the number of rows in
    the fitted table are printed after a header line.
    """
    sample_sizes = [5_000, 50_000, 200_000]
    distributions = ["uniform", "poisson", "gaussian"]
    # Grid sizes are bound once so the call and the report cannot drift apart.
    q_bins, z_bins = 11, 10

    print("N, dist, q_bins, z_bins, secs, rows")
    # Same iteration order as nested loops: sizes outermost, distributions inner.
    cases = [(n, dist) for n in sample_sizes for dist in distributions]
    for n, dist in cases:
        dt, rows = run_one(n, dist, q_bins=q_bins, z_bins=z_bins)
        print(f"{n:>8}, {dist:>8}, {q_bins:>2}, {z_bins:>2}, {dt:7.3f}, {rows:>6}")


# Execute the benchmark only when run as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)