Skip to content

Commit ec9f424

Browse files
author
miranov25
committed
Forgottend commit of refernce test and bench log
1 parent 2b27e47 commit ec9f424

File tree

1 file changed

+81
-0
lines changed
  • UTILS/dfextensions/quantile_fit_nd

1 file changed

+81
-0
lines changed
Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
# dfextensions/quantile_fit_nd/utils.py
2+
import numpy as np
3+
from typing import Optional
4+
5+
def discrete_to_uniform_rank_poisson(
6+
k: np.ndarray,
7+
lam: float,
8+
mode: str = "randomized",
9+
rng: Optional[np.random.Generator] = None,
10+
) -> np.ndarray:
11+
"""
12+
Map Poisson counts k ~ Poisson(lam) to U ~ Uniform(0,1).
13+
14+
mode="randomized" (preferred, exact PIT):
15+
U = F(k-1) + V * (F(k) - F(k-1)), V ~ Unif(0,1)
16+
mode="midrank" (deterministic):
17+
U = 0.5 * (F(k-1) + F(k))
18+
19+
Returns U in [0,1].
20+
"""
21+
k = np.asarray(k, dtype=np.int64)
22+
if rng is None:
23+
rng = np.random.default_rng()
24+
25+
k_max = int(np.max(k)) if k.size else 0
26+
pmf = np.empty(k_max + 1, dtype=np.float64)
27+
pmf[0] = np.exp(-lam)
28+
for j in range(k_max):
29+
pmf[j + 1] = pmf[j] * lam / (j + 1)
30+
31+
cdf = np.cumsum(pmf)
32+
cdf = np.clip(cdf, 0.0, 1.0)
33+
34+
Fk = cdf[k]
35+
Fkm1 = np.where(k > 0, cdf[k - 1], 0.0)
36+
37+
if mode == "randomized":
38+
u = rng.random(size=k.size)
39+
U = Fkm1 + u * (Fk - Fkm1)
40+
elif mode == "midrank":
41+
U = 0.5 * (Fkm1 + Fk)
42+
else:
43+
raise ValueError(f"unknown mode {mode!r}")
44+
45+
return np.clip(U, 0.0, 1.0)
46+
47+
48+
def discrete_to_uniform_rank_empirical(
49+
x: np.ndarray,
50+
mode: str = "randomized",
51+
rng: Optional[np.random.Generator] = None,
52+
) -> np.ndarray:
53+
"""
54+
Generic discrete -> Uniform(0,1) using the empirical CDF of x.
55+
56+
For unique value v with mass p_v and cumulative F(v):
57+
randomized: U ~ Uniform(F(v-), F(v))
58+
midrank: U = 0.5 * (F(v-) + F(v))
59+
"""
60+
x = np.asarray(x)
61+
n = x.size
62+
if rng is None:
63+
rng = np.random.default_rng()
64+
if n == 0:
65+
return np.array([], dtype=np.float64)
66+
67+
uniq, inv = np.unique(x, return_inverse=True)
68+
counts = np.bincount(inv, minlength=uniq.size)
69+
cum = np.cumsum(counts)
70+
F_curr = cum / float(n)
71+
F_prev = (cum - counts) / float(n)
72+
73+
if mode == "randomized":
74+
u = rng.random(size=n)
75+
U = F_prev[inv] + u * (F_curr[inv] - F_prev[inv])
76+
elif mode == "midrank":
77+
U = 0.5 * (F_prev[inv] + F_curr[inv])
78+
else:
79+
raise ValueError(f"unknown mode {mode!r}")
80+
81+
return np.clip(U, 0.0, 1.0)

0 commit comments

Comments
 (0)