Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,7 @@
nitpick_ignore = [
("py:class", "pd.Series"),
("py:class", "pd.DataFrame"),
("py:class", "pandas.core.frame.DataFrame"),
("py:class", "ndarray"),
("py:class", "numpy._typing._generic_alias.ScalarType"),
("py:class", "pydantic.main.BaseModel"),
Expand Down
16 changes: 9 additions & 7 deletions pydeseq2/dds.py
Original file line number Diff line number Diff line change
Expand Up @@ -657,7 +657,7 @@ def fit_size_factors(
# Calculate logcounts for x > 0 and take the mean for each gene
log_counts = np.zeros_like(self.X, dtype=float)
np.log(self.X, out=log_counts, where=self.X != 0)
logmeans = log_counts.mean(0)
logmeans = log_counts.mean(axis=0)

# Determine which genes are usable (finite logmeans)
self.filtered_genes = (~np.isinf(logmeans)) & (logmeans > 0)
Expand Down Expand Up @@ -705,7 +705,7 @@ def sizeFactor(x):
raise ValueError("Counts matrix 'X' is None, cannot fit size factors.")

end = time.time()
self.var["_normed_means"] = self.layers["normed_counts"].mean(0)
self.var["_normed_means"] = self.layers["normed_counts"].mean(axis=0)

if not self.quiet:
print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr)
Expand Down Expand Up @@ -793,7 +793,9 @@ def fit_genewise_dispersions(self, vst=False) -> None:
dispersions_, self.min_disp, self.max_disp
)

self.var["_genewise_converged"] = np.full(self.n_vars, np.nan)
self.var["_genewise_converged"] = pd.array(
[pd.NA] * self.n_vars, dtype="boolean"
)
self.var.loc[self.var["non_zero"], "_genewise_converged"] = l_bfgs_b_converged_

def fit_dispersion_trend(self, vst: bool = False) -> None:
Expand Down Expand Up @@ -919,7 +921,7 @@ def fit_MAP_dispersions(self) -> None:
dispersions_, self.min_disp, self.max_disp
)

self.var["_MAP_converged"] = np.full(self.n_vars, np.nan)
self.var["_MAP_converged"] = pd.array([pd.NA] * self.n_vars, dtype="boolean")
self.var.loc[self.var["non_zero"], "_MAP_converged"] = l_bfgs_b_converged_

# Filter outlier genes for which we won't apply shrinkage
Expand Down Expand Up @@ -980,7 +982,7 @@ def fit_LFC(self) -> None:
self.obsm["_mu_LFC"] = mu_
self.obsm["_hat_diagonals"] = hat_diagonals_

self.var["_LFC_converged"] = np.full(self.n_vars, np.nan)
self.var["_LFC_converged"] = pd.array([pd.NA] * self.n_vars, dtype="boolean")
self.var.loc[self.var["non_zero"], "_LFC_converged"] = converged_

def calculate_cooks(self) -> None:
Expand Down Expand Up @@ -1098,7 +1100,7 @@ def cooks_outlier(self):

cooks_outlier[cooks_outlier] = (
self.X[:, cooks_outlier] > self.X[:, cooks_outlier][pos, np.arange(len(pos))]
).sum(0) < 3
).sum(axis=0) < 3

if self.low_memory:
del self.layers["cooks"]
Expand Down Expand Up @@ -1423,7 +1425,7 @@ def _refit_without_outliers(
sub_dds.uns["trend_coeffs"] = self.uns["trend_coeffs"]
elif sub_dds.uns["disp_function_type"] == "mean":
sub_dds.uns["mean_disp"] = self.uns["mean_disp"]
sub_dds.var["_normed_means"] = sub_dds.layers["normed_counts"].mean(0)
sub_dds.var["_normed_means"] = sub_dds.layers["normed_counts"].mean(axis=0)
# Reshape in case there's a single gene to refit
sub_dds.var["fitted_dispersions"] = sub_dds.disp_function(
sub_dds.var["_normed_means"]
Expand Down
44 changes: 24 additions & 20 deletions pydeseq2/ds.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,29 +411,33 @@ def lfc_shrink(self, coeff: str, adapt: bool = True) -> None:
if not self.quiet:
print(f"... done in {end - start:.2f} seconds.\n", file=sys.stderr)

self.LFC.iloc[:, coeff_idx].update(
pd.Series(
np.array(lfcs)[:, coeff_idx],
index=self.dds.non_zero_genes,
)
new_lfc_values = np.array(lfcs)[:, coeff_idx]
new_se_values = np.array(
[
np.sqrt(np.abs(inv_hess[coeff_idx, coeff_idx]))
for inv_hess in inv_hessians
]
)

self.SE.update(
pd.Series(
np.array(
[
np.sqrt(np.abs(inv_hess[coeff_idx, coeff_idx]))
for inv_hess in inv_hessians
]
),
index=self.dds.non_zero_genes,
nan_mask = ~np.isfinite(new_lfc_values) | ~np.isfinite(new_se_values)

if nan_mask.any():
warnings.warn(
f"{nan_mask.sum()} gene(s) had NaN/infinite values during LFC shrinkage,"
" their LFCs and SEs were not updated.",
UserWarning,
stacklevel=2,
)
)

self._LFC_shrink_converged = pd.Series(np.nan, index=self.dds.var_names)
self._LFC_shrink_converged.update(
pd.Series(l_bfgs_b_converged_, index=self.dds.non_zero_genes)
# Only update genes with valid (non-NaN) shrinkage results
valid_genes = self.dds.non_zero_genes[~nan_mask]
self.LFC.loc[valid_genes, coeff] = new_lfc_values[~nan_mask]
self.SE.loc[valid_genes] = new_se_values[~nan_mask]

self._LFC_shrink_converged = pd.Series(
pd.array([pd.NA] * len(self.dds.var_names), dtype="boolean"),
index=self.dds.var_names,
)
self._LFC_shrink_converged.loc[self.dds.non_zero_genes] = l_bfgs_b_converged_

# Set a flag to indicate that LFCs were shrunk
self.shrunk_LFCs = True
Expand Down Expand Up @@ -511,7 +515,7 @@ def _independent_filtering(self) -> None:
U2 = self.p_values[use]
if not U2.empty:
result.loc[use, i] = false_discovery_control(U2, method="bh")
num_rej = (result < self.alpha).sum(0).to_numpy().astype(int)
num_rej = (result < self.alpha).sum(axis=0).to_numpy().astype(int)
lowess_res = lowess(theta, num_rej, frac=1 / 5)

if num_rej.max() <= 10:
Expand Down
4 changes: 2 additions & 2 deletions pydeseq2/grid_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ def vec_nb_nll(
-logbinom
+ (counts[:, None] + alpha_neg1) * np.log(mu[:, None] + alpha_neg1)
- (counts * np.log(mu))[:, None]
).sum(0)
).sum(axis=0)
else:
return n * alpha_neg1 * np.log(alpha) + (
-logbinom
+ (counts[:, None] + alpha_neg1) * np.log(mu + alpha_neg1)
- (counts[:, None] * np.log(mu))
).sum(0)
).sum(axis=0)


def grid_fit_alpha(
Expand Down
2 changes: 1 addition & 1 deletion pydeseq2/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def deseq2_norm_fit(counts: pd.DataFrame | np.ndarray) -> tuple[np.ndarray, np.n
# Compute gene-wise mean log counts
with np.errstate(divide="ignore"): # ignore division by zero warnings
log_counts = np.log(counts)
logmeans = log_counts.mean(0)
logmeans = log_counts.mean(axis=0)
# Filter out genes with -∞ log means
filtered_genes = ~np.isinf(logmeans)

Expand Down
10 changes: 5 additions & 5 deletions pydeseq2/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,7 +222,7 @@ def nb_nll(
- logbinom
+ (counts + alpha_neg1) * np.log(mu + alpha_neg1)
- (counts * np.log(mu))
).sum(0)
).sum(axis=0)
else:
return (
n * alpha_neg1 * np.log(alpha)
Expand Down Expand Up @@ -849,7 +849,7 @@ def fit_rough_dispersions(
y_hat = np.maximum(y_hat, 1)
alpha_rde = (
((normed_counts - y_hat) ** 2 - y_hat) / ((num_samples - num_vars) * y_hat**2)
).sum(0)
).sum(axis=0)
return np.maximum(alpha_rde, 0)


Expand Down Expand Up @@ -877,7 +877,7 @@ def fit_moments_dispersions(
# Exclude genes with all zeroes
normed_counts = normed_counts[:, ~(normed_counts == 0).all(axis=0)]
# mean inverse size factor
s_mean_inv = (1 / size_factors).mean()
s_mean_inv = (1 / size_factors).mean(axis=0)
mu = normed_counts.mean(0)
sigma = normed_counts.var(0, ddof=1)
# ddof=1 is to use an unbiased estimator, as in R
Expand Down Expand Up @@ -951,7 +951,7 @@ def robust_method_of_moments_disp(
np.ndarray, trimmed_variance(normed_counts)
) # Since normed_counts is always 2D, trimmed_variance returns ndarray

m = normed_counts.mean(0)
m = normed_counts.mean(axis=0)
alpha = (v - m) / m**2
# cannot use the typical min_disp = 1e-8 here or else all counts in the same
# group as the outlier count will get an extreme Cook's distance
Expand Down Expand Up @@ -1202,7 +1202,7 @@ def nbinomFn(

nll = (
counts * xbeta - (counts + size) * np.logaddexp(xbeta + offset, np.log(size))
).sum(0)
).sum(axis=0)

return prior - nll

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ requires = [ "hatchling" ]

[project]
name = "pydeseq2"
version = "0.5.3"
version = "0.5.4"
description = "A python implementation of DESeq2."
readme = "README.md"
license = { file = "LICENSE" }
Expand Down