Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,16 @@ classifiers = [
"Topic :: Scientific/Engineering :: Bio-Informatics",
]
dependencies = [
"anndata>=0.11.4",
"boltons>=25.0.0", # ctxcore dependency
"ctxcore",
"ncls>=0.0.70",
"polars>=1.31.0",
"pyarrow>=20.0.0",
"pybiomart>=0.2.0",
"requests>=2.32.4",
"scipy>=1.15.3",
"setuptools<81", # ctxcore dependency
"tmtoolkit>=0.12.0",
]

Expand Down Expand Up @@ -126,3 +130,6 @@ ban-relative-imports = "all"
[tool.mypy]
mypy_path = "$MYPY_CONFIG_FILE_DIR/stubs"
plugins = ["numpy.typing.mypy_plugin"]

[tool.uv.sources]
ctxcore = { git = "https://github.com/aertslab/ctxcore.git" }
4 changes: 4 additions & 0 deletions src/pycisTopic/cli/pycistopic.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

from pycisTopic.cli.subcommand.count_matrix import add_parser_count_matrix
from pycisTopic.cli.subcommand.qc import add_parser_qc
from pycisTopic.cli.subcommand.signature_enrichment import (
add_parser_signature_enrichment,
)
from pycisTopic.cli.subcommand.topic_modeling import add_parser_topic_modeling
from pycisTopic.cli.subcommand.tss import add_parser_tss

Expand All @@ -23,6 +26,7 @@ def main():
add_parser_topic_modeling(subparsers)
add_parser_tss(subparsers)
add_parser_count_matrix(subparsers)
add_parser_signature_enrichment(subparsers)

args = parser.parse_args()
args.func(args)
Expand Down
220 changes: 220 additions & 0 deletions src/pycisTopic/cli/subcommand/signature_enrichment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,220 @@
from argparse import _SubParsersAction

import polars as pl

OBSM_SIGN_ENRICHMENT_KEY="AUCell"
UNS_SIGN_ENRICHMENT_KEY="AUCell signature names"


def _region_names_to_polars_df(
region_names: list[str],
chrom_split: str = ":",
start_end_split: str = "-",
) -> pl.DataFrame:
chromosomes: list[str] = []
starts: list[int] = []
ends: list[int] = []
for region in region_names:
chrom, start, end = region.replace(
chrom_split, start_end_split
).split(start_end_split)
chromosomes.append(chrom)
starts.append(int(start))
ends.append(int(end))

return pl.from_dict(
{
"Chromosome": chromosomes,
"Start": starts,
"End": ends
}
)

def run_signature_enrichment(args):
import logging
import sys

import anndata as ad # type: ignore
import numpy as np

from pycisTopic.fragments import read_bed_to_polars_df
from pycisTopic.signature_enrichment import signature_enrichment

if len(args.signature_names) != len(args.signatures):
raise ValueError(
f"Length of signature_names ({len(args.signature_names)}) does not "
f"match the length of the signatures ({len(args.signatures)})!"
)

if args.verbose:
level = logging.INFO
log_format = "%(asctime)s %(name)-12s %(levelname)-8s %(message)s"
handlers = [logging.StreamHandler(stream=sys.stdout)]
logging.basicConfig(level=level, format=log_format, handlers=handlers)
log = logging.getLogger("cisTopic")
else:
log = None

if log is not None:
log.info("Reading data ...")

ad_region_topic = ad.read_h5ad(args.region_topic)
ad_cell_topic = ad.read_h5ad(args.cell_topic)

assert isinstance(ad_region_topic.X, np.ndarray), \
"`ad_region_topic.X` should be numpy array"

assert isinstance(ad_cell_topic.X, np.ndarray), \
"`ad_cell_topic.X` should be numpy array"

region_topic_granges = _region_names_to_polars_df(
ad_region_topic.obs_names # type: ignore
)

signatures: dict[str, pl.DataFrame] = {}
for signature_name, signature_bed in zip(
args.signature_names, args.signatures,
strict=True
):
signatures[signature_name] = read_bed_to_polars_df(signature_bed)

auc_values = signature_enrichment(
region_topic=ad_region_topic.X,
cell_topic=ad_cell_topic.X.T,
region_topic_granges=region_topic_granges,
signatures=signatures,
chunk_size=args.chunk_size,
normalize=args.normalize,
min_frac_consensus=args.min_frac_consensus,
min_frac_signature=args.min_frac_signature,
seed=args.seed,
auc_threshold=args.auc_threshold,
log=log
)

if OBSM_SIGN_ENRICHMENT_KEY not in ad_cell_topic.obsm:
if log is not None:
log.info(
f"Storing AUCell matrix in .obsm[{OBSM_SIGN_ENRICHMENT_KEY}] "
f"and signature names in .uns[{UNS_SIGN_ENRICHMENT_KEY}]."
)
ad_cell_topic.obsm[OBSM_SIGN_ENRICHMENT_KEY] = auc_values
ad_cell_topic.uns[UNS_SIGN_ENRICHMENT_KEY] = list(signatures.keys())
else:
if log is not None:
log.info(
f"Concatenating new AUCell matrix with .obsm[{OBSM_SIGN_ENRICHMENT_KEY}] "
f"and extending signature names in .uns[{UNS_SIGN_ENRICHMENT_KEY}]."
)
ad_cell_topic.obsm[OBSM_SIGN_ENRICHMENT_KEY] = np.concatenate(
(ad_cell_topic.obsm[OBSM_SIGN_ENRICHMENT_KEY], auc_values),
axis=1
)
assert isinstance(ad_cell_topic.uns[UNS_SIGN_ENRICHMENT_KEY], list), \
"cell topic anndata contains AUCell values but no signature names in " + \
f".uns[{UNS_SIGN_ENRICHMENT_KEY}]."
ad_cell_topic.uns[UNS_SIGN_ENRICHMENT_KEY].extend(list(signatures.keys()))

if log is not None:
log.info(f"Saving cell topic anndata to: {args.out_file}")
ad_cell_topic.write(args.out_file)

def add_parser_signature_enrichment(
subparser: _SubParsersAction
):
parser_signature_enrichment = subparser.add_parser(
"signature_enrichment",
help="Run signature enrichment using AUCell."
)
parser_signature_enrichment.set_defaults(func=run_signature_enrichment)

parser_signature_enrichment.add_argument(
"-r",
"--region-topic",
dest="region_topic",
type=str,
required=True,
help="Path to region-topic AnnData (.h5ad) file."
)
parser_signature_enrichment.add_argument(
"-c",
"--cell-topic",
dest="cell_topic",
type=str,
required=True,
help="Path to cell-topic AnnData (.h5ad) file."
)
parser_signature_enrichment.add_argument(
"-n",
"--signature-names",
dest="signature_names",
nargs="+",
type=str,
required=True,
help="List of signature names (space-separated, same order as --signatures)."
)
parser_signature_enrichment.add_argument(
"-s",
"--signatures",
dest="signatures",
nargs="+",
type=str,
required=True,
help="List of BED files for signatures (space-separated, same order as --signature-names)."
)
parser_signature_enrichment.add_argument(
"-o",
"--out-file",
dest="out_file",
type=str,
required=True,
help="Output file for updated cell-topic AnnData (.h5ad)."
)
parser_signature_enrichment.add_argument(
"--chunk-size",
dest="chunk_size",
type=int,
default=1000,
help="Number of cells to process at once. Default: 1000."
)
parser_signature_enrichment.add_argument(
"--normalize",
dest="normalize",
action="store_true",
help="Normalize AUC values to a maximum of 1.0 per regulon."
)
parser_signature_enrichment.add_argument(
"--min-frac-consensus",
dest="min_frac_consensus",
type=float,
default=0.5,
help="Minimal fractional overlap of signature and consensus peak relative to consensus peak. Default: 0.5."
)
parser_signature_enrichment.add_argument(
"--min-frac-signature",
dest="min_frac_signature",
type=float,
default=0.5,
help="Minimal fractional overlap of signature and consensus peak relative to signature. Default: 0.5."
)
parser_signature_enrichment.add_argument(
"--seed",
dest="seed",
type=int,
default=12345,
help="Random seed for ranking. Default: 12345."
)
parser_signature_enrichment.add_argument(
"--auc-threshold",
dest="auc_threshold",
type=float,
default=0.05,
help="Fraction of ranked genome for AUC calculation. Default: 0.05."
)
parser_signature_enrichment.add_argument(
"-v",
"--verbose",
dest="verbose",
action="store_true",
help="Enable verbose logging."
)
Loading