Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AUTHORS.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@
* M. Hasan Celik <celik.hasan@external.gene.com>
* Carlo de Donno <carlo.de_donno@roche.com>
* Johannes Hingerl <hingerl.johannes@gene.com>
* Liudeng Zhang <zhangliudeng@gmail.com>
31 changes: 30 additions & 1 deletion src/grelu/lightning/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1120,7 +1120,7 @@ def input_intervals_to_output_bins(
Returns:start and end indices of the output bins corresponding
to each input interval.
"""
return pd.DataFrame(
result = pd.DataFrame(
{
"start": intervals.start.apply(
self.input_coord_to_output_bin, args=(start_pos,)
Expand All @@ -1132,6 +1132,35 @@ def input_intervals_to_output_bins(
}
)

# Check for intervals in the cropped-out region
issues = []
n_negative = (result["start"] < 0).sum()
if n_negative > 0:
issues.append(
f"{n_negative} interval(s) have start positions in the cropped-out "
"region, resulting in negative output bin indices."
)
seq_len = self.data_params.get("train", {}).get("seq_len")
if seq_len is not None:
bin_size = self.data_params["train"]["bin_size"]
crop_len = self.model_params["crop_len"]
max_bins = seq_len // bin_size - 2 * crop_len
n_over = (result["end"] > max_bins).sum()
if n_over > 0:
issues.append(
f"{n_over} interval(s) have end positions beyond the cropped-out "
f"region, resulting in output bin indices exceeding the maximum "
f"({max_bins})."
)
if issues:
warnings.warn(
" ".join(issues)
+ " Consider removing these intervals or manually setting their"
" boundaries to 0 or the maximum number of output bins."
)

return result


class LightningModelEnsemble(pl.LightningModule):
"""
Expand Down
15 changes: 15 additions & 0 deletions tests/test_lightning.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import warnings

import numpy as np
import pandas as pd
import torch
Expand Down Expand Up @@ -458,3 +460,16 @@ def test_input_intervals_to_output_bins():
assert output.equals(pd.DataFrame({"start": [3], "end": [12]}))
output = crop_bin_model.input_intervals_to_output_bins(intervals=intervals)
assert output.equals(pd.DataFrame({"start": [0], "end": [5]}))


def test_input_intervals_to_output_bins_cropped_warning():
    """Warn when intervals fall in the cropped-out region.

    The conversion must emit exactly one warning mentioning the cropped-out
    region, and must still return the (negative) output bin indices rather
    than raising.
    """
    # crop_model has crop_len=3, bin_size=1, so coords 0-2 produce negative bins
    intervals = pd.DataFrame({"chrom": ["chr1"], "start": [0], "end": [2]})
    with warnings.catch_warnings(record=True) as caught:
        # "always" disables the once-per-location suppression so the warning
        # is recorded even if an earlier test already triggered it.
        warnings.simplefilter("always")
        output = crop_model.input_intervals_to_output_bins(intervals=intervals)

    # Count only the warning under test: with the "always" filter active,
    # unrelated warnings raised by dependencies during the call are recorded
    # too and must not make this test fail.
    cropped_warnings = [
        record for record in caught if "cropped-out region" in str(record.message)
    ]
    assert len(cropped_warnings) == 1
    # Values are still returned (warning, not error)
    assert (output["start"] < 0).any()
Loading