Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@ docs/source/generated
# gcloud cli
google-cloud-cli-*
google-cloud-sdk
.venv/
40 changes: 40 additions & 0 deletions malariagen_data/anopheles.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import plotly.express as px # type: ignore
import plotly.graph_objects as go # type: ignore
from numpydoc_decorator import doc # type: ignore
from .util import parse_single_region


from .anoph import (
Expand Down Expand Up @@ -553,6 +554,45 @@ def _sample_count_het(
)

return sample_id, sample_set, windows, counts

@check_types
@doc(
    summary="Return windowed heterozygosity for a single sample over a genome region.",
    returns=dict(
        windows="Array of shape (n_windows, 2) with the start and end positions of each window.",
        counts="Array of shape (n_windows,) with the number of heterozygous sites in each window.",
    ),
)
def heterozygosity(
    self,
    sample: base_params.sample,
    region: base_params.region,
    window_size: het_params.window_size = het_params.window_size_default,
    site_mask: Optional[base_params.site_mask] = base_params.DEFAULT,
    sample_set: Optional[base_params.sample_set] = None,
    chunks: base_params.chunks = base_params.native_chunks,
    inline_array: base_params.inline_array = base_params.inline_array_default,
) -> Tuple[np.ndarray, np.ndarray]:
    # NOTE: there is deliberately no literal docstring here. The @doc
    # decorator builds the docstring from its own arguments and from the
    # annotated parameter types, so a hand-written docstring in the body
    # would be replaced; the return values are documented via `returns=`.

    # Normalise whatever the caller passed into a single region object, and
    # drop the raw argument so it cannot be used by mistake further down.
    resolved_region: Region = parse_single_region(self, region)
    del region

    # Delegate the computation to the private helper. It returns
    # (sample_id, sample_set, windows, counts); only the last two are part
    # of this method's public contract.
    _, _, windows, counts = self._sample_count_het(
        sample=sample,
        region=resolved_region,
        site_mask=site_mask,
        window_size=window_size,
        sample_set=sample_set,
        chunks=chunks,
        inline_array=inline_array,
    )

    return windows, counts

@check_types
@doc(
Expand Down
43 changes: 43 additions & 0 deletions tests/anoph/test_heterozygosity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import numpy as np
import pytest
import malariagen_data
from malariagen_data.anopheles import AnophelesDataResource
from malariagen_data.util import Region

@pytest.fixture
def fake_windows_counts():
    """Provide a canned (windows, counts) pair describing two windows."""
    window_bounds = [[0, 10], [10, 20]]
    het_counts = [3, 7]
    return np.array(window_bounds), np.array(het_counts)

def test_heterozygosity_wraps_sample_count_het(monkeypatch, fake_windows_counts):
    """Check that `heterozygosity` delegates to `_sample_count_het`.

    The public method should forward the caller's arguments to the private
    helper and return only the (windows, counts) part of the helper's
    4-tuple result.
    """
    expected_windows, expected_counts = fake_windows_counts

    # Minimal stand-in for the resource logger: only `debug` is provided.
    class DummyLogger:
        def debug(self, *args, **kwargs):
            pass

    # Replace __init__ so the resource can be constructed without running
    # the real constructor; only the `_log` attribute is set up.
    def fake_init(self, *args, **kwargs):
        self._log = DummyLogger()

    monkeypatch.setattr(AnophelesDataResource, "__init__", fake_init)

    # Record what the public method forwards, so the test fails if the
    # wrapper drops or swaps an argument rather than merely echoing the
    # helper's return value.
    forwarded = {}

    def fake_sample_count_het(
        self, sample, region, site_mask, window_size, sample_set, chunks, inline_array
    ):
        forwarded["sample"] = sample
        forwarded["window_size"] = window_size
        # Same shape as the real helper's result:
        # (sample_id, sample_set, windows, counts).
        return "S1", "setA", expected_windows, expected_counts

    monkeypatch.setattr(
        AnophelesDataResource, "_sample_count_het", fake_sample_count_het
    )

    resource = AnophelesDataResource()
    # Call the public method under test.
    windows, counts = resource.heterozygosity(
        sample="any_sample",
        region=Region(contig="2L", start=100, end=200),
        window_size=10,
    )

    # The caller's arguments must reach the helper unchanged.
    assert forwarded["sample"] == "any_sample"
    assert forwarded["window_size"] == 10

    # The result must be exactly the arrays the fake helper returned.
    assert np.array_equal(windows, expected_windows)
    assert np.array_equal(counts, expected_counts)