Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions src/nhp/model/model_iteration.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from nhp.model.activity_resampling import ActivityResampling

ModelRunResult = tuple[dict[str, pd.Series], pd.Series | None]
ModelRunResult = dict[str, pd.Series]

if TYPE_CHECKING:
from nhp.model.model import Model
Expand Down Expand Up @@ -161,7 +161,7 @@ def get_aggregate_results(self) -> ModelRunResult:
Can also be used to aggregate the baseline data by passing in the raw data.

Returns:
A tuple containing a dictionary of results, and the step counts.
A dictionary containing the aggregated results.
"""
aggregations = self.model.aggregate(self)

Expand All @@ -171,7 +171,11 @@ def get_aggregate_results(self) -> ModelRunResult:
avoided_activity_agg, "sex", "age_group"
)

return aggregations, self.get_step_counts()
step_counts = self.get_step_counts()
if step_counts is not None:
aggregations["step_counts"] = step_counts

return aggregations

def get_step_counts(self) -> pd.Series | None:
"""Get the step counts of a model run."""
Expand Down
61 changes: 25 additions & 36 deletions src/nhp/model/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,24 @@ def _complete_model_runs(
)


def _check_for_duplicates(res: pd.Series) -> pd.Series:
"""Check for duplicates in a pandas Series.

If there are duplicate index rows, this raises an AssertionError.

Args:
res (pd.Series): The pandas Series to check for duplicates.

Returns:
pd.Series: The original pandas Series if no duplicates are found.
"""
if res.index.has_duplicates:
duplicates = res[res.index.duplicated(keep=False)]
raise AssertionError(f"Duplicate rows found in {res.name} aggregation: {duplicates}")

return res


def _combine_model_results(
results: list[list[ModelRunResult]],
) -> dict[str, pd.DataFrame]:
Expand All @@ -61,16 +79,19 @@ def _combine_model_results(
Returns:
Dictionary containing the combined model results.
"""
aggregations = sorted(list({k for r in results for v, _ in r for k in v.keys()}))
aggregations = sorted(list({k for r in results for v in r for k in v.keys()}))

model_runs = len(results[0]) - 1

return {
k: _complete_model_runs(
[
aggregated_results[k].reset_index().assign(model_run=i)
# there was an issue historically (#288) where some of the aggregations had
# duplicated rows. this doesn't appear to be an issue now, but to be safe we check
# and raise an error if there are duplicates.
_check_for_duplicates(aggregated_results[k]).reset_index().assign(model_run=i)
for r in results
for (i, (aggregated_results, _step_counts)) in enumerate(r)
for (i, aggregated_results) in enumerate(r)
if k in aggregated_results
],
model_runs,
Expand All @@ -79,37 +100,6 @@ def _combine_model_results(
}


def _combine_step_counts(results: list) -> pd.DataFrame:
"""Combine the step counts of the monte carlo runs.

Takes as input a list of lists, where the outer list contains an item for inpatients,
outpatients and a&e runs, and the inner list contains the results of the monte carlo runs.

Args:
results: A list containing the model results.

Returns:
DataFrame containing the model step counts.
"""
model_runs = len(results[0]) - 1
return _complete_model_runs(
[
step_counts
# TODO: handle the case of daycase conversion, it's duplicating values
# need to figure out exactly why, but this masks the issue for now
.groupby(step_counts.index.names)
.sum()
.reset_index()
.assign(model_run=i)
for r in results
for i, (_aggregated_results, step_counts) in enumerate(r)
if i > 0
],
model_runs,
include_baseline=False,
)


def generate_results_json(
results: dict[str, pd.DataFrame],
params: dict,
Expand Down Expand Up @@ -325,7 +315,6 @@ def combine_results(
logging.info(" * starting to combine results")

combined_results = _combine_model_results(results)
combined_step_counts = _combine_step_counts(results)

# TODO: this is a bit of a hack, but we need to patch the converted SDEC activity
# because inpatients activity is aggregated differently to a&e, the a&e aggregations will be
Expand All @@ -334,4 +323,4 @@ def combine_results(
_patch_converted_sdec_activity(combined_results, "attendance_category", "1")

logging.info(" * finished combining results")
return {**combined_results, "step_counts": combined_step_counts}
return combined_results
5 changes: 3 additions & 2 deletions src/nhp/model/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,12 @@ def run_single_model_run(
print("running model... ", end="")
m_run = timeit(ModelIteration, model, model_run)
print("aggregating results... ", end="")
model_results, step_counts = timeit(m_run.get_aggregate_results)
model_results = timeit(m_run.get_aggregate_results)
print()
print("change factors:")
step_counts = (
step_counts.reset_index()
model_results["step_counts"]
.reset_index()
.groupby(["change_factor", "measure"], as_index=False)["value"]
.sum()
.pivot_table(index="change_factor", columns="measure")
Expand Down
4 changes: 1 addition & 3 deletions tests/integration/nhp/model/test_single_model_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,8 @@ def model_single_run_results(data_dir):
for k, model_class in [("ip", InpatientsModel), ("op", OutpatientsModel), ("aae", AaEModel)]:
model = model_class(params, data)
m_run = ModelIteration(model, 1)
model_results, step_counts = m_run.get_aggregate_results()
assert step_counts is not None
model_results = m_run.get_aggregate_results()
results[k] = model_results
results[k]["step_counts"] = step_counts
return results


Expand Down
34 changes: 27 additions & 7 deletions tests/unit/nhp/model/test_model_iteration.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,13 +233,11 @@ def test_get_aggregate_results(mock_model_iteration):
actual = mr_mock.get_aggregate_results()

# assert
assert actual == (
{
"default": "aggregated_results",
"avoided_activity": "agg",
},
"step_counts",
)
assert actual == {
"default": "aggregated_results",
"avoided_activity": "agg",
"step_counts": "step_counts",
}
mr_mock.model.aggregate.assert_called_once_with(mr_mock)
mr_mock.get_step_counts.assert_called_once_with()
mr_mock.model.process_results.assert_called_once_with(mr_mock.avoided_activity)
Expand All @@ -266,6 +264,28 @@ def test_get_aggregate_results_avoided_activity_empty_dataframe(mock_model_itera
mr_mock.model.process_results.assert_not_called()


@pytest.mark.unit
def test_get_aggregate_results_step_counts_empty(mock_model_iteration):
"""Test the get_aggregate_results method when step counts is None."""
# arrange
mr_mock = mock_model_iteration

mr_mock.model.aggregate.return_value = {"default": "aggregated_results"}
mr_mock.get_step_counts = Mock(return_value=None)
mr_mock.model.get_agg.return_value = "agg"
mr_mock.avoided_activity = pd.DataFrame({"x": ["avoided_activity"]})
mr_mock.model.process_results = Mock(return_value="avoided_activity_agg")

# act
actual = mr_mock.get_aggregate_results()

# assert
assert actual == {
"default": "aggregated_results",
"avoided_activity": "agg",
}


@pytest.mark.unit
def test_get_step_counts_empty(mock_model_iteration):
"""Test the get_step_counts method when step counts is empty."""
Expand Down
115 changes: 50 additions & 65 deletions tests/unit/nhp/model/test_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@

from nhp.model.results import (
_add_metadata_to_dataframe,
_check_for_duplicates,
_combine_model_results,
_combine_step_counts,
_complete_model_runs,
_patch_converted_sdec_activity,
_save_params_file,
Expand Down Expand Up @@ -76,86 +76,72 @@ def test_complete_model_runs_exclude_baseline():


@pytest.mark.unit
def test_combine_model_results(mocker):
def test_check_for_duplicates_no_duplicates():
# arrange
def r(*args):
return pd.Series(args, name="value")

results = [
[
({"default": r(1, 2, 3), "other": r(4, 5, 6)}, None),
({"default": r(4, 5, 6), "other": r(1, 2, 3)}, None),
],
[
(
{
"default": r(7),
},
None,
),
(
{
"default": r(8),
},
None,
),
],
]

cmr_mock = mocker.patch("nhp.model.results._complete_model_runs", return_value="cmr")
df = pd.Series([1, 2, 3, 4, 5], index=[1, 2, 3, 4, 5])

# act
actual = _combine_model_results(results)
actual = _check_for_duplicates(df)

# assert
assert actual == {"default": "cmr", "other": "cmr"}

assert [i["value"].sum() for i in cmr_mock.call_args_list[0][0][0]] == [6, 15, 7, 8]
assert cmr_mock.call_args_list[0][0][1] == 1
assert [i["value"].sum() for i in cmr_mock.call_args_list[1][0][0]] == [15, 6]
assert cmr_mock.call_args_list[0][0][1] == 1
assert actual.equals(df)


@pytest.mark.unit
def test_combine_step_counts(mocker):
def test_check_for_duplicates_with_duplicates():
# arrange
df = pd.DataFrame(
[
{"a": 1, "b": 1, "value": 1},
{"a": 1, "b": 2, "value": 2},
{"a": 2, "b": 1, "value": 3},
{"a": 2, "b": 2, "value": 4},
]
).set_index(["a", "b"])
df = pd.Series([1, 2, 3, 4, 5], index=[1, 2, 2, 4, 5], name="value")

# act & assert
with pytest.raises(AssertionError, match=r"Duplicate rows found in value aggregation:"):
_check_for_duplicates(df)


@pytest.mark.unit
def test_combine_model_results(mocker):
# arrange
results = [
[
(None, df),
(None, df),
{
"default": pd.Series([1, 2, 3], name="value"),
"other": pd.Series([4, 5, 6], name="value"),
"step_counts": pd.Series([10, 20], name="value"),
},
{
"default": pd.Series([4, 5, 6], name="value"),
"other": pd.Series([1, 2, 3], name="value"),
"step_counts": pd.Series([30, 40], name="value"),
},
],
[
(None, df),
(None, df),
{
"default": pd.Series([7], name="value"),
"step_counts": pd.Series([50], name="value"),
},
{
"default": pd.Series([8], name="value"),
"step_counts": pd.Series([60], name="value"),
},
],
]

expected = {
"a": [1, 1, 2, 2],
"b": [1, 2, 1, 2],
"value": [1, 2, 3, 4],
"model_run": [1, 1, 1, 1],
}

cmr_mock = mocker.patch("nhp.model.results._complete_model_runs", return_value="cmr")
cfd_mock = mocker.patch("nhp.model.results._check_for_duplicates", side_effect=lambda df: df)

# act
actual = _combine_step_counts(results)
actual = _combine_model_results(results)

# assert
assert actual == "cmr"
assert all(i.to_dict("list") == expected for i in cmr_mock.call_args[0][0])
assert cmr_mock.call_args[0][1] == 1
assert cmr_mock.call_args[1] == {"include_baseline": False}
assert actual == {"default": "cmr", "other": "cmr", "step_counts": "cmr"}

assert [i["value"].sum() for i in cmr_mock.call_args_list[0][0][0]] == [6, 15, 7, 8]
assert cmr_mock.call_args_list[0][0][1] == 1
assert [i["value"].sum() for i in cmr_mock.call_args_list[1][0][0]] == [15, 6]
assert [i["value"].sum() for i in cmr_mock.call_args_list[2][0][0]] == [30, 70, 50, 60]
assert cmr_mock.call_args_list[1][0][1] == 1
assert cmr_mock.call_args_list[2][0][1] == 1

assert cfd_mock.call_count == 10


@pytest.mark.unit
Expand Down Expand Up @@ -281,22 +267,21 @@ def test_generate_results_json(mocker):
@pytest.mark.unit
def test_combine_results(mocker):
# arrange
ma = mocker.patch("nhp.model.results._combine_model_results", return_value={"a": "a", "b": "b"})
mb = mocker.patch("nhp.model.results._combine_step_counts", return_value="combined_step_counts")
combined = {"a": "a", "b": "b", "step_counts": "combined_step_counts"}
ma = mocker.patch("nhp.model.results._combine_model_results", return_value=combined)
ms = mocker.patch("nhp.model.results._patch_converted_sdec_activity")

# act
actual = combine_results("results") # type: ignore

# assert
assert actual == {"a": "a", "b": "b", "step_counts": "combined_step_counts"}
assert actual == combined
ma.assert_called_once_with("results")
mb.assert_called_once_with("results")

assert ms.call_count == 2
assert list(ms.call_args_list) == [
call({"a": "a", "b": "b"}, "acuity", "standard"),
call({"a": "a", "b": "b"}, "attendance_category", "1"),
call(combined, "acuity", "standard"),
call(combined, "attendance_category", "1"),
]


Expand Down
Loading
Loading