Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions cdisc_rules_engine/models/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,10 @@ def generate_targeted_error_object( # noqa: C901
targets,
)

grouping_variables = [
x.replace("--", self.dataset_metadata.wildcard_replacement or "")
for x in grouping_variables
]
missing_grouping_vars = [
var for var in grouping_variables if var not in data.columns
]
Expand Down
196 changes: 156 additions & 40 deletions tests/unit/test_actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,11 +105,159 @@ def test_targeted_error_object_with_dataset_sensitivity():
assert error["value"] == {"TEST": 1, "MISSING": "Not in dataset"}


def test_targeted_error_object_with_group_sensitivity():
@pytest.mark.parametrize(
"grouping_variables, expected_column, df, expected_groups",
[
# Case 1: explicit column name, no wildcard
(
["SETCD"],
"SETCD",
pd.DataFrame(
{
"TXPARMCD": [
"PLANSUB",
"PLANSUB",
"PLANSUB",
"OTHER",
"OTHER",
"OTHER",
"OTHER",
"VALUE",
"VALUE",
],
"SETCD": [
"SET1",
"SET1",
"SET1",
"SET2",
"SET2",
"SET2",
"SET2",
"SET3",
"SET3",
],
"USUBJID": [
"001",
"001",
"001",
"002",
"002",
"002",
"002",
"003",
"003",
],
}
),
["SET1", "SET2", "SET3"],
),
# Case 2: wildcard "--PARMCD" → "TXPARMCD", grouped only on TXPARMCD
# Each distinct TXPARMCD value should produce one value
(
["--PARMCD"],
"TXPARMCD",
pd.DataFrame(
{
"TXPARMCD": [
"PLANSUB",
"PLANSUB",
"PLANSUB",
"OTHER",
"OTHER",
"OTHER",
"OTHER",
"VALUE",
"VALUE",
],
"SETCD": [
"SET1",
"SET1",
"SET1",
"SET2",
"SET2",
"SET2",
"SET2",
"SET3",
"SET3",
],
"USUBJID": [
"001",
"001",
"001",
"002",
"002",
"002",
"002",
"003",
"003",
],
}
),
["PLANSUB", "OTHER", "VALUE"],
),
# Case 3: grouped on both "--PARMCD" (→ "TXPARMCD") and "SETCD"
# TXPARMCD varies within SETCD groups to prove both columns contribute to grouping:
# SET1 has PLANSUB + OTHER → 2 groups
# SET2 has OTHER only → 1 group
# SET3 has VALUE + PLANSUB → 2 groups
# Total: 5 distinct (TXPARMCD, SETCD) combinations
(
["--PARMCD", "SETCD"],
"SETCD",
pd.DataFrame(
{
"TXPARMCD": [
"PLANSUB",
"PLANSUB",
"OTHER",
"OTHER",
"OTHER",
"OTHER",
"OTHER",
"VALUE",
"VALUE",
"PLANSUB",
"PLANSUB",
],
"SETCD": [
"SET1",
"SET1",
"SET1",
"SET1",
"SET2",
"SET2",
"SET2",
"SET3",
"SET3",
"SET3",
"SET3",
],
"USUBJID": [
"001",
"001",
"001",
"001",
"002",
"002",
"002",
"003",
"003",
"003",
"003",
],
}
),
["SET1", "SET1", "SET2", "SET3", "SET3"], # two SET1s and two SET3s
),
],
)
def test_targeted_error_object_with_group_sensitivity(
grouping_variables, expected_column, df, expected_groups
):
dummy_rule = {
"core_id": "FB4607",
"sensitivity": "Group",
"grouping_variables": ["SETCD"],
"grouping_variables": grouping_variables,
"conditions": {
"all": [
{
Expand All @@ -128,35 +276,6 @@ def test_targeted_error_object_with_group_sensitivity():
"output_variables": ["SETCD", "TXPARMCD"],
}

# Create test data with 3 groups (SETCD), multiple records each
df = pd.DataFrame.from_dict(
{
"TXPARMCD": [
"PLANSUB",
"PLANSUB",
"PLANSUB",
"OTHER",
"OTHER",
"OTHER",
"OTHER",
"VALUE",
"VALUE",
],
"SETCD": [
"SET1",
"SET1",
"SET1",
"SET2",
"SET2",
"SET2",
"SET2",
"SET3",
"SET3",
],
"USUBJID": ["001", "001", "001", "002", "002", "002", "002", "003", "003"],
}
)

variable = DatasetVariable(PandasDataset(df))
dataset_metadata = SDTMDatasetMetadata(
first_record={"DOMAIN": "TX"}, filename="tx.xpt"
Expand All @@ -168,21 +287,18 @@ def test_targeted_error_object_with_group_sensitivity():
targets, df, "Missing required TXPARMCD value per set"
)

# Should have exactly 3 errors (one per SETCD group)
assert len(result.errors) == 3
assert len(result.errors) == len(expected_groups)

# Extract SETCD values from errors
setcd_values = [
error.to_representation()["value"]["SETCD"] for error in result.errors
]
assert sorted(setcd_values) == ["SET1", "SET2", "SET3"]
actual_values = sorted(
error.to_representation()["value"][expected_column] for error in result.errors
)
assert actual_values == sorted(expected_groups)

# Each error should have row information from the first record in its group
for error in result.errors:
error_repr = error.to_representation()
assert "row" in error_repr
assert "SETCD" in error_repr["value"]
assert "TXPARMCD" in error_repr["value"]
assert expected_column in error_repr["value"]


def test_group_sensitivity_missing_grouping_variables():
Expand Down
Loading