Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ def build(self):
dataset_location - Path to file
dataset_name - Name of the dataset
dataset_label - Label for the dataset
is_ap - Whether the domain is an AP domain
ap_suffix - The 2-character suffix from AP domains
"""
size_unit: str = self.rule_processor.get_size_unit_from_rule(self.rule)
return self.data_service.get_dataset_metadata(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ def build(self):
dataset_name - Name of the dataset
dataset_size - File size
dataset_domain - Domain of the dataset
is_ap - Whether the domain is an AP domain
ap_suffix - The 2-character suffix from AP domains
define_dataset_class - dataset class
define_dataset_domain - dataset domain from define
define_dataset_is_non_standard - whether a dataset is non-standard
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ def build(self):
dataset_name - Name of the dataset
dataset_label - Label for the dataset
dataset_domain - Domain of the dataset
is_ap - Whether the domain is an AP domain
ap_suffix - The 2-character suffix from AP domains

Columns from Define XML:
define_dataset_name - dataset name from define_xml
Expand Down Expand Up @@ -85,13 +87,24 @@ def _get_define_xml_dataframe(self):
return self.dataset_implementation(columns=define_col_order)
return self.dataset_implementation.from_records(define_metadata)

def _ensure_required_columns(self, dataset_df, dataset_col_order):
if "dataset_size" not in dataset_df.columns:
dataset_df["dataset_size"] = None
if "is_ap" not in dataset_df.columns:
dataset_df["is_ap"] = False
if "ap_suffix" not in dataset_df.columns:
dataset_df["ap_suffix"] = ""
return self.dataset_implementation(dataset_df[dataset_col_order])

def _get_dataset_dataframe(self):
dataset_col_order = [
"dataset_size",
"dataset_location",
"dataset_name",
"dataset_label",
"dataset_domain",
"is_ap",
"ap_suffix",
]

if len(self.datasets) == 0:
Expand Down Expand Up @@ -126,7 +139,7 @@ def _get_dataset_dataframe(self):
"domain": "dataset_name",
}
dataset_df = datasets.rename(columns=data_col_mapping)
if "dataset_size" not in dataset_df.columns:
dataset_df["dataset_size"] = None
dataset_df = self.dataset_implementation(dataset_df[dataset_col_order])
dataset_df = self._ensure_required_columns(
dataset_df, dataset_col_order
)
return dataset_df
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ def build(self):
- dataset_location - Path to file
- dataset_name - Name of the dataset
- dataset_label - Label for the dataset
- is_ap - Whether the domain is an AP domain
- ap_suffix - The 2-character suffix from AP domains
"""
size_unit: str = self.rule_processor.get_size_unit_from_rule(self.rule)
dataset_metadata = self.data_service.get_dataset_metadata(
Expand Down
59 changes: 44 additions & 15 deletions cdisc_rules_engine/models/sdtm_dataset_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,21 +12,21 @@ class SDTMDatasetMetadata(DatasetMetadata):

"""
Examples
| name | unsplit_name | is_supp | domain | rdomain |
| -------- | ------------ | ------- | ------ | ------- |
| QS | QS | False | QS | None |
| QSX | QS | False | QS | None |
| QSXX | QS | False | QS | None |
| SUPPQS | SUPPQS | True | None | QS |
| SUPPQSX | SUPPQS | True | None | QS |
| SUPPQSXX | SUPPQS | True | None | QS |
| APQS | APQS | False | APQS | None |
| APQSX | APQS | False | APQS | None |
| APQSXX | APQS | False | APQS | None |
| SQAPQS | SQAPQS | True | None | APQS |
| SQAPQSX | SQAPQS | True | None | APQS |
| SQAPQSXX | SQAPQS | True | None | APQS |
| RELREC | RELREC | False | None | None |
| name | unsplit_name | is_supp | domain | rdomain | is_ap | ap_suffix |
| -------- | ------------ | ------- | ------ | ------- | ----- | --------- |
| QS | QS | False | QS | None | False | |
| QSX | QS | False | QS | None | False | |
| QSXX | QS | False | QS | None | False | |
| SUPPQS | SUPPQS | True | None | QS | False | |
| SUPPQSX | SUPPQS | True | None | QS | False | |
| SUPPQSXX | SUPPQS | True | None | QS | False | |
| APQS | APQS | False | APQS | None | True | QS |
| APQSX | APQS | False | APQS | None | True | QS |
| APQSXX | APQS | False | APQS | None | True | QS |
| SQAPQS | SQAPQS | True | None | APQS | True | |
| SQAPQSX | SQAPQS | True | None | APQS | True | |
| SQAPQSXX | SQAPQS | True | None | APQS | True | |
| RELREC | RELREC | False | None | None | False | |
"""

@property
Expand Down Expand Up @@ -57,3 +57,32 @@ def unsplit_name(self) -> str:
@property
def is_split(self) -> bool:
    """True when this dataset's name differs from its unsplit (base) name."""
    return self.unsplit_name != self.name

@property
def is_ap(self) -> bool:
    """
    Whether this dataset is an Associated Persons (AP) dataset.

    Supplemental datasets are AP when their RDOMAIN is a 4-character
    string beginning with "AP"; every other dataset is AP when the APID
    variable appears in its first record.
    """
    if self.is_supp:
        rdomain = self.rdomain
        return (
            isinstance(rdomain, str)
            and len(rdomain) == 4
            and rdomain[:2] == "AP"
        )
    # Absence of records (first_record is None) means not AP.
    return "APID" in (self.first_record or {})

@property
def ap_suffix(self) -> str:
    """
    The 2-character suffix (characters 3-4) of an AP domain name,
    e.g. "QS" for domain "APQS".

    Returns "" for non-AP datasets, for supplemental datasets, and for
    AP datasets whose domain is missing or shorter than 4 characters.
    """
    if not self.is_ap or self.is_supp:
        return ""
    domain = self.domain
    if not isinstance(domain, str) or len(domain) < 4:
        return ""
    return domain[2:4]
1 change: 1 addition & 0 deletions cdisc_rules_engine/operations/extract_metadata.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pandas as pd

from cdisc_rules_engine.operations.base_operation import BaseOperation


Expand Down
2 changes: 2 additions & 0 deletions cdisc_rules_engine/operations/operations_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
from cdisc_rules_engine.operations.mean import Mean
from cdisc_rules_engine.operations.domain_is_custom import DomainIsCustom
from cdisc_rules_engine.operations.domain_label import DomainLabel
from cdisc_rules_engine.operations.standard_domains import StandardDomains
from cdisc_rules_engine.operations.meddra_code_references_validator import (
MedDRACodeReferencesValidator,
)
Expand Down Expand Up @@ -121,6 +122,7 @@ class OperationsFactory(FactoryInterface):
"variable_is_null": VariableIsNull,
"domain_is_custom": DomainIsCustom,
"domain_label": DomainLabel,
"standard_domains": StandardDomains,
"required_variables": RequiredVariables,
"split_by": SplitBy,
"expected_variables": ExpectedVariables,
Expand Down
15 changes: 15 additions & 0 deletions cdisc_rules_engine/operations/standard_domains.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from cdisc_rules_engine.operations.base_operation import BaseOperation


class StandardDomains(BaseOperation):
    """Operation returning the sorted list of domain names defined by the standard."""

    def _execute_operation(self):
        """
        Read "domains" from the standard metadata and return it as a
        sorted list. Returns [] when "domains" is explicitly None;
        raises TypeError for any other unsupported type.
        """
        standard_data: dict = self.library_metadata.standard_metadata
        # Missing key defaults to an empty set -> empty sorted list.
        domains = standard_data.get("domains", set())
        if domains is None:
            return []
        if isinstance(domains, (set, list, tuple)):
            return sorted(domains)
        raise TypeError(
            f"Invalid type for 'domains' in standard_metadata: "
            f"expected set, list, or tuple, got {type(domains).__name__}"
        )
26 changes: 14 additions & 12 deletions cdisc_rules_engine/services/data_services/base_data_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,9 @@ def get_dataset_metadata(
"dataset_name": [dataset_metadata.name],
"dataset_label": [dataset_metadata.label],
"record_count": [dataset_metadata.record_count],
"is_ap": [dataset_metadata.is_ap],
"ap_suffix": [dataset_metadata.ap_suffix],
"domain": [dataset_metadata.domain],
}
return self.dataset_implementation.from_dict(metadata_to_return)

Expand All @@ -243,38 +246,37 @@ def _handle_special_cases(
if self._contains_topic_variable(dataset, dataset_metadata.domain, "OBJ"):
return FINDINGS_ABOUT
return FINDINGS
if self._is_associated_persons(dataset):
if dataset_metadata.is_ap:
return self._get_associated_persons_inherit_class(
file_path, datasets, dataset_metadata.domain
file_path, datasets, dataset_metadata
)
return None

def _is_associated_persons(self, dataset) -> bool:
"""
Check if AP-- domain.
"""
return "APID" in dataset

def _get_associated_persons_inherit_class(
self, file_path, datasets: Iterable[SDTMDatasetMetadata], domain: str
self,
file_path,
datasets: Iterable[SDTMDatasetMetadata],
dataset_metadata: SDTMDatasetMetadata,
):
"""
Check which dataset class the AP-- domain inherits from.
"""
ap_suffix = domain[2:]
ap_suffix = dataset_metadata.ap_suffix
if not ap_suffix:
return None
directory_path = get_directory_path(file_path)
if len(datasets) > 1:
domain_details: SDTMDatasetMetadata = search_in_list_of_dicts(
datasets, lambda item: item.domain == ap_suffix
)
if domain_details:
if domain_details.is_ap:
raise ValueError("Nested Associated Persons domain reference")
file_name = domain_details.filename
new_file_path = os.path.join(directory_path, file_name)
new_domain_dataset = self.get_dataset(dataset_name=new_file_path)
else:
raise ValueError("Filename for domain doesn't exist")
if self._is_associated_persons(new_domain_dataset):
raise ValueError("Nested Associated Persons domain reference")
return self.get_dataset_class(
new_domain_dataset,
new_file_path,
Expand Down
8 changes: 1 addition & 7 deletions cdisc_rules_engine/utilities/rule_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@
from cdisc_rules_engine.utilities.utils import (
get_directory_path,
get_operations_cache_key,
is_ap_domain,
search_in_list_of_dicts,
get_dataset_name_from_details,
)
Expand Down Expand Up @@ -182,12 +181,7 @@ def _domain_matched_ap_or_supp(
supp_ap_domains.update({f"{AP_DOMAIN}--", f"{APFA_DOMAIN}--"})

return any(set(domains_to_check).intersection(supp_ap_domains)) and (
dataset_metadata.is_supp
or is_ap_domain(
dataset_metadata.domain
or dataset_metadata.rdomain
or dataset_metadata.name
)
dataset_metadata.is_supp or dataset_metadata.is_ap
)

def rule_applies_to_data_structure(
Expand Down
7 changes: 7 additions & 0 deletions resources/schema/Operations.json
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,13 @@
"required": ["id", "operator"],
"type": "object"
},
{
"properties": {
"operator": { "const": "standard_domains" }
},
"required": ["id", "operator"],
"type": "object"
},
{
"properties": {
"operator": {
Expand Down
45 changes: 45 additions & 0 deletions resources/schema/Operations.md
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,28 @@ Output
Laboratory Test Results
```

### standard_domains

Returns a sorted list of valid SDTM domain names from the standard metadata. This can be used to check suffixes extracted from DOMAIN values or dataset names against the standard's list of domains.

Input

Product: sdtmig

Version: 3-4

```yaml
Operations:
- operator: standard_domains
id: $valid_domain_names
```

Output

```
["AE", "CM", "DM", "FA", "LB", "QS", ...]
```

### extract_metadata

Returns the requested dataset level metadata value for the current dataset. Possible name values are:
Expand All @@ -493,6 +515,9 @@ Returns the requested dataset level metadata value for the current dataset. Poss
- dataset_location
- dataset_name
- dataset_label
- domain
- is_ap
- ap_suffix

Example

Expand All @@ -512,6 +537,26 @@ Output:
Laboratory Test Results
```

Example: ap_suffix

Extracts the 2-character domain suffix (characters 3-4) from AP-related domains. For example, "FA" is extracted from the "APFA" DOMAIN value.

Input:

Target domain: APFA

```yaml
- name: ap_suffix
operator: extract_metadata
id: $ap_suffix
```

Output:

```
FA
```

## IG & Model Variable Operations

Operations for working with Implementation Guide and model variable metadata.
Expand Down
Loading
Loading