Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
c516cdf
before test fixes
gerrycampion Apr 10, 2026
5cabd75
Merge branch 'main' into 1672-get_dataset-incorrect-calls
gerrycampion Apr 11, 2026
ead3ac8
Fixed unit tests
gerrycampion Apr 14, 2026
4b5a081
more test fixes
gerrycampion Apr 14, 2026
16bee96
test suite fixes
gerrycampion Apr 14, 2026
c02661f
simplify dummy data service
gerrycampion Apr 15, 2026
a4f337c
fixed dataset name in reports
gerrycampion Apr 15, 2026
bc78160
regression report fixes
gerrycampion Apr 15, 2026
b6ae02c
fix rule editor test
gerrycampion Apr 15, 2026
039f678
removed more dataset path
gerrycampion Apr 15, 2026
85eef27
remove unused method
gerrycampion Apr 15, 2026
9b836b1
remove unnecessary dataset path params
gerrycampion Apr 16, 2026
6da3024
missed a dataset_path
gerrycampion Apr 16, 2026
5429014
remove extra datasets references
gerrycampion Apr 17, 2026
4de4e0c
Merge branch 'main' into 1672-get_dataset-incorrect-calls
gerrycampion Apr 17, 2026
befd6b9
fix merged test code
gerrycampion Apr 17, 2026
d2b13d9
refactor operation params
gerrycampion Apr 17, 2026
a494794
removed unneeded self.params.domain from operations
gerrycampion Apr 21, 2026
02a2457
Merge branch 'main' into 1672-get_dataset-incorrect-calls
gerrycampion Apr 21, 2026
a61a74d
more fixes for operations dataset metadata source
gerrycampion Apr 21, 2026
67188e6
Merge branch 'main' into 1672-get_dataset-incorrect-calls
gerrycampion Apr 21, 2026
6285fd3
fixed test_contents_library_variables_dataset_builder
gerrycampion Apr 21, 2026
cb83194
Merge branch 'main' into 1672-get_dataset-incorrect-calls
gerrycampion Apr 27, 2026
3bf3810
Merge branch 'main' into 1672-get_dataset-incorrect-calls
RamilCDISC Apr 27, 2026
6bfae7a
Merge branch 'main' into 1672-get_dataset-incorrect-calls
SFJohnson24 Apr 28, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions .github/test/selenium_test_editor.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@
"DM": [
{
"executionStatus": "success",
"dataset": "dm.xpt",
"dataset": "DM",
"domain": "DM",
"variables": [],
"message": None,
Expand All @@ -194,7 +194,7 @@
"FA": [
{
"executionStatus": "issue reported",
"dataset": "fa.xpt",
"dataset": "FA",
"domain": "FA",
"variables": [
"$val_dy",
Expand All @@ -215,7 +215,7 @@
"RFSTDTC": "2012-11-15",
"FADTC": "2012-12-02",
},
"dataset": "fa.xpt",
"dataset": "FA",
"row": 1,
"USUBJID": "CDISC002",
"SEQ": 1,
Expand All @@ -227,7 +227,7 @@
"RFSTDTC": "2013-10-08",
"FADTC": "2013-10-12",
},
"dataset": "fa.xpt",
"dataset": "FA",
"row": 2,
"USUBJID": "CDISC004",
"SEQ": 2,
Expand All @@ -239,7 +239,7 @@
"RFSTDTC": "2013-01-05",
"FADTC": "2012-12-02",
},
"dataset": "fa.xpt",
"dataset": "FA",
"row": 4,
"USUBJID": "CDISC007",
"SEQ": 4,
Expand All @@ -251,7 +251,7 @@
"RFSTDTC": "2014-05-11",
"FADTC": "2014-12-02",
},
"dataset": "fa.xpt",
"dataset": "FA",
"row": 5,
"USUBJID": "CDISC008",
"SEQ": 5,
Expand All @@ -262,7 +262,7 @@
"IE": [
{
"executionStatus": "issue reported",
"dataset": "ie.xpt",
"dataset": "IE",
"domain": "IE",
"variables": [
"$val_dy",
Expand All @@ -283,7 +283,7 @@
"RFSTDTC": "2022-03-20",
"IEDTC": "2022-03-17",
},
"dataset": "ie.xpt",
"dataset": "IE",
"row": 1,
"USUBJID": "CDISC-TEST-001",
"SEQ": 1,
Expand All @@ -294,7 +294,7 @@
"LB": [
{
"executionStatus": "issue reported",
"dataset": "lb.xpt",
"dataset": "LB",
"domain": "LB",
"variables": [
"$val_dy",
Expand All @@ -315,7 +315,7 @@
"LBDTC": "2022-03-30",
"LBDY": 2,
},
"dataset": "lb.xpt",
"dataset": "LB",
"row": 1,
"USUBJID": "CDISC-TEST-001",
"SEQ": 1,
Expand Down
4 changes: 2 additions & 2 deletions cdisc_rules_engine/constants/metadata_columns.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
SOURCE_FILENAME = "source_filename"
SOURCE_DATASET_NAME = "source_dataset_name"
SOURCE_ROW_NUMBER = "source_row_number"
METADATA_COLUMNS = {SOURCE_FILENAME, SOURCE_ROW_NUMBER}
METADATA_COLUMNS = {SOURCE_DATASET_NAME, SOURCE_ROW_NUMBER}
51 changes: 31 additions & 20 deletions cdisc_rules_engine/dataset_builders/base_dataset_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from cdisc_rules_engine.utilities.sdtm_utilities import (
tag_source,
)
from typing import List, Iterable, Optional
from typing import List, Optional
from cdisc_rules_engine.utilities import sdtm_utilities
from cdisc_rules_engine.utilities.rule_processor import RuleProcessor
from cdisc_rules_engine.models.dataset.dataset_interface import DatasetInterface
Expand All @@ -25,8 +25,6 @@ def __init__(
cache_service,
rule_processor: RuleProcessor,
data_processor,
dataset_path,
datasets: Iterable[SDTMDatasetMetadata],
dataset_metadata: SDTMDatasetMetadata,
define_xml_path,
standard,
Expand All @@ -38,8 +36,6 @@ def __init__(
self.cache = cache_service
self.data_processor = data_processor
self.rule_processor = rule_processor
self.dataset_path = dataset_path
self.datasets = datasets
self.dataset_metadata = dataset_metadata
self.rule = rule
self.define_xml_path = define_xml_path
Expand All @@ -56,18 +52,20 @@ def build(self) -> DatasetInterface:
"""
pass

def build_split_datasets(self, dataset_name, **kwargs) -> DatasetInterface:
def build_split_datasets(self, dataset_name: str, **kwargs) -> DatasetInterface:
"""
Returns correct dataframe to operate on.
Default implementation that temporarily sets dataset_path to dataset_name and calls build().
Default implementation that temporarily sets dataset_metadata and calls build().
"""
original_path = self.dataset_path
original_dataset_metadata = self.dataset_metadata
try:
self.dataset_path = dataset_name
self.dataset_metadata = self.data_service.get_raw_dataset_metadata(
dataset_name=dataset_name
)
result = self.build(**kwargs)
return result
finally:
self.dataset_path = original_path
self.dataset_metadata = original_dataset_metadata

def get_dataset(self, **kwargs):
# If validating dataset content, ensure split datasets are handled.
Expand All @@ -77,7 +75,7 @@ def get_dataset(self, **kwargs):
dataset: DatasetInterface = self.data_service.concat_split_datasets(
func_to_call=self.build_split_datasets,
datasets_metadata=get_corresponding_datasets(
self.datasets, self.dataset_metadata
self.data_service.get_datasets(), self.dataset_metadata
),
**kwargs,
)
Expand All @@ -95,14 +93,14 @@ def get_dataset_contents(self, **kwargs):
dataset: DatasetInterface = self.data_service.concat_split_datasets(
func_to_call=self.data_service.get_dataset,
datasets_metadata=get_corresponding_datasets(
self.datasets, self.dataset_metadata
self.data_service.get_datasets(), self.dataset_metadata
),
**kwargs,
)
else:
# single dataset. the most common case
dataset: DatasetInterface = self.data_service.get_dataset(
dataset_name=self.dataset_path
dataset_name=self.dataset_metadata.name
)
dataset = tag_source(dataset, self.dataset_metadata)
return dataset
Expand All @@ -126,7 +124,10 @@ def get_define_xml_item_group_metadata_for_dataset(
"""

define_xml_reader = DefineXMLReaderFactory.get_define_xml_reader(
self.dataset_path, self.define_xml_path, self.data_service, self.cache
self.dataset_metadata.full_path,
self.define_xml_path,
self.data_service,
self.cache,
)
return define_xml_reader.extract_dataset_metadata(
dataset_metadata["dataset_name"]
Expand All @@ -149,7 +150,10 @@ def get_define_xml_item_group_metadata_for_domain(self, domain: str) -> List[dic
"""

define_xml_reader = DefineXMLReaderFactory.get_define_xml_reader(
self.dataset_path, self.define_xml_path, self.data_service, self.cache
self.dataset_metadata.full_path,
self.define_xml_path,
self.data_service,
self.cache,
)
return define_xml_reader.extract_domain_metadata(domain)

Expand All @@ -164,7 +168,10 @@ def get_define_xml_variables_metadata(self) -> List[dict]:
| SUPPDM | DM |
"""
define_xml_reader = DefineXMLReaderFactory.get_define_xml_reader(
self.dataset_path, self.define_xml_path, self.data_service, self.cache
self.dataset_metadata.full_path,
self.define_xml_path,
self.data_service,
self.cache,
)
domain = self.dataset_metadata.domain or self.dataset_metadata.rdomain
return define_xml_reader.extract_variables_metadata(
Expand All @@ -176,7 +183,10 @@ def get_define_xml_value_level_metadata(self) -> List[dict]:
Gets Define XML value level metadata and returns it as dataframe.
"""
define_xml_reader = DefineXMLReaderFactory.get_define_xml_reader(
self.dataset_path, self.define_xml_path, self.data_service, self.cache
self.dataset_metadata.full_path,
self.define_xml_path,
self.data_service,
self.cache,
)
return define_xml_reader.extract_value_level_metadata(
domain_name=self.dataset_metadata.domain
Expand All @@ -188,7 +198,10 @@ def add_row_number(dataframe: DatasetInterface) -> None:

def get_define_metadata(self):
define_xml_reader = DefineXMLReaderFactory.get_define_xml_reader(
self.dataset_path, self.define_xml_path, self.data_service, self.cache
self.dataset_metadata.full_path,
self.define_xml_path,
self.data_service,
self.cache,
)
return define_xml_reader.read()

Expand All @@ -205,9 +218,7 @@ def get_library_variables_metadata(self) -> DatasetInterface:
variables: List[dict] = sdtm_utilities.get_variables_metadata_from_standard(
library_metadata=self.library_metadata,
data_service=self.data_service,
datasets=self.datasets,
dataset_metadata=self.dataset_metadata,
dataset_path=self.dataset_path,
)
variables_metadata: dict = self.library_metadata.variables_metadata.get(
domain, {}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ def build(self):
"""
size_unit: str = self.rule_processor.get_size_unit_from_rule(self.rule)
return self.data_service.get_dataset_metadata(
dataset_name=self.dataset_path,
dataset_name=self.dataset_metadata.name,
size_unit=size_unit,
datasets=self.datasets,
)
Original file line number Diff line number Diff line change
Expand Up @@ -9,23 +9,21 @@ def build(self, **kwargs):
"""
Returns the contents of a file as a dataframe for evaluation.
"""
return self.data_service.get_dataset(dataset_name=self.dataset_path)
return self.data_service.get_dataset(dataset_name=self.dataset_metadata.name)

def build_split_datasets(self, dataset_name, **kwargs):
"""
Returns the contents of a file as a dataframe for evaluation.
"""
return self.data_service.get_dataset(
dataset_name=dataset_name, datasets=self.datasets
)
return self.data_service.get_dataset(dataset_name=dataset_name)

def get_dataset(self, **kwargs):
dataset = super().get_dataset(**kwargs)
length = sum(
[
dataset.record_count
for dataset in get_corresponding_datasets(
self.datasets, self.dataset_metadata
self.data_service.get_datasets(), self.dataset_metadata
)
]
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,13 @@ def build(self):
...,
"""
data_contents_df = self.data_service.get_dataset(
dataset_name=self.dataset_path, datasets=self.datasets
dataset_name=self.dataset_metadata.name
)
# Build dataset metadata dataframe
size_unit: str = self.rule_processor.get_size_unit_from_rule(self.rule)
dataset_metadata = self.data_service.get_dataset_metadata(
dataset_name=self.dataset_path, size_unit=size_unit, datasets=self.datasets
dataset_name=self.dataset_metadata.name,
size_unit=size_unit,
).to_dict(orient="records")[0]
# Build define xml dataframe
define = self.get_define_xml_item_group_metadata_for_dataset(dataset_metadata)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def build(self):
"""
# get dataset contents and convert it from wide to long
data_contents_df: DatasetInterface = self.data_service.get_dataset(
dataset_name=self.dataset_path
dataset_name=self.dataset_metadata.name
)
self.add_row_number(data_contents_df)
data_contents_long_df: DatasetInterface = ValuesDatasetBuilder.build(self)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,6 @@ def get_service(
kwargs.get("cache_service"),
kwargs.get("rule_processor"),
kwargs.get("data_processor"),
kwargs.get("dataset_path"),
kwargs.get("datasets"),
kwargs.get("dataset_metadata", ""),
kwargs.get("define_xml_path"),
kwargs.get("standard"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,10 @@ def _get_define_xml_dataframe(self):
logger.info(f"No define_metadata is provided for {__name__}.")
return self.dataset_implementation(columns=define_col_order)
define_xml_reader = DefineXMLReaderFactory.get_define_xml_reader(
self.dataset_path, self.define_xml_path, self.data_service, self.cache
self.dataset_metadata.full_path,
self.define_xml_path,
self.data_service,
self.cache,
)
enriched_metadata = []
for basic_metadata in define_metadata:
Expand Down Expand Up @@ -131,23 +134,23 @@ def _get_dataset_dataframe(self):
"ap_suffix",
]

if len(self.datasets) == 0:
if len(self.data_service.get_datasets()) == 0:
dataset_df = self.dataset_implementation(columns=dataset_col_order)
logger.info(f"No datasets metadata is provided in {__name__}.")
else:
datasets = self.dataset_implementation()
for dataset in self.datasets:
for dataset_metadata in self.data_service.get_datasets():
ds_metadata = None
try:
ds_metadata = self.data_service.get_dataset_metadata(
dataset_name=dataset.filename
dataset_name=dataset_metadata.name
)
ds_metadata.data["dataset_domain"] = getattr(
dataset, "domain", None
dataset_metadata, "domain", None
)
if dataset.first_record:
if dataset_metadata.first_record:
ds_metadata.data["dataset_columns"] = [
list(dataset.first_record.keys())
list(dataset_metadata.first_record.keys())
]
else:
ds_metadata.data["dataset_columns"] = [[]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,8 @@ def build(self):
"""
size_unit: str = self.rule_processor.get_size_unit_from_rule(self.rule)
dataset_metadata = self.data_service.get_dataset_metadata(
dataset_name=self.dataset_path,
dataset_name=self.dataset_metadata.name,
size_unit=size_unit,
datasets=self.datasets,
)
dataset_metadata = dataset_metadata.to_dict(orient="records")[0]
data_contents_long_df = super().build()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,6 @@ def build(self):
"""

return self.dataset_implementation.from_records(
{ds.unsplit_name: ds.filename for ds in self.datasets}, index=[0]
{ds.unsplit_name: ds.filename for ds in self.data_service.get_datasets()},
index=[0],
)
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ def build(self):
1 EC ec.xpt EC False
2 SE None SE True
"""
domain_files = {ds.unsplit_name: ds.filename for ds in self.datasets}
domain_files = {
ds.unsplit_name: ds.filename for ds in self.data_service.get_datasets()
}
all_define_metadata = self.get_define_metadata()
records = []
for define_item in all_define_metadata:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def build(self):
...,
"""
data_contents_df: DatasetInterface = self.data_service.get_dataset(
dataset_name=self.dataset_path
dataset_name=self.dataset_metadata.name
)
self.add_row_number(data_contents_df)
values_df: DatasetInterface = data_contents_df.melt(
Expand Down
Loading
Loading