Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion cdisc_rules_engine/check_operators/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -430,4 +430,3 @@ def flatten_list(data, items):
vectorized_get_dict_key = np.vectorize(get_dict_key_val)
vectorized_is_in = np.vectorize(is_in)
vectorized_case_insensitive_is_in = np.vectorize(case_insensitive_is_in)
vectorized_len = np.vectorize(len)
11 changes: 0 additions & 11 deletions cdisc_rules_engine/constants/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,6 @@

# a message like: [INFO 2021-12-29 17:10:26,575 - module.py:44] - Log Message
LOG_FORMAT: str = "[%(levelname)s %(asctime)s - %(filename)s:%(lineno)s] - %(message)s"
XPT_LABEL_PATTERN: str = (
"HEADER RECORD\\*{7}MEMBER {2}HEADER RECORD!{7}0{17}160{8}140 "
"{2}HEADER RECORD\\*{7}DSCRPTR HEADER RECORD!{7}0{30}"
" SAS\\s{5}.{8}SASDATA .{16}\\s{24}.{16}.{16}\\s{16}(?P<label>.{40})"
)

XPT_MODIFIED_DATE_PATTERN: str = (
"HEADER RECORD\\*{7}MEMBER {2}HEADER RECORD!{7}0{17}160{8}140 "
"{2}HEADER RECORD\\*{7}DSCRPTR HEADER RECORD!{7}0{30}"
" SAS\\s{5}.{8}SASDATA .{16}\\s{24}.{16}(?P<modified_date>.{16})\\s{16}.{40}"
)

NULL_FLAVORS = ["", None, {}, {None}, [], [None], np.nan]

Expand Down
1 change: 0 additions & 1 deletion cdisc_rules_engine/constants/domains.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
SUPPLEMENTARY_DOMAINS = ("SUPP", "SQ")
AP_DOMAIN_LENGTH: int = 4
AP_DOMAIN: str = "AP"
APFA_DOMAIN: str = "APFA"
APRELSUB_DOMAIN: str = "APRELSUB"
11 changes: 0 additions & 11 deletions cdisc_rules_engine/constants/patterns.py

This file was deleted.

1 change: 0 additions & 1 deletion cdisc_rules_engine/enums/default_file_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ class DefaultFilePaths(BaseEnum):
STANDARD_MODELS_CACHE_FILE = "standards_models.pkl"
VARIABLE_METADATA_CACHE_FILE = "variables_metadata.pkl"
VARIABLE_CODELIST_CACHE_FILE = "variable_codelist_maps.pkl"
CODELIST_TERM_MAP_CACHE_FILE = "codelist_term_maps.pkl"
CUSTOM_RULES_CACHE_FILE = "custom_rules.pkl"
CUSTOM_RULES_DICTIONARY = "custom_rules_dictionary.pkl"
LOCAL_XSD_FILE_DIR = join("resources", "schema", "xml")
Expand Down
2 changes: 0 additions & 2 deletions cdisc_rules_engine/exceptions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
DomainNotFoundInDefineXMLError,
InvalidDatasetFormat,
InvalidJSONFormat,
NumberOfAttemptsExceeded,
InvalidDictionaryVariable,
UnsupportedDictionaryType,
FailedSchemaValidation,
Expand All @@ -37,7 +36,6 @@
"DomainNotFoundInDefineXMLError",
"InvalidDatasetFormat",
"InvalidJSONFormat",
"NumberOfAttemptsExceeded",
"InvalidDictionaryVariable",
"UnsupportedDictionaryType",
"FailedSchemaValidation",
Expand Down
4 changes: 0 additions & 4 deletions cdisc_rules_engine/exceptions/custom_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,6 @@ class InvalidCSVFile(EngineError):
description = "CSV data is malformed."


class NumberOfAttemptsExceeded(EngineError):
pass


class InvalidDictionaryVariable(EngineError):
description = (
"Provided dictionary variable does not correspond to a dictionary term type"
Expand Down
9 changes: 0 additions & 9 deletions cdisc_rules_engine/interfaces/data_service_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,15 +57,6 @@ def get_variables_metadata(self, dataset_name: str) -> DatasetInterface:
Gets variables metadata of a dataset.
"""

@abstractmethod
def get_dataset_by_type(
self, dataset_name: str, dataset_type: str, **params
) -> DatasetInterface:
"""
Generic function to return dataset based on the type.
dataset_type param can be: contents, metadata, variables_metadata.
"""

@abstractmethod
def concat_split_datasets(
self,
Expand Down
4 changes: 0 additions & 4 deletions cdisc_rules_engine/models/dataset/dask_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,10 +405,6 @@ def partition_isin(partition):
result = self._data.map_partitions(partition_isin)
return result

def filter_by_value(self, column, values):
mask = self._data[column].isin(values)
return self.__class__(self._data[mask])

def max(self, *args, **kwargs):
result = self._data.max(*args, **kwargs)
return self.__class__(result)
3 changes: 1 addition & 2 deletions cdisc_rules_engine/models/dataset_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,5 @@

class DatasetTypes(BaseEnum):
CONTENTS = "contents"
METADATA = "metadata"
RAW_METADATA = "raw_metadata"
DATASET_METADATA = "dataset_metadata"
VARIABLES_METADATA = "variables_metadata"
39 changes: 0 additions & 39 deletions cdisc_rules_engine/models/record_variable.py

This file was deleted.

2 changes: 0 additions & 2 deletions cdisc_rules_engine/operations/base_operation.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
VariableMetadataNotFoundError,
DomainNotFoundInDefineXMLError,
InvalidDatasetFormat,
NumberOfAttemptsExceeded,
InvalidDictionaryVariable,
UnsupportedDictionaryType,
FailedSchemaValidation,
Expand Down Expand Up @@ -79,7 +78,6 @@ def execute(self) -> DatasetInterface:
VariableMetadataNotFoundError,
DomainNotFoundInDefineXMLError,
InvalidDatasetFormat,
NumberOfAttemptsExceeded,
InvalidDictionaryVariable,
UnsupportedDictionaryType,
FailedSchemaValidation,
Expand Down
4 changes: 4 additions & 0 deletions cdisc_rules_engine/operations/operations_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
)
from cdisc_rules_engine.operations.distinct import Distinct
from cdisc_rules_engine.operations.extract_metadata import ExtractMetadata
from cdisc_rules_engine.operations.get_library_class_domains import (
GetLibraryClassDomains,
)
from cdisc_rules_engine.operations.get_xhtml_errors import GetXhtmlErrors
from cdisc_rules_engine.operations.library_column_order import LibraryColumnOrder
from cdisc_rules_engine.operations.library_model_column_order import (
Expand Down Expand Up @@ -102,6 +105,7 @@ class OperationsFactory(FactoryInterface):
"get_column_order_from_dataset": DatasetColumnOrder,
"get_column_order_from_library": LibraryColumnOrder,
"get_codelist_attributes": CodeListAttributes,
"get_library_class_domains": GetLibraryClassDomains,
"get_model_column_order": LibraryModelColumnOrder,
"get_model_filtered_variables": LibraryModelVariablesFilter,
"get_parent_model_column_order": ParentLibraryModelColumnOrder,
Expand Down
13 changes: 0 additions & 13 deletions cdisc_rules_engine/plugin_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,19 +40,6 @@ def load(self):
group_plugins: EntryPoints = entry_points().select(group=group_name)
self.__register_group_plugins(factory_class, group_plugins)

@classmethod
def register_group_factory(
cls, group_name: str, factory_class: Type[FactoryInterface]
):
"""
Registers new plugin group and factory.
"""
if not issubclass(factory_class, FactoryInterface):
raise ValueError(
f"Given class {factory_class} must implement FactoryInterface"
)
cls.__group_factory_map[group_name] = factory_class

def __register_group_plugins(
self, factory_class: Type[FactoryInterface], group_plugins: EntryPoints
):
Expand Down
51 changes: 2 additions & 49 deletions cdisc_rules_engine/services/data_services/base_data_service.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,7 @@
import asyncio
from abc import ABC, abstractmethod
from functools import wraps, partial
from functools import wraps
from typing import Callable, List, Optional, Iterable, Iterator
from concurrent.futures import ThreadPoolExecutor
import os
import numpy as np
import dask.dataframe as dd

Expand All @@ -30,7 +28,6 @@
from cdisc_rules_engine.models.dataset_metadata import DatasetMetadata
from cdisc_rules_engine.models.dataset_types import DatasetTypes
from cdisc_rules_engine.services import logger
from cdisc_rules_engine.services.cdisc_library_service import CDISCLibraryService
from cdisc_rules_engine.services.data_readers import DataReaderFactory
from cdisc_rules_engine.utilities.utils import (
get_dataset_cache_key_from_path,
Expand Down Expand Up @@ -105,9 +102,6 @@ def __init__(
self.cache_service = cache_service
self._reader_factory = reader_factory
self._config = config
self.cdisc_library_service: CDISCLibraryService = CDISCLibraryService(
self._config.getValue("CDISC_LIBRARY_API_KEY", ""), self.cache_service
)
self.standard = kwargs.get("standard")
self.version = (kwargs.get("standard_version") or "").replace(".", "-")
self.standard_substandard = kwargs.get("standard_substandard")
Expand All @@ -120,22 +114,6 @@ def __init__(
self._initialize_datasets_metadata(**kwargs)
)

def get_dataset_by_type(
self, dataset_name: str, dataset_type: str, **params
) -> DatasetInterface:
"""
Generic function to return dataset based on the type.
dataset_type param can be: contents, metadata, variables_metadata.
"""
dataset_type_to_function_map: dict = {
DatasetTypes.CONTENTS.value: self.get_dataset,
DatasetTypes.METADATA.value: self.get_dataset_metadata,
DatasetTypes.VARIABLES_METADATA.value: self.get_variables_metadata,
}
return dataset_type_to_function_map[dataset_type](
dataset_name=dataset_name, **params
)

def concat_split_datasets(
self,
func_to_call: Callable,
Expand Down Expand Up @@ -163,12 +141,6 @@ def concat_split_datasets(

return full_dataset

def check_filepath(self, dataset_names: List[str]) -> List:
"""
Check if single file with multiple datasets.
"""
return any(not os.path.exists(name) for name in dataset_names)

def get_dataset_class(
self,
dataset: DatasetInterface,
Expand Down Expand Up @@ -202,7 +174,7 @@ def get_data_structure(
return OCCDS
return OTHER

@cached_dataset(DatasetTypes.METADATA.value)
@cached_dataset(DatasetTypes.DATASET_METADATA.value)
def get_dataset_metadata(self, dataset_name: str, **params) -> DatasetInterface:
"""
Gets metadata of a dataset and returns it as a DataFrame.
Expand Down Expand Up @@ -349,14 +321,6 @@ def check_presence(key):
elif check_presence("RDOMAIN"):
return check_presence(variable)

def _domain_starts_with(self, domain, variable):
"""
Checks if the given dataset-class string starts with
a particular variable string.
Returns True/False
"""
return domain.startswith(variable)

@staticmethod
def _replace_nans_in_numeric_cols_with_none(dataset: DatasetInterface):
"""
Expand All @@ -383,17 +347,6 @@ def _replace_nans_in_specified_cols_with_none(
dataset.data = replace_nan_values_in_df(dataset.data, valid_columns)
return dataset

async def _async_get_dataset(
self, function_to_call: Callable, dataset_name: str, **kwargs
) -> DatasetInterface:
"""
Asynchronously executes passed function_to_call.
"""
loop = asyncio.get_event_loop()
return await loop.run_in_executor(
None, partial(function_to_call, dataset_name=dataset_name, **kwargs)
)

def _async_get_datasets(
self, function_to_call: Callable, dataset_names: List[str], **kwargs
) -> Iterator[DatasetInterface]:
Expand Down
13 changes: 0 additions & 13 deletions cdisc_rules_engine/services/data_services/dummy_data_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from cdisc_rules_engine.dummy_models.dummy_dataset import DummyDataset
from cdisc_rules_engine.interfaces import CacheServiceInterface, ConfigInterface
from cdisc_rules_engine.models.sdtm_dataset_metadata import SDTMDatasetMetadata
from cdisc_rules_engine.models.dataset_types import DatasetTypes
from cdisc_rules_engine.services.data_readers import DataReaderFactory
from cdisc_rules_engine.services.data_readers.json_reader import JSONReader
from cdisc_rules_engine.services.data_services import BaseDataService
Expand Down Expand Up @@ -112,18 +111,6 @@ def get_variables_metadata(self, dataset_name: str) -> PandasDataset:
] + [variable.format]
return PandasDataset.from_dict(metadata_to_return)

def get_dataset_by_type(
self, dataset_name: str, dataset_type: str, **params
) -> PandasDataset:
dataset_type_to_function_map: dict = {
DatasetTypes.CONTENTS.value: self.get_dataset,
DatasetTypes.METADATA.value: self.get_dataset_metadata,
DatasetTypes.VARIABLES_METADATA.value: self.get_variables_metadata,
}
return dataset_type_to_function_map[dataset_type](
dataset_name=dataset_name, **params
)

def get_define_xml_contents(self, dataset_name: str) -> bytes:
if not self.define_xml:
# Search for define xml locally
Expand Down
16 changes: 0 additions & 16 deletions cdisc_rules_engine/services/data_services/local_data_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,22 +170,6 @@ def get_define_xml_contents(self, dataset_name: str) -> bytes:
with open(dataset_name, "rb") as f:
return f.read()

def get_dataset_by_type(
self, dataset_name: str, dataset_type: str, **params
) -> DatasetInterface:
"""
Generic function to return dataset based on the type.
dataset_type param can be: contents, metadata, variables_metadata.
"""
dataset_type_to_function_map: dict = {
DatasetTypes.CONTENTS.value: self.get_dataset,
DatasetTypes.METADATA.value: self.get_dataset_metadata,
DatasetTypes.VARIABLES_METADATA.value: self.get_variables_metadata,
}
return dataset_type_to_function_map[dataset_type](
dataset_name=dataset_name, **params
)

def __read_metadata(
self,
dataset_path: str,
Expand Down
Loading
Loading