5 changes: 5 additions & 0 deletions .github/workflows/ci.yml
@@ -43,3 +43,8 @@ jobs:
run: |
python -m pip install --upgrade pip
pip install .
pip install pytest

- name: Run tests
run: |
pytest src/cadenzaanalytics/tests/ -v
9 changes: 9 additions & 0 deletions CHANGELOG.md
@@ -8,6 +8,15 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
### Added
- Support for measure aggregation types MEDIAN and STANDARD_DEVIATION_SAMPLE


### Changed
- Improved type annotations for mappings and parameter values
- Custom CSV handling replaces the pandas CSV handling to fix edge cases such as distinguishing NULL values from empty strings
- Minimum required Python version lowered to 3.11

### Fixed
- The `basic-extension` example enrichment now actually enriches the data

## 10.4.0 - 2025-12-05
### Added
- New visual response of type text (TextResponse)
4 changes: 1 addition & 3 deletions examples/enrichment/extension/example_extensions.py
@@ -1,14 +1,12 @@
"""Example module for running a disy Cadenza analytics extension that
will execute a basic enrichment."""

import pandas as pd

import cadenzaanalytics as ca


def enrichment_basic_analytics_function(request: ca.AnalyticsRequest):
# pylint: disable=unused-argument
df_data = pd.DataFrame()
df_data = request["table"].data
df_data["new_value"] = "value"

result_metadata = [
3 changes: 2 additions & 1 deletion pyproject.toml
@@ -22,14 +22,15 @@ classifiers = [
]

[tool.poetry.dependencies]
python = "^3.12"
python = "^3.11"
Flask = "3.1.2"
Werkzeug = "3.1.4"
Flask-Cors = "6.0.1"
requests-toolbelt = "1.0.0"
pandas = " ^2.0.2"
chardet = "5.2.0"
Shapely = "2.1.2"
pytest = "9.0.2"

[project]
name = "cadenzaanalytics"
44 changes: 8 additions & 36 deletions src/cadenzaanalytics/cadenza_analytics_extension.py
@@ -3,13 +3,9 @@
invoked via HTTP POST on the relative path."""
import json
import logging
from io import StringIO
from typing import Callable, List, Optional

import numpy as np
import pandas as pd
from flask import Response, request
from shapely import from_wkt

from cadenzaanalytics.data.analytics_extension import AnalyticsExtension
from cadenzaanalytics.data.extension_type import ExtensionType
@@ -21,6 +17,7 @@
from cadenzaanalytics.request.request_metadata import RequestMetadata
from cadenzaanalytics.request.request_table import RequestTable
from cadenzaanalytics.response.extension_response import ExtensionResponse
from cadenzaanalytics.util.csv import from_cadenza_csv


logger = logging.getLogger('cadenzaanalytics')
@@ -151,51 +148,26 @@ def _get_request_data(self, multipart_request) -> AnalyticsRequest:
if len(metadata.columns) > 0:
has_data = True
type_mapping = {}
na_values_mapping = {}
datetime_columns = []
geometry_columns = []

for column in metadata.columns:
if column.data_type == DataType.ZONEDDATETIME:
datetime_columns.append(column.name)
# must be empty list, otherwise pd.read_csv interprets empty strings as NA which
# is rejected by the parse_dates mechanism before it reaches the _parse_datetime function
na_values_mapping[column.name] = []
elif column.data_type == DataType.STRING:
# only empty strings must be considered as NA
# unfortunately there does not seem to be a way to interpret empty quotes as empty string
# and unquoted as None
na_values_mapping[column.name] = ['']
else:
# pandas default list of NA values, mostly relevant for numeric columns
na_values_mapping[column.name] = ['-1.#IND', '1.#QNAN', '1.#IND', '-1.#QNAN', '#N/A N/A',
'#N/A', 'N/A', 'n/a', 'NA', '<NA>', '#NA', 'NULL', 'null',
'NaN', '-NaN', 'nan', '-nan', 'None', '']

if column.data_type == DataType.GEOMETRY:
elif column.data_type == DataType.GEOMETRY:
geometry_columns.append(column.name)

type_mapping[column.name] = column.data_type.pandas_type()

csv_data = StringIO(self._get_from_request(multipart_request, 'data'))
# read_csv cannot distinguish None from empty strings
df_data = pd.read_csv(
csv_data = self._get_from_request(multipart_request, 'data')
# Use custom parser that properly handles quoted vs unquoted values
df_data = from_cadenza_csv(
csv_data,
sep=';',
dtype=type_mapping,
parse_dates=datetime_columns,
date_format='ISO8601',
na_values=na_values_mapping,
keep_default_na=False,
type_mapping=type_mapping,
datetime_columns=datetime_columns,
geometry_columns=geometry_columns
)

# Parse WKT geometries into shapely geometry objects using vectorized from_wkt
for gcol in geometry_columns:
values = df_data[gcol].to_numpy()
# from_wkt handles None values; replace empty strings with None
values = np.where((values == '') | pd.isna(values), None, values)
df_data[gcol] = from_wkt(values, on_invalid='warn')

logger.debug('Received data:\n%s', df_data.head())
else:
has_data = False
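The removed na_values workaround above existed because pandas.read_csv cannot distinguish an unquoted empty field (NULL) from a quoted empty string. Below is a minimal sketch of calling the new parser directly, assuming the package from this branch is installed; the CSV payload, column names, and pandas dtype strings are illustrative assumptions, not values taken from the codebase.

```python
# Sketch only: illustrates the from_cadenza_csv call shown in the diff above.
# The payload, column names and dtype strings are assumptions for this example.
from cadenzaanalytics.util.csv import from_cadenza_csv

csv_data = (
    '"id";"name";"measured_at"\r\n'
    '"1";"Station A";"2024-01-01T00:00:00Z"\r\n'
    '"2";"";\r\n'  # quoted empty field -> empty string, unquoted empty field -> NULL
)

df_data = from_cadenza_csv(
    csv_data,
    type_mapping={"id": "Int64", "name": "string", "measured_at": "object"},
    datetime_columns=["measured_at"],
    geometry_columns=[],
)
print(df_data.dtypes)
```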
7 changes: 4 additions & 3 deletions src/cadenzaanalytics/data/parameter.py
@@ -1,8 +1,9 @@
from typing import List, Any, Optional
from typing import List, Optional

from cadenzaanalytics.data.geometry_type import GeometryType
from cadenzaanalytics.data.data_object import DataObject
from cadenzaanalytics.data.parameter_type import ParameterType
from cadenzaanalytics.data.parameter_value_type import ParameterValueType


# pylint: disable=too-many-instance-attributes
@@ -31,7 +32,7 @@ def __init__(self, *,
geometry_types: Optional[List[GeometryType]] = None,
options: Optional[List[str]] = None,
required: bool = False,
default_value: Any = None,
default_value: Optional[ParameterValueType] = None,
requested_srs: Optional[str] = None) -> None:
"""Initialize a Parameter.

@@ -51,7 +52,7 @@
Whether the parameter is required, by default False.
For parameter type boolean, required=True makes submitting the value
True mandatory.
default_value : Any, optional
default_value : Optional[ParameterValueType], optional
Default value if the user doesn't provide one.
requested_srs : Optional[str], optional
Requested spatial reference system for GEOMETRY parameters.
9 changes: 5 additions & 4 deletions src/cadenzaanalytics/data/parameter_value.py
@@ -5,6 +5,7 @@
from cadenzaanalytics.data.data_type import DataType
from cadenzaanalytics.data.geometry_type import GeometryType
from cadenzaanalytics.data.data_object import DataObject
from cadenzaanalytics.data.parameter_value_type import ParameterValueType


class ParameterValue(DataObject):
@@ -92,12 +93,12 @@ def data_type(self) -> DataType:
return self._data_type

@property
def value(self) -> Any:
def value(self) -> Optional[ParameterValueType]:
"""Get the typed value of the parameter.

Returns
-------
Any
Optional[ParameterValueType]
The value of the parameter, typed according to the data type.
"""
return self._value
@@ -125,7 +126,7 @@ def srs(self) -> Optional[str]:
return self._srs


def _parse_value(self, value: Any, data_type: DataType) -> Any:
def _parse_value(self, value: Any, data_type: DataType) -> Optional[ParameterValueType]:
"""Parse and convert a parameter value according to its data type.

Parameters
@@ -137,7 +138,7 @@ def _parse_value(self, value: Any, data_type: DataType) -> Optional[ParameterValueType]:

Returns
-------
Any
Optional[ParameterValueType]
The parsed value with appropriate type, or None if input is None.
"""
if value is None:
9 changes: 9 additions & 0 deletions src/cadenzaanalytics/data/parameter_value_type.py
@@ -0,0 +1,9 @@
from __future__ import annotations

from datetime import datetime
from typing import Union

from shapely.geometry.base import BaseGeometry

# A typed value for parameters: numbers, strings, boolean, shapely geometries, and datetime
ParameterValueType = Union[int, float, str, bool, datetime, BaseGeometry]
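For context, a short sketch of using the new alias in an annotation; the helper below is hypothetical and not part of the package.

```python
# Hypothetical helper: shows ParameterValueType used as a type annotation.
from typing import Optional

from cadenzaanalytics.data.parameter_value_type import ParameterValueType


def format_parameter(value: Optional[ParameterValueType]) -> str:
    """Render a typed parameter value (or None) for logging."""
    if value is None:
        return "<no value>"
    return f"{type(value).__name__}: {value!r}"
```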
2 changes: 1 addition & 1 deletion src/cadenzaanalytics/request/analytics_request.py
@@ -5,7 +5,7 @@
from cadenzaanalytics.request.request_table import RequestTable


class AnalyticsRequest(collections.abc.Mapping):
class AnalyticsRequest(collections.abc.Mapping[str, RequestTable]):
"""Represents an incoming analytics request from Cadenza.

Provides access to request parameters and data tables. Supports dict-like
2 changes: 1 addition & 1 deletion src/cadenzaanalytics/request/request_metadata.py
@@ -6,7 +6,7 @@


# pylint: disable=protected-access
class RequestMetadata(collections.abc.Mapping):
class RequestMetadata(collections.abc.Mapping[str, ColumnMetadata]):
"""Metadata describing the columns in a request table.

Provides access to column metadata by name and groupings by attribute group.
11 changes: 6 additions & 5 deletions src/cadenzaanalytics/request/request_parameter.py
@@ -1,11 +1,12 @@
import collections
from typing import Iterator, List, Optional, Any
from typing import Iterator, List, Optional

from cadenzaanalytics.data.parameter_value import ParameterValue
from cadenzaanalytics.request.view_parameter import ViewParameter
from cadenzaanalytics.data.parameter_value_type import ParameterValueType


class RequestParameter(collections.abc.Mapping):
class RequestParameter(collections.abc.Mapping[str, ParameterValueType]):
"""Provides access to parameters from an analytics request.

Supports dict-like access to parameter values via `params["name"]` syntax.
@@ -41,7 +42,7 @@ def view(self) -> ViewParameter:
device_pixel_ratio=device_pixel_ratio
)

def __getitem__(self, name: str) -> Any:
def __getitem__(self, name: str) -> Optional[ParameterValueType]:
parameter = self._get_parameter(name)
if parameter is not None:
return parameter.value
@@ -90,7 +91,7 @@ def _get_parameter(self, name: str) -> Optional[ParameterValue]:
return None


def _get_parameter_value(self, name: str) -> Any:
def _get_parameter_value(self, name: str) -> Optional[ParameterValueType]:
"""Returns a specific parameter value.

Parameters
@@ -100,7 +101,7 @@ def _get_parameter_value(self, name: str) -> Optional[ParameterValueType]:

Returns
-------
Any
Optional[ParameterValueType]
The value of the parameter if found, else None.
"""

48 changes: 10 additions & 38 deletions src/cadenzaanalytics/response/csv_response.py
@@ -1,6 +1,3 @@
import csv
import re
import sys
from typing import List, Optional
import logging

@@ -12,6 +9,7 @@
from cadenzaanalytics.request.request_table import RequestTable
from cadenzaanalytics.response.extension_data_response import ExtensionDataResponse
from cadenzaanalytics.response.missing_metadata_strategy import MissingMetadataStrategy
from cadenzaanalytics.util import to_cadenza_csv

logger = logging.getLogger('cadenzaanalytics')

@@ -107,41 +105,15 @@ def get_response(self, request_table: Optional[RequestTable] = None) -> Response
leftover_metadata_column_names = self._apply_missing_metadata_strategy()
self._validate_response(leftover_metadata_column_names)

python_3_12 = (3, 12)
if sys.version_info >= python_3_12 and len(self._data.columns) > 1:
# The quoting strategies QUOTE_NOTNULL or QUOTE_NULL would fail with the csv writer
# error "single empty field record must be quoted"
# if there is only one column and if there is any null-ish value available.
# Also refer to https://github.com/pandas-dev/pandas/issues/59116
# Thus we can only use this strategy if there is more than one column, else fallback to
# the fallback approach that always quotes and then removes quotes again.
# The limitation to python 3.12 comes from the option QUOTE_NOTNULL only becoming available on that version.
csv_data = self._data.to_csv(
sep=';',
encoding='utf-8',
quoting=csv.QUOTE_NOTNULL,
index=False,
na_rep=None, # missing/None/Null values are sent without quotes
quotechar='"',
lineterminator='\r\n',
date_format='%Y-%m-%dT%H:%M:%SZ')
else:
# info: this approach cannot distinguish empty strings from NULL
csv_data = self._data.to_csv(
sep=';',
encoding='utf-8',
quoting=csv.QUOTE_ALL,
index=False,
quotechar='"',
lineterminator='\r\n',
date_format='%Y-%m-%dT%H:%M:%SZ')
# Needed to make sure to send NULL/None values (unquoted empty content) and not empty strings
# (quoted empty content)
# as empty strings would only be valid for DataType.STRING and cause errors for other DataTypes.
# regex searches and replaces double quotes that are surrounded by separators
# (start file, end file, semicolon or newline)
# this way double-quotes that represent a single escaped quote character within a string value are retained
csv_data = re.sub(r'(^|;|\r\n)""(?=;|\r\n|$)', r'\1', csv_data)
datetime_columns = [c.name for c in self._column_meta_data if c.data_type == DataType.ZONEDDATETIME]
geometry_columns = [c.name for c in self._column_meta_data if c.data_type == DataType.GEOMETRY]
float_columns = [c.name for c in self._column_meta_data if c.data_type == DataType.FLOAT64]
int_columns = [c.name for c in self._column_meta_data if c.data_type == DataType.INT64]
csv_data = to_cadenza_csv(self._data,
datetime_columns=datetime_columns,
geometry_columns=geometry_columns,
float_columns=float_columns,
int_columns=int_columns)
return self._create_response(csv_data, self._column_meta_data)


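The removed block above documented the wire convention directly: non-null values are quoted (QUOTE_NOTNULL), while NULL/None is written as an unquoted empty field so it remains valid for non-string columns. A minimal sketch of the new call follows, assuming the to_cadenza_csv helper keeps that convention; the DataFrame contents and the commented output are illustrative assumptions, not verified against the writer.

```python
# Sketch only: assumes to_cadenza_csv keeps the QUOTE_NOTNULL convention
# described in the removed code (quoted = value, unquoted empty field = NULL).
import pandas as pd

from cadenzaanalytics.util import to_cadenza_csv

df = pd.DataFrame({"name": ["Station A", None], "value": [1.5, None]})
csv_data = to_cadenza_csv(
    df,
    datetime_columns=[],
    geometry_columns=[],
    float_columns=["value"],
    int_columns=[],
)
# Expected shape (illustrative): '"name";"value"\r\n"Station A";"1.5"\r\n;\r\n'
print(repr(csv_data))
```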
1 change: 1 addition & 0 deletions src/cadenzaanalytics/tests/__init__.py
@@ -0,0 +1 @@
"""Test suite for cadenzaanalytics package."""