Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 44 additions & 5 deletions python/lib/sift_py/_internal/metadata.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,20 @@
from typing import Dict, List, Union
from typing import Any, Callable, Dict, List, Optional, Union

from sift.metadata.v1.metadata_pb2 import MetadataKey, MetadataKeyType, MetadataValue


def metadata_dict_to_pb(_metadata: Dict[str, Union[str, float, bool]]) -> List[MetadataValue]:
def metadata_dict_to_pb(
_metadata: Dict[str, Union[str, float, bool, int]],
parse: Optional[Callable[[Any], Optional[Union[str, float, bool, int]]]] = None,
) -> List[MetadataValue]:
"""
Wraps metadata dictionary into a list of MetadataValue objects.

Args:
_metadata: Dictionary of metadata key-value pairs.

parse: Optional function to parse complex types into a compatible
metadata type (i.e, str, float, int, or bool). Function should raise an
Exception if it can't parse the value.
Returns:
List of MetadataValue objects.
"""
Expand All @@ -21,6 +26,12 @@ def metadata_dict_to_pb(_metadata: Dict[str, Union[str, float, bool]]) -> List[M
boolean_value = None
number_value = None

if not isinstance(value, (str, float, bool, int)):
if parse:
value = parse(value)
else:
raise ValueError(f"Unsupported metadata value type for key '{key}': {value}")

if isinstance(value, str):
string_value = value
type = MetadataKeyType.METADATA_KEY_TYPE_STRING
Expand All @@ -46,7 +57,7 @@ def metadata_dict_to_pb(_metadata: Dict[str, Union[str, float, bool]]) -> List[M
return metadata


def metadata_pb_to_dict(metadata: List[MetadataValue]) -> Dict[str, Union[str, float, bool]]:
def metadata_pb_to_dict(metadata: List[MetadataValue]) -> Dict[str, Union[str, float, bool, int]]:
"""
Unwraps a list of MetadataValue objects into a dictionary.

Expand All @@ -56,7 +67,7 @@ def metadata_pb_to_dict(metadata: List[MetadataValue]) -> Dict[str, Union[str, f
Returns:
Dictionary of metadata key-value pairs.
"""
unwrapped_metadata: Dict[str, Union[str, float, bool]] = {}
unwrapped_metadata: Dict[str, Union[str, float, bool, int]] = {}
for md in metadata:
if md.key.name in unwrapped_metadata:
raise ValueError(f"Key already exists: {md.key.name}")
Expand All @@ -68,3 +79,31 @@ def metadata_pb_to_dict(metadata: List[MetadataValue]) -> Dict[str, Union[str, f
unwrapped_metadata[md.key.name] = md.number_value

return unwrapped_metadata


def metadata_pb_to_dict_api(metadata: List[MetadataValue]) -> List[Dict[str, Any]]:
    """
    Serializes a list of MetadataValue objects to an API-compatible dict,
    preserving the proto structure.

    Args:
        metadata: List of MetadataValue objects.

    Returns:
        List of dicts representing the metadata with proto structure.

    Raises:
        ValueError: If a metadata key's type is not string, boolean, or number.
    """
    # Maps each proto key type to the name of the value field that carries
    # its payload; keeps the per-entry conversion to a single lookup.
    field_for_type = {
        MetadataKeyType.METADATA_KEY_TYPE_STRING: "string_value",
        MetadataKeyType.METADATA_KEY_TYPE_BOOLEAN: "boolean_value",
        MetadataKeyType.METADATA_KEY_TYPE_NUMBER: "number_value",
    }

    def metadata_value_to_dict(md: MetadataValue) -> Dict[str, Any]:
        value_dict: Dict[str, Any] = {"key": {"name": md.key.name, "type": md.key.type}}
        field = field_for_type.get(md.key.type)
        if field is None:
            raise ValueError(f"{md.key.name} set to invalid type: {md.key.type}")
        value_dict[field] = getattr(md, field)
        return value_dict

    return [metadata_value_to_dict(md) for md in metadata]
123 changes: 119 additions & 4 deletions python/lib/sift_py/data_import/_tdms_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest
from nptdms import TdmsFile, types # type: ignore
from pytest_mock import MockFixture
from sift.metadata.v1.metadata_pb2 import MetadataKeyType

from sift_py.data_import.tdms import TdmsTimeFormat, TdmsUploadService, sanitize_string
from sift_py.rest import SiftRestConfig
Expand Down Expand Up @@ -40,7 +41,14 @@ def channels(self) -> List[MockTdmsChannel]:
class MockTdmsFile:
def __init__(self, groups: List[MockTdmsGroup]):
self._groups: List[MockTdmsGroup] = groups
self.properties: Dict[str, str] = {}
# Example properties for each type
self.properties: Dict[str, Any] = {
"string_prop": "example",
"int_prop": 42,
"float_prop": 3.14,
"bool_prop": True,
"datetime_prop": pd.Timestamp("2024-01-01T12:00:00"),
}

def groups(self) -> List[MockTdmsGroup]:
return self._groups
Expand All @@ -50,9 +58,9 @@ def as_dataframe(self, *_, **__):


class MockResponse:
def __init__(self):
self.status_code = 200
self.text = json.dumps({"uploadUrl": "some_url.com", "dataImportId": "123-123-123"})
def __init__(self, status_code=None, text=None):
self.status_code = status_code or 200
self.text = text or json.dumps({"uploadUrl": "some_url.com", "dataImportId": "123-123-123"})

def json(self) -> dict:
return json.loads(self.text)
Expand Down Expand Up @@ -730,3 +738,110 @@ def mock_tdms_file_constructor2(path):
tdms_time_format=TdmsTimeFormat.TIME_CHANNEL,
ignore_errors=True,
)


def test_tdms_upload_service_upload_with_metadata(
    mocker: MockFixture, mock_waveform_tdms_file: MockTdmsFile
):
    """Verify that TDMS file properties are forwarded as run metadata.

    Covers three cases of ``include_metadata=True``:
    1. Raises when a ``run_id`` is supplied (metadata only allowed on new runs).
    2. Raises when no ``run_name`` is supplied.
    3. Succeeds, creating a run whose POST body carries the mock file's
       properties serialized in proto structure (key/type/value dicts).
    """
    # Pretend the TDMS path exists and has a nonzero size so upload proceeds.
    mock_path_is_file = mocker.patch("sift_py.data_import.tdms.Path.is_file")
    mock_path_is_file.return_value = True

    mock_path_getsize = mocker.patch("sift_py.data_import.csv.os.path.getsize")
    mock_path_getsize.return_value = 10

    # Patch TdmsFile to return our mock file
    mocker.patch("sift_py.data_import.tdms.TdmsFile", return_value=mock_waveform_tdms_file)

    # Patch requests.Session.post to simulate both run creation and data import
    mock_requests_post = mocker.patch("sift_py.rest.requests.Session.post")

    # The first call is for _create_run, second for config upload, third for file upload
    def post_side_effect(*args, **kwargs):
        # The service may pass the URL positionally or as a keyword.
        url = kwargs.get("url") or (args[1] if len(args) > 1 else "")
        if "run" in url:
            # Simulate run creation response
            return MockResponse(
                status_code=200,
                text=json.dumps({"run": {"runId": "new_run_id"}}),
            )
        elif "data-imports:upload" in url:
            # Simulate config upload response
            return MockResponse()
        elif "some_url.com" in url:
            # Simulate file upload response
            return MockResponse()
        else:
            return MockResponse()

    mock_requests_post.side_effect = post_side_effect

    svc = TdmsUploadService(rest_config)

    # Should raise if run_id is provided
    with pytest.raises(ValueError, match="Metadata can only be included in new runs"):
        svc.upload(
            "some_tdms.tdms",
            "asset_name",
            include_metadata=True,
            run_id="existing_run_id",
            run_name="Run Name",
        )

    # Should raise if run_name is not provided
    with pytest.raises(ValueError, match="Must provide a run_name to include metadata"):
        svc.upload(
            "some_tdms.tdms",
            "asset_name",
            include_metadata=True,
            run_name=None,
        )

    # Should succeed and call _create_run via POST with metadata
    svc.upload(
        "some_tdms.tdms",
        "asset_name",
        include_metadata=True,
        run_name="Run Name",
    )

    # Check that the first POST call was for run creation and included metadata
    create_run_post_call = mock_requests_post.call_args_list[0]
    create_run_post_data = json.loads(create_run_post_call.kwargs["data"])
    assert create_run_post_data["name"] == "Run Name"

    # Metadata should be present and contain expected keys.
    # Entry order mirrors the insertion order of MockTdmsFile.properties.
    assert "metadata" in create_run_post_data
    assert create_run_post_data["metadata"][0]["key"]["name"] == "string_prop"
    assert (
        create_run_post_data["metadata"][0]["key"]["type"]
        == MetadataKeyType.METADATA_KEY_TYPE_STRING
    )
    assert create_run_post_data["metadata"][0]["string_value"] == "example"

    # Integer properties are carried on the NUMBER type's number_value field.
    assert create_run_post_data["metadata"][1]["key"]["name"] == "int_prop"
    assert (
        create_run_post_data["metadata"][1]["key"]["type"]
        == MetadataKeyType.METADATA_KEY_TYPE_NUMBER
    )
    assert create_run_post_data["metadata"][1]["number_value"] == 42

    assert create_run_post_data["metadata"][2]["key"]["name"] == "float_prop"
    assert (
        create_run_post_data["metadata"][2]["key"]["type"]
        == MetadataKeyType.METADATA_KEY_TYPE_NUMBER
    )
    assert create_run_post_data["metadata"][2]["number_value"] == 3.14

    assert create_run_post_data["metadata"][3]["key"]["name"] == "bool_prop"
    assert (
        create_run_post_data["metadata"][3]["key"]["type"]
        == MetadataKeyType.METADATA_KEY_TYPE_BOOLEAN
    )
    assert create_run_post_data["metadata"][3]["boolean_value"] is True

    # Datetime properties are parsed into ISO-8601 strings; only assert the
    # prefix so sub-second/timezone formatting differences don't matter.
    assert create_run_post_data["metadata"][4]["key"]["name"] == "datetime_prop"
    assert (
        create_run_post_data["metadata"][4]["key"]["type"]
        == MetadataKeyType.METADATA_KEY_TYPE_STRING
    )
    assert create_run_post_data["metadata"][4]["string_value"].startswith("2024-01-01T12:00:00")
47 changes: 47 additions & 0 deletions python/lib/sift_py/data_import/csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

import pandas as pd
from alive_progress import alive_bar # type: ignore
from sift.metadata.v1.metadata_pb2 import MetadataValue

from sift_py._internal.metadata import metadata_pb_to_dict_api
from sift_py.data_import.config import CsvConfig
from sift_py.data_import.status import DataImportService
from sift_py.data_import.time_format import TimeFormatType
Expand All @@ -18,6 +20,7 @@
class CsvUploadService(_RestService):
UPLOAD_PATH = "/api/v1/data-imports:upload"
URL_PATH = "/api/v1/data-imports:url"
RUN_PATH = "/api/v2/runs"

_rest_conf: SiftRestConfig
_upload_uri: str
Expand Down Expand Up @@ -258,6 +261,50 @@ def _mime_and_content_type_from_path(path: Path) -> Tuple[str, Optional[str], Op
mime, encoding = mimetypes.guess_type(path)
return file_name, mime, encoding

def _create_run(self, run_name: str, metadata: Optional[List[MetadataValue]] = None) -> str:
    """Create a new run using the REST service, and return a run_id.

    Args:
        run_name: The name of the Run.
        metadata: Optional metadata fields to add to the run.

    Returns:
        The run id.

    Raises:
        Exception: If the request fails (non-200 status), the response body
            is not valid JSON, or the response is missing the run/runId keys.
    """
    run_uri = urljoin(self._base_uri, self.RUN_PATH)

    req: Dict[str, Any] = {
        "name": run_name,
        "description": "",
    }

    if metadata:
        req["metadata"] = metadata_pb_to_dict_api(metadata)

    response = self._session.post(
        url=run_uri,
        headers={
            # NOTE(review): "Content-Encoding" declares payload compression
            # (e.g. gzip), not the media type. It is kept here because the
            # server has accepted it historically; the semantically correct
            # "Content-Type" header is sent alongside it.
            "Content-Encoding": "application/json",
            "Content-Type": "application/json",
        },
        data=json.dumps(req),
    )
    if response.status_code != 200:
        raise Exception(
            f"Run creation failed with status code {response.status_code}. {response.text}"
        )

    try:
        run_info = response.json()
    except json.decoder.JSONDecodeError as err:
        # response.json() can only raise a decode error; chain it so the
        # underlying parse failure stays visible in tracebacks.
        raise Exception(f"Invalid response: {response.text}") from err

    if "run" not in run_info:
        raise Exception("Response missing key: run")
    if "runId" not in run_info["run"]:
        raise Exception("Response missing key: runId")

    return run_info["run"]["runId"]


class _ProgressFile:
"""Displays the status with alive_bar while reading the file."""
Expand Down
39 changes: 1 addition & 38 deletions python/lib/sift_py/data_import/hdf5.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import json
import uuid
from collections import defaultdict
from contextlib import ExitStack
from pathlib import Path
from typing import Dict, List, Tuple, Union, cast
from urllib.parse import urljoin

import numpy as np

Expand Down Expand Up @@ -37,7 +35,6 @@ class Hdf5UploadService:
Service to upload HDF5 files.
"""

_RUN_PATH = "/api/v2/runs"
_csv_upload_service: CsvUploadService
_prev_run_id: str

Expand Down Expand Up @@ -96,7 +93,7 @@ def upload(
# Perform now instead of before the config split, to avoid creating a run if any problems arise before we are ready to upload
# Active run_id copied to _prev_run_id for user reference
if hdf5_config._hdf5_config.run_name != "":
run_id = self._create_run(hdf5_config._hdf5_config.run_name)
run_id = self._csv_upload_service._create_run(hdf5_config._hdf5_config.run_name)
for _, csv_config in csv_items:
csv_config._csv_config.run_name = ""
csv_config._csv_config.run_id = run_id
Expand Down Expand Up @@ -127,40 +124,6 @@ def get_previous_upload_run_id(self) -> str:
"""Return the run_id used in the previous upload"""
return self._prev_run_id

def _create_run(self, run_name: str) -> str:
"""Create a new run using the REST service, and return a run_id"""
run_uri = urljoin(self._csv_upload_service._base_uri, self._RUN_PATH)

# Since CSVUploadService is already a RestService, we can utilize that
response = self._csv_upload_service._session.post(
url=run_uri,
headers={
"Content-Encoding": "application/json",
},
data=json.dumps(
{
"name": run_name,
"description": "",
}
),
)
if response.status_code != 200:
raise Exception(
f"Run creation failed with status code {response.status_code}. {response.text}"
)

try:
run_info = response.json()
except (json.decoder.JSONDecodeError, KeyError):
raise Exception(f"Invalid response: {response.text}")

if "run" not in run_info:
raise Exception("Response missing key: run")
if "runId" not in run_info["run"]:
raise Exception("Response missing key: runId")

return run_info["run"]["runId"]


def _convert_to_csv_file(
src_path: Union[str, Path],
Expand Down
Loading
Loading