Skip to content

Error accessing Rainfields310 radar data on JASMIN #207

@stevehadd

Description

@stevehadd

Trying to run the radar accessor on JASMIN, I'm getting a data-not-found error, despite the data being present (as far as I can tell). I suspect the problem is that the on-disk format is not correct after copying the data across from NCI.

For example, I try to run the following code:
radar_accessor = pyearthtools.data.archive.Rainfields3(variables='prcp-crate')['20210303']

The file 310_20220303.prcp-crate.zip is present on disk.

If I put a breakpoint at the line of code where the error occurs and look at the self.search output, it correctly finds the zip file and links it to the .nc files it contains, but it then says it can't find the data.

The stack trace is as follows:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
File [~/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py:203](https://notebooks.jasmin.ac.uk/home/users/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py#line=202), in FileSystemIndex.get(self, *args, **kwargs)
    202 try:
--> 203     return self.load(self.search(*args), **kwargs)
    204 except Exception as e:

File [~/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py:1003](https://notebooks.jasmin.ac.uk/home/users/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py#line=1002), in ArchiveIndex.search(self, *args)
   1002         pass
-> 1003 return super().search(*args)

File [~/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py:153](https://notebooks.jasmin.ac.uk/home/users/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py#line=152), in FileSystemIndex.search(self, *args, **kwargs)
    152     search_function = getattr(self, pyearthtools.utils.config.get("data.search_function"))
--> 153     return search_function(*args, **kwargs)
    154 except TypeError as e:

File [~/prog/pyearthtools_jasmin/src/site_archive_jasmin/Rainfields310.py:988](https://notebooks.jasmin.ac.uk/home/users/prog/pyearthtools_jasmin/src/site_archive_jasmin/Rainfields310.py#line=987), in Rainfields3.filesystem(self, query_dictionary)
    987 paths = [os.path.join(root, file) for file in files]
--> 988 relevant = [self.match_path(p, query_dictionary) for p in paths]
    989 relevant = [r for r in relevant if r]

File [~/prog/pyearthtools_jasmin/src/site_archive_jasmin/Rainfields310.py:1010](https://notebooks.jasmin.ac.uk/home/users/prog/pyearthtools_jasmin/src/site_archive_jasmin/Rainfields310.py#line=1009), in Rainfields3.match_path(self, path, query)
   1009 id_and_date, varname, _extension = filename.split(".")
-> 1010 _radar_id, datepart = id_and_date.split("_")
   1011 pdt = petdt(datepart)

ValueError: too many values to unpack (expected 2)

The above exception was the direct cause of the following exception:

DataNotFoundError                         Traceback (most recent call last)
Cell In[10], line 1
----> 1 radarpipe['20210404T00']

File [~/prog/pet_fork/packages/pipeline/src/pyearthtools/pipeline/controller.py:551](https://notebooks.jasmin.ac.uk/home/users/prog/pet_fork/packages/pipeline/src/pyearthtools/pipeline/controller.py#line=550), in Pipeline.__getitem__(self, idx)
    543     return map(self.__getitem__, indexes)
    545 # Start the pipeline with the raw/initial data
    546 # `idx` here is the index of the sample within the dataset, not the
    547 #  position of the step within the list of steps
    548 # `sample` is actual data
    549 # `step_index` *is* the index of the sample provier within the list of steps
    550 # Initial just means untransformed by the pipeline
--> 551 sample, step_index = self._get_initial_sample(idx)
    552 LOG.debug(f"Call pipeline __getitem__ for {idx = }")
    554 # Apply each pipeline step to the sample, starting from the latest source

File [~/prog/pet_fork/packages/pipeline/src/pyearthtools/pipeline/controller.py:528](https://notebooks.jasmin.ac.uk/home/users/prog/pet_fork/packages/pipeline/src/pyearthtools/pipeline/controller.py#line=527), in Pipeline._get_initial_sample(self, idx)
    526 if isinstance(self.steps[0], (_Pipeline, Index)):
    527     LOG.debug(f"Getting initial sample from {self.steps[0]} at {idx}")
--> 528     sample = self.steps[0][idx]
    529     whereinthesequence = 0
    530     return sample, whereinthesequence

File [~/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/utilities/mixins/call_redirect.py:42](https://notebooks.jasmin.ac.uk/home/users/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/utilities/mixins/call_redirect.py#line=41), in CallRedirectMixin.__getitem__(self, idx)
     40 def __getitem__(self, idx: Any):
     41     """[] accessor"""
---> 42     return self.__call__(idx)

File [~/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py:897](https://notebooks.jasmin.ac.uk/home/users/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py#line=896), in AdvancedTimeIndex.__call__(self, *args, **kwargs)
    888     return self.series(
    889         start_time,
    890         end_time,
   (...)    893         **kwargs,
    894     )
    896 if len(args) == 1:
--> 897     return self.retrieve(*args, **kwargs)
    899 return self.series(*args, **kwargs)

File [~/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py:778](https://notebooks.jasmin.ac.uk/home/users/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py#line=777), in AdvancedTimeIndex.retrieve(self, querytime, aggregation, select, use_simple, **kwargs)
    775 querytime = Petdt(querytime)
    777 if not hasattr(self, "data_resolution") or not self.data_resolution or use_simple:
--> 778     data = super().retrieve(querytime, select=select, **kwargs)
    779     return data  # selectdata(querytime, data)
    781 if self.data_resolution:

File [~/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py:671](https://notebooks.jasmin.ac.uk/home/users/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py#line=670), in SingleTimeDataIndex.retrieve(self, transforms, *args, **kwargs)
    667 # kwargs.update(self._get_preprocess(kwargs.pop("preprocess", None)))  # type: ignore
    668 with ChangeValue(self, "_skip_transforms", True):
    669     # Skip transforms, so that they are only applied once
    670     # By applying transforms after time retrieve, prevents more then necessary data going to transforms
--> 671     return transforms(super().retrieve(*args, **kwargs))

File [~/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py:462](https://notebooks.jasmin.ac.uk/home/users/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py#line=461), in SingleTimeIndex.retrieve(self, querytime, select, round, *args, **kwargs)
    459     querytime = querytime.at_resolution(self.data_resolution)
    461 retrieval_function = getattr(super(), "retrieve", super().get)
--> 462 data = retrieval_function(querytime, *args, **kwargs)
    464 if not isinstance(data, (xr.Dataset, xr.DataArray)):
    465     return data

File [~/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py:285](https://notebooks.jasmin.ac.uk/home/users/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py#line=284), in DataIndex.retrieve(self, transforms, *args, **kwargs)
    282 kwargs.update(self._get_preprocess(kwargs.pop("preprocess", None)))  # type: ignore
    284 if self._skip_transforms:
--> 285     return self.get(*args, **kwargs)
    287 untransformed = self.get(*args, **kwargs)
    288 transformed = transforms(untransformed)

File [~/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py:205](https://notebooks.jasmin.ac.uk/home/users/prog/pet_fork/packages/data/src/pyearthtools/data/indexes/_indexes.py#line=204), in FileSystemIndex.get(self, *args, **kwargs)
    203     return self.load(self.search(*args), **kwargs)
    204 except Exception as e:
--> 205     raise DataNotFoundError(f"Data with args: {str(args)} could not be found.") from e

DataNotFoundError: Data with args: (Petdt('2021-04-04T00'),) could not be found.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions