Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions datasets/modis/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -21,15 +21,16 @@ RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 10
# See https://github.com/mapbox/rasterio/issues/1289
ENV CURL_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt

# Install Python 3.8
RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" \
&& bash "Mambaforge-$(uname)-$(uname -m).sh" -b -p /opt/conda \
&& rm -rf "Mambaforge-$(uname)-$(uname -m).sh"
# Install Python via Miniforge (Mambaforge was deprecated)
RUN curl -L -O "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh" \
&& bash "Miniforge3-$(uname)-$(uname -m).sh" -b -p /opt/conda \
&& rm -rf "Miniforge3-$(uname)-$(uname -m).sh"

ENV PATH /opt/conda/bin:$PATH
ENV LD_LIBRARY_PATH /opt/conda/lib/:$LD_LIBRARY_PATH

RUN mamba install -y -c conda-forge python=3.8 gdal=3.3.3 pip setuptools cython numpy==1.21.5
# Install Python and GDAL with HDF4 support (required for MODIS .hdf files)
RUN mamba install -y -c conda-forge python=3.10 gdal libgdal-hdf4 pip setuptools cython numpy

RUN python -m pip install --upgrade pip

Expand Down
10 changes: 6 additions & 4 deletions datasets/modis/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Note the force reinstall of `rasterio` in the Dockerfile is necessary for raster
The update workflows were registered with

```shell
ls -1 datasets/modis/collection/ | xargs -I {} pctasks dataset process-items goes-update --is-update-workflow --dataset datasets/modis/dataset.yaml -u -c {}
ls -1 datasets/modis/collection/ | xargs -I {} bash -c 'echo y | pctasks dataset process-items goes-update --is-update-workflow --dataset datasets/modis/dataset.yaml -u -c {}'
```


Expand All @@ -30,10 +30,12 @@ ls -1 datasets/modis/collection/ | xargs -I {} pctasks dataset process-items goe

```
ls -1 datasets/modis/collection/ | \
xargs -I {} pctasks dataset process-items update \
xargs -I {} bash -c 'echo y | pctasks dataset process-items update \
-c {} \
--workflow-id {}-update \
--is-update-workflow \
--dataset datasets/modis/dataset.yaml \
--upsert
```
--upsert'
```

After running the Dynamic Updates command, the ingestion workflow will start using the latest code changes.
2 changes: 1 addition & 1 deletion datasets/modis/dataset.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
id: modis
image: ${{ args.registry }}/pctasks-modis:2023.7.6.0
image: ${{ args.registry }}/pctasks-modis:2025.12.15.0

args:
- registry
Expand Down
18 changes: 0 additions & 18 deletions datasets/modis/misc.py

This file was deleted.

58 changes: 42 additions & 16 deletions datasets/modis/modis.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,7 @@
import pystac
import stactools.modis.cog
import stactools.modis.stac
from azure.core.exceptions import ResourceNotFoundError
from stactools.core.utils.antimeridian import Strategy
from stactools.modis.file import File
from misc import add_platform_field

from pctasks.core.models.task import WaitTaskResult
from pctasks.core.storage import StorageFactory
Expand All @@ -25,6 +22,42 @@
COG_CONTAINER = "blob://modiseuwest/modis-061-cogs/"


# Add back in the platform property which NASA removed from their XML on March 13 2024
# On the MODIS side terra is distributed as MOD and aqua as MYD,
# but within MPC both are distributed as MODxxx
# Copied the method from misc.py and deleted the file
# Maps the three-letter MODIS product prefix found in the href to the value
# written into the STAC ``platform`` property.
_PLATFORM_BY_PREFIX = {
    "MOD": "terra",
    "MYD": "aqua",
    "MCD": "terra,aqua",
}


def add_platform_field(item, href, logger):
    """
    Fill in a missing ``platform`` property on a STAC item from its href.

    NASA removed this property from their XML metadata on March 13, 2024,
    so it is reconstructed from the MOD/MYD/MCD prefix of the fifth
    ``/``-separated segment of ``href`` (index 4). The item is left
    untouched when ``platform`` is already present and non-empty, or when
    the prefix cannot be determined; the latter case only logs a warning.

    :param item: The STAC item to update (modified in place)
    :type item: pystac.Item
    :param href: The href path containing MOD/MYD/MCD prefix
    :type href: str
    :param logger: Logger instance for debug/warning messages
    :type logger: logging.Logger
    :return: The same STAC item, with the platform field set when possible
    :rtype: pystac.Item
    """
    # Only a missing key or an empty string counts as "not set".
    if item.properties.get("platform", "") != "":
        return item

    logger.debug("platform field missing, filling it in based on original xml href")

    # Extract the prefix once. Best effort: a malformed or non-string href
    # must not abort item creation, so the expected failures are logged.
    try:
        prefix = href.split("/")[4][0:3]
    except (AttributeError, IndexError, TypeError) as e:
        logger.warning(
            "href did not contain MOD/MYD/MCD in the usual spot, got error: %s", e
        )
        return item

    platform = _PLATFORM_BY_PREFIX.get(prefix)
    if platform is None:
        logger.warning("href did not contain MOD/MYD/MCD in the usual spot")
    else:
        item.properties["platform"] = platform
    return item


class MODISCollection(Collection):
@classmethod
def create_item(
Expand All @@ -50,17 +83,9 @@ def create_item(
file = File(os.path.join(temporary_directory, os.path.basename(asset_uri)))
logger.debug(f"Downloading {asset_uri}")
asset_storage.download_file(asset_path, file.hdf_href)
logger.debug(f"Downloading {asset_uri}.xml")
try:
asset_storage.download_file(f"{asset_path}.xml", file.xml_href)
except ResourceNotFoundError as e:
logger.warning(f"Missing XML file, skipping: {e}")
return []

logger.debug("Creating item")
item = stactools.modis.stac.create_item(
file.xml_href, antimeridian_strategy=Strategy.NORMALIZE
)
item = stactools.modis.stac.create_item(file.hdf_href)

if create_cogs:
logger.debug(f"Adding COGS to item {item}")
Expand All @@ -83,10 +108,11 @@ def create_item(
file = File(asset_storage.get_url(asset_path))
logger.debug(f"Setting HDF asset href to {file.hdf_href}")
item.assets["hdf"].href = file.hdf_href
logger.debug(f"Setting metadata asset href to {file.xml_href}")
item.assets["metadata"].href = file.xml_href
item.assets["metadata"].href = file.xml_href

item = add_platform_field(item, file.xml_href, logger)
# Remove metadata asset if it exists since XML files are no longer provided
if "metadata" in item.assets:
del item.assets["metadata"]

item = add_platform_field(item, file.hdf_href, logger)

return [item]
2 changes: 1 addition & 1 deletion datasets/modis/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
git+https://github.com/stactools-packages/modis@419101223609805f9ac9d2a38401448a36331460
git+https://github.com/stactools-packages/modis@8854ceb263907e7b42a183fb5a79476baace3219
Loading