Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ repos:
- id: end-of-file-fixer
- repo: https://github.com/charliermarsh/ruff-pre-commit
# keep the version here in sync with the version in uv.lock
rev: "v0.12.9"
rev: "v0.13.0"
hooks:
- id: ruff-check
args: [--fix, --exit-non-zero-on-fix]
Expand Down
8 changes: 6 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.43.0] - 2025-09-12

- `tilebox-workflows`: Added progress tracking support to the `TaskRunner`.

## [0.42.0] - 2025-08-22

### Added
Expand Down Expand Up @@ -251,8 +255,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Released under the [MIT](https://opensource.org/license/mit) license.
- Released packages: `tilebox-datasets`, `tilebox-workflows`, `tilebox-storage`, `tilebox-grpc`


[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.42.0...HEAD
[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.43.0...HEAD
[0.43.0]: https://github.com/tilebox/tilebox-python/compare/v0.42.0...v0.43.0
[0.42.0]: https://github.com/tilebox/tilebox-python/compare/v0.41.0...v0.42.0
[0.41.0]: https://github.com/tilebox/tilebox-python/compare/v0.40.0...v0.41.0
[0.40.0]: https://github.com/tilebox/tilebox-python/compare/v0.39.0...v0.40.0
Expand Down
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,11 @@ Python library for [Tilebox](https://tilebox.com), a lightweight space data mana
## Install

```bash
pip install tilebox-datasets tilebox-workflows tilebox-storage
pip install tilebox
```

> [!TIP]
> For new projects we recommend using [uv](https://docs.astral.sh/uv/) - `uv add tilebox-datasets tilebox-workflows tilebox-storage`. Additional installation options are available [in our docs](https://docs.tilebox.com/sdks/python/install).
> For new projects we recommend using [uv](https://docs.astral.sh/uv/) - `uv add tilebox`. Additional installation options are available [in our docs](https://docs.tilebox.com/sdks/python/install).

## Documentation

Expand Down Expand Up @@ -78,7 +78,6 @@ results = s2a_l1c.query(
print(f"Found {results.sizes['time']} datapoints") # Found 979 datapoints
```


### Tilebox Workflows

A parallel processing engine to simplify the creation of dynamic tasks that can be executed across various computing environments, including on-premise and auto-scaling clusters in public clouds.
Expand Down
2 changes: 1 addition & 1 deletion buf.gen.workflows.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ plugins:
out: tilebox-workflows/tilebox/workflows
inputs:
# for local development
# - directory: ../api
# directory: ../api
- module: buf.build/tilebox/api
paths:
- "workflows"
8 changes: 7 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,13 @@ known-first-party = ["tilebox", "_tilebox"]
"*/tests/*" = ["INP001", "SLF001"]

[tool.pyright]
exclude = ["**/.ipynb_checkpoints", "**/__pycache__", ".venv"]
exclude = [
"**/.ipynb_checkpoints",
"**/__pycache__",
".venv",
"tilebox-datasets/tests/example_dataset/*", # auto-generated code
"tilebox-workflows/tests/proto/*", # auto-generated code
]

# ignore warnings in those files, but still type check them when used as a dependency in other files
ignore = [
Expand Down
2 changes: 1 addition & 1 deletion tilebox-datasets/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ def test_datapoint_not_found() -> None:
s2_dataset = client.dataset("open_data.copernicus.sentinel2_msi")
collection = s2_dataset.collection("S2A_S2MSI1C")

with pytest.raises(NotFoundError, match="No such datapoint.*"):
with pytest.raises(NotFoundError, match=r"No such datapoint.*"):
collection.find("0181f4ef-2040-101a-1423-d818e4d1895e") # is in another collection


Expand Down
4 changes: 2 additions & 2 deletions tilebox-datasets/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,15 +186,15 @@ def test_timeseries_dataset_collection_find_invalid_id() -> None:
mocked.collection.find("invalid")

mocked.service.query_by_id.side_effect = ArgumentError
with pytest.raises(ValueError, match="Invalid datapoint id.*"):
with pytest.raises(ValueError, match=r"Invalid datapoint id.*"):
mocked.collection.find(uuid4())


def test_timeseries_dataset_collection_find_not_found() -> None:
"""Test that .find() of a collection raises a NotFoundError if the datapoint is not found."""
mocked = _mocked_collection()
mocked.service.query_by_id.side_effect = NotFoundError
with pytest.raises(NotFoundError, match="No such datapoint.*"):
with pytest.raises(NotFoundError, match=r"No such datapoint.*"):
mocked.collection.find("14eb91a2-a42f-421f-9397-1dab577f05a9")


Expand Down
2 changes: 1 addition & 1 deletion tilebox-grpc/tests/aio/test_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,5 +34,5 @@ async def _mock_rpc() -> None:
self.some_rpc = _mock_rpc

stub = with_pythonic_errors(Stub())
with pytest.raises(exception_type, match=".*"):
with pytest.raises(exception_type, match=r".*"):
await stub.some_rpc()
4 changes: 2 additions & 2 deletions tilebox-grpc/tests/test_channel.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,10 @@ def test_parse_channel_info_unix(url: str) -> None:


def test_parse_channel_invalid() -> None:
with pytest.raises(ValueError, match="Invalid"):
with pytest.raises(ValueError, match=r"Invalid"):
parse_channel_info("i'm not a url")


def test_parse_channel_port_required_for_http() -> None:
with pytest.raises(ValueError, match="Explicit port required"):
with pytest.raises(ValueError, match=r"Explicit port required"):
parse_channel_info("http://0.0.0.0")
2 changes: 1 addition & 1 deletion tilebox-grpc/tests/test_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,5 +38,5 @@ def _mock_rpc() -> None:
self.some_rpc = _mock_rpc

stub = with_pythonic_errors(Stub())
with pytest.raises(exception_type, match=".*"):
with pytest.raises(exception_type, match=r".*"):
stub.some_rpc()
8 changes: 4 additions & 4 deletions tilebox-storage/tests/test_granule.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def test_granule_from_asf_datapoint(granule: ASFStorageGranule) -> None:
def test_granule_from_asf_datapoints(granules: list[ASFStorageGranule]) -> None:
datapoints = [_asf_granule_to_datapoint(granule) for granule in granules]
dataset = xr.concat(datapoints, dim="time")
with pytest.raises(ValueError, match=".*more than one granule.*"):
with pytest.raises(ValueError, match=r".*more than one granule.*"):
ASFStorageGranule.from_data(dataset)

for i in range(len(granules)): # converting a dataset with a time dimension of 1 should still work though
Expand Down Expand Up @@ -75,7 +75,7 @@ def test_granule_from_umbra_datapoint(granule: UmbraStorageGranule) -> None:
def test_granule_from_umbra_datapoints(granules: list[UmbraStorageGranule]) -> None:
datapoints = [_umbra_granule_to_datapoint(granule) for granule in granules]
dataset = xr.concat(datapoints, dim="time")
with pytest.raises(ValueError, match=".*more than one granule.*"):
with pytest.raises(ValueError, match=r".*more than one granule.*"):
UmbraStorageGranule.from_data(dataset)

for i in range(len(granules)): # converting a dataset with a time dimension of 1 should still work though
Expand Down Expand Up @@ -141,7 +141,7 @@ def test_granule_from_copernicus_datapoint(granule: CopernicusStorageGranule) ->
def test_granule_from_copernicus_datapoints(granules: list[CopernicusStorageGranule]) -> None:
datapoints = [_copernicus_granule_to_datapoint(granule) for granule in granules]
dataset = xr.concat(datapoints, dim="time")
with pytest.raises(ValueError, match=".*more than one granule.*"):
with pytest.raises(ValueError, match=r".*more than one granule.*"):
CopernicusStorageGranule.from_data(dataset)

for i in range(len(granules)): # converting a dataset with a time dimension of 1 should still work though
Expand Down Expand Up @@ -169,7 +169,7 @@ def test_granule_from_landsat_datapoint(granule: USGSLandsatStorageGranule) -> N
def test_granule_from_landsat_datapoints(granules: list[USGSLandsatStorageGranule]) -> None:
datapoints = [_landsat_granule_to_datapoint(granule) for granule in granules]
dataset = xr.concat(datapoints, dim="time")
with pytest.raises(ValueError, match=".*more than one granule.*"):
with pytest.raises(ValueError, match=r".*more than one granule.*"):
USGSLandsatStorageGranule.from_data(dataset)

for i in range(len(granules)): # converting a dataset with a time dimension of 1 should still work though
Expand Down
4 changes: 3 additions & 1 deletion tilebox-storage/tests/test_providers.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

import pytest
from httpx import AsyncClient, BasicAuth
from pytest_httpx import HTTPXMock
Expand All @@ -21,5 +23,5 @@ async def test_asf_login(httpx_mock: HTTPXMock) -> None:
@pytest.mark.asyncio
async def test_asf_login_invalid_auth(httpx_mock: HTTPXMock) -> None:
httpx_mock.add_response(401)
with pytest.raises(ValueError, match="Invalid username or password."):
with pytest.raises(ValueError, match=re.escape("Invalid username or password.")):
await _asf_login(("username", "password"))
7 changes: 4 additions & 3 deletions tilebox-storage/tests/test_storage_client.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from datetime import timedelta
from pathlib import Path
from tempfile import TemporaryDirectory
Expand Down Expand Up @@ -37,14 +38,14 @@ async def test_client_login(httpx_mock: HTTPXMock) -> None:
async def test_client_login_failed(httpx_mock: HTTPXMock) -> None:
httpx_mock.add_response(401)
client = _HttpClient(auth={"ASF": ("invalid-username", "password")})
with pytest.raises(ValueError, match="Invalid username or password."):
with pytest.raises(ValueError, match=re.escape("Invalid username or password.")):
await client._client("ASF")


@pytest.mark.asyncio
async def test_client_missing_credentials() -> None:
client = _HttpClient(auth={})
with pytest.raises(ValueError, match="Missing credentials.*"):
with pytest.raises(ValueError, match=r"Missing credentials.*"):
await client._client("ASF")


Expand Down Expand Up @@ -102,7 +103,7 @@ async def test_download_verify_md5(httpx_mock: HTTPXMock, tmp_path: Path, granul
httpx_mock.add_response(content=b"login-response")
httpx_mock.add_response(stream=IteratorStream([b"my-granule"]))
client = _HttpClient(auth={"ASF": ("username", "password")})
with pytest.raises(ValueError, match=".*md5sum mismatch.*"):
with pytest.raises(ValueError, match=r".*md5sum mismatch.*"):
await client.download(granule, tmp_path, extract=False, show_progress=False)


Expand Down
5 changes: 4 additions & 1 deletion tilebox-workflows/tests/automations/test_cron.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from datetime import datetime, timezone

import pytest
Expand All @@ -24,7 +25,9 @@ def test_cron_task_serialization_protobuf() -> None:


def test_cron_task_serialization_requires_trigger() -> None:
with pytest.raises(ValueError, match="CronTask cannot be submitted without being triggered. Use task.once()."):
with pytest.raises(
ValueError, match=re.escape("CronTask cannot be submitted without being triggered. Use task.once().")
):
ExampleCronTask("test", 42)._serialize()


Expand Down
4 changes: 3 additions & 1 deletion tilebox-workflows/tests/automations/test_storage_event.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import re

import pytest
from hypothesis import given
from tests.proto.test_pb2 import SampleArgs
Expand Down Expand Up @@ -30,7 +32,7 @@ def test_storage_event_task_serialization_protobuf() -> None:

def test_storage_event_task_serialization_requires_trigger() -> None:
with pytest.raises(
ValueError, match="StorageEventTask cannot be submitted without being triggered. Use task.once()."
ValueError, match=re.escape("StorageEventTask cannot be submitted without being triggered. Use task.once().")
):
ExampleStorageEventTask("test", 42)._serialize()

Expand Down
51 changes: 46 additions & 5 deletions tilebox-workflows/tests/runner/test_runner.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import os
import re
from pathlib import Path
from unittest.mock import MagicMock, patch

Expand All @@ -8,7 +9,7 @@
from tilebox.workflows import ExecutionContext, Task
from tilebox.workflows.cache import InMemoryCache, JobCache
from tilebox.workflows.client import Client
from tilebox.workflows.data import JobState, RunnerContext
from tilebox.workflows.data import JobState, ProgressBar, RunnerContext
from tilebox.workflows.runner.task_runner import TaskRunner


Expand Down Expand Up @@ -115,6 +116,40 @@ def test_runner_with_flaky_task() -> None:
assert job.state == JobState.COMPLETED


class ProgressTask(Task):
n: int

def execute(self, context: ExecutionContext) -> None:
context.progress("test").add(self.n)
context.submit_subtasks([ProgressLeafTask(i) for i in range(self.n)])


class ProgressLeafTask(Task):
i: int

def execute(self, context: ExecutionContext) -> None:
context.progress("test").done(1)


def test_runner_with_workflow_tracking_progress() -> None:
client = replay_client("progress.rpcs.bin")
job_client = client.jobs()

with patch("tilebox.workflows.jobs.client.get_trace_parent_of_current_span") as get_trace_parent_mock:
# we hardcode the trace parent for the job, which allows us to assert that every single outgoing request
# matches exactly byte for byte
get_trace_parent_mock.return_value = "00-98b9c13dbc61637ffb36f592a8236088-bc29f6909f0b7c5b-01"
job = client.jobs().submit("progress-task", ProgressTask(4))

cache = InMemoryCache()
runner = client.runner(tasks=[ProgressTask, ProgressLeafTask], cache=cache)

runner.run_all()
job = job_client.find(job) # load current job state
assert job.state == JobState.COMPLETED
assert job.progress_bars == [ProgressBar("test", 4, 4)]


def replay_client(replay_file: str, assert_request_matches: bool = True) -> Client:
replay = Path(__file__).parent / "testdata" / "recordings" / replay_file
replay_channel = open_replay_channel(replay, assert_request_matches)
Expand Down Expand Up @@ -179,26 +214,32 @@ def test_runner_disallow_duplicate_task_identifiers() -> None:

runner.register(FlakyTask)
with pytest.raises(
ValueError, match="Duplicate task identifier: A task 'FlakyTask' with version 'v0.0' is already registered."
ValueError,
match=re.escape("Duplicate task identifier: A task 'FlakyTask' with version 'v0.0' is already registered."),
):
runner.register(FlakyTask)

runner.register(SumResultTask)
with pytest.raises(
ValueError, match="Duplicate task identifier: A task 'SumResultTask' with version 'v0.0' is already registered."
ValueError,
match=re.escape("Duplicate task identifier: A task 'SumResultTask' with version 'v0.0' is already registered."),
):
runner.register(SumResultTask)

runner.register(ExplicitIdentifierTaskV1)
with pytest.raises(
ValueError,
match="Duplicate task identifier: A task 'tilebox.com/explicit' with version 'v1.0' is already registered.",
match=re.escape(
"Duplicate task identifier: A task 'tilebox.com/explicit' with version 'v1.0' is already registered."
),
):
runner.register(ExplicitIdentifierTaskV1)

runner.register(ExplicitIdentifierTaskV2) # this one has a different version, so it's fine
with pytest.raises(
ValueError,
match="Duplicate task identifier: A task 'tilebox.com/explicit' with version 'v2.0' is already registered.",
match=re.escape(
"Duplicate task identifier: A task 'tilebox.com/explicit' with version 'v2.0' is already registered."
),
):
runner.register(ExplicitIdentifierTaskV2)
Git LFS file not shown
Loading
Loading