tilebox · lukasbindreiter · Sep 12, 2025 · Sep 11, 2025 · Sep 11, 2025 · Sep 12, 2025
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -6,7 +6,7 @@ repos:
       - id: end-of-file-fixer
   - repo: https://github.com/charliermarsh/ruff-pre-commit
     # keep the version here in sync with the version in uv.lock
-    rev: "v0.12.9"
+    rev: "v0.13.0"
     hooks:
       - id: ruff-check
         args: [--fix, --exit-non-zero-on-fix]

diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.43.0] - 2025-09-12
+
+- `tilebox-workflows`: Added progress tracking support to the `TaskRunner`.
+
 ## [0.42.0] - 2025-08-22
 
 ### Added
@@ -251,8 +255,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Released under the [MIT](https://opensource.org/license/mit) license.
 - Released packages: `tilebox-datasets`, `tilebox-workflows`, `tilebox-storage`, `tilebox-grpc`
 
-
-[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.42.0...HEAD
+[Unreleased]: https://github.com/tilebox/tilebox-python/compare/v0.43.0...HEAD
+[0.43.0]: https://github.com/tilebox/tilebox-python/compare/v0.42.0...v0.43.0
 [0.42.0]: https://github.com/tilebox/tilebox-python/compare/v0.41.0...v0.42.0
 [0.41.0]: https://github.com/tilebox/tilebox-python/compare/v0.40.0...v0.41.0
 [0.40.0]: https://github.com/tilebox/tilebox-python/compare/v0.39.0...v0.40.0

diff --git a/README.md b/README.md
@@ -36,11 +36,11 @@ Python library for [Tilebox](https://tilebox.com), a lightweight space data mana
 ## Install
 
 ```bash
-pip install tilebox-datasets tilebox-workflows tilebox-storage
+pip install tilebox
 ```
 
 > [!TIP]
-> For new projects we recommend using [uv](https://docs.astral.sh/uv/) - `uv add tilebox-datasets tilebox-workflows tilebox-storage`. Additional installation options are available [in our docs](https://docs.tilebox.com/sdks/python/install).
+> For new projects we recommend using [uv](https://docs.astral.sh/uv/) - `uv add tilebox`. Additional installation options are available [in our docs](https://docs.tilebox.com/sdks/python/install).
 
 ## Documentation
 
@@ -78,7 +78,6 @@ results = s2a_l1c.query(
 print(f"Found {results.sizes['time']} datapoints")  # Found 979 datapoints
 ```
 
-
 ### Tilebox Workflows
 
 A parallel processing engine to simplify the creation of dynamic tasks that can be executed across various computing environments, including on-premise and auto-scaling clusters in public clouds.

diff --git a/buf.gen.workflows.yaml b/buf.gen.workflows.yaml
@@ -12,7 +12,7 @@ plugins:
     out: tilebox-workflows/tilebox/workflows
 inputs:
   # for local development
-  # - directory: ../api
+  # directory: ../api
   - module: buf.build/tilebox/api
     paths:
       - "workflows"
diff --git a/pyproject.toml b/pyproject.toml
@@ -113,7 +113,13 @@ known-first-party = ["tilebox", "_tilebox"]
 "*/tests/*" = ["INP001", "SLF001"]
 
 [tool.pyright]
-exclude = ["**/.ipynb_checkpoints", "**/__pycache__", ".venv"]
+exclude = [
+    "**/.ipynb_checkpoints",
+    "**/__pycache__",
+    ".venv",
+    "tilebox-datasets/tests/example_dataset/*", # auto-generated code
+    "tilebox-workflows/tests/proto/*",          # auto-generated code
+]
 
 # ignore warnings in those files, but still type check them when used as a dependency in other files
 ignore = [

diff --git a/tilebox-datasets/tests/test_client.py b/tilebox-datasets/tests/test_client.py
@@ -126,7 +126,7 @@ def test_datapoint_not_found() -> None:
     s2_dataset = client.dataset("open_data.copernicus.sentinel2_msi")
     collection = s2_dataset.collection("S2A_S2MSI1C")
 
-    with pytest.raises(NotFoundError, match="No such datapoint.*"):
+    with pytest.raises(NotFoundError, match=r"No such datapoint.*"):
         collection.find("0181f4ef-2040-101a-1423-d818e4d1895e")  # is in another collection
 
 

diff --git a/tilebox-datasets/tests/test_timeseries.py b/tilebox-datasets/tests/test_timeseries.py
@@ -186,15 +186,15 @@ def test_timeseries_dataset_collection_find_invalid_id() -> None:
         mocked.collection.find("invalid")
 
     mocked.service.query_by_id.side_effect = ArgumentError
-    with pytest.raises(ValueError, match="Invalid datapoint id.*"):
+    with pytest.raises(ValueError, match=r"Invalid datapoint id.*"):
         mocked.collection.find(uuid4())
 
 
 def test_timeseries_dataset_collection_find_not_found() -> None:
     """Test that .find() of a collection raises a NotFoundError if the datapoint is not found."""
     mocked = _mocked_collection()
     mocked.service.query_by_id.side_effect = NotFoundError
-    with pytest.raises(NotFoundError, match="No such datapoint.*"):
+    with pytest.raises(NotFoundError, match=r"No such datapoint.*"):
         mocked.collection.find("14eb91a2-a42f-421f-9397-1dab577f05a9")
 
 

diff --git a/tilebox-grpc/tests/aio/test_error.py b/tilebox-grpc/tests/aio/test_error.py
@@ -34,5 +34,5 @@ async def _mock_rpc() -> None:
             self.some_rpc = _mock_rpc
 
     stub = with_pythonic_errors(Stub())
-    with pytest.raises(exception_type, match=".*"):
+    with pytest.raises(exception_type, match=r".*"):
         await stub.some_rpc()
diff --git a/tilebox-grpc/tests/test_channel.py b/tilebox-grpc/tests/test_channel.py
@@ -88,10 +88,10 @@ def test_parse_channel_info_unix(url: str) -> None:
 
 
 def test_parse_channel_invalid() -> None:
-    with pytest.raises(ValueError, match="Invalid"):
+    with pytest.raises(ValueError, match=r"Invalid"):
         parse_channel_info("i'm not a url")
 
 
 def test_parse_channel_port_required_for_http() -> None:
-    with pytest.raises(ValueError, match="Explicit port required"):
+    with pytest.raises(ValueError, match=r"Explicit port required"):
         parse_channel_info("http://0.0.0.0")
diff --git a/tilebox-grpc/tests/test_error.py b/tilebox-grpc/tests/test_error.py
@@ -38,5 +38,5 @@ def _mock_rpc() -> None:
             self.some_rpc = _mock_rpc
 
     stub = with_pythonic_errors(Stub())
-    with pytest.raises(exception_type, match=".*"):
+    with pytest.raises(exception_type, match=r".*"):
         stub.some_rpc()
diff --git a/tilebox-storage/tests/test_granule.py b/tilebox-storage/tests/test_granule.py
@@ -36,7 +36,7 @@ def test_granule_from_asf_datapoint(granule: ASFStorageGranule) -> None:
 def test_granule_from_asf_datapoints(granules: list[ASFStorageGranule]) -> None:
     datapoints = [_asf_granule_to_datapoint(granule) for granule in granules]
     dataset = xr.concat(datapoints, dim="time")
-    with pytest.raises(ValueError, match=".*more than one granule.*"):
+    with pytest.raises(ValueError, match=r".*more than one granule.*"):
         ASFStorageGranule.from_data(dataset)
 
     for i in range(len(granules)):  # converting a dataset with a time dimension of 1 should still work though
@@ -75,7 +75,7 @@ def test_granule_from_umbra_datapoint(granule: UmbraStorageGranule) -> None:
 def test_granule_from_umbra_datapoints(granules: list[UmbraStorageGranule]) -> None:
     datapoints = [_umbra_granule_to_datapoint(granule) for granule in granules]
     dataset = xr.concat(datapoints, dim="time")
-    with pytest.raises(ValueError, match=".*more than one granule.*"):
+    with pytest.raises(ValueError, match=r".*more than one granule.*"):
         UmbraStorageGranule.from_data(dataset)
 
     for i in range(len(granules)):  # converting a dataset with a time dimension of 1 should still work though
@@ -141,7 +141,7 @@ def test_granule_from_copernicus_datapoint(granule: CopernicusStorageGranule) ->
 def test_granule_from_copernicus_datapoints(granules: list[CopernicusStorageGranule]) -> None:
     datapoints = [_copernicus_granule_to_datapoint(granule) for granule in granules]
     dataset = xr.concat(datapoints, dim="time")
-    with pytest.raises(ValueError, match=".*more than one granule.*"):
+    with pytest.raises(ValueError, match=r".*more than one granule.*"):
         CopernicusStorageGranule.from_data(dataset)
 
     for i in range(len(granules)):  # converting a dataset with a time dimension of 1 should still work though
@@ -169,7 +169,7 @@ def test_granule_from_landsat_datapoint(granule: USGSLandsatStorageGranule) -> N
 def test_granule_from_landsat_datapoints(granules: list[USGSLandsatStorageGranule]) -> None:
     datapoints = [_landsat_granule_to_datapoint(granule) for granule in granules]
     dataset = xr.concat(datapoints, dim="time")
-    with pytest.raises(ValueError, match=".*more than one granule.*"):
+    with pytest.raises(ValueError, match=r".*more than one granule.*"):
         USGSLandsatStorageGranule.from_data(dataset)
 
     for i in range(len(granules)):  # converting a dataset with a time dimension of 1 should still work though

diff --git a/tilebox-storage/tests/test_providers.py b/tilebox-storage/tests/test_providers.py
@@ -1,3 +1,5 @@
+import re
+
 import pytest
 from httpx import AsyncClient, BasicAuth
 from pytest_httpx import HTTPXMock
@@ -21,5 +23,5 @@ async def test_asf_login(httpx_mock: HTTPXMock) -> None:
 @pytest.mark.asyncio
 async def test_asf_login_invalid_auth(httpx_mock: HTTPXMock) -> None:
     httpx_mock.add_response(401)
-    with pytest.raises(ValueError, match="Invalid username or password."):
+    with pytest.raises(ValueError, match=re.escape("Invalid username or password.")):
         await _asf_login(("username", "password"))
diff --git a/tilebox-storage/tests/test_storage_client.py b/tilebox-storage/tests/test_storage_client.py
@@ -1,3 +1,4 @@
+import re
 from datetime import timedelta
 from pathlib import Path
 from tempfile import TemporaryDirectory
@@ -37,14 +38,14 @@ async def test_client_login(httpx_mock: HTTPXMock) -> None:
 async def test_client_login_failed(httpx_mock: HTTPXMock) -> None:
     httpx_mock.add_response(401)
     client = _HttpClient(auth={"ASF": ("invalid-username", "password")})
-    with pytest.raises(ValueError, match="Invalid username or password."):
+    with pytest.raises(ValueError, match=re.escape("Invalid username or password.")):
         await client._client("ASF")
 
 
 @pytest.mark.asyncio
 async def test_client_missing_credentials() -> None:
     client = _HttpClient(auth={})
-    with pytest.raises(ValueError, match="Missing credentials.*"):
+    with pytest.raises(ValueError, match=r"Missing credentials.*"):
         await client._client("ASF")
 
 
@@ -102,7 +103,7 @@ async def test_download_verify_md5(httpx_mock: HTTPXMock, tmp_path: Path, granul
     httpx_mock.add_response(content=b"login-response")
     httpx_mock.add_response(stream=IteratorStream([b"my-granule"]))
     client = _HttpClient(auth={"ASF": ("username", "password")})
-    with pytest.raises(ValueError, match=".*md5sum mismatch.*"):
+    with pytest.raises(ValueError, match=r".*md5sum mismatch.*"):
         await client.download(granule, tmp_path, extract=False, show_progress=False)
 
 

diff --git a/tilebox-workflows/tests/automations/test_cron.py b/tilebox-workflows/tests/automations/test_cron.py
@@ -1,3 +1,4 @@
+import re
 from datetime import datetime, timezone
 
 import pytest
@@ -24,7 +25,9 @@ def test_cron_task_serialization_protobuf() -> None:
 
 
 def test_cron_task_serialization_requires_trigger() -> None:
-    with pytest.raises(ValueError, match="CronTask cannot be submitted without being triggered. Use task.once()."):
+    with pytest.raises(
+        ValueError, match=re.escape("CronTask cannot be submitted without being triggered. Use task.once().")
+    ):
         ExampleCronTask("test", 42)._serialize()
 
 

diff --git a/tilebox-workflows/tests/automations/test_storage_event.py b/tilebox-workflows/tests/automations/test_storage_event.py
@@ -1,3 +1,5 @@
+import re
+
 import pytest
 from hypothesis import given
 from tests.proto.test_pb2 import SampleArgs
@@ -30,7 +32,7 @@ def test_storage_event_task_serialization_protobuf() -> None:
 
 def test_storage_event_task_serialization_requires_trigger() -> None:
     with pytest.raises(
-        ValueError, match="StorageEventTask cannot be submitted without being triggered. Use task.once()."
+        ValueError, match=re.escape("StorageEventTask cannot be submitted without being triggered. Use task.once().")
     ):
         ExampleStorageEventTask("test", 42)._serialize()
 

diff --git a/tilebox-workflows/tests/runner/test_runner.py b/tilebox-workflows/tests/runner/test_runner.py
@@ -1,4 +1,5 @@
 import os
+import re
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 
@@ -8,7 +9,7 @@
 from tilebox.workflows import ExecutionContext, Task
 from tilebox.workflows.cache import InMemoryCache, JobCache
 from tilebox.workflows.client import Client
-from tilebox.workflows.data import JobState, RunnerContext
+from tilebox.workflows.data import JobState, ProgressBar, RunnerContext
 from tilebox.workflows.runner.task_runner import TaskRunner
 
 
@@ -115,6 +116,40 @@ def test_runner_with_flaky_task() -> None:
     assert job.state == JobState.COMPLETED
 
 
+class ProgressTask(Task):
+    n: int
+
+    def execute(self, context: ExecutionContext) -> None:
+        context.progress("test").add(self.n)
+        context.submit_subtasks([ProgressLeafTask(i) for i in range(self.n)])
+
+
+class ProgressLeafTask(Task):
+    i: int
+
+    def execute(self, context: ExecutionContext) -> None:
+        context.progress("test").done(1)
+
+
+def test_runner_with_workflow_tracking_progress() -> None:
+    client = replay_client("progress.rpcs.bin")
+    job_client = client.jobs()
+
+    with patch("tilebox.workflows.jobs.client.get_trace_parent_of_current_span") as get_trace_parent_mock:
+        # we hardcode the trace parent for the job, which allows us to assert that every single outgoing request
+        # matches exactly byte for byte
+        get_trace_parent_mock.return_value = "00-98b9c13dbc61637ffb36f592a8236088-bc29f6909f0b7c5b-01"
+        job = client.jobs().submit("progress-task", ProgressTask(4))
+
+    cache = InMemoryCache()
+    runner = client.runner(tasks=[ProgressTask, ProgressLeafTask], cache=cache)
+
+    runner.run_all()
+    job = job_client.find(job)  # load current job state
+    assert job.state == JobState.COMPLETED
+    assert job.progress_bars == [ProgressBar("test", 4, 4)]
+
+
 def replay_client(replay_file: str, assert_request_matches: bool = True) -> Client:
     replay = Path(__file__).parent / "testdata" / "recordings" / replay_file
     replay_channel = open_replay_channel(replay, assert_request_matches)
@@ -179,26 +214,32 @@ def test_runner_disallow_duplicate_task_identifiers() -> None:
 
     runner.register(FlakyTask)
     with pytest.raises(
-        ValueError, match="Duplicate task identifier: A task 'FlakyTask' with version 'v0.0' is already registered."
+        ValueError,
+        match=re.escape("Duplicate task identifier: A task 'FlakyTask' with version 'v0.0' is already registered."),
     ):
         runner.register(FlakyTask)
 
     runner.register(SumResultTask)
     with pytest.raises(
-        ValueError, match="Duplicate task identifier: A task 'SumResultTask' with version 'v0.0' is already registered."
+        ValueError,
+        match=re.escape("Duplicate task identifier: A task 'SumResultTask' with version 'v0.0' is already registered."),
     ):
         runner.register(SumResultTask)
 
     runner.register(ExplicitIdentifierTaskV1)
     with pytest.raises(
         ValueError,
-        match="Duplicate task identifier: A task 'tilebox.com/explicit' with version 'v1.0' is already registered.",
+        match=re.escape(
+            "Duplicate task identifier: A task 'tilebox.com/explicit' with version 'v1.0' is already registered."
+        ),
     ):
         runner.register(ExplicitIdentifierTaskV1)
 
     runner.register(ExplicitIdentifierTaskV2)  # this one has a different version, so it's fine
     with pytest.raises(
         ValueError,
-        match="Duplicate task identifier: A task 'tilebox.com/explicit' with version 'v2.0' is already registered.",
+        match=re.escape(
+            "Duplicate task identifier: A task 'tilebox.com/explicit' with version 'v2.0' is already registered."
+        ),
     ):
         runner.register(ExplicitIdentifierTaskV2)
diff --git a/tilebox-workflows/tests/runner/testdata/recordings/progress.rpcs.bin b/tilebox-workflows/tests/runner/testdata/recordings/progress.rpcs.bin