Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion docs/source/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@ Prepares a dataset for release by uploading it to the staging area and updating
uv run datamanager prepare <dataset-name.sqlite> <path/to/local/file.sqlite>
```

When preparing a dataset, you will be prompted for an optional **Temoa Repository Hash** (a git commit hash). It records which version of the temoa repository this database is meant to work with. You can:

- Enter a valid git commit hash: 4-40 hexadecimal characters, abbreviated or full (e.g., `abc1234` or `a1b2c3d4e5f6...`)
- Press Enter to skip (optional field)

After running `prepare`, follow the on-screen instructions:

1. `git add manifest.json`
Expand All @@ -44,12 +49,20 @@ After running `prepare`, follow the on-screen instructions:

### `list-datasets`

Lists all datasets currently tracked in `manifest.json`.
Lists all datasets currently tracked in `manifest.json`, including the latest version, update time, SHA256 hash, and Temoa repository hash (if available).

```bash
uv run datamanager list-datasets
```

The output includes:

- **Dataset Name**: The logical name of the dataset
- **Latest Version**: The most recent version tag
- **Last Updated**: When the latest version was created (relative time and absolute timestamp)
- **SHA256**: First 12 characters of the file hash
- **Temoa Hash**: First 12 characters of the temoa repository commit hash (or "N/A" if not specified)

![list_datasets](../../assets/list_datasets.png)

### `pull`
Expand Down
2 changes: 1 addition & 1 deletion docs/source/workflow.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ Use the `datamanager` tool to stage your changes. The `prepare` command handles
uv run datamanager prepare energy-data.sqlite ./local-files/new-energy.sqlite
```

The tool will guide you through the process. For other maintenance tasks like `rollback` or `delete`, use the corresponding command.
The tool will guide you through the process, including an optional prompt for the **Temoa Repository Hash** (a git commit hash), which records which version of the temoa repository this database is meant to work with. For other maintenance tasks like `rollback` or `delete`, use the corresponding command.

## Step 3: Commit and Push

Expand Down
3 changes: 3 additions & 0 deletions manifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"r2_object_key": "test_database/v4-6d60f0035a80de92c3f3df433212699e0584a09a7d4943693ae0889d98640641.sqlite",
"diffFromPrevious": "diffs/test_database.sqlite/diff-v3-to-v4.diff",
"commit": "5803a97",
"temoaRepoHash": null,
"description": "testing the autogenerating data docs script"
},
{
Expand All @@ -19,6 +20,7 @@
"r2_object_key": "test_database/v3-6c37e0744a6f49f8b3e5b24b74080c2ae845b925633ccefa81193201639bee12.sqlite",
"diffFromPrevious": "diffs/test_database.sqlite/diff-v2-to-v3.diff",
"commit": "ecc49b5",
"temoaRepoHash": null,
"description": "testing sql diffing with summary"
},
{
Expand All @@ -28,6 +30,7 @@
"r2_object_key": "test_database/v2-e287b00772296e3ae8d65699570662ff316d8dae50deef4041fde65ca73202a5.sqlite",
"diffFromPrevious": "diffs/test_database.sqlite/diff-v1-to-v2.diff",
"commit": "a621125",
"temoaRepoHash": null,
"description": "updating test_database to get multiple versions"
}
]
Expand Down
101 changes: 91 additions & 10 deletions src/datamanager/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import subprocess
from datetime import datetime, timezone
import tempfile
import re
from dateutil.parser import isoparse
from pathlib import Path

Expand Down Expand Up @@ -41,6 +42,22 @@ def _rel(iso: str) -> str:
return f"{hours} h ago"


def _validate_temoa_hash(temoa_hash: str) -> bool:
    """
    Return whether ``temoa_hash`` is acceptable as a temoa repo commit hash.

    The field is optional, so an empty or whitespace-only value is accepted.
    Otherwise the value, with surrounding whitespace stripped, must consist
    of 4-40 hexadecimal characters (case-insensitive).

    Args:
        temoa_hash: Raw value to check; may be empty.

    Returns:
        True if the value is blank or matches the hash format, else False.
    """
    # Strip once and reuse; a falsy input is treated the same as a blank one.
    candidate = temoa_hash.strip() if temoa_hash else ""
    if not candidate:
        return True  # Empty is allowed (optional field)

    # fullmatch anchors both ends strictly, so there is no reliance on ``$``
    # (which, with re.match, would also match just before a trailing newline).
    return re.fullmatch(r"[a-fA-F0-9]{4,40}", candidate) is not None


# Initialize Typer app and Rich console
app = typer.Typer(
name="datamanager",
Expand Down Expand Up @@ -97,15 +114,26 @@ def verify(ctx: typer.Context) -> None:
def list_datasets(ctx: typer.Context) -> None:
"""Lists all datasets tracked in the manifest."""
data = manifest.read_manifest()
table = Table("Dataset Name", "Latest Version", "Last Updated", "SHA256")
table = Table(
"Dataset Name", "Latest Version", "Last Updated", "SHA256", "Temoa Hash"
)
for item in data:
latest = item["history"][0]
temoa_hash_display = "N/A"
if latest.get("temoaRepoHash"):
temoa_hash_display = (
f"{latest['temoaRepoHash'][:12]}..."
if len(str(latest["temoaRepoHash"])) > 12
else str(latest["temoaRepoHash"])
)

table.add_row(
item["fileName"],
latest["version"],
# latest["timestamp"],
f"{_rel(latest['timestamp'])} ({latest['timestamp']})",
f"{latest['sha256'][:12]}...",
temoa_hash_display,
)
console.print(table)

Expand All @@ -128,8 +156,12 @@ def _run_pull_logic(name: str, version: str, output: Optional[Path]) -> None:
else:
final_path = output

temoa_hash_info = ""
if version_entry.get("temoaRepoHash"):
temoa_hash_info = f", temoa: {version_entry['temoaRepoHash']}"

console.print(
f"Pulling version [magenta]{version_entry['version']}[/] (commit: {version_entry['commit']}) to [cyan]{final_path}[/]"
f"Pulling version [magenta]{version_entry['version']}[/] (commit: {version_entry['commit']}{temoa_hash_info}) to [cyan]{final_path}[/]"
)

success = core.pull_and_verify(
Expand Down Expand Up @@ -190,10 +222,12 @@ def _pull_interactive(ctx: typer.Context) -> None:
console.print(f"[red]Error: No version history found for {selected_name}.[/]")
return

version_choices = [
f"{entry['version']} (commit: {entry['commit']}, {_rel(entry['timestamp'])})"
for entry in dataset["history"]
]
version_choices = []
for entry in dataset["history"]:
temoa_info = f", temoa: {entry.get('temoaRepoHash', 'N/A')}"
version_choices.append(
f"{entry['version']} (commit: {entry['commit']}, {_rel(entry['timestamp'])}{temoa_info})"
)
selected_version_str = questionary.select(
"Which version would you like to pull?", choices=version_choices
).ask()
Expand Down Expand Up @@ -231,6 +265,48 @@ def _run_prepare_logic(ctx: typer.Context, name: str, file: Path) -> None:
dataset = manifest.get_dataset(name)
client = core.get_r2_client() # Moved up to be available for diffing

# Prompt for temoa repo hash (optional)
temoa_hash = None
if not ctx.obj.get("no_prompt"):
console.print("\n[bold]Temoa Repository Hash[/]")
console.print(
"This helps track which version of the temoa repository this database works against."
)

while True:
temoa_hash_input = questionary.text(
"Enter the temoa repository commit hash (optional, press Enter to skip):",
default="",
).ask()

if not temoa_hash_input or not temoa_hash_input.strip():
console.print("Skipping temoa repo hash (optional field).")
break

temoa_hash_candidate = temoa_hash_input.strip()
if _validate_temoa_hash(temoa_hash_candidate):
temoa_hash = temoa_hash_candidate
console.print(f"Using temoa repo hash: [green]{temoa_hash}[/]")
break
else:
console.print(
f"[bold red]Invalid format:[/] '{temoa_hash_candidate}' doesn't look like a valid git commit hash."
)
console.print(
"Git commit hashes should contain only hexadecimal characters (0-9, a-f, A-F) and be 4-40 characters long."
)
retry = questionary.confirm(
"Would you like to try again?", default=True
).ask()
if not retry:
console.print("Skipping temoa repo hash.")
break
else:
# In non-interactive mode, temoa hash is not provided
console.print(
"Running in non-interactive mode - temoa repo hash not specified."
)

# Check for changes BEFORE doing any uploads.
if dataset:
latest_version = dataset["history"][0]
Expand Down Expand Up @@ -287,6 +363,7 @@ def _run_prepare_logic(ctx: typer.Context, name: str, file: Path) -> None:
if diff_git_path
else None, # Add path to entry
"commit": "pending-merge",
"temoaRepoHash": temoa_hash,
"description": "pending-merge",
}
manifest.add_history_entry(name, new_entry)
Expand All @@ -307,6 +384,7 @@ def _run_prepare_logic(ctx: typer.Context, name: str, file: Path) -> None:
"staging_key": staging_key,
"diffFromPrevious": None, # Explicitly None for new datasets
"commit": "pending-merge",
"temoaRepoHash": temoa_hash,
"description": "pending-merge",
}
],
Expand Down Expand Up @@ -419,6 +497,7 @@ def _run_rollback_logic(ctx: typer.Context, name: str, to_version: str) -> None:
"r2_object_key": target_entry["r2_object_key"],
"diffFromPrevious": None,
"commit": "pending-merge",
"temoaRepoHash": target_entry.get("temoaRepoHash"),
"description": f"Rollback to version {target_entry['version']}",
}

Expand Down Expand Up @@ -481,10 +560,12 @@ def _rollback_interactive(ctx: typer.Context) -> None:
return

# Exclude the latest version from the choices, as you can't roll back to it.
version_choices = [
f"{entry['version']} (commit: {entry['commit']}, {_rel(entry['timestamp'])})"
for entry in dataset["history"][1:] # Start from the second entry
]
version_choices = []
for entry in dataset["history"][1:]: # Start from the second entry
temoa_info = f", temoa: {entry.get('temoaRepoHash', 'N/A')}"
version_choices.append(
f"{entry['version']} (commit: {entry['commit']}, {_rel(entry['timestamp'])}{temoa_info})"
)
selected_version_str = questionary.select(
"Which version do you want to restore?", choices=version_choices
).ask()
Expand Down
26 changes: 26 additions & 0 deletions tests/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,12 @@ def test_prepare_for_create_success(test_repo: Path, mocker: MockerFixture) -> N
mocker.patch("datamanager.core.get_r2_client")
mock_upload = mocker.patch("datamanager.core.upload_to_staging")

# Mock the temoa hash prompt to return empty (skip)
mocker.patch(
"questionary.text",
return_value=mocker.Mock(ask=mocker.Mock(return_value="")),
)

result = runner.invoke(app, ["prepare", "new-dataset.sqlite", str(new_file)])

assert result.exit_code == 0, result.stdout
Expand All @@ -32,6 +38,7 @@ def test_prepare_for_create_success(test_repo: Path, mocker: MockerFixture) -> N
assert dataset is not None
assert dataset["history"][0]["diffFromPrevious"] is None
assert dataset["history"][0]["description"] == "pending-merge"
assert dataset["history"][0]["temoaRepoHash"] is None


def test_prepare_for_update_with_small_diff(
Expand All @@ -44,6 +51,12 @@ def test_prepare_for_update_with_small_diff(
mocker.patch("datamanager.core.upload_to_staging")
mocker.patch("datamanager.core.download_from_r2")

# Mock the temoa hash prompt to return empty (skip)
mocker.patch(
"questionary.text",
return_value=mocker.Mock(ask=mocker.Mock(return_value="")),
)

# Prepare a fake summary and full diff
fake_summary = "# summary: 1 add, 1 del\n"
fake_full = "--- a\n+++ b\n-foo\n+bar\n"
Expand Down Expand Up @@ -80,6 +93,13 @@ def test_prepare_for_update_with_large_diff(
mock_r2_client.head_object.return_value = {"ContentLength": 1024}
mocker.patch("datamanager.core.upload_to_staging")
mocker.patch("datamanager.core.download_from_r2")

# Mock the temoa hash prompt to return empty (skip)
mocker.patch(
"questionary.text",
return_value=mocker.Mock(ask=mocker.Mock(return_value="")),
)

# Make the full diff larger than the default limit, but still provide a summary
large_full = "line\n" * (settings.max_diff_lines + 1)
small_summary = "# summary: huge diff, see details in PR\n"
Expand Down Expand Up @@ -115,6 +135,12 @@ def test_prepare_no_changes(test_repo: Path, mocker: MockerFixture) -> None:
os.chdir(test_repo)
mock_upload = mocker.patch("datamanager.core.upload_to_staging")

# Mock the temoa hash prompt to return empty (skip)
mocker.patch(
"questionary.text",
return_value=mocker.Mock(ask=mocker.Mock(return_value="")),
)

result = runner.invoke(app, ["prepare", "core-dataset.sqlite", "new_data.sqlite"])

assert result.exit_code == 0, result.stdout
Expand Down
25 changes: 25 additions & 0 deletions tests/test_manifest.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pytest

from datamanager import manifest
from datamanager.__main__ import _validate_temoa_hash


def test_read_manifest(test_repo: Path) -> None:
Expand Down Expand Up @@ -48,3 +49,27 @@ def test_update_latest_history_entry(test_repo: Path) -> None:
assert data[0]["history"][0]["version"] == "v2"
assert data[0]["history"][0]["commit"] == "abcdef"
assert data[0]["latestVersion"] == "v2"


def test_validate_temoa_hash() -> None:
    """Check which inputs the temoa hash validator accepts and rejects."""
    accepted = [
        "abc123",  # short hash, lowercase
        "ABCDEF",  # short hash, uppercase
        "a" * 40,  # maximum length
        "1234567890abcdef" * 2,  # long hash
        "",  # empty is allowed (optional field)
        " ",  # whitespace only is allowed (optional field)
    ]
    rejected = [
        "gggggg",  # 'g' is not a hex digit
        "abc123g",  # contains 'g'
        "abc12345-",  # contains a dash
        "abc",  # fewer than 4 characters
        "a" * 41,  # more than 40 characters
    ]

    for value in accepted:
        assert _validate_temoa_hash(value)

    for value in rejected:
        assert not _validate_temoa_hash(value)