Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 9 additions & 14 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ on: [push, pull_request]
jobs:
tests:
runs-on: ${{ matrix.os }}
timeout-minutes: ${{ (matrix.os == 'windows-latest' && 30) || 15 }}
timeout-minutes: ${{ (matrix.os == 'windows-latest' && 45) || 15 }}

strategy:
matrix:
Expand All @@ -19,22 +19,17 @@ jobs:
steps:
- uses: actions/checkout@v4

- uses: conda-incubator/setup-miniconda@v3
- uses: actions/setup-python@v5
with:
miniforge-version: latest
environment-file: conda/dev.yaml
channels: conda-forge,nodefaults
activate-environment: pysus
auto-update-conda: true
conda-solver: libmamba
python-version: ${{ matrix.python-version }}

- name: Install dependencies
shell: bash -l {0}
shell: bash
run: |
pip install poetry poetry-plugin-export
poetry config virtualenvs.create false
if [ "${{ runner.os }}" = "Linux" ]; then
poetry install --without dev --extras dbc
poetry install --without dev
pip install pre-commit
else
poetry install --without dev
Expand All @@ -43,21 +38,21 @@ jobs:

- name: Linting
if: matrix.os == 'ubuntu-latest'
shell: bash -l {0}
shell: bash
run: pre-commit run --files pysus/**/*

- name: Tests (Linux)
if: matrix.os != 'windows-latest'
shell: bash -l {0}
shell: bash
run: |
poetry run pytest -vv pysus/tests/ --retries 3 --retry-delay 15 --cov=pysus --cov-report=xml:coverage.xml --cov-report=term-missing

- name: Tests (Windows)
if: matrix.os == 'windows-latest'
shell: bash -l {0}
shell: bash
run: |
export DUCKDB_NO_THREADS=1
poetry run pytest -vv pysus/tests/ --retries 3 --retry-delay 15 --timeout=480 -p no:cacheprovider -p no:asyncio
poetry run pytest -vv pysus/tests/ --timeout=480 -p no:cacheprovider

- name: Upload coverage to Codecov
if: matrix.os == 'ubuntu-latest'
Expand Down
14 changes: 3 additions & 11 deletions .github/workflows/release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,25 +84,17 @@ jobs:
if: ${{ !startsWith(github.ref, 'refs/tags/') }}
runs-on: ubuntu-latest

defaults:
run:
shell: bash -l {0}

steps:
- uses: actions/checkout@v4

- uses: conda-incubator/setup-miniconda@v3
- uses: actions/setup-python@v5
with:
miniforge-version: latest
environment-file: conda/dev.yaml
channels: conda-forge,nodefaults
activate-environment: pysus
auto-update-conda: true
conda-solver: libmamba
python-version: "3.12"

- name: Install dependencies
run: |
pip install poetry wget
poetry config virtualenvs.create false
poetry install --no-root --with docs

- name: Build docs
Expand Down
7 changes: 0 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,6 @@ PySUS is a Python package for accessing and analyzing Brazil's public health dat
pip install pysus
```

For DBC file support (requires libffi):
```bash
# Ubuntu/Debian
sudo apt install libffi-dev
pip install pysus[dbc]
```

For the terminal user interface (TUI):
```bash
pip install pysus[tui]
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ COPY docker/scripts/entrypoint.sh /entrypoint.sh
COPY docker/notebooks/ /home/pysus/Notebooks/

RUN pip install poetry \
&& cd /usr/src && poetry config virtualenvs.create false && poetry install --with docs --extras dbc \
&& cd /usr/src && poetry config virtualenvs.create false && poetry install --with docs \
&& pip install 'httpx<0.28' \
&& chown -R pysus:pysus /home/pysus

Expand Down
8 changes: 0 additions & 8 deletions docs/source/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,6 @@ The simplest way to install PySUS is via pip:
Extras
^^^^^^

For DBC file support (requires ``libffi``):

.. code-block:: bash

# Ubuntu/Debian
sudo apt install libffi-dev
pip install pysus[dbc]

For the terminal user interface (TUI):

.. code-block:: bash
Expand Down
4,912 changes: 2,666 additions & 2,246 deletions poetry.lock

Large diffs are not rendered by default.

10 changes: 4 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,17 +38,15 @@ httpx = ">=0.28.0"
aioftp = "^0.21.4"
dbfread = "2.0.7"
bigtree = "^0.12.2"

pyreaddbc = { version = ">=1.1.0", optional = true }
pycparser = { version = "2.21", optional = true }
textual = { extras = ["syntax"], version = "^8.2.1", optional = true }
humanize = { version = "^4.8.0", optional = true }
pyreaddbc = ">=2.0.4"
dotenv = "^0.9.9"
boto3 = "^1.42.89"
typer = "^0.24.1"

humanize = { version = "^4.8.0", optional = true }
textual = { extras = ["syntax"], version = "^8.2.1", optional = true }

[tool.poetry.extras]
dbc = ["pyreaddbc", "pycparser"]
tui = ["textual", "humanize"]

[tool.poetry.group.dev.dependencies]
Expand Down
12 changes: 8 additions & 4 deletions pysus/api/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

import enum
from collections.abc import Callable
from datetime import datetime
from datetime import datetime, timezone
from pathlib import Path
from typing import TYPE_CHECKING, Literal

Expand All @@ -16,6 +16,7 @@
from pysus import CACHEPATH
from sqlalchemy import DateTime, Enum, Integer, String, create_engine
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column, sessionmaker
from sqlalchemy.pool import NullPool

from .dadosgov import DadosGovClient
from .ducklake.client import DuckLake
Expand Down Expand Up @@ -61,7 +62,7 @@ class LocalFileState(Base):
sha256: Mapped[str | None] = mapped_column(String, nullable=True)
last_synced: Mapped[datetime] = mapped_column(
DateTime,
default=datetime.utcnow,
default=lambda: datetime.now(timezone.utc).replace(tzinfo=None),
)


Expand All @@ -85,7 +86,10 @@ def __init__(self, db_path: Path = CACHEPATH / "config.db"):
db_path.parent.mkdir(parents=True, exist_ok=True)

self.cachepath = db_path.parent
self.engine = create_engine(f"duckdb:///{db_path}")
self.engine = create_engine(
f"duckdb:///{db_path.resolve().as_posix()}",
poolclass=NullPool,
)
Base.metadata.create_all(self.engine)
self.Session = sessionmaker(bind=self.engine)

Expand Down Expand Up @@ -239,7 +243,7 @@ async def _update_state(
session.add(record)

record.status = status
record.last_synced = datetime.utcnow()
record.last_synced = datetime.now(timezone.utc).replace(tzinfo=None)
session.commit()

async def download(
Expand Down
21 changes: 9 additions & 12 deletions pysus/api/extensions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import asyncio
import csv
import ctypes.util
import gzip
import shutil
import sys
Expand All @@ -26,16 +25,9 @@
from .types import FileType

try:
LIBFFI = True
if sys.platform.startswith("linux"):
LIBFFI = ctypes.util.find_library("ffi") is not None
from pyreaddbc import dbc2dbf

if LIBFFI:
from pyreaddbc import dbc2dbf

DBC_IMPORT = True
else:
DBC_IMPORT = False
DBC_IMPORT = True
except ImportError:
DBC_IMPORT = False

Expand Down Expand Up @@ -781,7 +773,7 @@ def _extract():
return list(await asyncio.gather(*tasks))


class FTPNotImported(BaseTabularFile):
class DBCNotImported(BaseTabularFile):
"""Placeholder for DBC files when optional dependency is not installed."""

path: Path = Field(default_factory=lambda: Path("..."))
Expand Down Expand Up @@ -872,17 +864,22 @@ class ExtensionFactory:
".csv": CSV,
".parquet": Parquet,
".dbf": DBF,
".dbc": DBC if DBC_IMPORT else FTPNotImported, # type: ignore
".dbc": DBC if DBC_IMPORT else DBCNotImported, # type: ignore
".pdf": PDF,
".json": JSON,
}

_magic_available: bool = sys.platform != "win32"

@classmethod
async def _identify(cls, path: Path) -> type[BaseLocalFile] | None:
"""Identify the file class by its MIME type."""
if not cls._magic_available:
return None
try:
import magic
except (ImportError, OSError):
cls._magic_available = False
return None
try:
mime = await to_thread.run_sync(
Expand Down
4 changes: 2 additions & 2 deletions pysus/tests/api/ftp/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,15 +128,15 @@ def formatter(self, f):


@pytest.mark.asyncio
async def test_file_download_calls_client(mock_client, mock_dataset):
async def test_file_download_calls_client(mock_client, mock_dataset, tmp_path):
file = File(
path="/root/test.dbc",
_info={"path": "/root/test.dbc", "name": "test.dbc"},
type="file",
dataset=mock_dataset,
)

dest = Path("/tmp/test.dbc")
dest = Path(tmp_path / "test.dbc")
await file._download(output=dest)

mock_client._download_file.assert_called_once_with(file, dest, None)
37 changes: 21 additions & 16 deletions pysus/tests/api/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,11 +77,13 @@ def test_download_status_values(self):

class TestLocalFileState:
@pytest.mark.asyncio
async def test_update_state_creates_record(self, test_db_path):
async def test_update_state_creates_record(self, test_db_path, tmp_path):
client = PySUS(db_path=test_db_path)

local = pathlib.Path(tmp_path / "test.dbc")

await client._update_state(
local_path=pathlib.Path("/tmp/test.dbc"),
local_path=local,
remote_path="/remote/test.dbc",
client_name="ftp",
status=DownloadStatus.COMPLETED,
Expand All @@ -93,9 +95,7 @@ async def test_update_state_creates_record(self, test_db_path):

with client.Session() as session:
record = (
session.query(LocalFileState)
.filter_by(path="/tmp/test.dbc")
.first()
session.query(LocalFileState).filter_by(path=str(local)).first()
)
assert record is not None
assert record.remote_path == "/remote/test.dbc"
Expand All @@ -109,34 +109,36 @@ async def test_update_state_creates_record(self, test_db_path):
await client.__aexit__(None, None, None)

@pytest.mark.asyncio
async def test_delete_record_removes_entry(self, test_db_path):
async def test_delete_record_removes_entry(self, test_db_path, tmp_path):
client = PySUS(db_path=test_db_path)

local = pathlib.Path(tmp_path / "test.dbc")

await client._update_state(
local_path=pathlib.Path("/tmp/test.dbc"),
local_path=local,
remote_path="/remote/test.dbc",
client_name="ftp",
status=DownloadStatus.COMPLETED,
)

await client._delete_record("/tmp/test.dbc")
await client._delete_record(str(local))

with client.Session() as session:
record = (
session.query(LocalFileState)
.filter_by(path="/tmp/test.dbc")
.first()
session.query(LocalFileState).filter_by(path=str(local)).first()
)
assert record is None

await client.__aexit__(None, None, None)

@pytest.mark.asyncio
async def test_get_local_file_finds_existing(self, test_db_path):
async def test_get_local_file_finds_existing(self, test_db_path, tmp_path):
client = PySUS(db_path=test_db_path)

local = pathlib.Path(tmp_path / "test.dbc")

await client._update_state(
local_path=pathlib.Path("/tmp/test.dbc"),
local_path=local,
remote_path="/remote/test.dbc",
client_name="ftp",
status=DownloadStatus.COMPLETED,
Expand All @@ -158,17 +160,20 @@ async def test_get_local_file_finds_existing(self, test_db_path):

class TestGetCompletedRemotePaths:
@pytest.mark.asyncio
async def test_get_completed_remote_paths(self, test_db_path):
async def test_get_completed_remote_paths(self, test_db_path, tmp_path):
client = PySUS(db_path=test_db_path)

local1 = pathlib.Path(tmp_path / "test1.dbc")
local2 = pathlib.Path(tmp_path / "test2.dbc")

await client._update_state(
local_path=pathlib.Path("/tmp/test1.dbc"),
local_path=local1,
remote_path="/remote/test1.dbc",
client_name="ftp",
status=DownloadStatus.COMPLETED,
)
await client._update_state(
local_path=pathlib.Path("/tmp/test2.dbc"),
local_path=local2,
remote_path="/remote/test2.dbc",
client_name="ftp",
status=DownloadStatus.PENDING,
Expand Down
Loading
Loading