Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ repos:
hooks:
- id: flake8
args: [--max-line-length=80, --extend-ignore=E203]
exclude: ^docs/
additional_dependencies: [
'flake8-blind-except',
'flake8-bugbear',
Expand Down
19 changes: 13 additions & 6 deletions pysus/api/dadosgov/client.py
Original file line number Diff line number Diff line change
async def _download_file(
    self,
    file: BaseRemoteFile,
    output: pathlib.Path,
    callback: Callable[[int, int], None] | None = None,
) -> pathlib.Path:
    """Download a remote file to a local path.

    Args:
        file: Remote file whose ``path`` holds the URL to fetch.
        output: Local path the response body is written to.
        callback: Optional progress hook, called after every chunk with
            ``(bytes_downloaded_so_far, total_bytes)``. ``total_bytes`` is
            0 when the response carries no ``Content-Length`` header.

    Returns:
        ``output``, once the whole body has been written.

    Raises:
        ConnectionError: If the client has not been connected yet.
    """
    # Local import: this module's top-level import block is not in view.
    import re

    if self._client is None:
        raise ConnectionError(
            "Client not connected. Call login(token=...) first.",
        )

    # Restore the "//" after the scheme when it was collapsed to a single
    # slash (presumably because ``file.path`` is path-like - TODO confirm).
    # The pattern is anchored so an already well-formed URL is left alone
    # instead of gaining a third slash.
    url = re.sub(r"^(https?):/+", r"\1://", str(file.path))

    async with self._client.stream("GET", url) as response:
        response.raise_for_status()
        total = int(response.headers.get("Content-Length", 0))
        downloaded = 0
        with open(output, "wb") as f:
            async for chunk in response.aiter_bytes():
                f.write(chunk)
                downloaded += len(chunk)
                if callback:
                    callback(downloaded, total)
    return output


Expand All @@ -181,9 +190,7 @@ class Recurso(BaseModel):
title: str = Field(alias="titulo")
url: str = Field(alias="link")
api_size: int = Field(alias="tamanho")
last_modified: datetime | None = Field(
None, alias="dataUltimaAtualizacaoArquivo"
)
last_modified: DateTime = Field(None, alias="dataUltimaAtualizacaoArquivo")
file_name: str | None = Field(None, alias="nomeArquivo")

async def get_size(self) -> int:
Expand Down
244 changes: 232 additions & 12 deletions pysus/api/dadosgov/databases.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,39 @@
"""Pre-configured health database definitions accessible via dados.gov.br."""

import re
from typing import Any

from pysus.utils import zfill_year

from .models import Dataset

# Three-letter month abbreviations mapped to their 1-based month number,
# listed in calendar order.
MONTHS: dict[str, int] = {
    abbreviation: number
    for number, abbreviation in enumerate(
        (
            "jan",
            "fev",
            "mar",
            "abr",
            "mai",
            "jun",
            "jul",
            "ago",
            "set",
            "out",
            "nov",
            "dez",
        ),
        start=1,
    )
}


def _parse_year(val: str) -> int | None:
try:
y = int(val)
return y if 1970 <= y <= 2100 else None
except ValueError:
return None


def _skip(name: str) -> bool:
return name.startswith("get_") or name.lower().endswith(".pdf")


class CNES(Dataset):
"""Cadastro Nacional de Estabelecimentos de Saúde (CNES)."""
Expand Down Expand Up @@ -32,8 +62,23 @@ def description(self) -> str:
)

def formatter(self, filename: str) -> dict[str, Any]:
    """Parse a CNES filename and extract metadata.

    Names ending in ``_MM-YYYY.csv`` yield their month and year. Names
    rejected by ``_skip`` and names that do not match yield no metadata.

    Args:
        filename: File name to parse; surrounding whitespace is ignored.

    Returns:
        A dict with ``state``, ``year`` and ``month`` keys. ``state`` is
        always ``None``; ``year`` is ``None`` when ``_parse_year`` rejects
        it; ``month`` is ``None`` when it is outside 1-12.
    """
    name = filename.strip()
    meta: dict[str, Any] = {"state": None, "year": None, "month": None}
    if _skip(name):
        return meta

    m = re.search(r"_(\d{2})-(\d{4})\.csv$", name)
    if m:
        month = int(m.group(1))
        meta["year"] = _parse_year(m.group(2))
        # Two digits can also spell 00 or 13-99, which are not months.
        meta["month"] = month if 1 <= month <= 12 else None
    return meta


class PNI(Dataset):
Expand All @@ -49,6 +94,18 @@ class PNI(Dataset):
"9a25b796-80e3-444a-a4e7-405f5596d8ab",
]

# Common prefix shared by every key of ``group_aliases`` below.
_PNI_PREFIX = "doses-aplicadas-pelo-programa-de-nacional-de-imunizacoes-pni"

# Maps each listed group name to the single short alias "DPNI".
# NOTE(review): the 2022 key has a different shape ("dataset-" prefix and
# "_2022" suffix) and there is no 2024 entry - presumably this mirrors the
# upstream names; confirm against the source catalogue.
group_aliases: dict[str, str] = {
_PNI_PREFIX: "DPNI",
f"{_PNI_PREFIX}-2020": "DPNI",
f"{_PNI_PREFIX}-2021": "DPNI",
f"dataset-{_PNI_PREFIX}_2022": "DPNI",
f"{_PNI_PREFIX}-2023": "DPNI",
f"{_PNI_PREFIX}-2025": "DPNI",
f"{_PNI_PREFIX}-2026": "DPNI",
}

@property
def name(self) -> str:
"""Return the short name."""
Expand All @@ -64,8 +121,21 @@ def description(self) -> str:
return "O PNI monitora a cobertura vacinal e doses aplicadas no Brasil."

def formatter(self, filename: str) -> dict[str, Any]:
    """Parse a PNI vaccination filename into month and year.

    The name is stripped and lower-cased, then matched against
    ``vacinacao_<mmm>_<YYYY>_csv.zip`` where ``<mmm>`` is looked up in
    ``MONTHS``.

    Args:
        filename: File name to parse.

    Returns:
        A dict with ``state``, ``year`` and ``month`` keys. ``state`` is
        always ``None``; ``month`` is ``None`` for an abbreviation missing
        from ``MONTHS``; ``year`` is ``None`` when ``_parse_year`` rejects
        it. All values are ``None`` for skipped or unrecognised names.
    """
    name = filename.strip().lower()
    meta: dict[str, Any] = {"state": None, "year": None, "month": None}
    if _skip(name):
        return meta

    m = re.match(r"vacinacao_(\w{3})_(\d{4})_csv\.zip", name)
    if m:
        meta["year"] = _parse_year(m.group(2))
        meta["month"] = MONTHS.get(m.group(1))
    return meta


class SIA(Dataset):
Expand All @@ -92,8 +162,31 @@ def description(self) -> str:
"""

def formatter(self, filename: str) -> dict[str, Any]:
    """Parse an SIA filename into year.

    The name is stripped and lower-cased; names ending in ``_YYYY_.csv``
    yield their year.

    Args:
        filename: File name to parse.

    Returns:
        A dict with ``state``, ``year`` and ``month`` keys. Only ``year``
        is ever filled in, and only when ``_parse_year`` accepts it.
    """
    name = filename.strip().lower()
    meta: dict[str, Any] = {"state": None, "year": None, "month": None}
    if _skip(name):
        return meta

    # This one pattern also covers "<mmm>-out_YYYY_.csv" names: they end
    # in "_YYYY_.csv" as well and carry the same year, so a separate
    # branch for them could never be reached.
    m = re.search(r"_(\d{4})_\.csv$", name)
    if m:
        meta["year"] = _parse_year(m.group(1))
    return meta


class SINAN(Dataset):
Expand All @@ -104,8 +197,21 @@ class SINAN(Dataset):
"5699abe0-0510-4da8-b47d-209b3bb32b34",
"4557ba96-7d52-4a56-bd6f-f99a5af09f77",
"740ce8f4-7a5d-4351-aad4-7623f2490ada",
"cf044c1b-b966-4d0e-bab0-f3aa65897b7d",
"2d4997fb-cd11-4ce2-b217-09cd50e3151f",
"8a585222-4c2e-43b7-807d-59355ee79c48",
"527e8665-de64-4f81-b7c3-40b59c7d1d3c",
]

group_aliases: dict[str, str] = {
"arboviroses-dengue": "DENG",
"arboviroses-febre-de-chikungunya": "CHIK",
"arboviroses-zika-virus": "ZIKA",
"hanseniase": "HANS",
"dados-tuberculose": "TUBE",
"sifilis": "SIFA",
}

@property
def name(self) -> str:
"""Return the short name."""
Expand All @@ -124,8 +230,31 @@ def description(self) -> str:
"""

def formatter(self, filename: str) -> dict[str, Any]:
    """Parse a SINAN filename into state and year.

    The name is stripped and upper-cased, then matched against two
    layouts: ``<4 chars>BR<YY>.CSV.ZIP`` (state ``"BR"``, two-digit year
    expanded by ``zfill_year``) and ``MPX_<YYYY>_OPENDATASUS.CSV.ZIP``
    (year only).

    Args:
        filename: File name to parse.

    Returns:
        A dict with ``state``, ``year`` and ``month`` keys; ``month`` is
        always ``None``. All values are ``None`` for skipped or
        unrecognised names, or when ``zfill_year`` raises ``IndexError``
        or ``ValueError``.
    """
    name = filename.strip().upper()
    meta: dict[str, Any] = {"state": None, "year": None, "month": None}
    # The skip rules are written in lower case, so test the lower-cased
    # name; otherwise "GET_..." files would slip past the check and could
    # match the four-character prefix pattern below.
    if _skip(name.lower()):
        return meta

    m = re.match(r"(\w{4})(BR)(\d{2})\.CSV\.ZIP", name)
    if m:
        try:
            year = zfill_year(m.group(3))
        except (IndexError, ValueError):
            return meta
        return {"state": m.group(2), "year": year, "month": None}

    m = re.match(r"MPX_(\d{4})_OPENDATASUS\.CSV\.ZIP", name)
    if m:
        meta["year"] = _parse_year(m.group(1))
    return meta


class SIM(Dataset):
Expand All @@ -135,6 +264,10 @@ class SIM(Dataset):
"5f121f4d-47c6-428e-8ec6-e8ec56417172",
]

group_aliases: dict[str, str] = {
"sim-1979-2019": "DO",
}

@property
def name(self) -> str:
"""Return the short name."""
Expand All @@ -152,8 +285,31 @@ def description(self) -> str:
"""

def formatter(self, filename: str) -> dict[str, Any]:
    """Parse a SIM filename into year.

    Two layouts are recognised (case-sensitively) on the stripped name:
    ``Mortalidade_Geral_<YYYY>_csv.zip`` anywhere in the name, and a name
    starting with ``DO<YY>OPEN`` whose two-digit year is expanded by
    ``zfill_year``.

    Args:
        filename: File name to parse.

    Returns:
        A dict with ``state``, ``year`` and ``month`` keys. Only ``year``
        is ever filled in. All values are ``None`` for skipped or
        unrecognised names, or when ``zfill_year`` raises ``IndexError``
        or ``ValueError``.
    """
    name = filename.strip()
    meta: dict[str, Any] = {"state": None, "year": None, "month": None}
    if _skip(name):
        return meta

    m = re.search(r"Mortalidade_Geral_(\d{4})_csv\.zip", name)
    if m:
        meta["year"] = _parse_year(m.group(1))
        return meta

    m = re.match(r"DO(\d{2})OPEN", name)
    if m:
        try:
            meta["year"] = zfill_year(m.group(1))
        except (IndexError, ValueError):
            # Keep the best-effort contract: report no metadata.
            return meta
    return meta


class SINASC(Dataset):
Expand All @@ -163,6 +319,10 @@ class SINASC(Dataset):
"441cc6bd-684a-4afd-a88b-ba4734c9e83e",
]

# Maps the listed group name to the short alias "DN".
# NOTE(review): the key ends in "20201", which looks like it could be a
# typo for a year - presumably it mirrors the upstream name; confirm.
group_aliases: dict[str, str] = {
"sistema-de-informacao-sobre-nascidos-vivos-sinasc-1996-a-20201": "DN",
}

@property
def name(self) -> str:
"""Return the short name."""
Expand All @@ -181,8 +341,67 @@ def description(self) -> str:
"""

def formatter(self, filename: str) -> dict[str, Any]:
    """Parse a SINASC filename into year.

    Two layouts are recognised (case-sensitively) on the stripped name:
    ``SINASC_<YYYY>_csv.zip`` (year only) and ``DNBR<YYYY>_csv.zip``
    (year plus state ``"BR"``).

    Args:
        filename: File name to parse.

    Returns:
        A dict with ``state``, ``year`` and ``month`` keys; ``month`` is
        always ``None`` and ``year`` is ``None`` when ``_parse_year``
        rejects it. All values are ``None`` for skipped or unrecognised
        names.
    """
    name = filename.strip()
    meta: dict[str, Any] = {"state": None, "year": None, "month": None}
    if _skip(name):
        return meta

    m = re.search(r"SINASC_(\d{4})_csv\.zip", name)
    if m:
        meta["year"] = _parse_year(m.group(1))
        return meta

    m = re.search(r"DNBR(\d{4})_csv\.zip", name)
    if m:
        meta["state"] = "BR"
        meta["year"] = _parse_year(m.group(1))
    return meta


class COVID19(Dataset):
    """Confirmed COVID-19 cases (Casos Confirmados de COVID-19)."""

    ids: list[str] = [
        "1ba1801e-aec0-4dba-ae2a-7732f0a0c9f7",
    ]

    @property
    def name(self) -> str:
        """Return the short name."""
        return "COVID19"

    @property
    def long_name(self) -> str:
        """Return the human-readable name."""
        return "Casos Confirmados de COVID-19"

    @property
    def description(self) -> str:
        """Return a short description of the dataset."""
        return "Dados anonimizados de casos confirmados de COVID-19."

    def formatter(self, filename: str) -> dict[str, Any]:
        """Parse a COVID-19 filename.

        No metadata is extracted from COVID-19 file names at present:
        every name maps to the same empty record, so no inspection of
        ``filename`` is needed.

        Args:
            filename: File name to parse (currently unused).

        Returns:
            A new dict whose ``state``, ``year`` and ``month`` values are
            all ``None``.
        """
        return {"state": None, "year": None, "month": None}


AVAILABLE_DATABASES: list[type[Dataset]] = [
Expand All @@ -192,4 +411,5 @@ def formatter(self, filename: str) -> dict[str, Any]:
SIM,
SINAN,
SINASC,
COVID19,
]
Loading
Loading