Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"sphinx.ext.mathjax",
"sphinx.ext.viewcode",
"sphinx.ext.intersphinx",
"sphinx.ext.napoleon",
"nbsphinx",
]

Expand Down
272 changes: 261 additions & 11 deletions pysus/api/_impl/databases.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,45 @@ def _fetch_data(
show_progress: bool = True,
**kwargs,
) -> pd.DataFrame:
"""Query, download, and concatenate Parquet files for a given dataset."""
"""Query, download, and concatenate Parquet files for a given dataset.

Internally creates an async event loop, queries the PySUS API for matching
files, downloads them, and reads them into a single DataFrame.

Parameters
----------
dataset : str
Name of the dataset (e.g. ``"sinan"``, ``"sinasc"``).
group : str, optional
Group or disease code to filter by.
state : str, optional
Two-letter state abbreviation (e.g. ``"RJ"``).
year : int | list[int], optional
Year or list of years to fetch.
month : int | list[int], optional
Month or list of months to fetch.
show_progress : bool, optional
Whether to display a tqdm progress bar during download. Default is
``True``.
**kwargs
Additional arguments forwarded to :meth:`PySUS.read_parquet`.

Returns
-------
pd.DataFrame
Concatenated data from all matching Parquet files. Returns an empty
DataFrame when no files are found.

Raises
------
RuntimeError
If an event loop is already running but ``nest_asyncio`` is not
installed.
"""

async def _fetch():
"""Coroutine that performs the actual API query, download, and read."""

async with PySUS() as pysus:
years = [year] if isinstance(year, int) else (year or [None])
months = [month] if isinstance(month, int) else (month or [None])
Expand Down Expand Up @@ -157,7 +193,25 @@ def sinan(
year: int | list[int],
**kwargs,
) -> pd.DataFrame:
"""Fetch SINAN records for a given disease and year(s)."""
"""Fetch SINAN records for a given disease and year(s).

SINAN (Sistema de Informação de Agravos de Notificação) is the Brazilian
notifiable-disease information system.

Parameters
----------
disease : Literal
Disease code (e.g. ``"DENG"`` for dengue, ``"ZIKA"`` for zika).
year : int | list[int]
Year or list of years to fetch.
**kwargs
Additional arguments forwarded to :func:`_fetch_data`.

Returns
-------
pd.DataFrame
SINAN records for the specified disease and year(s).
"""
return _fetch_data(
dataset="sinan",
group=disease.upper(),
Expand All @@ -171,7 +225,27 @@ def sinasc(
group: str | None = None,
**kwargs,
) -> pd.DataFrame:
"""Fetch SINASC birth certificates for a given state, year(s), and group."""
"""Fetch SINASC birth certificates for a given state, year(s), and group.

SINASC (Sistema de Informação sobre Nascidos Vivos) is the Brazilian live
birth information system.

Parameters
----------
state : State
Two-letter state abbreviation (e.g. ``"RJ"``).
year : int | list[int]
Year or list of years to fetch.
group : str, optional
Additional grouping code.
**kwargs
Additional arguments forwarded to :func:`_fetch_data`.

Returns
-------
pd.DataFrame
SINASC birth records for the specified state, year(s), and group.
"""
return _fetch_data(
dataset="sinasc",
state=state.upper(),
Expand All @@ -186,7 +260,27 @@ def sim(
group: str | None = None,
**kwargs,
) -> pd.DataFrame:
"""Fetch SIM mortality records for a given state, year(s), and group."""
"""Fetch SIM mortality records for a given state, year(s), and group.

SIM (Sistema de Informação sobre Mortalidade) is the Brazilian mortality
information system.

Parameters
----------
state : State
Two-letter state abbreviation (e.g. ``"RJ"``).
year : int | list[int]
Year or list of years to fetch.
group : str, optional
Additional grouping code.
**kwargs
Additional arguments forwarded to :func:`_fetch_data`.

Returns
-------
pd.DataFrame
SIM mortality records for the specified state, year(s), and group.
"""
return _fetch_data(
dataset="sim",
state=state.upper(),
Expand All @@ -202,7 +296,29 @@ def sih(
group: str | None = None,
**kwargs,
) -> pd.DataFrame:
"""Fetch SIH hospital admissions for a state, year, month, and group."""
"""Fetch SIH hospital admissions for a state, year, month, and group.

SIH (Sistema de Informação Hospitalar) is the Brazilian hospital
admission information system.

Parameters
----------
state : State
Two-letter state abbreviation (e.g. ``"RJ"``).
year : int | list[int]
Year or list of years to fetch.
month : int | list[int]
Month or list of months to fetch.
group : str, optional
Additional grouping code.
**kwargs
Additional arguments forwarded to :func:`_fetch_data`.

Returns
-------
pd.DataFrame
SIH hospital admission records.
"""
return _fetch_data(
dataset="sih",
state=state.upper(),
Expand All @@ -219,7 +335,29 @@ def sia(
group: str | None = None,
**kwargs,
) -> pd.DataFrame:
"""Fetch SIA ambulatory care for a state, year, month, and group."""
"""Fetch SIA ambulatory care for a state, year, month, and group.

SIA (Sistema de Informação Ambulatorial) is the Brazilian ambulatory care
information system.

Parameters
----------
state : State
Two-letter state abbreviation (e.g. ``"RJ"``).
year : int | list[int]
Year or list of years to fetch.
month : int | list[int]
Month or list of months to fetch.
group : str, optional
Additional grouping code.
**kwargs
Additional arguments forwarded to :func:`_fetch_data`.

Returns
-------
pd.DataFrame
SIA ambulatory care records.
"""
return _fetch_data(
dataset="sia",
state=state.upper(),
Expand All @@ -235,7 +373,27 @@ def pni(
group: str | None = None,
**kwargs,
) -> pd.DataFrame:
"""Fetch PNI immunisation records for a given state, year(s), and group."""
"""Fetch PNI immunisation records for a given state, year(s), and group.

PNI (Programa Nacional de Imunizações) is the Brazilian national
immunisation programme.

Parameters
----------
state : State
Two-letter state abbreviation (e.g. ``"RJ"``).
year : int | list[int]
Year or list of years to fetch.
group : str, optional
Additional grouping code.
**kwargs
Additional arguments forwarded to :func:`_fetch_data`.

Returns
-------
pd.DataFrame
PNI immunisation records.
"""
return _fetch_data(
dataset="pni",
state=state.upper(),
Expand All @@ -249,7 +407,25 @@ def ibge(
group: str | None = None,
**kwargs,
) -> pd.DataFrame:
"""Fetch IBGE census data for given year(s) and optional group."""
"""Fetch IBGE census data for given year(s) and optional group.

IBGE (Instituto Brasileiro de Geografia e Estatística) provides census
and demographic data.

Parameters
----------
year : int | list[int]
Year or list of years to fetch.
group : str, optional
Additional grouping code.
**kwargs
Additional keyword arguments. Note: the current implementation does not
forward them to :func:`_fetch_data`.

Returns
-------
pd.DataFrame
IBGE census data for the specified year(s) and group.
"""
return _fetch_data(dataset="ibge", group=group, year=year)


Expand All @@ -260,7 +436,29 @@ def cnes(
group: str | None = None,
**kwargs,
) -> pd.DataFrame:
"""Fetch CNES health facilities for a state, year, month, and group."""
"""Fetch CNES health facilities for a state, year, month, and group.

CNES (Cadastro Nacional de Estabelecimentos de Saúde) is the Brazilian
registry of health-care facilities.

Parameters
----------
state : State
Two-letter state abbreviation (e.g. ``"RJ"``).
year : int | list[int]
Year or list of years to fetch.
month : int | list[int]
Month or list of months to fetch.
group : str, optional
Additional grouping code.
**kwargs
Additional arguments forwarded to :func:`_fetch_data`.

Returns
-------
pd.DataFrame
CNES health-facility records.
"""
return _fetch_data(
dataset="cnes",
state=state.upper(),
Expand All @@ -277,7 +475,29 @@ def ciha(
group: str | None = "CIHA",
**kwargs,
) -> pd.DataFrame:
"""Fetch CIHA hospitalisation records for state, year, month, and group."""
"""Fetch CIHA hospitalisation records for state, year, month, and group.

CIHA (Comunicação de Informação Hospitalar e Ambulatorial) provides
hospitalisation records.

Parameters
----------
state : State
Two-letter state abbreviation (e.g. ``"RJ"``).
year : int | list[int]
Year or list of years to fetch.
month : int | list[int]
Month or list of months to fetch.
group : str, optional
Additional grouping code. Default is ``"CIHA"``.
**kwargs
Additional arguments forwarded to :func:`_fetch_data`.

Returns
-------
pd.DataFrame
CIHA hospitalisation records.
"""
return _fetch_data(
dataset="ciha",
state=state.upper(),
Expand Down Expand Up @@ -306,9 +526,39 @@ def list_files(
month: int | list[int] | None = None,
**kwargs,
) -> pd.DataFrame:
"""List catalog files filtered by client, group, state, year, and month."""
"""List catalog files filtered by client, group, state, year, and month.

Queries the PySUS API metadata and returns a DataFrame with file name,
path, dataset, group, year, month, state, and last-modified timestamp for
every matching file without downloading the actual data.

Parameters
----------
dataset : Literal
Dataset name (e.g. ``"SINAN"``, ``"SINASC"``, etc.).
client : Literal["FTP", "DadosGov"], optional
Data source client to query.
group : str, optional
Group or disease code to filter by.
state : str, optional
Two-letter state abbreviation (e.g. ``"RJ"``).
year : int | list[int], optional
Year or list of years to filter by.
month : int | list[int], optional
Month or list of months to filter by.
**kwargs
Additional arguments forwarded to :meth:`PySUS.query`.

Returns
-------
pd.DataFrame
DataFrame with columns ``name``, ``path``, ``dataset``, ``group``,
``year``, ``month``, ``state``, and ``modify``.
"""

async def _list():
"""Coroutine that queries the PySUS API and builds the file list."""

async with PySUS() as pysus:
years = [year] if isinstance(year, int) else (year or [None])
months = [month] if isinstance(month, int) else (month or [None])
Expand Down
Loading
Loading