Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
- uses: jaxxstorm/action-install-gh-release@v1.10.0
with:
repo: houseabsolute/precious
tag: v0.9.0
tag: v0.10.0
cache: enable

# lint
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
# install dependencies
- uses: pdm-project/setup-pdm@v4
with:
python-version: 3.9
python-version: "3.10"
cache: true
cache-dependency-path: pylock.minimal.toml
- run: pdm install -G:all --lockfile pylock.minimal.toml
Expand Down
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ LDLite supports two modes of usage.

See the [Five Colleges Setup](https://github.com/Five-Colleges-Incorporated/ldlite-scripts) for an example of automating overnight data loads.

It is recommended to install the `psycopg[c]` package for optimal reliability and performance in a server context.

### Usage for ad-hoc local querying

To install LDLite or upgrade to the latest version:
Expand All @@ -31,6 +33,11 @@ $ python -m pip install --upgrade ldlite
(On some systems it might be `python3` rather than `python`.)
Check out the [migration guide](./MIGRATING.md) for more information about major version upgrades.

*Optional:* If you intend to use the `connect_db_postgresql()` method, install the binary provider:
```
$ python -m pip install --upgrade "psycopg[binary]"
```

To extract and transform data:

```python
Expand Down
8 changes: 4 additions & 4 deletions environment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ channels:
- conda-forge
- nodefaults
dependencies:
- python>=3.7,<3.10
- pdm==2.25.4
- postgresql>=17
- precious==0.9.0
- python>=3.10,<3.11
- pdm==2.25.5
- precious==0.10.0
- libpq>=13.0
259 changes: 114 additions & 145 deletions pylock.maximal.toml

Large diffs are not rendered by default.

591 changes: 230 additions & 361 deletions pylock.minimal.toml

Large diffs are not rendered by default.

515 changes: 199 additions & 316 deletions pylock.toml

Large diffs are not rendered by default.

87 changes: 39 additions & 48 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,33 +7,29 @@ name = "ldlite"
version = "3.2.0"
description = "Lightweight analytics tool for FOLIO services"
authors = [
{name = "Katherine Bargar", email = "kbargar@fivecolleges.edu"},
{name = "Nassib Nassar", email = "nassib@indexdata.com"},
{ name = "Katherine Bargar", email = "kbargar@fivecolleges.edu" },
{ name = "Nassib Nassar", email = "nassib@indexdata.com" },
]
dependencies = [
# 0.6.1 was the original pinned dependency version
# 1.4 slightly changes data formats and fails tests
"duckdb>=0.6.1,<1.4",
# 2.9.5 was the original pinned dependency version
"psycopg2-binary>=2.9.5",
# 4.64.1 was the original pinned dependency version
"tqdm>=4.64.1",
# 3.0.6 was the original pinned dependency version
"XlsxWriter>=3.0.6",
# 0.2 has query parameter handling, 0.2.3 is a required bug fix version
"httpx-folio>=0.2.3",
# 3.9 introduces orjson.Fragment
"orjson>=3.9",
# 3.2 changes the sql.SQL.format signature
"psycopg>=3.2.0",
# 0.7.0 makes numpy an optional dependency; 0.7.1 fixes a segfault
# 1.4 has a number of regressions, so it is excluded until fixes are released
"duckdb>=0.7.1,<1.4",
# 4.64 is the first release with type stubs
"tqdm>=4.64.0",
# 0.2 has query parameter handling, 0.2.3 is a required bug fix version
"httpx-folio>=0.2.3",
# 3.9 introduces orjson.Fragment
"orjson>=3.9",
# 3.2 changes the sql.SQL.format signature
"psycopg>=3.2.0",
]
requires-python = ">=3.9"
requires-python = ">=3.10"
readme = "README.md"
license = {text = "Apache-2.0"}
license = { text = "Apache-2.0" }
classifiers = [
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
"License :: OSI Approved :: Apache Software License",
"Operating System :: OS Independent",
"Programming Language :: Python :: 3",
]

[project.urls]
Expand All @@ -42,19 +38,14 @@ Homepage = "https://github.com/library-data-platform/ldlite"

[tool.pytest.ini_options]
pythonpath = "src"
addopts = [
"--import-mode=importlib",
]
addopts = ["--import-mode=importlib"]

[tool.mypy]
python_version = "3.9"
python_version = "3.10"
strict = true
[[tool.mypy.overrides]]
module = ["xlsxwriter.*"]
ignore_missing_imports = true

[tool.ruff]
target-version = "py39"
target-version = "py310"
[tool.ruff.lint]
select = ["ALL"]
fixable = ["ALL"]
Expand All @@ -63,37 +54,37 @@ pydocstyle.convention = "google"
[tool.ruff.lint.per-file-ignores]
"examples/*" = ["D", "INP001", "T201", "S106", "ERA001", "PERF203"]
"tests/*" = ["D", "S", "INP001", "N813", "PLC0415"]
"src/ldlite/{_csv.py,_jsonx.py,_select.py,_xlsx.py}" = ["S608"]
"src/ldlite/{_csv.py,_jsonx.py,_select.py}" = ["S608"]
"src/ldlite/__init__.py" = ["T201"]
[tool.ruff.lint.flake8-annotations]
mypy-init-return = true

[tool.coverage.run]
branch = true
include = ["src/**"]
omit = ["src/ldlite/_xlsx.py"]

[tool.pdm]
distribution = true
package-dir = "src"

[tool.pdm.scripts]
test.composite = ["rm -f .coverage", "python -m coverage run -m pytest -vv {args}", "python -m coverage report"]
lock.composite = ["rm -f pylock.toml pylock.maximal.toml pylock.minimal.toml", "pdm lock --python=3.9", "pdm lock --lockfile pylock.maximal.toml --python=3.13", "pdm lock --strategy direct_minimal_versions --lockfile pylock.minimal.toml --python=3.9"]
test-install.composite = ["pdm sync --lockfile=pylock.minimal.toml", "pdm sync --lockfile=pylock.toml"]

[dependency-groups]
lint = [
"mypy==1.17.0",
"ruff==0.12.5",
"pre-commit-hooks==5.0.0",
test.composite = [
"rm -f .coverage",
"python -m coverage run -m pytest -vv {args}",
"python -m coverage report",
]
test = [
"pytest>=8.4.1",
"pytest-cases>=3.9.1",
"coverage>=7.9.2",
lock.composite = [
"rm -f pylock.toml pylock.maximal.toml pylock.minimal.toml",
"pdm lock --python=3.10",
"pdm lock --lockfile pylock.maximal.toml --python=3.13",
"pdm lock --strategy direct_minimal_versions --lockfile pylock.minimal.toml --python=3.10",
]
types = [
"types-psycopg2>=2.9.21.20",
"types-tqdm>=4.66.0.5",
test-install.composite = [
"pdm sync --lockfile=pylock.minimal.toml",
"pdm sync --lockfile=pylock.toml",
]

[dependency-groups]
lint = ["mypy==1.18.2", "ruff==0.13.1", "pre-commit-hooks==6.0.0"]
test = ["pytest>=8.4.2", "pytest-cases>=3.9.1", "coverage>=7.10.7"]
types = ["types-tqdm>=4.64.0"]
100 changes: 8 additions & 92 deletions src/ldlite/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,12 @@

"""

from __future__ import annotations

import sqlite3
import sys
from itertools import count
from typing import TYPE_CHECKING, NoReturn, cast

import duckdb
import psycopg
import psycopg2
from httpx_folio.auth import FolioParams
from tqdm import tqdm

Expand All @@ -59,7 +55,6 @@
autocommit,
sqlid,
)
from ._xlsx import to_xlsx

if TYPE_CHECKING:
from _typeshed import dbapi
Expand Down Expand Up @@ -140,15 +135,13 @@ def _connect_db_duckdb(

return db.cursor()

def connect_db_postgresql(self, dsn: str) -> psycopg2.extensions.connection:
def connect_db_postgresql(self, dsn: str) -> psycopg.Connection:
"""Connects to a PostgreSQL database for storing data.

The data source name is specified by *dsn*. This method returns a
connection to the database which can be used to submit SQL queries.
The returned connection defaults to autocommit mode.

This will return a psycopg3 connection in the next major release of LDLite.

Example:
db = ld.connect_db_postgresql(dsn='dbname=ld host=localhost user=ldlite')

Expand All @@ -161,43 +154,11 @@ def connect_db_postgresql(self, dsn: str) -> psycopg2.extensions.connection:
lambda: cast("dbapi.DBAPIConnection", psycopg.connect(dsn)),
)

ret_db = psycopg2.connect(dsn)
ret_db = psycopg.connect(dsn)
ret_db.rollback()
ret_db.set_session(autocommit=True)
ret_db.set_autocommit(True)
return ret_db

def experimental_connect_db_sqlite(
self,
filename: str | None = None,
) -> sqlite3.Connection:
"""Deprecated; this will be removed in the next major release of LDLite.

Connects to an embedded SQLite database for storing data.

The optional *filename* designates a local file containing the SQLite
database or where the database will be created if it does not exist.
If *filename* is not specified, the database will be stored in memory
and will not be persisted to disk.

This method returns a connection to the database which can be used to
submit SQL queries.

Example:
db = ld.connect_db_sqlite(filename='ldlite.db')

"""
self.dbtype = DBType.SQLITE
fn = filename if filename is not None else "file::memory:?cache=shared"
self.db = sqlite3.connect(fn)
self._db = DBTypeDatabase(
DBType.SQLITE,
lambda: cast("dbapi.DBAPIConnection", sqlite3.connect(fn)),
)

db = sqlite3.connect(fn)
autocommit(db, self.dbtype, True)
return self.db

def _check_folio(self) -> None:
if self._folio is None:
msg = "connection to folio not configured: use connect_folio()"
Expand Down Expand Up @@ -242,8 +203,6 @@ def drop_tables(self, table: str) -> None:
schema_table = table.strip().split(".")
if len(schema_table) != 1 and len(schema_table) != 2:
raise ValueError("invalid table name: " + table)
if len(schema_table) == 2 and self.dbtype == DBType.SQLITE:
table = schema_table[0] + "_" + schema_table[1]
prefix = Prefix(table)
self._db.drop_prefix(prefix)

Expand Down Expand Up @@ -292,10 +251,7 @@ def query( # noqa: C901, PLR0912, PLR0913, PLR0915
"""Submits a query to a FOLIO module, and transforms and stores the result.

The retrieved result is stored in *table* within the reporting
database. the *table* name may include a schema name;
however, if the database is SQLite, which does not support
schemas, the schema name will be added to the table name as a
prefix.
database. The *table* name may include a schema name.

The *path* parameter is the request path.

Expand Down Expand Up @@ -348,8 +304,6 @@ def query( # noqa: C901, PLR0912, PLR0913, PLR0915
if self.db is None or self._db is None:
self._check_db()
return []
if len(schema_table) == 2 and self.dbtype == DBType.SQLITE:
table = schema_table[0] + "_" + schema_table[1]
prefix = Prefix(table)
if not self._quiet:
print("ldlite: querying: " + path, file=sys.stderr)
Expand All @@ -367,6 +321,10 @@ def query( # noqa: C901, PLR0912, PLR0913, PLR0915
if self._verbose:
print("ldlite: estimated row count: " + str(total), file=sys.stderr)

class PbarNoop:
def update(self, _: int) -> None: ...
def close(self) -> None: ...

p_count = count(1)
processed = 0
pbar: tqdm | PbarNoop # type:ignore[type-arg]
Expand All @@ -381,11 +339,6 @@ def query( # noqa: C901, PLR0912, PLR0913, PLR0915
bar_format="{desc} {bar}{postfix}",
)
else:

class PbarNoop:
def update(self, _: int) -> None: ...
def close(self) -> None: ...

pbar = PbarNoop()

def on_processed() -> bool:
Expand Down Expand Up @@ -547,43 +500,6 @@ def to_csv(self) -> NoReturn: # pragma: nocover
msg = "to_csv() is no longer supported: use export_csv()"
raise ValueError(msg)

def export_excel(
self,
filename: str,
table: str,
header: bool = True,
) -> None: # pragma: nocover
"""Deprecated; this will be removed in the next major release of LDLite.

Export a table in the reporting database to an Excel file.

All rows of *table* are exported to *filename*, or *filename*.xlsx if
*filename* does not have an extension.

If *header* is True (the default), the worksheet will begin with a row
containing the column names.

Example:
ld.export_excel(table='g', filename='g')

"""
if self.db is None:
self._check_db()
return

autocommit(self.db, self.dbtype, False)
try:
to_xlsx(self.db, self.dbtype, table, filename, header)
if (pgdb := as_postgres(self.db, self.dbtype)) is not None:
pgdb.rollback()
finally:
autocommit(self.db, self.dbtype, True)

def to_xlsx(self) -> NoReturn: # pragma: nocover
"""Deprecated; use export_excel()."""
msg = "to_xlsx() is no longer supported: use export_excel()"
raise ValueError(msg)

def verbose(self, enable: bool) -> None:
"""Configures verbose output.

Expand Down
Loading
Loading