Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
103 changes: 103 additions & 0 deletions backend/maint-scripts/update_titles_from_latest_books.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#!/usr/bin/env python3
# ruff: noqa: T201

from typing import Any

from sqlalchemy import select
from sqlalchemy.orm import Session as OrmSession

from cms_backend import logger
from cms_backend.db import Session
from cms_backend.db.models import Book, Title
from cms_backend.db.title import get_title_by_id, title_is_missing_mandatory_metadata


def get_latest_book_for_title(session: OrmSession, title: Title) -> Book | None:
    """Return the preferred prod/staging book of ``title``, or None if none match.

    Only books attached to ``title`` whose location kind is "prod" or
    "staging" and which are not flagged as needing processing, being in
    error, or needing a file operation are considered.

    Assumes the returned book has passed all the checks done by the mill when
    it processes a zimfarm notification.
    """
    eligible = (
        Book.title_id == title.id,
        Book.location_kind.in_(["prod", "staging"]),
        Book.needs_processing.is_(False),
        Book.has_error.is_(False),
        Book.needs_file_operation.is_(False),
    )
    ordering = (
        # ascending location_kind is meant to let prod books take precedence
        # over staging ones
        Book.location_kind.asc(),
        # within the same location kind, the most recently created book wins
        Book.created_at.desc(),
    )
    query = select(Book).where(*eligible).order_by(*ordering).limit(1)
    return session.scalars(query).first()


def process_title(session: OrmSession, title: Title) -> tuple[bool, str]:
    """Fill in a title's metadata from its latest prod/staging book.

    Returns a ``(processed, reason)`` pair:

    - ``(False, reason)`` when the title is archived or has no matching book;
    - ``(True, "")`` otherwise, whether the title was actually updated or
      already had its mandatory metadata.
    """
    if title.archived:
        logger.info(f"Skipping archived title {title.id} ({title.name})")
        return (False, "Title is archived")

    book = get_latest_book_for_title(session, title)
    if not book:
        logger.info(f"No prod/staging books found for title {title.id} ({title.name})")
        return (False, "No prod/staging book found meet constraints")

    if not title_is_missing_mandatory_metadata(title):
        logger.info(f"No updates needed for title {title.id} ({title.name}) ")
        return (True, "")

    metadata = book.zim_metadata

    # (title attribute, zim metadata key) pairs; a missing key raises KeyError
    mandatory_fields = (
        ("title", "Title"),
        ("creator", "Creator"),
        ("publisher", "Publisher"),
        ("description", "Description"),
        ("language", "Language"),
        ("illustration_48x48_at_1", "Illustration_48x48@1"),
    )
    # same pairing, but a missing key simply yields None
    optional_fields = (
        ("long_description", "LongDescription"),
        ("license", "License"),
        ("relation", "Relation"),
        ("source", "Source"),
    )

    for attribute, key in mandatory_fields:
        setattr(title, attribute, metadata[key])
    for attribute, key in optional_fields:
        setattr(title, attribute, metadata.get(key))

    logger.info(f"✓ Updated title {title.id} ({title.name}) from book {book.id}")
    return (True, "")


def main():
    """Run ``process_title`` on every title and report what was skipped.

    All titles are handled inside one ``Session.begin()`` block
    (NOTE(review): presumably a single transaction committed on exit —
    confirm against ``cms_backend.db.Session``).

    ``process_title`` reports success both for titles it updated and for
    titles that needed no update, so the summary counts them as "processed"
    rather than "updated". Skipped titles are printed as a Markdown table
    together with the reason given by ``process_title``.
    """
    with Session.begin() as session:
        title_ids = session.scalars(select(Title.id)).all()
        logger.info(f"Found {len(title_ids)} titles to process")
        nb_titles_processed = 0
        nb_titles_skipped = 0
        reasons: list[dict[str, Any]] = []

        for title_id in title_ids:
            title = get_title_by_id(session, title_id=title_id)
            processed, reason = process_title(session, title)
            if processed:
                nb_titles_processed += 1
            else:
                nb_titles_skipped += 1
                reasons.append({title.name: reason})

        # "Processed", not "Updated": the count includes titles left untouched
        # because their mandatory metadata was already set.
        logger.info(
            f"Processed {nb_titles_processed} title(s), skipped "
            f"{nb_titles_skipped} title(s)"
        )

        if reasons:
            print("\nSkipped titles summary:")
            print("| Title Name | Reason |")
            print("|------------|--------|")
            for entry in reasons:
                for title_name, reason in entry.items():
                    print(f"| {title_name} | {reason} |")


if __name__ == "__main__":
main()
12 changes: 12 additions & 0 deletions backend/src/cms_backend/api/routes/fields.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import base64
from typing import Annotated, Any

from pydantic import (
Expand Down Expand Up @@ -35,10 +36,21 @@ def not_empty(value: str) -> str:
return value.strip()


def validate_base64(value: str) -> str:
    """Return ``value`` unchanged when it decodes as strict base64.

    Decoding is done with ``validate=True``, so characters outside the base64
    alphabet are rejected rather than ignored.

    Raises:
        ValueError: if decoding fails; the original error is chained.
    """
    try:
        base64.b64decode(value, validate=True)
    except Exception as decode_error:
        message = f"Invalid base64 string: {decode_error}"
        raise ValueError(message) from decode_error
    else:
        return value


# str run through the no_null_char validator after standard validation
# (NOTE(review): presumably rejects NUL characters — confirm in no_null_char).
NoNullCharString = Annotated[str, AfterValidator(no_null_char)]

# NoNullCharString additionally run through not_empty, which returns the
# stripped value.
NotEmptyString = Annotated[NoNullCharString, AfterValidator(not_empty)]

# Integer >= 0, wrapped by skip_validation
# (NOTE(review): wrapper behaviour is not visible here — confirm).
SkipField = Annotated[int, Field(ge=0), WrapValidator(skip_validation)]

# Integer between 1 and 200 inclusive, wrapped by skip_validation.
LimitFieldMax200 = Annotated[int, Field(ge=1, le=200), WrapValidator(skip_validation)]

# NotEmptyString that must also pass validate_base64.
Base64Str = Annotated[NotEmptyString, AfterValidator(validate_base64)]
69 changes: 42 additions & 27 deletions backend/src/cms_backend/api/routes/titles.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,12 @@
get_current_account_or_none,
require_permission,
)
from cms_backend.api.routes.fields import LimitFieldMax200, NotEmptyString, SkipField
from cms_backend.api.routes.fields import (
Base64Str,
LimitFieldMax200,
NotEmptyString,
SkipField,
)
from cms_backend.api.routes.http_errors import ForbiddenError
from cms_backend.api.routes.models import ListResponse, calculate_pagination_metadata
from cms_backend.db import gen_dbsession
Expand All @@ -19,7 +24,7 @@
from cms_backend.db.title import archive_title as db_archive_title
from cms_backend.db.title import archive_titles as db_archive_titles
from cms_backend.db.title import create_title as db_create_title
from cms_backend.db.title import create_title_full_schema
from cms_backend.db.title import create_title_full_schema, create_title_light_schema
from cms_backend.db.title import get_title_by_id as db_get_title_by_id
from cms_backend.db.title import get_title_by_name as db_get_title_by_name
from cms_backend.db.title import get_titles as db_get_titles
Expand Down Expand Up @@ -51,6 +56,16 @@ class RestoreTitlesSchema(BaseModel):

class BaseTitleCreateUpdateSchema(BaseModel):
collection_titles: list[BaseTitleCollectionSchema] | None = None
long_description: NotEmptyString | None = None
license: NotEmptyString | None = None
relation: NotEmptyString | None = None
source: NotEmptyString | None = None
title: NotEmptyString | None = None
creator: NotEmptyString | None = None
description: NotEmptyString | None = None
publisher: NotEmptyString | None = None
language: NotEmptyString | None = None
illustration_48x48_at_1: Base64Str | None = None

@model_validator(mode="after")
def validate_unique_collection_titles(self) -> Self:
Expand Down Expand Up @@ -133,13 +148,18 @@ def create_title(
name=title_data.name,
maturity=title_data.maturity,
collection_titles=title_data.collection_titles,
_title=title_data.title,
creator=title_data.creator,
publisher=title_data.publisher,
language=title_data.language,
illustration_48x48_at_1=title_data.illustration_48x48_at_1,
license_=title_data.license,
relation=title_data.relation,
source=title_data.source,
long_description=title_data.long_description,
description=title_data.description,
)
return TitleLightSchema(
id=title.id,
name=title.name,
maturity=title.maturity,
archived=title.archived,
)
return create_title_light_schema(title)


@router.patch(
Expand All @@ -151,20 +171,25 @@ def update_title(
title_data: TitleUpdateSchema,
session: OrmSession = Depends(gen_dbsession),
) -> TitleLightSchema:
"""Update a title's maturity and/or collection_titles"""
"""Update a title"""
title = db_update_title(
session,
title_id=title_id,
name=title_data.name,
maturity=title_data.maturity,
collection_titles=title_data.collection_titles,
_title=title_data.title,
creator=title_data.creator,
description=title_data.description,
long_description=title_data.long_description,
publisher=title_data.publisher,
language=title_data.language,
illustration_48x48_at_1=title_data.illustration_48x48_at_1,
license_=title_data.license,
relation=title_data.relation,
source=title_data.source,
)
return TitleLightSchema(
id=title.id,
name=title.name,
maturity=title.maturity,
archived=title.archived,
)
return create_title_light_schema(title)


@router.post(
Expand Down Expand Up @@ -210,12 +235,7 @@ def archive_title(
session,
title_identifier=title_id,
)
return TitleLightSchema(
id=title.id,
name=title.name,
maturity=title.maturity,
archived=title.archived,
)
return create_title_light_schema(title)


@router.patch(
Expand All @@ -231,9 +251,4 @@ def restore_archived_title(
session,
title_identifier=title_id,
)
return TitleLightSchema(
id=title.id,
name=title.name,
maturity=title.maturity,
archived=title.archived,
)
return create_title_light_schema(title)
18 changes: 11 additions & 7 deletions backend/src/cms_backend/api/routes/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ def build_library_xml(
library_elem.set("version", "20110515")

for entry in entries:
book, download_base_url, path, filename = entry
book, title, download_base_url, path, filename = entry
if not book.zim_metadata:
continue

Expand All @@ -30,19 +30,23 @@ def build_library_xml(

# Metadata from zim_metadata dict
zim_meta = book.zim_metadata
book_elem.set("title", zim_meta.get("Title", ""))
book_elem.set("description", zim_meta.get("Description", ""))
book_elem.set("language", zim_meta.get("Language", ""))
book_elem.set("creator", zim_meta.get("Creator", ""))
book_elem.set("publisher", zim_meta.get("Publisher", ""))
book_elem.set("title", title.title or zim_meta.get("Title", ""))
book_elem.set(
"description", title.description or zim_meta.get("Description", "")
)
book_elem.set("language", title.language or zim_meta.get("Language", ""))
book_elem.set("creator", title.creator or zim_meta.get("Creator", ""))
book_elem.set("publisher", title.publisher or zim_meta.get("Publisher", ""))
book_elem.set("name", zim_meta.get("Name", ""))
book_elem.set("date", zim_meta.get("Date", ""))

# always set tags to at least have special tags
tags = zim_meta.get("Tags", "")
book_elem.set("tags", ";".join(convert_tags(tags)))

favicon = zim_meta.get("Illustration_48x48@1", "")
favicon = title.illustration_48x48_at_1 or zim_meta.get(
"Illustration_48x48@1", ""
)
if favicon:
book_elem.set("favicon", favicon)
book_elem.set("faviconMimeType", "image/png")
Expand Down
32 changes: 32 additions & 0 deletions backend/src/cms_backend/db/book.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def create_book_full_schema(book: Book) -> BookFullSchema:
date=book.date,
deletion_date=book.deletion_date,
flavour=book.flavour,
issues=book.issues,
article_count=book.article_count,
media_count=book.media_count,
size=book.size,
Expand Down Expand Up @@ -339,6 +340,37 @@ def recover_book(session: OrmSession, book_id: UUID) -> Book:
return book


def get_differing_metadata_keys(book: Book) -> list[str]:
    """List the mandatory metadata keys whose value differs between a book and its title.

    Keys are reported using their zim metadata spelling (e.g. "Creator",
    "Illustration_48x48@1"), in a fixed order.

    Assumes book and title both have mandatory metadata set.
    Assumes that the book name and title name already match, thus aren't checked.

    Raises:
        ValueError: if the book has no associated title.
    """
    title = book.title
    if title is None:
        raise ValueError("Book has no associated title.")

    # zim metadata key -> name of the attribute holding that value on the title
    title_attribute_by_key = {
        "Title": "title",
        "Creator": "creator",
        "Publisher": "publisher",
        "Description": "description",
        "Language": "language",
        "Illustration_48x48@1": "illustration_48x48_at_1",
    }

    zim_metadata = book.zim_metadata
    from_book = {key: zim_metadata[key] for key in title_attribute_by_key}
    from_title = {
        key: getattr(title, attribute)
        for key, attribute in title_attribute_by_key.items()
    }

    return [key for key, value in from_book.items() if value != from_title[key]]


def update_book(session: OrmSession, book_id: UUID, *, flavour: str) -> Book:
book = get_book(session, book_id)
if book.location_kind == "deleted":
Expand Down
3 changes: 3 additions & 0 deletions backend/src/cms_backend/db/books.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ def get_books(
Book.name,
Book.date,
Book.flavour,
Book.issues,
).order_by(
Book.has_error.desc(),
Book.location_kind,
Expand Down Expand Up @@ -122,6 +123,7 @@ def get_books(
name=name,
date=date,
flavour=flavour,
issues=book_issues,
)
for (
book_id_result,
Expand All @@ -135,6 +137,7 @@ def get_books(
name,
date,
flavour,
book_issues,
) in session.execute(
stmt.offset(skip)
.limit(limit)
Expand Down
Loading
Loading