Skip to content

Commit 380538d

Browse files
authored
Merge pull request #691 from VariantEffect/release-2026.1.2
Release 2026.1.2
2 parents 19b77c4 + c93b38e commit 380538d

12 files changed

Lines changed: 723 additions & 109 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "mavedb"
7-
version = "2026.1.1"
7+
version = "2026.1.2"
88
description = "API for MaveDB, the database of Multiplexed Assays of Variant Effect."
99
license = "AGPL-3.0-only"
1010
readme = "README.md"

src/mavedb/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
logger = module_logging.getLogger(__name__)
77

88
__project__ = "mavedb-api"
9-
__version__ = "2026.1.1"
9+
__version__ = "2026.1.2"
1010

1111
logger.info(f"MaveDB {__version__}")
1212

src/mavedb/db/session.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,15 @@
99
DB_DATABASE_NAME = os.getenv("DB_DATABASE_NAME")
1010
DB_USERNAME = os.getenv("DB_USERNAME")
1111
DB_PASSWORD = os.getenv("DB_PASSWORD")
12+
DB_POOL_SIZE = int(os.getenv("DB_POOL_SIZE", "5"))
13+
DB_MAX_OVERFLOW = int(os.getenv("DB_MAX_OVERFLOW", "10"))
1214

13-
# DB_URL = "sqlite:///./sql_app.db"
1415
DB_URL = f"postgresql://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_DATABASE_NAME}"
1516

1617
engine = create_engine(
17-
# For PostgreSQL:
18-
DB_URL
19-
# For SQLite:
20-
# DB_URL, connect_args={"check_same_thread": False}
18+
DB_URL,
19+
pool_size=DB_POOL_SIZE,
20+
max_overflow=DB_MAX_OVERFLOW,
21+
pool_pre_ping=True,
2122
)
2223
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

src/mavedb/lib/score_sets.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -238,18 +238,27 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
238238
score_sets: list[ScoreSet] = (
239239
query.join(ScoreSet.experiment)
240240
.options(
241+
# Use selectinload for ALL relationships loaded via the main query. The presence of
242+
# contains_eager disables SQLAlchemy's subquery-wrapping logic for the ENTIRE query,
243+
# not just the relationships nested inside it. This means any joinedload that adds a
244+
# LEFT OUTER JOIN to the main SQL query — even for many-to-one relationships — can
245+
# corrupt the LIMIT clause by applying it to joined rows rather than unique score sets,
246+
# causing fewer results than expected and suppressing the count query fallback.
247+
# The only JOINs that should remain in the main query are the explicit experiment
248+
# INNER JOIN (required by contains_eager) and the superseding score set LEFT OUTER JOIN
249+
# added by the filter builder.
241250
contains_eager(ScoreSet.experiment).options(
242-
joinedload(Experiment.experiment_set),
243-
joinedload(Experiment.keyword_objs).joinedload(
251+
selectinload(Experiment.experiment_set),
252+
selectinload(Experiment.keyword_objs).joinedload(
244253
ExperimentControlledKeywordAssociation.controlled_keyword
245254
),
246-
joinedload(Experiment.created_by),
247-
joinedload(Experiment.modified_by),
248-
joinedload(Experiment.doi_identifiers),
249-
joinedload(Experiment.publication_identifier_associations).joinedload(
255+
selectinload(Experiment.created_by),
256+
selectinload(Experiment.modified_by),
257+
selectinload(Experiment.doi_identifiers),
258+
selectinload(Experiment.publication_identifier_associations).joinedload(
250259
ExperimentPublicationIdentifierAssociation.publication
251260
),
252-
joinedload(Experiment.raw_read_identifiers),
261+
selectinload(Experiment.raw_read_identifiers),
253262
selectinload(Experiment.score_sets).options(
254263
joinedload(ScoreSet.doi_identifiers),
255264
joinedload(ScoreSet.publication_identifier_associations).joinedload(
@@ -264,12 +273,12 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
264273
),
265274
),
266275
),
267-
joinedload(ScoreSet.license),
268-
joinedload(ScoreSet.doi_identifiers),
269-
joinedload(ScoreSet.publication_identifier_associations).joinedload(
276+
selectinload(ScoreSet.license),
277+
selectinload(ScoreSet.doi_identifiers),
278+
selectinload(ScoreSet.publication_identifier_associations).joinedload(
270279
ScoreSetPublicationIdentifierAssociation.publication
271280
),
272-
joinedload(ScoreSet.target_genes).options(
281+
selectinload(ScoreSet.target_genes).options(
273282
joinedload(TargetGene.ensembl_offset).joinedload(EnsemblOffset.identifier),
274283
joinedload(TargetGene.refseq_offset).joinedload(RefseqOffset.identifier),
275284
joinedload(TargetGene.uniprot_offset).joinedload(UniprotOffset.identifier),
@@ -292,7 +301,7 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
292301
# query.
293302
score_sets = score_sets[: search.limit]
294303
count_query = db.query(ScoreSet)
295-
build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search)
304+
count_query = build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search)
296305
num_score_sets = count_query.order_by(None).limit(None).count()
297306

298307
save_to_logging_context({"matching_resources": num_score_sets})

src/mavedb/routers/experiments.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -195,10 +195,18 @@ def get_experiment_score_sets(
195195
.all()
196196
)
197197

198-
filter_superseded_score_set_tails = [
199-
find_superseded_score_set_tail(score_set, Action.READ, user_data) for score_set in score_set_result
200-
]
201-
filtered_score_sets = [score_set for score_set in filter_superseded_score_set_tails if score_set is not None]
198+
# Multiple chain heads can resolve to the same visible ancestor via find_superseded_score_set_tail
199+
# (e.g. when several private superseding score sets all trace back to the same published score set).
200+
# Deduplicate by ID to avoid returning the same score set more than once.
201+
seen_ids: set[int] = set()
202+
filtered_score_sets: list[ScoreSet] = []
203+
for ss in score_set_result:
204+
tail = find_superseded_score_set_tail(ss, Action.READ, user_data)
205+
tail_id = tail.id if tail is not None else None
206+
if tail is not None and tail_id is not None and tail_id not in seen_ids:
207+
seen_ids.add(tail_id)
208+
filtered_score_sets.append(tail)
209+
202210
if not filtered_score_sets:
203211
save_to_logging_context({"associated_resources": []})
204212
logger.info(msg="No score sets are associated with the requested experiment.", extra=logging_context())

src/mavedb/routers/score_calibrations.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from mavedb import deps
88
from mavedb.lib.authentication import get_current_user
9-
from mavedb.lib.authorization import require_current_user_with_email
9+
from mavedb.lib.authorization import require_current_user, require_current_user_with_email
1010
from mavedb.lib.flexible_model_loader import json_or_form_loader
1111
from mavedb.lib.logging import LoggedRoute
1212
from mavedb.lib.logging.context import (
@@ -31,14 +31,15 @@
3131
from mavedb.models.score_calibration import ScoreCalibration
3232
from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification
3333
from mavedb.models.score_set import ScoreSet
34+
from mavedb.routers.shared import ACCESS_CONTROL_ERROR_RESPONSES, PUBLIC_ERROR_RESPONSES
3435
from mavedb.view_models import score_calibration
3536

3637
logger = logging.getLogger(__name__)
3738

3839
router = APIRouter(
3940
prefix="/api/v1/score-calibrations",
4041
tags=["Score Calibrations"],
41-
responses={404: {"description": "Not found"}},
42+
responses={**PUBLIC_ERROR_RESPONSES},
4243
route_class=LoggedRoute,
4344
)
4445

@@ -54,6 +55,27 @@
5455
)
5556

5657

58+
@router.get(
59+
"/me",
60+
status_code=200,
61+
response_model=list[score_calibration.ScoreCalibrationWithScoreSetUrn],
62+
responses={**ACCESS_CONTROL_ERROR_RESPONSES},
63+
summary="List my calibrations",
64+
)
65+
def list_my_calibrations(
66+
*,
67+
db: Session = Depends(deps.get_db),
68+
user_data: UserData = Depends(require_current_user),
69+
) -> list[ScoreCalibration]:
70+
"""List all score calibrations created by the current user."""
71+
return (
72+
db.query(ScoreCalibration)
73+
.filter(ScoreCalibration.created_by_id == user_data.user.id)
74+
.options(selectinload(ScoreCalibration.score_set).selectinload(ScoreSet.contributors))
75+
.all()
76+
)
77+
78+
5779
@router.get(
5880
"/{urn}",
5981
response_model=score_calibration.ScoreCalibrationWithScoreSetUrn,

src/mavedb/routers/score_sets.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,86 @@ def search_my_score_sets(
672672
return {"score_sets": enriched_score_sets, "num_score_sets": num_score_sets}
673673

674674

675+
RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT = 20
676+
677+
678+
@router.get(
679+
"/score-sets/recently-published",
680+
status_code=200,
681+
response_model=list[score_set.ScoreSet],
682+
response_model_exclude_none=True,
683+
summary="List recently published score sets",
684+
)
685+
def list_recently_published_score_sets(
686+
limit: int = Query(
687+
default=10,
688+
ge=1,
689+
le=RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT,
690+
description=f"Number of score sets to return (maximum {RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT}).",
691+
),
692+
db: Session = Depends(deps.get_db),
693+
user_data: Optional[UserData] = Depends(get_current_user),
694+
) -> Any:
695+
"""
696+
Return the most recently published score sets, ordered by publication date descending.
697+
"""
698+
save_to_logging_context({"requested_resource": "recently-published", "limit": limit})
699+
700+
items = (
701+
db.query(ScoreSet)
702+
.filter(ScoreSet.published_date.isnot(None), ScoreSet.private.is_(False))
703+
.order_by(ScoreSet.published_date.desc(), ScoreSet.urn.desc())
704+
.limit(limit)
705+
.all()
706+
)
707+
708+
result = []
709+
for item in items:
710+
if not has_permission(user_data, item, Action.READ).permitted:
711+
continue
712+
if (
713+
item.superseding_score_set
714+
and not has_permission(user_data, item.superseding_score_set, Action.READ).permitted
715+
):
716+
item.superseding_score_set = None
717+
enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data)
718+
result.append(score_set.ScoreSet.model_validate(item).copy(update={"experiment": enriched_experiment}))
719+
720+
return result
721+
722+
723+
@router.get(
724+
"/score-sets/",
725+
status_code=200,
726+
response_model=list[score_set.ScoreSet],
727+
responses={**ACCESS_CONTROL_ERROR_RESPONSES},
728+
response_model_exclude_none=True,
729+
summary="Fetch score sets by URN list",
730+
)
731+
async def show_score_sets(
732+
*,
733+
urns: str = Query(..., description="Comma-separated list of score set URNs"),
734+
db: Session = Depends(deps.get_db),
735+
user_data: UserData = Depends(get_current_user),
736+
) -> Any:
737+
"""
738+
Fetch score sets identified by a list of URNs.
739+
"""
740+
urn_list = [urn.strip() for urn in urns.split(",") if urn.strip()]
741+
if not urn_list:
742+
raise HTTPException(status_code=422, detail="At least one URN is required")
743+
744+
save_to_logging_context({"requested_resource": urn_list})
745+
response_items: list[score_set.ScoreSet] = []
746+
for urn in urn_list:
747+
item = await fetch_score_set_by_urn(db, urn, user_data, None, False)
748+
enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data)
749+
response_item = score_set.ScoreSet.model_validate(item).copy(update={"experiment": enriched_experiment})
750+
response_items.append(response_item)
751+
752+
return response_items
753+
754+
675755
@router.get(
676756
"/score-sets/{urn}",
677757
status_code=200,

src/mavedb/server_main.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from fastapi.middleware.cors import CORSMiddleware
1010
from fastapi.middleware.gzip import GZipMiddleware
1111
from fastapi.openapi.utils import get_openapi
12+
from pydantic.json_schema import models_json_schema
1213
from sqlalchemy.orm import configure_mappers
1314
from starlette.requests import Request
1415
from starlette.responses import JSONResponse
@@ -240,6 +241,23 @@ def customize_openapi_schema():
240241
variants.metadata,
241242
]
242243

244+
# ScoreCalibrationModify (and its sub-models) are used in the PUT /score-calibrations/{urn}
245+
# endpoint's openapi_extra $ref, but FastAPI only registers schemas it discovers through
246+
# direct Body() parameters or response_model — not through Depends(). The flexible_model_loader
247+
# pattern wraps the model in a generic async function (return type `T`), so FastAPI never sees
248+
# the concrete type and never adds it to components/schemas. We register those missing schemas
249+
# here explicitly to keep the generated OpenAPI spec valid. Eventually, this schema may be
250+
# registered in other endpoints and this workaround can be removed, but for now this is the only
251+
# endpoint where we use the ScoreCalibrationModify model.
252+
from mavedb.view_models.score_calibration import ScoreCalibrationModify
253+
254+
_, extra_schemas = models_json_schema(
255+
[(ScoreCalibrationModify, "validation")],
256+
ref_template="#/components/schemas/{model}",
257+
)
258+
for name, schema in extra_schemas.get("$defs", {}).items():
259+
openapi_schema["components"]["schemas"].setdefault(name, schema)
260+
243261
app.openapi_schema = openapi_schema
244262
return app.openapi_schema
245263

tests/helpers/util/score_set.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,9 @@ def create_seq_score_set_with_variants(
165165
count_columns_metadata_json_path,
166166
)
167167

168-
assert score_set["numVariants"] == 3, (
169-
f"Could not create sequence based score set with variants within experiment {experiment_urn}"
170-
)
168+
assert (
169+
score_set["numVariants"] == 3
170+
), f"Could not create sequence based score set with variants within experiment {experiment_urn}"
171171

172172
jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema())
173173
return score_set
@@ -196,9 +196,9 @@ def create_acc_score_set_with_variants(
196196
count_columns_metadata_json_path,
197197
)
198198

199-
assert score_set["numVariants"] == 3, (
200-
f"Could not create sequence based score set with variants within experiment {experiment_urn}"
201-
)
199+
assert (
200+
score_set["numVariants"] == 3
201+
), f"Could not create sequence based score set with variants within experiment {experiment_urn}"
202202

203203
jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema())
204204
return score_set

0 commit comments

Comments (0)