Skip to content

Commit 380538d

Browse files
authored
Merge pull request #691 from VariantEffect/release-2026.1.2
Release 2026.1.2
2 parents 19b77c4 + c93b38e commit 380538d

12 files changed

Lines changed: 723 additions & 109 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
44

55
[tool.poetry]
66
name = "mavedb"
7-
version = "2026.1.1"
7+
version = "2026.1.2"
88
description = "API for MaveDB, the database of Multiplexed Assays of Variant Effect."
99
license = "AGPL-3.0-only"
1010
readme = "README.md"

src/mavedb/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
logger = module_logging.getLogger(__name__)
77

88
__project__ = "mavedb-api"
9-
__version__ = "2026.1.1"
9+
__version__ = "2026.1.2"
1010

1111
logger.info(f"MaveDB {__version__}")
1212

src/mavedb/db/session.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,15 @@
99
DB_DATABASE_NAME = os.getenv("DB_DATABASE_NAME")
1010
DB_USERNAME = os.getenv("DB_USERNAME")
1111
DB_PASSWORD = os.getenv("DB_PASSWORD")
12+
DB_POOL_SIZE = int(os.getenv("DB_POOL_SIZE", "5"))
13+
DB_MAX_OVERFLOW = int(os.getenv("DB_MAX_OVERFLOW", "10"))
1214

13-
# DB_URL = "sqlite:///./sql_app.db"
1415
DB_URL = f"postgresql://{DB_USERNAME}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_DATABASE_NAME}"
1516

1617
engine = create_engine(
17-
# For PostgreSQL:
18-
DB_URL
19-
# For SQLite:
20-
# DB_URL, connect_args={"check_same_thread": False}
18+
DB_URL,
19+
pool_size=DB_POOL_SIZE,
20+
max_overflow=DB_MAX_OVERFLOW,
21+
pool_pre_ping=True,
2122
)
2223
SessionLocal = sessionmaker(autocommit=False, autoflush=False, bind=engine)

src/mavedb/lib/score_sets.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -238,18 +238,27 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
238238
score_sets: list[ScoreSet] = (
239239
query.join(ScoreSet.experiment)
240240
.options(
241+
# Use selectinload for ALL relationships loaded via the main query. The presence of
242+
# contains_eager disables SQLAlchemy's subquery-wrapping logic for the ENTIRE query,
243+
# not just the relationships nested inside it. This means any joinedload that adds a
244+
# LEFT OUTER JOIN to the main SQL query — even for many-to-one relationships — can
245+
# corrupt the LIMIT clause by applying it to joined rows rather than unique score sets,
246+
# causing fewer results than expected and suppressing the count query fallback.
247+
# The only JOINs that should remain in the main query are the explicit experiment
248+
# INNER JOIN (required by contains_eager) and the superseding score set LEFT OUTER JOIN
249+
# added by the filter builder.
241250
contains_eager(ScoreSet.experiment).options(
242-
joinedload(Experiment.experiment_set),
243-
joinedload(Experiment.keyword_objs).joinedload(
251+
selectinload(Experiment.experiment_set),
252+
selectinload(Experiment.keyword_objs).joinedload(
244253
ExperimentControlledKeywordAssociation.controlled_keyword
245254
),
246-
joinedload(Experiment.created_by),
247-
joinedload(Experiment.modified_by),
248-
joinedload(Experiment.doi_identifiers),
249-
joinedload(Experiment.publication_identifier_associations).joinedload(
255+
selectinload(Experiment.created_by),
256+
selectinload(Experiment.modified_by),
257+
selectinload(Experiment.doi_identifiers),
258+
selectinload(Experiment.publication_identifier_associations).joinedload(
250259
ExperimentPublicationIdentifierAssociation.publication
251260
),
252-
joinedload(Experiment.raw_read_identifiers),
261+
selectinload(Experiment.raw_read_identifiers),
253262
selectinload(Experiment.score_sets).options(
254263
joinedload(ScoreSet.doi_identifiers),
255264
joinedload(ScoreSet.publication_identifier_associations).joinedload(
@@ -264,12 +273,12 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
264273
),
265274
),
266275
),
267-
joinedload(ScoreSet.license),
268-
joinedload(ScoreSet.doi_identifiers),
269-
joinedload(ScoreSet.publication_identifier_associations).joinedload(
276+
selectinload(ScoreSet.license),
277+
selectinload(ScoreSet.doi_identifiers),
278+
selectinload(ScoreSet.publication_identifier_associations).joinedload(
270279
ScoreSetPublicationIdentifierAssociation.publication
271280
),
272-
joinedload(ScoreSet.target_genes).options(
281+
selectinload(ScoreSet.target_genes).options(
273282
joinedload(TargetGene.ensembl_offset).joinedload(EnsemblOffset.identifier),
274283
joinedload(TargetGene.refseq_offset).joinedload(RefseqOffset.identifier),
275284
joinedload(TargetGene.uniprot_offset).joinedload(UniprotOffset.identifier),
@@ -292,7 +301,7 @@ def search_score_sets(db: Session, owner_or_contributor: Optional[User], search:
292301
# query.
293302
score_sets = score_sets[: search.limit]
294303
count_query = db.query(ScoreSet)
295-
build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search)
304+
count_query = build_search_score_sets_query_filter(db, count_query, owner_or_contributor, search)
296305
num_score_sets = count_query.order_by(None).limit(None).count()
297306

298307
save_to_logging_context({"matching_resources": num_score_sets})

src/mavedb/routers/experiments.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -195,10 +195,18 @@ def get_experiment_score_sets(
195195
.all()
196196
)
197197

198-
filter_superseded_score_set_tails = [
199-
find_superseded_score_set_tail(score_set, Action.READ, user_data) for score_set in score_set_result
200-
]
201-
filtered_score_sets = [score_set for score_set in filter_superseded_score_set_tails if score_set is not None]
198+
# Multiple chain heads can resolve to the same visible ancestor via find_superseded_score_set_tail
199+
# (e.g. when several private superseding score sets all trace back to the same published score set).
200+
# Deduplicate by ID to avoid returning the same score set more than once.
201+
seen_ids: set[int] = set()
202+
filtered_score_sets: list[ScoreSet] = []
203+
for ss in score_set_result:
204+
tail = find_superseded_score_set_tail(ss, Action.READ, user_data)
205+
tail_id = tail.id if tail is not None else None
206+
if tail is not None and tail_id is not None and tail_id not in seen_ids:
207+
seen_ids.add(tail_id)
208+
filtered_score_sets.append(tail)
209+
202210
if not filtered_score_sets:
203211
save_to_logging_context({"associated_resources": []})
204212
logger.info(msg="No score sets are associated with the requested experiment.", extra=logging_context())

src/mavedb/routers/score_calibrations.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
from mavedb import deps
88
from mavedb.lib.authentication import get_current_user
9-
from mavedb.lib.authorization import require_current_user_with_email
9+
from mavedb.lib.authorization import require_current_user, require_current_user_with_email
1010
from mavedb.lib.flexible_model_loader import json_or_form_loader
1111
from mavedb.lib.logging import LoggedRoute
1212
from mavedb.lib.logging.context import (
@@ -31,14 +31,15 @@
3131
from mavedb.models.score_calibration import ScoreCalibration
3232
from mavedb.models.score_calibration_functional_classification import ScoreCalibrationFunctionalClassification
3333
from mavedb.models.score_set import ScoreSet
34+
from mavedb.routers.shared import ACCESS_CONTROL_ERROR_RESPONSES, PUBLIC_ERROR_RESPONSES
3435
from mavedb.view_models import score_calibration
3536

3637
logger = logging.getLogger(__name__)
3738

3839
router = APIRouter(
3940
prefix="/api/v1/score-calibrations",
4041
tags=["Score Calibrations"],
41-
responses={404: {"description": "Not found"}},
42+
responses={**PUBLIC_ERROR_RESPONSES},
4243
route_class=LoggedRoute,
4344
)
4445

@@ -54,6 +55,27 @@
5455
)
5556

5657

58+
@router.get(
59+
"/me",
60+
status_code=200,
61+
response_model=list[score_calibration.ScoreCalibrationWithScoreSetUrn],
62+
responses={**ACCESS_CONTROL_ERROR_RESPONSES},
63+
summary="List my calibrations",
64+
)
65+
def list_my_calibrations(
66+
*,
67+
db: Session = Depends(deps.get_db),
68+
user_data: UserData = Depends(require_current_user),
69+
) -> list[ScoreCalibration]:
70+
"""List all score calibrations created by the current user."""
71+
return (
72+
db.query(ScoreCalibration)
73+
.filter(ScoreCalibration.created_by_id == user_data.user.id)
74+
.options(selectinload(ScoreCalibration.score_set).selectinload(ScoreSet.contributors))
75+
.all()
76+
)
77+
78+
5779
@router.get(
5880
"/{urn}",
5981
response_model=score_calibration.ScoreCalibrationWithScoreSetUrn,

src/mavedb/routers/score_sets.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -672,6 +672,86 @@ def search_my_score_sets(
672672
return {"score_sets": enriched_score_sets, "num_score_sets": num_score_sets}
673673

674674

675+
RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT = 20
676+
677+
678+
@router.get(
679+
"/score-sets/recently-published",
680+
status_code=200,
681+
response_model=list[score_set.ScoreSet],
682+
response_model_exclude_none=True,
683+
summary="List recently published score sets",
684+
)
685+
def list_recently_published_score_sets(
686+
limit: int = Query(
687+
default=10,
688+
ge=1,
689+
le=RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT,
690+
description=f"Number of score sets to return (maximum {RECENTLY_PUBLISHED_SCORE_SETS_MAX_LIMIT}).",
691+
),
692+
db: Session = Depends(deps.get_db),
693+
user_data: Optional[UserData] = Depends(get_current_user),
694+
) -> Any:
695+
"""
696+
Return the most recently published score sets, ordered by publication date descending.
697+
"""
698+
save_to_logging_context({"requested_resource": "recently-published", "limit": limit})
699+
700+
items = (
701+
db.query(ScoreSet)
702+
.filter(ScoreSet.published_date.isnot(None), ScoreSet.private.is_(False))
703+
.order_by(ScoreSet.published_date.desc(), ScoreSet.urn.desc())
704+
.limit(limit)
705+
.all()
706+
)
707+
708+
result = []
709+
for item in items:
710+
if not has_permission(user_data, item, Action.READ).permitted:
711+
continue
712+
if (
713+
item.superseding_score_set
714+
and not has_permission(user_data, item.superseding_score_set, Action.READ).permitted
715+
):
716+
item.superseding_score_set = None
717+
enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data)
718+
result.append(score_set.ScoreSet.model_validate(item).copy(update={"experiment": enriched_experiment}))
719+
720+
return result
721+
722+
723+
@router.get(
724+
"/score-sets/",
725+
status_code=200,
726+
response_model=list[score_set.ScoreSet],
727+
responses={**ACCESS_CONTROL_ERROR_RESPONSES},
728+
response_model_exclude_none=True,
729+
summary="Fetch score sets by URN list",
730+
)
731+
async def show_score_sets(
732+
*,
733+
urns: str = Query(..., description="Comma-separated list of score set URNs"),
734+
db: Session = Depends(deps.get_db),
735+
user_data: UserData = Depends(get_current_user),
736+
) -> Any:
737+
"""
738+
Fetch score sets identified by a list of URNs.
739+
"""
740+
urn_list = [urn.strip() for urn in urns.split(",") if urn.strip()]
741+
if not urn_list:
742+
raise HTTPException(status_code=422, detail="At least one URN is required")
743+
744+
save_to_logging_context({"requested_resource": urn_list})
745+
response_items: list[score_set.ScoreSet] = []
746+
for urn in urn_list:
747+
item = await fetch_score_set_by_urn(db, urn, user_data, None, False)
748+
enriched_experiment = enrich_experiment_with_num_score_sets(item.experiment, user_data)
749+
response_item = score_set.ScoreSet.model_validate(item).copy(update={"experiment": enriched_experiment})
750+
response_items.append(response_item)
751+
752+
return response_items
753+
754+
675755
@router.get(
676756
"/score-sets/{urn}",
677757
status_code=200,

src/mavedb/server_main.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from fastapi.middleware.cors import CORSMiddleware
1010
from fastapi.middleware.gzip import GZipMiddleware
1111
from fastapi.openapi.utils import get_openapi
12+
from pydantic.json_schema import models_json_schema
1213
from sqlalchemy.orm import configure_mappers
1314
from starlette.requests import Request
1415
from starlette.responses import JSONResponse
@@ -240,6 +241,23 @@ def customize_openapi_schema():
240241
variants.metadata,
241242
]
242243

244+
# ScoreCalibrationModify (and its sub-models) are used in the PUT /score-calibrations/{urn}
245+
# endpoint's openapi_extra $ref, but FastAPI only registers schemas it discovers through
246+
# direct Body() parameters or response_model — not through Depends(). The flexible_model_loader
247+
# pattern wraps the model in a generic async function (return type `T`), so FastAPI never sees
248+
# the concrete type and never adds it to components/schemas. We register those missing schemas
249+
# here explicitly to keep the generated OpenAPI spec valid. Eventually, this schema may be
250+
# registered in other endpoints and this workaround can be removed, but for now this is the only
251+
# endpoint where we use the ScoreCalibrationModify model.
252+
from mavedb.view_models.score_calibration import ScoreCalibrationModify
253+
254+
_, extra_schemas = models_json_schema(
255+
[(ScoreCalibrationModify, "validation")],
256+
ref_template="#/components/schemas/{model}",
257+
)
258+
for name, schema in extra_schemas.get("$defs", {}).items():
259+
openapi_schema["components"]["schemas"].setdefault(name, schema)
260+
243261
app.openapi_schema = openapi_schema
244262
return app.openapi_schema
245263

tests/helpers/util/score_set.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -165,9 +165,9 @@ def create_seq_score_set_with_variants(
165165
count_columns_metadata_json_path,
166166
)
167167

168-
assert score_set["numVariants"] == 3, (
169-
f"Could not create sequence based score set with variants within experiment {experiment_urn}"
170-
)
168+
assert (
169+
score_set["numVariants"] == 3
170+
), f"Could not create sequence based score set with variants within experiment {experiment_urn}"
171171

172172
jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema())
173173
return score_set
@@ -196,9 +196,9 @@ def create_acc_score_set_with_variants(
196196
count_columns_metadata_json_path,
197197
)
198198

199-
assert score_set["numVariants"] == 3, (
200-
f"Could not create sequence based score set with variants within experiment {experiment_urn}"
201-
)
199+
assert (
200+
score_set["numVariants"] == 3
201+
), f"Could not create sequence based score set with variants within experiment {experiment_urn}"
202202

203203
jsonschema.validate(instance=score_set, schema=ScoreSet.model_json_schema())
204204
return score_set

0 commit comments

Comments (0)