Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions machine_types.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
prefix machine_type
VH Illumina-NextSeq
D Illumina-HiSeq
M Illumina-MiSeq
A Illumina-NovaSeq
NB Illumina-MiniSeq
LH Illumina-NovaSeqX
SH Illumina-MiSeq
29 changes: 14 additions & 15 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,23 +54,22 @@ register_run_file = "sample_registry.register:register_illumina_file"
unregister_samples = "sample_registry.register:unregister_samples"
register_samples = "sample_registry.register:register_samples"
modify_sample = "sample_registry.register:modify_sample"
register_annotations = "sample_registry.register:register_annotations"
modify_annotation = "sample_registry.register:modify_annotation"
register_host_species = "sample_registry.register:register_host_species"
register_sample_types = "sample_registry.register:register_sample_types"
export_samples = "sample_registry.export:export_samples"
create_test_db = "sample_registry.db:create_test_db"
sample_registry_version = "sample_registry:sample_registry_version"
register_annotations = "sample_registry.register:register_annotations"
modify_annotation = "sample_registry.register:modify_annotation"
export_samples = "sample_registry.export:export_samples"
create_test_db = "sample_registry.db:create_test_db"
sample_registry_version = "sample_registry:sample_registry_version"

[tool.setuptools]
packages = ["sample_registry"]
[tool.setuptools]
packages = ["sample_registry", "sample_registry.data"]

[tool.setuptools.package-data]
"sample_registry" = [
"templates/*.html",
"static/*",
"static/img/*",
]
[tool.setuptools.package-data]
"sample_registry" = [
"templates/*.html",
"static/*",
"static/img/*",
"data/*.tsv",
]

[build-system]
requires = ["setuptools>=61.0.0", "wheel"]
Expand Down
147 changes: 75 additions & 72 deletions sample_registry/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,9 @@
from io import StringIO
from pathlib import Path
from sample_registry import ARCHIVE_ROOT, SQLALCHEMY_DATABASE_URI
from sample_registry.models import (
Base,
Annotation,
Run,
Sample,
StandardHostSpecies,
StandardSampleType,
)
from sample_registry.db import run_to_dataframe, query_tag_stats, STANDARD_TAGS
from sample_registry.models import Base, Annotation, Run, Sample
from sample_registry.db import run_to_dataframe, query_tag_stats, STANDARD_TAGS
from sample_registry.standards import STANDARD_HOST_SPECIES, STANDARD_SAMPLE_TYPES
from werkzeug.middleware.proxy_fix import ProxyFix

app = Flask(__name__)
Expand Down Expand Up @@ -191,39 +185,46 @@ def show_runs(run_acc=None):


@app.route("/stats")
def show_stats():
num_samples = db.session.query(Sample).count()
num_samples_with_sampletype = (
db.session.query(Sample).filter(Sample.sample_type is not None).count()
)
num_samples_with_standard_sampletype = (
db.session.query(Sample)
.join(StandardSampleType, Sample.sample_type == StandardSampleType.sample_type)
.count()
)
standard_sampletype_counts = (
db.session.query(
StandardSampleType.sample_type,
db.func.count(Sample.sample_accession),
StandardSampleType.host_associated,
)
.join(Sample, Sample.sample_type == StandardSampleType.sample_type)
.group_by(StandardSampleType.sample_type)
.order_by(
db.func.count(Sample.sample_accession).desc(),
StandardSampleType.sample_type,
)
.all()
)
standard_sampletypes = set(
s.sample_type for s in db.session.query(StandardSampleType.sample_type).all()
)
nonstandard_sampletype_counts = (
db.session.query(Sample.sample_type, db.func.count(Sample.sample_accession))
.filter(Sample.sample_type.notin_(standard_sampletypes))
.group_by(Sample.sample_type)
.order_by(db.func.count(Sample.sample_accession).desc(), Sample.sample_type)
)
def show_stats():
standard_sampletypes = set(STANDARD_SAMPLE_TYPES.names())
standard_hostspecies = set(STANDARD_HOST_SPECIES.names())

num_samples = db.session.query(Sample).count()
num_samples_with_sampletype = (
db.session.query(Sample).filter(Sample.sample_type is not None).count()
)
num_samples_with_standard_sampletype = (
db.session.query(Sample)
.filter(Sample.sample_type.in_(standard_sampletypes))
.count()
if standard_sampletypes
else 0
)
standard_sampletype_counts = (
db.session.query(Sample.sample_type, db.func.count(Sample.sample_accession))
.filter(Sample.sample_type.in_(standard_sampletypes))
.group_by(Sample.sample_type)
.order_by(db.func.count(Sample.sample_accession).desc(), Sample.sample_type)
.all()
if standard_sampletypes
else []
)
nonstandard_sampletype_counts = (
db.session.query(Sample.sample_type, db.func.count(Sample.sample_accession))
.filter(
Sample.sample_type.isnot(None),
Sample.sample_type.notin_(standard_sampletypes),
)
.group_by(Sample.sample_type)
.order_by(db.func.count(Sample.sample_accession).desc(), Sample.sample_type)
.all()
if standard_sampletypes
else db.session.query(Sample.sample_type, db.func.count(Sample.sample_accession))
.filter(Sample.sample_type.isnot(None))
.group_by(Sample.sample_type)
.order_by(db.func.count(Sample.sample_accession).desc(), Sample.sample_type)
.all()
)

num_subjectid = (
db.session.query(Sample.subject_id)
Expand All @@ -239,36 +240,38 @@ def show_stats():
num_samples_with_hostspecies = (
db.session.query(Sample).filter(Sample.host_species is not None).count()
)
num_samples_with_standard_hostspecies = (
db.session.query(Sample)
.join(
StandardHostSpecies, Sample.host_species == StandardHostSpecies.host_species
)
.count()
)
standard_hostspecies_counts = (
db.session.query(
StandardHostSpecies.host_species,
db.func.count(Sample.sample_accession),
StandardHostSpecies.ncbi_taxon_id,
)
.join(Sample, Sample.host_species == StandardHostSpecies.host_species)
.group_by(StandardHostSpecies.host_species)
.order_by(
db.func.count(Sample.sample_accession).desc(),
StandardHostSpecies.host_species,
)
.all()
)
standard_hostspecies = set(
s.host_species for s in db.session.query(StandardHostSpecies.host_species).all()
)
nonstandard_hostspecies_counts = (
db.session.query(Sample.host_species, db.func.count(Sample.sample_accession))
.filter(Sample.host_species.notin_(standard_hostspecies))
.group_by(Sample.host_species)
.order_by(db.func.count(Sample.sample_accession).desc(), Sample.host_species)
)
num_samples_with_standard_hostspecies = (
db.session.query(Sample)
.filter(Sample.host_species.in_(standard_hostspecies))
.count()
if standard_hostspecies
else 0
)
standard_hostspecies_counts = (
db.session.query(Sample.host_species, db.func.count(Sample.sample_accession))
.filter(Sample.host_species.in_(standard_hostspecies))
.group_by(Sample.host_species)
.order_by(db.func.count(Sample.sample_accession).desc(), Sample.host_species)
.all()
if standard_hostspecies
else []
)
nonstandard_hostspecies_counts = (
db.session.query(Sample.host_species, db.func.count(Sample.sample_accession))
.filter(
Sample.host_species.isnot(None),
Sample.host_species.notin_(standard_hostspecies),
)
.group_by(Sample.host_species)
.order_by(db.func.count(Sample.sample_accession).desc(), Sample.host_species)
.all()
if standard_hostspecies
else db.session.query(Sample.host_species, db.func.count(Sample.sample_accession))
.filter(Sample.host_species.isnot(None))
.group_by(Sample.host_species)
.order_by(db.func.count(Sample.sample_accession).desc(), Sample.host_species)
.all()
)

num_samples_with_primer = (
db.session.query(Sample).filter(Sample.primer_sequence != "").count()
Expand Down
1 change: 1 addition & 0 deletions sample_registry/data/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Package data for SampleRegistry reference tables."""
8 changes: 8 additions & 0 deletions sample_registry/data/machine_types.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
prefix machine_type
VH Illumina-NextSeq
D Illumina-HiSeq
M Illumina-MiSeq
A Illumina-NovaSeq
NB Illumina-MiniSeq
LH Illumina-NovaSeqX
SH Illumina-MiSeq
13 changes: 13 additions & 0 deletions sample_registry/data/standard_host_species.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
host_species scientific_name ncbi_taxid
Dog Canis lupus familiaris 9615
Fruit fly Drosophila melanogaster 7227
Human Homo sapiens 9606
Mouse Mus musculus 10090
Naked mole rat Heterocephalus glaber 10181
Pig Sus scrofa domesticus 9825
Pigeon Columba livia 8932
Rabbit Oryctolagus cuniculus 9986
Rat Rattus norvegicus 10116
Rhesus macaque Macaca mulatta 9544
Cow Bos taurus 9913
Sheep Ovis aries 9940
73 changes: 73 additions & 0 deletions sample_registry/data/standard_sample_types.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
sample_type rarity host_associated description
Amniotic fluid Rare 1 NA
BAL Uncommon 1 NA
Bedding Uncommon 0 NA
Biofilm Rare 1 NA
Bioreactor Uncommon 0 NA
Blank swab Common 0 Swab taken out of packaging in sequencing lab immediately before extraction.
Blood Uncommon 1 NA
Breast milk Rare 1 NA
Buffer Uncommon 0 NA
Cecum Uncommon 1 NA
Cell lysate Uncommon 1 NA
Cervical swab Rare 1 NA
Cheek swab Uncommon 1 NA
Crop Rare 1 NA
Dental plaque Uncommon 1 NA
Duodenum Rare 1 NA
Dust Uncommon 0 NA
Elution buffer Common 0 NA
Empty well Common 0 NA
Endometrial swab Rare 1 NA
Environmental control Common 0 Includes Air swab, Environmental swab, Environmental blank
Esophageal biopsy Rare 1 NA
Esophagus Rare 1 NA
Feces Common 1 Human and animal fecal material.
Feed Uncommon 0 NA
Fistula Common 1 NA
Fistula swab Uncommon 1 NA
Fly food Rare 0 NA
Fruit fly Rare 1 NA
Ileostomy fluid Uncommon 1 NA
Ileum Uncommon 1 NA
Kveim reagent Uncommon 0 NA
Lab water Uncommon 0 NA
Macular Retina Rare 1 NA
Meconium Rare 1 NA
Medium Rare 0 NA
Microbial culture Common 0 NA
Mock DNA Common 0 NA
Mouse chow Rare 0 NA
Nasal swab Common 1 NA
Nasopharyngeal swab Uncommon 1 NA
Oral swab Common 1 NA
Oral wash Uncommon 1 NA
Oropharyngeal swab Uncommon 1 NA
Ostomy fluid Uncommon 1 NA
Pancreatic fluid Rare 1 NA
PCR water Uncommon 0 NA
Peripheral retina Rare 1 NA
Placenta Rare 1 NA
Plasma Uncommon 1 NA
Rectal biopsy Common 1 NA
Rectal swab Common 1 We have observed that results are sensitive to exact collection method employed, please include notes in publication.
Saline Uncommon 0 NA
Saliva Uncommon 1 NA
Sediment Uncommon 0 NA
Serum Uncommon 1 NA
Skin swab Common 1 NA
Small intestine Uncommon 1 NA
Soil Rare 1 NA
Sputum Common 1 NA
Surface swab Common 0 NA
Tongue swab Common 1 NA
Tonsil Rare 1 NA
Tracheal aspirate Common 1 NA
Tracheal control Uncommon 1 NA
Urethral swab Rare 1 NA
Urine Uncommon 1 NA
Water Uncommon 0 NA
Weighing paper Common 0 NA
Whole gut Uncommon 1 From dissection.
Large intestine mucosa Common 1 "NA"
Large intestine lumen Common 1 "NA"
Loading
Loading