Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions PMR/resources/etls/pmr-datasets.xml
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@
<column>relationship</column>
<column>method</column>
<column>objectid</column>
<column>created</column>
<column>modified</column>
</sourceColumns>
<sourceFilters>
<sourceFilter column="QCState/Label" operator="eq" value="Completed"/>
Expand Down Expand Up @@ -171,6 +173,8 @@
<column>conception</column>
<column>conceptualDay</column>
<column>objectid</column>
<column>created</column>
<column>modified</column>
</sourceColumns>
<sourceFilters>
<sourceFilter column="QCState/Label" operator="eq" value="Completed"/>
Expand Down
2 changes: 2 additions & 0 deletions PMR/resources/etls/pmr-demographics.xml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
<column>calculated_status</column>
<column>QCState/Label</column>
<column>objectid</column>
<column>created</column>
<column>modified</column>
</sourceColumns>
</source>
<destination schemaName="study" queryName="demographics" targetOption="truncate" bulkLoad="true" batchSize="2500">
Expand Down
10 changes: 10 additions & 0 deletions mGAP/resources/data/species.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
common_name scientific_name mhc_prefix
Cotton-top Tamarin Saguinus oedipus Saoe
Cynomolgus macaque Macaca fascicularis Mafa
Marmoset Callithrix jacchus Caja
Pigtail macaque Macaca nemestrina Mane
Rhesus macaque Macaca mulatta Mamu
Sooty Mangabey Cercocebus atys Ceat
Stump Tailed Macaca Arctoides Maar
Vervet Chlorocebus sabaeus Chsa
Japanese macaque Macaca fuscata Mafu
6 changes: 6 additions & 0 deletions mGAP/resources/etls/prime-seq.xml
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,24 @@
<column>releaseId</column>
<column>trackName</column>
<column>label</column>
<column>species</column>
<column>totalSamples</column>
<column>category</column>
<column>url</column>
<column>source</column>
<column>description</column>
<column>isprimarytrack</column>
<column>shouldindex</column>
<column>vcfIndexId/dataid/DataFileUrl</column>
<column>vcfIndexId/library_id/name</column>
<column>vcfId/dataid/DataFileUrl</column>
<column>vcfId/library_id/name</column>
</sourceColumns>
</source>
<destination schemaName="mGAP" queryName="tracksPerRelease" bulkLoad="true" targetOption="truncate">
<columnTransforms>
<column source="vcfId/dataid/DataFileUrl" target="vcfId" transformClass="org.labkey.mgap.columnTransforms.TrackOutputFileTransform" />
<column source="vcfIndexId/dataid/DataFileUrl" target="vcfIndexId" transformClass="org.labkey.mgap.columnTransforms.TrackOutputFileTransform" />
</columnTransforms>
</destination>
</transform>
Expand Down Expand Up @@ -101,6 +106,7 @@
<column>releaseDate</column>
<column>vcfId/dataid/DataFileUrl</column>
<column>genomeId/name</column>
<column>species</column>
<column>totalSubjects</column>
<column>totalVariants</column>
<column>dbSnpId</column>
Expand Down
12 changes: 6 additions & 6 deletions mGAP/resources/queries/mGAP/combinedPedigree.sql
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
SELECT
s.subjectname,
s.Id as subjectname,
s.gender,
s.mother as dam,
s.father as sire,
s.Id.parents.dam as dam,
s.Id.parents.sire as sire,
s.species,
s.geographic_origin,
s.geographic_origin

FROM laboratory.subjects s
FROM "/Internal/PMR/".study.demographics s

UNION ALL

Expand All @@ -19,4 +19,4 @@ SELECT
null as geographic_origin

FROM mgap.demographics d
WHERE d.subjectname NOT IN (SELECT DISTINCT s.subjectname FROM laboratory.subjects s)
WHERE d.subjectname NOT IN (SELECT DISTINCT s.Id FROM "/Internal/PMR/".study.demographics s)
2 changes: 1 addition & 1 deletion mGAP/resources/queries/mGAP/releaseTracks/.qview.xml
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
<column name="rowid"/>
<column name="trackName"/>
<column name="label"/>
<column name="species"/>
<column name="source"/>
<column name="category"/>
<column name="totalSamples"/>
<column name="url"/>
<column name="vcfId"/>
<column name="description"/>
<column name="isprimarytrack"/>
<column name="mergepriority"/>
<column name="skipvalidation"/>
<column name="vcfId/container/Name">
<properties>
Expand Down
7 changes: 7 additions & 0 deletions mGAP/resources/queries/mGAP/sampleSummary.query.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,15 @@
<metadata>
<tables xmlns="http://labkey.org/data/xml">
<table tableName="" tableDbType="TABLE">
<javaCustomizer class="org.labkey.mgap.query.SampleSummaryCustomizer"/>
<pkColumnName>subjectName</pkColumnName>
<tableTitle>mGAP Subject/gVCF Summary</tableTitle>
<columns>
<column columnName="aliasSubjectName">
<columnTitle>SubjectId Listed In Alias Table</columnTitle>
<isHidden>true</isHidden>
</column>
</columns>
</table>
</tables>
</metadata>
Expand Down
3 changes: 2 additions & 1 deletion mGAP/resources/queries/mGAP/sampleSummary.sql
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ SELECT
ss.center,
t.tracks,
t.total,
CASE WHEN ss.originalId IS NULL OR ss.gender IS NULL or ss.species IS NULL or ss.center IS NULL THEN true ELSE false END as missingDemographics
CASE WHEN ss.originalId IS NULL OR ss.gender IS NULL or ss.species IS NULL or ss.center IS NULL THEN true ELSE false END as missingDemographics,
am.subjectname as aliasSubjectName

FROM (SELECT
COALESCE(o.readset.subjectId, rt.subjectId) as subjectId,
Expand Down
5 changes: 5 additions & 0 deletions mGAP/resources/queries/mGAP/sampleSummary/.qview.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<customView xmlns="http://labkey.org/data/xml/queryCustomView">
<!-- Default view for the sampleSummary query: rows are sorted by subjectId in ascending order. -->
<sorts>
<sort column="subjectId" descending="false"/>
</sorts>
</customView>
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
<customView xmlns="http://labkey.org/data/xml/queryCustomView">
<columns>
<column name="version"/>
<column name="species"/>
<column name="releaseDate"/>
<column name="totalSubjects"/>
<column name="totalVariants"/>
Expand Down
115 changes: 115 additions & 0 deletions mGAP/resources/r/UpdateTracks.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
library(Rlabkey)
library(dplyr)

# This script is designed to be run externally for each release, to identify subjects that need to be added to the releaseTrackSubsets table:

# Internal helper shared by testByCenter() and testBySpecies().
#
# Queries mgap.sampleSummary for rows whose 'tracks' value does not contain
# trackName and which also satisfy subsetFilter. Prints "<trackName>: <row count>"
# as a progress message.
#
# trackName:    name of the track to test for; also written to every returned row
# subsetFilter: a single makeFilter() triple, c(column, operator, value), that
#               restricts the query to the population the track should cover
#
# Returns a data.frame with columns trackName and subjectId, or NULL when no
# rows matched (so the result can be passed straight to rbind()).
.findSubjectsMissingTrack <- function(trackName, subsetFilter) {
  dat <- suppressWarnings(labkey.selectRows(
    baseUrl="https://prime-seq.ohsu.edu",
    folderPath="/Internal/ColonyData",
    schemaName="mgap",
    queryName="sampleSummary",
    viewName="",
    colSelect="subjectId,externalAlias",
    colFilter=makeFilter(
      c("tracks", "DOES_NOT_CONTAIN", trackName),
      subsetFilter),
    containerFilter=NULL,
    colNameOpt="rname"
  ))

  print(paste0(trackName, ': ', nrow(dat)))

  if (nrow(dat) == 0) {
    return(NULL)
  }

  # With colNameOpt="rname" the selected subjectId column is read back as 'subjectid'
  return(data.frame(trackName = trackName, subjectId = dat$subjectid))
}

# Finds subjects whose 'center' equals centerName and whose 'tracks' value does
# not contain trackName.
#
# centerName: value compared (EQUAL) against the 'center' column
# trackName:  track the subjects are expected to belong to
#
# Returns a data.frame(trackName, subjectId), or NULL when no subject is missing.
testByCenter <- function(centerName, trackName) {
  .findSubjectsMissingTrack(trackName, c("center", "EQUAL", centerName))
}

# Finds subjects whose 'species' is any of speciesList and whose 'tracks' value
# does not contain trackName.
#
# speciesList: character vector of accepted species values; joined with ';'
#              to form the value of the IN filter
# trackName:   track the subjects are expected to belong to
#
# Returns a data.frame(trackName, subjectId), or NULL when no subject is missing.
testBySpecies <- function(speciesList, trackName) {
  .findSubjectsMissingTrack(trackName, c("species", "IN", paste0(speciesList, collapse = ';')))
}

# Every center has a track named "<center> Animals"; query them in this fixed order.
centerNames <- c('CNPRC', 'TNPRC', 'ENPRC', 'NEPRC', 'SNPRC', 'ONPRC', 'MDA', 'WFU', 'CPRC')
centerRows <- lapply(centerNames, function(centerName) {
  testByCenter(centerName, paste0(centerName, ' Animals'))
})

# Species-level tracks accept several spellings of the same species.
speciesRows <- list(
  testBySpecies(c('RHESUS MACAQUE', 'Rhesus', 'Macaca mulatta'), 'Rhesus Macaques'),
  testBySpecies(c('JAPANESE MACAQUE', 'Macaca fuscata'), 'Japanese Macaques')
)

# Stack all results into one data.frame of (trackName, subjectId) rows.
# NULL entries (nothing missing for that track) are ignored by rbind().
toInsert <- do.call(rbind, c(centerRows, speciesRows))


# The insert never runs as written: the condition is hard-coded to FALSE.
if (FALSE) {
  added <- labkey.insertRows(
    baseUrl="https://prime-seq.ohsu.edu",
    folderPath="/Internal/ColonyData",
    schemaName="mgap",
    queryName="releaseTrackSubsets",
    toInsert = toInsert
  )
}


# Now ensure all tracks exist:
# every trackName referenced in releaseTrackSubsets should have a row in releaseTracks.

# All rows currently in mgap.releaseTracks; only the 'trackname' column is used below.
existingTracks <- labkey.selectRows(
baseUrl="https://prime-seq.ohsu.edu",
folderPath="/Internal/ColonyData",
schemaName="mgap",
queryName="releaseTracks",
colNameOpt="rname"
)

# Distinct track names present in releaseTrackSubsets but absent from releaseTracks.
# The result is a one-column data.frame ('trackname').
missingTrackNames <- labkey.selectRows(
baseUrl="https://prime-seq.ohsu.edu",
folderPath="/Internal/ColonyData",
schemaName="mgap",
queryName="releaseTrackSubsets",
colSelect="trackName",
colNameOpt="rname"
) %>%
filter(!trackname %in% existingTracks$trackname) %>%
select(trackname) %>% unique()

if (nrow(missingTrackNames) > 0) {
# Skeleton releaseTracks rows: the track name doubles as the label, and none is marked primary.
toAdd <- data.frame(trackName = missingTrackNames$trackname, label = missingTrackNames$trackname, isprimarytrack = FALSE)
# NOTE(review): the column is spelled 'Category' here but 'category' in the comment below -
# confirm the server matches field names case-insensitively.
toAdd$Category <- 'Species Dataset'
# Add anything else desired, like species, source, url, description, category

# The insert never runs as written: the condition is hard-coded to FALSE.
if (FALSE) {
added <- labkey.insertRows(
baseUrl="https://prime-seq.ohsu.edu",
folderPath="/Internal/ColonyData",
schemaName="mgap",
queryName="releaseTracks",
toInsert = toAdd
)
}
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Adds a species column (varchar(1000)) to both variantCatalogReleases and releaseTracks,
-- and removes the mergepriority column from releaseTracks.
ALTER TABLE mGAP.variantCatalogReleases ADD species varchar(1000);
ALTER TABLE mGAP.releaseTracks ADD species varchar(1000);
ALTER TABLE mGAP.releaseTracks DROP COLUMN mergepriority;
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Adds a shouldindex flag (boolean, default false) and an integer vcfIndexId column
-- to both releaseTracks and tracksPerRelease.
ALTER TABLE mGAP.releaseTracks ADD shouldindex boolean default false;
ALTER TABLE mGAP.releaseTracks ADD vcfIndexId int;

ALTER TABLE mGAP.tracksPerRelease ADD shouldindex boolean default false;
ALTER TABLE mGAP.tracksPerRelease ADD vcfIndexId int;
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
-- Adds a species column (varchar(1000)) to both variantCatalogReleases and releaseTracks,
-- and removes the mergepriority column from releaseTracks.
ALTER TABLE mGAP.variantCatalogReleases ADD species varchar(1000);
ALTER TABLE mGAP.releaseTracks ADD species varchar(1000);
ALTER TABLE mGAP.releaseTracks DROP COLUMN mergepriority;
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
-- Adds a shouldindex flag (bit, default 0) and an integer vcfIndexId column
-- to both releaseTracks and tracksPerRelease.
ALTER TABLE mGAP.releaseTracks ADD shouldindex bit default 0;
ALTER TABLE mGAP.releaseTracks ADD vcfIndexId int;

ALTER TABLE mGAP.tracksPerRelease ADD shouldindex bit default 0;
ALTER TABLE mGAP.tracksPerRelease ADD vcfIndexId int;
54 changes: 45 additions & 9 deletions mGAP/resources/schemas/mgap.xml
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,15 @@
<isUserEditable>false</isUserEditable>
<columnTitle>Row Id</columnTitle>
</column>
<column columnName="species">
<columnTitle>Species</columnTitle>
<fk>
<fkDbSchema>laboratory</fkDbSchema>
<fkTable>species</fkTable>
<fkColumnName>common_name</fkColumnName>
<fkDisplayColumnName useRawValue="true"/>
</fk>
</column>
<column columnName="version">
<columnTitle>Version</columnTitle>
<nullable>false</nullable>
Expand Down Expand Up @@ -644,13 +653,22 @@
<columnTitle>Label</columnTitle>
<nullable>false</nullable>
</column>
<column columnName="species">
<columnTitle>Species</columnTitle>
<fk>
<fkDbSchema>laboratory</fkDbSchema>
<fkTable>species</fkTable>
<fkColumnName>common_name</fkColumnName>
<fkDisplayColumnName useRawValue="true"/>
</fk>
</column>
<column columnName="source">
<columnTitle>Source</columnTitle>
<url>${url}</url>
</column>
<column columnName="category">
<columnTitle>Category</columnTitle>
<nullable>false</nullable>
<nullable>true</nullable>
</column>
<column columnName="url">
<columnTitle>URL</columnTitle>
Expand All @@ -669,14 +687,21 @@
<inputType>textarea</inputType>
</column>
<column columnName="isprimarytrack">
<columnTitle>Is Primary Track?</columnTitle>
</column>
<column columnName="mergepriority">
<columnTitle>Merge Priority Order</columnTitle>
<description>This order will be used for genotype priority order when merging to create the primary release VCF. Lower numbers have higher priority. Set to -1 to exclude this track when merging.</description>
<columnTitle>Is Primary Track For Species?</columnTitle>
</column>
<column columnName="skipvalidation">
<columnTitle>Skip Validation?</columnTitle>
<columnTitle>Skip Annotation Checks?</columnTitle>
</column>
<column columnName="shouldindex">
<columnTitle>Should Include Lucene Index?</columnTitle>
</column>
<column columnName="vcfIndexId">
<columnTitle>Lucene Index Id</columnTitle>
<fk>
<fkDbSchema>sequenceanalysis</fkDbSchema>
<fkTable>outputfiles</fkTable>
<fkColumnName>rowid</fkColumnName>
</fk>
</column>
<column columnName="container">
<isHidden>true</isHidden>
Expand Down Expand Up @@ -812,7 +837,7 @@
</column>
<column columnName="category">
<columnTitle>Category</columnTitle>
<nullable>false</nullable>
<nullable>true</nullable>
</column>
<column columnName="url">
<columnTitle>URL</columnTitle>
Expand All @@ -831,9 +856,20 @@
<inputType>textarea</inputType>
</column>
<column columnName="isprimarytrack">
<columnTitle>Is Primary Track?</columnTitle>
<columnTitle>Is Primary Track For Species?</columnTitle>
<defaultValue>false</defaultValue>
</column>
<column columnName="shouldindex">
<columnTitle>Should Include Lucene Index?</columnTitle>
</column>
<column columnName="vcfIndexId">
<columnTitle>Lucene Index Id</columnTitle>
<fk>
<fkDbSchema>sequenceanalysis</fkDbSchema>
<fkTable>outputfiles</fkTable>
<fkColumnName>rowid</fkColumnName>
</fk>
</column>
<column columnName="container">
<isHidden>true</isHidden>
</column>
Expand Down
Loading