Commit 341ad5d

Merge pull request #184 from LabKey/fb_merge_22.7_to_develop
Merge discvr-22.7 to develop
2 parents afe8d08 + 9f9c6e2 commit 341ad5d

25 files changed: +354 / -58 lines

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 1 addition & 0 deletions
@@ -303,6 +303,7 @@ public static void registerPipelineSteps()
         SequencePipelineService.get().registerPipelineStep(new VariantsToTableStep.Provider());
         SequencePipelineService.get().registerPipelineStep(new VariantQCStep.Provider());
         SequencePipelineService.get().registerPipelineStep(new PlinkPcaStep.Provider());
+        SequencePipelineService.get().registerPipelineStep(new KingInferenceStep.Provider());
         SequencePipelineService.get().registerPipelineStep(new MendelianViolationReportStep.Provider());
         SequencePipelineService.get().registerPipelineStep(new SummarizeGenotypeQualityStep.Provider());


SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisServiceImpl.java

Lines changed: 13 additions & 11 deletions
@@ -253,16 +253,10 @@ public File ensureVcfIndex(File vcf, Logger log, boolean forceRecreate) throws I
         try
         {
             FileType gz = new FileType(".gz");
-            File expected = new File(vcf.getPath() + FileExtensions.TRIBBLE_INDEX);
-            File tbi = new File(vcf.getPath() + ".tbi");
-
-            if (!forceRecreate && expected.exists())
-            {
-                return expected;
-            }
-            else if (!forceRecreate && tbi.exists())
+            File expectedIdx = gz.isType(vcf) ? new File(vcf.getPath() + ".tbi") : new File(vcf.getPath() + FileExtensions.TRIBBLE_INDEX);
+            if (!forceRecreate && expectedIdx.exists())
             {
-                return tbi;
+                return expectedIdx;
             }
             else
             {
@@ -272,15 +266,23 @@ else if (!forceRecreate && tbi.exists())
                 {
                     TabixRunner r = new TabixRunner(log);
                     r.execute(vcf);
+                    if (!expectedIdx.exists())
+                    {
+                        throw new PipelineJobException("Expected index was not created: " + expectedIdx.getPath());
+                    }

-                    return tbi;
+                    return expectedIdx;
                 }
                 else
                 {
                     Index idx = IndexFactory.createDynamicIndex(vcf, new VCFCodec());
                     idx.writeBasedOnFeatureFile(vcf);
+                    if (!expectedIdx.exists())
+                    {
+                        throw new PipelineJobException("Expected index was not created: " + expectedIdx.getPath());
+                    }

-                    return expected;
+                    return expectedIdx;
                 }
             }
         }
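The refactored ensureVcfIndex collapses the two candidate index paths into a single rule: a gzipped VCF is expected to carry a tabix ".tbi" index, a plain VCF a Tribble index (FileExtensions.TRIBBLE_INDEX, ".idx" in htsjdk), and both branches now fail fast if that file was not actually created. A minimal standalone sketch of the naming rule, using hypothetical class and method names (not LabKey API):

    import java.io.File;

    // Sketch of the consolidated index-path rule used above (hypothetical helper):
    // gzipped VCFs pair with a tabix ".tbi" index, plain VCFs with a Tribble ".idx" index.
    class VcfIndexPaths
    {
        static File expectedIndexFor(File vcf)
        {
            boolean gzipped = vcf.getName().toLowerCase().endsWith(".gz");
            return new File(vcf.getPath() + (gzipped ? ".tbi" : ".idx"));
        }

        public static void main(String[] args)
        {
            System.out.println(expectedIndexFor(new File("variants.vcf.gz"))); // variants.vcf.gz.tbi
            System.out.println(expectedIndexFor(new File("variants.vcf")));    // variants.vcf.idx
        }
    }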

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java

Lines changed: 182 additions & 0 deletions
@@ -0,0 +1,182 @@
+package org.labkey.sequenceanalysis.run.variant;
+
+import htsjdk.samtools.SAMSequenceDictionary;
+import htsjdk.samtools.SAMSequenceRecord;
+import htsjdk.samtools.util.Interval;
+import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.math.NumberUtils;
+import org.apache.logging.log4j.Logger;
+import org.json.old.JSONObject;
+import org.labkey.api.pipeline.PipelineJobException;
+import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
+import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
+import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
+import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
+import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
+import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
+import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
+import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
+import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl;
+import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
+import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
+
+import javax.annotation.Nullable;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+public class KingInferenceStep extends AbstractCommandPipelineStep<KingInferenceStep.KingWrapper> implements VariantProcessingStep
+{
+    public KingInferenceStep(PipelineStepProvider<?> provider, PipelineContext ctx)
+    {
+        super(provider, ctx, new KingInferenceStep.KingWrapper(ctx.getLogger()));
+    }
+
+    public static class Provider extends AbstractVariantProcessingStepProvider<KingInferenceStep>
+    {
+        public Provider()
+        {
+            super("KingInferenceStep", "KING/Relatedness", "", "This will run KING to infer kinship from a VCF", Arrays.asList(
+                    ToolParameterDescriptor.create("limitToChromosomes", "Limit to Chromosomes", "If checked, the analysis will include only the primary chromosomes", "checkbox", new JSONObject(){{
+                        put("checked", true);
+                    }}, true)
+            ), null, "https://www.kingrelatedness.com/manual.shtml");
+        }
+
+        @Override
+        public KingInferenceStep create(PipelineContext ctx)
+        {
+            return new KingInferenceStep(this, ctx);
+        }
+    }
+
+    @Override
+    public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List<Interval> intervals) throws PipelineJobException
+    {
+        VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();
+
+        output.addInput(inputVCF, "Input VCF");
+        output.addInput(genome.getWorkingFastaFile(), "Reference Genome");
+
+        File plinkOut = new File(outputDirectory, "plink");
+        output.addIntermediateFile(new File(plinkOut.getPath() + ".bed"));
+        output.addIntermediateFile(new File(plinkOut.getPath() + ".fam"));
+        output.addIntermediateFile(new File(plinkOut.getPath() + ".bim"));
+        output.addIntermediateFile(new File(plinkOut.getPath() + ".log"));
+        output.addIntermediateFile(new File(plinkOut.getPath() + "-temporary.psam"));
+
+        PlinkPcaStep.PlinkWrapper plink = new PlinkPcaStep.PlinkWrapper(getPipelineCtx().getLogger());
+        List<String> plinkArgs = new ArrayList<>();
+        plinkArgs.add(plink.getExe().getPath());
+        plinkArgs.add("--vcf");
+        plinkArgs.add(inputVCF.getPath());
+
+        plinkArgs.add("--make-bed");
+
+        boolean limitToChromosomes = getProvider().getParameterByName("limitToChromosomes").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, true);
+        if (limitToChromosomes)
+        {
+            SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(genome.getSequenceDictionary().toPath());
+            List<String> toKeep = dict.getSequences().stream().filter(s -> {
+                String name = StringUtils.replaceIgnoreCase(s.getSequenceName(), "^chr", "");
+
+                return NumberUtils.isCreatable(name) || "X".equalsIgnoreCase(name) || "Y".equalsIgnoreCase(name);
+            }).map(SAMSequenceRecord::getSequenceName).toList();
+
+            plinkArgs.add("--chr");
+            plinkArgs.add(StringUtils.join(toKeep, ","));
+        }
+        else
+        {
+            plinkArgs.add("--allow-extra-chr");
+        }
+
+        plinkArgs.add("--silent");
+
+        plinkArgs.add("--max-alleles");
+        plinkArgs.add("2");
+
+        plinkArgs.add("--out");
+        plinkArgs.add(plinkOut.getPath());
+
+        Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
+        if (threads != null)
+        {
+            plinkArgs.add("--threads");
+            plinkArgs.add(threads.toString());
+        }
+
+        //TODO: consider --memory (in MB)
+
+        plink.execute(plinkArgs);
+
+        File plinkOutBed = new File(plinkOut.getPath() + ".bed");
+        if (!plinkOutBed.exists())
+        {
+            throw new PipelineJobException("Unable to find file: " + plinkOutBed.getPath());
+        }
+
+        KingWrapper wrapper = new KingWrapper(getPipelineCtx().getLogger());
+        wrapper.setWorkingDir(outputDirectory);
+
+        List<String> kingArgs = new ArrayList<>();
+        kingArgs.add(wrapper.getExe().getPath());
+
+        kingArgs.add("-b");
+        kingArgs.add(plinkOutBed.getPath());
+
+        kingArgs.add("--prefix");
+        kingArgs.add(SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()));
+
+        if (threads != null)
+        {
+            kingArgs.add("--cpus");
+            kingArgs.add(threads.toString());
+        }
+
+        kingArgs.add("--kinship");
+
+        File kinshipOutput = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".kin");
+        wrapper.execute(kingArgs);
+        if (!kinshipOutput.exists())
+        {
+            throw new PipelineJobException("Unable to find file: " + kinshipOutput.getPath());
+        }
+
+        File kinshipOutputTxt = new File(kinshipOutput.getPath() + ".txt");
+        if (kinshipOutputTxt.exists())
+        {
+            kinshipOutputTxt.delete();
+        }
+
+        try
+        {
+            FileUtils.moveFile(kinshipOutput, kinshipOutputTxt);
+        }
+        catch (IOException e)
+        {
+            throw new PipelineJobException(e);
+        }
+
+        output.addSequenceOutput(kinshipOutputTxt, "King Relatedness: " + inputVCF.getName(), "KING Relatedness", null, null, genome.getGenomeId(), null);
+
+        return output;
+    }
+
+    public static class KingWrapper extends AbstractCommandWrapper
+    {
+        public KingWrapper(@Nullable Logger logger)
+        {
+            super(logger);
+        }
+
+        public File getExe()
+        {
+            return SequencePipelineService.get().getExeForPackage("KINGPATH", "king");
+        }
+    }
+}
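In short, the new step chains two external tools: plink converts the input VCF to a bed/bim/fam fileset (optionally restricted to numeric, X, and Y chromosomes, and to biallelic sites via --max-alleles 2), and KING then computes pairwise kinship from the resulting .bed. Assuming the plink wrapper resolves a plink2 binary and the KING wrapper resolves king (the latter via getExeForPackage("KINGPATH", "king")), and with purely illustrative paths and thread counts, the assembled commands look roughly like:

    plink2 --vcf input.vcf.gz --make-bed --chr 1,2,...,X,Y --silent --max-alleles 2 --out plink --threads 8
    king -b plink.bed --prefix input --cpus 8 --kinship

KING's <prefix>.kin table is then renamed to <prefix>.kin.txt and registered as the step's "KING Relatedness" sequence output.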

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java

Lines changed: 6 additions & 4 deletions
@@ -42,6 +42,7 @@
 import java.io.PrintWriter;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -255,12 +256,13 @@ public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<Seque
                 filter.addCondition(FieldKey.fromString("application"), allowableApplications, CompareType.IN);
             }

-            Set<String> applications = new HashSet<>(new TableSelector(QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), targetContainer, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_READSETS), PageFlowUtil.set("application"), filter, null).getArrayList(String.class));
-            if (applications.size() == 1)
+            Collection<Map<String, Object>> results = new TableSelector(QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), targetContainer, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_READSETS), PageFlowUtil.set("application", "rowid"), filter, null).getMapCollection();
+            if (results.size() == 1)
             {
-                writer.println(sample + "\t" + applications.iterator().next());
+                Map<String, Object> row = results.iterator().next();
+                writer.println(sample + "\t" + row.get("application") + "\t" + row.get("rowid"));
             }
-            else if (applications.size() > 1)
+            else if (results.size() > 1)
             {
                 duplicates.add(sample);
             }
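The practical effect (values below are hypothetical) is that each matched sample line in the generated metadata file now carries the readset rowid as a third tab-separated column, for example:

    Sample123    RNA-seq    4521

instead of the previous two-column sample/application pair.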

jbrowse/build.gradle

Lines changed: 17 additions & 0 deletions
@@ -24,3 +24,20 @@ dependencies {
     BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: ":server:modules:DiscvrLabKeyModules:SequenceAnalysis", depProjectConfig: "published", depExtension: "module")
     BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: BuildUtils.getPlatformModuleProjectPath(project.gradle, "pipeline"), depProjectConfig: "published", depExtension: "module")
 }
+
+def jbPkgTask = project.tasks.named("npm_run_jb-pkg")
+
+jbPkgTask.configure {
+    outputs.cacheIf {false}
+
+    inputs.dir(project.file("./node_modules/@jbrowse/cli")).withPathSensitivity(PathSensitivity.RELATIVE)
+    outputs.dir(project.file("./resources/external/jb-cli"))
+}
+
+project.tasks.named("npm_run_build-prod").configure {
+    finalizedBy jbPkgTask.get()
+}
+
+project.tasks.named("npm_run_build-dev").configure {
+    finalizedBy jbPkgTask.get()
+}

jbrowse/package.json

Lines changed: 2 additions & 2 deletions
@@ -7,8 +7,8 @@
     "build": "npm run build-dev",
     "start": "cross-env NODE_ENV=development LK_MODULE_CONTAINER=DiscvrLabKeyModules LK_MODULE=jbrowse webpack-dev-server --config config/watch.config.js",
     "build-no-pkg": "npm run clean && cross-env NODE_ENV=development LK_MODULE_CONTAINER=DiscvrLabKeyModules LK_MODULE=jbrowse webpack --config config/dev.config.js --progress --profile",
-    "build-dev": "npm run build-no-pkg && npm run jb-pkg",
-    "build-prod": "npm run clean && cross-env NODE_ENV=production PROD_SOURCE_MAP=source-map LK_MODULE_CONTAINER=DiscvrLabKeyModules LK_MODULE=jbrowse webpack --config config/prod.config.js --progress --profile && npm run jb-pkg",
+    "build-dev": "npm run build-no-pkg",
+    "build-prod": "npm run clean && cross-env NODE_ENV=production PROD_SOURCE_MAP=source-map LK_MODULE_CONTAINER=DiscvrLabKeyModules LK_MODULE=jbrowse webpack --config config/prod.config.js --progress --profile",
     "clean": "rimraf resources/web/gen && rimraf resources/web/jbrowse/gen && rimraf resources/views/gen && rimraf resources/views/browser*",
     "test": "cross-env NODE_ENV=test jest",
     "jb-pkg": "pkg ./node_modules/@jbrowse/cli --out-path ./resources/external/jb-cli"

singlecell/resources/chunks/AvgExpression.R

Lines changed: 38 additions & 3 deletions
@@ -1,10 +1,45 @@
+GenerateAveragedData <- function(seuratObj, groupFields, addMetadata) {
+    if (addMetadata && !'cDNA_ID' %in% names(seuratObj@meta.data)) {
+        stop('A field named cDNA_ID must exist when addMetadata=TRUE')
+    }
+
+    if (addMetadata && !'cDNA_ID' %in% groupFields) {
+        stop('When addMetadata=TRUE, cDNA_ID must be part of groupFields')
+    }
+
+    meta <- unique(seuratObj@meta.data[,groupFields, drop = F])
+    rownames(meta) <- apply(meta, 1, function(y){
+        return(paste0(y, collapse = '_'))
+    })
+
+    Seurat::Idents(seuratObj) <- rownames(meta)
+
+    for (assayName in names(seuratObj@assays)) {
+        if (!(!identical(seuratObj@assays[[assayName]]@counts, seuratObj@assays[[assayName]]@data))){
+            print(paste0('Seurat assay ', assayName, ' does not appear to be normalized, running now:'))
+            seuratObj <- Seurat::NormalizeData(seuratObj, verbose = FALSE, assay = assayName)
+        }
+    }
+
+    a <- Seurat::AverageExpression(seuratObj, return.seurat = T, verbose = F)
+    a <- Seurat::AddMetaData(a, meta)
+
+    totals <- seuratObj@meta.data %>% group_by_at(groupFields) %>% summarise(TotalCells = n())
+    a$TotalCells <- totals$TotalCells
+
+    if (addMetadata) {
+        a <- Rdiscvr::QueryAndApplyMetadataUsingCDNA(a)
+    }
+
+    return(a)
+}
+
 for (datasetId in names(seuratObjects)) {
     printName(datasetId)
     seuratObj <- readRDS(seuratObjects[[datasetId]])

-    df <- CellMembrane::AvgExpression(seuratObj, groupField = groupField)
-    write.table(df, file = paste0(outputPrefix, '.', makeLegalFileName(datasetId), '.avg.', groupField, '.txt'), sep = '\t', row.names = FALSE, quote = FALSE)
-    rm(df)
+    seuratObj <- GenerateAveragedData(seuratObj, groupFields = groupFields, addMetadata = addMetadata)
+    saveData(seuratObj, datasetId)

     # Cleanup
     rm(seuratObj)

singlecell/resources/chunks/CalculateUCellScores.R

Lines changed: 1 addition & 1 deletion
@@ -3,7 +3,7 @@ for (datasetId in names(seuratObjects)) {
     seuratObj <- readRDS(seuratObjects[[datasetId]])

     message(paste0('Loading dataset ', datasetId, ', with total cells: ', ncol(seuratObj)))
-    seuratObj <- RIRA::CalculateUCellScores(seuratObj)
+    seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = storeRanks, assayName = assayName)

     saveData(seuratObj, datasetId)

singlecell/resources/chunks/CiteSeqDimReduxDist.R

Lines changed: 2 additions & 2 deletions
@@ -2,8 +2,8 @@ for (datasetId in names(seuratObjects)) {
     printName(datasetId)
     seuratObj <- readRDS(seuratObjects[[datasetId]])

-    if (!('ADT' %in% names(seuratObj@assays))) {
-        print('ADT assay not present, skipping')
+    if (!(assayName %in% names(seuratObj@assays))) {
+        print(paste0(assayName, ' assay not present, skipping'))
     } else {
         tryCatch({
             seuratObj <- bindArgs(CellMembrane::CiteSeqDimRedux.Dist, seuratObj)()

singlecell/resources/chunks/CiteSeqDimReduxPca.R

Lines changed: 2 additions & 2 deletions
@@ -2,8 +2,8 @@ for (datasetId in names(seuratObjects)) {
     printName(datasetId)
     seuratObj <- readRDS(seuratObjects[[datasetId]])

-    if (!('ADT' %in% names(seuratObj@assays))) {
-        print('ADT assay not present, skipping')
+    if (!(assayName %in% names(seuratObj@assays))) {
+        print(paste0(assayName, ' assay not present, skipping'))
     } else {
         seuratObj <- bindArgs(CellMembrane::CiteSeqDimRedux.PCA, seuratObj)()
     }
