Skip to content

Commit 45ef66b

Browse files
authored
Merge pull request #177 from BimberLab/22.11_fb_merge
Merge 22.7 to 22.11
2 parents afe8d08 + e02b6cc commit 45ef66b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

46 files changed

+440
-103
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,7 @@ public static void registerPipelineSteps()
303303
SequencePipelineService.get().registerPipelineStep(new VariantsToTableStep.Provider());
304304
SequencePipelineService.get().registerPipelineStep(new VariantQCStep.Provider());
305305
SequencePipelineService.get().registerPipelineStep(new PlinkPcaStep.Provider());
306+
SequencePipelineService.get().registerPipelineStep(new KingInferenceStep.Provider());
306307
SequencePipelineService.get().registerPipelineStep(new MendelianViolationReportStep.Provider());
307308
SequencePipelineService.get().registerPipelineStep(new SummarizeGenotypeQualityStep.Provider());
308309

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisServiceImpl.java

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -253,16 +253,10 @@ public File ensureVcfIndex(File vcf, Logger log, boolean forceRecreate) throws I
253253
try
254254
{
255255
FileType gz = new FileType(".gz");
256-
File expected = new File(vcf.getPath() + FileExtensions.TRIBBLE_INDEX);
257-
File tbi = new File(vcf.getPath() + ".tbi");
258-
259-
if (!forceRecreate && expected.exists())
260-
{
261-
return expected;
262-
}
263-
else if (!forceRecreate && tbi.exists())
256+
File expectedIdx = gz.isType(vcf) ? new File(vcf.getPath() + ".tbi") : new File(vcf.getPath() + FileExtensions.TRIBBLE_INDEX);
257+
if (!forceRecreate && expectedIdx.exists())
264258
{
265-
return tbi;
259+
return expectedIdx;
266260
}
267261
else
268262
{
@@ -272,15 +266,23 @@ else if (!forceRecreate && tbi.exists())
272266
{
273267
TabixRunner r = new TabixRunner(log);
274268
r.execute(vcf);
269+
if (!expectedIdx.exists())
270+
{
271+
throw new PipelineJobException("Expected index was not created: " + expectedIdx.getPath());
272+
}
275273

276-
return tbi;
274+
return expectedIdx;
277275
}
278276
else
279277
{
280278
Index idx = IndexFactory.createDynamicIndex(vcf, new VCFCodec());
281279
idx.writeBasedOnFeatureFile(vcf);
280+
if (!expectedIdx.exists())
281+
{
282+
throw new PipelineJobException("Expected index was not created: " + expectedIdx.getPath());
283+
}
282284

283-
return expected;
285+
return expectedIdx;
284286
}
285287
}
286288
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/pipeline/ProcessVariantsHandler.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -677,8 +677,15 @@ private void processFile(File input, Integer libraryId, Integer readsetId, JobCo
677677
so1.setCreated(new Date());
678678
so1.setModified(new Date());
679679
so1.setReadset(readsetId);
680-
so1.setDescription("Total samples: " + sampleCount);
680+
String description = "Total samples: " + sampleCount;
681681

682+
String extraDescription = StringUtils.trimToNull(ctx.getParams().optString("jobDescription"));
683+
if (extraDescription != null)
684+
{
685+
description = description + '\n' + extraDescription;
686+
}
687+
688+
so1.setDescription(description);
682689
_resumer.getFileManager().addSequenceOutput(so1);
683690
}
684691
}
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
package org.labkey.sequenceanalysis.run.variant;
2+
3+
import htsjdk.samtools.SAMSequenceDictionary;
4+
import htsjdk.samtools.SAMSequenceRecord;
5+
import htsjdk.samtools.util.Interval;
6+
import htsjdk.variant.utils.SAMSequenceDictionaryExtractor;
7+
import org.apache.commons.io.FileUtils;
8+
import org.apache.commons.lang3.StringUtils;
9+
import org.apache.commons.lang3.math.NumberUtils;
10+
import org.apache.logging.log4j.Logger;
11+
import org.json.old.JSONObject;
12+
import org.labkey.api.pipeline.PipelineJobException;
13+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
14+
import org.labkey.api.sequenceanalysis.pipeline.AbstractVariantProcessingStepProvider;
15+
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
16+
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
17+
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
18+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
19+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
20+
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
21+
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl;
22+
import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
23+
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
24+
25+
import javax.annotation.Nullable;
26+
import java.io.File;
27+
import java.io.IOException;
28+
import java.util.ArrayList;
29+
import java.util.Arrays;
30+
import java.util.List;
31+
32+
public class KingInferenceStep extends AbstractCommandPipelineStep<KingInferenceStep.KingWrapper> implements VariantProcessingStep
33+
{
34+
public KingInferenceStep(PipelineStepProvider<?> provider, PipelineContext ctx)
35+
{
36+
super(provider, ctx, new KingInferenceStep.KingWrapper(ctx.getLogger()));
37+
}
38+
39+
public static class Provider extends AbstractVariantProcessingStepProvider<KingInferenceStep>
40+
{
41+
public Provider()
42+
{
43+
super("KingInferenceStep", "KING/Relatedness", "", "This will run KING to infer kinship from a VCF", Arrays.asList(
44+
ToolParameterDescriptor.create("limitToChromosomes", "Limit to Chromosomes", "If checked, the analysis will include only the primary chromosomes", "checkbox", new JSONObject(){{
45+
put("checked", true);
46+
}}, true)
47+
), null, "https://www.kingrelatedness.com/manual.shtml");
48+
}
49+
50+
@Override
51+
public KingInferenceStep create(PipelineContext ctx)
52+
{
53+
return new KingInferenceStep(this, ctx);
54+
}
55+
}
56+
57+
@Override
58+
public Output processVariants(File inputVCF, File outputDirectory, ReferenceGenome genome, @Nullable List<Interval> intervals) throws PipelineJobException
59+
{
60+
VariantProcessingStepOutputImpl output = new VariantProcessingStepOutputImpl();
61+
62+
output.addInput(inputVCF, "Input VCF");
63+
output.addInput(genome.getWorkingFastaFile(), "Reference Genome");
64+
65+
File plinkOut = new File(outputDirectory, "plink");
66+
output.addIntermediateFile(new File(plinkOut.getPath() + ".bed"));
67+
//output.addIntermediateFile(new File(plinkOut.getPath() + ".fam"));
68+
output.addIntermediateFile(new File(plinkOut.getPath() + ".bim"));
69+
output.addIntermediateFile(new File(plinkOut.getPath() + ".log"));
70+
output.addIntermediateFile(new File(plinkOut.getPath() + "-temporary.psam"));
71+
72+
PlinkPcaStep.PlinkWrapper plink = new PlinkPcaStep.PlinkWrapper(getPipelineCtx().getLogger());
73+
List<String> plinkArgs = new ArrayList<>();
74+
plinkArgs.add(plink.getExe().getPath());
75+
plinkArgs.add("--vcf");
76+
plinkArgs.add(inputVCF.getPath());
77+
78+
plinkArgs.add("--make-bed");
79+
80+
boolean limitToChromosomes = getProvider().getParameterByName("limitToChromosomes").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, true);
81+
if (limitToChromosomes)
82+
{
83+
SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(genome.getSequenceDictionary().toPath());
84+
List<String> toKeep = dict.getSequences().stream().filter(s -> {
85+
String name = StringUtils.replaceIgnoreCase(s.getSequenceName(), "^chr", "");
86+
87+
return NumberUtils.isCreatable(name) || "X".equalsIgnoreCase(name) || "Y".equalsIgnoreCase(name);
88+
}).map(SAMSequenceRecord::getSequenceName).toList();
89+
90+
plinkArgs.add("--chr");
91+
plinkArgs.add(StringUtils.join(toKeep, ","));
92+
}
93+
94+
plinkArgs.add("--allow-extra-chr");
95+
plinkArgs.add("--silent");
96+
97+
plinkArgs.add("--max-alleles");
98+
plinkArgs.add("2");
99+
100+
plinkArgs.add("--out");
101+
plinkArgs.add(plinkOut.getPath());
102+
103+
Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
104+
if (threads != null)
105+
{
106+
plinkArgs.add("--threads");
107+
plinkArgs.add(threads.toString());
108+
}
109+
110+
//TODO: consider --memory (in MB)
111+
112+
plink.execute(plinkArgs);
113+
114+
File plinkOutBed = new File(plinkOut.getPath() + ".bed");
115+
if (!plinkOutBed.exists())
116+
{
117+
throw new PipelineJobException("Unable to find file: " + plinkOutBed.getPath());
118+
}
119+
120+
KingWrapper wrapper = new KingWrapper(getPipelineCtx().getLogger());
121+
wrapper.setWorkingDir(outputDirectory);
122+
123+
List<String> kingArgs = new ArrayList<>();
124+
kingArgs.add(wrapper.getExe().getPath());
125+
126+
kingArgs.add("-b");
127+
kingArgs.add(plinkOutBed.getPath());
128+
129+
kingArgs.add("--prefix");
130+
kingArgs.add(SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()));
131+
132+
if (threads != null)
133+
{
134+
kingArgs.add("--cpus");
135+
kingArgs.add(threads.toString());
136+
}
137+
138+
kingArgs.add("--related");
139+
140+
File kinshipOutput = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".kin");
141+
wrapper.execute(kingArgs);
142+
if (!kinshipOutput.exists())
143+
{
144+
throw new PipelineJobException("Unable to find file: " + kinshipOutput.getPath());
145+
}
146+
147+
File kinshipOutputTxt = new File(kinshipOutput.getPath() + ".txt");
148+
if (kinshipOutputTxt.exists())
149+
{
150+
kinshipOutputTxt.delete();
151+
}
152+
153+
try
154+
{
155+
FileUtils.moveFile(kinshipOutput, kinshipOutputTxt);
156+
}
157+
catch (IOException e)
158+
{
159+
throw new PipelineJobException(e);
160+
}
161+
162+
output.addSequenceOutput(kinshipOutputTxt, "King Relatedness: " + inputVCF.getName(), "KING Relatedness", null, null, genome.getGenomeId(), null);
163+
164+
return output;
165+
}
166+
167+
public static class KingWrapper extends AbstractCommandWrapper
168+
{
169+
public KingWrapper(@Nullable Logger logger)
170+
{
171+
super(logger);
172+
}
173+
174+
public File getExe()
175+
{
176+
return SequencePipelineService.get().getExeForPackage("KINGPATH", "king");
177+
}
178+
}
179+
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/PlinkPcaStep.java

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import java.io.PrintWriter;
4343
import java.util.ArrayList;
4444
import java.util.Arrays;
45+
import java.util.Collection;
4546
import java.util.HashMap;
4647
import java.util.HashSet;
4748
import java.util.List;
@@ -255,12 +256,13 @@ public void init(PipelineJob job, SequenceAnalysisJobSupport support, List<Seque
255256
filter.addCondition(FieldKey.fromString("application"), allowableApplications, CompareType.IN);
256257
}
257258

258-
Set<String> applications = new HashSet<>(new TableSelector(QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), targetContainer, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_READSETS), PageFlowUtil.set("application"), filter, null).getArrayList(String.class));
259-
if (applications.size() == 1)
259+
Collection<Map<String, Object>> results = new TableSelector(QueryService.get().getUserSchema(getPipelineCtx().getJob().getUser(), targetContainer, SequenceAnalysisSchema.SCHEMA_NAME).getTable(SequenceAnalysisSchema.TABLE_READSETS), PageFlowUtil.set("application", "rowid"), filter, null).getMapCollection();
260+
if (results.size() == 1)
260261
{
261-
writer.println(sample + "\t" + applications.iterator().next());
262+
Map<String, Object> row = results.iterator().next();
263+
writer.println(sample + "\t" + row.get("application") + "\t" + row.get("rowid"));
262264
}
263-
else if (applications.size() > 1)
265+
else if (results.size() > 1)
264266
{
265267
duplicates.add(sample);
266268
}

jbrowse/build.gradle

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,20 @@ dependencies {
2424
BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: ":server:modules:DiscvrLabKeyModules:SequenceAnalysis", depProjectConfig: "published", depExtension: "module")
2525
BuildUtils.addLabKeyDependency(project: project, config: "modules", depProjectPath: BuildUtils.getPlatformModuleProjectPath(project.gradle, "pipeline"), depProjectConfig: "published", depExtension: "module")
2626
}
27+
28+
def jbPkgTask = project.tasks.named("npm_run_jb-pkg")
29+
30+
jbPkgTask.configure {
31+
outputs.cacheIf {false}
32+
33+
inputs.dir(project.file("./node_modules/@jbrowse/cli")).withPathSensitivity(PathSensitivity.RELATIVE)
34+
outputs.dir(project.file("./resources/external/jb-cli"))
35+
}
36+
37+
project.tasks.named("npm_run_build-prod").configure {
38+
finalizedBy jbPkgTask.get()
39+
}
40+
41+
project.tasks.named("npm_run_build-dev").configure {
42+
finalizedBy jbPkgTask.get()
43+
}

jbrowse/package.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@
77
"build": "npm run build-dev",
88
"start": "cross-env NODE_ENV=development LK_MODULE_CONTAINER=DiscvrLabKeyModules LK_MODULE=jbrowse webpack-dev-server --config config/watch.config.js",
99
"build-no-pkg": "npm run clean && cross-env NODE_ENV=development LK_MODULE_CONTAINER=DiscvrLabKeyModules LK_MODULE=jbrowse webpack --config config/dev.config.js --progress --profile",
10-
"build-dev": "npm run build-no-pkg && npm run jb-pkg",
11-
"build-prod": "npm run clean && cross-env NODE_ENV=production PROD_SOURCE_MAP=source-map LK_MODULE_CONTAINER=DiscvrLabKeyModules LK_MODULE=jbrowse webpack --config config/prod.config.js --progress --profile && npm run jb-pkg",
10+
"build-dev": "npm run build-no-pkg",
11+
"build-prod": "npm run clean && cross-env NODE_ENV=production PROD_SOURCE_MAP=source-map LK_MODULE_CONTAINER=DiscvrLabKeyModules LK_MODULE=jbrowse webpack --config config/prod.config.js --progress --profile",
1212
"clean": "rimraf resources/web/gen && rimraf resources/web/jbrowse/gen && rimraf resources/views/gen && rimraf resources/views/browser*",
1313
"test": "cross-env NODE_ENV=test jest",
1414
"jb-pkg": "pkg ./node_modules/@jbrowse/cli --out-path ./resources/external/jb-cli"

singlecell/resources/chunks/AvgExpression.R

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,53 @@
1+
if (!file.exists('/homeDir/.netrc')) {
2+
print(list.files('/homeDir'))
3+
stop('Unable to find file: /homeDir/.netrc')
4+
}
5+
6+
invisible(Rlabkey::labkey.setCurlOptions(NETRC_FILE = '/homeDir/.netrc'))
7+
Rdiscvr::SetLabKeyDefaults(baseUrl = serverBaseUrl, defaultFolder = defaultLabKeyFolder)
8+
9+
GenerateAveragedData <- function(seuratObj, groupFields, addMetadata) {
10+
if (addMetadata && !'cDNA_ID' %in% names(seuratObj@meta.data)) {
11+
stop('A field names cDNA_ID must exist when addMetadata=TRUE')
12+
}
13+
14+
if (addMetadata && !'cDNA_ID' %in% groupFields) {
15+
stop('When addMetadata=TRUE, cDNA_ID must be part of groupFields')
16+
}
17+
18+
meta <- unique(seuratObj@meta.data[,groupFields, drop = F])
19+
rownames(meta) <- apply(meta, 1, function(y){
20+
return(paste0(y, collapse = '_'))
21+
})
22+
23+
Seurat::Idents(seuratObj) <- rownames(meta)
24+
25+
for (assayName in names(seuratObj@assays)) {
26+
if (!(!identical(seuratObj@assays[[assayName]]@counts, seuratObj@assays[[assayName]]@data))){
27+
print(paste0('Seurat assay', assayName, ' does not appear to be normalized, running now:'))
28+
seuratObj <- Seurat::NormalizeData(seuratObj, verbose = FALSE, assay = assayName)
29+
}
30+
}
31+
32+
a <- Seurat::AverageExpression(seuratObj, return.seurat = T, verbose = F)
33+
a <- Seurat::AddMetaData(a, meta)
34+
35+
totals <- seuratObj@meta.data %>% group_by_at(groupFields) %>% summarise(TotalCells = n())
36+
a$TotalCells <- totals$TotalCells
37+
38+
if (addMetadata) {
39+
a <- Rdiscvr::QueryAndApplyMetadataUsingCDNA(a)
40+
}
41+
42+
return(a)
43+
}
44+
145
for (datasetId in names(seuratObjects)) {
246
printName(datasetId)
347
seuratObj <- readRDS(seuratObjects[[datasetId]])
448

5-
df <- CellMembrane::AvgExpression(seuratObj, groupField = groupField)
6-
write.table(df, file = paste0(outputPrefix, '.', makeLegalFileName(datasetId), '.avg.', groupField, '.txt'), sep = '\t', row.names = FALSE, quote = FALSE)
7-
rm(df)
49+
seuratObj <- GenerateAveragedData(seuratObj, groupFields = groupFields, addMetadata = addMetadata)
50+
saveData(seuratObj, datasetId)
851

952
# Cleanup
1053
rm(seuratObj)

singlecell/resources/chunks/CalculateUCellScores.R

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ for (datasetId in names(seuratObjects)) {
33
seuratObj <- readRDS(seuratObjects[[datasetId]])
44

55
message(paste0('Loading dataset ', datasetId, ', with total cells: ', ncol(seuratObj)))
6-
seuratObj <- RIRA::CalculateUCellScores(seuratObj)
6+
seuratObj <- RIRA::CalculateUCellScores(seuratObj, storeRanks = storeRanks, assayName = assayName)
77

88
saveData(seuratObj, datasetId)
99

singlecell/resources/chunks/CiteSeqDimReduxDist.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ for (datasetId in names(seuratObjects)) {
22
printName(datasetId)
33
seuratObj <- readRDS(seuratObjects[[datasetId]])
44

5-
if (!('ADT' %in% names(seuratObj@assays))) {
6-
print('ADT assay not present, skipping')
5+
if (!(assayName %in% names(seuratObj@assays))) {
6+
print(paste0(assayName, ' assay not present, skipping'))
77
} else {
88
tryCatch({
99
seuratObj <- bindArgs(CellMembrane::CiteSeqDimRedux.Dist, seuratObj)()

0 commit comments

Comments
 (0)