Skip to content

Commit 55b3207

Browse files
committed
Support SVTyper
1 parent 11fddd3 commit 55b3207

File tree

3 files changed

+208
-0
lines changed

3 files changed

+208
-0
lines changed

SequenceAnalysis/pipeline_code/extra_tools_install.sh

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -274,3 +274,21 @@ then
274274
else
275275
echo "Already installed"
276276
fi
277+
278+
# Install SVTyper via pip and link its two executables (svtyper, svtyper-sso)
# into LKTOOLS_DIR. Runs when the link is absent or FORCE_REINSTALL is set.
if [[ ! -e ${LKTOOLS_DIR}/svtyper || ! -z $FORCE_REINSTALL ]];
then
    echo "Cleaning up previous installs"
    rm -Rf "${LKTOOLS_DIR}"/svtyper*

    # NOTE: this fork is used to ensure python3 compatibility
    #python3 -m pip install --user git+https://github.com/hall-lab/svtyper.git
    python3 -m pip install --user git+https://github.com/bbimber/svtyper.git

    for TOOL in svtyper svtyper-sso
    do
        # '|| true' keeps a failed lookup from aborting before the explicit check below
        SVTYPER=$(command -v "$TOOL" || true)
        if [[ -z $SVTYPER ]];
        then
            # With an empty source, 'ln -s' would silently create a bogus link in the
            # current directory instead of failing, so stop here with a clear message.
            echo "ERROR: $TOOL was not found on the PATH after pip install" >&2
            exit 1
        fi

        ln -s "$SVTYPER" "${LKTOOLS_DIR}/${TOOL}"
    done
else
    echo "Already installed"
fi

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@
101101
import org.labkey.sequenceanalysis.run.alignment.MosaikWrapper;
102102
import org.labkey.sequenceanalysis.run.alignment.ParagraphStep;
103103
import org.labkey.sequenceanalysis.run.alignment.Pbmm2Wrapper;
104+
import org.labkey.sequenceanalysis.run.alignment.SVTyperStep;
104105
import org.labkey.sequenceanalysis.run.alignment.StarWrapper;
105106
import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper;
106107
import org.labkey.sequenceanalysis.run.analysis.BamIterator;
@@ -403,6 +404,7 @@ public static void registerPipelineSteps()
403404
SequenceAnalysisService.get().registerFileHandler(new DeepVariantHandler());
404405
SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler());
405406
SequenceAnalysisService.get().registerFileHandler(new ParagraphStep());
407+
SequenceAnalysisService.get().registerFileHandler(new SVTyperStep());
406408
SequenceAnalysisService.get().registerFileHandler(new UpdateReadsetFilesHandler());
407409

408410
SequenceAnalysisService.get().registerReadsetHandler(new MultiQCHandler());
SequenceAnalysis/src/org/labkey/sequenceanalysis/run/alignment/SVTyperStep.java

Lines changed: 188 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,188 @@
1+
package org.labkey.sequenceanalysis.run.alignment;
2+
3+
import org.apache.commons.io.FileUtils;
4+
import org.apache.commons.lang3.StringUtils;
5+
import org.json.JSONObject;
6+
import org.labkey.api.module.ModuleLoader;
7+
import org.labkey.api.pipeline.PipelineJob;
8+
import org.labkey.api.pipeline.PipelineJobException;
9+
import org.labkey.api.pipeline.RecordedAction;
10+
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
11+
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
12+
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
13+
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
14+
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
15+
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
16+
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
17+
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
18+
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
19+
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
20+
import org.labkey.sequenceanalysis.util.SequenceUtil;
21+
22+
import java.io.File;
23+
import java.io.IOException;
24+
import java.util.ArrayList;
25+
import java.util.Arrays;
26+
import java.util.List;
27+
28+
//https://github.com/hall-lab/svtyper
29+
30+
public class SVTyperStep extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
31+
{
32+
public SVTyperStep()
33+
{
34+
super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "SVTyper SV Genotyping", "This will run SVTyper on one or more BAM files to genotype SVs", null, Arrays.asList(
35+
ToolParameterDescriptor.createExpDataParam("svVCF", "Input VCF", "This is the DataId of the VCF containing the SVs to genotype", "ldk-expdatafield", new JSONObject()
36+
{{
37+
put("allowBlank", false);
38+
}}, null),
39+
ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{
40+
put("checked", true);
41+
}}, true)
42+
));
43+
}
44+
45+
@Override
46+
public boolean doSplitJobs()
47+
{
48+
return true;
49+
}
50+
51+
@Override
52+
public boolean canProcess(SequenceOutputFile o)
53+
{
54+
return o.getFile() != null && o.getFile().exists() && SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(o.getFile());
55+
}
56+
57+
@Override
58+
public boolean doRunRemote()
59+
{
60+
return true;
61+
}
62+
63+
@Override
64+
public boolean doRunLocal()
65+
{
66+
return false;
67+
}
68+
69+
@Override
70+
public SequenceOutputProcessor getProcessor()
71+
{
72+
return new Processor();
73+
}
74+
75+
public static class Processor implements SequenceOutputProcessor
76+
{
77+
@Override
78+
public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
79+
{
80+
81+
}
82+
83+
@Override
84+
public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
85+
{
86+
int svVcfId = ctx.getParams().optInt("svVCF", 0);
87+
if (svVcfId == 0)
88+
{
89+
throw new PipelineJobException("svVCF param was null");
90+
}
91+
92+
File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId);
93+
if (svVcf == null)
94+
{
95+
throw new PipelineJobException("File not found for ID: " + svVcfId);
96+
}
97+
else if (!svVcf.exists())
98+
{
99+
throw new PipelineJobException("Missing file: " + svVcf.getPath());
100+
}
101+
102+
Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
103+
104+
for (SequenceOutputFile so : inputFiles)
105+
{
106+
List<String> jsonArgs = new ArrayList<>();
107+
SimpleScriptWrapper wrapper = new SimpleScriptWrapper(ctx.getLogger());
108+
jsonArgs.add(AbstractCommandWrapper.resolveFileInPath("svtyper", null, true).getPath());
109+
jsonArgs.add("-b");
110+
jsonArgs.add(so.getFile().getPath());
111+
112+
File coverageJson = new File(ctx.getWorkingDirectory(), "bam.json");
113+
jsonArgs.add("-l");
114+
jsonArgs.add(coverageJson.getPath());
115+
116+
117+
if (threads != null)
118+
{
119+
jsonArgs.add("--max_reads");
120+
jsonArgs.add(threads.toString());
121+
}
122+
123+
File doneFile = new File(ctx.getWorkingDirectory(), "json.done");
124+
ctx.getFileManager().addIntermediateFile(doneFile);
125+
if (doneFile.exists())
126+
{
127+
ctx.getLogger().info("BAM json already generated, skipping");
128+
}
129+
else
130+
{
131+
wrapper.execute(jsonArgs);
132+
try
133+
{
134+
FileUtils.touch(doneFile);
135+
ctx.getFileManager().addIntermediateFile(doneFile);
136+
}
137+
catch (IOException e)
138+
{
139+
throw new PipelineJobException(e);
140+
}
141+
}
142+
143+
if (!coverageJson.exists())
144+
{
145+
throw new PipelineJobException("Missing file: " + coverageJson.getPath());
146+
}
147+
ctx.getFileManager().addIntermediateFile(coverageJson);
148+
149+
List<String> svtyperArgs = new ArrayList<>();
150+
svtyperArgs.add(AbstractCommandWrapper.resolveFileInPath("svtyper-sso", null, true).getPath());
151+
152+
svtyperArgs.add("-i");
153+
svtyperArgs.add(svVcf.getPath());
154+
155+
svtyperArgs.add("-B");
156+
svtyperArgs.add(so.getFile().getPath());
157+
158+
svtyperArgs.add("-l");
159+
svtyperArgs.add(coverageJson.getPath());
160+
161+
if (threads != null)
162+
{
163+
svtyperArgs.add("--core");
164+
svtyperArgs.add(threads.toString());
165+
}
166+
167+
File genotypes = new File(ctx.getWorkingDirectory(), SequenceAnalysisService.get().getUnzippedBaseName(so.getName()) + ".svtyper.vcf.gz");
168+
wrapper.execute(Arrays.asList("/bin/bash", "-c", StringUtils.join(svtyperArgs, " ") + "| bgzip -c"), ProcessBuilder.Redirect.to(genotypes));
169+
170+
if (!genotypes.exists())
171+
{
172+
throw new PipelineJobException("Missing file: " + genotypes.getPath());
173+
}
174+
175+
try
176+
{
177+
SequenceAnalysisService.get().ensureVcfIndex(genotypes, ctx.getLogger());
178+
}
179+
catch (IOException e)
180+
{
181+
throw new PipelineJobException(e);
182+
}
183+
184+
ctx.getFileManager().addSequenceOutput(genotypes, "SVTyper Genotypes: " + so.getName(), "SVTyper Genoypes", so.getReadset(), null, so.getLibrary_id(), "Input VCF: " + svVcf.getName() + " (" + svVcfId + ")");
185+
}
186+
}
187+
}
188+
}

0 commit comments

Comments
 (0)