Skip to content

Commit 35b3ae0

Browse files
committed
Support sawfish --sample-csv arg
1 parent 689af3e commit 35b3ae0

File tree

3 files changed

+111
-8
lines changed

3 files changed

+111
-8
lines changed

SequenceAnalysis/pipeline_code/extra_tools_install.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -326,10 +326,10 @@ then
326326
rm -Rf $LKTOOLS_DIR/sawfish*
327327

328328
# Download the same release (v2.0.4) that the tar/mv/ln steps below expect; fetching v2.0.0 leaves tar without a matching archive
wget https://github.com/PacificBiosciences/sawfish/releases/download/v2.0.4/sawfish-v2.0.4-x86_64-unknown-linux-gnu.tar.gz
329-
tar -xzf sawfish-v2.0.0-x86_64-unknown-linux-gnu.tar.gz
329+
tar -xzf sawfish-v2.0.4-x86_64-unknown-linux-gnu.tar.gz
330330

331-
mv sawfish-v2.0.0-x86_64-unknown-linux-gnu $LKTOOLS_DIR/
332-
ln -s $LKTOOLS_DIR/sawfish-v2.0.0/bin/sawfish $LKTOOLS_DIR/
331+
mv sawfish-v2.0.4-x86_64-unknown-linux-gnu $LKTOOLS_DIR/
332+
# The extracted directory keeps its platform-suffixed name when moved into $LKTOOLS_DIR, so link to the binary inside that directory
ln -s $LKTOOLS_DIR/sawfish-v2.0.4-x86_64-unknown-linux-gnu/bin/sawfish $LKTOOLS_DIR/
333333
else
334334
echo "Already installed"
335335
fi

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishAnalysis.java

Lines changed: 62 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ public static class Provider extends AbstractAnalysisStepProvider<SawfishAnalysi
3333
{
3434
public Provider()
3535
{
36-
super("sawfish", "Sawfish Analysis", null, "This will run sawfish SV dicvoery and calling on the selected BAMs", List.of(), null, null);
36+
super("sawfish", "Sawfish Analysis", null, "This will run sawfish SV discovery and calling on the selected CRAMs/BAMs", List.of(), null, null);
3737
}
3838

3939

@@ -45,16 +45,38 @@ public SawfishAnalysis create(PipelineContext ctx)
4545
}
4646

4747
@Override
48-
public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException
48+
public Output performAnalysisPerSampleRemote(Readset rs, File inputBamOrCram, ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException
4949
{
5050
AnalysisOutputImpl output = new AnalysisOutputImpl();
5151

52+
File inputFile = inputBamOrCram;
53+
if (SequenceUtil.FILETYPE.cram.getFileType().isType(inputFile))
54+
{
55+
CramToBam samtoolsRunner = new CramToBam(getPipelineCtx().getLogger());
56+
File bam = new File(getPipelineCtx().getWorkingDirectory(), inputFile.getName().replaceAll(".cram$", ".bam"));
57+
File bamIdx = new File(bam.getPath() + ".bai");
58+
if (!bamIdx.exists())
59+
{
60+
samtoolsRunner.convert(inputFile, bam, referenceGenome.getWorkingFastaFile(), SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger()));
61+
new SamtoolsIndexer(getPipelineCtx().getLogger()).execute(bam);
62+
}
63+
else
64+
{
65+
getPipelineCtx().getLogger().debug("BAM index exists, will not re-convert CRAM");
66+
}
67+
68+
inputFile = bam;
69+
70+
output.addIntermediateFile(bam);
71+
output.addIntermediateFile(bamIdx);
72+
}
73+
5274
List<String> args = new ArrayList<>();
5375
args.add(getExe().getPath());
5476
args.add("discover");
5577

5678
args.add("--bam");
57-
args.add(inputBam.getPath());
79+
args.add(inputFile.getPath());
5880

5981
// NOTE: sawfish stores the absolute path of the FASTA in the output JSON, so don't rely on working copies:
6082
args.add("--ref");
@@ -102,4 +124,41 @@ private File getExe()
102124
{
103125
return SequencePipelineService.get().getExeForPackage("SAWFISHPATH", "sawfish");
104126
}
127+
128+
private static class CramToBam extends SamtoolsRunner
129+
{
130+
public CramToBam(Logger log)
131+
{
132+
super(log);
133+
}
134+
135+
public void convert(File inputCram, File outputBam, File fasta, @Nullable Integer threads) throws PipelineJobException
136+
{
137+
getLogger().info("Converting CRAM to BAM");
138+
139+
execute(getParams(inputCram, outputBam, fasta, threads));
140+
}
141+
142+
private List<String> getParams(File inputCram, File outputBam, File fasta, @Nullable Integer threads)
143+
{
144+
List<String> params = new ArrayList<>();
145+
params.add(getSamtoolsPath().getPath());
146+
params.add("view");
147+
params.add("-b");
148+
params.add("-T");
149+
params.add(fasta.getPath());
150+
params.add("-o");
151+
params.add(outputBam.getPath());
152+
153+
if (threads != null)
154+
{
155+
params.add("-@");
156+
params.add(String.valueOf(threads));
157+
}
158+
159+
params.add(inputCram.getPath());
160+
161+
return params;
162+
}
163+
}
105164
}

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/SawfishJointCallingHandler.java

Lines changed: 46 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,20 @@
11
package org.labkey.sequenceanalysis.run.analysis;
22

3+
import au.com.bytecode.opencsv.CSVWriter;
4+
import htsjdk.samtools.util.IOUtil;
35
import org.apache.commons.io.FileUtils;
46
import org.json.JSONObject;
7+
import org.labkey.api.exp.api.ExpData;
8+
import org.labkey.api.exp.api.ExpRun;
9+
import org.labkey.api.exp.api.ExperimentService;
510
import org.labkey.api.module.ModuleLoader;
611
import org.labkey.api.pipeline.PipelineJob;
712
import org.labkey.api.pipeline.PipelineJobException;
813
import org.labkey.api.pipeline.RecordedAction;
914
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
1015
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
1116
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
17+
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
1218
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
1319
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
1420
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
@@ -66,6 +72,38 @@ public SequenceOutputProcessor getProcessor()
6672

6773
public static class Processor implements SequenceOutputProcessor
6874
{
75+
@Override
76+
public void init(JobContext ctx, List<SequenceOutputFile> inputFiles, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
77+
{
78+
try (CSVWriter csv = new CSVWriter(IOUtil.openFileForBufferedUtf8Writing(getSampleCsvFile(ctx))))
79+
{
80+
for (SequenceOutputFile so : inputFiles)
81+
{
82+
if (so.getRunId() == null)
83+
{
84+
throw new PipelineJobException("Unable to find ExperimentRun for: " + so.getRowid());
85+
}
86+
87+
ExpRun run = ExperimentService.get().getExpRun(so.getRunId());
88+
List<? extends ExpData> inputs = run.getInputDatas("Input BAM File", null);
89+
if (inputs.isEmpty())
90+
{
91+
throw new PipelineJobException("Unable to find input BAMs for: " + so.getRowid());
92+
}
93+
else if (inputs.size() > 1)
94+
{
95+
throw new PipelineJobException("More than one input BAM found for ExperimentRun: " + so.getRunId());
96+
}
97+
98+
csv.writeNext(new String[]{so.getFile().getParentFile().getPath(), inputs.get(0).getFile().getPath()});
99+
}
100+
}
101+
catch (IOException e)
102+
{
103+
throw new PipelineJobException(e);
104+
}
105+
}
106+
69107
@Override
70108
public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
71109
{
@@ -89,8 +127,6 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
89127
outputBaseName = outputBaseName.replaceAll(".vcf$", "");
90128
}
91129

92-
File expectedFinalOutput = new File(ctx.getOutputDir(), outputBaseName + ".vcf.gz");
93-
94130
File ouputVcf = runSawfishCall(ctx, filesToProcess, genome, outputBaseName);
95131

96132
SequenceOutputFile so = new SequenceOutputFile();
@@ -102,6 +138,11 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
102138
ctx.addSequenceOutput(so);
103139
}
104140

141+
private File getSampleCsvFile(PipelineContext ctx)
142+
{
143+
return new File(ctx.getSourceDirectory(), "sawfish.samples.csv");
144+
}
145+
105146
private File runSawfishCall(JobContext ctx, List<File> inputs, ReferenceGenome genome, String outputBaseName) throws PipelineJobException
106147
{
107148
if (inputs.isEmpty())
@@ -126,6 +167,9 @@ private File runSawfishCall(JobContext ctx, List<File> inputs, ReferenceGenome g
126167
args.add(sample.getParentFile().getPath());
127168
}
128169

170+
args.add("--sample-csv");
171+
args.add(getSampleCsvFile(ctx).getPath());
172+
129173
File outDir = new File(ctx.getOutputDir(), "sawfish");
130174
args.add("--output-dir");
131175
args.add(outDir.getPath());

0 commit comments

Comments
 (0)