Skip to content

Commit e98e5ea

Browse files
committed
Add tabix indexing to pacbio
1 parent 048afbf commit e98e5ea

File tree

3 files changed

+27
-34
lines changed

3 files changed

+27
-34
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PbsvAnalysis.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
package org.labkey.sequenceanalysis.run.analysis;
22

3-
import org.json.JSONObject;
43
import org.labkey.api.pipeline.PipelineJobException;
54
import org.labkey.api.sequenceanalysis.model.AnalysisModel;
65
import org.labkey.api.sequenceanalysis.model.Readset;
@@ -12,9 +11,9 @@
1211
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
1312
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
1413
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
15-
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
1614
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
1715
import org.labkey.api.util.FileUtil;
16+
import org.labkey.sequenceanalysis.run.util.TabixRunner;
1817

1918
import java.io.File;
2019
import java.util.ArrayList;
@@ -66,6 +65,17 @@ public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, Referenc
6665

6766
output.addSequenceOutput(svOut, rs.getName() + ": pbsv", "PBSV Output", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);
6867

68+
// Ensure a tabix index (.tbi) is created alongside the output; fail the job if it is missing:
69+
TabixRunner tabix = new TabixRunner(getPipelineCtx().getLogger());
70+
List<String> args2 = Arrays.asList(tabix.getExe().getPath(), "-f", "-s", "3", "-b", "4", "-e", "4", "-c", "#", svOut.getPath());
71+
tabix.execute(args2);
72+
73+
File idx = new File(svOut.getPath() + ".tbi");
74+
if (!idx.exists())
75+
{
76+
throw new PipelineJobException("Missing index: " + idx.getPath());
77+
}
78+
6979
return output;
7080
}
7181

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PbsvJointCallingHandler.java

Lines changed: 13 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -25,14 +25,17 @@
2525
import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler;
2626
import org.labkey.sequenceanalysis.pipeline.VariantProcessingJob;
2727
import org.labkey.sequenceanalysis.run.util.AbstractGenomicsDBImportHandler;
28+
import org.labkey.sequenceanalysis.run.util.GenotypeGVCFsWrapper;
2829
import org.labkey.sequenceanalysis.util.SequenceUtil;
2930

3031
import java.io.File;
3132
import java.io.IOException;
3233
import java.util.ArrayList;
3334
import java.util.Arrays;
35+
import java.util.HashSet;
3436
import java.util.LinkedHashSet;
3537
import java.util.List;
38+
import java.util.Set;
3639
import java.util.stream.Collectors;
3740

3841
public class PbsvJointCallingHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor> implements SequenceOutputHandler.TracksVCF, VariantProcessingStep.SupportsScatterGather, VariantProcessingStep.MayRequirePrepareTask
@@ -94,37 +97,16 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport
9497
public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
9598
{
9699
List<File> inputs = inputFiles.stream().map(SequenceOutputFile::getFile).collect(Collectors.toList());
100+
Set<File> toDelete = new HashSet<>();
101+
List<File> filesToProcess = new ArrayList<>();
97102
if (doCopyLocal(ctx.getParams()))
98103
{
99-
ctx.getLogger().info("Copying inputs locally");
100-
try
101-
{
102-
List<File> copiedInputs = new ArrayList<>();
103-
for (File f : inputs)
104-
{
105-
File copied = new File(ctx.getWorkingDirectory(), f.getName());
106-
if (copiedInputs.contains(copied))
107-
{
108-
throw new PipelineJobException("Duplicate input filenames, cannot use with copyLocally option: " + copied.getName());
109-
}
110-
111-
if (copied.exists())
112-
{
113-
copied.delete();
114-
}
115-
116-
FileUtils.copyFile(f, copied);
117-
copiedInputs.add(copied);
118-
119-
ctx.getFileManager().addIntermediateFile(copied);
120-
}
121-
122-
inputs = copiedInputs;
123-
}
124-
catch (IOException e)
125-
{
126-
throw new PipelineJobException(e);
127-
}
104+
ctx.getLogger().info("making local copies of svsig files");
105+
filesToProcess.addAll(GenotypeGVCFsWrapper.copyVcfsLocally(ctx, inputs, toDelete, false));
106+
}
107+
else
108+
{
109+
filesToProcess.addAll(inputs);
128110
}
129111

130112
ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenomes().iterator().next();
@@ -150,12 +132,12 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
150132
throw new PipelineJobException("Expected all intervals to start on the first base: " + i.toString());
151133
}
152134

153-
outputs.add(runPbsvCall(ctx, inputs, genome, outputBaseName + (getVariantPipelineJob(ctx.getJob()).getIntervalsForTask().size() == 1 ? "" : "." + i.getContig()), i.getContig()));
135+
outputs.add(runPbsvCall(ctx, filesToProcess, genome, outputBaseName + (getVariantPipelineJob(ctx.getJob()).getIntervalsForTask().size() == 1 ? "" : "." + i.getContig()), i.getContig()));
154136
}
155137
}
156138
else
157139
{
158-
outputs.add(runPbsvCall(ctx, inputs, genome, outputBaseName, null));
140+
outputs.add(runPbsvCall(ctx, filesToProcess, genome, outputBaseName, null));
159141
}
160142

161143
File vcfOutGz;

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/util/GenotypeGVCFsWrapper.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,8 @@ public static List<File> copyVcfsLocally(SequenceOutputHandler.JobContext ctx, C
161161
File movedIdx = null;
162162
File doneFile = new File(localWorkDir, destFile.getName() + ".copyDone");
163163

164-
if (GVCF.isType(f))
164+
// gVCF or PacBio svsig.gz: for either type, look for a .tbi index next to the file:
165+
if (GVCF.isType(f) || f.getName().toLowerCase().endsWith("svsig.gz"))
165166
{
166167
origIdx = new File(f.getPath() + ".tbi");
167168
if (!origIdx.exists())

0 commit comments

Comments
 (0)