Skip to content

Commit 611dee6

Browse files
committed
Make pbsv check each index for usage of the contig, since pbsv fails when running over a contig where any sample lacks data
1 parent 630d6b6 commit 611dee6

File tree

1 file changed

+39
-2
lines changed

1 file changed

+39
-2
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/analysis/PbsvJointCallingHandler.java

Lines changed: 39 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
import org.labkey.sequenceanalysis.pipeline.VariantProcessingJob;
2727
import org.labkey.sequenceanalysis.run.util.AbstractGenomicsDBImportHandler;
2828
import org.labkey.sequenceanalysis.run.util.GenotypeGVCFsWrapper;
29+
import org.labkey.sequenceanalysis.run.util.TabixRunner;
2930
import org.labkey.sequenceanalysis.util.SequenceUtil;
3031

3132
import java.io.File;
@@ -132,7 +133,11 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
132133
throw new PipelineJobException("Expected all intervals to start on the first base: " + i.toString());
133134
}
134135

135-
outputs.add(runPbsvCall(ctx, filesToProcess, genome, outputBaseName + (getVariantPipelineJob(ctx.getJob()).getIntervalsForTask().size() == 1 ? "" : "." + i.getContig()), i.getContig()));
136+
File o = runPbsvCall(ctx, filesToProcess, genome, outputBaseName + (getVariantPipelineJob(ctx.getJob()).getIntervalsForTask().size() == 1 ? "" : "." + i.getContig()), i.getContig());
137+
if (o != null)
138+
{
139+
outputs.add(o);
140+
}
136141
}
137142
}
138143
else
@@ -206,7 +211,39 @@ private File runPbsvCall(JobContext ctx, List<File> inputs, ReferenceGenome geno
206211

207212
args.add(genome.getWorkingFastaFile().getPath());
208213

209-
inputs.forEach(f -> {
214+
// Check each input's index to determine whether that sample actually has data on this contig, and skip samples that do not, since pbsv fails when any input lacks the contig. This is mostly relevant for small contigs.
215+
List<File> samplesToUse = new ArrayList<>();
216+
if (contig != null)
217+
{
218+
ctx.getLogger().info("Checking each input for usage of contig: " + contig);
219+
SimpleScriptWrapper runner = new SimpleScriptWrapper(ctx.getLogger());
220+
TabixRunner tabix = new TabixRunner(ctx.getLogger());
221+
222+
for (File s : inputs)
223+
{
224+
String ret = runner.executeWithOutput(Arrays.asList("/bin/bash", "-c", tabix.getExe().getPath() + " -l '" + s.getPath() + "' | grep -e '" + contig + "' | wc -l"));
225+
if ("0".equals(ret))
226+
{
227+
ctx.getLogger().info("Sample is missing contig: " + contig + ", skipping: " + s.getPath());
228+
}
229+
else
230+
{
231+
samplesToUse.add(s);
232+
}
233+
}
234+
}
235+
else
236+
{
237+
samplesToUse = inputs;
238+
}
239+
240+
if (samplesToUse.isEmpty())
241+
{
242+
ctx.getLogger().info("No samples had data for contig: " + contig + ", skipping");
243+
return null;
244+
}
245+
246+
samplesToUse.forEach(f -> {
210247
args.add(f.getPath());
211248
});
212249

0 commit comments

Comments
 (0)