|
26 | 26 | import org.labkey.sequenceanalysis.pipeline.VariantProcessingJob; |
27 | 27 | import org.labkey.sequenceanalysis.run.util.AbstractGenomicsDBImportHandler; |
28 | 28 | import org.labkey.sequenceanalysis.run.util.GenotypeGVCFsWrapper; |
| 29 | +import org.labkey.sequenceanalysis.run.util.TabixRunner; |
29 | 30 | import org.labkey.sequenceanalysis.util.SequenceUtil; |
30 | 31 |
|
31 | 32 | import java.io.File; |
@@ -132,7 +133,11 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c |
132 | 133 | throw new PipelineJobException("Expected all intervals to start on the first base: " + i.toString()); |
133 | 134 | } |
134 | 135 |
|
135 | | - outputs.add(runPbsvCall(ctx, filesToProcess, genome, outputBaseName + (getVariantPipelineJob(ctx.getJob()).getIntervalsForTask().size() == 1 ? "" : "." + i.getContig()), i.getContig())); |
| 136 | + File o = runPbsvCall(ctx, filesToProcess, genome, outputBaseName + (getVariantPipelineJob(ctx.getJob()).getIntervalsForTask().size() == 1 ? "" : "." + i.getContig()), i.getContig()); |
| 137 | + if (o != null) |
| 138 | + { |
| 139 | + outputs.add(o); |
| 140 | + } |
136 | 141 | } |
137 | 142 | } |
138 | 143 | else |
@@ -206,7 +211,39 @@ private File runPbsvCall(JobContext ctx, List<File> inputs, ReferenceGenome geno |
206 | 211 |
|
207 | 212 | args.add(genome.getWorkingFastaFile().getPath()); |
208 | 213 |
|
209 | | - inputs.forEach(f -> { |
| 214 | + // Check indexes to determine whether each sample actually includes this contig. This is mostly relevant for small contigs: |
| 215 | + List<File> samplesToUse = new ArrayList<>(); |
| 216 | + if (contig != null) |
| 217 | + { |
| 218 | + ctx.getLogger().info("Checking each input for usage of contig: " + contig); |
| 219 | + SimpleScriptWrapper runner = new SimpleScriptWrapper(ctx.getLogger()); |
| 220 | + TabixRunner tabix = new TabixRunner(ctx.getLogger()); |
| 221 | + |
| 222 | + for (File s : inputs) |
| 223 | + { |
| 224 | + String ret = runner.executeWithOutput(Arrays.asList("/bin/bash", "-c", tabix.getExe().getPath() + " -l '" + s.getPath() + "' | grep -e '" + contig + "' | wc -l")); |
| 225 | + if ("0".equals(ret)) |
| 226 | + { |
| 227 | + ctx.getLogger().info("Sample is missing contig: " + contig + ", skipping: " + s.getPath()); |
| 228 | + } |
| 229 | + else |
| 230 | + { |
| 231 | + samplesToUse.add(s); |
| 232 | + } |
| 233 | + } |
| 234 | + } |
| 235 | + else |
| 236 | + { |
| 237 | + samplesToUse = inputs; |
| 238 | + } |
| 239 | + |
| 240 | + if (samplesToUse.isEmpty()) |
| 241 | + { |
| 242 | + ctx.getLogger().info("No samples had data for contig: " + contig + ", skipping"); |
| 243 | + return null; |
| 244 | + } |
| 245 | + |
| 246 | + samplesToUse.forEach(f -> { |
210 | 247 | args.add(f.getPath()); |
211 | 248 | }); |
212 | 249 |
|
|
0 commit comments