Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,13 @@

import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.Nullable;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.pipeline.PipelineJobService;
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
* User: bimber
Expand All @@ -22,4 +26,32 @@ public static File getBcfToolsPath()
{
return SequencePipelineService.get().getExeForPackage("BCFTOOLSPATH", "bcftools");
}

/**
 * Reports whether a bcftools executable can be located.
 *
 * The lookup is delegated to resolveFileInPath for the executable name "bcftools"; a non-null
 * result is treated as "found".
 * NOTE(review): the trailing 'false' argument presumably suppresses an exception when the
 * executable is missing - confirm against resolveFileInPath.
 *
 * @return true if the lookup returned a non-null result, otherwise false
 */
public static boolean isBcftoolsFound()
{
    final boolean notFound = BcftoolsRunner.resolveFileInPath("bcftools", null, false) == null;

    return !notFound;
}

/**
 * Runs "bcftools index -t -f" on the supplied VCF.
 *
 * When not running on the web server, the max thread count reported by SequencePipelineService
 * (if any) is forwarded to bcftools via --threads.
 *
 * @param vcf the VCF file to index; its absolute path is passed as the final argument
 * @throws PipelineJobException declared for failures of the underlying command execution
 */
public void doIndex(File vcf) throws PipelineJobException
{
    final List<String> command = new ArrayList<>();
    command.add(getBcfToolsPath().getAbsolutePath());
    command.add("index");
    command.add("-t");
    command.add("-f");

    // The thread count is only consulted off the web server; null means "do not pass --threads"
    final Integer maxThreads = PipelineJobService.get().isWebServer()
            ? null
            : SequencePipelineService.get().getMaxThreads(getLogger());
    if (maxThreads != null)
    {
        command.add("--threads");
        command.add(maxThreads.toString());
    }

    // The input file is always the last argument
    command.add(vcf.getAbsolutePath());

    execute(command);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
import org.labkey.api.sequenceanalysis.SequenceDataProvider;
import org.labkey.api.sequenceanalysis.model.Readset;
import org.labkey.api.sequenceanalysis.pipeline.BcftoolsRunner;
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
import org.labkey.api.sequenceanalysis.pipeline.SamtoolsCramConverter;
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
Expand Down Expand Up @@ -267,8 +268,16 @@ public File ensureVcfIndex(File vcf, Logger log, boolean forceRecreate) throws I
//note: there is a bug in htsjdk's index creation with gz inputs
if (gz.isType(vcf) && !SystemUtils.IS_OS_WINDOWS)
{
TabixRunner r = new TabixRunner(log);
r.execute(vcf);
// preferentially use bcftools since it supports multithreading:
if (BcftoolsRunner.isBcftoolsFound())
{
new BcftoolsRunner(log).doIndex(vcf);
}
else
{
new TabixRunner(log).execute(vcf);
}

if (!expectedIdx.exists())
{
throw new PipelineJobException("Expected index was not created: " + expectedIdx.getPath());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
boolean dropGenotypes = params.optBoolean("dropGenotypes", false);
boolean useBcfTools = params.optBoolean("useBcfTools", false);
boolean doNotRetainUnmapped = params.optBoolean("doNotRetainUnmapped", false);
if (doNotRetainUnmapped && !useBcfTools)
if (!doNotRetainUnmapped && !useBcfTools)
{
ctx.getLogger().debug("Picard LiftoverVcf requires an output file for rejected sites, so setting doNotRetainUnmapped to true");
doNotRetainUnmapped = true;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package org.labkey.sequenceanalysis.run.variant;

import org.apache.logging.log4j.Logger;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
import org.labkey.api.sequenceanalysis.run.AbstractGatk4Wrapper;
import org.labkey.api.writer.PrintWriters;

import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;

public class GatherVcfsCloudWrapper extends AbstractGatk4Wrapper
{
public GatherVcfsCloudWrapper(Logger log)
{
super(log);
}

public void gatherVcfs(File output, List<File> inputVcfs) throws PipelineJobException
{
List<String> args = new ArrayList<>(getBaseArgs("GatherVcfsCloud"));
args.add("-O");
args.add(output.getPath());

File argFile = new File(output.getParentFile(), "inputs.list");
try (PrintWriter writer = PrintWriters.getPrintWriter(argFile))
{
inputVcfs.forEach(f -> writer.println(f.getPath()));
}
catch (IOException e)
{
throw new PipelineJobException(e);
}

args.add("-I");
args.add(argFile.getPath());

execute(args);

argFile.delete();

try
{
SequenceAnalysisService.get().ensureVcfIndex(output, getLogger());
}
catch (IOException e)
{
throw new PipelineJobException(e);
}
}
}
Loading