Merged
Commits
30 commits
d3bb905
Add developer link to ShowUtilityActions
bbimber Jun 30, 2025
e39489a
Use remote API to load JBrowse search results (#339)
bbimber Jul 2, 2025
63a4474
Allow subset to use dplyr
bbimber Jul 2, 2025
5f7d8c3
Bugfix to vireo when using donor file
bbimber Jul 2, 2025
c83a8e9
Avoid dplyr loading
bbimber Jul 2, 2025
16e9682
Handle missing values
bbimber Jul 3, 2025
a45c0c0
Bugfix to FindClustersAndDimRedux
bbimber Jul 3, 2025
5a42760
Bugfix to FindClustersAndDimRedux
bbimber Jul 3, 2025
057d027
Update vireo args
bbimber Jul 9, 2025
77203ce
Update vireo args
bbimber Jul 9, 2025
392881b
Reduce logging
bbimber Jul 9, 2025
c57d7e7
Defer sorting of cellsnp VCF
bbimber Jul 9, 2025
ac24a99
More obvious link to ManageFileRootAction
bbimber Jul 9, 2025
fdfaab2
Report summary for vireo
bbimber Jul 9, 2025
05be6ca
Always re-cache lookups after clear
bbimber Jul 9, 2025
7382511
TSVs need tab
bbimber Jul 9, 2025
7d11ce9
Support sawfish
bbimber Jul 10, 2025
6b18016
Correct sawfish argument
bbimber Jul 10, 2025
23bf999
Provide sawfish with BAM input
bbimber Jul 11, 2025
311f538
Reduce logging
bbimber Jul 11, 2025
dccf4c7
Ensure BAM index exists
bbimber Jul 11, 2025
3c454e3
Bugfix to sawfish
bbimber Jul 11, 2025
a7abcb9
Bugfix to sawfish
bbimber Jul 11, 2025
6c3015b
Drop ref arg to sawfish
bbimber Jul 11, 2025
2914e9f
Allow sawfish to process CRAMs
bbimber Jul 11, 2025
3213c67
Remove unused code
bbimber Jul 22, 2025
6a3389a
Initial commit for JSON-based study definition (#336)
bbimber Jul 23, 2025
f522a76
Correct typo
bbimber Jul 23, 2025
6790e2d
Improve test logging
bbimber Jul 23, 2025
0b9c1cb
Resolve merge conflicts
bbimber Jul 31, 2025
@@ -44,6 +44,7 @@ abstract public class AbstractCommandWrapper implements CommandWrapper
private File _outputDir = null;
private File _workingDir = null;
private Logger _log;
private boolean _logPath = false;
private Level _logLevel = Level.DEBUG;
private boolean _warnNonZeroExits = true;
private boolean _throwNonZeroExits = true;
@@ -205,9 +206,11 @@ private void setPath(ProcessBuilder pb)
{
String path = System.getenv("PATH");

getLogger().debug("Existing PATH: " + path);
getLogger().debug("toolDir: " + toolDir);

if (_logPath)
{
getLogger().debug("Existing PATH: " + path);
getLogger().debug("toolDir: " + toolDir);
}

if (path == null)
{
@@ -229,11 +232,19 @@ private void setPath(ProcessBuilder pb)
path = fileExe.getParent() + File.pathSeparatorChar + path;
}

getLogger().debug("using path: " + path);
if (_logPath)
{
getLogger().debug("using path: " + path);
}
pb.environment().put("PATH", path);
}
}

public void setLogPath(boolean logPath)
{
_logPath = logPath;
}

public void setOutputDir(File outputDir)
{
_outputDir = outputDir;
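
The _logPath flag added above defaults to false, so the "Existing PATH" / "toolDir" debug lines are now opt-in. A minimal usage sketch, assuming SimpleScriptWrapper (used elsewhere in this PR) extends AbstractCommandWrapper and therefore inherits the new setter; the command itself is hypothetical:

import java.util.Arrays;

import org.apache.logging.log4j.LogManager;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;

public class LogPathExample
{
    public static void main(String[] args) throws PipelineJobException
    {
        // Sketch only: assumes SimpleScriptWrapper inherits setLogPath() from AbstractCommandWrapper
        SimpleScriptWrapper wrapper = new SimpleScriptWrapper(LogManager.getLogger(LogPathExample.class));

        // PATH/toolDir debug output is now off by default; opt back in when debugging tool resolution
        wrapper.setLogPath(true);
        wrapper.execute(Arrays.asList("samtools", "--version"));
    }
}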
14 changes: 14 additions & 0 deletions SequenceAnalysis/pipeline_code/extra_tools_install.sh
@@ -319,3 +319,17 @@ then
else
echo "Already installed"
fi

if [[ ! -e ${LKTOOLS_DIR}/sawfish || ! -z $FORCE_REINSTALL ]];
then
echo "Cleaning up previous installs"
rm -Rf $LKTOOLS_DIR/sawfish*

wget https://github.com/PacificBiosciences/sawfish/releases/download/v2.0.0/sawfish-v2.0.0-x86_64-unknown-linux-gnu.tar.gz
tar -xzf sawfish-v2.0.0-x86_64-unknown-linux-gnu.tar.gz

mv sawfish-v2.0.0-x86_64-unknown-linux-gnu $LKTOOLS_DIR/
ln -s $LKTOOLS_DIR/sawfish-v2.0.0/bin/sawfish $LKTOOLS_DIR/
else
echo "Already installed"
fi
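
The Java code in this PR locates the installed binary via SequencePipelineService.get().getExeForPackage("SAWFISHPATH", "sawfish"). A minimal sketch, assuming that call honors a SAWFISHPATH override and otherwise resolves the "sawfish" symlink created in LKTOOLS_DIR above through the tool PATH:

import java.io.File;

import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;

public class SawfishExeLookup
{
    public static File getSawfishExe()
    {
        // Assumption: a SAWFISHPATH override takes precedence; otherwise the "sawfish"
        // symlink installed into LKTOOLS_DIR is found on the tool PATH
        return SequencePipelineService.get().getExeForPackage("SAWFISHPATH", "sawfish");
    }
}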
@@ -123,6 +123,8 @@
import org.labkey.sequenceanalysis.run.analysis.PbsvAnalysis;
import org.labkey.sequenceanalysis.run.analysis.PbsvJointCallingHandler;
import org.labkey.sequenceanalysis.run.analysis.PindelAnalysis;
import org.labkey.sequenceanalysis.run.analysis.SawfishAnalysis;
import org.labkey.sequenceanalysis.run.analysis.SawfishJointCallingHandler;
import org.labkey.sequenceanalysis.run.analysis.SequenceBasedTypingAnalysis;
import org.labkey.sequenceanalysis.run.analysis.SnpCountAnalysis;
import org.labkey.sequenceanalysis.run.analysis.SubreadAnalysis;
@@ -342,6 +344,7 @@ public static void registerPipelineSteps()
SequencePipelineService.get().registerPipelineStep(new PindelAnalysis.Provider());
SequencePipelineService.get().registerPipelineStep(new PbsvAnalysis.Provider());
SequencePipelineService.get().registerPipelineStep(new GenrichStep.Provider());
SequencePipelineService.get().registerPipelineStep(new SawfishAnalysis.Provider());

SequencePipelineService.get().registerPipelineStep(new PARalyzerAnalysis.Provider());
SequencePipelineService.get().registerPipelineStep(new RnaSeQCStep.Provider());
@@ -400,6 +403,7 @@ public static void registerPipelineSteps()
SequenceAnalysisService.get().registerFileHandler(new NextCladeHandler());
SequenceAnalysisService.get().registerFileHandler(new ConvertToCramHandler());
SequenceAnalysisService.get().registerFileHandler(new PbsvJointCallingHandler());
SequenceAnalysisService.get().registerFileHandler(new SawfishJointCallingHandler());
SequenceAnalysisService.get().registerFileHandler(new DeepVariantHandler());
SequenceAnalysisService.get().registerFileHandler(new GLNexusHandler());
SequenceAnalysisService.get().registerFileHandler(new ParagraphStep());
@@ -0,0 +1,105 @@
package org.labkey.sequenceanalysis.run.analysis;

import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.Nullable;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.sequenceanalysis.model.AnalysisModel;
import org.labkey.api.sequenceanalysis.model.Readset;
import org.labkey.api.sequenceanalysis.pipeline.AbstractAnalysisStepProvider;
import org.labkey.api.sequenceanalysis.pipeline.AbstractPipelineStep;
import org.labkey.api.sequenceanalysis.pipeline.AnalysisOutputImpl;
import org.labkey.api.sequenceanalysis.pipeline.AnalysisStep;
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
import org.labkey.api.sequenceanalysis.pipeline.PipelineStepProvider;
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
import org.labkey.api.sequenceanalysis.pipeline.SamtoolsIndexer;
import org.labkey.api.sequenceanalysis.pipeline.SamtoolsRunner;
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
import org.labkey.sequenceanalysis.util.SequenceUtil;

import java.io.File;
import java.util.ArrayList;
import java.util.List;

public class SawfishAnalysis extends AbstractPipelineStep implements AnalysisStep
{
public SawfishAnalysis(PipelineStepProvider<?> provider, PipelineContext ctx)
{
super(provider, ctx);
}

public static class Provider extends AbstractAnalysisStepProvider<SawfishAnalysis>
{
public Provider()
{
super("sawfish", "Sawfish Analysis", null, "This will run sawfish SV dicvoery and calling on the selected BAMs", List.of(), null, null);
}


@Override
public SawfishAnalysis create(PipelineContext ctx)
{
return new SawfishAnalysis(this, ctx);
}
}

@Override
public Output performAnalysisPerSampleRemote(Readset rs, File inputBam, ReferenceGenome referenceGenome, File outputDir) throws PipelineJobException
{
AnalysisOutputImpl output = new AnalysisOutputImpl();

List<String> args = new ArrayList<>();
args.add(getExe().getPath());
args.add("discover");

args.add("--bam");
args.add(inputBam.getPath());

// NOTE: sawfish stores the absolute path of the FASTA in the output JSON, so don't rely on working copies:
args.add("--ref");
args.add(referenceGenome.getSourceFastaFile().getPath());

File svOutDir = new File(outputDir, "sawfish");
args.add("--output-dir");
args.add(svOutDir.getPath());

Integer maxThreads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
if (maxThreads != null)
{
args.add("--threads");
args.add(String.valueOf(maxThreads));
}

File bcf = new File(svOutDir, "candidate.sv.bcf");
File bcfIdx = new File(bcf.getPath() + ".csi");
if (bcfIdx.exists())
{
getPipelineCtx().getLogger().debug("BCF index already exists, reusing output");
}
else
{
new SimpleScriptWrapper(getPipelineCtx().getLogger()).execute(args);
}

if (!bcf.exists())
{
throw new PipelineJobException("Unable to find file: " + bcf.getPath());
}

output.addSequenceOutput(bcf, rs.getName() + ": sawfish", "Sawfish SV Discovery", rs.getReadsetId(), null, referenceGenome.getGenomeId(), null);

return output;
}

@Override
public Output performAnalysisPerSampleLocal(AnalysisModel model, File inputBam, File referenceFasta, File outDir) throws PipelineJobException
{
return null;
}

private File getExe()
{
return SequencePipelineService.get().getExeForPackage("SAWFISHPATH", "sawfish");
}
}
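
For orientation, not part of the diff: a minimal sketch of the discover command performAnalysisPerSampleRemote assembles, using hypothetical paths; --threads is only appended when getMaxThreads() returns a value.

import java.io.File;
import java.util.ArrayList;
import java.util.List;

public class SawfishDiscoverCommandSketch
{
    public static void main(String[] args)
    {
        // Hypothetical inputs, for illustration only:
        File inputBam = new File("/data/run1/sample1.bam");
        File sourceFasta = new File("/genomes/mmul10/genome.fasta");
        File svOutDir = new File("/data/run1/analysis/sawfish");

        List<String> cmd = new ArrayList<>(List.of(
                "sawfish", "discover",
                "--bam", inputBam.getPath(),
                // the source FASTA is used (not a working copy) because sawfish records its absolute path
                "--ref", sourceFasta.getPath(),
                "--output-dir", svOutDir.getPath(),
                "--threads", "8"));

        System.out.println(String.join(" ", cmd));
        // The step then expects <svOutDir>/candidate.sv.bcf and its .csi index
    }
}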
@@ -0,0 +1,180 @@
package org.labkey.sequenceanalysis.run.analysis;

import org.apache.commons.io.FileUtils;
import org.json.JSONObject;
import org.labkey.api.module.ModuleLoader;
import org.labkey.api.pipeline.PipelineJob;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.pipeline.RecordedAction;
import org.labkey.api.sequenceanalysis.SequenceAnalysisService;
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome;
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
import org.labkey.sequenceanalysis.util.SequenceUtil;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.stream.Collectors;

public class SawfishJointCallingHandler extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>
{
private static final String OUTPUT_CATEGORY = "Sawfish VCF";

public SawfishJointCallingHandler()
{
super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.NAME), "Sawfish Joint-Call", "Runs sawfish joint-call, which jointly calls SVs from PacBio CCS data", new LinkedHashSet<>(List.of("sequenceanalysis/panel/VariantScatterGatherPanel.js")), Arrays.asList(
ToolParameterDescriptor.create("fileName", "VCF Filename", "The name of the resulting file.", "textfield", new JSONObject(){{
put("allowBlank", false);
put("doNotIncludeInTemplates", true);
}}, null)
));
}

@Override
public boolean canProcess(SequenceOutputFile o)
{
return o.getFile() != null && SequenceUtil.FILETYPE.bcf.getFileType().isType(o.getFile());
}

@Override
public boolean doRunRemote()
{
return true;
}

@Override
public boolean doRunLocal()
{
return false;
}

@Override
public SequenceOutputProcessor getProcessor()
{
return new Processor();
}

public static class Processor implements SequenceOutputProcessor
{
@Override
public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
{

}

@Override
public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException
{
List<File> filesToProcess = inputFiles.stream().map(SequenceOutputFile::getFile).collect(Collectors.toList());

ReferenceGenome genome = ctx.getSequenceSupport().getCachedGenomes().iterator().next();
String outputBaseName = ctx.getParams().getString("fileName");
// strip any user-supplied extension so ".vcf.gz" is not duplicated below:
if (outputBaseName.toLowerCase().endsWith(".gz"))
{
outputBaseName = outputBaseName.replaceAll("\\.gz$", "");
}

if (outputBaseName.toLowerCase().endsWith(".vcf"))
{
outputBaseName = outputBaseName.replaceAll("\\.vcf$", "");
}

File expectedFinalOutput = new File(ctx.getOutputDir(), outputBaseName + ".vcf.gz");

File outputVcf = runSawfishCall(ctx, filesToProcess, genome, outputBaseName);

SequenceOutputFile so = new SequenceOutputFile();
so.setName("Sawfish call: " + outputBaseName);
so.setFile(outputVcf);
so.setCategory(OUTPUT_CATEGORY);
so.setLibrary_id(genome.getGenomeId());

ctx.addSequenceOutput(so);
}

private File runSawfishCall(JobContext ctx, List<File> inputs, ReferenceGenome genome, String outputBaseName) throws PipelineJobException
{
if (inputs.isEmpty())
{
throw new PipelineJobException("No inputs provided");
}

List<String> args = new ArrayList<>();
args.add(getExe().getPath());
args.add("joint-call");

Integer maxThreads = SequencePipelineService.get().getMaxThreads(ctx.getLogger());
if (maxThreads != null)
{
args.add("--threads");
args.add(String.valueOf(maxThreads));
}

for (File sample : inputs)
{
// sawfish joint-call takes the per-sample discover output directory, i.e. the parent of each candidate BCF:
args.add("--sample");
args.add(sample.getParentFile().getPath());
}

File outDir = new File(ctx.getOutputDir(), "sawfish");
args.add("--output-dir");
args.add(outDir.getPath());

new SimpleScriptWrapper(ctx.getLogger()).execute(args);

File vcfOut = new File(outDir, "genotyped.sv.vcf.gz");
if (!vcfOut.exists())
{
throw new PipelineJobException("Unable to find file: " + vcfOut.getPath());
}

File vcfOutFinal = new File(ctx.getOutputDir(), outputBaseName + ".vcf.gz");

try
{
if (vcfOutFinal.exists())
{
vcfOutFinal.delete();
}
FileUtils.moveFile(vcfOut, vcfOutFinal);

File targetIndex = new File(vcfOutFinal.getPath() + ".tbi");
if (targetIndex.exists())
{
targetIndex.delete();
}

File origIndex = new File(vcfOut.getPath() + ".tbi");
if (origIndex.exists())
{
FileUtils.moveFile(origIndex, targetIndex);
}
else
{
SequenceAnalysisService.get().ensureVcfIndex(vcfOutFinal, ctx.getLogger(), true);
}
}
catch (IOException e)
{
throw new PipelineJobException(e);
}

return vcfOutFinal;
}

private File getExe()
{
return SequencePipelineService.get().getExeForPackage("SAWFISHPATH", "sawfish");
}
}
}
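
For orientation, not part of the diff: a minimal sketch of the joint-call command runSawfishCall assembles for two hypothetical discover output directories; the handler then moves genotyped.sv.vcf.gz to the requested file name and ensures a tabix index exists.

import java.util.ArrayList;
import java.util.List;

public class SawfishJointCallCommandSketch
{
    public static void main(String[] args)
    {
        // Hypothetical discover output directories (the parent of each candidate.sv.bcf input):
        List<String> sampleDirs = List.of("/data/run1/analysis/sawfish", "/data/run2/analysis/sawfish");

        List<String> cmd = new ArrayList<>(List.of("sawfish", "joint-call", "--threads", "8"));
        for (String dir : sampleDirs)
        {
            cmd.add("--sample");
            cmd.add(dir);
        }
        cmd.add("--output-dir");
        cmd.add("/data/jointcall/sawfish");

        System.out.println(String.join(" ", cmd));
        // Expected output: /data/jointcall/sawfish/genotyped.sv.vcf.gz, which the handler
        // renames to <fileName>.vcf.gz in the job output directory
    }
}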