Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
59 commits
Select commit Hold shift + click to select a range
a09715d
Improve paragraph logging
bbimber Nov 21, 2024
74ee60a
Improve paragraph logging
bbimber Nov 21, 2024
0891dd9
Support bcftools fill-from-fasta
bbimber Nov 21, 2024
60c109d
Improve error handling in paragraph
bbimber Nov 21, 2024
cb7d5e1
Update CellRangerVDJWrapper to support CR9
bbimber Nov 22, 2024
caaeada
Drop sudo from docker
bbimber Nov 26, 2024
a79c0ec
Improve slurm sacct parsing
bbimber Nov 26, 2024
9c0f8c2
Fix BWA-Mem2 inheritance
bbimber Nov 27, 2024
c33a698
Update slurm memory logging
bbimber Nov 29, 2024
5f02031
Update slurm memory logging
bbimber Nov 29, 2024
b1289f4
Handle FileNotFoundException
bbimber Nov 29, 2024
55cc6f4
Attempt to remove copyJars() from SequenceAnalysis gradle build
bbimber Nov 29, 2024
00d3422
Merge pull request #306 from BimberLab/24.7_fb_copyJars
bbimber Nov 30, 2024
4a66ff3
Update ParagraphStep parameters
bbimber Nov 30, 2024
5049933
Set paragraph temp dir
bbimber Nov 30, 2024
bf2cb04
Drop UID from docker
bbimber Nov 30, 2024
dfae19f
Refactor nimble to use DockerWrapper
bbimber Dec 1, 2024
06d3358
Refactor nimble to use DockerWrapper
bbimber Dec 1, 2024
11401ed
Add entrypoint to ParagraphStep
bbimber Dec 1, 2024
d01899d
Append to log when logging slurm memory
bbimber Dec 2, 2024
9a4feeb
Pass tmpDir to nimble using environment
bbimber Dec 2, 2024
44aa8ba
Parse request memory and mem used for slurm jobs
bbimber Dec 2, 2024
c0af553
Option to run docker prune before jobs
bbimber Dec 2, 2024
d569189
Switch nimble to pass align-specific tempDir on the command line, rat…
bbimber Dec 2, 2024
8157f85
Add --group-add keep-groups to docker
bbimber Dec 2, 2024
753356c
Convert AbstractSingleCellPipelineStep to use DockerWrapper
bbimber Dec 3, 2024
edd587a
Signficiant refactor of docker in pipeline jobs to migrate everything…
bbimber Dec 3, 2024
4c56e4d
Missed with prior commit
bbimber Dec 3, 2024
b9a0139
Change handling of legacy docker volumes
bbimber Dec 3, 2024
d923842
Remove entrypoint from ParagraphStep
bbimber Dec 4, 2024
db9085c
Add spaces to DockerWrapper
bbimber Dec 4, 2024
d2a0fe9
Declare inputs in ParagraphStep
bbimber Dec 4, 2024
1d931db
Add quotes to environment vars in DockerWrapper
bbimber Dec 4, 2024
97a71bb
Set scripts executable
bbimber Dec 4, 2024
70d7361
Separate docker and process environments
bbimber Dec 4, 2024
f9e70c2
For docker
bbimber Dec 4, 2024
64f36fa
Call docker bash script directly
bbimber Dec 4, 2024
fb63b35
Null check to docker volumes
bbimber Dec 4, 2024
13a6719
Add debugging is leidenalg not found
bbimber Dec 4, 2024
2105d24
Jackson cannot serialize unmodifiableCollection
bbimber Dec 4, 2024
8af4f20
Jackson cannot serialize unmodifiableCollection
bbimber Dec 4, 2024
23c3a30
Improve leidenalg debugging
bbimber Dec 4, 2024
4501fe3
Add ability to conditionally use multiseq on large datasets
bbimber Dec 4, 2024
65db685
Expand duplicated prototype query
bbimber Dec 5, 2024
4caf4ac
Merge discvr-24.7 to discvr-24.11
bbimber Dec 5, 2024
e75375f
Make nimble output gzipped results file
bbimber Dec 5, 2024
e4b3625
Merge pull request #307 from BimberLab/24.11_fb_merge
bbimber Dec 5, 2024
ad8f951
Fix error in cell hashing
bbimber Dec 5, 2024
68205ae
Fix case in Sys.getenv()
bbimber Dec 5, 2024
d2485fc
Add placeholder for multiqc install
bbimber Dec 5, 2024
5cc7346
Fix SQL error
bbimber Dec 6, 2024
c959503
Merge discvr-24.7 to discvr-24.11
bbimber Dec 6, 2024
fb4d4c3
Merge pull request #308 from BimberLab/24.11_fb_merge
bbimber Dec 6, 2024
9224d02
Remove scMetabolism
bbimber Dec 7, 2024
121a5f8
Simplify merge VCF code
bbimber Dec 7, 2024
9a12e35
Allow KING to exclude contigs
bbimber Dec 11, 2024
db9dc60
Auto-expand JBrowse browser
bbimber Dec 12, 2024
d9a3e04
Add calculated column showing unique genomes used by readset
bbimber Dec 12, 2024
7e67f1c
Merge discvr-24.11 to develop
bbimber Dec 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package org.labkey.api.sequenceanalysis.pipeline;

import org.jetbrains.annotations.Nullable;
import org.labkey.api.data.Container;

import java.io.File;
import java.util.Collection;
import java.util.List;

Expand All @@ -15,4 +17,9 @@ public interface JobResourceSettings
List<ToolParameterDescriptor> getParams();

Collection<String> getDockerVolumes(Container c);

/**
 * Translates a raw file path into the volume that should be mounted in a docker container for it.
 * @param input the raw file path to translate
 * @return the volume to mount, or null if this settings object performs no inference (the default)
 */
default @Nullable File inferDockerVolume(File input)
{
return null;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,12 @@ static public void setInstance(SequencePipelineService instance)

abstract public Collection<String> getDockerVolumes(Container c);

/**
* The purpose of this method is to assist with translating from raw filepath to the desired volume to mount in a docker container.
* This is mostly relevant for situations where the NFS root should be mounted, rather than a child folder.
*/
abstract public @Nullable File inferDockerVolume(File input);

abstract public List<File> getSequenceJobInputFiles(PipelineJob job);

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ default void validateScatter(ScatterGatherMethod method, PipelineJob job) throws

}

default void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
default void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
{
ctx.getLogger().debug("No additional merge tasks are implemented for: " + getClass().getName());
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package org.labkey.api.sequenceanalysis.run;

import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.Nullable;
import org.labkey.api.pipeline.PipelineJobException;
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker;
Expand All @@ -13,13 +13,24 @@
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

public class DockerWrapper extends AbstractCommandWrapper
{
private final String _containerName;
private final PipelineContext _ctx;
private File _tmpDir = null;
private String _entryPoint = null;
private boolean _runPrune = true;
private String _alternateUserHome = null;
private final Map<String, String> _dockerEnvironment = new HashMap<>();

public DockerWrapper(String containerName, Logger log, PipelineContext ctx)
{
Expand All @@ -28,12 +39,32 @@ public DockerWrapper(String containerName, Logger log, PipelineContext ctx)
_ctx = ctx;
}

/**
 * Sets an alternate path to be passed into the container as the HOME environment variable.
 * @param alternateUserHome the path to use as HOME inside the container
 */
public void setAlternateUserHome(String alternateUserHome)
{
_alternateUserHome = alternateUserHome;
}

/**
 * Sets the temp directory for the container. When set, it is exposed to the container
 * (mounted as /tmp unless an existing docker volume already covers it) and passed as TMPDIR.
 * @param tmpDir the local temp directory
 */
public void setTmpDir(File tmpDir)
{
_tmpDir = tmpDir;
}

/**
 * Sets the value passed to docker's --entrypoint flag. If null (the default), no
 * --entrypoint argument is written to the generated docker run command.
 * @param entryPoint the entrypoint executable, or null for the image default
 */
public void setEntryPoint(String entryPoint)
{
_entryPoint = entryPoint;
}

/**
 * Controls whether 'docker image prune -f' is run after pulling the image and before
 * running the container. Defaults to true.
 * @param runPrune false to skip the prune step
 */
public void setRunPrune(boolean runPrune)
{
_runPrune = runPrune;
}

/**
 * Convenience overload that runs the container without declaring any input files
 * (no additional volumes are inferred from inputs).
 * @param containerArgs the command to execute inside the container
 * @param workDir the working directory where the wrapper scripts are written
 * @param tracker used to track pipeline outputs
 * @throws PipelineJobException on failure
 */
public void executeWithDocker(List<String> containerArgs, File workDir, PipelineOutputTracker tracker) throws PipelineJobException
{
executeWithDocker(containerArgs, workDir, tracker, null);
}

public void executeWithDocker(List<String> containerArgs, File workDir, PipelineOutputTracker tracker, @Nullable Collection<File> inputFiles) throws PipelineJobException
{
File localBashScript = new File(workDir, "docker.sh");
File dockerBashScript = new File(workDir, "dockerRun.sh");
Expand All @@ -45,70 +76,131 @@ public void executeWithDocker(List<String> containerArgs, File workDir, Pipeline
{
writer.println("#!/bin/bash");
writer.println("set -x");
writer.println("WD=`pwd`");
writer.println("HOME=`echo ~/`");
writer.println("set -e");

writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'");
writer.println("sudo $DOCKER pull " + _containerName);
writer.println("sudo $DOCKER run --rm=true \\");
writer.println("\t-v \"${WD}:/work\" \\");
writer.println("\t-v \"${HOME}:/homeDir\" \\");
_ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\"));
writer.println("$DOCKER pull " + _containerName);
if (_runPrune)
{
writer.println("$DOCKER image prune -f");
}

writer.println("$DOCKER run --rm=true \\");
writer.println("\t--group-add keep-groups \\");

// NOTE: getDockerVolumes() should be refactored to remove the -v and this logic should be updated accordingly:
File homeDir = new File(System.getProperty("user.home"));
if (homeDir.exists())
{
if (_ctx.getDockerVolumes().stream().noneMatch(homeDir.getPath()::startsWith))
{
writer.println("\t-v '" + homeDir.getPath() + "':'" + homeDir.getPath() + "' \\");
}
else
{
_ctx.getLogger().debug("homeDir already present in docker volumes, will not re-add");
}

_dockerEnvironment.put("USER_HOME", homeDir.getPath());
}

if (_alternateUserHome != null)
{
_dockerEnvironment.put("HOME", _alternateUserHome);
}

_ctx.getDockerVolumes().forEach(v -> writer.println("\t-v '" + v + "':'" + v + "' \\"));
if (inputFiles != null)
{
inspectInputFiles(inputFiles).forEach(v -> writer.println("\t-v '" + v + "':'" + v + "' \\"));
}

if (_tmpDir != null)
{
writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\");
// NOTE: getDockerVolumes() should be refactored to remove the -v and this logic should be updated accordingly:
if (_ctx.getDockerVolumes().stream().noneMatch(_tmpDir.getPath()::startsWith))
{
writer.println("\t-v '" + _tmpDir.getPath() + "':/tmp \\");
}
else
{
_ctx.getLogger().debug("tmpDir already present in docker volumes, omitting");
}

addToDockerEnvironment("TMPDIR", _tmpDir.getPath());
}

if (_entryPoint != null)
{
writer.println("\t--entrypoint \"" + _entryPoint + "\"\\");
}
writer.println("\t--entrypoint /bin/bash \\");
writer.println("\t-w /work \\");

writer.println("\t-w " + workDir.getPath() + " \\");
addToDockerEnvironment("WORK_DIR", workDir.getPath());

Integer maxRam = SequencePipelineService.get().getMaxRam();
if (maxRam != null)
{
writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\");
writer.println("\t--memory='" + maxRam + "g' \\");
}

for (String key : _dockerEnvironment.keySet())
{
writer.println("\t-e " + key + "='" + _dockerEnvironment.get(key) + "' \\");
}
writer.println("\t" + _containerName + " \\");
writer.println("\t/work/" + dockerBashScript.getName());
writer.println("EXIT_CODE=$?");
writer.println("echo 'Docker run exit code: '$EXIT_CODE");
writer.println("exit $EXIT_CODE");
writer.println("\t" + dockerBashScript.getPath());
writer.println("DOCKER_EXIT_CODE=$?");
writer.println("echo 'Docker run exit code: '$DOCKER_EXIT_CODE");
writer.println("exit $DOCKER_EXIT_CODE");

dockerWriter.println("#!/bin/bash");
dockerWriter.println("set -x");
dockerWriter.println(StringUtils.join(containerArgs, " "));
dockerWriter.println("EXIT_CODE=$?");
dockerWriter.println("echo 'Exit code: '$?");
dockerWriter.println("exit $EXIT_CODE");
dockerWriter.println("BASH_EXIT_CODE=$?");
dockerWriter.println("echo 'Bash exit code: '$BASH_EXIT_CODE");
dockerWriter.println("exit $BASH_EXIT_CODE");
}
catch (IOException e)
{
throw new PipelineJobException(e);
}

localBashScript.setExecutable(true);
dockerBashScript.setExecutable(true);
execute(Arrays.asList("/bin/bash", localBashScript.getPath()));
}

public File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException
public void addToDockerEnvironment(String key, String value)
{
try
_dockerEnvironment.put(key, value);
}

private Collection<File> inspectInputFiles(Collection<File> inputFiles)
{
Set<File> toAdd = inputFiles.stream().map(f -> f.isDirectory() ? f : f.getParentFile()).filter(x -> _ctx.getDockerVolumes().stream().noneMatch(x.getPath()::startsWith)).collect(Collectors.toSet());
if (!toAdd.isEmpty())
{
if (workingDirectory.equals(input.getParentFile()))
{
return input;
}
Set<File> paths = new HashSet<>();
toAdd.forEach(x -> {
_ctx.getLogger().debug("Adding volume for path: " + x.getPath());

File local = new File(workingDirectory, input.getName());
if (!local.exists())
{
getLogger().debug("Copying file locally: " + input.getPath());
FileUtils.copyFile(input, local);
}
File converted = SequencePipelineService.get().inferDockerVolume(x);
if (!x.equals(converted))
{
_ctx.getLogger().debug("added as: " + converted.getPath());
}

output.addIntermediateFile(local);
if (_ctx.getDockerVolumes().stream().noneMatch(converted.getPath()::startsWith))
{
paths.add(converted);
}
});

return local;
}
catch (IOException e)
{
throw new PipelineJobException(e);
return paths;
}

return Collections.emptySet();
}
}
18 changes: 1 addition & 17 deletions SequenceAnalysis/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -194,20 +194,4 @@ if (project.findProject(BuildUtils.getTestProjectPath(project.gradle)) != null &
<< "\ncontext.pipelineConfig=${configDir.getAbsolutePath().replace("\\", "\\\\")}"
}
}
}

project.tasks.register("copyJars", Copy)
{ CopySpec copy ->
copy.group = "Build"
copy.description = "Copy commons-math3 JAR to module's lib directory"

copy.setDuplicatesStrategy(DuplicatesStrategy.EXCLUDE)
copy.from(project.configurations.external)
copy.into new File("${project.labkey.explodedModuleLibDir}")
copy.include {
"**commons-math3-**.jar"
}
}

project.tasks.named('module').configure { dependsOn(project.tasks.copyJars) }
project.tasks.named('copyJars').configure { mustRunAfter(project.tasks.populateExplodedLib) }
}
14 changes: 14 additions & 0 deletions SequenceAnalysis/pipeline_code/extra_tools_install.sh
Original file line number Diff line number Diff line change
Expand Up @@ -188,9 +188,12 @@ then
unzip paragraph-v2.4a-binary.zip
rm paragraph-v2.4a-binary.zip

python3 -m pip install pysam intervaltree

cd ../
cp -R paragraph $LKTOOLS_DIR
ln -s ${LKTOOLS_DIR}/paragraph/bin/paragraph ${LKTOOLS_DIR}/paragraph
ln -s ${LKTOOLS_DIR}/paragraph/bin/idxdepth ${LKTOOLS_DIR}/idxdepth
ln -s ${LKTOOLS_DIR}/paragraph/bin/multigrmpy.py ${LKTOOLS_DIR}/multigrmpy.py
else
echo "Already installed"
Expand All @@ -215,3 +218,14 @@ then
else
echo "Already installed"
fi

if [[ ! -e ${LKTOOLS_DIR}/multiqc || ! -z $FORCE_REINSTALL ]];
then
echo "Cleaning up previous installs"
rm -Rf multiqc*
rm -Rf $LKTOOLS_DIR/multiqc*

python3 -m pip install --user multiqc
else
echo "Already installed"
fi
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
import org.labkey.sequenceanalysis.run.alignment.StarWrapper;
import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper;
import org.labkey.sequenceanalysis.run.analysis.BamIterator;
import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillFromFastaStep;
import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillTagsStep;
import org.labkey.sequenceanalysis.run.analysis.BcftoolsFixploidyStep;
import org.labkey.sequenceanalysis.run.analysis.DeepVariantAnalysis;
Expand Down Expand Up @@ -365,6 +366,7 @@ public static void registerPipelineSteps()
SequencePipelineService.get().registerPipelineStep(new SummarizeGenotypeQualityStep.Provider());
SequencePipelineService.get().registerPipelineStep(new BcftoolsFillTagsStep.Provider());
SequencePipelineService.get().registerPipelineStep(new BcftoolsFixploidyStep.Provider());
SequencePipelineService.get().registerPipelineStep(new BcftoolsFillFromFastaStep.Provider());
SequencePipelineService.get().registerPipelineStep(new SVAnnotateStep.Provider());

//handlers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -472,16 +472,28 @@ public Collection<String> getDockerVolumes(Container c)
{
if (settings.isAvailable(c))
{
for (String volume : settings.getDockerVolumes(c))
{
volumeLines.add("-v '" + volume + "':'" + volume + "'");
}
return Collections.unmodifiableCollection(settings.getDockerVolumes(c));
}
}

return volumeLines;
}

@Override
public @Nullable File inferDockerVolume(File input)
{
// Ask each registered JobResourceSettings to translate the path; the first non-null answer wins
for (JobResourceSettings settings : SequencePipelineServiceImpl.get().getResourceSettings())
{
File ret = settings.inferDockerVolume(input);
if (ret != null)
{
return ret;
}
}

// No settings object provided a translation, so fall back to the input path itself.
// NOTE(review): despite the @Nullable annotation, this implementation never returns null — confirm whether the annotation is stale.
return input;
}

@Override
public List<File> getSequenceJobInputFiles(PipelineJob job)
{
Expand Down Expand Up @@ -570,7 +582,7 @@ public void registerResourceSettings(JobResourceSettings settings)
@Override
public Set<JobResourceSettings> getResourceSettings()
{
return _resourceSettings;
return Collections.unmodifiableSet(_resourceSettings);
}

@Override
Expand Down
Loading
Loading