Skip to content

Commit 7e67f1c

Browse files
committed
Merge discvr-24.11 to develop
2 parents 2a39d2b + d9a3e04 commit 7e67f1c

File tree

53 files changed

+831
-961
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

53 files changed

+831
-961
lines changed

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/JobResourceSettings.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
package org.labkey.api.sequenceanalysis.pipeline;
22

3+
import org.jetbrains.annotations.Nullable;
34
import org.labkey.api.data.Container;
45

6+
import java.io.File;
57
import java.util.Collection;
68
import java.util.List;
79

@@ -15,4 +17,9 @@ public interface JobResourceSettings
1517
List<ToolParameterDescriptor> getParams();
1618

1719
Collection<String> getDockerVolumes(Container c);
20+
21+
default @Nullable File inferDockerVolume(File input)
22+
{
23+
return null;
24+
}
1825
}

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/SequencePipelineService.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,12 @@ static public void setInstance(SequencePipelineService instance)
102102

103103
abstract public Collection<String> getDockerVolumes(Container c);
104104

105+
/**
106+
* Translates a raw file path into the volume that should be mounted in a Docker container.
107+
* This is mostly relevant for situations where the NFS root should be mounted, rather than a child folder.
108+
*/
109+
abstract public @Nullable File inferDockerVolume(File input);
110+
105111
abstract public List<File> getSequenceJobInputFiles(PipelineJob job);
106112

107113
/**

SequenceAnalysis/api-src/org/labkey/api/sequenceanalysis/pipeline/VariantProcessingStep.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,7 @@ default void validateScatter(ScatterGatherMethod method, PipelineJob job) throws
9797

9898
}
9999

100-
default void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
100+
default void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
101101
{
102102
ctx.getLogger().debug("No additional merge tasks are implemented for: " + getClass().getName());
103103
}
Lines changed: 128 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
package org.labkey.api.sequenceanalysis.run;
22

3-
import org.apache.commons.io.FileUtils;
43
import org.apache.commons.lang3.StringUtils;
54
import org.apache.logging.log4j.Logger;
5+
import org.jetbrains.annotations.Nullable;
66
import org.labkey.api.pipeline.PipelineJobException;
77
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
88
import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker;
@@ -13,13 +13,24 @@
1313
import java.io.IOException;
1414
import java.io.PrintWriter;
1515
import java.util.Arrays;
16+
import java.util.Collection;
17+
import java.util.Collections;
18+
import java.util.HashMap;
19+
import java.util.HashSet;
1620
import java.util.List;
21+
import java.util.Map;
22+
import java.util.Set;
23+
import java.util.stream.Collectors;
1724

1825
public class DockerWrapper extends AbstractCommandWrapper
1926
{
2027
private final String _containerName;
2128
private final PipelineContext _ctx;
2229
private File _tmpDir = null;
30+
private String _entryPoint = null;
31+
private boolean _runPrune = true;
32+
private String _alternateUserHome = null;
33+
private final Map<String, String> _dockerEnvironment = new HashMap<>();
2334

2435
public DockerWrapper(String containerName, Logger log, PipelineContext ctx)
2536
{
@@ -28,12 +39,32 @@ public DockerWrapper(String containerName, Logger log, PipelineContext ctx)
2839
_ctx = ctx;
2940
}
3041

42+
public void setAlternateUserHome(String alternateUserHome)
43+
{
44+
_alternateUserHome = alternateUserHome;
45+
}
46+
3147
public void setTmpDir(File tmpDir)
3248
{
3349
_tmpDir = tmpDir;
3450
}
3551

52+
public void setEntryPoint(String entryPoint)
53+
{
54+
_entryPoint = entryPoint;
55+
}
56+
57+
public void setRunPrune(boolean runPrune)
58+
{
59+
_runPrune = runPrune;
60+
}
61+
3662
public void executeWithDocker(List<String> containerArgs, File workDir, PipelineOutputTracker tracker) throws PipelineJobException
63+
{
64+
executeWithDocker(containerArgs, workDir, tracker, null);
65+
}
66+
67+
public void executeWithDocker(List<String> containerArgs, File workDir, PipelineOutputTracker tracker, @Nullable Collection<File> inputFiles) throws PipelineJobException
3768
{
3869
File localBashScript = new File(workDir, "docker.sh");
3970
File dockerBashScript = new File(workDir, "dockerRun.sh");
@@ -45,70 +76,131 @@ public void executeWithDocker(List<String> containerArgs, File workDir, Pipeline
4576
{
4677
writer.println("#!/bin/bash");
4778
writer.println("set -x");
48-
writer.println("WD=`pwd`");
49-
writer.println("HOME=`echo ~/`");
79+
writer.println("set -e");
80+
5081
writer.println("DOCKER='" + SequencePipelineService.get().getDockerCommand() + "'");
51-
writer.println("sudo $DOCKER pull " + _containerName);
52-
writer.println("sudo $DOCKER run --rm=true \\");
53-
writer.println("\t-v \"${WD}:/work\" \\");
54-
writer.println("\t-v \"${HOME}:/homeDir\" \\");
55-
_ctx.getDockerVolumes().forEach(ln -> writer.println(ln + " \\"));
82+
writer.println("$DOCKER pull " + _containerName);
83+
if (_runPrune)
84+
{
85+
writer.println("$DOCKER image prune -f");
86+
}
87+
88+
writer.println("$DOCKER run --rm=true \\");
89+
writer.println("\t--group-add keep-groups \\");
90+
91+
// NOTE: getDockerVolumes() returns bare paths (no -v prefix), so the home directory is compared against each volume as a path prefix:
92+
File homeDir = new File(System.getProperty("user.home"));
93+
if (homeDir.exists())
94+
{
95+
if (_ctx.getDockerVolumes().stream().noneMatch(homeDir.getPath()::startsWith))
96+
{
97+
writer.println("\t-v '" + homeDir.getPath() + "':'" + homeDir.getPath() + "' \\");
98+
}
99+
else
100+
{
101+
_ctx.getLogger().debug("homeDir already present in docker volumes, will not re-add");
102+
}
103+
104+
_dockerEnvironment.put("USER_HOME", homeDir.getPath());
105+
}
106+
107+
if (_alternateUserHome != null)
108+
{
109+
_dockerEnvironment.put("HOME", _alternateUserHome);
110+
}
111+
112+
_ctx.getDockerVolumes().forEach(v -> writer.println("\t-v '" + v + "':'" + v + "' \\"));
113+
if (inputFiles != null)
114+
{
115+
inspectInputFiles(inputFiles).forEach(v -> writer.println("\t-v '" + v + "':'" + v + "' \\"));
116+
}
117+
56118
if (_tmpDir != null)
57119
{
58-
writer.println("\t-v \"" + _tmpDir.getPath() + ":/tmp\" \\");
120+
// NOTE: getDockerVolumes() returns bare paths (no -v prefix), so the temp directory is compared against each volume as a path prefix:
121+
if (_ctx.getDockerVolumes().stream().noneMatch(_tmpDir.getPath()::startsWith))
122+
{
123+
writer.println("\t-v '" + _tmpDir.getPath() + "':/tmp \\");
124+
}
125+
else
126+
{
127+
_ctx.getLogger().debug("tmpDir already present in docker volumes, omitting");
128+
}
129+
130+
addToDockerEnvironment("TMPDIR", _tmpDir.getPath());
131+
}
132+
133+
if (_entryPoint != null)
134+
{
135+
writer.println("\t--entrypoint \"" + _entryPoint + "\"\\");
59136
}
60-
writer.println("\t--entrypoint /bin/bash \\");
61-
writer.println("\t-w /work \\");
137+
138+
writer.println("\t-w " + workDir.getPath() + " \\");
139+
addToDockerEnvironment("WORK_DIR", workDir.getPath());
140+
62141
Integer maxRam = SequencePipelineService.get().getMaxRam();
63142
if (maxRam != null)
64143
{
65144
writer.println("\t-e SEQUENCEANALYSIS_MAX_RAM=" + maxRam + " \\");
66145
writer.println("\t--memory='" + maxRam + "g' \\");
67146
}
147+
148+
for (String key : _dockerEnvironment.keySet())
149+
{
150+
writer.println("\t-e " + key + "='" + _dockerEnvironment.get(key) + "' \\");
151+
}
68152
writer.println("\t" + _containerName + " \\");
69-
writer.println("\t/work/" + dockerBashScript.getName());
70-
writer.println("EXIT_CODE=$?");
71-
writer.println("echo 'Docker run exit code: '$EXIT_CODE");
72-
writer.println("exit $EXIT_CODE");
153+
writer.println("\t" + dockerBashScript.getPath());
154+
writer.println("DOCKER_EXIT_CODE=$?");
155+
writer.println("echo 'Docker run exit code: '$DOCKER_EXIT_CODE");
156+
writer.println("exit $DOCKER_EXIT_CODE");
73157

74158
dockerWriter.println("#!/bin/bash");
75159
dockerWriter.println("set -x");
76160
dockerWriter.println(StringUtils.join(containerArgs, " "));
77-
dockerWriter.println("EXIT_CODE=$?");
78-
dockerWriter.println("echo 'Exit code: '$?");
79-
dockerWriter.println("exit $EXIT_CODE");
161+
dockerWriter.println("BASH_EXIT_CODE=$?");
162+
dockerWriter.println("echo 'Bash exit code: '$BASH_EXIT_CODE");
163+
dockerWriter.println("exit $BASH_EXIT_CODE");
80164
}
81165
catch (IOException e)
82166
{
83167
throw new PipelineJobException(e);
84168
}
85169

170+
localBashScript.setExecutable(true);
171+
dockerBashScript.setExecutable(true);
86172
execute(Arrays.asList("/bin/bash", localBashScript.getPath()));
87173
}
88174

89-
public File ensureLocalCopy(File input, File workingDirectory, PipelineOutputTracker output) throws PipelineJobException
175+
public void addToDockerEnvironment(String key, String value)
90176
{
91-
try
177+
_dockerEnvironment.put(key, value);
178+
}
179+
180+
private Collection<File> inspectInputFiles(Collection<File> inputFiles)
181+
{
182+
Set<File> toAdd = inputFiles.stream().map(f -> f.isDirectory() ? f : f.getParentFile()).filter(x -> _ctx.getDockerVolumes().stream().noneMatch(x.getPath()::startsWith)).collect(Collectors.toSet());
183+
if (!toAdd.isEmpty())
92184
{
93-
if (workingDirectory.equals(input.getParentFile()))
94-
{
95-
return input;
96-
}
185+
Set<File> paths = new HashSet<>();
186+
toAdd.forEach(x -> {
187+
_ctx.getLogger().debug("Adding volume for path: " + x.getPath());
97188

98-
File local = new File(workingDirectory, input.getName());
99-
if (!local.exists())
100-
{
101-
getLogger().debug("Copying file locally: " + input.getPath());
102-
FileUtils.copyFile(input, local);
103-
}
189+
File converted = SequencePipelineService.get().inferDockerVolume(x);
190+
if (!x.equals(converted))
191+
{
192+
_ctx.getLogger().debug("added as: " + converted.getPath());
193+
}
104194

105-
output.addIntermediateFile(local);
195+
if (_ctx.getDockerVolumes().stream().noneMatch(converted.getPath()::startsWith))
196+
{
197+
paths.add(converted);
198+
}
199+
});
106200

107-
return local;
108-
}
109-
catch (IOException e)
110-
{
111-
throw new PipelineJobException(e);
201+
return paths;
112202
}
203+
204+
return Collections.emptySet();
113205
}
114206
}

SequenceAnalysis/build.gradle

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -194,20 +194,4 @@ if (project.findProject(BuildUtils.getTestProjectPath(project.gradle)) != null &
194194
<< "\ncontext.pipelineConfig=${configDir.getAbsolutePath().replace("\\", "\\\\")}"
195195
}
196196
}
197-
}
198-
199-
project.tasks.register("copyJars", Copy)
200-
{ CopySpec copy ->
201-
copy.group = "Build"
202-
copy.description = "Copy commons-math3 JAR to module's lib directory"
203-
204-
copy.setDuplicatesStrategy(DuplicatesStrategy.EXCLUDE)
205-
copy.from(project.configurations.external)
206-
copy.into new File("${project.labkey.explodedModuleLibDir}")
207-
copy.include {
208-
"**commons-math3-**.jar"
209-
}
210-
}
211-
212-
project.tasks.named('module').configure { dependsOn(project.tasks.copyJars) }
213-
project.tasks.named('copyJars').configure { mustRunAfter(project.tasks.populateExplodedLib) }
197+
}

SequenceAnalysis/pipeline_code/extra_tools_install.sh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,9 +188,12 @@ then
188188
unzip paragraph-v2.4a-binary.zip
189189
rm paragraph-v2.4a-binary.zip
190190

191+
python3 -m pip install pysam intervaltree
192+
191193
cd ../
192194
cp -R paragraph $LKTOOLS_DIR
193195
ln -s ${LKTOOLS_DIR}/paragraph/bin/paragraph ${LKTOOLS_DIR}/paragraph
196+
ln -s ${LKTOOLS_DIR}/paragraph/bin/idxdepth ${LKTOOLS_DIR}/idxdepth
194197
ln -s ${LKTOOLS_DIR}/paragraph/bin/multigrmpy.py ${LKTOOLS_DIR}/multigrmpy.py
195198
else
196199
echo "Already installed"
@@ -215,3 +218,14 @@ then
215218
else
216219
echo "Already installed"
217220
fi
221+
222+
if [[ ! -e ${LKTOOLS_DIR}/multiqc || ! -z $FORCE_REINSTALL ]];
223+
then
224+
echo "Cleaning up previous installs"
225+
rm -Rf multiqc*
226+
rm -Rf $LKTOOLS_DIR/multiqc*
227+
228+
python3 -m pip install --user multiqc
229+
else
230+
echo "Already installed"
231+
fi

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequenceAnalysisModule.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,7 @@
104104
import org.labkey.sequenceanalysis.run.alignment.StarWrapper;
105105
import org.labkey.sequenceanalysis.run.alignment.VulcanWrapper;
106106
import org.labkey.sequenceanalysis.run.analysis.BamIterator;
107+
import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillFromFastaStep;
107108
import org.labkey.sequenceanalysis.run.analysis.BcftoolsFillTagsStep;
108109
import org.labkey.sequenceanalysis.run.analysis.BcftoolsFixploidyStep;
109110
import org.labkey.sequenceanalysis.run.analysis.DeepVariantAnalysis;
@@ -365,6 +366,7 @@ public static void registerPipelineSteps()
365366
SequencePipelineService.get().registerPipelineStep(new SummarizeGenotypeQualityStep.Provider());
366367
SequencePipelineService.get().registerPipelineStep(new BcftoolsFillTagsStep.Provider());
367368
SequencePipelineService.get().registerPipelineStep(new BcftoolsFixploidyStep.Provider());
369+
SequencePipelineService.get().registerPipelineStep(new BcftoolsFillFromFastaStep.Provider());
368370
SequencePipelineService.get().registerPipelineStep(new SVAnnotateStep.Provider());
369371

370372
//handlers

SequenceAnalysis/src/org/labkey/sequenceanalysis/SequencePipelineServiceImpl.java

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -472,16 +472,28 @@ public Collection<String> getDockerVolumes(Container c)
472472
{
473473
if (settings.isAvailable(c))
474474
{
475-
for (String volume : settings.getDockerVolumes(c))
476-
{
477-
volumeLines.add("-v '" + volume + "':'" + volume + "'");
478-
}
475+
return Collections.unmodifiableCollection(settings.getDockerVolumes(c));
479476
}
480477
}
481478

482479
return volumeLines;
483480
}
484481

482+
@Override
483+
public @Nullable File inferDockerVolume(File input)
484+
{
485+
for (JobResourceSettings settings : SequencePipelineServiceImpl.get().getResourceSettings())
486+
{
487+
File ret = settings.inferDockerVolume(input);
488+
if (ret != null)
489+
{
490+
return ret;
491+
}
492+
}
493+
494+
return input;
495+
}
496+
485497
@Override
486498
public List<File> getSequenceJobInputFiles(PipelineJob job)
487499
{
@@ -570,7 +582,7 @@ public void registerResourceSettings(JobResourceSettings settings)
570582
@Override
571583
public Set<JobResourceSettings> getResourceSettings()
572584
{
573-
return _resourceSettings;
585+
return Collections.unmodifiableSet(_resourceSettings);
574586
}
575587

576588
@Override

0 commit comments

Comments
 (0)