6 changes: 6 additions & 0 deletions mGAP/resources/views/releaseNotes.html
@@ -1,3 +1,9 @@
<h4>Release 3.0:</h4>
<ul>
<li>This release involves a major change in the processing of variants. All prior releases omitted variants in complex or repetitive regions. We originally excluded these because genotypes in such regions can be less accurate; however, some repetitive regions overlap coding regions and can contain valuable information, so they are now included. This also results in a significant increase in the total number of variants.</li>
<li>This is the first release to include a second species. The dataset now contains both rhesus macaques and Japanese macaques, with the Japanese macaque variants provided as a separate track.</li>
</ul>

<h4>Release 2.5:</h4>
<ul>
<li>This release includes a major change in how we perform variant annotation, <a href="mgap-annotation.view">described in more detail here.</a></li>
5 changes: 3 additions & 2 deletions mGAP/src/org/labkey/mgap/pipeline/AnnotationStep.java
@@ -31,6 +31,7 @@
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStepOutputImpl;
import org.labkey.api.sequenceanalysis.run.AbstractCommandPipelineStep;
import org.labkey.api.sequenceanalysis.run.LiftoverBcfToolsWrapper;
import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper;
import org.labkey.api.util.PageFlowUtil;
import org.labkey.api.writer.PrintWriters;
@@ -327,8 +328,8 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
File liftoverRejects = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(currentVcf.getName()) + ".liftoverReject" + grch37Genome.getGenomeId() + ".vcf.gz");
if (!indexExists(liftoverRejects) || !indexExists(liftedToGRCh37))
{
LiftoverVcfRunner liftoverVcfRunner = new LiftoverVcfRunner(getPipelineCtx().getLogger());
liftoverVcfRunner.doLiftover(currentVcf, chainFile, grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37, 0.95);
LiftoverBcfToolsWrapper liftoverVcfRunner = new LiftoverBcfToolsWrapper(getPipelineCtx().getLogger());
liftoverVcfRunner.doLiftover(currentVcf, chainFile, genome.getWorkingFastaFile(), grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37);
}
else
{
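A note on the liftover change in this file: the new LiftoverBcfToolsWrapper takes the source genome's FASTA in addition to the target's, and drops the trailing 0.95 argument the old runner took. A minimal before/after sketch of the call site, using only the signatures visible in this diff:

// Before: custom LiftoverVcfRunner (note the trailing 0.95 argument)
LiftoverVcfRunner runner = new LiftoverVcfRunner(getPipelineCtx().getLogger());
runner.doLiftover(currentVcf, chainFile, grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37, 0.95);

// After: bcftools-based liftover, which also needs the source genome's FASTA
LiftoverBcfToolsWrapper wrapper = new LiftoverBcfToolsWrapper(getPipelineCtx().getLogger());
wrapper.doLiftover(currentVcf, chainFile, genome.getWorkingFastaFile(), grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37);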
80 changes: 0 additions & 80 deletions mGAP/src/org/labkey/mgap/pipeline/LiftoverVcfRunner.java

This file was deleted.

23 changes: 13 additions & 10 deletions mGAP/src/org/labkey/mgap/pipeline/mGapReleaseGenerator.java
@@ -31,7 +31,6 @@
import org.labkey.api.data.TableSelector;
import org.labkey.api.exp.api.ExpData;
import org.labkey.api.exp.api.ExperimentService;
import org.labkey.api.jbrowse.JBrowseService;
import org.labkey.api.module.ModuleLoader;
import org.labkey.api.pipeline.PipelineJob;
import org.labkey.api.pipeline.PipelineJobException;
@@ -56,6 +55,7 @@
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor;
import org.labkey.api.sequenceanalysis.run.GeneToNameTranslator;
import org.labkey.api.sequenceanalysis.run.LiftoverBcfToolsWrapper;
import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper;
import org.labkey.api.util.FileType;
import org.labkey.api.util.FileUtil;
@@ -77,7 +77,6 @@
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@@ -723,7 +722,11 @@ private Double parseCadd(String cadd)
{
if (cadd.contains("|"))
{
return Arrays.stream(cadd.split("\\|")).map(Double::parseDouble).max(Double::compare).get();
return Arrays.stream(cadd.split("\\|")).filter(x -> !".".equals(x)).map(Double::parseDouble).max(Double::compare).orElse(null);
}
else if (".".equals(cadd))
{
return null;
}

return Double.parseDouble(cadd);
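For context on the parseCadd change: "." is the VCF convention for a missing annotation value, and Double.parseDouble(".") throws NumberFormatException, so both the pipe-delimited and scalar paths now treat it as missing. A sketch of the full method as implied by this hunk (code outside the hunk is inferred, not taken from the PR):

// Sketch of the updated method; the enclosing class and any code outside the hunk are assumed.
private Double parseCadd(String cadd)
{
    if (cadd.contains("|"))
    {
        // Multiple scores: take the maximum, ignoring "." placeholders.
        return Arrays.stream(cadd.split("\\|"))
                .filter(x -> !".".equals(x))
                .map(Double::parseDouble)
                .max(Double::compare)
                .orElse(null);
    }
    else if (".".equals(cadd))
    {
        // A lone "." means no CADD score for this variant.
        return null;
    }

    return Double.parseDouble(cadd);
}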
@@ -1020,7 +1023,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c

File sitesOnlyVcf = getSitesOnlyVcf(ctx, primaryTrackVcf, genome);

File lifted = liftToHuman(ctx, primaryTrackVcf, sitesOnlyVcf, grch37Genome);
File lifted = liftToHuman(ctx, primaryTrackVcf, sitesOnlyVcf, genome, grch37Genome);
SequenceOutputFile output3 = new SequenceOutputFile();
output3.setFile(lifted);
output3.setName("mGAP Release: " + species + " " + releaseVersion + " Lifted to Human");
@@ -1091,7 +1094,7 @@ private File getSitesOnlyVcf(JobContext ctx, File primaryTrackVcf, ReferenceGeno
return noGenotypes;
}

private File liftToHuman(JobContext ctx, File primaryTrackVcf, File noGenotypes, ReferenceGenome grch37Genome) throws PipelineJobException
private File liftToHuman(JobContext ctx, File primaryTrackVcf, File noGenotypes, ReferenceGenome sourceGenome, ReferenceGenome grch37Genome) throws PipelineJobException
{
//lift to target genome
Integer chainFileId = ctx.getSequenceSupport().getCachedObject(AnnotationStep.CHAIN_FILE, Integer.class);
@@ -1104,8 +1107,8 @@ private File liftToHuman(JobContext ctx, File primaryTrackVcf, File noGenotypes,
File liftoverRejects = new File(ctx.getOutputDir(), SequenceAnalysisService.get().getUnzippedBaseName(primaryTrackVcf.getName()) + ".liftoverRejectGRCh37.vcf.gz");
if (!indexExists(liftoverRejects))
{
LiftoverVcfRunner liftoverVcfRunner = new LiftoverVcfRunner(ctx.getLogger());
liftoverVcfRunner.doLiftover(noGenotypes, chainFile, grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37, 0.95);
LiftoverBcfToolsWrapper liftoverVcfRunner = new LiftoverBcfToolsWrapper(ctx.getLogger());
liftoverVcfRunner.doLiftover(noGenotypes, chainFile, sourceGenome.getWorkingFastaFile(), grch37Genome.getWorkingFastaFile(), liftoverRejects, liftedToGRCh37);
}
else
{
@@ -1388,12 +1391,12 @@ private void inspectAndSummarizeVcf(JobContext ctx, File vcfInput, GeneToNameTra
try
{
String as = StringUtils.trimToNull(polyphenScores.get(alleleIdx));
if (as == null)
if (as == null || ".".equals(as))
{
continue;
}

Double maxScore = Arrays.stream(as.split("\\|")).filter(x -> !x.isEmpty()).map(Double::parseDouble).max(Double::compare).orElse(-1.0);
double maxScore = Arrays.stream(as.split("\\|")).filter(x -> !x.isEmpty()).filter(x -> !".".equals(x)).map(Double::parseDouble).max(Double::compare).orElse(-1.0);
if (maxScore == 0.0)
{
ctx.getLogger().error("Suspicious values for Polyphen2_HVAR_S: " + maxScore + ", at position: " + vc.toStringWithoutGenotypes());
@@ -1581,7 +1584,7 @@ else if (af != null)
cadd = Arrays.stream(String.valueOf(cadd).split("\\|")).map(x -> {
try
{
double y = Double.parseDouble(x);
Double.parseDouble(x);
}
catch (Exception e)
{
@@ -83,7 +83,7 @@ public SequenceOutputProcessor getProcessor()
return new Processor();
}

public class Processor implements SequenceOutputProcessor
public static class Processor implements SequenceOutputProcessor
{
@Override
public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException
@@ -172,7 +172,7 @@ public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport
Map<String, SequenceOutputFile> fileMap = new HashMap<>();
for (SequenceOutputFile f : inputFiles)
{
action.addInputIfNotPresent(f.getFile(), "Methlylation Rates");
action.addInputIfNotPresent(f.getFile(), "Methylation Rates");
fileMap.put(f.getRowid().toString(), f);
}

@@ -26,6 +26,7 @@
import org.labkey.api.pipeline.RemoteExecutionEngine;
import org.labkey.api.query.FieldKey;
import org.labkey.api.security.UserManager;
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper;
import org.labkey.api.util.FileUtil;
import org.labkey.api.util.Job;
@@ -84,7 +85,7 @@ public void run(Logger log)
jobGuids.addAll(ts2.getArrayList(String.class));

JobRunner jr = JobRunner.getDefault();
for (RemoteExecutionEngine engine : PipelineJobService.get().getRemoteExecutionEngines())
for (RemoteExecutionEngine<?> engine : PipelineJobService.get().getRemoteExecutionEngines())
{
log.info("Starting maintenance task for: " + engine.getType());

@@ -158,6 +159,8 @@ public void run(Logger log)
//hacky, but this is only planned to be used by us
inspectFolder(log, new File("/home/exacloud/gscratch/prime-seq/workDir/"));
inspectFolder(log, new File("/home/exacloud/gscratch/prime-seq/cachedData/"));

runDockerPrune(log);
}

private void deleteDirectory(File child, Logger log)
@@ -230,6 +233,23 @@ private void inspectFolder(Logger log, File workDirBase)

}

private static void runDockerPrune(Logger log)
{
try
{
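// "docker system prune -f" non-interactively removes stopped containers, unused networks, dangling images, and dangling build cache.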
new SimpleScriptWrapper(log).execute(Arrays.asList(
SequencePipelineService.get().getDockerCommand(),
"system",
"prune",
"-f"
));
}
catch (PipelineJobException e)
{
_log.error("Error running docker prune", e);
}
}

public static class TestCase extends Assert
{
@Test