Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ private Map<String, String> getSampleToAlias(File input) throws PipelineJobExcep
}

@Override
public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
{
job.getLogger().info("Merging additional track VCFs");
Map<String, List<String>> trackToSamples = parseSampleMap(getSampleNameFile(getPipelineCtx().getSourceDirectory(true)));
Expand Down Expand Up @@ -609,7 +609,7 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi
so.setCategory(TRACK_CATEGORY);
so.setLibrary_id(genome.getGenomeId());
so.setDescription("mGAP track: " + trackName + ", total samples: " + trackToSamples.get(trackName).size());
manager.addSequenceOutput(so);
ctx.getFileManager().addSequenceOutput(so);
}

if (getAnnotationReferenceVcf() != null)
Expand Down Expand Up @@ -653,7 +653,7 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi
so.setCategory(TRACK_CATEGORY);
so.setLibrary_id(genome.getGenomeId());
so.setDescription("These are novel sites in mGAP v" + releaseVersion + " for " + species);
manager.addSequenceOutput(so);
ctx.getFileManager().addSequenceOutput(so);
}
}

Expand Down
4 changes: 2 additions & 2 deletions mGAP/src/org/labkey/mgap/pipeline/GroupCompareStep.java
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public GroupCompareStep create(PipelineContext ctx)
}

@Override
public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
{
job.getLogger().info("Merging variant tables");
List<File> toConcat = orderedScatterOutputs.stream().map(f -> {
Expand Down Expand Up @@ -138,7 +138,7 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi
so.setFile(combined);
so.setCategory("Variant List");
so.setLibrary_id(genome.getGenomeId());
manager.addSequenceOutput(so);
ctx.getFileManager().addSequenceOutput(so);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
}

@Override
public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
{
job.getLogger().info("Merging missing sites VCFs");
List<File> toConcat = orderedScatterOutputs.stream().map(f -> {
Expand Down Expand Up @@ -142,6 +142,6 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi
so.setFile(combined);
so.setCategory("Missing Sites VCF");
so.setLibrary_id(genome.getGenomeId());
manager.addSequenceOutput(so);
ctx.getFileManager().addSequenceOutput(so);
}
}
42 changes: 28 additions & 14 deletions mcc/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion mcc/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
"react-dom": "^17.0.2",
"react-tooltip": "^5.28.0",
"tsv": "^0.2.0",
"uuid": "^10.0.0"
"uuid": "^10.0.0",
"google-palette": "^1.1.1"
},
"devDependencies": {
"@labkey/build": "^7.7.1",
Expand Down
32 changes: 28 additions & 4 deletions mcc/src/client/GeneticsPlot/GeneticsPlot.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,34 @@ export function GeneticsPlot() {
<>
<ErrorBoundary>
<div style={{paddingBottom: 20, maxWidth: 1000}}>
Population structure analysis using PCA is a helpful way to summarize the genetic relationships among animals in the MCC. The PCA results can be thought of as a simple type of genetic clustering - animals with more similar principal component loadings are more genetically similar. A more precise description of the relationship between two animals is provided by kinship coefficients – these are quantitative measures of relatedness that can be calculated by comparing two genomes, and interpreted using genealogical language, such as ‘parent-child’, ‘uncle-nephew’, ‘first cousins’, etc.
</div>
<div style={{paddingBottom: 20, maxWidth: 1000}}>
Whole genome sequencing was performed on each animal and genotypes were called with GATK haplotype caller. Principal components analysis was performed with GCTA (https://yanglab.westlake.edu.cn/software/gcta/#PCA) and kinship coefficients were calculated with KING (https://www.kingrelatedness.com/). Analyses were performed by Ric del Rosario (Broad Institute).
Over the past few years, the MCC team has been working on extracting, sequencing and analyzing DNA from
marmosets across the participating breeding centers. While we have deposited the raw sequence data for
578 marmosets on NCBI's Sequence Read Archive (SRA), we are excited to report that the MCC portal now
houses a call set with single nucleotide variants and short indels for over 800 individuals.
<p/>
The MCC genomic database is extensive, with each individual being genotyped at millions of variants
across the genome. One way to summarize a large dataset is through Principal Component Analysis
(PCA). PCA is a technique used across disciplines (from astronomy to genomics) that reduces the
information in a multi-dimensional dataset to (fewer) principal components (PC) that retain overall
trends and patterns in the original data. Biologically, this could mean merging together two variants
that are always inherited together into just one PC, making the data easier to analyze while maintaining
its most important patterns. See the **Visualization with PCA** tab below.
<p/>
Although PCA is useful for broad-scale comparisons, it is not very useful when trying to distinguish
whether two individuals are siblings or first-cousins, for instance. For that, we have better statistics
that can describe the genetic relatedness between two individuals. We estimated genetic relatedness for
all pairs of individuals for which we have whole-genome data, and made these available under the
**Kinship** tab. There you will find the inferred relationships between pairs of individuals as well as
the calculated kinship coefficient, which is a quantitative measure of genetic relatedness
(see <a href="https://en.wikipedia.org/wiki/Coefficient_of_relationship#Kinship_coefficient">here</a> for more details).
<p/>
It is possible to explore the full MCC database of variants with a graphical interface by accessing the
**Genome Browser** tab. There you can, for example, visualize all the variants present in your gene of
interest by typing its name in the search bar.
<p/>
The genetic analyses described here were performed by Karina Ray (ONPRC), Murillo Rodrigues (ONPRC), and
Ric del Rosario (Broad Institute). Please contact us at <a href="mailto:mcc@ohsu.edu">mcc@ohsu.edu</a> with any
questions.
</div>
<Box sx={{ borderBottom: 1, borderColor: 'divider' }}>
<Tabs value={value} onChange={handleChange} aria-label="basic tabs example">
Expand Down
19 changes: 6 additions & 13 deletions mcc/src/client/GeneticsPlot/ScatterChart.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,14 @@ import {
LineElement,
Tooltip,
Legend
} from 'chart.js';
} from 'chart.js'
import palette from 'google-palette'

import { Scatter } from 'react-chartjs-2';
import React, { useEffect, useRef, useState } from 'react';

ChartJS.register(LinearScale, PointElement, LineElement, Tooltip, Legend);

const CHART_COLORS = [
'rgb(255, 99, 132)', //red
'rgb(255, 159, 64)', //orange
'rgb(255, 205, 86)', //yellow
'rgb(75, 192, 192)', //green
'rgb(54, 162, 235)', //blue
'rgb(153, 102, 255)', //purple
'rgb(201, 203, 207)' //grey
]

export default function ScatterChart(props: {data: any}) {
const { data } = props;

Expand All @@ -39,11 +30,13 @@ export default function ScatterChart(props: {data: any}) {
});

const dataByColony = []
const uniqueColonies = [...new Set(collectedData.map(x => x.colony))]
const uniqueColonies = [...new Set(collectedData.map(x => String(x.colony)))]
const colors = palette(['Set1', 'qualitative'], uniqueColonies.length);

uniqueColonies.forEach((colonyName : string, idx) => {
dataByColony.push({
label: colonyName,
backgroundColor: CHART_COLORS[idx],
backgroundColor: '#' + colors[idx],
data: collectedData.filter(x => x.colony == colonyName)
})
})
Expand Down
10 changes: 0 additions & 10 deletions primeseq/resources/external/comb-p/Dockerfile

This file was deleted.

21 changes: 0 additions & 21 deletions primeseq/resources/external/comb-p/combpWrapper.sh

This file was deleted.

4 changes: 2 additions & 2 deletions primeseq/src/org/labkey/primeseq/PrimeseqModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,6 @@ protected void init()
@Override
protected void doStartupAfterSpringConfig(ModuleContext moduleContext)
{
SequencePipelineService.get().registerResourceSettings(new ExacloudResourceSettings());

SystemMaintenance.addTask(new ClusterMaintenanceTask());

ClusterService.get().registerResourceAllocator(new BlastPipelineJobResourceAllocator.Factory());
Expand Down Expand Up @@ -109,6 +107,8 @@ public PipelineStartup()
}
else
{
SequencePipelineService.get().registerResourceSettings(new ExacloudResourceSettings());

SequencePipelineService.get().registerPipelineStep(new BismarkWrapper.Provider());
SequencePipelineService.get().registerPipelineStep(new BismarkWrapper.MethylationExtractorProvider());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,19 @@
import org.labkey.api.security.User;
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker;
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
import org.labkey.api.sequenceanalysis.run.CommandWrapper;
import org.labkey.api.sequenceanalysis.run.DockerWrapper;
import org.labkey.api.util.FileType;
import org.labkey.api.util.FileUtil;
import org.labkey.api.util.Pair;
import org.labkey.api.view.ActionURL;
import org.labkey.api.writer.PrintWriters;
import org.labkey.primeseq.PrimeseqModule;
import org.labkey.primeseq.pipeline.CombpRunner;

import java.io.BufferedReader;
import java.io.File;
Expand Down Expand Up @@ -487,8 +488,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
Double seed = ctx.getParams().optDouble("seed", 0.05);
Integer step = ctx.getParams().optInt("step", 100);

CombpRunner combp = new CombpRunner(ctx.getLogger());
File outBed = combp.runCompP(finalOut, ctx.getOutputDir(), dist, seed, step);
File outBed = runCompP(finalOut, ctx.getOutputDir(), dist, seed, step, ctx, ctx.getFileManager());
SequenceOutputFile so2 = new SequenceOutputFile();
so2.setName("Comb-p: " + ctx.getJob().getDescription());
so2.setDescription("Comb-p: " + jobDescription);
Expand Down Expand Up @@ -726,4 +726,36 @@ else if (i2.compareTo(il) > 0)
}
}
}

/**
 * Runs the comb-p "pipeline" command against the supplied BED file inside a docker container
 * (image source: https://github.com/bbimber/combpdocker) and returns the resulting regions BED.
 *
 * @param inputBed the input BED file of per-site p-values (assumes the p-value is in column 5 — TODO confirm)
 * @param outputDir directory in which the comb-p output files are written
 * @param dist passed to comb-p as --dist (autocorrelation / peak-joining distance)
 * @param seed passed to comb-p as --seed (p-value threshold to start a region)
 * @param stepSize passed to comb-p as --step
 * @param ctx pipeline context supplying the logger and working directory
 * @param tracker receives intermediate/output files produced by the docker run
 * @return the comb-p regions BED file
 * @throws PipelineJobException if the docker run fails or the expected regions file is not produced
 */
private File runCompP(File inputBed, File outputDir, int dist, double seed, int stepSize, PipelineContext ctx, PipelineOutputTracker tracker) throws PipelineJobException
{
    // All comb-p outputs share this prefix within the output directory
    File outputPrefix = new File(outputDir, FileUtil.getBaseName(inputBed) + ".combp");

    List<String> dockerArgs = new ArrayList<>(Arrays.asList(
            "comb-p", "pipeline",
            "-c", "5",
            "--dist", String.valueOf(dist),
            "--step", String.valueOf(stepSize),
            "--seed", String.valueOf(seed),
            "--p", outputPrefix.getPath(),
            inputBed.getPath()
    ));

    DockerWrapper dockerWrapper = new DockerWrapper("bbimber/combpdocker", ctx.getLogger(), ctx);
    dockerWrapper.executeWithDocker(dockerArgs, ctx.getWorkingDirectory(), tracker, Arrays.asList(inputBed));

    // comb-p writes the merged regions to <prefix>.regions.bed; its absence means the run failed
    File regionsBed = new File(outputPrefix.getPath() + ".regions.bed");
    if (!regionsBed.exists())
    {
        throw new PipelineJobException("Unable to find expected output: " + regionsBed.getPath());
    }

    return regionsBed;
}
}
Loading
Loading