Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions mGAP/src/org/labkey/mgap/pipeline/GenerateMgapTracksStep.java
Original file line number Diff line number Diff line change
Expand Up @@ -565,7 +565,7 @@ private Map<String, String> getSampleToAlias(File input) throws PipelineJobExcep
}

@Override
public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
{
job.getLogger().info("Merging additional track VCFs");
Map<String, List<String>> trackToSamples = parseSampleMap(getSampleNameFile(getPipelineCtx().getSourceDirectory(true)));
Expand Down Expand Up @@ -609,7 +609,7 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi
so.setCategory(TRACK_CATEGORY);
so.setLibrary_id(genome.getGenomeId());
so.setDescription("mGAP track: " + trackName + ", total samples: " + trackToSamples.get(trackName).size());
manager.addSequenceOutput(so);
ctx.getFileManager().addSequenceOutput(so);
}

if (getAnnotationReferenceVcf() != null)
Expand Down Expand Up @@ -653,7 +653,7 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi
so.setCategory(TRACK_CATEGORY);
so.setLibrary_id(genome.getGenomeId());
so.setDescription("These are novel sites in mGAP v" + releaseVersion + " for " + species);
manager.addSequenceOutput(so);
ctx.getFileManager().addSequenceOutput(so);
}
}

Expand Down
4 changes: 2 additions & 2 deletions mGAP/src/org/labkey/mgap/pipeline/GroupCompareStep.java
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public GroupCompareStep create(PipelineContext ctx)
}

@Override
public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
{
job.getLogger().info("Merging variant tables");
List<File> toConcat = orderedScatterOutputs.stream().map(f -> {
Expand Down Expand Up @@ -138,7 +138,7 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi
so.setFile(combined);
so.setCategory("Variant List");
so.setLibrary_id(genome.getGenomeId());
manager.addSequenceOutput(so);
ctx.getFileManager().addSequenceOutput(so);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
}

@Override
public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, TaskFileManager manager, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, PipelineJob job, ReferenceGenome genome, List<File> orderedScatterOutputs, List<String> orderedJobDirs) throws PipelineJobException
{
job.getLogger().info("Merging missing sites VCFs");
List<File> toConcat = orderedScatterOutputs.stream().map(f -> {
Expand Down Expand Up @@ -142,6 +142,6 @@ public void performAdditionalMergeTasks(SequenceOutputHandler.JobContext ctx, Pi
so.setFile(combined);
so.setCategory("Missing Sites VCF");
so.setLibrary_id(genome.getGenomeId());
manager.addSequenceOutput(so);
ctx.getFileManager().addSequenceOutput(so);
}
}
42 changes: 28 additions & 14 deletions mcc/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion mcc/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@
"react-dom": "^17.0.2",
"react-tooltip": "^5.28.0",
"tsv": "^0.2.0",
"uuid": "^10.0.0"
"uuid": "^10.0.0",
"google-palette": "^1.1.1"
},
"devDependencies": {
"@labkey/build": "^7.7.1",
Expand Down
32 changes: 28 additions & 4 deletions mcc/src/client/GeneticsPlot/GeneticsPlot.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,34 @@ export function GeneticsPlot() {
<>
<ErrorBoundary>
<div style={{paddingBottom: 20, maxWidth: 1000}}>
Population structure analysis using PCA is a helpful way to summarize the genetic relationships among animals in the MCC. The PCA results can be thought of as a simple type of genetic clustering - animals with more similar principal component loadings are more genetically similar. A more precise description of the relationship between two animals is provided by kinship coefficients – these are quantitative measures of relatedness that can be calculated by comparing two genomes, and interpreted using genealogical language, such as ‘parent-child’, ‘uncle-nephew’, ‘first cousins’, etc.
</div>
<div style={{paddingBottom: 20, maxWidth: 1000}}>
Whole genome sequencing was performed on each animal and genotypes were called with GATK haplotype caller. Principal components analysis was performed with GCTA (https://yanglab.westlake.edu.cn/software/gcta/#PCA) and kinship coefficients were calculated with KING (https://www.kingrelatedness.com/). Analyses were performed by Ric del Rosario (Broad Institute).
Over the past few years, the MCC team has been working on extracting, sequencing and analyzing DNA from
marmosets across the participating breeding centers. While we have deposited the raw sequence data for
578 marmosets on NCBI's Sequence Read Archive (SRA), we are excited to report that the MCC portal now
houses a call set with single nucleotide variants and short indels for over 800 individuals.
<p/>
The MCC genomic database is extensive, with each individual being genotyped at millions of variants
across the genome. One way to summarize a large dataset is through Principal Component Analysis
(PCA). PCA is a technique used across disciplines (from astronomy to genomics) that reduces the
information in a multi-dimensional dataset to (fewer) principal components (PC) that retain overall
trends and patterns in the original data. Biologically, this could mean merging together two variants
that are always inherited together into just one PC, making the data easier to analyze while maintaining
its most important patterns. See the **Visualization with PCA** tab below.
<p/>
Although PCA is useful for broad-scale comparisons, it is not very useful when trying to distinguish
whether two individuals are siblings or first-cousins, for instance. For that, we have better statistics
that can describe the genetic relatedness between two individuals. We estimated genetic relatedness for
all pairs of individuals for which we have whole-genome data, and made these available under the
**Kinship** tab. There you will find the inferred relationships between pairs of individuals as well as
the calculated kinship coefficient, which is a quantitative measure of genetic relatedness
(see <a href="https://en.wikipedia.org/wiki/Coefficient_of_relationship#Kinship_coefficient">here</a> for more details).
<p/>
It is possible to explore the full MCC database of variants with a graphical interface by accessing the
**Genome Browser** tab. There you can, for example, visualize all the variants present in your gene of
interest by typing its name in the search bar.
<p/>
The genetic analyses described here were performed by Karina Ray (ONPRC), Murillo Rodrigues (ONPRC), and
Ric del Rosario (Broad Institute). Please contact us at <a href="mailto:mcc@ohsu.edu">mcc@ohsu.edu</a> with any
questions.
</div>
<Box sx={{ borderBottom: 1, borderColor: 'divider' }}>
<Tabs value={value} onChange={handleChange} aria-label="basic tabs example">
Expand Down
19 changes: 6 additions & 13 deletions mcc/src/client/GeneticsPlot/ScatterChart.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,14 @@ import {
LineElement,
Tooltip,
Legend
} from 'chart.js';
} from 'chart.js'
import palette from 'google-palette'

import { Scatter } from 'react-chartjs-2';
import React, { useEffect, useRef, useState } from 'react';

ChartJS.register(LinearScale, PointElement, LineElement, Tooltip, Legend);

const CHART_COLORS = [
'rgb(255, 99, 132)', //red
'rgb(255, 159, 64)', //orange
'rgb(255, 205, 86)', //yellow
'rgb(75, 192, 192)', //green
'rgb(54, 162, 235)', //blue
'rgb(153, 102, 255)', //purple
'rgb(201, 203, 207)' //grey
]

export default function ScatterChart(props: {data: any}) {
const { data } = props;

Expand All @@ -39,11 +30,13 @@ export default function ScatterChart(props: {data: any}) {
});

const dataByColony = []
const uniqueColonies = [...new Set(collectedData.map(x => x.colony))]
const uniqueColonies = [...new Set(collectedData.map(x => String(x.colony)))]
const colors = palette(['Set1', 'qualitative'], uniqueColonies.length);

uniqueColonies.forEach((colonyName : string, idx) => {
dataByColony.push({
label: colonyName,
backgroundColor: CHART_COLORS[idx],
backgroundColor: '#' + colors[idx],
data: collectedData.filter(x => x.colony == colonyName)
})
})
Expand Down
10 changes: 0 additions & 10 deletions primeseq/resources/external/comb-p/Dockerfile

This file was deleted.

21 changes: 0 additions & 21 deletions primeseq/resources/external/comb-p/combpWrapper.sh

This file was deleted.

4 changes: 2 additions & 2 deletions primeseq/src/org/labkey/primeseq/PrimeseqModule.java
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,6 @@ protected void init()
@Override
protected void doStartupAfterSpringConfig(ModuleContext moduleContext)
{
SequencePipelineService.get().registerResourceSettings(new ExacloudResourceSettings());

SystemMaintenance.addTask(new ClusterMaintenanceTask());

ClusterService.get().registerResourceAllocator(new BlastPipelineJobResourceAllocator.Factory());
Expand Down Expand Up @@ -109,6 +107,8 @@ public PipelineStartup()
}
else
{
SequencePipelineService.get().registerResourceSettings(new ExacloudResourceSettings());

SequencePipelineService.get().registerPipelineStep(new BismarkWrapper.Provider());
SequencePipelineService.get().registerPipelineStep(new BismarkWrapper.MethylationExtractorProvider());

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,18 +29,19 @@
import org.labkey.api.security.User;
import org.labkey.api.sequenceanalysis.SequenceOutputFile;
import org.labkey.api.sequenceanalysis.pipeline.PipelineContext;
import org.labkey.api.sequenceanalysis.pipeline.PipelineOutputTracker;
import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport;
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper;
import org.labkey.api.sequenceanalysis.run.CommandWrapper;
import org.labkey.api.sequenceanalysis.run.DockerWrapper;
import org.labkey.api.util.FileType;
import org.labkey.api.util.FileUtil;
import org.labkey.api.util.Pair;
import org.labkey.api.view.ActionURL;
import org.labkey.api.writer.PrintWriters;
import org.labkey.primeseq.PrimeseqModule;
import org.labkey.primeseq.pipeline.CombpRunner;

import java.io.BufferedReader;
import java.io.File;
Expand Down Expand Up @@ -487,8 +488,7 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
Double seed = ctx.getParams().optDouble("seed", 0.05);
Integer step = ctx.getParams().optInt("step", 100);

CombpRunner combp = new CombpRunner(ctx.getLogger());
File outBed = combp.runCompP(finalOut, ctx.getOutputDir(), dist, seed, step);
File outBed = runCompP(finalOut, ctx.getOutputDir(), dist, seed, step, ctx, ctx.getFileManager());
SequenceOutputFile so2 = new SequenceOutputFile();
so2.setName("Comb-p: " + ctx.getJob().getDescription());
so2.setDescription("Comb-p: " + jobDescription);
Expand Down Expand Up @@ -726,4 +726,36 @@ else if (i2.compareTo(il) > 0)
}
}
}

/**
 * Runs the comb-p "pipeline" command against the supplied BED file inside a docker container
 * (image source: https://github.com/bbimber/combpdocker) and returns the resulting regions BED.
 *
 * @param inputBed the input BED file of per-site p-values (assumes the p-value is in column 5 — TODO confirm)
 * @param outputDir directory in which the comb-p output files are written
 * @param dist passed to comb-p as --dist (autocorrelation / peak-joining distance)
 * @param seed passed to comb-p as --seed (p-value threshold to start a region)
 * @param stepSize passed to comb-p as --step
 * @param ctx pipeline context supplying the logger and working directory
 * @param tracker receives intermediate/output files produced by the docker run
 * @return the comb-p regions BED file
 * @throws PipelineJobException if the docker run fails or the expected regions file is not produced
 */
private File runCompP(File inputBed, File outputDir, int dist, double seed, int stepSize, PipelineContext ctx, PipelineOutputTracker tracker) throws PipelineJobException
{
    // All comb-p outputs share this prefix within the output directory
    File outputPrefix = new File(outputDir, FileUtil.getBaseName(inputBed) + ".combp");

    List<String> dockerArgs = new ArrayList<>(Arrays.asList(
            "comb-p", "pipeline",
            "-c", "5",
            "--dist", String.valueOf(dist),
            "--step", String.valueOf(stepSize),
            "--seed", String.valueOf(seed),
            "--p", outputPrefix.getPath(),
            inputBed.getPath()
    ));

    DockerWrapper dockerWrapper = new DockerWrapper("bbimber/combpdocker", ctx.getLogger(), ctx);
    dockerWrapper.executeWithDocker(dockerArgs, ctx.getWorkingDirectory(), tracker, Arrays.asList(inputBed));

    // comb-p writes the merged regions to <prefix>.regions.bed; its absence means the run failed
    File regionsBed = new File(outputPrefix.getPath() + ".regions.bed");
    if (!regionsBed.exists())
    {
        throw new PipelineJobException("Unable to find expected output: " + regionsBed.getPath());
    }

    return regionsBed;
}
}
Loading
Loading