Skip to content
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -90,20 +90,15 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
plinkArgs.add("--vcf");
plinkArgs.add(inputVCF.getPath());

plinkArgs.add("--make-bed");

// Added since KING is designed for plink1.9. This avoids the "Too many first alleles as the major allele" error.
plinkArgs.add("--maj-ref");

boolean limitToChromosomes = getProvider().getParameterByName("limitToChromosomes").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, true);
if (limitToChromosomes)
{
SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(genome.getSequenceDictionary().toPath());
List<String> toKeep = dict.getSequences().stream().filter(s -> {
String name = StringUtils.replaceIgnoreCase(s.getSequenceName(), "^chr", "");
List<String> toKeep = dict.getSequences().stream().map(SAMSequenceRecord::getSequenceName).filter(sequenceName -> {
String name = StringUtils.replaceIgnoreCase(sequenceName, "^chr", "");

return NumberUtils.isCreatable(name) || "X".equalsIgnoreCase(name) || "Y".equalsIgnoreCase(name);
}).map(SAMSequenceRecord::getSequenceName).toList();
}).toList();

if (toKeep.isEmpty())
{
Expand All @@ -129,26 +124,64 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
plinkArgs.add("--max-alleles");
plinkArgs.add("2");

plinkArgs.add("--out");
plinkArgs.add(plinkOut.getPath());

Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
if (threads != null)
{
plinkArgs.add("--threads");
plinkArgs.add(threads.toString());
}

//TODO: consider --memory (in MB)
List<String> plinkArgs1 = new ArrayList<>(plinkArgs);
plinkArgs1.add("--make-bed");

plink.execute(plinkArgs);
// Added since KING is designed for plink1.9. This avoids the "Too many first alleles as the major allele" error.
plinkArgs1.add("--maj-ref");
plinkArgs1.add("--out");
plinkArgs1.add(plinkOut.getPath());

plink.execute(plinkArgs1);

File plinkOutBed = new File(plinkOut.getPath() + ".bed");
if (!plinkOutBed.exists())
{
throw new PipelineJobException("Unable to find file: " + plinkOutBed.getPath());
}

// Compute kinship with plink2:
List<String> plinkArgs2 = new ArrayList<>(plinkArgs);
plinkArgs2.add("--make-king-table");
File plinkOutKing = new File(outputDirectory, "plinkKinship");
plinkArgs2.add("--out");
plinkArgs2.add(plinkOutKing.getPath());

plink.execute(plinkArgs2);

File plinkOutKingFile = new File(plinkOutKing.getPath() + ".kin0");
if (!plinkOutKingFile.exists())
{
throw new PipelineJobException("Unable to find file: " + plinkOutKingFile.getPath());
}

File plinkOutKingFileTxt = new File(plinkOutKingFile.getPath() + ".txt.gz");
if (plinkOutKingFileTxt.exists())
{
plinkOutKingFileTxt.delete();
}

long lineCount = SequencePipelineService.get().getLineCount(plinkOutKingFile)-1;
try
{
Compress.compressGzip(plinkOutKingFile, plinkOutKingFileTxt);
FileUtils.delete(plinkOutKingFile);
}
catch (IOException e)
{
throw new PipelineJobException(e);
}

output.addSequenceOutput(plinkOutKingFileTxt, "PLINK2 Relatedness: " + inputVCF.getName(), "PLINK2 Kinship", null, null, genome.getGenomeId(), "Total lines: " + lineCount);

// Also with KING:
KingWrapper wrapper = new KingWrapper(getPipelineCtx().getLogger());
wrapper.setWorkingDir(outputDirectory);

Expand All @@ -172,7 +205,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
}

File kingFam = createFamFile(pedFile, new File(plinkOutBed.getParentFile(), "plink.fam"));
kingArgs.add("--ped");
kingArgs.add("--fam");
kingArgs.add(kingFam.getPath());

output.addIntermediateFile(kingFam);
Expand All @@ -184,7 +217,8 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
kingArgs.add(threads.toString());
}

kingArgs.add("--related");
kingArgs.add("--kinship");
kingArgs.add("--rplot");

File kinshipOutput = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".kin");
wrapper.execute(kingArgs);
Expand All @@ -199,17 +233,18 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
kinshipOutputTxt.delete();
}

lineCount = SequencePipelineService.get().getLineCount(kinshipOutput)-1;
try
{
kinshipOutput = Compress.compressGzip(kinshipOutput);
FileUtils.moveFile(kinshipOutput, kinshipOutputTxt);
Compress.compressGzip(kinshipOutput, kinshipOutputTxt);
FileUtils.delete(kinshipOutput);
}
catch (IOException e)
{
throw new PipelineJobException(e);
}

output.addSequenceOutput(kinshipOutputTxt, "King Relatedness: " + inputVCF.getName(), "KING Relatedness", null, null, genome.getGenomeId(), null);
output.addSequenceOutput(kinshipOutputTxt, "King Relatedness: " + inputVCF.getName(), "KING Relatedness", null, null, genome.getGenomeId(), "Total lines: " + lineCount);

return output;
}
Expand Down
Loading