Skip to content

Commit bb1ed8f

Browse files
committed
Compute kinship using both KING and plink's implementation of KING
1 parent 44b4875 commit bb1ed8f

File tree

1 file changed

+49
-15
lines changed

1 file changed

+49
-15
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java

Lines changed: 49 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -90,20 +90,15 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
9090
plinkArgs.add("--vcf");
9191
plinkArgs.add(inputVCF.getPath());
9292

93-
plinkArgs.add("--make-bed");
94-
95-
// Added since KING is designed for plink1.9. This avoids the "Too many first alleles as the major allele" error.
96-
plinkArgs.add("--maj-ref");
97-
9893
boolean limitToChromosomes = getProvider().getParameterByName("limitToChromosomes").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, true);
9994
if (limitToChromosomes)
10095
{
10196
SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(genome.getSequenceDictionary().toPath());
102-
List<String> toKeep = dict.getSequences().stream().filter(s -> {
103-
String name = StringUtils.replaceIgnoreCase(s.getSequenceName(), "^chr", "");
97+
List<String> toKeep = dict.getSequences().stream().map(SAMSequenceRecord::getSequenceName).filter(sequenceName -> {
98+
String name = StringUtils.replaceIgnoreCase(sequenceName, "^chr", "");
10499

105100
return NumberUtils.isCreatable(name) || "X".equalsIgnoreCase(name) || "Y".equalsIgnoreCase(name);
106-
}).map(SAMSequenceRecord::getSequenceName).toList();
101+
}).toList();
107102

108103
if (toKeep.isEmpty())
109104
{
@@ -129,17 +124,20 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
129124
plinkArgs.add("--max-alleles");
130125
plinkArgs.add("2");
131126

132-
plinkArgs.add("--out");
133-
plinkArgs.add(plinkOut.getPath());
134-
135127
Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
136128
if (threads != null)
137129
{
138130
plinkArgs.add("--threads");
139131
plinkArgs.add(threads.toString());
140132
}
141133

142-
//TODO: consider --memory (in MB)
134+
List<String> plinkArgs1 = new ArrayList<>(plinkArgs);
135+
plinkArgs1.add("--make-bed");
136+
137+
// Added since KING is designed for plink1.9. This avoids the "Too many first alleles as the major allele" error.
138+
plinkArgs1.add("--maj-ref");
139+
plinkArgs1.add("--out");
140+
plinkArgs1.add(plinkOut.getPath());
143141

144142
plink.execute(plinkArgs);
145143

@@ -149,6 +147,41 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
149147
throw new PipelineJobException("Unable to find file: " + plinkOutBed.getPath());
150148
}
151149

150+
// Compute kinship with plink2:
151+
List<String> plinkArgs2 = new ArrayList<>(plinkArgs);
152+
plinkArgs2.add("--make-king-table");
153+
File plinkOutKing = new File(outputDirectory, "plinkKinship");
154+
plinkArgs2.add("--out");
155+
plinkArgs2.add(plinkOutKing.getPath());
156+
157+
plink.execute(plinkArgs1);
158+
159+
File plinkOutKingFile = new File(plinkOut.getPath() + ".kin0");
160+
if (!plinkOutKingFile.exists())
161+
{
162+
throw new PipelineJobException("Unable to find file: " + plinkOutKingFile.getPath());
163+
}
164+
165+
File plinkOutKingFileTxt = new File(plinkOutKingFile.getPath() + ".txt.gz");
166+
if (plinkOutKingFileTxt.exists())
167+
{
168+
plinkOutKingFileTxt.delete();
169+
}
170+
171+
long lineCount = SequencePipelineService.get().getLineCount(plinkOutKingFile)-1;
172+
try
173+
{
174+
Compress.compressGzip(plinkOutKingFile, plinkOutKingFileTxt);
175+
FileUtils.delete(plinkOutKingFile);
176+
}
177+
catch (IOException e)
178+
{
179+
throw new PipelineJobException(e);
180+
}
181+
182+
output.addSequenceOutput(plinkOutKingFileTxt, "PLINK2 Relatedness: " + inputVCF.getName(), "PLINK2 Kinship", null, null, genome.getGenomeId(), "Total lines: " + lineCount);
183+
184+
// Also with KING:
152185
KingWrapper wrapper = new KingWrapper(getPipelineCtx().getLogger());
153186
wrapper.setWorkingDir(outputDirectory);
154187

@@ -200,17 +233,18 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
200233
kinshipOutputTxt.delete();
201234
}
202235

236+
lineCount = SequencePipelineService.get().getLineCount(kinshipOutput)-1;
203237
try
204238
{
205-
kinshipOutput = Compress.compressGzip(kinshipOutput);
206-
FileUtils.moveFile(kinshipOutput, kinshipOutputTxt);
239+
Compress.compressGzip(kinshipOutput, kinshipOutputTxt);
240+
FileUtils.delete(kinshipOutput);
207241
}
208242
catch (IOException e)
209243
{
210244
throw new PipelineJobException(e);
211245
}
212246

213-
output.addSequenceOutput(kinshipOutputTxt, "King Relatedness: " + inputVCF.getName(), "KING Relatedness", null, null, genome.getGenomeId(), null);
247+
output.addSequenceOutput(kinshipOutputTxt, "King Relatedness: " + inputVCF.getName(), "KING Relatedness", null, null, genome.getGenomeId(), "Total lines: " + lineCount);
214248

215249
return output;
216250
}

0 commit comments

Comments
 (0)