Skip to content

Commit cb8a8ac

Browse files
authored
Merge pull request #335 from LabKey/fb_merge_24.11_to_develop
Merge discvr-24.11 to develop
2 parents a5a396f + 4238fee commit cb8a8ac

File tree

6 files changed

+1261
-1918
lines changed

6 files changed

+1261
-1918
lines changed

SequenceAnalysis/src/org/labkey/sequenceanalysis/run/variant/KingInferenceStep.java

Lines changed: 53 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -90,20 +90,15 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
9090
plinkArgs.add("--vcf");
9191
plinkArgs.add(inputVCF.getPath());
9292

93-
plinkArgs.add("--make-bed");
94-
95-
// Added since KING is designed for plink1.9. This avoids the "Too many first alleles as the major allele" error.
96-
plinkArgs.add("--maj-ref");
97-
9893
boolean limitToChromosomes = getProvider().getParameterByName("limitToChromosomes").extractValue(getPipelineCtx().getJob(), getProvider(), getStepIdx(), Boolean.class, true);
9994
if (limitToChromosomes)
10095
{
10196
SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor.extractDictionary(genome.getSequenceDictionary().toPath());
102-
List<String> toKeep = dict.getSequences().stream().filter(s -> {
103-
String name = StringUtils.replaceIgnoreCase(s.getSequenceName(), "^chr", "");
97+
List<String> toKeep = dict.getSequences().stream().map(SAMSequenceRecord::getSequenceName).filter(sequenceName -> {
98+
String name = StringUtils.replaceIgnoreCase(sequenceName, "^chr", "");
10499

105100
return NumberUtils.isCreatable(name) || "X".equalsIgnoreCase(name) || "Y".equalsIgnoreCase(name);
106-
}).map(SAMSequenceRecord::getSequenceName).toList();
101+
}).toList();
107102

108103
if (toKeep.isEmpty())
109104
{
@@ -129,26 +124,64 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
129124
plinkArgs.add("--max-alleles");
130125
plinkArgs.add("2");
131126

132-
plinkArgs.add("--out");
133-
plinkArgs.add(plinkOut.getPath());
134-
135127
Integer threads = SequencePipelineService.get().getMaxThreads(getPipelineCtx().getLogger());
136128
if (threads != null)
137129
{
138130
plinkArgs.add("--threads");
139131
plinkArgs.add(threads.toString());
140132
}
141133

142-
//TODO: consider --memory (in MB)
134+
List<String> plinkArgs1 = new ArrayList<>(plinkArgs);
135+
plinkArgs1.add("--make-bed");
143136

144-
plink.execute(plinkArgs);
137+
// Added since KING is designed for plink1.9. This avoids the "Too many first alleles as the major allele" error.
138+
plinkArgs1.add("--maj-ref");
139+
plinkArgs1.add("--out");
140+
plinkArgs1.add(plinkOut.getPath());
141+
142+
plink.execute(plinkArgs1);
145143

146144
File plinkOutBed = new File(plinkOut.getPath() + ".bed");
147145
if (!plinkOutBed.exists())
148146
{
149147
throw new PipelineJobException("Unable to find file: " + plinkOutBed.getPath());
150148
}
151149

150+
// Compute kinship with plink2:
151+
List<String> plinkArgs2 = new ArrayList<>(plinkArgs);
152+
plinkArgs2.add("--make-king-table");
153+
File plinkOutKing = new File(outputDirectory, "plinkKinship");
154+
plinkArgs2.add("--out");
155+
plinkArgs2.add(plinkOutKing.getPath());
156+
157+
plink.execute(plinkArgs2);
158+
159+
File plinkOutKingFile = new File(plinkOutKing.getPath() + ".kin0");
160+
if (!plinkOutKingFile.exists())
161+
{
162+
throw new PipelineJobException("Unable to find file: " + plinkOutKingFile.getPath());
163+
}
164+
165+
File plinkOutKingFileTxt = new File(plinkOutKingFile.getPath() + ".txt.gz");
166+
if (plinkOutKingFileTxt.exists())
167+
{
168+
plinkOutKingFileTxt.delete();
169+
}
170+
171+
long lineCount = SequencePipelineService.get().getLineCount(plinkOutKingFile)-1;
172+
try
173+
{
174+
Compress.compressGzip(plinkOutKingFile, plinkOutKingFileTxt);
175+
FileUtils.delete(plinkOutKingFile);
176+
}
177+
catch (IOException e)
178+
{
179+
throw new PipelineJobException(e);
180+
}
181+
182+
output.addSequenceOutput(plinkOutKingFileTxt, "PLINK2 Relatedness: " + inputVCF.getName(), "PLINK2 Kinship", null, null, genome.getGenomeId(), "Total lines: " + lineCount);
183+
184+
// Also with KING:
152185
KingWrapper wrapper = new KingWrapper(getPipelineCtx().getLogger());
153186
wrapper.setWorkingDir(outputDirectory);
154187

@@ -172,7 +205,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
172205
}
173206

174207
File kingFam = createFamFile(pedFile, new File(plinkOutBed.getParentFile(), "plink.fam"));
175-
kingArgs.add("--ped");
208+
kingArgs.add("--fam");
176209
kingArgs.add(kingFam.getPath());
177210

178211
output.addIntermediateFile(kingFam);
@@ -184,7 +217,8 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
184217
kingArgs.add(threads.toString());
185218
}
186219

187-
kingArgs.add("--related");
220+
kingArgs.add("--kinship");
221+
kingArgs.add("--rplot");
188222

189223
File kinshipOutput = new File(outputDirectory, SequenceAnalysisService.get().getUnzippedBaseName(inputVCF.getName()) + ".kin");
190224
wrapper.execute(kingArgs);
@@ -199,17 +233,18 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
199233
kinshipOutputTxt.delete();
200234
}
201235

236+
lineCount = SequencePipelineService.get().getLineCount(kinshipOutput)-1;
202237
try
203238
{
204-
kinshipOutput = Compress.compressGzip(kinshipOutput);
205-
FileUtils.moveFile(kinshipOutput, kinshipOutputTxt);
239+
Compress.compressGzip(kinshipOutput, kinshipOutputTxt);
240+
FileUtils.delete(kinshipOutput);
206241
}
207242
catch (IOException e)
208243
{
209244
throw new PipelineJobException(e);
210245
}
211246

212-
output.addSequenceOutput(kinshipOutputTxt, "King Relatedness: " + inputVCF.getName(), "KING Relatedness", null, null, genome.getGenomeId(), null);
247+
output.addSequenceOutput(kinshipOutputTxt, "King Relatedness: " + inputVCF.getName(), "KING Relatedness", null, null, genome.getGenomeId(), "Total lines: " + lineCount);
213248

214249
return output;
215250
}

0 commit comments

Comments
 (0)