@@ -90,20 +90,15 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
9090 plinkArgs .add ("--vcf" );
9191 plinkArgs .add (inputVCF .getPath ());
9292
93- plinkArgs .add ("--make-bed" );
94-
95- // Added since KING is designed for plink1.9. This avoids the "Too many first alleles as the major allele" error.
96- plinkArgs .add ("--maj-ref" );
97-
9893 boolean limitToChromosomes = getProvider ().getParameterByName ("limitToChromosomes" ).extractValue (getPipelineCtx ().getJob (), getProvider (), getStepIdx (), Boolean .class , true );
9994 if (limitToChromosomes )
10095 {
10196 SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor .extractDictionary (genome .getSequenceDictionary ().toPath ());
102- List <String > toKeep = dict .getSequences ().stream ().filter (s -> {
103- String name = StringUtils .replaceIgnoreCase (s . getSequenceName () , "^chr" , "" );
97+ List <String > toKeep = dict .getSequences ().stream ().map ( SAMSequenceRecord :: getSequenceName ). filter (sequenceName -> {
98+ String name = StringUtils .replaceIgnoreCase (sequenceName , "^chr" , "" );
10499
105100 return NumberUtils .isCreatable (name ) || "X" .equalsIgnoreCase (name ) || "Y" .equalsIgnoreCase (name );
106- }).map ( SAMSequenceRecord :: getSequenceName ). toList ();
101+ }).toList ();
107102
108103 if (toKeep .isEmpty ())
109104 {
@@ -129,17 +124,20 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
129124 plinkArgs .add ("--max-alleles" );
130125 plinkArgs .add ("2" );
131126
132- plinkArgs .add ("--out" );
133- plinkArgs .add (plinkOut .getPath ());
134-
135127 Integer threads = SequencePipelineService .get ().getMaxThreads (getPipelineCtx ().getLogger ());
136128 if (threads != null )
137129 {
138130 plinkArgs .add ("--threads" );
139131 plinkArgs .add (threads .toString ());
140132 }
141133
142- //TODO: consider --memory (in MB)
134+ List <String > plinkArgs1 = new ArrayList <>(plinkArgs );
135+ plinkArgs1 .add ("--make-bed" );
136+
137+ // Added since KING is designed for plink1.9. This avoids the "Too many first alleles as the major allele" error.
138+ plinkArgs1 .add ("--maj-ref" );
139+ plinkArgs1 .add ("--out" );
140+ plinkArgs1 .add (plinkOut .getPath ());
143141
144142 plink .execute (plinkArgs );
145143
@@ -149,6 +147,41 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
149147 throw new PipelineJobException ("Unable to find file: " + plinkOutBed .getPath ());
150148 }
151149
150+ // Compute kinship with plink2:
151+ List <String > plinkArgs2 = new ArrayList <>(plinkArgs );
152+ plinkArgs2 .add ("--make-king-table" );
153+ File plinkOutKing = new File (outputDirectory , "plinkKinship" );
154+ plinkArgs2 .add ("--out" );
155+ plinkArgs2 .add (plinkOutKing .getPath ());
156+
157+ plink .execute (plinkArgs1 );
158+
159+ File plinkOutKingFile = new File (plinkOut .getPath () + ".kin0" );
160+ if (!plinkOutKingFile .exists ())
161+ {
162+ throw new PipelineJobException ("Unable to find file: " + plinkOutKingFile .getPath ());
163+ }
164+
165+ File plinkOutKingFileTxt = new File (plinkOutKingFile .getPath () + ".txt.gz" );
166+ if (plinkOutKingFileTxt .exists ())
167+ {
168+ plinkOutKingFileTxt .delete ();
169+ }
170+
171+ long lineCount = SequencePipelineService .get ().getLineCount (plinkOutKingFile )-1 ;
172+ try
173+ {
174+ Compress .compressGzip (plinkOutKingFile , plinkOutKingFileTxt );
175+ FileUtils .delete (plinkOutKingFile );
176+ }
177+ catch (IOException e )
178+ {
179+ throw new PipelineJobException (e );
180+ }
181+
182+ output .addSequenceOutput (plinkOutKingFileTxt , "PLINK2 Relatedness: " + inputVCF .getName (), "PLINK2 Kinship" , null , null , genome .getGenomeId (), "Total lines: " + lineCount );
183+
184+ // Also with KING:
152185 KingWrapper wrapper = new KingWrapper (getPipelineCtx ().getLogger ());
153186 wrapper .setWorkingDir (outputDirectory );
154187
@@ -172,7 +205,7 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
172205 }
173206
174207 File kingFam = createFamFile (pedFile , new File (plinkOutBed .getParentFile (), "plink.fam" ));
175- kingArgs .add ("--ped " );
208+ kingArgs .add ("--fam " );
176209 kingArgs .add (kingFam .getPath ());
177210
178211 output .addIntermediateFile (kingFam );
@@ -184,7 +217,8 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
184217 kingArgs .add (threads .toString ());
185218 }
186219
187- kingArgs .add ("--related" );
220+ kingArgs .add ("--kinship" );
221+ kingArgs .add ("--rplot" );
188222
189223 File kinshipOutput = new File (outputDirectory , SequenceAnalysisService .get ().getUnzippedBaseName (inputVCF .getName ()) + ".kin" );
190224 wrapper .execute (kingArgs );
@@ -199,17 +233,18 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
199233 kinshipOutputTxt .delete ();
200234 }
201235
236+ lineCount = SequencePipelineService .get ().getLineCount (kinshipOutput )-1 ;
202237 try
203238 {
204- kinshipOutput = Compress .compressGzip (kinshipOutput );
205- FileUtils .moveFile (kinshipOutput , kinshipOutputTxt );
239+ Compress .compressGzip (kinshipOutput , kinshipOutputTxt );
240+ FileUtils .delete (kinshipOutput );
206241 }
207242 catch (IOException e )
208243 {
209244 throw new PipelineJobException (e );
210245 }
211246
212- output .addSequenceOutput (kinshipOutputTxt , "King Relatedness: " + inputVCF .getName (), "KING Relatedness" , null , null , genome .getGenomeId (), null );
247+ output .addSequenceOutput (kinshipOutputTxt , "King Relatedness: " + inputVCF .getName (), "KING Relatedness" , null , null , genome .getGenomeId (), "Total lines: " + lineCount );
213248
214249 return output ;
215250 }
0 commit comments