@@ -90,20 +90,15 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
9090 plinkArgs .add ("--vcf" );
9191 plinkArgs .add (inputVCF .getPath ());
9292
93- plinkArgs .add ("--make-bed" );
94-
95- // Added since KING is designed for plink1.9. This avoids the "Too many first alleles as the major allele" error.
96- plinkArgs .add ("--maj-ref" );
97-
9893 boolean limitToChromosomes = getProvider ().getParameterByName ("limitToChromosomes" ).extractValue (getPipelineCtx ().getJob (), getProvider (), getStepIdx (), Boolean .class , true );
9994 if (limitToChromosomes )
10095 {
10196 SAMSequenceDictionary dict = SAMSequenceDictionaryExtractor .extractDictionary (genome .getSequenceDictionary ().toPath ());
102- List <String > toKeep = dict .getSequences ().stream ().filter (s -> {
103- String name = StringUtils .replaceIgnoreCase (s . getSequenceName () , "^chr" , "" );
97+ List <String > toKeep = dict .getSequences ().stream ().map ( SAMSequenceRecord :: getSequenceName ). filter (sequenceName -> {
98+ String name = StringUtils .replaceIgnoreCase (sequenceName , "^chr" , "" );
10499
105100 return NumberUtils .isCreatable (name ) || "X" .equalsIgnoreCase (name ) || "Y" .equalsIgnoreCase (name );
106- }).map ( SAMSequenceRecord :: getSequenceName ). toList ();
101+ }).toList ();
107102
108103 if (toKeep .isEmpty ())
109104 {
@@ -129,17 +124,20 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
129124 plinkArgs .add ("--max-alleles" );
130125 plinkArgs .add ("2" );
131126
132- plinkArgs .add ("--out" );
133- plinkArgs .add (plinkOut .getPath ());
134-
135127 Integer threads = SequencePipelineService .get ().getMaxThreads (getPipelineCtx ().getLogger ());
136128 if (threads != null )
137129 {
138130 plinkArgs .add ("--threads" );
139131 plinkArgs .add (threads .toString ());
140132 }
141133
142- //TODO: consider --memory (in MB)
134+ List <String > plinkArgs1 = new ArrayList <>(plinkArgs );
135+ plinkArgs1 .add ("--make-bed" );
136+
137+ // Added since KING is designed for plink1.9. This avoids the "Too many first alleles as the major allele" error.
138+ plinkArgs1 .add ("--maj-ref" );
139+ plinkArgs1 .add ("--out" );
140+ plinkArgs1 .add (plinkOut .getPath ());
143141
144142 plink .execute (plinkArgs );
145143
@@ -149,6 +147,41 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
149147 throw new PipelineJobException ("Unable to find file: " + plinkOutBed .getPath ());
150148 }
151149
150+ // Compute kinship with plink2:
151+ List <String > plinkArgs2 = new ArrayList <>(plinkArgs );
152+ plinkArgs2 .add ("--make-king-table" );
153+ File plinkOutKing = new File (outputDirectory , "plinkKinship" );
154+ plinkArgs2 .add ("--out" );
155+ plinkArgs2 .add (plinkOutKing .getPath ());
156+
157+ plink .execute (plinkArgs1 );
158+
159+ File plinkOutKingFile = new File (plinkOut .getPath () + ".kin0" );
160+ if (!plinkOutKingFile .exists ())
161+ {
162+ throw new PipelineJobException ("Unable to find file: " + plinkOutKingFile .getPath ());
163+ }
164+
165+ File plinkOutKingFileTxt = new File (plinkOutKingFile .getPath () + ".txt.gz" );
166+ if (plinkOutKingFileTxt .exists ())
167+ {
168+ plinkOutKingFileTxt .delete ();
169+ }
170+
171+ long lineCount = SequencePipelineService .get ().getLineCount (plinkOutKingFile )-1 ;
172+ try
173+ {
174+ Compress .compressGzip (plinkOutKingFile , plinkOutKingFileTxt );
175+ FileUtils .delete (plinkOutKingFile );
176+ }
177+ catch (IOException e )
178+ {
179+ throw new PipelineJobException (e );
180+ }
181+
182+ output .addSequenceOutput (plinkOutKingFileTxt , "PLINK2 Relatedness: " + inputVCF .getName (), "PLINK2 Kinship" , null , null , genome .getGenomeId (), "Total lines: " + lineCount );
183+
184+ // Also with KING:
152185 KingWrapper wrapper = new KingWrapper (getPipelineCtx ().getLogger ());
153186 wrapper .setWorkingDir (outputDirectory );
154187
@@ -200,17 +233,18 @@ public Output processVariants(File inputVCF, File outputDirectory, ReferenceGeno
200233 kinshipOutputTxt .delete ();
201234 }
202235
236+ lineCount = SequencePipelineService .get ().getLineCount (kinshipOutput )-1 ;
203237 try
204238 {
205- kinshipOutput = Compress .compressGzip (kinshipOutput );
206- FileUtils .moveFile (kinshipOutput , kinshipOutputTxt );
239+ Compress .compressGzip (kinshipOutput , kinshipOutputTxt );
240+ FileUtils .delete (kinshipOutput );
207241 }
208242 catch (IOException e )
209243 {
210244 throw new PipelineJobException (e );
211245 }
212246
213- output .addSequenceOutput (kinshipOutputTxt , "King Relatedness: " + inputVCF .getName (), "KING Relatedness" , null , null , genome .getGenomeId (), null );
247+ output .addSequenceOutput (kinshipOutputTxt , "King Relatedness: " + inputVCF .getName (), "KING Relatedness" , null , null , genome .getGenomeId (), "Total lines: " + lineCount );
214248
215249 return output ;
216250 }
0 commit comments