3030import org .labkey .api .sequenceanalysis .pipeline .SequenceOutputHandler ;
3131import org .labkey .api .sequenceanalysis .pipeline .SequencePipelineService ;
3232import org .labkey .api .sequenceanalysis .pipeline .VariantProcessingStep ;
33+ import org .labkey .api .sequenceanalysis .run .LiftoverBcfToolsWrapper ;
3334import org .labkey .api .sequenceanalysis .run .SelectVariantsWrapper ;
3435import org .labkey .api .util .FileType ;
35- import org .labkey .api .util .FileUtil ;
3636import org .labkey .api .view .ActionURL ;
3737import org .labkey .api .writer .PrintWriters ;
3838import org .labkey .sequenceanalysis .SequenceAnalysisModule ;
3939import org .labkey .sequenceanalysis .pipeline .ProcessVariantsHandler ;
40- import org .labkey .api . sequenceanalysis .run . LiftoverBcfToolsWrapper ;
40+ import org .labkey .sequenceanalysis .pipeline . SequenceOutputHandlerJob ;
4141import org .labkey .sequenceanalysis .run .util .LiftoverVcfWrapper ;
4242import org .labkey .sequenceanalysis .util .SequenceUtil ;
4343
5353/**
5454 * Created by bimber on 8/26/2014.
5555 */
56- public class LiftoverHandler implements SequenceOutputHandler <SequenceOutputHandler .SequenceOutputProcessor >, VariantProcessingStep .SupportsScatterGather
56+ public class LiftoverHandler implements SequenceOutputHandler <SequenceOutputHandler .SequenceOutputProcessor >, VariantProcessingStep .SupportsScatterGather , SequenceOutputHandler . TracksVCF
5757{
58- private final FileType _bedFileType = new FileType (".bed" , false );
59- //private FileType _gffFileType = new FileType("gff", false);
60- private final FileType _vcfFileType = new FileType (Arrays .asList (".vcf" , ".bcf" ), ".vcf" , false , FileType .gzSupportLevel .SUPPORT_GZ );
58+ private static final FileType _bedFileType = new FileType (".bed" , false );
59+ private static final FileType _vcfFileType = new FileType (Arrays .asList (".vcf" , ".bcf" ), ".vcf" , false , FileType .gzSupportLevel .SUPPORT_GZ );
6160
6261 public LiftoverHandler ()
6362 {
@@ -132,6 +131,30 @@ public boolean canProcess(SequenceOutputFile f)
132131 _vcfFileType .isType (f .getFile ()));
133132 }
134133
134+ private static File getUnmappedOutputFile (File liftedVcfFile )
135+ {
136+ return new File (liftedVcfFile .getParentFile (), liftedVcfFile .getName ().replaceAll (".lifted-" , ".unmapped-" ));
137+ }
138+
139+ private static String getOutputExtension (File inputFile )
140+ {
141+ String ext = null ;
142+ if (_bedFileType .isType (inputFile ))
143+ {
144+ ext = ".bed" ;
145+ }
146+ else if (_vcfFileType .isType (inputFile ))
147+ {
148+ ext = ".vcf.gz" ;
149+ }
150+ else
151+ {
152+ throw new UnsupportedOperationException ("Unsupported file type: " + inputFile .getName ());
153+ }
154+
155+ return ext ;
156+ }
157+
135158 @ Override
136159 public SequenceOutputProcessor getProcessor ()
137160 {
@@ -141,7 +164,7 @@ public SequenceOutputProcessor getProcessor()
141164 @ Override
142165 public boolean doSplitJobs ()
143166 {
144- return false ;
167+ return true ;
145168 }
146169
147170 public class Processor implements SequenceOutputProcessor
@@ -180,139 +203,110 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
180203
181204 boolean dropGenotypes = params .optBoolean ("dropGenotypes" , false );
182205 boolean useBcfTools = params .optBoolean ("useBcfTools" , false );
183- boolean doNotRetainUnmapped = params .optBoolean ("doNotRetainUnmapped" , true );
184- if (!doNotRetainUnmapped && !useBcfTools )
185- {
186- ctx .getLogger ().debug ("Picard LiftoverVcf requires an output file for rejected sites, so setting doNotRetainUnmapped to true" );
187- doNotRetainUnmapped = true ;
188- }
206+ boolean retainUnmapped = params .optBoolean ("retainUnmapped" , false );
189207
190208 int chainFileId = params .getInt ("chainFileId" );
191209 File chainFile = ctx .getSequenceSupport ().getCachedData (chainFileId );
192210 int targetGenomeId = params .getInt ("targetGenomeId" );
193211
194- for ( SequenceOutputFile f : inputFiles )
212+ if ( inputFiles . size () > 1 )
195213 {
196- job .getLogger ().info ("processing output: " + f .getFile ().getName ());
214+ throw new PipelineJobException ("Expected single input file" );
215+ }
216+
217+ SequenceOutputFile f = inputFiles .get (0 );
218+ job .getLogger ().info ("processing output: " + f .getFile ().getName ());
197219
198- RecordedAction action = new RecordedAction (getName ());
199- action .setStartTime (new Date ());
220+ RecordedAction action = new RecordedAction (getName ());
221+ action .setStartTime (new Date ());
200222
201- boolean isGzip = f .getFile ().getPath ().toLowerCase ().endsWith ("gz" );
202- int dots = isGzip ? 2 : 1 ;
203- String baseName = FileUtil .getBaseName (f .getFile (), dots );
223+ double pct = params .has ("pct" ) ? params .getDouble ("pct" ) : 0.95 ;
224+ job .getLogger ().info ("using minimum percent match: " + pct );
204225
205- double pct = params . has ( "pct" ) ? params . getDouble ( "pct" ) : 0.95 ;
206- job . getLogger (). info ( "using minimum percent match: " + pct );
226+ action . addInput ( f . getFile (), "Input File" ) ;
227+ action . addInput ( chainFile , "Chain File" );
207228
208- action .addInput (f .getFile (), "Input File" );
209- action .addInput (chainFile , "Chain File" );
229+ File outDir = ((FileAnalysisJobSupport ) job ).getAnalysisDirectory ();
230+ String baseName = SequenceAnalysisService .get ().getUnzippedBaseName (f .getFile ().getName ());
231+ File lifted = new File (outDir , baseName + ".lifted-" + targetGenomeId + getOutputExtension (f .getFile ()));
232+ File unmappedOutput = retainUnmapped ? getUnmappedOutputFile (lifted ) : null ;
210233
211- File outDir = (( FileAnalysisJobSupport ) job ). getAnalysisDirectory ();
212- String ext = null ;
234+ try
235+ {
213236 if (_bedFileType .isType (f .getFile ()))
214237 {
215- ext = ".bed" ;
238+ liftOverBed ( chainFile , f . getFile (), lifted , unmappedOutput , job , pct ) ;
216239 }
217240 else if (_vcfFileType .isType (f .getFile ()))
218241 {
219- ext = ".vcf.gz" ;
220- }
221- else
222- {
223- throw new UnsupportedOperationException ("Unsupported file type: " + f .getFile ().getName ());
242+ ReferenceGenome targetGenome = ctx .getSequenceSupport ().getCachedGenome (targetGenomeId );
243+ ReferenceGenome sourceGenome = ctx .getSequenceSupport ().getCachedGenome (f .getLibrary_id ());
244+ liftOverVcf (ctx , targetGenome , sourceGenome , chainFile , f .getFile (), lifted , unmappedOutput , job , pct , dropGenotypes , useBcfTools , intervals );
224245 }
246+ }
247+ catch (Exception e )
248+ {
249+ throw new PipelineJobException (e );
250+ }
225251
226- File lifted = new File (outDir , baseName + ".lifted-" + targetGenomeId + ext );
227- File unmappedOutput = doNotRetainUnmapped ? null : new File (outDir , baseName + ".unmapped-" + targetGenomeId + ext );
252+ job .getLogger ().info ("adding outputs" );
253+ action .addOutput (lifted , "Lifted Features" , lifted .exists (), true );
254+ if (lifted .exists ())
255+ {
256+ job .getLogger ().info ("adding lifted features: " + lifted .getName ());
257+
258+ SequenceOutputFile so1 = new SequenceOutputFile ();
259+ so1 .setName (f .getName () + " (lifted)" );
260+ so1 .setDescription ("Contains features from " + f .getName () + " after liftover" );
261+ so1 .setFile (lifted );
262+ so1 .setLibrary_id (targetGenomeId );
263+ so1 .setReadset (f .getReadset ());
264+ so1 .setAnalysis_id (f .getAnalysis_id ());
265+ so1 .setCategory (f .getCategory ());
266+ so1 .setContainer (job .getContainerId ());
267+ so1 .setCreated (new Date ());
268+ so1 .setModified (new Date ());
269+
270+ ctx .addSequenceOutput (so1 );
271+ }
228272
229- try
230- {
231- if (_bedFileType .isType (f .getFile ()))
232- {
233- liftOverBed (chainFile , f .getFile (), lifted , unmappedOutput , job , pct );
234- }
235- else if (_vcfFileType .isType (f .getFile ()))
236- {
237- ReferenceGenome targetGenome = ctx .getSequenceSupport ().getCachedGenome (targetGenomeId );
238- ReferenceGenome sourceGenome = ctx .getSequenceSupport ().getCachedGenome (f .getLibrary_id ());
239- liftOverVcf (ctx , targetGenome , sourceGenome , chainFile , f .getFile (), lifted , unmappedOutput , job , pct , dropGenotypes , useBcfTools , intervals );
240- }
241- }
242- catch (Exception e )
243- {
244- throw new PipelineJobException (e );
245- }
273+ if (unmappedOutput == null )
274+ {
275+ // skip
276+ }
277+ else if (!unmappedOutput .exists ())
278+ {
279+ job .getLogger ().info ("no unmapped intervals" );
280+ }
281+ else if (!SequenceUtil .hasLineCount (unmappedOutput ))
282+ {
283+ job .getLogger ().info ("no unmapped intervals" );
284+ unmappedOutput .delete ();
285+ }
286+ else
287+ {
288+ job .getLogger ().info ("adding unmapped features: " + unmappedOutput .getName ());
246289
247- job .getLogger ().info ("adding outputs" );
248- action .addOutput (lifted , "Lifted Features" , lifted .exists (), true );
249- if (lifted .exists ())
250- {
251- job .getLogger ().info ("adding lifted features: " + lifted .getName ());
252-
253- SequenceOutputFile so1 = new SequenceOutputFile ();
254- so1 .setName (f .getName () + " (lifted)" );
255- so1 .setDescription ("Contains features from " + f .getName () + " after liftover" );
256- //ExpData liftedData = ExperimentService.get().createData(job.getContainer(), new DataType("Liftover Output"));
257- //liftedData.setDataFileURI(lifted.toURI());
258- //liftedData.setName(lifted.getName());
259- //liftedData.save(job.getUser());
260- //so1.setDataId(liftedData.getRowId());
261- so1 .setFile (lifted );
262- so1 .setLibrary_id (targetGenomeId );
263- so1 .setReadset (f .getReadset ());
264- so1 .setAnalysis_id (f .getAnalysis_id ());
265- so1 .setCategory (f .getCategory ());
266- so1 .setContainer (job .getContainerId ());
267- so1 .setCreated (new Date ());
268- so1 .setModified (new Date ());
269-
270- ctx .addSequenceOutput (so1 );
271- }
290+ action .addOutput (unmappedOutput , "Unmapped features" , false , true );
272291
273- if (unmappedOutput == null )
274- {
275- // skip
276- }
277- else if (!unmappedOutput .exists ())
278- {
279- job .getLogger ().info ("no unmapped intervals" );
280- }
281- else if (!SequenceUtil .hasLineCount (unmappedOutput ))
282- {
283- job .getLogger ().info ("no unmapped intervals" );
284- unmappedOutput .delete ();
285- }
286- else
287- {
288- job .getLogger ().info ("adding unmapped features: " + unmappedOutput .getName ());
289-
290- action .addOutput (unmappedOutput , "Unmapped features" , false , true );
291-
292- SequenceOutputFile so2 = new SequenceOutputFile ();
293- so2 .setName (f .getName () + " (lifted/unmapped)" );
294- so2 .setDescription ("Contains the unmapped features after attempted liftover of " + f .getName ());
295-
296- //ExpData unmappedData = ExperimentService.get().createData(job.getContainer(), new DataType("Liftover Output"));
297- //unmappedData.setName(unmappedOutput.getName());
298- //unmappedData.setDataFileURI(unmappedOutput.toURI());
299- //unmappedData.save(job.getUser());
300- //so2.setDataId(unmappedData.getRowId());
301- so2 .setFile (unmappedOutput );
302- so2 .setLibrary_id (f .getLibrary_id ());
303- so2 .setReadset (f .getReadset ());
304- so2 .setAnalysis_id (f .getAnalysis_id ());
305- so2 .setCategory (f .getCategory ());
306- so2 .setContainer (job .getContainerId ());
307- so2 .setCreated (new Date ());
308- so2 .setModified (new Date ());
309-
310- ctx .addSequenceOutput (so2 );
311- }
292+ SequenceOutputFile so2 = new SequenceOutputFile ();
293+ so2 .setName (f .getName () + " (lifted/unmapped)" );
294+ so2 .setDescription ("Contains the unmapped features after attempted liftover of " + f .getName ());
295+
296+ so2 .setFile (unmappedOutput );
297+ so2 .setLibrary_id (f .getLibrary_id ());
298+ so2 .setReadset (f .getReadset ());
299+ so2 .setAnalysis_id (f .getAnalysis_id ());
300+ so2 .setCategory (f .getCategory ());
301+ so2 .setContainer (job .getContainerId ());
302+ so2 .setCreated (new Date ());
303+ so2 .setModified (new Date ());
312304
313- action .setEndTime (new Date ());
314- ctx .addActions (action );
305+ ctx .addSequenceOutput (so2 );
315306 }
307+
308+ action .setEndTime (new Date ());
309+ ctx .addActions (action );
316310 }
317311 }
318312
@@ -364,8 +358,21 @@ public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceG
364358 }
365359 else
366360 {
361+ File unmappedOutputFile = unmappedOutput ;
362+ if (unmappedOutputFile == null )
363+ {
364+ unmappedOutputFile = new File (currentVCF .getParentFile (), "liftoverRejects.vcf.gz" );
365+ }
366+
367367 LiftoverVcfWrapper wrapper = new LiftoverVcfWrapper (job .getLogger ());
368368 wrapper .doLiftover (currentVCF , chain , targetGenome .getWorkingFastaFile (), unmappedOutput , output , pct );
369+
370+ if (unmappedOutput == null )
371+ {
372+ ctx .getLogger ().debug ("Deleting liftover rejects VCF" );
373+ new File (unmappedOutputFile .getPath () + ".tbi" ).delete ();
374+ unmappedOutputFile .delete ();
375+ }
369376 }
370377
371378 Long mapped = null ;
@@ -378,7 +385,7 @@ public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceG
378385 SequenceAnalysisService .get ().ensureVcfIndex (output , job .getLogger ());
379386 }
380387
381- Long unmapped = 0L ;
388+ long unmapped = 0L ;
382389 if (unmappedOutput != null && unmappedOutput .exists ())
383390 {
384391 String unmappedStr = ProcessVariantsHandler .getVCFLineCount (unmappedOutput , job .getLogger (), false , true );
@@ -390,11 +397,32 @@ public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceG
390397
391398 if (mapped != null )
392399 {
393- Double fraction = (double )mapped / (mapped + unmapped );
400+ double fraction = (double )mapped / (mapped + unmapped );
394401 job .getLogger ().info ("fraction mapped of total: " + fraction );
395402 }
396403 }
397404
405+ @ Override
406+ public File getScatterJobOutput (JobContext ctx ) throws PipelineJobException
407+ {
408+ if (ctx .getJob () instanceof SequenceOutputHandlerJob sj )
409+ {
410+ SequenceOutputFile so = sj .getFiles ().get (0 );
411+
412+ return ProcessVariantsHandler .getScatterOutputByCategory (ctx , so .getCategory ());
413+ }
414+ else
415+ {
416+ throw new IllegalStateException ("Expected job to be instanceof SequenceOutputHandlerJob , was " + ctx .getJob ().getClass ().getName ());
417+ }
418+ }
419+
420+ @ Override
421+ public SequenceOutputFile createFinalSequenceOutput (PipelineJob job , File processed , List <SequenceOutputFile > inputFiles ) throws PipelineJobException
422+ {
423+ return ProcessVariantsHandler .createSequenceOutput (job , processed , inputFiles , inputFiles .get (0 ).getCategory ());
424+ }
425+
398426 public void liftOverBed (File chain , File input , File output , @ Nullable File unmappedOutput , PipelineJob job , double pct ) throws IOException , PipelineJobException
399427 {
400428 LiftOver lo = new LiftOver (chain );
0 commit comments