22
33import htsjdk .samtools .util .Interval ;
44import org .apache .commons .lang3 .StringUtils ;
5- import org .apache .logging .log4j .Logger ;
65import org .jetbrains .annotations .NotNull ;
76import org .labkey .api .pipeline .AbstractTaskFactory ;
87import org .labkey .api .pipeline .AbstractTaskFactorySettings ;
1514import org .labkey .api .sequenceanalysis .SequenceOutputFile ;
1615import org .labkey .api .sequenceanalysis .pipeline .ReferenceGenome ;
1716import org .labkey .api .sequenceanalysis .pipeline .SequenceOutputHandler ;
18- import org .labkey .api .sequenceanalysis .run . AbstractDiscvrSeqWrapper ;
17+ import org .labkey .api .sequenceanalysis .pipeline . VariantProcessingStep ;
1918import org .labkey .api .util .FileType ;
20- import org .labkey .api . writer . PrintWriters ;
19+ import org .labkey .sequenceanalysis . run . variant . OutputVariantsStartingInIntervalsStep ;
2120
2221import java .io .File ;
2322import java .io .IOException ;
24- import java .io .PrintWriter ;
2523import java .util .ArrayList ;
2624import java .util .Collections ;
2725import java .util .HashSet ;
@@ -112,6 +110,7 @@ private VariantProcessingJob getPipelineJob()
112110 SequenceTaskHelper .logModuleVersions (getJob ().getLogger ());
113111 RecordedAction action = new RecordedAction (ACTION_NAME );
114112 TaskFileManagerImpl manager = new TaskFileManagerImpl (getPipelineJob (), _wd .getDir (), _wd );
113+ JobContextImpl ctx = new JobContextImpl (getPipelineJob (), getPipelineJob ().getSequenceSupport (), getPipelineJob ().getParameterJson (), _wd .getDir (), new TaskFileManagerImpl (getPipelineJob (), _wd .getDir (), _wd ), _wd );
115114
116115 File finalOut ;
117116 SequenceOutputHandler <SequenceOutputHandler .SequenceOutputProcessor > handler = getPipelineJob ().getHandler ();
@@ -121,7 +120,7 @@ private VariantProcessingJob getPipelineJob()
121120 }
122121 else
123122 {
124- finalOut = runDefaultVariantMerge (manager , action , handler );
123+ finalOut = runDefaultVariantMerge (ctx , manager , action , handler );
125124 }
126125
127126 Map <String , File > scatterOutputs = getPipelineJob ().getScatterJobOutputs ();
@@ -153,7 +152,7 @@ private VariantProcessingJob getPipelineJob()
153152 return new RecordedActionSet (action );
154153 }
155154
156- private File runDefaultVariantMerge (TaskFileManagerImpl manager , RecordedAction action , SequenceOutputHandler <SequenceOutputHandler .SequenceOutputProcessor > handler ) throws PipelineJobException
155+ private File runDefaultVariantMerge (JobContextImpl ctx , TaskFileManagerImpl manager , RecordedAction action , SequenceOutputHandler <SequenceOutputHandler .SequenceOutputProcessor > handler ) throws PipelineJobException
157156 {
158157 Map <String , List <Interval >> jobToIntervalMap = getPipelineJob ().getJobToIntervalMap ();
159158 getJob ().setStatus (PipelineJob .TaskStatus .running , "Combining Per-Contig VCFs: " + jobToIntervalMap .size ());
@@ -180,12 +179,9 @@ private File runDefaultVariantMerge(TaskFileManagerImpl manager, RecordedAction
180179 if (ensureOutputsWithinIntervals )
181180 {
182181 getJob ().getLogger ().debug ("Ensuring ensure scatter outputs respect intervals" );
183- List <Interval > expectedIntervals = jobToIntervalMap .get (name );
184182
185- File intervalFile = new File (vcf .getParentFile (), "scatterIntervals.list" );
186183 File subsetVcf = new File (vcf .getParentFile (), SequenceAnalysisService .get ().getUnzippedBaseName (vcf .getName ()) + ".subset.vcf.gz" );
187184 File subsetVcfIdx = new File (subsetVcf .getPath () + ".tbi" );
188- manager .addIntermediateFile (intervalFile );
189185 manager .addIntermediateFile (subsetVcf );
190186 manager .addIntermediateFile (subsetVcfIdx );
191187
@@ -195,19 +191,8 @@ private File runDefaultVariantMerge(TaskFileManagerImpl manager, RecordedAction
195191 }
196192 else
197193 {
198- try (PrintWriter writer = PrintWriters .getPrintWriter (intervalFile ))
199- {
200- expectedIntervals .forEach (interval -> {
201- writer .println (interval .getContig () + ":" + interval .getStart () + "-" + interval .getEnd ());
202- });
203- }
204- catch (IOException e )
205- {
206- throw new PipelineJobException (e );
207- }
208-
209- Wrapper wrapper = new Wrapper (getJob ().getLogger ());
210- wrapper .execute (vcf , subsetVcf , intervalFile );
194+ OutputVariantsStartingInIntervalsStep .Wrapper wrapper = new OutputVariantsStartingInIntervalsStep .Wrapper (getJob ().getLogger ());
195+ wrapper .execute (vcf , subsetVcf , getPipelineJob ().getIntervalsForTask ());
211196 }
212197
213198 toConcat .add (subsetVcf );
@@ -222,6 +207,15 @@ private File runDefaultVariantMerge(TaskFileManagerImpl manager, RecordedAction
222207 manager .addIntermediateFile (new File (vcf .getPath () + ".tbi" ));
223208 }
224209
210+ Set <Integer > genomeIds = new HashSet <>();
211+ getPipelineJob ().getFiles ().forEach (x -> genomeIds .add (x .getLibrary_id ()));
212+ if (genomeIds .size () != 1 )
213+ {
214+ throw new PipelineJobException ("Expected a single genome, found: " + StringUtils .join (genomeIds , ", " ));
215+ }
216+
217+ ReferenceGenome genome = getPipelineJob ().getSequenceSupport ().getCachedGenome (genomeIds .iterator ().next ());
218+
225219 String basename = SequenceAnalysisService .get ().getUnzippedBaseName (toConcat .get (0 ).getName ());
226220 File combined = new File (getPipelineJob ().getAnalysisDirectory (), basename + ".vcf.gz" );
227221 File combinedIdx = new File (combined .getPath () + ".tbi" );
@@ -236,47 +230,16 @@ private File runDefaultVariantMerge(TaskFileManagerImpl manager, RecordedAction
236230 throw new PipelineJobException ("Missing one of more VCFs: " + missing .stream ().map (File ::getPath ).collect (Collectors .joining ("," )));
237231 }
238232
239- Set <Integer > genomeIds = new HashSet <>();
240- getPipelineJob ().getFiles ().forEach (x -> genomeIds .add (x .getLibrary_id ()));
241- if (genomeIds .size () != 1 )
242- {
243- throw new PipelineJobException ("Expected a single genome, found: " + StringUtils .join (genomeIds , ", " ));
244- }
245-
246- ReferenceGenome genome = getPipelineJob ().getSequenceSupport ().getCachedGenome (genomeIds .iterator ().next ());
247- combined = SequenceAnalysisService .get ().combineVcfs (toConcat , combined , genome , getJob ().getLogger (), true , null );
233+ boolean sortAfterMerge = handler instanceof VariantProcessingStep .SupportsScatterGather && ((VariantProcessingStep .SupportsScatterGather )handler ).doSortAfterMerge ();
234+ combined = SequenceAnalysisService .get ().combineVcfs (toConcat , combined , genome , getJob ().getLogger (), true , null , sortAfterMerge );
248235 }
249236 manager .addOutput (action , "Merged VCF" , combined );
250237
251- return combined ;
252- }
253-
254- public static class Wrapper extends AbstractDiscvrSeqWrapper
255- {
256- public Wrapper (Logger log )
238+ if (handler instanceof VariantProcessingStep .SupportsScatterGather )
257239 {
258- super ( log );
240+ (( VariantProcessingStep . SupportsScatterGather ) handler ). performAdditionalMergeTasks ( ctx , getPipelineJob (), manager , genome , toConcat );
259241 }
260242
261- public void execute (File inputVcf , File outputVcf , File intervalFile ) throws PipelineJobException
262- {
263- List <String > args = new ArrayList <>(getBaseArgs ());
264- args .add ("OutputVariantsStartingInIntervals" );
265-
266- args .add ("-V" );
267- args .add (inputVcf .getPath ());
268-
269- args .add ("-O" );
270- args .add (outputVcf .getPath ());
271-
272- args .add ("-L" );
273- args .add (intervalFile .getPath ());
274-
275- execute (args );
276- if (!outputVcf .exists ())
277- {
278- throw new PipelineJobException ("Missing file: " + outputVcf .getPath ());
279- }
280- }
243+ return combined ;
281244 }
282245}
0 commit comments