Skip to content

Commit 9740678

Browse files
committed
Improve liftover behavior for scatter/gather
1 parent 4e5cb43 commit 9740678

File tree

2 files changed

+153
-133
lines changed

2 files changed

+153
-133
lines changed

SequenceAnalysis/resources/web/SequenceAnalysis/window/LiftoverWindow.js

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,9 @@ Ext4.define('SequenceAnalysis.window.LiftoverWindow', {
119119
fieldLabel: 'Use bcftools'
120120
}, {
121121
xtype: 'checkbox',
122-
itemId: 'doNotRetainUnmapped',
123-
checked: true,
124-
fieldLabel: 'Do Not Retain Unmapped'
122+
itemId: 'retainUnmapped',
123+
checked: false,
124+
fieldLabel: 'Retain Unmapped Variants'
125125
}].concat(SequenceAnalysis.window.OutputHandlerWindow.getCfgForToolParameters(this.toolParameters)).concat([{
126126
xtype: 'sequenceanalysis-variantscattergatherpanel',
127127
defaultFieldWidth: 500,
@@ -163,17 +163,9 @@ Ext4.define('SequenceAnalysis.window.LiftoverWindow', {
163163
params.pct = this.down('#pctField').getValue();
164164
}
165165

166-
if (this.down('#dropGenotypes').getValue()){
167-
params.dropGenotypes = this.down('#dropGenotypes').getValue();
168-
}
169-
170-
if (this.down('#useBcfTools').getValue()){
171-
params.useBcfTools = this.down('#useBcfTools').getValue();
172-
}
173-
174-
if (this.down('#doNotRetainUnmapped').getValue()){
175-
params.doNotRetainUnmapped = this.down('#doNotRetainUnmapped').getValue();
176-
}
166+
params.dropGenotypes = !!this.down('#dropGenotypes').getValue();
167+
params.useBcfTools = !!this.down('#useBcfTools').getValue();
168+
params.retainUnmapped = !!this.down('#retainUnmapped').getValue();
177169

178170
Ext4.Array.forEach(this.down('sequenceanalysis-variantscattergatherpanel').query('field'), function(field){
179171
params[field.name] = field.getValue();

SequenceAnalysis/src/org/labkey/sequenceanalysis/analysis/LiftoverHandler.java

Lines changed: 147 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,14 @@
3030
import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler;
3131
import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService;
3232
import org.labkey.api.sequenceanalysis.pipeline.VariantProcessingStep;
33+
import org.labkey.api.sequenceanalysis.run.LiftoverBcfToolsWrapper;
3334
import org.labkey.api.sequenceanalysis.run.SelectVariantsWrapper;
3435
import org.labkey.api.util.FileType;
35-
import org.labkey.api.util.FileUtil;
3636
import org.labkey.api.view.ActionURL;
3737
import org.labkey.api.writer.PrintWriters;
3838
import org.labkey.sequenceanalysis.SequenceAnalysisModule;
3939
import org.labkey.sequenceanalysis.pipeline.ProcessVariantsHandler;
40-
import org.labkey.api.sequenceanalysis.run.LiftoverBcfToolsWrapper;
40+
import org.labkey.sequenceanalysis.pipeline.SequenceOutputHandlerJob;
4141
import org.labkey.sequenceanalysis.run.util.LiftoverVcfWrapper;
4242
import org.labkey.sequenceanalysis.util.SequenceUtil;
4343

@@ -53,11 +53,10 @@
5353
/**
5454
* Created by bimber on 8/26/2014.
5555
*/
56-
public class LiftoverHandler implements SequenceOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>, VariantProcessingStep.SupportsScatterGather
56+
public class LiftoverHandler implements SequenceOutputHandler<SequenceOutputHandler.SequenceOutputProcessor>, VariantProcessingStep.SupportsScatterGather, SequenceOutputHandler.TracksVCF
5757
{
58-
private final FileType _bedFileType = new FileType(".bed", false);
59-
//private FileType _gffFileType = new FileType("gff", false);
60-
private final FileType _vcfFileType = new FileType(Arrays.asList(".vcf", ".bcf"), ".vcf", false, FileType.gzSupportLevel.SUPPORT_GZ);
58+
private static final FileType _bedFileType = new FileType(".bed", false);
59+
private static final FileType _vcfFileType = new FileType(Arrays.asList(".vcf", ".bcf"), ".vcf", false, FileType.gzSupportLevel.SUPPORT_GZ);
6160

6261
public LiftoverHandler()
6362
{
@@ -132,6 +131,30 @@ public boolean canProcess(SequenceOutputFile f)
132131
_vcfFileType.isType(f.getFile()));
133132
}
134133

134+
private static File getUnmappedOutputFile(File liftedVcfFile)
135+
{
136+
return new File(liftedVcfFile.getParentFile(), liftedVcfFile.getName().replaceAll(".lifted-", ".unmapped-"));
137+
}
138+
139+
private static String getOutputExtension(File inputFile)
140+
{
141+
String ext = null;
142+
if (_bedFileType.isType(inputFile))
143+
{
144+
ext = ".bed";
145+
}
146+
else if (_vcfFileType.isType(inputFile))
147+
{
148+
ext = ".vcf.gz";
149+
}
150+
else
151+
{
152+
throw new UnsupportedOperationException("Unsupported file type: " + inputFile.getName());
153+
}
154+
155+
return ext;
156+
}
157+
135158
@Override
136159
public SequenceOutputProcessor getProcessor()
137160
{
@@ -141,7 +164,7 @@ public SequenceOutputProcessor getProcessor()
141164
@Override
142165
public boolean doSplitJobs()
143166
{
144-
return false;
167+
return true;
145168
}
146169

147170
public class Processor implements SequenceOutputProcessor
@@ -180,139 +203,110 @@ public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext c
180203

181204
boolean dropGenotypes = params.optBoolean("dropGenotypes", false);
182205
boolean useBcfTools = params.optBoolean("useBcfTools", false);
183-
boolean doNotRetainUnmapped = params.optBoolean("doNotRetainUnmapped", true);
184-
if (!doNotRetainUnmapped && !useBcfTools)
185-
{
186-
ctx.getLogger().debug("Picard LiftoverVcf requires an output file for rejected sites, so setting doNotRetainUnmapped to true");
187-
doNotRetainUnmapped = true;
188-
}
206+
boolean retainUnmapped = params.optBoolean("retainUnmapped", false);
189207

190208
int chainFileId = params.getInt("chainFileId");
191209
File chainFile = ctx.getSequenceSupport().getCachedData(chainFileId);
192210
int targetGenomeId = params.getInt("targetGenomeId");
193211

194-
for (SequenceOutputFile f : inputFiles)
212+
if (inputFiles.size() > 1)
195213
{
196-
job.getLogger().info("processing output: " + f.getFile().getName());
214+
throw new PipelineJobException("Expected single input file");
215+
}
216+
217+
SequenceOutputFile f = inputFiles.get(0);
218+
job.getLogger().info("processing output: " + f.getFile().getName());
197219

198-
RecordedAction action = new RecordedAction(getName());
199-
action.setStartTime(new Date());
220+
RecordedAction action = new RecordedAction(getName());
221+
action.setStartTime(new Date());
200222

201-
boolean isGzip = f.getFile().getPath().toLowerCase().endsWith("gz");
202-
int dots = isGzip ? 2 : 1;
203-
String baseName = FileUtil.getBaseName(f.getFile(), dots);
223+
double pct = params.has("pct") ? params.getDouble("pct") : 0.95;
224+
job.getLogger().info("using minimum percent match: " + pct);
204225

205-
double pct = params.has("pct") ? params.getDouble("pct") : 0.95;
206-
job.getLogger().info("using minimum percent match: " + pct);
226+
action.addInput(f.getFile(), "Input File");
227+
action.addInput(chainFile, "Chain File");
207228

208-
action.addInput(f.getFile(), "Input File");
209-
action.addInput(chainFile, "Chain File");
229+
File outDir = ((FileAnalysisJobSupport) job).getAnalysisDirectory();
230+
String baseName = SequenceAnalysisService.get().getUnzippedBaseName(f.getFile().getName());
231+
File lifted = new File(outDir, baseName + ".lifted-" + targetGenomeId + getOutputExtension(f.getFile()));
232+
File unmappedOutput = retainUnmapped ? getUnmappedOutputFile(lifted) : null;
210233

211-
File outDir = ((FileAnalysisJobSupport) job).getAnalysisDirectory();
212-
String ext = null;
234+
try
235+
{
213236
if (_bedFileType.isType(f.getFile()))
214237
{
215-
ext = ".bed";
238+
liftOverBed(chainFile, f.getFile(), lifted, unmappedOutput, job, pct);
216239
}
217240
else if (_vcfFileType.isType(f.getFile()))
218241
{
219-
ext = ".vcf.gz";
220-
}
221-
else
222-
{
223-
throw new UnsupportedOperationException("Unsupported file type: " + f.getFile().getName());
242+
ReferenceGenome targetGenome = ctx.getSequenceSupport().getCachedGenome(targetGenomeId);
243+
ReferenceGenome sourceGenome = ctx.getSequenceSupport().getCachedGenome(f.getLibrary_id());
244+
liftOverVcf(ctx, targetGenome, sourceGenome, chainFile, f.getFile(), lifted, unmappedOutput, job, pct, dropGenotypes, useBcfTools, intervals);
224245
}
246+
}
247+
catch (Exception e)
248+
{
249+
throw new PipelineJobException(e);
250+
}
225251

226-
File lifted = new File(outDir, baseName + ".lifted-" + targetGenomeId + ext);
227-
File unmappedOutput = doNotRetainUnmapped ? null : new File(outDir, baseName + ".unmapped-" + targetGenomeId + ext);
252+
job.getLogger().info("adding outputs");
253+
action.addOutput(lifted, "Lifted Features", lifted.exists(), true);
254+
if (lifted.exists())
255+
{
256+
job.getLogger().info("adding lifted features: " + lifted.getName());
257+
258+
SequenceOutputFile so1 = new SequenceOutputFile();
259+
so1.setName(f.getName() + " (lifted)");
260+
so1.setDescription("Contains features from " + f.getName() + " after liftover");
261+
so1.setFile(lifted);
262+
so1.setLibrary_id(targetGenomeId);
263+
so1.setReadset(f.getReadset());
264+
so1.setAnalysis_id(f.getAnalysis_id());
265+
so1.setCategory(f.getCategory());
266+
so1.setContainer(job.getContainerId());
267+
so1.setCreated(new Date());
268+
so1.setModified(new Date());
269+
270+
ctx.addSequenceOutput(so1);
271+
}
228272

229-
try
230-
{
231-
if (_bedFileType.isType(f.getFile()))
232-
{
233-
liftOverBed(chainFile, f.getFile(), lifted, unmappedOutput, job, pct);
234-
}
235-
else if (_vcfFileType.isType(f.getFile()))
236-
{
237-
ReferenceGenome targetGenome = ctx.getSequenceSupport().getCachedGenome(targetGenomeId);
238-
ReferenceGenome sourceGenome = ctx.getSequenceSupport().getCachedGenome(f.getLibrary_id());
239-
liftOverVcf(ctx, targetGenome, sourceGenome, chainFile, f.getFile(), lifted, unmappedOutput, job, pct, dropGenotypes, useBcfTools, intervals);
240-
}
241-
}
242-
catch (Exception e)
243-
{
244-
throw new PipelineJobException(e);
245-
}
273+
if (unmappedOutput == null)
274+
{
275+
// skip
276+
}
277+
else if (!unmappedOutput.exists())
278+
{
279+
job.getLogger().info("no unmapped intervals");
280+
}
281+
else if (!SequenceUtil.hasLineCount(unmappedOutput))
282+
{
283+
job.getLogger().info("no unmapped intervals");
284+
unmappedOutput.delete();
285+
}
286+
else
287+
{
288+
job.getLogger().info("adding unmapped features: " + unmappedOutput.getName());
246289

247-
job.getLogger().info("adding outputs");
248-
action.addOutput(lifted, "Lifted Features", lifted.exists(), true);
249-
if (lifted.exists())
250-
{
251-
job.getLogger().info("adding lifted features: " + lifted.getName());
252-
253-
SequenceOutputFile so1 = new SequenceOutputFile();
254-
so1.setName(f.getName() + " (lifted)");
255-
so1.setDescription("Contains features from " + f.getName() + " after liftover");
256-
//ExpData liftedData = ExperimentService.get().createData(job.getContainer(), new DataType("Liftover Output"));
257-
//liftedData.setDataFileURI(lifted.toURI());
258-
//liftedData.setName(lifted.getName());
259-
//liftedData.save(job.getUser());
260-
//so1.setDataId(liftedData.getRowId());
261-
so1.setFile(lifted);
262-
so1.setLibrary_id(targetGenomeId);
263-
so1.setReadset(f.getReadset());
264-
so1.setAnalysis_id(f.getAnalysis_id());
265-
so1.setCategory(f.getCategory());
266-
so1.setContainer(job.getContainerId());
267-
so1.setCreated(new Date());
268-
so1.setModified(new Date());
269-
270-
ctx.addSequenceOutput(so1);
271-
}
290+
action.addOutput(unmappedOutput, "Unmapped features", false, true);
272291

273-
if (unmappedOutput == null)
274-
{
275-
// skip
276-
}
277-
else if (!unmappedOutput.exists())
278-
{
279-
job.getLogger().info("no unmapped intervals");
280-
}
281-
else if (!SequenceUtil.hasLineCount(unmappedOutput))
282-
{
283-
job.getLogger().info("no unmapped intervals");
284-
unmappedOutput.delete();
285-
}
286-
else
287-
{
288-
job.getLogger().info("adding unmapped features: " + unmappedOutput.getName());
289-
290-
action.addOutput(unmappedOutput, "Unmapped features", false, true);
291-
292-
SequenceOutputFile so2 = new SequenceOutputFile();
293-
so2.setName(f.getName() + " (lifted/unmapped)");
294-
so2.setDescription("Contains the unmapped features after attempted liftover of " + f.getName());
295-
296-
//ExpData unmappedData = ExperimentService.get().createData(job.getContainer(), new DataType("Liftover Output"));
297-
//unmappedData.setName(unmappedOutput.getName());
298-
//unmappedData.setDataFileURI(unmappedOutput.toURI());
299-
//unmappedData.save(job.getUser());
300-
//so2.setDataId(unmappedData.getRowId());
301-
so2.setFile(unmappedOutput);
302-
so2.setLibrary_id(f.getLibrary_id());
303-
so2.setReadset(f.getReadset());
304-
so2.setAnalysis_id(f.getAnalysis_id());
305-
so2.setCategory(f.getCategory());
306-
so2.setContainer(job.getContainerId());
307-
so2.setCreated(new Date());
308-
so2.setModified(new Date());
309-
310-
ctx.addSequenceOutput(so2);
311-
}
292+
SequenceOutputFile so2 = new SequenceOutputFile();
293+
so2.setName(f.getName() + " (lifted/unmapped)");
294+
so2.setDescription("Contains the unmapped features after attempted liftover of " + f.getName());
295+
296+
so2.setFile(unmappedOutput);
297+
so2.setLibrary_id(f.getLibrary_id());
298+
so2.setReadset(f.getReadset());
299+
so2.setAnalysis_id(f.getAnalysis_id());
300+
so2.setCategory(f.getCategory());
301+
so2.setContainer(job.getContainerId());
302+
so2.setCreated(new Date());
303+
so2.setModified(new Date());
312304

313-
action.setEndTime(new Date());
314-
ctx.addActions(action);
305+
ctx.addSequenceOutput(so2);
315306
}
307+
308+
action.setEndTime(new Date());
309+
ctx.addActions(action);
316310
}
317311
}
318312

@@ -364,8 +358,21 @@ public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceG
364358
}
365359
else
366360
{
361+
File unmappedOutputFile = unmappedOutput;
362+
if (unmappedOutputFile == null)
363+
{
364+
unmappedOutputFile = new File(currentVCF.getParentFile(), "liftoverRejects.vcf.gz");
365+
}
366+
367367
LiftoverVcfWrapper wrapper = new LiftoverVcfWrapper(job.getLogger());
368368
wrapper.doLiftover(currentVCF, chain, targetGenome.getWorkingFastaFile(), unmappedOutput, output, pct);
369+
370+
if (unmappedOutput == null)
371+
{
372+
ctx.getLogger().debug("Deleting liftover rejects VCF");
373+
new File(unmappedOutputFile.getPath() + ".tbi").delete();
374+
unmappedOutputFile.delete();
375+
}
369376
}
370377

371378
Long mapped = null;
@@ -378,7 +385,7 @@ public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceG
378385
SequenceAnalysisService.get().ensureVcfIndex(output, job.getLogger());
379386
}
380387

381-
Long unmapped = 0L;
388+
long unmapped = 0L;
382389
if (unmappedOutput != null && unmappedOutput.exists())
383390
{
384391
String unmappedStr = ProcessVariantsHandler.getVCFLineCount(unmappedOutput, job.getLogger(), false, true);
@@ -390,11 +397,32 @@ public void liftOverVcf(JobContext ctx, ReferenceGenome targetGenome, ReferenceG
390397

391398
if (mapped != null)
392399
{
393-
Double fraction = (double)mapped / (mapped + unmapped);
400+
double fraction = (double)mapped / (mapped + unmapped);
394401
job.getLogger().info("fraction mapped of total: " + fraction);
395402
}
396403
}
397404

405+
@Override
406+
public File getScatterJobOutput(JobContext ctx) throws PipelineJobException
407+
{
408+
if (ctx.getJob() instanceof SequenceOutputHandlerJob sj)
409+
{
410+
SequenceOutputFile so = sj.getFiles().get(0);
411+
412+
return ProcessVariantsHandler.getScatterOutputByCategory(ctx, so.getCategory());
413+
}
414+
else
415+
{
416+
throw new IllegalStateException("Expected job to be instanceof SequenceOutputHandlerJob , was " + ctx.getJob().getClass().getName());
417+
}
418+
}
419+
420+
@Override
421+
public SequenceOutputFile createFinalSequenceOutput(PipelineJob job, File processed, List<SequenceOutputFile> inputFiles) throws PipelineJobException
422+
{
423+
return ProcessVariantsHandler.createSequenceOutput(job, processed, inputFiles, inputFiles.get(0).getCategory());
424+
}
425+
398426
public void liftOverBed(File chain, File input, File output, @Nullable File unmappedOutput, PipelineJob job, double pct) throws IOException, PipelineJobException
399427
{
400428
LiftOver lo = new LiftOver(chain);

0 commit comments

Comments
 (0)