|
| 1 | +package org.labkey.sequenceanalysis.run.alignment; |
| 2 | + |
| 3 | +import org.apache.commons.io.FileUtils; |
| 4 | +import org.apache.commons.lang3.StringUtils; |
| 5 | +import org.json.JSONObject; |
| 6 | +import org.labkey.api.module.ModuleLoader; |
| 7 | +import org.labkey.api.pipeline.PipelineJob; |
| 8 | +import org.labkey.api.pipeline.PipelineJobException; |
| 9 | +import org.labkey.api.pipeline.RecordedAction; |
| 10 | +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; |
| 11 | +import org.labkey.api.sequenceanalysis.SequenceOutputFile; |
| 12 | +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; |
| 13 | +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; |
| 14 | +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; |
| 15 | +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; |
| 16 | +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; |
| 17 | +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; |
| 18 | +import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; |
| 19 | +import org.labkey.sequenceanalysis.SequenceAnalysisModule; |
| 20 | +import org.labkey.sequenceanalysis.util.SequenceUtil; |
| 21 | + |
| 22 | +import java.io.File; |
| 23 | +import java.io.IOException; |
| 24 | +import java.util.ArrayList; |
| 25 | +import java.util.Arrays; |
| 26 | +import java.util.List; |
| 27 | + |
| 28 | +//https://github.com/hall-lab/svtyper |
| 29 | + |
| 30 | +public class SVTyperStep extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor> |
| 31 | +{ |
| 32 | + public SVTyperStep() |
| 33 | + { |
| 34 | + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "SVTyper SV Genotyping", "This will run SVTyper on one or more BAM files to genotype SVs", null, Arrays.asList( |
| 35 | + ToolParameterDescriptor.createExpDataParam("svVCF", "Input VCF", "This is the DataId of the VCF containing the SVs to genotype", "ldk-expdatafield", new JSONObject() |
| 36 | + {{ |
| 37 | + put("allowBlank", false); |
| 38 | + }}, null), |
| 39 | + ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ |
| 40 | + put("checked", true); |
| 41 | + }}, true) |
| 42 | + )); |
| 43 | + } |
| 44 | + |
| 45 | + @Override |
| 46 | + public boolean doSplitJobs() |
| 47 | + { |
| 48 | + return true; |
| 49 | + } |
| 50 | + |
| 51 | + @Override |
| 52 | + public boolean canProcess(SequenceOutputFile o) |
| 53 | + { |
| 54 | + return o.getFile() != null && o.getFile().exists() && SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(o.getFile()); |
| 55 | + } |
| 56 | + |
| 57 | + @Override |
| 58 | + public boolean doRunRemote() |
| 59 | + { |
| 60 | + return true; |
| 61 | + } |
| 62 | + |
| 63 | + @Override |
| 64 | + public boolean doRunLocal() |
| 65 | + { |
| 66 | + return false; |
| 67 | + } |
| 68 | + |
| 69 | + @Override |
| 70 | + public SequenceOutputProcessor getProcessor() |
| 71 | + { |
| 72 | + return new Processor(); |
| 73 | + } |
| 74 | + |
| 75 | + public static class Processor implements SequenceOutputProcessor |
| 76 | + { |
| 77 | + @Override |
| 78 | + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException |
| 79 | + { |
| 80 | + |
| 81 | + } |
| 82 | + |
| 83 | + @Override |
| 84 | + public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException |
| 85 | + { |
| 86 | + int svVcfId = ctx.getParams().optInt("svVCF", 0); |
| 87 | + if (svVcfId == 0) |
| 88 | + { |
| 89 | + throw new PipelineJobException("svVCF param was null"); |
| 90 | + } |
| 91 | + |
| 92 | + File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId); |
| 93 | + if (svVcf == null) |
| 94 | + { |
| 95 | + throw new PipelineJobException("File not found for ID: " + svVcfId); |
| 96 | + } |
| 97 | + else if (!svVcf.exists()) |
| 98 | + { |
| 99 | + throw new PipelineJobException("Missing file: " + svVcf.getPath()); |
| 100 | + } |
| 101 | + |
| 102 | + Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); |
| 103 | + |
| 104 | + for (SequenceOutputFile so : inputFiles) |
| 105 | + { |
| 106 | + List<String> jsonArgs = new ArrayList<>(); |
| 107 | + SimpleScriptWrapper wrapper = new SimpleScriptWrapper(ctx.getLogger()); |
| 108 | + jsonArgs.add(AbstractCommandWrapper.resolveFileInPath("svtyper", null, true).getPath()); |
| 109 | + jsonArgs.add("-b"); |
| 110 | + jsonArgs.add(so.getFile().getPath()); |
| 111 | + |
| 112 | + File coverageJson = new File(ctx.getWorkingDirectory(), "bam.json"); |
| 113 | + jsonArgs.add("-l"); |
| 114 | + jsonArgs.add(coverageJson.getPath()); |
| 115 | + |
| 116 | + |
| 117 | + if (threads != null) |
| 118 | + { |
| 119 | + jsonArgs.add("--max_reads"); |
| 120 | + jsonArgs.add(threads.toString()); |
| 121 | + } |
| 122 | + |
| 123 | + File doneFile = new File(ctx.getWorkingDirectory(), "json.done"); |
| 124 | + ctx.getFileManager().addIntermediateFile(doneFile); |
| 125 | + if (doneFile.exists()) |
| 126 | + { |
| 127 | + ctx.getLogger().info("BAM json already generated, skipping"); |
| 128 | + } |
| 129 | + else |
| 130 | + { |
| 131 | + wrapper.execute(jsonArgs); |
| 132 | + try |
| 133 | + { |
| 134 | + FileUtils.touch(doneFile); |
| 135 | + ctx.getFileManager().addIntermediateFile(doneFile); |
| 136 | + } |
| 137 | + catch (IOException e) |
| 138 | + { |
| 139 | + throw new PipelineJobException(e); |
| 140 | + } |
| 141 | + } |
| 142 | + |
| 143 | + if (!coverageJson.exists()) |
| 144 | + { |
| 145 | + throw new PipelineJobException("Missing file: " + coverageJson.getPath()); |
| 146 | + } |
| 147 | + ctx.getFileManager().addIntermediateFile(coverageJson); |
| 148 | + |
| 149 | + List<String> svtyperArgs = new ArrayList<>(); |
| 150 | + svtyperArgs.add(AbstractCommandWrapper.resolveFileInPath("svtyper-sso", null, true).getPath()); |
| 151 | + |
| 152 | + svtyperArgs.add("-i"); |
| 153 | + svtyperArgs.add(svVcf.getPath()); |
| 154 | + |
| 155 | + svtyperArgs.add("-B"); |
| 156 | + svtyperArgs.add(so.getFile().getPath()); |
| 157 | + |
| 158 | + svtyperArgs.add("-l"); |
| 159 | + svtyperArgs.add(coverageJson.getPath()); |
| 160 | + |
| 161 | + if (threads != null) |
| 162 | + { |
| 163 | + svtyperArgs.add("--core"); |
| 164 | + svtyperArgs.add(threads.toString()); |
| 165 | + } |
| 166 | + |
| 167 | + File genotypes = new File(ctx.getWorkingDirectory(), SequenceAnalysisService.get().getUnzippedBaseName(so.getName()) + ".svtyper.vcf.gz"); |
| 168 | + wrapper.execute(Arrays.asList("/bin/bash", "-c", StringUtils.join(svtyperArgs, " ") + "| bgzip -c"), ProcessBuilder.Redirect.to(genotypes)); |
| 169 | + |
| 170 | + if (!genotypes.exists()) |
| 171 | + { |
| 172 | + throw new PipelineJobException("Missing file: " + genotypes.getPath()); |
| 173 | + } |
| 174 | + |
| 175 | + try |
| 176 | + { |
| 177 | + SequenceAnalysisService.get().ensureVcfIndex(genotypes, ctx.getLogger()); |
| 178 | + } |
| 179 | + catch (IOException e) |
| 180 | + { |
| 181 | + throw new PipelineJobException(e); |
| 182 | + } |
| 183 | + |
| 184 | + ctx.getFileManager().addSequenceOutput(genotypes, "SVTyper Genotypes: " + so.getName(), "SVTyper Genoypes", so.getReadset(), null, so.getLibrary_id(), "Input VCF: " + svVcf.getName() + " (" + svVcfId + ")"); |
| 185 | + } |
| 186 | + } |
| 187 | + } |
| 188 | +} |
0 commit comments