|
| 1 | +package org.labkey.sequenceanalysis.run.alignment; |
| 2 | + |
| 3 | +import org.json.JSONObject; |
| 4 | +import org.labkey.api.module.ModuleLoader; |
| 5 | +import org.labkey.api.pipeline.PipelineJob; |
| 6 | +import org.labkey.api.pipeline.PipelineJobException; |
| 7 | +import org.labkey.api.pipeline.RecordedAction; |
| 8 | +import org.labkey.api.sequenceanalysis.SequenceAnalysisService; |
| 9 | +import org.labkey.api.sequenceanalysis.SequenceOutputFile; |
| 10 | +import org.labkey.api.sequenceanalysis.pipeline.AbstractParameterizedOutputHandler; |
| 11 | +import org.labkey.api.sequenceanalysis.pipeline.ReferenceGenome; |
| 12 | +import org.labkey.api.sequenceanalysis.pipeline.SequenceAnalysisJobSupport; |
| 13 | +import org.labkey.api.sequenceanalysis.pipeline.SequenceOutputHandler; |
| 14 | +import org.labkey.api.sequenceanalysis.pipeline.SequencePipelineService; |
| 15 | +import org.labkey.api.sequenceanalysis.pipeline.ToolParameterDescriptor; |
| 16 | +import org.labkey.api.sequenceanalysis.run.AbstractCommandWrapper; |
| 17 | +import org.labkey.api.sequenceanalysis.run.SimpleScriptWrapper; |
| 18 | +import org.labkey.sequenceanalysis.SequenceAnalysisModule; |
| 19 | +import org.labkey.sequenceanalysis.util.SequenceUtil; |
| 20 | + |
| 21 | +import java.io.File; |
| 22 | +import java.io.IOException; |
| 23 | +import java.util.ArrayList; |
| 24 | +import java.util.Arrays; |
| 25 | +import java.util.List; |
| 26 | + |
| 27 | +//https://github.com/DecodeGenetics/graphtyper |
| 28 | + |
| 29 | +public class Graphtyper extends AbstractParameterizedOutputHandler<SequenceOutputHandler.SequenceOutputProcessor> |
| 30 | +{ |
| 31 | + public Graphtyper() |
| 32 | + { |
| 33 | + super(ModuleLoader.getInstance().getModule(SequenceAnalysisModule.class), "Graphtyper (SV Genotyping)", "This will run Graphtyper on one or more BAM files to genotype SVs", null, Arrays.asList( |
| 34 | + ToolParameterDescriptor.createExpDataParam("svVCF", "Input VCF", "This is the DataId of the VCF containing the SVs to genotype", "ldk-expdatafield", new JSONObject() |
| 35 | + {{ |
| 36 | + put("allowBlank", false); |
| 37 | + }}, null), |
| 38 | + ToolParameterDescriptor.create("useOutputFileContainer", "Submit to Source File Workbook", "If checked, each job will be submitted to the same workbook as the input file, as opposed to submitting all jobs to the same workbook. This is primarily useful if submitting a large batch of files to process separately. This only applies if 'Run Separately' is selected.", "checkbox", new JSONObject(){{ |
| 39 | + put("checked", true); |
| 40 | + }}, true) |
| 41 | + )); |
| 42 | + } |
| 43 | + |
| 44 | + @Override |
| 45 | + public boolean doSplitJobs() |
| 46 | + { |
| 47 | + return true; |
| 48 | + } |
| 49 | + |
| 50 | + @Override |
| 51 | + public boolean canProcess(SequenceOutputFile o) |
| 52 | + { |
| 53 | + return o.getFile() != null && o.getFile().exists() && SequenceUtil.FILETYPE.bamOrCram.getFileType().isType(o.getFile()); |
| 54 | + } |
| 55 | + |
| 56 | + @Override |
| 57 | + public boolean doRunRemote() |
| 58 | + { |
| 59 | + return true; |
| 60 | + } |
| 61 | + |
| 62 | + @Override |
| 63 | + public boolean doRunLocal() |
| 64 | + { |
| 65 | + return false; |
| 66 | + } |
| 67 | + |
| 68 | + @Override |
| 69 | + public SequenceOutputProcessor getProcessor() |
| 70 | + { |
| 71 | + return new Processor(); |
| 72 | + } |
| 73 | + |
| 74 | + public static class Processor implements SequenceOutputProcessor |
| 75 | + { |
| 76 | + @Override |
| 77 | + public void processFilesOnWebserver(PipelineJob job, SequenceAnalysisJobSupport support, List<SequenceOutputFile> inputFiles, JSONObject params, File outputDir, List<RecordedAction> actions, List<SequenceOutputFile> outputsToCreate) throws UnsupportedOperationException, PipelineJobException |
| 78 | + { |
| 79 | + |
| 80 | + } |
| 81 | + |
| 82 | + @Override |
| 83 | + public void processFilesRemote(List<SequenceOutputFile> inputFiles, JobContext ctx) throws UnsupportedOperationException, PipelineJobException |
| 84 | + { |
| 85 | + int svVcfId = ctx.getParams().optInt("svVCF", 0); |
| 86 | + if (svVcfId == 0) |
| 87 | + { |
| 88 | + throw new PipelineJobException("svVCF param was null"); |
| 89 | + } |
| 90 | + |
| 91 | + File svVcf = ctx.getSequenceSupport().getCachedData(svVcfId); |
| 92 | + if (svVcf == null) |
| 93 | + { |
| 94 | + throw new PipelineJobException("File not found for ID: " + svVcfId); |
| 95 | + } |
| 96 | + else if (!svVcf.exists()) |
| 97 | + { |
| 98 | + throw new PipelineJobException("Missing file: " + svVcf.getPath()); |
| 99 | + } |
| 100 | + |
| 101 | + Integer threads = SequencePipelineService.get().getMaxThreads(ctx.getLogger()); |
| 102 | + |
| 103 | + for (SequenceOutputFile so : inputFiles) |
| 104 | + { |
| 105 | + List<String> args = new ArrayList<>(); |
| 106 | + SimpleScriptWrapper wrapper = new SimpleScriptWrapper(ctx.getLogger()); |
| 107 | + args.add(AbstractCommandWrapper.resolveFileInPath("graphtyper", null, true).getPath()); |
| 108 | + args.add("genotype_sv"); |
| 109 | + |
| 110 | + ReferenceGenome rg = ctx.getSequenceSupport().getCachedGenome(so.getLibrary_id()); |
| 111 | + if (rg == null) |
| 112 | + { |
| 113 | + throw new PipelineJobException("Missing reference genome: " + so.getLibrary_id()); |
| 114 | + } |
| 115 | + |
| 116 | + args.add(rg.getWorkingFastaFile().getPath()); |
| 117 | + args.add(svVcf.toString()); |
| 118 | + |
| 119 | + args.add("--sam"); |
| 120 | + args.add(so.getFile().getPath()); |
| 121 | + |
| 122 | + if (threads != null) |
| 123 | + { |
| 124 | + args.add("--threads"); |
| 125 | + args.add(threads.toString()); |
| 126 | + } |
| 127 | + |
| 128 | + wrapper.execute(args); |
| 129 | + |
| 130 | + File genotypes = new File(ctx.getWorkingDirectory(), "sv_results/" + SequenceAnalysisService.get().getUnzippedBaseName(so.getName()) + ".vcf.gz"); |
| 131 | + if (!genotypes.exists()) |
| 132 | + { |
| 133 | + throw new PipelineJobException("Missing file: " + genotypes.getPath()); |
| 134 | + } |
| 135 | + |
| 136 | + try |
| 137 | + { |
| 138 | + SequenceAnalysisService.get().ensureVcfIndex(genotypes, ctx.getLogger()); |
| 139 | + } |
| 140 | + catch (IOException e) |
| 141 | + { |
| 142 | + throw new PipelineJobException(e); |
| 143 | + } |
| 144 | + |
| 145 | + ctx.getFileManager().addSequenceOutput(genotypes, "Graphtyper Genotypes: " + so.getName(), "Graphtyper Genoypes", so.getReadset(), null, so.getLibrary_id(), "Input VCF: " + svVcf.getName() + " (" + svVcfId + ")"); |
| 146 | + } |
| 147 | + } |
| 148 | + } |
| 149 | +} |
0 commit comments