1+ package org .labkey .sequenceanalysis .run .analysis ;
2+
3+ import org .apache .logging .log4j .Logger ;
4+ import org .jetbrains .annotations .Nullable ;
5+ import org .labkey .api .pipeline .PipelineJobException ;
6+ import org .labkey .api .sequenceanalysis .model .AnalysisModel ;
7+ import org .labkey .api .sequenceanalysis .model .Readset ;
8+ import org .labkey .api .sequenceanalysis .pipeline .AbstractAnalysisStepProvider ;
9+ import org .labkey .api .sequenceanalysis .pipeline .AbstractPipelineStep ;
10+ import org .labkey .api .sequenceanalysis .pipeline .AnalysisOutputImpl ;
11+ import org .labkey .api .sequenceanalysis .pipeline .AnalysisStep ;
12+ import org .labkey .api .sequenceanalysis .pipeline .PipelineContext ;
13+ import org .labkey .api .sequenceanalysis .pipeline .PipelineStepProvider ;
14+ import org .labkey .api .sequenceanalysis .pipeline .ReferenceGenome ;
15+ import org .labkey .api .sequenceanalysis .pipeline .SamtoolsIndexer ;
16+ import org .labkey .api .sequenceanalysis .pipeline .SamtoolsRunner ;
17+ import org .labkey .api .sequenceanalysis .pipeline .SequencePipelineService ;
18+ import org .labkey .api .sequenceanalysis .run .SimpleScriptWrapper ;
19+ import org .labkey .sequenceanalysis .util .SequenceUtil ;
20+
21+ import java .io .File ;
22+ import java .util .ArrayList ;
23+ import java .util .List ;
24+
25+ public class SawfishAnalysis extends AbstractPipelineStep implements AnalysisStep
26+ {
27+ public SawfishAnalysis (PipelineStepProvider <?> provider , PipelineContext ctx )
28+ {
29+ super (provider , ctx );
30+ }
31+
32+ public static class Provider extends AbstractAnalysisStepProvider <SawfishAnalysis >
33+ {
34+ public Provider ()
35+ {
36+ super ("sawfish" , "Sawfish Analysis" , null , "This will run sawfish SV dicvoery and calling on the selected BAMs" , List .of (), null , null );
37+ }
38+
39+
40+ @ Override
41+ public SawfishAnalysis create (PipelineContext ctx )
42+ {
43+ return new SawfishAnalysis (this , ctx );
44+ }
45+ }
46+
47+ @ Override
48+ public Output performAnalysisPerSampleRemote (Readset rs , File inputBam , ReferenceGenome referenceGenome , File outputDir ) throws PipelineJobException
49+ {
50+ AnalysisOutputImpl output = new AnalysisOutputImpl ();
51+
52+ File inputFile = inputBam ;
53+ if (SequenceUtil .FILETYPE .cram .getFileType ().isType (inputFile ))
54+ {
55+ CramToBam samtoolsRunner = new CramToBam (getPipelineCtx ().getLogger ());
56+ File bam = new File (getPipelineCtx ().getWorkingDirectory (), inputFile .getName ().replaceAll (".cram$" , ".bam" ));
57+ File bamIdx = new File (bam .getPath () + ".bai" );
58+ if (!bamIdx .exists ())
59+ {
60+ samtoolsRunner .convert (inputFile , bam , referenceGenome .getWorkingFastaFile (), SequencePipelineService .get ().getMaxThreads (getPipelineCtx ().getLogger ()));
61+ new SamtoolsIndexer (getPipelineCtx ().getLogger ()).execute (bam );
62+ }
63+ else
64+ {
65+ getPipelineCtx ().getLogger ().debug ("BAM index exists, will not re-convert CRAM" );
66+ }
67+
68+ inputFile = bam ;
69+
70+ output .addIntermediateFile (bam );
71+ output .addIntermediateFile (bamIdx );
72+ }
73+
74+ List <String > args = new ArrayList <>();
75+ args .add (getExe ().getPath ());
76+ args .add ("discover" );
77+
78+ args .add ("--bam" );
79+ args .add (inputFile .getPath ());
80+
81+ args .add ("--ref" );
82+ args .add (referenceGenome .getWorkingFastaFile ().getPath ());
83+
84+ File svOutDir = new File (outputDir , "sawfish" );
85+ args .add ("--output-dir" );
86+ args .add (svOutDir .getPath ());
87+
88+ Integer maxThreads = SequencePipelineService .get ().getMaxThreads (getPipelineCtx ().getLogger ());
89+ if (maxThreads != null )
90+ {
91+ args .add ("--threads" );
92+ args .add (String .valueOf (maxThreads ));
93+ }
94+
95+ File bcf = new File (svOutDir , "candidate.sv.bcf" );
96+ File bcfIdx = new File (bcf .getPath () + ".csi" );
97+ if (bcfIdx .exists ())
98+ {
99+ getPipelineCtx ().getLogger ().debug ("BCF index already exists, reusing output" );
100+ }
101+ else
102+ {
103+ new SimpleScriptWrapper (getPipelineCtx ().getLogger ()).execute (args );
104+ }
105+
106+ if (!bcf .exists ())
107+ {
108+ throw new PipelineJobException ("Unable to find file: " + bcf .getPath ());
109+ }
110+
111+ output .addSequenceOutput (bcf , rs .getName () + ": sawfish" , "Sawfish SV Discovery" , rs .getReadsetId (), null , referenceGenome .getGenomeId (), null );
112+
113+ return output ;
114+ }
115+
116+ @ Override
117+ public Output performAnalysisPerSampleLocal (AnalysisModel model , File inputBam , File referenceFasta , File outDir ) throws PipelineJobException
118+ {
119+ return null ;
120+ }
121+
122+ private File getExe ()
123+ {
124+ return SequencePipelineService .get ().getExeForPackage ("SAWFISHPATH" , "sawfish" );
125+ }
126+
127+ private static class CramToBam extends SamtoolsRunner
128+ {
129+ public CramToBam (Logger log )
130+ {
131+ super (log );
132+ }
133+
134+ public void convert (File inputCram , File outputBam , File fasta , @ Nullable Integer threads ) throws PipelineJobException
135+ {
136+ getLogger ().info ("Converting CRAM to BAM" );
137+
138+ execute (getParams (inputCram , outputBam , fasta , threads ));
139+ }
140+
141+ private List <String > getParams (File inputCram , File outputBam , File fasta , @ Nullable Integer threads )
142+ {
143+ List <String > params = new ArrayList <>();
144+ params .add (getSamtoolsPath ().getPath ());
145+ params .add ("view" );
146+ params .add ("-b" );
147+ params .add ("-T" );
148+ params .add (fasta .getPath ());
149+ params .add ("-o" );
150+ params .add (outputBam .getPath ());
151+
152+ if (threads != null )
153+ {
154+ params .add ("-@" );
155+ params .add (String .valueOf (threads ));
156+ }
157+
158+ params .add (inputCram .getPath ());
159+
160+ return params ;
161+ }
162+ }
163+ }
0 commit comments