Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ FROM ubuntu

RUN apt-get update
RUN apt-get install -y git wget make gcc zlib1g-dev ncurses-dev g++ python python-pip
RUN pip install PyVCF
RUN pip install PyVCF subprocess32

WORKDIR /opt

RUN mkdir /opt/bin
RUN mkdir /opt/bin
RUN wget https://github.com/samtools/samtools/releases/download/1.2/samtools-1.2.tar.bz2
RUN tar xvjf samtools-1.2.tar.bz2
RUN cd /opt/samtools-1.2 && make && make install
Expand Down
26 changes: 26 additions & 0 deletions Dockstore.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
{
"reference": {
"path": "http://hgwdev.cse.ucsc.edu/~jeltje/public_data/genome.fa.gz",
"class": "File"
},
"normal": {
"path":"https://dcc.icgc.org/api/v1/download?fn=/PCAWG/reference_data/data_for_testing/HCC1143_ds/HCC1143_BL.bam",
"class": "File"
},
"tumor": {
"path":"https://dcc.icgc.org/api/v1/download?fn=/PCAWG/reference_data/data_for_testing/HCC1143_ds/HCC1143.bam",
"class": "File"
},
"somatic_vcf": {
"path": "/tmp/somatic.vcf",
"class": "File"
},
"vcf": {
"path": "/tmp/pindel.vcf",
"class": "File"
},
"rawFile": {
"path": "/tmp/pindel.raw",
"class": "File"
}
}
130 changes: 68 additions & 62 deletions pindel-somatic.cwl.yaml → pindel-somatic.cwl
Original file line number Diff line number Diff line change
@@ -1,190 +1,196 @@

class: CommandLineTool
label: Pindel-Somatic
cwlVersion: v1.0
class: CommandLineTool

doc: "pindel somatic mutation calling"

baseCommand: [python, /opt/pindel.py, -t, NORMAL, -t TUMOR]
requirements:
- class: "DockerRequirement"
dockerPull: "opengenomics/pindel:0.2.5b8"

hints:
DockerRequirement:
dockerPull: quay.io/opengenomics/pindel

inputs:
- id: "normal"
normal:
type: File
inputBinding:
prefix: -b

- id: "tumor"
tumor:
type: File
inputBinding:
prefix: -b

- id: "reference"
reference:
type: File
doc: |
the genome reference file
inputBinding:
prefix: -r

- id: "centromere"
type: File
centromere:
type: File?
doc: |
list of regions to exclude (chr, start, end)
inputBinding:
prefix: -J

- id: "referenceName"
type: string
default: HG19
referenceName:
type: string?
doc: |
the genome reference ID, e.g. HG19 (default: genome)
inputBinding:
prefix: -R

- id: "windowSize"
type: int
windowSize:
type: int?
default: 2
inputBinding:
prefix: --window_size

- id: "procs"
type: int
procs:
type: int?
default: 2
inputBinding:
prefix: --number_of_procs

- id: "normal_insert_size"
type: ['null', int]
normal_insert_size:
type: int?
inputBinding:
prefix: -s

- id: "tumor_insert_size"
type: ['null', int]
tumor_insert_size:
type: int?
inputBinding:
prefix: -s

- id: "report_inversions"
report_inversions:
type: boolean?
default: false
inputBinding:
prefix: --report_inversions

- id: "report_duplications"
report_duplications:
type: boolean?
default: false
inputBinding:
prefix: --report_duplications

- id: "report_long_insertions"
report_long_insertions:
type: boolean?
default: false
inputBinding:
prefix: --report_long_insertions

- id: "report_breakpoints"
report_breakpoints:
type: boolean?
default: false
inputBinding:
prefix: --report_breakpoints

- id: "report_only_close_mapped_reads"
report_only_close_mapped_reads:
type: boolean?
default: false
inputBinding:
prefix: -S

- id: "outputRawFile"
type: string
outputRawFile:
type: string?
default: pindel.raw
inputBinding:
prefix: -o1

- id: "outputVcfFile"
type: string
outputVcfFile:
type: string?
default: pindel.vcf
inputBinding:
prefix: -o2

- id: "outputSomaticVcfFile"
type: string
outputSomaticVcfFile:
type: string?
default: pindel_somatic.vcf
inputBinding:
prefix: -o3

- id: "somatic_vaf"
type: float
somatic_vaf:
type: float?
default: 0.08
inputBinding:
prefix: --somatic_vaf

- id: "somatic_cov"
type: int
somatic_cov:
type: int?
default: 20
inputBinding:
prefix: --somatic_cov

- id: "somatic_hom"
type: int
somatic_hom:
type: int?
default: 6
inputBinding:
prefix: --somatic_hom

- id: "min_inversion_size"
type: int
min_inversion_size:
type: int?
default: 50
inputBinding:
prefix: --min_inversion_size

- id: min_num_matched_bases
type: int
min_num_matched_bases:
type: int?
default: 30
inputBinding:
prefix: -d

- id: max_range_index
type: int
max_range_index:
type: int?
default: 4
inputBinding:
prefix: -x

- id: additional_mismatch
type: int
additional_mismatch:
type: int?
default: 1
inputBinding:
prefix: -a

- id: min_perfect_match_around_BP
type: int
min_perfect_match_around_BP:
type: int?
default: 3
inputBinding:
prefix: -m

- id: sequencing_error_rate
type: float
sequencing_error_rate:
type: float?
default: 0.01
inputBinding:
prefix: --sequencing_error_rate

- id: maximum_allowed_mismatch_rate
type: float
maximum_allowed_mismatch_rate:
type: float?
default: 0.02
inputBinding:
prefix: -u

- id: sensitivity
type: float
sensitivity:
type: float?
default: 0.95
inputBinding:
prefix: --sensitivity


outputs:
- id: "vcf"
vcf:
type: File
outputBinding:
glob: pindel.vcf
glob: $(inputs.outputVcfFile)

- id: "somatic_vcf"
somatic_vcf:
type: File
outputBinding:
glob: pindel_somatic.vcf
glob: $(inputs.outputSomaticVcfFile)

- id: "rawFile"
rawFile:
type: File
outputBinding:
glob: pindel.raw
glob: $(inputs.outputRawFile)


18 changes: 17 additions & 1 deletion pindel.py
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,15 @@
from multiprocessing import Pool
import vcf

def gunzip(infile, outfile):
cmd = ' '.join(['zcat', infile])
with open(outfile, 'w') as outF:
p = subprocess.Popen(cmd, shell=True, stdout=outF, stderr=subprocess.PIPE)
stdout,stderr = p.communicate()
if len(stderr):
print "unzip command failed:", stderr
raise Exception("unzip failed")

def execute(cmd, output=None):
import subprocess, sys, shlex
# function to execute a cmd and report if an error occur
Expand Down Expand Up @@ -181,7 +190,7 @@ def __main__():
logging.basicConfig(level=logging.INFO)
time.sleep(1) #small hack, sometimes it seems like docker file systems aren't avalible instantly
parser = argparse.ArgumentParser(description='')
parser.add_argument('-r', dest='inputFastaFile', required=True, help='the reference file')
parser.add_argument('-r', dest='inputFastaFile', required=True, help='the reference file, can be gzipped')
parser.add_argument('-R', dest='inputFastaName', default="genome", help='the reference name')

parser.add_argument('-b', dest='inputBamFiles', default=[], action="append", help='the bam file')
Expand Down Expand Up @@ -268,6 +277,13 @@ def __main__():
seq_hash = {}
newInputFiles = []
i = 0
# unzip input genome if necessary
if args.inputFastaFile.endswith('.gz'):
new_ref = os.path.join(args.workdir, "reference.fa")
gunzip(args.inputFastaFile, new_ref)
subprocess.check_call( ["samtools", "faidx", new_ref] )
args.inputFastaFile = new_ref

#make sure the BAMs are indexed and get the mean insert sizes
for inputBamFile, inputBamIndex, insertSize, sampleTag in zip(inputBamFiles, inputBamFileIndexes, insertSizes, sampleTags ):
inputFastaFile, inputBamFile = indexBam(args.workdir, args.inputFastaFile, inputBamFile, i, inputBamIndex)
Expand Down