From 26281687e8f19a51dabc48dc74822aa8b43ea3d9 Mon Sep 17 00:00:00 2001 From: Jeltje Date: Mon, 27 Feb 2017 18:15:29 +0000 Subject: [PATCH 1/3] upgraded cwl to v1.0, add option to input gzipped genome, change default docker container to quay.io, add test input Dockerfile.json --- Dockstore.json | 18 ++++++++++ mutect.cwl.yaml => mutect.cwl | 64 ++++++++++++++++++----------------- mutect.py | 13 ++++++- 3 files changed, 63 insertions(+), 32 deletions(-) create mode 100644 Dockstore.json rename mutect.cwl.yaml => mutect.cwl (62%) diff --git a/Dockstore.json b/Dockstore.json new file mode 100644 index 0000000..6b45ccd --- /dev/null +++ b/Dockstore.json @@ -0,0 +1,18 @@ +{ + "reference": { + "path": "http://hgwdev.cse.ucsc.edu/~jeltje/public_data/genome.fa.gz", + "class": "File" + }, + "normal": { + "path": "https://dcc.icgc.org/api/v1/download?fn=/PCAWG/reference_data/data_for_testing/HCC1143_ds/HCC1143_BL.bam", + "class": "File" + }, + "tumor": { + "path": "https://dcc.icgc.org/api/v1/download?fn=/PCAWG/reference_data/data_for_testing/HCC1143_ds/HCC1143.bam", + "class": "File" + }, + "mutations": { + "path": "/tmp/mutect.vcf", + "class": "File" + } +} diff --git a/mutect.cwl.yaml b/mutect.cwl similarity index 62% rename from mutect.cwl.yaml rename to mutect.cwl index 86ffdd7..4d6ef46 100644 --- a/mutect.cwl.yaml +++ b/mutect.cwl @@ -1,79 +1,81 @@ cwlVersion: v1.0 class: CommandLineTool -label: MuTect + +doc: "Mutect 1.1.5" + +hints: + DockerRequirement: + dockerPull: quay.io/opengenomics/mutect + baseCommand: ['python', '/opt/mutect.py'] -requirements: - - class: "DockerRequirement" - dockerImageId: "mutect:1.1.5" + inputs: - - id: "#tumor" + tumor: type: File inputBinding: prefix: --input_file:tumor - secondaryFiles: - - .bai - - id: "#normal" + normal: type: File inputBinding: prefix: --input_file:normal - secondaryFiles: - - .bai - - id: "#reference" + reference: type: File inputBinding: prefix: --reference_sequence - secondaryFiles: - - .fai - - ^.dict - - id: "#cosmic" - type: File + cosmic: + type: File? inputBinding: prefix: --cosmic - - id: "#dbsnp" - type: File + dbsnp: + type: File? inputBinding: prefix: --dbsnp secondaryFiles: .tbi - - id: "#tumor_lod" - type: float + tumor_lod: + type: float? default: 6.3 inputBinding: prefix: --tumor_lod - - id: "#initial_tumor_lod" - type: float + initial_tumor_lod: + type: float? default: 4.0 inputBinding: prefix: --initial_tumor_lod - - id: "#out" - type: string + ncpus: + type: int? + inputBinding: + position: 2 + prefix: --ncpus + out: + type: string? default: call_stats.txt inputBinding: prefix: --out - - id: "#coverage_file" - type: string + coverage_file: + type: string? default: coverage.wig.txt inputBinding: prefix: --coverage_file - - id: "#vcf" - type: string + vcf: + type: string? default: mutations.vcf inputBinding: prefix: --vcf outputs: - - id: "#coverage" + coverage: type: File outputBinding: glob: $(inputs.coverage_file) - - id: "#call_stats" + call_stats: type: File outputBinding: glob: $(inputs.out) - - id: "#mutations" + mutations: type: File outputBinding: glob: $(inputs.vcf) diff --git a/mutect.py b/mutect.py index 39b99f3..cd73491 100755 --- a/mutect.py +++ b/mutect.py @@ -9,9 +9,17 @@ import vcf import argparse import logging +import gzip from string import Template from multiprocessing import Pool +def gunzip(infile, outfile): + inF = gzip.GzipFile(infile, 'rb') + s = inF.read() + inF.close() + with open(outfile, 'wb') as outF: + outF.write(s) + def fai_chunk(path, blocksize): seq_map = {} with open( path ) as handle: @@ -121,7 +129,10 @@ def run_mutect(args): ref_seq = os.path.join(workdir, "ref_genome.fasta") ref_dict = os.path.join(workdir, "ref_genome.dict") - os.symlink(os.path.abspath(args['reference_sequence']), ref_seq) + if args['reference_sequence'].endswith('.gz'): + gunzip(args['reference_sequence'], ref_seq) + else: + os.symlink(os.path.abspath(args['reference_sequence']), ref_seq) subprocess.check_call( ["/usr/bin/samtools", "faidx", ref_seq] ) subprocess.check_call( [args['java'], "-jar", args['dict_jar'], From a0e6c8d2c683e9b3f93123ef662dc44870390357 Mon Sep 17 00:00:00 2001 From: Jeltje Date: Wed, 1 Mar 2017 02:28:38 +0000 Subject: [PATCH 2/3] changed gunzip to zcat to avoid memory issues with larger files --- mutect.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/mutect.py b/mutect.py index cd73491..8725146 100755 --- a/mutect.py +++ b/mutect.py @@ -9,16 +9,17 @@ import vcf import argparse import logging -import gzip from string import Template from multiprocessing import Pool def gunzip(infile, outfile): - inF = gzip.GzipFile(infile, 'rb') - s = inF.read() - inF.close() - with open(outfile, 'wb') as outF: - outF.write(s) + cmd = (' ').join(['zcat', infile]) + with open(outfile, 'w') as outF: + p = subprocess.Popen(cmd, shell=True, stdout=outF, stderr=subprocess.PIPE) + stdout,stderr = p.communicate() + if len(stderr): + print "unzip command failed:", stderr + raise Exception("unzip failed") def fai_chunk(path, blocksize): seq_map = {} From 0acf5aa47265eb8427c228edcbbfe2f068aeb666 Mon Sep 17 00:00:00 2001 From: Jeltje Date: Tue, 4 Apr 2017 17:39:34 +0000 Subject: [PATCH 3/3] removed redundant parentheses --- mutect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mutect.py b/mutect.py index 8725146..6fb894c 100755 --- a/mutect.py +++ b/mutect.py @@ -13,7 +13,7 @@ from multiprocessing import Pool def gunzip(infile, outfile): - cmd = (' ').join(['zcat', infile]) + cmd = ' '.join(['zcat', infile]) with open(outfile, 'w') as outF: p = subprocess.Popen(cmd, shell=True, stdout=outF, stderr=subprocess.PIPE) stdout,stderr = p.communicate()