From 26281687e8f19a51dabc48dc74822aa8b43ea3d9 Mon Sep 17 00:00:00 2001
From: Jeltje <jeltje.van.baren@gmail.com>
Date: Mon, 27 Feb 2017 18:15:29 +0000
Subject: [PATCH 1/3] upgraded cwl to v1.0, added option to input gzipped genome,
 changed default docker container to quay.io, added test input Dockstore.json

---
 Dockstore.json                | 18 ++++++++++
 mutect.cwl.yaml => mutect.cwl | 64 ++++++++++++++++++-----------------
 mutect.py                     | 13 ++++++-
 3 files changed, 63 insertions(+), 32 deletions(-)
 create mode 100644 Dockstore.json
 rename mutect.cwl.yaml => mutect.cwl (62%)

diff --git a/Dockstore.json b/Dockstore.json
new file mode 100644
index 0000000..6b45ccd
--- /dev/null
+++ b/Dockstore.json
@@ -0,0 +1,18 @@
+{
+  "reference": {
+    "path": "http://hgwdev.cse.ucsc.edu/~jeltje/public_data/genome.fa.gz",
+    "class": "File"
+  },
+  "normal": {
+    "path": "https://dcc.icgc.org/api/v1/download?fn=/PCAWG/reference_data/data_for_testing/HCC1143_ds/HCC1143_BL.bam",
+    "class": "File"
+  },
+  "tumor": {
+    "path": "https://dcc.icgc.org/api/v1/download?fn=/PCAWG/reference_data/data_for_testing/HCC1143_ds/HCC1143.bam",
+    "class": "File"
+  },
+  "mutations": {
+    "path": "/tmp/mutect.vcf",
+    "class": "File"
+  }
+}
diff --git a/mutect.cwl.yaml b/mutect.cwl
similarity index 62%
rename from mutect.cwl.yaml
rename to mutect.cwl
index 86ffdd7..4d6ef46 100644
--- a/mutect.cwl.yaml
+++ b/mutect.cwl
@@ -1,79 +1,81 @@
 cwlVersion: v1.0
 class: CommandLineTool
-label: MuTect
+
+doc: "Mutect 1.1.5"
+
+hints:
+  DockerRequirement:
+    dockerPull: quay.io/opengenomics/mutect
+
 baseCommand: ['python', '/opt/mutect.py']
-requirements:
-  - class: "DockerRequirement"
-    dockerImageId: "mutect:1.1.5"
+
 inputs:
-  - id: "#tumor"
+  tumor:
     type: File
     inputBinding:
       prefix: --input_file:tumor
-    secondaryFiles:
-      - .bai
-  - id: "#normal"
+  normal:
     type: File
     inputBinding:
       prefix: --input_file:normal
-    secondaryFiles:
-      - .bai
-  - id: "#reference"
+  reference:
     type: File
     inputBinding:
       prefix: --reference_sequence
-    secondaryFiles:
-      - .fai
-      - ^.dict
-  - id: "#cosmic"
-    type: File
+  cosmic:
+    type: File?
     inputBinding:
       prefix: --cosmic
-  - id: "#dbsnp"
-    type: File
+  dbsnp:
+    type: File?
     inputBinding:
       prefix: --dbsnp
     secondaryFiles: .tbi
-  - id: "#tumor_lod"
-    type: float
+  tumor_lod:
+    type: float?
     default: 6.3
     inputBinding:
       prefix: --tumor_lod
-  - id: "#initial_tumor_lod"
-    type: float
+  initial_tumor_lod:
+    type: float?
     default: 4.0
     inputBinding:
       prefix: --initial_tumor_lod
-  - id: "#out"
-    type: string
+  ncpus:
+    type: int?
+    inputBinding:
+      position: 2
+      prefix: --ncpus
+  out:
+    type: string?
     default: call_stats.txt
     inputBinding:
       prefix: --out
-  - id: "#coverage_file"
-    type: string
+  coverage_file:
+    type: string?
     default: coverage.wig.txt
     inputBinding:
       prefix: --coverage_file
-  - id: "#vcf"
-    type: string
+  vcf:
+    type: string?
     default: mutations.vcf
     inputBinding:
       prefix: --vcf
 
 outputs:
-  - id: "#coverage"
+  coverage:
     type: File
     outputBinding:
       glob: $(inputs.coverage_file)
 
 
-  - id: "#call_stats"
+  call_stats:
     type: File
     outputBinding:
       glob: $(inputs.out)
 
         
-  - id: "#mutations"
+  mutations:
     type: File
     outputBinding:
       glob: $(inputs.vcf)
diff --git a/mutect.py b/mutect.py
index 39b99f3..cd73491 100755
--- a/mutect.py
+++ b/mutect.py
@@ -9,9 +9,17 @@
 import vcf
 import argparse
 import logging
+import gzip
 from string import Template
 from multiprocessing import Pool
 
+def gunzip(infile, outfile):
+    inF = gzip.GzipFile(infile, 'rb')
+    s = inF.read()
+    inF.close()
+    with open(outfile, 'wb') as outF:
+        outF.write(s)
+
 def fai_chunk(path, blocksize):
     seq_map = {}
     with open( path ) as handle:
@@ -121,7 +129,10 @@ def run_mutect(args):
 
     ref_seq = os.path.join(workdir, "ref_genome.fasta")
     ref_dict = os.path.join(workdir, "ref_genome.dict")
-    os.symlink(os.path.abspath(args['reference_sequence']), ref_seq)
+    if args['reference_sequence'].endswith('.gz'):
+        gunzip(args['reference_sequence'], ref_seq)
+    else:
+        os.symlink(os.path.abspath(args['reference_sequence']), ref_seq)
     subprocess.check_call( ["/usr/bin/samtools", "faidx", ref_seq] )
     subprocess.check_call( [args['java'], "-jar",
         args['dict_jar'],

From a0e6c8d2c683e9b3f93123ef662dc44870390357 Mon Sep 17 00:00:00 2001
From: Jeltje <jeltje.van.baren@gmail.com>
Date: Wed, 1 Mar 2017 02:28:38 +0000
Subject: [PATCH 2/3] changed gunzip to zcat to avoid memory issues with larger
 files

---
 mutect.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/mutect.py b/mutect.py
index cd73491..8725146 100755
--- a/mutect.py
+++ b/mutect.py
@@ -9,16 +9,17 @@
 import vcf
 import argparse
 import logging
-import gzip
 from string import Template
 from multiprocessing import Pool
 
 def gunzip(infile, outfile):
-    inF = gzip.GzipFile(infile, 'rb')
-    s = inF.read()
-    inF.close()
-    with open(outfile, 'wb') as outF:
-        outF.write(s)
+    cmd = (' ').join(['zcat', infile])
+    with open(outfile, 'w') as outF:
+        p = subprocess.Popen(cmd, shell=True, stdout=outF, stderr=subprocess.PIPE)
+    stdout,stderr =  p.communicate()
+    if len(stderr):
+        print "unzip command failed:", stderr
+        raise Exception("unzip failed")
 
 def fai_chunk(path, blocksize):
     seq_map = {}

From 0acf5aa47265eb8427c228edcbbfe2f068aeb666 Mon Sep 17 00:00:00 2001
From: Jeltje <jeltje.van.baren@gmail.com>
Date: Tue, 4 Apr 2017 17:39:34 +0000
Subject: [PATCH 3/3] removed redundant parentheses

---
 mutect.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mutect.py b/mutect.py
index 8725146..6fb894c 100755
--- a/mutect.py
+++ b/mutect.py
@@ -13,7 +13,7 @@
 from multiprocessing import Pool
 
 def gunzip(infile, outfile):
-    cmd = (' ').join(['zcat', infile])
+    cmd = ' '.join(['zcat', infile])
     with open(outfile, 'w') as outF:
         p = subprocess.Popen(cmd, shell=True, stdout=outF, stderr=subprocess.PIPE)
     stdout,stderr =  p.communicate()