Skip to content

Commit eec6095

Browse files
committed
Fix output directory for genomic references and add better precondition checks
1 parent aa78680 commit eec6095

File tree

2 files changed

+14
-8
lines changed

2 files changed

+14
-8
lines changed

rnaseq_pipeline/targets.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,17 @@ class RsemReference(luigi.Target):
1010
"""
1111
Represents the target of rsem-prepare-reference script.
1212
"""
13-
def __init__(self, prefix, taxon):
14-
self.prefix = prefix
13+
def __init__(self, path, taxon):
14+
self.path = path
1515
self.taxon = taxon
1616

17+
@property
18+
def prefix(self):
19+
return join(self.path, '{}_0'.format(self.taxon))
20+
1721
def exists(self):
18-
exts = ['grp', 'ti', 'seq', 'chrlist']
19-
return all(exists(join(self.prefix, '{}_0.{}'.format(self.taxon, ext)))
22+
exts = ['chrlist', 'grp', 'idx.fa', 'ng2.idx.fa', 'seq', 'ti', 'transcripts.fa']
23+
return all(exists(self.prefix + '.' + ext)
2024
for ext in exts)
2125

2226
class GemmaDatasetPlatform(luigi.Target):

rnaseq_pipeline/tasks.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -194,9 +194,11 @@ class PrepareReference(ScheduledExternalProgramTask):
194194
def input(self):
195195
genome_dir = join(cfg.OUTPUT_DIR, cfg.GENOMES, self.reference_id)
196196
gtf_files = glob(join(genome_dir, '*.gtf'))
197-
fasta_files = glob(join(genome_dir, '*.fn?a'))
197+
fasta_files = glob(join(genome_dir, '*.f*a')) # FIXME: this pattern is too broad
198198
if len(gtf_files) != 1:
199-
raise ValueError('Only one GTF file is expected in {}.'.format(genome_dir))
199+
raise ValueError('Exactly one GTF file is expected in {}.'.format(genome_dir))
200+
if len(fasta_files) < 1:
201+
raise ValueError('At least one FASTA (with .fa or .fna extension) file is expected in {}.'.format(genome_dir))
200202
return [luigi.LocalTarget(gtf_files[0]),
201203
[luigi.LocalTarget(f) for f in fasta_files]]
202204

@@ -212,12 +214,12 @@ def program_args(self):
212214

213215
args.extend([t.path for t in genome_fasta])
214216

215-
args.append(join(self.output().prefix))
217+
args.append(self.output().prefix)
216218

217219
return args
218220

219221
def run(self):
220-
os.makedirs(self.output().prefix, exist_ok=True)
222+
os.makedirs(self.output().path, exist_ok=True)
221223
return super().run()
222224

223225
def output(self):

0 commit comments

Comments
 (0)