def _get_sequence_format(self):
"""Guess sequence format"""
- projects = glob(os.path.join(self.pathname, 'Project_*'))
+ project_glob = os.path.join(self.pathname, 'Project_*')
+ LOGGER.debug("Scanning: %s" % (project_glob,))
+ projects = glob(project_glob)
if len(projects) > 0:
# Hey we look like a demultiplexed run
return 'fastq'
+ seqs = glob(os.path.join(self.pathname, '*_seq.txt'))
+ if len(seqs) > 0:
+ return 'srf'
return 'qseq'
- #qseqs = glob(os.path.join(self.pathname, '*_qseq.txt'))
- #if len(qseqs) > 0:
- # return 'qseq'
- #seqs = glob(os.path.join(self.pathname, '*_seq.txt'))
- #if len(seqs) > 0:
- # return 'srf'
- return None
sequence_format = property(_get_sequence_format)
def _get_software_version(self):
output_base_dir = os.getcwd()
for r in runs:
- result_dir = os.path.join(output_base_dir, r.flowcell_id)
- LOGGER.info("Using %s as result directory" % (result_dir,))
- if not os.path.exists(result_dir):
- os.mkdir(result_dir)
-
- # create cycle_dir
- cycle = "C%d-%d" % (r.image_analysis.start, r.image_analysis.stop)
- LOGGER.info("Filling in %s" % (cycle,))
- cycle_dir = os.path.join(result_dir, cycle)
- cycle_dir = os.path.abspath(cycle_dir)
- if os.path.exists(cycle_dir):
- LOGGER.error("%s already exists, not overwriting" % (cycle_dir,))
- continue
- else:
- os.mkdir(cycle_dir)
-
- # save run file
- r.save(cycle_dir)
-
- # save illumina flowcell status report
- save_flowcell_reports(os.path.join(r.image_analysis.pathname, '..'),
- cycle_dir)
-
- # save stuff from bustard
- # grab IVC plot
- save_ivc_plot(r.bustard, cycle_dir)
-
- # build base call saving commands
- if site is not None:
- save_raw_data(num_jobs, r, site, raw_format, cycle_dir)
-
- # save stuff from GERALD
- # copy stuff out of the main run
- g = r.gerald
-
- # save summary file
- save_summary_file(r, cycle_dir)
-
- # compress eland result files
- compress_eland_results(g, cycle_dir, num_jobs)
-
- # md5 all the compressed files once we're done
- md5_commands = srf.make_md5_commands(cycle_dir)
- srf.run_commands(cycle_dir, md5_commands, num_jobs)
+ result_dir = os.path.join(output_base_dir, r.flowcell_id)
+ LOGGER.info("Using %s as result directory" % (result_dir,))
+ if not os.path.exists(result_dir):
+ os.mkdir(result_dir)
+
+ # create cycle_dir
+ cycle = "C%d-%d" % (r.image_analysis.start, r.image_analysis.stop)
+ LOGGER.info("Filling in %s" % (cycle,))
+ cycle_dir = os.path.join(result_dir, cycle)
+ cycle_dir = os.path.abspath(cycle_dir)
+ if os.path.exists(cycle_dir):
+ LOGGER.error("%s already exists, not overwriting" % (cycle_dir,))
+ continue
+ else:
+ os.mkdir(cycle_dir)
+
+ # save run file
+ r.save(cycle_dir)
+
+ # save illumina flowcell status report
+ save_flowcell_reports(os.path.join(r.image_analysis.pathname, '..'),
+ cycle_dir)
+
+ # save stuff from bustard
+ # grab IVC plot
+ save_ivc_plot(r.bustard, cycle_dir)
+
+ # build base call saving commands
+ if site is not None:
+ save_raw_data(num_jobs, r, site, raw_format, cycle_dir)
+
+ # save stuff from GERALD
+ # copy stuff out of the main run
+ g = r.gerald
+
+ # save summary file
+ save_summary_file(r, cycle_dir)
+
+ # compress eland result files
+ compress_eland_results(g, cycle_dir, num_jobs)
+
+ # md5 all the compressed files once we're done
+ md5_commands = srf.make_md5_commands(cycle_dir)
+ srf.run_commands(cycle_dir, md5_commands, num_jobs)
def save_raw_data(num_jobs, r, site, raw_format, cycle_dir):
lanes = []
parser.add_option('--site', default=None,
help='create srf files tagged with the provided '\
'site name')
- parser.add_option('--raw-format', dest="raw_format", default='qseq',
- choices=['qseq', 'srf', 'fastq'],
+ parser.add_option('--raw-format', dest="raw_format", default=None,
+ choices=['qseq', 'srf', 'fastq', None],
help='Specify which type of raw format to use. '
'Currently supported options: qseq, srf, fastq')
parser.add_option('-u', '--use-run', dest='use_run', default=None,