Remove default sequence-format so the new auto-detector code is actually called
author Diane Trout <diane@caltech.edu>
Mon, 16 Jul 2012 22:09:00 +0000 (15:09 -0700)
committer Diane Trout <diane@caltech.edu>
Mon, 16 Jul 2012 22:09:00 +0000 (15:09 -0700)
htsworkflow/pipelines/bustard.py
htsworkflow/pipelines/runfolder.py
scripts/htsw-runfolder

index eefaef4d82acc122b8fda2e35ffcbb9cfca100b5..2993d06e6b128b00e1beba1154a74c00b3463818 100644 (file)
@@ -241,18 +241,16 @@ class Bustard(object):
 
     def _get_sequence_format(self):
         """Guess sequence format"""
-        projects = glob(os.path.join(self.pathname, 'Project_*'))
+        project_glob = os.path.join(self.pathname, 'Project_*')
+        LOGGER.debug("Scanning: %s" % (project_glob,))
+        projects = glob(project_glob)
         if len(projects) > 0:
             # Hey we look like a demultiplexed run
             return 'fastq'
+        seqs = glob(os.path.join(self.pathname, '*_seq.txt'))
+        if len(seqs) > 0:
+            return 'srf'
         return 'qseq'
-        #qseqs = glob(os.path.join(self.pathname, '*_qseq.txt'))
-        #if len(qseqs) > 0:
-        #    return 'qseq'
-        #seqs = glob(os.path.join(self.pathname, '*_seq.txt'))
-        #if len(seqs) > 0:
-        #    return 'srf'
-        return None
     sequence_format = property(_get_sequence_format)
 
     def _get_software_version(self):
index 71d5b6d0a5ac3d1db396ecd061c340f2d1ef8441..1e46e177075fb30362cb32aed6b6d9598f8010d4 100644 (file)
@@ -583,50 +583,50 @@ def extract_results(runs, output_base_dir=None, site="individual", num_jobs=1, r
         output_base_dir = os.getcwd()
 
     for r in runs:
-      result_dir = os.path.join(output_base_dir, r.flowcell_id)
-      LOGGER.info("Using %s as result directory" % (result_dir,))
-      if not os.path.exists(result_dir):
-        os.mkdir(result_dir)
-
-      # create cycle_dir
-      cycle = "C%d-%d" % (r.image_analysis.start, r.image_analysis.stop)
-      LOGGER.info("Filling in %s" % (cycle,))
-      cycle_dir = os.path.join(result_dir, cycle)
-      cycle_dir = os.path.abspath(cycle_dir)
-      if os.path.exists(cycle_dir):
-        LOGGER.error("%s already exists, not overwriting" % (cycle_dir,))
-        continue
-      else:
-        os.mkdir(cycle_dir)
-
-      # save run file
-      r.save(cycle_dir)
-
-      # save illumina flowcell status report
-      save_flowcell_reports(os.path.join(r.image_analysis.pathname, '..'),
-                            cycle_dir)
-
-      # save stuff from bustard
-      # grab IVC plot
-      save_ivc_plot(r.bustard, cycle_dir)
-
-      # build base call saving commands
-      if site is not None:
-          save_raw_data(num_jobs, r, site, raw_format, cycle_dir)
-
-      # save stuff from GERALD
-      # copy stuff out of the main run
-      g = r.gerald
-
-      # save summary file
-      save_summary_file(r, cycle_dir)
-
-      # compress eland result files
-      compress_eland_results(g, cycle_dir, num_jobs)
-
-      # md5 all the compressed files once we're done
-      md5_commands = srf.make_md5_commands(cycle_dir)
-      srf.run_commands(cycle_dir, md5_commands, num_jobs)
+        result_dir = os.path.join(output_base_dir, r.flowcell_id)
+        LOGGER.info("Using %s as result directory" % (result_dir,))
+        if not os.path.exists(result_dir):
+            os.mkdir(result_dir)
+
+        # create cycle_dir
+        cycle = "C%d-%d" % (r.image_analysis.start, r.image_analysis.stop)
+        LOGGER.info("Filling in %s" % (cycle,))
+        cycle_dir = os.path.join(result_dir, cycle)
+        cycle_dir = os.path.abspath(cycle_dir)
+        if os.path.exists(cycle_dir):
+            LOGGER.error("%s already exists, not overwriting" % (cycle_dir,))
+            continue
+        else:
+            os.mkdir(cycle_dir)
+
+        # save run file
+        r.save(cycle_dir)
+
+        # save illumina flowcell status report
+        save_flowcell_reports(os.path.join(r.image_analysis.pathname, '..'),
+                              cycle_dir)
+
+        # save stuff from bustard
+        # grab IVC plot
+        save_ivc_plot(r.bustard, cycle_dir)
+
+        # build base call saving commands
+        if site is not None:
+            save_raw_data(num_jobs, r, site, raw_format, cycle_dir)
+
+        # save stuff from GERALD
+        # copy stuff out of the main run
+        g = r.gerald
+
+        # save summary file
+        save_summary_file(r, cycle_dir)
+
+        # compress eland result files
+        compress_eland_results(g, cycle_dir, num_jobs)
+
+        # md5 all the compressed files once we're done
+        md5_commands = srf.make_md5_commands(cycle_dir)
+        srf.run_commands(cycle_dir, md5_commands, num_jobs)
 
 def save_raw_data(num_jobs, r, site, raw_format, cycle_dir):
     lanes = []
index 27df866b67471ebdde139c23c7ca992efbd475f4..2f542bc5ac7df50f30090ad162ba2f2d3e383d3e 100755 (executable)
@@ -173,8 +173,8 @@ def make_parser():
     parser.add_option('--site', default=None,
                       help='create srf files tagged with the provided '\
                       'site name')
-    parser.add_option('--raw-format', dest="raw_format", default='qseq',
-                      choices=['qseq', 'srf', 'fastq'],
+    parser.add_option('--raw-format', dest="raw_format", default=None,
+                      choices=['qseq', 'srf', 'fastq', None],
                       help='Specify which type of raw format to use. '
                            'Currently supported options: qseq, srf, fastq')
     parser.add_option('-u', '--use-run', dest='use_run', default=None,