Modify the srf utility to tar.bz2 the qseq files instead of using the srf conversion tools
[htsworkflow.git] / scripts / srf
index 248728f824fac0441c6a5b088d3387a78eebfa75..bcf835dbfa29c1d3707d0e9e602d5c5882093ce4 100644 (file)
@@ -1,70 +1,14 @@
 #!/usr/bin/python
 
-from glob import glob
 import logging
 import optparse
 import os
-import subprocess
 import sys
 
-from htsworkflow.util import queuecommands
-
-def make_commands(run_name, lanes, site_name, destdir):
-  """
-  make a subprocess-friendly list of command line arguments to run solexa2srf
-  generates files like: 
-  woldlab:080514_HWI-EAS229_0029_20768AAXX:8.srf
-   site        run name                    lane
-             
-  run_name - most of the file name (run folder name is a good choice)
-  lanes - list of integers corresponding to which lanes to process
-  site_name - name of your "sequencing site" or "Individual"
-  destdir - where to write all the srf files
-  """
-  cmd_list = []
-  for lane in lanes:
-    name_prefix = '%s_%%l_%%t_' % (run_name,)
-    destname = '%s_%s_%d.srf' % (site_name, run_name, lane)
-    destdir = os.path.normpath(destdir)
-    dest_path = os.path.join(destdir, destname)
-    seq_pattern = 's_%d_*_seq.txt' % (lane,)
-
-    cmd = ['solexa2srf', 
-           '-N', name_prefix,
-           '-n', '%3x:%3y', 
-           '-o', dest_path, 
-          seq_pattern]
-
-    cmd_list.append(" ".join(cmd))
-  return cmd_list
-
-def pathname_to_run_name(base):
-  """
-  Convert a pathname to a base runfolder name
-  handle the case with a trailing /
-  """
-  name = ""
-  while len(name) == 0:
-    base, name = os.path.split(base)
-    if len(base) == 0:
-      return None
-  return name
-
-def find_bustard_dir(pathname):
-  # fixme: for don't repeat yourself this should some how be related 
-  # fixme: to pipelines.runfolder
-
-  datadir = os.path.join(pathname, 'Data')
-  logging.info("searching in %s" % (datadir,))
-  
-  bustard_dirs = []
-  for firecrest_pathname in glob(os.path.join(datadir,"*Firecrest*")):
-    bustard_glob = os.path.join(firecrest_pathname, "Bustard*")
-    for bustard_pathname in glob(bustard_glob):
-      bustard_dirs.append(bustard_pathname)
-  return bustard_dirs
-    
-
+from htsworkflow.pipelines import runfolder
+from htsworkflow.pipelines.srf import make_srf_commands, make_qseq_commands, \
+                                      run_commands, pathname_to_run_name
+from htsworkflow.pipelines.srf import ILLUMINA2SRF10, ILLUMINA2SRF11, SOLEXA2SRF
 
 def make_parser():
   usage = '%prog: [options] runfolder -l 1,2,3 [runfolder -l 5,6 ...]'
@@ -86,8 +30,19 @@ def make_parser():
   )
   parser.add_option('-j', '--jobs', default=1, type='int',
                     help='how many jobs to run simultaneously')
+  parser.add_option('-r', '--runfolder-version', default=ILLUMINA2SRF11, type='int',
+                    help='Which class of srf file should we attempt to create\n'
+                         '0 = Solexa pipeline 0.2.6 - 0.3\n'
+                         '1 = illumina pipeline 1.0\n'
+                         '2 = illumina pipeline 1.1rc1 and later \n')
                      
-
+  parser.add_option('-v', '--verbose', dest='verbose',
+                    default=False, action='store_true',
+                    help='report more about internals (INFO)')
+  parser.add_option('--debug', dest='debug',
+                    default=False, action='store_true',
+                    help='report even more about internals (DEBUG)')
   return parser
 
 def parse_lane_arg(lane_arg):
@@ -108,7 +63,14 @@ def parse_lane_arg(lane_arg):
 def main(cmdline=None):
     parser = make_parser()
     opts, args = parser.parse_args(cmdline)
-  
+   
+    if opts.debug: 
+        logging.basicConfig(level=logging.DEBUG)
+    elif opts.verbose:
+        logging.basicConfig(level=logging.INFO)
+    else:
+        logging.basicConfig(level=logging.WARNING)
+
     if len(args) == 0:
         parser.error('need runfolder arguments')
 
@@ -123,36 +85,35 @@ def main(cmdline=None):
         parser.error(
           "Number of lane arguments must match number of runfolders"
         )
-    
+
+    make_commands = make_qseq_commands
     # build list of commands
     cmds = {}
     for runfolder_path, lanes in zip(args, lanes_list):
         # normalize paths, either relative to home dirs or current dir
         runfolder_path = os.path.abspath(runfolder_path)
-        # the last part of the path should be a runfolder name
-        name = pathname_to_run_name(runfolder_path)
+        run_name = pathname_to_run_name(runfolder_path)
         # so any bustard directories?
-        bustard_dir = find_bustard_dir(runfolder_path)
-        # give up if there are anything other than 1 bustard dir
-        if len(bustard_dir) > 1:
-          print 'ERROR: Too many bustard directories',
-          print "\n ".join(bustard_dir)
+        runs = runfolder.get_runs(runfolder_path)
+        # give up if there is anything other than exactly 1 run
+        if len(runs) > 1:
+          print 'ERROR: Too many run directories in %s' %(runfolder_path,)
           return 1
-
-        elif len(bustard_dir) == 1:
-          bustard_dir = bustard_dir[0]
-          cmds[bustard_dir] = make_commands(name, lanes, opts.site, opts.dest_dir)
+        elif len(runs) == 1:
+          bustard_dir = runs[0].bustard.pathname
+          cmds[bustard_dir] = make_commands(run_name,
+                                            bustard_dir,
+                                            lanes,
+                                            opts.site,
+                                            opts.dest_dir,
+                                            opts.runfolder_version)
         else:
           print "ERROR: Couldn't find a bustard directory in", runfolder_path
           return 1
 
     if not opts.dry_run:
       for cwd, cmd_list in cmds.items():
-        curdir = os.getcwd()
-        os.chdir(cwd)
-        q = queuecommands.QueueCommands(cmd_list, opts.jobs)
-        q.run()
-        os.chdir(curdir)
+        run_commands(cwd, cmd_list, opts.jobs)
     else:
       for cwd, cmd_list in cmds.items():
         print cwd
@@ -162,5 +123,4 @@ def main(cmdline=None):
     return 0
 
 if __name__ == "__main__":
-    logging.basicConfig(level=logging.DEBUG)
     sys.exit(main(sys.argv[1:]))