Allow picking which runfolder variant we should try to extract srf files from.

[htsworkflow.git] / scripts / srf
diff --git a/scripts/srf b/scripts/srf

index fcca2ea320c3e982f2c9e6a090c5017ec5c86430..e7478a9d5ff38da60eb26393cc0a9ba50eb0264e 100644 (file)
--- a/scripts/srf
+++ b/scripts/srf
@@ -7,9 +7,14 @@ import os
  import subprocess
  import sys
  
-from gaworkflow.util import queuecommands
+from htsworkflow.util import queuecommands
+from htsworkflow.pipelines import runfolder
  
-def make_commands(run_name, lanes, site_name, destdir):
+SOLEXA2SRF = 0
+ILLUMINA2SRF10 = 1
+ILLUMINA2SRF11 = 2
+
+def make_commands(run_name, lanes, site_name, destdir, cmdlevel=ILLUMINA2SRF11):
    """
    make a subprocess-friendly list of command line arguments to run solexa2srf
    generates files like: 
@@ -24,48 +29,45 @@ def make_commands(run_name, lanes, site_name, destdir):
    cmd_list = []
    for lane in lanes:
      name_prefix = '%s_%%l_%%t_' % (run_name,)
-    destname = '%s:%s-%d.srf' % (site_name, run_name, lane)
+    destname = '%s_%s_%d.srf' % (site_name, run_name, lane)
      destdir = os.path.normpath(destdir)
      dest_path = os.path.join(destdir, destname)
      seq_pattern = 's_%d_*_seq.txt' % (lane,)
  
-    cmd = ['solexa2srf', 
-           '-N', name_prefix,
-           '-n', '%3x:%3y', 
-           '-o', dest_path, 
-          seq_pattern]
+    if cmdlevel == SOLEXA2SRF:
+        cmd = ['solexa2srf', 
+               '-N', name_prefix,
+               '-n', '%3x:%3y', 
+               '-o', dest_path, 
+               seq_pattern]
+    elif cmdlevel == ILLUMINA2SRF10:
+        cmd = ['illumina2srf', 
+               '-v1.0',
+               '-o', dest_path,
+               seq_pattern]
+    elif cmdlevel == ILLUMINA2SRF11:
+        seq_pattern = 's_%d_*_qseq.txt' % (lane,)
+        cmd = ['illumina2srf', 
+               '-o', dest_path,
+               seq_pattern]
+    else:
+        raise ValueError("Unrecognized run level %d" % (cmdlevel,))
  
      cmd_list.append(" ".join(cmd))
    return cmd_list
  
-def pathname_to_run_name(pathname):
+def pathname_to_run_name(base):
    """
    Convert a pathname to a base runfolder name
    handle the case with a trailing /
    """
-  name = None
-  while name is None:
-    base, name = os.path.split(pathname)
+  name = ""
+  while len(name) == 0:
+    base, name = os.path.split(base)
      if len(base) == 0:
        return None
    return name
  
-def find_bustard_dir(pathname):
-  # fixme: for don't repeat yourself this should some how be related 
-  # fixme: to pipeline.runfolder
-
-  datadir = os.path.join(pathname, 'Data')
-  logging.info("searching in %s" % (datadir,))
-  
-  bustard_dirs = []
-  for firecrest_pathname in glob(os.path.join(datadir,"*Firecrest*")):
-    bustard_glob = os.path.join(firecrest_pathname, "Bustard*")
-    for bustard_pathname in glob(bustard_glob):
-      bustard_dirs.append(bustard_pathname)
-  return bustard_dirs
-    
-
-
  def make_parser():
    usage = '%prog: [options] runfolder -l 1,2,3 [runfolder -l 5,6 ...]'
  
@@ -86,8 +88,19 @@ def make_parser():
    )
    parser.add_option('-j', '--jobs', default=1, type='int',
                      help='how many jobs to run simultaneously')
+  parser.add_option('-r', '--runfolder-version', default=ILLUMINA2SRF11, type='int',
+                    help='Which class of srf file should we attempt to create\n'
+                         '0 = Solexa pipeline 0.2.6 - 0.3\n'
+                         '1 = illumina pipeline 1.0\n'
+                         '2 = illumina pipeline 1.1rc1 and later \n')
                       
-
+  parser.add_option('-v', '--verbose', dest='verbose',
+                    default=False, action='store_true',
+                    help='report more about internals (INFO)')
+  parser.add_option('--debug', dest='debug',
+                    default=False, action='store_true',
+                    help='report even more about internals (DEBUG)')
+ 
    return parser
  
  def parse_lane_arg(lane_arg):
@@ -108,7 +121,14 @@ def parse_lane_arg(lane_arg):
  def main(cmdline=None):
      parser = make_parser()
      opts, args = parser.parse_args(cmdline)
-  
+   
+    if opts.debug: 
+        logging.basicConfig(level=logging.DEBUG)
+    elif opts.verbose:
+        logging.basicConfig(level=logging.INFO)
+    else:
+        logging.basicConfig(level=logging.WARNING)
+
      if len(args) == 0:
          parser.error('need runfolder arguments')
  
@@ -127,14 +147,21 @@ def main(cmdline=None):
      # build list of commands
      cmds = {}
      for runfolder_path, lanes in zip(args, lanes_list):
+        # normalize paths, either relative to home dirs or current dir
+        runfolder_path = os.path.abspath(runfolder_path)
+        # the last part of the path should be a runfolder name
          name = pathname_to_run_name(runfolder_path)
-        bustard_dir = find_bustard_dir(runfolder_path)
-        if len(bustard_dir) == 1:
-          bustard_dir = bustard_dir[0]
-          cmds[bustard_dir] = make_commands(name, lanes, opts.site, opts.dest_dir)
+        # so any bustard directories?
+        runs = runfolder.get_runs(runfolder_path)
+        # give up if there are anything other than 1 run
+        if len(runs) > 1:
+          print 'ERROR: Too many run directories in %s' %(runfolder_path,)
+          return 1
+        elif len(runs) == 1:
+          bustard_dir = runs[0].bustard.pathname
+          cmds[bustard_dir] = make_commands(name, lanes, opts.site, opts.dest_dir, opts.runfolder_version)
          else:
-          print 'ERROR: Too many bustard directories'
-          print "\n ".join(bustard_dir)
+          print "ERROR: Couldn't find a bustard directory in", runfolder_path
            return 1
  
      if not opts.dry_run:
@@ -142,7 +169,7 @@ def main(cmdline=None):
          curdir = os.getcwd()
          os.chdir(cwd)
          q = queuecommands.QueueCommands(cmd_list, opts.jobs)
-        q.start_jobs()
+        q.run()
          os.chdir(curdir)
      else:
        for cwd, cmd_list in cmds.items():
@@ -153,5 +180,4 @@ def main(cmdline=None):
      return 0
  
  if __name__ == "__main__":
-    logging.basicConfig(level=logging.DEBUG)
      sys.exit(main(sys.argv[1:]))