scripts/srf

   1 #!/usr/bin/python
   2
   3 from glob import glob
   4 import logging
   5 import optparse
   6 import os
   7 import subprocess
   8 import sys
   9
  10 from gaworkflow.util import queuecommands
  11
  12 def make_commands(run_name, lanes, site_name, destdir):
  13   """
  14   make a subprocess-friendly list of command line arguments to run solexa2srf
  15   generates files like:
  16   woldlab:080514_HWI-EAS229_0029_20768AAXX:8.srf
  17    site        run name                    lane
  18
  19   run_name - most of the file name (run folder name is a good choice)
  20   lanes - list of integers corresponding to which lanes to process
  21   site_name - name of your "sequencing site" or "Individual"
  22   destdir - where to write all the srf files
  23   """
  24   cmd_list = []
  25   for lane in lanes:
  26     name_prefix = '%s_%%l_%%t_' % (run_name,)
  27     destname = '%s_%s_%d.srf' % (site_name, run_name, lane)
  28     destdir = os.path.normpath(destdir)
  29     dest_path = os.path.join(destdir, destname)
  30     seq_pattern = 's_%d_*_seq.txt' % (lane,)
  31
  32     cmd = ['solexa2srf',
  33            '-N', name_prefix,
  34            '-n', '%3x:%3y',
  35            '-o', dest_path,
  36            seq_pattern]
  37
  38     cmd_list.append(" ".join(cmd))
  39   return cmd_list
  40
  41 def pathname_to_run_name(base):
  42   """
  43   Convert a pathname to a base runfolder name
  44   handle the case with a trailing /
  45   """
  46   name = ""
  47   while len(name) == 0:
  48     base, name = os.path.split(base)
  49     if len(base) == 0:
  50       return None
  51   return name
  52
  53 def find_bustard_dir(pathname):
  54   # fixme: for don't repeat yourself this should some how be related
  55   # fixme: to pipeline.runfolder
  56
  57   datadir = os.path.join(pathname, 'Data')
  58   logging.info("searching in %s" % (datadir,))
  59
  60   bustard_dirs = []
  61   for firecrest_pathname in glob(os.path.join(datadir,"*Firecrest*")):
  62     bustard_glob = os.path.join(firecrest_pathname, "Bustard*")
  63     for bustard_pathname in glob(bustard_glob):
  64       bustard_dirs.append(bustard_pathname)
  65   return bustard_dirs
  66
  67
  68
  69 def make_parser():
  70   usage = '%prog: [options] runfolder -l 1,2,3 [runfolder -l 5,6 ...]'
  71
  72   parser = optparse.OptionParser(usage)
  73   parser.add_option('--dry-run', action='store_true',
  74                     help='print what would be done',
  75                     default=False)
  76
  77   parser.add_option('-d', '--dest-dir', dest='dest_dir',
  78                     help='location to write srf files to',
  79                     default='.')
  80   parser.add_option('-s', '--site',
  81                     help='site name',
  82                     default='Individual')
  83   parser.add_option('-l', '--lanes', dest='lanes', action="append",
  84          default=[],
  85          help='comma seperated list of lanes to add to srf'
  86   )
  87   parser.add_option('-j', '--jobs', default=1, type='int',
  88                     help='how many jobs to run simultaneously')
  89
  90
  91   return parser
  92
  93 def parse_lane_arg(lane_arg):
  94     """
  95     Convert comma sperated list of lane ids to a list of integers
  96     """
  97     lanes = []
  98     for lane in lane_arg.split(','):
  99         try:
 100             lane = int(lane)
 101             if lane < 1 or lane > 8:
 102                 parser.error('Lanes must be in range [1..8]')
 103             lanes.append(lane)
 104         except ValueError:
 105             parser.error('Lane selections must be integers')
 106     return lanes
 107
 108 def main(cmdline=None):
 109     parser = make_parser()
 110     opts, args = parser.parse_args(cmdline)
 111
 112     if len(args) == 0:
 113         parser.error('need runfolder arguments')
 114
 115     # parse lane arguemnts
 116     lanes_list = []
 117     if len(opts.lanes) == 0:
 118         lanes_list = [[1,2,3,4,5,6,7,8]] * len(args)
 119     elif len(opts.lanes) == len(args):
 120         for lane_arg in opts.lanes:
 121             lanes_list.append(parse_lane_arg(lane_arg))
 122     else:
 123         parser.error(
 124           "Number of lane arguments must match number of runfolders"
 125         )
 126
 127     # build list of commands
 128     cmds = {}
 129     for runfolder_path, lanes in zip(args, lanes_list):
 130         # normalize paths, either relative to home dirs or current dir
 131         runfolder_path = os.path.abspath(runfolder_path)
 132         # the last part of the path should be a runfolder name
 133         name = pathname_to_run_name(runfolder_path)
 134         # so any bustard directories?
 135         bustard_dir = find_bustard_dir(runfolder_path)
 136         # give up if there are anything other than 1 bustard dir
 137         if len(bustard_dir) > 1:
 138           print 'ERROR: Too many bustard directories',
 139           print "\n ".join(bustard_dir)
 140           return 1
 141
 142         elif len(bustard_dir) == 1:
 143           bustard_dir = bustard_dir[0]
 144           cmds[bustard_dir] = make_commands(name, lanes, opts.site, opts.dest_dir)
 145         else:
 146           print "ERROR: Couldn't find a bustard directory in", runfolder_path
 147           return 1
 148
 149     if not opts.dry_run:
 150       for cwd, cmd_list in cmds.items():
 151         curdir = os.getcwd()
 152         os.chdir(cwd)
 153         q = queuecommands.QueueCommands(cmd_list, opts.jobs)
 154         q.run()
 155         os.chdir(curdir)
 156     else:
 157       for cwd, cmd_list in cmds.items():
 158         print cwd
 159         print cmd_list
 160         print 'jobs: ', opts.jobs
 161
 162     return 0
 163
 164 if __name__ == "__main__":
 165     logging.basicConfig(level=logging.DEBUG)
 166     sys.exit(main(sys.argv[1:]))