scripts/srf

   1 #!/usr/bin/python
   2
   3 from glob import glob
   4 import logging
   5 import optparse
   6 import os
   7 import subprocess
   8 import sys
   9
  10 from htsworkflow.util import queuecommands
  11 from htsworkflow.pipelines import runfolder
  12
  13 def make_commands(run_name, lanes, site_name, destdir):
  14   """
  15   make a subprocess-friendly list of command line arguments to run solexa2srf
  16   generates files like:
  17   woldlab:080514_HWI-EAS229_0029_20768AAXX:8.srf
  18    site        run name                    lane
  19
  20   run_name - most of the file name (run folder name is a good choice)
  21   lanes - list of integers corresponding to which lanes to process
  22   site_name - name of your "sequencing site" or "Individual"
  23   destdir - where to write all the srf files
  24   """
  25   cmd_list = []
  26   for lane in lanes:
  27     name_prefix = '%s_%%l_%%t_' % (run_name,)
  28     destname = '%s_%s_%d.srf' % (site_name, run_name, lane)
  29     destdir = os.path.normpath(destdir)
  30     dest_path = os.path.join(destdir, destname)
  31     seq_pattern = 's_%d_*_seq.txt' % (lane,)
  32
  33     cmd = ['solexa2srf',
  34            '-N', name_prefix,
  35            '-n', '%3x:%3y',
  36            '-o', dest_path,
  37            seq_pattern]
  38
  39     cmd_list.append(" ".join(cmd))
  40   return cmd_list
  41
  42 def pathname_to_run_name(base):
  43   """
  44   Convert a pathname to a base runfolder name
  45   handle the case with a trailing /
  46   """
  47   name = ""
  48   while len(name) == 0:
  49     base, name = os.path.split(base)
  50     if len(base) == 0:
  51       return None
  52   return name
  53
  54 def make_parser():
  55   usage = '%prog: [options] runfolder -l 1,2,3 [runfolder -l 5,6 ...]'
  56
  57   parser = optparse.OptionParser(usage)
  58   parser.add_option('--dry-run', action='store_true',
  59                     help='print what would be done',
  60                     default=False)
  61
  62   parser.add_option('-d', '--dest-dir', dest='dest_dir',
  63                     help='location to write srf files to',
  64                     default='.')
  65   parser.add_option('-s', '--site',
  66                     help='site name',
  67                     default='Individual')
  68   parser.add_option('-l', '--lanes', dest='lanes', action="append",
  69          default=[],
  70          help='comma seperated list of lanes to add to srf'
  71   )
  72   parser.add_option('-j', '--jobs', default=1, type='int',
  73                     help='how many jobs to run simultaneously')
  74
  75
  76   return parser
  77
  78 def parse_lane_arg(lane_arg):
  79     """
  80     Convert comma sperated list of lane ids to a list of integers
  81     """
  82     lanes = []
  83     for lane in lane_arg.split(','):
  84         try:
  85             lane = int(lane)
  86             if lane < 1 or lane > 8:
  87                 parser.error('Lanes must be in range [1..8]')
  88             lanes.append(lane)
  89         except ValueError:
  90             parser.error('Lane selections must be integers')
  91     return lanes
  92
  93 def main(cmdline=None):
  94     parser = make_parser()
  95     opts, args = parser.parse_args(cmdline)
  96
  97     if len(args) == 0:
  98         parser.error('need runfolder arguments')
  99
 100     # parse lane arguemnts
 101     lanes_list = []
 102     if len(opts.lanes) == 0:
 103         lanes_list = [[1,2,3,4,5,6,7,8]] * len(args)
 104     elif len(opts.lanes) == len(args):
 105         for lane_arg in opts.lanes:
 106             lanes_list.append(parse_lane_arg(lane_arg))
 107     else:
 108         parser.error(
 109           "Number of lane arguments must match number of runfolders"
 110         )
 111
 112     # build list of commands
 113     cmds = {}
 114     for runfolder_path, lanes in zip(args, lanes_list):
 115         # normalize paths, either relative to home dirs or current dir
 116         runfolder_path = os.path.abspath(runfolder_path)
 117         # the last part of the path should be a runfolder name
 118         name = pathname_to_run_name(runfolder_path)
 119         # so any bustard directories?
 120         runs = runfolder.get_runs(runfolder_path)
 121         # give up if there are anything other than 1 run
 122         if len(runs) > 1:
 123           print 'ERROR: Too many run directories in %s' %(runfolder_path,)
 124           return 1
 125         elif len(runs) == 1:
 126           bustard_dir = runs[0].bustard.pathname
 127           cmds[bustard_dir] = make_commands(name, lanes, opts.site, opts.dest_dir)
 128         else:
 129           print "ERROR: Couldn't find a bustard directory in", runfolder_path
 130           return 1
 131
 132     if not opts.dry_run:
 133       for cwd, cmd_list in cmds.items():
 134         curdir = os.getcwd()
 135         os.chdir(cwd)
 136         q = queuecommands.QueueCommands(cmd_list, opts.jobs)
 137         q.run()
 138         os.chdir(curdir)
 139     else:
 140       for cwd, cmd_list in cmds.items():
 141         print cwd
 142         print cmd_list
 143         print 'jobs: ', opts.jobs
 144
 145     return 0
 146
 147 if __name__ == "__main__":
 148     logging.basicConfig(level=logging.DEBUG)
 149     sys.exit(main(sys.argv[1:]))