scripts/srf

   1 #!/usr/bin/python
   2
   3 from glob import glob
   4 import logging
   5 import optparse
   6 import os
   7 import subprocess
   8 import sys
   9
  10 from htsworkflow.util import queuecommands
  11 from htsworkflow.pipelines import runfolder
  12
# Selectors for the conversion command that make_commands() builds.
# These are also the integer values accepted by -r/--runfolder-version.
SOLEXA2SRF = 0      # solexa2srf (Solexa pipeline 0.2.6 - 0.3)
ILLUMINA2SRF10 = 1  # illumina2srf -v1.0 (illumina pipeline 1.0)
ILLUMINA2SRF11 = 2  # illumina2srf on qseq files (illumina pipeline 1.1rc1 and later)
  16
  17 def make_commands(run_name, lanes, site_name, destdir, cmdlevel=ILLUMINA2SRF11):
  18   """
  19   make a subprocess-friendly list of command line arguments to run solexa2srf
  20   generates files like:
  21   woldlab:080514_HWI-EAS229_0029_20768AAXX:8.srf
  22    site        run name                    lane
  23
  24   run_name - most of the file name (run folder name is a good choice)
  25   lanes - list of integers corresponding to which lanes to process
  26   site_name - name of your "sequencing site" or "Individual"
  27   destdir - where to write all the srf files
  28   """
  29   cmd_list = []
  30   for lane in lanes:
  31     name_prefix = '%s_%%l_%%t_' % (run_name,)
  32     destname = '%s_%s_%d.srf' % (site_name, run_name, lane)
  33     destdir = os.path.normpath(destdir)
  34     dest_path = os.path.join(destdir, destname)
  35     seq_pattern = 's_%d_*_seq.txt' % (lane,)
  36
  37     if cmdlevel == SOLEXA2SRF:
  38         cmd = ['solexa2srf',
  39                '-N', name_prefix,
  40                '-n', '%3x:%3y',
  41                '-o', dest_path,
  42                seq_pattern]
  43     elif cmdlevel == ILLUMINA2SRF10:
  44         cmd = ['illumina2srf',
  45                '-v1.0',
  46                '-o', dest_path,
  47                seq_pattern]
  48     elif cmdlevel == ILLUMINA2SRF11:
  49         seq_pattern = 's_%d_*_qseq.txt' % (lane,)
  50         cmd = ['illumina2srf',
  51                '-o', dest_path,
  52                seq_pattern]
  53     else:
  54         raise ValueError("Unrecognized run level %d" % (cmdlevel,))
  55
  56     cmd_list.append(" ".join(cmd))
  57   return cmd_list
  58
  59 def pathname_to_run_name(base):
  60   """
  61   Convert a pathname to a base runfolder name
  62   handle the case with a trailing /
  63   """
  64   name = ""
  65   while len(name) == 0:
  66     base, name = os.path.split(base)
  67     if len(base) == 0:
  68       return None
  69   return name
  70
  71 def make_parser():
  72   usage = '%prog: [options] runfolder -l 1,2,3 [runfolder -l 5,6 ...]'
  73
  74   parser = optparse.OptionParser(usage)
  75   parser.add_option('--dry-run', action='store_true',
  76                     help='print what would be done',
  77                     default=False)
  78
  79   parser.add_option('-d', '--dest-dir', dest='dest_dir',
  80                     help='location to write srf files to',
  81                     default='.')
  82   parser.add_option('-s', '--site',
  83                     help='site name',
  84                     default='Individual')
  85   parser.add_option('-l', '--lanes', dest='lanes', action="append",
  86          default=[],
  87          help='comma seperated list of lanes to add to srf'
  88   )
  89   parser.add_option('-j', '--jobs', default=1, type='int',
  90                     help='how many jobs to run simultaneously')
  91   parser.add_option('-r', '--runfolder-version', default=ILLUMINA2SRF11, type='int',
  92                     help='Which class of srf file should we attempt to create\n'
  93                          '0 = Solexa pipeline 0.2.6 - 0.3\n'
  94                          '1 = illumina pipeline 1.0\n'
  95                          '2 = illumina pipeline 1.1rc1 and later \n')
  96
  97   parser.add_option('-v', '--verbose', dest='verbose',
  98                     default=False, action='store_true',
  99                     help='report more about internals (INFO)')
 100   parser.add_option('--debug', dest='debug',
 101                     default=False, action='store_true',
 102                     help='report even more about internals (DEBUG)')
 103
 104   return parser
 105
 106 def parse_lane_arg(lane_arg):
 107     """
 108     Convert comma sperated list of lane ids to a list of integers
 109     """
 110     lanes = []
 111     for lane in lane_arg.split(','):
 112         try:
 113             lane = int(lane)
 114             if lane < 1 or lane > 8:
 115                 parser.error('Lanes must be in range [1..8]')
 116             lanes.append(lane)
 117         except ValueError:
 118             parser.error('Lane selections must be integers')
 119     return lanes
 120
 121 def main(cmdline=None):
 122     parser = make_parser()
 123     opts, args = parser.parse_args(cmdline)
 124
 125     if opts.debug:
 126         logging.basicConfig(level=logging.DEBUG)
 127     elif opts.verbose:
 128         logging.basicConfig(level=logging.INFO)
 129     else:
 130         logging.basicConfig(level=logging.WARNING)
 131
 132     if len(args) == 0:
 133         parser.error('need runfolder arguments')
 134
 135     # parse lane arguemnts
 136     lanes_list = []
 137     if len(opts.lanes) == 0:
 138         lanes_list = [[1,2,3,4,5,6,7,8]] * len(args)
 139     elif len(opts.lanes) == len(args):
 140         for lane_arg in opts.lanes:
 141             lanes_list.append(parse_lane_arg(lane_arg))
 142     else:
 143         parser.error(
 144           "Number of lane arguments must match number of runfolders"
 145         )
 146
 147     # build list of commands
 148     cmds = {}
 149     for runfolder_path, lanes in zip(args, lanes_list):
 150         # normalize paths, either relative to home dirs or current dir
 151         runfolder_path = os.path.abspath(runfolder_path)
 152         # the last part of the path should be a runfolder name
 153         name = pathname_to_run_name(runfolder_path)
 154         # so any bustard directories?
 155         runs = runfolder.get_runs(runfolder_path)
 156         # give up if there are anything other than 1 run
 157         if len(runs) > 1:
 158           print 'ERROR: Too many run directories in %s' %(runfolder_path,)
 159           return 1
 160         elif len(runs) == 1:
 161           bustard_dir = runs[0].bustard.pathname
 162           cmds[bustard_dir] = make_commands(name, lanes, opts.site, opts.dest_dir, opts.runfolder_version)
 163         else:
 164           print "ERROR: Couldn't find a bustard directory in", runfolder_path
 165           return 1
 166
 167     if not opts.dry_run:
 168       for cwd, cmd_list in cmds.items():
 169         curdir = os.getcwd()
 170         os.chdir(cwd)
 171         q = queuecommands.QueueCommands(cmd_list, opts.jobs)
 172         q.run()
 173         os.chdir(curdir)
 174     else:
 175       for cwd, cmd_list in cmds.items():
 176         print cwd
 177         print cmd_list
 178         print 'jobs: ', opts.jobs
 179
 180     return 0
 181
# When run as a script, hand the command line arguments to main() and use
# its return value as the process exit status.
if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))