#!/usr/bin/python
# Provenance (from the git patch this script was delivered in):
#   commit:  71508b08c29cc8c4e4711e7783ad7458e1ee72d6
#   author:  Diane Trout
#   date:    Tue, 30 Sep 2008 23:43:25 +0000
#   subject: [PATCH] Pull tool to make srf files from trunk
#            (depends on util/queuecommands.py)
#   file:    htswdataprod/scripts/srf (new file, mode 100644, index f8617ea)
"""
Queue solexa2srf jobs that build one .srf file per lane for one or more
run folders.

For each run folder given on the command line the script locates the
single Bustard directory under Data/*Firecrest*/, builds one solexa2srf
command per requested lane and runs the commands from inside that
Bustard directory (or just prints them with --dry-run).
"""

from glob import glob
import logging
import optparse
import os
import subprocess
import sys

# NOTE: gaworkflow.util.queuecommands is imported inside _run_commands()
# rather than here, so that --help, --dry-run and the helper functions
# below keep working on machines where that package is not installed.


def make_commands(run_name, lanes, site_name, destdir):
    """
    Build one solexa2srf command line per lane.

    Each command writes a file named SITE_RUNNAME_LANE.srf, for example:
      woldlab_080514_HWI-EAS229_0029_20768AAXX_8.srf

    run_name - most of the file name (run folder name is a good choice)
    lanes - list of integers corresponding to which lanes to process
    site_name - name of your "sequencing site" or "Individual"
    destdir - where to write all the srf files

    Returns a list of strings, one complete command line per lane, in the
    same order as lanes (an empty list when lanes is empty).
    """
    # Both values are the same for every lane, so compute them once.
    destdir = os.path.normpath(destdir)
    name_prefix = '%s_%%l_%%t_' % (run_name,)

    cmd_list = []
    for lane in lanes:
        destname = '%s_%s_%d.srf' % (site_name, run_name, lane)
        dest_path = os.path.join(destdir, destname)
        # Relative pattern: main() runs the command from inside the
        # Bustard directory, where the s_N_*_seq.txt files are expected.
        seq_pattern = 's_%d_*_seq.txt' % (lane,)

        cmd = ['solexa2srf',
               '-N', name_prefix,
               '-n', '%3x:%3y',
               '-o', dest_path,
               seq_pattern]

        # NOTE(review): the arguments are joined without any quoting and
        # the sequence pattern contains a wildcard, so this presumably
        # relies on queuecommands running the string through a shell.
        # Paths containing spaces would break -- confirm before quoting.
        cmd_list.append(" ".join(cmd))
    return cmd_list


def pathname_to_run_name(base):
    """
    Convert a pathname to a base runfolder name.

    Returns the last non-empty path component, ignoring any trailing
    path separators, so both 'a/b' and 'a/b/' give 'b'.  Returns None
    when there is no such component (an empty string or a bare root).
    """
    while True:
        head, name = os.path.split(base)
        if name:
            return name
        if head == base:
            # os.path.split made no progress: base is '' or a root
            # directory, so there is no component left to return.
            return None
        # base ended in a separator; retry with the separator removed.
        base = head


def find_bustard_dir(pathname):
    """
    Return the Bustard directories found under a run folder.

    Matches PATHNAME/Data/*Firecrest*/Bustard* and returns the matching
    paths as a sorted list, which is empty when nothing matches.
    """
    # FIXME: to avoid repeating ourselves this should somehow be related
    # FIXME: to pipeline.runfolder
    datadir = os.path.join(pathname, 'Data')
    logging.info("searching in %s", datadir)

    # Sorted so that callers (and error messages) see a stable order;
    # glob itself makes no ordering promise.
    return sorted(glob(os.path.join(datadir, "*Firecrest*", "Bustard*")))


def make_parser():
    """
    Build the optparse parser for the command line.

    Options: --dry-run, -d/--dest-dir, -s/--site, -l/--lanes (may be
    repeated, one per run folder) and -j/--jobs.
    """
    usage = '%prog: [options] runfolder -l 1,2,3 [runfolder -l 5,6 ...]'

    parser = optparse.OptionParser(usage)
    parser.add_option('--dry-run', action='store_true',
                      help='print what would be done',
                      default=False)

    parser.add_option('-d', '--dest-dir', dest='dest_dir',
                      help='location to write srf files to',
                      default='.')
    parser.add_option('-s', '--site',
                      help='site name',
                      default='Individual')
    parser.add_option('-l', '--lanes', dest='lanes', action="append",
                      default=[],
                      help='comma seperated list of lanes to add to srf'
                      )
    parser.add_option('-j', '--jobs', default=1, type='int',
                      help='how many jobs to run simultaneously')

    return parser


def parse_lane_arg(lane_arg):
    """
    Convert a comma separated list of lane ids to a list of integers.

    Raises ValueError, with a message suitable for showing to the user,
    when an entry is not an integer or is outside the range 1 to 8.
    """
    lanes = []
    for field in lane_arg.split(','):
        try:
            lane = int(field)
        except ValueError:
            raise ValueError('Lane selections must be integers')
        if lane < 1 or lane > 8:
            raise ValueError('Lanes must be in range [1..8]')
        lanes.append(lane)
    return lanes


def _run_commands(cmds, jobs):
    """Run each command list from inside its directory, then restore cwd."""
    # Deferred import: only needed when jobs are actually executed.
    from gaworkflow.util import queuecommands

    curdir = os.getcwd()
    try:
        for cwd, cmd_list in cmds.items():
            os.chdir(cwd)
            q = queuecommands.QueueCommands(cmd_list, jobs)
            q.run()
    finally:
        # Put the caller back where it started even if a queue fails.
        os.chdir(curdir)


def main(cmdline=None):
    """
    Command line entry point.

    cmdline - list of arguments (without the program name); None lets
              optparse read sys.argv[1:]

    Returns 0 on success and 1 when a run folder does not contain
    exactly one Bustard directory.  Usage errors exit through
    parser.error().
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)

    if len(args) == 0:
        parser.error('need runfolder arguments')

    # parse lane arguments: either none at all (every lane of every run
    # folder) or exactly one -l option per run folder
    lanes_list = []
    if len(opts.lanes) == 0:
        lanes_list = [list(range(1, 9)) for _ in args]
    elif len(opts.lanes) == len(args):
        for lane_arg in opts.lanes:
            try:
                lanes_list.append(parse_lane_arg(lane_arg))
            except ValueError as err:
                parser.error(str(err))
    else:
        parser.error(
            "Number of lane arguments must match number of runfolders"
        )

    # build list of commands, keyed by the directory to run them in
    cmds = {}
    for runfolder_path, lanes in zip(args, lanes_list):
        # normalize paths, either relative to home dirs or current dir
        runfolder_path = os.path.abspath(runfolder_path)
        # the last part of the path should be a runfolder name
        name = pathname_to_run_name(runfolder_path)
        # so any bustard directories?
        bustard_dirs = find_bustard_dir(runfolder_path)
        # give up if there is anything other than 1 bustard dir
        if len(bustard_dirs) > 1:
            print('ERROR: Too many bustard directories ' +
                  "\n ".join(bustard_dirs))
            return 1
        elif len(bustard_dirs) == 1:
            bustard_dir = bustard_dirs[0]
            cmds[bustard_dir] = make_commands(name, lanes,
                                              opts.site, opts.dest_dir)
        else:
            print("ERROR: Couldn't find a bustard directory in %s"
                  % (runfolder_path,))
            return 1

    if not opts.dry_run:
        _run_commands(cmds, opts.jobs)
    else:
        for cwd, cmd_list in cmds.items():
            print(cwd)
            print(cmd_list)
        print('jobs:  %s' % (opts.jobs,))

    return 0


if __name__ == "__main__":
    logging.basicConfig(level=logging.DEBUG)
    sys.exit(main(sys.argv[1:]))