From: Diane Trout Date: Thu, 14 Aug 2008 00:09:09 +0000 (+0000) Subject: Utility to create srf files from a bustard directory X-Git-Tag: stanford.caltech-merged-database-2009-jan-15~45 X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=e3cd2acc664670981e943bc5281e834b17b16861 Utility to create srf files from a bustard directory this version works, as long as you launch it in the bustard directory in question. There seems to be some messiness in the interaction between how the list of arguments passed to Popen with shell=True has any file globs expanded. I had to switch from passing a list of arguments to Popen to string, and I'm still not sure if any of the code to try and change the directory to the bustard directory actually worked correctly. (which is why it only works when launching from the bustard directory) --- diff --git a/gaworkflow/util/queuecommands.py b/gaworkflow/util/queuecommands.py index 4873a51..0c34292 100644 --- a/gaworkflow/util/queuecommands.py +++ b/gaworkflow/util/queuecommands.py @@ -14,7 +14,7 @@ class QueueCommands(object): finish. """ - def __init__(self, cmd_list, N=0): + def __init__(self, cmd_list, N=0, cwd=None): """ cmd_list is a list of elements suitable for subprocess N is the number of simultanious processes to run. 
@@ -27,6 +27,7 @@ class QueueCommands(object):
         self.to_run = cmd_list[:]
         self.running = {}
         self.N = N
+        self.cwd = cwd
 
     def under_process_limit(self):
         """
@@ -46,10 +47,11 @@ class QueueCommands(object):
         (or have run out of jobs)
         """
         queue_log = logging.getLogger('queue')
+        queue_log.info('using %s as cwd' % (self.cwd,))
 
         while (len(self.to_run) > 0) and self.under_process_limit():
             cmd = self.to_run.pop(0)
-            p = subprocess.Popen(cmd, stdout=PIPE)
+            p = subprocess.Popen(cmd, stdout=PIPE, cwd=self.cwd, shell=True)
             self.running[p.stdout] = p
             queue_log.info("Created process %d from %s" % (p.pid, str(cmd)))
 
diff --git a/scripts/srf b/scripts/srf
new file mode 100644
index 0000000..fcca2ea
--- /dev/null
+++ b/scripts/srf
@@ -0,0 +1,190 @@
+#!/usr/bin/python
+
+from glob import glob
+import logging
+import optparse
+import os
+import subprocess
+import sys
+
+from gaworkflow.util import queuecommands
+
+def make_commands(run_name, lanes, site_name, destdir):
+    """
+    Build one solexa2srf shell command string per requested lane.
+
+    generates files like:
+    woldlab:080514_HWI-EAS229_0029_20768AAXX-8.srf
+     site        run name                    lane
+
+    The commands are strings, not argument lists, because they are run
+    with shell=True and the shell has to expand the seq file glob.
+
+    run_name - most of the file name (run folder name is a good choice)
+    lanes - list of integers corresponding to which lanes to process
+    site_name - name of your "sequencing site" or "Individual"
+    destdir - where to write all the srf files
+    """
+    # neither of these depends on the lane, so compute them once
+    destdir = os.path.normpath(destdir)
+    name_prefix = '%s_%%l_%%t_' % (run_name,)
+
+    cmd_list = []
+    for lane in lanes:
+        destname = '%s:%s-%d.srf' % (site_name, run_name, lane)
+        dest_path = os.path.join(destdir, destname)
+        seq_pattern = 's_%d_*_seq.txt' % (lane,)
+
+        cmd = ['solexa2srf',
+               '-N', name_prefix,
+               '-n', '%3x:%3y',
+               '-o', dest_path,
+               seq_pattern]
+
+        cmd_list.append(" ".join(cmd))
+    return cmd_list
+
+def pathname_to_run_name(pathname):
+    """
+    Convert a pathname to a base runfolder name
+
+    A trailing / is ignored, and a bare folder name with no directory
+    part is returned as is.  Returns None when the path has no usable
+    last component ('', '.', '..' or '/').
+    """
+    # normpath drops trailing slashes so basename sees the real last part
+    name = os.path.basename(os.path.normpath(pathname))
+    if name in ('', '.', '..'):
+        return None
+    return name
+
+def find_bustard_dir(pathname):
+    """
+    Return a list of every Data/*Firecrest*/Bustard* path under pathname
+    """
+    # fixme: for don't repeat yourself this should some how be related
+    # fixme: to pipeline.runfolder
+
+    datadir = os.path.join(pathname, 'Data')
+    logging.info("searching in %s" % (datadir,))
+
+    bustard_dirs = []
+    for firecrest_pathname in glob(os.path.join(datadir,"*Firecrest*")):
+        bustard_glob = os.path.join(firecrest_pathname, "Bustard*")
+        bustard_dirs.extend(glob(bustard_glob))
+    return bustard_dirs
+
+def make_parser():
+    """
+    Build the optparse parser for the srf command line
+    """
+    usage = '%prog: [options] runfolder -l 1,2,3 [runfolder -l 5,6 ...]'
+
+    parser = optparse.OptionParser(usage)
+    parser.add_option('--dry-run', action='store_true',
+                      help='print what would be done',
+                      default=False)
+
+    parser.add_option('-d', '--dest-dir', dest='dest_dir',
+                      help='location to write srf files to',
+                      default='.')
+    parser.add_option('-s', '--site',
+                      help='site name',
+                      default='Individual')
+    parser.add_option('-l', '--lanes', dest='lanes', action="append",
+                      default=[],
+                      help='comma seperated list of lanes to add to srf'
+                      )
+    parser.add_option('-j', '--jobs', default=1, type='int',
+                      help='how many jobs to run simultaneously')
+
+    return parser
+
+def parse_lane_arg(lane_arg, parser=None):
+    """
+    Convert comma separated list of lane ids to a list of integers
+
+    lane_arg - string like '1,2,3'
+    parser - optional OptionParser; when given, bad input is reported
+             through parser.error, otherwise ValueError is raised
+    """
+    def fail(message):
+        if parser is not None:
+            parser.error(message)
+        raise ValueError(message)
+
+    lanes = []
+    for lane in lane_arg.split(','):
+        try:
+            lane = int(lane)
+        except ValueError:
+            fail('Lane selections must be integers')
+        if lane < 1 or lane > 8:
+            fail('Lanes must be in range [1..8]')
+        lanes.append(lane)
+    return lanes
+
+def main(cmdline=None):
+    """
+    Parse cmdline, build the solexa2srf commands for each runfolder and
+    either print them (--dry-run) or start them.
+
+    Returns 0 on success, 1 if a runfolder could not be used.
+    """
+    parser = make_parser()
+    opts, args = parser.parse_args(cmdline)
+
+    if len(args) == 0:
+        parser.error('need runfolder arguments')
+
+    # parse lane arguments
+    lanes_list = []
+    if len(opts.lanes) == 0:
+        # no -l options: use every lane for every runfolder
+        lanes_list = [[1,2,3,4,5,6,7,8]] * len(args)
+    elif len(opts.lanes) == len(args):
+        for lane_arg in opts.lanes:
+            lanes_list.append(parse_lane_arg(lane_arg, parser))
+    else:
+        parser.error(
+          "Number of lane arguments must match number of runfolders"
+        )
+
+    # build list of commands, keyed by the directory to run them in
+    cmds = {}
+    for runfolder_path, lanes in zip(args, lanes_list):
+        name = pathname_to_run_name(runfolder_path)
+        if name is None:
+            print 'ERROR: No run name in %s' % (runfolder_path,)
+            return 1
+        bustard_dir = find_bustard_dir(runfolder_path)
+        if len(bustard_dir) == 0:
+            print 'ERROR: No bustard directory in %s' % (runfolder_path,)
+            return 1
+        if len(bustard_dir) > 1:
+            print 'ERROR: Too many bustard directories'
+            print "\n ".join(bustard_dir)
+            return 1
+        cmds[bustard_dir[0]] = make_commands(name, lanes,
+                                             opts.site, opts.dest_dir)
+
+    if not opts.dry_run:
+        for cwd, cmd_list in cmds.items():
+            # let QueueCommands start each job in the bustard directory
+            # rather than changing this process's working directory
+            q = queuecommands.QueueCommands(cmd_list, opts.jobs,
+                                            cwd=os.path.abspath(cwd))
+            # NOTE(review): start_jobs only launches while under the
+            # process limit; confirm the rest get started and waited on
+            q.start_jobs()
+    else:
+        for cwd, cmd_list in cmds.items():
+            print cwd
+            print cmd_list
+            print 'jobs: ', opts.jobs
+
+    return 0
+
+if __name__ == "__main__":
+    logging.basicConfig(level=logging.DEBUG)
+    sys.exit(main(sys.argv[1:]))