From: Diane Trout Date: Wed, 28 May 2008 00:42:19 +0000 (+0000) Subject: Compute all the details needed to create our 25bp rerun given just X-Git-Tag: stanford.caltech-merged-database-2009-jan-15~56 X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=4f67393572899735a20a2dd1f543ec9fe8fd54da Compute all the details needed to create our 25bp rerun given just a runfolder. (This assumes more than the --gerald/-o version that I first implemented, which is still available.) Now you can give rerun_eland a runfolder name, and it will (if pipeline.runfolder finds exactly one run) extract the bases from that run into a new Data/C1-<N> directory and should then launch eland. --- diff --git a/scripts/rerun_eland.py b/scripts/rerun_eland.py index 25aecc4..c271970 100644 --- a/scripts/rerun_eland.py +++ b/scripts/rerun_eland.py @@ -7,6 +7,7 @@ import subprocess import sys from gaworkflow.pipeline import gerald +from gaworkflow.pipeline import runfolder def make_query_filename(eland_obj, output_dir): query_name = '%s_%s_eland_query.txt' @@ -43,7 +44,7 @@ def extract_sequence(inpathname, query_pathname, length, dry_run=False): finally: outstream.close() instream.close() - + def run_eland(length, query_name, genome, result_name, multi=False, dry_run=False): cmdline = ['eland_%d' % (length,), query_name, genome, result_name] if multi: @@ -63,6 +64,11 @@ def rerun(gerald_dir, output_dir, length=25, dry_run=False): logging.info("Extracting %d bp from files in %s" % (length, gerald_dir)) g = gerald.gerald(gerald_dir) + # this will only work if we're only missing the last dir in output_dir + if not os.path.exists(output_dir): + logging.info("Making %s" %(output_dir,)) + if not dry_run: os.mkdir(output_dir) + processes = [] for lane_id, lane_param in g.lanes.items(): eland = g.eland_results[lane_id] @@ -85,7 +91,7 @@ def rerun(gerald_dir, output_dir, length=25, dry_run=False): p.wait() def make_parser(): - usage = '%prog: --gerald -o ' + usage = '%prog: 
[options] runfolder' parser = OptionParser(usage) @@ -115,15 +121,30 @@ def main(cmdline=None): parser = make_parser() opts, args = parser.parse_args(cmdline) - if opts.gerald is None: + if opts.length < 16 or opts.length > 32: + parser.error("eland can only process reads in the range 16-32") + + if len(args) > 1: + parser.error("Can only process one runfolder directory") + elif len(args) == 1: + runs = runfolder.get_runs(args[0]) + if len(runs) != 1: + parser.error("Not a runfolder") + opts.gerald = runs[0].gerald.pathname + if opts.output is None: + opts.output = os.path.join( + runs[0].pathname, + 'Data', + # pythons 0..n ==> elands 1..n+1 + 'C1-%d' % (opts.length+1,) + ) + + elif opts.gerald is None: parser.error("need gerald directory") if opts.output is None: parser.error("specify location for the new eland files") - if opts.length < 16 or opts.length > 32: - parser.error("eland can only process reads in the range 16-32") - if opts.verbose: root_logger = logging.getLogger() root_logger.setLevel(logging.INFO)