4 from optparse import OptionParser
9 from gaworkflow.pipeline import gerald
11 def make_query_filename(eland_obj, output_dir):
12 query_name = '%s_%s_eland_query.txt'
13 query_name %= (eland_obj.sample_name, eland_obj.lane_id)
15 query_pathname = os.path.join(output_dir, query_name)
17 if os.path.exists(query_pathname):
18 logging.warn("overwriting %s" % (query_pathname,))
22 def make_result_filename(eland_obj, output_dir):
23 result_name = '%s_%s_eland_result.txt'
24 result_name %= (eland_obj.sample_name, eland_obj.lane_id)
26 result_pathname = os.path.join(output_dir, result_name)
28 if os.path.exists(result_pathname):
29 logging.warn("overwriting %s" % (result_pathname,))
31 return result_pathname
33 def extract_sequence(inpathname, query_pathname, length, dry_run=False):
34 logging.info('extracting %d bases' %(length,))
35 logging.info('extracting from %s' %(inpathname,))
36 logging.info('extracting to %s' %(query_pathname,))
40 instream = open(inpathname, 'r')
41 outstream = open(query_pathname, 'w')
42 gerald.extract_eland_sequence(instream, outstream, 0, length)
47 def run_eland(length, query_name, genome, result_name, multi=False, dry_run=False):
48 cmdline = ['eland_%d' % (length,), query_name, genome, result_name]
50 cmdline += ['--multi']
52 logging.info('running eland: ' + " ".join(cmdline))
54 return subprocess.Popen(cmdline)
59 def rerun(gerald_dir, output_dir, length=25, dry_run=False):
61 look for eland files in gerald_dir and write a subset to output_dir
63 logging.info("Extracting %d bp from files in %s" % (length, gerald_dir))
64 g = gerald.gerald(gerald_dir)
67 for lane_id, lane_param in g.lanes.items():
68 eland = g.eland_results[lane_id]
70 inpathname = eland.pathname
71 query_pathname = make_query_filename(eland, output_dir)
72 result_pathname = make_result_filename(eland, output_dir)
74 extract_sequence(inpathname, query_pathname, length, dry_run=dry_run)
78 lane_param.eland_genome,
88 usage = '%prog: --gerald <gerald dir> -o <new dir>'
90 parser = OptionParser(usage)
92 parser.add_option('--gerald',
93 help='specify location of GERALD directory',
95 parser.add_option('-o', '--output',
96 help='specify output location of files',
98 parser.add_option('-l', '--read-length', type='int',
99 help='specify new eland length',
102 parser.add_option('--dry-run', action='store_true',
103 help='only pretend to run',
105 parser.add_option('-v', '--verbose', action='store_true',
106 help='increase verbosity',
112 def main(cmdline=None):
113 logging.basicConfig(level=logging.WARNING)
115 parser = make_parser()
116 opts, args = parser.parse_args(cmdline)
118 if opts.gerald is None:
119 parser.error("need gerald directory")
121 if opts.output is None:
122 parser.error("specify location for the new eland files")
124 if opts.length < 16 or opts.length > 32:
125 parser.error("eland can only process reads in the range 16-32")
128 root_logger = logging.getLogger()
129 root_logger.setLevel(logging.INFO)
131 rerun(opts.gerald, opts.output, opts.length, dry_run=opts.dry_run)
135 if __name__ == "__main__":
136 sys.exit(main(sys.argv[1:]))