mark the example submission rule files as being raw, so the escapes don't get confused
[htsworkflow.git] / scripts / htsw-gerald2bed
1 #!/usr/bin/python
2 """
3 Convert a group of eland_result files from a sequencer run to bed files.
4 """
5 from glob import glob
6 import logging
7 import optparse
8 import sys
9 import os
10
11 from htsworkflow.util.makebed import make_bed_from_eland_stream, make_description
12
13 LOGGER = logging.getLogger(__name__)
14
def make_bed_for_gerald(eland_dir, output_dir, prefix, database, flowcell):
    """
    Convert s_[1-8]_eland_result.txt files to corresponding bed files.

    :param eland_dir: directory searched for s_[1-8]_eland_result.txt
    :param output_dir: directory the bed files are written to; None
        means write them into eland_dir next to the eland files
    :param prefix: handed unchanged to make_bed_from_eland_stream
        (the value of the --chromosome command line option)
    :param database: handed unchanged to make_description
    :param flowcell: flowcell id, handed to make_description and logged
    :raises RuntimeError: if the destination directory already holds
        s_[1-8]_eland_result.bed files
    """
    # The --output option is documented as defaulting to the eland directory.
    if output_dir is None:
        output_dir = eland_dir

    eland_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.txt'))
    # Refuse to overwrite bed files from a previous conversion; look where
    # the new files will actually be written.
    out_files = glob(os.path.join(output_dir, 's_[1-8]_eland_result.bed'))
    if out_files:
        raise RuntimeError("please move old bedfiles")

    LOGGER.info('Processing %s using flowcell id %s' % (eland_dir, flowcell))
    for pathname in eland_files:
        name = os.path.basename(pathname)
        # The glob pattern guarantees the third character is a digit 1-8.
        lane = int(name[2])
        outname = 's_%d_eland_result.bed' % (lane,)
        LOGGER.info('Converting lane %d to %s' % (lane, outname))

        outpathname = os.path.join(output_dir, outname)
        # look up descriptions
        # NOTE(review): bed_name is never used; the eland file name is what
        # gets passed as the name argument below -- confirm that is intended.
        bed_name, description = make_description(database, flowcell, lane)

        # Context managers close both files even if the conversion raises.
        with open(pathname, 'r') as instream, \
                open(outpathname, 'w') as outstream:
            make_bed_from_eland_stream(
                instream, outstream, name, description, prefix
            )
42
def make_parser():
    """
    Build the optparse parser for the command line interface.

    Options: -o/--output, --chromosome (stored as ``prefix``),
    --database, --flowcell and -v/--verbose.

    :returns: a configured optparse.OptionParser
    """
    usage = """%prog: --flowcell <flowcell id> directory_name

directory should contain a set of 8 eland result files named like
s_[12345678]_eland_result.txt"""

    parser = optparse.OptionParser(usage)

    # The two halves of this help text used to be adjacent string literals
    # with no separator, which rendered as "bed filesdefaults to ...".
    parser.add_option('-o', '--output', dest='output',
                      help="destination directory for our bed files; "
                           "defaults to eland directory",
                      default=None)
    parser.add_option('--chromosome', dest='prefix',
                      help='Set the chromosome prefix name. defaults to "chr"',
                      default='chr')
    parser.add_option("--database", dest='database',
                      help="specify location of fctracker database",
                      default=None)
    parser.add_option("--flowcell", dest='flowcell',
                      help="specify the flowcell id for this run",
                      default=None)
    parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                      help='increase verbosity',
                      default=False)
    return parser
69
def main(command_line=None):
    """
    Parse the command line and convert one directory of eland results.

    :param command_line: list of argument strings; when None,
        sys.argv[1:] is used
    :returns: 0 after a successful conversion; a missing or invalid
        directory argument, or a missing --flowcell, is reported
        through parser.error
    """
    # basicConfig is a function of the logging module; Logger instances
    # have no such method, so calling it on LOGGER raised AttributeError.
    logging.basicConfig(level=logging.WARNING)
    if command_line is None:
        command_line = sys.argv[1:]

    parser = make_parser()
    (opts, args) = parser.parse_args(command_line)

    if len(args) != 1:
        parser.error('Directory name required')

    eland_dir = args[0]
    if not os.path.isdir(eland_dir):
        parser.error('%s must be a directory' % (eland_dir,))

    if opts.flowcell is None:
        parser.error('Flowcell ID required')

    if opts.verbose:
        # Lower the threshold on the root logger so INFO messages are shown.
        logging.getLogger().setLevel(logging.INFO)

    make_bed_for_gerald(eland_dir, opts.output, opts.prefix,
                        opts.database, opts.flowcell)

    return 0
95
# Run the converter only when executed as a script, and hand main()'s
# return value to the shell as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
98