Merge branch 'master' into debianized
[htsworkflow.git] / scripts / htsw-gerald2bed
1 #!/usr/bin/python
2 """
3 Convert a group of eland_result files from a sequencer run to bed files.
4 """
5 from glob import glob
6 import logging
7 import optparse
8 import sys
9 import os
10
11 from htsworkflow.util.makebed import make_bed_from_eland_stream, make_description
12
def make_bed_for_gerald(eland_dir, output_dir, prefix, database, flowcell):
    """
    Convert s_[1-8]_eland_result.txt files to corresponding bed files.

    eland_dir  -- directory searched for s_[1-8]_eland_result.txt files
    output_dir -- directory the bed files are written to; when None the
                  bed files are written into eland_dir
    prefix     -- chromosome prefix handed to make_bed_from_eland_stream
    database   -- handed to make_description to look up the description
    flowcell   -- flowcell id handed to make_description

    Raises RuntimeError if the destination directory already contains
    s_[1-8]_eland_result.bed files, so existing bed files are never
    overwritten.
    """
    # honour the requested destination, falling back to the eland directory
    if output_dir is None:
        output_dir = eland_dir

    eland_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.txt'))
    out_files = glob(os.path.join(output_dir, 's_[1-8]_eland_result.bed'))
    if len(out_files) > 0:
        raise RuntimeError("please move old bedfiles")

    logging.info('Processing %s using flowcell id %s', eland_dir, flowcell)
    for pathname in eland_files:
        path, name = os.path.split(pathname)
        # file names look like s_<lane>_eland_result.txt, so the lane
        # number is the single digit at index 2
        lane = int(name[2])
        outname = 's_%d_eland_result.bed' % (lane,)
        logging.info('Converting lane %d to %s', lane, outname)

        outpathname = os.path.join(output_dir, outname)
        # look up descriptions
        # NOTE(review): bed_name is not used; the eland file name is passed
        # as the track name below -- confirm which one is intended.
        bed_name, description = make_description(database, flowcell, lane)

        # open files; the with blocks make sure both streams are closed
        # (and the bed file flushed) even if the conversion fails
        with open(pathname, 'r') as instream:
            with open(outpathname, 'w') as outstream:
                make_bed_from_eland_stream(
                    instream, outstream, name, description, prefix
                )
40
def make_parser():
    """
    Build the optparse.OptionParser for this script.

    Options defined: -o/--output, --chromosome (stored as 'prefix'),
    --database, --flowcell and -v/--verbose.  Returns the parser; the
    caller is responsible for calling parse_args.
    """
    usage = """%prog: --flowcell <flowcell id> directory_name

directory should contain a set of 8 eland result files named like
s_[12345678]_eland_result.txt"""

    parser = optparse.OptionParser(usage)

    # the two help fragments are joined explicitly; adjacent string
    # literals concatenate without any separator
    parser.add_option('-o', '--output', dest='output',
                      help="destination directory for our bed files; "
                           "defaults to eland directory",
                      default=None)
    parser.add_option('--chromosome', dest='prefix',
                      help='Set the chromosome prefix name. defaults to "chr"',
                      default='chr')
    parser.add_option("--database", dest='database',
                      help="specify location of fctracker database",
                      default=None)
    parser.add_option("--flowcell", dest='flowcell',
                      help="specify the flowcell id for this run",
                      default=None)
    parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                      help='increase verbosity',
                      default=False)
    return parser
67
def main(command_line=None):
    """
    Command line entry point: parse the arguments and convert the eland
    result files of one directory to bed files.

    command_line is the list of arguments to parse; when it is None,
    sys.argv[1:] is used instead.  Problems with the arguments are
    reported through parser.error.  Returns 0 after make_bed_for_gerald
    has finished.
    """
    logging.basicConfig(level=logging.WARNING)
    argv = sys.argv[1:] if command_line is None else command_line

    parser = make_parser()
    options, positional = parser.parse_args(argv)

    # exactly one positional argument is expected: the eland directory
    if len(positional) != 1:
        parser.error('Directory name required')

    eland_dir = positional[0]
    if not os.path.isdir(eland_dir):
        parser.error('%s must be a directory' % (eland_dir,))

    if options.flowcell is None:
        parser.error('Flowcell ID required')

    # -v raises the root logger from WARNING to INFO
    if options.verbose:
        logging.getLogger().setLevel(logging.INFO)

    make_bed_for_gerald(
        eland_dir,
        options.output,
        options.prefix,
        options.database,
        options.flowcell,
    )

    return 0
93
if __name__ == "__main__":
    # run as a script: main()'s return value becomes the process exit status
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
96