Add utility program to help find what exists in georgi's submission dirs
[htsworkflow.git] / extra / ucsc_encode_submission / scan_extension.py
1 from optparse import OptionParser
2 import os
3 import sys
4 from pprint import pprint
5
def main(cmdline=None):
    """Scan the directories named on the command line and print the result.

    cmdline -- list of argument strings to parse; when None, optparse
               falls back to sys.argv[1:].

    The positional arguments are handed to scan(), and whatever
    find_common_suffix() returns for the resulting index is printed.
    """
    parser = make_parser()
    _opts, args = parser.parse_args(cmdline)

    extensions = scan(args)
    # Parenthesised single-argument form: valid both as the Python 2 print
    # statement and as the Python 3 print function.
    print(find_common_suffix(extensions))
13
def make_parser():
    """Build the command-line parser for this script.

    No options are registered here; the usage text only advertises one or
    more directory paths as positional arguments.
    """
    usage = "%prog: directory [directory...]"
    return OptionParser(usage)
17
def _insert_reversed(trie, name):
    """Add name to trie one character at a time, last character first."""
    node = trie
    for char in reversed(name):
        node = node.setdefault(char, {})


def scan(toscan):
    """Walk every directory in toscan and index the file names found.

    toscan -- iterable of directory paths; each is traversed recursively
              with os.walk().

    Returns a nested dict (a trie) keyed by single characters, where each
    file name is stored back to front, so names sharing an ending share
    the path leading from the root.
    """
    trie = {}
    for root_dir in toscan:
        for _dirpath, _subdirs, names in os.walk(root_dir):
            for name in names:
                _insert_reversed(trie, name)
    return trie
27
def find_common_suffix(index, tail=None):
    """Report the suffixes at which the reversed-name trie branches.

    index -- nested dict keyed by single characters, as built by scan():
             each path from the root spells a file name back to front.
    tail  -- characters already consumed on the way to index, in the
             order they were visited (i.e. reversed suffix order).
             Defaults to an empty sequence.

    Each branch below index is followed for as long as there is exactly
    one way to continue.  If a node with more than one child is reached,
    the characters seen so far (put back into reading order) are recorded
    as a suffix.  A branch that runs out without ever forking contributes
    nothing.

    Returns None when nothing was recorded, the suffix string when exactly
    one was, and a list of suffix strings otherwise.
    """
    # None rather than a list literal avoids a mutable default argument;
    # copying keeps the caller's sequence untouched.
    tail = [] if tail is None else list(tail)

    if tail and len(index) > 1:
        return "".join(reversed(tail))

    results = []
    for key, node in index.items():
        chars = tail + [key]
        # Walk the single-child chain with a loop instead of one recursive
        # call per character, so long names cannot exhaust the recursion
        # limit and the tail is not re-copied at every level.
        while len(node) == 1:
            char, node = next(iter(node.items()))
            chars.append(char)
        if len(node) > 1:
            results.append("".join(reversed(chars)))

    if not results:
        return None
    if len(results) == 1:
        return results[0]
    return results
44
# Run the scan only when executed as a script, not when imported.
if __name__ == "__main__":
    main()