Test htsworkflow under several different django & python versions
[htsworkflow.git] / encode_submission / scan_extension.py
1 from __future__ import print_function, unicode_literals
2
3 from optparse import OptionParser
4 import os
5 import sys
6 from pprint import pprint
7
def main(cmdline=None):
    """Scan the given directories and report their common filename suffixes.

    :param cmdline: argument list passed to the option parser; ``None``
        makes optparse read ``sys.argv[1:]`` instead.
    """
    opts, directories = make_parser().parse_args(cmdline)
    suffixes = find_common_suffix(scan(directories))

    # Plain mode just prints whatever find_common_suffix produced.
    if not opts.rdf:
        print(suffixes)
        return

    print_rdf(suffixes)
19         
def make_parser():
    """Build the command line parser for this script.

    :returns: an ``OptionParser`` that accepts the ``--rdf`` flag
        (stored as ``opts.rdf``, off by default).
    """
    usage = "%prog: directory [directory...]"
    rdf_help = "Produce rdf configuration file for ucsc_gather"

    option_parser = OptionParser(usage=usage)
    option_parser.add_option(
        '--rdf',
        default=False,
        action="store_true",
        help=rdf_help,
    )
    return option_parser
25
def scan(toscan):
    """Index every file name found under the given directories.

    Each directory is walked recursively with ``os.walk``. Every file
    name is inserted into a nested-dict trie one character at a time,
    starting from the *last* character, so names that share a suffix
    share a path from the root of the trie.

    Files whose extension is ``.daf`` or ``.ddf`` are left out of the
    index.

    :param toscan: iterable of directory paths to walk.
    :returns: nested dict mapping a character to the sub-trie of the
        characters preceding it; ``{}`` when nothing was indexed.
    """
    # os.path.splitext returns the extension with its leading dot, so
    # both entries need the dot to ever match.
    skipped_extensions = ('.daf', '.ddf')

    index = {}
    for cur_scan_dir in toscan:
        for _path, _dirnames, filenames in os.walk(cur_scan_dir):
            for filename in filenames:
                if os.path.splitext(filename)[1] in skipped_extensions:
                    continue
                # Walk the name backwards, creating trie nodes as needed.
                node = index
                for c in reversed(filename):
                    node = node.setdefault(c, {})
    return index
38
def find_common_suffix(index, tail=None):
    """Find the suffixes shared by more than one name in a reversed trie.

    *index* is a nested dict keyed by single characters, where each
    level holds the character that precedes the one above it (i.e. the
    names were inserted last character first). The trie is descended
    until a node below the root has more than one child; the characters
    collected on the way down, put back in reading order, form the
    shared suffix.

    :param index: nested dict trie to search.
    :param tail: characters collected so far, last character of the
        name first. Used by the recursion; callers normally omit it.
    :returns: ``None`` when no branching point exists, a single string
        when exactly one suffix was found, otherwise a list of strings.
    """
    # Avoid a mutable default argument; every level gets its own list.
    if tail is None:
        tail = []

    # A branching point below the root ends the shared suffix.
    if tail and len(index) > 1:
        return "".join(reversed(tail))

    results = []
    for key, choice in index.items():
        found = find_common_suffix(choice, tail + [key])
        if found is not None:
            results.append(found)

    if not results:
        return None
    if len(results) == 1:
        return results[0]
    return results
55
def print_rdf(common_extensions):
    """Print an RDF model holding one filename regex per extension.

    For every extension a statement is added to the model whose subject
    is ``VIEW<n>`` (numbered from 1) in the submission view namespace,
    whose predicate is the ``filename_re`` term, and whose object is the
    pattern ``.*<ext>$`` with the dots in the extension backslash-escaped.
    The serialized model is printed to standard output.

    :param common_extensions: ``None``, a single extension string, or an
        iterable of extension strings (the shapes find_common_suffix
        returns).
    """
    # NOTE(review): RDF is presumably the Redland librdf Python binding
    # and rdfhelp a project helper around it; neither is visible here.
    import RDF
    from htsworkflow.util import rdfhelp

    # A bare string would otherwise be iterated character by character
    # and None would raise, so normalise both to a list first.
    # type("") / type(b"") cover text and byte strings on Python 2 and 3.
    if common_extensions is None:
        common_extensions = []
    elif isinstance(common_extensions, (type(""), type(b""))):
        common_extensions = [common_extensions]

    model = rdfhelp.get_model()

    viewName = 'http://jumpgate.caltech.edu/wiki/SubmissionsLog/NAME/view/'
    subView = RDF.NS(viewName)
    fileReTerm = rdfhelp.dafTermOntology['filename_re']

    for count, ext in enumerate(common_extensions, 1):
        s = RDF.Statement(subView['VIEW{0}'.format(count)],
                          fileReTerm,
                          '.*{0}$'.format(ext.replace('.', '\\.')))
        model.add_statement(s)

    writer = rdfhelp.get_serializer()
    writer.set_namespace('thisSubmissionView', subView._prefix)
    print(writer.serialize_model_to_string(model))
76
# Run the scanner only when executed as a script, not when imported.
if __name__ == '__main__':
    main()