mark the example submission rule files as being raw, so the escapes don't get confused
[htsworkflow.git] / encode_submission / scan_extension.py
1 from optparse import OptionParser
2 import os
3 import sys
4 from pprint import pprint
5
def main(cmdline=None):
    """Scan the given directories and report the common filename suffixes.

    The positional command-line arguments are the directories to scan.
    With ``--rdf`` the result is handed to ``print_rdf``; otherwise the
    value returned by ``find_common_suffix`` is printed unchanged.

    Args:
        cmdline: Argument list passed to the option parser.  ``None`` lets
            optparse fall back to ``sys.argv[1:]``.
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)

    extensions = scan(args)
    common_extensions = find_common_suffix(extensions)

    if opts.rdf:
        print_rdf(common_extensions)
    else:
        # The parenthesised single-argument form prints identically under
        # the Python 2 print statement and the Python 3 print function.
        print(common_extensions)
17         
def make_parser():
    """Build the command-line parser for this script.

    Returns:
        An ``OptionParser`` that accepts directories as positional
        arguments plus a boolean ``--rdf`` flag (off by default).
    """
    usage = "%prog: directory [directory...]"
    parser = OptionParser(usage=usage)
    parser.add_option(
        '--rdf',
        dest='rdf',
        action="store_true",
        default=False,
        help="Produce rdf configuration file for ucsc_gather",
    )
    return parser
23
def scan(toscan):
    """Index every filename under the given directories, last character first.

    Each directory is walked recursively with ``os.walk``.  Files whose
    extension is ``.daf`` or ``.ddf`` are skipped; every other filename is
    inserted character by character, starting from its end, into a nested
    dict so that names sharing an ending share a path from the root.

    Args:
        toscan: Iterable of directory paths to walk.

    Returns:
        Nested dict keyed by single characters; descending from the root
        spells the recorded filenames backwards.
    """
    skipped_extensions = ('.daf', '.ddf')
    index = {}
    for cur_scan_dir in toscan:
        for _path, _dirnames, filenames in os.walk(cur_scan_dir):
            for filename in filenames:
                # splitext keeps the leading dot on the extension, so the
                # entries in skipped_extensions must carry it as well.
                ext = os.path.splitext(filename)[1]
                if ext in skipped_extensions:
                    continue
                node = index
                for c in reversed(filename):
                    node = node.setdefault(c, {})
    return index
36
def find_common_suffix(index, tail=None):
    """Return the shared filename suffix(es) recorded in a reversed-name index.

    Walks down from the root of the nested dict.  The first node below the
    root that has more than one child marks the point where several names
    diverge, so the characters consumed up to there form a common suffix.

    Args:
        index: Nested dict keyed by single characters, as built by ``scan``.
        tail: Characters consumed so far, ordered from the end of the
            filename inward.  Callers normally omit it.

    Returns:
        ``None`` when no branching point is found, the single suffix string
        when exactly one branch yields a result, otherwise a list holding
        the result of each branch that yielded one.
    """
    # Use None as the default rather than a list shared between calls.
    if tail is None:
        tail = []

    # tail is empty only at the root, which never counts as a branch point.
    if tail and len(index) > 1:
        return "".join(reversed(tail))

    results = []
    for key, choice in index.items():
        found = find_common_suffix(choice, tail + [key])
        if found is not None:
            results.append(found)

    if not results:
        return None
    if len(results) == 1:
        return results[0]
    return results
53
def print_rdf(common_extensions):
    """Print an RDF model holding one ``filename_re`` statement per extension.

    For each extension a statement is added whose subject is
    ``VIEW<n>`` (numbered from 1) in the submission-view namespace, whose
    predicate is the ``filename_re`` term from ``rdfhelp.dafTermOntology``
    and whose object is the pattern ``.*<ext>$`` with dots backslash-escaped.
    The model is then serialized and written to standard output.

    Args:
        common_extensions: ``None``, a single suffix string, or an iterable
            of suffix strings (the shapes ``find_common_suffix`` returns).
    """
    import RDF
    from htsworkflow.util import rdfhelp

    # find_common_suffix returns None or a bare string when it finds fewer
    # than two suffixes.  Iterating None raises, and iterating a bare string
    # would emit one statement per character, so normalise both to a list.
    if common_extensions is None:
        common_extensions = []
    elif isinstance(common_extensions, (str, type(u""))):
        common_extensions = [common_extensions]

    model = rdfhelp.get_model()

    viewName = 'http://jumpgate.caltech.edu/wiki/SubmissionsLog/NAME/view/'
    subView = RDF.NS(viewName)
    fileReTerm = rdfhelp.dafTermOntology['filename_re']

    for count, ext in enumerate(common_extensions, 1):
        # Escape dots so they match literally in the filename pattern.
        filename_re = '.*{0}$'.format(ext.replace('.', '\\.'))
        s = RDF.Statement(subView['VIEW{0}'.format(count)],
                          fileReTerm,
                          filename_re)
        model.add_statement(s)

    writer = rdfhelp.get_serializer()
    writer.set_namespace('thisSubmissionView', subView._prefix)
    # Parenthesised form works as both a Python 2 statement and a
    # Python 3 function call.
    print(writer.serialize_model_to_string(model))
74
# Run the command-line entry point only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()