Make it possible to filter which genome to run report against
[htsworkflow.git] / encode_submission / submission_report.py
1 import argparse
2 import RDF
3 import jinja2
4
5 from htsworkflow.util.rdfhelp import \
6      dafTermOntology, \
7      dublinCoreNS, \
8      get_model, \
9      get_serializer, \
10      sparql_query, \
11      submissionOntology, \
12      libraryOntology, \
13      load_into_model, \
14      rdfNS, \
15      rdfsNS, \
16      xsdNS
# The 'type' term looked up in the rdfNS namespace (presumably rdf:type --
# confirm against htsworkflow.util.rdfhelp).
# NOTE(review): not referenced anywhere in the visible part of this file.
TYPE_N = rdfNS['type']
# The 'date' term looked up in libraryOntology; presumably the same predicate
# SUBMISSION_QUERY spells as libraryOntology:date -- confirm.
# NOTE(review): not referenced anywhere in the visible part of this file.
CREATION_DATE = libraryOntology['date']
19
20 from encode_find import DBDIR
21
def main(cmdline=None):
    """Parse the command line, build the submission report and print it.

    :param cmdline: list of argument strings passed to
        ``ArgumentParser.parse_args``; ``None`` lets argparse fall back to
        ``sys.argv[1:]``.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)
    model = get_model('encode', DBDIR)
    report = what_have_we_done(model, genome=args.genome)
    # Parenthesised so the line parses on Python 3 as well as Python 2;
    # with a single argument the output is the same on both.
    print(report)
28
29
def make_parser():
    """Build the argument parser for the submission report script.

    The parser knows a single option, ``--genome``, which defaults to
    ``None`` when it is not supplied.

    :returns: the configured ``argparse.ArgumentParser``.
    """
    cli = argparse.ArgumentParser()
    genome_option = dict(default=None, help='limit to one genome')
    cli.add_argument('--genome', **genome_option)
    return cli
35
# SPARQL text used as a str.format() template: {assembly_filter} is the only
# substitution field, and the SPARQL group braces are doubled ({{ }}) so that
# format() emits them as literal { }.
# The query selects one row per submission/status pair, drops rows whose
# submission status matches "revoked" (case-insensitive), and orders the
# result by assembly, experiment, library and submission.
SUBMISSION_QUERY = """
PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX ucscSubmission:<http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
PREFIX libraryOntology:<http://jumpgate.caltech.edu/wiki/LibraryOntology#>
PREFIX daf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
PREFIX ddf: <http://encodesubmit.ucsc.edu/pipeline/download_ddf#>

SELECT distinct ?assembly ?experiment ?library_urn ?library_name ?submission ?submission_status ?date
WHERE {{
  ?submission ucscSubmission:library_urn ?library_urn ;
              ucscSubmission:has_status ?status ;
              libraryOntology:date ?date .
  ?status daf:assembly ?assembly ;
          ucscSubmission:status ?submission_status .
  OPTIONAL {{ ?library_urn libraryOntology:name ?library_name . }}
  OPTIONAL {{ ?library_urn libraryOntology:experiment_type ?experiment . }}
  FILTER(!regex(?submission_status, "revoked", "i"))
  {assembly_filter}
}}
ORDER BY ?assembly ?experiment ?library_urn ?submission
"""
58
# Jinja2 template for the HTML report. It expects a `submissions` iterable
# whose records expose the variables selected by SUBMISSION_QUERY, and it
# relies on two custom filters being registered on the environment:
# `trim_rdf` (shortens a URI for display) and `timestamp_to_date`.
SUBMISSION_TEMPLATE = """
<html>
<head>
<style type="text/css">
table { border-width: 0 0 1px 1px; border-style: solid; }
th,td { border-width: 1px 1px 0 0; border-style: solid; margin: 0;}
</style>
</head>
<body>
<table>
<thead>
  <tr>
  <td>Assembly</td>
  <td>Experiment</td>
  <td>Library ID</td>
  <td>Submission ID</td>
  <td>Last Updated</td><td>Status</td>
  <td>Library Name</td>
  </tr>
</thead>
<tbody>
{% for record in submissions %}
  <tr>
    <td>{{record.assembly}}</td>
    <td>{{record.experiment}}</td>
    <td><a href="{{record.library_urn}}">{{ record.library_urn | trim_rdf}}</a></td>
    <td><a href="{{record.submission}}">{{record.submission|trim_rdf}}</a></td>
    <td>{{ record.date|timestamp_to_date }}</td>
    <td>{{ record.submission_status }}</td>
    <td>{{ record.library_name }}</td>
  </tr>
{% endfor %}
</tbody>
</table>
</body>
</html>
"""
96
def _sparql_string_literal(value):
    """Escape *value* for embedding inside a double-quoted SPARQL string."""
    escaped = str(value).replace('\\', '\\\\')  # backslashes first
    escaped = escaped.replace('"', '\\"')
    return escaped.replace('\n', '\\n').replace('\r', '\\r')


def what_have_we_done(model, genome=None):
    """Render the HTML submission report for the data stored in *model*.

    Runs SUBMISSION_QUERY against the model and feeds the result rows to
    SUBMISSION_TEMPLATE.

    :param model: RDF model the compiled SPARQL query is executed against.
    :param genome: optional pattern; when given, only rows whose
        ``?assembly`` matches it (SPARQL ``regex``, case-insensitive) are
        reported. ``None`` reports every assembly.
    :returns: the rendered HTML document as a string.
    """
    assembly_filter = ''
    if genome is not None:
        # The pattern ends up inside a quoted SPARQL literal, so quotes and
        # backslashes must be escaped or they would terminate/alter the query.
        assembly_filter = 'FILTER(regex(?assembly, "{0}", "i"))'.format(
            _sparql_string_literal(genome))

    query = SUBMISSION_QUERY.format(
        assembly_filter=assembly_filter
    )
    compiled_query = RDF.SPARQLQuery(query)
    submissions = compiled_query.execute(model)
    # Values come from the RDF store and are written into HTML text and
    # attributes, so let Jinja2 escape them instead of emitting them raw.
    environment = jinja2.Environment(autoescape=True)
    environment.filters['trim_rdf'] = trim_rdf
    environment.filters['timestamp_to_date'] = timestamp_to_date
    template = environment.from_string(SUBMISSION_TEMPLATE)
    return template.render(submissions=submissions)
112
def trim_rdf(value):
    """Return the last ``/``-separated component of *value*.

    One trailing ``/`` is removed before splitting, so ``.../library/123/``
    and ``.../library/123`` both give ``123``.

    :param value: anything ``str()`` can convert, or ``None``.
    :returns: ``None`` when *value* is ``None``; otherwise the text after
        the last ``/`` (the whole string when it contains no ``/``).
    """
    if value is None:
        return None
    value = str(value)
    # endswith() is safe on an empty string, where value[-1] would raise.
    if value.endswith('/'):
        value = value[:-1]
    # rsplit() always returns at least one element, so no length check needed.
    return value.rsplit('/', 1)[-1]
123
def timestamp_to_date(value):
    """Return the date part of a ``<date>T<time>`` timestamp.

    :param value: anything ``str()`` can convert, or ``None``.
    :returns: ``None`` when *value* is ``None``; otherwise the text before
        the first ``T`` (the whole string when it contains no ``T``).
    """
    if value is None:
        return None
    # partition() never raises: it copes with a missing 'T' (date only) and
    # with additional 'T' characters later in the string (e.g. "UTC").
    return str(value).partition('T')[0]
127
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()