Mark the example submission rule files as being raw, so the escapes don't get confused
[htsworkflow.git] / encode_submission / submission_report.py
1 import argparse
2 import RDF
3 import jinja2
4 from pprint import pprint
5
6 from htsworkflow.util.rdfhelp import \
7      get_model, \
8      get_serializer, \
9      sparql_query, \
10      libraryOntology, \
11      load_into_model
12 from htsworkflow.util.rdfns import *
# Shorthand handles for two RDF terms: the 'type' entry of rdfNS and the
# 'date' entry of libraryOntology.
# NOTE(review): rdfNS is presumably supplied by the star import from
# htsworkflow.util.rdfns above -- confirm.  Neither constant is referenced
# in the code visible in this file.
TYPE_N = rdfNS['type']
CREATION_DATE = libraryOntology['date']
15
16 from encode_find import DBDIR
17
# Assembly name the report is limited to when --genome is not given.
DEFAULT_GENOME = 'hg19'
# Default location of the generated HTML report.
DEFAULT_OUTPUT = '/tmp/submission_report.html'
20
def main(cmdline=None):
    """Generate the submission report and write it to the --output file.

    :param cmdline: list of command line arguments to parse; when None,
        argparse reads sys.argv[1:] instead.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)
    model = get_model('encode', DBDIR)
    report = what_have_we_done(model, genome=args.genome)
    # Honour --output: the option used to be parsed but ignored, with the
    # report always written to DEFAULT_OUTPUT.
    with open(args.output, 'w') as stream:
        stream.write(report)
28
def make_parser():
    """Build the command line parser for the submission report script.

    :returns: an argparse.ArgumentParser that accepts --genome and --output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--genome', default=DEFAULT_GENOME,
                        help='limit to one genome')
    # Use the shared constant instead of repeating the path literal, so the
    # default cannot drift away from DEFAULT_OUTPUT.
    parser.add_argument('--output', default=DEFAULT_OUTPUT,
                        help='specify where to write the report')
    return parser
36
# SPARQL query selecting submissions whose status does not match "revoked",
# along with their library URN/name, experiment type, assembly, status,
# submission name and date.
#
# The text is a str.format() template: what_have_we_done() substitutes
# {assembly_filter}, and the doubled braces are format escapes that turn
# into the literal braces of the SPARQL group patterns.
SUBMISSION_QUERY = """
PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX ucscSubmission:<http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
PREFIX libraryOntology:<http://jumpgate.caltech.edu/wiki/LibraryOntology#>
PREFIX daf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
PREFIX ddf: <http://encodesubmit.ucsc.edu/pipeline/download_ddf#>

SELECT distinct ?assembly ?experiment ?library_urn ?library_name ?submission ?submission_status ?submission_name ?date
WHERE {{
  ?submission ucscSubmission:library_urn ?library_urn ;
              ucscSubmission:has_status ?status ;
              ucscSubmission:name ?submission_name ;
              libraryOntology:date ?date .
  ?status daf:assembly ?assembly ;
          ucscSubmission:status ?submission_status .
  OPTIONAL {{ ?library_urn libraryOntology:name ?library_name . }}
  OPTIONAL {{ ?library_urn libraryOntology:experiment_type ?experiment . }}
  FILTER(!regex(?submission_status, "revoked", "i"))
  {assembly_filter}
}}
ORDER BY ?assembly ?experiment ?library_urn ?submission
"""
60
# Jinja2 template for the HTML report, rendered by what_have_we_done().
#
# Template variables:
#   genome    -- name shown in the page title and heading
#   libraries -- nested mapping experiment -> library URN -> list of records,
#                as built by group_by_library()
#
# One table is emitted per experiment; each library gets a header row
# followed by one row per submission record.  The custom filters trim_rdf
# and timestamp_to_date must be registered on the jinja2 environment before
# this template is compiled.
SUBMISSION_TEMPLATE = '''
<html>
<head>
<style type="text/css">
table { border-width: 0 0 1px 1px; border-style: solid; }
th,td { border-width: 1px 1px 0 0; border-style: solid; margin: 0;}
.library { font-size: 18pt; background-color: #EEF; }
.submission { font-size: 12pt; background-color: #EFE;}
</style>
  <title>Submission report for {{ genome }}</title>
</head>
<body>
<h1>Genome: {{ genome }}</h1>
{% for experiment in libraries %}
  <h2>{{ experiment }}</h2>
  <table>
    <thead>
      <tr class="library">
      <td>Library ID</td>
      <td colspan="3">Library Name</td>
      </tr>
      <tr class="submission">
      <td>Submission ID</td>
      <td>Last Updated</td>
      <td>Status</td>
      <td>Submission name</td>
      </tr>
    </thead>
    <tbody>
      {% for liburn, records in libraries[experiment]|dictsort %}
      <!-- {{ liburn }} -->
      <tr class="library">
        <td>
          <a href="{{libraries[experiment][liburn].0.library_urn}}">
            {{ libraries[experiment][liburn].0.library_urn | trim_rdf}}
          </a>
        </td>
        <td colspan="3">{{ libraries[experiment][liburn].0.library_name }}</td>
      </tr>
      {% for record in records|sort %}
      <tr class="submission">
        <td><a href="{{record.submission}}">{{record.submission|trim_rdf}}</a></td>
        <td>{{ record.date|timestamp_to_date }}</td>
        <td>{{ record.submission_status }}</td>
        <td>{{ record.submission_name }}</td>
      </tr>
      {% endfor %}
    {% endfor %}
    </tbody>
  </table>
{% endfor %}
  </body>
</html>
'''
115
def what_have_we_done(model, genome):
    """Render the HTML submission report for the submissions in model.

    :param model: RDF model that the SUBMISSION_QUERY SPARQL query is
        executed against.
    :param genome: assembly name, applied as a case-insensitive regex on
        ?assembly.  A false value (None or '') disables the assembly
        filter instead of filtering on the literal text "None".
    :returns: SUBMISSION_TEMPLATE rendered with the grouped query results.
    """
    # Only restrict by assembly when a genome was actually requested.
    # NOTE(review): genome is interpolated into the query text unescaped;
    # it is expected to be a plain assembly name such as "hg19".
    assembly_filter = ''
    if genome:
        assembly_filter = 'FILTER(regex(?assembly, "{0}", "i"))'.format(genome)

    query = SUBMISSION_QUERY.format(
        assembly_filter=assembly_filter
    )
    compiled_query = RDF.SPARQLQuery(query)
    submissions = compiled_query.execute(model)
    libraries = group_by_library(submissions)

    # The template relies on these two custom filters, so they have to be
    # registered before the template is compiled.
    environment = jinja2.Environment()
    environment.filters['trim_rdf'] = trim_rdf
    environment.filters['timestamp_to_date'] = timestamp_to_date
    template = environment.from_string(SUBMISSION_TEMPLATE)
    return template.render(libraries=libraries,
                           genome=genome)
132
def group_by_library(submissions):
    """Bucket query result rows by experiment and then by library URN.

    :param submissions: iterable of rows supporting row['library_urn'].uri
        and row['experiment'].
    :returns: dict mapping str(experiment) to a dict that maps the library
        URN string to the list of rows for that library, in input order.
    """
    grouped = {}
    for row in submissions:
        library_key = str(row['library_urn'].uri)
        experiment_key = str(row['experiment'])
        if experiment_key not in grouped:
            grouped[experiment_key] = {}
        by_library = grouped[experiment_key]
        if library_key not in by_library:
            by_library[library_key] = []
        by_library[library_key].append(row)
    return grouped
140
def trim_rdf(value):
    """Return the last path component of a URI-like value.

    A single trailing '/' is dropped before taking the text after the last
    remaining '/'.  Used as a jinja2 filter to turn URNs into short IDs.

    :param value: anything convertible with str(), or None.
    :returns: None when value is None, otherwise the final path component
        (which is '' for an empty value).
    """
    if value is None:
        return None
    text = str(value)
    # Strip exactly one trailing slash so "…/library/123/" yields "123".
    if text.endswith('/'):
        text = text[:-1]
    # rsplit always returns at least one element, so no emptiness check is
    # needed before indexing.
    return text.rsplit('/', 1)[-1]
153
def timestamp_to_date(value):
    """Return the date portion of an ISO 8601 style timestamp.

    Everything before the first 'T' of str(value) is returned, so
    '2012-03-04T05:06:07' becomes '2012-03-04'.  A value without a 'T'
    (for example a bare date) is returned whole rather than raising
    ValueError while the report is being rendered.

    :param value: timestamp, converted with str().
    :returns: the text preceding the first 'T'.
    """
    datestamp, _, _ = str(value).partition('T')
    return datestamp
157
# Generate the report only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()