3f5547933f462318d5e5619c7f3b85ae5ea32e4b
[htsworkflow.git] / encode_submission / submission_report.py
1 import argparse
2 import RDF
3 import jinja2
4 from pprint import pprint
5
6 from htsworkflow.util.rdfhelp import \
7      dafTermOntology, \
8      dublinCoreNS, \
9      get_model, \
10      get_serializer, \
11      sparql_query, \
12      submissionOntology, \
13      libraryOntology, \
14      load_into_model, \
15      rdfNS, \
16      rdfsNS, \
17      xsdNS
# RDF terms looked up once, at import time, from the imported rdfNS and
# libraryOntology namespaces.
# NOTE(review): presumably rdf:type and the library creation-date
# predicate -- confirm against htsworkflow.util.rdfhelp.
TYPE_N = rdfNS['type']
CREATION_DATE = libraryOntology['date']
20
21 from encode_find import DBDIR
22
# Genome assembly reported on when --genome is not given.
DEFAULT_GENOME='hg19'
# Default location of the generated HTML report.
DEFAULT_OUTPUT='/tmp/submission_report.html'
25
def main(cmdline=None):
    """Generate the submission report and write it to the output file.

    :param cmdline: list of argument strings to parse; None makes argparse
                    read the process command line instead.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)
    model = get_model('encode', DBDIR)
    report = what_have_we_done(model, genome=args.genome)
    # Honour --output: the option used to be parsed and then ignored, so
    # the report always landed in DEFAULT_OUTPUT regardless of the request.
    with open(args.output, 'w') as stream:
        stream.write(report)
33
def make_parser():
    """Build the command line parser for the submission report script.

    :returns: an argparse.ArgumentParser accepting --genome and --output.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--genome', default=DEFAULT_GENOME,
                        help='limit to one genome')
    # Use the shared constant so the default path is defined in one place.
    parser.add_argument('--output', default=DEFAULT_OUTPUT,
                        help='specify where to write the report')
    return parser
41
# SPARQL query selecting submissions together with their library, status
# and date, skipping any whose status matches "revoked" (case-insensitive).
# The text is passed through str.format() in what_have_we_done(), so literal
# SPARQL braces are doubled ({{ }}) and {assembly_filter} is the single
# substitution point for an extra FILTER clause.
SUBMISSION_QUERY = """
PREFIX xsd:<http://www.w3.org/2001/XMLSchema#>
PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX ucscSubmission:<http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
PREFIX libraryOntology:<http://jumpgate.caltech.edu/wiki/LibraryOntology#>
PREFIX daf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
PREFIX ddf: <http://encodesubmit.ucsc.edu/pipeline/download_ddf#>

SELECT distinct ?assembly ?experiment ?library_urn ?library_name ?submission ?submission_status ?submission_name ?date
WHERE {{
  ?submission ucscSubmission:library_urn ?library_urn ;
              ucscSubmission:has_status ?status ;
              ucscSubmission:name ?submission_name ;
              libraryOntology:date ?date .
  ?status daf:assembly ?assembly ;
          ucscSubmission:status ?submission_status .
  OPTIONAL {{ ?library_urn libraryOntology:name ?library_name . }}
  OPTIONAL {{ ?library_urn libraryOntology:experiment_type ?experiment . }}
  FILTER(!regex(?submission_status, "revoked", "i"))
  {assembly_filter}
}}
ORDER BY ?assembly ?experiment ?library_urn ?submission
"""
65
# Jinja2 template for the HTML report, rendered by what_have_we_done() with
# two variables: `genome` (used in the title and heading) and `libraries`
# (experiment -> library URN -> list of query result records, as built by
# group_by_library()).  One table is emitted per experiment; each library
# gets a header row followed by one row per submission.  The `trim_rdf` and
# `timestamp_to_date` filters are registered on the environment by
# what_have_we_done().
SUBMISSION_TEMPLATE = '''
<html>
<head>
<style type="text/css">
table { border-width: 0 0 1px 1px; border-style: solid; }
th,td { border-width: 1px 1px 0 0; border-style: solid; margin: 0;}
.library { font-size: 18pt; background-color: #EEF; }
.submission { font-size: 12pt; background-color: #EFE;}
</style>
  <title>Submission report for {{ genome }}</title>
</head>
<body>
<h1>Genome: {{ genome }}</h1>
{% for experiment in libraries %}
  <h2>{{ experiment }}</h2>
  <table>
    <thead>
      <tr class="library">
      <td>Library ID</td>
      <td colspan="3">Library Name</td>
      </tr>
      <tr class="submission">
      <td>Submission ID</td>
      <td>Last Updated</td>
      <td>Status</td>
      <td>Submission name</td>
      </tr>
    </thead>
    <tbody>
      {% for liburn, records in libraries[experiment]|dictsort %}
      <!-- {{ liburn }} -->
      <tr class="library">
        <td>
          <a href="{{libraries[experiment][liburn].0.library_urn}}">
            {{ libraries[experiment][liburn].0.library_urn | trim_rdf}}
          </a>
        </td>
        <td colspan="3">{{ libraries[experiment][liburn].0.library_name }}</td>
      </tr>
      {% for record in records|sort %}
      <tr class="submission">
        <td><a href="{{record.submission}}">{{record.submission|trim_rdf}}</a></td>
        <td>{{ record.date|timestamp_to_date }}</td>
        <td>{{ record.submission_status }}</td>
        <td>{{ record.submission_name }}</td>
      </tr>
      {% endfor %}
    {% endfor %}
    </tbody>
  </table>
{% endfor %}
  </body>
</html>
'''
120
def what_have_we_done(model, genome):
    """Render an HTML report of the non-revoked submissions found in model.

    :param model: RDF model that the SPARQL submission query is run against.
    :param genome: assembly name, matched case-insensitively as a regular
                   expression against each submission's assembly.  A false
                   value (None or '') disables the assembly restriction.
    :returns: the rendered report produced from SUBMISSION_TEMPLATE.
    """
    assembly_filter = ''
    if genome:
        # Escape backslashes and double quotes so the value cannot end the
        # SPARQL string literal early and corrupt the surrounding query.
        pattern = str(genome).replace('\\', '\\\\').replace('"', '\\"')
        assembly_filter = \
            'FILTER(regex(?assembly, "{0}", "i"))'.format(pattern)

    query = SUBMISSION_QUERY.format(
        assembly_filter=assembly_filter
    )
    compiled_query = RDF.SPARQLQuery(query)
    submissions = compiled_query.execute(model)
    libraries = group_by_library(submissions)

    # The template relies on these two custom filters being registered.
    environment = jinja2.Environment()
    environment.filters['trim_rdf'] = trim_rdf
    environment.filters['timestamp_to_date'] = timestamp_to_date
    template = environment.from_string(SUBMISSION_TEMPLATE)
    return template.render(libraries=libraries,
                           genome=genome)
137
def group_by_library(submissions):
    """Bucket query result records by experiment and then by library URN.

    :param submissions: iterable of records supporting record['library_urn']
                        (an object with a .uri attribute) and
                        record['experiment'].
    :returns: dict mapping str(experiment) to a dict that maps the library
              URI string to the list of records for that library, in the
              order they were encountered.
    """
    grouped = {}
    for row in submissions:
        library_key = str(row['library_urn'].uri)
        experiment_key = str(row['experiment'])
        if experiment_key not in grouped:
            grouped[experiment_key] = {}
        per_library = grouped[experiment_key]
        if library_key not in per_library:
            per_library[library_key] = []
        per_library[library_key].append(row)
    return grouped
145
def trim_rdf(value):
    """Return the last path component of a URI-like value.

    A single trailing '/' is ignored, so 'http://host/library/123/' yields
    '123'.  Values without any '/' are returned unchanged (as a string).

    :param value: anything convertible with str(), or None.
    :returns: the trailing path segment as a string, '' for an empty value,
              or None when value is None.
    """
    if value is None:
        return None
    value = str(value)
    if value.endswith('/'):
        value = value[:-1]
    # rpartition yields the whole string as the tail when no '/' is present,
    # so no separate "nothing to split" case is needed.
    return value.rpartition('/')[2]
158
def timestamp_to_date(value):
    """Return the date portion of an ISO-8601 style timestamp.

    Everything before the first 'T' is returned.  A value with no 'T'
    (already just a date) is returned whole instead of raising ValueError,
    so one date-only record cannot abort rendering of the entire report.

    :param value: timestamp, converted with str() before splitting.
    :returns: the text preceding the first 'T'.
    """
    return str(value).partition('T')[0]
162
# Build the report only when run as a script, not when imported.
if __name__ == "__main__":
    main()