import argparse
import RDF
import jinja2
+from pprint import pprint
from htsworkflow.util.rdfhelp import \
dafTermOntology, \
from encode_find import DBDIR
# Genome assembly used as the default value of the --genome option.
DEFAULT_GENOME = 'hg19'
# Default location of the rendered HTML report.
DEFAULT_OUTPUT = '/tmp/submission_report.html'

def main(cmdline=None):
    """Build the submission report and write it to the requested file.

    :param cmdline: list of argument strings handed to the argument parser;
        ``None`` lets argparse read the process command line instead.
    """
    parser = make_parser()
    args = parser.parse_args(cmdline)

    model = get_model('encode', DBDIR)
    report = what_have_we_done(model, genome=args.genome)

    # Honour --output: the option was parsed but the report used to be
    # written to the hard-coded DEFAULT_OUTPUT path regardless.
    with open(args.output, 'w') as stream:
        stream.write(report)

def make_parser():
    """Return the argparse parser describing the report's command line.

    Options:
      --genome  assembly to limit the report to (default DEFAULT_GENOME)
      --output  file the report is written to (default DEFAULT_OUTPUT)
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--genome', default=DEFAULT_GENOME,
                        help='limit to one genome')
    # Reuse the module constant instead of repeating the path literal.
    parser.add_argument('--output', default=DEFAULT_OUTPUT,
                        help='specify where to write the report')
    return parser

SUBMISSION_QUERY = """
PREFIX daf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
PREFIX ddf: <http://encodesubmit.ucsc.edu/pipeline/download_ddf#>
-SELECT distinct ?assembly ?experiment ?library_urn ?library_name ?submission ?submission_status ?date
+SELECT distinct ?assembly ?experiment ?library_urn ?library_name ?submission ?submission_status ?submission_name ?date
WHERE {{
?submission ucscSubmission:library_urn ?library_urn ;
ucscSubmission:has_status ?status ;
+ ucscSubmission:name ?submission_name ;
libraryOntology:date ?date .
?status daf:assembly ?assembly ;
ucscSubmission:status ?submission_status .
ORDER BY ?assembly ?experiment ?library_urn ?submission
"""
# Jinja2 template for the HTML report.  It expects two variables:
#   genome    - value shown in the page title and heading
#   libraries - mapping {experiment: {library_urn: [record, ...]}}
# and the custom filters ``trim_rdf`` and ``timestamp_to_date``.
# One table is emitted per experiment; each library gets a "library" row
# followed by one "submission" row per record.
SUBMISSION_TEMPLATE = '''
<html>
<head>
<style type="text/css">
table { border-width: 0 0 1px 1px; border-style: solid; }
th,td { border-width: 1px 1px 0 0; border-style: solid; margin: 0;}
.library { font-size: 18pt; background-color: #EEF; }
.submission { font-size: 12pt; background-color: #EFE;}
</style>
  <title>Submission report for {{ genome }}</title>
</head>
<body>
<h1>Genome: {{ genome }}</h1>
{% for experiment in libraries %}
  <h2>{{ experiment }}</h2>
  <table>
    <thead>
      <tr class="library">
        <td>Library ID</td>
        <td colspan="3">Library Name</td>
      </tr>
      <tr class="submission">
        <td>Submission ID</td>
        <td>Last Updated</td>
        <td>Status</td>
        <td>Submission name</td>
      </tr>
    </thead>
    <tbody>
      {% for liburn, records in libraries[experiment]|dictsort %}
      <!-- {{ liburn }} -->
      <tr class="library">
        <td>
          <a href="{{libraries[experiment][liburn].0.library_urn}}">
            {{ libraries[experiment][liburn].0.library_urn | trim_rdf}}
          </a>
        </td>
        <td colspan="3">{{ libraries[experiment][liburn].0.library_name }}</td>
      </tr>
      {% for record in records|sort %}
      <tr class="submission">
        <td><a href="{{record.submission}}">{{record.submission|trim_rdf}}</a></td>
        <td>{{ record.date|timestamp_to_date }}</td>
        <td>{{ record.submission_status }}</td>
        <td>{{ record.submission_name }}</td>
      </tr>
      {% endfor %}
      {% endfor %}
    </tbody>
  </table>
{% endfor %}
</body>
</html>
'''

def what_have_we_done(model, genome=None):
    """Query the model for submissions and render them as an HTML report.

    :param model: RDF model the SUBMISSION_QUERY SPARQL query is run against.
    :param genome: assembly name inserted into a case-insensitive regex
        filter on ``?assembly``.  ``None`` applies no assembly filter, and
        the template then receives ``None`` as its ``genome`` value.
    :returns: the rendered SUBMISSION_TEMPLATE.
    """
    # Keep the parameter optional so one-argument callers continue to work;
    # formatting None into the regex would filter on the literal "None".
    if genome is None:
        assembly_filter = ''
    else:
        assembly_filter = 'FILTER(regex(?assembly, "{0}", "i"))'.format(genome)

    query = SUBMISSION_QUERY.format(assembly_filter=assembly_filter)
    compiled_query = RDF.SPARQLQuery(query)
    submissions = compiled_query.execute(model)
    libraries = group_by_library(submissions)

    environment = jinja2.Environment()
    # Filters referenced by SUBMISSION_TEMPLATE.
    environment.filters['trim_rdf'] = trim_rdf
    environment.filters['timestamp_to_date'] = timestamp_to_date
    template = environment.from_string(SUBMISSION_TEMPLATE)
    return template.render(libraries=libraries,
                           genome=genome)

def group_by_library(submissions):
    """Group submission records by experiment, then by library URN.

    :param submissions: iterable of records supporting
        ``record['library_urn']`` (an object exposing a ``uri`` attribute)
        and ``record['experiment']``.
    :returns: ``{experiment: {library_urn: [record, ...]}}`` where both keys
        are ``str`` values; records keep the order in which they were seen.
    """
    libraries = {}
    for record in submissions:
        urn = str(record['library_urn'].uri)
        experiment = str(record['experiment'])
        by_urn = libraries.setdefault(experiment, {})
        by_urn.setdefault(urn, []).append(record)
    return libraries

def trim_rdf(value):
if value is None:
return
value = str(value)
+ if len(value) == 0:
+ return value
if value[-1] == '/':
value = value[:-1]
split_value = value.split('/')