From cc57620b896e1ff651df9814edcb292fdc8df6d6 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Tue, 22 May 2012 17:36:07 -0700 Subject: [PATCH] Generate html reports when doing sparql queries with encode_find. I have a further improved simplify_uri function which extracts a meaningful name from some pretty arbitrary rdf nodes. (This does mean that an earlier attempt which is still in the code probably can be removed -- that one depended on knowing the namespace) --- encode_submission/encode_find.py | 5 +- htsworkflow/templates/rdf_report.html | 37 ++++++++++++++ htsworkflow/util/rdfhelp.py | 74 +++++++++++++++++++++++++-- 3 files changed, 112 insertions(+), 4 deletions(-) create mode 100644 htsworkflow/templates/rdf_report.html diff --git a/encode_submission/encode_find.py b/encode_submission/encode_find.py index 24ed168..70f280f 100644 --- a/encode_submission/encode_find.py +++ b/encode_submission/encode_find.py @@ -20,6 +20,9 @@ import sys import urllib import urlparse +if not 'DJANGO_SETTINGS_MODULE' in os.environ: + os.environ['DJANGO_SETTINGS_MODULE'] = 'htsworkflow.settings' + from htsworkflow.submission import daf, ucsc from htsworkflow.util import api @@ -124,7 +127,7 @@ def main(cmdline=None): load_encodedcc_files(model, **track_info ) if opts.sparql is not None: - sparql_query(model, opts.sparql) + sparql_query(model, opts.sparql, 'html') if opts.find_submission_with_no_library: report_submissions_with_no_library(model) diff --git a/htsworkflow/templates/rdf_report.html b/htsworkflow/templates/rdf_report.html new file mode 100644 index 0000000..298e93a --- /dev/null +++ b/htsworkflow/templates/rdf_report.html @@ -0,0 +1,37 @@ + + + + Report + + + + + + {% for k in results.0 %} + {% endfor %} + + + + {% for row in results %}{% for value in row.values %} + {% endspaceless %}{% endfor %} + + {% endfor %} + +
{{k}}
{% spaceless %} + {% if value.url %}{% endif%} + {{value.simple}} + {% if value.url %}{% endif %}
+
+
diff --git a/htsworkflow/util/rdfhelp.py b/htsworkflow/util/rdfhelp.py
index 6aa9627..3ca27fb 100644
--- a/htsworkflow/util/rdfhelp.py
+++ b/htsworkflow/util/rdfhelp.py
@@ -1,5 +1,6 @@
 """Helper features for working with librdf
 """
+import collections
 from datetime import datetime
 from urlparse import urlparse, urlunparse
 from urllib2 import urlopen
@@ -34,14 +35,17 @@ ISOFORMAT_MS = "%Y-%m-%dT%H:%M:%S.%f"
 ISOFORMAT_SHORT = "%Y-%m-%dT%H:%M:%S"
 
 
-def sparql_query(model, query_filename):
+def sparql_query(model, query_filename, output_format='text'):
     """Execute sparql query from file
     """
     logger.info("Opening: %s" % (query_filename,))
     query_body = open(query_filename, 'r').read()
     query = RDF.SPARQLQuery(query_body)
     results = query.execute(model)
-    display_query_results(results)
+    if output_format == 'html':
+        html_query_results(results)
+    else:
+        display_query_results(results)
 
 
 def display_query_results(results):
@@ -52,6 +56,30 @@ def display_query_results(results):
             print "{0}: {1}".format(k, v)
         print
 
+def html_query_results(result_stream):
+    from django.conf import settings
+    from django.template import Context, loader
+
+    # I did this because I couldn't figure out how to
+    # get simplify_rdf into the django template as a filter
+    class Simplified(object):
+        def __init__(self, value):
+            self.simple = simplify_rdf(value)
+            if value.is_resource():
+                self.url = value
+            else:
+                self.url = None
+
+    template = loader.get_template('rdf_report.html')
+    results = []
+    for row in result_stream:
+        new_row = collections.OrderedDict()
+        row_urls = []
+        for k,v in row.items():
+            new_row[k] = Simplified(v)
+        results.append(new_row)
+    context = Context({'results': results,})
+    print template.render(context)
 
 def blankOrUri(value=None):
     """Return a blank node for None or a resource node for strings.
@@ -145,6 +173,47 @@ def get_node_type(node):
     return value_type.replace(str(xsdNS[''].uri), '')
 
 
+def simplify_rdf(value):
+    """Return a short name for a RDF object
+    e.g. The last part of a URI or an untyped string.
+    """
+    if isinstance(value, RDF.Node):
+        if value.is_resource():
+            name = simplify_uri(str(value.uri))
+        elif value.is_blank():
+            name = ''
+        else:
+            name = value.literal_value['string']
+    elif isinstance(value, RDF.Uri):
+        name = simplify_uri(str(value))
+    else:
+        name = value
+    return str(name)
+
+
+def simplify_uri(uri):
+    """Split off the end of a uri
+
+    >>> simplify_uri('http://asdf.org/foo/bar')
+    'bar'
+    >>> simplify_uri('http://asdf.org/foo/bar#bleem')
+    'bleem'
+    >>> simplify_uri('http://asdf.org/foo/bar/')
+    'bar'
+    >>> simplify_uri('http://asdf.org/foo/bar?was=foo')
+    'was=foo'
+    """
+    parsed = urlparse(uri)
+    if len(parsed.query) > 0:
+        return parsed.query
+    elif len(parsed.fragment) > 0:
+        return parsed.fragment
+    elif len(parsed.path) > 0:
+        for element in reversed(parsed.path.split('/')):
+            if len(element) > 0:
+                return element
+    raise ValueError("Unable to simplify %s" % (uri,))
+
 def simplifyUri(namespace, term):
     """Remove the namespace portion of a term
 
@@ -253,7 +322,6 @@ def guess_parser(content_type, pathname):
             return 'turtle'
     return 'guess'
 
-
 def get_serializer(name='turtle'):
     """Return a serializer with our standard prefixes loaded
     """
-- 
2.30.2