Generate html reports when doing sparql queries with encode_find.
authorDiane Trout <diane@caltech.edu>
Wed, 23 May 2012 00:36:07 +0000 (17:36 -0700)
committerDiane Trout <diane@caltech.edu>
Wed, 23 May 2012 00:36:07 +0000 (17:36 -0700)
This also adds a further improved simplify_uri function, which extracts
a meaningful name from fairly arbitrary RDF nodes.

(This means the earlier attempt, simplifyUri, which is still in the code,
can probably be removed -- that one depended on knowing the namespace.)

encode_submission/encode_find.py
htsworkflow/templates/rdf_report.html [new file with mode: 0644]
htsworkflow/util/rdfhelp.py

index 24ed16872cf7654d25cd40f5b0ad977976512e52..70f280f7e4f4f18c46c8206674ab27960001a82f 100644 (file)
@@ -20,6 +20,9 @@ import sys
 import urllib
 import urlparse
 
+# Use the project settings unless the environment already names a module.
+os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'htsworkflow.settings')
+
 from htsworkflow.submission import daf, ucsc
 
 from htsworkflow.util import api
@@ -124,7 +127,7 @@ def main(cmdline=None):
             load_encodedcc_files(model, **track_info )
 
     if opts.sparql is not None:
-        sparql_query(model, opts.sparql)
+        sparql_query(model, opts.sparql, 'html')
 
     if opts.find_submission_with_no_library:
         report_submissions_with_no_library(model)
diff --git a/htsworkflow/templates/rdf_report.html b/htsworkflow/templates/rdf_report.html
new file mode 100644 (file)
index 0000000..298e93a
--- /dev/null
@@ -0,0 +1,37 @@
+<!DOCTYPE html>
+<html>
+  <head>
+    <title>Report</title>
+    <style>
+table {
+    border-spacing: 0;
+}
+
+table, td {
+    border-style: solid;
+}
+
+td { margin: 0;
+     border-width: 1px 1px 0 0;
+     padding: 4px;
+}    </style>
+  </head>
+  <body>
+    <table>
+      <thead>
+        <tr>{% for k in results.0 %}
+          <td>{{k}}</td>{% endfor %}
+        </tr>
+      </thead>
+      <tbody>
+        {% for row in results %}<tr>{% for value in row.values %}
+          {# Values with a url are rendered as links; all others as plain text. #}
+          <td>{% if value.url %}<a href="{{ value.url }}">{{ value.simple }}</a>{% else %}{{ value.simple }}{% endif %}</td>{% endfor %}
+        </tr>
+      {% endfor %}
+      </tbody>
+    </table>
+  </body>
+</html>
index 6aa9627f9e5bcbf833080f8cfa24f31908b4a390..3ca27fb97315b968644724a977e55aedd1bb5443 100644 (file)
@@ -1,5 +1,6 @@
 """Helper features for working with librdf
 """
+import collections
 from datetime import datetime
 from urlparse import urlparse, urlunparse
 from urllib2 import urlopen
@@ -34,14 +35,17 @@ ISOFORMAT_MS = "%Y-%m-%dT%H:%M:%S.%f"
 ISOFORMAT_SHORT = "%Y-%m-%dT%H:%M:%S"
 
 
-def sparql_query(model, query_filename):
+def sparql_query(model, query_filename, output_format='text'):
     """Execute sparql query from file
     """
     logger.info("Opening: %s" % (query_filename,))
     query_body = open(query_filename, 'r').read()
     query = RDF.SPARQLQuery(query_body)
     results = query.execute(model)
-    display_query_results(results)
+    if output_format == 'html':
+        html_query_results(results)
+    else:
+        display_query_results(results)
 
 
 def display_query_results(results):
@@ -52,6 +56,30 @@ def display_query_results(results):
             print "{0}: {1}".format(k, v)
         print
 
+def html_query_results(result_stream):
+    """Render query results as an HTML table and print it.
+
+    Each row of result_stream is converted to an ordered mapping of
+    binding name to a wrapper object exposing ``simple`` (the short
+    name produced by simplify_rdf) and ``url`` (the node itself when it
+    is a resource, otherwise None).  The rows are rendered with the
+    'rdf_report.html' template and the result is printed to stdout.
+    """
+    # Django is only imported when an HTML report is requested.
+    from django.template import Context, loader
+
+    # I did this because I couldn't figure out how to
+    # get simplify_rdf into the django template as a filter
+    class Simplified(object):
+        def __init__(self, value):
+            self.simple = simplify_rdf(value)
+            if value.is_resource():
+                self.url = value
+            else:
+                self.url = None
+
+    template = loader.get_template('rdf_report.html')
+    results = []
+    for row in result_stream:
+        # OrderedDict keeps the columns in the order the row yields them.
+        results.append(collections.OrderedDict(
+            (name, Simplified(node)) for name, node in row.items()))
+    context = Context({'results': results})
+    print template.render(context)
 
 def blankOrUri(value=None):
     """Return a blank node for None or a resource node for strings.
@@ -145,6 +173,47 @@ def get_node_type(node):
         return value_type.replace(str(xsdNS[''].uri), '')
 
 
+def simplify_rdf(value):
+    """Return a short name for a RDF object
+    e.g. The last part of a URI or an untyped string.
+
+    RDF.Node resources and RDF.Uri values are shortened with
+    simplify_uri, blank nodes become '<BLANK>', literal nodes yield
+    their string value, and any other value is passed through str().
+    """
+    if isinstance(value, RDF.Node):
+        if value.is_resource():
+            name = simplify_uri(str(value.uri))
+        elif value.is_blank():
+            name = '<BLANK>'
+        else:
+            name = value.literal_value['string']
+    elif isinstance(value, RDF.Uri):
+        # Use simplify_uri, the same helper as the resource-node branch.
+        name = simplify_uri(str(value))
+    else:
+        name = value
+    return str(name)
+
+
+def simplify_uri(uri):
+    """Return the most specific trailing piece of a uri
+
+    The query string wins if present, then the fragment, then the
+    last non-empty path segment.  ValueError is raised when none of
+    those yields anything.
+
+    >>> simplify_uri('http://asdf.org/foo/bar')
+    'bar'
+    >>> simplify_uri('http://asdf.org/foo/bar#bleem')
+    'bleem'
+    >>> simplify_uri('http://asdf.org/foo/bar/')
+    'bar'
+    >>> simplify_uri('http://asdf.org/foo/bar?was=foo')
+    'was=foo'
+    """
+    pieces = urlparse(uri)
+    for candidate in (pieces.query, pieces.fragment):
+        if candidate:
+            return candidate
+    # Drop the empty strings produced by leading/trailing/double slashes.
+    segments = [part for part in pieces.path.split('/') if part]
+    if segments:
+        return segments[-1]
+    raise ValueError("Unable to simplify %s" % (uri,))
+
 def simplifyUri(namespace, term):
     """Remove the namespace portion of a term
 
@@ -253,7 +322,6 @@ def guess_parser(content_type, pathname):
             return 'turtle'
     return 'guess'
 
-
 def get_serializer(name='turtle'):
     """Return a serializer with our standard prefixes loaded
     """