Split a function into two (a query-execution function and a reporting function).
[htsworkflow.git] / htsworkflow / util / rdfhelp.py
1 """Helper features for working with librdf
2 """
3 from datetime import datetime
4 import logging
5 import os
6 import types
7
8 import RDF
9
logger = logging.getLogger(__name__)

# Namespaces of widely used public vocabularies
# (OWL, Dublin Core, RDF, RDF Schema, XML Schema).
owlNS = RDF.NS("http://www.w3.org/2002/07/owl#")
dublinCoreNS = RDF.NS("http://purl.org/dc/elements/1.1/")
rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfsNS = RDF.NS("http://www.w3.org/2000/01/rdf-schema#")
xsdNS = RDF.NS("http://www.w3.org/2001/XMLSchema#")

# Project-specific vocabularies, all rooted at jumpgate.caltech.edu/wiki.
submissionOntology = RDF.NS(
    "http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#")
dafTermOntology = RDF.NS(
    "http://jumpgate.caltech.edu/wiki/UcscDaf#")
libraryOntology = RDF.NS(
    "http://jumpgate.caltech.edu/wiki/LibraryOntology#")
inventoryOntology = RDF.NS(
    "http://jumpgate.caltech.edu/wiki/InventoryOntology#")
submissionLog = RDF.NS(
    "http://jumpgate.caltech.edu/wiki/SubmissionsLog/")
25
# strftime/strptime patterns for ISO 8601 style timestamps: the short form
# stops at whole seconds, the long form appends fractional seconds (%f).
ISOFORMAT_SHORT = "%Y-%m-%dT%H:%M:%S"
ISOFORMAT_MS = ISOFORMAT_SHORT + ".%f"
28
def sparql_query(model, query_filename):
    """Execute a SPARQL query read from a file and print its results.

    :param model: RDF model the query is executed against.
    :param query_filename: path of the text file holding the SPARQL query.

    The result rows are handed to display_query_results(), which prints
    them; this function itself returns nothing.
    """
    logger.info("Opening: %s", query_filename)
    # A context manager closes the file handle as soon as the query text
    # has been read, instead of leaving it to the garbage collector.
    with open(query_filename, 'r') as query_file:
        query_body = query_file.read()
    query = RDF.SPARQLQuery(query_body)
    results = query.execute(model)
    display_query_results(results)
37
def display_query_results(results):
    """Print every row of a query result set to standard output.

    :param results: iterable of rows; each row must provide ``items()``
        yielding (name, value) pairs, for example a dict.

    Each pair is printed as ``name: value`` on its own line, walking the
    row's items in reverse order, and a blank line follows every row.
    """
    for row in results:
        # list() keeps the reversal working whether items() returns a
        # list or a view that cannot be sliced.
        for k, v in reversed(list(row.items())):
            print("{0}: {1}".format(k, v))
        # Blank separator line between rows.
        print("")
44
45
def blankOrUri(value=None):
    """Return an RDF node for ``value``.

    :param value: None, a string, or an existing RDF.Node.
    :returns:
        * ``RDF.Node()`` (constructed with no arguments; the blank node
          implied by the function name) when value is None,
        * ``RDF.Node(uri_string=value)`` when value is a string,
        * ``value`` itself when it already is an RDF.Node,
        * None for any other type.
    """
    if value is None:
        return RDF.Node()
    # isinstance (rather than an exact type() match) also accepts
    # subclasses of str/unicode.
    if isinstance(value, types.StringTypes):
        return RDF.Node(uri_string=value)
    if isinstance(value, RDF.Node):
        return value
    # Unsupported types yield None, as they always have; callers may
    # rely on that, so no exception is raised here.
    return None
56
57
def toTypedNode(value):
    """Convert a Python value into an RDF literal node.

    :param value: the value to convert.
    :returns: an RDF.Node literal built as follows:

        * bool -> xsd:boolean, with literal u'1' or u'0'
        * int / long -> xsd:decimal
        * float -> xsd:float
        * datetime -> xsd:dateTime, formatted with ISOFORMAT_SHORT when
          the microsecond field is 0 and ISOFORMAT_MS otherwise
        * anything else -> an untyped literal holding the UTF-8 encoded
          unicode() form of the value
    """
    # isinstance is used so subclasses of the builtin types are typed too.
    # bool has to be tested before int because bool is a subclass of int.
    if isinstance(value, types.BooleanType):
        value_type = xsdNS['boolean'].uri
        value = u'1' if value else u'0'
    elif isinstance(value, (types.IntType, types.LongType)):
        value_type = xsdNS['decimal'].uri
        value = unicode(value)
    elif isinstance(value, types.FloatType):
        value_type = xsdNS['float'].uri
        value = unicode(value)
    elif isinstance(value, datetime):
        value_type = xsdNS['dateTime'].uri
        if value.microsecond == 0:
            value = value.strftime(ISOFORMAT_SHORT)
        else:
            value = value.strftime(ISOFORMAT_MS)
    else:
        value_type = None
        value = unicode(value)

    if value_type is None:
        # value is already unicode here, so one encode step is enough.
        return RDF.Node(literal=value.encode('utf-8'))
    return RDF.Node(literal=value, datatype=value_type)
86
def fromTypedNode(node):
    """Convert an RDF literal node back into a Python value.

    :param node: object whose ``literal_value`` mapping provides the
        'datatype' and 'string' entries, or None.
    :returns: None when node is None; otherwise, based on the datatype
        with the XML Schema namespace prefix removed:

        * boolean -> True / False
        * decimal without a '.' -> int
        * decimal, float, double -> float
        * dateTime -> datetime, parsed with ISOFORMAT_MS and, if that
          fails, with ISOFORMAT_SHORT
        * string or any other datatype -> the literal string unchanged
    :raises ValueError: for a boolean literal that is not one of
        1/yes/true/0/no/false (case-insensitive), or a dateTime literal
        matching neither ISO format.
    """
    if node is None:
        return None

    value_type = str(node.literal_value['datatype'])
    # chop off xml schema declaration
    value_type = value_type.replace(str(xsdNS[''].uri), '')
    literal = node.literal_value['string']

    if value_type == 'boolean':
        literal_lower = literal.lower()
        if literal_lower in ('1', 'yes', 'true'):
            return True
        elif literal_lower in ('0', 'no', 'false'):
            return False
        else:
            raise ValueError("Unrecognized boolean %s" % (literal,))
    elif value_type == 'decimal' and '.' not in literal:
        return int(literal)
    elif value_type in ('decimal', 'float', 'double'):
        return float(literal)
    # Exact comparisons below: "x in ('string')" is a substring test on a
    # plain str, so e.g. the datatype 'date' used to be handled as
    # 'dateTime' and fail in strptime.
    elif value_type == 'string':
        return literal
    elif value_type == 'dateTime':
        try:
            return datetime.strptime(literal, ISOFORMAT_MS)
        except ValueError:
            # No fractional seconds present; retry with the short form.
            return datetime.strptime(literal, ISOFORMAT_SHORT)
    return literal
116
117
def get_model(model_name=None, directory=None):
    """Create an RDF model.

    :param model_name: name passed to RDF.HashStorage; when None an
        RDF.MemoryStorage is used instead.
    :param directory: directory placed in the hash storage options;
        defaults to the current working directory.
    :returns: an RDF.Model wrapping the chosen storage.
    """
    storage_dir = os.getcwd() if directory is None else directory

    if model_name is not None:
        bdb_options = "hash-type='bdb',dir='{0}'".format(storage_dir)
        backend = RDF.HashStorage(model_name, options=bdb_options)
        logger.info(
            "Using {0} with options {1}".format(model_name, bdb_options))
    else:
        backend = RDF.MemoryStorage()
        logger.info("Using RDF Memory model")

    return RDF.Model(backend)
132
133
def load_into_model(model, parser_name, filename, ns=None):
    """Parse the contents of a file into an RDF model.

    :param model: model that receives the parsed statements.
    :param parser_name: parser name forwarded to load_string_into_model().
    :param filename: path of the file to read.
    :param ns: optional namespace forwarded to load_string_into_model().
    :raises IOError: when ``filename`` does not exist.
    """
    if not os.path.exists(filename):
        raise IOError("Can't find {0}".format(filename))

    # A context manager closes the file handle once the data is read,
    # instead of leaving it to the garbage collector.
    with open(filename, 'r') as stream:
        data = stream.read()
    load_string_into_model(model, parser_name, data, ns)
140
141
def load_string_into_model(model, parser_name, data, ns=None):
    """Parse RDF held in a string into a model.

    :param model: model that receives the parsed statements.
    :param parser_name: name used to construct the RDF.Parser.
    :param data: the serialized RDF text.
    :param ns: third argument given to parse_string_into_model();
        "http://localhost/" is used when it is None.
    """
    base_uri = "http://localhost/" if ns is None else ns
    RDF.Parser(name=parser_name).parse_string_into_model(
        model, data, base_uri)
148
149
def get_serializer(name='turtle'):
    """Build an RDF.Serializer preloaded with this module's prefixes.

    :param name: serializer name handed to RDF.Serializer.
    :returns: the serializer, after set_namespace() has been called for
        each prefix listed below.
    """
    serializer = RDF.Serializer(name=name)
    prefix_table = (
        # general-purpose vocabularies
        ('owl', owlNS),
        ('rdf', rdfNS),
        ('rdfs', rdfsNS),
        ('xsd', xsdNS),
        # application-specific vocabularies; arguably these belong to
        # the calling application rather than here
        ('libraryOntology', libraryOntology),
        ('ucscSubmission', submissionOntology),
        ('ucscDaf', dafTermOntology),
    )
    for prefix, namespace in prefix_table:
        serializer.set_namespace(prefix, namespace._prefix)
    return serializer
165
def dump_model(model):
    """Print ``model`` serialized with the serializer from get_serializer().
    """
    print(get_serializer().serialize_model_to_string(model))