3b2bfe0ce3ccafacde089eabf34b2d03c5844356
[htsworkflow.git] / htsworkflow / util / rdfhelp.py
1 """Helper features for working with librdf
2 """
3 from datetime import datetime
4 import logging
5 import os
6 import types
7
8 import RDF
9
logger = logging.getLogger(__name__)

# Namespaces of widely used public vocabularies.
owlNS = RDF.NS("http://www.w3.org/2002/07/owl#")
dublinCoreNS = RDF.NS("http://purl.org/dc/elements/1.1/")
rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfsNS = RDF.NS("http://www.w3.org/2000/01/rdf-schema#")
xsdNS = RDF.NS("http://www.w3.org/2001/XMLSchema#")

# Namespaces of the ontologies hosted on jumpgate.caltech.edu.
submissionOntology = RDF.NS(
    "http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#")
dafTermOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/UcscDaf#")
libraryOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/LibraryOntology#")
inventoryOntology = RDF.NS(
    "http://jumpgate.caltech.edu/wiki/InventoryOntology#")
submissionLog = RDF.NS("http://jumpgate.caltech.edu/wiki/SubmissionsLog/")

# strftime/strptime patterns for ISO 8601 timestamps, with and without
# a fractional-seconds (microsecond) component.
ISOFORMAT_MS = "%Y-%m-%dT%H:%M:%S.%f"
ISOFORMAT_SHORT = "%Y-%m-%dT%H:%M:%S"
28
def sparql_query(model, query_filename):
    """Execute the SPARQL query stored in a file and print every result row.

    :param model: model object handed to ``RDF.SPARQLQuery.execute``
    :param query_filename: path of the text file containing the query

    Each binding of a row is written to stdout as ``name: value``, walking
    the row's items in reverse order; a blank line follows every row.
    Nothing is returned.
    """
    logger.info("Opening: %s", query_filename)
    # Read inside a context manager so the file handle is closed right away
    # instead of lingering until garbage collection.
    with open(query_filename, 'r') as query_stream:
        query_body = query_stream.read()

    query = RDF.SPARQLQuery(query_body)
    results = query.execute(model)
    for row in results:
        # list() keeps the reversal valid whether items() yields a list or
        # a view object.
        for name, value in reversed(list(row.items())):
            print("{0}: {1}".format(name, value))
        # blank separator line between rows
        print("")
41
42
def blankOrUri(value=None):
    """Return an RDF node for ``value``.

    :param value: ``None``, a URI given as a string, or an ``RDF.Node``
    :returns: ``RDF.Node()`` (constructed with no arguments) when value is
        None; ``RDF.Node(uri_string=value)`` when value is a string; value
        itself when it already is an ``RDF.Node``; ``None`` for any other
        type.
    """
    if value is None:
        return RDF.Node()
    # isinstance, unlike an exact type() comparison, also accepts
    # subclasses of str and unicode.
    if isinstance(value, types.StringTypes):
        return RDF.Node(uri_string=value)
    if isinstance(value, RDF.Node):
        return value
    # Unsupported input types are signalled with None rather than an error.
    return None
53
54
def toTypedNode(value):
    """Convert a Python value into an RDF literal node.

    The datatype attached to the literal depends on the Python type:

    * ``bool``       -> xsd:boolean, written as ``'1'`` or ``'0'``
    * ``int``/``long`` -> xsd:decimal
    * ``float``      -> xsd:float
    * ``datetime``   -> xsd:dateTime (fractional seconds are only written
      when the microsecond field is non-zero)
    * anything else  -> plain literal without a datatype, UTF-8 encoded

    :param value: the Python value to convert
    :returns: an ``RDF.Node`` literal
    :raises UnicodeDecodeError: if a byte string is not valid UTF-8
    """
    # bool has to be tested before the integer types because bool is a
    # subclass of int.
    if isinstance(value, bool):
        value_type = xsdNS['boolean'].uri
        value = u'1' if value else u'0'
    elif isinstance(value, (int, long)):
        value_type = xsdNS['decimal'].uri
        value = unicode(value)
    elif isinstance(value, float):
        value_type = xsdNS['float'].uri
        value = unicode(value)
    elif isinstance(value, datetime):
        value_type = xsdNS['dateTime'].uri
        if value.microsecond == 0:
            value = value.strftime(ISOFORMAT_SHORT)
        else:
            value = value.strftime(ISOFORMAT_MS)
    else:
        value_type = None
        if isinstance(value, str):
            # unicode(value) would decode byte strings with the implicit
            # ASCII codec and fail on any non-ASCII byte; treat byte
            # strings as UTF-8, matching the encoding used on output.
            value = value.decode('utf-8')
        else:
            value = unicode(value)

    if value_type is None:
        return RDF.Node(literal=value.encode('utf-8'))
    return RDF.Node(literal=value, datatype=value_type)
83
def fromTypedNode(node):
    """Convert a typed RDF literal node back into a Python value.

    The datatype URI of the literal, with the XML Schema namespace prefix
    removed, selects the conversion:

    * ``boolean``  -> True for ``1``/``yes``/``true``, False for
      ``0``/``no``/``false`` (case-insensitive)
    * ``decimal`` without a ``.`` -> int
    * ``decimal``, ``float``, ``double`` -> float
    * ``dateTime`` -> datetime, parsed with or without fractional seconds
    * ``string`` and every other datatype -> the literal string unchanged

    :param node: an object exposing ``literal_value`` with ``'datatype'``
        and ``'string'`` entries, or None
    :returns: the converted value, or None when node is None
    :raises ValueError: for an unrecognized boolean literal, or a dateTime
        literal matching neither ISO format
    """
    if node is None:
        return None

    value_type = str(node.literal_value['datatype'])
    # chop off xml schema declaration
    value_type = value_type.replace(str(xsdNS[''].uri), '')
    literal = node.literal_value['string']
    literal_lower = literal.lower()

    if value_type == 'boolean':
        if literal_lower in ('1', 'yes', 'true'):
            return True
        elif literal_lower in ('0', 'no', 'false'):
            return False
        else:
            raise ValueError("Unrecognized boolean %s" % (literal,))
    elif value_type == 'decimal' and '.' not in literal:
        return int(literal)
    elif value_type in ('decimal', 'float', 'double'):
        return float(literal)
    elif value_type == 'string':
        return literal
    # Compare with ==, not "in ('dateTime')": the latter is a substring test
    # against a plain string, so e.g. the datatype 'date' would wrongly be
    # parsed as a dateTime.
    elif value_type == 'dateTime':
        try:
            return datetime.strptime(literal, ISOFORMAT_MS)
        except ValueError:
            # no fractional seconds present; retry with the short format
            return datetime.strptime(literal, ISOFORMAT_SHORT)
    return literal
113
114
def get_model(model_name=None, directory=None):
    """Create an RDF model backed by memory or by an on-disk hash store.

    :param model_name: name passed to ``RDF.HashStorage``; when None an
        ``RDF.MemoryStorage`` is used instead
    :param directory: directory placed in the hash storage options;
        defaults to the current working directory
    :returns: an ``RDF.Model`` wrapping the selected storage
    """
    directory = os.getcwd() if directory is None else directory

    if model_name is not None:
        options = "hash-type='bdb',dir='{0}'".format(directory)
        storage = RDF.HashStorage(model_name, options=options)
        logger.info("Using {0} with options {1}".format(model_name, options))
    else:
        storage = RDF.MemoryStorage()
        logger.info("Using RDF Memory model")

    return RDF.Model(storage)
129
130
def load_into_model(model, parser_name, filename, ns=None):
    """Read a file and parse its contents into ``model``.

    :param model: model forwarded to ``load_string_into_model``
    :param parser_name: parser name forwarded to ``load_string_into_model``
    :param filename: path of the file to read
    :param ns: optional namespace forwarded to ``load_string_into_model``
    :raises IOError: if ``filename`` does not exist
    """
    if not os.path.exists(filename):
        raise IOError("Can't find {0}".format(filename))

    # Use a context manager so the file handle is released as soon as the
    # contents have been read.
    with open(filename, 'r') as stream:
        data = stream.read()
    load_string_into_model(model, parser_name, data, ns)
137
138
def load_string_into_model(model, parser_name, data, ns=None):
    """Parse RDF text held in a string and add the result to ``model``.

    :param model: model handed to ``parse_string_into_model``
    :param parser_name: name used to construct the ``RDF.Parser``
    :param data: the text to parse
    :param ns: third argument given to ``parse_string_into_model``;
        ``"http://localhost/"`` is substituted when it is None
    """
    namespace = "http://localhost/" if ns is None else ns
    RDF.Parser(name=parser_name).parse_string_into_model(
        model, data, namespace)
145
146
def get_serializer(name='turtle'):
    """Build a serializer that already knows our usual namespace prefixes.

    :param name: serializer name given to ``RDF.Serializer``
    :returns: the configured ``RDF.Serializer``
    """
    writer = RDF.Serializer(name=name)

    prefix_table = (
        # really standard stuff
        ('owl', owlNS),
        ('rdf', rdfNS),
        ('rdfs', rdfsNS),
        ('xsd', xsdNS),
        # application-specific vocabularies; open question whether these
        # belong in a general-purpose helper
        ('libraryOntology', libraryOntology),
        ('ucscSubmission', submissionOntology),
        ('ucscDaf', dafTermOntology),
    )
    for prefix, namespace in prefix_table:
        writer.set_namespace(prefix, namespace._prefix)

    return writer
162
def dump_model(model):
    """Print ``model`` to stdout using the serializer from get_serializer().
    """
    text = get_serializer().serialize_model_to_string(model)
    print(text)