Add ability to refresh library data from our htsw server
[htsworkflow.git] / htsworkflow / util / rdfhelp.py
1 """Helper features for working with librdf
2 """
3 from datetime import datetime
4 import logging
5 import os
6 import types
7
8 import RDF
9
logger = logging.getLogger(__name__)

# Namespaces of the standard ontologies (OWL, Dublin Core, RDF, RDFS, XSD)
owlNS = RDF.NS("http://www.w3.org/2002/07/owl#")
dublinCoreNS = RDF.NS("http://purl.org/dc/elements/1.1/")
rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfsNS = RDF.NS("http://www.w3.org/2000/01/rdf-schema#")
xsdNS = RDF.NS("http://www.w3.org/2001/XMLSchema#")

# Namespaces of the ontologies hosted on our own wiki
submissionOntology = RDF.NS(
    "http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#")
dafTermOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/UcscDaf#")
libraryOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/LibraryOntology#")
inventoryOntology = RDF.NS(
    "http://jumpgate.caltech.edu/wiki/InventoryOntology#")
submissionLog = RDF.NS("http://jumpgate.caltech.edu/wiki/SubmissionsLog/")

# strftime/strptime patterns for ISO timestamps, with and without
# a fractional-seconds component
ISOFORMAT_MS = "%Y-%m-%dT%H:%M:%S.%f"
ISOFORMAT_SHORT = "%Y-%m-%dT%H:%M:%S"
28
def sparql_query(model, query_filename):
    """Execute a SPARQL query stored in a file and print its results.

    :param model: RDF model the query is executed against.
    :param query_filename: path of the text file holding the SPARQL query.

    The result rows are printed with display_query_results(); nothing
    is returned.
    """
    logger.info("Opening: %s", query_filename)
    # Read through a context manager so the file handle is closed as soon
    # as the query text is loaded, even if reading fails.
    with open(query_filename, 'r') as query_stream:
        query_body = query_stream.read()
    query = RDF.SPARQLQuery(query_body)
    results = query.execute(model)
    display_query_results(results)
37
def display_query_results(results):
    """Print every result row as "name: value" lines.

    :param results: iterable of rows; each row must provide items()
        yielding (name, value) pairs.

    The pairs of a row are printed in the reverse of the order items()
    yields them, and each row is followed by one blank line.
    """
    for row in results:
        # list() first: slicing/reversing only works on a real sequence,
        # not on a dictionary view.
        for name, value in reversed(list(row.items())):
            print("{0}: {1}".format(name, value))
        # single-argument call prints just a newline under both the
        # print statement and the print function
        print("")
44
45
def blankOrUri(value=None):
    """Return an RDF node for value.

    :param value: None, a URI string, or an existing RDF.Node.
    :returns: RDF.Node() when value is None, a node built from
        uri_string=value when value is a string, value itself when it
        already is an RDF.Node, and None for any other type.
    """
    if value is None:
        return RDF.Node()
    # isinstance also accepts subclasses of str/unicode, which the former
    # exact type() comparison silently turned into a None result.
    if isinstance(value, types.StringTypes):
        return RDF.Node(uri_string=value)
    if isinstance(value, RDF.Node):
        return value
    # Unsupported types keep producing None so existing callers that
    # check for it are unaffected.
    return None
56
57
def toTypedNode(value):
    """Convert a python value into an RDF literal node.

    Mapping applied:
      * bool           -> xsd:boolean, written as '1' or '0'
      * int / long     -> xsd:decimal
      * float          -> xsd:float
      * datetime       -> xsd:dateTime, ISO formatted; fractional seconds
                          are only written when microsecond is non-zero
      * anything else  -> plain (untyped) literal holding the utf-8
                          encoded unicode() of the value

    :param value: the python value to convert.
    :returns: an RDF.Node literal.
    """
    # bool has to be tested before the integer types because bool is a
    # subclass of int.
    if isinstance(value, types.BooleanType):
        value_type = xsdNS['boolean'].uri
        value = u'1' if value else u'0'
    elif isinstance(value, (types.IntType, types.LongType)):
        value_type = xsdNS['decimal'].uri
        value = unicode(value)
    elif isinstance(value, types.FloatType):
        value_type = xsdNS['float'].uri
        value = unicode(value)
    elif isinstance(value, datetime):
        value_type = xsdNS['dateTime'].uri
        if value.microsecond == 0:
            value = value.strftime(ISOFORMAT_SHORT)
        else:
            value = value.strftime(ISOFORMAT_MS)
    else:
        # No datatype applies: emit a plain literal.
        return RDF.Node(literal=unicode(value).encode('utf-8'))

    return RDF.Node(literal=value, datatype=value_type)
86
def fromTypedNode(node):
    """Convert a typed RDF literal node back into a python value.

    The node's datatype, with the XML Schema namespace stripped, selects
    the conversion:
      * boolean                 -> True / False
      * integer                 -> int
      * decimal                 -> int when the literal has no '.',
                                   otherwise float
      * float, double           -> float
      * string                  -> the literal text
      * dateTime                -> datetime, parsed with or without
                                   fractional seconds
      * anything else / untyped -> the literal text unchanged

    :param node: RDF literal node, or None.
    :returns: the converted value, or None when node is None.
    :raises ValueError: for an unrecognized boolean literal or a
        numeric/dateTime literal that cannot be parsed.
    """
    if node is None:
        return None

    value_type = str(node.literal_value['datatype'])
    # chop off xml schema declaration
    value_type = value_type.replace(str(xsdNS[''].uri), '')
    literal = node.literal_value['string']

    if value_type == 'boolean':
        literal_lower = literal.lower()
        if literal_lower in ('1', 'yes', 'true'):
            return True
        elif literal_lower in ('0', 'no', 'false'):
            return False
        else:
            raise ValueError("Unrecognized boolean %s" % (literal,))
    elif value_type == 'integer':
        return int(literal)
    elif value_type == 'decimal' and '.' not in literal:
        return int(literal)
    elif value_type in ('decimal', 'float', 'double'):
        return float(literal)
    # Compare with ==: "x in ('string')" is a substring test against the
    # str 'string' (the parentheses do not make a tuple), which wrongly
    # matched partial names such as 'date' or 'time' for dateTime.
    elif value_type == 'string':
        return literal
    elif value_type == 'dateTime':
        try:
            return datetime.strptime(literal, ISOFORMAT_MS)
        except ValueError:
            # no fractional seconds in the literal
            return datetime.strptime(literal, ISOFORMAT_SHORT)
    return literal
118
119
def get_model(model_name=None, directory=None):
    """Create an RDF model, either in memory or backed by hash storage.

    :param model_name: name handed to RDF.HashStorage; when None an
        RDF.MemoryStorage is used instead.
    :param directory: directory placed in the hash storage options;
        defaults to the current working directory.
    :returns: an RDF.Model wrapping the selected storage.
    """
    if directory is None:
        directory = os.getcwd()

    if model_name is not None:
        storage_options = "hash-type='bdb',dir='{0}'".format(directory)
        backend = RDF.HashStorage(model_name, options=storage_options)
        logger.info("Using {0} with options {1}".format(model_name,
                                                        storage_options))
    else:
        backend = RDF.MemoryStorage()
        logger.info("Using RDF Memory model")

    return RDF.Model(backend)
134
135
def load_into_model(model, parser_name, filename, ns=None):
    """Parse the contents of a file into an RDF model.

    :param model: RDF model receiving the parsed statements.
    :param parser_name: name of the RDF parser to use.
    :param filename: path of the file to read.
    :param ns: optional base namespace, passed through unchanged to
        load_string_into_model().
    :raises IOError: when filename does not exist.
    """
    if not os.path.exists(filename):
        raise IOError("Can't find {0}".format(filename))

    # Context manager guarantees the handle is closed once the data has
    # been read instead of leaking it until garbage collection.
    with open(filename, 'r') as stream:
        data = stream.read()
    load_string_into_model(model, parser_name, data, ns)
142
143
def load_string_into_model(model, parser_name, data, ns=None):
    """Parse RDF text held in a string into a model.

    :param model: RDF model receiving the parsed statements.
    :param parser_name: name given to RDF.Parser.
    :param data: the text to parse.
    :param ns: base namespace handed to the parser; "http://localhost/"
        is used when None.
    """
    base_uri = "http://localhost/" if ns is None else ns
    RDF.Parser(name=parser_name).parse_string_into_model(model, data, base_uri)
150
151
def get_serializer(name='turtle'):
    """Build a serializer that already knows our usual namespace prefixes.

    :param name: serializer name handed to RDF.Serializer.
    :returns: the configured RDF.Serializer.
    """
    writer = RDF.Serializer(name=name)

    prefix_table = (
        # really standard stuff
        ('owl', owlNS),
        ('rdf', rdfNS),
        ('rdfs', rdfsNS),
        ('xsd', xsdNS),
        # arguably application specific -- should these live here?
        ('libraryOntology', libraryOntology),
        ('ucscSubmission', submissionOntology),
        ('ucscDaf', dafTermOntology),
    )
    for prefix, namespace in prefix_table:
        writer.set_namespace(prefix, namespace._prefix)

    return writer
167
def dump_model(model):
    """Print the model serialized with the default get_serializer()."""
    text = get_serializer().serialize_model_to_string(model)
    print(text)