8fb1424960571d4ab327ab2c4261b293eb1efb56
[htsworkflow.git] / htsworkflow / util / rdfhelp.py
"""Helper functions for working with librdf.
"""
3 import logging
4 import os
5 import types
6
7 import RDF
8
# module-level logger shared by the helpers below
logger = logging.getLogger(__name__)

# Standard ontology namespaces.
owlNS = RDF.NS("http://www.w3.org/2002/07/owl#")
dublinCoreNS = RDF.NS("http://purl.org/dc/elements/1.1/")
rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfsNS = RDF.NS("http://www.w3.org/2000/01/rdf-schema#")
xsdNS = RDF.NS("http://www.w3.org/2001/XMLSchema#")

# Internal (jumpgate.caltech.edu) ontologies.
submissionOntology = RDF.NS(
    "http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#")
dafTermOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/UcscDaf#")
libraryOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/LibraryOntology#")
inventoryOntology = RDF.NS(
    "http://jumpgate.caltech.edu/wiki/InventoryOntology#")
submissionLog = RDF.NS("http://jumpgate.caltech.edu/wiki/SubmissionsLog/")
24
def sparql_query(model, query_filename):
    """Execute a SPARQL query read from a file and print the results.

    :param model: RDF model the query is executed against.
    :param query_filename: path of the file holding the SPARQL query text.

    Each result row is printed as ``name: value`` lines, taking the row's
    ``items()`` in reverse order, followed by one blank line.  Nothing is
    returned.
    """
    logger.info("Opening: %s", query_filename)
    # A context manager closes the query file as soon as it has been read.
    with open(query_filename, 'r') as query_stream:
        query_body = query_stream.read()
    query = RDF.SPARQLQuery(query_body)
    results = query.execute(model)
    for row in results:
        # list() keeps the reversal working even if items() is not a list.
        for k, v in list(row.items())[::-1]:
            print("{0}: {1}".format(k, v))
        # blank separator line between rows
        print("")
37
38
def blankOrUri(value=None):
    """Return an RDF node corresponding to value.

    :param value: None, a URI string, or an existing RDF.Node.
    :returns: ``RDF.Node()`` when value is None, a node built with
        ``uri_string=value`` when value is a string, value itself when it
        is already an RDF.Node, and None for any other kind of value.
    """
    if value is None:
        return RDF.Node()
    # isinstance rather than an exact type match, so that str/unicode
    # subclasses are treated as URIs instead of silently yielding None.
    if isinstance(value, types.StringTypes):
        return RDF.Node(uri_string=value)
    if isinstance(value, RDF.Node):
        return value
    # Unsupported input: keep the historical "return None" contract.
    return None
49
50
def toTypedNode(value):
    """Convert a python value into an RDF literal node.

    Booleans become xsd:boolean literals (u'1' or u'0'), ints and longs
    become xsd:decimal and floats become xsd:float, each carrying the
    unicode text of the value.  Anything else becomes a literal without a
    datatype whose text is the UTF-8 encoded unicode form of the value.
    """
    kind = type(value)
    if kind == types.BooleanType:
        datatype = xsdNS['boolean'].uri
        text = u'1' if value else u'0'
    elif kind in (types.IntType, types.LongType):
        datatype = xsdNS['decimal'].uri
        text = unicode(value)
    elif kind == types.FloatType:
        datatype = xsdNS['float'].uri
        text = unicode(value)
    else:
        # No matching xsd type: emit a plain literal as UTF-8 bytes.
        return RDF.Node(literal=unicode(value).encode('utf-8'))

    return RDF.Node(literal=text, datatype=datatype)
73
def fromTypedNode(node):
    """Convert a typed RDF literal node into a python value.

    :param node: a node exposing ``literal_value`` with 'datatype' and
        'string' entries, or None.
    :returns: None for a None node; True/False for xsd:boolean; an int for
        an xsd:decimal without a decimal point; a float for other
        xsd:decimal, xsd:float and xsd:double values; otherwise the
        literal string unchanged.
    :raises ValueError: if an xsd:boolean literal is not one of
        1/yes/true/0/no/false (case-insensitive).
    :raises NotImplementedError: for xsd:dateTime literals, which are not
        parsed yet.
    """
    if node is None:
        return None

    value_type = str(node.literal_value['datatype'])
    # chop off xml schema declaration
    value_type = value_type.replace(str(xsdNS[''].uri), '')
    literal = node.literal_value['string']
    literal_lower = literal.lower()

    if value_type == 'boolean':
        if literal_lower in ('1', 'yes', 'true'):
            return True
        elif literal_lower in ('0', 'no', 'false'):
            return False
        else:
            raise ValueError("Unrecognized boolean %s" % (literal,))
    elif value_type == 'decimal' and '.' not in literal:
        return int(literal)
    elif value_type in ('decimal', 'float', 'double'):
        return float(literal)
    elif value_type == 'dateTime':
        # Compare for equality: ``in ('dateTime')`` was a substring test
        # against a bare string and also matched e.g. 'date' and 'time'.
        # NotImplemented is a constant, not an exception class; raising
        # NotImplementedError is what was intended.
        raise NotImplementedError('need to parse isoformat date-time')

    # xsd:string and every unrecognised datatype pass through unchanged.
    return literal
101
102
def get_model(model_name=None, directory=None):
    """Create an RDF model backed by memory or by an on-disk hash store.

    :param model_name: name handed to RDF.HashStorage; when None an
        RDF.MemoryStorage is used instead.
    :param directory: directory placed in the hash storage ``dir`` option;
        defaults to the current working directory.
    :returns: an RDF.Model wrapping the chosen storage.
    """
    directory = os.getcwd() if directory is None else directory

    if model_name is not None:
        options = "hash-type='bdb',dir='{0}'".format(directory)
        storage = RDF.HashStorage(model_name, options=options)
        logger.info("Using {0} with options {1}".format(model_name, options))
    else:
        storage = RDF.MemoryStorage()
        logger.info("Using RDF Memory model")

    return RDF.Model(storage)
117
118
def load_into_model(model, parser_name, filename, ns=None):
    """Read a file and load its contents into an RDF model.

    :param model: model that receives the parsed data.
    :param parser_name: name of the RDF parser to use.
    :param filename: path of the file to read.
    :param ns: forwarded unchanged to load_string_into_model.
    :raises IOError: if filename does not exist.
    """
    if not os.path.exists(filename):
        raise IOError("Can't find {0}".format(filename))

    # Read inside a context manager so the file handle is always closed.
    with open(filename, 'r') as stream:
        data = stream.read()
    load_string_into_model(model, parser_name, data, ns)
125
126
def load_string_into_model(model, parser_name, data, ns=None):
    """Parse RDF text and add the result to a model.

    :param model: model that receives the parsed data.
    :param parser_name: name used to construct the RDF.Parser.
    :param data: the RDF text to parse.
    :param ns: value passed to the parser together with the data;
        defaults to "http://localhost/".
    """
    base = "http://localhost/" if ns is None else ns
    RDF.Parser(name=parser_name).parse_string_into_model(model, data, base)
133
134
def get_serializer(name='turtle'):
    """Build a serializer that already knows our usual namespace prefixes.

    :param name: serializer name given to RDF.Serializer.
    :returns: the configured serializer.
    """
    # Registration order is kept: generic vocabularies first, then the
    # ones that are arguably specific to this application.
    prefixes = (
        ('owl', owlNS),
        ('rdf', rdfNS),
        ('rdfs', rdfsNS),
        ('xsd', xsdNS),
        ('libraryOntology', libraryOntology),
        ('ucscSubmission', submissionOntology),
        ('ucscDaf', dafTermOntology),
    )
    writer = RDF.Serializer(name=name)
    for prefix, namespace in prefixes:
        writer.set_namespace(prefix, namespace._prefix)
    return writer
150
def dump_model(model):
    """Print model to stdout, serialized by the default get_serializer()."""
    print(get_serializer().serialize_model_to_string(model))