Don't use xsd:string for string data types in toTypedNode
[htsworkflow.git] / htsworkflow / util / rdfhelp.py
1 """Helper features for working with librdf
2 """
3 import logging
4 import os
5 import types
6
7 import RDF
8
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Namespace factories for the standard vocabularies used by the helpers below.
owlNS = RDF.NS("http://www.w3.org/2002/07/owl#")
dublinCoreNS = RDF.NS("http://purl.org/dc/elements/1.1/")
rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfsNS = RDF.NS("http://www.w3.org/2000/01/rdf-schema#")
xsdNS = RDF.NS("http://www.w3.org/2001/XMLSchema#")

# Namespace factories for the project's own (jumpgate-hosted) ontologies.
submissionOntology = RDF.NS(
    "http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#")
dafTermOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/UcscDaf#")
libraryOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/LibraryOntology#")
submissionLog = RDF.NS("http://jumpgate.caltech.edu/wiki/SubmissionsLog/")
23
def sparql_query(model, query_filename):
    """Execute a SPARQL query read from a file and print the results.

    The whole file is read as the query text, run against ``model``, and
    every result row is printed to stdout as ``name: value`` lines (in the
    reverse of the order ``row.items()`` yields them), followed by a blank
    line separating rows.

    :param model: the RDF model to run the query against.
    :param query_filename: path of the file holding the SPARQL query text.
    """
    logger.info("Opening: %s", query_filename)
    # Use a context manager so the file handle is closed deterministically
    # instead of waiting for garbage collection.
    with open(query_filename, 'r') as query_file:
        query_body = query_file.read()
    query = RDF.SPARQLQuery(query_body)
    results = query.execute(model)
    for row in results:
        # list(...) keeps the reversal working whether items() returns a
        # list or a view object.
        for k, v in reversed(list(row.items())):
            # Single-argument parenthesised print behaves the same under
            # the print statement and the print function.
            print("{0}: {1}".format(k, v))
        print("")
36
37
def blankOrUri(value=None):
    """Coerce ``value`` into an RDF node.

    :param value: ``None``, a string holding a URI, or an existing
        ``RDF.Node``.
    :returns: ``RDF.Node()`` when ``value`` is ``None`` (presumably a blank
        node, going by the function name), a node built with
        ``uri_string=value`` for strings, ``value`` itself when it already
        is an ``RDF.Node``, and ``None`` for any other type.
    """
    node = None
    if value is None:
        node = RDF.Node()
    elif isinstance(value, types.StringTypes):
        # isinstance (rather than an exact type() match) also accepts
        # subclasses of str/unicode, which previously fell through and
        # silently produced None.
        node = RDF.Node(uri_string=value)
    elif isinstance(value, RDF.Node):
        node = value

    return node
48
49
def toTypedNode(value):
    """Wrap a Python value in an RDF literal node.

    Booleans become ``u'1'``/``u'0'`` typed as xsd:boolean, ints and longs
    are typed as xsd:decimal, and floats as xsd:float.  Every other value
    is converted with ``unicode()`` and stored as a plain literal with no
    datatype attached.
    """
    kind = type(value)

    if kind == types.BooleanType:
        datatype = xsdNS['boolean'].uri
        text = u'1' if value else u'0'
        return RDF.Node(literal=text, datatype=datatype)

    if kind in (types.IntType, types.LongType):
        datatype = xsdNS['decimal'].uri
        return RDF.Node(literal=unicode(value), datatype=datatype)

    if kind == types.FloatType:
        datatype = xsdNS['float'].uri
        return RDF.Node(literal=unicode(value), datatype=datatype)

    # Anything else (strings included) is emitted as an untyped literal.
    return RDF.Node(literal=unicode(value))
72
def fromTypedNode(node):
    """Convert an RDF literal node into a native Python value.

    The node's datatype URI is reduced to its XML Schema local name and
    used to pick the conversion:

    * ``boolean``  -> ``True`` / ``False`` (accepts 1/yes/true and
      0/no/false, case-insensitively)
    * ``decimal``  -> ``int`` when the text has no ``.``, else ``float``
    * ``float`` / ``double`` -> ``float``
    * ``string`` and any unrecognised or missing datatype -> the literal
      text unchanged

    :param node: a node exposing ``literal_value`` with ``'datatype'`` and
        ``'string'`` entries, or ``None``.
    :returns: the converted value, or ``None`` when ``node`` is ``None``.
    :raises ValueError: for an xsd:boolean whose text is not recognised.
    :raises NotImplementedError: for xsd:dateTime, which is not parsed yet.
    """
    if node is None:
        return None

    value_type = str(node.literal_value['datatype'])
    # chop off xml schema declaration
    value_type = value_type.replace(str(xsdNS[''].uri), '')
    literal = node.literal_value['string']
    literal_lower = literal.lower()

    if value_type == 'boolean':
        if literal_lower in ('1', 'yes', 'true'):
            return True
        elif literal_lower in ('0', 'no', 'false'):
            return False
        else:
            raise ValueError("Unrecognized boolean %s" % (literal,))
    elif value_type == 'decimal' and '.' not in literal:
        return int(literal)
    elif value_type in ('decimal', 'float', 'double'):
        return float(literal)
    elif value_type == 'string':
        # Compare with ==: ('string') is just a str, so the former ``in``
        # test was an accidental substring match.
        return literal
    elif value_type == 'dateTime':
        # Same substring pitfall as above ('date' used to match here).
        # NotImplementedError is the exception class; NotImplemented is a
        # non-callable constant and produced a TypeError instead.
        raise NotImplementedError('need to parse isoformat date-time')

    return literal
100
101     
def get_model(model_name=None, directory=None):
    """Build an RDF model, in memory or backed by hash storage.

    :param model_name: name handed to ``RDF.HashStorage``; when ``None`` an
        ``RDF.MemoryStorage`` is used instead.
    :param directory: directory placed in the hash storage options;
        defaults to the current working directory.
    :returns: an ``RDF.Model`` wrapping the selected storage.
    """
    if directory is None:
        directory = os.getcwd()

    if model_name is not None:
        options = "hash-type='bdb',dir='{0}'".format(directory)
        storage = RDF.HashStorage(model_name, options=options)
        logger.info("Using {0} with options {1}".format(model_name, options))
    else:
        storage = RDF.MemoryStorage()
        logger.info("Using RDF Memory model")

    return RDF.Model(storage)
116         
117
def load_into_model(model, parser_name, filename, ns=None):
    """Parse the contents of a file into an RDF model.

    :param model: the model that receives the parsed statements.
    :param parser_name: name passed to ``RDF.Parser`` to select the parser.
    :param filename: path of the file to read.
    :param ns: passed through as the third argument of
        ``parse_string_into_model`` (presumably the base URI -- confirm
        against the librdf bindings).
    :raises IOError: if ``filename`` does not exist.
    """
    if not os.path.exists(filename):
        raise IOError("Can't find {0}".format(filename))

    # Read through a context manager so the handle is closed promptly
    # rather than leaked until garbage collection.
    with open(filename, 'r') as stream:
        data = stream.read()
    rdf_parser = RDF.Parser(name=parser_name)
    rdf_parser.parse_string_into_model(model, data, ns)
125
126
def get_serializer(name='turtle'):
    """Create a serializer pre-loaded with this module's namespace prefixes.

    :param name: serializer name handed to ``RDF.Serializer``.
    :returns: the serializer with the standard and application prefixes
        registered.
    """
    writer = RDF.Serializer(name=name)

    prefixes = (
        # really standard stuff
        ('owl', owlNS),
        ('rdf', rdfNS),
        ('rdfs', rdfsNS),
        ('xsd', xsdNS),
        # should these be here, kind of specific to an application
        ('libraryOntology', libraryOntology),
        ('ucscSubmission', submissionOntology),
        ('ucscDaf', dafTermOntology),
    )
    for label, namespace in prefixes:
        writer.set_namespace(label, namespace._prefix)

    return writer
142