Further clean up ddf generation.
[htsworkflow.git] / htsworkflow / util / rdfhelp.py
1 """Helper features for working with librdf
2 """
3 import os
4 import types
5
6 import RDF
7
# Namespace factories for widely used public vocabularies.
owlNS = RDF.NS("http://www.w3.org/2002/07/owl#")
dublinCoreNS = RDF.NS("http://purl.org/dc/elements/1.1/")
rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfsNS = RDF.NS("http://www.w3.org/2000/01/rdf-schema#")
xsdNS = RDF.NS("http://www.w3.org/2001/XMLSchema#")

# Namespace factories for the project's own ontologies (jumpgate wiki).
submissionOntology = RDF.NS(
    "http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#")
dafTermOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/UcscDaf#")
libraryOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/LibraryOntology#")
submissionLog = RDF.NS("http://jumpgate.caltech.edu/wiki/SubmissionsLog/")
20
def sparql_query(model, query_filename):
    """Execute the SPARQL query stored in a file and print its results.

    :param model: RDF model the query is executed against.
    :param query_filename: path of a text file holding the SPARQL query.

    Every result row is written to stdout as ``name: value`` lines, with
    the bindings in the reverse of the order ``row.items()`` yields them,
    followed by one blank line.  Nothing is returned.
    """
    # Read inside a context manager so the file handle is released
    # immediately rather than whenever the garbage collector gets to it.
    with open(query_filename, 'r') as query_stream:
        query_body = query_stream.read()

    query = RDF.SPARQLQuery(query_body)
    results = query.execute(model)
    for row in results:
        # list() keeps the slice valid even where items() is not a list.
        for k, v in list(row.items())[::-1]:
            print("{0}: {1}".format(k, v))
        # Blank separator line between rows.
        print("")
32
33
def blankOrUri(value=None):
    """Coerce ``value`` into an RDF node.

    :param value: ``None``, a string (``str`` or ``unicode``, including
        subclasses) holding a URI, or an existing ``RDF.Node``.
    :returns: a new blank node when ``value`` is ``None``; a node built
        from the URI string when ``value`` is a string; ``value`` itself
        when it is already an ``RDF.Node``; ``None`` for any other type.
    """
    if value is None:
        return RDF.Node()
    # isinstance (rather than an exact type comparison) so subclasses of
    # str/unicode are treated as URI strings too.
    if isinstance(value, types.StringTypes):
        return RDF.Node(uri_string=value)
    if isinstance(value, RDF.Node):
        return value

    # Unsupported input types are reported to the caller as None.
    return None
44
45
def toTypedNode(value):
    """Convert a Python value into an RDF literal node with an XSD datatype.

    :param value: the Python value to wrap.
    :returns: an ``RDF.Node`` literal.  Booleans become ``xsd:boolean``
        (``u'1'`` / ``u'0'``), ints and longs ``xsd:decimal``, floats
        ``xsd:float`` and strings ``xsd:string``.  Any other value is
        converted with ``unicode()`` and stored without a datatype.
    """
    # bool must be tested before the integer types: bool subclasses int.
    if isinstance(value, types.BooleanType):
        value_type = xsdNS['boolean'].uri
        value = u'1' if value else u'0'
    elif isinstance(value, (types.IntType, types.LongType)):
        value_type = xsdNS['decimal'].uri
        value = unicode(value)
    elif isinstance(value, types.FloatType):
        value_type = xsdNS['float'].uri
        value = unicode(value)
    elif isinstance(value, types.StringTypes):
        # Strings are passed through unchanged.
        value_type = xsdNS['string'].uri
    else:
        value_type = None
        value = unicode(value)

    return RDF.Node(literal=value, datatype=value_type)
66
def fromTypedNode(node):
    """Convert a typed RDF literal node back into a Python value.

    :param node: a literal node exposing ``literal_value`` with
        ``'datatype'`` and ``'string'`` entries, or ``None``.
    :returns: ``None`` when ``node`` is ``None``; a ``bool`` for
        ``xsd:boolean``; an ``int`` for ``xsd:decimal`` literals without a
        decimal point; a ``float`` for other ``decimal``, ``float`` and
        ``double`` literals; otherwise the literal string unchanged.
    :raises ValueError: if a boolean literal is not one of
        1/yes/true/0/no/false (case-insensitive).
    :raises NotImplementedError: for ``xsd:dateTime`` literals, which are
        not parsed yet.
    """
    if node is None:
        return None

    value_type = str(node.literal_value['datatype'])
    # chop off xml schema declaration, leaving just the short type name
    value_type = value_type.replace(str(xsdNS[''].uri), '')
    literal = node.literal_value['string']
    literal_lower = literal.lower()

    if value_type == 'boolean':
        if literal_lower in ('1', 'yes', 'true'):
            return True
        elif literal_lower in ('0', 'no', 'false'):
            return False
        else:
            raise ValueError("Unrecognized boolean %s" % (literal,))
    elif value_type == 'decimal' and '.' not in literal:
        # Integers are stored as xsd:decimal, so a decimal without a
        # fractional part is handed back as an int.
        return int(literal)
    elif value_type in ('decimal', 'float', 'double'):
        return float(literal)
    elif value_type == 'string':
        return literal
    elif value_type == 'dateTime':
        # Compare with ==: "x in ('dateTime')" is a substring test on a
        # plain string and would also match e.g. 'date'.
        raise NotImplementedError('need to parse isoformat date-time')

    # Unknown or missing datatype: return the raw literal text.
    return literal
94
95
def get_model(model_name=None, directory=None):
    """Create an RDF model, in memory or backed by on-disk hash storage.

    :param model_name: name handed to ``RDF.HashStorage``; when ``None``
        an ``RDF.MemoryStorage`` is used instead.
    :param directory: directory passed to the hash storage via its
        ``dir`` option; defaults to the current working directory.
    :returns: an ``RDF.Model`` wrapping the chosen storage.
    """
    storage_dir = os.getcwd() if directory is None else directory

    if model_name is not None:
        storage_options = "hash-type='bdb',dir='{0}'".format(storage_dir)
        backend = RDF.HashStorage(model_name, options=storage_options)
    else:
        backend = RDF.MemoryStorage()

    return RDF.Model(backend)
107         
108
def load_into_model(model, parser_name, filename, ns=None):
    """Parse the contents of a file into an RDF model.

    :param model: RDF model that receives the parsed statements.
    :param parser_name: name of the ``RDF.Parser`` to use.
    :param filename: path of the file to read.
    :param ns: passed through as the third argument of
        ``parse_string_into_model`` (presumably the base URI -- confirm
        against the librdf bindings).
    :raises IOError: if ``filename`` does not exist.
    """
    if not os.path.exists(filename):
        raise IOError("Can't find {0}".format(filename))

    # Read inside a context manager so the handle is closed even if the
    # read fails, instead of being leaked until garbage collection.
    with open(filename, 'r') as stream:
        data = stream.read()

    rdf_parser = RDF.Parser(name=parser_name)
    rdf_parser.parse_string_into_model(model, data, ns)
116
117
def get_serializer(name='turtle'):
    """Build an ``RDF.Serializer`` with the usual namespace prefixes set.

    :param name: serializer name handed to ``RDF.Serializer``.
    :returns: the serializer, with the prefixes below registered.
    """
    writer = RDF.Serializer(name=name)

    prefix_table = (
        # really standard stuff
        ('owl', owlNS),
        ('rdf', rdfNS),
        ('rdfs', rdfsNS),
        ('xsd', xsdNS),
        # should these be here, kind of specific to an application
        ('libraryOntology', libraryOntology),
        ('ucscSubmission', submissionOntology),
        ('ucscDaf', dafTermOntology),
    )
    for label, namespace in prefix_table:
        writer.set_namespace(label, namespace._prefix)

    return writer
133