Minor tweaks to deal with the older version of librdf on Ubuntu 10.04
[htsworkflow.git] / htsworkflow / util / test / test_rdfhelp.py
1 import os
2 import unittest
3 import types
4
5
6 from datetime import datetime
7
8 from htsworkflow.util.rdfhelp import \
9      add_default_schemas, \
10      blankOrUri, \
11      dcNS, \
12      dump_model, \
13      fromTypedNode, \
14      get_model, \
15      guess_parser, \
16      guess_parser_by_extension, \
17      load_string_into_model, \
18      owlNS, \
19      rdfNS, \
20      rdfsNS, \
21      remove_schemas, \
22      toTypedNode, \
23      stripNamespace, \
24      simplify_uri, \
25      sanitize_literal, \
26      xsdNS
27
28 try:
29     import RDF
30
31     class TestRDFHelp(unittest.TestCase):
32         def test_from_none(self):
33           self.failUnlessEqual(fromTypedNode(None), None)
34
35         def test_typed_node_boolean(self):
36             node = toTypedNode(True)
37             self.failUnlessEqual(node.literal_value['string'], u'1')
38             self.failUnlessEqual(str(node.literal_value['datatype']),
39                                  'http://www.w3.org/2001/XMLSchema#boolean')
40
41         def test_bad_boolean(self):
42             node = RDF.Node(literal='bad', datatype=xsdNS['boolean'].uri)
43             self.failUnlessRaises(ValueError, fromTypedNode, node)
44
45         def test_typed_node_string(self):
46             node = toTypedNode('hello')
47             self.failUnlessEqual(node.literal_value['string'], u'hello')
48             self.failUnless(node.literal_value['datatype'] is None)
49
50         def test_typed_real_like(self):
51             num = 3.14
52             node = toTypedNode(num)
53             self.failUnlessEqual(fromTypedNode(node), num)
54
55         def test_typed_integer(self):
56             num = 3
57             node = toTypedNode(num)
58             self.failUnlessEqual(fromTypedNode(node), num)
59             self.failUnlessEqual(type(fromTypedNode(node)), type(num))
60
61         def test_typed_node_string(self):
62             s = "Argh matey"
63             node = toTypedNode(s)
64             self.failUnlessEqual(fromTypedNode(node), s)
65             self.failUnlessEqual(type(fromTypedNode(node)), types.UnicodeType)
66
67         def test_blank_or_uri_blank(self):
68             node = blankOrUri()
69             self.failUnlessEqual(node.is_blank(), True)
70
71         def test_blank_or_uri_url(self):
72             s = 'http://google.com'
73             node = blankOrUri(s)
74             self.failUnlessEqual(node.is_resource(), True)
75             self.failUnlessEqual(str(node.uri), s)
76
77         def test_blank_or_uri_node(self):
78             s = RDF.Node(RDF.Uri('http://google.com'))
79             node = blankOrUri(s)
80             self.failUnlessEqual(node.is_resource(), True)
81             self.failUnlessEqual(node, s)
82
83         def test_unicode_node_roundtrip(self):
84             literal = u'\u5927'
85             roundtrip = fromTypedNode(toTypedNode(literal))
86             self.failUnlessEqual(roundtrip, literal)
87             self.failUnlessEqual(type(roundtrip), types.UnicodeType)
88
89         def test_datetime_no_microsecond(self):
90             dateTimeType = xsdNS['dateTime'].uri
91             short_isostamp = '2011-12-20T11:44:25'
92             short_node = RDF.Node(literal=short_isostamp,
93                                  datatype=dateTimeType)
94             short_datetime = datetime(2011,12,20,11,44,25)
95
96             self.assertEqual(fromTypedNode(short_node), short_datetime)
97             self.assertEqual(toTypedNode(short_datetime), short_node)
98             self.assertEqual(fromTypedNode(toTypedNode(short_datetime)),
99                              short_datetime)
100
101         def test_datetime_with_microsecond(self):
102             dateTimeType = xsdNS['dateTime'].uri
103             long_isostamp = '2011-12-20T11:44:25.081776'
104             long_node = RDF.Node(literal=long_isostamp,
105                                  datatype=dateTimeType)
106             long_datetime = datetime(2011,12,20,11,44,25,81776)
107
108             self.assertEqual(fromTypedNode(long_node), long_datetime)
109             self.assertEqual(toTypedNode(long_datetime), long_node)
110             self.assertEqual(fromTypedNode(toTypedNode(long_datetime)),
111                              long_datetime)
112
113         def test_strip_namespace_uri(self):
114             nsOrg = RDF.NS('example.org/example#')
115             nsCom = RDF.NS('example.com/example#')
116
117             term = 'foo'
118             node = nsOrg[term]
119             self.failUnlessEqual(stripNamespace(nsOrg, node), term)
120             self.failUnlessEqual(stripNamespace(nsCom, node), None)
121             self.failUnlessEqual(stripNamespace(nsOrg, node.uri), term)
122
123         def test_strip_namespace_exceptions(self):
124             nsOrg = RDF.NS('example.org/example#')
125             nsCom = RDF.NS('example.com/example#')
126
127             node = toTypedNode('bad')
128             self.failUnlessRaises(ValueError, stripNamespace, nsOrg, node)
129             self.failUnlessRaises(ValueError, stripNamespace, nsOrg, nsOrg)
130
131         def test_simplify_uri(self):
132             DATA = [('http://asdf.org/foo/bar', 'bar'),
133                     ('http://asdf.org/foo/bar#bleem', 'bleem'),
134                     ('http://asdf.org/foo/bar/', 'bar'),
135                     ('http://asdf.org/foo/bar?was=foo', 'was=foo')]
136
137             for uri, expected in DATA:
138                 self.assertEqual(simplify_uri(uri), expected)
139
140             for uri, expected in DATA:
141                 n = RDF.Uri(uri)
142                 self.assertEqual(simplify_uri(n), expected)
143
144             for uri, expected in DATA:
145                 n = RDF.Node(RDF.Uri(uri))
146                 self.assertEqual(simplify_uri(n), expected)
147
148             # decoding literals is questionable
149             n = toTypedNode('http://foo/bar')
150             self.assertRaises(ValueError, simplify_uri, n)
151
152         def test_owl_import(self):
153             path, name = os.path.split(__file__)
154             #loc = 'file://'+os.path.abspath(path)+'/'
155             loc = os.path.abspath(path)+'/'
156             model = get_model()
157             fragment = '''
158 @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
159 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
160 @prefix owl: <http://www.w3.org/2002/07/owl#> .
161
162 _:a owl:imports "{loc}extra.turtle" .
163 '''.format(loc=loc)
164             load_string_into_model(model, 'turtle', fragment, loc)
165             tc = RDF.Node(RDF.Uri('http://jumpgate.caltech.edu/wiki/TestCase'))
166             query = RDF.Statement(tc, rdfsNS['label'], None)
167             result = list(model.find_statements(query))
168             self.failUnlessEqual(len(result), 1)
169             self.failUnlessEqual(str(result[0].object), 'TestCase')
170
171         def test_sanitize_literal_text(self):
172             self.failUnlessRaises(ValueError, sanitize_literal, "hi")
173             hello_text = "hello"
174             hello_none = RDF.Node(hello_text)
175             self.failUnlessEqual(str(sanitize_literal(hello_none)),
176                                  hello_text)
177             hello_str = RDF.Node(literal=hello_text,
178                                  datatype=xsdNS['string'].uri)
179             hello_clean = sanitize_literal(hello_str)
180             self.failUnlessEqual(hello_clean.literal_value['string'],
181                                  hello_text)
182
183         def test_sanitize_literal_empty_string(self):
184             value = ""
185             value_node = RDF.Node(value)
186             self.assertEqual(str(sanitize_literal(value_node)), value)
187
188         def test_sanitize_literal_html(self):
189             hello = "hello <a onload='javascript:alert(\"foo\");' href='http://google.com'>google.com</a>, whats up?"
190             hello_clean = 'hello <a href="http://google.com">google.com</a>, whats up?'
191             hello_node = RDF.Node(literal=hello,
192                                   datatype=xsdNS['string'].uri)
193             hello_sanitized = sanitize_literal(hello_node)
194             self.failUnlessEqual(hello_sanitized.literal_value['string'],
195                                  hello_clean)
196
197             hostile = "hi <b>there</b><script type='text/javascript>alert('boo');</script><a href='javascript:alert('poke')>evil</a> scammer"
198             hostile_node = RDF.Node(hostile)
199             hostile_sanitized = sanitize_literal(hostile_node)
200             # so it drops the stuff after the javascript link.
201             # I suppose it could be worse
202             hostile_result = """hi <b>there</b>"""
203             self.failUnlessEqual(str(hostile_sanitized), hostile_result)
204
205         def test_guess_parser_from_file(self):
206             DATA = [
207                 ('/a/b/c.rdf', 'rdfxml'),
208                 ('/a/b/c.xml', 'rdfxml'),
209                 ('/a/b/c.html', 'rdfa'),
210                 ('/a/b/c.turtle', 'turtle'),
211                 ('http://foo.bar/bleem.turtle', 'turtle')]
212             for path, parser in DATA:
213                 self.assertEqual(guess_parser_by_extension(path), parser)
214                 self.assertEqual(guess_parser(None, path), parser)
215
216             DATA = [
217                 ('application/rdf+xml', 'http://a.org/b/c', 'rdfxml'),
218                 ('application/x-turtle', 'http://a.org/b/c', 'turtle'),
219                 ('text/html', 'http://a.org/b/c', 'rdfa'),
220                 ('text/html', 'http://a.org/b/c.html', 'rdfa'),
221                 ('text/plain', 'http://a.org/b/c.turtle', 'turtle'),
222                 ('text/plain', 'http://a.org/b/c', 'guess')
223             ]
224             for contenttype, url, parser in DATA:
225                 self.assertEqual(guess_parser(contenttype, url), parser)
226
227     class TestRDFSchemas(unittest.TestCase):
228         def test_rdf_schema(self):
229             """Does it basically work?
230             """
231             model = get_model()
232             self.assertEqual(model.size(), 0)
233             add_default_schemas(model)
234             self.assertTrue(model.size() > 0)
235             remove_schemas(model)
236             self.assertEqual(model.size(), 0)
237
238         def test_included_schemas(self):
239             model = get_model()
240             add_default_schemas(model)
241
242             # rdf test
243             s = RDF.Statement(rdfNS[''], dcNS['title'], None)
244             title = model.get_target(rdfNS[''], dcNS['title'])
245             self.assertTrue(title is not None)
246
247             s = RDF.Statement(rdfNS['Property'], rdfNS['type'], rdfsNS['Class'])
248             self.assertTrue(model.contains_statement(s))
249
250             # rdfs test
251             s = RDF.Statement(rdfsNS['Class'], rdfNS['type'], rdfsNS['Class'])
252             self.assertTrue(model.contains_statement(s))
253
254             s = RDF.Statement(owlNS['inverseOf'], rdfNS['type'],
255                               rdfNS['Property'])
256             self.assertTrue(model.contains_statement(s))
257
258
259     def suite():
260         return unittest.makeSuite(TestRDFHelp, 'test')
261 except ImportError, e:
262     print "Unable to test rdfhelp"
263
264     def suite():
265         return None
266
# When executed as a script, run the tests returned by the module-level
# suite() function.
if __name__ == "__main__":
    unittest.main(defaultTest='suite')