Rename stripNamespace to strip_namespace
[htsworkflow.git] / htsworkflow / util / test / test_rdfhelp.py
1 import os
2 import types
3 from unittest2 import TestCase
4
5 from datetime import datetime
6
7 from htsworkflow.util.rdfhelp import \
8      add_default_schemas, \
9      blankOrUri, \
10      dcNS, \
11      dump_model, \
12      fromTypedNode, \
13      get_model, \
14      guess_parser, \
15      guess_parser_by_extension, \
16      load_string_into_model, \
17      owlNS, \
18      rdfNS, \
19      rdfsNS, \
20      remove_schemas, \
21      toTypedNode, \
22      strip_namespace, \
23      simplify_uri, \
24      sanitize_literal, \
25      xsdNS
26
27 try:
28     import RDF
29
30     class TestRDFHelp(TestCase):
31         def test_from_none(self):
32           self.assertEqual(fromTypedNode(None), None)
33
34         def test_typed_node_boolean(self):
35             node = toTypedNode(True)
36             self.assertIn(node.literal_value['string'], (u'1', u'true'))
37             self.assertEqual(str(node.literal_value['datatype']),
38                                  'http://www.w3.org/2001/XMLSchema#boolean')
39
40         def test_bad_boolean(self):
41             node = RDF.Node(literal='bad', datatype=xsdNS['boolean'].uri)
42             # older versions of librdf ~< 1.0.16 left the literal
43             # alone. and thus should fail the fromTypedNode call
44             # newer versions coerced the odd value to false.
45             try:
46                 self.assertFalse(fromTypedNode(node))
47             except ValueError as e:
48                 pass
49
50         def test_typed_node_string(self):
51             node = toTypedNode('hello')
52             self.assertEqual(node.literal_value['string'], u'hello')
53             self.assertTrue(node.literal_value['datatype'] is None)
54
55         def test_typed_real_like(self):
56             num = 3.14
57             node = toTypedNode(num)
58             self.assertEqual(fromTypedNode(node), num)
59
60         def test_typed_integer(self):
61             num = 3
62             node = toTypedNode(num)
63             self.assertEqual(fromTypedNode(node), num)
64             self.assertEqual(type(fromTypedNode(node)), type(num))
65
66         def test_typed_node_string(self):
67             s = "Argh matey"
68             node = toTypedNode(s)
69             self.assertEqual(fromTypedNode(node), s)
70             self.assertEqual(type(fromTypedNode(node)), types.UnicodeType)
71
72         def test_blank_or_uri_blank(self):
73             node = blankOrUri()
74             self.assertEqual(node.is_blank(), True)
75
76         def test_blank_or_uri_url(self):
77             s = 'http://google.com'
78             node = blankOrUri(s)
79             self.assertEqual(node.is_resource(), True)
80             self.assertEqual(str(node.uri), s)
81
82         def test_blank_or_uri_node(self):
83             s = RDF.Node(RDF.Uri('http://google.com'))
84             node = blankOrUri(s)
85             self.assertEqual(node.is_resource(), True)
86             self.assertEqual(node, s)
87
88         def test_unicode_node_roundtrip(self):
89             literal = u'\u5927'
90             roundtrip = fromTypedNode(toTypedNode(literal))
91             self.assertEqual(roundtrip, literal)
92             self.assertEqual(type(roundtrip), types.UnicodeType)
93
94         def test_datetime_no_microsecond(self):
95             dateTimeType = xsdNS['dateTime'].uri
96             short_isostamp = '2011-12-20T11:44:25'
97             short_node = RDF.Node(literal=short_isostamp,
98                                  datatype=dateTimeType)
99             short_datetime = datetime(2011,12,20,11,44,25)
100
101             self.assertEqual(fromTypedNode(short_node), short_datetime)
102             self.assertEqual(toTypedNode(short_datetime), short_node)
103             self.assertEqual(fromTypedNode(toTypedNode(short_datetime)),
104                              short_datetime)
105
106         def test_datetime_with_microsecond(self):
107             dateTimeType = xsdNS['dateTime'].uri
108             long_isostamp = '2011-12-20T11:44:25.081776'
109             long_node = RDF.Node(literal=long_isostamp,
110                                  datatype=dateTimeType)
111             long_datetime = datetime(2011,12,20,11,44,25,81776)
112
113             self.assertEqual(fromTypedNode(long_node), long_datetime)
114             self.assertEqual(toTypedNode(long_datetime), long_node)
115             self.assertEqual(fromTypedNode(toTypedNode(long_datetime)),
116                              long_datetime)
117
118         def test_strip_namespace_uri(self):
119             nsOrg = RDF.NS('example.org/example#')
120             nsCom = RDF.NS('example.com/example#')
121
122             term = 'foo'
123             node = nsOrg[term]
124             self.assertEqual(strip_namespace(nsOrg, node), term)
125             self.assertEqual(strip_namespace(nsCom, node), None)
126             self.assertEqual(strip_namespace(nsOrg, node.uri), term)
127
128         def test_strip_namespace_exceptions(self):
129             nsOrg = RDF.NS('example.org/example#')
130             nsCom = RDF.NS('example.com/example#')
131
132             node = toTypedNode('bad')
133             self.assertRaises(ValueError, strip_namespace, nsOrg, node)
134             self.assertRaises(ValueError, strip_namespace, nsOrg, nsOrg)
135
136         def test_simplify_uri(self):
137             DATA = [('http://asdf.org/foo/bar', 'bar'),
138                     ('http://asdf.org/foo/bar#bleem', 'bleem'),
139                     ('http://asdf.org/foo/bar/', 'bar'),
140                     ('http://asdf.org/foo/bar?was=foo', 'was=foo')]
141
142             for uri, expected in DATA:
143                 self.assertEqual(simplify_uri(uri), expected)
144
145             for uri, expected in DATA:
146                 n = RDF.Uri(uri)
147                 self.assertEqual(simplify_uri(n), expected)
148
149             for uri, expected in DATA:
150                 n = RDF.Node(RDF.Uri(uri))
151                 self.assertEqual(simplify_uri(n), expected)
152
153             # decoding literals is questionable
154             n = toTypedNode('http://foo/bar')
155             self.assertRaises(ValueError, simplify_uri, n)
156
157         def test_owl_import(self):
158             path, name = os.path.split(__file__)
159             #loc = 'file://'+os.path.abspath(path)+'/'
160             loc = os.path.abspath(path)+'/'
161             model = get_model()
162             fragment = '''
163 @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
164 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
165 @prefix owl: <http://www.w3.org/2002/07/owl#> .
166
167 _:a owl:imports "{loc}extra.turtle" .
168 '''.format(loc=loc)
169             load_string_into_model(model, 'turtle', fragment, loc)
170             tc = RDF.Node(RDF.Uri('http://jumpgate.caltech.edu/wiki/TestCase'))
171             query = RDF.Statement(tc, rdfsNS['label'], None)
172             result = list(model.find_statements(query))
173             self.assertEqual(len(result), 1)
174             self.assertEqual(str(result[0].object), 'TestCase')
175
176         def test_sanitize_literal_text(self):
177             self.assertRaises(ValueError, sanitize_literal, "hi")
178             hello_text = "hello"
179             hello_none = RDF.Node(hello_text)
180             self.assertEqual(str(sanitize_literal(hello_none)),
181                                  hello_text)
182             hello_str = RDF.Node(literal=hello_text,
183                                  datatype=xsdNS['string'].uri)
184             hello_clean = sanitize_literal(hello_str)
185             self.assertEqual(hello_clean.literal_value['string'],
186                                  hello_text)
187
188         def test_sanitize_literal_empty_string(self):
189             value = ""
190             value_node = RDF.Node(value)
191             self.assertEqual(str(sanitize_literal(value_node)), value)
192
193         def test_sanitize_literal_html(self):
194             hello = "hello <a onload='javascript:alert(\"foo\");' href='http://google.com'>google.com</a>, whats up?"
195             hello_clean = 'hello <a href="http://google.com">google.com</a>, whats up?'
196             hello_node = RDF.Node(literal=hello,
197                                   datatype=xsdNS['string'].uri)
198             hello_sanitized = sanitize_literal(hello_node)
199             self.assertEqual(hello_sanitized.literal_value['string'],
200                                  hello_clean)
201
202             hostile = "hi <b>there</b><script type='text/javascript>alert('boo');</script><a href='javascript:alert('poke')>evil</a> scammer"
203             hostile_node = RDF.Node(hostile)
204             hostile_sanitized = sanitize_literal(hostile_node)
205             # so it drops the stuff after the javascript link.
206             # I suppose it could be worse
207             hostile_result = """hi <b>there</b>"""
208             self.assertEqual(str(hostile_sanitized), hostile_result)
209
210         def test_guess_parser_from_file(self):
211             DATA = [
212                 ('/a/b/c.rdf', 'rdfxml'),
213                 ('/a/b/c.xml', 'rdfxml'),
214                 ('/a/b/c.html', 'rdfa'),
215                 ('/a/b/c.turtle', 'turtle'),
216                 ('http://foo.bar/bleem.turtle', 'turtle')]
217             for path, parser in DATA:
218                 self.assertEqual(guess_parser_by_extension(path), parser)
219                 self.assertEqual(guess_parser(None, path), parser)
220
221             DATA = [
222                 ('application/rdf+xml', 'http://a.org/b/c', 'rdfxml'),
223                 ('application/x-turtle', 'http://a.org/b/c', 'turtle'),
224                 ('text/html', 'http://a.org/b/c', 'rdfa'),
225                 ('text/html', 'http://a.org/b/c.html', 'rdfa'),
226                 ('text/plain', 'http://a.org/b/c.turtle', 'turtle'),
227                 ('text/plain', 'http://a.org/b/c', 'guess')
228             ]
229             for contenttype, url, parser in DATA:
230                 self.assertEqual(guess_parser(contenttype, url), parser)
231
232     class TestRDFSchemas(TestCase):
233         def test_rdf_schema(self):
234             """Does it basically work?
235             """
236             model = get_model()
237             self.assertEqual(model.size(), 0)
238             add_default_schemas(model)
239             self.assertTrue(model.size() > 0)
240             remove_schemas(model)
241             self.assertEqual(model.size(), 0)
242
243         def test_included_schemas(self):
244             model = get_model()
245             add_default_schemas(model)
246
247             # rdf test
248             s = RDF.Statement(rdfNS[''], dcNS['title'], None)
249             title = model.get_target(rdfNS[''], dcNS['title'])
250             self.assertTrue(title is not None)
251
252             s = RDF.Statement(rdfNS['Property'], rdfNS['type'], rdfsNS['Class'])
253             self.assertTrue(model.contains_statement(s))
254
255             # rdfs test
256             s = RDF.Statement(rdfsNS['Class'], rdfNS['type'], rdfsNS['Class'])
257             self.assertTrue(model.contains_statement(s))
258
259             s = RDF.Statement(owlNS['inverseOf'], rdfNS['type'],
260                               rdfNS['Property'])
261             self.assertTrue(model.contains_statement(s))
262
263
264 except ImportError, e:
265     print "Unable to test rdfhelp"
266
def suite():
    """Build a TestSuite from whichever rdfhelp test classes were defined.

    The test classes only exist when ``import RDF`` succeeded, so they are
    looked up by name and skipped when absent instead of raising NameError.

    Returns:
        A TestSuite holding the tests of TestRDFHelp and TestRDFSchemas, or
        an empty suite when neither class is available.
    """
    try:
        from unittest2 import TestSuite, defaultTestLoader
    except ImportError:
        # unittest2 is a backport of the standard library API; fall back to
        # the standard library when the backport is not installed.
        from unittest import TestSuite, defaultTestLoader

    tests = TestSuite()
    for class_name in ('TestRDFHelp', 'TestRDFSchemas'):
        test_case = globals().get(class_name)
        if test_case is not None:
            tests.addTests(defaultTestLoader.loadTestsFromTestCase(test_case))
    return tests
273
274
if __name__ == "__main__":
    # Executed as a script: hand control to the unittest2 runner, which
    # resolves the "suite" callable defined in this module.
    import unittest2
    unittest2.main(defaultTest="suite")