2643aef22029d48d00df0df58959a8dbf3fc42db
[htsworkflow.git] / htsworkflow / util / test / test_rdfhelp.py
1 import os
2 import unittest
3 import types
4
5
6 from datetime import datetime
7
8 from htsworkflow.util.rdfhelp import \
9      blankOrUri, \
10      dump_model, \
11      fromTypedNode, \
12      get_model, \
13      load_string_into_model, \
14      rdfsNS, \
15      toTypedNode, \
16      stripNamespace, \
17      simplify_uri, \
18      sanitize_literal, \
19      xsdNS
20
21 try:
22     import RDF
23
24     class TestRDFHelp(unittest.TestCase):
25         def test_from_none(self):
26           self.failUnlessEqual(fromTypedNode(None), None)
27
28         def test_typed_node_boolean(self):
29             node = toTypedNode(True)
30             self.failUnlessEqual(node.literal_value['string'], u'1')
31             self.failUnlessEqual(str(node.literal_value['datatype']),
32                                  'http://www.w3.org/2001/XMLSchema#boolean')
33
34         def test_bad_boolean(self):
35             node = RDF.Node(literal='bad', datatype=xsdNS['boolean'].uri)
36             self.failUnlessRaises(ValueError, fromTypedNode, node)
37
38         def test_typed_node_string(self):
39             node = toTypedNode('hello')
40             self.failUnlessEqual(node.literal_value['string'], u'hello')
41             self.failUnless(node.literal_value['datatype'] is None)
42
43         def test_typed_real_like(self):
44             num = 3.14
45             node = toTypedNode(num)
46             self.failUnlessEqual(fromTypedNode(node), num)
47
48         def test_typed_integer(self):
49             num = 3
50             node = toTypedNode(num)
51             self.failUnlessEqual(fromTypedNode(node), num)
52             self.failUnlessEqual(type(fromTypedNode(node)), type(num))
53
54         def test_typed_node_string(self):
55             s = "Argh matey"
56             node = toTypedNode(s)
57             self.failUnlessEqual(fromTypedNode(node), s)
58             self.failUnlessEqual(type(fromTypedNode(node)), types.UnicodeType)
59
60         def test_blank_or_uri_blank(self):
61             node = blankOrUri()
62             self.failUnlessEqual(node.is_blank(), True)
63
64         def test_blank_or_uri_url(self):
65             s = 'http://google.com'
66             node = blankOrUri(s)
67             self.failUnlessEqual(node.is_resource(), True)
68             self.failUnlessEqual(str(node.uri), s)
69
70         def test_blank_or_uri_node(self):
71             s = RDF.Node(RDF.Uri('http://google.com'))
72             node = blankOrUri(s)
73             self.failUnlessEqual(node.is_resource(), True)
74             self.failUnlessEqual(node, s)
75
76         def test_unicode_node_roundtrip(self):
77             literal = u'\u5927'
78             roundtrip = fromTypedNode(toTypedNode(literal))
79             self.failUnlessEqual(roundtrip, literal)
80             self.failUnlessEqual(type(roundtrip), types.UnicodeType)
81
82         def test_datetime_no_microsecond(self):
83             dateTimeType = xsdNS['dateTime'].uri
84             short_isostamp = '2011-12-20T11:44:25'
85             short_node = RDF.Node(literal=short_isostamp,
86                                  datatype=dateTimeType)
87             short_datetime = datetime(2011,12,20,11,44,25)
88
89             self.assertEqual(fromTypedNode(short_node), short_datetime)
90             self.assertEqual(toTypedNode(short_datetime), short_node)
91             self.assertEqual(fromTypedNode(toTypedNode(short_datetime)),
92                              short_datetime)
93
94         def test_datetime_with_microsecond(self):
95             dateTimeType = xsdNS['dateTime'].uri
96             long_isostamp = '2011-12-20T11:44:25.081776'
97             long_node = RDF.Node(literal=long_isostamp,
98                                  datatype=dateTimeType)
99             long_datetime = datetime(2011,12,20,11,44,25,81776)
100
101             self.assertEqual(fromTypedNode(long_node), long_datetime)
102             self.assertEqual(toTypedNode(long_datetime), long_node)
103             self.assertEqual(fromTypedNode(toTypedNode(long_datetime)),
104                              long_datetime)
105
106         def test_strip_namespace_uri(self):
107             nsOrg = RDF.NS('example.org/example#')
108             nsCom = RDF.NS('example.com/example#')
109
110             term = 'foo'
111             node = nsOrg[term]
112             self.failUnlessEqual(stripNamespace(nsOrg, node), term)
113             self.failUnlessEqual(stripNamespace(nsCom, node), None)
114             self.failUnlessEqual(stripNamespace(nsOrg, node.uri), term)
115
116         def test_strip_namespace_exceptions(self):
117             nsOrg = RDF.NS('example.org/example#')
118             nsCom = RDF.NS('example.com/example#')
119
120             node = toTypedNode('bad')
121             self.failUnlessRaises(ValueError, stripNamespace, nsOrg, node)
122             self.failUnlessRaises(ValueError, stripNamespace, nsOrg, nsOrg)
123
124         def test_simplify_uri(self):
125             DATA = [('http://asdf.org/foo/bar', 'bar'),
126                     ('http://asdf.org/foo/bar#bleem', 'bleem'),
127                     ('http://asdf.org/foo/bar/', 'bar'),
128                     ('http://asdf.org/foo/bar?was=foo', 'was=foo')]
129
130             for uri, expected in DATA:
131                 self.assertEqual(simplify_uri(uri), expected)
132
133             for uri, expected in DATA:
134                 n = RDF.Uri(uri)
135                 self.assertEqual(simplify_uri(n), expected)
136
137             for uri, expected in DATA:
138                 n = RDF.Node(RDF.Uri(uri))
139                 self.assertEqual(simplify_uri(n), expected)
140
141             # decoding literals is questionable
142             n = toTypedNode('http://foo/bar')
143             self.assertRaises(ValueError, simplify_uri, n)
144
145         def test_owl_import(self):
146             path, name = os.path.split(__file__)
147             loc = 'file://'+os.path.abspath(path)+'/'
148             model = get_model()
149             fragment = '''
150 @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
151 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
152 @prefix owl: <http://www.w3.org/2002/07/owl#> .
153
154 _:a owl:imports "{loc}extra.turtle" .
155 '''.format(loc=loc)
156             load_string_into_model(model, 'turtle', fragment, loc)
157             tc = RDF.Node(RDF.Uri('http://jumpgate.caltech.edu/wiki/TestCase'))
158             query = RDF.Statement(tc, rdfsNS['label'], None)
159             result = list(model.find_statements(query))
160             self.failUnlessEqual(len(result), 1)
161             self.failUnlessEqual(str(result[0].object), 'TestCase')
162
163         def test_sanitize_literal_text(self):
164             self.failUnlessRaises(ValueError, sanitize_literal, "hi")
165             hello_text = "hello"
166             hello_none = RDF.Node(hello_text)
167             self.failUnlessEqual(str(sanitize_literal(hello_none)),
168                                  hello_text)
169             hello_str = RDF.Node(literal=hello_text,
170                                  datatype=xsdNS['string'].uri)
171             hello_clean = sanitize_literal(hello_str)
172             self.failUnlessEqual(hello_clean.literal_value['string'],
173                                  hello_text)
174
175         def test_sanitize_literal_empty_string(self):
176             value = ""
177             value_node = RDF.Node(value)
178             self.assertEqual(str(sanitize_literal(value_node)), value)
179
180         def test_sanitize_literal_html(self):
181             hello = "hello <a onload='javascript:alert(\"foo\");' href='http://google.com'>google.com</a>, whats up?"
182             hello_clean = 'hello <a href="http://google.com">google.com</a>, whats up?'
183             hello_node = RDF.Node(literal=hello,
184                                   datatype=xsdNS['string'].uri)
185             hello_sanitized = sanitize_literal(hello_node)
186             self.failUnlessEqual(hello_sanitized.literal_value['string'],
187                                  hello_clean)
188
189             hostile = "hi <b>there</b><script type='text/javascript>alert('boo');</script><a href='javascript:alert('poke')>evil</a> scammer"
190             hostile_node = RDF.Node(hostile)
191             hostile_sanitized = sanitize_literal(hostile_node)
192             # so it drops the stuff after the javascript link.
193             # I suppose it could be worse
194             hostile_result = """hi <b>there</b>"""
195             self.failUnlessEqual(str(hostile_sanitized), hostile_result)
196
197
198     def suite():
199         return unittest.makeSuite(TestRDFHelp, 'test')
200 except ImportError, e:
201     print "Unable to test rdfhelp"
202
203     def suite():
204         return None
205
# Script entry point: run whatever the module-level suite() callable returns.
# NOTE(review): when the ImportError fallback is active, suite() returns None,
# which unittest's loader is likely to reject as "not a test" -- confirm
# whether that path should exit quietly instead.
if __name__ == "__main__":
    unittest.main(defaultTest='suite')