Port rdfhelp to rdflib from redland librdf.
[htsworkflow.git] / htsworkflow / util / test / test_rdfhelp.py
1 from __future__ import print_function
2
3 import os
4 import types
5 from unittest import TestCase
6 from datetime import datetime
7 import six
8
9 from rdflib import BNode, ConjunctiveGraph, Literal, Namespace, URIRef, Graph
10
11 from htsworkflow.util.rdfhelp import \
12      add_default_schemas, \
13      DC, \
14      dump_model, \
15      guess_parser, \
16      guess_parser_by_extension, \
17      load_string_into_model, \
18      OWL, \
19      remove_schemas, \
20      RDF, \
21      RDFS, \
22      strip_namespace, \
23      simplify_uri, \
24      sanitize_literal, \
25      XSD
26
class TestRDFHelp(TestCase):
    """Exercise rdflib literal conversions and the rdfhelp helper functions."""

    def test_typed_node_boolean(self):
        """A Python bool becomes a literal typed as xsd:boolean."""
        node = Literal(True)
        self.assertTrue(node.value)
        self.assertEqual(str(node.datatype),
                         'http://www.w3.org/2001/XMLSchema#boolean')

    def test_typed_node_string_node_attributes(self):
        """A plain string literal keeps its value and carries no datatype."""
        node = Literal('hello')
        self.assertEqual(node.value, 'hello')
        self.assertIsNone(node.datatype)

    def test_typed_real_like(self):
        """A float survives the trip through Literal as an exact float."""
        num = 3.14
        node = Literal(num)
        self.assertEqual(node.toPython(), num)
        self.assertIs(type(node.toPython()), float)

    def test_typed_integer(self):
        """An integer survives the trip through Literal with the same type."""
        num = 3
        node = Literal(num)
        self.assertEqual(node.toPython(), num)
        self.assertIs(type(node.toPython()), type(num))

    def test_typed_node_string(self):
        """A string literal converts back to an equal text string."""
        s = "Argh matey"
        node = Literal(s)
        self.assertEqual(node.toPython(), s)
        self.assertIsInstance(node.toPython(), six.text_type)

    def test_unicode_node_roundtrip(self):
        """Non-ASCII text comes back from Literal as equal text."""
        literal = u'\u5927'
        roundtrip = Literal(literal).toPython()
        self.assertIsInstance(roundtrip, six.text_type)
        # Check the value as well as the type so this really is a roundtrip.
        self.assertEqual(roundtrip, literal)

    def test_datetime_no_microsecond(self):
        """xsd:dateTime literals without microseconds map to datetime."""
        dateTimeType = XSD.dateTime
        short_isostamp = '2011-12-20T11:44:25'
        short_node = Literal(short_isostamp,
                             datatype=dateTimeType)
        short_datetime = datetime(2011, 12, 20, 11, 44, 25)

        self.assertEqual(short_node.toPython(), short_datetime)
        self.assertEqual(Literal(short_datetime), short_node)
        self.assertEqual(Literal(short_datetime).toPython(),
                         short_datetime)

    def test_datetime_with_microsecond(self):
        """xsd:dateTime literals with microseconds map to datetime."""
        dateTimeType = XSD.dateTime
        long_isostamp = '2011-12-20T11:44:25.081776'
        long_node = Literal(long_isostamp,
                            datatype=dateTimeType)
        long_datetime = datetime(2011, 12, 20, 11, 44, 25, 81776)

        self.assertEqual(long_node.toPython(), long_datetime)
        self.assertEqual(Literal(long_datetime), long_node)
        self.assertEqual(Literal(long_datetime).toPython(),
                         long_datetime)

    def test_strip_namespace_uri(self):
        """strip_namespace returns the local term, or None for a foreign
        namespace."""
        nsOrg = Namespace('example.org/example#')
        nsCom = Namespace('example.com/example#')

        term = 'foo'
        node = nsOrg[term]
        self.assertEqual(strip_namespace(nsOrg, node), term)
        self.assertIsNone(strip_namespace(nsCom, node))

    def test_strip_namespace_exceptions(self):
        """strip_namespace raises ValueError for things that are not URI
        nodes: a Literal, a Namespace, and a plain string."""
        nsOrg = Namespace('example.org/example#')

        node = Literal('bad')
        self.assertRaises(ValueError, strip_namespace, nsOrg, node)
        self.assertRaises(ValueError, strip_namespace, nsOrg, nsOrg)
        self.assertRaises(ValueError, strip_namespace, nsOrg, str(node))

    def test_simplify_uri(self):
        """simplify_uri reduces a URI to its last meaningful component."""
        cases = [('http://asdf.org/foo/bar', 'bar'),
                 ('http://asdf.org/foo/bar#bleem', 'bleem'),
                 ('http://asdf.org/foo/bar/', 'bar'),
                 ('http://asdf.org/foo/bar?was=foo', 'was=foo')]

        for uri, expected in cases:
            # The same answer is expected for a plain string, a URIRef
            # node, and a literal explicitly typed as xsd:anyURI.
            self.assertEqual(simplify_uri(uri), expected)
            self.assertEqual(simplify_uri(URIRef(uri)), expected)
            typed = Literal(URIRef(uri), datatype=XSD.anyURI)
            self.assertEqual(simplify_uri(typed), expected)

        # decoding literals is questionable
        n = Literal('http://foo/bar')
        self.assertRaises(ValueError, simplify_uri, n)

    def test_owl_import(self):
        """Loading a fragment with owl:imports makes the TestCase label
        available in the model.

        The fragment itself does not define the label, so it is presumably
        provided by the extra.turtle file next to this test module.
        """
        # NOTE: the base location is a plain filesystem path ending in '/',
        # not a file:// URL.
        loc = os.path.abspath(os.path.dirname(__file__)) + '/'
        model = Graph()
        fragment = '''
@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix owl: <http://www.w3.org/2002/07/owl#> .

_:a owl:imports "{loc}extra.turtle" .
'''.format(loc=loc)
        load_string_into_model(model, 'turtle', fragment, loc)
        tc = URIRef('http://jumpgate.caltech.edu/wiki/TestCase')
        result = list(model.triples((tc, RDFS.label, None)))
        self.assertEqual(len(result), 1)
        self.assertEqual(str(result[0][2]), 'TestCase')

    def test_sanitize_literal_text(self):
        """sanitize_literal rejects plain strings and leaves harmless
        literal text unchanged."""
        self.assertRaises(ValueError, sanitize_literal, "hi")
        hello_text = "hello"
        hello_none = Literal(hello_text)
        self.assertEqual(str(sanitize_literal(hello_none)),
                         hello_text)
        hello_str = Literal(hello_text,
                            datatype=XSD['string'])
        hello_clean = sanitize_literal(hello_str)
        self.assertEqual(hello_clean.value, hello_text)

    def test_sanitize_literal_empty_string(self):
        """An empty literal sanitizes to an empty string."""
        value = ""
        value_node = Literal(value)
        self.assertEqual(str(sanitize_literal(value_node)), value)

    def test_sanitize_literal_html(self):
        """sanitize_literal drops script-bearing markup but keeps safe
        HTML."""
        hello = "hello <a onload='javascript:alert(\"foo\");' href='http://google.com'>google.com</a>, whats up?"
        hello_clean = 'hello <a href="http://google.com">google.com</a>, whats up?'
        hello_node = Literal(hello,
                             datatype=XSD['string'])
        hello_sanitized = sanitize_literal(hello_node)
        self.assertEqual(hello_sanitized.value, hello_clean)

        hostile = "hi <b>there</b><script type='text/javascript>alert('boo');</script><a href='javascript:alert('poke')>evil</a> scammer"
        hostile_node = Literal(hostile)
        hostile_sanitized = sanitize_literal(hostile_node)
        # so it drops the stuff after the javascript link.
        # I suppose it could be worse
        hostile_result = """hi <b>there</b>"""
        self.assertEqual(str(hostile_sanitized), hostile_result)

    def test_guess_parser_from_file(self):
        """Parser names are guessed from the file extension and, when
        given, from the content type."""
        by_extension = [
            ('/a/b/c.rdf', 'rdfxml'),
            ('/a/b/c.xml', 'rdfxml'),
            ('/a/b/c.html', 'rdfa'),
            ('/a/b/c.turtle', 'turtle'),
            ('http://foo.bar/bleem.turtle', 'turtle')]
        for path, parser in by_extension:
            self.assertEqual(guess_parser_by_extension(path), parser)
            self.assertEqual(guess_parser(None, path), parser)

        by_content_type = [
            ('application/rdf+xml', 'http://a.org/b/c', 'rdfxml'),
            ('application/x-turtle', 'http://a.org/b/c', 'turtle'),
            ('text/html', 'http://a.org/b/c', 'rdfa'),
            ('text/html', 'http://a.org/b/c.html', 'rdfa'),
            ('text/plain', 'http://a.org/b/c.turtle', 'turtle'),
            ('text/plain', 'http://a.org/b/c', 'guess')
        ]
        for contenttype, url, parser in by_content_type:
            self.assertEqual(guess_parser(contenttype, url), parser)
196
class TestRDFSchemas(TestCase):
    """Check that the default schemas can be added to and removed from a
    model."""

    def test_rdf_schema(self):
        """Does it basically work?

        Adding the default schemas must populate an empty model, and
        removing them must leave it empty again.
        """
        model = ConjunctiveGraph()
        self.assertEqual(len(model), 0)
        add_default_schemas(model)
        self.assertGreater(len(model), 0)
        remove_schemas(model)
        self.assertEqual(len(model), 0)

    def test_included_schemas(self):
        """Spot-check a few statements expected from the RDF, RDFS and OWL
        schemas after loading the defaults."""
        model = ConjunctiveGraph()
        add_default_schemas(model)

        # rdf test
        # objects() returns a lazy iterator that is never None, so
        # materialize it to actually check that a title was loaded.
        # NOTE(review): assumes the RDF schema declares a dc:title on the
        # RDF namespace URI itself -- confirm against the bundled schema.
        rdf_ontology = URIRef(str(RDF))
        titles = list(model.objects(rdf_ontology, DC['title']))
        self.assertGreater(len(titles), 0)

        s = (RDF['Property'], RDF['type'], RDFS['Class'])
        self.assertIn(s, model)

        # rdfs test
        s = (RDFS['Class'], RDF['type'], RDFS['Class'])
        self.assertIn(s, model)

        # owl test
        s = (OWL['inverseOf'], RDF['type'], RDF['Property'])
        self.assertIn(s, model)
226
def suite():
    """Return a TestSuite holding every test from this module's test cases."""
    from unittest import TestSuite, defaultTestLoader
    combined = TestSuite()
    for case in (TestRDFHelp, TestRDFSchemas):
        combined.addTests(defaultTestLoader.loadTestsFromTestCase(case))
    return combined
233
234
if __name__ == "__main__":
    # Run the combined suite when this module is executed as a script.
    import unittest
    unittest.main(defaultTest="suite")