Refactor property type validator to support multiple classes for domain/range.
author Diane Trout <diane@caltech.edu>
Wed, 19 Sep 2012 23:04:35 +0000 (16:04 -0700)
committer Diane Trout <diane@caltech.edu>
Wed, 19 Sep 2012 23:04:35 +0000 (16:04 -0700)
Also test to make sure we can have more than one domain/range statement.

htsworkflow/util/rdfinfer.py
htsworkflow/util/test/test_rdfinfer.py

index 8f12f5c6e4e27761087cbe2346207f5b3b65e7e9..aac3afa849178cecffa7f8185c29144f554ebe23 100644 (file)
@@ -59,7 +59,6 @@ class Infer(object):
                 for msg in method():
                     yield msg
 
-
     def _rule_class(self):
         """resolve class chains.
         e.g. if a is an BClass, and a BClass is an AClass
@@ -180,8 +179,33 @@ class Infer(object):
                         {space} ?type .
         }}"""
 
-        wrong_domain_type = "Domain of {0} was not {1}"
-        wrong_range_type = "Range of {0} was not {1}"
+        def check_node_space(node, predicate, space, errmsg):
+            """Check that a node conforms to its allowable space of types.
+
+            e.g. is a subject (node) the domain (space) of this property
+            and is the object (node) the range of this property.
+            """
+            # look up the types the model lists for this predicate's space
+            query = RDF.SPARQLQuery(property_template.format(
+                predicate=predicate,
+                space=space))
+            seen = []
+            for r in query.execute(self.model):
+                if r['type'] == rdfsNS['Resource']:
+                    continue
+                seen.append(str(r['type']))
+                check = RDF.Statement(node, rdfNS['type'], r['type'])
+                if self.model.contains_statement(check):
+                    return
+
+            # only report when a non rdfs:Resource type was seen (those are
+            # skipped above); otherwise fall through and return None
+            if len(seen) > 0:
+                return errmsg + ",".join(seen)
+
+
+        wrong_domain_type = "Domain of {0} was not in:"
+        wrong_range_type = "Range of {0} was not in:"
 
         count = 0
         schema = RDF.Node(RDF.Uri(SCHEMAS_URL))
@@ -189,27 +213,13 @@ class Infer(object):
             if context == schema:
                 continue
             # check domain
-            query = RDF.SPARQLQuery(property_template.format(
-                predicate=s.predicate,
-                space='rdfs:domain'))
-            for r in query.execute(self.model):
-                if r['type'] == rdfsNS['Resource']:
-                    continue
-                check = RDF.Statement(s.subject, rdfNS['type'], r['type'])
-                if not self.model.contains_statement(check):
-                    yield wrong_domain_type.format(str(s),
-                                                   str(r['type']))
+            msg = check_node_space(s.subject, s.predicate, 'rdfs:domain',
+                                   wrong_domain_type.format(str(s)))
+            if msg is not None: yield msg
             # check range
-            query = RDF.SPARQLQuery(property_template.format(
-                predicate=s.predicate,
-                space='rdfs:range'))
-            for r in query.execute(self.model):
-                if r['type'] == rdfsNS['Resource']:
-                    continue
-                check = RDF.Statement(s.object, rdfNS['type'], r['type'])
-                if not self.model.contains_statement(check):
-                    yield wrong_range_type.format(str(s),
-                                                  str(r['type']))
-
+            msg = check_node_space(s.object, s.predicate, 'rdfs:range',
+                                   wrong_range_type.format(str(s)))
+            if msg is not None: yield msg
         return
 
+
index 4ed231698d1eeb1346249227f39a2dadf425629e..380c1e640bd8133aadf5ff8ce7325026a1981814 100644 (file)
@@ -172,7 +172,10 @@ class TestInfer(unittest.TestCase):
         self.model.append(s)
         errors = list(inference._validate_property_types())
         self.assertEqual(len(errors), 1)
-        self.assertTrue(errors[0].startswith('Domain of http://example.org'))
+        startswith = 'Domain of <http://example.org/me.jpg>'
+        self.assertEqual(errors[0][:len(startswith)], startswith)
+        endswith = 'http://xmlns.com/foaf/0.1/Person'
+        self.assertEqual(errors[0][-len(endswith):], endswith)
         del self.model[s]
 
         errors = list(inference._validate_property_types())
@@ -186,9 +189,43 @@ class TestInfer(unittest.TestCase):
 
         errors = list(inference._validate_property_types())
         self.assertEqual(len(errors), 1)
-        self.assertTrue(errors[0].startswith('Range of http://example.org'))
+        startswith = 'Range of <http://example.org/me.jpg>'
+        self.assertEqual(errors[0][:len(startswith)], startswith)
+        endswith = 'http://www.w3.org/2002/07/owl#Thing'
+        self.assertEqual(errors[0][-len(endswith):], endswith)
         del self.model[s]
 
+    def test_property_multiple_domain_types(self):
+        """A subject typed as any one of several rdfs:domain classes passes.
+        """
+        turtle = """
+        @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
+        @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+        @prefix foo: <http://example.org/> .
+        @prefix bar: <http://example.com/> .
+
+        foo:AClass a rdfs:Class .
+        foo:BClass a rdfs:Class .
+        bar:ABarClass a rdfs:Class .
+
+        foo:aprop a rdf:Property ;
+            rdfs:domain foo:AClass ;
+            rdfs:domain bar:ABarClass ;
+            rdfs:range foo:BClass .
+
+        foo:object a foo:BClass .
+        foo:subject a foo:AClass ;
+           foo:aprop foo:object .
+        bar:subject a bar:ABarClass ;
+           foo:aprop foo:object .
+        """
+        load_string_into_model(self.model, 'turtle', turtle)
+        inference = Infer(self.model)
+
+        errmsg = list(inference._validate_property_types())
+        # errmsg doubles as the failure message, so no debug print is needed
+        self.assertEqual(len(errmsg), 0, errmsg)
+
 def suite():
     return unittest.makeSuite(TestInfer, 'test')