From da57ac1c65ff0fa96c831ff6adb787c64e99cc8d Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Wed, 19 Sep 2012 16:04:35 -0700 Subject: [PATCH] Refactor property type validator to support multiple classes for domain/range. Also test to make sure we can have more than one domain/range statement. --- htsworkflow/util/rdfinfer.py | 58 +++++++++++++++----------- htsworkflow/util/test/test_rdfinfer.py | 41 +++++++++++++++++- 2 files changed, 73 insertions(+), 26 deletions(-) diff --git a/htsworkflow/util/rdfinfer.py b/htsworkflow/util/rdfinfer.py index 8f12f5c..aac3afa 100644 --- a/htsworkflow/util/rdfinfer.py +++ b/htsworkflow/util/rdfinfer.py @@ -59,7 +59,6 @@ class Infer(object): for msg in method(): yield msg - def _rule_class(self): """resolve class chains. e.g. if a is an BClass, and a BClass is an AClass @@ -180,8 +179,33 @@ class Infer(object): {space} ?type . }}""" - wrong_domain_type = "Domain of {0} was not {1}" - wrong_range_type = "Range of {0} was not {1}" + def check_node_space(node, predicate, space, errmsg): + """Check that a node conforms to its allowable space of types. + + e.g. is a subject (node) the domain (space) of this property + and is the object (node) the range of this property. 
+ """ + # check domain or range, depending on space + query = RDF.SPARQLQuery(property_template.format( + predicate=predicate, + space=space)) + seen = [] + for r in query.execute(self.model): + if r['type'] == rdfsNS['Resource']: + continue + seen.append(str(r['type'])) + check = RDF.Statement(node, rdfNS['type'], r['type']) + if self.model.contains_statement(check): + return + + # need the seen check, because we're suppressing checking + # rdfs:Resource types + if len(seen) > 0: + return errmsg + ",".join(seen) + + + wrong_domain_type = "Domain of {0} was not in:" + wrong_range_type = "Range of {0} was not in:" count = 0 schema = RDF.Node(RDF.Uri(SCHEMAS_URL)) @@ -189,27 +213,13 @@ class Infer(object): if context == schema: continue # check domain - query = RDF.SPARQLQuery(property_template.format( - predicate=s.predicate, - space='rdfs:domain')) - for r in query.execute(self.model): - if r['type'] == rdfsNS['Resource']: - continue - check = RDF.Statement(s.subject, rdfNS['type'], r['type']) - if not self.model.contains_statement(check): - yield wrong_domain_type.format(str(s), - str(r['type'])) + msg = check_node_space(s.subject, s.predicate, 'rdfs:domain', + wrong_domain_type.format(str(s))) + if msg is not None: yield msg # check range - query = RDF.SPARQLQuery(property_template.format( - predicate=s.predicate, - space='rdfs:range')) - for r in query.execute(self.model): - if r['type'] == rdfsNS['Resource']: - continue - check = RDF.Statement(s.object, rdfNS['type'], r['type']) - if not self.model.contains_statement(check): - yield wrong_range_type.format(str(s), - str(r['type'])) - + msg = check_node_space(s.object, s.predicate, 'rdfs:range', + wrong_range_type.format(str(s))) + if msg is not None: yield msg return + diff --git a/htsworkflow/util/test/test_rdfinfer.py b/htsworkflow/util/test/test_rdfinfer.py index 4ed2316..380c1e6 100644 --- a/htsworkflow/util/test/test_rdfinfer.py +++ b/htsworkflow/util/test/test_rdfinfer.py @@ -172,7 +172,10 @@ class TestInfer(unittest.TestCase): 
self.model.append(s) errors = list(inference._validate_property_types()) self.assertEqual(len(errors), 1) - self.assertTrue(errors[0].startswith('Domain of http://example.org')) + startswith = 'Domain of ' + self.assertEqual(errors[0][:len(startswith)], startswith) + endswith = 'http://xmlns.com/foaf/0.1/Person' + self.assertEqual(errors[0][-len(endswith):], endswith) del self.model[s] errors = list(inference._validate_property_types()) @@ -186,9 +189,43 @@ class TestInfer(unittest.TestCase): errors = list(inference._validate_property_types()) self.assertEqual(len(errors), 1) - self.assertTrue(errors[0].startswith('Range of http://example.org')) + startswith = 'Range of ' + self.assertEqual(errors[0][:len(startswith)], startswith) + endswith = 'http://www.w3.org/2002/07/owl#Thing' + self.assertEqual(errors[0][-len(endswith):], endswith) del self.model[s] + def test_property_multiple_domain_types(self): + """Can we process a property with multiple domain types? + """ + turtle = """ + @prefix rdf: . + @prefix rdfs: . + @prefix foo: . + @prefix bar: . + + foo:AClass a rdfs:Class . + foo:BClass a rdfs:Class . + bar:ABarClass a rdfs:Class . + + foo:aprop a rdf:Property ; + rdfs:domain foo:AClass ; + rdfs:domain bar:ABarClass ; + rdfs:range foo:BClass . + + foo:object a foo:BClass . + foo:subject a foo:AClass ; + foo:aprop foo:object . + bar:subject a bar:ABarClass ; + foo:aprop foo:object . + """ + load_string_into_model(self.model, 'turtle', turtle) + inference = Infer(self.model) + + errmsg = list(inference._validate_property_types()) + print errmsg + self.failUnlessEqual(len(errmsg), 0) + def suite(): return unittest.makeSuite(TestInfer, 'test') -- 2.30.2