From: Diane Trout Date: Tue, 18 Sep 2012 18:34:25 +0000 (-0700) Subject: Improvements to rdfinfer. X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=6703ce111e468b78f7c72c1539a24085dd00a21e Improvements to rdfinfer. Add rule to infer class and subClassOf memberships, add testing for the class case. Add code to run all the validation rules. --- diff --git a/htsworkflow/util/rdfinfer.py b/htsworkflow/util/rdfinfer.py index 221063f..8f12f5c 100644 --- a/htsworkflow/util/rdfinfer.py +++ b/htsworkflow/util/rdfinfer.py @@ -1,3 +1,7 @@ +import logging +import os +import sys + import RDF from htsworkflow.util.rdfns import * @@ -15,7 +19,7 @@ class Infer(object): self._context = RDF.Node(RDF.Uri(INFER_URL)) - def update(self, max_iterations=None): + def think(self, max_iterations=None): """Update model with with inferred statements. max_iterations puts a limit on the number of times we @@ -38,6 +42,64 @@ class Infer(object): # we didn't add anything new return + def validate(self, destination=None): + if destination is None: + destination = sys.stdout + + for msg in self.run_validation(): + destination.write(msg) + destination.write(os.linesep) + + def run_validation(self): + """Apply validation rules to our model. + """ + for method_name in dir(self): + if method_name.startswith('_validate_'): + method = getattr(self, method_name) + for msg in method(): + yield msg + + + def _rule_class(self): + """resolve class chains. + e.g. if a is an BClass, and a BClass is an AClass + then a is both a BClass and AClass. + """ + body = """ + prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix owl: <http://www.w3.org/2002/07/owl#> + + select ?obj ?class + where { + ?alias a ?class . + ?obj a ?alias . 
+ }""" + query = RDF.SPARQLQuery(body) + for r in query.execute(self.model): + s = RDF.Statement(r['obj'], rdfNS['type'], r['class']) + if s not in self.model: + self.model.append(s, self._context) + + def _rule_subclass(self): + """A subclass is a parent class + """ + body = """ + prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> + prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> + prefix owl: <http://www.w3.org/2002/07/owl#> + + select ?obj ?subclass ?parent + where { + ?subclass rdfs:subClassOf ?parent . + ?obj a ?subclass . + }""" + query = RDF.SPARQLQuery(body) + for r in query.execute(self.model): + s = RDF.Statement(r['obj'], rdfNS['type'], r['parent']) + if s not in self.model: + self.model.append(s, self._context) + def _rule_inverse_of(self): """Add statements computed with inverseOf """ @@ -118,8 +180,8 @@ class Infer(object): {space} ?type . }}""" - wrong_domain_type = "Domain of {0} {1} {2} not {3}" - wrong_range_type = "Range of {0} {1} {2} not {3}" + wrong_domain_type = "Domain of {0} was not {1}" + wrong_range_type = "Range of {0} was not {1}" count = 0 schema = RDF.Node(RDF.Uri(SCHEMAS_URL)) @@ -135,9 +197,7 @@ class Infer(object): continue check = RDF.Statement(s.subject, rdfNS['type'], r['type']) if not self.model.contains_statement(check): - yield wrong_domain_type.format(str(s.subject), - str(s.predicate), - str(s.object), + yield wrong_domain_type.format(str(s), str(r['type'])) # check range query = RDF.SPARQLQuery(property_template.format( @@ -148,9 +208,8 @@ class Infer(object): continue check = RDF.Statement(s.object, rdfNS['type'], r['type']) if not self.model.contains_statement(check): - yield wrong_range_type.format(str(s.subject), - str(s.predicate), - str(s.object), + yield wrong_range_type.format(str(s), str(r['type'])) return + diff --git a/htsworkflow/util/test/test_rdfinfer.py b/htsworkflow/util/test/test_rdfinfer.py index 48462e4..4ed2316 100644 --- a/htsworkflow/util/test/test_rdfinfer.py +++ b/htsworkflow/util/test/test_rdfinfer.py @@ -94,6 +94,19 @@ class TestInfer(unittest.TestCase): add_default_schemas(self.model) 
load_string_into_model(self.model, 'turtle', MINI_FOAF_ONTOLOGY) + def test_class(self): + fooNS = RDF.NS('http://example.org/') + load_string_into_model(self.model, 'turtle', FOAF_DATA) + inference = Infer(self.model) + + s = RDF.Statement(fooNS['me.jpg'], rdfNS['type'], rdfsNS['Class']) + found = list(self.model.find_statements(s)) + self.assertEqual(len(found), 0) + inference._rule_class() + s = RDF.Statement(fooNS['me.jpg'], rdfNS['type'], rdfsNS['Class']) + found = list(self.model.find_statements(s)) + self.assertEqual(len(found), 1) + def test_inverse_of(self): fooNS = RDF.NS('http://example.org/') load_string_into_model(self.model, 'turtle', FOAF_DATA)