5 from rdflib import ConjunctiveGraph, BNode, Literal, URIRef
6 from rdflib.plugins.sparql import prepareQuery
8 from htsworkflow.util.rdfns import *
9 from htsworkflow.util.rdfhelp import SCHEMAS_URL
# Identifier of the context that inferred statements are tagged with.
INFER_URL = 'http://jumpgate.caltech.edu/phony/infer'

# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
15 """Provide some simple inference.
17 Provides a few default rules as methods starting with _rule_
def __init__(self, model):
    """Attach the inference helper to *model*.

    model must be an rdflib ConjunctiveGraph; anything else raises
    ValueError.
    """
    if not isinstance(model, ConjunctiveGraph):
        raise ValueError("Inferences require a ConjunctiveGraph")

    # NOTE(review): this assignment was not present in the text under
    # review; it is restored because every other method reads self.model.
    self.model = model
    # Context that the rule methods attach to the statements they build.
    self._context = URIRef(INFER_URL)
def think(self, max_iterations=None):
    """Update the model with inferred statements.

    Each pass runs every method whose name starts with ``_rule_``.
    Passes repeat until one of them leaves the model size unchanged
    (nothing new was inferred) or until max_iterations passes have been
    made.  max_iterations=None places no limit on the number of passes.

    Also this is the naive solution.
    There's probably better ones out there.
    """
    iterations = 0
    while max_iterations is None or iterations != max_iterations:
        # NOTE(review): assumes the model object provides size();
        # confirm against the rdflib version in use.
        starting_size = self.model.size()

        for method_name in dir(self):
            if method_name.startswith('_rule_'):
                LOGGER.info("Running: %s", method_name)
                method = getattr(self, method_name)
                method()

        if self.model.size() == starting_size:
            # we didn't add anything new
            break

        # Count the finished pass; without this max_iterations could
        # never stop the loop.
        iterations += 1
def validate(self, destination=None):
    """Write every validation message to *destination*, one per line.

    destination is any object with a write() method that accepts text.
    When it is None the messages go to sys.stdout.
    """
    if destination is None:
        destination = sys.stdout

    for msg in self.run_validation():
        destination.write(msg)
        # Text-mode streams translate '\n' to the platform line ending
        # themselves; writing os.linesep would yield '\r\r\n' on Windows.
        destination.write('\n')
def run_validation(self):
    """Apply validation rules to our model.

    Runs every method whose name starts with ``_validate_`` and yields
    the messages those methods produce, in dir() order of the methods.
    """
    for method_name in dir(self):
        if method_name.startswith('_validate_'):
            LOGGER.info("Running: %s", method_name)
            method = getattr(self, method_name)
            # NOTE(review): the forwarding step was not present in the
            # text under review; restored because validate() iterates
            # over the messages returned from here.
            for msg in method():
                yield msg
# Inference rule picked up by think() through its "_rule_" name prefix.
# For every query result it builds the 4-tuple
# (r['obj'], RDF['type'], r['class'], self._context) and tests whether that
# statement is already in the model.
# NOTE(review): the line that opens the query string, the select/where
# clause and the statement under the final `if` are absent from the text
# under review -- restore them from the upstream file before relying on
# this method.
69 def _rule_class(self):
70 """resolve class chains.
71 e.g. if a is an BClass, and a BClass is an AClass
72 then a is both a BClass and AClass.
75 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
76 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
77 prefix owl: <http://www.w3.org/2002/07/owl#>
84 for r in self.model.query(body):
85 s = (r['obj'], RDF['type'], r['class'], self._context)
86 if s not in self.model:
# Inference rule picked up by think() through its "_rule_" name prefix.
# The query selects ?obj, ?subclass and ?parent where ?subclass is an
# rdfs:subClassOf ?parent; each result becomes the 4-tuple
# (r['obj'], RDF['type'], r['parent'], self._context), which is tested for
# membership in the model.
# NOTE(review): the pattern that binds ?obj, the query delimiters and the
# statement under the final `if` are absent from the text under review --
# restore them from the upstream file.
89 def _rule_subclass(self):
90 """A subclass is a parent class
93 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
94 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
95 prefix owl: <http://www.w3.org/2002/07/owl#>
97 select ?obj ?subclass ?parent
99 ?subclass rdfs:subClassOf ?parent .
102 for r in self.model.query(body):
103 s = (r['obj'], RDF['type'], r['parent'], self._context)
104 if s not in self.model:
# Inference rule picked up by think() through its "_rule_" name prefix.
# The visible part of the query requires ?term owl:inverseOf ?reverse and
# that the rdfs:domain / rdfs:range of ?term are the range / domain of
# ?reverse.  Each result becomes the 4-tuple
# (r['o'], r['reverse'], r['s'], self._context), i.e. the statement with
# subject and object swapped, which is tested for membership in the model.
# NOTE(review): the patterns that bind ?s and ?o, the query delimiters and
# the statement under the final `if` are absent from the text under
# review -- restore them from the upstream file.
107 def _rule_inverse_of(self):
108 """Add statements computed with inverseOf
111 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
112 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
113 prefix owl: <http://www.w3.org/2002/07/owl#>
115 select ?o ?reverse ?s
120 ?term owl:inverseOf ?reverse .
121 ?term rdfs:domain ?subject_type ;
122 rdfs:range ?object_type .
123 ?reverse rdfs:domain ?object_type ;
124 rdfs:range ?subject_type .
126 for r in self.model.query(body):
127 s = (r['o'], r['reverse'], r['s'], self._context)
128 if s not in self.model:
def _validate_types(self):
    """Yield a message for each statement whose subject has no type.

    One message is produced per matching statement.  Statements whose
    predicate is the xhtml vocabulary ``stylesheet`` term are not
    reported.
    """
    # NOTE(review): the assignment opening this string, the 'where {'
    # line and the closing lines were not present in the text under
    # review and are restored here; the prefixes, select line and graph
    # patterns are unchanged.
    body = """
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    prefix owl: <http://www.w3.org/2002/07/owl#>
    prefix xhtmlv: <http://www.w3.org/1999/xhtml/vocab#>

    select ?subject ?predicate ?object
    where {
      ?subject ?predicate ?object
      OPTIONAL { ?subject a ?class }
      FILTER(!bound(?class))
      FILTER(?predicate != xhtmlv:stylesheet)
    }
    """
    errmsg = "Missing type for: {0}"
    for r in self.model.query(body):
        # Column 0 of each result row is ?subject.
        yield errmsg.format(str(r[0]))
def _validate_undefined_properties(self):
    """Find properties that aren't defined.

    Yields one message for every statement whose predicate has no
    type statement of its own in the model.
    """
    # NOTE(review): the assignment opening this string, the 'where {'
    # line, the closing lines and the last two format() arguments were
    # not present in the text under review; they are restored from the
    # select list and the three placeholders in the message.
    body = """
    prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
    prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
    prefix owl: <http://www.w3.org/2002/07/owl#>

    select ?subject ?predicate ?object
    where {
      ?subject ?predicate ?object
      OPTIONAL { ?predicate a ?predicate_class }
      FILTER(!bound(?predicate_class))
    }
    """
    msg = "Undefined property in {0} {1} {2}"
    for r in self.model.query(body):
        yield msg.format(r['subject'],
                         r['predicate'],
                         r['object'])
# Validation rule picked up by run_validation() through its "_validate_"
# name prefix.  It walks every quad in the model and hands the subject to
# check_node_space() with RDFS.domain and the object with RDFS.range.
# NOTE(review): the docstring summary below reads like it was copied from
# another rule; this method checks domain/range conformance.
# NOTE(review): many lines of this method are absent from the text under
# review (the tail of the prepared query, the errors list and its return,
# the statement under `if context == schema:` and the bodies of the two
# final loops) -- restore them from the upstream file.
170 def _validate_property_types(self):
171 """Find resources that don't have a type
173 property_query = prepareQuery("""
174 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
175 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
177 select ?type ?predicate
179 ?predicate a rdf:Property ;
# Nested helper: closes over self.model and property_query.
183 def check_node_space(node, predicate, space, errmsg):
184 """Check that a node conforms to its allowable space of types.
186 e.g. is a subject (node) the domain (space) of this property
187 and is the object (node) the range of this property.
189 resource_error = "Expected resource for {0} in range {1}"
190 type_error = "Type of {0} was {1} not {2}"
# Each result row carries one expected type for this predicate.
194 for i, r in enumerate(self.model.query(property_query,
196 'predicate': predicate,
198 # Make sure we have a resource if we're expecting one
199 expected_type = r['type']
# Literal nodes are compared by datatype (or accepted for RDFS Literal).
201 if isinstance(node, Literal):
202 if expected_type == RDFS['Literal']:
204 elif node.datatype == expected_type:
207 # not currently handling type hierarchy.
208 # a integer could pass a range of decimal for instance.
210 "Type error: {} was type {}, expected {}".format(
214 elif expected_type == RDFS['Resource']:
215 if isinstance(node, Literal):
216 errors.append(resource_error.format(str(node), space))
# Otherwise the node needs an explicit type statement in the model.
220 check = (node, RDF['type'], expected_type)
221 if check not in self.model:
222 errors.append(errmsg + str(node) + ' was not a ' + str(expected_type))
227 ### End nested function
# Message prefixes; the offending statement is formatted into {0}.
229 wrong_domain_type = "Domain of {0} was not in:"
230 wrong_range_type = "Range of {0} was not in:"
# Graph identified by SCHEMAS_URL, compared against each quad's context.
233 schema = ConjunctiveGraph(identifier=SCHEMAS_URL)
234 for subject, predicate, obj, context in self.model.quads():
235 stmt = (subject, predicate, obj)
237 if context == schema:
# The subject is checked against the predicate's domain.
240 for error in check_node_space(subject, predicate, RDFS.domain,
241 wrong_domain_type.format(str(stmt))):
# The object is checked against the predicate's range.
244 for error in check_node_space(obj, predicate, RDFS.range,
245 wrong_range_type.format(str(stmt))):