7 from htsworkflow.util.rdfns import *
8 from htsworkflow.util.rdfhelp import SCHEMAS_URL
10 INFER_URL='http://jumpgate.caltech.edu/phony/infer'
13 """Provide some simple inference.
15 Provides a few default rules as methods starting with _rule_
17 def __init__(self, model):
19 self._context = RDF.Node(RDF.Uri(INFER_URL))
22 def think(self, max_iterations=None):
23 """Update model with inferred statements.
25 max_iterations puts a limit on the number of times we
28 it will also try to exit if nothing new has been inferred.
30 Also this is the naive solution.
31 There are probably better ones out there.
34 while max_iterations is None or iterations != max_iterations:
35 starting_size = self.model.size()
37 for method_name in dir(self):
38 if method_name.startswith('_rule_'):
39 method = getattr(self, method_name)
41 if self.model.size() == starting_size:
42 # we didn't add anything new
def validate(self, destination=None):
    """Write every validation message to destination, one per line.

    :param destination: object with a ``write`` method that accepts
        text; when None, messages are written to ``sys.stdout``.
    """
    if destination is None:
        destination = sys.stdout

    for msg in self.run_validation():
        destination.write(msg)
        # Text-mode streams translate '\n' to the platform newline
        # themselves; writing os.linesep would emit '\r\r\n' on Windows.
        destination.write('\n')
53 def run_validation(self):
54 """Apply validation rules to our model.
56 for method_name in dir(self):
57 if method_name.startswith('_validate_'):
58 method = getattr(self, method_name)
62 def _rule_class(self):
63 """resolve class chains.
64 e.g. if a is a BClass, and a BClass is an AClass
65 then a is both a BClass and AClass.
68 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
69 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
70 prefix owl: <http://www.w3.org/2002/07/owl#>
77 query = RDF.SPARQLQuery(body)
78 for r in query.execute(self.model):
79 s = RDF.Statement(r['obj'], rdfNS['type'], r['class'])
80 if s not in self.model:
81 self.model.append(s, self._context)
83 def _rule_subclass(self):
84 """A subclass is a parent class
87 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
88 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
89 prefix owl: <http://www.w3.org/2002/07/owl#>
91 select ?obj ?subclass ?parent
93 ?subclass rdfs:subClassOf ?parent .
96 query = RDF.SPARQLQuery(body)
97 for r in query.execute(self.model):
98 s = RDF.Statement(r['obj'], rdfNS['type'], r['parent'])
99 if s not in self.model:
100 self.model.append(s, self._context)
102 def _rule_inverse_of(self):
103 """Add statements computed with inverseOf
106 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
107 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
108 prefix owl: <http://www.w3.org/2002/07/owl#>
110 select ?o ?reverse ?s
115 ?term owl:inverseOf ?reverse .
116 ?term rdfs:domain ?subject_type ;
117 rdfs:range ?object_type .
118 ?reverse rdfs:domain ?object_type ;
119 rdfs:range ?subject_type .
121 query = RDF.SPARQLQuery(body)
124 for r in query.execute(self.model):
125 s = RDF.Statement(r['o'], r['reverse'], r['s'])
126 if s not in self.model:
127 self.model.append(s, self._context)
130 def _validate_types(self):
132 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
133 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
134 prefix owl: <http://www.w3.org/2002/07/owl#>
136 select ?subject ?predicate ?object
138 ?subject ?predicate ?object
139 OPTIONAL { ?subject a ?class }
140 FILTER(!bound(?class))
143 query = RDF.SPARQLQuery(body)
144 errmsg = "Missing type for: {0}"
145 for r in query.execute(self.model):
146 yield errmsg.format(str(r['subject']))
148 def _validate_undefined_properties(self):
149 """Find properties that aren't defined.
152 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
153 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
154 prefix owl: <http://www.w3.org/2002/07/owl#>
156 select ?subject ?predicate ?object
158 ?subject ?predicate ?object
159 OPTIONAL { ?predicate a ?predicate_class }
160 FILTER(!bound(?predicate_class))
162 query = RDF.SPARQLQuery(body)
163 msg = "Undefined property in {0} {1} {2}"
164 for r in query.execute(self.model):
165 yield msg.format(str(r['subject']),
169 def _validate_property_types(self):
170 """Find statements whose subject or object is outside the property's domain or range
172 property_template = """
173 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
174 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
178 <{predicate}> a rdf:Property ;
182 def check_node_space(node, predicate, space, errmsg):
183 """Check that a node conforms to its allowable space of types.
185 e.g. is the subject (node) in the domain (space) of this property
186 and is the object (node) in the range of this property.
189 query = RDF.SPARQLQuery(property_template.format(
190 predicate=predicate.uri,
193 for r in query.execute(self.model):
194 if r['type'] == rdfsNS['Resource']:
196 seen.append(str(r['type'].uri))
197 check = RDF.Statement(node, rdfNS['type'], r['type'])
198 if self.model.contains_statement(check):
201 # need the seen check, because we're suppressing checking
202 # rdfs:Resource types
204 return errmsg + ",".join(seen)
207 wrong_domain_type = "Domain of {0} was not in:"
208 wrong_range_type = "Range of {0} was not in:"
211 schema = RDF.Node(RDF.Uri(SCHEMAS_URL))
212 for s, context in self.model.as_stream_context():
213 if context == schema:
216 msg = check_node_space(s.subject, s.predicate, 'rdfs:domain',
217 wrong_domain_type.format(str(s)))
218 if msg is not None: yield msg
220 msg = check_node_space(s.object, s.predicate, 'rdfs:range',
221 wrong_range_type.format(str(s)))
222 if msg is not None: yield msg