7 from htsworkflow.util.rdfns import *
8 from htsworkflow.util.rdfhelp import SCHEMAS_URL
# URI identifying the RDF context for inferred statements: __init__ wraps it
# in an RDF.Node, and the _rule_* methods pass that node to model.append().
10 INFER_URL='http://jumpgate.caltech.edu/phony/infer'
13 """Provide some simple inference.
15 Provides a few default rules as methods starting with _rule_
# Set up the inference helper. _context is the RDF node built from INFER_URL;
# the _rule_* methods hand it to model.append() with every statement they add.
# NOTE(review): the embedded line numbering skips 18 here. Other methods read
# self.model, so presumably the missing line stores the `model` argument --
# confirm against the full file.
17 def __init__(self, model):
19 self._context = RDF.Node(RDF.Uri(INFER_URL))
# Loop while max_iterations is None or the iteration counter differs from it,
# looking up every attribute of self whose name starts with '_rule_' on each
# pass and comparing the model size before and after.
# NOTE(review): several lines are absent from this extract (the embedded
# numbering has gaps), including the counter setup, the use of `method`, and
# the body of the final `if` -- confirm those against the full file.
22 def think(self, max_iterations=None):
23 """Update model with inferred statements.
25 max_iterations puts a limit on the number of times we
28 it will also try to exit if nothing new has been inferred.
30 Also this is the naive solution.
31 There's probably better ones out there.
34 while max_iterations is None or iterations != max_iterations:
# Snapshot the size so growth during this pass can be detected below.
35 starting_size = self.model.size()
# dir(self) is used for discovery, so any attribute named _rule_* is picked up.
37 for method_name in dir(self):
38 if method_name.startswith('_rule_'):
39 method = getattr(self, method_name)
# Unchanged size after a full pass means no rule added a statement.
41 if self.model.size() == starting_size:
42 # we didn't add anything new
def validate(self, destination=None):
    """Write every validation message to *destination*, one per line.

    The messages come from ``self.run_validation()``; each one is written
    followed by a newline.

    :param destination: object with a ``write(str)`` method; when ``None``
        (the default) ``sys.stdout`` is used.
    """
    if destination is None:
        destination = sys.stdout

    for msg in self.run_validation():
        destination.write(msg)
        # Use '\n' rather than os.linesep: text-mode streams translate the
        # newline themselves, so os.linesep would come out as '\r\r\n' on
        # Windows.
        destination.write('\n')
# Look up every attribute of self whose name starts with '_validate_'.
# NOTE(review): what happens with each fetched method is on lines absent from
# this extract. validate() iterates this method's result and writes each item,
# so presumably the validators' messages are yielded here -- confirm.
53 def run_validation(self):
54 """Apply validation rules to our model.
# dir(self) is used for discovery, so any attribute named _validate_* is found.
56 for method_name in dir(self):
57 if method_name.startswith('_validate_'):
58 method = getattr(self, method_name)
# Run a SPARQL query against self.model and, for each result row, build the
# statement (row['obj'], rdfNS['type'], row['class']); append it under the
# inference context unless the model already contains it.
# NOTE(review): the select/where part of the query is absent from this
# extract (embedded numbers 72-77 are missing) -- confirm what it matches.
63 def _rule_class(self):
64 """resolve class chains.
65 e.g. if a is a BClass, and a BClass is an AClass
66 then a is both a BClass and AClass.
69 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
70 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
71 prefix owl: <http://www.w3.org/2002/07/owl#>
78 query = RDF.SPARQLQuery(body)
79 for r in query.execute(self.model):
80 s = RDF.Statement(r['obj'], rdfNS['type'], r['class'])
# Skip statements that are already present; new ones go in self._context.
81 if s not in self.model:
82 self.model.append(s, self._context)
# Query self.model for ?obj / ?subclass / ?parent rows where
# ?subclass rdfs:subClassOf ?parent, then add the statement
# (row['obj'], rdfNS['type'], row['parent']) under the inference context
# unless the model already contains it.
# NOTE(review): the pattern that binds ?obj is on a line absent from this
# extract; presumably it requires ?obj to be typed as ?subclass -- confirm.
84 def _rule_subclass(self):
85 """A subclass is a parent class
88 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
89 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
90 prefix owl: <http://www.w3.org/2002/07/owl#>
92 select ?obj ?subclass ?parent
94 ?subclass rdfs:subClassOf ?parent .
97 query = RDF.SPARQLQuery(body)
98 for r in query.execute(self.model):
99 s = RDF.Statement(r['obj'], rdfNS['type'], r['parent'])
# Skip statements that are already present; new ones go in self._context.
100 if s not in self.model:
101 self.model.append(s, self._context)
# Query self.model for ?o / ?reverse / ?s rows where ?term owl:inverseOf
# ?reverse and the rdfs:domain / rdfs:range of ?term and ?reverse are swapped
# relative to each other, then add the statement
# (row['o'], row['reverse'], row['s']) under the inference context unless the
# model already contains it.
# NOTE(review): the patterns binding ?s and ?o are on lines absent from this
# extract (embedded numbers 112-115) -- confirm against the full file.
103 def _rule_inverse_of(self):
104 """Add statements computed with inverseOf
107 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
108 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
109 prefix owl: <http://www.w3.org/2002/07/owl#>
111 select ?o ?reverse ?s
116 ?term owl:inverseOf ?reverse .
117 ?term rdfs:domain ?subject_type ;
118 rdfs:range ?object_type .
119 ?reverse rdfs:domain ?object_type ;
120 rdfs:range ?subject_type .
122 query = RDF.SPARQLQuery(body)
125 for r in query.execute(self.model):
# Object and subject trade places, joined by the inverse predicate.
126 s = RDF.Statement(r['o'], r['reverse'], r['s'])
# Skip statements that are already present; new ones go in self._context.
127 if s not in self.model:
128 self.model.append(s, self._context)
# Generator: query self.model for triples whose ?subject has no `a ?class`
# binding (OPTIONAL followed by FILTER(!bound(?class))) and yield one
# "Missing type for: ..." message per result row.
# NOTE(review): the lines framing the query text are absent from this
# extract -- confirm the full query against the original file.
131 def _validate_types(self):
133 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
134 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
135 prefix owl: <http://www.w3.org/2002/07/owl#>
137 select ?subject ?predicate ?object
139 ?subject ?predicate ?object
140 OPTIONAL { ?subject a ?class }
141 FILTER(!bound(?class))
144 query = RDF.SPARQLQuery(body)
145 errmsg = "Missing type for: {0}"
146 for r in query.execute(self.model):
# Only the subject is reported, although the row also carries predicate/object.
147 yield errmsg.format(str(r['subject']))
# Generator: query self.model for triples whose ?predicate has no
# `a ?predicate_class` binding (OPTIONAL followed by FILTER(!bound(...))) and
# yield an "Undefined property in ..." message per result row.
# NOTE(review): the final format() call continues on lines absent from this
# extract; the template has three fields, presumably subject, predicate and
# object -- confirm against the full file.
149 def _validate_undefined_properties(self):
150 """Find properties that aren't defined.
153 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
154 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
155 prefix owl: <http://www.w3.org/2002/07/owl#>
157 select ?subject ?predicate ?object
159 ?subject ?predicate ?object
160 OPTIONAL { ?predicate a ?predicate_class }
161 FILTER(!bound(?predicate_class))
163 query = RDF.SPARQLQuery(body)
# Message template with three positional fields.
164 msg = "Undefined property in {0} {1} {2}"
165 for r in query.execute(self.model):
166 yield msg.format(str(r['subject']),
170 def _validate_property_types(self):
171 """Find resources that don't have a type
173 property_template = """
174 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
175 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
179 <{predicate}> a rdf:Property ;
183 wrong_domain_type = "Domain of {0} was not {1}"
184 wrong_range_type = "Range of {0} was not {1}"
187 schema = RDF.Node(RDF.Uri(SCHEMAS_URL))
188 for s, context in self.model.as_stream_context():
189 if context == schema:
192 query = RDF.SPARQLQuery(property_template.format(
193 predicate=s.predicate,
194 space='rdfs:domain'))
195 for r in query.execute(self.model):
196 if r['type'] == rdfsNS['Resource']:
198 check = RDF.Statement(s.subject, rdfNS['type'], r['type'])
199 if not self.model.contains_statement(check):
200 yield wrong_domain_type.format(str(s),
203 query = RDF.SPARQLQuery(property_template.format(
204 predicate=s.predicate,
206 for r in query.execute(self.model):
207 if r['type'] == rdfsNS['Resource']:
209 check = RDF.Statement(s.object, rdfNS['type'], r['type'])
210 if not self.model.contains_statement(check):
211 yield wrong_range_type.format(str(s),