7 from htsworkflow.util.rdfns import *
8 from htsworkflow.util.rdfhelp import SCHEMAS_URL
# URI wrapped into the RDF context node that inferred statements are appended under.
10 INFER_URL='http://jumpgate.caltech.edu/phony/infer'
# Module-level logger, named after this module.
11 LOGGER = logging.getLogger(__name__)
14 """Provide some simple inference.
16 Provides a few default rules as methods starting with _rule_
18 def __init__(self, model):
20 self._context = RDF.Node(RDF.Uri(INFER_URL))
23 def think(self, max_iterations=None):
24 """Update model with inferred statements.
26 max_iterations puts a limit on the number of times we
29 it will also try to exit if nothing new has been inferred.
31 Also this is the naive solution.
32 There are probably better ones out there.
35 while max_iterations is None or iterations != max_iterations:
36 starting_size = self.model.size()
38 for method_name in dir(self):
39 if method_name.startswith('_rule_'):
40 LOGGER.info("Running: %s", method_name)
41 method = getattr(self, method_name)
43 if self.model.size() == starting_size:
44 # we didn't add anything new
def validate(self, destination=None):
    """Write every validation message to a stream, one message per line.

    destination is any object with a ``write`` method that accepts text.
    When it is None the messages go to ``sys.stdout``.

    The messages themselves come from ``self.run_validation()``.
    """
    if destination is None:
        destination = sys.stdout

    for msg in self.run_validation():
        destination.write(msg)
        # Terminate with '\n' rather than os.linesep: text-mode streams
        # already translate newlines, so os.linesep would be written as
        # '\r\r\n' on Windows.
        destination.write('\n')
55 def run_validation(self):
56 """Apply validation rules to our model.
58 for method_name in dir(self):
59 if method_name.startswith('_validate_'):
60 LOGGER.info("Running: %s", method_name)
61 method = getattr(self, method_name)
65 def _rule_class(self):
66 """resolve class chains.
67 e.g. if a is a BClass, and a BClass is an AClass
68 then a is both a BClass and AClass.
71 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
72 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
73 prefix owl: <http://www.w3.org/2002/07/owl#>
80 query = RDF.SPARQLQuery(body)
81 for r in query.execute(self.model):
82 s = RDF.Statement(r['obj'], rdfNS['type'], r['class'])
83 if s not in self.model:
84 self.model.append(s, self._context)
86 def _rule_subclass(self):
87 """A subclass is a parent class
90 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
91 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
92 prefix owl: <http://www.w3.org/2002/07/owl#>
94 select ?obj ?subclass ?parent
96 ?subclass rdfs:subClassOf ?parent .
99 query = RDF.SPARQLQuery(body)
100 for r in query.execute(self.model):
101 s = RDF.Statement(r['obj'], rdfNS['type'], r['parent'])
102 if s not in self.model:
103 self.model.append(s, self._context)
105 def _rule_inverse_of(self):
106 """Add statements computed with inverseOf
109 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
110 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
111 prefix owl: <http://www.w3.org/2002/07/owl#>
113 select ?o ?reverse ?s
118 ?term owl:inverseOf ?reverse .
119 ?term rdfs:domain ?subject_type ;
120 rdfs:range ?object_type .
121 ?reverse rdfs:domain ?object_type ;
122 rdfs:range ?subject_type .
124 query = RDF.SPARQLQuery(body)
127 for r in query.execute(self.model):
128 s = RDF.Statement(r['o'], r['reverse'], r['s'])
129 if s not in self.model:
130 self.model.append(s, self._context)
133 def _validate_types(self):
135 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
136 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
137 prefix owl: <http://www.w3.org/2002/07/owl#>
139 select ?subject ?predicate ?object
141 ?subject ?predicate ?object
142 OPTIONAL { ?subject a ?class }
143 FILTER(!bound(?class))
146 query = RDF.SPARQLQuery(body)
147 errmsg = "Missing type for: {0}"
148 for r in query.execute(self.model):
149 yield errmsg.format(str(r['subject']))
151 def _validate_undefined_properties(self):
152 """Find properties that aren't defined.
155 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
156 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
157 prefix owl: <http://www.w3.org/2002/07/owl#>
159 select ?subject ?predicate ?object
161 ?subject ?predicate ?object
162 OPTIONAL { ?predicate a ?predicate_class }
163 FILTER(!bound(?predicate_class))
165 query = RDF.SPARQLQuery(body)
166 msg = "Undefined property in {0} {1} {2}"
167 for r in query.execute(self.model):
168 yield msg.format(str(r['subject']),
172 def _validate_property_types(self):
173 """Find nodes whose type doesn't match a property's domain or range
175 property_template = """
176 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
177 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
181 <{predicate}> a rdf:Property ;
185 def check_node_space(node, predicate, space, errmsg):
186 """Check that a node conforms to its allowable space of types.
188 e.g. is a subject (node) the domain (space) of this property
189 and is the object (node) the range of this property.
191 resource_error = "Expected resource for {0} in range {1}"
192 type_error = "Type of {0} was {1} not {2}"
194 query = RDF.SPARQLQuery(property_template.format(
195 predicate=predicate.uri,
198 for r in query.execute(self.model):
199 # Make sure we have a resource if we're expecting one
200 if r['type'] == rdfsNS['Resource']:
201 if not node.is_resource():
202 return resource_error.format(str(node), space)
204 seen.add(str(r['type'].uri))
205 if node.is_literal():
206 # literal is a generic type.
207 nodetype = node.literal_value['datatype']
209 # let's default to string
210 nodetype = xsdNS['string'].uri
211 if r['type'] == rdfsNS['Literal']:
213 elif nodetype != r['type'].uri:
214 return type_error.format(
215 str(node), nodetype, r['type'])
216 # check that node is the expected class type
217 check = RDF.Statement(node, rdfNS['type'], r['type'])
218 if self.model.contains_statement(check):
221 # need the seen check, because we're suppressing checking
222 # rdfs:Resource types
224 return errmsg + ",".join(seen)
227 wrong_domain_type = "Domain of {0} was not in:"
228 wrong_range_type = "Range of {0} was not in:"
231 schema = RDF.Node(RDF.Uri(SCHEMAS_URL))
232 for s, context in self.model.as_stream_context():
233 if context == schema:
236 msg = check_node_space(s.subject, s.predicate, 'rdfs:domain',
237 wrong_domain_type.format(str(s)))
238 if msg is not None: yield msg
240 msg = check_node_space(s.object, s.predicate, 'rdfs:range',
241 wrong_range_type.format(str(s)))
242 if msg is not None: yield msg