7 from htsworkflow.util.rdfns import *
8 from htsworkflow.util.rdfhelp import SCHEMAS_URL
# URI used to build the context node under which Infer records the
# statements it adds to the model.
10 INFER_URL='http://jumpgate.caltech.edu/phony/infer'
# Module-level logger named after this module.
11 LOGGER = logging.getLogger(__name__)
14 """Provide some simple inference.
16 Provides a few default rules as methods starting with _rule_
18 def __init__(self, model):
# Context node built from INFER_URL; the _rule_* methods pass it to
# model.append() for every statement they add.
20 self._context = RDF.Node(RDF.Uri(INFER_URL))
23 def think(self, max_iterations=None):
24 """Update model with inferred statements.
26 max_iterations puts a limit on the number of times we
29 it will also try to exit if nothing new has been inferred.
31 Also this is the naive solution.
32 There are probably better ones out there.
35 while max_iterations is None or iterations != max_iterations:
# NOTE(review): no statement advancing `iterations` is visible in this
# excerpt -- confirm the counter is incremented somewhere in the loop,
# otherwise max_iterations cannot end it.
# Remember the model size so we can tell whether this pass added anything.
36 starting_size = self.model.size()
# Rules are discovered by name: every attribute starting with '_rule_'.
38 for method_name in dir(self):
39 if method_name.startswith('_rule_'):
40 LOGGER.info("Running: %s", method_name)
41 method = getattr(self, method_name)
# An unchanged size after a full pass means no rule added a statement.
43 if self.model.size() == starting_size:
44 # we didn't add anything new
def validate(self, destination=None):
    """Write every validation message to a stream, one per line.

    destination is any object with a write() method. When it is None
    the messages are written to sys.stdout.
    """
    if destination is None:
        destination = sys.stdout

    for msg in self.run_validation():
        destination.write(msg)
        # Terminate with '\n' rather than os.linesep: text-mode streams
        # such as sys.stdout already translate newlines, so os.linesep
        # would be emitted as '\r\r\n' on Windows.
        destination.write('\n')
55 def run_validation(self):
56 """Apply validation rules to our model.
58 for method_name in dir(self):
# Validators are discovered by name: every attribute whose name starts
# with '_validate_' is logged and looked up here.
59 if method_name.startswith('_validate_'):
60 LOGGER.info("Running: %s", method_name)
61 method = getattr(self, method_name)
65 def _rule_class(self):
66 """Resolve class chains.
67 e.g. if a is a BClass, and a BClass is an AClass
68 then a is both a BClass and AClass.
71 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
72 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
73 prefix owl: <http://www.w3.org/2002/07/owl#>
80 query = RDF.SPARQLQuery(body)
81 for r in query.execute(self.model):
# Each result row yields a type statement: ?obj rdfNS['type'] ?class.
82 s = RDF.Statement(r['obj'], rdfNS['type'], r['class'])
# Only append statements the model does not already contain; additions
# are recorded under the inference context.
83 if s not in self.model:
84 self.model.append(s, self._context)
86 def _rule_subclass(self):
87 """A subclass is a parent class
90 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
91 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
92 prefix owl: <http://www.w3.org/2002/07/owl#>
94 select ?obj ?subclass ?parent
96 ?subclass rdfs:subClassOf ?parent .
99 query = RDF.SPARQLQuery(body)
100 for r in query.execute(self.model):
# Each result row yields a type statement giving ?obj the ?parent class.
101 s = RDF.Statement(r['obj'], rdfNS['type'], r['parent'])
# Only append statements the model does not already contain; additions
# are recorded under the inference context.
102 if s not in self.model:
103 self.model.append(s, self._context)
105 def _rule_inverse_of(self):
106 """Add statements computed with inverseOf
109 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
110 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
111 prefix owl: <http://www.w3.org/2002/07/owl#>
113 select ?o ?reverse ?s
118 ?term owl:inverseOf ?reverse .
119 ?term rdfs:domain ?subject_type ;
120 rdfs:range ?object_type .
121 ?reverse rdfs:domain ?object_type ;
122 rdfs:range ?subject_type .
124 query = RDF.SPARQLQuery(body)
127 for r in query.execute(self.model):
# Build the reversed triple: ?o becomes the subject, ?s the object, and
# the predicate is the ?reverse property.
128 s = RDF.Statement(r['o'], r['reverse'], r['s'])
# Only append statements the model does not already contain; additions
# are recorded under the inference context.
129 if s not in self.model:
130 self.model.append(s, self._context)
133 def _validate_types(self):
135 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
136 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
137 prefix owl: <http://www.w3.org/2002/07/owl#>
138 prefix xhtmlv: <http://www.w3.org/1999/xhtml/vocab#>
140 select ?subject ?predicate ?object
142 ?subject ?predicate ?object
143 OPTIONAL { ?subject a ?class }
144 FILTER(!bound(?class))
145 FILTER(?predicate != xhtmlv:stylesheet)
# The query keeps triples whose subject has no type (the OPTIONAL ?class
# stays unbound) and whose predicate is not xhtmlv:stylesheet.
148 query = RDF.SPARQLQuery(body)
149 errmsg = "Missing type for: {0}"
# One message is yielded per result row, naming that row's ?subject.
150 for r in query.execute(self.model):
151 yield errmsg.format(str(r['subject']))
153 def _validate_undefined_properties(self):
154 """Find properties that aren't defined.
157 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
158 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
159 prefix owl: <http://www.w3.org/2002/07/owl#>
161 select ?subject ?predicate ?object
163 ?subject ?predicate ?object
164 OPTIONAL { ?predicate a ?predicate_class }
165 FILTER(!bound(?predicate_class))
# The query keeps triples whose predicate has no type statement of its
# own (the OPTIONAL ?predicate_class stays unbound).
167 query = RDF.SPARQLQuery(body)
168 msg = "Undefined property in {0} {1} {2}"
# One message is yielded per result row.
169 for r in query.execute(self.model):
170 yield msg.format(str(r['subject']),
174 def _validate_property_types(self):
175 """Find statements whose subject or object does not fit the predicate's rdfs:domain / rdfs:range
177 property_template = """
178 prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
179 prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
183 <{predicate}> a rdf:Property ;
187 def check_node_space(node, predicate, space, errmsg):
188 """Check that a node conforms to its allowable space of types.
190 e.g. is a subject (node) the domain (space) of this property
191 and is the object (node) the range of this property.
193 resource_error = "Expected resource for {0} in range {1}"
194 type_error = "Type of {0} was {1} not {2}"
196 query = RDF.SPARQLQuery(property_template.format(
197 predicate=predicate.uri,
# NOTE(review): presumably each row's ?type is one type the predicate
# declares for `space` -- the select clause is not visible here; confirm.
200 for r in query.execute(self.model):
201 # Make sure we have a resource if we're expecting one
202 if r['type'] == rdfsNS['Resource']:
203 if node.is_literal():
204 return resource_error.format(str(node), space)
# Record every candidate type URI so it can be listed in the error text.
206 seen.add(str(r['type'].uri))
207 if node.is_literal():
208 # literal is a generic type.
209 nodetype = node.literal_value['datatype']
211 # let's default to string
212 nodetype = xsdNS['string'].uri
213 if r['type'] == rdfsNS['Literal']:
215 elif nodetype != r['type'].uri:
216 return type_error.format(
217 str(node), nodetype, r['type'])
218 # check that node is the expected class type
219 check = RDF.Statement(node, rdfNS['type'], r['type'])
220 if self.model.contains_statement(check):
223 # need the seen check, because we're suppressing checking
224 # rdfs:Resource types
# The error text is the caller's message followed by the comma-joined
# type URIs collected in `seen`.
226 return errmsg + ",".join(seen)
229 wrong_domain_type = "Domain of {0} was not in:"
230 wrong_range_type = "Range of {0} was not in:"
233 schema = RDF.Node(RDF.Uri(SCHEMAS_URL))
# Walk every statement together with the context it is stored under.
234 for s, context in self.model.as_stream_context():
# NOTE(review): presumably statements that belong to the schema context
# are skipped here -- the branch body is not visible; confirm.
235 if context == schema:
# The subject is checked against the predicate's rdfs:domain.
238 msg = check_node_space(s.subject, s.predicate, 'rdfs:domain',
239 wrong_domain_type.format(str(s)))
240 if msg is not None: yield msg
# The object is checked against the predicate's rdfs:range.
242 msg = check_node_space(s.object, s.predicate, 'rdfs:range',
243 wrong_range_type.format(str(s)))
244 if msg is not None: yield msg