+import logging
+import os
+import sys
+
import RDF
from htsworkflow.util.rdfns import *
from htsworkflow.util.rdfhelp import SCHEMAS_URL
INFER_URL='http://jumpgate.caltech.edu/phony/infer'
+LOGGER = logging.getLogger(__name__)
class Infer(object):
"""Provide some simple inference.
self._context = RDF.Node(RDF.Uri(INFER_URL))
- def update(self, max_iterations=None):
+ def think(self, max_iterations=None):
"""Update model with with inferred statements.
max_iterations puts a limit on the number of times we
for method_name in dir(self):
if method_name.startswith('_rule_'):
+ LOGGER.info("Running: %s", method_name)
method = getattr(self, method_name)
method()
if self.model.size() == starting_size:
# we didn't add anything new
return
+    def validate(self, destination=None):
+        """Write every validation message to destination, one per line.
+
+        destination is any object with a write() method that accepts
+        text; when it is None the messages go to sys.stdout.
+        """
+        if destination is None:
+            destination = sys.stdout
+
+        for msg in self.run_validation():
+            destination.write(msg)
+            # Text-mode streams translate '\n' into the platform line
+            # ending on their own; writing os.linesep here would come
+            # out as '\r\r\n' on Windows.
+            destination.write('\n')
+
+    def run_validation(self):
+        """Apply the validation rules to our model.
+
+        Every attribute of this object whose name starts with
+        '_validate_' is logged and called; each message it produces
+        is yielded to the caller.
+        """
+        for attr_name in dir(self):
+            if not attr_name.startswith('_validate_'):
+                continue
+            LOGGER.info("Running: %s", attr_name)
+            validator = getattr(self, attr_name)
+            for message in validator():
+                yield message
+
+ def _rule_class(self):
+ """Resolve class chains.
+
+ e.g. if a is a BClass, and BClass is itself typed as an AClass
+ then a is both a BClass and an AClass.
+ """
+ # Find every ?obj whose type (?alias) is itself typed as ?class.
+ body = """
+ prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix owl: <http://www.w3.org/2002/07/owl#>
+
+ select ?obj ?class
+ where {
+ ?alias a ?class .
+ ?obj a ?alias .
+ }"""
+ query = RDF.SPARQLQuery(body)
+ for r in query.execute(self.model):
+ s = RDF.Statement(r['obj'], rdfNS['type'], r['class'])
+ # Only add statements the model does not already contain;
+ # new ones are appended with the inference context.
+ if s not in self.model:
+ self.model.append(s, self._context)
+
+ def _rule_subclass(self):
+ """An instance of a subclass is also an instance of the parent class.
+
+ For every ?obj typed as ?subclass, where ?subclass is
+ rdfs:subClassOf ?parent, add the statement that ?obj has
+ rdf:type ?parent.
+ """
+ body = """
+ prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+ prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+ prefix owl: <http://www.w3.org/2002/07/owl#>
+
+ select ?obj ?subclass ?parent
+ where {
+ ?subclass rdfs:subClassOf ?parent .
+ ?obj a ?subclass .
+ }"""
+ query = RDF.SPARQLQuery(body)
+ for r in query.execute(self.model):
+ s = RDF.Statement(r['obj'], rdfNS['type'], r['parent'])
+ # Only add statements the model does not already contain;
+ # new ones are appended with the inference context.
+ if s not in self.model:
+ self.model.append(s, self._context)
+
def _rule_inverse_of(self):
"""Add statements computed with inverseOf
"""
prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>
prefix owl: <http://www.w3.org/2002/07/owl#>
+ prefix xhtmlv: <http://www.w3.org/1999/xhtml/vocab#>
select ?subject ?predicate ?object
where {
?subject ?predicate ?object
OPTIONAL { ?subject a ?class }
FILTER(!bound(?class))
+ FILTER(?predicate != xhtmlv:stylesheet)
}
"""
query = RDF.SPARQLQuery(body)
{space} ?type .
}}"""
- wrong_domain_type = "Domain of {0} {1} {2} not {3}"
- wrong_range_type = "Range of {0} {1} {2} not {3}"
+ def check_node_space(node, predicate, space, errmsg):
+ """Check that a node conforms to its allowable space of types.
+
+ e.g. is a subject (node) in the domain (space) of this property,
+ or is the object (node) in the range of this property.
+
+ Returns an error message string when the check fails and
+ None otherwise.
+ """
+ resource_error = "Expected resource for {0} in range {1}"
+ type_error = "Type of {0} was {1} not {2}"
+ # look up the types listed for this predicate under `space`
+ # (the callers pass 'rdfs:domain' or 'rdfs:range')
+ query = RDF.SPARQLQuery(property_template.format(
+ predicate=predicate.uri,
+ space=space))
+ seen = set()
+ for r in query.execute(self.model):
+ # Make sure we have a resource if we're expecting one
+ if r['type'] == rdfsNS['Resource']:
+ if node.is_literal():
+ return resource_error.format(str(node), space)
+ continue
+ seen.add(str(r['type'].uri))
+ if node.is_literal():
+ # literal is a generic type.
+ nodetype = node.literal_value['datatype']
+ if nodetype is None:
+ # let's default to string
+ nodetype = xsdNS['string'].uri
+ if r['type'] == rdfsNS['Literal']:
+ pass
+ elif nodetype != r['type'].uri:
+ return type_error.format(
+ str(node), nodetype, r['type'])
+ # check that node is the expected class type
+ check = RDF.Statement(node, rdfNS['type'], r['type'])
+ if self.model.contains_statement(check):
+ return
+
+ # need the seen check, because we're suppressing checking
+ # rdfs:Resource types
+ if len(seen) > 0:
+ return errmsg + ",".join(seen)
+
+
+ wrong_domain_type = "Domain of {0} was not in:"
+ wrong_range_type = "Range of {0} was not in:"
count = 0
schema = RDF.Node(RDF.Uri(SCHEMAS_URL))
if context == schema:
continue
# check domain
- query = RDF.SPARQLQuery(property_template.format(
- predicate=s.predicate,
- space='rdfs:domain'))
- for r in query.execute(self.model):
- if r['type'] == rdfsNS['Resource']:
- continue
- check = RDF.Statement(s.subject, rdfNS['type'], r['type'])
- if not self.model.contains_statement(check):
- yield wrong_domain_type.format(str(s.subject),
- str(s.predicate),
- str(s.object),
- str(r['type']))
+ msg = check_node_space(s.subject, s.predicate, 'rdfs:domain',
+ wrong_domain_type.format(str(s)))
+ if msg is not None: yield msg
# check range
- query = RDF.SPARQLQuery(property_template.format(
- predicate=s.predicate,
- space='rdfs:range'))
- for r in query.execute(self.model):
- if r['type'] == rdfsNS['Resource']:
- continue
- check = RDF.Statement(s.object, rdfNS['type'], r['type'])
- if not self.model.contains_statement(check):
- yield wrong_range_type.format(str(s.subject),
- str(s.predicate),
- str(s.object),
- str(r['type']))
-
+ msg = check_node_space(s.object, s.predicate, 'rdfs:range',
+ wrong_range_type.format(str(s)))
+ if msg is not None: yield msg
return