htsworkflow/util/rdfhelp.py

   1 """Helper features for working with librdf
   2 """
   3 import logging
   4 import os
   5 import types
   6
   7 import RDF
   8
# module-level logger, named after this module
logger = logging.getLogger(__name__)

# standard ontology namespaces
# Each constant is an RDF.NS built from the vocabulary's base URI; code in
# this module looks terms up by indexing, e.g. xsdNS['boolean'].
owlNS = RDF.NS('http://www.w3.org/2002/07/owl#')
dublinCoreNS = RDF.NS("http://purl.org/dc/elements/1.1/")
rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
rdfsNS= RDF.NS("http://www.w3.org/2000/01/rdf-schema#")
xsdNS = RDF.NS("http://www.w3.org/2001/XMLSchema#")

# internal ontologies
# Vocabularies whose URIs live under http://jumpgate.caltech.edu/wiki/.
submissionOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#")
dafTermOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/UcscDaf#")
libraryOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/LibraryOntology#")
inventoryOntology = RDF.NS("http://jumpgate.caltech.edu/wiki/InventoryOntology#")
submissionLog = RDF.NS("http://jumpgate.caltech.edu/wiki/SubmissionsLog/")
  24
  25 def sparql_query(model, query_filename):
  26     """Execute sparql query from file
  27     """
  28     logger.info("Opening: %s" % (query_filename,))
  29     query_body = open(query_filename,'r').read()
  30     query = RDF.SPARQLQuery(query_body)
  31     results = query.execute(model)
  32     for row in results:
  33         output = []
  34         for k,v in row.items()[::-1]:
  35             print "{0}: {1}".format(k,v)
  36         print
  37
  38
  39 def blankOrUri(value=None):
  40     node = None
  41     if value is None:
  42         node = RDF.Node()
  43     elif type(value) in types.StringTypes:
  44         node = RDF.Node(uri_string=value)
  45     elif isinstance(value, RDF.Node):
  46         node = value
  47
  48     return node
  49
  50
  51 def toTypedNode(value):
  52     if type(value) == types.BooleanType:
  53         value_type = xsdNS['boolean'].uri
  54         if value:
  55             value = u'1'
  56         else:
  57             value = u'0'
  58     elif type(value) in (types.IntType, types.LongType):
  59         value_type = xsdNS['decimal'].uri
  60         value = unicode(value)
  61     elif type(value) == types.FloatType:
  62         value_type = xsdNS['float'].uri
  63         value = unicode(value)
  64     else:
  65         value_type = None
  66         value = unicode(value)
  67
  68     if value_type is not None:
  69         node = RDF.Node(literal=value, datatype=value_type)
  70     else:
  71         node = RDF.Node(literal=unicode(value).encode('utf-8'))
  72     return node
  73
  74 def fromTypedNode(node):
  75     if node is None:
  76         return None
  77
  78     value_type = str(node.literal_value['datatype'])
  79     # chop off xml schema declaration
  80     value_type = value_type.replace(str(xsdNS[''].uri),'')
  81     literal = node.literal_value['string']
  82     literal_lower = literal.lower()
  83
  84     if value_type == 'boolean':
  85         if literal_lower in ('1', 'yes', 'true'):
  86             return True
  87         elif literal_lower in ('0', 'no', 'false'):
  88             return False
  89         else:
  90             raise ValueError("Unrecognized boolean %s" % (literal,))
  91     elif value_type == 'decimal' and literal.find('.') == -1:
  92         return int(literal)
  93     elif value_type in ('decimal', 'float', 'double'):
  94         return float(literal)
  95     elif value_type in ('string'):
  96         return literal
  97     elif value_type in ('dateTime'):
  98         raise NotImplemented('need to parse isoformat date-time')
  99
 100     return literal
 101
 102
 103 def get_model(model_name=None, directory=None):
 104     if directory is None:
 105         directory = os.getcwd()
 106
 107     if model_name is None:
 108         storage = RDF.MemoryStorage()
 109         logger.info("Using RDF Memory model")
 110     else:
 111         options = "hash-type='bdb',dir='{0}'".format(directory)
 112         storage = RDF.HashStorage(model_name,
 113                       options=options)
 114         logger.info("Using {0} with options {1}".format(model_name, options))
 115     model = RDF.Model(storage)
 116     return model
 117
 118
 119 def load_into_model(model, parser_name, filename, ns=None):
 120     if not os.path.exists(filename):
 121         raise IOError("Can't find {0}".format(filename))
 122
 123     data = open(filename, 'r').read()
 124     load_string_into_model(model, parser_name, data, ns)
 125
 126
 127 def load_string_into_model(model, parser_name, data, ns=None):
 128     if ns is None:
 129         ns = "http://localhost/"
 130
 131     rdf_parser = RDF.Parser(name=parser_name)
 132     rdf_parser.parse_string_into_model(model, data, ns)
 133
 134
 135 def get_serializer(name='turtle'):
 136     """Return a serializer with our standard prefixes loaded
 137     """
 138     writer = RDF.Serializer(name=name)
 139     # really standard stuff
 140     writer.set_namespace('owl', owlNS._prefix)
 141     writer.set_namespace('rdf', rdfNS._prefix)
 142     writer.set_namespace('rdfs', rdfsNS._prefix)
 143     writer.set_namespace('xsd', xsdNS._prefix)
 144
 145     # should these be here, kind of specific to an application
 146     writer.set_namespace('libraryOntology', libraryOntology._prefix)
 147     writer.set_namespace('ucscSubmission', submissionOntology._prefix)
 148     writer.set_namespace('ucscDaf', dafTermOntology._prefix)
 149     return writer
 150
 151 def dump_model(model):
 152     serializer = get_serializer()
 153     print serializer.serialize_model_to_string(model)