special case unicode handling that differs between python2 & 3
author Diane Trout <diane@ghic.org>
Fri, 20 Mar 2015 21:19:05 +0000 (14:19 -0700)
committer Diane Trout <diane@ghic.org>
Fri, 20 Mar 2015 21:19:05 +0000 (14:19 -0700)
htsworkflow/util/rdfhelp.py
htsworkflow/util/rdfjsonld.py

index da4b02a1a4af7bd2364f2b2cd024ca2f554182b2..30d91788c47565174ef12a6133a5093c350409e8 100644 (file)
@@ -110,12 +110,15 @@ def toTypedNode(value, language="en"):
             value = value.strftime(ISOFORMAT_MS)
     else:
         value_type = None
-        value = unicode(value)
+        if six.PY3:
+            value = str(value)
+        else:
+            value = unicode(value).encode('utf-8')
 
     if value_type is not None:
         node = RDF.Node(literal=value, datatype=value_type)
     else:
-        node = RDF.Node(literal=unicode(value).encode('utf-8'), language=language)
+        node = RDF.Node(literal=value, language=language)
     return node
 
 
@@ -335,6 +338,9 @@ def add_default_schemas(model, schema_path=None):
     schemas = resource_listdir(__name__, 'schemas')
     for s in schemas:
         schema = resource_string(__name__,  'schemas/' + s)
+        if six.PY3:
+            # files must be encoded utf-8
+            schema = schema.decode('utf-8')
         namespace = 'file://localhost/htsworkflow/schemas/'+s
         add_schema(model, schema, namespace)
 
@@ -381,7 +387,10 @@ def sanitize_literal(node):
         element = lxml.html.fromstring(s)
         cleaner = lxml.html.clean.Cleaner(page_structure=False)
         element = cleaner.clean_html(element)
-        text = lxml.html.tostring(element)
+        if six.PY3:
+            text = lxml.html.tostring(element, encoding=str)
+        else:
+            text = lxml.html.tostring(element)
         p_len = 3
         slash_p_len = 4
 
index 45046a58c069776e26ca07984b1052ad43f9647a..e81319f3ab5c6fba0267fd69d9d595a2a3f9b6e6 100644 (file)
@@ -1,5 +1,6 @@
 import RDF
 from pyld import jsonld
+import six
 
 def load_into_model(model, json_data):
     '''Given a PyLD dictionary, load its statements into our Redland model
@@ -29,5 +30,9 @@ def to_node(item):
     elif nodetype == 'IRI':
         return RDF.Node(uri_string=str(value))
     else:
-        return RDF.Node(literal=unicode(value).encode('utf-8'),
+        if six.PY2:
+            literal = unicode(value).encode('utf-8')
+        else:
+            literal = value
+        return RDF.Node(literal=literal,
                         datatype=RDF.Uri(datatype))