From 3e22181371bb36172a9af0516ae90dfeda33bca4 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Fri, 20 Mar 2015 14:19:05 -0700 Subject: [PATCH] special case unicode handling that differs between python2 & 3 --- htsworkflow/util/rdfhelp.py | 15 ++++++++++++--- htsworkflow/util/rdfjsonld.py | 7 ++++++- 2 files changed, 18 insertions(+), 4 deletions(-) diff --git a/htsworkflow/util/rdfhelp.py b/htsworkflow/util/rdfhelp.py index da4b02a..30d9178 100644 --- a/htsworkflow/util/rdfhelp.py +++ b/htsworkflow/util/rdfhelp.py @@ -110,12 +110,15 @@ def toTypedNode(value, language="en"): value = value.strftime(ISOFORMAT_MS) else: value_type = None - value = unicode(value) + if six.PY3: + value = str(value) + else: + value = unicode(value).encode('utf-8') if value_type is not None: node = RDF.Node(literal=value, datatype=value_type) else: - node = RDF.Node(literal=unicode(value).encode('utf-8'), language=language) + node = RDF.Node(literal=value, language=language) return node @@ -335,6 +338,9 @@ def add_default_schemas(model, schema_path=None): schemas = resource_listdir(__name__, 'schemas') for s in schemas: schema = resource_string(__name__, 'schemas/' + s) + if six.PY3: + # files must be encoded utf-8 + schema = schema.decode('utf-8') namespace = 'file://localhost/htsworkflow/schemas/'+s add_schema(model, schema, namespace) @@ -381,7 +387,10 @@ def sanitize_literal(node): element = lxml.html.fromstring(s) cleaner = lxml.html.clean.Cleaner(page_structure=False) element = cleaner.clean_html(element) - text = lxml.html.tostring(element) + if six.PY3: + text = lxml.html.tostring(element, encoding=str) + else: + text = lxml.html.tostring(element) p_len = 3 slash_p_len = 4 diff --git a/htsworkflow/util/rdfjsonld.py b/htsworkflow/util/rdfjsonld.py index 45046a5..e81319f 100644 --- a/htsworkflow/util/rdfjsonld.py +++ b/htsworkflow/util/rdfjsonld.py @@ -1,5 +1,6 @@ import RDF from pyld import jsonld +import six def load_into_model(model, json_data): '''Given a PyLD dictionary, load its statements into our Redland model @@ -29,5 +30,9 @@ def to_node(item): elif nodetype == 'IRI': return RDF.Node(uri_string=str(value)) else: - return RDF.Node(literal=unicode(value).encode('utf-8'), + if six.PY2: + literal = unicode(value).encode('utf-8') + else: + literal = value + return RDF.Node(literal=literal, datatype=RDF.Uri(datatype)) -- 2.30.2