From f229244c46cc20cad0f791939b0587429e56d81c Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Tue, 28 Aug 2012 15:56:22 -0700 Subject: [PATCH] sanitize_literal had trouble with empty strings. This fixes that and tests that edge case. --- htsworkflow/util/rdfhelp.py | 20 ++++++++++++-------- htsworkflow/util/test/test_rdfhelp.py | 5 +++++ 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/htsworkflow/util/rdfhelp.py b/htsworkflow/util/rdfhelp.py index 3ca27fb..29df21e 100644 --- a/htsworkflow/util/rdfhelp.py +++ b/htsworkflow/util/rdfhelp.py @@ -267,7 +267,7 @@ def load_into_model(model, parser_name, path, ns=None): def load_string_into_model(model, parser_name, data, ns=None): if ns is None: - ns = RDF.NS("http://localhost/") + ns = RDF.Uri("http://localhost/") imports = owlNS['imports'] rdf_parser = RDF.Parser(name=parser_name) for s in rdf_parser.parse_string_as_stream(data, ns): @@ -288,14 +288,18 @@ def sanitize_literal(node): if not isinstance(node, RDF.Node): raise ValueError("sanitize_literal only works on RDF.Nodes") - element = lxml.html.fromstring(node.literal_value['string']) - cleaner = lxml.html.clean.Cleaner(page_structure=False) - element = cleaner.clean_html(element) - text = lxml.html.tostring(element) - p_len = 3 - slash_p_len = 4 + s = node.literal_value['string'] + if len(s) > 0: + element = lxml.html.fromstring(s) + cleaner = lxml.html.clean.Cleaner(page_structure=False) + element = cleaner.clean_html(element) + text = lxml.html.tostring(element) + p_len = 3 + slash_p_len = 4 - args = {'literal': text[p_len:-slash_p_len]} + args = {'literal': text[p_len:-slash_p_len]} + else: + args = {'literal': ''} datatype = node.literal_value['datatype'] if datatype is not None: args['datatype'] = datatype diff --git a/htsworkflow/util/test/test_rdfhelp.py b/htsworkflow/util/test/test_rdfhelp.py index 4fe6769..34c3200 100644 --- a/htsworkflow/util/test/test_rdfhelp.py +++ b/htsworkflow/util/test/test_rdfhelp.py @@ -150,6 +150,11 @@ _:a owl:imports "{loc}extra.turtle" . self.failUnlessEqual(hello_clean.literal_value['string'], hello_text) + def test_sanitize_literal_empty_string(self): + value = "" + value_node = RDF.Node(value) + self.assertEqual(str(sanitize_literal(value_node)), value) + def test_sanitize_literal_html(self): hello = "hello google.com, whats up?" hello_clean = 'hello google.com, whats up?' -- 2.30.2