From b3217537f3f6e054695d39b5870591c8e267636b Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Tue, 11 Sep 2012 16:01:46 -0700 Subject: [PATCH] Better parser guessing when grabbing plain text from a webserver. Also fix a typo in an error message --- htsworkflow/util/rdfhelp.py | 9 +++++---- htsworkflow/util/test/test_rdfhelp.py | 8 ++++++-- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/htsworkflow/util/rdfhelp.py b/htsworkflow/util/rdfhelp.py index 7f8902a..9f4a107 100644 --- a/htsworkflow/util/rdfhelp.py +++ b/htsworkflow/util/rdfhelp.py @@ -277,6 +277,7 @@ def load_into_model(model, parser_name, path, ns=None): rdf_parser = RDF.Parser(name=parser_name) + statements = [] retries = 3 while retries > 0: try: @@ -285,7 +286,7 @@ def load_into_model(model, parser_name, path, ns=None): retries = 0 except RDF.RedlandError, e: errmsg = "RDF.RedlandError: {0} {1} tries remaining" - logger.error(errmsg.format(str(e), tries)) + logger.error(errmsg.format(str(e), retries)) for s in statements: conditionally_add_statement(model, s, ns) @@ -355,16 +356,16 @@ def guess_parser(content_type, pathname): return 'turtle' elif content_type in ('text/html',): return 'rdfa' - elif content_type is None: + elif content_type is None or content_type in ('text/plain',): return guess_parser_by_extension(pathname) def guess_parser_by_extension(pathname): _, ext = os.path.splitext(pathname) if ext in ('.xml', '.rdf'): return 'rdfxml' - elif ext in ('.html'): + elif ext in ('.html',): return 'rdfa' - elif ext in ('.turtle'): + elif ext in ('.turtle',): return 'turtle' return 'guess' diff --git a/htsworkflow/util/test/test_rdfhelp.py b/htsworkflow/util/test/test_rdfhelp.py index d331416..61cc1dc 100644 --- a/htsworkflow/util/test/test_rdfhelp.py +++ b/htsworkflow/util/test/test_rdfhelp.py @@ -202,7 +202,8 @@ _:a owl:imports "{loc}extra.turtle" . ('/a/b/c.rdf', 'rdfxml'), ('/a/b/c.xml', 'rdfxml'), ('/a/b/c.html', 'rdfa'), - ('/a/b/c.turtle', 'turtle')] + ('/a/b/c.turtle', 'turtle'), + ('http://foo.bar/bleem.turtle', 'turtle')] for path, parser in DATA: self.assertEqual(guess_parser_by_extension(path), parser) self.assertEqual(guess_parser(None, path), parser) @@ -210,7 +211,10 @@ _:a owl:imports "{loc}extra.turtle" . DATA = [ ('application/rdf+xml', 'http://a.org/b/c', 'rdfxml'), ('application/x-turtle', 'http://a.org/b/c', 'turtle'), - ('text/html', 'http://a.org/b/c', 'rdfa') + ('text/html', 'http://a.org/b/c', 'rdfa'), + ('text/html', 'http://a.org/b/c.html', 'rdfa'), + ('text/plain', 'http://a.org/b/c.turtle', 'turtle'), + ('text/plain', 'http://a.org/b/c', 'guess') ] for contenttype, url, parser in DATA: self.assertEqual(guess_parser(contenttype, url), parser) -- 2.30.2