From: Diane Trout <diane@ghic.org>
Date: Fri, 20 Mar 2015 20:42:12 +0000 (-0700)
Subject: prefer native string types.
X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=712b01e7483095811e0f8a58ac34a42287276805

prefer native string types.

Things get really messy when trying to pass unicode strings
to redland in python2. I found it simpler to just try to keep
using the native string type.

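So this patch largely consists of replacing unicode() with str() and
changing u'' to ''. In the tests it also drops the unicode_literals
import along with the b'' prefixes on strings passed to RDF.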
---

diff --git a/experiments/test_experiments.py b/experiments/test_experiments.py
index 50246b2..f8e51b2 100644
--- a/experiments/test_experiments.py
+++ b/experiments/test_experiments.py
@@ -109,7 +109,7 @@ class ExperimentsTestCases(TestCase):
         fc42jtn = self.fc42jtn
         fc42ju1 = FlowCellFactory(flowcell_id='42JU1AAXX')
 
-        for fc_id in [u'FC12150', u"42JTNAAXX", "42JU1AAXX"]:
+        for fc_id in ['FC12150', '42JTNAAXX', '42JU1AAXX']:
             fc_dict = flowcell_information(fc_id)
             fc_django = FlowCell.objects.get(flowcell_id=fc_id)
             self.assertEqual(fc_dict['flowcell_id'], fc_id)
@@ -143,7 +143,7 @@ class ExperimentsTestCases(TestCase):
 
 
             for lane in fc_django.lane_set.all():
-                lane_contents = fc_json['lane_set'][unicode(lane.lane_number)]
+                lane_contents = fc_json['lane_set'][str(lane.lane_number)]
                 lane_dict = multi_lane_to_dict(lane_contents)[lane.library_id]
 
                 self.assertEqual(lane_dict['cluster_estimate'], lane.cluster_estimate)
@@ -167,7 +167,7 @@ class ExperimentsTestCases(TestCase):
         """
         Require logging in to retrieve meta data
         """
-        response = self.client.get(u'/experiments/config/FC12150/json')
+        response = self.client.get('/experiments/config/FC12150/json')
         self.assertEqual(response.status_code, 403)
 
     def test_library_id(self):
@@ -201,7 +201,7 @@ class ExperimentsTestCases(TestCase):
         This tests to make sure that the value entered in the raw library id field matches
         the library id looked up.
         """
-        expected_ids = [ u'1215{}'.format(i) for i in range(1,9) ]
+        expected_ids = [ '1215{}'.format(i) for i in range(1,9) ]
         self.assertTrue(self.client.login(username=self.admin.username, password=self.password))
         response = self.client.get('/admin/experiments/flowcell/{}/'.format(self.fc12150.id))
 
@@ -410,7 +410,7 @@ class ExperimentsTestCases(TestCase):
         count = 0
         for r in query.execute(model):
             count += 1
-            self.assertEqual(fromTypedNode(r['flowcell_id']), u'FC12150')
+            self.assertEqual(fromTypedNode(r['flowcell_id']), 'FC12150')
             lane_id = fromTypedNode(r['lane_id'])
             library_id = fromTypedNode(r['library_id'])
             self.assertTrue(library_id in expected[lane_id])
@@ -504,7 +504,7 @@ class TestSequencer(TestCase):
         seq.instrument_name = "HWI-SEQ1"
         seq.model = "Imaginary 5000"
 
-        self.assertEqual(unicode(seq), "Seq1 (HWI-SEQ1)")
+        self.assertEqual(str(seq), "Seq1 (HWI-SEQ1)")
 
     def test_lookup(self):
         fc = self.fc12150
diff --git a/htsworkflow/pipelines/eland.py b/htsworkflow/pipelines/eland.py
index f1b0fda..b9a7ca3 100644
--- a/htsworkflow/pipelines/eland.py
+++ b/htsworkflow/pipelines/eland.py
@@ -378,7 +378,7 @@ class ElandLane(ResultLane):
     def get_elements(self):
         lane = ElementTree.Element(ElandLane.LANE,
                                    {'version':
-                                    unicode(ElandLane.XML_VERSION)})
+                                    str(ElandLane.XML_VERSION)})
         sample_tag = ElementTree.SubElement(lane, SAMPLE_NAME)
         sample_tag.text = self.sample_name
         lane_tag = ElementTree.SubElement(lane, LANE_ID)
@@ -390,19 +390,19 @@ class ElandLane(ResultLane):
         for k, v in self.genome_map.items():
             item = ElementTree.SubElement(
                 genome_map, GENOME_ITEM,
-                {'name':k, 'value':unicode(v)})
+                {'name':k, 'value':str(v)})
         mapped_reads = ElementTree.SubElement(lane, MAPPED_READS)
         for k, v in self.mapped_reads.items():
             item = ElementTree.SubElement(
                 mapped_reads, MAPPED_ITEM,
-                {'name':k, 'value':unicode(v)})
+                {'name':k, 'value':str(v)})
         match_codes = ElementTree.SubElement(lane, MATCH_CODES)
         for k, v in self.match_codes.items():
             item = ElementTree.SubElement(
                 match_codes, MATCH_ITEM,
-                {'name':k, 'value':unicode(v)})
+                {'name':k, 'value':str(v)})
         reads = ElementTree.SubElement(lane, READS)
-        reads.text = unicode(self.reads)
+        reads.text = str(self.reads)
 
         return lane
 
@@ -591,7 +591,7 @@ class SequenceLane(ResultLane):
     def get_elements(self):
         lane = ElementTree.Element(SequenceLane.LANE,
                                    {'version':
-                                    unicode(SequenceLane.XML_VERSION)})
+                                    str(SequenceLane.XML_VERSION)})
         sample_tag = ElementTree.SubElement(lane, SAMPLE_NAME)
         sample_tag.text = self.sample_name
         lane_tag = ElementTree.SubElement(lane, LANE_ID)
@@ -600,9 +600,9 @@ class SequenceLane(ResultLane):
             end_tag = ElementTree.SubElement(lane, END)
             end_tag.text = str(self.end)
         reads = ElementTree.SubElement(lane, READS)
-        reads.text = unicode(self.reads)
+        reads.text = str(self.reads)
         sequence_type = ElementTree.SubElement(lane, SequenceLane.SEQUENCE_TYPE)
-        sequence_type.text = unicode(SequenceLane.SEQUENCE_DESCRIPTION[self.sequence_type])
+        sequence_type.text = str(SequenceLane.SEQUENCE_DESCRIPTION[self.sequence_type])
 
         return lane
 
@@ -676,13 +676,13 @@ class ELAND(collections.MutableMapping):
 
     def get_elements(self):
         root = ElementTree.Element(ELAND.ELAND,
-                                   {'version': unicode(ELAND.XML_VERSION)})
+                                   {'version': str(ELAND.XML_VERSION)})
 
         for key in self:
             eland_lane = self[key].get_elements()
-            eland_lane.attrib[ELAND.END] = unicode(self[key].end-1)
-            eland_lane.attrib[ELAND.LANE_ID] = unicode(self[key].lane_id)
-            eland_lane.attrib[ELAND.SAMPLE] = unicode(self[key].sample_name)
+            eland_lane.attrib[ELAND.END] = str(self[key].end-1)
+            eland_lane.attrib[ELAND.LANE_ID] = str(self[key].lane_id)
+            eland_lane.attrib[ELAND.SAMPLE] = str(self[key].sample_name)
             root.append(eland_lane)
         return root
         return root
diff --git a/htsworkflow/pipelines/sequences.py b/htsworkflow/pipelines/sequences.py
index cc6f8ba..17e65cf 100644
--- a/htsworkflow/pipelines/sequences.py
+++ b/htsworkflow/pipelines/sequences.py
@@ -83,8 +83,8 @@ class SequenceFile(object):
     def key(self):
         return (self.flowcell, self.lane, self.read, self.project, self.split)
 
-    def __unicode__(self):
-        return unicode(self.path)
+    def __str__(self):
+        return str(self.path)
 
     def __eq__(self, other):
         """
@@ -162,7 +162,7 @@ class SequenceFile(object):
         def add(model, s, p, o):
             model.add_statement(RDF.Statement(s,p,o))
         # a bit unreliable... assumes filesystem is encoded in utf-8
-        path = os.path.abspath(self.path.encode('utf-8'))
+        path = os.path.abspath(self.path)
         fileNode = RDF.Node(RDF.Uri('file://' + path))
         add(model, fileNode, rdfNS['type'], libNS['IlluminaResult'])
         add_lit(model, fileNode, libNS['flowcell_id'], self.flowcell)
diff --git a/htsworkflow/pipelines/summary.py b/htsworkflow/pipelines/summary.py
index d7081ec..c1108a2 100644
--- a/htsworkflow/pipelines/summary.py
+++ b/htsworkflow/pipelines/summary.py
@@ -444,8 +444,8 @@ def make_mean_range_element(parent, name, mean, deviation):
     Make an etree subelement <Name mean='mean', deviation='deviation'/>
     """
     element = etree.SubElement(parent, name,
-                                     { 'mean': unicode(mean),
-                                       'deviation': unicode(deviation)})
+                                     { 'mean': str(mean),
+                                       'deviation': str(deviation)})
     return element
 
 def parse_mean_range_element(element):
diff --git a/htsworkflow/pipelines/test/test_sequences.py b/htsworkflow/pipelines/test/test_sequences.py
index cd2b852..6502c64 100644
--- a/htsworkflow/pipelines/test/test_sequences.py
+++ b/htsworkflow/pipelines/test/test_sequences.py
@@ -71,7 +71,7 @@ class SequenceFileTests(TestCase):
 
         self.assertEqual(f0.filetype, 'srf')
         self.assertEqual(f0.path, pathname)
-        self.assertEqual(unicode(f0), unicode(pathname))
+        self.assertEqual(str(f0), str(pathname))
         self.assertEqual(repr(f0), "<srf 42BW9AAXX 4 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '4')
@@ -96,7 +96,7 @@ class SequenceFileTests(TestCase):
 
         self.assertEqual(f0.filetype, 'qseq')
         self.assertEqual(f0.path, pathname)
-        self.assertEqual(unicode(f0), unicode(pathname))
+        self.assertEqual(str(f0), str(pathname))
         self.assertEqual(repr(f0), "<qseq 42BW9AAXX 4 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '4')
@@ -119,7 +119,7 @@ class SequenceFileTests(TestCase):
 
         self.assertEqual(f0.filetype, 'qseq')
         self.assertEqual(f0.path, pathname)
-        self.assertEqual(unicode(f0), unicode(pathname))
+        self.assertEqual(str(f0), str(pathname))
         self.assertEqual(repr(f0), "<qseq ilmn200901 1 %s>" %(pathname,))
         self.assertEqual(f0.lane, '1')
         self.assertEqual(f0.read, 1)
@@ -142,7 +142,7 @@ class SequenceFileTests(TestCase):
 
         self.assertEqual(f0.filetype, 'fastq')
         self.assertEqual(f0.path, pathname)
-        self.assertEqual(unicode(f0), unicode(pathname))
+        self.assertEqual(str(f0), str(pathname))
         self.assertEqual(repr(f0), "<fastq 42BW9AAXX 4 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '4')
@@ -164,7 +164,7 @@ class SequenceFileTests(TestCase):
 
         self.assertEqual(f0.filetype, 'fastq')
         self.assertEqual(f0.path, pathname)
-        self.assertEqual(unicode(f0), unicode(pathname))
+        self.assertEqual(str(f0), str(pathname))
         self.assertEqual(repr(f0), "<fastq 42BW9AAXX 4 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '4')
@@ -188,7 +188,7 @@ class SequenceFileTests(TestCase):
 
         self.assertEqual(f0.filetype, 'split_fastq')
         self.assertEqual(f0.path, pathname)
-        self.assertEqual(unicode(f0), unicode(pathname))
+        self.assertEqual(str(f0), str(pathname))
         self.assertEqual(repr(f0), "<split_fastq 42BW9AAXX 1 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '1')
@@ -212,7 +212,7 @@ class SequenceFileTests(TestCase):
 
         self.assertEqual(f0.filetype, 'split_fastq')
         self.assertEqual(f0.path, pathname)
-        self.assertEqual(unicode(f0), unicode(pathname))
+        self.assertEqual(str(f0), str(pathname))
         self.assertEqual(repr(f0), "<split_fastq 42BW9AAXX 1 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '1')
diff --git a/htsworkflow/util/ethelp.py b/htsworkflow/util/ethelp.py
index f2f3e01..beaeb38 100644
--- a/htsworkflow/util/ethelp.py
+++ b/htsworkflow/util/ethelp.py
@@ -45,7 +45,7 @@ def validate_xhtml(html, base_url='http://localhost'):
     and False if it fails.
     """
     try:
-        XHTML_RDF_DTD = lxml.etree.DTD(external_id='-//W3C//DTD XHTML+RDFa 1.0//EN')
+        XHTML_RDF_DTD = lxml.etree.DTD(external_id=b'-//W3C//DTD XHTML+RDFa 1.0//EN')
     except lxml.etree.DTDParseError as e:
         LOGGER.warn("Unable to load XHTML DTD %s" % (str(e),))
         return
diff --git a/inventory/test_inventory.py b/inventory/test_inventory.py
index 43d0911..9967d44 100644
--- a/inventory/test_inventory.py
+++ b/inventory/test_inventory.py
@@ -1,4 +1,4 @@
-from __future__ import absolute_import, print_function, unicode_literals
+from __future__ import absolute_import, print_function
 
 import RDF
 
@@ -42,7 +42,7 @@ class InventoryTestCase(TestCase):
 
         itemNode = RDF.Node(RDF.Uri(url))
         item_type = fromTypedNode(
-            model.get_target(itemNode, inventoryOntology[b'item_type']))
+            model.get_target(itemNode, inventoryOntology['item_type']))
         self.failUnlessEqual(item_type, item.item_type.name)
 
     def test_itemindex(self):
diff --git a/samples/test_samples.py b/samples/test_samples.py
index df98418..1c53f3c 100644
--- a/samples/test_samples.py
+++ b/samples/test_samples.py
@@ -1,4 +1,4 @@
-from __future__ import absolute_import, print_function, unicode_literals
+from __future__ import absolute_import, print_function
 
 import datetime
 import unittest
@@ -272,9 +272,9 @@ try:
     import RDF
     HAVE_RDF = True
 
-    rdfNS = RDF.NS(b"http://www.w3.org/1999/02/22-rdf-syntax-ns#")
-    xsdNS = RDF.NS(b"http://www.w3.org/2001/XMLSchema#")
-    libNS = RDF.NS(b"http://jumpgate.caltech.edu/wiki/LibraryOntology#")
+    rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
+    xsdNS = RDF.NS("http://www.w3.org/2001/XMLSchema#")
+    libNS = RDF.NS("http://jumpgate.caltech.edu/wiki/LibraryOntology#")
 
     from htsworkflow.util.rdfhelp import dump_model
 except ImportError as e:
@@ -289,7 +289,7 @@ class TestRDFaLibrary(TestCase):
     def test_parse_rdfa(self):
 
         model = get_rdf_memory_model()
-        parser = RDF.Parser(name=b'rdfa')
+        parser = RDF.Parser(name='rdfa')
 
         bob = AffiliationFactory.create(name='Bob')
 
@@ -311,20 +311,20 @@ class TestRDFaLibrary(TestCase):
         #with open('/tmp/test.ttl', 'w') as outstream:
         #    dump_model(model, outstream)
         # http://jumpgate.caltech.edu/wiki/LibraryOntology#affiliation>
-        self.check_literal_object(model, ['Bob'], p=libNS[b'affiliation'])
+        self.check_literal_object(model, ['Bob'], p=libNS['affiliation'])
         self.check_literal_object(model,
                                   ['experiment type name'],
-                                  p=libNS[b'experiment_type'])
-        self.check_literal_object(model, ['400'], p=libNS[b'gel_cut'])
+                                  p=libNS['experiment_type'])
+        self.check_literal_object(model, ['400'], p=libNS['gel_cut'])
         self.check_literal_object(model,
                                   ['microfluidics bot 7321'],
-                                  p=libNS[b'made_by'])
+                                  p=libNS['made_by'])
         self.check_literal_object(model,
                                   [lib_object.library_name],
-                                  p=libNS[b'name'])
+                                  p=libNS['name'])
         self.check_literal_object(model,
                                   [lib_object.library_species.scientific_name],
-                                  p=libNS[b'species_name'])
+                                  p=libNS['species_name'])
 
 
     def check_literal_object(self, model, values, s=None, p=None, o=None):