prefer native string types.
authorDiane Trout <diane@ghic.org>
Fri, 20 Mar 2015 20:42:12 +0000 (13:42 -0700)
committerDiane Trout <diane@ghic.org>
Fri, 20 Mar 2015 20:42:12 +0000 (13:42 -0700)
Things get really messy when trying to pass unicode strings
to redland in python2. I found it simpler to just try and keep
using the native string type.

So this patch is largely replacing unicode() to str() and
changing u'' to ''.

experiments/test_experiments.py
htsworkflow/pipelines/eland.py
htsworkflow/pipelines/sequences.py
htsworkflow/pipelines/summary.py
htsworkflow/pipelines/test/test_sequences.py
htsworkflow/util/ethelp.py
inventory/test_inventory.py
samples/test_samples.py

index 50246b2227158ed44ec081757201d52cb62254d3..f8e51b2a53f50baaaaa89796b592014156fdeae7 100644 (file)
@@ -109,7 +109,7 @@ class ExperimentsTestCases(TestCase):
         fc42jtn = self.fc42jtn
         fc42ju1 = FlowCellFactory(flowcell_id='42JU1AAXX')
 
         fc42jtn = self.fc42jtn
         fc42ju1 = FlowCellFactory(flowcell_id='42JU1AAXX')
 
-        for fc_id in [u'FC12150', u"42JTNAAXX", "42JU1AAXX"]:
+        for fc_id in ['FC12150', '42JTNAAXX', '42JU1AAXX']:
             fc_dict = flowcell_information(fc_id)
             fc_django = FlowCell.objects.get(flowcell_id=fc_id)
             self.assertEqual(fc_dict['flowcell_id'], fc_id)
             fc_dict = flowcell_information(fc_id)
             fc_django = FlowCell.objects.get(flowcell_id=fc_id)
             self.assertEqual(fc_dict['flowcell_id'], fc_id)
@@ -143,7 +143,7 @@ class ExperimentsTestCases(TestCase):
 
 
             for lane in fc_django.lane_set.all():
 
 
             for lane in fc_django.lane_set.all():
-                lane_contents = fc_json['lane_set'][unicode(lane.lane_number)]
+                lane_contents = fc_json['lane_set'][str(lane.lane_number)]
                 lane_dict = multi_lane_to_dict(lane_contents)[lane.library_id]
 
                 self.assertEqual(lane_dict['cluster_estimate'], lane.cluster_estimate)
                 lane_dict = multi_lane_to_dict(lane_contents)[lane.library_id]
 
                 self.assertEqual(lane_dict['cluster_estimate'], lane.cluster_estimate)
@@ -167,7 +167,7 @@ class ExperimentsTestCases(TestCase):
         """
         Require logging in to retrieve meta data
         """
         """
         Require logging in to retrieve meta data
         """
-        response = self.client.get(u'/experiments/config/FC12150/json')
+        response = self.client.get('/experiments/config/FC12150/json')
         self.assertEqual(response.status_code, 403)
 
     def test_library_id(self):
         self.assertEqual(response.status_code, 403)
 
     def test_library_id(self):
@@ -201,7 +201,7 @@ class ExperimentsTestCases(TestCase):
         This tests to make sure that the value entered in the raw library id field matches
         the library id looked up.
         """
         This tests to make sure that the value entered in the raw library id field matches
         the library id looked up.
         """
-        expected_ids = [ u'1215{}'.format(i) for i in range(1,9) ]
+        expected_ids = [ '1215{}'.format(i) for i in range(1,9) ]
         self.assertTrue(self.client.login(username=self.admin.username, password=self.password))
         response = self.client.get('/admin/experiments/flowcell/{}/'.format(self.fc12150.id))
 
         self.assertTrue(self.client.login(username=self.admin.username, password=self.password))
         response = self.client.get('/admin/experiments/flowcell/{}/'.format(self.fc12150.id))
 
@@ -410,7 +410,7 @@ class ExperimentsTestCases(TestCase):
         count = 0
         for r in query.execute(model):
             count += 1
         count = 0
         for r in query.execute(model):
             count += 1
-            self.assertEqual(fromTypedNode(r['flowcell_id']), u'FC12150')
+            self.assertEqual(fromTypedNode(r['flowcell_id']), 'FC12150')
             lane_id = fromTypedNode(r['lane_id'])
             library_id = fromTypedNode(r['library_id'])
             self.assertTrue(library_id in expected[lane_id])
             lane_id = fromTypedNode(r['lane_id'])
             library_id = fromTypedNode(r['library_id'])
             self.assertTrue(library_id in expected[lane_id])
@@ -504,7 +504,7 @@ class TestSequencer(TestCase):
         seq.instrument_name = "HWI-SEQ1"
         seq.model = "Imaginary 5000"
 
         seq.instrument_name = "HWI-SEQ1"
         seq.model = "Imaginary 5000"
 
-        self.assertEqual(unicode(seq), "Seq1 (HWI-SEQ1)")
+        self.assertEqual(str(seq), "Seq1 (HWI-SEQ1)")
 
     def test_lookup(self):
         fc = self.fc12150
 
     def test_lookup(self):
         fc = self.fc12150
index f1b0fdaa7b1d748a0b877976414836e3671167ba..b9a7ca3805a182b5bb827a1f459f267454ff041c 100644 (file)
@@ -378,7 +378,7 @@ class ElandLane(ResultLane):
     def get_elements(self):
         lane = ElementTree.Element(ElandLane.LANE,
                                    {'version':
     def get_elements(self):
         lane = ElementTree.Element(ElandLane.LANE,
                                    {'version':
-                                    unicode(ElandLane.XML_VERSION)})
+                                    str(ElandLane.XML_VERSION)})
         sample_tag = ElementTree.SubElement(lane, SAMPLE_NAME)
         sample_tag.text = self.sample_name
         lane_tag = ElementTree.SubElement(lane, LANE_ID)
         sample_tag = ElementTree.SubElement(lane, SAMPLE_NAME)
         sample_tag.text = self.sample_name
         lane_tag = ElementTree.SubElement(lane, LANE_ID)
@@ -390,19 +390,19 @@ class ElandLane(ResultLane):
         for k, v in self.genome_map.items():
             item = ElementTree.SubElement(
                 genome_map, GENOME_ITEM,
         for k, v in self.genome_map.items():
             item = ElementTree.SubElement(
                 genome_map, GENOME_ITEM,
-                {'name':k, 'value':unicode(v)})
+                {'name':k, 'value':str(v)})
         mapped_reads = ElementTree.SubElement(lane, MAPPED_READS)
         for k, v in self.mapped_reads.items():
             item = ElementTree.SubElement(
                 mapped_reads, MAPPED_ITEM,
         mapped_reads = ElementTree.SubElement(lane, MAPPED_READS)
         for k, v in self.mapped_reads.items():
             item = ElementTree.SubElement(
                 mapped_reads, MAPPED_ITEM,
-                {'name':k, 'value':unicode(v)})
+                {'name':k, 'value':str(v)})
         match_codes = ElementTree.SubElement(lane, MATCH_CODES)
         for k, v in self.match_codes.items():
             item = ElementTree.SubElement(
                 match_codes, MATCH_ITEM,
         match_codes = ElementTree.SubElement(lane, MATCH_CODES)
         for k, v in self.match_codes.items():
             item = ElementTree.SubElement(
                 match_codes, MATCH_ITEM,
-                {'name':k, 'value':unicode(v)})
+                {'name':k, 'value':str(v)})
         reads = ElementTree.SubElement(lane, READS)
         reads = ElementTree.SubElement(lane, READS)
-        reads.text = unicode(self.reads)
+        reads.text = str(self.reads)
 
         return lane
 
 
         return lane
 
@@ -591,7 +591,7 @@ class SequenceLane(ResultLane):
     def get_elements(self):
         lane = ElementTree.Element(SequenceLane.LANE,
                                    {'version':
     def get_elements(self):
         lane = ElementTree.Element(SequenceLane.LANE,
                                    {'version':
-                                    unicode(SequenceLane.XML_VERSION)})
+                                    str(SequenceLane.XML_VERSION)})
         sample_tag = ElementTree.SubElement(lane, SAMPLE_NAME)
         sample_tag.text = self.sample_name
         lane_tag = ElementTree.SubElement(lane, LANE_ID)
         sample_tag = ElementTree.SubElement(lane, SAMPLE_NAME)
         sample_tag.text = self.sample_name
         lane_tag = ElementTree.SubElement(lane, LANE_ID)
@@ -600,9 +600,9 @@ class SequenceLane(ResultLane):
             end_tag = ElementTree.SubElement(lane, END)
             end_tag.text = str(self.end)
         reads = ElementTree.SubElement(lane, READS)
             end_tag = ElementTree.SubElement(lane, END)
             end_tag.text = str(self.end)
         reads = ElementTree.SubElement(lane, READS)
-        reads.text = unicode(self.reads)
+        reads.text = str(self.reads)
         sequence_type = ElementTree.SubElement(lane, SequenceLane.SEQUENCE_TYPE)
         sequence_type = ElementTree.SubElement(lane, SequenceLane.SEQUENCE_TYPE)
-        sequence_type.text = unicode(SequenceLane.SEQUENCE_DESCRIPTION[self.sequence_type])
+        sequence_type.text = str(SequenceLane.SEQUENCE_DESCRIPTION[self.sequence_type])
 
         return lane
 
 
         return lane
 
@@ -676,13 +676,13 @@ class ELAND(collections.MutableMapping):
 
     def get_elements(self):
         root = ElementTree.Element(ELAND.ELAND,
 
     def get_elements(self):
         root = ElementTree.Element(ELAND.ELAND,
-                                   {'version': unicode(ELAND.XML_VERSION)})
+                                   {'version': str(ELAND.XML_VERSION)})
 
         for key in self:
             eland_lane = self[key].get_elements()
 
         for key in self:
             eland_lane = self[key].get_elements()
-            eland_lane.attrib[ELAND.END] = unicode(self[key].end-1)
-            eland_lane.attrib[ELAND.LANE_ID] = unicode(self[key].lane_id)
-            eland_lane.attrib[ELAND.SAMPLE] = unicode(self[key].sample_name)
+            eland_lane.attrib[ELAND.END] = str(self[key].end-1)
+            eland_lane.attrib[ELAND.LANE_ID] = str(self[key].lane_id)
+            eland_lane.attrib[ELAND.SAMPLE] = str(self[key].sample_name)
             root.append(eland_lane)
         return root
         return root
             root.append(eland_lane)
         return root
         return root
index cc6f8ba389e235a8bc4aa03465372aea77112b86..17e65cf91d6ce9f848ca18947ee2ed279e8b3c8c 100644 (file)
@@ -83,8 +83,8 @@ class SequenceFile(object):
     def key(self):
         return (self.flowcell, self.lane, self.read, self.project, self.split)
 
     def key(self):
         return (self.flowcell, self.lane, self.read, self.project, self.split)
 
-    def __unicode__(self):
-        return unicode(self.path)
+    def __str__(self):
+        return str(self.path)
 
     def __eq__(self, other):
         """
 
     def __eq__(self, other):
         """
@@ -162,7 +162,7 @@ class SequenceFile(object):
         def add(model, s, p, o):
             model.add_statement(RDF.Statement(s,p,o))
         # a bit unreliable... assumes filesystem is encoded in utf-8
         def add(model, s, p, o):
             model.add_statement(RDF.Statement(s,p,o))
         # a bit unreliable... assumes filesystem is encoded in utf-8
-        path = os.path.abspath(self.path.encode('utf-8'))
+        path = os.path.abspath(self.path)
         fileNode = RDF.Node(RDF.Uri('file://' + path))
         add(model, fileNode, rdfNS['type'], libNS['IlluminaResult'])
         add_lit(model, fileNode, libNS['flowcell_id'], self.flowcell)
         fileNode = RDF.Node(RDF.Uri('file://' + path))
         add(model, fileNode, rdfNS['type'], libNS['IlluminaResult'])
         add_lit(model, fileNode, libNS['flowcell_id'], self.flowcell)
index d7081ecce3d1d039ef62459fc773b6bff0823f61..c1108a2e83abf25661bcf779f44bd87e3c7cca74 100644 (file)
@@ -444,8 +444,8 @@ def make_mean_range_element(parent, name, mean, deviation):
     Make an etree subelement <Name mean='mean', deviation='deviation'/>
     """
     element = etree.SubElement(parent, name,
     Make an etree subelement <Name mean='mean', deviation='deviation'/>
     """
     element = etree.SubElement(parent, name,
-                                     { 'mean': unicode(mean),
-                                       'deviation': unicode(deviation)})
+                                     { 'mean': str(mean),
+                                       'deviation': str(deviation)})
     return element
 
 def parse_mean_range_element(element):
     return element
 
 def parse_mean_range_element(element):
index cd2b852017e97fa48292cdd0ad5b3bbfebf29e5f..6502c64901471753735b777fa292fd33652a4346 100644 (file)
@@ -71,7 +71,7 @@ class SequenceFileTests(TestCase):
 
         self.assertEqual(f0.filetype, 'srf')
         self.assertEqual(f0.path, pathname)
 
         self.assertEqual(f0.filetype, 'srf')
         self.assertEqual(f0.path, pathname)
-        self.assertEqual(unicode(f0), unicode(pathname))
+        self.assertEqual(str(f0), str(pathname))
         self.assertEqual(repr(f0), "<srf 42BW9AAXX 4 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '4')
         self.assertEqual(repr(f0), "<srf 42BW9AAXX 4 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '4')
@@ -96,7 +96,7 @@ class SequenceFileTests(TestCase):
 
         self.assertEqual(f0.filetype, 'qseq')
         self.assertEqual(f0.path, pathname)
 
         self.assertEqual(f0.filetype, 'qseq')
         self.assertEqual(f0.path, pathname)
-        self.assertEqual(unicode(f0), unicode(pathname))
+        self.assertEqual(str(f0), str(pathname))
         self.assertEqual(repr(f0), "<qseq 42BW9AAXX 4 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '4')
         self.assertEqual(repr(f0), "<qseq 42BW9AAXX 4 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '4')
@@ -119,7 +119,7 @@ class SequenceFileTests(TestCase):
 
         self.assertEqual(f0.filetype, 'qseq')
         self.assertEqual(f0.path, pathname)
 
         self.assertEqual(f0.filetype, 'qseq')
         self.assertEqual(f0.path, pathname)
-        self.assertEqual(unicode(f0), unicode(pathname))
+        self.assertEqual(str(f0), str(pathname))
         self.assertEqual(repr(f0), "<qseq ilmn200901 1 %s>" %(pathname,))
         self.assertEqual(f0.lane, '1')
         self.assertEqual(f0.read, 1)
         self.assertEqual(repr(f0), "<qseq ilmn200901 1 %s>" %(pathname,))
         self.assertEqual(f0.lane, '1')
         self.assertEqual(f0.read, 1)
@@ -142,7 +142,7 @@ class SequenceFileTests(TestCase):
 
         self.assertEqual(f0.filetype, 'fastq')
         self.assertEqual(f0.path, pathname)
 
         self.assertEqual(f0.filetype, 'fastq')
         self.assertEqual(f0.path, pathname)
-        self.assertEqual(unicode(f0), unicode(pathname))
+        self.assertEqual(str(f0), str(pathname))
         self.assertEqual(repr(f0), "<fastq 42BW9AAXX 4 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '4')
         self.assertEqual(repr(f0), "<fastq 42BW9AAXX 4 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '4')
@@ -164,7 +164,7 @@ class SequenceFileTests(TestCase):
 
         self.assertEqual(f0.filetype, 'fastq')
         self.assertEqual(f0.path, pathname)
 
         self.assertEqual(f0.filetype, 'fastq')
         self.assertEqual(f0.path, pathname)
-        self.assertEqual(unicode(f0), unicode(pathname))
+        self.assertEqual(str(f0), str(pathname))
         self.assertEqual(repr(f0), "<fastq 42BW9AAXX 4 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '4')
         self.assertEqual(repr(f0), "<fastq 42BW9AAXX 4 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '4')
@@ -188,7 +188,7 @@ class SequenceFileTests(TestCase):
 
         self.assertEqual(f0.filetype, 'split_fastq')
         self.assertEqual(f0.path, pathname)
 
         self.assertEqual(f0.filetype, 'split_fastq')
         self.assertEqual(f0.path, pathname)
-        self.assertEqual(unicode(f0), unicode(pathname))
+        self.assertEqual(str(f0), str(pathname))
         self.assertEqual(repr(f0), "<split_fastq 42BW9AAXX 1 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '1')
         self.assertEqual(repr(f0), "<split_fastq 42BW9AAXX 1 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '1')
@@ -212,7 +212,7 @@ class SequenceFileTests(TestCase):
 
         self.assertEqual(f0.filetype, 'split_fastq')
         self.assertEqual(f0.path, pathname)
 
         self.assertEqual(f0.filetype, 'split_fastq')
         self.assertEqual(f0.path, pathname)
-        self.assertEqual(unicode(f0), unicode(pathname))
+        self.assertEqual(str(f0), str(pathname))
         self.assertEqual(repr(f0), "<split_fastq 42BW9AAXX 1 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '1')
         self.assertEqual(repr(f0), "<split_fastq 42BW9AAXX 1 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(f0.lane, '1')
index f2f3e019b20782146c0855f32a4e41e02468336e..beaeb382c0f8112fcb816b54c8f04608ea5f7884 100644 (file)
@@ -45,7 +45,7 @@ def validate_xhtml(html, base_url='http://localhost'):
     and False if it fails.
     """
     try:
     and False if it fails.
     """
     try:
-        XHTML_RDF_DTD = lxml.etree.DTD(external_id='-//W3C//DTD XHTML+RDFa 1.0//EN')
+        XHTML_RDF_DTD = lxml.etree.DTD(external_id=b'-//W3C//DTD XHTML+RDFa 1.0//EN')
     except lxml.etree.DTDParseError as e:
         LOGGER.warn("Unable to load XHTML DTD %s" % (str(e),))
         return
     except lxml.etree.DTDParseError as e:
         LOGGER.warn("Unable to load XHTML DTD %s" % (str(e),))
         return
index 43d091101fd016b8ab1fbbf39936f3b97d59336a..9967d4411df4de916cf51b5763396daa636a9f79 100644 (file)
@@ -1,4 +1,4 @@
-from __future__ import absolute_import, print_function, unicode_literals
+from __future__ import absolute_import, print_function
 
 import RDF
 
 
 import RDF
 
@@ -42,7 +42,7 @@ class InventoryTestCase(TestCase):
 
         itemNode = RDF.Node(RDF.Uri(url))
         item_type = fromTypedNode(
 
         itemNode = RDF.Node(RDF.Uri(url))
         item_type = fromTypedNode(
-            model.get_target(itemNode, inventoryOntology[b'item_type']))
+            model.get_target(itemNode, inventoryOntology['item_type']))
         self.failUnlessEqual(item_type, item.item_type.name)
 
     def test_itemindex(self):
         self.failUnlessEqual(item_type, item.item_type.name)
 
     def test_itemindex(self):
index df98418aed4afad2e5fb18797d7417a4ae75a96a..1c53f3ccdc3dcb9f51a9388028f396db0a42057d 100644 (file)
@@ -1,4 +1,4 @@
-from __future__ import absolute_import, print_function, unicode_literals
+from __future__ import absolute_import, print_function
 
 import datetime
 import unittest
 
 import datetime
 import unittest
@@ -272,9 +272,9 @@ try:
     import RDF
     HAVE_RDF = True
 
     import RDF
     HAVE_RDF = True
 
-    rdfNS = RDF.NS(b"http://www.w3.org/1999/02/22-rdf-syntax-ns#")
-    xsdNS = RDF.NS(b"http://www.w3.org/2001/XMLSchema#")
-    libNS = RDF.NS(b"http://jumpgate.caltech.edu/wiki/LibraryOntology#")
+    rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
+    xsdNS = RDF.NS("http://www.w3.org/2001/XMLSchema#")
+    libNS = RDF.NS("http://jumpgate.caltech.edu/wiki/LibraryOntology#")
 
     from htsworkflow.util.rdfhelp import dump_model
 except ImportError as e:
 
     from htsworkflow.util.rdfhelp import dump_model
 except ImportError as e:
@@ -289,7 +289,7 @@ class TestRDFaLibrary(TestCase):
     def test_parse_rdfa(self):
 
         model = get_rdf_memory_model()
     def test_parse_rdfa(self):
 
         model = get_rdf_memory_model()
-        parser = RDF.Parser(name=b'rdfa')
+        parser = RDF.Parser(name='rdfa')
 
         bob = AffiliationFactory.create(name='Bob')
 
 
         bob = AffiliationFactory.create(name='Bob')
 
@@ -311,20 +311,20 @@ class TestRDFaLibrary(TestCase):
         #with open('/tmp/test.ttl', 'w') as outstream:
         #    dump_model(model, outstream)
         # http://jumpgate.caltech.edu/wiki/LibraryOntology#affiliation>
         #with open('/tmp/test.ttl', 'w') as outstream:
         #    dump_model(model, outstream)
         # http://jumpgate.caltech.edu/wiki/LibraryOntology#affiliation>
-        self.check_literal_object(model, ['Bob'], p=libNS[b'affiliation'])
+        self.check_literal_object(model, ['Bob'], p=libNS['affiliation'])
         self.check_literal_object(model,
                                   ['experiment type name'],
         self.check_literal_object(model,
                                   ['experiment type name'],
-                                  p=libNS[b'experiment_type'])
-        self.check_literal_object(model, ['400'], p=libNS[b'gel_cut'])
+                                  p=libNS['experiment_type'])
+        self.check_literal_object(model, ['400'], p=libNS['gel_cut'])
         self.check_literal_object(model,
                                   ['microfluidics bot 7321'],
         self.check_literal_object(model,
                                   ['microfluidics bot 7321'],
-                                  p=libNS[b'made_by'])
+                                  p=libNS['made_by'])
         self.check_literal_object(model,
                                   [lib_object.library_name],
         self.check_literal_object(model,
                                   [lib_object.library_name],
-                                  p=libNS[b'name'])
+                                  p=libNS['name'])
         self.check_literal_object(model,
                                   [lib_object.library_species.scientific_name],
         self.check_literal_object(model,
                                   [lib_object.library_species.scientific_name],
-                                  p=libNS[b'species_name'])
+                                  p=libNS['species_name'])
 
 
     def check_literal_object(self, model, values, s=None, p=None, o=None):
 
 
     def check_literal_object(self, model, values, s=None, p=None, o=None):