From: Diane Trout Date: Fri, 20 Mar 2015 20:42:12 +0000 (-0700) Subject: prefer native string types. X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=712b01e7483095811e0f8a58ac34a42287276805 prefer native string types. Things get really messy when trying to pass unicode strings to redland in python2. I found it simpler to just try and keep using the native string type. So this patch is largely replacing unicode() with str() and changing u'' to ''. --- diff --git a/experiments/test_experiments.py b/experiments/test_experiments.py index 50246b2..f8e51b2 100644 --- a/experiments/test_experiments.py +++ b/experiments/test_experiments.py @@ -109,7 +109,7 @@ class ExperimentsTestCases(TestCase): fc42jtn = self.fc42jtn fc42ju1 = FlowCellFactory(flowcell_id='42JU1AAXX') - for fc_id in [u'FC12150', u"42JTNAAXX", "42JU1AAXX"]: + for fc_id in ['FC12150', '42JTNAAXX', '42JU1AAXX']: fc_dict = flowcell_information(fc_id) fc_django = FlowCell.objects.get(flowcell_id=fc_id) self.assertEqual(fc_dict['flowcell_id'], fc_id) @@ -143,7 +143,7 @@ class ExperimentsTestCases(TestCase): for lane in fc_django.lane_set.all(): - lane_contents = fc_json['lane_set'][unicode(lane.lane_number)] + lane_contents = fc_json['lane_set'][str(lane.lane_number)] lane_dict = multi_lane_to_dict(lane_contents)[lane.library_id] self.assertEqual(lane_dict['cluster_estimate'], lane.cluster_estimate) @@ -167,7 +167,7 @@ class ExperimentsTestCases(TestCase): """ Require logging in to retrieve meta data """ - response = self.client.get(u'/experiments/config/FC12150/json') + response = self.client.get('/experiments/config/FC12150/json') self.assertEqual(response.status_code, 403) def test_library_id(self): @@ -201,7 +201,7 @@ class ExperimentsTestCases(TestCase): This tests to make sure that the value entered in the raw library id field matches the library id looked up. 
""" - expected_ids = [ u'1215{}'.format(i) for i in range(1,9) ] + expected_ids = [ '1215{}'.format(i) for i in range(1,9) ] self.assertTrue(self.client.login(username=self.admin.username, password=self.password)) response = self.client.get('/admin/experiments/flowcell/{}/'.format(self.fc12150.id)) @@ -410,7 +410,7 @@ class ExperimentsTestCases(TestCase): count = 0 for r in query.execute(model): count += 1 - self.assertEqual(fromTypedNode(r['flowcell_id']), u'FC12150') + self.assertEqual(fromTypedNode(r['flowcell_id']), 'FC12150') lane_id = fromTypedNode(r['lane_id']) library_id = fromTypedNode(r['library_id']) self.assertTrue(library_id in expected[lane_id]) @@ -504,7 +504,7 @@ class TestSequencer(TestCase): seq.instrument_name = "HWI-SEQ1" seq.model = "Imaginary 5000" - self.assertEqual(unicode(seq), "Seq1 (HWI-SEQ1)") + self.assertEqual(str(seq), "Seq1 (HWI-SEQ1)") def test_lookup(self): fc = self.fc12150 diff --git a/htsworkflow/pipelines/eland.py b/htsworkflow/pipelines/eland.py index f1b0fda..b9a7ca3 100644 --- a/htsworkflow/pipelines/eland.py +++ b/htsworkflow/pipelines/eland.py @@ -378,7 +378,7 @@ class ElandLane(ResultLane): def get_elements(self): lane = ElementTree.Element(ElandLane.LANE, {'version': - unicode(ElandLane.XML_VERSION)}) + str(ElandLane.XML_VERSION)}) sample_tag = ElementTree.SubElement(lane, SAMPLE_NAME) sample_tag.text = self.sample_name lane_tag = ElementTree.SubElement(lane, LANE_ID) @@ -390,19 +390,19 @@ class ElandLane(ResultLane): for k, v in self.genome_map.items(): item = ElementTree.SubElement( genome_map, GENOME_ITEM, - {'name':k, 'value':unicode(v)}) + {'name':k, 'value':str(v)}) mapped_reads = ElementTree.SubElement(lane, MAPPED_READS) for k, v in self.mapped_reads.items(): item = ElementTree.SubElement( mapped_reads, MAPPED_ITEM, - {'name':k, 'value':unicode(v)}) + {'name':k, 'value':str(v)}) match_codes = ElementTree.SubElement(lane, MATCH_CODES) for k, v in self.match_codes.items(): item = ElementTree.SubElement( 
match_codes, MATCH_ITEM, - {'name':k, 'value':unicode(v)}) + {'name':k, 'value':str(v)}) reads = ElementTree.SubElement(lane, READS) - reads.text = unicode(self.reads) + reads.text = str(self.reads) return lane @@ -591,7 +591,7 @@ class SequenceLane(ResultLane): def get_elements(self): lane = ElementTree.Element(SequenceLane.LANE, {'version': - unicode(SequenceLane.XML_VERSION)}) + str(SequenceLane.XML_VERSION)}) sample_tag = ElementTree.SubElement(lane, SAMPLE_NAME) sample_tag.text = self.sample_name lane_tag = ElementTree.SubElement(lane, LANE_ID) @@ -600,9 +600,9 @@ class SequenceLane(ResultLane): end_tag = ElementTree.SubElement(lane, END) end_tag.text = str(self.end) reads = ElementTree.SubElement(lane, READS) - reads.text = unicode(self.reads) + reads.text = str(self.reads) sequence_type = ElementTree.SubElement(lane, SequenceLane.SEQUENCE_TYPE) - sequence_type.text = unicode(SequenceLane.SEQUENCE_DESCRIPTION[self.sequence_type]) + sequence_type.text = str(SequenceLane.SEQUENCE_DESCRIPTION[self.sequence_type]) return lane @@ -676,13 +676,13 @@ class ELAND(collections.MutableMapping): def get_elements(self): root = ElementTree.Element(ELAND.ELAND, - {'version': unicode(ELAND.XML_VERSION)}) + {'version': str(ELAND.XML_VERSION)}) for key in self: eland_lane = self[key].get_elements() - eland_lane.attrib[ELAND.END] = unicode(self[key].end-1) - eland_lane.attrib[ELAND.LANE_ID] = unicode(self[key].lane_id) - eland_lane.attrib[ELAND.SAMPLE] = unicode(self[key].sample_name) + eland_lane.attrib[ELAND.END] = str(self[key].end-1) + eland_lane.attrib[ELAND.LANE_ID] = str(self[key].lane_id) + eland_lane.attrib[ELAND.SAMPLE] = str(self[key].sample_name) root.append(eland_lane) return root return root diff --git a/htsworkflow/pipelines/sequences.py b/htsworkflow/pipelines/sequences.py index cc6f8ba..17e65cf 100644 --- a/htsworkflow/pipelines/sequences.py +++ b/htsworkflow/pipelines/sequences.py @@ -83,8 +83,8 @@ class SequenceFile(object): def key(self): return 
(self.flowcell, self.lane, self.read, self.project, self.split) - def __unicode__(self): - return unicode(self.path) + def __str__(self): + return str(self.path) def __eq__(self, other): """ @@ -162,7 +162,7 @@ class SequenceFile(object): def add(model, s, p, o): model.add_statement(RDF.Statement(s,p,o)) # a bit unreliable... assumes filesystem is encoded in utf-8 - path = os.path.abspath(self.path.encode('utf-8')) + path = os.path.abspath(self.path) fileNode = RDF.Node(RDF.Uri('file://' + path)) add(model, fileNode, rdfNS['type'], libNS['IlluminaResult']) add_lit(model, fileNode, libNS['flowcell_id'], self.flowcell) diff --git a/htsworkflow/pipelines/summary.py b/htsworkflow/pipelines/summary.py index d7081ec..c1108a2 100644 --- a/htsworkflow/pipelines/summary.py +++ b/htsworkflow/pipelines/summary.py @@ -444,8 +444,8 @@ def make_mean_range_element(parent, name, mean, deviation): Make an etree subelement """ element = etree.SubElement(parent, name, - { 'mean': unicode(mean), - 'deviation': unicode(deviation)}) + { 'mean': str(mean), + 'deviation': str(deviation)}) return element def parse_mean_range_element(element): diff --git a/htsworkflow/pipelines/test/test_sequences.py b/htsworkflow/pipelines/test/test_sequences.py index cd2b852..6502c64 100644 --- a/htsworkflow/pipelines/test/test_sequences.py +++ b/htsworkflow/pipelines/test/test_sequences.py @@ -71,7 +71,7 @@ class SequenceFileTests(TestCase): self.assertEqual(f0.filetype, 'srf') self.assertEqual(f0.path, pathname) - self.assertEqual(unicode(f0), unicode(pathname)) + self.assertEqual(str(f0), str(pathname)) self.assertEqual(repr(f0), "" % (pathname,)) self.assertEqual(f0.flowcell, '42BW9AAXX') self.assertEqual(f0.lane, '4') @@ -96,7 +96,7 @@ class SequenceFileTests(TestCase): self.assertEqual(f0.filetype, 'qseq') self.assertEqual(f0.path, pathname) - self.assertEqual(unicode(f0), unicode(pathname)) + self.assertEqual(str(f0), str(pathname)) self.assertEqual(repr(f0), "" %(pathname,)) 
self.assertEqual(f0.flowcell, '42BW9AAXX') self.assertEqual(f0.lane, '4') @@ -119,7 +119,7 @@ class SequenceFileTests(TestCase): self.assertEqual(f0.filetype, 'qseq') self.assertEqual(f0.path, pathname) - self.assertEqual(unicode(f0), unicode(pathname)) + self.assertEqual(str(f0), str(pathname)) self.assertEqual(repr(f0), "" %(pathname,)) self.assertEqual(f0.lane, '1') self.assertEqual(f0.read, 1) @@ -142,7 +142,7 @@ class SequenceFileTests(TestCase): self.assertEqual(f0.filetype, 'fastq') self.assertEqual(f0.path, pathname) - self.assertEqual(unicode(f0), unicode(pathname)) + self.assertEqual(str(f0), str(pathname)) self.assertEqual(repr(f0), "" % (pathname,)) self.assertEqual(f0.flowcell, '42BW9AAXX') self.assertEqual(f0.lane, '4') @@ -164,7 +164,7 @@ class SequenceFileTests(TestCase): self.assertEqual(f0.filetype, 'fastq') self.assertEqual(f0.path, pathname) - self.assertEqual(unicode(f0), unicode(pathname)) + self.assertEqual(str(f0), str(pathname)) self.assertEqual(repr(f0), "" %(pathname,)) self.assertEqual(f0.flowcell, '42BW9AAXX') self.assertEqual(f0.lane, '4') @@ -188,7 +188,7 @@ class SequenceFileTests(TestCase): self.assertEqual(f0.filetype, 'split_fastq') self.assertEqual(f0.path, pathname) - self.assertEqual(unicode(f0), unicode(pathname)) + self.assertEqual(str(f0), str(pathname)) self.assertEqual(repr(f0), "" %(pathname,)) self.assertEqual(f0.flowcell, '42BW9AAXX') self.assertEqual(f0.lane, '1') @@ -212,7 +212,7 @@ class SequenceFileTests(TestCase): self.assertEqual(f0.filetype, 'split_fastq') self.assertEqual(f0.path, pathname) - self.assertEqual(unicode(f0), unicode(pathname)) + self.assertEqual(str(f0), str(pathname)) self.assertEqual(repr(f0), "" % (pathname,)) self.assertEqual(f0.flowcell, '42BW9AAXX') self.assertEqual(f0.lane, '1') diff --git a/htsworkflow/util/ethelp.py b/htsworkflow/util/ethelp.py index f2f3e01..beaeb38 100644 --- a/htsworkflow/util/ethelp.py +++ b/htsworkflow/util/ethelp.py @@ -45,7 +45,7 @@ def validate_xhtml(html, 
base_url='http://localhost'): and False if it fails. """ try: - XHTML_RDF_DTD = lxml.etree.DTD(external_id='-//W3C//DTD XHTML+RDFa 1.0//EN') + XHTML_RDF_DTD = lxml.etree.DTD(external_id=b'-//W3C//DTD XHTML+RDFa 1.0//EN') except lxml.etree.DTDParseError as e: LOGGER.warn("Unable to load XHTML DTD %s" % (str(e),)) return diff --git a/inventory/test_inventory.py b/inventory/test_inventory.py index 43d0911..9967d44 100644 --- a/inventory/test_inventory.py +++ b/inventory/test_inventory.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, print_function, unicode_literals +from __future__ import absolute_import, print_function import RDF @@ -42,7 +42,7 @@ class InventoryTestCase(TestCase): itemNode = RDF.Node(RDF.Uri(url)) item_type = fromTypedNode( - model.get_target(itemNode, inventoryOntology[b'item_type'])) + model.get_target(itemNode, inventoryOntology['item_type'])) self.failUnlessEqual(item_type, item.item_type.name) def test_itemindex(self): diff --git a/samples/test_samples.py b/samples/test_samples.py index df98418..1c53f3c 100644 --- a/samples/test_samples.py +++ b/samples/test_samples.py @@ -1,4 +1,4 @@ -from __future__ import absolute_import, print_function, unicode_literals +from __future__ import absolute_import, print_function import datetime import unittest @@ -272,9 +272,9 @@ try: import RDF HAVE_RDF = True - rdfNS = RDF.NS(b"http://www.w3.org/1999/02/22-rdf-syntax-ns#") - xsdNS = RDF.NS(b"http://www.w3.org/2001/XMLSchema#") - libNS = RDF.NS(b"http://jumpgate.caltech.edu/wiki/LibraryOntology#") + rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#") + xsdNS = RDF.NS("http://www.w3.org/2001/XMLSchema#") + libNS = RDF.NS("http://jumpgate.caltech.edu/wiki/LibraryOntology#") from htsworkflow.util.rdfhelp import dump_model except ImportError as e: @@ -289,7 +289,7 @@ class TestRDFaLibrary(TestCase): def test_parse_rdfa(self): model = get_rdf_memory_model() - parser = RDF.Parser(name=b'rdfa') + parser = RDF.Parser(name='rdfa') bob = 
AffiliationFactory.create(name='Bob') @@ -311,20 +311,20 @@ class TestRDFaLibrary(TestCase): #with open('/tmp/test.ttl', 'w') as outstream: # dump_model(model, outstream) # http://jumpgate.caltech.edu/wiki/LibraryOntology#affiliation> - self.check_literal_object(model, ['Bob'], p=libNS[b'affiliation']) + self.check_literal_object(model, ['Bob'], p=libNS['affiliation']) self.check_literal_object(model, ['experiment type name'], - p=libNS[b'experiment_type']) - self.check_literal_object(model, ['400'], p=libNS[b'gel_cut']) + p=libNS['experiment_type']) + self.check_literal_object(model, ['400'], p=libNS['gel_cut']) self.check_literal_object(model, ['microfluidics bot 7321'], - p=libNS[b'made_by']) + p=libNS['made_by']) self.check_literal_object(model, [lib_object.library_name], - p=libNS[b'name']) + p=libNS['name']) self.check_literal_object(model, [lib_object.library_species.scientific_name], - p=libNS[b'species_name']) + p=libNS['species_name']) def check_literal_object(self, model, values, s=None, p=None, o=None):