X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=pysam.git;a=blobdiff_plain;f=tests%2Fpysam_test.py;fp=tests%2Fpysam_test.py;h=34071822e350237802cf99a50715a9d807debeb2;hp=6938a977130436534a5dcbd80383a29053c3b0c4;hb=68c074fc81858150ca7447a88e731eec96be6378;hpb=768881ffd9d33e3c5fa00dd9ea6f488f4f0700b3 diff --git a/tests/pysam_test.py b/tests/pysam_test.py index 6938a97..3407182 100755 --- a/tests/pysam_test.py +++ b/tests/pysam_test.py @@ -8,11 +8,20 @@ and data files located there. import pysam import unittest import os, re, sys -import itertools, collections +import itertools +import collections import subprocess import shutil import logging +IS_PYTHON3 = sys.version_info[0] >= 3 + +if IS_PYTHON3: + from itertools import zip_longest +else: + from itertools import izip as zip_longest + + SAMTOOLS="samtools" WORKDIR="pysam_test_work" @@ -27,11 +36,11 @@ def checkBinaryEqual( filename1, filename2 ): def chariter( infile ): while 1: c = infile.read(1) - if c == "": break + if c == b"": break yield c found = False - for c1,c2 in itertools.izip( chariter( infile1), chariter( infile2) ): + for c1,c2 in zip_longest( chariter( infile1), chariter( infile2) ): if c1 != c2: break else: found = True @@ -46,15 +55,18 @@ def runSamtools( cmd ): try: retcode = subprocess.call(cmd, shell=True) if retcode < 0: - print >>sys.stderr, "Child was terminated by signal", -retcode - except OSError, e: - print >>sys.stderr, "Execution failed:", e + print("Child was terminated by signal", -retcode) + except OSError as e: + print("Execution failed:", e) def getSamtoolsVersion(): '''return samtools version''' - pipe = subprocess.Popen(SAMTOOLS, shell=True, stderr=subprocess.PIPE).stderr - lines = "".join(pipe.readlines()) + with subprocess.Popen(SAMTOOLS, shell=True, stderr=subprocess.PIPE).stderr as pipe: + lines = b"".join(pipe.readlines()) + + if IS_PYTHON3: + lines = lines.decode('ascii') return re.search( "Version:\s+(\S+)", lines).groups()[0] class BinaryTest(unittest.TestCase): @@ -205,7 +217,8 @@ class BinaryTest(unittest.TestCase): # remove previous files if os.path.exists( WORKDIR ): shutil.rmtree( WORKDIR ) - + pass + # copy the source files to WORKDIR os.makedirs( WORKDIR ) @@ -220,29 +233,40 @@ class BinaryTest(unittest.TestCase): for label in self.order: command = self.commands[label] + # build samtools command and target and run samtools_target, samtools_command = command[0] + runSamtools( " ".join( (SAMTOOLS, samtools_command ))) + + # get pysam command and run try: pysam_target, pysam_command = command[1] - except ValueError, msg: + except ValueError as msg: raise ValueError( "error while setting up %s=%s: %s" %\ (label, command, msg) ) - runSamtools( " ".join( (SAMTOOLS, samtools_command ))) + pysam_method, pysam_options = pysam_command try: output = pysam_method( *pysam_options.split(" "), raw=True) - except pysam.SamtoolsError, msg: + except pysam.SamtoolsError as msg: raise pysam.SamtoolsError( "error while executing %s: options=%s: msg=%s" %\ (label, pysam_options, msg) ) + + + if ">" in samtools_command: - outfile = open( pysam_target, "wb" ) - for line in output: outfile.write( line ) - outfile.close() - + with open( pysam_target, "wb" ) as outfile: + if type(output) == list: + if IS_PYTHON3: + for line in output: + outfile.write( line.encode('ascii') ) + else: + for line in output: outfile.write( line ) + else: + outfile.write(output) + os.chdir( savedir ) BinaryTest.first_time = False - - samtools_version = getSamtoolsVersion() @@ -327,12 +351,14 @@ class BinaryTest(unittest.TestCase): def __del__(self): if os.path.exists( WORKDIR ): - shutil.rmtree( WORKDIR ) + pass + # shutil.rmtree( WORKDIR ) class IOTest(unittest.TestCase): '''check if reading samfile and writing a samfile are consistent.''' - def checkEcho( self, input_filename, reference_filename, + def checkEcho( self, input_filename, + reference_filename, output_filename, input_mode, output_mode, use_template = True ): '''iterate through *input_filename* writing to *output_filename* and @@ -354,8 +380,9 @@ class IOTest(unittest.TestCase): referencenames = infile.references, referencelengths = infile.lengths, add_sq_text = False ) - + iter = infile.fetch() + for x in iter: outfile.write( x ) infile.close() outfile.close() @@ -363,6 +390,7 @@ class IOTest(unittest.TestCase): self.assertTrue( checkBinaryEqual( reference_filename, output_filename), "files %s and %s are not the same" % (reference_filename, output_filename) ) + def testReadWriteBam( self ): input_filename = "ex1.bam" @@ -399,17 +427,58 @@ class IOTest(unittest.TestCase): self.checkEcho( input_filename, reference_filename, output_filename, "r", "w" ) - def testReadSamWithoutHeaderWriteSamWithoutHeader( self ): - + def testReadSamWithoutTargetNames( self ): + '''see issue 104.''' + input_filename = "example_unmapped_reads_no_sq.sam" + + # raise exception in default mode + self.assertRaises( ValueError, pysam.Samfile, input_filename, "r" ) + + # raise exception if no SQ files + self.assertRaises( ValueError, pysam.Samfile, input_filename, "r", + check_header = True) + + infile = pysam.Samfile( input_filename, check_header = False, check_sq = False ) + result = list(infile.fetch()) + + def testReadBamWithoutTargetNames( self ): + '''see issue 104.''' + input_filename = "example_unmapped_reads_no_sq.bam" + + # raise exception in default mode + self.assertRaises( ValueError, pysam.Samfile, input_filename, "r" ) + + # raise exception if no SQ files + self.assertRaises( ValueError, pysam.Samfile, input_filename, "r", + check_header = True) + + + infile = pysam.Samfile( input_filename, check_header = False, check_sq = False ) + result = list(infile.fetch( until_eof = True)) + + def testReadSamWithoutHeader( self ): input_filename = "ex1.sam" output_filename = "pysam_ex1.sam" reference_filename = "ex1.sam" - # disabled - reading from a samfile without header - # is not implemented. + # reading from a samfile without header is not implemented. + self.assertRaises( ValueError, pysam.Samfile, input_filename, "r" ) + + self.assertRaises( ValueError, pysam.Samfile, input_filename, "r", + check_header = False ) + + def testReadUnformattedFile( self ): + '''test reading from a file that is not bam/sam formatted''' + input_filename = "example.vcf40" + + # bam - file raise error + self.assertRaises( ValueError, pysam.Samfile, input_filename, "rb" ) + + # sam - file error, but can't fetch + self.assertRaises( ValueError, pysam.Samfile, input_filename, "r" ) - # self.checkEcho( input_filename, reference_filename, output_filename, - # "r", "w" ) + self.assertRaises( ValueError, pysam.Samfile, input_filename, "r", + check_header = False) def testFetchFromClosedFile( self ): @@ -450,7 +519,7 @@ class IOTest(unittest.TestCase): def testReadingFromSamFileWithoutHeader( self ): '''read from samfile without header. ''' - samfile = pysam.Samfile( "ex7.sam" ) + samfile = pysam.Samfile( "ex7.sam", check_header = False, check_sq = False ) self.assertRaises( NotImplementedError, samfile.__iter__ ) def testReadingFromFileWithoutIndex( self ): @@ -481,10 +550,24 @@ class TestFloatTagBug( unittest.TestCase ): This test is expected to fail until samtools is fixed. ''' samfile = pysam.Samfile("tag_bug.bam") - read = samfile.fetch(until_eof=True).next() + read = next(samfile.fetch(until_eof=True)) self.assertTrue( ('XC',1) in read.tags ) self.assertEqual(read.opt('XC'), 1) +class TestLargeFieldBug( unittest.TestCase ): + '''see issue 100''' + + def testLargeFileBug( self ): + '''when creating a read with a large entry in the tag field + causes an errror: + NotImplementedError: tags field too large + ''' + samfile = pysam.Samfile("issue100.bam") + read = next(samfile.fetch(until_eof=True)) + new_read = pysam.AlignedRead() + new_read.tags = read.tags + self.assertEqual( new_read.tags, read.tags ) + class TestTagParsing( unittest.TestCase ): '''tests checking the accuracy of tag setting and retrieval.''' @@ -501,7 +584,7 @@ class TestTagParsing( unittest.TestCase ): a.mrnm = 0 a.mpos=200 a.isize = 0 - a.qual ="1234" * 3 + a.qual ="1234" * 3 # todo: create tags return a @@ -509,7 +592,7 @@ class TestTagParsing( unittest.TestCase ): x = -2 aligned_read = self.makeRead() aligned_read.tags = [("XD", int(x) ) ] - print aligned_read.tags + # print (aligned_read.tags) def testNegativeIntegers2( self ): x = -2 @@ -522,6 +605,12 @@ class TestTagParsing( unittest.TestCase ): outfile.write (r ) outfile.close() + def testCigarString( self ): + r = self.makeRead() + self.assertEqual( r.cigarstring, "M10D1M25" ) + r.cigarstring = "M20D10M20" + self.assertEqual( r.cigar, [(0,20), (2,10), (0,20)]) + class TestIteratorRow(unittest.TestCase): def setUp(self): @@ -533,15 +622,18 @@ class TestIteratorRow(unittest.TestCase): sa = list(pysam.view( "ex1.bam", rnge, raw = True) ) self.assertEqual( len(ps), len(sa), "unequal number of results for range %s: %i != %i" % (rnge, len(ps), len(sa) )) # check if the same reads are returned and in the same order - for line, pair in enumerate( zip( ps, sa ) ): - a,b = pair + for line, (a, b) in enumerate( list(zip( ps, sa )) ): d = b.split("\t") self.assertEqual( a.qname, d[0], "line %i: read id mismatch: %s != %s" % (line, a.rname, d[0]) ) self.assertEqual( a.pos, int(d[3])-1, "line %i: read position mismatch: %s != %s, \n%s\n%s\n" % \ (line, a.pos, int(d[3])-1, str(a), str(d) ) ) - self.assertEqual( a.qual, d[10], "line %i: quality mismatch: %s != %s, \n%s\n%s\n" % \ - (line, a.qual, d[10], + if sys.version_info[0] < 3: + qual = d[10] + else: + qual = d[10].encode('ascii') + self.assertEqual( a.qual, qual, "line %i: quality mismatch: %s != %s, \n%s\n%s\n" % \ + (line, a.qual, qual, str(a), str(d) ) ) def testIteratePerContig(self): @@ -558,6 +650,7 @@ class TestIteratorRow(unittest.TestCase): def tearDown(self): self.samfile.close() + class TestIteratorRowAll(unittest.TestCase): def setUp(self): @@ -569,7 +662,7 @@ class TestIteratorRowAll(unittest.TestCase): sa = list(pysam.view( "ex1.bam", raw = True) ) self.assertEqual( len(ps), len(sa), "unequal number of results: %i != %i" % (len(ps), len(sa) )) # check if the same reads are returned - for line, pair in enumerate( zip( ps, sa ) ): + for line, pair in enumerate( list(zip( ps, sa )) ): data = pair[1].split("\t") self.assertEqual( pair[0].qname, data[0], "read id mismatch in line %i: %s != %s" % (line, pair[0].rname, data[0]) ) @@ -589,10 +682,13 @@ class TestIteratorColumn(unittest.TestCase): def setUp(self): self.samfile=pysam.Samfile( "ex4.bam","rb" ) - def checkRange( self, rnge ): + def checkRange( self, contig, start = None, end = None, truncate = False ): '''compare results from iterator with those from samtools.''' # check if the same reads are returned and in the same order - for column in self.samfile.pileup(region=rnge): + for column in self.samfile.pileup(contig, start, end, truncate = truncate): + if truncate: + self.assertGreaterEqual( column.pos, start ) + self.assertLess( column.pos, end ) thiscov = len(column.pileups) refcov = self.mCoverages[self.samfile.getrname(column.tid)][column.pos] self.assertEqual( thiscov, refcov, "wrong coverage at pos %s:%i %i should be %i" % (self.samfile.getrname(column.tid), column.pos, thiscov, refcov)) @@ -610,11 +706,11 @@ class TestIteratorColumn(unittest.TestCase): '''check random access per range''' for contig, length in zip(self.samfile.references, self.samfile.lengths): for start in range( 1, length, 90): - self.checkRange( "%s:%i-%i" % (contig, start, start + 90) ) # this includes empty ranges + self.checkRange( contig, start, start + 90 ) # this includes empty ranges def testInverse( self ): '''test the inverse, is point-wise pileup accurate.''' - for contig, refseq in self.mCoverages.items(): + for contig, refseq in list(self.mCoverages.items()): refcolumns = sum(refseq) for pos, refcov in enumerate( refseq ): columns = list(self.samfile.pileup( contig, pos, pos+1) ) @@ -626,10 +722,17 @@ class TestIteratorColumn(unittest.TestCase): self.assertEqual( len(columns), refcolumns, "pileup incomplete at position %i: got %i, expected %i " %\ (pos, len(columns), refcolumns)) - - + def testIterateTruncate( self ): + '''check random access per range''' + for contig, length in zip(self.samfile.references, self.samfile.lengths): + for start in range( 1, length, 90): + self.checkRange( contig, start, start + 90, truncate = True ) # this includes empty ranges + + + def tearDown(self): self.samfile.close() + class TestAlignedReadFromBam(unittest.TestCase): @@ -680,24 +783,24 @@ class TestAlignedReadFromBam(unittest.TestCase): self.assertEqual( self.reads[1].tlen, 412, "insert size mismatch in read 2: %s != %s" % (self.reads[1].tlen, 412) ) def testARseq(self): - self.assertEqual( self.reads[0].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "sequence mismatch in read 1: %s != %s" % (self.reads[0].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG") ) - self.assertEqual( self.reads[1].seq, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA", "sequence size mismatch in read 2: %s != %s" % (self.reads[1].seq, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA") ) - self.assertEqual( self.reads[3].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "sequence mismatch in read 4: %s != %s" % (self.reads[3].seq, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG") ) + self.assertEqual( self.reads[0].seq, b"AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "sequence mismatch in read 1: %s != %s" % (self.reads[0].seq, b"AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG") ) + self.assertEqual( self.reads[1].seq, b"ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA", "sequence size mismatch in read 2: %s != %s" % (self.reads[1].seq, b"ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA") ) + self.assertEqual( self.reads[3].seq, b"AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "sequence mismatch in read 4: %s != %s" % (self.reads[3].seq, b"AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG") ) def testARqual(self): - self.assertEqual( self.reads[0].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", "quality string mismatch in read 1: %s != %s" % (self.reads[0].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<") ) - self.assertEqual( self.reads[1].qual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "quality string mismatch in read 2: %s != %s" % (self.reads[1].qual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<") ) - self.assertEqual( self.reads[3].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", "quality string mismatch in read 3: %s != %s" % (self.reads[3].qual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<") ) + self.assertEqual( self.reads[0].qual, b"<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", "quality string mismatch in read 1: %s != %s" % (self.reads[0].qual, b"<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<") ) + self.assertEqual( self.reads[1].qual, b"<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "quality string mismatch in read 2: %s != %s" % (self.reads[1].qual, b"<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<") ) + self.assertEqual( self.reads[3].qual, b"<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", "quality string mismatch in read 3: %s != %s" % (self.reads[3].qual, b"<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<") ) def testARquery(self): - self.assertEqual( self.reads[0].query, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "query mismatch in read 1: %s != %s" % (self.reads[0].query, "AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG") ) - self.assertEqual( self.reads[1].query, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA", "query size mismatch in read 2: %s != %s" % (self.reads[1].query, "ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA") ) - self.assertEqual( self.reads[3].query, "TAGCTAGCTACCTATATCTTGGTCTT", "query mismatch in read 4: %s != %s" % (self.reads[3].query, "TAGCTAGCTACCTATATCTTGGTCTT") ) + self.assertEqual( self.reads[0].query, b"AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG", "query mismatch in read 1: %s != %s" % (self.reads[0].query, b"AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG") ) + self.assertEqual( self.reads[1].query, b"ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA", "query size mismatch in read 2: %s != %s" % (self.reads[1].query, b"ACCTATATCTTGGCCTTGGCCGATGCGGCCTTGCA") ) + self.assertEqual( self.reads[3].query, b"TAGCTAGCTACCTATATCTTGGTCTT", "query mismatch in read 4: %s != %s" % (self.reads[3].query, b"TAGCTAGCTACCTATATCTTGGTCTT") ) def testARqqual(self): - self.assertEqual( self.reads[0].qqual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", "qquality string mismatch in read 1: %s != %s" % (self.reads[0].qqual, "<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<") ) - self.assertEqual( self.reads[1].qqual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "qquality string mismatch in read 2: %s != %s" % (self.reads[1].qqual, "<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<") ) - self.assertEqual( self.reads[3].qqual, "<<<<<<<<<<<<<<<<<:<9/,&,22", "qquality string mismatch in read 3: %s != %s" % (self.reads[3].qqual, "<<<<<<<<<<<<<<<<<:<9/,&,22") ) + self.assertEqual( self.reads[0].qqual, b"<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<", "qquality string mismatch in read 1: %s != %s" % (self.reads[0].qqual, b"<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<") ) + self.assertEqual( self.reads[1].qqual, b"<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<", "qquality string mismatch in read 2: %s != %s" % (self.reads[1].qqual, b"<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<") ) + self.assertEqual( self.reads[3].qqual, b"<<<<<<<<<<<<<<<<<:<9/,&,22", "qquality string mismatch in read 3: %s != %s" % (self.reads[3].qqual, b"<<<<<<<<<<<<<<<<<:<9/,&,22") ) def testPresentOptionalFields(self): self.assertEqual( self.reads[0].opt('NM'), 1, "optional field mismatch in read 1, NM: %s != %s" % (self.reads[0].opt('NM'), 1) ) @@ -758,7 +861,7 @@ class TestHeaderSam(unittest.TestCase): def compareHeaders( self, a, b ): '''compare two headers a and b.''' - for ak,av in a.iteritems(): + for ak,av in a.items(): self.assertTrue( ak in b, "key '%s' not in '%s' " % (ak,b) ) self.assertEqual( av, b[ak] ) @@ -787,6 +890,7 @@ class TestHeaderBam(TestHeaderSam): def setUp(self): self.samfile=pysam.Samfile( "ex3.bam","rb" ) + class TestUnmappedReads(unittest.TestCase): def testSAM(self): @@ -825,6 +929,14 @@ class TestPileupObjects(unittest.TestCase): def tearDown(self): self.samfile.close() + def testIteratorOutOfScope( self ): + '''test if exception is raised if pileup col is accessed after iterator is exhausted.''' + + for pileupcol in self.samfile.pileup(): + pass + + self.assertRaises( ValueError, getattr, pileupcol, "pileups" ) + class TestContextManager(unittest.TestCase): def testManager( self ): @@ -917,16 +1029,16 @@ class TestWrongFormat(unittest.TestCase): class TestFastaFile(unittest.TestCase): mSequences = { 'chr1' : - "CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCGTCCATGGCCCAGCATTAGGGAGCTGTGGACCCTGCAGCCTGGCTGTGGGGGCCGCAGTGGCTGAGGGGTGCAGAGCCGAGTCACGGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAGTCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAATGAAAACTATATTTATGCTATTCAGTTCTAAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAACAACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGTTACCAAATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCTCTTCTTCCAAAGATGAAACGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCAATGGACCTGTGATATCTGGATTCTGGGAAATTCTTCATCCTGGACCCTGAGAGATTCTGCAGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAGCCATCACAATGAACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCAACCTCATACACACACATGGTTTAGGGGTATAATACCTCTACATGGCTGATTATGAAAACAATGTTCCCCAGATACCATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTCTTTTGGCATTTGCCTTCAGACCCTACACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTTTCCCATCATGAAGCACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAGGTGCACTAATGCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAATACATGAGATTATTAGGAAATGCTTTACTGTCATAACTATGAAGAGACTATTGCCAGATGAACCACACATTAATACTATGTTTCTTATCTGCACATTACTACCCTGCAATTAATATAATTGTGTCCATGTACACACGCTGTCCTATGTACTTATCATGACTCTATCCCAAATTCCCAATTACGTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGCTTGGGCTGTAATGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTCTGTTTCTTTGTTTGATTTGGTGGAAGACATAATCCCACGCTTCCTATGGAAAGGTTGTTGGGAGATTTTTAATGATTCCTCAATGTTAAAATGTCTATTTTTGTCTTGACACCCAACTAATATTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTCTCCCTCGTCTTCTTA", + b"CACTAGTGGCTCATTGTAAATGTGTGGTTTAACTCGTCCATGGCCCAGCATTAGGGAGCTGTGGACCCTGCAGCCTGGCTGTGGGGGCCGCAGTGGCTGAGGGGTGCAGAGCCGAGTCACGGGGTTGCCAGCACAGGGGCTTAACCTCTGGTGACTGCCAGAGCTGCTGGCAAGCTAGAGTCCCATTTGGAGCCCCTCTAAGCCGTTCTATTTGTAATGAAAACTATATTTATGCTATTCAGTTCTAAATATAGAAATTGAAACAGCTGTGTTTAGTGCCTTTGTTCAACCCCCTTGCAACAACCTTGAGAACCCCAGGGAATTTGTCAATGTCAGGGAAGGAGCATTTTGTCAGTTACCAAATGTGTTTATTACCAGAGGGATGGAGGGAAGAGGGACGCTGAAGAACTTTGATGCCCTCTTCTTCCAAAGATGAAACGCGTAACTGCGCTCTCATTCACTCCAGCTCCCTGTCACCCAATGGACCTGTGATATCTGGATTCTGGGAAATTCTTCATCCTGGACCCTGAGAGATTCTGCAGCCCAGCTCCAGATTGCTTGTGGTCTGACAGGCTGCAACTGTGAGCCATCACAATGAACAACAGGAAGAAAAGGTCTTTCAAAAGGTGATGTGTGTTCTCATCAACCTCATACACACACATGGTTTAGGGGTATAATACCTCTACATGGCTGATTATGAAAACAATGTTCCCCAGATACCATCCCTGTCTTACTTCCAGCTCCCCAGAGGGAAAGCTTTCAACGCTTCTAGCCATTTCTTTTGGCATTTGCCTTCAGACCCTACACGAATGCGTCTCTACCACAGGGGGCTGCGCGGTTTCCCATCATGAAGCACTGAACTTCCACGTCTCATCTAGGGGAACAGGGAGGTGCACTAATGCGCTCCACGCCCAAGCCCTTCTCACAGTTTCTGCCCCCAGCATGGTTGTACTGGGCAATACATGAGATTATTAGGAAATGCTTTACTGTCATAACTATGAAGAGACTATTGCCAGATGAACCACACATTAATACTATGTTTCTTATCTGCACATTACTACCCTGCAATTAATATAATTGTGTCCATGTACACACGCTGTCCTATGTACTTATCATGACTCTATCCCAAATTCCCAATTACGTCCTATCTTCTTCTTAGGGAAGAACAGCTTAGGTATCAATTTGGTGTTCTGTGTAAAGTCTCAGGGAGCCGTCCGTGTCCTCCCATCTGGCCTCGTCCACACTGGTTCTCTTGAAAGCTTGGGCTGTAATGATGCCCCTTGGCCATCACCCAGTCCCTGCCCCATCTCTTGTAATCTCTCTCCTTTTTGCTGCATCCCTGTCTTCCTCTGTCTTGATTTACTTGTTGTTGGTTTTCTGTTTCTTTGTTTGATTTGGTGGAAGACATAATCCCACGCTTCCTATGGAAAGGTTGTTGGGAGATTTTTAATGATTCCTCAATGTTAAAATGTCTATTTTTGTCTTGACACCCAACTAATATTTGTCTGAGCAAAACAGTCTAGATGAGAGAGAACTTCCCTGGAGGTCTGATGGCGTTTCTCCCTCGTCTTCTTA", 'chr2' : - "TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAAGAAATTACAAAATATAGTTGAAAGCTCTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCTTATGAATTAACCCAGTCAGACAAAAATAAAGAAAAAAATTTTAAAAATGAACAGAGCTTTCAAGAAGTATGAGATTATGTAAAGTAACTGAACCTATGAGTCACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATACATTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACTATCTAAAGTCAACATGAAGGAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGGAAATCCCATCAGAATAACAATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAAGAGATTGGATCTAATTTTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGTTATGCCCTGCTAAACTAAGCATCATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGATAATTCATCATCACTAAACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAATTAAAGTTCAATACTCACCATCATAAATACACACAAAAGTACAAAACTCACAGGTTTTATAAAACAATTGAGACTACAGAGCAACTAGGTAAAAAATTAACATTACAACAGGAACAAAACCTCATATATCAATATTAACTTTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAGATATAGATTGGCAGAACAGATTTAAAAACATGAACTAACTATATGCTGTTTACAAGAAACTCATTAATAAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAAACCAAATGAGAGAAGGAGTAGCTATACTTATATCAGATAAAGCACACTTTAAATCAACAACAGTAAAATAAAACAAAGGAGGTCATCATACAATGATAAAAAGATCAATTCAGCAAGAAGATATAACCATCCTACTAAATACATATGCACCTAACACAAGACTACCCAGATTCATAAAACAAATACTACTAGACCTAAGAGGGATGAGAAATTACCTAATTGGTACAATGTACAATATTCTGATGATGGTTACACTAAAAGCCCATACTTTACTGCTACTCAATATATCCATGTAACAAATCTGCGCTTGTACTTCTAAATCTATAAAAAAATTAAAATTTAACAAAAGTAAATAAAACACATAGCTAAAACTAAAAAAGCAAAAACAAAAACTATGCTAAGTATTGGTAAAGATGTGGGGAAAAAAGTAAACTCTCAAATATTGCTAGTGGGAGTATAAATTGTTTTCCACTTTGGAAAACAATTTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTTTGCATGCCAGAAAAAAATATTTACAGTAACT", + b"TTCAAATGAACTTCTGTAATTGAAAAATTCATTTAAGAAATTACAAAATATAGTTGAAAGCTCTAACAATAGACTAAACCAAGCAGAAGAAAGAGGTTCAGAACTTGAAGACAAGTCTCTTATGAATTAACCCAGTCAGACAAAAATAAAGAAAAAAATTTTAAAAATGAACAGAGCTTTCAAGAAGTATGAGATTATGTAAAGTAACTGAACCTATGAGTCACAGGTATTCCTGAGGAAAAAGAAAAAGTGAGAAGTTTGGAAAAACTATTTGAGGAAGTAATTGGGGAAAACCTCTTTAGTCTTGCTAGAGATTTAGACATCTAAATGAAAGAGGCTCAAAGAATGCCAGGAAGATACATTGCAAGACAGACTTCATCAAGATATGTAGTCATCAGACTATCTAAAGTCAACATGAAGGAAAAAAATTCTAAAATCAGCAAGAGAAAAGCATACAGTCATCTATAAAGGAAATCCCATCAGAATAACAATGGGCTTCTCAGCAGAAACCTTACAAGCCAGAAGAGATTGGATCTAATTTTTGGACTTCTTAAAGAAAAAAAAACCTGTCAAACACGAATGTTATGCCCTGCTAAACTAAGCATCATAAATGAAGGGGAAATAAAGTCAAGTCTTTCCTGACAAGCAAATGCTAAGATAATTCATCATCACTAAACCAGTCCTATAAGAAATGCTCAAAAGAATTGTAAAAGTCAAAATTAAAGTTCAATACTCACCATCATAAATACACACAAAAGTACAAAACTCACAGGTTTTATAAAACAATTGAGACTACAGAGCAACTAGGTAAAAAATTAACATTACAACAGGAACAAAACCTCATATATCAATATTAACTTTGAATAAAAAGGGATTAAATTCCCCCACTTAAGAGATATAGATTGGCAGAACAGATTTAAAAACATGAACTAACTATATGCTGTTTACAAGAAACTCATTAATAAAGACATGAGTTCAGGTAAAGGGGTGGAAAAAGATGTTCTACGCAAACAGAAACCAAATGAGAGAAGGAGTAGCTATACTTATATCAGATAAAGCACACTTTAAATCAACAACAGTAAAATAAAACAAAGGAGGTCATCATACAATGATAAAAAGATCAATTCAGCAAGAAGATATAACCATCCTACTAAATACATATGCACCTAACACAAGACTACCCAGATTCATAAAACAAATACTACTAGACCTAAGAGGGATGAGAAATTACCTAATTGGTACAATGTACAATATTCTGATGATGGTTACACTAAAAGCCCATACTTTACTGCTACTCAATATATCCATGTAACAAATCTGCGCTTGTACTTCTAAATCTATAAAAAAATTAAAATTTAACAAAAGTAAATAAAACACATAGCTAAAACTAAAAAAGCAAAAACAAAAACTATGCTAAGTATTGGTAAAGATGTGGGGAAAAAAGTAAACTCTCAAATATTGCTAGTGGGAGTATAAATTGTTTTCCACTTTGGAAAACAATTTGGTAATTTCGTTTTTTTTTTTTTCTTTTCTCTTTTTTTTTTTTTTTTTTTTGCATGCCAGAAAAAAATATTTACAGTAACT", } def setUp(self): self.file=pysam.Fastafile( "ex1.fa" ) def testFetch(self): - for id, seq in self.mSequences.items(): + for id, seq in list(self.mSequences.items()): self.assertEqual( seq, self.file.fetch( id ) ) for x in range( 0, len(seq), 10): self.assertEqual( seq[x:x+10], self.file.fetch( id, x, x+10) ) @@ -937,15 +1049,15 @@ class TestFastaFile(unittest.TestCase): # unknown sequence returns "" - self.assertEqual( "", self.file.fetch("chr12") ) + self.assertEqual( b"", self.file.fetch("chr12") ) def testOutOfRangeAccess( self ): '''test out of range access.''' # out of range access returns an empty string - for contig, s in self.mSequences.iteritems(): - self.assertEqual( self.file.fetch( contig, len(s), len(s)+1), "" ) + for contig, s in self.mSequences.items(): + self.assertEqual( self.file.fetch( contig, len(s), len(s)+1), b"" ) - self.assertEqual( self.file.fetch( "chr3", 0 , 100), "" ) + self.assertEqual( self.file.fetch( "chr3", 0 , 100), b"" ) def testFetchErrors( self ): self.assertRaises( ValueError, self.file.fetch ) @@ -998,16 +1110,16 @@ class TestAlignedRead(unittest.TestCase): a = pysam.AlignedRead() a.qname = "read_12345" - a.seq="ACGT" * 3 + a.seq="ACGT" * 10 a.flag = 0 a.rname = 0 - a.pos = 33 + a.pos = 20 a.mapq = 20 - a.cigar = ( (0,10), (2,1), (0,25) ) + a.cigar = ( (0,10), (2,1), (0,9), (1,1), (0,20) ) a.mrnm = 0 a.mpos=200 a.isize=167 - a.qual="1234" * 3 + a.qual="1234" * 10 # todo: create tags return a @@ -1028,17 +1140,17 @@ class TestAlignedRead(unittest.TestCase): # check cigar b.cigar = ( (0,10), ) self.checkFieldEqual( a, b, "cigar" ) - b.cigar = ( (0,10), (2,1), (0,25), (2,1), (0,25) ) + b.cigar = ( (0,10), (2,1), (0,10) ) self.checkFieldEqual( a, b, "cigar" ) - b.cigar = ( (0,10), (2,1), (0,25) ) + b.cigar = ( (0,10), (2,1), (0,9), (1,1), (0,20) ) self.checkFieldEqual( a, b) # check seq b.seq = "ACGT" self.checkFieldEqual( a, b, ("seq", "qual") ) - b.seq = "ACGT" * 10 - self.checkFieldEqual( a, b, ("seq", "qual") ) b.seq = "ACGT" * 3 + self.checkFieldEqual( a, b, ("seq", "qual") ) + b.seq = "ACGT" * 10 self.checkFieldEqual( a, b, ("qual",)) # reset qual @@ -1067,13 +1179,13 @@ class TestAlignedRead(unittest.TestCase): a.seq="ACGT" * 200 a.flag = 0 a.rname = 0 - a.pos = 33 + a.pos = 20 a.mapq = 20 - a.cigar = ( (0,10), (2,1), (0,25) ) + a.cigar = ( (0, 4 * 200), ) a.mrnm = 0 a.mpos=200 a.isize=167 - a.qual="1234" * 200 + a.qual="1234" * 200 return a @@ -1090,8 +1202,41 @@ class TestAlignedRead(unittest.TestCase): after = entry.tags self.assertEqual( after, before ) + def testUpdateTlen( self ): + '''check if updating tlen works''' + a = self.buildRead() + oldlen = a.tlen + oldlen *= 2 + a.tlen = oldlen + self.assertEqual( a.tlen, oldlen ) + + def testPositions( self ): + a = self.buildRead() + self.assertEqual( a.positions, + [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, + 31, 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, + 50, 51, 52, 53, 54, 55, 56, 57, 58, 59] ) + + self.assertEqual( a.aligned_pairs, + [(0, 20), (1, 21), (2, 22), (3, 23), (4, 24), + (5, 25), (6, 26), (7, 27), (8, 28), (9, 29), + (None, 30), + (10, 31), (11, 32), (12, 33), (13, 34), (14, 35), + (15, 36), (16, 37), (17, 38), (18, 39), (19, None), + (20, 40), (21, 41), (22, 42), (23, 43), (24, 44), + (25, 45), (26, 46), (27, 47), (28, 48), (29, 49), + (30, 50), (31, 51), (32, 52), (33, 53), (34, 54), + (35, 55), (36, 56), (37, 57), (38, 58), (39, 59)] ) + + self.assertEqual( a.positions, [x[1] for x in a.aligned_pairs if x[0] != None and x[1] != None] ) + # alen is the length of the aligned read in genome + self.assertEqual( a.alen, a.aligned_pairs[-1][0] + 1 ) + # aend points to one beyond last aligned base in ref + self.assertEqual( a.positions[-1], a.aend - 1 ) + class TestDeNovoConstruction(unittest.TestCase): - '''check BAM/SAM file construction using ex3.sam + '''check BAM/SAM file construction using ex6.sam (note these are +1 coordinates): @@ -1125,7 +1270,6 @@ class TestDeNovoConstruction(unittest.TestCase): def setUp( self ): - a = pysam.AlignedRead() a.qname = "read_28833_29006_6945" a.seq="AGCTTAGCTAGCTACCTATATCTTGGTCTTGGCCG" @@ -1137,8 +1281,8 @@ class TestDeNovoConstruction(unittest.TestCase): a.mrnm = 0 a.mpos=199 a.isize=167 - a.qual="<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<" - a.tags = ( ("NM", 1), + a.qual="<<<<<<<<<<<<<<<<<<<<<:<9/,&,22;;<<<" + a.tags = ( ("NM", 1), ("RG", "L1") ) b = pysam.AlignedRead() @@ -1152,8 +1296,8 @@ class TestDeNovoConstruction(unittest.TestCase): b.mrnm = 1 b.mpos=499 b.isize=412 - b.qual="<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<" - b.tags = ( ("MF", 18), + b.qual="<<<<<;<<<<7;:<<<6;<<<<<<<<<<<<7<<<<" + b.tags = ( ("MF", 18), ("RG", "L2") ) self.reads = (a,b) @@ -1166,7 +1310,6 @@ class TestDeNovoConstruction(unittest.TestCase): for x in self.reads: outfile.write( x ) outfile.close() - self.assertTrue( checkBinaryEqual( tmpfilename, self.samfile ), "mismatch when construction SAM file, see %s %s" % (tmpfilename, self.samfile)) @@ -1204,6 +1347,61 @@ class TestDeNovoConstruction(unittest.TestCase): os.unlink( tmpfilename ) +class TestDeNovoConstructionUserTags(TestDeNovoConstruction): + '''test de novo construction with a header that contains lower-case tags.''' + + header = { 'HD': {'VN': '1.0'}, + 'SQ': [{'LN': 1575, 'SN': 'chr1'}, + {'LN': 1584, 'SN': 'chr2'}], + 'x1': {'A': 2, 'B': 5 }, + 'x3': {'A': 6, 'B': 5 }, + 'x2': {'A': 4, 'B': 5 } } + + bamfile = "example_user_header.bam" + samfile = "example_user_header.sam" + +class TestEmptyHeader( unittest.TestCase ): + '''see issue 84.''' + + def testEmptyHeader( self ): + + s = pysam.Samfile('example_empty_header.bam') + self.assertEqual( s.header, {'SQ': [{'LN': 1000, 'SN': 'chr1'}]} ) + +class TestBTagSam( unittest.TestCase ): + '''see issue 81.''' + + compare = [ [100, 1, 91, 0, 7, 101, 0, 201, 96, 204, 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78, 197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1, 186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195, 101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0, 212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2, 197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194, 0, 180, 0, 108, 0, 203, 104, 16, 5, 205, 0, 0, 0, 1, 1, 100, 98, 0, 0, 204, 6, 0, 79, 0, 0, 101, 7, 109, 90, 265, 1, 27, 10, 109, 102, 9, 0, 292, 0, 110, 0, 0, 102, 112, 0, 0, 84, 100, 103, 2, 81, 126, 0, 2, 90, 0, 15, 96, 15, 1, 0, 2, 0, 107, 92, 0, 0, 101, 3, 98, 15, 102, 13, 116, 116, 90, 93, 198, 0, 0, 0, 199, 92, 26, 495, 100, 5, 0, 100, 5, 209, 0, 92, 107, 90, 0, 0, 0, 0, 109, 194, 7, 94, 200, 0, 40, 197, 0, 11, 0, 0, 112, 110, 6, 4, 200, 28, 0, 196, 0, 203, 1, 129, 0, 0, 1, 0, 94, 0, 1, 0, 107, 5, 201, 3, 3, 100, 0, 121, 0, 7, 0, 1, 105, 306, 3, 86, 8, 183, 0, 12, 163, 17, 83, 22, 0, 0, 1, 8, 109, 103, 0, 0, 295, 0, 200, 16, 172, 3, 16, 182, 3, 11, 0, 0, 223, 111, 103, 0, 5, 225, 0, 95], + [-100,200,-300,-400], + [-100,12], + [12,15], + [-1.0,5.0,2.5] ] + + filename = 'example_btag.sam' + + def testRead( self ): + + s = pysam.Samfile(self.filename) + for x, read in enumerate(s): + if x == 0: + self.assertEqual( read.tags, [('RG', 'QW85I'), ('PG', 'tmap'), ('MD', '140'), ('NM', 0), ('AS', 140), ('FZ', [100, 1, 91, 0, 7, 101, 0, 201, 96, 204, 0, 0, 87, 109, 0, 7, 97, 112, 1, 12, 78, 197, 0, 7, 100, 95, 101, 202, 0, 6, 0, 1, 186, 0, 84, 0, 244, 0, 0, 324, 0, 107, 195, 101, 113, 0, 102, 0, 104, 3, 0, 101, 1, 0, 212, 6, 0, 0, 1, 0, 74, 1, 11, 0, 196, 2, 197, 103, 0, 108, 98, 2, 7, 0, 1, 2, 194, 0, 180, 0, 108, 0, 203, 104, 16, 5, 205, 0, 0, 0, 1, 1, 100, 98, 0, 0, 204, 6, 0, 79, 0, 0, 101, 7, 109, 90, 265, 1, 27, 10, 109, 102, 9, 0, 292, 0, 110, 0, 0, 102, 112, 0, 0, 84, 100, 103, 2, 81, 126, 0, 2, 90, 0, 15, 96, 15, 1, 0, 2, 0, 107, 92, 0, 0, 101, 3, 98, 15, 102, 13, 116, 116, 90, 93, 198, 0, 0, 0, 199, 92, 26, 495, 100, 5, 0, 100, 5, 209, 0, 92, 107, 90, 0, 0, 0, 0, 109, 194, 7, 94, 200, 0, 40, 197, 0, 11, 0, 0, 112, 110, 6, 4, 200, 28, 0, 196, 0, 203, 1, 129, 0, 0, 1, 0, 94, 0, 1, 0, 107, 5, 201, 3, 3, 100, 0, 121, 0, 7, 0, 1, 105, 306, 3, 86, 8, 183, 0, 12, 163, 17, 83, 22, 0, 0, 1, 8, 109, 103, 0, 0, 295, 0, 200, 16, 172, 3, 16, 182, 3, 11, 0, 0, 223, 111, 103, 0, 5, 225, 0, 95]), ('XA', 'map2-1'), ('XS', 53), ('XT', 38), ('XF', 1), ('XE', 0)] + ) + + fz = dict(read.tags)["FZ"] + self.assertEqual( fz, self.compare[x] ) + self.assertEqual( read.opt("FZ"), self.compare[x]) + + def testWrite( self ): + + s = pysam.Samfile(self.filename) + for read in s: + before = read.tags + read.tags = read.tags + after = read.tags + self.assertEqual( after, before ) + +class TestBTagBam( TestBTagSam ): + filename = 'example_btag.bam' class TestDoubleFetch(unittest.TestCase): '''check if two iterators on the same bamfile are independent.''' @@ -1296,22 +1494,22 @@ class TestLargeOptValues( unittest.TestCase ): i = samfile.fetch() for exp in self.ints: - rr = i.next() + rr = next(i) obs = rr.opt("ZP") self.assertEqual( exp, obs, "expected %s, got %s\n%s" % (str(exp), str(obs), str(rr))) for exp in [ -x for x in self.ints ]: - rr = i.next() + rr = next(i) obs = rr.opt("ZP") self.assertEqual( exp, obs, "expected %s, got %s\n%s" % (str(exp), str(obs), str(rr))) for exp in self.floats: - rr = i.next() + rr = next(i) obs = rr.opt("ZP") self.assertEqual( exp, obs, "expected %s, got %s\n%s" % (str(exp), str(obs), str(rr))) for exp in [ -x for x in self.floats ]: - rr = i.next() + rr = next(i) obs = rr.opt("ZP") self.assertEqual( exp, obs, "expected %s, got %s\n%s" % (str(exp), str(obs), str(rr))) @@ -1443,7 +1641,7 @@ class TestLogging( unittest.TestCase ): bam = pysam.Samfile(bamfile, 'rb') cols = bam.pileup() - self.assert_( True ) + self.assertTrue( True ) def testFail1( self ): self.check( "ex9_fail.bam", False ) @@ -1469,7 +1667,7 @@ class TestSamfileUtilityFunctions( unittest.TestCase ): samfile = pysam.Samfile( "ex1.bam", "rb" ) for contig in ("chr1", "chr2" ): - for start in xrange( 0, 2000, 100 ): + for start in range( 0, 2000, 100 ): end = start + 1 self.assertEqual( len( list( samfile.fetch( contig, start, end ) ) ), samfile.count( contig, start, end ) ) @@ -1485,7 +1683,11 @@ class TestSamfileUtilityFunctions( unittest.TestCase ): def testMate( self ): '''test mate access.''' - readnames = [ x.split("\t")[0] for x in open( "ex1.sam", "rb" ).readlines() ] + with open( "ex1.sam", "rb" ) as inf: + readnames = [ x.split(b"\t")[0] for x in inf.readlines() ] + if sys.version_info[0] >= 3: + readnames = [ name.decode('ascii') for name in readnames ] + counts = collections.defaultdict( int ) for x in readnames: counts[x] += 1 @@ -1537,7 +1739,7 @@ class TestSamfileIndex( unittest.TestCase): for read in samfile: reads[read.qname] += 1 - for qname, counts in reads.iteritems(): + for qname, counts in reads.items(): found = list(index.find( qname )) self.assertEqual( len(found), counts ) for x in found: self.assertEqual( x.qname, qname ) @@ -1545,7 +1747,8 @@ class TestSamfileIndex( unittest.TestCase): if __name__ == "__main__": # build data files - print "building data files" + print ("building data files") subprocess.call( "make", shell=True) - print "starting tests" + print ("starting tests") unittest.main() + print ("completed tests")