Imported Upstream version 0.3.1
authorDiane Trout <diane@caltech.edu>
Fri, 19 Nov 2010 20:17:53 +0000 (12:17 -0800)
committerDiane Trout <diane@caltech.edu>
Fri, 19 Nov 2010 20:17:53 +0000 (12:17 -0800)
MANIFEST.in
PKG-INFO
pysam/__init__.py
pysam/csamtools.pyx
pysam/version.py
setup.py
tests/Makefile
tests/ex8.sam [new file with mode: 0644]
tests/pysam_test.py

index 4bbbc8e87250a338c268e113f68558bb98e6d652..ac9e5c8902d4be97c19c1ebccb23763cc07f6c38 100644 (file)
@@ -24,6 +24,7 @@ include tests/ex4.sam
 include tests/ex5.sam
 include tests/ex6.sam
 include tests/ex7.sam
+include tests/ex8.sam
 include tests/example.py
 include tests/pysam_test.py
 include tests/segfault_tests.py
index b95ed79db3263f1c55b6aafaaa37a16e96717880..8c788c7b3c5377e07bc9506fef5a5e00cc4cfc05 100644 (file)
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.0
 Name: pysam
-Version: 0.3
+Version: 0.3.1
 Summary: pysam
 Home-page: http://code.google.com/p/pysam/
 Author: Andreas Heger
index 9f257c210eca878b0f0039b40f724f3482937284..282ef290770aad31aa1bbba8c3ebd1e1759263ab 100644 (file)
@@ -55,7 +55,10 @@ class SamtoolsDispatcher(object):
         # Ignore messages like these
         stderr = [ x for x in stderr \
                        if not x.startswith( "[sam_header_read2]" ) or \
-                       x.startswith("[bam_index_load]") ]
+                       x.startswith("[bam_index_load]") or \
+                       x.startswith("[bam_sort_core]") or \
+                       x.startswith("[samopen] SAM header is present")
+                   ]
         if stderr: raise SamtoolsError( "\n".join( stderr ) )
 
         # call parser for stdout:
index 242e68a402474774b414561186de6cdb6050144e..e94e0e626bde58542b961a5a6ff9fe23bae46675 100644 (file)
@@ -167,6 +167,14 @@ class StderrStore():
         self.stderr_save = Outs( sys.stderr.fileno() )
         self.stderr_save.setfd( self.stderr_h )
         
+    def readAndRelease( self ):
+        self.stderr_save.restore()
+        lines = []
+        if os.path.exists(self.stderr_f):
+            lines = open( self.stderr_f, "r" ).readlines()
+            os.remove( self.stderr_f )
+        return lines
+
     def release(self):
         self.stderr_save.restore()
         if os.path.exists(self.stderr_f):
@@ -316,7 +324,7 @@ cdef class Samfile:
 
             else:
                 # build header from a target names and lengths
-                assert referencenames and referencelengths, "either supply options `template`, `header` or  both `refernencenames` and `referencelengths` for writing"
+                assert referencenames and referencelengths, "either supply options `template`, `header` or  both `referencenames` and `referencelengths` for writing"
                 assert len(referencenames) == len(referencelengths), "unequal names and lengths of reference sequences"
 
                 # allocate and fill header
@@ -361,7 +369,13 @@ cdef class Samfile:
 
             store = StderrStore()
             self.samfile = samopen( filename, mode, NULL )
-            store.release()
+            result = store.readAndRelease()
+            # test for specific messages as open also outputs status messages
+            # that can be ignored.
+            if "[bam_header_read] invalid BAM binary header (this is not a BAM file).\n" in result:
+                raise ValueError( "invalid BAM binary header (is this a BAM file?)" )
+            elif '[samopen] no @SQ lines in the header.\n' in result:
+                raise ValueError( "no @SQ lines in the header (is this a SAM file?)")
 
         if self.samfile == NULL:
             raise IOError("could not open file `%s`" % filename )
@@ -385,6 +399,7 @@ cdef class Samfile:
     def getrname( self, tid ):
         '''(tid )
         convert numerical :term:`tid` into :ref:`reference` name.'''
+        if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
         if not 0 <= tid < self.samfile.header.n_targets:
             raise ValueError( "tid out of range 0<=tid<%i" % self.samfile.header.n_targets )
         return self.samfile.header.target_name[tid]
@@ -404,6 +419,9 @@ cdef class Samfile:
 
         Note that regions are 1-based, while start,end are python coordinates.
         '''
+        # This method's main objective is to translate from a reference to a tid. 
+        # For now, it calls bam_parse_region, which is clumsy. Might be worth
+        # implementing it all in pysam (makes use of khash).
         
         cdef int rtid
         cdef int rstart
@@ -416,14 +434,15 @@ cdef class Samfile:
         # translate to a region
         if reference:
             if start != None and end != None:
+                if start > end: raise ValueError( 'invalid region: start (%i) > end (%i)' % (start, end) )
                 region = "%s:%i-%i" % (reference, start+1, end)
             else:
                 region = reference
 
         if region:
-            store = StderrStore()
+            # this function might be called often (multiprocessing)
+            # thus avoid using StderrStore, see issue 46.
             bam_parse_region( self.samfile.header, region, &rtid, &rstart, &rend)        
-            store.release()
             if rtid < 0: raise ValueError( "invalid region `%s`" % region )
             if rstart > rend: raise ValueError( 'invalid region: start (%i) > end (%i)' % (rstart, rend) )
             if not 0 <= rstart < max_pos: raise ValueError( 'start out of range (%i)' % rstart )
@@ -442,6 +461,8 @@ cdef class Samfile:
 
     def tell( self ):
         '''return current file position'''
+        if not self._isOpen():
+            raise ValueError( "I/O operation on closed file" )
         if not self.isbam:
             raise NotImplementedError("seek only available in bam files")
 
@@ -605,6 +626,9 @@ cdef class Samfile:
 
         return the number of bytes written.
         '''
+        if not self._isOpen():
+            raise ValueError( "I/O operation on closed file" )
+
         return samwrite( self.samfile, read._delegate )
 
     def __enter__(self):
@@ -617,11 +641,13 @@ cdef class Samfile:
     property nreferences:
         '''number of :term:`reference` sequences in the file.'''
         def __get__(self):
+            if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
             return self.samfile.header.n_targets
 
     property references:
         """tuple with the names of :term:`reference` sequences."""
         def __get__(self): 
+            if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
             t = []
             for x from 0 <= x < self.samfile.header.n_targets:
                 t.append( self.samfile.header.target_name[x] )
@@ -631,6 +657,7 @@ cdef class Samfile:
         """tuple of the lengths of the :term:`reference` sequences. The lengths are in the same order as :attr:`pysam.Samfile.reference`
         """
         def __get__(self): 
+            if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
             t = []
             for x from 0 <= x < self.samfile.header.n_targets:
                 t.append( self.samfile.header.target_len[x] )
@@ -639,6 +666,7 @@ cdef class Samfile:
     property text:
         '''full contents of the :term:`sam file` header as a string.'''
         def __get__(self):
+            if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
             return PyString_FromStringAndSize(self.samfile.header.text, self.samfile.header.l_text)
 
     property header:
@@ -646,6 +674,8 @@ cdef class Samfile:
         a two-level dictionary.
         '''
         def __get__(self):
+            if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
+
             result = {}
 
             if self.samfile.header.text != NULL:
@@ -750,6 +780,7 @@ cdef class Samfile:
         return dest
 
     def __iter__(self):
+        if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
         return self 
 
     cdef bam1_t * getCurrent( self ):
@@ -797,7 +828,9 @@ cdef class Fastafile:
         return self.fastafile != NULL
 
     def __len__(self):
-        assert self.fastafile != NULL
+        if self.fastafile == NULL:
+            raise ValueError( "calling len() on closed file" )
+
         return faidx_fetch_nseq(self.fastafile)
 
     def _open( self, 
@@ -841,7 +874,7 @@ cdef class Fastafile:
         if not self._isOpen():
             raise ValueError( "I/O operation on closed file" )
 
-        cdef int len, max_pos
+        cdef int length, max_pos
         cdef char * seq
         max_pos = 2 << 29
 
@@ -855,23 +888,25 @@ cdef class Fastafile:
             # valid ranges are from 0 to 2^29-1
             if not 0 <= start < max_pos: raise ValueError( 'start out of range (%i)' % start )
             if not 0 <= end < max_pos: raise ValueError( 'end out of range (%i)' % end )
-
-            seq = faidx_fetch_seq(self.fastafile, reference, 
+            seq = faidx_fetch_seq(self.fastafile, 
+                                  reference, 
                                   start,
-                                  end-1, &len)
+                                  end-1, 
+                                  &length)
         else:
             # samtools adds a '\0' at the end
-            seq = fai_fetch( self.fastafile, region, &len )
+            seq = fai_fetch( self.fastafile, region, &length )
 
         # copy to python
         if seq == NULL: 
             return ""
         else:
-            result = seq
-            # clean up
-            free(seq)
-        
-        return result
+            try:
+                py_seq = PyString_FromStringAndSize(seq, length)
+            finally:
+                free(seq)
+
+        return py_seq
 
 ###########################################################################
 ###########################################################################
@@ -903,8 +938,11 @@ cdef class IteratorRow:
 
     def __cinit__(self, Samfile samfile, int tid, int beg, int end ):
 
-        assert samfile._isOpen()
-        assert samfile._hasIndex()
+        if not samfile._isOpen():
+            raise ValueError( "I/O operation on closed file" )
+        
+        if not samfile._hasIndex():
+            raise ValueError( "no index available for pileup" )
         
         # makes sure that samfile stays alive as long as the
         # iterator is alive
@@ -958,7 +996,8 @@ cdef class IteratorRowAll:
 
     def __cinit__(self, Samfile samfile):
 
-        assert samfile._isOpen()
+        if not samfile._isOpen():
+            raise ValueError( "I/O operation on closed file" )
 
         if samfile.isbam: mode = "rb"
         else: mode = "r"
@@ -1480,6 +1519,7 @@ cdef class AlignedRead:
 
     property tags:
         """the tags in the AUX field.
+
         This property permits convenience access to 
         the tags. Changes it the returned list will
         not update the tags automatically. Instead,
@@ -1493,63 +1533,49 @@ cdef class AlignedRead:
             cdef char * ctag
             cdef bam1_t * src
             cdef uint8_t * s
-            cdef char tpe
+            cdef char auxtag[3]
+            cdef char auxtype
             
             src = self._delegate
             if src.l_aux == 0: return None
             
             s = bam1_aux( src )
             result = []
-            ctag = <char*>calloc( 3, sizeof(char) )
-            cdef int x
+            auxtag[2] = 0
             while s < (src.data + src.data_len):
                 # get tag
-                ctag[0] = s[0]
-                ctag[1] = s[1]
-                pytag = ctag
-
+                auxtag[0] = s[0]
+                auxtag[1] = s[1]
                 s += 2
+                auxtype = s[0]
 
-                # convert type - is there a better way?
-                ctag[0] = s[0]
-                ctag[1] = 0
-                pytype = ctag
-                # get type and value 
-                # how do I do char literal comparison in cython?
-                # the code below works (i.e, is C comparison)
-                tpe = toupper(s[0])
-                if tpe == 'S':
+                if auxtype in ('c', 'C'):
                     value = <int>bam_aux2i(s)            
-                    s += 2
-                elif tpe == 'I':
+                    s += 1
+                elif auxtype in ('s', 'S'):
                     value = <int>bam_aux2i(s)            
+                    s += 2
+                elif auxtype in ('i', 'I'):
+                    value = <float>bam_aux2i(s)
                     s += 4
-                elif tpe == 'F':
+                elif auxtype == 'f':
                     value = <float>bam_aux2f(s)
                     s += 4
-                elif tpe == 'D':
+                elif auxtype == 'd':
                     value = <double>bam_aux2d(s)
                     s += 8
-                elif tpe == 'C':
-                    value = <int>bam_aux2i(s)
-                    s += 1
-                elif tpe == 'A':
-                    # there might a more efficient way
-                    # to convert a char into a string
+                elif auxtype == 'A':
                     value = "%c" % <char>bam_aux2A(s)
                     s += 1
-                elif tpe == 'Z':
+                elif auxtype in ('Z', 'H'):
                     value = <char*>bam_aux2Z(s)
                     # +1 for NULL terminated string
                     s += len(value) + 1
-
-                # skip over type
+                 # 
                 s += 1
+  
+                result.append( (auxtag, value) )
 
-                # ignore pytype
-                result.append( (pytag, value) )
-
-            free( ctag )
             return result
 
         def __set__(self, tags):
@@ -1557,51 +1583,57 @@ cdef class AlignedRead:
             cdef bam1_t * src
             cdef uint8_t * s
             cdef uint8_t * new_data
+            cdef char * temp 
             cdef int guessed_size, control_size
+            cdef int max_size, size, offset
+
             src = self._delegate
-            cdef int max_size, size
             max_size = 4000
-
-            # map samtools code to python.struct code and byte size
-            buffer = ctypes.create_string_buffer(max_size)
-
             offset = 0
-            for pytag, value in tags:
-                t = type(value)
-                if t == types.FloatType:
-                    fmt = "<cccf"
-                elif t == types.IntType:
-                    if value < 0:
-                        if value >= -127: fmt, pytype = "<cccb", 'c'
-                        elif value >= -32767: fmt, pytype = "<ccch", 's'
-                        elif value < -2147483648: raise ValueError( "integer %i out of range of BAM/SAM specification" % value )
-                        else: fmt, ctype = "<ccci", 'i'[0]
-                    else:
-                        if value <= 255: fmt, pytype = "<cccB", 'C'
-                        elif value <= 65535: fmt, pytype = "<cccH", 'S'
-                        elif value > 4294967295: raise ValueError( "integer %i out of range of BAM/SAM specification" % value )
-                        else: fmt, pytype = "<cccI", 'I'
-                else:
-                    # Note: hex strings (H) are not supported yet
-                    if len(value) == 1:
-                        fmt, pytype = "<cccc", 'A'
+
+            if tags != None: 
+
+                # map samtools code to python.struct code and byte size
+                buffer = ctypes.create_string_buffer(max_size)
+
+                for pytag, value in tags:
+                    t = type(value)
+                    if t == types.FloatType:
+                        fmt = "<cccf"
+                    elif t == types.IntType:
+                        if value < 0:
+                            if value >= -127: fmt, pytype = "<cccb", 'c'
+                            elif value >= -32767: fmt, pytype = "<ccch", 's'
+                            elif value < -2147483648: raise ValueError( "integer %i out of range of BAM/SAM specification" % value )
+                            else: fmt, ctype = "<ccci", 'i'[0]
+                        else:
+                            if value <= 255: fmt, pytype = "<cccB", 'C'
+                            elif value <= 65535: fmt, pytype = "<cccH", 'S'
+                            elif value > 4294967295: raise ValueError( "integer %i out of range of BAM/SAM specification" % value )
+                            else: fmt, pytype = "<cccI", 'I'
                     else:
-                        fmt, pytype = "<ccc%is" % (len(value)+1), 'Z'
-
-                size = struct.calcsize(fmt)
-                if offset + size > max_size:
-                    raise NotImplementedError("tags field too large")
-
-                struct.pack_into( fmt,
-                                  buffer,
-                                  offset,
-                                  pytag[0],
-                                  pytag[1],
-                                  pytype,
-                                  value )
-                offset += size
+                        # Note: hex strings (H) are not supported yet
+                        if len(value) == 1:
+                            fmt, pytype = "<cccc", 'A'
+                        else:
+                            fmt, pytype = "<ccc%is" % (len(value)+1), 'Z'
+
+                    size = struct.calcsize(fmt)
+                    if offset + size > max_size:
+                        raise NotImplementedError("tags field too large")
+
+                    struct.pack_into( fmt,
+                                      buffer,
+                                      offset,
+                                      pytag[0],
+                                      pytag[1],
+                                      pytype,
+                                      value )
+                    offset += size
             
             # delete the old data and allocate new
+            # if offset == 0, the aux field will be 
+            # empty
             pysam_bam_update( src, 
                               src.l_aux,
                               offset,
@@ -1609,15 +1641,15 @@ cdef class AlignedRead:
             
             src.l_aux = offset
 
-            if offset == 0: return
+            # copy data only if there is any
+            if offset != 0:
 
-            # get location of new data
-            s = bam1_aux( src )            
+                # get location of new data
+                s = bam1_aux( src )            
             
-            # check if there is direct path from buffer.raw to tmp
-            cdef char * temp 
-            temp = buffer.raw
-            memcpy( s, temp, offset )            
+                # check if there is direct path from buffer.raw to tmp
+                temp = buffer.raw
+                memcpy( s, temp, offset )            
 
     property flag: 
         """properties flag"""
index 5965c7cca61d3c074cbf22b79afee14e501bd50f..3b6c92a463d6d126297bba4de0ad4c086dba7b64 100644 (file)
@@ -1,6 +1,6 @@
 # pysam versioning information
 
-__version__ = "0.3"
+__version__ = "0.3.1"
 
 __samtools_version__ = "0.1.8"
 
index 925f0166e5f7314b5091a9363aeaa4c2cfe21c75..ef3d29f47e1e92fb383320dad8947ecf42b8d933 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@ import version
 
 version = version.__version__
 
-samtools_exclude = ( "bamtk.c", "razip.c", "bgzip.c" )
+samtools_exclude = ( "bamtk.c", "razip.c", "bgzip.c", "errmod.c", "bam_reheader.c", "bam2bcf.c" )
 samtools_dest = os.path.abspath( "samtools" )
 tabix_exclude = ( "main.c", )
 tabix_dest = os.path.abspath( "tabix" )
@@ -109,7 +109,8 @@ metadata = {
       "pysam/namedtuple",
       "pysam/version" ],
     'ext_modules': [samtools, tabix],
-    'cmdclass' : {'build_ext': build_ext} }
+    'cmdclass' : {'build_ext': build_ext},
+    }
 
 if __name__=='__main__':
    dist = setup(**metadata)
index 54037509eded5fc31fac1b66580f6df04c5f1c86..38d30f0ccf480f42f9651579297733c541bbc0a3 100644 (file)
@@ -4,7 +4,8 @@ all: ex1.glf ex1.pileup.gz ex1.bam.bai ex1.glfview.gz \
        ex3.bam ex3.bam.bai \
        ex4.bam ex4.bam.bai \
        ex5.bam ex5.bam.bai \
-       ex6.bam 
+       ex6.bam \
+       ex8.bam
 
 ex2.sam.gz: ex1.bam ex1.bam.bai
                samtools view -h ex1.bam | gzip > ex2.sam.gz
diff --git a/tests/ex8.sam b/tests/ex8.sam
new file mode 100644 (file)
index 0000000..5a16b4f
--- /dev/null
@@ -0,0 +1,3 @@
+@HD    VN:1.0
+@SQ    SN:2    LN:48297693
+GJP00TM04CAQ5W 0       2       38297693        60      45H51M1D13M1D12M1D9M2D5M1D7M4D2M1I6M1D28M1D5M1D2M1D18M55H       *       0       0       CATGAAGAACCGCTGGGTATGGAGCACACCTCACCTGATGGACAGTTGATTATGCTCACCTTAACGCTAATTGAGAGCAGCACAAGAGGACTGGAAACTAGAATTTACTCCTCATCTCCGAAGATGTGAATATTCTAAATTCAGCTTGCCTCTTGCTTC IID7757111/=;?///:D>777;EEGAAAEEIHHIIIIIIIIIIIIIIBBBIIIIH==<<<DDGEEE;<<<A><<<DEDDA>>>D?1112544556::03---//25.22=;DD?;;;>BDDDEEEGGGA<888<BAA888<GGGGGEB?9::DD551 NM:i:15 MD:Z:51^T13^A12^A9^AA5^A7^AAAA8^T28^T5^A2^T18   RG:Z:GJP00TM04
index 5be5ca4c783f74cef3ebf775681ef1f95a619269..aefcbb94e2a60fcdd651ae7695c2e22d9834952e 100755 (executable)
@@ -100,11 +100,17 @@ class BinaryTest(unittest.TestCase):
                 ("ex1.view", "samtools view ex1.bam > ex1.view"),
                 ("pysam_ex1.view", (pysam.view, "ex1.bam" ) ),
                 ),
+          "view2" :
+        (
+                ("ex1.view", "samtools view -bT ex1.fa -o ex1.view2 ex1.sam"),
+                # note that -o ex1.view2 throws exception.
+                ("pysam_ex1.view", (pysam.view, "-bT ex1.fa -oex1.view2 ex1.sam" ) ),
+                ),
         }
 
     # some tests depend on others. The order specifies in which order
     # the samtools commands are executed.
-    mOrder = ('faidx', 'import', 'index', 'pileup1', 'pileup2', 'glfview', 'view' )
+    mOrder = ('faidx', 'import', 'index', 'pileup1', 'pileup2', 'glfview', 'view', 'view2' )
 
     def setUp( self ):
         '''setup tests. 
@@ -247,11 +253,22 @@ class IOTest(unittest.TestCase):
         samfile.close()
         self.assertRaises( ValueError, samfile.fetch, 'chr1', 100, 120)
 
-    def testPileupFromClosedFile( self ):
+    def testClosedFile( self ):
+        '''test that access to a closed samfile raises ValueError.'''
 
         samfile = pysam.Samfile( "ex1.bam", "rb" )
         samfile.close()
+        self.assertRaises( ValueError, samfile.fetch, 'chr1', 100, 120)
         self.assertRaises( ValueError, samfile.pileup, 'chr1', 100, 120)
+        self.assertRaises( ValueError, samfile.getrname, 0 )
+        self.assertRaises( ValueError, samfile.tell )
+        self.assertRaises( ValueError, samfile.write, None )
+        self.assertRaises( ValueError, samfile.seek, 0 )
+        self.assertRaises( ValueError, getattr, samfile, "nreferences" )
+        self.assertRaises( ValueError, getattr, samfile, "references" )
+        self.assertRaises( ValueError, getattr, samfile, "lengths" )
+        self.assertRaises( ValueError, getattr, samfile, "text" )
+        self.assertRaises( ValueError, getattr, samfile, "header" )
 
     def testBinaryReadFromSamfile( self ):
         pass
@@ -267,12 +284,6 @@ class IOTest(unittest.TestCase):
         self.assertRaises( ValueError, samfile.fetch )
         self.assertEqual( len(list( samfile.fetch(until_eof = True) )), 3270 )
 
-    def testReadingFromFileWithWrongMode( self ):
-
-        assert not os.path.exists( "ex2.bam.bai" )
-        samfile = pysam.Samfile( "ex2.bam", "r" )
-        self.assertRaises( ValueError, samfile.fetch )
-
 class TestIteratorRow(unittest.TestCase):
 
     def setUp(self):
@@ -602,9 +613,29 @@ class TestExceptions(unittest.TestCase):
     def testOutOfRangeLargeOldFormat(self):
         self.assertRaises( ValueError, self.samfile.fetch, "chr1:99999999999999999-999999999999999999" )
 
+    def testZeroToZero(self):        
+        '''see issue 44'''
+        self.assertEqual( len(list(self.samfile.fetch('chr1', 0, 0))), 0)
+
     def tearDown(self):
         self.samfile.close()
 
+
+class TestWrongFormat(unittest.TestCase):
+    '''test cases for opening files not in bam/sam format.'''
+
+    def testOpenSamAsBam( self ):
+        self.assertRaises( ValueError, pysam.Samfile, 'ex1.sam', 'rb' )
+
+    def testOpenBamAsSam( self ):
+        self.assertRaises( ValueError, pysam.Samfile, 'ex1.bam', 'r' )
+
+    def testOpenFastaAsSam( self ):
+        self.assertRaises( ValueError, pysam.Samfile, 'ex1.fa', 'r' )
+
+    def testOpenFastaAsBam( self ):
+        self.assertRaises( ValueError, pysam.Samfile, 'ex1.fa', 'rb' )
+
 class TestFastaFile(unittest.TestCase):
 
     mSequences = { 'chr1' :
@@ -691,7 +722,7 @@ class TestAlignedRead(unittest.TestCase):
         a.mpos=200
         a.isize=167
        a.qual="1234" * 3
-
+        # todo: create tags
         return a
 
     def testUpdate( self ):
@@ -760,6 +791,19 @@ class TestAlignedRead(unittest.TestCase):
 
         return a
 
+    def testTagParsing( self ):
+        '''test for tag parsing
+
+        see http://groups.google.com/group/pysam-user-group/browse_thread/thread/67ca204059ea465a
+        '''
+        samfile=pysam.Samfile( "ex8.bam","rb" )
+
+        for entry in samfile:
+            before = entry.tags
+            entry.tags = entry.tags
+            after = entry.tags
+            self.assertEqual( after, before )
+
 class TestDeNovoConstruction(unittest.TestCase):
     '''check BAM/SAM file construction using ex3.sam
     
@@ -874,6 +918,7 @@ class TestDeNovoConstruction(unittest.TestCase):
         
         os.unlink( tmpfilename )
 
+
 class TestDoubleFetch(unittest.TestCase):
     '''check if two iterators on the same bamfile are independent.'''