Imported Upstream version 0.3.1
author Diane Trout <diane@caltech.edu>
Fri, 19 Nov 2010 20:17:53 +0000 (12:17 -0800)
committer Diane Trout <diane@caltech.edu>
Fri, 19 Nov 2010 20:17:53 +0000 (12:17 -0800)
MANIFEST.in
PKG-INFO
pysam/__init__.py
pysam/csamtools.pyx
pysam/version.py
setup.py
tests/Makefile
tests/ex8.sam [new file with mode: 0644]
tests/pysam_test.py

index 4bbbc8e87250a338c268e113f68558bb98e6d652..ac9e5c8902d4be97c19c1ebccb23763cc07f6c38 100644 (file)
@@ -24,6 +24,7 @@ include tests/ex4.sam
 include tests/ex5.sam
 include tests/ex6.sam
 include tests/ex7.sam
 include tests/ex5.sam
 include tests/ex6.sam
 include tests/ex7.sam
+include tests/ex8.sam
 include tests/example.py
 include tests/pysam_test.py
 include tests/segfault_tests.py
 include tests/example.py
 include tests/pysam_test.py
 include tests/segfault_tests.py
index b95ed79db3263f1c55b6aafaaa37a16e96717880..8c788c7b3c5377e07bc9506fef5a5e00cc4cfc05 100644 (file)
--- a/PKG-INFO
+++ b/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 1.0
 Name: pysam
 Metadata-Version: 1.0
 Name: pysam
-Version: 0.3
+Version: 0.3.1
 Summary: pysam
 Home-page: http://code.google.com/p/pysam/
 Author: Andreas Heger
 Summary: pysam
 Home-page: http://code.google.com/p/pysam/
 Author: Andreas Heger
index 9f257c210eca878b0f0039b40f724f3482937284..282ef290770aad31aa1bbba8c3ebd1e1759263ab 100644 (file)
@@ -55,7 +55,10 @@ class SamtoolsDispatcher(object):
         # Ignore messages like these
         stderr = [ x for x in stderr \
                        if not x.startswith( "[sam_header_read2]" ) or \
         # Ignore messages like these
         stderr = [ x for x in stderr \
                        if not x.startswith( "[sam_header_read2]" ) or \
-                       x.startswith("[bam_index_load]") ]
+                       x.startswith("[bam_index_load]") or \
+                       x.startswith("[bam_sort_core]") or \
+                       x.startswith("[samopen] SAM header is present")
+                   ]
         if stderr: raise SamtoolsError( "\n".join( stderr ) )
 
         # call parser for stdout:
         if stderr: raise SamtoolsError( "\n".join( stderr ) )
 
         # call parser for stdout:
index 242e68a402474774b414561186de6cdb6050144e..e94e0e626bde58542b961a5a6ff9fe23bae46675 100644 (file)
@@ -167,6 +167,14 @@ class StderrStore():
         self.stderr_save = Outs( sys.stderr.fileno() )
         self.stderr_save.setfd( self.stderr_h )
         
         self.stderr_save = Outs( sys.stderr.fileno() )
         self.stderr_save.setfd( self.stderr_h )
         
+    def readAndRelease( self ):
+        self.stderr_save.restore()
+        lines = []
+        if os.path.exists(self.stderr_f):
+            lines = open( self.stderr_f, "r" ).readlines()
+            os.remove( self.stderr_f )
+        return lines
+
     def release(self):
         self.stderr_save.restore()
         if os.path.exists(self.stderr_f):
     def release(self):
         self.stderr_save.restore()
         if os.path.exists(self.stderr_f):
@@ -316,7 +324,7 @@ cdef class Samfile:
 
             else:
                 # build header from a target names and lengths
 
             else:
                 # build header from a target names and lengths
-                assert referencenames and referencelengths, "either supply options `template`, `header` or  both `refernencenames` and `referencelengths` for writing"
+                assert referencenames and referencelengths, "either supply options `template`, `header` or  both `referencenames` and `referencelengths` for writing"
                 assert len(referencenames) == len(referencelengths), "unequal names and lengths of reference sequences"
 
                 # allocate and fill header
                 assert len(referencenames) == len(referencelengths), "unequal names and lengths of reference sequences"
 
                 # allocate and fill header
@@ -361,7 +369,13 @@ cdef class Samfile:
 
             store = StderrStore()
             self.samfile = samopen( filename, mode, NULL )
 
             store = StderrStore()
             self.samfile = samopen( filename, mode, NULL )
-            store.release()
+            result = store.readAndRelease()
+            # test for specific messages as open also outputs status messages
+            # that can be ignored.
+            if "[bam_header_read] invalid BAM binary header (this is not a BAM file).\n" in result:
+                raise ValueError( "invalid BAM binary header (is this a BAM file?)" )
+            elif '[samopen] no @SQ lines in the header.\n' in result:
+                raise ValueError( "no @SQ lines in the header (is this a SAM file?)")
 
         if self.samfile == NULL:
             raise IOError("could not open file `%s`" % filename )
 
         if self.samfile == NULL:
             raise IOError("could not open file `%s`" % filename )
@@ -385,6 +399,7 @@ cdef class Samfile:
     def getrname( self, tid ):
         '''(tid )
         convert numerical :term:`tid` into :ref:`reference` name.'''
     def getrname( self, tid ):
         '''(tid )
         convert numerical :term:`tid` into :ref:`reference` name.'''
+        if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
         if not 0 <= tid < self.samfile.header.n_targets:
             raise ValueError( "tid out of range 0<=tid<%i" % self.samfile.header.n_targets )
         return self.samfile.header.target_name[tid]
         if not 0 <= tid < self.samfile.header.n_targets:
             raise ValueError( "tid out of range 0<=tid<%i" % self.samfile.header.n_targets )
         return self.samfile.header.target_name[tid]
@@ -404,6 +419,9 @@ cdef class Samfile:
 
         Note that regions are 1-based, while start,end are python coordinates.
         '''
 
         Note that regions are 1-based, while start,end are python coordinates.
         '''
+        # This method's main objective is to translate from a reference to a tid. 
+        # For now, it calls bam_parse_region, which is clumsy. Might be worth
+        # implementing it all in pysam (makes use of khash).
         
         cdef int rtid
         cdef int rstart
         
         cdef int rtid
         cdef int rstart
@@ -416,14 +434,15 @@ cdef class Samfile:
         # translate to a region
         if reference:
             if start != None and end != None:
         # translate to a region
         if reference:
             if start != None and end != None:
+                if start > end: raise ValueError( 'invalid region: start (%i) > end (%i)' % (start, end) )
                 region = "%s:%i-%i" % (reference, start+1, end)
             else:
                 region = reference
 
         if region:
                 region = "%s:%i-%i" % (reference, start+1, end)
             else:
                 region = reference
 
         if region:
-            store = StderrStore()
+            # this function might be called often (multiprocessing)
+            # thus avoid using StderrStore, see issue 46.
             bam_parse_region( self.samfile.header, region, &rtid, &rstart, &rend)        
             bam_parse_region( self.samfile.header, region, &rtid, &rstart, &rend)        
-            store.release()
             if rtid < 0: raise ValueError( "invalid region `%s`" % region )
             if rstart > rend: raise ValueError( 'invalid region: start (%i) > end (%i)' % (rstart, rend) )
             if not 0 <= rstart < max_pos: raise ValueError( 'start out of range (%i)' % rstart )
             if rtid < 0: raise ValueError( "invalid region `%s`" % region )
             if rstart > rend: raise ValueError( 'invalid region: start (%i) > end (%i)' % (rstart, rend) )
             if not 0 <= rstart < max_pos: raise ValueError( 'start out of range (%i)' % rstart )
@@ -442,6 +461,8 @@ cdef class Samfile:
 
     def tell( self ):
         '''return current file position'''
 
     def tell( self ):
         '''return current file position'''
+        if not self._isOpen():
+            raise ValueError( "I/O operation on closed file" )
         if not self.isbam:
             raise NotImplementedError("seek only available in bam files")
 
         if not self.isbam:
             raise NotImplementedError("seek only available in bam files")
 
@@ -605,6 +626,9 @@ cdef class Samfile:
 
         return the number of bytes written.
         '''
 
         return the number of bytes written.
         '''
+        if not self._isOpen():
+            raise ValueError( "I/O operation on closed file" )
+
         return samwrite( self.samfile, read._delegate )
 
     def __enter__(self):
         return samwrite( self.samfile, read._delegate )
 
     def __enter__(self):
@@ -617,11 +641,13 @@ cdef class Samfile:
     property nreferences:
         '''number of :term:`reference` sequences in the file.'''
         def __get__(self):
     property nreferences:
         '''number of :term:`reference` sequences in the file.'''
         def __get__(self):
+            if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
             return self.samfile.header.n_targets
 
     property references:
         """tuple with the names of :term:`reference` sequences."""
         def __get__(self): 
             return self.samfile.header.n_targets
 
     property references:
         """tuple with the names of :term:`reference` sequences."""
         def __get__(self): 
+            if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
             t = []
             for x from 0 <= x < self.samfile.header.n_targets:
                 t.append( self.samfile.header.target_name[x] )
             t = []
             for x from 0 <= x < self.samfile.header.n_targets:
                 t.append( self.samfile.header.target_name[x] )
@@ -631,6 +657,7 @@ cdef class Samfile:
         """tuple of the lengths of the :term:`reference` sequences. The lengths are in the same order as :attr:`pysam.Samfile.reference`
         """
         def __get__(self): 
         """tuple of the lengths of the :term:`reference` sequences. The lengths are in the same order as :attr:`pysam.Samfile.reference`
         """
         def __get__(self): 
+            if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
             t = []
             for x from 0 <= x < self.samfile.header.n_targets:
                 t.append( self.samfile.header.target_len[x] )
             t = []
             for x from 0 <= x < self.samfile.header.n_targets:
                 t.append( self.samfile.header.target_len[x] )
@@ -639,6 +666,7 @@ cdef class Samfile:
     property text:
         '''full contents of the :term:`sam file` header as a string.'''
         def __get__(self):
     property text:
         '''full contents of the :term:`sam file` header as a string.'''
         def __get__(self):
+            if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
             return PyString_FromStringAndSize(self.samfile.header.text, self.samfile.header.l_text)
 
     property header:
             return PyString_FromStringAndSize(self.samfile.header.text, self.samfile.header.l_text)
 
     property header:
@@ -646,6 +674,8 @@ cdef class Samfile:
         a two-level dictionary.
         '''
         def __get__(self):
         a two-level dictionary.
         '''
         def __get__(self):
+            if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
+
             result = {}
 
             if self.samfile.header.text != NULL:
             result = {}
 
             if self.samfile.header.text != NULL:
@@ -750,6 +780,7 @@ cdef class Samfile:
         return dest
 
     def __iter__(self):
         return dest
 
     def __iter__(self):
+        if not self._isOpen(): raise ValueError( "I/O operation on closed file" )
         return self 
 
     cdef bam1_t * getCurrent( self ):
         return self 
 
     cdef bam1_t * getCurrent( self ):
@@ -797,7 +828,9 @@ cdef class Fastafile:
         return self.fastafile != NULL
 
     def __len__(self):
         return self.fastafile != NULL
 
     def __len__(self):
-        assert self.fastafile != NULL
+        if self.fastafile == NULL:
+            raise ValueError( "calling len() on closed file" )
+
         return faidx_fetch_nseq(self.fastafile)
 
     def _open( self, 
         return faidx_fetch_nseq(self.fastafile)
 
     def _open( self, 
@@ -841,7 +874,7 @@ cdef class Fastafile:
         if not self._isOpen():
             raise ValueError( "I/O operation on closed file" )
 
         if not self._isOpen():
             raise ValueError( "I/O operation on closed file" )
 
-        cdef int len, max_pos
+        cdef int length, max_pos
         cdef char * seq
         max_pos = 2 << 29
 
         cdef char * seq
         max_pos = 2 << 29
 
@@ -855,23 +888,25 @@ cdef class Fastafile:
             # valid ranges are from 0 to 2^29-1
             if not 0 <= start < max_pos: raise ValueError( 'start out of range (%i)' % start )
             if not 0 <= end < max_pos: raise ValueError( 'end out of range (%i)' % end )
             # valid ranges are from 0 to 2^29-1
             if not 0 <= start < max_pos: raise ValueError( 'start out of range (%i)' % start )
             if not 0 <= end < max_pos: raise ValueError( 'end out of range (%i)' % end )
-
-            seq = faidx_fetch_seq(self.fastafile, reference, 
+            seq = faidx_fetch_seq(self.fastafile, 
+                                  reference, 
                                   start,
                                   start,
-                                  end-1, &len)
+                                  end-1, 
+                                  &length)
         else:
             # samtools adds a '\0' at the end
         else:
             # samtools adds a '\0' at the end
-            seq = fai_fetch( self.fastafile, region, &len )
+            seq = fai_fetch( self.fastafile, region, &length )
 
         # copy to python
         if seq == NULL: 
             return ""
         else:
 
         # copy to python
         if seq == NULL: 
             return ""
         else:
-            result = seq
-            # clean up
-            free(seq)
-        
-        return result
+            try:
+                py_seq = PyString_FromStringAndSize(seq, length)
+            finally:
+                free(seq)
+
+        return py_seq
 
 ###########################################################################
 ###########################################################################
 
 ###########################################################################
 ###########################################################################
@@ -903,8 +938,11 @@ cdef class IteratorRow:
 
     def __cinit__(self, Samfile samfile, int tid, int beg, int end ):
 
 
     def __cinit__(self, Samfile samfile, int tid, int beg, int end ):
 
-        assert samfile._isOpen()
-        assert samfile._hasIndex()
+        if not samfile._isOpen():
+            raise ValueError( "I/O operation on closed file" )
+        
+        if not samfile._hasIndex():
+            raise ValueError( "no index available for pileup" )
         
         # makes sure that samfile stays alive as long as the
         # iterator is alive
         
         # makes sure that samfile stays alive as long as the
         # iterator is alive
@@ -958,7 +996,8 @@ cdef class IteratorRowAll:
 
     def __cinit__(self, Samfile samfile):
 
 
     def __cinit__(self, Samfile samfile):
 
-        assert samfile._isOpen()
+        if not samfile._isOpen():
+            raise ValueError( "I/O operation on closed file" )
 
         if samfile.isbam: mode = "rb"
         else: mode = "r"
 
         if samfile.isbam: mode = "rb"
         else: mode = "r"
@@ -1480,6 +1519,7 @@ cdef class AlignedRead:
 
     property tags:
         """the tags in the AUX field.
 
     property tags:
         """the tags in the AUX field.
+
         This property permits convenience access to 
         the tags. Changes it the returned list will
         not update the tags automatically. Instead,
         This property permits convenience access to 
         the tags. Changes it the returned list will
         not update the tags automatically. Instead,
@@ -1493,63 +1533,49 @@ cdef class AlignedRead:
             cdef char * ctag
             cdef bam1_t * src
             cdef uint8_t * s
             cdef char * ctag
             cdef bam1_t * src
             cdef uint8_t * s
-            cdef char tpe
+            cdef char auxtag[3]
+            cdef char auxtype
             
             src = self._delegate
             if src.l_aux == 0: return None
             
             s = bam1_aux( src )
             result = []
             
             src = self._delegate
             if src.l_aux == 0: return None
             
             s = bam1_aux( src )
             result = []
-            ctag = <char*>calloc( 3, sizeof(char) )
-            cdef int x
+            auxtag[2] = 0
             while s < (src.data + src.data_len):
                 # get tag
             while s < (src.data + src.data_len):
                 # get tag
-                ctag[0] = s[0]
-                ctag[1] = s[1]
-                pytag = ctag
-
+                auxtag[0] = s[0]
+                auxtag[1] = s[1]
                 s += 2
                 s += 2
+                auxtype = s[0]
 
 
-                # convert type - is there a better way?
-                ctag[0] = s[0]
-                ctag[1] = 0
-                pytype = ctag
-                # get type and value 
-                # how do I do char literal comparison in cython?
-                # the code below works (i.e, is C comparison)
-                tpe = toupper(s[0])
-                if tpe == 'S':
+                if auxtype in ('c', 'C'):
                     value = <int>bam_aux2i(s)            
                     value = <int>bam_aux2i(s)            
-                    s += 2
-                elif tpe == 'I':
+                    s += 1
+                elif auxtype in ('s', 'S'):
                     value = <int>bam_aux2i(s)            
                     value = <int>bam_aux2i(s)            
+                    s += 2
+                elif auxtype in ('i', 'I'):
+                    value = <float>bam_aux2i(s)
                     s += 4
                     s += 4
-                elif tpe == 'F':
+                elif auxtype == 'f':
                     value = <float>bam_aux2f(s)
                     s += 4
                     value = <float>bam_aux2f(s)
                     s += 4
-                elif tpe == 'D':
+                elif auxtype == 'd':
                     value = <double>bam_aux2d(s)
                     s += 8
                     value = <double>bam_aux2d(s)
                     s += 8
-                elif tpe == 'C':
-                    value = <int>bam_aux2i(s)
-                    s += 1
-                elif tpe == 'A':
-                    # there might a more efficient way
-                    # to convert a char into a string
+                elif auxtype == 'A':
                     value = "%c" % <char>bam_aux2A(s)
                     s += 1
                     value = "%c" % <char>bam_aux2A(s)
                     s += 1
-                elif tpe == 'Z':
+                elif auxtype in ('Z', 'H'):
                     value = <char*>bam_aux2Z(s)
                     # +1 for NULL terminated string
                     s += len(value) + 1
                     value = <char*>bam_aux2Z(s)
                     # +1 for NULL terminated string
                     s += len(value) + 1
-
-                # skip over type
+                 # 
                 s += 1
                 s += 1
+  
+                result.append( (auxtag, value) )
 
 
-                # ignore pytype
-                result.append( (pytag, value) )
-
-            free( ctag )
             return result
 
         def __set__(self, tags):
             return result
 
         def __set__(self, tags):
@@ -1557,51 +1583,57 @@ cdef class AlignedRead:
             cdef bam1_t * src
             cdef uint8_t * s
             cdef uint8_t * new_data
             cdef bam1_t * src
             cdef uint8_t * s
             cdef uint8_t * new_data
+            cdef char * temp 
             cdef int guessed_size, control_size
             cdef int guessed_size, control_size
+            cdef int max_size, size, offset
+
             src = self._delegate
             src = self._delegate
-            cdef int max_size, size
             max_size = 4000
             max_size = 4000
-
-            # map samtools code to python.struct code and byte size
-            buffer = ctypes.create_string_buffer(max_size)
-
             offset = 0
             offset = 0
-            for pytag, value in tags:
-                t = type(value)
-                if t == types.FloatType:
-                    fmt = "<cccf"
-                elif t == types.IntType:
-                    if value < 0:
-                        if value >= -127: fmt, pytype = "<cccb", 'c'
-                        elif value >= -32767: fmt, pytype = "<ccch", 's'
-                        elif value < -2147483648: raise ValueError( "integer %i out of range of BAM/SAM specification" % value )
-                        else: fmt, ctype = "<ccci", 'i'[0]
-                    else:
-                        if value <= 255: fmt, pytype = "<cccB", 'C'
-                        elif value <= 65535: fmt, pytype = "<cccH", 'S'
-                        elif value > 4294967295: raise ValueError( "integer %i out of range of BAM/SAM specification" % value )
-                        else: fmt, pytype = "<cccI", 'I'
-                else:
-                    # Note: hex strings (H) are not supported yet
-                    if len(value) == 1:
-                        fmt, pytype = "<cccc", 'A'
+
+            if tags != None: 
+
+                # map samtools code to python.struct code and byte size
+                buffer = ctypes.create_string_buffer(max_size)
+
+                for pytag, value in tags:
+                    t = type(value)
+                    if t == types.FloatType:
+                        fmt = "<cccf"
+                    elif t == types.IntType:
+                        if value < 0:
+                            if value >= -127: fmt, pytype = "<cccb", 'c'
+                            elif value >= -32767: fmt, pytype = "<ccch", 's'
+                            elif value < -2147483648: raise ValueError( "integer %i out of range of BAM/SAM specification" % value )
+                            else: fmt, ctype = "<ccci", 'i'[0]
+                        else:
+                            if value <= 255: fmt, pytype = "<cccB", 'C'
+                            elif value <= 65535: fmt, pytype = "<cccH", 'S'
+                            elif value > 4294967295: raise ValueError( "integer %i out of range of BAM/SAM specification" % value )
+                            else: fmt, pytype = "<cccI", 'I'
                     else:
                     else:
-                        fmt, pytype = "<ccc%is" % (len(value)+1), 'Z'
-
-                size = struct.calcsize(fmt)
-                if offset + size > max_size:
-                    raise NotImplementedError("tags field too large")
-
-                struct.pack_into( fmt,
-                                  buffer,
-                                  offset,
-                                  pytag[0],
-                                  pytag[1],
-                                  pytype,
-                                  value )
-                offset += size
+                        # Note: hex strings (H) are not supported yet
+                        if len(value) == 1:
+                            fmt, pytype = "<cccc", 'A'
+                        else:
+                            fmt, pytype = "<ccc%is" % (len(value)+1), 'Z'
+
+                    size = struct.calcsize(fmt)
+                    if offset + size > max_size:
+                        raise NotImplementedError("tags field too large")
+
+                    struct.pack_into( fmt,
+                                      buffer,
+                                      offset,
+                                      pytag[0],
+                                      pytag[1],
+                                      pytype,
+                                      value )
+                    offset += size
             
             # delete the old data and allocate new
             
             # delete the old data and allocate new
+            # if offset == 0, the aux field will be 
+            # empty
             pysam_bam_update( src, 
                               src.l_aux,
                               offset,
             pysam_bam_update( src, 
                               src.l_aux,
                               offset,
@@ -1609,15 +1641,15 @@ cdef class AlignedRead:
             
             src.l_aux = offset
 
             
             src.l_aux = offset
 
-            if offset == 0: return
+            # copy data only if there is any
+            if offset != 0:
 
 
-            # get location of new data
-            s = bam1_aux( src )            
+                # get location of new data
+                s = bam1_aux( src )            
             
             
-            # check if there is direct path from buffer.raw to tmp
-            cdef char * temp 
-            temp = buffer.raw
-            memcpy( s, temp, offset )            
+                # check if there is direct path from buffer.raw to tmp
+                temp = buffer.raw
+                memcpy( s, temp, offset )            
 
     property flag: 
         """properties flag"""
 
     property flag: 
         """properties flag"""
index 5965c7cca61d3c074cbf22b79afee14e501bd50f..3b6c92a463d6d126297bba4de0ad4c086dba7b64 100644 (file)
@@ -1,6 +1,6 @@
 # pysam versioning information
 
 # pysam versioning information
 
-__version__ = "0.3"
+__version__ = "0.3.1"
 
 __samtools_version__ = "0.1.8"
 
 
 __samtools_version__ = "0.1.8"
 
index 925f0166e5f7314b5091a9363aeaa4c2cfe21c75..ef3d29f47e1e92fb383320dad8947ecf42b8d933 100644 (file)
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@ import version
 
 version = version.__version__
 
 
 version = version.__version__
 
-samtools_exclude = ( "bamtk.c", "razip.c", "bgzip.c" )
+samtools_exclude = ( "bamtk.c", "razip.c", "bgzip.c", "errmod.c", "bam_reheader.c", "bam2bcf.c" )
 samtools_dest = os.path.abspath( "samtools" )
 tabix_exclude = ( "main.c", )
 tabix_dest = os.path.abspath( "tabix" )
 samtools_dest = os.path.abspath( "samtools" )
 tabix_exclude = ( "main.c", )
 tabix_dest = os.path.abspath( "tabix" )
@@ -109,7 +109,8 @@ metadata = {
       "pysam/namedtuple",
       "pysam/version" ],
     'ext_modules': [samtools, tabix],
       "pysam/namedtuple",
       "pysam/version" ],
     'ext_modules': [samtools, tabix],
-    'cmdclass' : {'build_ext': build_ext} }
+    'cmdclass' : {'build_ext': build_ext},
+    }
 
 if __name__=='__main__':
    dist = setup(**metadata)
 
 if __name__=='__main__':
    dist = setup(**metadata)
index 54037509eded5fc31fac1b66580f6df04c5f1c86..38d30f0ccf480f42f9651579297733c541bbc0a3 100644 (file)
@@ -4,7 +4,8 @@ all: ex1.glf ex1.pileup.gz ex1.bam.bai ex1.glfview.gz \
        ex3.bam ex3.bam.bai \
        ex4.bam ex4.bam.bai \
        ex5.bam ex5.bam.bai \
        ex3.bam ex3.bam.bai \
        ex4.bam ex4.bam.bai \
        ex5.bam ex5.bam.bai \
-       ex6.bam 
+       ex6.bam \
+       ex8.bam
 
 ex2.sam.gz: ex1.bam ex1.bam.bai
                samtools view -h ex1.bam | gzip > ex2.sam.gz
 
 ex2.sam.gz: ex1.bam ex1.bam.bai
                samtools view -h ex1.bam | gzip > ex2.sam.gz
diff --git a/tests/ex8.sam b/tests/ex8.sam
new file mode 100644 (file)
index 0000000..5a16b4f
--- /dev/null
@@ -0,0 +1,3 @@
+@HD    VN:1.0
+@SQ    SN:2    LN:48297693
+GJP00TM04CAQ5W 0       2       38297693        60      45H51M1D13M1D12M1D9M2D5M1D7M4D2M1I6M1D28M1D5M1D2M1D18M55H       *       0       0       CATGAAGAACCGCTGGGTATGGAGCACACCTCACCTGATGGACAGTTGATTATGCTCACCTTAACGCTAATTGAGAGCAGCACAAGAGGACTGGAAACTAGAATTTACTCCTCATCTCCGAAGATGTGAATATTCTAAATTCAGCTTGCCTCTTGCTTC IID7757111/=;?///:D>777;EEGAAAEEIHHIIIIIIIIIIIIIIBBBIIIIH==<<<DDGEEE;<<<A><<<DEDDA>>>D?1112544556::03---//25.22=;DD?;;;>BDDDEEEGGGA<888<BAA888<GGGGGEB?9::DD551 NM:i:15 MD:Z:51^T13^A12^A9^AA5^A7^AAAA8^T28^T5^A2^T18   RG:Z:GJP00TM04
index 5be5ca4c783f74cef3ebf775681ef1f95a619269..aefcbb94e2a60fcdd651ae7695c2e22d9834952e 100755 (executable)
@@ -100,11 +100,17 @@ class BinaryTest(unittest.TestCase):
                 ("ex1.view", "samtools view ex1.bam > ex1.view"),
                 ("pysam_ex1.view", (pysam.view, "ex1.bam" ) ),
                 ),
                 ("ex1.view", "samtools view ex1.bam > ex1.view"),
                 ("pysam_ex1.view", (pysam.view, "ex1.bam" ) ),
                 ),
+          "view2" :
+        (
+                ("ex1.view", "samtools view -bT ex1.fa -o ex1.view2 ex1.sam"),
+                # note that -o ex1.view2 throws exception.
+                ("pysam_ex1.view", (pysam.view, "-bT ex1.fa -oex1.view2 ex1.sam" ) ),
+                ),
         }
 
     # some tests depend on others. The order specifies in which order
     # the samtools commands are executed.
         }
 
     # some tests depend on others. The order specifies in which order
     # the samtools commands are executed.
-    mOrder = ('faidx', 'import', 'index', 'pileup1', 'pileup2', 'glfview', 'view' )
+    mOrder = ('faidx', 'import', 'index', 'pileup1', 'pileup2', 'glfview', 'view', 'view2' )
 
     def setUp( self ):
         '''setup tests. 
 
     def setUp( self ):
         '''setup tests. 
@@ -247,11 +253,22 @@ class IOTest(unittest.TestCase):
         samfile.close()
         self.assertRaises( ValueError, samfile.fetch, 'chr1', 100, 120)
 
         samfile.close()
         self.assertRaises( ValueError, samfile.fetch, 'chr1', 100, 120)
 
-    def testPileupFromClosedFile( self ):
+    def testClosedFile( self ):
+        '''test that access to a closed samfile raises ValueError.'''
 
         samfile = pysam.Samfile( "ex1.bam", "rb" )
         samfile.close()
 
         samfile = pysam.Samfile( "ex1.bam", "rb" )
         samfile.close()
+        self.assertRaises( ValueError, samfile.fetch, 'chr1', 100, 120)
         self.assertRaises( ValueError, samfile.pileup, 'chr1', 100, 120)
         self.assertRaises( ValueError, samfile.pileup, 'chr1', 100, 120)
+        self.assertRaises( ValueError, samfile.getrname, 0 )
+        self.assertRaises( ValueError, samfile.tell )
+        self.assertRaises( ValueError, samfile.write, None )
+        self.assertRaises( ValueError, samfile.seek, 0 )
+        self.assertRaises( ValueError, getattr, samfile, "nreferences" )
+        self.assertRaises( ValueError, getattr, samfile, "references" )
+        self.assertRaises( ValueError, getattr, samfile, "lengths" )
+        self.assertRaises( ValueError, getattr, samfile, "text" )
+        self.assertRaises( ValueError, getattr, samfile, "header" )
 
     def testBinaryReadFromSamfile( self ):
         pass
 
     def testBinaryReadFromSamfile( self ):
         pass
@@ -267,12 +284,6 @@ class IOTest(unittest.TestCase):
         self.assertRaises( ValueError, samfile.fetch )
         self.assertEqual( len(list( samfile.fetch(until_eof = True) )), 3270 )
 
         self.assertRaises( ValueError, samfile.fetch )
         self.assertEqual( len(list( samfile.fetch(until_eof = True) )), 3270 )
 
-    def testReadingFromFileWithWrongMode( self ):
-
-        assert not os.path.exists( "ex2.bam.bai" )
-        samfile = pysam.Samfile( "ex2.bam", "r" )
-        self.assertRaises( ValueError, samfile.fetch )
-
 class TestIteratorRow(unittest.TestCase):
 
     def setUp(self):
 class TestIteratorRow(unittest.TestCase):
 
     def setUp(self):
@@ -602,9 +613,29 @@ class TestExceptions(unittest.TestCase):
     def testOutOfRangeLargeOldFormat(self):
         self.assertRaises( ValueError, self.samfile.fetch, "chr1:99999999999999999-999999999999999999" )
 
     def testOutOfRangeLargeOldFormat(self):
         self.assertRaises( ValueError, self.samfile.fetch, "chr1:99999999999999999-999999999999999999" )
 
+    def testZeroToZero(self):        
+        '''see issue 44'''
+        self.assertEqual( len(list(self.samfile.fetch('chr1', 0, 0))), 0)
+
     def tearDown(self):
         self.samfile.close()
 
     def tearDown(self):
         self.samfile.close()
 
+
+class TestWrongFormat(unittest.TestCase):
+    '''test cases for opening files not in bam/sam format.'''
+
+    def testOpenSamAsBam( self ):
+        self.assertRaises( ValueError, pysam.Samfile, 'ex1.sam', 'rb' )
+
+    def testOpenBamAsSam( self ):
+        self.assertRaises( ValueError, pysam.Samfile, 'ex1.bam', 'r' )
+
+    def testOpenFastaAsSam( self ):
+        self.assertRaises( ValueError, pysam.Samfile, 'ex1.fa', 'r' )
+
+    def testOpenFastaAsBam( self ):
+        self.assertRaises( ValueError, pysam.Samfile, 'ex1.fa', 'rb' )
+
 class TestFastaFile(unittest.TestCase):
 
     mSequences = { 'chr1' :
 class TestFastaFile(unittest.TestCase):
 
     mSequences = { 'chr1' :
@@ -691,7 +722,7 @@ class TestAlignedRead(unittest.TestCase):
         a.mpos=200
         a.isize=167
        a.qual="1234" * 3
         a.mpos=200
         a.isize=167
        a.qual="1234" * 3
-
+        # todo: create tags
         return a
 
     def testUpdate( self ):
         return a
 
     def testUpdate( self ):
@@ -760,6 +791,19 @@ class TestAlignedRead(unittest.TestCase):
 
         return a
 
 
         return a
 
+    def testTagParsing( self ):
+        '''test for tag parsing
+
+        see http://groups.google.com/group/pysam-user-group/browse_thread/thread/67ca204059ea465a
+        '''
+        samfile=pysam.Samfile( "ex8.bam","rb" )
+
+        for entry in samfile:
+            before = entry.tags
+            entry.tags = entry.tags
+            after = entry.tags
+            self.assertEqual( after, before )
+
 class TestDeNovoConstruction(unittest.TestCase):
     '''check BAM/SAM file construction using ex3.sam
     
 class TestDeNovoConstruction(unittest.TestCase):
     '''check BAM/SAM file construction using ex3.sam
     
@@ -874,6 +918,7 @@ class TestDeNovoConstruction(unittest.TestCase):
         
         os.unlink( tmpfilename )
 
         
         os.unlink( tmpfilename )
 
+
 class TestDoubleFetch(unittest.TestCase):
     '''check if two iterators on the same bamfile are independent.'''
     
 class TestDoubleFetch(unittest.TestCase):
     '''check if two iterators on the same bamfile are independent.'''