#!/usr/bin/env python
import logging
+import mmap
from optparse import OptionParser
import os
from subprocess import Popen, PIPE
opts, args = parser.parse_args(cmdline)
if len(args) != 1:
- parser.error("Requires one argument")
+ parser.error("Requires one argument, got: %s" % (str(args)))
if opts.verbose:
logging.basicConfig(level=logging.INFO)
# open the srf, fastq, or compressed fastq
if is_srf(args[0]):
- source = srf_open(args[0], opts.cnf1)
+ source = srf_open(args[0])
else:
source = autoopen(args[0])
file can be either a fastq file or a srf file.
You can also force the flowcell ID to be added to the header.""")
- parser.add_option('-c','--cnf1',default=False, action="store_true",
- help="pass -c to srf2fastq, needed for calibrated quality values"
- )
+
parser.add_option('--force', default=False, action="store_true",
help="overwrite existing files.")
parser.add_option('--flowcell', default=None,
"""
cmd = ['srf2fastq']
- if cnf1:
+ if is_cnf1(filename):
cmd.append('-c')
cmd.append(filename)
f.close()
return header == "SSRF"
+def is_cnf1(filename):
+ """
+ Brute force detection if a SRF file is using CNF1/CNF4 records
+ """
+ max_header = 1024 ** 2
+ PROGRAM_ID = 'PROGRAM_ID\000'
+
+ if not is_srf(filename):
+ raise ValueError("%s must be a srf file" % (filename,))
+
+ fd = os.open(filename, os.O_RDONLY)
+ f = mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
+ # alas the max search length requires python 2.6+
+ program_id_location = f.find(PROGRAM_ID, 0) #, max_header)
+ program_header_start = program_id_location+len(PROGRAM_ID)
+ next_null = f.find('\000', program_header_start) #, max_header)
+ program_id_header = f[program_header_start:next_null]
+ f.close()
+ os.close(fd)
+
+ if program_id_header == "solexa2srf v1.4":
+ return False
+ else:
+ return True
+
def open_write(filename, force=False):
"""
Open a file, but throw an exception if it already exists
self.failUnlessEqual(lines1[2].rstrip(), '+')
self.failUnlessEqual(lines1[3].rstrip(), '@IIIB+++')
+ def test_is_srf(self):
+ cnf4_srf = 'woldlab_070829_USI-EAS44_0017_FC11055_1.srf'
+ cnf4_path = os.path.join(_module_path, cnf4_srf)
+ cnf1_srf = 'woldlab_090512_HWI-EAS229_0114_428NNAAXX_5.srf'
+ cnf1_path = os.path.join(_module_path, cnf1_srf)
+
+ is_srf = srf2named_fastq.is_srf
+ self.failUnlessEqual(is_srf(__file__), False)
+ self.failUnlessEqual(is_srf(cnf4_path), True)
+ self.failUnlessEqual(is_srf(cnf1_path), True)
+
+ def test_is_cnf1(self):
+ cnf4_srf = 'woldlab_070829_USI-EAS44_0017_FC11055_1.srf'
+ cnf4_path = os.path.join(_module_path, cnf4_srf)
+ cnf1_srf = 'woldlab_090512_HWI-EAS229_0114_428NNAAXX_5.srf'
+ cnf1_path = os.path.join(_module_path, cnf1_srf)
+
+ is_cnf1 = srf2named_fastq.is_cnf1
+ self.failUnlessRaises(ValueError, is_cnf1, __file__)
+ self.failUnlessEqual(is_cnf1(cnf4_path), False)
+ self.failUnlessEqual(is_cnf1(cnf1_path), True)
def suite():
return unittest.makeSuite(testSrf2Fastq,'test')