import sys
from htsworkflow.util.opener import autoopen
-from htsworkflow.version import version
+from htsworkflow.util.version import version
+
+LOGGER = logging.getLogger(__name__)
# constants for our fastq finite state machine
FASTQ_HEADER = 0
logging.basicConfig(level=logging.WARN)
if opts.version:
- print version()
+ print(version())
return 0
if len(args) != 1:
else:
left = open_write(opts.left, opts.force)
right = open_write(opts.right, opts.force)
-
+
# open the srf, fastq, or compressed fastq
if is_srf(args[0]):
- source = srf_open(args[0])
+ source = srf_open(args[0], opts.srf2fastq, opts.cnf1)
else:
source = autoopen(args[0])
convert_single_to_fastq(source, left, header)
else:
convert_single_to_two_fastq(source, left, right, opts.mid, header)
-
+
return 0
def make_parser():
help="add flowcell id header to sequence")
parser.add_option('-l','--left', default="r1.fastq",
help='left side filename')
- parser.add_option('-m','--mid', default=None,
+ parser.add_option('-m','--mid', default=None,
help='actual sequence mid point')
parser.add_option('-r','--right', default="r2.fastq",
help='right side filename')
help="show information about what we're doing.")
parser.add_option('--version', default=False, action="store_true",
help="Report software version")
+ parser.add_option('--cnf1', default=False, action="store_true",
+ help="Force cnf1 mode in srf2fastq")
+ parser.add_option('--srf2fastq', default='srf2fastq',
+ help='specify srf2fastq command')
return parser
def _command_exists(command):
    """
    Return True if command refers to an existing file.

    A command that includes a directory component is checked as given.
    A bare command name is looked for in every directory listed in the
    PATH environment variable (falling back to os.defpath), because a
    bare name is not expected to live in the current directory.
    """
    if os.path.dirname(command):
        return os.path.exists(command)
    search_path = os.environ.get('PATH', os.defpath)
    return any(os.path.exists(os.path.join(directory, command))
               for directory in search_path.split(os.pathsep))


def srf_open(filename, srf2fastq_cmd='srf2fastq', cnf1=False):
    """
    Make a stream from srf file using srf2fastq

    filename is the srf file handed to the converter.
    srf2fastq_cmd is the converter command, either a bare name that is
    found through PATH or a path to the executable.
    cnf1 forces the '-c' flag; the flag is also added whenever
    is_cnf1(filename) is true.

    Returns the stdout pipe of the started converter process.
    """
    # Only report a missing command; Popen is still attempted below so the
    # caller sees the same failure it would have seen without this check.
    if not _command_exists(srf2fastq_cmd):
        LOGGER.error("srf command: %s doesn't exist", srf2fastq_cmd)

    cmd = [srf2fastq_cmd]
    if cnf1 or is_cnf1(filename):
        cmd.append('-c')
    cmd.append(filename)

    LOGGER.info('srf command: %s', " ".join(cmd))
    p = Popen(cmd, stdout=PIPE)
    return p.stdout
-
-def convert_single_to_fastq(instream, target1, header=''):
+def convert_single_to_fastq(instream, target1, header=''):
state = FASTQ_HEADER
for line in instream:
line = line.strip()
state = FASTQ_SEQUENCE_HEADER
# quality header
elif state == FASTQ_SEQUENCE_HEADER:
- # the sequence header isn't really sequence, but
+ # the sequence header isn't really sequence, but
# we're just passing it through
write_sequence(target1, line)
state = FASTQ_QUALITY
raise RuntimeError("Unrecognized STATE in fastq split")
-
+
def convert_single_to_two_fastq(instream, target1, target2, mid=None, header=''):
"""
- read a fastq file where two paired ends have been run together into
+ read a fastq file where two paired ends have been run together into
two halves.
instream is the source stream
state = FASTQ_SEQUENCE_HEADER
# quality header
elif state == FASTQ_SEQUENCE_HEADER:
- # the sequence header isn't really sequence, but
+ # the sequence header isn't really sequence, but
# we're just passing it through
write_sequence(target1, line)
write_sequence(target2, line)
"""
max_header = 1024 ** 2
PROGRAM_ID = 'PROGRAM_ID\000'
- cnf4_apps = set(("solexa2srf v1.4",
+ cnf4_apps = set(("solexa2srf v1.4",
"illumina2srf v1.11.5.Illumina.1.3"))
if not is_srf(filename):
target2_name = base + '_r2.fastq'
for target_name in [target1_name, target2_name]:
- print 'target name', target_name
+ print('target name', target_name)
if os.path.exists(target_name):
raise RuntimeError("%s exists" % (target_name,))