+import optparse
from glob import glob
import logging
import os
import shutil
from htsworkflow.util import queuecommands
+from htsworkflow.pipelines.samplekey import SampleKey
LOGGER = logging.getLogger(__name__)
make a subprocess-friendly list of command line arguments to run solexa2srf
generates files like:
woldlab:080514_HWI-EAS229_0029_20768AAXX:8.srf
- site run name lane
+ site run name lane
run_name - most of the file name (run folder name is a good choice)
lanes - list of SampleKey objects; each key's lane attribute selects a lane to process
LOGGER.info("run_name %s" % (run_name,))
cmd_list = []
- for lane in lanes:
+ for key in lanes:
+ if not isinstance(key, SampleKey):
+ errmsg = "Expected %s got %s"
+ raise ValueError(errmsg % (str(SampleKey), str(type(key))))
name_prefix = '%s_%%l_' % (run_name,)
- destname = '%s_%s_%d.srf' % (site_name, run_name, lane)
+ destname = '%s_%s_%d.srf' % (site_name, run_name, key.lane)
destdir = os.path.normpath(destdir)
dest_path = os.path.join(destdir, destname)
- seq_pattern = 's_%d_*_seq.txt' % (lane,)
+ seq_pattern = 's_%d_*_seq.txt' % (key.lane,)
if cmdlevel == SOLEXA2SRF:
cmd = ['solexa2srf',
'-o', dest_path,
seq_pattern]
elif cmdlevel == ILLUMINA2SRF11:
- seq_pattern = 's_%d_*_qseq.txt' % (lane,)
+ seq_pattern = 's_%d_*_qseq.txt' % (key.lane,)
cmd = ['illumina2srf',
'-o', dest_path,
seq_pattern]
LOGGER.info("run_name %s" % (run_name,))
cmd_list = []
- for lane in lanes:
+ for key in lanes:
+ if not isinstance(key, SampleKey):
+ errmsg = "Expected %s got %s"
+ raise ValueError(errmsg % (str(SampleKey), str(type(key))))
name_prefix = '%s_%%l_%%t_' % (run_name,)
destdir = os.path.normpath(destdir)
qseq_patterns = create_qseq_patterns(bustard_dir)
for read, pattern in qseq_patterns:
if read is None:
- destname = '%s_%s_l%d.tar.bz2' % (site_name, run_name, lane)
+ destname = '%s_%s_l%d.tar.bz2' % (site_name, run_name, key.lane)
dest_path = os.path.join(destdir, destname)
else:
- destname = '%s_%s_l%d_r%d.tar.bz2' % (site_name, run_name, lane, read)
+ destname = '%s_%s_l%d_r%d.tar.bz2' % (site_name, run_name, key.lane, read)
dest_path = os.path.join(destdir, destname)
- cmd = " ".join(['tar', 'cjf', dest_path, pattern % (lane,) ])
+ cmd = " ".join(['tar', 'cjf', dest_path, pattern % (key.lane,) ])
LOGGER.info("Generated command: " + cmd)
cmd_list.append(cmd)
return cmd_list
def main(cmdline=None):
    """
    Command line entry point: archive the raw sequence data of one directory.

    Parses the options, checks that a run name, destination and site name
    were given along with exactly one source directory, then dispatches on
    --format:

    fastq - copy the files with copy_hiseq_project_fastqs
    qseq  - build commands with make_qseq_commands
    srf   - build commands with make_srf_commands

    The generated commands are printed and then handed to run_commands.

    cmdline - list of argument strings; None lets optparse use sys.argv[1:]

    Raises ValueError for an unknown --format or a non-integer lane.
    Missing required options end the program through parser.error().
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)

    logging.basicConfig(level=logging.DEBUG)
    if not opts.name:
        parser.error("Specify run name. Usually runfolder name")
    if not opts.destination:
        parser.error("Specify where to write sequence files")
    if not opts.site_name:
        parser.error("Specify site name")
    if len(args) != 1:
        parser.error("Can only process one directory")

    source = args[0]
    LOGGER.info("Raw Format is: %s" % (opts.format, ))
    seq_cmds = []
    if opts.format == 'fastq':
        LOGGER.info("raw data = %s" % (source,))
        copy_hiseq_project_fastqs(opts.name, source, opts.site_name, opts.destination)
    elif opts.format == 'qseq':
        # The command builders reject anything that is not a SampleKey, so
        # the raw "1,2,3" option string has to be converted first.
        lanes = _parse_lane_keys(opts.lanes)
        seq_cmds = make_qseq_commands(opts.name, source, lanes, opts.site_name, opts.destination)
    elif opts.format == 'srf':
        lanes = _parse_lane_keys(opts.lanes)
        seq_cmds = make_srf_commands(opts.name, source, lanes, opts.site_name, opts.destination, 0)
    else:
        raise ValueError('Unknown --format=%s' % (opts.format))

    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(seq_cmds)

    # There is no command line option for the job count, so run one at a time.
    num_jobs = 1
    # NOTE(review): assumes run_commands is defined at module level in this
    # file; the original called it through an unimported name "srf" and
    # passed args.source, which does not exist on the positional-args list.
    run_commands(source, seq_cmds, num_jobs)


def _parse_lane_keys(lane_arg):
    """
    Convert a comma separated lane string such as "1,2,3" into SampleKeys.

    Empty fields are ignored. Raises ValueError if a field is not an integer.
    """
    lane_keys = []
    for field in lane_arg.split(','):
        field = field.strip()
        if not field:
            continue
        try:
            lane_number = int(field)
        except ValueError:
            raise ValueError(
                "--lanes must be a comma separated list of integers, got %r"
                % (lane_arg,))
        # NOTE(review): assumes SampleKey accepts a "lane" keyword argument;
        # its constructor is not visible here -- confirm.
        lane_keys.append(SampleKey(lane=lane_number))
    return lane_keys
def make_parser():
    """
    Build the optparse parser for this script's command line.

    Every option stores a string; the defaults are listed in the table below.
    """
    # (flags, default, help text) for each supported option.
    option_table = (
        (('-f', '--format'), 'fastq',
         "Format raw data is in"),
        (('-n', '--name'), None,
         "Specify run name"),
        (('-d', '--destination'), None,
         'specify where to write files (cycle dir)'),
        (('-s', '--site-name'), None,
         "specify site name"),
        (('-l', '--lanes'), "1,2,3,4,5,6,7,8",
         "what lanes to process, defaults to all"),
    )

    cli = optparse.OptionParser()
    for flags, default_value, help_text in option_table:
        cli.add_option(*flags, default=default_value, help=help_text)
    return cli
# Run the command line interface only when executed as a script, not on import.
if __name__ == '__main__':
    main()