from htsworkflow.util.alphanum import alphanum
from htsworkflow.util.ethelp import indent, flatten
+from htsworkflow.pipelines import srf
+
class PipelineRun(object):
"""
Capture "interesting" information about a pipeline run
else:
return False
-def extract_results(runs, output_base_dir=None):
+def save_summary_file(gerald_object, cycle_dir):
+ # Copy Summary.htm
+ summary_path = os.path.join(gerald_object.pathname, 'Summary.htm')
+ if os.path.exists(summary_path):
+ logging.info('Copying %s to %s' % (summary_path, cycle_dir))
+ shutil.copy(summary_path, cycle_dir)
+ else:
+ logging.info('Summary file %s was not found' % (summary_path,))
+
+
+def compress_score_files(gerald_object, cycle_dir):
+ """
+ Compress score files into our result directory
+ """
+ # check for g.pathname/Temp a new feature of 1.1rc1
+ scores_path = gerald_object.pathname
+ scores_path_temp = os.path.join(scores_path, 'Temp')
+ if os.path.isdir(scores_path_temp):
+ scores_path = scores_path_temp
+
+ # hopefully we have a directory that contains s_*_score files
+ score_files = []
+ for f in os.listdir(scores_path):
+ if re.match('.*_score.txt', f):
+ score_files.append(f)
+
+ tar_cmd = ['/bin/tar', 'c'] + score_files
+ bzip_cmd = [ 'bzip2', '-9', '-c' ]
+ tar_dest_name =os.path.join(cycle_dir, 'scores.tar.bz2')
+ tar_dest = open(tar_dest_name, 'w')
+ logging.info("Compressing score files from %s" % (scores_path,))
+ logging.info("Running tar: " + " ".join(tar_cmd[:10]))
+ logging.info("Running bzip2: " + " ".join(bzip_cmd))
+ logging.info("Writing to %s" %(tar_dest_name))
+
+ env = {'BZIP': '-9'}
+ tar = subprocess.Popen(tar_cmd, stdout=subprocess.PIPE, shell=False, env=env,
+ cwd=scores_path)
+ bzip = subprocess.Popen(bzip_cmd, stdin=tar.stdout, stdout=tar_dest)
+ tar.wait()
+
+def compress_eland_results(gerald_object, cycle_dir):
+ """
+ Compress eland result files into the archive directory
+ """
+ # copy & bzip eland files
+ for lanes_dictionary in gerald_object.eland_results.results:
+ for eland_lane in lanes_dictionary.values():
+ source_name = eland_lane.pathname
+ path, name = os.path.split(eland_lane.pathname)
+ dest_name = os.path.join(cycle_dir, name)
+ logging.info("Saving eland file %s to %s" % \
+ (source_name, dest_name))
+
+ if is_compressed(name):
+ logging.info('Already compressed, Saving to %s' % (dest_name, ))
+ shutil.copy(source_name, dest_name)
+ else:
+ # not compressed
+ dest_name += '.bz2'
+ args = ['bzip2', '-9', '-c', source_name]
+ logging.info('Running: %s' % ( " ".join(args) ))
+ bzip_dest = open(dest_name, 'w')
+ bzip = subprocess.Popen(args, stdout=bzip_dest)
+ logging.info('Saving to %s' % (dest_name, ))
+ bzip.wait()
+
+def extract_results(runs, output_base_dir=None, site="individual", num_jobs=1):
if output_base_dir is None:
output_base_dir = os.getcwd()
# save run file
r.save(cycle_dir)
- # Copy Summary.htm
- summary_path = os.path.join(r.gerald.pathname, 'Summary.htm')
- if os.path.exists(summary_path):
- logging.info('Copying %s to %s' % (summary_path, cycle_dir))
- shutil.copy(summary_path, cycle_dir)
- else:
- logging.info('Summary file %s was not found' % (summary_path,))
-
+ # save summary file
+ save_summary_file(g, cycle_dir)
+
# tar score files
- score_files = []
-
- # check for g.pathname/Temp a new feature of 1.1rc1
- scores_path = g.pathname
- scores_path_temp = os.path.join(scores_path, 'Temp')
- if os.path.isdir(scores_path_temp):
- scores_path = scores_path_temp
-
- # hopefully we have a directory that contains s_*_score files
- for f in os.listdir(scores_path):
- if re.match('.*_score.txt', f):
- score_files.append(f)
-
- tar_cmd = ['/bin/tar', 'c'] + score_files
- bzip_cmd = [ 'bzip2', '-9', '-c' ]
- tar_dest_name =os.path.join(cycle_dir, 'scores.tar.bz2')
- tar_dest = open(tar_dest_name, 'w')
- logging.info("Compressing score files from %s" % (scores_path,))
- logging.info("Running tar: " + " ".join(tar_cmd[:10]))
- logging.info("Running bzip2: " + " ".join(bzip_cmd))
- logging.info("Writing to %s" %(tar_dest_name))
-
- env = {'BZIP': '-9'}
- tar = subprocess.Popen(tar_cmd, stdout=subprocess.PIPE, shell=False, env=env,
- cwd=scores_path)
- bzip = subprocess.Popen(bzip_cmd, stdin=tar.stdout, stdout=tar_dest)
- tar.wait()
-
- # copy & bzip eland files
- for lanes_dictionary in g.eland_results.results:
- for eland_lane in lanes_dictionary.values():
- source_name = eland_lane.pathname
- path, name = os.path.split(eland_lane.pathname)
- dest_name = os.path.join(cycle_dir, name)
- logging.info("Saving eland file %s to %s" % \
- (source_name, dest_name))
-
- if is_compressed(name):
- logging.info('Already compressed, Saving to %s' % (dest_name, ))
- shutil.copy(source_name, dest_name)
- else:
- # not compressed
- dest_name += '.bz2'
- args = ['bzip2', '-9', '-c', source_name]
- logging.info('Running: %s' % ( " ".join(args) ))
- bzip_dest = open(dest_name, 'w')
- bzip = subprocess.Popen(args, stdout=bzip_dest)
- logging.info('Saving to %s' % (dest_name, ))
- bzip.wait()
-
+ compress_score_files(g, cycle_dir)
+
+ # compress eland result files
+ compress_eland_results(g, cycle_dir)
+
+ # build srf commands
+ lanes = range(1,9)
+ srf_cmds = srf.make_commands(r.pathname, lanes, "woldlab", cycle_dir)
+ srf.run_srf_commands(r.bustard.pathname, srf_cmds, 2)
+
def rm_list(files, dry_run=True):
for f in files:
if os.path.exists(f):
--- /dev/null
+import logging
+import os
+
+from htsworkflow.util import queuecommands
+
+SOLEXA2SRF = 0
+ILLUMINA2SRF10 = 1
+ILLUMINA2SRF11 = 2
+
+def pathname_to_run_name(base):
+ """
+ Convert a pathname to a base runfolder name
+ handle the case with a trailing /
+ """
+ name = ""
+ while len(name) == 0:
+ base, name = os.path.split(base)
+ if len(base) == 0:
+ return None
+ return name
+
+def make_commands(run_name, lanes, site_name, destdir, cmdlevel=ILLUMINA2SRF11):
+ """
+ make a subprocess-friendly list of command line arguments to run solexa2srf
+ generates files like:
+ woldlab:080514_HWI-EAS229_0029_20768AAXX:8.srf
+ site run name lane
+
+ run_name - most of the file name (run folder name is a good choice)
+ lanes - list of integers corresponding to which lanes to process
+ site_name - name of your "sequencing site" or "Individual"
+ destdir - where to write all the srf files
+ """
+ # clean up pathname
+ logging.info("run_name %s" % ( run_name, ))
+
+ cmd_list = []
+ for lane in lanes:
+ name_prefix = '%s_%%l_%%t_' % (run_name,)
+ destname = '%s_%s_%d.srf' % (site_name, run_name, lane)
+ destdir = os.path.normpath(destdir)
+ dest_path = os.path.join(destdir, destname)
+ seq_pattern = 's_%d_*_seq.txt' % (lane,)
+
+ if cmdlevel == SOLEXA2SRF:
+ cmd = ['solexa2srf',
+ '-N', name_prefix,
+ '-n', '%3x:%3y',
+ '-o', dest_path,
+ seq_pattern]
+ elif cmdlevel == ILLUMINA2SRF10:
+ cmd = ['illumina2srf',
+ '-v1.0',
+ '-o', dest_path,
+ seq_pattern]
+ elif cmdlevel == ILLUMINA2SRF11:
+ seq_pattern = 's_%d_*_qseq.txt' % (lane,)
+ cmd = ['illumina2srf',
+ '-o', dest_path,
+ seq_pattern]
+ else:
+ raise ValueError("Unrecognized run level %d" % (cmdlevel,))
+
+ logging.info("Generated command: " + " ".join(cmd))
+ cmd_list.append(" ".join(cmd))
+ return cmd_list
+
+def run_srf_commands(bustard_dir, cmd_list, num_jobs):
+ logging.info("chdir to %s" % (bustard_dir,))
+ curdir = os.getcwd()
+ os.chdir(bustard_dir)
+ q = queuecommands.QueueCommands(cmd_list, num_jobs)
+ q.run()
+ os.chdir(curdir)
+
help='generate run configuration archive')
commands.add_option('--extract-results', action='store_true',
default=False,
- help='create run-xml summary, compress the eland result files, and '
- 'copy them and the Summary.htm file into archival directory.')
+ help='create run-xml summary, compress the eland result files, build srf files and '
+ 'copy all that and the Summary.htm file into an archival directory.')
commands.add_option('-c', '--clean', action='store_true', default=False,
help='Clean runfolder, preparing it for long-term storage')
parser.add_option_group(commands)
+ parser.add_option('-j', '--max-jobs', default=1,
+ help='sepcify the maximum number of processes to run '
+ '(used in extract-results)')
parser.add_option('-o', '--output-dir', default=None,
help="specify the default output directory for extract results")
-
+ parser.add_option('--run-xml', dest='run_xml',
+ default=None,
+ help='specify a run_<FlowCell>.xml file for summary reports')
+ parser.add_option('--site', default='individual',
+ help='specify the site name for srf files')
parser.add_option('-u', '--use-run', dest='use_run', default=None,
help='Specify which run to use instead of autoscanning '
'the runfolder. You do this by providing the final '
' GERALD directory, and it assumes the parent '
'directories are the bustard and image processing '
'directories.')
-
- parser.add_option('--run-xml', dest='run_xml',
- default=None,
- help='specify a run_<FlowCell>.xml file for summary reports')
-
return parser
runfolder.extract_run_parameters(runs)
command_run = True
if opt.extract_results:
- runfolder.extract_results(runs, opt.output_dir)
+ runfolder.extract_results(runs, opt.output_dir, opt.site, opt.max_jobs)
command_run = True
if opt.clean:
runfolder.clean_runs(runs, opt.dry_run)
#!/usr/bin/python
-from glob import glob
import logging
import optparse
import os
-import subprocess
import sys
-from htsworkflow.util import queuecommands
from htsworkflow.pipelines import runfolder
-
-SOLEXA2SRF = 0
-ILLUMINA2SRF10 = 1
-ILLUMINA2SRF11 = 2
-
-def make_commands(run_name, lanes, site_name, destdir, cmdlevel=ILLUMINA2SRF11):
- """
- make a subprocess-friendly list of command line arguments to run solexa2srf
- generates files like:
- woldlab:080514_HWI-EAS229_0029_20768AAXX:8.srf
- site run name lane
-
- run_name - most of the file name (run folder name is a good choice)
- lanes - list of integers corresponding to which lanes to process
- site_name - name of your "sequencing site" or "Individual"
- destdir - where to write all the srf files
- """
- cmd_list = []
- for lane in lanes:
- name_prefix = '%s_%%l_%%t_' % (run_name,)
- destname = '%s_%s_%d.srf' % (site_name, run_name, lane)
- destdir = os.path.normpath(destdir)
- dest_path = os.path.join(destdir, destname)
- seq_pattern = 's_%d_*_seq.txt' % (lane,)
-
- if cmdlevel == SOLEXA2SRF:
- cmd = ['solexa2srf',
- '-N', name_prefix,
- '-n', '%3x:%3y',
- '-o', dest_path,
- seq_pattern]
- elif cmdlevel == ILLUMINA2SRF10:
- cmd = ['illumina2srf',
- '-v1.0',
- '-o', dest_path,
- seq_pattern]
- elif cmdlevel == ILLUMINA2SRF11:
- seq_pattern = 's_%d_*_qseq.txt' % (lane,)
- cmd = ['illumina2srf',
- '-o', dest_path,
- seq_pattern]
- else:
- raise ValueError("Unrecognized run level %d" % (cmdlevel,))
-
- cmd_list.append(" ".join(cmd))
- return cmd_list
-
-def pathname_to_run_name(base):
- """
- Convert a pathname to a base runfolder name
- handle the case with a trailing /
- """
- name = ""
- while len(name) == 0:
- base, name = os.path.split(base)
- if len(base) == 0:
- return None
- return name
+from htsworkflow.pipelines.srf import make_commands, run_srf_commands
+from htsworkflow.pipelines.srf import ILLUMINA2SRF10, ILLUMINA2SRF11, SOLEXA2SRF
def make_parser():
usage = '%prog: [options] runfolder -l 1,2,3 [runfolder -l 5,6 ...]'
for runfolder_path, lanes in zip(args, lanes_list):
# normalize paths, either relative to home dirs or current dir
runfolder_path = os.path.abspath(runfolder_path)
- # the last part of the path should be a runfolder name
- name = pathname_to_run_name(runfolder_path)
+ run_name = pathname_to_run_name(runfolder_path)
# so any bustard directories?
runs = runfolder.get_runs(runfolder_path)
# give up if there are anything other than 1 run
return 1
elif len(runs) == 1:
bustard_dir = runs[0].bustard.pathname
- cmds[bustard_dir] = make_commands(name, lanes, opts.site, opts.dest_dir, opts.runfolder_version)
+ cmds[bustard_dir] = make_commands(run_name, lanes, opts.site, opts.dest_dir, opts.runfolder_version)
else:
print "ERROR: Couldn't find a bustard directory in", runfolder_path
return 1
if not opts.dry_run:
for cwd, cmd_list in cmds.items():
- curdir = os.getcwd()
- os.chdir(cwd)
- q = queuecommands.QueueCommands(cmd_list, opts.jobs)
- q.run()
- os.chdir(curdir)
+ run_srf_command(cwd, cmd_list, opts.jobs)
else:
for cwd, cmd_list in cmds.items():
print cwd