From b93111b63970f848146d8e4e4bceecb2276c7570 Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Fri, 24 Apr 2009 23:37:35 +0000 Subject: [PATCH] add --clean option to runfolder. This deletes the various log files as well as the images and calibration directories. It also runs make clean_intermediate in the firecrest/ipar directory (if the Makefile exists). Perhaps it should delete the bustard & gerald directories? Or should I delete them after I've archived the useful parts? --- htsworkflow/pipelines/eland.py | 3 +- htsworkflow/pipelines/firecrest.py | 1 + htsworkflow/pipelines/ipar.py | 1 + htsworkflow/pipelines/runfolder.py | 58 +++++++++++++++++++++++------- scripts/runfolder | 9 +++++ 5 files changed, 59 insertions(+), 13 deletions(-) diff --git a/htsworkflow/pipelines/eland.py b/htsworkflow/pipelines/eland.py index 08d3d41..f9adcd8 100644 --- a/htsworkflow/pipelines/eland.py +++ b/htsworkflow/pipelines/eland.py @@ -358,7 +358,6 @@ def check_for_eland_file(basedir, pattern, lane_id, end): logging.info('found eland file in %s' % (pathname,)) return pathname else: - logging.info('no eland file in %s' % (pathname,)) return None def eland(gerald_dir, gerald=None, genome_maps=None): @@ -399,7 +398,9 @@ def eland(gerald_dir, gerald=None, genome_maps=None): if pathname is not None: break else: + logging.debug("No eland file found in %s for lane %s and end %s" %(basedir, lane_id, end)) continue + # yes the lane_id is also being computed in ElandLane._update # I didn't want to clutter up my constructor # but I needed to persist the sample_name/lane_id for diff --git a/htsworkflow/pipelines/firecrest.py b/htsworkflow/pipelines/firecrest.py index e8d0a28..fe5d01a 100644 --- a/htsworkflow/pipelines/firecrest.py +++ b/htsworkflow/pipelines/firecrest.py @@ -102,6 +102,7 @@ def firecrest(pathname): Examine the directory at pathname and initalize a Firecrest object """ f = Firecrest() + f.pathname = pathname # parse firecrest directory name path, name = os.path.split(pathname) 
diff --git a/htsworkflow/pipelines/ipar.py b/htsworkflow/pipelines/ipar.py index a954420..239239e 100644 --- a/htsworkflow/pipelines/ipar.py +++ b/htsworkflow/pipelines/ipar.py @@ -189,6 +189,7 @@ def ipar(pathname): """ logging.info("Searching IPAR directory") i = IPAR() + i.pathname = pathname # parse firecrest directory name path, name = os.path.split(pathname) diff --git a/htsworkflow/pipelines/runfolder.py b/htsworkflow/pipelines/runfolder.py index 097a617..56453af 100644 --- a/htsworkflow/pipelines/runfolder.py +++ b/htsworkflow/pipelines/runfolder.py @@ -442,18 +442,52 @@ def extract_results(runs, output_base_dir=None): logging.info('Saving to %s' % (dest_name, )) bzip.wait() -def clean_runs(runs): +def rm_list(files, dry_run=True): + for f in files: + if os.path.exists(f): + logging.info('deleting %s' % (f,)) + if not dry_run: + if os.path.isdir(f): + shutil.rmtree(f) + else: + os.unlink(f) + else: + logging.warn("%s doesn't exist."% (f,)) + +def clean_runs(runs, dry_run=True): """ Clean up run folders to optimize for compression. """ - # TODO: implement this. - # rm RunLog*.xml - # rm pipeline_*.txt - # rm gclog.txt - # rm NetCopy.log - # rm nfn.log - # rm Images/L* - # cd Data/C1-*_Firecrest* - # make clean_intermediate - - pass + if dry_run: + logging.info('In dry-run mode') + + for run in runs: + logging.info('Cleaninging %s' % (run.pathname,)) + # rm RunLog*.xml + runlogs = glob(os.path.join(run.pathname, 'RunLog*xml')) + rm_list(runlogs, dry_run) + # rm pipeline_*.txt + pipeline_logs = glob(os.path.join(run.pathname, 'pipeline*.txt')) + rm_list(pipeline_logs, dry_run) + # rm gclog.txt? + # rm NetCopy.log? Isn't this robocopy? + logs = glob(os.path.join(run.pathname, '*.log')) + rm_list(logs, dry_run) + # rm nfn.log? 
+ # Calibration + calibration_dir = glob(os.path.join(run.pathname, 'Calibration_*')) + rm_list(calibration_dir, dry_run) + # rm Images/L* + logging.info("Cleaning images") + image_dirs = glob(os.path.join(run.pathname, 'Images', 'L*')) + rm_list(image_dirs, dry_run) + # cd Data/C1-*_Firecrest* + logging.info("Cleaning intermediate files") + # make clean_intermediate + if os.path.exists(os.path.join(run.image_analysis.pathname, 'Makefile')): + clean_process = subprocess.Popen(['make', 'clean_intermediate'], + cwd=run.image_analysis.pathname,) + clean_process.wait() + + + diff --git a/scripts/runfolder b/scripts/runfolder index 628050e..1380fbf 100644 --- a/scripts/runfolder +++ b/scripts/runfolder @@ -43,6 +43,9 @@ def make_parser(): parser.add_option('-v', '--verbose', dest='verbose', action='store_true', default=False, help='turn on verbose mode') + parser.add_option('--dry-run', action='store_true', default=False, + help="Don't delete anything (in clean mode)") + commands = optparse.OptionGroup(parser, 'Commands') commands.add_option('-s', '--summary', dest='summary', action='store_true', @@ -55,6 +58,8 @@ def make_parser(): default=False, help='create run-xml summary, compress the eland result files, and ' 'copy them and the Summary.htm file into archival directory.') + commands.add_option('-c', '--clean', action='store_true', default=False, + help='Clean runfolder, preparing it for long-term storage') parser.add_option_group(commands) parser.add_option('-o', '--output-dir', default=None, @@ -116,6 +121,10 @@ def main(cmdlist=None): if opt.extract_results: runfolder.extract_results(runs, opt.output_dir) command_run = True + if opt.clean: + runfolder.clean_runs(runs, opt.dry_run) + command_run = True + if command_run == False: print "You need to specify a command."+os.linesep parser.print_help() -- 2.30.2