From b93111b63970f848146d8e4e4bceecb2276c7570 Mon Sep 17 00:00:00 2001
From: Diane Trout <diane@caltech.edu>
Date: Fri, 24 Apr 2009 23:37:35 +0000
Subject: [PATCH] add --clean option to runfolder. This deletes the various log
 files as well as the images and calibration directories. It also runs make
 clean_intermediate in the firecrest/ipar directory (if the Makefile exists).
 Perhaps it should delete the bustard & gerald directories? Or should I delete
 them after I've archived the useful parts?

---
 htsworkflow/pipelines/eland.py     |  3 +-
 htsworkflow/pipelines/firecrest.py |  1 +
 htsworkflow/pipelines/ipar.py      |  1 +
 htsworkflow/pipelines/runfolder.py | 58 +++++++++++++++++++++++-------
 scripts/runfolder                  |  9 +++++
 5 files changed, 59 insertions(+), 13 deletions(-)

diff --git a/htsworkflow/pipelines/eland.py b/htsworkflow/pipelines/eland.py
index 08d3d41..f9adcd8 100644
--- a/htsworkflow/pipelines/eland.py
+++ b/htsworkflow/pipelines/eland.py
@@ -358,7 +358,6 @@ def check_for_eland_file(basedir, pattern, lane_id, end):
        logging.info('found eland file in %s' % (pathname,))
        return pathname
    else:
-       logging.info('no eland file in %s' % (pathname,))
        return None
 
 def eland(gerald_dir, gerald=None, genome_maps=None):
@@ -399,7 +398,9 @@ def eland(gerald_dir, gerald=None, genome_maps=None):
                     if pathname is not None:
                       break
                 else:
+                    logging.debug("No eland file found in %s for lane %s and end %s" %(basedir, lane_id, end))
                     continue
+
                 # yes the lane_id is also being computed in ElandLane._update
                 # I didn't want to clutter up my constructor
                 # but I needed to persist the sample_name/lane_id for
diff --git a/htsworkflow/pipelines/firecrest.py b/htsworkflow/pipelines/firecrest.py
index e8d0a28..fe5d01a 100644
--- a/htsworkflow/pipelines/firecrest.py
+++ b/htsworkflow/pipelines/firecrest.py
@@ -102,6 +102,7 @@ def firecrest(pathname):
     Examine the directory at pathname and initalize a Firecrest object
     """
     f = Firecrest()
+    f.pathname = pathname
 
     # parse firecrest directory name
     path, name = os.path.split(pathname)
diff --git a/htsworkflow/pipelines/ipar.py b/htsworkflow/pipelines/ipar.py
index a954420..239239e 100644
--- a/htsworkflow/pipelines/ipar.py
+++ b/htsworkflow/pipelines/ipar.py
@@ -189,6 +189,7 @@ def ipar(pathname):
     """
     logging.info("Searching IPAR directory")
     i = IPAR()
+    i.pathname = pathname
 
     # parse firecrest directory name
     path, name = os.path.split(pathname)
diff --git a/htsworkflow/pipelines/runfolder.py b/htsworkflow/pipelines/runfolder.py
index 097a617..56453af 100644
--- a/htsworkflow/pipelines/runfolder.py
+++ b/htsworkflow/pipelines/runfolder.py
@@ -442,18 +442,52 @@ def extract_results(runs, output_base_dir=None):
                 logging.info('Saving to %s' % (dest_name, ))
                 bzip.wait()
 
-def clean_runs(runs):
+def rm_list(files, dry_run=True):
+    """Delete every path in files (directories recursively); dry_run only logs."""
+    for f in files:
+        if not os.path.lexists(f):  # lexists: dangling symlinks still count
+            logging.warn("%s doesn't exist."% (f,))
+            continue
+        logging.info('deleting %s' % (f,))
+        if not dry_run and os.path.isdir(f) and not os.path.islink(f):
+            shutil.rmtree(f)
+        elif not dry_run:  # files and symlinks (rmtree refuses symlinks)
+            os.unlink(f)
+
+def clean_runs(runs, dry_run=True):
     """
     Clean up run folders to optimize for compression.
     """
-    # TODO: implement this.
-    # rm RunLog*.xml
-    # rm pipeline_*.txt
-    # rm gclog.txt
-    # rm NetCopy.log
-    # rm nfn.log
-    # rm Images/L*
-    # cd Data/C1-*_Firecrest*
-    # make clean_intermediate
-
-    pass
+    # runs: iterable of run objects; each needs .pathname (run folder) and
+    #       .image_analysis.pathname (directory that may hold a Makefile).
+    # dry_run: when true only log what would happen; nothing is deleted and
+    #       make is not started.
+    if dry_run:
+        logging.info('In dry-run mode')
+
+    for run in runs:
+        logging.info('Cleaning %s' % (run.pathname,))
+        # Top-level logs (RunLog*.xml, pipeline_*.txt, *.log such as
+        # NetCopy.log / nfn.log) and the Calibration_* directories.
+        # NOTE: gclog.txt matches none of these patterns and is left alone.
+        for pattern in ('RunLog*xml', 'pipeline*.txt', '*.log', 'Calibration_*'):
+            rm_list(glob(os.path.join(run.pathname, pattern)), dry_run)
+        # rm Images/L*
+        logging.info("Cleaning images")
+        rm_list(glob(os.path.join(run.pathname, 'Images', 'L*')), dry_run)
+        # cd Data/C1-*_Firecrest* && make clean_intermediate
+        logging.info("Cleaning intermediate files")
+        analysis_dir = run.image_analysis.pathname
+        if not os.path.exists(os.path.join(analysis_dir, 'Makefile')):
+            continue
+        if dry_run:
+            # make removes files on its own, so a dry run may only report it
+            logging.info('would run make clean_intermediate in %s' % (analysis_dir,))
+            continue
+        clean_process = subprocess.Popen(['make', 'clean_intermediate'],
+                                         cwd=analysis_dir)
+        returncode = clean_process.wait()
+        if returncode != 0:
+            # surface a failed cleanup instead of silently ignoring it
+            logging.warn('make clean_intermediate failed in %s (exit %s)' %
+                         (analysis_dir, returncode))
diff --git a/scripts/runfolder b/scripts/runfolder
index 628050e..1380fbf 100644
--- a/scripts/runfolder
+++ b/scripts/runfolder
@@ -43,6 +43,9 @@ def make_parser():
     parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                       default=False,
                       help='turn on verbose mode')
+    parser.add_option('--dry-run', action='store_true', default=False,
+                      help="Don't delete anything (in clean mode)")
+
     commands = optparse.OptionGroup(parser, 'Commands')
 
     commands.add_option('-s', '--summary', dest='summary', action='store_true',
@@ -55,6 +58,8 @@ def make_parser():
            default=False,
            help='create run-xml summary, compress the eland result files, and '
                 'copy them and the Summary.htm file into archival directory.')
+    commands.add_option('-c', '--clean', action='store_true', default=False,
+                        help='Clean runfolder, preparing it for long-term storage')
     parser.add_option_group(commands)
 
     parser.add_option('-o', '--output-dir', default=None,
@@ -116,6 +121,10 @@ def main(cmdlist=None):
         if opt.extract_results:
             runfolder.extract_results(runs, opt.output_dir)
             command_run = True
+        if opt.clean:
+            runfolder.clean_runs(runs, opt.dry_run)
+            command_run = True
+
         if command_run == False:
             print "You need to specify a command."+os.linesep
             parser.print_help()
-- 
2.30.2