add --clean option to runfolder
author Diane Trout <diane@caltech.edu>
Fri, 24 Apr 2009 23:37:35 +0000 (23:37 +0000)
committer Diane Trout <diane@caltech.edu>
Fri, 24 Apr 2009 23:37:35 +0000 (23:37 +0000)
This deletes the various log files as well as the images and calibration
directories.
It also runs make clean_intermediate in the firecrest/ipar directory
(if the Makefile exists).
Perhaps it should also delete the bustard & gerald directories?
Or should I delete them after I've archived the useful parts?

htsworkflow/pipelines/eland.py
htsworkflow/pipelines/firecrest.py
htsworkflow/pipelines/ipar.py
htsworkflow/pipelines/runfolder.py
scripts/runfolder

index 08d3d415c6d4052a8cef291b03cb12caafd1e34e..f9adcd85bbdad0f82cf33672498e4a254b66cfd6 100644 (file)
@@ -358,7 +358,6 @@ def check_for_eland_file(basedir, pattern, lane_id, end):
        logging.info('found eland file in %s' % (pathname,))
        return pathname
    else:
-       logging.info('no eland file in %s' % (pathname,))
        return None
 
 def eland(gerald_dir, gerald=None, genome_maps=None):
@@ -399,7 +398,9 @@ def eland(gerald_dir, gerald=None, genome_maps=None):
                     if pathname is not None:
                       break
                 else:
+                    logging.debug("No eland file found in %s for lane %s and end %s" %(basedir, lane_id, end))
                     continue
+
                 # yes the lane_id is also being computed in ElandLane._update
                 # I didn't want to clutter up my constructor
                 # but I needed to persist the sample_name/lane_id for
index e8d0a2898c0c4d5621732a2fd0abdb5778dcdfd1..fe5d01a68d6a3fce7a42377165c770291c574d42 100644 (file)
@@ -102,6 +102,7 @@ def firecrest(pathname):
     Examine the directory at pathname and initalize a Firecrest object
     """
     f = Firecrest()
+    f.pathname = pathname
 
     # parse firecrest directory name
     path, name = os.path.split(pathname)
index a9544203afb7ee255c834f07e83ecbed91c39e67..239239ecbf17aa1247d6185d8dd528b0e87eb11d 100644 (file)
@@ -189,6 +189,7 @@ def ipar(pathname):
     """
     logging.info("Searching IPAR directory")
     i = IPAR()
+    i.pathname = pathname
 
     # parse firecrest directory name
     path, name = os.path.split(pathname)
index 097a61732884a7d2b90983cf01ff8ed61000e295..56453aff13bfc1441d16d130d08c12f28fdf5914 100644 (file)
@@ -442,18 +442,52 @@ def extract_results(runs, output_base_dir=None):
                 logging.info('Saving to %s' % (dest_name, ))
                 bzip.wait()
 
-def clean_runs(runs):
+def rm_list(files, dry_run=True):
+    """
+    Delete every path in files, logging each deletion.
+
+    files is an iterable of path names. Real directories are removed
+    recursively; regular files and symbolic links are unlinked. Paths
+    that are not present on disk are reported with a warning and skipped.
+
+    When dry_run is true (the default) the deletions are only logged;
+    nothing is removed from disk.
+    """
+    for f in files:
+        # lexists (not exists) so dangling symlinks are still seen and
+        # cleaned up instead of being reported as missing.
+        if not os.path.lexists(f):
+            logging.warning("%s doesn't exist."% (f,))
+            continue
+
+        logging.info('deleting %s' % (f,))
+        if dry_run:
+            continue
+
+        # shutil.rmtree refuses to operate on a symlink, so remove the
+        # link itself rather than following it into the target directory.
+        if os.path.isdir(f) and not os.path.islink(f):
+            shutil.rmtree(f)
+        else:
+            os.unlink(f)
+
+def clean_runs(runs, dry_run=True):
     """
     Clean up run folders to optimize for compression.
+
+    For each run in runs this removes, relative to run.pathname, the
+    RunLog*xml, pipeline*.txt and *.log files, the Calibration_*
+    entries and the Images/L* entries. It then runs
+    'make clean_intermediate' in run.image_analysis.pathname when a
+    Makefile exists there.
+
+    When dry_run is true (the default) every action is only logged:
+    nothing is deleted and make is not executed.
     """
-    # TODO: implement this.
-    # rm RunLog*.xml
-    # rm pipeline_*.txt
-    # rm gclog.txt
-    # rm NetCopy.log
-    # rm nfn.log
-    # rm Images/L*
-    # cd Data/C1-*_Firecrest*
-    # make clean_intermediate
-
-    pass
+    if dry_run:
+        logging.info('In dry-run mode')
+
+    for run in runs:
+        logging.info('Cleaning %s' % (run.pathname,))
+        # rm RunLog*.xml
+        runlogs = glob(os.path.join(run.pathname, 'RunLog*xml'))
+        rm_list(runlogs, dry_run)
+        # rm pipeline_*.txt
+        pipeline_logs = glob(os.path.join(run.pathname, 'pipeline*.txt'))
+        rm_list(pipeline_logs, dry_run)
+        # rm gclog.txt?
+        # rm NetCopy.log? Isn't this robocopy?
+        logs = glob(os.path.join(run.pathname, '*.log'))
+        rm_list(logs, dry_run)
+        # rm nfn.log?
+        # Calibration
+        calibration_dir = glob(os.path.join(run.pathname, 'Calibration_*'))
+        rm_list(calibration_dir, dry_run)
+        # rm Images/L*
+        logging.info("Cleaning images")
+        image_dirs = glob(os.path.join(run.pathname, 'Images', 'L*'))
+        rm_list(image_dirs, dry_run)
+        # cd Data/C1-*_Firecrest*
+        logging.info("Cleaning intermediate files")
+        # make clean_intermediate
+        analysis_dir = run.image_analysis.pathname
+        if os.path.exists(os.path.join(analysis_dir, 'Makefile')):
+            logging.info('running make clean_intermediate in %s' % (analysis_dir,))
+            # make deletes files itself, so it has to honour dry-run
+            # just like rm_list does.
+            if not dry_run:
+                clean_process = subprocess.Popen(['make', 'clean_intermediate'],
+                                                 cwd=analysis_dir)
+                status = clean_process.wait()
+                if status != 0:
+                    # keep going with the remaining runs, but do not
+                    # hide the failure from the operator.
+                    logging.warning(
+                        'make clean_intermediate in %s exited with status %s'
+                        % (analysis_dir, status))
+
+
+
index 628050e704b342a0c7d70fea4fdedddda6e23110..1380fbf7fde7affcd856d2884af1889811ba3fe2 100644 (file)
@@ -43,6 +43,9 @@ def make_parser():
     parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                       default=False,
                       help='turn on verbose mode')
+    parser.add_option('--dry-run', action='store_true', default=False,
+                      help="Don't delete anything (in clean mode)")
+
     commands = optparse.OptionGroup(parser, 'Commands')
 
     commands.add_option('-s', '--summary', dest='summary', action='store_true',
@@ -55,6 +58,8 @@ def make_parser():
            default=False,
            help='create run-xml summary, compress the eland result files, and '
                 'copy them and the Summary.htm file into archival directory.')
+    commands.add_option('-c', '--clean', action='store_true', default=False,
+                        help='Clean runfolder, preparing it for long-term storage')
     parser.add_option_group(commands)
 
     parser.add_option('-o', '--output-dir', default=None,
@@ -116,6 +121,10 @@ def main(cmdlist=None):
         if opt.extract_results:
             runfolder.extract_results(runs, opt.output_dir)
             command_run = True
+        if opt.clean:
+            runfolder.clean_runs(runs, opt.dry_run)
+            command_run = True
+
         if command_run == False:
             print "You need to specify a command."+os.linesep
             parser.print_help()