add --clean option to runfolder

author Diane Trout <diane@caltech.edu>

Fri, 24 Apr 2009 23:37:35 +0000 (23:37 +0000)

committer Diane Trout <diane@caltech.edu>

Fri, 24 Apr 2009 23:37:35 +0000 (23:37 +0000)
author Diane Trout <diane@caltech.edu>
Fri, 24 Apr 2009 23:37:35 +0000 (23:37 +0000)
committer Diane Trout <diane@caltech.edu>
Fri, 24 Apr 2009 23:37:35 +0000 (23:37 +0000)
diff --git a/htsworkflow/pipelines/eland.py b/htsworkflow/pipelines/eland.py

index 08d3d415c6d4052a8cef291b03cb12caafd1e34e..f9adcd85bbdad0f82cf33672498e4a254b66cfd6 100644 (file)
--- a/htsworkflow/pipelines/eland.py
+++ b/htsworkflow/pipelines/eland.py
@@ -358,7 +358,6 @@ def check_for_eland_file(basedir, pattern, lane_id, end):
         logging.info('found eland file in %s' % (pathname,))
         return pathname
     else:
-       logging.info('no eland file in %s' % (pathname,))
         return None
  
  def eland(gerald_dir, gerald=None, genome_maps=None):
@@ -399,7 +398,9 @@ def eland(gerald_dir, gerald=None, genome_maps=None):
                      if pathname is not None:
                        break
                  else:
+                    logging.debug("No eland file found in %s for lane %s and end %s" %(basedir, lane_id, end))
                      continue
+
                  # yes the lane_id is also being computed in ElandLane._update
                  # I didn't want to clutter up my constructor
                  # but I needed to persist the sample_name/lane_id for
diff --git a/htsworkflow/pipelines/firecrest.py b/htsworkflow/pipelines/firecrest.py

index e8d0a2898c0c4d5621732a2fd0abdb5778dcdfd1..fe5d01a68d6a3fce7a42377165c770291c574d42 100644 (file)
--- a/htsworkflow/pipelines/firecrest.py
+++ b/htsworkflow/pipelines/firecrest.py
@@ -102,6 +102,7 @@ def firecrest(pathname):
      Examine the directory at pathname and initalize a Firecrest object
      """
      f = Firecrest()
+    f.pathname = pathname
  
      # parse firecrest directory name
      path, name = os.path.split(pathname)
diff --git a/htsworkflow/pipelines/ipar.py b/htsworkflow/pipelines/ipar.py

index a9544203afb7ee255c834f07e83ecbed91c39e67..239239ecbf17aa1247d6185d8dd528b0e87eb11d 100644 (file)
--- a/htsworkflow/pipelines/ipar.py
+++ b/htsworkflow/pipelines/ipar.py
@@ -189,6 +189,7 @@ def ipar(pathname):
      """
      logging.info("Searching IPAR directory")
      i = IPAR()
+    i.pathname = pathname
  
      # parse firecrest directory name
      path, name = os.path.split(pathname)
diff --git a/htsworkflow/pipelines/runfolder.py b/htsworkflow/pipelines/runfolder.py

index 097a61732884a7d2b90983cf01ff8ed61000e295..56453aff13bfc1441d16d130d08c12f28fdf5914 100644 (file)
--- a/htsworkflow/pipelines/runfolder.py
+++ b/htsworkflow/pipelines/runfolder.py
@@ -442,18 +442,79 @@ def extract_results(runs, output_base_dir=None):
                  logging.info('Saving to %s' % (dest_name, ))
                  bzip.wait()
  
-def clean_runs(runs):
+def rm_list(files, dry_run=True):
+    """
+    Delete every path in files, logging each deletion.
+
+    Directories are removed recursively; plain files and symbolic
+    links are unlinked.  When dry_run is true (the default) the
+    deletions are only logged.  Paths that are missing are logged
+    as warnings and skipped.
+    """
+    for f in files:
+        # lexists, unlike exists, is also true for a dangling symlink
+        if os.path.lexists(f):
+            logging.info('deleting %s' % (f,))
+            if not dry_run:
+                # rmtree refuses to act on a symlink, so a link to a
+                # directory is unlinked instead of being followed
+                if os.path.isdir(f) and not os.path.islink(f):
+                    shutil.rmtree(f)
+                else:
+                    os.unlink(f)
+        else:
+            logging.warning("%s doesn't exist."% (f,))
+
+def clean_runs(runs, dry_run=True):
      """
      Clean up run folders to optimize for compression.
+
+    For each run, delete the RunLog*xml, pipeline*.txt and *.log
+    files, the Calibration_* entries and the Images/L* entries
+    under run.pathname, then run 'make clean_intermediate' in
+    run.image_analysis.pathname when a Makefile exists there.
+
+    When dry_run is true (the default) nothing is deleted or
+    executed; the actions are only logged.
      """
-    # TODO: implement this.
-    # rm RunLog*.xml
-    # rm pipeline_*.txt
-    # rm gclog.txt
-    # rm NetCopy.log
-    # rm nfn.log
-    # rm Images/L*
-    # cd Data/C1-*_Firecrest*
-    # make clean_intermediate
-
-    pass
+    if dry_run:
+        logging.info('In dry-run mode')
+
+    for run in runs:
+        logging.info('Cleaning %s' % (run.pathname,))
+        # rm RunLog*.xml
+        runlogs = glob(os.path.join(run.pathname, 'RunLog*xml'))
+        rm_list(runlogs, dry_run)
+        # rm pipeline_*.txt
+        pipeline_logs = glob(os.path.join(run.pathname, 'pipeline*.txt'))
+        rm_list(pipeline_logs, dry_run)
+        # rm gclog.txt?
+        # rm NetCopy.log? Isn't this robocopy?
+        logs = glob(os.path.join(run.pathname, '*.log'))
+        rm_list(logs, dry_run)
+        # rm nfn.log?
+        # Calibration
+        calibration_dir = glob(os.path.join(run.pathname, 'Calibration_*'))
+        rm_list(calibration_dir, dry_run)
+        # rm Images/L*
+        logging.info("Cleaning images")
+        image_dirs = glob(os.path.join(run.pathname, 'Images', 'L*'))
+        rm_list(image_dirs, dry_run)
+        # cd Data/C1-*_Firecrest*
+        logging.info("Cleaning intermediate files")
+        # make clean_intermediate
+        analysis_dir = run.image_analysis.pathname
+        if os.path.exists(os.path.join(analysis_dir, 'Makefile')):
+            if dry_run:
+                # a dry run must leave the disk untouched, so only report
+                logging.info('would run make clean_intermediate in %s' %
+                             (analysis_dir,))
+            else:
+                clean_process = subprocess.Popen(['make', 'clean_intermediate'],
+                                                 cwd=analysis_dir,)
+                if clean_process.wait() != 0:
+                    logging.warning('make clean_intermediate failed in %s' %
+                                    (analysis_dir,))
+
+
+
diff --git a/scripts/runfolder b/scripts/runfolder

index 628050e704b342a0c7d70fea4fdedddda6e23110..1380fbf7fde7affcd856d2884af1889811ba3fe2 100644 (file)
--- a/scripts/runfolder
+++ b/scripts/runfolder
@@ -43,6 +43,9 @@ def make_parser():
      parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
                        default=False,
                        help='turn on verbose mode')
+    parser.add_option('--dry-run', action='store_true', default=False,
+                      help="Don't delete anything (in clean mode)")
+
      commands = optparse.OptionGroup(parser, 'Commands')
  
      commands.add_option('-s', '--summary', dest='summary', action='store_true',
@@ -55,6 +58,8 @@ def make_parser():
             default=False,
             help='create run-xml summary, compress the eland result files, and '
                  'copy them and the Summary.htm file into archival directory.')
+    commands.add_option('-c', '--clean', action='store_true', default=False,
+                        help='Clean runfolder, preparing it for long-term storage')
      parser.add_option_group(commands)
  
      parser.add_option('-o', '--output-dir', default=None,
@@ -116,6 +121,10 @@ def main(cmdlist=None):
          if opt.extract_results:
              runfolder.extract_results(runs, opt.output_dir)
              command_run = True
+        if opt.clean:
+            runfolder.clean_runs(runs, opt.dry_run)
+            command_run = True
+
          if command_run == False:
              print "You need to specify a command."+os.linesep
              parser.print_help()
author	Diane Trout <diane@caltech.edu>
	Fri, 24 Apr 2009 23:37:35 +0000 (23:37 +0000)
committer	Diane Trout <diane@caltech.edu>
	Fri, 24 Apr 2009 23:37:35 +0000 (23:37 +0000)
htsworkflow/pipelines/eland.py		patch \| blob \| history
htsworkflow/pipelines/firecrest.py		patch \| blob \| history
htsworkflow/pipelines/ipar.py		patch \| blob \| history
htsworkflow/pipelines/runfolder.py		patch \| blob \| history
scripts/runfolder		patch \| blob \| history