return runs
def get_specific_run(gerald_dir):
    """
    Given a gerald directory, construct a PipelineRun out of its parents

    Basically this allows specifying a particular run instead of the previous
    get_runs which scans a runfolder for various combinations of
    firecrest/ipar/bustard/gerald runs.

    The directory layout is derived purely from path arithmetic on
    gerald_dir: the bustard directory is its parent, the image processing
    directory is its grandparent, and the runfolder is two levels above
    the image processing directory.

    Returns the constructed PipelineRun, or None (after logging an error)
    when the image processing directory name contains neither 'Firecrest'
    nor 'IPAR', or when any of the stage objects comes back as None.
    """
    # kept as function-local imports, exactly as the original block did
    from htsworkflow.pipelines import firecrest
    from htsworkflow.pipelines import ipar
    from htsworkflow.pipelines import bustard
    from htsworkflow.pipelines import gerald

    bustard_dir = os.path.abspath(os.path.join(gerald_dir, '..'))
    image_dir = os.path.abspath(os.path.join(gerald_dir, '..', '..'))

    runfolder_dir = os.path.abspath(os.path.join(image_dir, '..', '..'))

    logging.info('--- use-run detected options ---')
    logging.info('runfolder: %s', runfolder_dir)
    logging.info('image_dir: %s', image_dir)
    logging.info('bustard_dir: %s', bustard_dir)
    logging.info('gerald_dir: %s', gerald_dir)

    # split into parent, and leaf directory
    # leaf directory should be an IPAR or firecrest directory
    data_dir, short_image_dir = os.path.split(image_dir)
    logging.info('data_dir: %s', data_dir)
    logging.info('short_image_dir: %s', short_image_dir)

    # guess which type of image processing directory we have by looking
    # in the leaf directory name; Firecrest is checked first
    image_run = None
    if re.search('Firecrest', short_image_dir, re.IGNORECASE) is not None:
        image_run = firecrest.firecrest(image_dir)
    elif re.search('IPAR', short_image_dir, re.IGNORECASE) is not None:
        image_run = ipar.ipar(image_dir)

    # if we didn't find a run, report the error and return
    if image_run is None:
        logging.error('%s does not contain an image processing step',
                      image_dir)
        return None

    # find our base calling
    base_calling_run = bustard.bustard(bustard_dir)
    if base_calling_run is None:
        logging.error('%s does not contain a bustard run', bustard_dir)
        return None

    # find alignments
    gerald_run = gerald.gerald(gerald_dir)
    if gerald_run is None:
        logging.error('%s does not contain a gerald run', gerald_dir)
        return None

    p = PipelineRun(runfolder_dir)
    p.image_analysis = image_run
    p.bustard = base_calling_run
    p.gerald = gerald_run

    logging.info('Constructed PipelineRun from %s', gerald_dir)
    return p
def extract_run_parameters(runs):
"""
logging.info('Saving to %s' % (dest_name, ))
bzip.wait()
-def clean_runs(runs):
def rm_list(files, dry_run=True):
    """
    Delete every path in files, logging each one.

    Directories are removed recursively, everything else (regular files
    and symbolic links, including links that point at directories or at
    nothing) is unlinked. Paths that are not present are reported with a
    warning and skipped.

    When dry_run is true (the default) the paths are only logged and
    nothing is removed.
    """
    for f in files:
        # lexists does not follow symlinks, so dangling links are still
        # found and cleaned up instead of being reported as missing
        if not os.path.lexists(f):
            logging.warning("%s doesn't exist.", f)
            continue

        logging.info('deleting %s', f)
        if dry_run:
            continue

        # shutil.rmtree refuses to operate on a symlink, so only real
        # directories go through it; links are simply unlinked
        if os.path.isdir(f) and not os.path.islink(f):
            shutil.rmtree(f)
        else:
            os.unlink(f)
+
def clean_runs(runs, dry_run=True):
    """
    Clean up run folders to optimize for compression.

    For each run this removes, via rm_list, everything directly under
    run.pathname that matches RunLog*xml, pipeline*.txt, *.log and
    Calibration_*, then everything matching Images/L*, and finally runs
    'make clean_intermediate' inside run.image_analysis.pathname when that
    directory contains a Makefile.

    When dry_run is true (the default) nothing is deleted and make is not
    invoked; the intended actions are only logged.
    """
    if dry_run:
        logging.info('In dry-run mode')

    # glob patterns, relative to the runfolder, removed in this order.
    # '*.log' is what catches NetCopy.log / nfn.log style files; note
    # that gclog.txt is not matched by any of these patterns.
    runfolder_patterns = (
        'RunLog*xml',
        'pipeline*.txt',
        '*.log',
        'Calibration_*',
    )

    for run in runs:
        logging.info('Cleaning %s', run.pathname)
        for pattern in runfolder_patterns:
            rm_list(glob(os.path.join(run.pathname, pattern)), dry_run)

        # rm Images/L*
        logging.info("Cleaning images")
        rm_list(glob(os.path.join(run.pathname, 'Images', 'L*')), dry_run)

        # cd Data/C1-*_Firecrest* ; make clean_intermediate
        logging.info("Cleaning intermediate files")
        image_analysis = run.image_analysis
        if image_analysis is None:
            # a run without an image analysis step has nothing to clean
            logging.warning('%s has no image analysis directory',
                            run.pathname)
            continue

        analysis_dir = image_analysis.pathname
        if not os.path.exists(os.path.join(analysis_dir, 'Makefile')):
            continue

        if dry_run:
            # make deletes files, so it must not run in dry-run mode
            logging.info("would run 'make clean_intermediate' in %s",
                         analysis_dir)
            continue

        clean_process = subprocess.Popen(['make', 'clean_intermediate'],
                                         cwd=analysis_dir)
        status = clean_process.wait()
        if status != 0:
            logging.error("'make clean_intermediate' in %s exited with %s",
                          analysis_dir, status)
+
+
+