2 Core information needed to inspect a runfolder.
15 LOGGER = logging.getLogger(__name__)
17 from htsworkflow.pipelines import firecrest
18 from htsworkflow.pipelines import ipar
19 from htsworkflow.pipelines import bustard
20 from htsworkflow.pipelines import gerald
21 from htsworkflow.pipelines import ElementTree, \
22 EUROPEAN_STRPTIME, EUROPEAN_DATE_RE, \
23 VERSION_RE, USER_RE, \
24 LANES_PER_FLOWCELL, LANE_LIST
25 from htsworkflow.util.alphanum import alphanum
26 from htsworkflow.util.ethelp import indent, flatten
27 from htsworkflow.util.queuecommands import QueueCommands
29 from htsworkflow.pipelines import srf
class PipelineRun(object):
    """
    Capture "interesting" information about a pipeline run

    Holds the flowcell id plus the three analysis components of a run
    (image analysis: firecrest/IPAR, base calling: bustard,
    alignment: gerald) and can serialize/restore itself as XML.
    """
    PIPELINE_RUN = 'PipelineRun'
    FLOWCELL_ID = 'FlowcellID'

    def __init__(self, pathname=None, flowcell_id=None, xml=None):
        """
        :Parameters:
          - `pathname`: root of the runfolder (optional)
          - `flowcell_id`: explicit flowcell id; if None it is discovered lazily
          - `xml`: previously saved ElementTree node to restore state from
        """
        if pathname is not None:
            self.pathname = os.path.normpath(pathname)
        else:
            self.pathname = None
        self._name = None
        self._flowcell_id = flowcell_id
        # datadir is filled in by the runfolder scanning helpers
        self.datadir = None
        self.image_analysis = None
        self.bustard = None
        self.gerald = None

        if xml is not None:
            self.set_elements(xml)

    def _get_flowcell_id(self):
        """Return the flowcell id, discovering it if necessary.

        Tries RunInfo.xml, then Config/FlowcellId.xml, then the runfolder
        path, and finally falls back to 'unknown'.
        """
        if self._flowcell_id is None:
            self._flowcell_id = self._get_flowcell_id_from_runinfo()
        if self._flowcell_id is None:
            self._flowcell_id = self._get_flowcell_id_from_flowcellid()
        if self._flowcell_id is None:
            self._flowcell_id = self._get_flowcell_id_from_path()
        if self._flowcell_id is None:
            self._flowcell_id = 'unknown'
            LOGGER.error(
                "Flowcell id was not found, guessing %s" % (
                    self._flowcell_id,))
        return self._flowcell_id
    flowcell_id = property(_get_flowcell_id)

    def _get_flowcell_id_from_flowcellid(self):
        """Extract flowcell id from a Config/FlowcellId.xml file

        :return: flowcell id text, or None if the file does not exist.
        """
        config_dir = os.path.join(self.pathname, 'Config')
        flowcell_id_path = os.path.join(config_dir, 'FlowcellId.xml')
        if os.path.exists(flowcell_id_path):
            flowcell_id_tree = ElementTree.parse(flowcell_id_path)
            return flowcell_id_tree.findtext('Text')

    def _get_flowcell_id_from_runinfo(self):
        """Read RunInfo.xml (if present) for the flowcell id.

        :return: flowcell text from /RunInfo/Run/Flowcell, or None.
        """
        runinfo = os.path.join(self.pathname, 'RunInfo.xml')
        if os.path.exists(runinfo):
            tree = ElementTree.parse(runinfo)
            root = tree.getroot()
            # NOTE(review): .xpath implies lxml-backed ElementTree — confirm
            fc_nodes = root.xpath('/RunInfo/Run/Flowcell')
            if len(fc_nodes) == 1:
                return fc_nodes[0].text

    def _get_flowcell_id_from_path(self):
        """Guess a flowcell name from the runfolder path.

        Runfolder names conventionally end in _<flowcell id>.
        """
        path_fields = self.pathname.split('_')
        if len(path_fields) > 0:
            # guessing last element of filename
            return path_fields[-1]

    def _get_runfolder_name(self):
        # delegate to gerald, which knows the original runfolder name
        if self.gerald is None:
            return None
        return self.gerald.runfolder_name
    runfolder_name = property(_get_runfolder_name)

    def get_elements(self):
        """
        Make one master xml tree from all of our sub-components.
        """
        root = ElementTree.Element(PipelineRun.PIPELINE_RUN)
        flowcell = ElementTree.SubElement(root, PipelineRun.FLOWCELL_ID)
        flowcell.text = self.flowcell_id
        root.append(self.image_analysis.get_elements())
        root.append(self.bustard.get_elements())
        root.append(self.gerald.get_elements())
        return root

    def set_elements(self, tree):
        """Restore state from a tree produced by get_elements()."""
        tag = tree.tag.lower()
        if tag != PipelineRun.PIPELINE_RUN.lower():
            raise ValueError('Pipeline Run Expecting %s got %s' % (
                PipelineRun.PIPELINE_RUN, tag))
        for element in tree:
            tag = element.tag.lower()
            if tag == PipelineRun.FLOWCELL_ID.lower():
                self._flowcell_id = element.text
            #ok the xword.Xword.XWORD pattern for module.class.constant is lame
            # you should only have Firecrest or IPAR, never both of them.
            elif tag == firecrest.Firecrest.FIRECREST.lower():
                self.image_analysis = firecrest.Firecrest(xml=element)
            elif tag == ipar.IPAR.IPAR.lower():
                self.image_analysis = ipar.IPAR(xml=element)
            elif tag == bustard.Bustard.BUSTARD.lower():
                self.bustard = bustard.Bustard(xml=element)
            elif tag == gerald.Gerald.GERALD.lower():
                self.gerald = gerald.Gerald(xml=element)
            elif tag == gerald.CASAVA.GERALD.lower():
                self.gerald = gerald.CASAVA(xml=element)
            else:
                LOGGER.warn('PipelineRun unrecognized tag %s' % (tag,))

    def _get_run_name(self):
        """
        Given a run tuple, find the latest date and use that as our name
        """
        if self._name is None:
            tmax = max(self.image_analysis.time, self.bustard.time, self.gerald.time)
            timestamp = time.strftime('%Y-%m-%d', time.localtime(tmax))
            self._name = 'run_' + self.flowcell_id + "_" + timestamp + '.xml'
        return self._name
    name = property(_get_run_name)

    def save(self, destdir=None):
        """Write the combined XML run report into destdir (default: cwd)."""
        if destdir is None:
            destdir = ''
        LOGGER.info("Saving run report " + self.name)
        xml = self.get_elements()
        indent(xml)
        dest_pathname = os.path.join(destdir, self.name)
        ElementTree.ElementTree(xml).write(dest_pathname)

    def load(self, filename):
        """Restore state from a previously saved run xml file."""
        LOGGER.info("Loading run report from " + filename)
        tree = ElementTree.parse(filename).getroot()
        self.set_elements(tree)
def load_pipeline_run_xml(pathname):
    """
    Load and instantiate a Pipeline run from a run xml file

    :Parameters:
      - `pathname` : location of an run xml file

    :Returns: initialized PipelineRun object
    """
    tree = ElementTree.parse(pathname).getroot()
    run = PipelineRun(xml=tree)
    # BUG FIX: the result was built but never returned
    return run
def get_runs(runfolder, flowcell_id=None):
    """
    Search through a run folder for all the various sub component runs
    and then return a PipelineRun for each different combination.

    For example if there are two different GERALD runs, this will
    generate two different PipelineRun objects, that differ
    in their gerald component.
    """
    # imported locally to avoid a cyclic import with the component modules
    from htsworkflow.pipelines import firecrest
    from htsworkflow.pipelines import ipar
    from htsworkflow.pipelines import bustard
    from htsworkflow.pipelines import gerald

    datadir = os.path.join(runfolder, 'Data')

    LOGGER.info('Searching for runs in ' + datadir)
    runs = []
    # scan for firecrest directories
    for firecrest_pathname in glob(os.path.join(datadir, "*Firecrest*")):
        LOGGER.info('Found firecrest in ' + datadir)
        image_analysis = firecrest.firecrest(firecrest_pathname)
        if image_analysis is None:
            LOGGER.info(
                "%s is an empty or invalid firecrest directory" % (firecrest_pathname,))
        else:
            scan_post_image_analysis(
                runs, runfolder, datadir, image_analysis, firecrest_pathname, flowcell_id)
    # scan for IPAR directories
    ipar_dirs = glob(os.path.join(datadir, "IPAR_*"))
    # The Intensities directory from the RTA software looks a lot like IPAR
    ipar_dirs.extend(glob(os.path.join(datadir, 'Intensities')))
    for ipar_pathname in ipar_dirs:
        LOGGER.info('Found ipar directories in ' + datadir)
        image_analysis = ipar.ipar(ipar_pathname)
        if image_analysis is None:
            LOGGER.info(
                "%s is an empty or invalid IPAR directory" % (ipar_pathname,))
        else:
            scan_post_image_analysis(
                runs, runfolder, datadir, image_analysis, ipar_pathname, flowcell_id)

    return runs
def scan_post_image_analysis(runs, runfolder, datadir, image_analysis,
                             pathname, flowcell_id):
    """Find base-calling/alignment results under one image-analysis directory.

    Appends any PipelineRun objects found to `runs` (mutated in place).
    """
    added = build_hiseq_runs(image_analysis, runs, datadir, runfolder, flowcell_id)
    # If we're a multiplexed run, don't look for older run type.
    if added > 0:
        return

    LOGGER.info("Looking for bustard directories in %s" % (pathname,))
    bustard_dirs = glob(os.path.join(pathname, "Bustard*"))
    # RTA BaseCalls looks enough like Bustard.
    bustard_dirs.extend(glob(os.path.join(pathname, "BaseCalls")))
    for bustard_pathname in bustard_dirs:
        LOGGER.info("Found bustard directory %s" % (bustard_pathname,))
        b = bustard.bustard(bustard_pathname)
        build_gerald_runs(runs, b, image_analysis, bustard_pathname, datadir, pathname,
                          runfolder, flowcell_id)
def build_gerald_runs(runs, b, image_analysis, bustard_pathname, datadir, pathname, runfolder,
                      flowcell_id):
    """Append a PipelineRun for each GERALD directory under a bustard dir.

    :Returns: number of runs added to `runs`.
    """
    start = len(runs)
    gerald_glob = os.path.join(bustard_pathname, 'GERALD*')
    LOGGER.info("Looking for gerald directories in %s" % (pathname,))
    for gerald_pathname in glob(gerald_glob):
        LOGGER.info("Found gerald directory %s" % (gerald_pathname,))
        try:
            g = gerald.gerald(gerald_pathname)
            p = PipelineRun(runfolder, flowcell_id)
            p.datadir = datadir
            p.image_analysis = image_analysis
            p.bustard = b
            p.gerald = g
            runs.append(p)
        except IOError as e:
            # a malformed gerald directory shouldn't abort the whole scan
            LOGGER.error("Ignoring " + str(e))
    return len(runs) - start
def build_hiseq_runs(image_analysis, runs, datadir, runfolder, flowcell_id):
    """Scan a HiSeq-style runfolder for matched Aligned/Unaligned dir pairs.

    :Returns: number of runs added to `runs`.
    """
    start = len(runs)
    aligned_glob = os.path.join(runfolder, 'Aligned*')
    unaligned_glob = os.path.join(runfolder, 'Unaligned*')

    aligned_paths = glob(aligned_glob)
    unaligned_paths = glob(unaligned_glob)

    matched_paths = hiseq_match_aligned_unaligned(aligned_paths, unaligned_paths)
    LOGGER.debug("Matched HiSeq analysis: %s", str(matched_paths))

    for aligned, unaligned in matched_paths:
        if unaligned is None:
            LOGGER.warn("Aligned directory %s without matching unaligned, skipping", aligned)
            continue

        # TODO: scan for aligned then remove them from unaligned list
        # (was a stray Python-2 `print` debug statement)
        try:
            # BUG FIX: the original parsed the aligned directory twice;
            # reuse the first gerald.gerald() result.
            g = gerald.gerald(aligned)
            p = PipelineRun(runfolder, flowcell_id)
            p.datadir = datadir
            p.image_analysis = image_analysis
            p.bustard = bustard.bustard(unaligned)
            p.gerald = g
            runs.append(p)
        except IOError as e:
            LOGGER.error("Ignoring " + str(e))
    return len(runs) - start
def hiseq_match_aligned_unaligned(aligned, unaligned):
    """Match aligned and unaligned folders from separate lists.

    :Returns: list of (aligned, unaligned) path tuples; either member may
              be None when no directory with the same suffix exists.
    """
    # (removed an unused compiled regex that duplicated
    #  build_dir_dict_by_suffix's internal pattern)
    aligned_by_suffix = build_dir_dict_by_suffix('Aligned', aligned)
    unaligned_by_suffix = build_dir_dict_by_suffix('Unaligned', unaligned)

    keys = set(aligned_by_suffix.keys()).union(set(unaligned_by_suffix.keys()))

    matches = []
    for key in keys:
        a = aligned_by_suffix.get(key)
        u = unaligned_by_suffix.get(key)
        matches.append((a, u))
    return matches
def build_dir_dict_by_suffix(prefix, dirnames):
    """Build a dictionary indexed by suffix of last directory name.

    It assumes a constant prefix: e.g. for prefix 'Aligned',
    '/x/Aligned_1234' is stored under key '_1234'.

    :Returns: dict mapping suffix -> absolute dirname; names whose
              basename doesn't start with `prefix` are skipped.
    """
    regex = re.compile(r'%s(?P<suffix>[\w]*)' % (prefix,))

    by_suffix = {}
    for absname in dirnames:
        basename = os.path.basename(absname)
        match = regex.match(basename)
        if match is not None:
            by_suffix[match.group('suffix')] = absname
    return by_suffix
def get_specific_run(gerald_dir):
    """
    Given a gerald directory, construct a PipelineRun out of its parents

    Basically this allows specifying a particular run instead of the previous
    get_runs which scans a runfolder for various combinations of
    firecrest/ipar/bustard/gerald runs.
    """
    # imported locally to avoid a cyclic import with the component modules
    from htsworkflow.pipelines import firecrest
    from htsworkflow.pipelines import ipar
    from htsworkflow.pipelines import bustard
    from htsworkflow.pipelines import gerald

    gerald_dir = os.path.expanduser(gerald_dir)
    bustard_dir = os.path.abspath(os.path.join(gerald_dir, '..'))
    image_dir = os.path.abspath(os.path.join(gerald_dir, '..', '..'))

    runfolder_dir = os.path.abspath(os.path.join(image_dir, '..', '..'))

    LOGGER.info('--- use-run detected options ---')
    LOGGER.info('runfolder: %s' % (runfolder_dir,))
    LOGGER.info('image_dir: %s' % (image_dir,))
    LOGGER.info('bustard_dir: %s' % (bustard_dir,))
    LOGGER.info('gerald_dir: %s' % (gerald_dir,))

    # find our processed image dir
    # split into parent, and leaf directory
    # leaf directory should be an IPAR or firecrest directory
    data_dir, short_image_dir = os.path.split(image_dir)
    LOGGER.info('data_dir: %s' % (data_dir,))
    # BUG FIX: log label typo ('short_iamge_dir')
    LOGGER.info('short_image_dir: %s' % (short_image_dir,))

    # guess which type of image processing directory we have by looking
    # in the leaf directory name
    if re.search('Firecrest', short_image_dir, re.IGNORECASE) is not None:
        image_run = firecrest.firecrest(image_dir)
    elif re.search('IPAR', short_image_dir, re.IGNORECASE) is not None:
        image_run = ipar.ipar(image_dir)
    elif re.search('Intensities', short_image_dir, re.IGNORECASE) is not None:
        image_run = ipar.ipar(image_dir)
    else:
        # BUG FIX: without this branch image_run could be unbound below
        image_run = None

    # if we didn't find a run, report the error and return
    if image_run is None:
        msg = '%s does not contain an image processing step' % (image_dir,)
        LOGGER.error(msg)
        return None

    # find our base calling
    base_calling_run = bustard.bustard(bustard_dir)
    if base_calling_run is None:
        LOGGER.error('%s does not contain a bustard run' % (bustard_dir,))
        return None

    # find alignments
    gerald_run = gerald.gerald(gerald_dir)
    if gerald_run is None:
        LOGGER.error('%s does not contain a gerald run' % (gerald_dir,))
        return None

    p = PipelineRun(runfolder_dir)
    p.image_analysis = image_run
    p.bustard = base_calling_run
    p.gerald = gerald_run

    LOGGER.info('Constructed PipelineRun from %s' % (gerald_dir,))
    return p
def extract_run_parameters(runs):
    """
    Search through runfolder_path for various runs and grab their parameters

    Saves each run's XML report next to the current working directory.
    """
    # NOTE(review): body reconstructed — original lines were missing;
    # confirm against upstream htsworkflow source.
    for run in runs:
        run.save()
def summarize_mapped_reads(genome_map, mapped_reads):
    """
    Summarize per chromosome reads into a genome count
    But handle spike-in/contamination symlinks separately.

    Keys of `mapped_reads` that carry a path component not present in
    `genome_map` are treated as a separate genome and summed under that
    path; everything else is summed per chromosome name.
    """
    summarized_reads = {}
    genome_reads = 0
    genome = 'unknown'
    for k, v in mapped_reads.items():
        path, k = os.path.split(k)
        if len(path) > 0 and path not in genome_map:
            # spike-in/contamination: accumulate under the path name
            genome = path
            genome_reads += v
        else:
            summarized_reads[k] = summarized_reads.setdefault(k, 0) + v
    summarized_reads[genome] = genome_reads
    return summarized_reads
def summarize_lane(gerald, lane_id):
    """Build a list of human-readable report lines for one lane.

    :Returns: list of strings (ends with an empty line as separator).
    """
    report = []
    lane_results = gerald.summary.lane_results
    eland_result = gerald.eland_results[lane_id]
    report.append("Sample name %s" % (eland_result.sample_name))
    report.append("Lane id %s end %s" % (lane_id.lane, lane_id.read))

    # cluster statistics are only present for reads the summary covers
    if lane_id.read < len(lane_results) and \
       lane_id.lane in lane_results[lane_id.read]:
        summary_results = lane_results[lane_id.read][lane_id.lane]
        cluster = summary_results.cluster
        report.append("Clusters %d +/- %d" % (cluster[0], cluster[1]))
    report.append("Total Reads: %d" % (eland_result.reads))

    if hasattr(eland_result, 'match_codes'):
        mc = eland_result.match_codes
        nm = mc['NM']
        nm_percent = float(nm) / eland_result.reads * 100
        qc = mc['QC']
        qc_percent = float(qc) / eland_result.reads * 100

        report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent))
        report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent))
        report.append('Unique (0,1,2 mismatches) %d %d %d' % \
            (mc['U0'], mc['U1'], mc['U2']))
        report.append('Repeat (0,1,2 mismatches) %d %d %d' % \
            (mc['R0'], mc['R1'], mc['R2']))

    if hasattr(eland_result, 'genome_map'):
        report.append("Mapped Reads")
        mapped_reads = summarize_mapped_reads(eland_result.genome_map,
                                              eland_result.mapped_reads)
        for name, counts in mapped_reads.items():
            report.append("  %s: %d" % (name, counts))

    report.append('')
    return report
def summary_report(runs):
    """
    Summarize cluster numbers and mapped read counts for a runfolder

    :Returns: a single newline-joined report string covering all runs.
    """
    report = []
    for run in runs:
        # print a run name?
        report.append('Summary for %s' % (run.name,))
        # sort the report by lane id
        eland_keys = sorted(run.gerald.eland_results.keys())
        for lane_id in eland_keys:
            report.extend(summarize_lane(run.gerald, lane_id))
            report.append('---')
            report.append('')
    return os.linesep.join(report)
def is_compressed(filename):
    """Return True if filename carries a known compression suffix (.gz/.bz2)."""
    # single splitext call instead of the original's repeated evaluation
    return os.path.splitext(filename)[1] in ('.gz', '.bz2')
def save_flowcell_reports(data_dir, cycle_dir):
    """
    Save the flowcell quality reports

    Tars Data/reports (plus Status.xml/Status.xsl when present) into
    cycle_dir/flowcell-reports.tar.bz2.  tar runs relative to data_dir.
    """
    data_dir = os.path.abspath(data_dir)
    status_file = os.path.join(data_dir, 'Status.xml')
    reports_dir = os.path.join(data_dir, 'reports')
    reports_dest = os.path.join(cycle_dir, 'flowcell-reports.tar.bz2')
    if os.path.exists(reports_dir):
        cmd_list = ['tar', 'cjvf', reports_dest, 'reports/']
        if os.path.exists(status_file):
            cmd_list.extend(['Status.xml', 'Status.xsl'])
        LOGGER.info("Saving reports from " + reports_dir)
        # run tar from inside data_dir so archive paths stay relative,
        # restoring the previous working directory afterwards
        cwd = os.getcwd()
        try:
            os.chdir(data_dir)
            q = QueueCommands([" ".join(cmd_list)])
            q.run()
        finally:
            os.chdir(cwd)
def save_summary_file(pipeline, cycle_dir):
    """Copy the run's Summary.htm into cycle_dir.

    Prefers the gerald directory's copy, falling back to
    Data/Status_Files/Summary.htm; logs when neither exists.
    """
    gerald_object = pipeline.gerald
    gerald_summary = os.path.join(gerald_object.pathname, 'Summary.htm')
    status_files_summary = os.path.join(pipeline.datadir, 'Status_Files', 'Summary.htm')
    if os.path.exists(gerald_summary):
        LOGGER.info('Copying %s to %s' % (gerald_summary, cycle_dir))
        shutil.copy(gerald_summary, cycle_dir)
    elif os.path.exists(status_files_summary):
        LOGGER.info('Copying %s to %s' % (status_files_summary, cycle_dir))
        shutil.copy(status_files_summary, cycle_dir)
    else:
        # BUG FIX: original referenced an undefined name `summary_path`
        # here, raising NameError whenever the file was missing.
        LOGGER.info('Summary file %s was not found' % (gerald_summary,))
def save_ivc_plot(bustard_object, cycle_dir):
    """
    Save the IVC page and its supporting images
    """
    plot_html = os.path.join(bustard_object.pathname, 'IVC.htm')
    plot_image_path = os.path.join(bustard_object.pathname, 'Plots')
    plot_images = os.path.join(plot_image_path, 's_?_[a-z]*.png')

    plot_target_path = os.path.join(cycle_dir, 'Plots')

    if os.path.exists(plot_html):
        LOGGER.debug("Saving %s" % (plot_html,))
        LOGGER.debug("Saving %s" % (plot_images,))
        shutil.copy(plot_html, cycle_dir)
        if not os.path.exists(plot_target_path):
            os.mkdir(plot_target_path)
        for plot_file in glob(plot_images):
            shutil.copy(plot_file, plot_target_path)
    else:
        LOGGER.warning('Missing IVC.html file, not archiving')
def compress_score_files(bustard_object, cycle_dir):
    """
    Compress score files into our result directory

    Pipes `tar c <files>` through `bzip2 -9 -c` into
    cycle_dir/scores.tar.bz2.
    """
    # check for g.pathname/Temp a new feature of 1.1rc1
    scores_path = bustard_object.pathname
    scores_path_temp = os.path.join(scores_path, 'Temp')
    if os.path.isdir(scores_path_temp):
        scores_path = scores_path_temp

    # hopefully we have a directory that contains s_*_score files
    score_files = [f for f in os.listdir(scores_path)
                   if re.match(r'.*_score.txt', f)]

    tar_cmd = ['tar', 'c'] + score_files
    bzip_cmd = ['bzip2', '-9', '-c']
    tar_dest_name = os.path.join(cycle_dir, 'scores.tar.bz2')
    tar_dest = open(tar_dest_name, 'w')
    LOGGER.info("Compressing score files from %s" % (scores_path,))
    LOGGER.info("Running tar: " + " ".join(tar_cmd[:10]))
    LOGGER.info("Running bzip2: " + " ".join(bzip_cmd))
    LOGGER.info("Writing to %s" % (tar_dest_name,))

    # NOTE(review): env reconstructed — original lines were missing; verify
    env = {'BZIP': '-9'}
    tar = subprocess.Popen(tar_cmd, stdout=subprocess.PIPE, shell=False, env=env,
                           cwd=scores_path)
    bzip = subprocess.Popen(bzip_cmd, stdin=tar.stdout, stdout=tar_dest)
    tar.wait()
    # BUG FIX: close the destination handle instead of leaking it
    tar_dest.close()
def compress_eland_results(gerald_object, cycle_dir, num_jobs=1):
    """
    Compress eland result files into the archive directory

    Already-compressed files are copied as-is; the rest are queued as
    shell `bzip2 ... > dest` commands run with up to num_jobs workers.
    """
    # copy & bzip eland files
    bz_commands = []

    for key in gerald_object.eland_results:
        eland_lane = gerald_object.eland_results[key]
        for source_name in eland_lane.pathnames:
            if source_name is None:
                LOGGER.info(
                    "Lane ID %s does not have a filename." % (eland_lane.lane_id,))
            else:
                path, name = os.path.split(source_name)
                dest_name = os.path.join(cycle_dir, name)
                LOGGER.info("Saving eland file %s to %s" % \
                            (source_name, dest_name))

                if is_compressed(name):
                    LOGGER.info('Already compressed, Saving to %s' % (dest_name,))
                    shutil.copy(source_name, dest_name)
                else:
                    # not compressed yet: queue a shell bzip2 command
                    dest_name += '.bz2'
                    args = ['bzip2', '-9', '-c', source_name, '>', dest_name]
                    bz_commands.append(" ".join(args))

    if len(bz_commands) > 0:
        q = QueueCommands(bz_commands, num_jobs)
        q.run()
def extract_results(runs, output_base_dir=None, site="individual", num_jobs=1, raw_format=None):
    """
    Iterate over runfolders in runs extracting the most useful information.
    * run parameters (in run-*.xml)
    * flowcell quality reports, IVC plots
    * eland result files (compressed)
    * srf files (raw sequence & qualities)
    """
    if output_base_dir is None:
        output_base_dir = os.getcwd()

    for r in runs:
        result_dir = os.path.join(output_base_dir, r.flowcell_id)
        LOGGER.info("Using %s as result directory" % (result_dir,))
        if not os.path.exists(result_dir):
            os.mkdir(result_dir)

        # create cycle_dir
        cycle = "C%d-%d" % (r.image_analysis.start, r.image_analysis.stop)
        LOGGER.info("Filling in %s" % (cycle,))
        cycle_dir = os.path.join(result_dir, cycle)
        cycle_dir = os.path.abspath(cycle_dir)
        if os.path.exists(cycle_dir):
            LOGGER.error("%s already exists, not overwriting" % (cycle_dir,))
            continue
        else:
            os.mkdir(cycle_dir)

        # save run file
        r.save(cycle_dir)

        # save illumina flowcell status report
        save_flowcell_reports(os.path.join(r.image_analysis.pathname, '..'),
                              cycle_dir)

        # save stuff from bustard
        # grab IVC plot
        save_ivc_plot(r.bustard, cycle_dir)

        # build base call saving commands
        if site is not None:
            save_raw_data(num_jobs, r, site, raw_format, cycle_dir)

        # save stuff from GERALD
        # copy stuff out of the main run
        g = r.gerald

        # save summary file
        save_summary_file(r, cycle_dir)

        # compress eland result files
        compress_eland_results(g, cycle_dir, num_jobs)

        # md5 all the compressed files once we're done
        md5_commands = srf.make_md5_commands(cycle_dir)
        srf.run_commands(cycle_dir, md5_commands, num_jobs)
def save_raw_data(num_jobs, r, site, raw_format, cycle_dir):
    """Save the raw sequence data of a run in fastq/qseq/srf format.

    :Raises: ValueError for an unrecognized raw_format.
    """
    # BUG FIX: `lanes` was used below but never built in the pasted code
    lanes = []
    for lane in r.gerald.lanes:
        lane_parameters = r.gerald.lanes.get(lane, None)
        if lane_parameters is not None:
            lanes.append(lane)

    run_name = srf.pathname_to_run_name(r.pathname)
    seq_cmds = []
    if raw_format is None:
        # fall back to whatever format bustard produced
        raw_format = r.bustard.sequence_format

    LOGGER.info("Raw Format is: %s" % (raw_format, ))
    if raw_format == 'fastq':
        rawpath = os.path.join(r.pathname, r.gerald.runfolder_name)
        LOGGER.info("raw data = %s" % (rawpath,))
        srf.copy_hiseq_project_fastqs(run_name, rawpath, site, cycle_dir)
    elif raw_format == 'qseq':
        seq_cmds = srf.make_qseq_commands(run_name, r.bustard.pathname, lanes, site, cycle_dir)
    elif raw_format == 'srf':
        seq_cmds = srf.make_srf_commands(run_name, r.bustard.pathname, lanes, site, cycle_dir, 0)
    else:
        raise ValueError('Unknown --raw-format=%s' % (raw_format))
    srf.run_commands(r.bustard.pathname, seq_cmds, num_jobs)
def rm_list(files, dry_run=True):
    """Delete each path in files; with dry_run=True only log what would go."""
    for f in files:
        if os.path.exists(f):
            LOGGER.info('deleting %s' % (f,))
            if not dry_run:
                if os.path.isdir(f):
                    shutil.rmtree(f)
                else:
                    os.unlink(f)
        else:
            LOGGER.warn("%s doesn't exist." % (f,))
709 def clean_runs(runs, dry_run=True):
711 Clean up run folders to optimize for compression.
714 LOGGER.info('In dry-run mode')
717 LOGGER.info('Cleaninging %s' % (run.pathname,))
719 runlogs = glob(os.path.join(run.pathname, 'RunLog*xml'))
720 rm_list(runlogs, dry_run)
722 pipeline_logs = glob(os.path.join(run.pathname, 'pipeline*.txt'))
723 rm_list(pipeline_logs, dry_run)
725 # rm NetCopy.log? Isn't this robocopy?
726 logs = glob(os.path.join(run.pathname, '*.log'))
727 rm_list(logs, dry_run)
730 calibration_dir = glob(os.path.join(run.pathname, 'Calibration_*'))
731 rm_list(calibration_dir, dry_run)
733 LOGGER.info("Cleaning images")
734 image_dirs = glob(os.path.join(run.pathname, 'Images', 'L*'))
735 rm_list(image_dirs, dry_run)
737 LOGGER.info("Cleaning ReadPrep*")
738 read_prep_dirs = glob(os.path.join(run.pathname, 'ReadPrep*'))
739 rm_list(read_prep_dirs, dry_run)
741 LOGGER.info("Cleaning Thubmnail_images")
742 thumbnail_dirs = glob(os.path.join(run.pathname, 'Thumbnail_Images'))
743 rm_list(thumbnail_dirs, dry_run)
745 # make clean_intermediate
746 logging.info("Cleaning intermediate files")
747 if os.path.exists(os.path.join(run.image_analysis.pathname, 'Makefile')):
748 clean_process = subprocess.Popen(['make', 'clean_intermediate'],
749 cwd=run.image_analysis.pathname,)