+def scan_post_image_analysis(runs, runfolder, datadir, image_analysis,
+ pathname, flowcell_id):
+ added = build_hiseq_runs(image_analysis, runs, datadir, runfolder, flowcell_id)
+ # If we're a multiplexed run, don't look for older run type.
+ if added > 0:
+ return
+
+ LOGGER.info("Looking for bustard directories in %s" % (pathname,))
+ bustard_dirs = glob(os.path.join(pathname, "Bustard*"))
+ # RTA BaseCalls looks enough like Bustard.
+ bustard_dirs.extend(glob(os.path.join(pathname, "BaseCalls")))
+ for bustard_pathname in bustard_dirs:
+ LOGGER.info("Found bustard directory %s" % (bustard_pathname,))
+ b = bustard.bustard(bustard_pathname)
+ build_gerald_runs(runs, b, image_analysis, bustard_pathname, datadir, pathname,
+ runfolder, flowcell_id)
+
+
+def build_gerald_runs(runs, b, image_analysis, bustard_pathname, datadir, pathname, runfolder,
+ flowcell_id):
+ start = len(runs)
+ gerald_glob = os.path.join(bustard_pathname, 'GERALD*')
+ LOGGER.info("Looking for gerald directories in %s" % (pathname,))
+ for gerald_pathname in glob(gerald_glob):
+ LOGGER.info("Found gerald directory %s" % (gerald_pathname,))
+ try:
+ g = gerald.gerald(gerald_pathname)
+ p = PipelineRun(runfolder, flowcell_id)
+ p.datadir = datadir
+ p.image_analysis = image_analysis
+ p.bustard = b
+ p.gerald = g
+ runs.append(p)
+ except IOError, e:
+ LOGGER.error("Ignoring " + str(e))
+ return len(runs) - start
+
+
+def build_hiseq_runs(image_analysis, runs, datadir, runfolder, flowcell_id):
+ start = len(runs)
+ aligned_glob = os.path.join(runfolder, 'Aligned*')
+ unaligned_glob = os.path.join(runfolder, 'Unaligned*')
+
+ aligned_paths = glob(aligned_glob)
+ unaligned_paths = glob(unaligned_glob)
+
+ matched_paths = hiseq_match_aligned_unaligned(aligned_paths, unaligned_paths)
+ LOGGER.debug("Matched HiSeq analysis: %s", str(matched_paths))
+
+ for aligned, unaligned, suffix in matched_paths:
+ if unaligned is None:
+ LOGGER.warn("Aligned directory %s without matching unalinged, skipping", aligned)
+ continue
+
+ try:
+ p = PipelineRun(runfolder, flowcell_id)
+ p.datadir = datadir
+ p.suffix = suffix
+ p.image_analysis = image_analysis
+ p.bustard = bustard.bustard(unaligned)
+ assert p.bustard
+ if aligned:
+ p.gerald = gerald.gerald(aligned)
+ runs.append(p)
+ except IOError, e:
+ LOGGER.error("Ignoring " + str(e))
+ return len(runs) - start
+
+def hiseq_match_aligned_unaligned(aligned, unaligned):
+ """Match aligned and unaligned folders from seperate lists
+ """
+ unaligned_suffix_re = re.compile('Unaligned(?P<suffix>[\w]*)')
+
+ aligned_by_suffix = build_dir_dict_by_suffix('Aligned', aligned)
+ unaligned_by_suffix = build_dir_dict_by_suffix('Unaligned', unaligned)
+
+ keys = set(aligned_by_suffix.keys()).union(set(unaligned_by_suffix.keys()))
+
+ matches = []
+ for key in keys:
+ a = aligned_by_suffix.get(key)
+ u = unaligned_by_suffix.get(key)
+ matches.append((a, u, key))
+ return matches
+
+def build_dir_dict_by_suffix(prefix, dirnames):
+ """Build a dictionary indexed by suffix of last directory name.
+
+ It assumes a constant prefix
+ """
+ regex = re.compile('%s(?P<suffix>[\w]*)' % (prefix,))
+
+ by_suffix = {}
+ for absname in dirnames:
+ basename = os.path.basename(absname)
+ match = regex.match(basename)
+ if match:
+ by_suffix[match.group('suffix')] = absname
+ return by_suffix
+
+def get_specific_run(gerald_dir):
+ """
+ Given a gerald directory, construct a PipelineRun out of its parents
+
+ Basically this allows specifying a particular run instead of the previous
+ get_runs which scans a runfolder for various combinations of
+ firecrest/ipar/bustard/gerald runs.
+ """
+ from htsworkflow.pipelines import firecrest
+ from htsworkflow.pipelines import ipar
+ from htsworkflow.pipelines import bustard
+ from htsworkflow.pipelines import gerald
+
+ gerald_dir = os.path.expanduser(gerald_dir)
+ bustard_dir = os.path.abspath(os.path.join(gerald_dir, '..'))
+ image_dir = os.path.abspath(os.path.join(gerald_dir, '..', '..'))
+
+ runfolder_dir = os.path.abspath(os.path.join(image_dir, '..', '..'))
+
+ LOGGER.info('--- use-run detected options ---')
+ LOGGER.info('runfolder: %s' % (runfolder_dir,))
+ LOGGER.info('image_dir: %s' % (image_dir,))
+ LOGGER.info('bustard_dir: %s' % (bustard_dir,))
+ LOGGER.info('gerald_dir: %s' % (gerald_dir,))
+
+ # find our processed image dir
+ image_run = None
+ # split into parent, and leaf directory
+ # leaf directory should be an IPAR or firecrest directory
+ data_dir, short_image_dir = os.path.split(image_dir)
+ LOGGER.info('data_dir: %s' % (data_dir,))
+ LOGGER.info('short_iamge_dir: %s' % (short_image_dir,))
+
+ # guess which type of image processing directory we have by looking
+ # in the leaf directory name
+ if re.search('Firecrest', short_image_dir, re.IGNORECASE) is not None:
+ image_run = firecrest.firecrest(image_dir)
+ elif re.search('IPAR', short_image_dir, re.IGNORECASE) is not None:
+ image_run = ipar.ipar(image_dir)
+ elif re.search('Intensities', short_image_dir, re.IGNORECASE) is not None:
+ image_run = ipar.ipar(image_dir)
+
+ # if we din't find a run, report the error and return
+ if image_run is None:
+ msg = '%s does not contain an image processing step' % (image_dir,)
+ LOGGER.error(msg)
+ return None
+
+ # find our base calling
+ base_calling_run = bustard.bustard(bustard_dir)
+ if base_calling_run is None:
+ LOGGER.error('%s does not contain a bustard run' % (bustard_dir,))
+ return None
+
+ # find alignments
+ gerald_run = gerald.gerald(gerald_dir)
+ if gerald_run is None:
+ LOGGER.error('%s does not contain a gerald run' % (gerald_dir,))
+ return None
+
+ p = PipelineRun(runfolder_dir)
+ p.image_analysis = image_run
+ p.bustard = base_calling_run
+ p.gerald = gerald_run
+
+ LOGGER.info('Constructed PipelineRun from %s' % (gerald_dir,))
+ return p