from datetime import datetime, date
import logging
import os
+import stat
import time
from htsworkflow.pipelines.summary import Summary
self._lanes = {}
tree = self._gerald.tree
analysis = tree.find('LaneSpecificRunParameters/ANALYSIS')
+ if analysis is None:
+ return
# according to the pipeline specs I think their fields
# are sampleName_laneID, with sampleName defaulting to s
# since laneIDs are constant lets just try using
if self._lane is None:
self._initalize_lanes()
return self._lanes[key]
+ def get(self, key, default):
+ if self._lane is None:
+ self._initalize_lanes()
+ return self._lanes.get(key, None)
def keys(self):
if self._lane is None:
self._initalize_lanes()
if self.tree is None:
return datetime.today()
timestamp = self.tree.findtext('ChipWideRunParameters/TIME_STAMP')
- epochstamp = time.mktime(time.strptime(timestamp, '%c'))
- return datetime.fromtimestamp(epochstamp)
+ if timestamp is not None:
+ epochstamp = time.mktime(time.strptime(timestamp, '%c'))
+ return datetime.fromtimestamp(epochstamp)
+ if self.pathname is not None:
+ epochstamp = os.stat(self.pathname)[stat.ST_MTIME]
+ return datetime.fromtimestamp(epochstamp)
+ return datetime.today()
date = property(_get_date)
def _get_time(self):
root = os.path.join(root,'')
experiment_dir = self.tree.findtext('ChipWideRunParameters/EXPT_DIR')
- if experiment_dir is None:
- return None
- experiment_dir = experiment_dir.replace(root, '')
- if len(experiment_dir) == 0:
+ if experiment_dir is not None:
+ experiment_dir = experiment_dir.replace(root, '')
+ experiment_dir = self.tree.findtext('Defaults/EXPT_DIR')
+ if experiment_dir is not None:
+ _, experiment_dir = os.path.split(experiment_dir)
+ if experiment_dir is None or len(experiment_dir) == 0:
return None
dirnames = experiment_dir.split(os.path.sep)
g.tree = ElementTree.parse(config_pathname).getroot()
# parse Summary.htm file
- summary_pathname = os.path.join(g.pathname, 'Summary.xml')
- if os.path.exists(summary_pathname):
+ summary_xml = os.path.join(g.pathname, 'Summary.xml')
+ summary_htm = os.path.join(g.pathname, 'Summary.htm')
+ status_files_summary = os.path.join(g.pathname, '..', 'Data', 'Status_Files', 'Summary.htm')
+ if os.path.exists(summary_xml):
LOGGER.info("Parsing Summary.xml")
- else:
+ summary_pathname = summary_xml
+ elif os.path.exists(summary_htm):
summary_pathname = os.path.join(g.pathname, 'Summary.htm')
LOGGER.info("Parsing Summary.htm")
+ else:
+ summary_pathname = status_files_summary
+ LOGGER.info("Parsing %s" % (status_files_summary,))
g.summary = Summary(summary_pathname)
# parse eland files
g.eland_results = eland(g.pathname, g)
self.pathname = None
self._name = None
self._flowcell_id = flowcell_id
+ self.datadir = None
self.image_analysis = None
self.bustard = None
self.gerald = None
from htsworkflow.pipelines import bustard
from htsworkflow.pipelines import gerald
- def scan_post_image_analysis(runs, runfolder, image_analysis, pathname):
+ def scan_post_image_analysis(runs, runfolder, datadir, image_analysis, pathname):
LOGGER.info("Looking for bustard directories in %s" % (pathname,))
bustard_dirs = glob(os.path.join(pathname, "Bustard*"))
# RTA BaseCalls looks enough like Bustard.
try:
g = gerald.gerald(gerald_pathname)
p = PipelineRun(runfolder, flowcell_id)
+ p.datadir = datadir
+ p.image_analysis = image_analysis
+ p.bustard = b
+ p.gerald = g
+ runs.append(p)
+ except IOError, e:
+ LOGGER.error("Ignoring " + str(e))
+
+ aligned_glob = os.path.join(runfolder, 'Aligned*')
+ for aligned in glob(aligned_glob):
+ LOGGER.info("Found aligned directory %s" % (aligned,))
+ try:
+ g = gerald.gerald(aligned)
+ p = PipelineRun(runfolder, flowcell_id)
+ p.datadir = datadir
p.image_analysis = image_analysis
p.bustard = b
p.gerald = g
)
else:
scan_post_image_analysis(
- runs, runfolder, image_analysis, ipar_pathname
+ runs, runfolder, datadir, image_analysis, ipar_pathname
)
return runs
os.chdir(cwd)
def save_summary_file(pipeline, cycle_dir):
    """Copy the run's Summary.htm report into *cycle_dir*.

    Looks first in the gerald analysis directory; if no Summary.htm is
    there, falls back to the Data/Status_Files copy (when the run's
    datadir is known). Logs, but does not fail, when neither exists.

    :param pipeline: PipelineRun with .gerald set (and optionally .datadir)
    :param cycle_dir: destination directory for the copied report
    """
    gerald_object = pipeline.gerald
    gerald_summary = os.path.join(gerald_object.pathname, 'Summary.htm')
    # Robustness: PipelineRun initializes datadir to None; guard before
    # joining so a run without a datadir doesn't raise TypeError.
    if pipeline.datadir is not None:
        status_files_summary = os.path.join(pipeline.datadir,
                                            'Status_Files', 'Summary.htm')
    else:
        status_files_summary = None
    if os.path.exists(gerald_summary):
        LOGGER.info('Copying %s to %s' % (gerald_summary, cycle_dir))
        shutil.copy(gerald_summary, cycle_dir)
    elif status_files_summary is not None and os.path.exists(status_files_summary):
        LOGGER.info('Copying %s to %s' % (status_files_summary, cycle_dir))
        shutil.copy(status_files_summary, cycle_dir)
    else:
        # Bug fix: the original logged the no-longer-defined name
        # summary_path here, raising NameError whenever no summary
        # file was found.
        LOGGER.info('Summary file %s was not found' % (gerald_summary,))
if site is not None:
lanes = []
for lane in range(1, 9):
- if r.gerald.lanes[lane].analysis != 'none':
+ lane_parameters = r.gerald.lanes.get(lane, None)
+ if lane_parameters is not None and lane_parameters.analysis != 'none':
lanes.append(lane)
run_name = srf.pathname_to_run_name(r.pathname)
seq_cmds = []
+ LOGGER.info("Raw Format is: %s" % (raw_format, ))
if raw_format == 'fastq':
- srf.copy_hiseq_project_fastqs(run_name, r.bustard.pathname, site, cycle_dir)
+ rawpath = os.path.join(r.pathname, r.gerald.runfolder_name)
+ LOGGER.info("raw data = %s" % (rawpath,))
+ srf.copy_hiseq_project_fastqs(run_name, rawpath, site, cycle_dir)
elif raw_format == 'qseq':
seq_cmds = srf.make_qseq_commands(run_name, r.bustard.pathname, lanes, site, cycle_dir)
elif raw_format == 'srf':
g = r.gerald
# save summary file
- save_summary_file(g, cycle_dir)
+ save_summary_file(r, cycle_dir)
# compress eland result files
compress_eland_results(g, cycle_dir, num_jobs)