if len(status_nodes) == 0:
# has no status node, add one
- logging.info("Adding status node to {0}".format(subUrn))
+ LOGGER.info("Adding status node to {0}".format(subUrn))
status_node = create_status_node(subUrn, recent_update)
add_stmt(model, subUrn, HasStatusN, status_node)
- add_stmt(model, status_node, rdfsNS['type'], StatusN)
+ add_stmt(model, status_node, rdfNS['type'], StatusN)
add_stmt(model, status_node, StatusN, status)
add_stmt(model, status_node, LastModifyN, recent_update)
update_ddf(model, subUrn, status_node, cookie=cookie)
status_is_daf = RDF.Statement(status_node, TYPE_N, dafTermOntology[''])
if not model.contains_statement(status_is_daf):
- logging.info('Adding daf to {0}, {1}'.format(submission_url,
+ LOGGER.info('Adding daf to {0}, {1}'.format(submission_url,
status_node))
daf_text = get_url_as_text(download_daf_uri, 'GET', cookie)
+ daf_hash = hashlib.md5(daf_text).hexdigest()
+ daf_hash_stmt = RDF.Statement(status_node,
+ dafTermOntology['md5sum'],
+ daf_hash)
+ model.add_statement(daf_hash_stmt)
daf.fromstring_into_model(model, status_node, daf_text)
flowcell_info = retrieve_flowcell_info(options.url, options.flowcell)
- logging.debug('genome_dir: %s' % ( options.genome_dir, ))
+ LOGGER.debug('genome_dir: %s' % ( options.genome_dir, ))
available_genomes = getAvailableGenomes(options.genome_dir)
genome_map = constructMapperDict(available_genomes)
- logging.debug('available genomes: %s' % ( unicode( genome_map.keys() ),))
+ LOGGER.debug('available genomes: %s' % ( unicode( genome_map.keys() ),))
- #config = format_gerald_config(options, flowcell_info, genome_map)
- #
- #if options.output_filepath is not None:
- # outstream = open(options.output_filepath, 'w')
- # LOGGER.info('Writing config file to %s' % (options.output_filepath,))
- #else:
- # outstream = sys.stdout
- #
- #outstream.write(config)
+ config = format_gerald_config(options, flowcell_info, genome_map)
+
+ if options.output_filepath is not None:
+     outstream = open(options.output_filepath, 'w')
+     # use the module-level LOGGER, matching the logging -> LOGGER
+     # conversion applied throughout this patch (and the commented-out
+     # original being re-enabled here, which already used LOGGER)
+     LOGGER.info('Writing config file to %s' % (options.output_filepath,))
+ else:
+     outstream = sys.stdout
+
+ outstream.write(config)
if options.sample_sheet is None:
pass
import time
try:
- from xml.etree import ElementTree
+ from xml.etree import ElementTree
except ImportError, e:
- from elementtree import ElementTree
+ from elementtree import ElementTree
+LOGGER = logging.getLogger(__name__)
+
EUROPEAN_STRPTIME = "%d-%m-%Y"
EUROPEAN_DATE_RE = "([0-9]{1,2}-[0-9]{1,2}-[0-9]{4,4})"
VERSION_RE = "([0-9\.]+)"
def _get_flowcell_id(self):
    # extract flowcell ID
    if self._flowcell_id is None:
-        config_dir = os.path.join(self.pathname, 'Config')
-        flowcell_id_path = os.path.join(config_dir, 'FlowcellId.xml')
-        if os.path.exists(flowcell_id_path):
-            flowcell_id_tree = ElementTree.parse(flowcell_id_path)
-            self._flowcell_id = flowcell_id_tree.findtext('Text')
-        else:
-            path_fields = self.pathname.split('_')
-            if len(path_fields) > 0:
-                # guessing last element of filename
-                flowcell_id = path_fields[-1]
+        config_dir = os.path.join(self.pathname, 'Config')
+        flowcell_id_path = os.path.join(config_dir, 'FlowcellId.xml')
+        # Config/FlowcellId.xml stores the id in a <Text> element; prefer
+        # it over guessing from the directory name
+        if os.path.exists(flowcell_id_path):
+            flowcell_id_tree = ElementTree.parse(flowcell_id_path)
+            self._flowcell_id = flowcell_id_tree.findtext('Text')
        else:
-            flowcell_id = 'unknown'
+            path_fields = self.pathname.split('_')
+            if len(path_fields) > 0:
+                # guessing last element of filename
-                flowcell_id = path_fields[-1]
++                self._flowcell_id = path_fields[-1]
+            else:
-                flowcell_id = 'unknown'
++                self._flowcell_id = 'unknown'
++
++            # warn whenever we had to fall back to guessing the id
++            LOGGER.warning(
++                "Flowcell id was not found, guessing %s" % (
++                    self._flowcell_id))
-        LOGGER.warning(
-            "Flowcell id was not found, guessing %s" % (
-                flowcell_id))
-        self._flowcell_id = flowcell_id
-        logging.warning(
-            "Flowcell id was not found, guessing %s" % (
-                flowcell_id))
-        self._flowcell_id = flowcell_id
    return self._flowcell_id
flowcell_id = property(_get_flowcell_id)
# RTA BaseCalls looks enough like Bustard.
bustard_dirs.extend(glob(os.path.join(pathname, "BaseCalls")))
for bustard_pathname in bustard_dirs:
- logging.info("Found bustard directory %s" % (bustard_pathname,))
+ LOGGER.info("Found bustard directory %s" % (bustard_pathname,))
b = bustard.bustard(bustard_pathname)
gerald_glob = os.path.join(bustard_pathname, 'GERALD*')
- logging.info("Looking for gerald directories in %s" % (pathname,))
+ LOGGER.info("Looking for gerald directories in %s" % (pathname,))
for gerald_pathname in glob(gerald_glob):
- logging.info("Found gerald directory %s" % (gerald_pathname,))
+ LOGGER.info("Found gerald directory %s" % (gerald_pathname,))
try:
g = gerald.gerald(gerald_pathname)
- p = PipelineRun(runfolder)
+ p = PipelineRun(runfolder, flowcell_id)
p.image_analysis = image_analysis
p.bustard = b
p.gerald = g
calibration_dir = glob(os.path.join(run.pathname, 'Calibration_*'))
rm_list(calibration_dir, dry_run)
# rm Images/L*
- logging.info("Cleaning images")
+ LOGGER.info("Cleaning images")
image_dirs = glob(os.path.join(run.pathname, 'Images', 'L*'))
rm_list(image_dirs, dry_run)
-    # cd Data/C1-*_Firecrest*
-    LOGGER.info("Cleaning intermediate files")
+    # rm ReadPrep
-    logging.info("Cleaning ReadPrep*")
++    LOGGER.info("Cleaning ReadPrep*")
+    read_prep_dirs = glob(os.path.join(run.pathname, 'ReadPrep*'))
+    rm_list(read_prep_dirs, dry_run)
+    # rm Thumbnail_Images
-    logging.info("Cleaning Thubmnail_images")
++    LOGGER.info("Cleaning Thumbnail_images")
+    thumbnail_dirs = glob(os.path.join(run.pathname, 'Thumbnail_Images'))
+    rm_list(thumbnail_dirs, dry_run)
+
    # make clean_intermediate
+    LOGGER.info("Cleaning intermediate files")
    if os.path.exists(os.path.join(run.image_analysis.pathname, 'Makefile')):
        clean_process = subprocess.Popen(['make', 'clean_intermediate'],
                                         cwd=run.image_analysis.pathname,)
from htsworkflow.pipelines import runfolder
from htsworkflow.pipelines.runfolder import ElementTree
+LOGGER = logging.getLogger(__name__)
+ def main(cmdlist=None):
+     """Command-line entry point for the runfolder processing tool.
+
+     cmdlist -- optional argument list; None lets optparse read sys.argv.
+     Returns 0 on success; exits via parser.error() on bad usage.
+     """
+     parser = make_parser()
+     opts, args = parser.parse_args(cmdlist)
+
+     logging.basicConfig()
+     root_log = logging.getLogger()
+     if opts.debug:
+         root_log.setLevel(logging.DEBUG)
+     elif opts.verbose:
+         root_log.setLevel(logging.INFO)
+
+     # use the module-level LOGGER, consistent with the rest of this patch
+     LOGGER.info('Starting htsworkflow illumina runfolder processing tool.')
+     runs = []
+     runs.extend(load_run_xml_file(parser, args, opts))
+     runs.extend(load_specific_runfolder_analysis(parser, args, opts))
+     runs.extend(load_runfolders(parser, args, opts))
+
+     if len(runs) == 0:
+         parser.error("Please specify some run folders to process")
+
+     command_run = False
+     if opts.summary:
+         print runfolder.summary_report(runs)
+         command_run = True
+     if opts.archive:
+         runfolder.extract_run_parameters(runs)
+         command_run = True
+     if opts.extract_results:
+         command_run = True
+         extract_results(parser, args, opts, runs)
+     if opts.clean:
+         runfolder.clean_runs(runs, opts.dry_run)
+         command_run = True
+
+     if not command_run:
+         # optparse.OptionParser provides .error(), not .perror();
+         # the original would raise AttributeError here
+         parser.error("No commands provided")
+
+     return 0
+
+
+ def load_run_xml_file(parser, args, opts):
+     """Load a previously saved PipelineRun from opts.run_xml, if given.
+
+     Returns a (possibly empty) list of PipelineRun objects.
+     """
+     runs = []
+     if opts.run_xml:
+         # handle ~ shortcut
+         # FIX: the parameter is named 'opts', not 'opt' — the original
+         # raised NameError whenever --run-xml was supplied
+         opts.run_xml = os.path.expanduser(opts.run_xml)
+         tree = ElementTree.parse(opts.run_xml).getroot()
+         runs.append(runfolder.PipelineRun(xml=tree))
+     return runs
+
+
+ def load_specific_runfolder_analysis(parser, args, opts):
+     """Return a one-element run list for opts.use_run, or [] if not found."""
+     # look for manually specified run
+     runs = []
+     if opts.use_run is not None:
+         specific_run = runfolder.get_specific_run(opts.use_run)
+         if specific_run is not None:
+             runs.append(specific_run)
+         else:
+             # logging.warn is a deprecated alias; use the module LOGGER
+             LOGGER.warning("Couldn't find a run in %s" % (opts.use_run,))
+     return runs
+
+
+ def load_runfolders(parser, args, opts):
+     """Scan every runfolder path pattern in args and collect its runs."""
+     if opts.flowcell_id is not None:
+         if len(args) != 1:
+             parser.error(
+                 'Can only force flowcell ID when operating on one run')
+     # expand shell-style patterns ourselves, then scan each match
+     runs = []
+     for pattern in args:
+         for candidate in glob(pattern):
+             runs.extend(runfolder.get_runs(candidate, opts.flowcell_id))
+     return runs
+
+
+ def extract_results(parser, args, opts, runs):
+     """Extract pipeline results for *runs*; dry-run mode is rejected."""
+     if opts.dry_run:
+         parser.error("Dry-run is not supported for extract-results")
+     runfolder.extract_results(
+         runs, opts.output_dir, opts.site, opts.max_jobs, opts.raw_format)
+
+
def make_parser():
usage = 'usage: %prog [options] runfolder_root_dir'
parser = optparse.OptionParser(usage)