X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=blobdiff_plain;f=htsworkflow%2Fpipelines%2Fretrieve_config.py;h=3a1a56aeaf2188350db8b980d7b098bf0098a0ba;hp=3b6ba3e21f331f5ceade12cd40349831131d6653;hb=86a66805c52c4bafb5d878d2b22604b9b9309cc8;hpb=c7dbc44c9ce10c492903d3ee6538e0d80872f8df diff --git a/htsworkflow/pipelines/retrieve_config.py b/htsworkflow/pipelines/retrieve_config.py index 3b6ba3e..3a1a56a 100644 --- a/htsworkflow/pipelines/retrieve_config.py +++ b/htsworkflow/pipelines/retrieve_config.py @@ -14,6 +14,7 @@ except ImportError, e: import simplejson as json from htsworkflow.frontend.auth import apidata +from htsworkflow.util import api from htsworkflow.util.url import normalize_url from htsworkflow.pipelines.genome_mapper import getAvailableGenomes from htsworkflow.pipelines.genome_mapper import constructMapperDict @@ -36,7 +37,7 @@ def retrieve_flowcell_info(base_host_url, flowcell): """ Return a dictionary describing a """ - url = base_host_url + '/experiments/config/%s/json' % (flowcell) + url = api.flowcell_url(base_host_url, flowcell) try: apipayload = urllib.urlencode(apidata) @@ -49,7 +50,7 @@ def retrieve_flowcell_info(base_host_url, flowcell): contents = web.read() headers = web.info() - + if web.code == 403: msg = "403 - Forbbidden, probably need api key" raise FlowCellNotFound(msg) @@ -106,6 +107,7 @@ def format_gerald_header(flowcell_info): lane_info = flowcell_info['lane_set'][lane_number] config += ['Lane%s: %s | %s' % (lane_number, lane_info['library_id'], lane_info['library_name'])] + config += [''] return "\n# ".join(config) @@ -114,13 +116,19 @@ def format_gerald_config(options, flowcell_info, genome_map): Generate a GERALD config file """ # so we can add nothing or _pair if we're a paired end run - run_type_suffix = { False: "", True: "_pair" } + eland_analysis_suffix = { False: "_extended", True: "_pair" } + sequence_analysis_suffix = { False: "", True: "_pair" } # it's convienent to have helpful information describing the flowcell # in the config file... things like which lane is which library. config = [format_gerald_header(flowcell_info)] - analysis_suffix = run_type_suffix[flowcell_info['paired_end']] + config += ['SEQUENCE_FORMAT --fastq'] + config += ['ELAND_SET_SIZE 20'] + config += ['WITH_SEQUENCE TRUE'] + config += ['12345678:WITH_SEQUENCE TRUE'] + analysis_suffix = eland_analysis_suffix[flowcell_info['paired_end']] + sequence_suffix = sequence_analysis_suffix[flowcell_info['paired_end']] lane_groups = group_lane_parameters(flowcell_info) for lane_index, lane_numbers in lane_groups.items(): # lane_index is return value of group_lane_parameters @@ -128,18 +136,25 @@ def format_gerald_config(options, flowcell_info, genome_map): lane_numbers.sort() lane_prefix = u"".join(lane_numbers) - if not is_sequencing: - config += ['%s:ANALYSIS eland%s' % (lane_prefix, analysis_suffix)] + species_path = genome_map.get(species, None) + logging.debug("Looked for genome '%s' got location '%s'" % (species, species_path)) + if species_path is None: + no_genome_msg = "Forcing lanes %s to sequencing as there is no genome for %s" + logging.warning(no_genome_msg % (lane_numbers, species)) + is_sequencing = True + + if is_sequencing: + config += ['%s:ANALYSIS sequence%s' % (lane_prefix, sequence_suffix)] else: - config += ['%s:ANALYSIS sequence%s' % (lane_prefix, analysis_suffix)] + config += ['%s:ANALYSIS eland%s' % (lane_prefix, analysis_suffix)] + config += ['%s:ELAND_GENOME %s' % (lane_prefix, species_path) ] #config += ['%s:READ_LENGTH %s' % ( lane_prefix, read_length ) ] config += ['%s:USE_BASES Y%s' % ( lane_prefix, read_length ) ] - species_path = genome_map.get(species, "Unknown") - config += ['%s:ELAND_GENOME %s' % (lane_prefix, species_path) ] # add in option for running script after - if options.post_run is not None: - post_run = options.post_run % {'runfolder': options.runfolder} + if not (options.post_run is None or options.runfolder is None): + runfolder = os.path.abspath(options.runfolder) + post_run = options.post_run % {'runfolder': runfolder} config += ['POST_RUN_COMMAND %s' % (post_run,) ] config += [''] # force trailing newline @@ -193,8 +208,9 @@ Config File: Example Config File: [%s] - config_host=http://somewhere.domain:port - genome_dir=/path to search for genomes + config_host: http://somewhere.domain:port + genome_dir: /path to search for genomes + post_run: runfolder -o %%(runfolder)s """ % (CONFIG_SYSTEM, CONFIG_USER, GERALD_CONFIG_SECTION) @@ -218,7 +234,9 @@ Config File: parser.add_option("-r", "--runfolder", action="store", type="string", help="specify runfolder for post_run command ") - + + parser.add_option('-v', '--verbose', action='store_true', default=False, + help='increase logging verbosity') return parser def constructConfigParser(): @@ -267,13 +285,6 @@ def getCombinedOptions(argv=None): if options.runfolder is not None: options.output_filepath = os.path.join(options.runfolder, 'config-auto.txt') - logging.info('USING OPTIONS:') - logging.info(u' URL: %s' % (options.url,)) - logging.info(u' OUT: %s' % (options.output_filepath,)) - logging.info(u' FC: %s' % (options.flowcell,)) - #logging.info(': %s' % (options.genome_dir,)) - logging.info(u'post_run: %s' % ( unicode(options.post_run),)) - return options @@ -282,10 +293,19 @@ def saveConfigFile(options): retrieves the flowcell eland config file, give the base_host_url (i.e. http://sub.domain.edu:port) """ + logging.info('USING OPTIONS:') + logging.info(u' URL: %s' % (options.url,)) + logging.info(u' OUT: %s' % (options.output_filepath,)) + logging.info(u' FC: %s' % (options.flowcell,)) + #logging.info(': %s' % (options.genome_dir,)) + logging.info(u'post_run: %s' % ( unicode(options.post_run),)) + flowcell_info = retrieve_flowcell_info(options.url, options.flowcell) + logging.debug('genome_dir: %s' % ( options.genome_dir, )) available_genomes = getAvailableGenomes(options.genome_dir) genome_map = constructMapperDict(available_genomes) + logging.debug('available genomes: %s' % ( unicode( genome_map.keys() ),)) config = format_gerald_config(options, flowcell_info, genome_map)