import simplejson as json
from htsworkflow.frontend.auth import apidata
+from htsworkflow.util import api
from htsworkflow.util.url import normalize_url
from htsworkflow.pipelines.genome_mapper import getAvailableGenomes
from htsworkflow.pipelines.genome_mapper import constructMapperDict
"""
Return a dictionary describing a
"""
- url = base_host_url + '/experiments/config/%s/json' % (flowcell)
+ url = api.flowcell_url(base_host_url, flowcell)
try:
apipayload = urllib.urlencode(apidata)
contents = web.read()
headers = web.info()
-
+
if web.code == 403:
msg = "403 - Forbbidden, probably need api key"
raise FlowCellNotFound(msg)
lane_info = flowcell_info['lane_set'][lane_number]
config += ['Lane%s: %s | %s' % (lane_number, lane_info['library_id'],
lane_info['library_name'])]
+
config += ['']
return "\n# ".join(config)
Generate a GERALD config file
"""
# so we can add nothing or _pair if we're a paired end run
- run_type_suffix = { False: "", True: "_pair" }
+ eland_analysis_suffix = { False: "_extended", True: "_pair" }
+ sequence_analysis_suffix = { False: "", True: "_pair" }
# it's convenient to have helpful information describing the flowcell
# in the config file... things like which lane is which library.
config = [format_gerald_header(flowcell_info)]
- analysis_suffix = run_type_suffix[flowcell_info['paired_end']]
+ config += ['SEQUENCE_FORMAT --fastq']
+ config += ['ELAND_SET_SIZE 20']
+ config += ['WITH_SEQUENCE TRUE']
+ config += ['12345678:WITH_SEQUENCE TRUE']
+ analysis_suffix = eland_analysis_suffix[flowcell_info['paired_end']]
+ sequence_suffix = sequence_analysis_suffix[flowcell_info['paired_end']]
lane_groups = group_lane_parameters(flowcell_info)
for lane_index, lane_numbers in lane_groups.items():
# lane_index is return value of group_lane_parameters
lane_numbers.sort()
lane_prefix = u"".join(lane_numbers)
- if not is_sequencing:
- config += ['%s:ANALYSIS eland%s' % (lane_prefix, analysis_suffix)]
+ species_path = genome_map.get(species, None)
+ logging.debug("Looked for genome '%s' got location '%s'" % (species, species_path))
+ if species_path is None:
+ no_genome_msg = "Forcing lanes %s to sequencing as there is no genome for %s"
+ logging.warning(no_genome_msg % (lane_numbers, species))
+ is_sequencing = True
+
+ if is_sequencing:
+ config += ['%s:ANALYSIS sequence%s' % (lane_prefix, sequence_suffix)]
else:
- config += ['%s:ANALYSIS sequence%s' % (lane_prefix, analysis_suffix)]
+ config += ['%s:ANALYSIS eland%s' % (lane_prefix, analysis_suffix)]
+ config += ['%s:ELAND_GENOME %s' % (lane_prefix, species_path) ]
#config += ['%s:READ_LENGTH %s' % ( lane_prefix, read_length ) ]
config += ['%s:USE_BASES Y%s' % ( lane_prefix, read_length ) ]
- species_path = genome_map.get(species, "Unknown")
- config += ['%s:ELAND_GENOME %s' % (lane_prefix, species_path) ]
# add in option for running script after
- if options.post_run is not None:
- post_run = options.post_run % {'runfolder': options.runfolder}
+ if not (options.post_run is None or options.runfolder is None):
+ runfolder = os.path.abspath(options.runfolder)
+ post_run = options.post_run % {'runfolder': runfolder}
config += ['POST_RUN_COMMAND %s' % (post_run,) ]
config += [''] # force trailing newline
Example Config File:
[%s]
- config_host=http://somewhere.domain:port
- genome_dir=/path to search for genomes
+ config_host: http://somewhere.domain:port
+ genome_dir: /path to search for genomes
+ post_run: runfolder -o <destdir> %%(runfolder)s
""" % (CONFIG_SYSTEM, CONFIG_USER, GERALD_CONFIG_SECTION)
parser.add_option("-r", "--runfolder",
action="store", type="string",
help="specify runfolder for post_run command ")
-
+
+ parser.add_option('-v', '--verbose', action='store_true', default=False,
+ help='increase logging verbosity')
return parser
def constructConfigParser():
if options.runfolder is not None:
options.output_filepath = os.path.join(options.runfolder, 'config-auto.txt')
- logging.info('USING OPTIONS:')
- logging.info(u' URL: %s' % (options.url,))
- logging.info(u' OUT: %s' % (options.output_filepath,))
- logging.info(u' FC: %s' % (options.flowcell,))
- #logging.info(': %s' % (options.genome_dir,))
- logging.info(u'post_run: %s' % ( unicode(options.post_run),))
-
return options
retrieves the flowcell eland config file, given the base_host_url
(i.e. http://sub.domain.edu:port)
"""
+ logging.info('USING OPTIONS:')
+ logging.info(u' URL: %s' % (options.url,))
+ logging.info(u' OUT: %s' % (options.output_filepath,))
+ logging.info(u' FC: %s' % (options.flowcell,))
+ #logging.info(': %s' % (options.genome_dir,))
+ logging.info(u'post_run: %s' % ( unicode(options.post_run),))
+
flowcell_info = retrieve_flowcell_info(options.url, options.flowcell)
+ logging.debug('genome_dir: %s' % ( options.genome_dir, ))
available_genomes = getAvailableGenomes(options.genome_dir)
genome_map = constructMapperDict(available_genomes)
+ logging.debug('available genomes: %s' % ( unicode( genome_map.keys() ),))
config = format_gerald_config(options, flowcell_info, genome_map)