def retrieve_flowcell_info(base_host_url, flowcell):
    """
    Return a dictionary describing a flowcell, as reported by the server.

    The url-encoded ``apidata`` (defined outside this function) is sent as
    the request body to ``<base_host_url>/experiments/config/<flowcell>/json``
    and the body of the reply is decoded as JSON.

    :param base_host_url: prefix the '/experiments/config/...' path is
        appended to.
    :param flowcell: flowcell identifier interpolated into the url.
    :return: the object produced by ``json.loads`` on the response body.
    :raises IOError: if the url could not be opened.
    :raises FlowCellNotFound: on a 403 or 404 status code, or when the
        response body is empty.
    """
    url = base_host_url + '/experiments/config/%s/json' % (flowcell)

    try:
        apipayload = urllib.urlencode(apidata)
        web = urllib2.urlopen(url, apipayload)
    except urllib2.URLError as e:
        # Only HTTPError carries .code/.msg.  A plain URLError (connection
        # refused, DNS failure, ...) only has .reason, and formatting it via
        # .code would raise AttributeError and hide the real problem.
        if hasattr(e, 'code'):
            errmsg = 'URLError: %d %s' % (e.code, e.msg)
        else:
            errmsg = 'URLError: %s' % (e.reason,)
        logging.error(errmsg)
        logging.error('opened %s' % (url,))
        raise IOError(errmsg)

    # Read everything we need and release the connection even if the read
    # itself fails.
    try:
        contents = web.read()
        status = web.getcode()
    finally:
        web.close()

    # NOTE(review): the default urllib2 opener raises HTTPError for 403/404,
    # which is reported as IOError above, so these two checks probably only
    # trigger with a custom opener -- confirm before relying on them.
    if status == 403:
        msg = "403 - Forbbidden, probably need api key"
        raise FlowCellNotFound(msg)

    if status == 404:
        msg = "404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n " \
              "Did you get right port #?" % (flowcell, base_host_url, url)
        raise FlowCellNotFound(msg)

    if len(contents) == 0:
        msg = "No information for flowcell (%s) returned; full url(%s)" % (flowcell, url)
        raise FlowCellNotFound(msg)

    data = json.loads(contents)
    return data
+
def is_sequencing(lane_info):
    """
    Tell whether a lane is sequencing-only, with no follow-up analysis.

    Returns True when the lane's 'experiment_type' is 'De Novo' or
    'Whole Genome', and False for any other value.
    """
    sequencing_only_types = ('De Novo', 'Whole Genome')
    return lane_info['experiment_type'] in sequencing_only_types
+
def group_lane_parameters(flowcell_info):
    """
    Group lanes that can share GERALD configuration blocks.

    Lanes end up in the same group when they have the same read length,
    the same species, and the same eland-vs-sequencing classification.

    Returns a dictionary keyed by the tuple
    (read_length, library_species, is_sequencing) whose values are lists
    of the lane numbers (the keys of flowcell_info['lane_set']) that fall
    into that group.
    """
    grouped = {}
    lane_set = flowcell_info['lane_set']
    for lane_number in lane_set:
        lane_info = lane_set[lane_number]
        read_length = lane_info['read_length']
        species = lane_info['library_species']
        key = (read_length, species, is_sequencing(lane_info))
        if key not in grouped:
            grouped[key] = []
        grouped[key].append(lane_number)
    return grouped
+
def format_gerald_header(flowcell_info):
    """
    Generate comment describing the contents of the flowcell

    The result is a single string of '#'-prefixed lines holding the
    flowcell id, the control lane, the free-form flowcell notes, and one
    'Lane<N>: <library_id> | <library_name>' line per entry of LANE_LIST.

    flowcell_info must provide the keys 'flowcell_id', 'control_lane',
    'notes' and 'lane_set' (the latter indexed by the LANE_LIST entries).
    """
    # The lines are joined with '\n# ', which prefixes every line except
    # the first one -- so the first element carries its '# ' explicitly.
    config = ['# FLOWCELL: %s' % (flowcell_info['flowcell_id'])]
    config.append('')
    config.append('CONTROL-LANE: %s' % (flowcell_info['control_lane'],))
    config.append('')
    config.append('Flowcell Notes:')
    # Normalise every line-ending style before splitting.  Splitting on
    # '\r\n' alone leaves a bare '\n' (or '\r') inside a single element,
    # and the text after it would land in the config file without the
    # '# ' comment prefix.
    notes = flowcell_info['notes'].replace('\r\n', '\n').replace('\r', '\n')
    config.extend(notes.split('\n'))
    config.append('')
    for lane_number in LANE_LIST:
        lane_info = flowcell_info['lane_set'][lane_number]
        config.append('Lane%s: %s | %s' % (lane_number,
                                           lane_info['library_id'],
                                           lane_info['library_name']))
    config.append('')
    return "\n# ".join(config)
+
def format_gerald_config(options, flowcell_info, genome_map):
    """
    Build the text of a GERALD config file for a flowcell.

    options       -- object with 'post_run' and 'runfolder' attributes; when
                     post_run is not None it is %-formatted with
                     {'runfolder': options.runfolder} and emitted as the
                     POST_RUN_COMMAND.
    flowcell_info -- flowcell description; 'paired_end' is read here and the
                     whole dictionary is handed to format_gerald_header and
                     group_lane_parameters.
    genome_map    -- mapping from species to the value written after
                     ELAND_GENOME; species missing from it get "Unknown".

    Returns the config as one newline-joined string ending in a newline.
    """
    # suffix for the analysis name: nothing for single-ended runs,
    # '_pair' for paired-end runs
    suffix_by_pairing = {False: "", True: "_pair"}

    # Open with a comment header describing the flowcell (e.g. which lane
    # holds which library) so the config file is self-explanatory.
    lines = [format_gerald_header(flowcell_info)]

    suffix = suffix_by_pairing[flowcell_info['paired_end']]
    for group_key, lanes in group_lane_parameters(flowcell_info).items():
        # group_key is the tuple built by group_lane_parameters
        read_length, species, sequencing_only = group_key
        # every lane of the group shares one block, addressed by the
        # concatenation of the sorted lane numbers
        prefix = u"".join(sorted(lanes))

        if sequencing_only:
            analysis_line = '%s:ANALYSIS sequence%s' % (prefix, suffix)
        else:
            analysis_line = '%s:ANALYSIS eland%s' % (prefix, suffix)
        lines.append(analysis_line)

        # NOTE: no '<lanes>:READ_LENGTH' line is written (it was disabled
        # in the original); the read length goes into USE_BASES instead.
        lines.append('%s:USE_BASES Y%s' % (prefix, read_length))
        genome_path = genome_map.get(species, "Unknown")
        lines.append('%s:ELAND_GENOME %s' % (prefix, genome_path))

    # optional command to run once the pipeline has finished
    if options.post_run is not None:
        command = options.post_run % {'runfolder': options.runfolder}
        lines.append('POST_RUN_COMMAND %s' % (command,))

    # an empty last element makes the joined text end with a newline
    lines.append('')

    return "\n".join(lines)
+