3 from ConfigParser import RawConfigParser
5 from optparse import OptionParser, IndentedHelpFormatter
13 except ImportError, e:
14 import simplejson as json
16 from htsworkflow.frontend.auth import apidata
17 from htsworkflow.util import api
18 from htsworkflow.util.url import normalize_url
19 from htsworkflow.pipelines.genome_mapper import getAvailableGenomes
20 from htsworkflow.pipelines.genome_mapper import constructMapperDict
# Markup language used by the docstrings in this module (read by doc tools).
__docformat__ = "restructuredtext en"

# Configuration files consulted for defaults; both are handed to the config
# parser in this order, so the per-user file is read after the system one.
CONFIG_SYSTEM = '/etc/htsworkflow.ini'
CONFIG_USER = os.path.expanduser('~/.htsworkflow.ini')
# Name of the ini section holding the settings used by this module.
GERALD_CONFIG_SECTION = 'gerald_config'

# Disable or enable command-line argument parsing; disabled by default.
DISABLE_CMDLINE = True

# Lane identifiers (as strings) used to index a flowcell's 'lane_set'.
LANE_LIST = ['1', '2', '3', '4', '5', '6', '7', '8']
class FlowCellNotFound(Exception):
    """Raised when no usable description of a flowcell could be retrieved."""


class WebError404(Exception):
    """Exception type for an HTTP 404 response."""
def retrieve_flowcell_info(base_host_url, flowcell):
    """
    Return a dictionary describing a flowcell.

    The description is requested from the URL built by
    ``api.flowcell_url(base_host_url, flowcell)``, with the url-encoded
    ``apidata`` values sent as the request body, and the response body is
    decoded as JSON.

    :param base_host_url: base URL of the server to query
    :param flowcell: identifier of the flowcell to look up
    :returns: the decoded JSON response
    :raises FlowCellNotFound: on HTTP 403, HTTP 404, or an empty response
    :raises IOError: for any other failure to open the URL
    """
    url = api.flowcell_url(base_host_url, flowcell)

    def not_found_message(status):
        # Message for the statuses reported as FlowCellNotFound, else None.
        if status == 403:
            return "403 - Forbbidden, probably need api key"
        if status == 404:
            return ("404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n "
                    "Did you get right port #?" % (flowcell, base_host_url, url))
        return None

    def fail(errmsg):
        # Log the failure together with the URL that was tried, then raise.
        logging.error(errmsg)
        logging.error('opened %s' % (url,))
        raise IOError(errmsg)

    try:
        apipayload = urllib.urlencode(apidata)
        web = urllib2.urlopen(url, apipayload)
    except urllib2.HTTPError:
        # sys.exc_info() instead of "except ... as e" keeps this block valid
        # on every Python 2 release.
        e = sys.exc_info()[1]
        # urlopen raises for 4xx/5xx responses, so the 403/404 cases have to
        # be recognised here to be reported as FlowCellNotFound.
        msg = not_found_message(e.code)
        if msg is not None:
            raise FlowCellNotFound(msg)
        fail('URLError: %d %s' % (e.code, e.msg))
    except urllib2.URLError:
        e = sys.exc_info()[1]
        # A plain URLError (e.g. connection refused) carries only .reason;
        # it has no .code or .msg attributes.
        fail('URLError: %s' % (e.reason,))

    try:
        contents = web.read()
        status = getattr(web, 'code', None)
    finally:
        web.close()

    # Still honour an error status on a response that was returned rather
    # than raised (possible with a non-default opener).
    msg = not_found_message(status)
    if msg is not None:
        raise FlowCellNotFound(msg)

    if len(contents) == 0:
        msg = "No information for flowcell (%s) returned; full url(%s)" % (flowcell, url)
        raise FlowCellNotFound(msg)

    data = json.loads(contents)
    return data
def is_sequencing(lane_info):
    """
    Report whether a lane is sequenced only, with no follow-up analysis.

    True when ``lane_info['experiment_type']`` is 'De Novo' or
    'Whole Genome', False for any other experiment type.
    """
    sequencing_only_types = ('De Novo', 'Whole Genome')
    return lane_info['experiment_type'] in sequencing_only_types
def group_lane_parameters(flowcell_info):
    """
    Group lanes that can share GERALD configuration blocks.

    (The same species, read length, and eland vs sequencing)

    :param flowcell_info: flowcell description whose 'lane_set' entry maps
        lane numbers to per-lane dictionaries
    :returns: dictionary mapping
        ``(read_length, library_species, is_sequencing)`` tuples to the list
        of lane numbers sharing those settings, each list in sorted order
    """
    lane_set = flowcell_info['lane_set']
    lane_groups = {}
    # Visit lanes in sorted order so each group's lane list does not depend
    # on dictionary iteration order.
    for lane_number in sorted(lane_set):
        lane_info = lane_set[lane_number]
        index = (lane_info['read_length'],
                 lane_info['library_species'],
                 is_sequencing(lane_info))
        lane_groups.setdefault(index, []).append(lane_number)
    return lane_groups
def format_gerald_header(flowcell_info):
    """
    Generate comment describing the contents of the flowcell

    :param flowcell_info: flowcell description providing 'flowcell_id',
        'control_lane', 'notes' and a 'lane_set' entry for every lane in
        LANE_LIST (each with 'library_id' and 'library_name')
    :returns: the header as one string, one '#'-prefixed entry per line
    :raises KeyError: if one of the entries above is missing
    """
    # The entries are joined with '\n# ', which prefixes every entry except
    # the first, so the first one carries its own leading '# '.
    config = ['# FLOWCELL: %s' % (flowcell_info['flowcell_id'])]
    config += ['CONTROL-LANE: %s' % (flowcell_info['control_lane'],)]
    config += ['Flowcell Notes:']

    # Break the notes on every kind of line ending: a newline left inside an
    # entry would start a line without the '# ' prefix, i.e. an active
    # directive in the generated config.
    notes = flowcell_info['notes'] or ''
    for line_ending in ('\r\n', '\r'):
        notes = notes.replace(line_ending, '\n')
    config.extend(notes.split('\n'))

    for lane_number in LANE_LIST:
        lane_info = flowcell_info['lane_set'][lane_number]
        config += ['Lane%s: %s | %s' % (lane_number, lane_info['library_id'],
                                        lane_info['library_name'])]

    # NOTE(review): this entry is joined like the others, so it is emitted
    # behind '# ' as a comment -- confirm that is intended.
    config += ['SEQUENCE_FORMAT --fastq']

    return "\n# ".join(config)
def format_gerald_config(options, flowcell_info, genome_map):
    """
    Generate a GERALD config file

    :param options: object providing ``post_run`` and, optionally,
        ``runfolder``; when both are set a POST_RUN_COMMAND line is added
    :param flowcell_info: flowcell description (see format_gerald_header and
        group_lane_parameters for the entries that are read)
    :param genome_map: mapping from species name to genome location
    :returns: the config file contents as a single string ending in a newline
    """
    # suffix appended to the analysis name: eland runs are "_extended" or
    # "_pair", plain sequencing adds nothing or "_pair"
    eland_analysis_suffix = {False: "_extended", True: "_pair"}
    sequence_analysis_suffix = {False: "", True: "_pair"}

    # it's convenient to have helpful information describing the flowcell
    # in the config file... things like which lane is which library.
    config = [format_gerald_header(flowcell_info)]

    paired_end = flowcell_info['paired_end']
    analysis_suffix = eland_analysis_suffix[paired_end]
    sequence_suffix = sequence_analysis_suffix[paired_end]

    lane_groups = group_lane_parameters(flowcell_info)
    # Sort the lanes inside each group and the groups themselves so the
    # generated file does not depend on dictionary iteration order.
    ordered_groups = sorted(
        ((sorted(lane_numbers), lane_index)
         for lane_index, lane_numbers in lane_groups.items()),
        key=lambda group: group[0])

    for lane_numbers, lane_index in ordered_groups:
        # lane_index is a key of group_lane_parameters' return value
        read_length, species, sequencing_only = lane_index

        lane_prefix = u"".join(lane_numbers)

        species_path = genome_map.get(species, None)
        logging.debug("Looked for genome '%s' got location '%s'" % (species, species_path))
        if species_path is None:
            no_genome_msg = "Forcing lanes %s to sequencing as there is no genome for %s"
            logging.warning(no_genome_msg % (lane_numbers, species))
            # without a genome there is nothing to align against
            sequencing_only = True

        if sequencing_only:
            config += ['%s:ANALYSIS sequence%s' % (lane_prefix, sequence_suffix)]
        else:
            config += ['%s:ANALYSIS eland%s' % (lane_prefix, analysis_suffix)]
            config += ['%s:ELAND_GENOME %s' % (lane_prefix, species_path)]
        config += ['%s:USE_BASES Y%s' % (lane_prefix, read_length)]

    # add in option for running script after
    post_run_template = getattr(options, 'post_run', None)
    runfolder = getattr(options, 'runfolder', None)
    if post_run_template is not None and runfolder is not None:
        runfolder = os.path.abspath(runfolder)
        post_run = post_run_template % {'runfolder': runfolder}
        config += ['POST_RUN_COMMAND %s' % (post_run,)]

    config += ['']  # force trailing newline

    return "\n".join(config)
class DummyOptions(object):
    """
    Used when command line parsing is disabled; default

    Carries the same attributes as the parsed command-line options, so code
    that reads them works on either kind of object.
    """
    def __init__(self):
        self.url = None
        self.output_filepath = None
        self.flowcell = None
        self.genome_dir = None
        # Read when choosing the default output path and the post-run
        # command; must exist even when no command line was parsed.
        self.runfolder = None
        self.post_run = None
        self.verbose = False
class PreformattedDescriptionFormatter(IndentedHelpFormatter):
    """
    Help formatter that emits the epilog exactly as written.
    """

    def format_epilog(self, epilog):
        """
        Return the epilog verbatim, framed by a newline on each side.

        It was removing my preformatted epilog, so this overrides that
        behavior.

        :param epilog: epilog text, or None/empty when there is none
        :returns: ``"\\n" + epilog + "\\n"``, or ``""`` without an epilog
        """
        if not epilog:
            return ""
        return "\n" + epilog + "\n"
def constructOptionParser():
    """
    returns a pre-setup optparser

    The parser uses PreformattedDescriptionFormatter so the epilog, which
    documents the config files, is printed as laid out here.
    """
    parser = OptionParser(formatter=PreformattedDescriptionFormatter())

    parser.set_description('Retrieves eland config file from hts_frontend web frontend.')

    parser.epilog = """
Config File:
  * %s (System wide)
  * %s (User specific; overrides system)
  * command line overrides all config file options

  Example Config File:

    [%s]
    config_host: http://somewhere.domain:port
    genome_dir: /path to search for genomes
    post_run: runfolder -o <destdir> %%(runfolder)s

""" % (CONFIG_SYSTEM, CONFIG_USER, GERALD_CONFIG_SECTION)

    parser.add_option("-u", "--url",
                      action="store", type="string", dest="url",
                      help="base url of the web frontend; overrides "
                           "config_host from the config files")

    parser.add_option("-o", "--output-file",
                      action="store", type="string", dest="output_filepath",
                      help="config file destination. If runfolder is specified defaults "
                           "to <runfolder>/config-auto.txt")

    parser.add_option("-f", "--flowcell",
                      action="store", type="string", dest="flowcell",
                      help="flowcell to retrieve the config file for")

    parser.add_option("-g", "--genome_dir",
                      action="store", type="string", dest="genome_dir",
                      help="path to search for genomes; overrides "
                           "genome_dir from the config files")

    parser.add_option("-r", "--runfolder",
                      action="store", type="string",
                      help="specify runfolder for post_run command ")

    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='increase logging verbosity')

    return parser
def constructConfigParser():
    """
    Build a RawConfigParser loaded from the system and user config files.

    The GERALD config section is created when neither file defines it, so
    the returned parser always has that section.
    """
    config_files = [CONFIG_SYSTEM, CONFIG_USER]
    config = RawConfigParser()
    config.read(config_files)

    section_missing = not config.has_section(GERALD_CONFIG_SECTION)
    if section_missing:
        config.add_section(GERALD_CONFIG_SECTION)

    return config
def getCombinedOptions(argv=None):
    """
    Returns optparse options after it has been updated with ConfigParser
    config files and merged with parsed commandline options.

    expects command line arguments to be passed in

    :param argv: command line arguments to parse; when None a DummyOptions
        object is used and every value comes from the config files
    :returns: options object with url, genome_dir, post_run and
        output_filepath filled in where a value is available
    """
    cl_parser = constructOptionParser()
    conf_parser = constructConfigParser()

    def config_value(option_name):
        # Value of option_name in the GERALD section, or None when unset.
        if conf_parser.has_option(GERALD_CONFIG_SECTION, option_name):
            return conf_parser.get(GERALD_CONFIG_SECTION, option_name)
        return None

    if argv is None:
        options = DummyOptions()
    else:
        options, _ = cl_parser.parse_args(argv)

    # Command line values win; the config files only fill in what is missing.
    if options.url is None:
        options.url = config_value('config_host')

    # NOTE(review): options.url is still None here when neither source
    # supplied one -- confirm normalize_url accepts that.
    options.url = normalize_url(options.url)

    if options.genome_dir is None:
        options.genome_dir = config_value('genome_dir')

    # post_run has no command line option; it only comes from the config files
    options.post_run = config_value('post_run')

    if options.output_filepath is None:
        # getattr: an options object without a runfolder attribute must not
        # break the defaulting.
        runfolder = getattr(options, 'runfolder', None)
        if runfolder is not None:
            options.output_filepath = os.path.join(runfolder, 'config-auto.txt')

    return options
def saveConfigFile(options):
    """
    retrieves the flowcell eland config file, given the base_host_url
    (i.e. http://sub.domain.edu:port)

    The config is written to ``options.output_filepath`` when that is set,
    otherwise to standard output.

    :param options: object providing url, output_filepath, flowcell,
        genome_dir and post_run (plus whatever format_gerald_config reads)
    """
    logging.info('USING OPTIONS:')
    logging.info(u'     URL: %s' % (options.url,))
    logging.info(u'     OUT: %s' % (options.output_filepath,))
    logging.info(u'      FC: %s' % (options.flowcell,))
    logging.info(u'post_run: %s' % (unicode(options.post_run),))

    flowcell_info = retrieve_flowcell_info(options.url, options.flowcell)

    logging.debug('genome_dir: %s' % (options.genome_dir,))
    available_genomes = getAvailableGenomes(options.genome_dir)
    genome_map = constructMapperDict(available_genomes)
    logging.debug('available genomes: %s' % (unicode(genome_map.keys()),))

    config = format_gerald_config(options, flowcell_info, genome_map)

    if options.output_filepath is not None:
        outstream = open(options.output_filepath, 'w')
        logging.info('Writing config file to %s' % (options.output_filepath,))
        # Close the file we opened even if the write fails; try/finally
        # rather than "with" keeps this valid on old Python 2 releases.
        try:
            outstream.write(config)
        finally:
            outstream.close()
    else:
        # stdout belongs to the caller, so it is written to but not closed
        sys.stdout.write(config)