3 from ConfigParser import RawConfigParser
5 from optparse import OptionParser, IndentedHelpFormatter
13 except ImportError, e:
14 import simplejson as json
16 from htsworkflow.frontend.auth import apidata
17 from htsworkflow.util import api
18 from htsworkflow.util.url import normalize_url
19 from htsworkflow.pipelines.genome_mapper import getAvailableGenomes
20 from htsworkflow.pipelines.genome_mapper import constructMapperDict
# Docstring markup declaration read by documentation tools (PEP 258).
# The format name is spelled "restructuredtext"; the previous value
# ("restructredtext") is not a recognised format name.
__docformat__ = "restructuredtext en"

# Configuration files consulted for defaults: system-wide, then per-user.
CONFIG_SYSTEM = '/etc/htsworkflow.ini'
CONFIG_USER = os.path.expanduser('~/.htsworkflow.ini')
# Name of the ini section that holds the GERALD related settings.
GERALD_CONFIG_SECTION = 'gerald_config'

# Disable or enable commandline arg parsing; disabled by default.
DISABLE_CMDLINE = True

# Lane identifiers, as the string keys used to index a flowcell's 'lane_set'.
LANE_LIST = ['1', '2', '3', '4', '5', '6', '7', '8']
class FlowCellNotFound(Exception):
    """Raised when flowcell information could not be obtained from the server."""


class WebError404(Exception):
    """Exception type named for HTTP 404 (not found) responses."""
# Fetch the description of one flowcell from the web API and decode it
# from JSON.
#
# base_host_url and flowcell are combined by api.flowcell_url() into the
# request URL.  FlowCellNotFound is raised with the 403, 404 and
# empty-response messages below.
#
# NOTE(review): several statements of this function are not visible in this
# view (the try: line, the read that fills `contents`, the conditions
# guarding the 403/404 messages, whatever follows the except handler, and
# the return).  The comments here describe only the visible lines.
36 def retrieve_flowcell_info(base_host_url, flowcell):
38 Return a dictionary describing a
40 url = api.flowcell_url(base_host_url, flowcell)
# apidata is form-encoded and handed to urlopen() as its data argument;
# per the urllib2 documentation, supplying data makes the request a POST.
43 apipayload = urllib.urlencode(apidata)
44 web = urllib2.urlopen(url, apipayload)
45 except urllib2.URLError, e:
# NOTE(review): the urllib2 documentation only guarantees `.reason` on
# URLError; `.code` and `.msg` belong to the HTTPError subclass.  A plain
# URLError (e.g. connection refused) would presumably fail on the next line
# with AttributeError instead of producing this message -- confirm.
46 errmsg = 'URLError: %d %s' % (e.code, e.msg)
48 logging.error('opened %s' % (url,))
55 msg = "403 - Forbbidden, probably need api key"
56 raise FlowCellNotFound(msg)
59 msg = "404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n " \
60 "Did you get right port #?" % (flowcell, base_host_url, url)
61 raise FlowCellNotFound(msg)
# An empty response body is reported the same way as a missing flowcell.
63 if len(contents) == 0:
64 msg = "No information for flowcell (%s) returned; full url(%s)" % (flowcell, url)
65 raise FlowCellNotFound(msg)
# The body is expected to be a JSON document; json.loads raises ValueError
# if it is not.
67 data = json.loads(contents)
def is_sequencing(lane_info):
    """
    Determine if we are just sequencing and not doing any follow-up analysis

    :param lane_info: mapping describing one lane; only its
        'experiment_type' entry is read.
    :return: True when the experiment type is 'De Novo' or 'Whole Genome',
        otherwise False.
    :raises KeyError: if lane_info has no 'experiment_type' entry.
    """
    # The membership test already yields a bool, so return it directly
    # rather than branching to return True / False.
    return lane_info['experiment_type'] in ('De Novo', 'Whole Genome')
def group_lane_parameters(flowcell_info):
    """
    Group lanes that can share GERALD configuration blocks.

    Lanes end up in the same group when they have the same read length,
    the same species, and the same eland-vs-sequencing choice.

    :param flowcell_info: mapping with a 'lane_set' entry that maps lane
        numbers to per-lane mappings.
    :return: dict keyed by (read_length, library_species, is_sequencing)
        whose values are lists of lane numbers.
    """
    grouped = {}
    for lane, details in flowcell_info['lane_set'].items():
        key = (
            details['read_length'],
            details['library_species'],
            is_sequencing(details),
        )
        if key not in grouped:
            grouped[key] = []
        grouped[key].append(lane)
    return grouped
# Build the comment header placed at the top of a generated GERALD config.
#
# Reads the 'flowcell_id', 'control_lane', 'notes' and 'lane_set' entries of
# flowcell_info and returns a single string in which the collected lines are
# joined with "\n# ", so every line after the first becomes a comment line.
#
# NOTE(review): some statements between the visible lines are not shown in
# this view; they may add further entries to `config`.
93 def format_gerald_header(flowcell_info):
95 Generate comment describing the contents of the flowcell
97 # I'm using '\n# ' to join the lines together, that doesn't include the
98 # first element so i needed to put the # in manually
99 config = ['# FLOWCELL: %s' % (flowcell_info['flowcell_id'])]
101 config += ['CONTROL-LANE: %s' % (flowcell_info['control_lane'],)]
103 config += ['Flowcell Notes:']
# Notes are split on CRLF only, so each CRLF-terminated note line becomes
# its own comment line.
104 config.extend(flowcell_info['notes'].split('\r\n'))
# Every lane named in LANE_LIST is looked up directly, so a flowcell whose
# 'lane_set' lacks one of those keys raises KeyError here.
106 for lane_number in LANE_LIST:
107 lane_info = flowcell_info['lane_set'][lane_number]
108 config += ['Lane%s: %s | %s' % (lane_number, lane_info['library_id'],
109 lane_info['library_name'])]
112 return "\n# ".join(config)
# Build the full text of a GERALD config file.
#
# The result starts with the header from format_gerald_header(), followed by
# fixed settings, then per lane group ANALYSIS / ELAND_GENOME / USE_BASES
# lines, and optionally a POST_RUN_COMMAND line.  Lines are joined with "\n".
#
# options       -- object whose `post_run` and `runfolder` attributes are read
# flowcell_info -- mapping; 'paired_end' is read here, the rest by the helpers
# genome_map    -- mapping from species to genome location, queried with .get()
#
# NOTE(review): the statements that choose between the 'sequence' and
# 'eland' lines are not visible in this view.
114 def format_gerald_config(options, flowcell_info, genome_map):
116 Generate a GERALD config file
118 # so we can add nothing or _pair if we're a paired end run
# Both tables are keyed by True/False, so flowcell_info['paired_end'] has to
# be a boolean-like value.
119 eland_analysis_suffix = { False: "_extended", True: "_pair" }
120 sequence_analysis_suffix = { False: "", True: "_pair" }
122 # it's convienent to have helpful information describing the flowcell
123 # in the config file... things like which lane is which library.
124 config = [format_gerald_header(flowcell_info)]
126 config += ['SEQUENCE_FORMAT --fastq']
127 config += ['ELAND_SET_SIZE 20']
128 config += ['12345678:WITH_SEQUENCE TRUE']
129 analysis_suffix = eland_analysis_suffix[flowcell_info['paired_end']]
130 sequence_suffix = sequence_analysis_suffix[flowcell_info['paired_end']]
131 lane_groups = group_lane_parameters(flowcell_info)
132 for lane_index, lane_numbers in lane_groups.items():
133 # lane_index is return value of group_lane_parameters
# Note: the local name `is_sequencing` shadows the module-level function of
# the same name for the rest of this function.
134 read_length, species, is_sequencing = lane_index
# The lane numbers of a group are concatenated into one prefix, in the same
# form as the '12345678:' prefix used above.
136 lane_prefix = u"".join(lane_numbers)
138 species_path = genome_map.get(species, None)
139 logging.debug("Looked for genome '%s' got location '%s'" % (species, species_path))
140 if species_path is None:
141 no_genome_msg = "Forcing lanes %s to sequencing as there is no genome for %s"
142 logging.warning(no_genome_msg % (lane_numbers, species))
146 config += ['%s:ANALYSIS sequence%s' % (lane_prefix, sequence_suffix)]
148 config += ['%s:ANALYSIS eland%s' % (lane_prefix, analysis_suffix)]
149 config += ['%s:ELAND_GENOME %s' % (lane_prefix, species_path) ]
150 #config += ['%s:READ_LENGTH %s' % ( lane_prefix, read_length ) ]
151 config += ['%s:USE_BASES Y%s' % ( lane_prefix, read_length ) ]
153 # add in option for running script after
# post_run is a %-format template; its %(runfolder)s placeholder is filled
# with the absolute runfolder path.  Skipped unless both values are set.
154 if not (options.post_run is None or options.runfolder is None):
155 runfolder = os.path.abspath(options.runfolder)
156 post_run = options.post_run % {'runfolder': runfolder}
157 config += ['POST_RUN_COMMAND %s' % (post_run,) ]
159 config += [''] # force trailing newline
161 return "\n".join(config)
# NOTE(review): the class and initialiser header lines of this options
# stand-in are not visible in this view; presumably this is the DummyOptions
# class that getCombinedOptions() instantiates -- confirm.
165 Used when command line parsing is disabled; default
# Attributes start out as None; getCombinedOptions() fills in values that
# are still None afterwards.
169 self.output_filepath = None
171 self.genome_dir = None
# optparse help formatter that overrides format_epilog() so the epilog text
# is emitted as written instead of going through the base formatter.
#
# NOTE(review): some lines of format_epilog() are not visible in this view;
# only the return shown below is documented.
173 class PreformattedDescriptionFormatter(IndentedHelpFormatter):
175 #def format_description(self, description):
178 # return description + "\n"
# Returns the epilog unchanged apart from one newline added before and after.
182 def format_epilog(self, epilog):
184 It was removing my preformated epilog, so this should override
185 that behavior! Muhahaha!
188 return "\n" + epilog + "\n"
# Build the optparse parser for the command line interface.
#
# The parser uses PreformattedDescriptionFormatter and defines these options:
#   -u/--url, -o/--output-file (dest output_filepath), -f/--flowcell,
#   -g/--genome_dir, -r/--runfolder and -v/--verbose.
#
# NOTE(review): the statement that starts the multi-line help text and the
# function's return are not visible in this view.
193 def constructOptionParser():
195 returns a pre-setup optparser
197 parser = OptionParser(formatter=PreformattedDescriptionFormatter())
199 parser.set_description('Retrieves eland config file from hts_frontend web frontend.')
204 * %s (User specific; overrides system)
205 * command line overrides all config file options
210 config_host: http://somewhere.domain:port
211 genome_dir: /path to search for genomes
212 post_run: runfolder -o <destdir> %%(runfolder)s
214 """ % (CONFIG_SYSTEM, CONFIG_USER, GERALD_CONFIG_SECTION)
216 #Special formatter for allowing preformatted description.
217 ##parser.format_epilog(PreformattedDescriptionFormatter())
219 parser.add_option("-u", "--url",
220 action="store", type="string", dest="url")
222 parser.add_option("-o", "--output-file",
223 action="store", type="string", dest="output_filepath",
224 help="config file destination. If runfolder is specified defaults "
225 "to <runfolder>/config-auto.txt" )
227 parser.add_option("-f", "--flowcell",
228 action="store", type="string", dest="flowcell")
230 parser.add_option("-g", "--genome_dir",
231 action="store", type="string", dest="genome_dir")
# No dest is given for the next two options; optparse derives it from the
# long option name, giving `runfolder` and `verbose`.
233 parser.add_option("-r", "--runfolder",
234 action="store", type="string",
235 help="specify runfolder for post_run command ")
237 parser.add_option('-v', '--verbose', action='store_true', default=False,
238 help='increase logging verbosity')
def constructConfigParser():
    """
    Return a config parser preloaded from the system and user config files.

    The GERALD config section is added when neither file provides it, so
    callers can query the section without checking that it exists.
    """
    config_files = [CONFIG_SYSTEM, CONFIG_USER]
    config = RawConfigParser()
    config.read(config_files)
    section_present = config.has_section(GERALD_CONFIG_SECTION)
    if not section_present:
        config.add_section(GERALD_CONFIG_SECTION)
    return config
# Merge command line options with values from the config files.
#
# Values already set on `options` win; `url` and `genome_dir` fall back to
# the 'config_host' and 'genome_dir' entries of the GERALD config section.
# `post_run` is taken from the config file only.
#
# NOTE(review): the condition that chooses between DummyOptions() and
# cl_parser.parse_args(argv), and the function's return, are not visible in
# this view.
253 def getCombinedOptions(argv=None):
255 Returns optparse options after it has be updated with ConfigParser
256 config files and merged with parsed commandline options.
258 expects command line arguments to be passed in
260 cl_parser = constructOptionParser()
261 conf_parser = constructConfigParser()
264 options = DummyOptions()
266 options, args = cl_parser.parse_args(argv)
# Fall back to the config file only when no URL was supplied.
268 if options.url is None:
269 if conf_parser.has_option(GERALD_CONFIG_SECTION, 'config_host'):
270 options.url = conf_parser.get(GERALD_CONFIG_SECTION, 'config_host')
272 options.url = normalize_url(options.url)
274 if options.genome_dir is None:
275 if conf_parser.has_option(GERALD_CONFIG_SECTION, 'genome_dir'):
276 options.genome_dir = conf_parser.get(GERALD_CONFIG_SECTION, 'genome_dir')
# post_run has no command line option in constructOptionParser(); it is set
# here from the config file, or to None.
278 if conf_parser.has_option(GERALD_CONFIG_SECTION, 'post_run'):
279 options.post_run = conf_parser.get(GERALD_CONFIG_SECTION, 'post_run')
281 options.post_run = None
# NOTE(review): the visible part of the DummyOptions initialiser does not set
# `runfolder`; if its hidden lines do not either, the attribute read below
# fails when command line parsing is disabled -- confirm.
283 if options.output_filepath is None:
284 if options.runfolder is not None:
285 options.output_filepath = os.path.join(options.runfolder, 'config-auto.txt')
# Retrieve a flowcell's description, render it as a GERALD config and write
# the result to options.output_filepath, or to stdout when no path is set.
#
# Reads options.url, options.flowcell, options.genome_dir, options.post_run
# and options.output_filepath.
#
# NOTE(review): this function may continue past the last line visible in
# this view; some short lines between the visible ones are also not shown.
290 def saveConfigFile(options):
292 retrieves the flowcell eland config file, give the base_host_url
293 (i.e. http://sub.domain.edu:port)
295 logging.info('USING OPTIONS:')
296 logging.info(u' URL: %s' % (options.url,))
297 logging.info(u' OUT: %s' % (options.output_filepath,))
298 logging.info(u' FC: %s' % (options.flowcell,))
299 #logging.info(': %s' % (options.genome_dir,))
300 logging.info(u'post_run: %s' % ( unicode(options.post_run),))
302 flowcell_info = retrieve_flowcell_info(options.url, options.flowcell)
304 logging.debug('genome_dir: %s' % ( options.genome_dir, ))
# Scan genome_dir for genomes and build the species lookup that
# format_gerald_config() queries.
305 available_genomes = getAvailableGenomes(options.genome_dir)
306 genome_map = constructMapperDict(available_genomes)
307 logging.debug('available genomes: %s' % ( unicode( genome_map.keys() ),))
309 config = format_gerald_config(options, flowcell_info, genome_map)
311 if options.output_filepath is not None:
# NOTE(review): no close() of this file is visible in the lines shown; if
# none follows, the data is only flushed when the file object is collected
# -- confirm.
312 outstream = open(options.output_filepath, 'w')
313 logging.info('Writing config file to %s' % (options.output_filepath,))
315 outstream = sys.stdout
317 outstream.write(config)