3 from ConfigParser import RawConfigParser
5 from optparse import OptionParser, IndentedHelpFormatter
13 except ImportError, e:
14 import simplejson as json
16 from htsworkflow.frontend.auth import apidata
17 from htsworkflow.util.url import normalize_url
18 from htsworkflow.pipelines.genome_mapper import getAvailableGenomes
19 from htsworkflow.pipelines.genome_mapper import constructMapperDict
# Markup language used by the docstrings in this module.
__docformat__ = "restructuredtext en"

# Configuration files; both are read, system-wide first and then the
# per-user file.
CONFIG_SYSTEM = '/etc/htsworkflow.ini'
CONFIG_USER = os.path.expanduser('~/.htsworkflow.ini')
# Name of the configuration-file section that holds this module's settings.
GERALD_CONFIG_SECTION = 'gerald_config'

# Disable or enable command-line argument parsing; disabled by default.
DISABLE_CMDLINE = True

# Lane identifiers (as strings) in the order they are listed in the header.
LANE_LIST = ['1', '2', '3', '4', '5', '6', '7', '8']
class FlowCellNotFound(Exception):
    """
    Raised when no usable flowcell record could be retrieved: the request
    was refused (403), the flowcell was not found (404), or the response
    body was empty.
    """
class WebError404(Exception):
    """
    Error type for an HTTP 404 response.

    NOTE(review): nothing in the visible part of this module raises it;
    confirm whether other code still depends on it.
    """
def retrieve_flowcell_info(base_host_url, flowcell):
    """
    Return a dictionary describing a flowcell, as decoded from the JSON
    served at ``<base_host_url>/experiments/config/<flowcell>/json``.

    :param base_host_url: base URL of the server, without a trailing slash
    :param flowcell: flowcell identifier inserted into the request URL
    :returns: the decoded JSON document
    :raises IOError: if the URL could not be opened
    :raises FlowCellNotFound: on a 403 or 404 status, or an empty response
    """
    url = base_host_url + '/experiments/config/%s/json' % (flowcell)

    try:
        # The API credentials are sent as the POST body.
        apipayload = urllib.urlencode(apidata)
        web = urllib2.urlopen(url, apipayload)
    except urllib2.URLError:
        e = sys.exc_info()[1]
        # Only HTTPError carries code/msg; a plain URLError (e.g. connection
        # refused) has just 'reason', so do not assume the HTTP attributes.
        code = getattr(e, 'code', None)
        if code is not None:
            errmsg = 'URLError: %d %s' % (code, getattr(e, 'msg', ''))
        else:
            errmsg = 'URLError: %s' % (getattr(e, 'reason', e),)
        logging.error(errmsg)
        logging.error('opened %s' % (url,))
        raise IOError(errmsg)

    contents = web.read()

    if web.code == 403:
        msg = "403 - Forbbidden, probably need api key"
        raise FlowCellNotFound(msg)

    if web.code == 404:
        msg = "404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n " \
              "Did you get right port #?" % (flowcell, base_host_url, url)
        raise FlowCellNotFound(msg)

    if not contents:
        msg = "No information for flowcell (%s) returned; full url(%s)" % (flowcell, url)
        raise FlowCellNotFound(msg)

    data = json.loads(contents)
    return data
def is_sequencing(lane_info):
    """
    Determine if we are just sequencing and not doing any follow-up analysis

    :param lane_info: mapping describing one lane; only its
        ``'experiment_type'`` entry is read
    :returns: True when the experiment type is 'De Novo' or 'Whole Genome',
        False otherwise
    :raises KeyError: if ``lane_info`` has no ``'experiment_type'`` entry
    """
    return lane_info['experiment_type'] in ('De Novo', 'Whole Genome')
def group_lane_parameters(flowcell_info):
    """
    group lanes that can share GERALD configuration blocks.

    (The same species, read length, and eland vs sequencing)

    :param flowcell_info: mapping whose ``'lane_set'`` entry maps lane
        numbers to per-lane mappings with ``'read_length'``,
        ``'library_species'`` and ``'experiment_type'`` entries
    :returns: dict mapping ``(read_length, species, is_sequencing)`` to the
        list of lane numbers sharing those settings
    """
    lane_set = flowcell_info['lane_set']
    lane_groups = {}
    # Walk the lanes in sorted order so the lane list of each group (and
    # anything built from it) does not depend on dict ordering.
    for lane_number in sorted(lane_set):
        lane_info = lane_set[lane_number]
        index = (lane_info['read_length'],
                 lane_info['library_species'],
                 is_sequencing(lane_info))
        lane_groups.setdefault(index, []).append(lane_number)
    return lane_groups
def format_gerald_header(flowcell_info):
    """
    Generate comment describing the contents of the flowcell

    :param flowcell_info: mapping with ``'flowcell_id'``, ``'control_lane'``,
        ``'notes'`` and ``'lane_set'`` entries; each lane in ``'lane_set'``
        supplies ``'library_id'`` and ``'library_name'``
    :returns: a single string in which every line starts with ``#``
    """
    lane_set = flowcell_info['lane_set']

    # I'm using '\n# ' to join the lines together, that doesn't include the
    # first element so i needed to put the # in manually
    config = ['# FLOWCELL: %s' % (flowcell_info['flowcell_id'],)]
    config += ['']
    config += ['CONTROL-LANE: %s' % (flowcell_info['control_lane'],)]
    config += ['']
    config += ['Flowcell Notes:']
    # splitlines() breaks on every newline flavour, so a note containing a
    # bare '\n' cannot leak an uncommented line into the config; missing
    # notes are treated as empty.
    config.extend((flowcell_info['notes'] or '').splitlines())
    config += ['']
    for lane_number in LANE_LIST:
        if lane_number not in lane_set:
            # Lane not present on this flowcell: nothing to describe.
            continue
        lane_info = lane_set[lane_number]
        config += ['Lane%s: %s | %s' % (lane_number, lane_info['library_id'],
                                        lane_info['library_name'])]

    # NOTE(review): this entry is joined with '\n# ' like every other line,
    # so it ends up as a comment in the output; confirm that is intended.
    config += ['SEQUENCE_FORMAT --fastq']

    return "\n# ".join(config)
def format_gerald_config(options, flowcell_info, genome_map):
    """
    Generate a GERALD config file

    :param options: object with ``post_run`` and ``runfolder`` attributes;
        ``runfolder`` is only consulted when ``post_run`` is not None
    :param flowcell_info: flowcell description; ``'paired_end'`` is read
        here, the rest is passed to the header and lane-grouping helpers
    :param genome_map: mapping from species name to genome location
    :returns: the config file contents as one newline-terminated string
    """
    # so we can add nothing or _pair if we're a paired end run
    eland_analysis_suffix = {False: "_extended", True: "_pair"}
    sequence_analysis_suffix = {False: "", True: "_pair"}

    # it's convenient to have helpful information describing the flowcell
    # in the config file... things like which lane is which library.
    config = [format_gerald_header(flowcell_info)]

    paired_end = flowcell_info['paired_end']
    eland_suffix = eland_analysis_suffix[paired_end]
    sequence_suffix = sequence_analysis_suffix[paired_end]

    lane_groups = group_lane_parameters(flowcell_info)
    for lane_index, lane_numbers in lane_groups.items():
        # lane_index is return value of group_lane_parameters
        read_length, species, sequencing_only = lane_index

        # GERALD addresses several lanes at once by concatenating their
        # numbers, e.g. "123:ANALYSIS ...".
        lane_prefix = u"".join(lane_numbers)

        species_path = genome_map.get(species, None)
        logging.debug("Looked for genome '%s' got location '%s'" % (species, species_path))
        if species_path is None:
            no_genome_msg = "Forcing lanes %s to sequencing as there is no genome for %s"
            logging.warning(no_genome_msg % (lane_numbers, species))
            sequencing_only = True

        if sequencing_only:
            # Sequence-only analysis takes its own suffix ("" or "_pair"),
            # not the ELAND one.
            config += ['%s:ANALYSIS sequence%s' % (lane_prefix, sequence_suffix)]
        else:
            config += ['%s:ANALYSIS eland%s' % (lane_prefix, eland_suffix)]
            config += ['%s:ELAND_GENOME %s' % (lane_prefix, species_path)]
        config += ['%s:USE_BASES Y%s' % (lane_prefix, read_length)]

    # add in option for running script after
    if not (options.post_run is None or options.runfolder is None):
        runfolder = os.path.abspath(options.runfolder)
        post_run = options.post_run % {'runfolder': runfolder}
        config += ['POST_RUN_COMMAND %s' % (post_run,)]

    config += ['']  # force trailing newline

    return "\n".join(config)
class DummyOptions:
    """
    Used when command line parsing is disabled; default

    Stand-in for the optparse options object with every setting unset, so
    the values come from the configuration files instead.
    """
    def __init__(self):
        self.url = None
        self.output_filepath = None
        self.flowcell = None
        self.genome_dir = None
        # getCombinedOptions() reads options.runfolder; without this
        # attribute it fails with AttributeError.
        self.runfolder = None
class PreformattedDescriptionFormatter(IndentedHelpFormatter):
    """
    Help formatter that leaves a preformatted epilog untouched.
    """

    def format_epilog(self, epilog):
        """
        Return the epilog exactly as written, framed by blank lines.

        Overrides the inherited behaviour, which was discarding the
        layout of a preformatted epilog. A missing or empty epilog
        yields an empty string.
        """
        if not epilog:
            return ""
        return "\n" + epilog + "\n"
def constructOptionParser():
    """
    returns a pre-setup optparser

    The parser defines the url, output-file, flowcell, genome_dir,
    runfolder and verbose options; its epilog documents the configuration
    files that supply defaults.
    """
    parser = OptionParser(formatter=PreformattedDescriptionFormatter())

    parser.set_description('Retrieves eland config file from hts_frontend web frontend.')

    # Preformatted text; PreformattedDescriptionFormatter prints it as-is.
    # '%%' becomes a literal '%' once the file names are interpolated.
    parser.epilog = """
Config File:
  * %s (System wide)
  * %s (User specific; overrides system)
  * command line overrides all config file options

  Example Config File:

    [%s]
    config_host: http://somewhere.domain:port
    genome_dir: /path to search for genomes
    post_run: runfolder -o <destdir> %%(runfolder)s

""" % (CONFIG_SYSTEM, CONFIG_USER, GERALD_CONFIG_SECTION)

    parser.add_option("-u", "--url",
                      action="store", type="string", dest="url",
                      help="base url of the server to query; defaults to "
                           "config_host from the config files")

    parser.add_option("-o", "--output-file",
                      action="store", type="string", dest="output_filepath",
                      help="config file destination. If runfolder is specified defaults "
                           "to <runfolder>/config-auto.txt")

    parser.add_option("-f", "--flowcell",
                      action="store", type="string", dest="flowcell",
                      help="flowcell to retrieve the config file for")

    # --genome-dir is accepted as an alias so the spelling matches
    # --output-file; the original --genome_dir keeps working.
    parser.add_option("-g", "--genome_dir", "--genome-dir",
                      action="store", type="string", dest="genome_dir",
                      help="path to search for genomes; defaults to "
                           "genome_dir from the config files")

    parser.add_option("-r", "--runfolder",
                      action="store", type="string",
                      help="specify runfolder for post_run command ")

    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='increase logging verbosity')

    return parser
def constructConfigParser():
    """
    Build a RawConfigParser loaded from the system and user config files.

    The GERALD section is created when neither file defines it, so the
    returned parser always has that section.
    """
    config = RawConfigParser()
    config.read([CONFIG_SYSTEM, CONFIG_USER])
    if config.has_section(GERALD_CONFIG_SECTION):
        return config

    config.add_section(GERALD_CONFIG_SECTION)
    return config
def getCombinedOptions(argv=None):
    """
    Returns optparse options after it has been updated with ConfigParser
    config files and merged with parsed commandline options.

    expects command line arguments to be passed in

    :param argv: list of command line arguments to parse; when None no
        command line is parsed and an all-unset DummyOptions is used
    :returns: options object with ``url``, ``genome_dir``, ``post_run``,
        ``runfolder`` and ``output_filepath`` filled in where available
    """
    cl_parser = constructOptionParser()
    conf_parser = constructConfigParser()

    if argv is None:
        options = DummyOptions()
    else:
        options, _args = cl_parser.parse_args(argv)

    # An options object without a runfolder attribute would otherwise
    # raise AttributeError below; treat a missing one as "not given".
    if not hasattr(options, 'runfolder'):
        options.runfolder = None

    # Command line values win; the config file only fills in the gaps.
    if options.url is None:
        if conf_parser.has_option(GERALD_CONFIG_SECTION, 'config_host'):
            options.url = conf_parser.get(GERALD_CONFIG_SECTION, 'config_host')

    # NOTE(review): url may still be None here; confirm normalize_url
    # accepts that.
    options.url = normalize_url(options.url)

    if options.genome_dir is None:
        if conf_parser.has_option(GERALD_CONFIG_SECTION, 'genome_dir'):
            options.genome_dir = conf_parser.get(GERALD_CONFIG_SECTION, 'genome_dir')

    # post_run has no command line option; it only comes from the config.
    if conf_parser.has_option(GERALD_CONFIG_SECTION, 'post_run'):
        options.post_run = conf_parser.get(GERALD_CONFIG_SECTION, 'post_run')
    else:
        options.post_run = None

    if options.output_filepath is None:
        if options.runfolder is not None:
            options.output_filepath = os.path.join(options.runfolder, 'config-auto.txt')

    return options
def saveConfigFile(options):
    """
    retrieves the flowcell eland config file, given the base_host_url
    (i.e. http://sub.domain.edu:port)

    :param options: object providing ``url``, ``output_filepath``,
        ``flowcell``, ``genome_dir`` and ``post_run``
    The generated config is written to ``options.output_filepath``, or to
    standard output when that is None.
    """
    logging.info('USING OPTIONS:')
    logging.info(u'     URL: %s' % (options.url,))
    logging.info(u'     OUT: %s' % (options.output_filepath,))
    logging.info(u'      FC: %s' % (options.flowcell,))
    logging.info(u'post_run: %s' % ( unicode(options.post_run),))

    flowcell_info = retrieve_flowcell_info(options.url, options.flowcell)

    logging.debug('genome_dir: %s' % ( options.genome_dir, ))
    available_genomes = getAvailableGenomes(options.genome_dir)
    genome_map = constructMapperDict(available_genomes)
    logging.debug('available genomes: %s' % ( unicode( genome_map.keys() ),))

    config = format_gerald_config(options, flowcell_info, genome_map)

    if options.output_filepath is not None:
        outstream = open(options.output_filepath, 'w')
        logging.info('Writing config file to %s' % (options.output_filepath,))
        # Close the file even if the write fails, so the handle is not
        # leaked and the contents are flushed to disk.
        try:
            outstream.write(config)
        finally:
            outstream.close()
    else:
        sys.stdout.write(config)