3 from ConfigParser import RawConfigParser
5 from optparse import OptionParser, IndentedHelpFormatter
13 except ImportError, e:
14 import simplejson as json
16 from htsworkflow.frontend.auth import apidata
17 from htsworkflow.util.url import normalize_url
18 from htsworkflow.pipelines.genome_mapper import getAvailableGenomes
19 from htsworkflow.pipelines.genome_mapper import constructMapperDict
# Docstring markup declaration for documentation tools; the value must be
# spelled "restructuredtext" to be recognised.
__docformat__ = "restructuredtext en"

# ini files consulted for settings. Both are handed to the config parser in
# this order, so values in the per-user file override the system-wide ones.
CONFIG_SYSTEM = '/etc/htsworkflow.ini'
CONFIG_USER = os.path.expanduser('~/.htsworkflow.ini')
# Name of the ini section holding the settings used by this module.
GERALD_CONFIG_SECTION = 'gerald_config'

# Disable or enable command-line argument parsing; disabled by default.
DISABLE_CMDLINE = True

# Lane identifiers, as the string keys used to index flowcell_info['lane_set'].
LANE_LIST = ['1', '2', '3', '4', '5', '6', '7', '8']
class FlowCellNotFound(Exception):
    """Raised when the web frontend yields no usable description of a flowcell."""
class WebError404(Exception):
    """
    Error type named after the HTTP 404 status.

    NOTE(review): nothing in the reviewed part of this module raises or
    catches it -- confirm it is still needed by external callers.
    """
def retrieve_flowcell_info(base_host_url, flowcell):
    """
    Return a dictionary describing a flowcell, as served by the web frontend.

    The request goes to ``<base_host_url>/experiments/config/<flowcell>/json``
    with the url-encoded ``apidata`` mapping as the request body, and the
    reply is decoded as JSON.

    :param base_host_url: frontend location; the path is appended verbatim,
        so it should not end with a slash
    :param flowcell: flowcell identifier substituted into the request path
    :returns: the decoded JSON document
    :raises IOError: when the url cannot be opened
    :raises FlowCellNotFound: on a 403 or 404 status, or an empty reply
    """
    url = base_host_url + '/experiments/config/%s/json' % (flowcell,)

    try:
        apipayload = urllib.urlencode(apidata)
        web = urllib2.urlopen(url, apipayload)
    except urllib2.URLError as e:
        # Only HTTPError carries code/msg; a plain URLError (refused
        # connection, failed name lookup, ...) only has a reason, and reading
        # e.code on it would raise AttributeError and hide the real failure.
        if hasattr(e, 'code'):
            errmsg = 'URLError: %d %s' % (e.code, e.msg)
        else:
            errmsg = 'URLError: %s' % (getattr(e, 'reason', e),)
        logging.error(errmsg)
        logging.error('opened %s' % (url,))
        # NOTE(review): the statement that ended this handler is not visible
        # in the reviewed excerpt; IOError is assumed -- confirm.
        raise IOError(errmsg)

    try:
        contents = web.read()
        status = web.getcode()
    finally:
        # release the connection even if reading the body fails
        web.close()

    # NOTE(review): the default urllib2 opener raises HTTPError for 403/404,
    # which the handler above turns into IOError; these checks only trigger
    # with an opener that hands error responses back -- confirm the intent.
    if status == 403:
        msg = "403 - Forbbidden, probably need api key"
        raise FlowCellNotFound(msg)

    if status == 404:
        msg = "404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n " \
              "Did you get right port #?" % (flowcell, base_host_url, url)
        raise FlowCellNotFound(msg)

    if len(contents) == 0:
        msg = "No information for flowcell (%s) returned; full url(%s)" % (flowcell, url)
        raise FlowCellNotFound(msg)

    data = json.loads(contents)
    return data
def is_sequencing(lane_info):
    """
    Determine if we are just sequencing and not doing any follow-up analysis

    :param lane_info: mapping with an 'experiment_type' entry
    :returns: True when the experiment type is 'De Novo' or 'Whole Genome',
        False for any other type
    :raises KeyError: if lane_info has no 'experiment_type' entry
    """
    return lane_info['experiment_type'] in ('De Novo', 'Whole Genome')
def group_lane_parameters(flowcell_info):
    """
    Group lanes that can share GERALD configuration blocks.

    (The same species, read length, and eland vs sequencing)

    :param flowcell_info: mapping whose 'lane_set' entry maps lane numbers
        to per-lane mappings holding 'read_length', 'library_species' and
        'experiment_type'
    :returns: dict mapping (read_length, library_species, is_sequencing)
        tuples to the list of lane numbers sharing those values, each list
        in sorted lane order
    """
    lane_set = flowcell_info['lane_set']
    lane_groups = {}
    # Walk the lanes in sorted order so the lane list of every group is
    # reproducible instead of following dictionary iteration order.
    for lane_number in sorted(lane_set):
        lane_info = lane_set[lane_number]
        index = (lane_info['read_length'],
                 lane_info['library_species'],
                 is_sequencing(lane_info))
        lane_groups.setdefault(index, []).append(lane_number)
    return lane_groups
def format_gerald_header(flowcell_info):
    """
    Generate comment describing the contents of the flowcell

    :param flowcell_info: mapping with 'flowcell_id', 'control_lane',
        'notes' and a 'lane_set' entry that has one item per LANE_LIST
        member, each with 'library_id' and 'library_name'
    :returns: the header text, every line starting with '# '
    :raises KeyError: if a LANE_LIST lane is absent from 'lane_set'
    """
    # I'm using '\n# ' to join the lines together, that doesn't include the
    # first element so i needed to put the # in manually
    config = ['# FLOWCELL: %s' % (flowcell_info['flowcell_id'])]
    config += ['CONTROL-LANE: %s' % (flowcell_info['control_lane'],)]
    config += ['Flowcell Notes:']

    # splitlines() breaks on every newline convention. Splitting on '\r\n'
    # alone would leave a bare '\n' inside one element, and the text after it
    # would land in the config file without the leading '# '.
    notes = flowcell_info['notes'] or ''
    config.extend(notes.splitlines() or [''])

    for lane_number in LANE_LIST:
        lane_info = flowcell_info['lane_set'][lane_number]
        config += ['Lane%s: %s | %s' % (lane_number, lane_info['library_id'],
                                        lane_info['library_name'])]

    # NOTE(review): as part of the joined list this line is emitted as
    # '# SEQUENCE_FORMAT --fastq', i.e. as a comment -- confirm that the
    # directive is not meant to be active.
    config += ['SEQUENCE_FORMAT --fastq']

    return "\n# ".join(config)
def format_gerald_config(options, flowcell_info, genome_map):
    """
    Generate a GERALD config file

    :param options: object with a ``post_run`` attribute (a '%(runfolder)s'
        template or None) and, when post_run is set, a ``runfolder`` attribute
    :param flowcell_info: flowcell description with a boolean 'paired_end'
        entry plus everything the header and lane grouping read
    :param genome_map: mapping from species name to genome location
    :returns: the config file text, ending with a newline
    """
    # so we can add nothing or _pair if we're a paired end run
    eland_analysis_suffix = {False: "_extended", True: "_pair"}
    sequence_analysis_suffix = {False: "", True: "_pair"}

    # it's convenient to have helpful information describing the flowcell
    # in the config file... things like which lane is which library.
    config = [format_gerald_header(flowcell_info)]

    paired_end = flowcell_info['paired_end']
    analysis_suffix = eland_analysis_suffix[paired_end]
    sequence_suffix = sequence_analysis_suffix[paired_end]

    lane_groups = group_lane_parameters(flowcell_info)
    for lane_index, lane_numbers in lane_groups.items():
        # lane_index is the key built by group_lane_parameters; the flag is
        # unpacked under its own name so it does not shadow is_sequencing()
        read_length, species, sequencing_only = lane_index

        lane_prefix = u"".join(lane_numbers)

        species_path = genome_map.get(species, None)
        logging.debug("Looked for genome '%s' got location '%s'" % (species, species_path))
        if species_path is None:
            no_genome_msg = "Forcing lanes %s to sequencing as there is no genome for %s"
            logging.warning(no_genome_msg % (lane_numbers, species))
            sequencing_only = True

        if sequencing_only:
            # sequence analysis takes its own suffix; the eland suffix would
            # yield 'sequence_extended' on single-end runs
            config += ['%s:ANALYSIS sequence%s' % (lane_prefix, sequence_suffix)]
        else:
            config += ['%s:ANALYSIS eland%s' % (lane_prefix, analysis_suffix)]
            config += ['%s:ELAND_GENOME %s' % (lane_prefix, species_path)]
        # NOTE(review): indentation was lost in the reviewed excerpt;
        # USE_BASES is assumed to apply to both analysis types -- confirm.
        config += ['%s:USE_BASES Y%s' % (lane_prefix, read_length)]

    # add in option for running script after
    if options.post_run is not None:
        post_run = options.post_run % {'runfolder': options.runfolder}
        config += ['POST_RUN_COMMAND %s' % (post_run,)]

    config += ['']  # force trailing newline

    return "\n".join(config)
class DummyOptions(object):
    """
    Used when command line parsing is disabled; default

    Carries the same attributes the option parser would produce, all unset,
    so code that receives the options can read any of them without checking
    which kind of object it got.
    """
    def __init__(self):
        self.url = None
        self.output_filepath = None
        self.flowcell = None
        self.genome_dir = None
        # read when deriving the default output path and the post_run
        # command; without it those reads raise AttributeError
        self.runfolder = None
        self.verbose = False
class PreformattedDescriptionFormatter(IndentedHelpFormatter):
    """Help formatter that prints the epilog verbatim instead of re-wrapping it."""

    def format_epilog(self, epilog):
        """
        Return the epilog untouched, framed by one newline on each side.

        The stock formatter re-flows the epilog text, which destroys a
        preformatted one; this override keeps its line breaks and indentation.

        :param epilog: epilog text, or None/'' when the parser has none
        :returns: the framed epilog, or '' when there is nothing to print
        """
        if not epilog:
            return ""
        return "\n" + epilog + "\n"
def constructOptionParser():
    """
    returns a pre-setup optparser

    The parser defines the url, output_filepath, flowcell, genome_dir,
    runfolder and verbose options and prints its epilog verbatim.
    """
    parser = OptionParser(formatter=PreformattedDescriptionFormatter())

    parser.set_description('Retrieves eland config file from hts_frontend web frontend.')

    # NOTE(review): only part of this literal was visible in the reviewed
    # excerpt; the remaining lines are reconstructed around the three
    # substituted values -- confirm the wording.
    parser.epilog = """
Config File:
  * %s (System wide)
  * %s (User specific; overrides system)
  * command line overrides all config file options

  Example Config File:

    [%s]
    config_host=http://somewhere.domain:port
    genome_dir=/path to search for genomes
    post_run=command to run afterwards, may contain %%(runfolder)s

""" % (CONFIG_SYSTEM, CONFIG_USER, GERALD_CONFIG_SECTION)

    parser.add_option("-u", "--url",
                      action="store", type="string", dest="url",
                      help="url of the web frontend; overrides config_host "
                           "from the config files")

    parser.add_option("-o", "--output-file",
                      action="store", type="string", dest="output_filepath",
                      help="config file destination. If runfolder is specified defaults "
                           "to <runfolder>/config-auto.txt")

    parser.add_option("-f", "--flowcell",
                      action="store", type="string", dest="flowcell",
                      help="flowcell to retrieve the configuration for")

    parser.add_option("-g", "--genome_dir",
                      action="store", type="string", dest="genome_dir",
                      help="path to search for genomes; overrides genome_dir "
                           "from the config files")

    parser.add_option("-r", "--runfolder",
                      action="store", type="string",
                      help="specify runfolder for post_run command ")

    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='increase logging verbosity')

    return parser
def constructConfigParser():
    """
    returns a pre-setup config parser

    The system-wide file is read first and the per-user file second, and the
    GERALD section is created when neither file defines it, so callers can
    query that section without checking that it exists.
    """
    parser = RawConfigParser()
    # files that do not exist are skipped by read()
    parser.read([CONFIG_SYSTEM, CONFIG_USER])
    if not parser.has_section(GERALD_CONFIG_SECTION):
        parser.add_section(GERALD_CONFIG_SECTION)

    return parser
def getCombinedOptions(argv=None):
    """
    Returns optparse options after it has be updated with ConfigParser
    config files and merged with parsed commandline options.

    expects command line arguments to be passed in

    Values given on the command line win; url and genome_dir fall back to
    ``config_host`` and ``genome_dir`` from the config files, ``post_run``
    always comes from the config files, and the output path defaults to
    ``<runfolder>/config-auto.txt`` when a runfolder is known.

    :param argv: list of command line arguments, or None
    :returns: the options object with url, genome_dir, post_run and
        output_filepath filled in as far as possible
    """
    cl_parser = constructOptionParser()
    conf_parser = constructConfigParser()

    # NOTE(review): the condition selecting between these branches is not
    # visible in the reviewed excerpt. Placeholder options are used only when
    # parsing is disabled AND no arguments were supplied -- confirm.
    if DISABLE_CMDLINE and argv is None:
        options = DummyOptions()
    else:
        options, args = cl_parser.parse_args(argv)

    if options.url is None:
        if conf_parser.has_option(GERALD_CONFIG_SECTION, 'config_host'):
            options.url = conf_parser.get(GERALD_CONFIG_SECTION, 'config_host')

    options.url = normalize_url(options.url)

    if options.genome_dir is None:
        if conf_parser.has_option(GERALD_CONFIG_SECTION, 'genome_dir'):
            options.genome_dir = conf_parser.get(GERALD_CONFIG_SECTION, 'genome_dir')

    if conf_parser.has_option(GERALD_CONFIG_SECTION, 'post_run'):
        options.post_run = conf_parser.get(GERALD_CONFIG_SECTION, 'post_run')
    else:
        options.post_run = None

    if options.output_filepath is None:
        # getattr: a placeholder options object may not define runfolder
        runfolder = getattr(options, 'runfolder', None)
        if runfolder is not None:
            options.output_filepath = os.path.join(runfolder, 'config-auto.txt')

    return options
def saveConfigFile(options):
    """
    retrieves the flowcell eland config file, give the base_host_url
    (i.e. http://sub.domain.edu:port)

    The config text is written to ``options.output_filepath``, or to
    standard output when no output path is set.

    :param options: object providing url, flowcell, genome_dir, post_run,
        output_filepath and whatever format_gerald_config() reads
    """
    logging.info('USING OPTIONS:')
    logging.info(u' URL: %s', options.url)
    logging.info(u' OUT: %s', options.output_filepath)
    logging.info(u' FC: %s', options.flowcell)
    logging.info(u'post_run: %s', options.post_run)

    flowcell_info = retrieve_flowcell_info(options.url, options.flowcell)

    logging.debug('genome_dir: %s', options.genome_dir)
    available_genomes = getAvailableGenomes(options.genome_dir)
    genome_map = constructMapperDict(available_genomes)
    logging.debug('available genomes: %s', list(genome_map.keys()))

    config = format_gerald_config(options, flowcell_info, genome_map)

    if options.output_filepath is not None:
        # the with-block closes (and thereby flushes) the file even when the
        # write fails; stdout is deliberately left open in the other branch
        with open(options.output_filepath, 'w') as outstream:
            logging.info('Writing config file to %s', options.output_filepath)
            outstream.write(config)
    else:
        sys.stdout.write(config)