3 from ConfigParser import RawConfigParser
5 from optparse import OptionParser, IndentedHelpFormatter
13 except ImportError, e:
14 import simplejson as json
16 from htsworkflow.frontend.auth import apidata
17 from htsworkflow.util.url import normalize_url
18 from htsworkflow.pipelines.genome_mapper import getAvailableGenomes
19 from htsworkflow.pipelines.genome_mapper import constructMapperDict
# Docstring markup declaration read by documentation tools.
__docformat__ = "restructuredtext en"

# Configuration files; both are read, the user file after the system one.
CONFIG_SYSTEM = '/etc/htsworkflow.ini'
CONFIG_USER = os.path.expanduser('~/.htsworkflow.ini')
# Name of the ini section that holds this module's settings.
GERALD_CONFIG_SECTION = 'gerald_config'

# Disable or enable command-line argument parsing; disabled by default.
DISABLE_CMDLINE = True

# Lane identifiers, kept as strings because they index the flowcell's
# 'lane_set' mapping directly.
LANE_LIST = ['1', '2', '3', '4', '5', '6', '7', '8']
class FlowCellNotFound(Exception):
    """
    Raised when no usable flowcell description could be obtained: the server
    answered 403 or 404, or returned an empty body.
    """
class WebError404(Exception):
    """
    Exception type for HTTP 404 responses (presumably, going by the name;
    nothing in the visible part of this module raises it).
    """
def retrieve_flowcell_info(base_host_url, flowcell):
    """
    Return a dictionary describing a flowcell, as reported by the frontend.

    The description is requested from
    ``<base_host_url>/experiments/config/<flowcell>/json`` with the API
    credentials sent as the request body, and the JSON response is decoded.

    :param base_host_url: base URL of the frontend; the path is appended to
        it verbatim
    :param flowcell: flowcell identifier substituted into the URL
    :returns: the decoded JSON document
    :raises IOError: if the request itself fails
    :raises FlowCellNotFound: on a 403 or 404 status or an empty response
    """
    url = base_host_url + '/experiments/config/%s/json' % (flowcell)

    try:
        apipayload = urllib.urlencode(apidata)
        web = urllib2.urlopen(url, apipayload)
    except urllib2.URLError as e:
        # Only HTTPError carries code/msg; a plain URLError (unreachable
        # host, refused connection, ...) only has a reason.  Formatting
        # e.code unconditionally would hide the real failure behind an
        # AttributeError.
        if getattr(e, 'code', None) is not None:
            errmsg = 'URLError: %d %s' % (e.code, e.msg)
        else:
            errmsg = 'URLError: %s' % (e.reason,)
        logging.error(errmsg)
        logging.error('opened %s' % (url,))
        raise IOError(errmsg)

    # Read everything that is needed, then release the connection.
    try:
        status = web.getcode()
        contents = web.read()
    finally:
        web.close()

    if status == 403:
        msg = "403 - Forbbidden, probably need api key"
        raise FlowCellNotFound(msg)

    if status == 404:
        msg = "404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n " \
              "Did you get right port #?" % (flowcell, base_host_url, url)
        raise FlowCellNotFound(msg)

    if not contents:
        msg = "No information for flowcell (%s) returned; full url(%s)" % (flowcell, url)
        raise FlowCellNotFound(msg)

    return json.loads(contents)
def is_sequencing(lane_info):
    """
    Determine if we are just sequencing and not doing any follow-up analysis

    :param lane_info: lane description; only ``experiment_type`` is read
    :returns: True for 'De Novo' and 'Whole Genome' experiments, otherwise
        False
    """
    return lane_info['experiment_type'] in ('De Novo', 'Whole Genome')
def group_lane_parameters(flowcell_info):
    """
    Group lanes that can share GERALD configuration blocks.

    (The same species, read length, and eland vs sequencing)

    :param flowcell_info: flowcell description; its ``lane_set`` entry maps
        lane numbers to lane descriptions
    :returns: dictionary mapping
        ``(read_length, library_species, is_sequencing)`` to the list of
        lane numbers sharing those values
    """
    lane_set = flowcell_info['lane_set']
    lane_groups = {}
    # Visit lanes in sorted order so every group's lane list comes out in
    # the same order regardless of dictionary ordering.
    for lane_number in sorted(lane_set):
        lane_info = lane_set[lane_number]
        index = (lane_info['read_length'],
                 lane_info['library_species'],
                 is_sequencing(lane_info))
        lane_groups.setdefault(index, []).append(lane_number)
    return lane_groups
def format_gerald_header(flowcell_info):
    """
    Generate comment describing the contents of the flowcell

    The result is one string in which every line starts with ``# ``: the
    flowcell id, the control lane, the free-text notes, then one line per
    lane in LANE_LIST giving its library id and name.

    :param flowcell_info: flowcell description providing ``flowcell_id``,
        ``control_lane``, ``notes`` and ``lane_set``
    :returns: the comment block, without a trailing newline
    :raises KeyError: if a lane from LANE_LIST is absent from ``lane_set``
    """
    # The lines are joined with '\n# ', which does not prefix the first
    # element, so its '# ' is written by hand.
    config = ['# FLOWCELL: %s' % (flowcell_info['flowcell_id'])]
    config.append('CONTROL-LANE: %s' % (flowcell_info['control_lane'],))
    config.append('Flowcell Notes:')

    # Accept any newline convention: a bare '\n' or '\r' left inside a note
    # would otherwise start an output line without the '# ' prefix and end
    # up outside the comment block.  Missing notes are treated as empty.
    notes = flowcell_info['notes'] or ''
    config.extend(notes.replace('\r\n', '\n').replace('\r', '\n').split('\n'))

    for lane_number in LANE_LIST:
        lane_info = flowcell_info['lane_set'][lane_number]
        config.append('Lane%s: %s | %s' % (lane_number,
                                           lane_info['library_id'],
                                           lane_info['library_name']))

    return "\n# ".join(config)
def format_gerald_config(options, flowcell_info, genome_map):
    """
    Generate a GERALD config file

    :param options: object providing ``post_run`` (a %-format template or
        None) and ``runfolder`` (substituted into that template)
    :param flowcell_info: flowcell description; ``paired_end`` and the
        per-lane data are read from it
    :param genome_map: mapping of species name to genome path; species
        without an entry are written as "Unknown"
    :returns: the config file contents, ending with a newline
    """
    # so we can add nothing or _pair if we're a paired end run
    run_type_suffix = {False: "", True: "_pair"}

    # It's convenient to have helpful information describing the flowcell
    # in the config file... things like which lane is which library.
    config = [format_gerald_header(flowcell_info)]

    analysis_suffix = run_type_suffix[flowcell_info['paired_end']]
    lane_groups = group_lane_parameters(flowcell_info)

    # Emit the groups ordered by their lanes so the generated file does not
    # depend on dictionary ordering.
    ordered_groups = sorted(lane_groups.items(),
                            key=lambda group: sorted(group[1]))
    for lane_index, lane_numbers in ordered_groups:
        # lane_index is the key built by group_lane_parameters; the flag gets
        # its own name so it does not shadow the is_sequencing() function.
        read_length, species, sequencing_only = lane_index

        lane_prefix = u"".join(sorted(lane_numbers))

        if not sequencing_only:
            config += ['%s:ANALYSIS eland%s' % (lane_prefix, analysis_suffix)]
        else:
            config += ['%s:ANALYSIS sequence%s' % (lane_prefix, analysis_suffix)]
        config += ['%s:USE_BASES Y%s' % (lane_prefix, read_length)]
        species_path = genome_map.get(species, "Unknown")
        config += ['%s:ELAND_GENOME %s' % (lane_prefix, species_path)]

    # add in option for running script after
    if options.post_run is not None:
        post_run = options.post_run % {'runfolder': options.runfolder}
        config += ['POST_RUN_COMMAND %s' % (post_run,)]

    config += ['']  # force trailing newline

    return "\n".join(config)
class DummyOptions(object):
    """
    Used when command line parsing is disabled; default

    Carries the option attributes that are otherwise produced by the option
    parser, every one of them unset.
    """
    def __init__(self):
        self.url = None
        self.output_filepath = None
        self.flowcell = None
        self.genome_dir = None
        # getCombinedOptions reads options.runfolder when no output path is
        # set; without this attribute that lookup raises AttributeError.
        self.runfolder = None
class PreformattedDescriptionFormatter(IndentedHelpFormatter):
    """
    Help formatter that prints the epilog exactly as written instead of
    letting it be re-wrapped.
    """

    def format_epilog(self, epilog):
        """
        Return the epilog with its own line breaks and indentation intact.

        The inherited behaviour was removing the preformatted layout, so this
        override returns the text untouched between two newlines.

        :param epilog: epilog text, or None/empty when there is none
        :returns: the text surrounded by newlines, or "" when there is none
        """
        # A parser without an epilog passes None here; concatenating it
        # would raise TypeError.
        if not epilog:
            return ""
        return "\n" + epilog + "\n"
# Builds the optparse parser for the command line: a description, an epilog
# that documents the two config files and an example section, and the
# -u/-o/-f/-g/-r options.
# NOTE(review): this listing is missing lines inside this function (the start
# of the epilog string literal among them), so the code is left exactly as
# found -- restore the missing lines from the full file before editing it.
179 def constructOptionParser():
181 returns a pre-setup optparser
183 parser = OptionParser(formatter=PreformattedDescriptionFormatter())
185 parser.set_description('Retrieves eland config file from hts_frontend web frontend.')
190 * %s (User specific; overrides system)
191 * command line overrides all config file options
196 config_host=http://somewhere.domain:port
197 genome_dir=/path to search for genomes
199 """ % (CONFIG_SYSTEM, CONFIG_USER, GERALD_CONFIG_SECTION)
201 #Special formatter for allowing preformatted description.
202 ##parser.format_epilog(PreformattedDescriptionFormatter())
# Frontend base URL; stored as options.url.
204 parser.add_option("-u", "--url",
205 action="store", type="string", dest="url")
# Destination of the generated config; stored as options.output_filepath.
207 parser.add_option("-o", "--output-file",
208 action="store", type="string", dest="output_filepath",
209 help="config file destination. If runfolder is specified defaults "
210 "to <runfolder>/config-auto.txt" )
# Flowcell identifier; stored as options.flowcell.
212 parser.add_option("-f", "--flowcell",
213 action="store", type="string", dest="flowcell")
# Genome directory; stored as options.genome_dir.
215 parser.add_option("-g", "--genome_dir",
216 action="store", type="string", dest="genome_dir")
# No dest is given here: optparse derives it from the long option name, so
# the value is stored as options.runfolder.
218 parser.add_option("-r", "--runfolder",
219 action="store", type="string",
220 help="specify runfolder for post_run command ")
def constructConfigParser():
    """
    returns a pre-setup config parser

    The system-wide file is read first and the per-user file second, and the
    GERALD section is created when neither file defines it so that callers
    can query it unconditionally.

    :returns: the populated RawConfigParser
    """
    parser = RawConfigParser()
    parser.read([CONFIG_SYSTEM, CONFIG_USER])
    if not parser.has_section(GERALD_CONFIG_SECTION):
        parser.add_section(GERALD_CONFIG_SECTION)
    return parser
# Merges command-line options with values from the config files: url and
# genome_dir fall back to the config file when unset, post_run comes from the
# config file only, and the output path defaults to a file in the runfolder.
# NOTE(review): this listing is missing lines inside this function (branch
# keywords and whatever follows the logging calls), so the code is left
# exactly as found -- restore them from the full file before editing it.
236 def getCombinedOptions(argv=None):
238 Returns optparse options after it has be updated with ConfigParser
239 config files and merged with parsed commandline options.
241 expects command line arguments to be passed in
243 cl_parser = constructOptionParser()
244 conf_parser = constructConfigParser()
# NOTE(review): the next two assignments are presumably alternative branches
# (DummyOptions when command-line parsing is not used); the condition that
# selects between them is not visible here -- confirm against the full file.
247 options = DummyOptions()
249 options, args = cl_parser.parse_args(argv)
# Fall back to the config file's 'config_host' when no url was supplied.
251 if options.url is None:
252 if conf_parser.has_option(GERALD_CONFIG_SECTION, 'config_host'):
253 options.url = conf_parser.get(GERALD_CONFIG_SECTION, 'config_host')
255 options.url = normalize_url(options.url)
# Same fallback for the genome directory.
257 if options.genome_dir is None:
258 if conf_parser.has_option(GERALD_CONFIG_SECTION, 'genome_dir'):
259 options.genome_dir = conf_parser.get(GERALD_CONFIG_SECTION, 'genome_dir')
# post_run has no command-line option; it is taken from the config file.
# NOTE(review): the None assignment below is presumably the else branch.
261 if conf_parser.has_option(GERALD_CONFIG_SECTION, 'post_run'):
262 options.post_run = conf_parser.get(GERALD_CONFIG_SECTION, 'post_run')
264 options.post_run = None
# Without an explicit output path, write config-auto.txt into the runfolder
# when one was given.
266 if options.output_filepath is None:
267 if options.runfolder is not None:
268 options.output_filepath = os.path.join(options.runfolder, 'config-auto.txt')
# Record the effective settings in the log.
270 logging.info('USING OPTIONS:')
271 logging.info(u' URL: %s' % (options.url,))
272 logging.info(u' OUT: %s' % (options.output_filepath,))
273 logging.info(u' FC: %s' % (options.flowcell,))
274 #logging.info(': %s' % (options.genome_dir,))
275 logging.info(u'post_run: %s' % ( unicode(options.post_run),))
def saveConfigFile(options):
    """
    Retrieve the flowcell description from the frontend at ``options.url``
    (i.e. http://sub.domain.edu:port), build the GERALD config from it and
    write the result out.

    :param options: combined options; ``url``, ``flowcell``, ``genome_dir``
        and ``output_filepath`` are read here, and the object is passed on
        to format_gerald_config

    The config is written to ``options.output_filepath`` when that is set and
    to standard output otherwise.
    """
    flowcell_info = retrieve_flowcell_info(options.url, options.flowcell)

    available_genomes = getAvailableGenomes(options.genome_dir)
    genome_map = constructMapperDict(available_genomes)

    config = format_gerald_config(options, flowcell_info, genome_map)

    if options.output_filepath is not None:
        # The with block closes the file even if the write fails; previously
        # the handle was never closed.
        with open(options.output_filepath, 'w') as outstream:
            logging.info('Writing config file to %s' % (options.output_filepath,))
            outstream.write(config)
    else:
        sys.stdout.write(config)