3 from ConfigParser import RawConfigParser
5 from optparse import OptionParser, IndentedHelpFormatter
13 except ImportError, e:
14 import simplejson as json
16 from htsworkflow.frontend.auth import apidata
17 from htsworkflow.util.url import normalize_url
18 from htsworkflow.pipelines.genome_mapper import getAvailableGenomes
19 from htsworkflow.pipelines.genome_mapper import constructMapperDict
# Docstring markup declaration for documentation tools.
__docformat__ = "restructuredtext en"

# ini files that are read for defaults (system-wide file and per-user file).
CONFIG_SYSTEM = '/etc/htsworkflow.ini'
CONFIG_USER = os.path.expanduser('~/.htsworkflow.ini')
# Name of the ini section that holds the GERALD related settings.
GERALD_CONFIG_SECTION = 'gerald_config'

# Disable or enable commandline arg parsing; disabled by default.
DISABLE_CMDLINE = True

# Lane identifiers, as strings, used to index a flowcell's 'lane_set'.
LANE_LIST = ['1', '2', '3', '4', '5', '6', '7', '8']
class FlowCellNotFound(Exception):
    """Raised when information for a flowcell could not be obtained."""
class WebError404(Exception):
    """Exception type named for an HTTP 404 (not found) response."""
# NOTE(review): this function is stored as a damaged listing: every line starts
# with a listing number, indentation is gone, and some statements are not
# present (presumably the `try:` that pairs with the `except` below, the code
# that assigns `contents`, the conditions guarding the 403/404 raises, and the
# final return). It is not valid Python as stored -- restore it from version
# control before relying on it.
#
# Purpose, as far as the visible lines show: build
# <base_host_url>/experiments/config/<flowcell>/json, open it with the
# url-encoded `apidata` as the request body, and decode the response text with
# json.loads. FlowCellNotFound is raised with the 403, 404 and "no information"
# messages below.
35 def retrieve_flowcell_info(base_host_url, flowcell):
37 Return a dictionary describing a
39 url = base_host_url + '/experiments/config/%s/json' % (flowcell)
42 apipayload = urllib.urlencode(apidata)
43 web = urllib2.urlopen(url, apipayload)
# NOTE(review): urllib2.URLError only guarantees `.reason`; `.code` and `.msg`
# belong to the HTTPError subclass. For non-HTTP failures (DNS, refused
# connection) building `errmsg` would itself raise AttributeError -- confirm.
# NOTE(review): urllib2.urlopen raises HTTPError for 403/404 responses, so any
# later check of the response object for those codes may be unreachable --
# confirm against the missing lines.
44 except urllib2.URLError, e:
45 errmsg = 'URLError: %d %s' % (e.code, e.msg)
# The attempted URL is logged at error level.
47 logging.error('opened %s' % (url,))
54 msg = "403 - Forbbidden, probably need api key"
55 raise FlowCellNotFound(msg)
58 msg = "404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n " \
59 "Did you get right port #?" % (flowcell, base_host_url, url)
60 raise FlowCellNotFound(msg)
# An empty response body is reported as a missing flowcell.
62 if len(contents) == 0:
63 msg = "No information for flowcell (%s) returned; full url(%s)" % (flowcell, url)
64 raise FlowCellNotFound(msg)
# The response text is parsed as JSON; presumably `data` is what gets returned.
66 data = json.loads(contents)
def is_sequencing(lane_info):
    """
    Determine if we are just sequencing and not doing any follow-up analysis.

    :param lane_info: mapping describing one lane; must contain the key
                      'experiment_type'.
    :return: True when the experiment type is 'De Novo' or 'Whole Genome',
             False for any other experiment type.
    :raises KeyError: if 'experiment_type' is missing from lane_info.
    """
    return lane_info['experiment_type'] in ('De Novo', 'Whole Genome')
def group_lane_parameters(flowcell_info):
    """
    Group lanes that can share GERALD configuration blocks.

    (The same species, read length, and eland vs sequencing)

    :param flowcell_info: mapping with a 'lane_set' entry that maps lane
                          numbers to per-lane mappings containing
                          'read_length', 'library_species' and
                          'experiment_type'.
    :return: dict mapping (read_length, library_species, is_sequencing)
             tuples to the list of lane numbers sharing those values.
    """
    lane_set = flowcell_info['lane_set']
    lane_groups = {}
    # Walk the lanes in sorted order so every group's lane list comes out in
    # a stable order instead of following dict iteration order.
    for lane_number in sorted(lane_set):
        lane_info = lane_set[lane_number]
        index = (lane_info['read_length'],
                 lane_info['library_species'],
                 is_sequencing(lane_info))
        lane_groups.setdefault(index, []).append(lane_number)
    return lane_groups
# NOTE(review): this function is stored as a damaged listing: every line starts
# with a listing number, indentation is gone, and some short lines are not
# present. The gaps fall between the statements that append to `config`, so it
# cannot be told from here whether extra entries were appended there. It is
# not valid Python as stored -- restore it from version control.
#
# Purpose, as far as the visible lines show: collect text lines describing the
# flowcell (id, control lane, notes, one "LaneN: library_id | library_name"
# line per entry of LANE_LIST) and return them joined with "\n# ".
# Keys read from flowcell_info: 'flowcell_id', 'control_lane', 'notes',
# 'lane_set'; from each lane entry: 'library_id', 'library_name'.
92 def format_gerald_header(flowcell_info):
94 Generate comment describing the contents of the flowcell
96 # I'm using '\n# ' to join the lines together, that doesn't include the
97 # first element so i needed to put the # in manually
98 config = ['# FLOWCELL: %s' % (flowcell_info['flowcell_id'])]
100 config += ['CONTROL-LANE: %s' % (flowcell_info['control_lane'],)]
102 config += ['Flowcell Notes:']
# The notes are split on '\r\n' only; notes using bare '\n' stay one entry.
103 config.extend(flowcell_info['notes'].split('\r\n'))
# NOTE(review): 'lane_set' is indexed with every entry of LANE_LIST, so a
# flowcell lacking one of those lanes raises KeyError -- confirm that all
# eight lanes are always present.
105 for lane_number in LANE_LIST:
106 lane_info = flowcell_info['lane_set'][lane_number]
107 config += ['Lane%s: %s | %s' % (lane_number, lane_info['library_id'],
108 lane_info['library_name'])]
# NOTE(review): this entry is joined with "\n# " like the others, so it is
# emitted behind a '#' as a comment line -- confirm that is intended.
110 config += ['SEQUENCE_FORMAT --fastq']
112 return "\n# ".join(config)
def format_gerald_config(options, flowcell_info, genome_map):
    """
    Generate a GERALD config file.

    :param options: object providing `post_run` (a %-format template or None)
                    and, when `post_run` is set, `runfolder`, which fills the
                    template's %(runfolder)s field.
    :param flowcell_info: flowcell description; 'paired_end' must be True or
                          False, the remaining keys are consumed by
                          format_gerald_header and group_lane_parameters.
    :param genome_map: mapping from species to genome path; species that are
                       not present are written as "Unknown".
    :return: the config file text, ending with a newline.
    """
    # so we can add nothing or _pair if we're a paired end run
    run_type_suffix = {False: "", True: "_pair"}

    # it's convenient to have helpful information describing the flowcell
    # in the config file... things like which lane is which library.
    config = [format_gerald_header(flowcell_info)]

    analysis_suffix = run_type_suffix[flowcell_info['paired_end']]
    lane_groups = group_lane_parameters(flowcell_info)
    for lane_index, lane_numbers in lane_groups.items():
        # lane_index is the key built by group_lane_parameters; the flag gets
        # its own local name so it does not hide the is_sequencing function.
        read_length, species, sequencing_only = lane_index

        # Sorted so the lane prefix does not depend on dict iteration order.
        lane_prefix = u"".join(sorted(lane_numbers))

        analysis = 'sequence' if sequencing_only else 'eland'
        config += ['%s:ANALYSIS %s%s' % (lane_prefix, analysis, analysis_suffix)]
        # The read length is written as USE_BASES (no READ_LENGTH line).
        config += ['%s:USE_BASES Y%s' % (lane_prefix, read_length)]
        species_path = genome_map.get(species, "Unknown")
        config += ['%s:ELAND_GENOME %s' % (lane_prefix, species_path)]

    # add in option for running script after
    if options.post_run is not None:
        post_run = options.post_run % {'runfolder': options.runfolder}
        config += ['POST_RUN_COMMAND %s' % (post_run,)]

    config += ['']  # force trailing newline

    return "\n".join(config)
class DummyOptions(object):
    """
    Used when command line parsing is disabled; default

    Stands in for the optparse result, so it carries every attribute the
    option handling code reads (all unset).
    """
    def __init__(self):
        self.url = None
        self.output_filepath = None
        self.flowcell = None
        self.genome_dir = None
        # Read when the default output path is derived; must exist even when
        # no command line was parsed.
        self.runfolder = None
class PreformattedDescriptionFormatter(IndentedHelpFormatter):
    """Help formatter that emits the epilog text exactly as it was written."""

    def format_epilog(self, epilog):
        """
        It was removing my preformatted epilog, so this should override
        that behavior! Muhahaha!

        :param epilog: epilog text, or None/empty when the parser has none.
        :return: the epilog unchanged, wrapped in one leading and one
                 trailing newline; "" when there is no epilog.
        """
        if not epilog:
            return ""
        return "\n" + epilog + "\n"
# NOTE(review): this function is stored as a damaged listing: every line starts
# with a listing number, indentation is gone, and some lines are not present.
# In particular the statement that opens the triple-quoted text ending at
# `""" % (...)`, part of that text, and any final return are missing, so the
# help text cannot be reproduced from what is stored. It is not valid Python
# as stored -- restore it from version control.
#
# Purpose, as far as the visible lines show: create an OptionParser that uses
# PreformattedDescriptionFormatter, set its description, format a text block
# with CONFIG_SYSTEM, CONFIG_USER and GERALD_CONFIG_SECTION, and register the
# options -u/--url, -o/--output-file, -f/--flowcell, -g/--genome_dir and
# -r/--runfolder (all string "store" options).
#
# NOTE(review): -u, -f and -g are registered without help text, and
# --genome_dir uses an underscore while --output-file uses a hyphen.
# NOTE(review): -r/--runfolder has no explicit dest; optparse derives
# dest="runfolder" from the long option name.
181 def constructOptionParser():
183 returns a pre-setup optparser
185 parser = OptionParser(formatter=PreformattedDescriptionFormatter())
187 parser.set_description('Retrieves eland config file from hts_frontend web frontend.')
192 * %s (User specific; overrides system)
193 * command line overrides all config file options
198 config_host=http://somewhere.domain:port
199 genome_dir=/path to search for genomes
201 """ % (CONFIG_SYSTEM, CONFIG_USER, GERALD_CONFIG_SECTION)
203 #Special formatter for allowing preformatted description.
204 ##parser.format_epilog(PreformattedDescriptionFormatter())
206 parser.add_option("-u", "--url",
207 action="store", type="string", dest="url")
209 parser.add_option("-o", "--output-file",
210 action="store", type="string", dest="output_filepath",
211 help="config file destination. If runfolder is specified defaults "
212 "to <runfolder>/config-auto.txt" )
214 parser.add_option("-f", "--flowcell",
215 action="store", type="string", dest="flowcell")
217 parser.add_option("-g", "--genome_dir",
218 action="store", type="string", dest="genome_dir")
220 parser.add_option("-r", "--runfolder",
221 action="store", type="string",
222 help="specify runfolder for post_run command ")
def constructConfigParser():
    """
    returns a pre-setup config parser

    The parser has read CONFIG_SYSTEM and then CONFIG_USER, and is
    guaranteed to contain the GERALD_CONFIG_SECTION section.

    :return: a RawConfigParser instance.
    """
    parser = RawConfigParser()
    # Files that do not exist are skipped by read(); values from files later
    # in the list replace values read earlier.
    parser.read([CONFIG_SYSTEM, CONFIG_USER])
    # Make sure the section exists so callers can query it without first
    # checking for it.
    if not parser.has_section(GERALD_CONFIG_SECTION):
        parser.add_section(GERALD_CONFIG_SECTION)
    return parser
def getCombinedOptions(argv=None):
    """
    Returns optparse options after it has been updated with ConfigParser
    config files and merged with parsed commandline options.

    expects command line arguments to be passed in

    :param argv: argument list handed to the option parser; only used when
                 DISABLE_CMDLINE is false.
    :return: options object with url, genome_dir, post_run and
             output_filepath filled in from the config files wherever the
             command line left them unset.
    """
    cl_parser = constructOptionParser()
    conf_parser = constructConfigParser()

    if DISABLE_CMDLINE:
        options = DummyOptions()
    else:
        options, _unused_args = cl_parser.parse_args(argv)

    def config_value(name):
        # Value of `name` in the gerald section, or None when it is not set.
        if conf_parser.has_option(GERALD_CONFIG_SECTION, name):
            return conf_parser.get(GERALD_CONFIG_SECTION, name)
        return None

    if options.url is None:
        options.url = config_value('config_host')

    options.url = normalize_url(options.url)

    if options.genome_dir is None:
        options.genome_dir = config_value('genome_dir')

    # post_run is only configurable through the config files.
    options.post_run = config_value('post_run')

    # The options object may not define runfolder when the command line was
    # not parsed, so read it defensively.
    runfolder = getattr(options, 'runfolder', None)
    if options.output_filepath is None and runfolder is not None:
        options.output_filepath = os.path.join(runfolder, 'config-auto.txt')

    logging.info('USING OPTIONS:')
    logging.info(u' URL: %s', options.url)
    logging.info(u' OUT: %s', options.output_filepath)
    logging.info(u' FC: %s', options.flowcell)
    logging.info(u'post_run: %s', options.post_run)

    return options
def saveConfigFile(options):
    """
    retrieves the flowcell eland config file, given the base_host_url
    (i.e. http://sub.domain.edu:port)

    :param options: object providing url, flowcell, genome_dir and
                    output_filepath, plus whatever format_gerald_config
                    reads from it.

    The generated config is written to options.output_filepath when that is
    set, otherwise to standard output.
    """
    flowcell_info = retrieve_flowcell_info(options.url, options.flowcell)

    available_genomes = getAvailableGenomes(options.genome_dir)
    genome_map = constructMapperDict(available_genomes)

    config = format_gerald_config(options, flowcell_info, genome_map)

    if options.output_filepath is None:
        sys.stdout.write(config)
        return

    outstream = open(options.output_filepath, 'w')
    try:
        logging.info('Writing config file to %s' % (options.output_filepath,))
        outstream.write(config)
    finally:
        # Close the file even if the write fails so the handle is not leaked.
        outstream.close()