3 from ConfigParser import RawConfigParser
5 from optparse import OptionParser, IndentedHelpFormatter
13 except ImportError, e:
14 import simplejson as json
16 from htsworkflow.frontend.auth import apidata
17 from htsworkflow.util.url import normalize_url
18 from htsworkflow.pipelines.genome_mapper import getAvailableGenomes
19 from htsworkflow.pipelines.genome_mapper import constructMapperDict
# Docstring markup declaration read by documentation tools; the markup name
# must be spelled "restructuredtext" for those tools to recognise it.
__docformat__ = "restructuredtext en"

# Configuration files, read in this order (the user file is read last).
CONFIG_SYSTEM = '/etc/htsworkflow.ini'
CONFIG_USER = os.path.expanduser('~/.htsworkflow.ini')
# Section of the ini files that holds the settings used by this module.
GERALD_CONFIG_SECTION = 'gerald_config'

#Disable or enable commandline arg parsing; disabled by default.
DISABLE_CMDLINE = True

# Lane identifiers, as strings, in the order they are listed in the header.
LANE_LIST = ['1','2','3','4','5','6','7','8']
class FlowCellNotFound(Exception):
    """
    Raised when the server yields no usable description of a flowcell.
    """
class WebError404(Exception):
    """
    Error type named after an HTTP 404 response.

    NOTE(review): nothing in the visible part of this module raises it --
    confirm whether code elsewhere still depends on it.
    """
35 def retrieve_flowcell_info(base_host_url, flowcell):
37 Return a dictionary describing a
39 url = base_host_url + '/experiments/config/%s/json' % (flowcell)
42 apipayload = urllib.urlencode(apidata)
43 web = urllib2.urlopen(url, apipayload)
44 except urllib2.URLError, e:
45 errmsg = 'URLError: %d %s' % (e.code, e.msg)
47 logging.error('opened %s' % (url,))
54 msg = "403 - Forbbidden, probably need api key"
55 raise FlowCellNotFound(msg)
58 msg = "404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n " \
59 "Did you get right port #?" % (flowcell, base_host_url, url)
60 raise FlowCellNotFound(msg)
62 if len(contents) == 0:
63 msg = "No information for flowcell (%s) returned; full url(%s)" % (flowcell, url)
64 raise FlowCellNotFound(msg)
66 data = json.loads(contents)
def is_sequencing(lane_info):
    """
    Determine if we are just sequencing and not doing any follow-up analysis

    :param lane_info: mapping describing one lane; must provide the
        'experiment_type' key.
    :return: True when the experiment type is 'De Novo' or 'Whole Genome',
        False for every other experiment type.
    :raises KeyError: if lane_info has no 'experiment_type' entry.
    """
    return lane_info['experiment_type'] in ('De Novo', 'Whole Genome')
def group_lane_parameters(flowcell_info):
    """
    Group lanes that can share GERALD configuration blocks.

    (The same species, read length, and eland vs sequencing)

    :param flowcell_info: mapping with a 'lane_set' entry that maps each lane
        number to a lane description providing 'read_length',
        'library_species' and whatever is_sequencing() reads.
    :return: dict mapping a (read_length, library_species, is_sequencing)
        tuple to the list of lane numbers that share those settings.
    """
    lane_groups = {}
    lane_set = flowcell_info['lane_set']
    # Visit the lanes in sorted order so that each group's lane list does not
    # depend on dictionary iteration order.
    for lane_number in sorted(lane_set):
        lane_info = lane_set[lane_number]
        index = (lane_info['read_length'],
                 lane_info['library_species'],
                 is_sequencing(lane_info))
        lane_groups.setdefault(index, []).append(lane_number)
    return lane_groups
92 def format_gerald_header(flowcell_info):
94 Generate comment describing the contents of the flowcell
96 # I'm using '\n# ' to join the lines together, that doesn't include the
97 # first element so i needed to put the # in manually
98 config = ['# FLOWCELL: %s' % (flowcell_info['flowcell_id'])]
100 config += ['CONTROL-LANE: %s' % (flowcell_info['control_lane'],)]
102 config += ['Flowcell Notes:']
103 config.extend(flowcell_info['notes'].split('\r\n'))
105 for lane_number in LANE_LIST:
106 lane_info = flowcell_info['lane_set'][lane_number]
107 config += ['Lane%s: %s | %s' % (lane_number, lane_info['library_id'],
108 lane_info['library_name'])]
110 config += ['SEQUENCE_FORMAT --fastq']
112 return "\n# ".join(config)
def format_gerald_config(options, flowcell_info, genome_map):
    """
    Generate a GERALD config file

    :param options: object exposing ``post_run`` (None, or a %-style template
        that is filled in with a ``runfolder`` key) and ``runfolder``.
    :param flowcell_info: flowcell description; 'paired_end' is read here and
        the whole mapping is handed to format_gerald_header() and
        group_lane_parameters().
    :param genome_map: mapping of species name to genome location, queried
        with ``.get()``.
    :return: the config file contents as one newline-terminated string.
    """
    # so we can add nothing or _pair if we're a paired end run
    run_type_suffix = {False: "", True: "_pair"}

    # it's convenient to have helpful information describing the flowcell
    # in the config file... things like which lane is which library.
    config = [format_gerald_header(flowcell_info)]

    analysis_suffix = run_type_suffix[flowcell_info['paired_end']]
    lane_groups = group_lane_parameters(flowcell_info)
    for lane_index, lane_numbers in lane_groups.items():
        # lane_index is return value of group_lane_parameters
        read_length, species, sequencing_only = lane_index

        # sorted so the prefix does not depend on dictionary ordering
        lane_prefix = u"".join(sorted(lane_numbers))

        species_path = genome_map.get(species, None)
        logging.debug("Looked for genome '%s' got location '%s'" % (species, species_path))
        if species_path is None:
            no_genome_msg = "Forcing lanes %s to sequencing as there is no genome for %s"
            logging.warning(no_genome_msg % (lane_numbers, species))
            # Without a genome there is nothing to align against, so do what
            # the warning announces instead of emitting 'ELAND_GENOME None'.
            sequencing_only = True

        if not sequencing_only:
            config += ['%s:ANALYSIS eland%s' % (lane_prefix, analysis_suffix)]
            config += ['%s:ELAND_GENOME %s' % (lane_prefix, species_path)]
        else:
            config += ['%s:ANALYSIS sequence%s' % (lane_prefix, analysis_suffix)]
        #config += ['%s:READ_LENGTH %s' % ( lane_prefix, read_length ) ]
        # NOTE(review): USE_BASES is emitted for every lane group, eland or
        # sequence -- confirm this matches the deployed behaviour.
        config += ['%s:USE_BASES Y%s' % (lane_prefix, read_length)]

    # add in option for running script after
    if options.post_run is not None:
        post_run = options.post_run % {'runfolder': options.runfolder}
        config += ['POST_RUN_COMMAND %s' % (post_run,)]

    config += ['']  # force trailing newline

    return "\n".join(config)
159 Used when command line parsing is disabled; default
163 self.output_filepath = None
165 self.genome_dir = None
class PreformattedDescriptionFormatter(IndentedHelpFormatter):
    """
    Help formatter that emits the parser epilog exactly as it was written.
    """

    # A matching override for the description was sketched but is disabled:
    #def format_description(self, description):
    #    return description + "\n"

    def format_epilog(self, epilog):
        """
        Return the epilog unchanged, framed by one newline on each side.

        The inherited formatter was removing the preformatted layout of the
        epilog, so this override bypasses it.

        :param epilog: epilog text, or None / empty when the parser has none.
        :return: "\\n" + epilog + "\\n", or "" when there is no epilog
            (the same result the stock formatter gives for a missing epilog).
        """
        if not epilog:
            # Concatenating None would raise TypeError while printing help.
            return ""
        return "\n" + epilog + "\n"
187 def constructOptionParser():
189 returns a pre-setup optparser
191 parser = OptionParser(formatter=PreformattedDescriptionFormatter())
193 parser.set_description('Retrieves eland config file from hts_frontend web frontend.')
198 * %s (User specific; overrides system)
199 * command line overrides all config file options
204 config_host=http://somewhere.domain:port
205 genome_dir=/path to search for genomes
207 """ % (CONFIG_SYSTEM, CONFIG_USER, GERALD_CONFIG_SECTION)
209 #Special formatter for allowing preformatted description.
210 ##parser.format_epilog(PreformattedDescriptionFormatter())
212 parser.add_option("-u", "--url",
213 action="store", type="string", dest="url")
215 parser.add_option("-o", "--output-file",
216 action="store", type="string", dest="output_filepath",
217 help="config file destination. If runfolder is specified defaults "
218 "to <runfolder>/config-auto.txt" )
220 parser.add_option("-f", "--flowcell",
221 action="store", type="string", dest="flowcell")
223 parser.add_option("-g", "--genome_dir",
224 action="store", type="string", dest="genome_dir")
226 parser.add_option("-r", "--runfolder",
227 action="store", type="string",
228 help="specify runfolder for post_run command ")
230 parser.add_option('-v', '--verbose', action='store_true', default=False,
231 help='increase logging verbosity')
def constructConfigParser():
    """
    returns a pre-setup config parser

    The parser is loaded from CONFIG_SYSTEM and then CONFIG_USER, and is
    guaranteed to contain the GERALD_CONFIG_SECTION section (added empty when
    neither file defines it) so callers can query options without first
    checking that the section exists.

    :return: the populated RawConfigParser instance.
    """
    parser = RawConfigParser()
    parser.read([CONFIG_SYSTEM, CONFIG_USER])
    if not parser.has_section(GERALD_CONFIG_SECTION):
        parser.add_section(GERALD_CONFIG_SECTION)
    return parser
246 def getCombinedOptions(argv=None):
248 Returns optparse options after it has be updated with ConfigParser
249 config files and merged with parsed commandline options.
251 expects command line arguments to be passed in
253 cl_parser = constructOptionParser()
254 conf_parser = constructConfigParser()
257 options = DummyOptions()
259 options, args = cl_parser.parse_args(argv)
261 if options.url is None:
262 if conf_parser.has_option(GERALD_CONFIG_SECTION, 'config_host'):
263 options.url = conf_parser.get(GERALD_CONFIG_SECTION, 'config_host')
265 options.url = normalize_url(options.url)
267 if options.genome_dir is None:
268 if conf_parser.has_option(GERALD_CONFIG_SECTION, 'genome_dir'):
269 options.genome_dir = conf_parser.get(GERALD_CONFIG_SECTION, 'genome_dir')
271 if conf_parser.has_option(GERALD_CONFIG_SECTION, 'post_run'):
272 options.post_run = conf_parser.get(GERALD_CONFIG_SECTION, 'post_run')
274 options.post_run = None
276 if options.output_filepath is None:
277 if options.runfolder is not None:
278 options.output_filepath = os.path.join(options.runfolder, 'config-auto.txt')
def saveConfigFile(options):
    """
    retrieves the flowcell eland config file, give the base_host_url
    (i.e. http://sub.domain.edu:port)

    The flowcell description is fetched with retrieve_flowcell_info(), the
    available genomes are looked up under options.genome_dir, and the
    resulting GERALD config is written to options.output_filepath, or to
    standard output when no output path is set.

    :param options: object providing ``url``, ``output_filepath``,
        ``flowcell``, ``genome_dir`` and ``post_run``; it is also passed on
        to format_gerald_config().
    """
    logging.info('USING OPTIONS:')
    logging.info(u' URL: %s' % (options.url,))
    logging.info(u' OUT: %s' % (options.output_filepath,))
    logging.info(u' FC: %s' % (options.flowcell,))
    #logging.info(': %s' % (options.genome_dir,))
    logging.info(u'post_run: %s' % ( unicode(options.post_run),))

    flowcell_info = retrieve_flowcell_info(options.url, options.flowcell)

    logging.debug('genome_dir: %s' % ( options.genome_dir, ))
    available_genomes = getAvailableGenomes(options.genome_dir)
    genome_map = constructMapperDict(available_genomes)
    logging.debug('available genomes: %s' % ( unicode( genome_map.keys() ),))

    config = format_gerald_config(options, flowcell_info, genome_map)

    if options.output_filepath is not None:
        outstream = open(options.output_filepath, 'w')
        logging.info('Writing config file to %s' % (options.output_filepath,))
    else:
        outstream = sys.stdout

    try:
        outstream.write(config)
    finally:
        # Close only a file opened here, even if the write fails; standard
        # output belongs to the caller and must stay open.
        if outstream is not sys.stdout:
            outstream.close()