3 from ConfigParser import RawConfigParser
5 from optparse import OptionParser, IndentedHelpFormatter
13 except ImportError, e:
14 import simplejson as json
16 from htsworkflow.frontend.auth import apidata
17 from htsworkflow.util import api
18 from htsworkflow.util.url import normalize_url
19 from htsworkflow.pipelines.genome_mapper import getAvailableGenomes
20 from htsworkflow.pipelines.genome_mapper import constructMapperDict
# Markup language used by the docstrings in this module.
__docformat__ = "restructuredtext en"

# Configuration files, read in this order; values from the user file
# override the system-wide ones.
CONFIG_SYSTEM = '/etc/htsworkflow.ini'
CONFIG_USER = os.path.expanduser('~/.htsworkflow.ini')
# Section of the ini files that holds the settings used by this module.
GERALD_CONFIG_SECTION = 'gerald_config'

# Disable or enable command line argument parsing; disabled by default.
DISABLE_CMDLINE = True

# Lane identifiers (as strings) used to look lanes up in a flowcell's
# 'lane_set' mapping.
LANE_LIST = ['1', '2', '3', '4', '5', '6', '7', '8']
class FlowCellNotFound(Exception):
    """
    Raised when no usable flowcell record could be retrieved.

    Used for a forbidden (403) or missing (404) flowcell URL and for an
    empty response body.
    """
class WebError404(Exception):
    """
    Error type for an HTTP 404 response.

    NOTE(review): not raised anywhere in the reviewed part of this module;
    confirm it is still needed by callers.
    """
def _flowcell_status_error(status, flowcell, base_host_url, url):
    """
    Map an HTTP status code to the FlowCellNotFound it stands for.

    :returns: a FlowCellNotFound instance for 403 and 404, otherwise None.
    """
    if status == 403:
        return FlowCellNotFound("403 - Forbbidden, probably need api key")
    if status == 404:
        msg = "404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n " \
              "Did you get right port #?" % (flowcell, base_host_url, url)
        return FlowCellNotFound(msg)
    return None


def retrieve_flowcell_info(base_host_url, flowcell):
    """
    Return a dictionary describing a flowcell.

    The description is requested from the url built by
    ``api.flowcell_url`` and decoded from JSON.

    :param base_host_url: base url of the server to query
    :param flowcell: identifier of the flowcell to look up
    :returns: the decoded JSON document
    :raises FlowCellNotFound: on a 403 or 404 answer, or an empty body
    :raises IOError: on any other failure to open the url
    """
    url = api.flowcell_url(base_host_url, flowcell)

    try:
        # The api credentials are sent as the POST payload.
        apipayload = urllib.urlencode(apidata)
        web = urllib2.urlopen(url, apipayload)
    except urllib2.URLError as e:
        # urllib2 reports HTTP error statuses by raising HTTPError (a
        # URLError subclass), so 403/404 have to be recognised here.
        status = getattr(e, 'code', None)
        failure = _flowcell_status_error(status, flowcell, base_host_url, url)
        if failure is not None:
            raise failure
        if status is not None:
            errmsg = 'URLError: %d %s' % (status, getattr(e, 'msg', ''))
        else:
            # A plain URLError has no code/msg, only a reason.
            errmsg = 'URLError: %s' % (getattr(e, 'reason', e),)
        logging.error(errmsg)
        logging.error('opened %s' % (url,))
        # NOTE(review): IOError is an assumption; the reviewed excerpt does
        # not show which exception is raised after logging -- confirm.
        raise IOError(errmsg)

    # Kept for openers that hand back an error response instead of raising.
    failure = _flowcell_status_error(getattr(web, 'code', None),
                                     flowcell, base_host_url, url)
    if failure is not None:
        raise failure

    contents = web.read()
    if len(contents) == 0:
        msg = "No information for flowcell (%s) returned; full url(%s)" % (flowcell, url)
        raise FlowCellNotFound(msg)

    data = json.loads(contents)
    return data
def is_sequencing(lane_info):
    """
    Determine if we are just sequencing and not doing any follow-up analysis

    :param lane_info: mapping describing one lane; must contain an
        'experiment_type' entry
    :returns: True when the experiment type is 'De Novo' or 'Whole Genome',
        False otherwise
    :raises KeyError: if 'experiment_type' is missing
    """
    return lane_info['experiment_type'] in ('De Novo', 'Whole Genome')
def group_lane_parameters(flowcell_info):
    """
    Group lanes that can share GERALD configuration blocks.

    (The same species, read length, and eland vs sequencing)

    :param flowcell_info: mapping with a 'lane_set' entry that maps lane
        numbers to per-lane mappings providing 'read_length',
        'library_species' and 'experiment_type'
    :returns: dictionary mapping
        ``(read_length, library_species, is_sequencing)`` to the list of
        lane numbers sharing those parameters
    """
    lane_groups = {}
    lane_set = flowcell_info['lane_set']
    # Visit the lanes in sorted order so the lane list of every group comes
    # out the same on each run instead of following dictionary order.
    for lane_number in sorted(lane_set):
        lane_info = lane_set[lane_number]
        index = (lane_info['read_length'],
                 lane_info['library_species'],
                 is_sequencing(lane_info))
        lane_groups.setdefault(index, []).append(lane_number)
    return lane_groups
def format_gerald_header(flowcell_info):
    """
    Generate comment describing the contents of the flowcell

    :param flowcell_info: mapping providing 'flowcell_id', 'control_lane',
        'notes' and a 'lane_set' with an entry for every lane in LANE_LIST
        (each with 'library_id' and 'library_name')
    :returns: the header text; every line starts with '# '
    :raises KeyError: if a required entry or lane is missing
    """
    # The lines are joined with '\n# ', which does not prefix the first
    # element, so the first line carries its '# ' explicitly.
    config = ['# FLOWCELL: %s' % (flowcell_info['flowcell_id'],)]
    config += ['CONTROL-LANE: %s' % (flowcell_info['control_lane'],)]
    config += ['Flowcell Notes:']
    # splitlines() understands '\r\n' as well as bare '\n' / '\r', so no
    # line of the notes can end up in the output without the comment
    # prefix. Missing notes are treated as empty.
    config.extend((flowcell_info['notes'] or '').splitlines())
    for lane_number in LANE_LIST:
        lane_info = flowcell_info['lane_set'][lane_number]
        config += ['Lane%s: %s | %s' % (lane_number,
                                        lane_info['library_id'],
                                        lane_info['library_name'])]
    return "\n# ".join(config)
def format_gerald_config(options, flowcell_info, genome_map):
    """
    Generate a GERALD config file

    :param options: object with ``post_run`` and ``runfolder`` attributes
    :param flowcell_info: flowcell description; needs 'paired_end' plus
        everything format_gerald_header and group_lane_parameters read
    :param genome_map: mapping from species name to genome location
    :returns: the configuration text, ending with a newline
    """
    # so we can add nothing or _pair if we're a paired end run
    eland_analysis_suffix = {False: "_extended", True: "_pair"}
    sequence_analysis_suffix = {False: "", True: "_pair"}

    # it's convenient to have helpful information describing the flowcell
    # in the config file... things like which lane is which library.
    config = [format_gerald_header(flowcell_info)]

    config += ['SEQUENCE_FORMAT --fastq']
    config += ['ELAND_SET_SIZE 20']
    config += ['WITH_SEQUENCE TRUE']
    config += ['12345678:WITH_SEQUENCE TRUE']
    analysis_suffix = eland_analysis_suffix[flowcell_info['paired_end']]
    sequence_suffix = sequence_analysis_suffix[flowcell_info['paired_end']]
    lane_groups = group_lane_parameters(flowcell_info)
    # Order the groups by their lane lists so the generated file does not
    # depend on dictionary ordering.
    for lane_index, lane_numbers in sorted(lane_groups.items(),
                                           key=lambda group: group[1]):
        # lane_index is return value of group_lane_parameters
        read_length, species, sequencing_only = lane_index

        lane_prefix = u"".join(lane_numbers)

        species_path = genome_map.get(species, None)
        logging.debug("Looked for genome '%s' got location '%s'" % (species, species_path))
        if species_path is None:
            # Without a genome there is nothing to align against.
            no_genome_msg = "Forcing lanes %s to sequencing as there is no genome for %s"
            logging.warning(no_genome_msg % (lane_numbers, species))
            sequencing_only = True

        if sequencing_only:
            config += ['%s:ANALYSIS sequence%s' % (lane_prefix, sequence_suffix)]
        else:
            config += ['%s:ANALYSIS eland%s' % (lane_prefix, analysis_suffix)]
            config += ['%s:ELAND_GENOME %s' % (lane_prefix, species_path)]
        config += ['%s:USE_BASES Y%s' % (lane_prefix, read_length)]

    # add in option for running script after
    if options.post_run is not None and options.runfolder is not None:
        runfolder = os.path.abspath(options.runfolder)
        # post_run is a template with a %(runfolder)s placeholder
        post_run = options.post_run % {'runfolder': runfolder}
        config += ['POST_RUN_COMMAND %s' % (post_run,)]

    config += ['']  # force trailing newline

    return "\n".join(config)
class DummyOptions(object):
    """
    Used when command line parsing is disabled; default

    Carries every attribute the rest of this module reads from an options
    object, so it can stand in for the result of the option parser.
    """

    def __init__(self):
        self.url = None
        self.output_filepath = None
        self.flowcell = None
        self.genome_dir = None
        # Read when choosing the default output path and when formatting
        # the post_run command.
        self.runfolder = None
        self.post_run = None
        self.verbose = False
class PreformattedDescriptionFormatter(IndentedHelpFormatter):
    """
    Help formatter that leaves a preformatted epilog untouched.
    """

    def format_epilog(self, epilog):
        """
        Return the epilog as written, framed by newlines.

        Overrides the default behavior, which was removing the
        preformatted layout of the epilog.

        :param epilog: epilog text, or None / '' when there is none
        :returns: ``"\\n" + epilog + "\\n"``, or '' when there is no epilog
        """
        if not epilog:
            return ""
        return "\n" + epilog + "\n"
def constructOptionParser():
    """
    returns a pre-setup optparser

    :returns: an OptionParser with the url, output-file, flowcell,
        genome_dir, runfolder and verbose options defined
    """
    # NOTE(review): the headings and the '(System wide)' / '[%s]' lines of
    # this text are reconstructed to match the three format arguments --
    # confirm the wording.
    epilog = """
Config File:
  * %s (System wide)
  * %s (User specific; overrides system)
  * command line overrides all config file options

  Example Config File:

    [%s]
    config_host: http://somewhere.domain:port
    genome_dir: /path to search for genomes
    post_run: runfolder -o <destdir> %%(runfolder)s

""" % (CONFIG_SYSTEM, CONFIG_USER, GERALD_CONFIG_SECTION)

    # The special formatter keeps the preformatted epilog intact.
    parser = OptionParser(formatter=PreformattedDescriptionFormatter())
    parser.set_description('Retrieves eland config file from hts_frontend web frontend.')
    parser.epilog = epilog

    parser.add_option("-u", "--url",
                      action="store", type="string", dest="url")

    parser.add_option("-o", "--output-file",
                      action="store", type="string", dest="output_filepath",
                      help="config file destination. If runfolder is specified defaults "
                           "to <runfolder>/config-auto.txt")

    parser.add_option("-f", "--flowcell",
                      action="store", type="string", dest="flowcell")

    parser.add_option("-g", "--genome_dir",
                      action="store", type="string", dest="genome_dir")

    parser.add_option("-r", "--runfolder",
                      action="store", type="string",
                      help="specify runfolder for post_run command ")

    parser.add_option('-v', '--verbose', action='store_true', default=False,
                      help='increase logging verbosity')

    return parser
def constructConfigParser():
    """
    returns a pre-setup config parser

    The system wide file is read first and the user file second, so user
    settings win. The GERALD section is created when neither file defines
    it, so callers can query it without checking for its existence.

    :returns: a RawConfigParser loaded from CONFIG_SYSTEM and CONFIG_USER
    """
    parser = RawConfigParser()
    # read() skips files that do not exist.
    parser.read([CONFIG_SYSTEM, CONFIG_USER])
    if not parser.has_section(GERALD_CONFIG_SECTION):
        parser.add_section(GERALD_CONFIG_SECTION)
    return parser
def getCombinedOptions(argv=None):
    """
    Returns optparse options after it has been updated with ConfigParser
    config files and merged with parsed commandline options.

    expects command line arguments to be passed in

    :param argv: list of command line arguments, or None to skip command
        line parsing
    :returns: options object with url, genome_dir, post_run and
        output_filepath filled in from the config files where the command
        line left them unset
    """
    cl_parser = constructOptionParser()
    conf_parser = constructConfigParser()

    # NOTE(review): this branch condition is reconstructed. DummyOptions is
    # documented as the default used when command line parsing is disabled,
    # so it is chosen when no argv is supplied -- confirm.
    if argv is None:
        options = DummyOptions()
    else:
        options, _ = cl_parser.parse_args(argv)

    if options.url is None:
        if conf_parser.has_option(GERALD_CONFIG_SECTION, 'config_host'):
            options.url = conf_parser.get(GERALD_CONFIG_SECTION, 'config_host')

    options.url = normalize_url(options.url)

    if options.genome_dir is None:
        if conf_parser.has_option(GERALD_CONFIG_SECTION, 'genome_dir'):
            options.genome_dir = conf_parser.get(GERALD_CONFIG_SECTION, 'genome_dir')

    # post_run has no command line option; it only comes from the config.
    if conf_parser.has_option(GERALD_CONFIG_SECTION, 'post_run'):
        options.post_run = conf_parser.get(GERALD_CONFIG_SECTION, 'post_run')
    else:
        options.post_run = None

    # getattr: an options object built without the command line parser may
    # not define runfolder at all.
    runfolder = getattr(options, 'runfolder', None)
    if options.output_filepath is None and runfolder is not None:
        options.output_filepath = os.path.join(runfolder, 'config-auto.txt')

    return options
def saveConfigFile(options):
    """
    retrieves the flowcell eland config file, given the base_host_url
    (i.e. http://sub.domain.edu:port)

    :param options: object providing ``url``, ``output_filepath``,
        ``flowcell``, ``genome_dir`` and ``post_run`` (plus whatever
        format_gerald_config reads)

    The config is written to ``options.output_filepath``, or to standard
    output when that is None.
    """
    logging.info('USING OPTIONS:')
    logging.info(u'     URL: %s' % (options.url,))
    logging.info(u'     OUT: %s' % (options.output_filepath,))
    logging.info(u'      FC: %s' % (options.flowcell,))
    logging.info(u'post_run: %s' % (options.post_run,))

    flowcell_info = retrieve_flowcell_info(options.url, options.flowcell)

    logging.debug('genome_dir: %s' % (options.genome_dir,))
    available_genomes = getAvailableGenomes(options.genome_dir)
    genome_map = constructMapperDict(available_genomes)
    logging.debug('available genomes: %s' % (list(genome_map.keys()),))

    config = format_gerald_config(options, flowcell_info, genome_map)

    if options.output_filepath is not None:
        # The with block closes the file even if the write fails.
        with open(options.output_filepath, 'w') as outstream:
            logging.info('Writing config file to %s' % (options.output_filepath,))
            outstream.write(config)
    else:
        sys.stdout.write(config)