#!/usr/bin/env python
import csv
-from ConfigParser import RawConfigParser
+from configparser import RawConfigParser
import logging
from optparse import OptionParser, IndentedHelpFormatter
import os
import sys
import types
-import urllib
-import urllib2
+import urllib.request, urllib.parse, urllib.error
+import urllib.request, urllib.error, urllib.parse
+import collections
+import functools
try:
import json
-except ImportError, e:
+except ImportError as e:
import simplejson as json
from htsworkflow.frontend.auth import apidata
from htsworkflow.util import api
+from htsworkflow.util import alphanum
from htsworkflow.util.url import normalize_url
from htsworkflow.pipelines.genome_mapper import \
getAvailableGenomes, \
constructMapperDict
-from htsworkflow.pipelines.runfolder import LANE_LIST
+from htsworkflow.pipelines import LANE_LIST
# JSON dictionaries use strings
LANE_LIST_JSON = [ str(l) for l in LANE_LIST ]
+LOGGER = logging.getLogger(__name__)
+
__docformat__ = "restructuredtext en"
CONFIG_SYSTEM = '/etc/htsworkflow.ini'
url = api.flowcell_url(base_host_url, flowcell)
try:
- apipayload = urllib.urlencode(apidata)
- web = urllib2.urlopen(url, apipayload)
- except urllib2.URLError, e:
+        apipayload = urllib.parse.urlencode(apidata).encode('utf-8')
+ web = urllib.request.urlopen(url, apipayload)
+ except urllib.error.URLError as e:
errmsg = 'URLError: %d %s' % (e.code, e.msg)
- logging.error(errmsg)
- logging.error('opened %s' % (url,))
+ LOGGER.error(errmsg)
+ LOGGER.error('opened %s' % (url,))
raise IOError(errmsg)
contents = web.read()
(The same species, read length, and eland vs sequencing)
"""
lane_groups = {}
- for lane_number, lane_contents in flowcell_info['lane_set'].items():
+ for lane_number, lane_contents in list(flowcell_info['lane_set'].items()):
for lane_info in lane_contents:
index = (lane_info['read_length'],
lane_info['library_species'],
analysis_suffix = eland_analysis_suffix[flowcell_info['paired_end']]
sequence_suffix = sequence_analysis_suffix[flowcell_info['paired_end']]
lane_groups = group_lane_parameters(flowcell_info)
- for lane_index, lane_numbers in lane_groups.items():
+ for lane_index, lane_numbers in list(lane_groups.items()):
# lane_index is return value of group_lane_parameters
read_length, species, is_sequencing = lane_index
lane_numbers.sort()
- lane_prefix = u"".join(lane_numbers)
+ lane_prefix = "".join(lane_numbers)
species_path = genome_map.get(species, None)
- logging.debug("Looked for genome '%s' got location '%s'" % (species, species_path))
+ LOGGER.debug("Looked for genome '%s' got location '%s'" % (species, species_path))
if not is_sequencing and species_path is None:
no_genome_msg = "Forcing lanes %s to sequencing as there is no genome for %s"
- logging.warning(no_genome_msg % (lane_numbers, species))
+ LOGGER.warning(no_genome_msg % (lane_numbers, species))
is_sequencing = True
if is_sequencing:
retrieves the flowcell eland config file, give the base_host_url
(i.e. http://sub.domain.edu:port)
"""
- logging.info('USING OPTIONS:')
- logging.info(u' URL: %s' % (options.url,))
- logging.info(u' OUT: %s' % (options.output_filepath,))
- logging.info(u' FC: %s' % (options.flowcell,))
- #logging.info(': %s' % (options.genome_dir,))
- logging.info(u'post_run: %s' % ( unicode(options.post_run),))
+ LOGGER.info('USING OPTIONS:')
+ LOGGER.info(' URL: %s' % (options.url,))
+ LOGGER.info(' OUT: %s' % (options.output_filepath,))
+ LOGGER.info(' FC: %s' % (options.flowcell,))
+ #LOGGER.info(': %s' % (options.genome_dir,))
+ LOGGER.info('post_run: %s' % ( str(options.post_run),))
flowcell_info = retrieve_flowcell_info(options.url, options.flowcell)
- logging.debug('genome_dir: %s' % ( options.genome_dir, ))
+ LOGGER.debug('genome_dir: %s' % ( options.genome_dir, ))
available_genomes = getAvailableGenomes(options.genome_dir)
genome_map = constructMapperDict(available_genomes)
- logging.debug('available genomes: %s' % ( unicode( genome_map.keys() ),))
+ LOGGER.debug('available genomes: %s' % ( str( list(genome_map.keys()) ),))
- #config = format_gerald_config(options, flowcell_info, genome_map)
- #
- #if options.output_filepath is not None:
- # outstream = open(options.output_filepath, 'w')
- # logging.info('Writing config file to %s' % (options.output_filepath,))
- #else:
- # outstream = sys.stdout
- #
- #outstream.write(config)
+ config = format_gerald_config(options, flowcell_info, genome_map)
+
+ if options.output_filepath is not None:
+ outstream = open(options.output_filepath, 'w')
+        LOGGER.info('Writing config file to %s' % (options.output_filepath,))
+ else:
+ outstream = sys.stdout
+
+ outstream.write(config)
if options.sample_sheet is None:
pass
htsw_field = illumina_to_htsw_map.get(illumina_name, None)
if htsw_field is None:
continue
        if callable(htsw_field):
renamed[illumina_name] = htsw_field(options,
flowcell_info,
library)
sequences = library.get('index_sequence', None)
if sequences is None:
return []
- elif (type(sequences) in types.StringTypes and
+    elif (isinstance(sequences, str) and
sequences.lower().startswith('err')):
shared['Index'] = ''
shared['SampleProject'] = library['library_id']
return [shared]
- elif (type(sequences) == types.DictType):
+ elif (type(sequences) == dict):
pooled = []
- multiplex_ids = sequences.keys()
- multiplex_ids.sort(key=int)
+ multiplex_ids = list(sequences.keys())
+    multiplex_ids.sort(key=functools.cmp_to_key(alphanum.alphanum))
for multiplex_id in multiplex_ids:
sample = {}
sample.update(shared)