#!/usr/bin/env python
import csv
-from ConfigParser import RawConfigParser
+from configparser import RawConfigParser
import logging
from optparse import OptionParser, IndentedHelpFormatter
import os
import sys
import types
-import urllib
-import urllib2
+import urllib.request, urllib.parse, urllib.error
+import urllib.request, urllib.error, urllib.parse
+import collections
+import functools
try:
import json
-except ImportError, e:
+except ImportError as e:
import simplejson as json
from htsworkflow.frontend.auth import apidata
from htsworkflow.util import api
+from htsworkflow.util import alphanum
from htsworkflow.util.url import normalize_url
from htsworkflow.pipelines.genome_mapper import \
getAvailableGenomes, \
constructMapperDict
-from htsworkflow.pipelines.runfolder import LANE_LIST
+from htsworkflow.pipelines import LANE_LIST
# JSON dictionaries use strings
LANE_LIST_JSON = [ str(l) for l in LANE_LIST ]
+LOGGER = logging.getLogger(__name__)
+
__docformat__ = "restructuredtext en"
CONFIG_SYSTEM = '/etc/htsworkflow.ini'
url = api.flowcell_url(base_host_url, flowcell)
try:
- apipayload = urllib.urlencode(apidata)
- web = urllib2.urlopen(url, apipayload)
- except urllib2.URLError, e:
+        apipayload = urllib.parse.urlencode(apidata).encode('utf-8')
+ web = urllib.request.urlopen(url, apipayload)
+ except urllib.error.URLError as e:
errmsg = 'URLError: %d %s' % (e.code, e.msg)
- logging.error(errmsg)
- logging.error('opened %s' % (url,))
+ LOGGER.error(errmsg)
+ LOGGER.error('opened %s' % (url,))
raise IOError(errmsg)
contents = web.read()
(The same species, read length, and eland vs sequencing)
"""
lane_groups = {}
- for lane_number, lane_contents in flowcell_info['lane_set'].items():
+ for lane_number, lane_contents in list(flowcell_info['lane_set'].items()):
for lane_info in lane_contents:
index = (lane_info['read_length'],
lane_info['library_species'],
analysis_suffix = eland_analysis_suffix[flowcell_info['paired_end']]
sequence_suffix = sequence_analysis_suffix[flowcell_info['paired_end']]
lane_groups = group_lane_parameters(flowcell_info)
- for lane_index, lane_numbers in lane_groups.items():
+ for lane_index, lane_numbers in list(lane_groups.items()):
# lane_index is return value of group_lane_parameters
read_length, species, is_sequencing = lane_index
lane_numbers.sort()
- lane_prefix = u"".join(lane_numbers)
+ lane_prefix = "".join(lane_numbers)
species_path = genome_map.get(species, None)
- logging.debug("Looked for genome '%s' got location '%s'" % (species, species_path))
+ LOGGER.debug("Looked for genome '%s' got location '%s'" % (species, species_path))
if not is_sequencing and species_path is None:
no_genome_msg = "Forcing lanes %s to sequencing as there is no genome for %s"
- logging.warning(no_genome_msg % (lane_numbers, species))
+ LOGGER.warning(no_genome_msg % (lane_numbers, species))
is_sequencing = True
if is_sequencing:
retrieves the flowcell eland config file, give the base_host_url
(i.e. http://sub.domain.edu:port)
"""
- logging.info('USING OPTIONS:')
- logging.info(u' URL: %s' % (options.url,))
- logging.info(u' OUT: %s' % (options.output_filepath,))
- logging.info(u' FC: %s' % (options.flowcell,))
- #logging.info(': %s' % (options.genome_dir,))
- logging.info(u'post_run: %s' % ( unicode(options.post_run),))
+ LOGGER.info('USING OPTIONS:')
+ LOGGER.info(' URL: %s' % (options.url,))
+ LOGGER.info(' OUT: %s' % (options.output_filepath,))
+ LOGGER.info(' FC: %s' % (options.flowcell,))
+ #LOGGER.info(': %s' % (options.genome_dir,))
+ LOGGER.info('post_run: %s' % ( str(options.post_run),))
flowcell_info = retrieve_flowcell_info(options.url, options.flowcell)
- logging.debug('genome_dir: %s' % ( options.genome_dir, ))
+ LOGGER.debug('genome_dir: %s' % ( options.genome_dir, ))
available_genomes = getAvailableGenomes(options.genome_dir)
genome_map = constructMapperDict(available_genomes)
- logging.debug('available genomes: %s' % ( unicode( genome_map.keys() ),))
+ LOGGER.debug('available genomes: %s' % ( str( list(genome_map.keys()) ),))
- #config = format_gerald_config(options, flowcell_info, genome_map)
- #
- #if options.output_filepath is not None:
- # outstream = open(options.output_filepath, 'w')
- # logging.info('Writing config file to %s' % (options.output_filepath,))
- #else:
- # outstream = sys.stdout
- #
- #outstream.write(config)
+ config = format_gerald_config(options, flowcell_info, genome_map)
+
+ if options.output_filepath is not None:
+ outstream = open(options.output_filepath, 'w')
+        LOGGER.info('Writing config file to %s' % (options.output_filepath,))
+ else:
+ outstream = sys.stdout
+
+ outstream.write(config)
if options.sample_sheet is None:
pass
htsw_field = illumina_to_htsw_map.get(illumina_name, None)
if htsw_field is None:
continue
        if callable(htsw_field):
renamed[illumina_name] = htsw_field(options,
flowcell_info,
library)
sequences = library.get('index_sequence', None)
if sequences is None:
return []
- elif (type(sequences) in types.StringTypes and
+    elif (isinstance(sequences, str) and
sequences.lower().startswith('err')):
shared['Index'] = ''
shared['SampleProject'] = library['library_id']
return [shared]
- elif (type(sequences) == types.DictType):
+ elif (type(sequences) == dict):
pooled = []
- multiplex_ids = sequences.keys()
- multiplex_ids.sort(key=int)
+ multiplex_ids = list(sequences.keys())
+    multiplex_ids.sort(key=functools.cmp_to_key(alphanum.alphanum))
for multiplex_id in multiplex_ids:
sample = {}
sample.update(shared)