Dynamically respond to the number of lanes: iterate over the lanes present in the flowcell's lane_set instead of the fixed LANE_LIST.

[htsworkflow.git] / htsworkflow / pipelines / retrieve_config.py
diff --git a/htsworkflow/pipelines/retrieve_config.py b/htsworkflow/pipelines/retrieve_config.py

index 2292cb9fca955485d9faf42cbb5274fd57ced1b4..29f49bb32f8c3914976326d3b3b329db0021357e 100644 (file)
--- a/htsworkflow/pipelines/retrieve_config.py
+++ b/htsworkflow/pipelines/retrieve_config.py
@@ -17,6 +17,7 @@ except ImportError, e:
  
  from htsworkflow.frontend.auth import apidata
  from htsworkflow.util import api
+from htsworkflow.util import alphanum
  from htsworkflow.util.url import normalize_url
  from htsworkflow.pipelines.genome_mapper import \
       getAvailableGenomes, \
@@ -25,6 +26,8 @@ from htsworkflow.pipelines.runfolder import LANE_LIST
  # JSON dictionaries use strings
  LANE_LIST_JSON = [ str(l) for l in LANE_LIST ]
  
+LOGGER = logging.getLogger(__name__)
+
  __docformat__ = "restructredtext en"
  
  CONFIG_SYSTEM = '/etc/htsworkflow.ini'
@@ -48,8 +51,8 @@ def retrieve_flowcell_info(base_host_url, flowcell):
          web = urllib2.urlopen(url, apipayload)
      except urllib2.URLError, e:
          errmsg = 'URLError: %d %s' % (e.code, e.msg)
-        logging.error(errmsg)
-        logging.error('opened %s' % (url,))
+        LOGGER.error(errmsg)
+        LOGGER.error('opened %s' % (url,))
          raise IOError(errmsg)
  
      contents = web.read()
@@ -108,7 +111,7 @@ def format_gerald_header(flowcell_info):
      config += ['Flowcell Notes:']
      config.extend(flowcell_info['notes'].split('\r\n'))
      config += ['']
-    for lane_number in LANE_LIST_JSON:
+    for lane_number in sorted(flowcell_info['lane_set']):
          lane_contents = flowcell_info['lane_set'][lane_number]
          for lane_info in lane_contents:
              config += ['Lane%s: %s | %s' % (lane_number,
@@ -143,10 +146,10 @@ def format_gerald_config(options, flowcell_info, genome_map):
          lane_prefix = u"".join(lane_numbers)
  
          species_path = genome_map.get(species, None)
-        logging.debug("Looked for genome '%s' got location '%s'" % (species, species_path))
+        LOGGER.debug("Looked for genome '%s' got location '%s'" % (species, species_path))
          if not is_sequencing and species_path is None:
              no_genome_msg = "Forcing lanes %s to sequencing as there is no genome for %s"
-            logging.warning(no_genome_msg % (lane_numbers, species))
+            LOGGER.warning(no_genome_msg % (lane_numbers, species))
              is_sequencing = True
  
          if is_sequencing:
@@ -306,29 +309,29 @@ def saveConfigFile(options):
    retrieves the flowcell eland config file, give the base_host_url
    (i.e. http://sub.domain.edu:port)
    """
-  logging.info('USING OPTIONS:')
-  logging.info(u'     URL: %s' % (options.url,))
-  logging.info(u'     OUT: %s' % (options.output_filepath,))
-  logging.info(u'      FC: %s' % (options.flowcell,))
-  #logging.info(': %s' % (options.genome_dir,))
-  logging.info(u'post_run: %s' % ( unicode(options.post_run),))
+  LOGGER.info('USING OPTIONS:')
+  LOGGER.info(u'     URL: %s' % (options.url,))
+  LOGGER.info(u'     OUT: %s' % (options.output_filepath,))
+  LOGGER.info(u'      FC: %s' % (options.flowcell,))
+  #LOGGER.info(': %s' % (options.genome_dir,))
+  LOGGER.info(u'post_run: %s' % ( unicode(options.post_run),))
  
    flowcell_info = retrieve_flowcell_info(options.url, options.flowcell)
  
-  logging.debug('genome_dir: %s' % ( options.genome_dir, ))
+  LOGGER.debug('genome_dir: %s' % ( options.genome_dir, ))
    available_genomes = getAvailableGenomes(options.genome_dir)
    genome_map = constructMapperDict(available_genomes)
-  logging.debug('available genomes: %s' % ( unicode( genome_map.keys() ),))
+  LOGGER.debug('available genomes: %s' % ( unicode( genome_map.keys() ),))
  
-  #config = format_gerald_config(options, flowcell_info, genome_map)
-  #
-  #if options.output_filepath is not None:
-  #    outstream = open(options.output_filepath, 'w')
-  #    logging.info('Writing config file to %s' % (options.output_filepath,))
-  #else:
-  #    outstream = sys.stdout
-  #
-  #outstream.write(config)
+  config = format_gerald_config(options, flowcell_info, genome_map)
+
+  if options.output_filepath is not None:
+      outstream = open(options.output_filepath, 'w')
+      logging.info('Writing config file to %s' % (options.output_filepath,))
+  else:
+      outstream = sys.stdout
+
+  outstream.write(config)
  
    if options.sample_sheet is None:
        pass
@@ -353,8 +356,8 @@ def save_sample_sheet(outstream, options, flowcell_info):
                              'Operator': format_operator_name}
      out = csv.DictWriter(outstream, sample_sheet_fields)
      out.writerow(dict(((x,x) for x in sample_sheet_fields)))
-    for lane_number in LANE_LIST:
-        lane_contents = flowcell_info['lane_set'][str(lane_number)]
+    for lane_number in sorted(flowcell_info['lane_set']):
+        lane_contents = flowcell_info['lane_set'][lane_number]
  
          pooled_lane_contents = []
          for library in lane_contents:
@@ -408,7 +411,7 @@ def format_pooled_libraries(shared, library):
      elif (type(sequences) == types.DictType):
          pooled = []
          multiplex_ids = sequences.keys()
-        multiplex_ids.sort(key=int)
+        multiplex_ids.sort(cmp=alphanum.alphanum)
          for multiplex_id in multiplex_ids:
              sample = {}
              sample.update(shared)