Set WITH_SEQUENCE as both a per-lane AND global parameter
[htsworkflow.git] / htsworkflow / pipelines / retrieve_config.py
index 1a5e1360d389355044c887133e699b25e088d608..3a1a56aeaf2188350db8b980d7b098bf0098a0ba 100644 (file)
@@ -14,6 +14,7 @@ except ImportError, e:
     import simplejson as json
 
 from htsworkflow.frontend.auth import apidata
+from htsworkflow.util import api
 from htsworkflow.util.url import normalize_url
 from htsworkflow.pipelines.genome_mapper import getAvailableGenomes
 from htsworkflow.pipelines.genome_mapper import constructMapperDict
@@ -36,7 +37,7 @@ def retrieve_flowcell_info(base_host_url, flowcell):
     """
     Return a dictionary describing a flowcell.
     """
-    url = base_host_url + '/experiments/config/%s/json' % (flowcell)
+    url = api.flowcell_url(base_host_url, flowcell)
   
     try:
         apipayload = urllib.urlencode(apidata)
@@ -107,7 +108,6 @@ def format_gerald_header(flowcell_info):
         config += ['Lane%s: %s | %s' % (lane_number, lane_info['library_id'],
                                         lane_info['library_name'])]
 
-    config += ['SEQUENCE_FORMAT --fastq']
     config += ['']
     return "\n# ".join(config)
 
@@ -116,13 +116,19 @@ def format_gerald_config(options, flowcell_info, genome_map):
     Generate a GERALD config file
     """
     # so we can add nothing or _pair if we're a paired end run
-    run_type_suffix = { False: "", True: "_pair" }
+    eland_analysis_suffix = { False: "_extended", True: "_pair" }
+    sequence_analysis_suffix = { False: "", True: "_pair" }
 
     # it's convenient to have helpful information describing the flowcell
     # in the config file... things like which lane is which library.
     config = [format_gerald_header(flowcell_info)]
 
-    analysis_suffix = run_type_suffix[flowcell_info['paired_end']]
+    config += ['SEQUENCE_FORMAT --fastq']
+    config += ['ELAND_SET_SIZE 20']
+    config += ['WITH_SEQUENCE TRUE']
+    config += ['12345678:WITH_SEQUENCE TRUE']
+    analysis_suffix = eland_analysis_suffix[flowcell_info['paired_end']]
+    sequence_suffix = sequence_analysis_suffix[flowcell_info['paired_end']]
     lane_groups = group_lane_parameters(flowcell_info)
     for lane_index, lane_numbers in lane_groups.items():
         # lane_index is return value of group_lane_parameters
@@ -137,17 +143,18 @@ def format_gerald_config(options, flowcell_info, genome_map):
             logging.warning(no_genome_msg % (lane_numbers, species))
             is_sequencing = True
             
-        if not is_sequencing:
+        if is_sequencing:
+            config += ['%s:ANALYSIS sequence%s' % (lane_prefix, sequence_suffix)]
+        else:
             config += ['%s:ANALYSIS eland%s' % (lane_prefix, analysis_suffix)]
             config += ['%s:ELAND_GENOME %s' % (lane_prefix, species_path) ]
-        else:
-            config += ['%s:ANALYSIS sequence%s' % (lane_prefix, analysis_suffix)]
         #config += ['%s:READ_LENGTH %s' % ( lane_prefix, read_length ) ]
         config += ['%s:USE_BASES Y%s' % ( lane_prefix, read_length ) ]
 
     # add a POST_RUN_COMMAND entry so a script can be run after the analysis
-    if options.post_run is not None:
-        post_run = options.post_run  % {'runfolder': options.runfolder}
+    if not (options.post_run is None or options.runfolder is None):
+        runfolder = os.path.abspath(options.runfolder)
+        post_run = options.post_run  % {'runfolder': runfolder}
         config += ['POST_RUN_COMMAND %s' % (post_run,) ]
         
     config += [''] # force trailing newline
@@ -201,8 +208,9 @@ Config File:
   Example Config File:
   
     [%s]
-    config_host=http://somewhere.domain:port
-    genome_dir=/path to search for genomes
+    config_host: http://somewhere.domain:port
+    genome_dir: /path to search for genomes
+    post_run: runfolder -o <destdir> %%(runfolder)s
     
 """ % (CONFIG_SYSTEM, CONFIG_USER, GERALD_CONFIG_SECTION)