Use eland_extended or eland_pair instead of eland for ANALYSIS type
author Diane Trout <diane@caltech.edu>
Tue, 6 Oct 2009 18:34:36 +0000 (18:34 +0000)
committer Diane Trout <diane@caltech.edu>
Tue, 6 Oct 2009 18:34:36 +0000 (18:34 +0000)
This required splitting my analysis suffix into separate suffixes for
the sequencing and aligning code.

Also, forcing lanes that have no available genome to be sequencing-only
changed part of a retrieve_config test case.

htsworkflow/pipelines/retrieve_config.py
htsworkflow/pipelines/test/test_retrive_config.py

index 1a5e1360d389355044c887133e699b25e088d608..136bb5a50c60ff8890a35b6b579426ca6f593257 100644 (file)
@@ -116,13 +116,15 @@ def format_gerald_config(options, flowcell_info, genome_map):
     Generate a GERALD config file
     """
     # so we can add nothing or _pair if we're a paired end run
-    run_type_suffix = { False: "", True: "_pair" }
+    eland_analysis_suffix = { False: "_extended", True: "_pair" }
+    sequence_analysis_suffix = { False: "", True: "_pair" }
 
     # it's convienent to have helpful information describing the flowcell
     # in the config file... things like which lane is which library.
     config = [format_gerald_header(flowcell_info)]
 
-    analysis_suffix = run_type_suffix[flowcell_info['paired_end']]
+    analysis_suffix = eland_analysis_suffix[flowcell_info['paired_end']]
+    sequence_suffix = sequence_analysis_suffix[flowcell_info['paired_end']]
     lane_groups = group_lane_parameters(flowcell_info)
     for lane_index, lane_numbers in lane_groups.items():
         # lane_index is return value of group_lane_parameters
@@ -137,11 +139,11 @@ def format_gerald_config(options, flowcell_info, genome_map):
             logging.warning(no_genome_msg % (lane_numbers, species))
             is_sequencing = True
             
-        if not is_sequencing:
+        if is_sequencing:
+            config += ['%s:ANALYSIS sequence%s' % (lane_prefix, analysis_suffix)]
+        else:
             config += ['%s:ANALYSIS eland%s' % (lane_prefix, analysis_suffix)]
             config += ['%s:ELAND_GENOME %s' % (lane_prefix, species_path) ]
-        else:
-            config += ['%s:ANALYSIS sequence%s' % (lane_prefix, analysis_suffix)]
         #config += ['%s:READ_LENGTH %s' % ( lane_prefix, read_length ) ]
         config += ['%s:USE_BASES Y%s' % ( lane_prefix, read_length ) ]
 
index dd8f30e92d5c91f7d5b9479a2adac5592ca5d3bb..9847ffd4aea53d7a613bfe266cc882584c1c3a7d 100644 (file)
@@ -35,8 +35,10 @@ class RetrieveTestCases(TestCase):
         human = [ line for line in config_lines if re.search('hg18', line) ]
         self.failUnlessEqual(len(human), 1)
         self.failUnlessEqual(human[0], '345678:ELAND_GENOME /tmp/hg18')
-        unknown = [ line for line in config_lines if re.search('Unknown', line) ]
-        self.failUnlessEqual(len(unknown), 2)
+        # we changed the api to force unknown genomes to be sequencing
+        sequencing = [ line for line in config_lines if re.search('sequence_pair', line) ]
+        self.failUnlessEqual(len(sequencing), 2)
+