Add support for first-gen HiSeq flowcells (e.g. ABXX)

[htsworkflow.git] / htsworkflow / pipelines / eland.py
diff --git a/htsworkflow/pipelines/eland.py b/htsworkflow/pipelines/eland.py

index 271a6517f632209612fb27b6647d54bec921386d..bd478aaa416a8d61d37aebd032c5e7b8290bd618 100644 (file)
--- a/htsworkflow/pipelines/eland.py
+++ b/htsworkflow/pipelines/eland.py
@@ -44,7 +44,7 @@ class ResultLane(object):
          self.lane_id = lane_id
          self.end = end
          self._reads = None
-        
+
          if xml is not None:
              self.set_elements(xml)
  
@@ -182,13 +182,13 @@ class ElandLane(ResultLane):
              reads += 1
              fields = line.split()
              # fields[2] = QC/NM/or number of matches
-            score_type = self._score_mapped_mismatches(fields[MATCH_INDEX], 
+            score_type = self._score_mapped_mismatches(fields[MATCH_INDEX],
                                                         match_codes)
              if score_type == ElandLane.SCORE_READ:
                  # when there are too many hits, eland  writes a - where
                  # it would have put the list of hits.
                  # or in a different version of eland, it just leaves
-                # that column blank, and only outputs 3 fields.     
+                # that column blank, and only outputs 3 fields.
                  if len(fields) < 4 or fields[LOCATION_INDEX] == '-':
                    continue
  
@@ -213,16 +213,16 @@ class ElandLane(ResultLane):
              reads += 1
              fields = line.split()
              # fields[2] = QC/NM/or number of matches
-            score_type = self._score_mapped_mismatches(fields[MATCH_INDEX], 
+            score_type = self._score_mapped_mismatches(fields[MATCH_INDEX],
                                                         match_codes)
              if score_type == ElandLane.SCORE_UNRECOGNIZED:
                  # export files have three states for the match field
-                # QC code, count of multi-reads, or a single 
+                # QC code, count of multi-reads, or a single
                  # read location. The score_mapped_mismatches function
                  # only understands the first two types.
                  # if we get unrecognized, that implies the field is probably
                  # a location.
-                code = self._count_mapped_export(mapped_reads, 
+                code = self._count_mapped_export(mapped_reads,
                                                   fields[LOCATION_INDEX],
                                                   fields[DESCRIPTOR_INDEX])
                  match_codes[code] += 1
@@ -232,7 +232,7 @@ class ElandLane(ResultLane):
  
      def _score_mapped_mismatches(self, match, match_codes):
          """Update match_codes with eland map counts, or failure code.
-        
+
          Returns True if the read mapped, false if it was an error code.
          """
          groups = ElandLane.MATCH_COUNTS_RE.match(match)
@@ -260,12 +260,12 @@ class ElandLane(ResultLane):
                  match_codes['U1'] += 1
              elif one_mismatches < 255:
                  match_codes['R1'] += one_mismatches
-    
+
              if two_mismatches == 1:
                  match_codes['U2'] += 1
              elif two_mismatches < 255:
                  match_codes['R2'] += two_mismatches
-                
+
              return ElandLane.SCORE_READ
  
  
@@ -284,9 +284,9 @@ class ElandLane(ResultLane):
  
      def _count_mapped_export(self, mapped_reads, match_string, descriptor):
          """Count a read as defined in an export file
-        
+
          match_string contains the chromosome
-        descriptor contains the an ecoding of bases that match, mismatch, 
+        descriptor contains an encoding of bases that match, mismatch,
                     and have indels.
          returns the "best" match code
  
@@ -320,25 +320,25 @@ class ElandLane(ResultLane):
  
      def _get_no_match(self):
          if self._mapped_reads is None:
-            self._update()  
+            self._update()
          return self._match_codes['NM']
-    no_match = property(_get_no_match, 
+    no_match = property(_get_no_match,
                          doc="total reads that didn't match the target genome.")
  
      def _get_no_match_percent(self):
-        return float(self.no_match)/self.reads * 100 
+        return float(self.no_match)/self.reads * 100
      no_match_percent = property(_get_no_match_percent,
                                  doc="no match reads as percent of total")
  
      def _get_qc_failed(self):
          if self._mapped_reads is None:
-            self._update()  
+            self._update()
          return self._match_codes['QC']
      qc_failed = property(_get_qc_failed,
                          doc="total reads that didn't match the target genome.")
  
      def _get_qc_failed_percent(self):
-        return float(self.qc_failed)/self.reads * 100 
+        return float(self.qc_failed)/self.reads * 100
      qc_failed_percent = property(_get_qc_failed_percent,
                                   doc="QC failed reads as percent of total")
  
@@ -361,7 +361,7 @@ class ElandLane(ResultLane):
          return sum
      repeat_reads = property(_get_repeat_reads,
                              doc="total repeat reads")
-    
+
      def get_elements(self):
          lane = ElementTree.Element(ElandLane.LANE,
                                     {'version':
@@ -565,7 +565,7 @@ class ELAND(object):
              raise ValueError('Expecting %s', ELAND.ELAND)
          for element in list(tree):
              lane_id = int(element.attrib[ELAND.LANE_ID])
-            end = int(element.attrib.get(ELAND.END, 0)) 
+            end = int(element.attrib.get(ELAND.END, 0))
              if element.tag.lower() == ElandLane.LANE.lower():
                  lane = ElandLane(xml=element)
              elif element.tag.lower() == SequenceLane.LANE.lower():
@@ -598,16 +598,16 @@ def update_result_with_eland(gerald, results, lane_id, end, pathname, genome_map
      # split_name = name.split('_')
      # lane_id = int(split_name[1])
  
+    genome_map = {}
      if genome_maps is not None:
          genome_map = genome_maps[lane_id]
      elif gerald is not None:
          genome_dir = gerald.lanes[lane_id].eland_genome
-        genome_map = build_genome_fasta_map(genome_dir)
-    else:
-        genome_map = {}
+        if genome_dir is not None:
+            genome_map = build_genome_fasta_map(genome_dir)
  
      lane = ElandLane(pathname, lane_id, end, genome_map)
-    
+
      if end is None:
          effective_end =  0
      else:
@@ -643,7 +643,7 @@ def eland(gerald_dir, gerald=None, genome_maps=None):
      if os.path.isdir(basedir_temp):
          basedirs.append(basedir_temp)
  
-   
+
      # the order in patterns determines the preference for what
      # will be found.
      MAPPED_ELAND = 0
@@ -723,7 +723,7 @@ def main(cmdline=None):
          e = eland(a)
          print e.get_elements()
  
-    return 
+    return
  
  
  if __name__ == "__main__":