A better resolution to a possible circular dependency.
[htsworkflow.git] / htsworkflow / pipelines / eland.py
index 87c6fb7bc4e972988149de833bd7cba1ee6dfaab..a508a494c8c24900e8574c75f43c3616bf309814 100644 (file)
@@ -10,8 +10,9 @@ import stat
 import sys
 import types
 
-from htsworkflow.pipelines.runfolder import ElementTree, LANE_LIST
+from htsworkflow.pipelines import ElementTree, LANE_LIST
 from htsworkflow.pipelines.samplekey import SampleKey
+from htsworkflow.pipelines.genomemap import GenomeMap
 from htsworkflow.util.ethelp import indent, flatten
 from htsworkflow.util.opener import autoopen
 
@@ -97,9 +98,7 @@ class ElandLane(ResultLane):
         self._mapped_reads = None
         self._match_codes = None
         self._reads = None
-        if genome_map is None:
-            genome_map = {}
-        self.genome_map = genome_map
+        self.genome_map = GenomeMap(genome_map)
         self.eland_type = None
 
         if xml is not None:
@@ -639,7 +638,7 @@ class ELAND(collections.MutableMapping):
 
     def __init__(self, xml=None):
         # we need information from the gerald config.xml
-        self.results = collections.OrderedDict()
+        self.results = {}
 
         if xml is not None:
             self.set_elements(xml)
@@ -658,7 +657,9 @@ class ELAND(collections.MutableMapping):
         del self.result[key]
 
     def __iter__(self):
-        return self.results.iterkeys()
+        keys = self.results.iterkeys()
+        for k in sorted(keys):
+            yield k
 
     def __len__(self):
         return len(self.results)
@@ -709,14 +710,23 @@ class ELAND(collections.MutableMapping):
         # runfolder summary_report
         names = [ os.path.split(p)[1] for p in pathnames]
         LOGGER.info("Adding eland files %s" %(",".join(names),))
+        basedir = os.path.split(pathnames[0])[0]
+        gs_template = "{0}_*_L{1:03}_genomesize.xml"
+        genomesize = glob(
+            os.path.join(basedir,
+                         gs_template.format(key.sample, key.lane)))
 
-        genome_map = {}
+
+        genome_map = GenomeMap()
         if genome_maps is not None:
-            genome_map = genome_maps[key.lane]
+            genome_map = GenomeMap(genome_maps[key.lane])
+        elif len(genomesize) > 0:
+            LOGGER.info("Found {0}".format(genomesize))
+            genome_map.parse_genomesize(genomesize[0])
         elif gerald is not None:
-            genome_dir = gerald.lanes[key.lane].eland_genome
+            genome_dir = gerald.lanes[key].eland_genome
             if genome_dir is not None:
-                genome_map = build_genome_fasta_map(genome_dir)
+                genome_map.scan_genome_dir(genome_dir)
 
         lane = ElandLane(pathnames, key.sample, key.lane, key.read, genome_map)
 
@@ -861,24 +871,6 @@ class ElandMatch(object):
         if self._part is not None: name.append('P%s' % (self.part,))
         return '<ElandMatch(' + "_".join(name) + ')>'
 
-def build_genome_fasta_map(genome_dir):
-    # build fasta to fasta file map
-    LOGGER.info("Building genome map")
-    genome = genome_dir.split(os.path.sep)[-1]
-    fasta_map = {}
-    for vld_file in glob(os.path.join(genome_dir, '*.vld')):
-        is_link = False
-        if os.path.islink(vld_file):
-            is_link = True
-        vld_file = os.path.realpath(vld_file)
-        path, vld_name = os.path.split(vld_file)
-        name, ext = os.path.splitext(vld_name)
-        if is_link:
-            fasta_map[name] = name
-        else:
-            fasta_map[name] = os.path.join(genome, name)
-    return fasta_map
-
 
 def extract_eland_sequence(instream, outstream, start, end):
     """