Initial port to python3
[htsworkflow.git] / htsworkflow / pipelines / bustard.py
index acdc60ea7a511c3a30386beaf0c9b6d10e777f0c..b53813028c6e492265ca10cdf399919d04e2fec1 100644 (file)
@@ -13,7 +13,7 @@ import re
 import sys
 import time
 
-from htsworkflow.pipelines.runfolder import \
+from htsworkflow.pipelines import \
    ElementTree, \
    VERSION_RE, \
    EUROPEAN_STRPTIME
@@ -23,7 +23,7 @@ LOGGER = logging.getLogger(__name__)
 # make epydoc happy
 __docformat__ = "restructuredtext en"
 
-LANE_LIST = range(1,9)
+LANE_LIST = list(range(1,9))
 
 class Phasing(object):
     PHASING = 'Phasing'
@@ -111,7 +111,7 @@ class CrosstalkMatrix(object):
         for b in base_order:
             for value in self.base[b]:
                 crosstalk_value = ElementTree.SubElement(root, CrosstalkMatrix.ELEMENT)
-                crosstalk_value.text = unicode(value)
+                crosstalk_value.text = str(value)
                 crosstalk_value.tail = os.linesep
 
         return root
@@ -170,7 +170,8 @@ def crosstalk_matrix_from_bustard_config(bustard_path, bustard_config_tree):
         # we estimated the matrix from something in this run.
         # though we don't really care which lane it was
         if matrix_auto_lane == 0:
-            auto_lane_fragment = ""
+            # it's defaulting to all of the lanes, so just pick one
+            auto_lane_fragment = "_1"
         else:
             auto_lane_fragment = "_%d" % ( matrix_auto_lane,)
 
@@ -208,7 +209,7 @@ class Bustard(object):
     BUSTARD_CONFIG = 'BaseCallAnalysis'
 
     def __init__(self, xml=None):
-        self.version = None
+        self._path_version = None # version number from directory name
         self.date = None
         self.user = None
         self.phasing = {}
@@ -233,11 +234,54 @@ class Bustard(object):
 
         groups = name.split("_")
         version = re.search(VERSION_RE, groups[0])
-        self.version = version.group(1)
+        self._path_version = version.group(1)
         t = time.strptime(groups[1], EUROPEAN_STRPTIME)
         self.date = date(*t[0:3])
         self.user = groups[2]
 
+    def _get_sequence_format(self):
+        """Guess sequence format"""
+        project_glob = os.path.join(self.pathname, 'Project_*')
+        LOGGER.debug("Scanning: %s" % (project_glob,))
+        projects = glob(project_glob)
+        if len(projects) > 0:
+            # Hey we look like a demultiplexed run
+            return 'fastq'
+        seqs = glob(os.path.join(self.pathname, '*_seq.txt'))
+        if len(seqs) > 0:
+            return 'srf'
+        return 'qseq'
+    sequence_format = property(_get_sequence_format)
+
+    def _get_software_version(self):
+        """return software name, version tuple"""
+        if self.bustard_config is None:
+            if self._path_version is not None:
+                return 'Bustard', self._path_version
+            else:
+                return None
+        software_nodes = self.bustard_config.xpath('Run/Software')
+        if len(software_nodes) == 0:
+            return None
+        elif len(software_nodes) > 1:
+            raise RuntimeError("Too many software XML elements for bustard.py")
+        else:
+            return (software_nodes[0].attrib['Name'],
+                    software_nodes[0].attrib['Version'])
+
+    def _get_software(self):
+        """Return software name"""
+        software_version = self._get_software_version()
+        return software_version[0] if software_version is not None else None
+    software = property(_get_software)
+
+    def _get_version(self):
+        """Return software version"""
+        software_version = self._get_software_version()
+        return software_version[1] if software_version is not None else None
+    version = property(_get_version)
+
+
     def _get_time(self):
         if self.date is None:
             return None
@@ -263,7 +307,7 @@ class Bustard(object):
 
         # add phasing parameters
         for lane in LANE_LIST:
-            if self.phasing.has_key(lane):
+            if lane in self.phasing:
                 params.append(self.phasing[lane].get_elements())
 
         # add crosstalk matrix if it exists
@@ -283,7 +327,7 @@ class Bustard(object):
             LOGGER.warn('Bustard XML tree is a higher version than this class')
         for element in list(tree):
             if element.tag == Bustard.SOFTWARE_VERSION:
-                self.version = element.text
+                self._path_version = element.text
             elif element.tag == Bustard.DATE:
                 self.date = date.fromtimestamp(float(element.text))
             elif element.tag == Bustard.USER:
@@ -321,6 +365,9 @@ def bustard(pathname):
     else:
         b = bustard_from_ga1(pathname)
 
+    if not b:
+        raise RuntimeError("Unable to parse base-call directory at %s" % (pathname,))
+
     return b
 
 def bustard_from_ga1(pathname):
@@ -332,14 +379,14 @@ def bustard_from_ga1(pathname):
     if len(groups) < 3:
         msg = "Not enough information to create attributes"\
               " from directory name: %s"
-        LOGGER.error(msg % (self.pathname,))
+        LOGGER.error(msg % (pathname,))
         return None
 
     b = Bustard()
     b.pathname = pathname
     b.update_attributes_from_pathname()
     version = re.search(VERSION_RE, groups[0])
-    b.version = version.group(1)
+    b._path_version = version.group(1)
     t = time.strptime(groups[1], EUROPEAN_STRPTIME)
     b.date = date(*t[0:3])
     b.user = groups[2]
@@ -371,8 +418,6 @@ def bustard_from_ga2(pathname, config_filename):
     b.bustard_config = bustard_config_root.getroot()
     b.crosstalk = crosstalk_matrix_from_bustard_config(b.pathname,
                                                        b.bustard_config)
-    software = bustard_config_root.find('*/Software')
-    b.version = software.attrib['Version']
     add_phasing(b)
 
     return b
@@ -382,8 +427,6 @@ def bustard_from_hiseq(pathname, config_filename):
     b.pathname = pathname
     bustard_config_root = ElementTree.parse(config_filename)
     b.bustard_config = bustard_config_root.getroot()
-    software = bustard_config_root.find('*/Software')
-    b.version = software.attrib['Version']
     add_phasing(b)
     return b
 
@@ -413,7 +456,7 @@ def main(cmdline):
     opts, args = parser.parse_args(cmdline)
 
     for bustard_dir in args:
-        print u'analyzing bustard directory: ' + unicode(bustard_dir)
+        print('analyzing bustard directory: ' + str(bustard_dir))
         bustard_object = bustard(bustard_dir)
         bustard_object.dump()
 
@@ -427,8 +470,8 @@ def main(cmdline):
         b2 = ElementTree.tostring(b2_tree).split(os.linesep)
         for line1, line2 in zip(b1, b2):
             if b1 != b2:
-                print "b1: ", b1
-                print "b2: ", b2
+                print("b1: ", b1)
+                print("b2: ", b2)
 
 if __name__ == "__main__":
     main(sys.argv[1:])