convert to print_function
[htsworkflow.git] / htsworkflow / pipelines / firecrest.py
1 """
2 Extract information about the Firecrest run
3
4 Firecrest
5   class holding the properties we found
6 firecrest
7   Firecrest factory function initialized from a directory name
8 fromxml
9   Firecrest factory function initialized from an xml dump from
10   the Firecrest object.
11 """
12 from __future__ import print_function
13
14 from datetime import date
15 from glob import glob
16 import logging
17 import os
18 import re
19 import time
20
21 from htsworkflow.pipelines import \
22    ElementTree, \
23    VERSION_RE, \
24    EUROPEAN_STRPTIME
25
26 LOGGER = logging.getLogger(__name__)
27
class Firecrest(object):
    """Gather information about older firecrest runs

    An instance can be serialized to an ElementTree element with
    :meth:`get_elements` and restored from one with :meth:`set_elements`
    (or by passing the element to the constructor).

    Attributes initialized by the constructor:

    * ``start`` / ``stop`` -- first and last cycle, None until set
    * ``version`` -- software version string, None until set
    * ``date`` -- run date, defaults to today's date
    * ``user`` -- user name, None until set
    * ``matrix`` -- matrix text, None until set
    """
    XML_VERSION = 1

    # xml tag names
    FIRECREST = 'Firecrest'
    SOFTWARE_VERSION = 'version'
    START = 'FirstCycle'
    STOP = 'LastCycle'
    DATE = 'run_time'
    USER = 'user'
    MATRIX = 'matrix'

    def __init__(self, xml=None):
        """Initialize a Firecrest object

        consider using factory :function:firecrest

        :param xml: xml serialization element to initialize from [optional]
        """
        self.start = None
        self.stop = None
        self.version = None
        self.date = date.today()
        self.user = None
        self.matrix = None

        if xml is not None:
            self.set_elements(xml)

    def _get_software(self):
        return "Firecrest"
    software = property(_get_software,
                        doc='name of the software, always "Firecrest"')

    def _get_time(self):
        # ``time`` here is the module-level time module; the class attribute
        # of the same name defined below is not visible from method scope.
        return time.mktime(self.date.timetuple())
    time = property(_get_time, doc='return run time as seconds since epoch')

    def dump(self):
        """Report debugging information

        Prints the cycle range, version, run date and user to stdout.
        """
        print("Starting cycle:", self.start)
        print("Ending cycle:", self.stop)
        print("Firecrest version:", self.version)
        print("Run date:", self.date)
        print("user:", self.user)

    def get_elements(self):
        """Return XML serialization structure.

        :return: a ``Firecrest`` element whose ``version`` attribute is
            XML_VERSION and whose children hold the version, first/last
            cycle, run time (seconds since epoch) and user; a ``matrix``
            child is added only when a matrix is set.
        """
        attribs = {'version': str(Firecrest.XML_VERSION)}
        root = ElementTree.Element(Firecrest.FIRECREST, attrib=attribs)
        version = ElementTree.SubElement(root, Firecrest.SOFTWARE_VERSION)
        version.text = self.version
        start_cycle = ElementTree.SubElement(root, Firecrest.START)
        start_cycle.text = str(self.start)
        stop_cycle = ElementTree.SubElement(root, Firecrest.STOP)
        stop_cycle.text = str(self.stop)
        run_date = ElementTree.SubElement(root, Firecrest.DATE)
        run_date.text = str(self.time)
        user = ElementTree.SubElement(root, Firecrest.USER)
        user.text = self.user
        if self.matrix is not None:
            matrix = ElementTree.SubElement(root, Firecrest.MATRIX)
            matrix.text = self.matrix
        return root

    def set_elements(self, tree):
        """Load this object's attributes from an XML serialization.

        :param tree: element produced by :meth:`get_elements`
        :raises ValueError: if the root tag is not ``Firecrest`` or a child
            element has an unrecognized tag

        A tree whose version attribute is higher than XML_VERSION is still
        parsed; only a warning is logged.
        """
        if tree.tag != Firecrest.FIRECREST:
            raise ValueError('Expected "Firecrest" SubElements')
        xml_version = int(tree.attrib.get('version', 0))
        if xml_version > Firecrest.XML_VERSION:
            # Logger.warn is a deprecated alias that newer Pythons no longer
            # provide; Logger.warning is the supported spelling.
            LOGGER.warning('Firecrest XML tree is a higher version than this class')
        for element in tree:
            if element.tag == Firecrest.SOFTWARE_VERSION:
                self.version = element.text
            elif element.tag == Firecrest.START:
                self.start = int(element.text)
            elif element.tag == Firecrest.STOP:
                self.stop = int(element.text)
            elif element.tag == Firecrest.DATE:
                # stored as seconds since epoch, see the ``time`` property
                self.date = date.fromtimestamp(float(element.text))
            elif element.tag == Firecrest.USER:
                self.user = element.text
            elif element.tag == Firecrest.MATRIX:
                self.matrix = element.text
            else:
                raise ValueError("Unrecognized tag: %s" % (element.tag,))
117
def firecrest(pathname):
    """
    Examine the directory at pathname and initialize a Firecrest object

    The directory's base name is split on '_' and its first four fields are
    read as the cycle range (``C<start>-<stop>``), the firecrest version,
    the run date and the user name.  No validation is done on the name; one
    that does not follow this layout makes the parsing below raise.

    :param pathname: path of a firecrest run directory
    :return: a populated Firecrest object, or None when the directory has
        neither a ``Matrix/s_matrix.txt`` file nor any ``Bustard*`` entry
    """
    f = Firecrest()
    f.pathname = pathname

    # parse firecrest directory name
    name = os.path.basename(pathname)
    groups = name.split('_')
    # grab the start/stop cycle information
    cycle = re.match(r"C([0-9]+)-([0-9]+)", groups[0])
    f.start = int(cycle.group(1))
    f.stop = int(cycle.group(2))
    # firecrest version
    version = re.search(VERSION_RE, groups[1])
    f.version = version.group(1)
    # datetime
    t = time.strptime(groups[2], EUROPEAN_STRPTIME)
    f.date = date(*t[0:3])
    # username
    f.user = groups[3]

    bustard_pattern = os.path.join(pathname, 'Bustard*')
    # should I parse this deeper than just stashing the
    # contents of the matrix file?
    matrix_pathname = os.path.join(pathname, 'Matrix', 's_matrix.txt')
    if os.path.exists(matrix_pathname):
        # this is for firecrest < 1.3.2
        with open(matrix_pathname, 'r') as matrix_file:
            f.matrix = matrix_file.read()
    elif glob(bustard_pattern):
        # there are runs here. Bustard should save the matrix.
        # (glob returns a list; test it for emptiness rather than comparing
        # it with an integer, which raises TypeError on Python 3.)
        f.matrix = None
    else:
        return None

    return f
155
def fromxml(tree):
    """
    Build a Firecrest object from an element tree node.

    :param tree: element holding a Firecrest serialization
    :return: a new Firecrest object loaded through ``set_elements``
    """
    restored = Firecrest()
    # Always route through set_elements so the node is parsed
    # unconditionally, whatever value was passed in.
    restored.set_elements(tree)
    return restored