add --clean option to runfolder
[htsworkflow.git] / htsworkflow / pipelines / firecrest.py
1 """
2 Extract information about the Firecrest run
3
4 Firecrest 
5   class holding the properties we found
6 firecrest 
7   Firecrest factory function initialized from a directory name
8 fromxml 
9   Firecrest factory function initialized from an xml dump from
10   the Firecrest object.
11 """
12
13 from datetime import date
14 from glob import glob
15 import os
16 import re
17 import time
18
19 from htsworkflow.pipelines.runfolder import \
20    ElementTree, \
21    VERSION_RE, \
22    EUROPEAN_STRPTIME
23
24 __docformat__ = "restructuredtext en"
25
class Firecrest(object):
    """
    Properties describing a single Firecrest run.

    Instance attributes:
      start, stop -- first and last cycle as integers (None until set)
      version     -- Firecrest software version string
      date        -- run date as a datetime.date (defaults to today)
      user        -- user name string
      matrix      -- text of the matrix file, or None when unavailable

    The object can be serialized to and restored from an ElementTree
    element with get_elements() and set_elements().
    """
    XML_VERSION = 1

    # xml tag names
    FIRECREST = 'Firecrest'
    SOFTWARE_VERSION = 'version'
    START = 'FirstCycle'
    STOP = 'LastCycle'
    DATE = 'run_time'
    USER = 'user'
    MATRIX = 'matrix'

    def __init__(self, xml=None):
        """
        Create an empty Firecrest record.

        If xml is given it must be a 'Firecrest' element; the attributes
        are then loaded from it via set_elements().
        """
        self.start = None
        self.stop = None
        self.version = None
        self.date = date.today()
        self.user = None
        self.matrix = None

        if xml is not None:
            self.set_elements(xml)

    def _get_time(self):
        # 'time' here is the module-level time module; the class attribute
        # of the same name is not visible from inside a method body.
        return time.mktime(self.date.timetuple())
    time = property(_get_time, doc='return run time as seconds since epoch')

    def dump(self):
        """
        Print a short human readable summary to standard output.
        """
        # A single pre-formatted argument keeps the output identical
        # whether print is a statement or a function.
        print("Starting cycle: %s" % (self.start,))
        print("Ending cycle: %s" % (self.stop,))
        print("Firecrest version: %s" % (self.version,))
        print("Run date: %s" % (self.date,))
        print("user: %s" % (self.user,))

    def get_elements(self):
        """
        Return an ElementTree element describing this object.

        The root element is tagged 'Firecrest' and carries the XML format
        version as its 'version' attribute.  The matrix child element is
        only written when a matrix is present.
        """
        attribs = {'version': str(Firecrest.XML_VERSION)}
        root = ElementTree.Element(Firecrest.FIRECREST, attrib=attribs)
        version = ElementTree.SubElement(root, Firecrest.SOFTWARE_VERSION)
        version.text = self.version
        start_cycle = ElementTree.SubElement(root, Firecrest.START)
        start_cycle.text = str(self.start)
        stop_cycle = ElementTree.SubElement(root, Firecrest.STOP)
        stop_cycle.text = str(self.stop)
        run_date = ElementTree.SubElement(root, Firecrest.DATE)
        # the date is stored as seconds since the epoch
        run_date.text = str(self.time)
        user = ElementTree.SubElement(root, Firecrest.USER)
        user.text = self.user
        if self.matrix is not None:
            matrix = ElementTree.SubElement(root, Firecrest.MATRIX)
            matrix.text = self.matrix
        return root

    def set_elements(self, tree):
        """
        Load this object's attributes from an ElementTree element.

        tree should be a 'Firecrest' element such as the one returned by
        get_elements().  A tree whose version attribute is newer than
        XML_VERSION is still read, but a warning is logged.

        Raises ValueError if the root tag is not 'Firecrest' or if an
        unrecognized child tag is found.
        """
        if tree.tag != Firecrest.FIRECREST:
            raise ValueError('Expected "Firecrest" SubElements')
        xml_version = int(tree.attrib.get('version', 0))
        if xml_version > Firecrest.XML_VERSION:
            # logging is not among this module's top-level imports, so
            # bring it into scope here rather than fail with a NameError.
            import logging
            logging.warning('Firecrest XML tree is a higher version than this class')
        for element in list(tree):
            if element.tag == Firecrest.SOFTWARE_VERSION:
                self.version = element.text
            elif element.tag == Firecrest.START:
                self.start = int(element.text)
            elif element.tag == Firecrest.STOP:
                self.stop = int(element.text)
            elif element.tag == Firecrest.DATE:
                # stored as seconds since the epoch, see get_elements()
                self.date = date.fromtimestamp(float(element.text))
            elif element.tag == Firecrest.USER:
                self.user = element.text
            elif element.tag == Firecrest.MATRIX:
                self.matrix = element.text
            else:
                raise ValueError("Unrecognized tag: %s" % (element.tag,))
99
def firecrest(pathname):
    """
    Examine the directory at pathname and initialize a Firecrest object

    The last component of pathname is split on '_' and read as four
    fields: the cycle range ("C<start>-<stop>"), a field searched with
    VERSION_RE for the software version, a date parsed with
    EUROPEAN_STRPTIME, and the user name.

    Returns the populated Firecrest object.  Returns None when the
    directory contains neither Matrix/s_matrix.txt nor anything matching
    Bustard*.
    """
    f = Firecrest()
    f.pathname = pathname

    # parse firecrest directory name
    name = os.path.basename(pathname)
    groups = name.split('_')
    # grab the start/stop cycle information
    cycle = re.match("C([0-9]+)-([0-9]+)", groups[0])
    f.start = int(cycle.group(1))
    f.stop = int(cycle.group(2))
    # firecrest version
    version = re.search(VERSION_RE, groups[1])
    f.version = (version.group(1))
    # datetime
    t = time.strptime(groups[2], EUROPEAN_STRPTIME)
    f.date = date(*t[0:3])
    # username
    f.user = groups[3]

    bustard_pattern = os.path.join(pathname, 'Bustard*')
    # should I parse this deeper than just stashing the
    # contents of the matrix file?
    matrix_pathname = os.path.join(pathname, 'Matrix', 's_matrix.txt')
    if os.path.exists(matrix_pathname):
        # this is for firecrest < 1.3.2
        with open(matrix_pathname, 'r') as matrix_file:
            f.matrix = matrix_file.read()
    elif glob(bustard_pattern):
        # glob() returns a list, so test it for emptiness; comparing the
        # list itself against 0 never selects the else branch.
        f.matrix = None
        # there are runs here. Bustard should save the matrix.
    else:
        return None

    return f
137
def fromxml(tree):
    """
    Build a Firecrest object populated from an element tree node

    A fresh Firecrest instance is created, the node is handed to its
    set_elements() method, and the instance is returned.
    """
    result = Firecrest()
    result.set_elements(tree)
    return result