Use a logger initialized to the module name much more consistently.
[htsworkflow.git] / htsworkflow / pipelines / gerald.py
1 """
2 Provide access to information stored in the GERALD directory.
3 """
4 from datetime import datetime, date
5 import logging
6 import os
7 import time
8
9 from htsworkflow.pipelines.summary import Summary
10 from htsworkflow.pipelines.eland import eland, ELAND
11
12 from htsworkflow.pipelines.runfolder import \
13    ElementTree, \
14    EUROPEAN_STRPTIME, \
15    LANES_PER_FLOWCELL, \
16    VERSION_RE
17 from htsworkflow.util.ethelp import indent, flatten
18
# Logger named after this module; used by the Gerald class and the
# gerald() factory below for progress and warning messages.
LOGGER = logging.getLogger(__name__)
20
class Gerald(object):
    """
    Capture meaning out of the GERALD directory

    ``tree`` holds the root element of the GERALD config.xml (or the
    RunParameters element when loaded with set_elements), ``summary``
    the parsed Summary report and ``eland_results`` the parsed eland
    output.  ``lanes`` maps integer lane ids to LaneParameters objects.
    """
    XML_VERSION = 1
    GERALD='Gerald'
    RUN_PARAMETERS='RunParameters'
    SUMMARY='Summary'

    class LaneParameters(object):
        """
        Make it easy to access elements of LaneSpecificRunParameters from python
        """
        def __init__(self, gerald, lane_id):
            # gerald is the owning Gerald object; its tree is looked up
            # on every access so it may be assigned after construction.
            # lane_id is the lane number as the string used in tag names.
            self._gerald = gerald
            self._lane_id = lane_id

        def __get_attribute(self, xml_tag):
            """
            Return the text stored for this lane under xml_tag

            Returns None when LaneSpecificRunParameters has no xml_tag
            element, or that element has no entry for this lane.
            Raises RuntimeError when the element has more children than
            LANES_PER_FLOWCELL.
            """
            subtree = self._gerald.tree.find('LaneSpecificRunParameters')
            container = subtree.find(xml_tag)
            if container is None:
                return None
            # Element.getchildren() was removed in Python 3.9,
            # list(element) returns the same children everywhere.
            children = list(container)
            if len(children) > LANES_PER_FLOWCELL:
                raise RuntimeError('GERALD config.xml file changed')
            # child tags look like <sample>_<lane id>
            lanes = [x.tag.split('_')[1] for x in children]
            try:
                index = lanes.index(self._lane_id)
            except ValueError:
                # no entry for this lane
                return None
            return children[index].text

        def _get_analysis(self):
            return self.__get_attribute('ANALYSIS')
        analysis = property(_get_analysis)

        def _get_eland_genome(self):
            genome = self.__get_attribute('ELAND_GENOME')
            # default to the chipwide parameters if there isn't an
            # entry in the lane specific parameters
            if genome is None:
                genome = self._gerald._get_chip_attribute('ELAND_GENOME')
            # ignore flag value
            if genome == 'Need_to_specify_ELAND_genome_directory':
                genome = None
            return genome
        eland_genome = property(_get_eland_genome)

        def _get_read_length(self):
            read_length = self.__get_attribute('READ_LENGTH')
            # fall back to the chipwide value when the lane has none
            if read_length is None:
                read_length = self._gerald._get_chip_attribute('READ_LENGTH')
            return read_length
        read_length = property(_get_read_length)

        def _get_use_bases(self):
            return self.__get_attribute('USE_BASES')
        use_bases = property(_get_use_bases)

    class LaneSpecificRunParameters(object):
        """
        Provide access to LaneSpecificRunParameters

        Behaves like a read-only dictionary mapping integer lane ids to
        LaneParameters objects.  The dictionary is built from the owning
        Gerald object's tree on first access and reused afterwards.
        """
        def __init__(self, gerald):
            self._gerald = gerald
            # None until _initalize_lanes has built the dictionary
            self._lanes = None

        def _initalize_lanes(self):
            """
            build dictionary of LaneParameters
            """
            lanes = {}
            tree = self._gerald.tree
            analysis = tree.find('LaneSpecificRunParameters/ANALYSIS')
            # according to the pipeline specs I think their fields
            # are sampleName_laneID, with sampleName defaulting to s
            # since laneIDs are constant lets just try using
            # those consistently.
            for element in analysis:
                # unpacking insists on exactly one '_' in the tag
                sample, lane_id = element.tag.split('_')
                lanes[int(lane_id)] = Gerald.LaneParameters(
                                              self._gerald, lane_id)
            # only publish the dictionary once it is complete, so a
            # failure above leaves the cache unset and is retried later
            self._lanes = lanes

        def _get_lanes(self):
            """
            Return the lane dictionary, building it on first use
            """
            if self._lanes is None:
                self._initalize_lanes()
            return self._lanes

        def __getitem__(self, key):
            return self._get_lanes()[key]
        def keys(self):
            return self._get_lanes().keys()
        def values(self):
            return self._get_lanes().values()
        def items(self):
            return self._get_lanes().items()
        def __len__(self):
            return len(self._get_lanes())

    def __init__(self, xml=None):
        """
        Create an empty Gerald object, or load one from xml

        xml, when given, is an element tree previously produced by
        get_elements; it is passed to set_elements.
        """
        self.pathname = None
        self.tree = None

        # parse lane parameters out of the config.xml file
        self.lanes = Gerald.LaneSpecificRunParameters(self)

        self.summary = None
        self.eland_results = None

        if xml is not None:
            self.set_elements(xml)

    def _get_date(self):
        # without a config tree fall back to the current time
        if self.tree is None:
            return datetime.today()
        timestamp = self.tree.findtext('ChipWideRunParameters/TIME_STAMP')
        # TIME_STAMP is parsed with the '%c' date and time format
        epochstamp = time.mktime(time.strptime(timestamp, '%c'))
        return datetime.fromtimestamp(epochstamp)
    date = property(_get_date)

    def _get_time(self):
        return time.mktime(self.date.timetuple())
    time = property(_get_time, doc='return run time as seconds since epoch')

    def _get_experiment_root(self):
        if self.tree is None:
            return None
        return self.tree.findtext('ChipWideRunParameters/EXPT_DIR_ROOT')

    def _get_runfolder_name(self):
        """
        Return the first path component of EXPT_DIR once EXPT_DIR_ROOT
        has been removed from it, or None when it cannot be determined.
        """
        if self.tree is None:
            return None

        root = self._get_experiment_root()
        if root is None:
            root = ''
        else:
            # make sure the root ends with a path separator
            root = os.path.join(root,'')

        experiment_dir = self.tree.findtext('ChipWideRunParameters/EXPT_DIR')
        if experiment_dir is None:
            return None
        experiment_dir = experiment_dir.replace(root, '')
        if len(experiment_dir) == 0:
            return None

        dirnames = experiment_dir.split(os.path.sep)
        return dirnames[0]
    runfolder_name = property(_get_runfolder_name)

    def _get_version(self):
        if self.tree is None:
            return None
        return self.tree.findtext('ChipWideRunParameters/SOFTWARE_VERSION')
    version = property(_get_version)

    def _get_chip_attribute(self, value):
        # text of ChipWideRunParameters/<value>, None when it is missing
        return self.tree.findtext('ChipWideRunParameters/%s' % (value,))

    def dump(self):
        """
        Debugging function, report current object
        """
        # a single formatted argument prints the same text whether print
        # is a statement (Python 2) or a function (Python 3)
        print('Gerald version: %s' % (self.version,))
        print('Gerald run date: %s' % (self.date,))
        print('Gerald config.xml: %s' % (self.tree,))
        # summary is None until a Summary report has been loaded
        if self.summary is not None:
            self.summary.dump()

    def get_elements(self):
        """
        Return a Gerald element holding the config tree, the summary
        and, when present, the eland results.

        Returns None when either the config tree or the summary has not
        been loaded.
        """
        if self.tree is None or self.summary is None:
            return None

        # u'%s' yields a text string on both Python 2 and Python 3
        gerald = ElementTree.Element(Gerald.GERALD,
                                     {'version': u'%s' % (Gerald.XML_VERSION,)})
        gerald.append(self.tree)
        gerald.append(self.summary.get_elements())
        if self.eland_results:
            gerald.append(self.eland_results.get_elements())
        return gerald

    def set_elements(self, tree):
        """
        Load this object from an element tree built by get_elements

        Raises ValueError when the root tag of tree is not Gerald.
        Unrecognized child elements are logged and skipped.
        """
        if tree.tag !=  Gerald.GERALD:
            raise ValueError('exptected GERALD')
        xml_version = int(tree.attrib.get('version', 0))
        if xml_version > Gerald.XML_VERSION:
            LOGGER.warning('XML tree is a higher version than this class')
        # start with empty eland results in case the tree has none
        self.eland_results = ELAND()
        for element in list(tree):
            # tags are compared case-insensitively
            tag = element.tag.lower()
            if tag == Gerald.RUN_PARAMETERS.lower():
                self.tree = element
            elif tag == Gerald.SUMMARY.lower():
                self.summary = Summary(xml=element)
            elif tag == ELAND.ELAND.lower():
                self.eland_results = ELAND(xml=element)
            else:
                LOGGER.warning("Unrecognized tag %s", element.tag)
222
def gerald(pathname):
    """
    Build a Gerald object from the files in a GERALD directory

    pathname is the GERALD directory; a leading ~ is expanded.
    config.xml is parsed from it, then Summary.xml when that file
    exists and Summary.htm otherwise, and finally the eland results
    are loaded with eland().  Returns the populated Gerald object.
    """
    g = Gerald()
    g.pathname = os.path.expanduser(pathname)
    LOGGER.info("Parsing gerald config.xml")
    config_pathname = os.path.join(g.pathname, 'config.xml')
    g.tree = ElementTree.parse(config_pathname).getroot()

    # parse the summary report, preferring Summary.xml over Summary.htm
    summary_pathname = os.path.join(g.pathname, 'Summary.xml')
    if os.path.exists(summary_pathname):
        LOGGER.info("Parsing Summary.xml")
    else:
        summary_pathname = os.path.join(g.pathname, 'Summary.htm')
        LOGGER.info("Parsing Summary.htm")
    g.summary = Summary(summary_pathname)
    # parse eland files
    g.eland_results = eland(g.pathname, g)
    return g
242
if __name__ == "__main__":
    # Quick manual check: parse the GERALD directory given as the
    # first command line argument.
    import sys
    g = gerald(sys.argv[1])
    # To inspect the result, dump the generated XML:
    #ElementTree.dump(g.get_elements())