Adding the new Reports component.
[htsworkflow.git] / gaworkflow / frontend / htsw_reports / libinfopar.py
1 from xml.sax import make_parser
2 from xml.sax.handler import ContentHandler
3
4
5 '''
6 Example library node from LibraryInfo.xml:
7 <Library Name="SL14">
8 <Track Flowcell="FC10135" Lane="4" Filename="071005_FC10135_s4_FoxP2_polyclonal_pfsk1_SL14.align_25.hg18.txt" Count=" 2438679" Complexity="4.51989e-06"/>
9 <Track Flowcell="FC11977" Lane="6" Filename="070928_FC11977_s6_FoxP2_polyclonal_pfsk1_SL14.align_25.hg18.txt" Count=" 2007880" Complexity="0"/>
10 <Track Flowcell="FC13593" Lane="5" Filename="071002_FC13593_s5_FoxP2_polyclonal_pfsk1_SL14.align_25.hg18.txt" Count=" 2533720" Complexity="1.97771e-06"/>
11 </Library>
12 '''
class LibInfoHandler(ContentHandler):
  """SAX handler that totals the lanes and reads of a single library.

  The handler walks <Library Name="..."> elements and, for the library
  whose Name equals ``searchTerm``, accumulates over its <Track> children:

    LanesCount -- number of <Track> elements carrying a Lane attribute
    ReadsCount -- sum of the integer Count attributes
    Msg        -- debugging trace of every element that was neither a
                  <Library> nor a <Track> of the requested library
  """

  def __init__(self, searchTerm):
    """Prepare the counters for the library named ``searchTerm``."""
    # Initialise the base handler's own state as well.
    ContentHandler.__init__(self)
    self.searchTerm = searchTerm
    self.currlibid = ''
    self.LanesCount, self.ReadsCount = 0, 0
    self.Msg = ''

  def startElement(self, name, attrs):
    """Update the running totals for one opening tag.

    name  -- element name reported by the parser
    attrs -- attribute mapping of the element (must support ``get``)

    Raises ValueError if a Count attribute is present but not an integer.
    """
    if name == 'Library':
      # Remember which library the following <Track> elements belong to.
      self.currlibid = attrs.get('Name', "")
    elif name == 'Track' and self.searchTerm == self.currlibid:
      # One track is one lane. Count the track itself rather than the
      # length of the Lane string, which over-counts multi-digit lanes.
      if attrs.get('Lane', ""):
        self.LanesCount += 1
      # Count values are space padded in the file (e.g. " 2438679");
      # a missing or blank Count contributes no reads instead of crashing.
      count = attrs.get('Count', "").strip()
      if count:
        self.ReadsCount += int(count)
    else:
      self.Msg += ' | name = ' + name + ', currlibid = ' + self.currlibid
30
31   #def characters (self, ch):
32     # return ..
33
34   #def endElement(self, name):
35     # return ..
36
37
38 ## TODO: Change this to read LibraryInfo.xml only ONCE per ReportRequest (do it in models.py), and read it directly from the analysis_server.
39
def getLibReads(libid, xml_path='/gaworkflow/gaworkflow/frontend/htsw_reports/LibraryInfo.xml'):
  """Return ``[lanes, reads]`` for the library named ``libid``.

  libid    -- value of the Name attribute of the <Library> to total
  xml_path -- location of the LibraryInfo XML file; defaults to the
              path inside the htsw_reports frontend directory

  The whole file is parsed on every call. A library that does not occur
  in the file yields ``[0, 0]``.
  """
  parser = make_parser()
  curHandler = LibInfoHandler(libid)
  parser.setContentHandler(curHandler)
  # Binary mode lets the parser honour the XML encoding declaration, and
  # the context manager closes the handle even if parsing fails.
  with open(xml_path, 'rb') as xml_file:
    parser.parse(xml_file)
  return [curHandler.LanesCount, curHandler.ReadsCount]
50