Fixed: correctly read the Lane attribute of a Track tag when it lists multiple lanes
[htsworkflow.git] / htswfrontend / htswfrontend / htsw_reports / libinfopar.py
1 from htswfrontend import settings
2 from django.http import HttpResponse
3 from datetime import datetime
4 from string import *
5 import re
6 from xml.sax import make_parser
7 from xml.sax.handler import ContentHandler
8 import urllib
9 import urllib2
10 import os
11
12 '''
13 Example library node from LibraryInfo.xml:
14 <Library Name="SL14">
15 <Track Flowcell="FC10135" Lane="4" Filename="071005_FC10135_s4_FoxP2_polyclonal_pfsk1_SL14.align_25.hg18.txt" Count=" 2438679" Complexity="4.51989e-06"/>
16 <Track Flowcell="FC11977" Lane="6" Filename="070928_FC11977_s6_FoxP2_polyclonal_pfsk1_SL14.align_25.hg18.txt" Count=" 2007880" Complexity="0"/>
17 <Track Flowcell="FC13593" Lane="5" Filename="071002_FC13593_s5_FoxP2_polyclonal_pfsk1_SL14.align_25.hg18.txt" Count=" 2533720" Complexity="1.97771e-06"/>
18 </Library>
19 '''
class LibInfoHandler(ContentHandler):
  '''
  SAX handler that totals lanes and reads for a single library.

  While LibraryInfo.xml is parsed, every <Track> that follows a <Library>
  element whose Name equals searchTerm contributes to the totals.

  Attributes:
    searchTerm: library name compared against <Library Name="...">.
    currlibid:  Name of the most recently opened <Library> element.
    LanesCount: total length of the Lane attribute over matching tracks
                (each character of the attribute counts as one lane).
    ReadsCount: sum of the Count attribute over matching tracks.
    Msg:        'OK', or 'failed parsing xml file' once any element could
                not be processed.
  '''
  def __init__(self, searchTerm):
    # Direct base call (not super()) keeps this working on Python 2, where
    # ContentHandler is an old-style class.
    ContentHandler.__init__(self)
    self.searchTerm = searchTerm
    self.currlibid = ''
    self.LanesCount, self.ReadsCount = 0, 0
    self.Msg = 'OK'

  def startElement(self, name, attrs):
    '''
    Track the current library and accumulate totals for matching tracks.

    name:  element name reported by the SAX parser.
    attrs: mapping-like attribute object; only .get() is used.

    Errors are not raised to the parser: they are recorded in self.Msg and
    the offending element is skipped without touching the totals.
    '''
    try:
      if name == 'Library':
        self.currlibid = attrs.get('Name', "")
      elif name == 'Track' and self.searchTerm == self.currlibid:
        # Parse everything first so a malformed Count cannot leave
        # LanesCount updated while ReadsCount is not.
        lane_total = len(attrs.get('Lane', ""))
        reads = int(attrs.get('Count', ""))  # int() tolerates the padding in Count=" 2438679"
        self.LanesCount += lane_total
        self.ReadsCount += reads
    except Exception:
      # Best effort: flag the problem but let the parse continue.
      # (Exception, not a bare except, so Ctrl-C / SystemExit still propagate.)
      self.Msg = 'failed parsing xml file'
37
38   #def characters (self, ch):
39     # return ..
40
41   #def endElement(self, name):
42     # return ..
43
class LibInfoHandlerByFlowCell(ContentHandler):
  '''
  SAX handler that totals lanes and reads for a single flowcell.

  Every <Track> whose Flowcell attribute equals searchTerm contributes to
  the totals, regardless of the library it belongs to.

  Attributes:
    searchTerm:     flowcell id compared against <Track Flowcell="...">.
    LanesCount:     total length of the Lane attribute over matching tracks.
    ReadsCount:     sum of the Count attribute over matching tracks.
    LaneReadsCount: per-lane read totals; index i holds lane i+1 (8 lanes).
    Msg:            'OK', or 'failed parsing xml file' once any element
                    could not be processed.
  '''
  def __init__(self, searchTerm):
    # Direct base call (not super()) keeps this working on Python 2, where
    # ContentHandler is an old-style class.
    ContentHandler.__init__(self)
    self.searchTerm = searchTerm
    self.LanesCount, self.ReadsCount, self.LaneReadsCount = 0, 0, [0,0,0,0,0,0,0,0]
    self.Msg = 'OK'

  def startElement(self, name, attrs):
    '''
    Accumulate totals for a <Track> belonging to the searched flowcell.

    name:  element name reported by the SAX parser.
    attrs: mapping-like attribute object; only .get() is used.

    A single Lane attribute may name several lanes, one digit per lane
    (e.g. "12" means lanes 1 and 2); the track's Count is credited to each.
    Errors are recorded in self.Msg instead of being raised, and a track
    that fails validation changes none of the totals.
    '''
    try:
      if name == 'Track' and attrs.get('Flowcell', "") == self.searchTerm:
        lanes = attrs.get('Lane', "")
        reads = int(attrs.get('Count', ""))
        # Validate every lane before mutating anything. Without the range
        # check, lane "0" would index -1 and silently be credited to lane 8.
        slots = [int(lane) - 1 for lane in lanes]
        for slot in slots:
          if slot < 0 or slot >= len(self.LaneReadsCount):
            raise ValueError('lane out of range in %r' % (lanes,))
        self.LanesCount += len(lanes)
        self.ReadsCount += reads
        for slot in slots:
          self.LaneReadsCount[slot] += reads
    except Exception:
      # Best effort: flag the problem but let the parse continue.
      # (Exception, not a bare except, so Ctrl-C / SystemExit still propagate.)
      self.Msg = 'failed parsing xml file'
60
61
## TODO: Change this to read LibraryInfo.xml only ONCE per ReportRequest (do it in models.py), and read it directly from the analysis_server.
63
def getLibReads(search_term, search_by, xml_path=None):
  '''
  Parse LibraryInfo.xml and return lane/read totals for one search term.

  search_term: library name (search_by == 'ByLib') or flowcell id
               (search_by == 'ByFC').
  search_by:   'ByLib' or 'ByFC'; selects which SAX handler is used.
  xml_path:    optional path of the XML file to parse; defaults to the
               LibraryInfo.xml kept under the frontend's htsw_reports/LibInfo.

  Returns a list:
    'ByLib' -> [LanesCount, ReadsCount, Msg]
    'ByFC'  -> [LanesCount, ReadsCount, LaneReadsCount, Msg]

  Raises ValueError for any other search_by (previously this surfaced as an
  UnboundLocalError on the handler variable). Errors from opening or parsing
  the file propagate unchanged.
  '''
  if search_by == 'ByLib':
    curHandler = LibInfoHandler(search_term)
  elif search_by == 'ByFC':
    curHandler = LibInfoHandlerByFlowCell(search_term)
  else:
    raise ValueError("search_by must be 'ByLib' or 'ByFC', got %r" % (search_by,))

  if xml_path is None:
    folder_loc = '/htsworkflow/htswfrontend/htswfrontend'  # DEV
    #folder_loc = '/Library/WebServer/gaworkflow/gaworkflow/frontend'  # PROD
    xml_path = folder_loc+'/htsw_reports/LibInfo/LibraryInfo.xml'

  parser = make_parser()
  parser.setContentHandler(curHandler)

  # try/finally (rather than `with`) so the handle is released even on a
  # parse error without requiring a newer Python than the rest of the file.
  xml_file = open(xml_path)
  try:
    parser.parse(xml_file)
  finally:
    xml_file.close()

  arRes = [curHandler.LanesCount, curHandler.ReadsCount]
  if search_by == 'ByFC':
    arRes.append(curHandler.LaneReadsCount)
  arRes.append(curHandler.Msg)
  return arRes
87
def getWebPage(url, params):
  '''
  Send params to url and return the raw response body.

  url:    address to request.
  params: mapping (or sequence of pairs) passed to urllib.urlencode.

  The encoded params are always handed to urllib2.Request as its data
  argument, even when params is empty.
  NOTE(review): with urllib2 a non-None data payload makes the request a
  POST -- confirm the server expects that for the parameterless calls.

  Errors raised by urllib2 propagate to the caller.
  '''
  pdata = urllib.urlencode(params)
  req = urllib2.Request(url, pdata)
  wpage = urllib2.urlopen(req)
  try:
    return wpage.read()
  finally:
    # Release the connection even if read() fails part-way.
    wpage.close()
95
def refreshLibInfoFile(request):
  '''
  Django view: re-download LibraryInfo.xml and replace the local copy.

  The file is fetched from settings.TASKS_PROJS_SERVER. If the reply starts
  with an XML declaration, the current local LibraryInfo.xml (when present)
  is renamed to <YYMMDD>_LibraryInfo.xml and the reply is written in its
  place. Otherwise nothing on disk is touched.

  request: the incoming request; not inspected.

  Returns an HttpResponse whose body is a plain status message (on failure
  it includes the server reply that was rejected).
  '''
  url = settings.TASKS_PROJS_SERVER+'/LibraryInfo.xml'
  readw = getWebPage(url, {})

  # make sure file content starts as xml
  match_str = re.compile(r'^<\?xml.+')
  if not match_str.search(readw):
    return HttpResponse('Failed reading valid LibraryInfo.xml server reply:\n'+readw)

  # Two-digit year + month + day, taken from ONE clock reading.
  # (Stripping '20' from the year string turned 2020 into an empty string.)
  mydate = datetime.today().strftime('%y%m%d')

  folder_loc = '/htsworkflow/htswfrontend/htswfrontend'  # DEV
  #folder_loc = '/Library/WebServer/gaworkflow/gaworkflow/frontend'  # PROD
  folder = folder_loc+'/htsw_reports/LibInfo/'
  file_path = os.path.join(folder,'LibraryInfo.xml')

  # Keep the current file under a timestamped name; on a first run there is
  # nothing to back up, so skip the rename instead of failing.
  if os.path.exists(file_path):
    os.rename(file_path, folder+mydate+'_LibraryInfo.xml')

  # create file in current folder
  f = open(file_path, 'w')
  try:
    f.write(readw)
  finally:
    f.close()
  return HttpResponse('OK. LibraryInfo.xml refreshed at Web server.')