Initial port to python3
[htsworkflow.git] / htsworkflow / frontend / reports / libinfopar.py
1 from django.conf import settings
2 from django.http import HttpResponse
3 from datetime import datetime
4 from string import *
5 import re
6 from xml.sax import make_parser
7 from xml.sax.handler import ContentHandler
8 import urllib.request, urllib.parse, urllib.error
9 import urllib.request, urllib.error, urllib.parse
10 import os
11
12 '''
13 Example library node from LibraryInfo.xml:
14 <Library Name="SL14">
15 <Track Flowcell="FC10135" Lane="4" Filename="071005_FC10135_s4_FoxP2_polyclonal_pfsk1_SL14.align_25.hg18.txt" Count=" 2438679" Complexity="4.51989e-06"/>
16 <Track Flowcell="FC11977" Lane="6" Filename="070928_FC11977_s6_FoxP2_polyclonal_pfsk1_SL14.align_25.hg18.txt" Count=" 2007880" Complexity="0"/>
17 <Track Flowcell="FC13593" Lane="5" Filename="071002_FC13593_s5_FoxP2_polyclonal_pfsk1_SL14.align_25.hg18.txt" Count=" 2533720" Complexity="1.97771e-06"/>
18 </Library>
19 '''
20 class LibInfoHandler(ContentHandler):
21
22   def __init__ (self, searchTerm):
23     self.searchTerm= searchTerm
24     self.currlibid = ''
25     self.LanesCount, self.ReadsCount = 0, 0
26     self.Msg = 'OK'
27        
28   def startElement(self, name, attrs):
29     try:
30       if name == 'Library':     
31         self.currlibid = attrs.get('Name',"")      
32       elif name == 'Track' and self.searchTerm == self.currlibid:
33         self.LanesCount += len(attrs.get('Lane',""))
34         self.ReadsCount += int(attrs.get('Count',""))
35       #else:
36       #  self.Msg += ' | name = '+name+', currlibid = '+ self.currlibid
37     except: 
38       self.Msg = 'failed parsing xml file'
39     return
40
41   #def characters (self, ch):
42     # return ..
43
44   #def endElement(self, name):
45     # return ..
46
47
48 ## TO DO: Change this to read the LibraryInfo.xml only ONCE per ReoprtRequest (do it in the models.py). + Read it directly from the analysis_server
49
50 def getLibReads(libid):
51   searchTerm= libid
52   parser = make_parser()   
53   curHandler = LibInfoHandler(searchTerm)
54   parser.setContentHandler(curHandler)
55   reports_dir = os.path.split(__file__)[0]
56   library_info = os.path.join(reports_dir, 'LibraryInfo.xml')
57   parser.parse(open(library_info))
58   arRes = []
59   arRes.append(curHandler.LanesCount) 
60   arRes.append(curHandler.ReadsCount)
61   arRes.append(curHandler.Msg)
62
63   return arRes
64
65 def getWebPage(url,params):
66   pdata = urllib.parse.urlencode(params)
67   req = urllib.request.Request(url,pdata)
68   wpage = urllib.request.urlopen(req)
69   restext = wpage.read()
70   wpage.close()
71   return restext
72
73 def refreshLibInfoFile(request): 
74  varStatus = 'getting conf file from exp trac server'
75  url = settings.TASKS_PROJS_SERVER+'/LibraryInfo.xml'
76  params = {}
77  readw = getWebPage(url,params)
78  # make sure file content starts as xml
79  match_str = re.compile('^<\?xml.+')
80  if match_str.search(readw): ##tempstr):
81    # Rename current file with timestamp
82    year = datetime.today().year.__str__()
83    year = replace(year,'20','')
84    month = datetime.today().month
85    if month < 10: month = "0"+month.__str__()
86    else: month = month.__str__()
87    day = datetime.today().day
88    if day < 10: day = "0"+day.__str__()
89    else: day = day.__str__()
90    mydate = year+month+day
91    folder_loc = '/htsworkflow/htswfrontend/htswfrontend'  # DEV                                                                                                                          
92    #folder_loc = '/Library/WebServer/gaworkflow/gaworkflow/frontend'  # PROD
93    folder = folder_loc+'/htsw_reports/LibInfo/'
94    os.rename(folder+'LibraryInfo.xml',folder+mydate+'_LibraryInfo.xml')
95    # create file in curret folder
96    file_path = os.path.join(folder,'LibraryInfo.xml')
97    f = open(file_path, 'w')
98    f.write(readw)
99    f.close()
100    varStatus = 'OK. LibraryInfo.xml refreshed at Web server.'
101  else:
102    varStatus = 'Failed reading valid LibraryInfo.xml server reply:\n'+readw
103  return HttpResponse(varStatus)