htsworkflow/frontend/reports/libinfopar.py

   1 from django.http import HttpResponse
   2 from datetime import datetime
   3 from string import *
   4 import re
   5 from xml.sax import make_parser
   6 from xml.sax.handler import ContentHandler
   7 import urllib
   8 import urllib2
   9 import os
  10
  11 '''
  12 Example library node from LibraryInfo.xml:
  13 <Library Name="SL14">
  14 <Track Flowcell="FC10135" Lane="4" Filename="071005_FC10135_s4_FoxP2_polyclonal_pfsk1_SL14.align_25.hg18.txt" Count=" 2438679" Complexity="4.51989e-06"/>
  15 <Track Flowcell="FC11977" Lane="6" Filename="070928_FC11977_s6_FoxP2_polyclonal_pfsk1_SL14.align_25.hg18.txt" Count=" 2007880" Complexity="0"/>
  16 <Track Flowcell="FC13593" Lane="5" Filename="071002_FC13593_s5_FoxP2_polyclonal_pfsk1_SL14.align_25.hg18.txt" Count=" 2533720" Complexity="1.97771e-06"/>
  17 </Library>
  18 '''
  19 class LibInfoHandler(ContentHandler):
  20
  21   def __init__ (self, searchTerm):
  22     self.searchTerm= searchTerm
  23     self.currlibid = ''
  24     self.LanesCount, self.ReadsCount = 0, 0
  25     self.Msg = ''
  26
  27   def startElement(self, name, attrs):
  28     if name == 'Library':
  29       self.currlibid = attrs.get('Name',"")
  30     elif name == 'Track' and self.searchTerm == self.currlibid:
  31       self.LanesCount += len(attrs.get('Lane',""))
  32       self.ReadsCount += int(attrs.get('Count',""))
  33     else:
  34       self.Msg += ' | name = '+name+', currlibid = '+ self.currlibid
  35     return
  36
  37   #def characters (self, ch):
  38     # return ..
  39
  40   #def endElement(self, name):
  41     # return ..
  42
  43
  44 ## TODO: Change this to read LibraryInfo.xml only ONCE per ReportRequest (do it in models.py), and read it directly from the analysis_server.
  45
  46 def getLibReads(libid):
  47   searchTerm= libid
  48   parser = make_parser()
  49   curHandler = LibInfoHandler(searchTerm)
  50   parser.setContentHandler(curHandler)
  51   parser.parse(open('/htsworkflow/htswfrontend/htswfrontend/htsw_reports/LibInfo/LibraryInfo.xml'))
  52   arRes = []
  53   arRes.append(curHandler.LanesCount)
  54   arRes.append(curHandler.ReadsCount)
  55   return arRes
  56
  57 def getWebPage(url,params):
  58   pdata = urllib.urlencode(params)
  59   req = urllib2.Request(url,pdata)
  60   wpage = urllib2.urlopen(req)
  61   restext = wpage.read()
  62   wpage.close()
  63   return restext
  64
  65 def refreshLibInfoFile(request):
  66  varStatus = 'getting conf file from exp trac server'
  67  url = 'http://m304-apple-server.stanford.edu/ENCODE/LibraryInfo.xml'
  68  params = {}
  69  readw = getWebPage(url,params)
  70  # make sure file content starts as xml
  71  match_str = re.compile('^<\?xml.+')
  72  if match_str.search(readw): ##tempstr):
  73    # Rename current file with timestamp
  74    year = datetime.today().year.__str__()
  75    year = replace(year,'20','')
  76    month = datetime.today().month
  77    if month < 10: month = "0"+month.__str__()
  78    else: month = month.__str__()
  79    day = datetime.today().day
  80    if day < 10: day = "0"+day.__str__()
  81    else: day = day.__str__()
  82    mydate = year+month+day
  83    folder = '/htsworkflow/htswfrontend/htswfrontend/htsw_reports/LibInfo/'
  84    os.rename(folder+'LibraryInfo.xml',folder+mydate+'_LibraryInfo.xml')
  85    # create file in curret folder
  86    file_path = os.path.join(folder,'LibraryInfo.xml')
  87    f = open(file_path, 'w')
  88    f.write(readw)
  89    f.close()
  90    varStatus = 'OK. LibraryInfo.xml refreshed at Web server.'
  91  else:
  92    varStatus = 'Failed reading valid LibraryInfo.xml server reply:\n'+readw
  93  return HttpResponse(varStatus)