From: Rami Rauch Date: Thu, 14 Aug 2008 20:28:05 +0000 (+0000) Subject: Adding the new Reports component. X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=refs%2Fheads%2Fstanford Adding the new Reports component. --- diff --git a/gaworkflow/frontend/analys_track/an_urls.py b/gaworkflow/frontend/analys_track/an_urls.py index 3079cbe..683ae78 100644 --- a/gaworkflow/frontend/analys_track/an_urls.py +++ b/gaworkflow/frontend/analys_track/an_urls.py @@ -2,7 +2,7 @@ from django.conf.urls.defaults import * urlpatterns = patterns('', (r'^$', 'gaworkflow.frontend.analys_track.views.index'), - (r'^(?P.+)/$', 'gaworkflow.frontend.analys_track.views.detail'), + #(r'^(?P.+)/$', 'gaworkflow.frontend.analys_track.views.detail'), (r'^updStatus$', 'gaworkflow.frontend.analys_track.main.updStatus'), (r'^getProjects$', 'gaworkflow.frontend.analys_track.main.getProjects'), ) diff --git a/gaworkflow/frontend/htsw_reports/LibraryInfo.xml b/gaworkflow/frontend/htsw_reports/LibraryInfo.xml new file mode 100644 index 0000000..2a573e3 --- /dev/null +++ b/gaworkflow/frontend/htsw_reports/LibraryInfo.xml @@ -0,0 +1,1342 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/gaworkflow/frontend/htsw_reports/__init__.py b/gaworkflow/frontend/htsw_reports/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/gaworkflow/frontend/htsw_reports/libinfopar.py b/gaworkflow/frontend/htsw_reports/libinfopar.py new file mode 100644 index 0000000..b54d742 --- /dev/null +++ b/gaworkflow/frontend/htsw_reports/libinfopar.py @@ -0,0 +1,50 @@ +from xml.sax import make_parser +from xml.sax.handler import ContentHandler + + +''' +Example library node from LibraryInfo.xml: + + + + + +''' +class LibInfoHandler(ContentHandler): + + def __init__ (self, searchTerm): + self.searchTerm= searchTerm + self.currlibid = '' + self.LanesCount, self.ReadsCount = 0, 0 + self.Msg = '' + + def startElement(self, name, attrs): + if name == 'Library': + self.currlibid = attrs.get('Name',"") + elif name == 'Track' and self.searchTerm == self.currlibid: + self.LanesCount += len(attrs.get('Lane',"")) + self.ReadsCount += int(attrs.get('Count',"")) + else: + self.Msg += ' | name = '+name+', currlibid = '+ 
self.currlibid + return + + #def characters (self, ch): + # return .. + + #def endElement(self, name): + # return .. + + +## TO DO: Change this to read the LibraryInfo.xml only ONCE per ReportRequest (do it in the models.py). + Read it directly from the analysis_server + +def getLibReads(libid): + searchTerm= libid + parser = make_parser() + curHandler = LibInfoHandler(searchTerm) + parser.setContentHandler(curHandler) + parser.parse(open('/gaworkflow/gaworkflow/frontend/htsw_reports/LibraryInfo.xml')) + arRes = [] + arRes.append(curHandler.LanesCount) + arRes.append(curHandler.ReadsCount) + return arRes + diff --git a/gaworkflow/frontend/htsw_reports/models.py b/gaworkflow/frontend/htsw_reports/models.py new file mode 100644 index 0000000..782ebed --- /dev/null +++ b/gaworkflow/frontend/htsw_reports/models.py @@ -0,0 +1,302 @@ +from django.db import models +from django.db.models import Q +from django.core.exceptions import ObjectDoesNotExist +from datetime import datetime +from gaworkflow.frontend.fctracker.models import * +from gaworkflow.frontend.analys_track.models import * +from gaworkflow.frontend.exp_track.models import * +from string import * +import re +##from p1 import LibInfo +from libinfopar import * + +## This is a table based REPORT generator. The goal is to display a Progress Report for all the ENCODE projects, based on Study Name (e.g. NRSF, FOXP2, Methy-Seq on .. etc). + +class ProgressReport(models.Model): + st_sbj = models.ForeignKey(Project,limit_choices_to = Q(project_name__startswith='ENCODE '),related_name='project',db_index=True,verbose_name="Studied Subject") + interactome_complete = models.BooleanField(default=False) + + def Study(self): + str = self.st_sbj.__str__() + str += '

' + str += 'Edit Project' + return str + Study.allow_tags = True + + def submit_to_DCC(self): + varText = '' + if self.note_about_DCC: + varText += '
Note:
'+self.note_about_DCC + return '%s
%s' % (self.submitted_to_DCC,varText) + submit_to_DCC.allow_tags = True + + def submit_to_NCBI(self): + varText = '' + if self.note_about_NCBI: + varText += '
Note:
'+self.note_about_NCBI + return '%s
%s' % (self.submitted_to_NCBI,varText) + submit_to_NCBI.allow_tags = True + + #REPS = ((1,1),(2,2),(3,3)) + #replicate = models.PositiveSmallIntegerField(choices=REPS,verbose_name='Replicate Number') + + ## -- Utility functions + def unique(self,s): + """Return a list of the elements in s, but without duplicates. + For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3], + unique("abcabc") some permutation of ["a", "b", "c"], and + unique(([1, 2], [2, 3], [1, 2])) some permutation of + [[2, 3], [1, 2]]. + For best speed, all sequence elements should be hashable. Then + unique() will usually work in linear time. + If not possible, the sequence elements should enjoy a total + ordering, and if list(s).sort() doesn't raise TypeError it's + assumed that they do enjoy a total ordering. Then unique() will + usually work in O(N*log2(N)) time. + If that's not possible either, the sequence elements must support + equality-testing. Then unique() will usually work in quadratic + time. + """ + + n = len(s) + if n == 0: + return [] + + # Try using a dict first, as that's the fastest and will usually + # work. If it doesn't work, it will usually fail quickly, so it + # usually doesn't cost much to *try* it. It requires that all the + # sequence elements be hashable, and support equality comparison. + u = {} + try: + for x in s: + u[x] = 1 + except TypeError: + del u # move on to the next method + else: + return u.keys() + + # We can't hash all the elements. Second fastest is to sort, + # which brings the equal elements together; then duplicates are + # easy to weed out in a single pass. + # NOTE: Python's list.sort() was designed to be efficient in the + # presence of many duplicate elements. This isn't true of all + # sort functions in all languages or libraries, so this approach + # is more effective in Python than it may be elsewhere. 
+ try: + t = list(s) + t.sort() + except TypeError: + del t # move on to the next method + else: + assert n > 0 + last = t[0] + lasti = i = 1 + while i < n: + if t[i] != last: + t[lasti] = last = t[i] + lasti += 1 + i += 1 + return t[:lasti] + + # Brute force is all that's left. + u = [] + for x in s: + if x not in u: + u.append(x) + return u + + + ## --- LIBARAY PREPARATION SECTION + def getLibIds(self): + ptasks = self.st_sbj.tasks.distinct() + arLibs = [] + for t in ptasks: + if t.subject1 is not None: + arLibs.append(t.subject1.library_id) + if t.subject2 is not None: + arLibs.append(t.subject2.library_id) + arLibs = self.unique(arLibs) + return arLibs #.sort() + + def getFCInfo(self,libid): ## This is the haviest function + arFCLanes = [] + ##Test return arFCLanes + # can't get this to work: FC_L1 = FlowCell.objects.filter(lane_5_library__exact=libid) + allFCs = FlowCell.objects.all() + for f in allFCs: + entry = '' + lanes = [] + #found = False +# for i in range(1,9): +# if eval('f.lane_'+i.__str__()+'_library.library_id==libid'): +# lanes.append(i.__str__()) +# found = True + +# maybe a bit faster this way: + if f.lane_1_library.library_id==libid: + lanes.append('1') + #found = True + if f.lane_2_library.library_id==libid: + lanes.append('2') + #found = True + if f.lane_3_library.library_id==libid: + lanes.append('3') + #found = True + if f.lane_4_library.library_id==libid: + lanes.append('4') + #found = True + if f.lane_5_library.library_id==libid: + lanes.append('5') + #found = True + if f.lane_6_library.library_id==libid: + lanes.append('6') + #found = True + if f.lane_7_library.library_id==libid: + lanes.append('7') + #found = True + if f.lane_8_library.library_id==libid: + lanes.append('8') + #found = True + + + #if found: + if len(lanes)>0: + rundate = re.sub(pattern="\s.*$",repl="",string=f.run_date.__str__()) + entry = ''+f.flowcell_id + ' Lanes No.: '+','.join(lanes)+' ('+rundate+')' + arFCLanes.append(entry) + if len(arFCLanes)==0: + 
arFCLanes.append('Flowcell not found.') + return arFCLanes + + def ab_batch(self): + ## To have the Company's lot number, appearing on the (source) tube, we need to add a new Field in Library. + arlibs = self.getLibIds() + tstr = '
    ' ##Ab from '+len(arlibs).__str__()+' libs: ' + arRows = [] + for l in arlibs: + try: + rec = Library.objects.get(library_id=l,antibody__isnull=False) + arRows.append('
  • '+rec.antibody.antibodies+' for '+rec.antibody.antigene+' (src:'+rec.antibody.source+', cat:'+rec.antibody.catalog+')
  • ') + except ObjectDoesNotExist: + tstr += "" + tstr += "".join(self.unique(arRows))+'
' + return tstr + ab_batch.allow_tags = True + + def cell_line(self): + arlibs = self.getLibIds() + tstr = '
    ' + arRows = [] + for l in arlibs: + try: + rec = Library.objects.get(library_id=l) + arRows.append('
  • '+rec.cell_line.cellline_name+' ('+rec.condition.condition_name+')
  • ') + except ObjectDoesNotExist: + tstr += "" + tstr += "".join(self.unique(arRows))+'
' + return tstr + cell_line.allow_tags = True + + def cell_harvest_batch(self): # <- data now displayed in "cell_line" + ## name + date + arlibs = self.getLibIds() + tstr = '
    ' + arRows = [] + for l in arlibs: + try: + rec = Library.objects.get(library_id=l) + arRows.append('
  • '+rec.condition.condition_name+'
  • ') + except ObjectDoesNotExist: + tstr += "" + tstr += "".join(self.unique(arRows))+'
' + return tstr + cell_harvest_batch.allow_tags = True + + def ChIP_made(self): + ## person + date + return '...' + + def library(self): + ## Lib Id + Date + Person + tstr = '' + arlibs = self.getLibIds() ##.sort() + arlibs = arlibs + tstr +='view /hide' + tstr += '
    ' + arRows = [] + for l in arlibs: + try: + rec = Library.objects.get(library_id=l) + arRows.append('
  • '+rec.library_id+': '+rec.library_name+'.
    Made By: '+rec.made_by+', On: '+ rec.creation_date.__str__()+'
  • ') + except ObjectDoesNotExist: + tstr += "" + tstr += "".join(self.unique(arRows))+'
' + return tstr + library.allow_tags = True + + + ## -- SEQUENCING SECTION + def sequencing(self): + ## FCId + Lane + Date + arlibs = self.getLibIds() + tstr ='view /hide' + tstr += '
    ' + for l in arlibs: + tstr += '
  • '+l+':
    '+(' / '.join(self.getFCInfo(l)))+'
  • ' + tstr += '
' + return tstr + sequencing.allow_tags = True + + def aligned_reads(self): + ## Mega reads/lane + arlibs = self.getLibIds() + tstr = 'view /hide' + tstr += '
' + tstr += '' + LanesCnt, ReadsCnt = 0, 0 + for l in arlibs: + res = getLibReads(l) + LanesCnt += res[0] + ReadsCnt += res[1] + rc = "%1.2f" % (res[1]/1000000.0) + tstr += '' + tstr += '
Library IdTotal LanesM Reads
'+l+''+res[0].__str__()+''+rc+'
' + #tstr += 'Project results page' + tstr += '
' + myNum = (ReadsCnt/1000000.0) + myNum = "%1.2f" % (myNum) + tstr += '
Total: '+LanesCnt.__str__()+' lanes and '+myNum+' M Reads
' + tstr += 'Project results page' + return tstr + aligned_reads.allow_tags = True + + def peak_calling(self): + # date + what etc.. + return 'coming up ..' + + QPCR = models.CharField(max_length=500,blank=True,null=True) + submitted_to_DCC = models.DateTimeField(core=True,blank=True,null=True) + submitted_to_NCBI = models.DateTimeField(core=True,blank=True,null=True) + note_about_DCC = models.TextField(blank=True) + note_about_NCBI = models.TextField(blank=True) + + def __str__(self): + return '"%s" - %s' % (self.st_sbj,self.interactome_complete) + + class Meta: + #verbose_name_plural = "Reports" + ordering = ["id"] + + class Admin: + list_display = ('Study','ab_batch','cell_line','library','sequencing','aligned_reads','QPCR','submit_to_DCC','submit_to_NCBI','interactome_complete') + ## list_filter = ('interactome_complete') + diff --git a/gaworkflow/frontend/settings.py b/gaworkflow/frontend/settings.py index 938ed4e..79ca3d7 100644 --- a/gaworkflow/frontend/settings.py +++ b/gaworkflow/frontend/settings.py @@ -15,7 +15,7 @@ EMAIL_HOST = 'myerslab.stanford.edu' EMAIL_PORT = 25 DATABASE_ENGINE = 'sqlite3' # 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'ado_mssql'. -DATABASE_NAME = os.path.abspath('/gaworkflow/dev_fctracker.db') # Or path to database file if using sqlite3. +DATABASE_NAME = os.path.abspath('/gaworkflow/dev_fctracker.db') # Or path to database file if using sqlite3. DATABASE_USER = '' # Not used with sqlite3. DATABASE_PASSWORD = '' # Not used with sqlite3. DATABASE_HOST = '' # Set to empty string for localhost. Not used with sqlite3.