4d9dac15715e078e3a131fa36260e080d32edd20
[htsworkflow.git] / htsworkflow / frontend / experiments / experiments.py
1 # some core functions of the exp tracker module
2 from datetime import datetime, timedelta
3 try:
4     import json
5 except ImportError, e:
6     import simplejson as json
7     
8 import os
9 import re
10
11 from django.contrib.auth.decorators import login_required
12 from django.core.exceptions import ObjectDoesNotExist
13 from django.core.mail import send_mail, mail_admins
14 from django.http import HttpResponse, Http404
15
16 from htsworkflow.frontend import settings
17 from htsworkflow.frontend.experiments.models import FlowCell, DataRun
18 from htsworkflow.frontend.samples.models import Library
19 from htsworkflow.frontend.auth import require_api_key
20
def flowcell_information(flowcell_id):
    """
    Return a dictionary describing a flowcell

    The flowcell is looked up by its ``flowcell_id`` field and flattened,
    together with its lanes, into plain values.

    The 'lane_set' entry of the result maps each lane's lane_number to a
    dictionary describing that lane and the library loaded on it.

    Returns None when no flowcell with that id exists.
    """
    try:
        fc = FlowCell.objects.get(flowcell_id=flowcell_id)
    except FlowCell.DoesNotExist:
        # The exception object itself is not needed; a missing flowcell
        # is reported to the caller as None.
        return None

    lane_set = {}
    for lane in fc.lane_set.all():
        library = lane.library
        lane_set[lane.lane_number] = {
            'cluster_estimate': lane.cluster_estimate,
            'comment': lane.comment,
            'experiment_type': library.experiment_type.name,
            'experiment_type_id': library.experiment_type_id,
            'flowcell': lane.flowcell.flowcell_id,
            'lane_number': int(lane.lane_number),
            'library_name': library.library_name,
            'library_id': library.id,
            'library_species': library.library_species.scientific_name,
            'pM': float(lane.pM),
            # read length is stored on the flowcell, not on the lane
            'read_length': fc.read_length,
        }

    info = {
        'advanced_run': fc.advanced_run,
        'cluster_station_id': fc.cluster_station_id,
        'cluster_station': fc.cluster_station.name,
        'control_lane': int(fc.control_lane),
        # 'datarun_set': how should this be represented?,
        'flowcell_id': fc.flowcell_id,
        'id': fc.id,
        'lane_set': lane_set,
        'notes': fc.notes,
        'paired_end': fc.paired_end,
        'read_length': fc.read_length,
        # isoformat() gives a string, unlike the raw run_date value
        'run_date': fc.run_date.isoformat(),
        'sequencer_id': fc.sequencer_id,
        'sequencer': fc.sequencer.name,
    }

    return info
63
def flowcell_json(request, fc_id):
    """
    Serve the description of flowcell ``fc_id`` as a JSON document.

    The description is meant to hold enough information to generate a
    config file.  require_api_key(request) is called before anything
    else; an unknown flowcell id results in Http404.
    """
    require_api_key(request)

    description = flowcell_information(fc_id)
    if description is not None:
        body = json.dumps(description)
        return HttpResponse(body, mimetype = 'application/json')

    # flowcell_information signals "no such flowcell" with None
    raise Http404
77     
def updStatus(request):
    """
    Record a new status for a data run and report the outcome as text.

    Request parameters (read from request.REQUEST):
      fcid  -- flowcell id; only echoed back when the run is not found
      runf  -- run folder, used to look up the DataRun record
      updst -- new status, stored in the record's run_status
      msg   -- optional text appended to the record's run_note

    Access is limited to superusers connecting from an address listed in
    settings.ALLOWED_IPS.  Every outcome, including refusals and missing
    parameters, is returned as an HttpResponse with a short message body.
    """
    ClIP = request.META['REMOTE_ADDR']

    # Fall back to a placeholder so the denial message can still be built
    # when no user is attached to the request.
    user = getattr(request, 'user', 'none')

    #Check access permission
    # getattr() because the placeholder string has no is_superuser
    # attribute; such a request is simply denied.
    is_superuser = getattr(user, 'is_superuser', False)
    if not (is_superuser and ClIP in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, ClIP))

    # ~~~~~~Parameters for the job ~~~~
    params = request.REQUEST
    for name, complaint in (('fcid', 'missing fcid'),
                            ('runf', 'missing runf'),
                            ('updst', 'missing status')):
        if name not in params:
            return HttpResponse(complaint)
    fcid = params['fcid']
    runfolder = params['runf']
    UpdatedStatus = params['updst']

    # Update Data Run status in DB
    try:
        rec = DataRun.objects.get(run_folder=runfolder)
    except ObjectDoesNotExist:
        return HttpResponse("entry not found: "+fcid+", "+runfolder)

    rec.run_status = UpdatedStatus

    #if there's a message update that too
    # timestamp with minute resolution, e.g. "2009-01-31 14:05"
    mytimestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
    if 'msg' in params:
        # NOTE(review): assumes run_note may be unset (None) on a fresh
        # record -- confirm against the DataRun model.
        rec.run_note = (rec.run_note or '') + ", "+params['msg']+" ("+mytimestamp+")"
    elif UpdatedStatus == '1':
        rec.run_note = "Started ("+mytimestamp+")"

    rec.save()

    # The record is already saved at this point, so a status that has no
    # label must not turn the reply into a server error; echo the raw
    # value instead.
    try:
        status_label = str(DataRun.RUN_STATUS_CHOICES[int(UpdatedStatus)][1])
    except (ValueError, IndexError):
        status_label = UpdatedStatus

    # Notifying the researcher by email was attempted here but is
    # disabled: sending failed with (49, "Can't assign requested address").
    return HttpResponse("Hello "+settings.ALLOWED_IPS[ClIP]+". Updated to:'"+status_label+"'")
140
def generateConfile(request,fcid):
    """
    Build the text of a config file for the flowcell ``fcid``.

    The text starts with four fixed settings and is followed by a
    GENOME_DIR and an ELAND_GENOME line for every lane of the flowcell,
    each prefixed with "<lane number>:".  Lines are joined with
    os.linesep.

    ``request`` is accepted but not used.  No access check is done here.

    When the flowcell (or a related record) cannot be found, a one-line
    'Entry not found ...' message is returned instead of a config.
    """
    config = [
        'READ_LENGTH 25',
        'ANALYSIS eland',
        'GENOME_FILE all_chr.fa',
        'ELAND_MULTIPLE_INSTANCES 8',
    ]
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            prefix = str(lane.lane_number) + ":"
            species = lane.library.library_species.scientific_name
            config.append(prefix + genome_dir + species)
            config.append(prefix + eland_genome + species)
    except ObjectDoesNotExist:
        # Return the message as-is.  Passing the bare string through
        # os.linesep.join() would put every character on its own line.
        return 'Entry not found for fcid  = '+fcid

    return os.linesep.join(config)
167
def getConfile(req):
    """
    Return the config file text of a data run as a text/plain response.

    Request parameters (read from req.REQUEST):
      fcid -- flowcell id, used when a config has to be generated
      runf -- run folder, used to look up the DataRun record

    Only clients whose address is listed in settings.ALLOWED_IPS are
    served.  When the stored config_params does not start with a
    READ_LENGTH line, a config is generated with generateConfile() and,
    if that result looks valid, saved back to the record.  If either
    parameter is missing the body is 'Nothing found'.
    """
    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: "+ClIP)

    request = req.REQUEST
    cnfgfile = 'Nothing found'

    if 'fcid' in request and 'runf' in request:
        fcid = request['fcid']
        runfolder = request['runf']
        # A usable config starts with a READ_LENGTH setting
        match_str = re.compile('^READ_LENGTH.+')
        try:
            rec = DataRun.objects.get(run_folder=runfolder) #,flowcell_id=fcid)
        except ObjectDoesNotExist:
            cnfgfile = 'Entry not found for RunFolder = '+runfolder
        else:
            # NOTE(review): assumes config_params may be unset (None);
            # treat that like an empty config so one gets generated.
            cnfgfile = rec.config_params or ''
            if not match_str.search(cnfgfile):
                cnfgfile = generateConfile(request,fcid)
                if match_str.search(cnfgfile):
                    # reuse the record fetched above instead of querying
                    # for it a second time
                    rec.config_params = cnfgfile
                    rec.save()
                else:
                    cnfgfile = 'Failed generating config params for RunFolder = '+runfolder +', Flowcell id = '+ fcid+ ' Config Text:\n'+cnfgfile

    return HttpResponse(cnfgfile, mimetype='text/plain')
201
def getLaneLibs(req):
    """
    Return an XML description of the libraries on the 8 lanes of a flowcell.

    Request parameters (read from req.REQUEST):
      fcid -- flowcell id to describe

    Only clients whose address is listed in settings.ALLOWED_IPS are
    served.  The document is a <SolexaResult> element (dated today, in
    YYMMDD form) holding one <Lane> element per lane.  It is sent as
    text/plain.  A missing parameter or an unknown flowcell produces a
    one-line plain-text message instead.
    """
    # Local import: only this function needs it.
    from xml.sax.saxutils import escape

    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    request = req.REQUEST
    if 'fcid' not in request:
        return HttpResponse('Missing input: flowcell id', mimetype='text/plain')
    fcid = request['fcid']

    def attr(value):
        # Text placed inside a double-quoted XML attribute: escape the
        # markup characters and the quote itself.
        return escape('%s' % (value,), {'"': '&quot;'})

    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)
        #Ex: 071211
        # One strftime call replaces the hand-built date, which relied on
        # an undefined replace() function and on three separate today()
        # calls.
        mydate = datetime.today().strftime('%y%m%d')
        lines = [
            '<?xml version="1.0" ?>',
            '<SolexaResult Date="%s" Flowcell="%s" Client="%s">' % (
                mydate, attr(fcid), attr(settings.ALLOWED_IPS[ClIP])),
        ]
        # NOTE(review): this reads lane_1_library .. lane_8_library from
        # the flowcell while other functions in this file use
        # flowcell.lane_set -- confirm these attributes still exist.
        for index in range(1, 9):
            library = getattr(rec, 'lane_%d_library' % index)
            lines.append(
                '<Lane Index="%d" Name="%s" Library="%s" Genome="%s" PrimerName="" PrimerSeq=""/>' % (
                    index,
                    attr(library.library_name),
                    attr(library.id),
                    attr(library.library_species.use_genome_build)))
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: '+fcid

    return HttpResponse(outputfile, mimetype='text/plain')
242
def estimateFlowcellDuration(flowcell):
    """
    Rough estimate of the total time needed to process a flowcell.

    The number of cycles is the flowcell's read_length, doubled for
    paired end runs.  Each cycle is charged 1.5 hours of sequencing plus
    a guessed 800 seconds of pipeline analysis.

    Returns a timedelta.
    """
    # 1.5 hours per cycle on the sequencer
    seconds_sequencing = 3600 * 1.5
    # rough guess for the analysis pipeline
    seconds_pipeline = 800

    if flowcell.paired_end:
        total_cycles = flowcell.read_length * 2
    else:
        total_cycles = flowcell.read_length

    time_on_sequencer = timedelta(seconds=total_cycles * seconds_sequencing)
    time_in_pipeline = timedelta(seconds=total_cycles * seconds_pipeline)
    return time_on_sequencer + time_in_pipeline
261
def estimateFlowcellTimeRemaining(flowcell):
    """
    Estimate how much processing time is left for a flowcell.

    This is the estimated total duration minus the time elapsed between
    the flowcell's run_date and now.  Returns a timedelta.
    """
    total_estimate = estimateFlowcellDuration(flowcell)

    # how long the run has been going already
    elapsed = datetime.now() - flowcell.run_date

    return total_estimate - elapsed
270
def roundToDays(estimate):
    """
    Bracket a time estimate between two whole-day values.

    Returns a tuple (low, high) of timedeltas: low keeps only the days
    component of the estimate and high is exactly one day more.
    """
    whole_days = estimate.days
    # drop everything below a day
    lower_bound = timedelta(days=whole_days)
    # same, one day later
    upper_bound = timedelta(days=whole_days + 1)

    return (lower_bound, upper_bound)
281     
282
def makeUserLaneMap(flowcell):
    """
    Map each interested user to the lanes of the flowcell they care about.

    A user is associated with a lane through the affiliations of the
    library on that lane.  A lane is listed once for every affiliation
    that links the user to it.

    Returns a dictionary of user -> list of lanes.
    """
    user_lane_pairs = (
        (member, flowcell_lane)
        for flowcell_lane in flowcell.lane_set.all()
        for group in flowcell_lane.library.affiliations.all()
        for member in group.users.all()
    )

    lanes_by_user = {}
    for member, flowcell_lane in user_lane_pairs:
        if member not in lanes_by_user:
            lanes_by_user[member] = []
        lanes_by_user[member].append(flowcell_lane)

    return lanes_by_user
296
def getUsersForFlowcell(flowcell):
    """
    Return the set of users reachable from the flowcell's lanes through
    the affiliations of each lane's library.
    """
    interested = set()

    for flowcell_lane in flowcell.lane_set.all():
        for group in flowcell_lane.library.affiliations.all():
            interested.update(group.users.all())

    return interested
306     
def makeUserLibraryMap(libraries):
    """
    Given an iterable of libraries, return a mapping of the users
    interested in those libraries.

    Users are found through each library's affiliations.  A library is
    listed once for every affiliation that links the user to it.

    Returns a dictionary of user -> list of libraries.
    """
    user_library_pairs = (
        (member, lib)
        for lib in libraries
        for group in lib.affiliations.all()
        for member in group.users.all()
    )

    libraries_by_user = {}
    for member, lib in user_library_pairs:
        if member not in libraries_by_user:
            libraries_by_user[member] = []
        libraries_by_user[member].append(lib)

    return libraries_by_user
320
def makeAffiliationLaneMap(flowcell):
    """
    Map each affiliation to the lanes of the flowcell whose library
    belongs to that affiliation.

    Returns a dictionary of affiliation -> list of lanes.
    """
    lanes_by_affiliation = {}

    for flowcell_lane in flowcell.lane_set.all():
        for group in flowcell_lane.library.affiliations.all():
            if group in lanes_by_affiliation:
                lanes_by_affiliation[group].append(flowcell_lane)
            else:
                lanes_by_affiliation[group] = [flowcell_lane]

    return lanes_by_affiliation
329
def makeEmailLaneMap(flowcell):
    """
    Collect the email addresses tied to a flowcell and the lanes each
    address is associated with.

    Addresses are taken both from the affiliations of each lane's library
    and from the users belonging to those affiliations.  Addresses that
    are None or empty are skipped.

    Returns a dictionary of email address -> set of lanes.
    """
    lanes_by_email = {}

    def remember(address, flowcell_lane):
        # ignore missing or blank addresses
        if address is None or len(address) == 0:
            return
        if address not in lanes_by_email:
            lanes_by_email[address] = set()
        lanes_by_email[address].add(flowcell_lane)

    for flowcell_lane in flowcell.lane_set.all():
        for group in flowcell_lane.library.affiliations.all():
            remember(group.email, flowcell_lane)
            for member in group.users.all():
                remember(member.email, flowcell_lane)

    return lanes_by_email
344
345     return emails