882ea89c9961db701c3168f19efeb6910856a94c
[htsworkflow.git] / htsworkflow / frontend / experiments / experiments.py
1 # some core functions of the exp tracker module
2 from datetime import datetime, timedelta
3 try:
4     import json
5 except ImportError, e:
6     import simplejson as json
7     
8 import os
9 import re
10
11 from django.contrib.auth.decorators import login_required
12 from django.core.exceptions import ObjectDoesNotExist
13 from django.core.mail import send_mail, mail_admins
14 from django.http import HttpResponse, Http404
15
16 from htsworkflow.frontend.auth import require_api_key
17 from htsworkflow.frontend import settings
18 from htsworkflow.frontend.experiments.models import \
19     FlowCell, \
20     DataRun, \
21     Lane, \
22     LANE_STATUS_MAP
23 from htsworkflow.frontend.samples.models import Library, HTSUser
24
def flowcell_information(flowcell_id):
    """
    Describe a single flowcell as a plain dictionary.

    The flowcell is looked up by treating flowcell_id as a prefix of the
    stored flowcell id.  Returns None when no flowcell matches.  Otherwise
    the result holds the flowcell's own attributes plus a 'lane_set'
    dictionary, keyed by lane number, with one description per lane.
    """
    try:
        flowcell = FlowCell.objects.get(flowcell_id__startswith=flowcell_id)
    except FlowCell.DoesNotExist:
        return None

    def describe_lane(lane):
        # Flatten one lane (and the library loaded on it) into simple values.
        return {
            'cluster_estimate': lane.cluster_estimate,
            'comment': lane.comment,
            'experiment_type': lane.library.experiment_type.name,
            'experiment_type_id': lane.library.experiment_type_id,
            'flowcell': lane.flowcell.flowcell_id,
            'lane_number': int(lane.lane_number),
            'library_name': lane.library.library_name,
            'library_id': lane.library.id,
            'library_species': lane.library.library_species.scientific_name,
            'pM': unicode(lane.pM),
            'read_length': lane.flowcell.read_length,
            'status_code': lane.status,
            'status': LANE_STATUS_MAP[lane.status]
        }

    lanes_by_number = {}
    for lane in flowcell.lane_set.all():
        lanes_by_number[lane.lane_number] = describe_lane(lane)

    # The control lane is optional; only convert it when one is set.
    control_lane = None
    if flowcell.control_lane is not None:
        control_lane = int(flowcell.control_lane)

    return {
        'advanced_run': flowcell.advanced_run,
        'cluster_station_id': flowcell.cluster_station_id,
        'cluster_station': flowcell.cluster_station.name,
        'control_lane': control_lane,
        # 'datarun_set': how should this be represented?,
        'flowcell_id': flowcell.flowcell_id,
        'id': flowcell.id,
        'lane_set': lanes_by_number,
        'notes': flowcell.notes,
        'paired_end': flowcell.paired_end,
        'read_length': flowcell.read_length,
        'run_date': flowcell.run_date.isoformat(),
        'sequencer_id': flowcell.sequencer_id,
        'sequencer': flowcell.sequencer.name,
    }
75
def flowcell_json(request, fc_id):
    """
    Serve the flowcell description as JSON.

    The payload is whatever flowcell_information() returns for fc_id, which
    is intended to be enough to generate a config file.  Raises Http404
    when no flowcell matches.
    """
    require_api_key(request)

    description = flowcell_information(fc_id)
    if description is None:
        raise Http404

    return HttpResponse(json.dumps(description), mimetype = 'application/json')
89
def lanes_for(username=None):
    """
    Return lanes, most recent flowcell run date first, as a list of
    dictionaries.

    When username is given, only lanes whose library has an affiliation
    containing that user are returned; with no username every lane is
    returned.  The user is fetched with HTSUser.objects.get, so an unknown
    username propagates that call's exception to the caller.
    """
    filters = {}
    if username is not None:
        owner = HTSUser.objects.get(username=username)
        filters['library__affiliations__users__id'] = owner.id

    newest_first = Lane.objects.filter(**filters).order_by('-flowcell__run_date')

    def summarize(lane):
        # Affiliations are reported as (id, name) pairs.
        affiliation_pairs = [(aff.id, aff.name)
                             for aff in lane.library.affiliations.all()]
        return {
            'flowcell': lane.flowcell.flowcell_id,
            'run_date': lane.flowcell.run_date.isoformat(),
            'lane_number': lane.lane_number,
            'library': lane.library.id,
            'library_name': lane.library.library_name,
            'comment': lane.comment,
            'affiliations': affiliation_pairs,
        }

    return [summarize(lane) for lane in newest_first]
114
def lanes_for_json(request, username):
    """
    Serve the lanes associated with a user as JSON.

    Raises Http404 when the lookup in lanes_for() raises
    ObjectDoesNotExist (for example, an unknown username).
    """
    require_api_key(request)

    try:
        lanes = lanes_for(username)
    except ObjectDoesNotExist:
        raise Http404

    # lanes is already a list of plain dictionaries, so it serializes as-is
    return HttpResponse(json.dumps(lanes), mimetype='application/json')
130                  
def updStatus(request):
    """
    Update the status (and optionally the note) of a DataRun record.

    Parameters read from request.REQUEST:
      fcid  -- flowcell id; only echoed back in the "entry not found" message
      runf  -- run folder used to look up the DataRun record
      updst -- new status; must be an integer index into
               DataRun.RUN_STATUS_CHOICES
      msg   -- optional text appended to the run note with a timestamp

    Access is limited to superusers whose REMOTE_ADDR is a key of
    settings.ALLOWED_IPS.  Every outcome (denied, missing parameter,
    unknown run folder, invalid status, success) is reported as an
    HttpResponse whose body describes what happened.
    """
    client_ip = request.META['REMOTE_ADDR']
    user = getattr(request, 'user', 'none')

    # Check access permission.  The 'none' placeholder used when the request
    # carries no user has no is_superuser attribute, so read it defensively
    # and deny access instead of raising AttributeError.
    is_superuser = getattr(user, 'is_superuser', False)
    if not (is_superuser and client_ip in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, client_ip))

    # ~~~~~~ Parameters for the job ~~~~~~
    params = request.REQUEST
    if 'fcid' not in params:
        return HttpResponse('missing fcid')
    fcid = params['fcid']

    if 'runf' not in params:
        return HttpResponse('missing runf')
    runfolder = params['runf']

    if 'updst' not in params:
        return HttpResponse('missing status')
    updated_status = params['updst']

    # Look up the Data Run; report a missing record before anything else.
    try:
        rec = DataRun.objects.get(run_folder=runfolder)
    except ObjectDoesNotExist:
        return HttpResponse("entry not found: "+fcid+", "+runfolder)

    # Validate the requested status BEFORE saving, so that a non-numeric or
    # out-of-range value can no longer be written to the record and then
    # crash while the confirmation message is being built.
    try:
        status_choice = DataRun.RUN_STATUS_CHOICES[int(updated_status)]
    except (ValueError, IndexError):
        return HttpResponse("invalid status: "+updated_status)

    rec.run_status = updated_status

    # Timestamp with minute resolution, e.g. "2009-01-31 13:45"
    mytimestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
    if 'msg' in params:
        # if there's a message append it to the note; tolerate an unset note
        rec.run_note = (rec.run_note or '')+", "+params['msg']+" ("+mytimestamp+")"
    elif updated_status == '1':
        rec.run_note = "Started ("+mytimestamp+")"

    rec.save()

    # Notifying the researcher by email is disabled: send_mail/mail_admins
    # failed here with error (49, "Can't assign requested address").
    output = "Hello "+settings.ALLOWED_IPS[client_ip]+". Updated to:'"+str(status_choice[1])+"'"
    return HttpResponse(output)
193
def generateConfile(request,fcid):
    """
    Build the analysis config text for the flowcell whose id is fcid.

    request is accepted for interface compatibility but is not used.  No
    access check is made here; getConfile checks settings.ALLOWED_IPS
    before calling this function.

    Returns the config lines joined with os.linesep.  The text starts with
    fixed header lines (READ_LENGTH, ANALYSIS, GENOME_FILE,
    ELAND_MULTIPLE_INSTANCES) followed by a GENOME_DIR and an ELAND_GENOME
    line for each lane.  When the lookup raises ObjectDoesNotExist the
    result is a single "Entry not found" line instead.
    """
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    config = [
        'READ_LENGTH 25',
        'ANALYSIS eland',
        'GENOME_FILE all_chr.fa',
        'ELAND_MULTIPLE_INSTANCES 8',
    ]

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            lane_prefix = str(lane.lane_number) + ":"
            species = lane.library.library_species.scientific_name
            config.append(lane_prefix + genome_dir + species)
            config.append(lane_prefix + eland_genome + species)
    except ObjectDoesNotExist:
        # Keep config a list: joining a bare string would insert a line
        # separator between every character of the message.
        config = ['Entry not found for fcid  = '+fcid]

    return os.linesep.join(config)
220
def getConfile(req):
    """
    Return the stored config text for a run folder as text/plain.

    Only clients whose REMOTE_ADDR is a key of settings.ALLOWED_IPS are
    served.  Both 'fcid' and 'runf' must be present in req.REQUEST,
    otherwise the body is 'Nothing found'.  When the stored config_params
    does not start with a READ_LENGTH line, a config is generated with
    generateConfile() and saved back onto the DataRun record; if the
    generated text is not a usable config either, a failure message
    containing that text is returned instead.
    """
    client_ip = req.META['REMOTE_ADDR']
    if client_ip not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: "+client_ip)

    params = req.REQUEST
    config_text = 'Nothing found'

    if 'fcid' in params and 'runf' in params:
        flowcell_id = params['fcid']
        run_folder = params['runf']
        # A usable config starts with its READ_LENGTH line.
        read_length_line = re.compile('^READ_LENGTH.+')
        try:
            run = DataRun.objects.get(run_folder=run_folder)
            config_text = run.config_params
            if not read_length_line.search(config_text):
                config_text = generateConfile(params, flowcell_id)
                if read_length_line.search(config_text):
                    # Fetch the record again before storing the new config.
                    run = DataRun.objects.get(run_folder=run_folder)
                    run.config_params = config_text
                    run.save()
                else:
                    config_text = 'Failed generating config params for RunFolder = '+run_folder +', Flowcell id = '+ flowcell_id+ ' Config Text:\n'+config_text
        except ObjectDoesNotExist:
            config_text = 'Entry not found for RunFolder = '+run_folder

    return HttpResponse(config_text, mimetype='text/plain')
254
def getLaneLibs(req):
    """
    Return a SolexaResult XML listing of the libraries on a flowcell.

    Only clients whose REMOTE_ADDR is a key of settings.ALLOWED_IPS are
    served.  The flowcell id is read from the 'fcid' entry of req.REQUEST.
    The response is text/plain and contains either the XML document (one
    Lane element for each of lanes 1 through 8), 'Flowcell entry not found
    for: <fcid>', or 'Missing input: flowcell id'.
    """
    client_ip = req.META['REMOTE_ADDR']
    if client_ip not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    params = req.REQUEST
    if 'fcid' not in params:
        return HttpResponse('Missing input: flowcell id', mimetype='text/plain')

    fcid = params['fcid']
    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)

        # Today's date as two-digit year, month and day.  Ex: 071211
        # (strftime replaces a call to an undefined replace() helper that
        # would also have stripped every "20" from the year.)
        mydate = datetime.today().strftime('%y%m%d')

        lines = [
            '<?xml version="1.0" ?>',
            '<SolexaResult Date="'+mydate+'" Flowcell="'+fcid+'" Client="'+settings.ALLOWED_IPS[client_ip]+'">',
        ]
        # NOTE(review): other functions in this file reach lanes through
        # rec.lane_set; confirm the lane_N_library attributes still exist
        # on FlowCell.
        for index in range(1, 9):
            library = getattr(rec, 'lane_%d_library' % index)
            lines.append(
                '<Lane Index="%d" Name="%s" Library="%s" Genome="%s" PrimerName="" PrimerSeq=""/>'
                % (index,
                   library.library_name,
                   library.id,
                   library.library_species.use_genome_build))
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: '+fcid

    return HttpResponse(outputfile, mimetype='text/plain')
295
def estimateFlowcellDuration(flowcell):
    """
    Estimate how long a flowcell takes to sequence and analyze.

    The cycle count is the flowcell's read_length, doubled for paired-end
    runs.  Returns a timedelta that adds the sequencing time (1.5 hours
    per cycle) to the pipeline time (roughly 800 seconds per cycle).
    """
    seconds_sequencing_each_cycle = 3600 * 1.5
    seconds_pipeline_each_cycle = 800  # a rough guess

    if flowcell.paired_end:
        cycle_count = flowcell.read_length * 2
    else:
        cycle_count = flowcell.read_length

    time_sequencing = timedelta(seconds=cycle_count * seconds_sequencing_each_cycle)
    time_analysis = timedelta(seconds=cycle_count * seconds_pipeline_each_cycle)
    return time_sequencing + time_analysis
314
def estimateFlowcellTimeRemaining(flowcell):
    """
    Estimate how much longer a flowcell still needs.

    Takes the full duration estimate and subtracts the time elapsed
    between flowcell.run_date and now.  The result is a timedelta, which
    is negative once the elapsed time exceeds the estimate.
    """
    total_estimate = estimateFlowcellDuration(flowcell)
    elapsed = datetime.now() - flowcell.run_date
    return total_estimate - elapsed
323
def roundToDays(estimate):
    """
    Bracket a time estimate between two whole-day bounds.

    Returns a (low, high) tuple of timedeltas: low keeps only the days
    component of the estimate, and high is exactly one day more than low.
    """
    whole_days = estimate.days
    lower_bound = timedelta(days=whole_days)
    upper_bound = timedelta(days=whole_days + 1)
    return (lower_bound, upper_bound)
334     
335
def makeUserLaneMap(flowcell):
    """
    Map each interested user to the lanes of a flowcell.

    A user is interested in a lane when the user belongs to an affiliation
    of the library on that lane.  A lane is listed once for every
    affiliation that links it to the user.
    """
    lanes_by_user = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            for member in affiliation.users.all():
                if member not in lanes_by_user:
                    lanes_by_user[member] = []
                lanes_by_user[member].append(lane)

    return lanes_by_user
349
def getUsersForFlowcell(flowcell):
    """
    Return the set of users affiliated with any library on the flowcell.
    """
    interested = set()

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            interested.update(affiliation.users.all())

    return interested
359     
def makeUserLibraryMap(libraries):
    """
    Given an iterable of libraries, return a mapping of the users
    interested in those libraries.

    Each user maps to a list of libraries.  A library is listed once for
    every affiliation that links it to the user.
    """
    libraries_by_user = {}

    for library in libraries:
        for affiliation in library.affiliations.all():
            for member in affiliation.users.all():
                if member not in libraries_by_user:
                    libraries_by_user[member] = []
                libraries_by_user[member].append(library)

    return libraries_by_user
373
def makeAffiliationLaneMap(flowcell):
    """
    Map each affiliation to the list of flowcell lanes whose library
    belongs to that affiliation.
    """
    lanes_by_affiliation = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            if affiliation not in lanes_by_affiliation:
                lanes_by_affiliation[affiliation] = []
            lanes_by_affiliation[affiliation].append(lane)

    return lanes_by_affiliation
382
def makeEmailLaneMap(flowcell):
    """
    Map email addresses to the set of flowcell lanes they are tied to.

    Addresses are taken both from each affiliation of a lane's library and
    from the users inside those affiliations.  Missing (None) or empty
    addresses are skipped.
    """
    lanes_by_email = {}

    def remember(address, lane):
        # Skip None and empty addresses; otherwise file the lane under it.
        if address:
            lanes_by_email.setdefault(address, set()).add(lane)

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            remember(affiliation.email, lane)
            for member in affiliation.users.all():
                remember(member.email, lane)

    return lanes_by_email