django provides django.utils.timezone.now to return a timezone aware timestamp if...
[htsworkflow.git] / htsworkflow / frontend / experiments / experiments.py
1 # some core functions of the exp tracker module
2 from datetime import datetime, timedelta
3 try:
4     import json
5 except ImportError, e:
6     import simplejson as json
7
8 import os
9 import re
10
11 from django.contrib.auth.decorators import login_required
12 from django.views.decorators.csrf import csrf_exempt
13 from django.core.exceptions import ObjectDoesNotExist
14 from django.core.mail import send_mail, mail_admins
15 from django.http import HttpResponse, Http404
16 from django.conf import settings
17 from django.utils import timezone
18
19 from htsworkflow.frontend.auth import require_api_key
20 from htsworkflow.frontend.experiments.models import \
21     FlowCell, \
22     DataRun, \
23     Lane, \
24     LANE_STATUS_MAP
25 from htsworkflow.frontend.samples.models import Library, MultiplexIndex, HTSUser
26
def flowcell_information(flowcell_id):
    """
    Return a dictionary describing a flowcell

    The flowcell is looked up by prefix: flowcell_id is matched against
    the start of FlowCell.flowcell_id.

    Returns None when no flowcell matches.  Otherwise the result holds the
    flowcell's own attributes plus 'lane_set', a dictionary mapping each
    lane number to the list of lane descriptions recorded for that number.

    NOTE(review): a prefix that matches several flowcells makes get() raise
    MultipleObjectsReturned, which is not handled here.
    """
    try:
        fc = FlowCell.objects.get(flowcell_id__startswith=flowcell_id)
    except FlowCell.DoesNotExist:
        return None

    lane_set = {}
    for lane in fc.lane_set.all():
        library = lane.library
        lane_item = {
            'cluster_estimate': lane.cluster_estimate,
            'comment': lane.comment,
            'experiment_type': library.experiment_type.name,
            'experiment_type_id': library.experiment_type_id,
            # every lane here came from fc.lane_set, so read the flowcell
            # fields from fc instead of following lane.flowcell back again
            'flowcell': fc.flowcell_id,
            'lane_number': lane.lane_number,
            'library_name': library.library_name,
            'library_id': library.id,
            'library_species': library.library_species.scientific_name,
            # pM is converted to text before it is handed to the caller
            'pM': unicode(lane.pM),
            'read_length': fc.read_length,
            'status_code': lane.status,
            'status': LANE_STATUS_MAP[lane.status]
        }
        sequences = library.index_sequences()
        if sequences is not None:
            lane_item['index_sequence'] = sequences

        # several entries may share one lane number, so keep a list per lane
        lane_set.setdefault(lane.lane_number, []).append(lane_item)

    if fc.control_lane is None:
        control_lane = None
    else:
        control_lane = int(fc.control_lane)

    info = {
        'advanced_run': fc.advanced_run,
        'cluster_station_id': fc.cluster_station_id,
        'cluster_station': fc.cluster_station.name,
        'control_lane': control_lane,
        # 'datarun_set': how should this be represented?,
        'flowcell_id': fc.flowcell_id,
        'id': fc.id,
        'lane_set': lane_set,
        'notes': fc.notes,
        'paired_end': fc.paired_end,
        'read_length': fc.read_length,
        'run_date': fc.run_date.isoformat(),
        'sequencer_id': fc.sequencer_id,
        'sequencer': fc.sequencer.name,
    }

    return info
82
@csrf_exempt
def flowcell_json(request, fc_id):
    """
    Return a JSON blob containing enough information to generate a config file.

    require_api_key() is called on the request first (presumably it rejects
    requests without a valid key -- see htsworkflow.frontend.auth).
    Raises Http404 when flowcell_information() finds no flowcell for fc_id.
    """
    require_api_key(request)

    fc_dict = flowcell_information(fc_id)
    if fc_dict is None:
        raise Http404

    # content_type is the supported spelling of the deprecated mimetype
    # keyword and produces the same Content-Type header.
    return HttpResponse(json.dumps(fc_dict), content_type='application/json')
97
def lanes_for(username=None):
    """
    Describe lanes, most recently run flowcell first, as a list of dictionaries

    When username is given only lanes whose library is affiliated with that
    user are returned; HTSUser.objects.get() raises if no such user exists.
    Without a username every lane is returned.
    """
    filters = {}
    if username is not None:
        owner = HTSUser.objects.get(username=username)
        filters['library__affiliations__users__id'] = owner.id

    recent_first = Lane.objects.filter(**filters).order_by('-flowcell__run_date')

    def describe(lane):
        # flatten one lane into plain python types
        library = lane.library
        flowcell = lane.flowcell
        groups = [(aff.id, aff.name) for aff in library.affiliations.all()]
        return {
            'flowcell': flowcell.flowcell_id,
            'run_date': flowcell.run_date.isoformat(),
            'lane_number': lane.lane_number,
            'library': library.id,
            'library_name': library.library_name,
            'comment': lane.comment,
            'affiliations': groups,
        }

    return [describe(lane) for lane in recent_first]
122
@csrf_exempt
def lanes_for_json(request, username):
    """
    Format lanes for a user

    Returns the list built by lanes_for() serialized as JSON.
    Raises Http404 when the lookup inside lanes_for() finds no such user.
    """
    require_api_key(request)

    try:
        result = lanes_for(username)
    except ObjectDoesNotExist:
        raise Http404

    # lanes_for() already converted the query set to plain python structures.
    # content_type is the supported spelling of the deprecated mimetype
    # keyword and produces the same Content-Type header.
    return HttpResponse(json.dumps(result), content_type='application/json')
139
140
def updStatus(request):
    """
    Update the run status (and optionally the run note) of a DataRun.

    Parameters are read from request.REQUEST:
      fcid  -- flowcell id; only echoed back in the "entry not found" message
      runf  -- run folder used to look up the DataRun
      updst -- new run status; also used as an integer index into
               DataRun.RUN_STATUS_CHOICES to build the confirmation message
      msg   -- optional text appended to the run note with a timestamp

    Access is limited to superusers connecting from an address listed in
    settings.ALLOWED_IPS.  Every outcome, including the error cases, is
    reported as the body of an HttpResponse.
    """
    ClIP = request.META['REMOTE_ADDR']
    user = getattr(request, 'user', 'none')

    # Check access permission.  The 'none' placeholder used when the request
    # carries no user has no is_superuser attribute, so treat that as denied
    # instead of raising AttributeError.
    is_superuser = getattr(user, 'is_superuser', False)
    if not (is_superuser and ClIP in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, ClIP))

    # ~~~~~~Parameters for the job ~~~~
    params = request.REQUEST
    if 'fcid' not in params:
        return HttpResponse('missing fcid')
    fcid = params['fcid']

    if 'runf' not in params:
        return HttpResponse('missing runf')
    runfolder = params['runf']

    if 'updst' not in params:
        return HttpResponse('missing status')
    UpdatedStatus = params['updst']
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Update Data Run status in DB
    try:
        rec = DataRun.objects.get(run_folder=runfolder)
    except ObjectDoesNotExist:
        return HttpResponse("entry not found: " + fcid + ", " + runfolder)

    # Resolve the status label before saving so that a malformed status is
    # reported to the caller instead of being stored and then crashing.
    try:
        status_label = str(DataRun.RUN_STATUS_CHOICES[int(UpdatedStatus)][1])
    except (ValueError, IndexError):
        return HttpResponse("invalid status: %s" % (UpdatedStatus,))

    rec.run_status = UpdatedStatus

    # Minute resolution timestamp.  Formatting explicitly avoids trimming
    # str(now) at its last colon, which on a timezone aware value removes
    # part of the UTC offset rather than the seconds.
    mytimestamp = timezone.now().strftime('%Y-%m-%d %H:%M')

    # if there's a message update the note too
    if 'msg' in params:
        # (run_note or '') guards against an unset note
        rec.run_note = ((rec.run_note or '') + ", " + params['msg'] +
                        " (" + mytimestamp + ")")
    elif UpdatedStatus == '1':
        rec.run_note = "Started (" + mytimestamp + ")"

    rec.save()
    output = ("Hello " + settings.ALLOWED_IPS[ClIP] +
              ". Updated to:'" + status_label + "'")

    # Notifying the researcher by email was attempted here but is disabled:
    # send_mail / mail_admins failed with
    # (49, "Can't assign requested address")
    return HttpResponse(output)
203
def generateConfile(request,fcid):
    """
    Build the text of a pipeline config file for the flowcell fcid.

    The result starts with fixed READ_LENGTH / ANALYSIS / GENOME_FILE /
    ELAND_MULTIPLE_INSTANCES settings, followed by a GENOME_DIR and an
    ELAND_GENOME line for every lane, each pointing at a directory named
    after the scientific name of the lane's library species.  Lines are
    joined with os.linesep.

    If the flowcell (or a related record) does not exist, the single line
    'Entry not found for fcid  = <fcid>' is returned instead.

    request is not used; no access check is performed here.
    """
    config = [
        'READ_LENGTH 25',
        'ANALYSIS eland',
        'GENOME_FILE all_chr.fa',
        'ELAND_MULTIPLE_INSTANCES 8',
    ]
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            prefix = str(lane.lane_number) + ":"
            species = lane.library.library_species.scientific_name
            config.append(prefix + genome_dir + species)
            config.append(prefix + eland_genome + species)
    except ObjectDoesNotExist:
        # keep config a list: joining a bare string would put a line
        # separator between every character of the message
        config = ['Entry not found for fcid  = ' + fcid]

    return os.linesep.join(config)
230
def getConfile(req):
    """
    Return the config parameters stored for a run folder as plain text.

    Only clients whose address is listed in settings.ALLOWED_IPS are served.
    The request must supply both 'fcid' and 'runf'; otherwise the body is
    'Nothing found'.

    When the DataRun's stored config_params do not start with a READ_LENGTH
    line, a config is generated with generateConfile() and, if that one does
    start with READ_LENGTH, saved back onto the DataRun.  Failures are
    reported in the response body rather than as an error status.
    """
    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: " + ClIP)

    cnfgfile = 'Nothing found'
    request = req.REQUEST
    if 'fcid' in request and 'runf' in request:
        fcid = request['fcid']
        runfolder = request['runf']
        try:
            rec = DataRun.objects.get(run_folder=runfolder)
            # an unset config is treated like an empty one
            cnfgfile = rec.config_params or ''
            # no MULTILINE flag: the text has to *start* with READ_LENGTH
            match_str = re.compile('^READ_LENGTH.+')
            if not match_str.search(cnfgfile):
                cnfgfile = generateConfile(request, fcid)
                if match_str.search(cnfgfile):
                    # rec is still the record fetched above, so there is no
                    # need to query for it a second time before saving
                    rec.config_params = cnfgfile
                    rec.save()
                else:
                    cnfgfile = 'Failed generating config params for RunFolder = '+runfolder +', Flowcell id = '+ fcid+ ' Config Text:\n'+cnfgfile
        except ObjectDoesNotExist:
            cnfgfile = 'Entry not found for RunFolder = ' + runfolder

    # content_type is the supported spelling of the deprecated mimetype
    # keyword and produces the same Content-Type header.
    return HttpResponse(cnfgfile, content_type='text/plain')
264
def getLaneLibs(req):
    """
    Return a SolexaResult XML description of the libraries on a flowcell.

    Only clients whose address is listed in settings.ALLOWED_IPS are served.
    The flowcell is selected with the 'fcid' request parameter.  The
    document carries today's date as YYMMDD, the flowcell id, the client
    name from settings.ALLOWED_IPS and one <Lane> element for each of the
    lanes 1 through 8.  Problems are reported in the plain text body.

    NOTE(review): this reads rec.lane_<N>_library, while the other functions
    in this module reach libraries through flowcell.lane_set -- confirm the
    FlowCell model still provides the lane_<N>_library attributes.
    NOTE(review): values are inserted into the XML attributes unescaped.
    """
    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    request = req.REQUEST
    if 'fcid' not in request:
        return HttpResponse('Missing input: flowcell id',
                            content_type='text/plain')

    fcid = request['fcid']
    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)
        # two digit year, month and day, Ex: 071211
        mydate = datetime.today().strftime('%y%m%d')
        lines = [
            '<?xml version="1.0" ?>',
            '<SolexaResult Date="%s" Flowcell="%s" Client="%s">' % (
                mydate, fcid, settings.ALLOWED_IPS[ClIP]),
        ]
        for index in range(1, 9):
            library = getattr(rec, 'lane_%d_library' % (index,))
            # %s formatting copes with ids that are not already strings
            lines.append(
                '<Lane Index="%d" Name="%s" Library="%s" Genome="%s" '
                'PrimerName="" PrimerSeq=""/>' % (
                    index,
                    library.library_name,
                    library.id,
                    library.library_species.use_genome_build))
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: ' + fcid

    # content_type is the supported spelling of the deprecated mimetype
    # keyword and produces the same Content-Type header.
    return HttpResponse(outputfile, content_type='text/plain')
305
def estimateFlowcellDuration(flowcell,
                             sequencing_seconds_per_cycle=3600 * 1.5,
                             pipeline_seconds_per_cycle=800):
    """
    Attempt to estimate how long it will take to run a flowcell

    The number of cycles is flowcell.read_length, doubled when
    flowcell.paired_end is true.  Each cycle is charged a sequencing cost
    and an analysis pipeline cost.

    sequencing_seconds_per_cycle -- defaults to 1.5 hours per cycle
    pipeline_seconds_per_cycle   -- defaults to 800, which is a rough guess

    Returns the combined sequencing and analysis time as a timedelta.
    The flowcell itself is not modified.
    """
    cycles = flowcell.read_length
    if flowcell.paired_end:
        cycles *= 2

    sequencing_time = timedelta(0, cycles * sequencing_seconds_per_cycle)
    analysis_time = timedelta(0, cycles * pipeline_seconds_per_cycle)

    return sequencing_time + analysis_time
324
def estimateFlowcellTimeRemaining(flowcell):
    """
    Estimate how much longer a flowcell will take to finish

    Takes the total estimate from estimateFlowcellDuration() and subtracts
    the time elapsed between flowcell.run_date and now.
    """
    total = estimateFlowcellDuration(flowcell)
    # how long the flowcell has been running so far
    elapsed = timezone.now() - flowcell.run_date
    return total - elapsed
333
def roundToDays(estimate):
    """
    Bracket a time estimate between two whole-day timedeltas

    Returns (low, high) where low keeps only the days component of the
    estimate and high is exactly one day more than low.
    """
    whole_days = estimate.days
    return (timedelta(days=whole_days), timedelta(days=whole_days + 1))
344
345
def makeUserLaneMap(flowcell):
    """
    Map each interested user to the lanes of a flowcell.

    A user is interested in a lane when they belong to one of the
    affiliations of the lane's library.  A lane is listed once per
    affiliation that links it to the user.
    """
    lanes_by_user = {}

    for lane in flowcell.lane_set.all():
        interested = [member
                      for group in lane.library.affiliations.all()
                      for member in group.users.all()]
        for member in interested:
            if member not in lanes_by_user:
                lanes_by_user[member] = []
            lanes_by_user[member].append(lane)

    return lanes_by_user
359
def getUsersForFlowcell(flowcell):
    """
    Return the set of users belonging to any affiliation of any library
    that is on one of the flowcell's lanes.
    """
    return set(member
               for lane in flowcell.lane_set.all()
               for group in lane.library.affiliations.all()
               for member in group.users.all())
369
def makeUserLibraryMap(libraries):
    """
    Map each interested user to a list of the given libraries.

    libraries is any iterable of libraries.  A user is interested in a
    library when they belong to one of its affiliations; the library is
    listed once per affiliation that links it to the user.
    """
    libraries_by_user = {}

    for library in libraries:
        interested = [member
                      for group in library.affiliations.all()
                      for member in group.users.all()]
        for member in interested:
            if member not in libraries_by_user:
                libraries_by_user[member] = []
            libraries_by_user[member].append(library)

    return libraries_by_user
383
def makeAffiliationLaneMap(flowcell):
    """
    Map each affiliation to the list of flowcell lanes whose library
    carries that affiliation.
    """
    lanes_by_affiliation = {}

    for lane in flowcell.lane_set.all():
        for group in lane.library.affiliations.all():
            if group not in lanes_by_affiliation:
                lanes_by_affiliation[group] = []
            lanes_by_affiliation[group].append(lane)

    return lanes_by_affiliation
392
def makeEmailLaneMap(flowcell):
    """
    Map email addresses to the set of flowcell lanes they are associated with.

    Addresses are taken both from each affiliation of a lane's library and
    from the users of that affiliation.  Missing (None) and empty addresses
    are skipped.
    """
    lanes_by_email = {}

    for lane in flowcell.lane_set.all():
        for group in lane.library.affiliations.all():
            # the affiliation's own address first, then its members'
            addresses = [group.email]
            addresses.extend(member.email for member in group.users.all())
            for address in addresses:
                if address:
                    lanes_by_email.setdefault(address, set()).add(lane)

    return lanes_by_email