f24d13d5b8dc3e830b2b3c1548251886ec7f1299
[htsworkflow.git] / htsworkflow / frontend / experiments / experiments.py
1 # some core functions of the exp tracker module
2 from datetime import datetime, timedelta
3 try:
4     import json
5 except ImportError, e:
6     import simplejson as json
7
8 import os
9 import re
10
11 from django.contrib.auth.decorators import login_required
12 from django.views.decorators.csrf import csrf_exempt
13 from django.core.exceptions import ObjectDoesNotExist
14 from django.core.mail import send_mail, mail_admins
15 from django.http import HttpResponse, Http404
16 from django.conf import settings
17
18 from htsworkflow.frontend.auth import require_api_key
19 from htsworkflow.frontend.experiments.models import \
20     FlowCell, \
21     DataRun, \
22     Lane, \
23     LANE_STATUS_MAP
24 from htsworkflow.frontend.samples.models import Library, MultiplexIndex, HTSUser
25
def flowcell_information(flowcell_id):
    """
    Describe a flowcell and its lanes as a plain dictionary.

    The flowcell is looked up by prefix: the first FlowCell whose
    flowcell_id starts with the given string is used.

    Returns None when no flowcell matches.  Otherwise the result holds
    the flowcell attributes plus 'lane_set', a dictionary mapping each
    lane number to the list of lane descriptions recorded for it.
    """
    try:
        flowcell = FlowCell.objects.get(flowcell_id__startswith=flowcell_id)
    except FlowCell.DoesNotExist:
        return None

    # several lane records may share one lane number, so keep a list per number
    lanes_by_number = {}
    for lane in flowcell.lane_set.all():
        description = _lane_information(lane)
        if lane.lane_number not in lanes_by_number:
            lanes_by_number[lane.lane_number] = []
        lanes_by_number[lane.lane_number].append(description)

    control_lane = flowcell.control_lane
    if control_lane is not None:
        control_lane = int(control_lane)

    return {
        'advanced_run': flowcell.advanced_run,
        'cluster_station_id': flowcell.cluster_station_id,
        'cluster_station': flowcell.cluster_station.name,
        'control_lane': control_lane,
        # 'datarun_set': how should this be represented?,
        'flowcell_id': flowcell.flowcell_id,
        'id': flowcell.id,
        'lane_set': lanes_by_number,
        'notes': flowcell.notes,
        'paired_end': flowcell.paired_end,
        'read_length': flowcell.read_length,
        'run_date': flowcell.run_date.isoformat(),
        'sequencer_id': flowcell.sequencer_id,
        'sequencer': flowcell.sequencer.name,
    }


def _lane_information(lane):
    """
    Describe a single lane (and the library loaded on it) as a dictionary.

    'index_sequence' is only present when the library reports index
    sequences.
    """
    library = lane.library
    description = {
        'cluster_estimate': lane.cluster_estimate,
        'comment': lane.comment,
        'experiment_type': library.experiment_type.name,
        'experiment_type_id': library.experiment_type_id,
        'flowcell': lane.flowcell.flowcell_id,
        'lane_number': lane.lane_number,
        'library_name': library.library_name,
        'library_id': library.id,
        'library_species': library.library_species.scientific_name,
        'pM': unicode(lane.pM),
        'read_length': lane.flowcell.read_length,
        'status_code': lane.status,
        'status': LANE_STATUS_MAP[lane.status]
    }
    index_sequences = library.index_sequences()
    if index_sequences is not None:
        description['index_sequence'] = index_sequences
    return description
81
@csrf_exempt
def flowcell_json(request, fc_id):
    """
    Return a JSON blob containing enough information to generate a config file.

    request -- incoming request; it is handed to require_api_key first
               (presumably that raises when the key is not acceptable,
               its return value is ignored here -- confirm in auth module)
    fc_id   -- flowcell id (or prefix of one) passed to flowcell_information

    Raises Http404 when no matching flowcell exists.
    """
    require_api_key(request)

    fc_dict = flowcell_information(fc_id)
    if fc_dict is None:
        raise Http404

    # content_type is accepted by every Django release that provides
    # csrf_exempt; the older 'mimetype' spelling was removed in Django 1.7.
    return HttpResponse(json.dumps(fc_dict), content_type='application/json')
96
def lanes_for(username=None):
    """
    Return lanes as a list of dictionaries, most recent run date first.

    When a username is given only lanes whose library has an affiliation
    containing that user are returned; the exception raised by
    HTSUser.objects.get for an unknown username is not caught here.
    Without a username every lane is returned.
    """
    filters = {}
    if username is not None:
        htsuser = HTSUser.objects.get(username=username)
        filters['library__affiliations__users__id'] = htsuser.id

    def describe(lane):
        # (id, name) pairs for every affiliation of the lane's library
        affiliation_pairs = [(affiliation.id, affiliation.name)
                             for affiliation in lane.library.affiliations.all()]
        return {
            'flowcell': lane.flowcell.flowcell_id,
            'run_date': lane.flowcell.run_date.isoformat(),
            'lane_number': lane.lane_number,
            'library': lane.library.id,
            'library_name': lane.library.library_name,
            'comment': lane.comment,
            'affiliations': affiliation_pairs,
        }

    recent_first = Lane.objects.filter(**filters).order_by('-flowcell__run_date')
    return [describe(lane) for lane in recent_first]
121
@csrf_exempt
def lanes_for_json(request, username):
    """
    Return the lanes of interest to a user as a JSON list.

    request  -- incoming request; handed to require_api_key first
                (presumably that raises on a bad key -- confirm in auth module)
    username -- user whose lanes are reported, see lanes_for

    Raises Http404 when lanes_for raises ObjectDoesNotExist, e.g. because
    the username is unknown.
    """
    require_api_key(request)

    try:
        result = lanes_for(username)
    except ObjectDoesNotExist:
        raise Http404

    # lanes_for already returns plain lists/dicts, so it serializes directly.
    # content_type replaces the 'mimetype' keyword removed in Django 1.7.
    return HttpResponse(json.dumps(result), content_type='application/json')
138
139
def updStatus(request):
    """
    Update the status (and optionally the note) of a DataRun.

    Access is granted only to a superuser connecting from an address
    that is a key of settings.ALLOWED_IPS.

    Request parameters, read from request.REQUEST:
      fcid  -- flowcell id; only used in the 'entry not found' message
      runf  -- run folder used to look up the DataRun
      updst -- new run status; also used as an integer index into
               DataRun.RUN_STATUS_CHOICES to build the reply
      msg   -- optional text appended, with a timestamp, to the run note

    Returns an HttpResponse whose body is a short plain-language message
    describing what happened (denied, missing parameter, not found or
    updated).  A status that is not a valid index raises ValueError or
    IndexError, as before, but now before anything is saved.
    """
    ClIP = request.META['REMOTE_ADDR']
    # keep the historical placeholder when no user is attached to the request
    user = getattr(request, 'user', 'none')

    #Check access permission
    # The placeholder string has no is_superuser attribute; treat that as
    # "not a superuser" so the request is denied instead of raising
    # AttributeError.
    is_superuser = getattr(user, 'is_superuser', False)
    if not (is_superuser and ClIP in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, ClIP))

    # ~~~~~~Parameters for the job ~~~~
    params = request.REQUEST
    if 'fcid' not in params:
        return HttpResponse('missing fcid')
    if 'runf' not in params:
        return HttpResponse('missing runf')
    if 'updst' not in params:
        return HttpResponse('missing status')

    fcid = params['fcid']
    runfolder = params['runf']
    UpdatedStatus = params['updst']
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Update Data Run status in DB
    try:
        rec = DataRun.objects.get(run_folder=runfolder)
    except ObjectDoesNotExist:
        return HttpResponse("entry not found: "+fcid+", "+runfolder)

    # Resolve the label before touching the record, so a malformed status
    # fails without a half-applied update having been saved.
    # NOTE(review): this indexes RUN_STATUS_CHOICES by position, which
    # assumes status values equal their position in the list -- confirm.
    status_label = DataRun.RUN_STATUS_CHOICES[int(UpdatedStatus)][1]

    rec.run_status = UpdatedStatus

    #if there's a message update that too
    # timestamp with minute resolution, e.g. 2007-12-11 15:04
    mytimestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
    if 'msg' in params:
        rec.run_note += ", "+params['msg']+" ("+mytimestamp+")"
    elif UpdatedStatus == '1':
        # status '1' without a message resets the note
        rec.run_note = "Started ("+mytimestamp+")"

    rec.save()

    # Notifying the researcher by email is not implemented: earlier attempts
    # with send_mail/mail_admins failed with
    # (49, "Can't assign requested address").
    output = "Hello %s. Updated to:'%s'" % (settings.ALLOWED_IPS[ClIP],
                                            status_label)
    return HttpResponse(output)
202
def generateConfile(request,fcid):
    """
    Build the text of a pipeline config file for a flowcell.

    request -- unused; kept so existing callers keep working
    fcid    -- exact flowcell id to look up

    Returns the config lines joined with os.linesep.  The text starts
    with 'READ_LENGTH 25' followed by the fixed analysis settings and,
    for every lane of the flowcell, a GENOME_DIR and an ELAND_GENOME
    line naming the scientific name of the library's species.

    When the lookup raises ObjectDoesNotExist the message
    'Entry not found for fcid  = <fcid>' is returned instead.
    """
    # No access check is done here (the IP check that used to live in this
    # function is disabled); getConfile checks the client address before
    # calling.
    config = ['READ_LENGTH 25']
    config += ['ANALYSIS eland']
    config += ['GENOME_FILE all_chr.fa']
    config += ['ELAND_MULTIPLE_INSTANCES 8']
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            lane_prefix = str(lane.lane_number) + ":"
            species = lane.library.library_species.scientific_name
            config.append(lane_prefix + genome_dir + species)
            config.append(lane_prefix + eland_genome + species)
    except ObjectDoesNotExist:
        # Return the message as-is.  Joining it like the list of config
        # lines would put a line separator between every character.
        return 'Entry not found for fcid  = '+fcid

    return os.linesep.join(config)
229
def getConfile(req):
    """
    Return the config file text stored for a run folder, creating it if needed.

    Only clients whose address is a key of settings.ALLOWED_IPS are served.

    Request parameters, read from req.REQUEST:
      fcid -- flowcell id, used when the config has to be generated
      runf -- run folder identifying the DataRun

    If the stored config_params does not start with a READ_LENGTH line
    a config is generated with generateConfile; when the generated text
    starts with a READ_LENGTH line it is saved on the DataRun.

    The response is text/plain and holds the config text, or a message:
    'Nothing found' when either parameter is missing, an 'Entry not
    found' message when the lookup raises ObjectDoesNotExist, or a
    'Failed generating' message when generation did not produce a config.
    """
    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: "+ClIP)

    request = req.REQUEST
    cnfgfile = 'Nothing found'

    if 'fcid' in request and 'runf' in request:
        fcid = request['fcid']
        runfolder = request['runf']
        # no MULTILINE flag: READ_LENGTH must be the very first line
        match_str = re.compile(r'^READ_LENGTH.+')
        try:
            rec = DataRun.objects.get(run_folder=runfolder) #,flowcell_id=fcid)
            cnfgfile = rec.config_params
            # an unset (None or empty) config is treated like an invalid one
            if not cnfgfile or not match_str.search(cnfgfile):
                cnfgfile = generateConfile(request,fcid)
                if match_str.search(cnfgfile):
                    # rec is still the record fetched above; no need to
                    # query for it a second time before saving
                    rec.config_params = cnfgfile
                    rec.save()
                else:
                    cnfgfile = 'Failed generating config params for RunFolder = '+runfolder +', Flowcell id = '+ fcid+ ' Config Text:\n'+cnfgfile
        except ObjectDoesNotExist:
            cnfgfile = 'Entry not found for RunFolder = '+runfolder

    # content_type replaces the 'mimetype' keyword removed in Django 1.7
    return HttpResponse(cnfgfile, content_type='text/plain')
263
def getLaneLibs(req):
    """
    Return a SolexaResult XML document listing the library of each lane.

    Only clients whose address is a key of settings.ALLOWED_IPS are served.

    Request parameters, read from req.REQUEST:
      fcid -- exact flowcell id to report on

    The response is text/plain.  On success it holds the XML document
    with one <Lane> element for each of lanes 1 to 8; otherwise a
    message saying that the flowcell id is missing or was not found.
    """
    # imported locally so this block does not depend on a file-level import
    from xml.sax.saxutils import escape

    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    request = req.REQUEST
    if 'fcid' not in request:
        return HttpResponse('Missing input: flowcell id',
                            content_type='text/plain')
    fcid = request['fcid']

    def attr(value):
        # escape &, <, > and the double quote delimiting attribute values;
        # the %s conversion also accepts ids that are not strings
        return escape('%s' % (value,), {'"': '&quot;'})

    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)
        #Ex: 071211
        # sample the clock once; two digit year, month and day
        mydate = datetime.today().strftime('%y%m%d')
        lines = [
            '<?xml version="1.0" ?>',
            '<SolexaResult Date="%s" Flowcell="%s" Client="%s">' % (
                mydate, attr(fcid), attr(settings.ALLOWED_IPS[ClIP])),
        ]
        # NOTE(review): other functions in this module reach libraries via
        # flowcell.lane_set; confirm FlowCell still provides the
        # lane_<n>_library attributes read here.
        for lane_number in range(1, 9):
            library = getattr(rec, 'lane_%d_library' % (lane_number,))
            lines.append(
                '<Lane Index="%d" Name="%s" Library="%s" Genome="%s"'
                ' PrimerName="" PrimerSeq=""/>' % (
                    lane_number,
                    attr(library.library_name),
                    attr(library.id),
                    attr(library.library_species.use_genome_build)))
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: '+fcid

    # content_type replaces the 'mimetype' keyword removed in Django 1.7
    return HttpResponse(outputfile, content_type='text/plain')
304
def estimateFlowcellDuration(flowcell):
    """
    Estimate how long sequencing plus analysis of a flowcell will take.

    The number of cycles is the flowcell's read_length, doubled for a
    paired end run; each cycle is charged a fixed sequencing cost and a
    fixed pipeline cost.  Returns the total as a timedelta.
    """
    # 1.5 hours of sequencing per cycle, in seconds
    seconds_sequencing = 3600 * 1.5
    # 800 is a rough guess for the pipeline
    seconds_pipeline = 800

    cycles = flowcell.read_length
    if flowcell.paired_end:
        cycles = cycles * 2

    time_sequencing = timedelta(seconds=cycles * seconds_sequencing)
    time_analysis = timedelta(seconds=cycles * seconds_pipeline)
    return time_sequencing + time_analysis
323
def estimateFlowcellTimeRemaining(flowcell):
    """
    Estimate how much longer a flowcell needs.

    This is the estimated total duration minus the time elapsed between
    the flowcell's run_date and now; the timedelta is negative once the
    estimate has been exceeded.
    """
    total = estimateFlowcellDuration(flowcell)
    # how long we've been running so far
    elapsed = datetime.now() - flowcell.run_date
    return total - elapsed
332
def roundToDays(estimate):
    """
    Bracket a time estimate between two whole-day timedeltas.

    Returns (low, high): low keeps only the days component of the
    estimate, high is exactly one day more.
    """
    whole_days = estimate.days
    lower_bound = timedelta(days=whole_days)
    upper_bound = timedelta(days=whole_days + 1)
    return (lower_bound, upper_bound)
343
344
def makeUserLaneMap(flowcell):
    """
    Map each interested user to the lanes of a flowcell.

    A user is interested in a lane when the user belongs to an
    affiliation of the library on that lane.  A lane is listed once for
    every affiliation that links the user to it.
    """
    # lazily walk lane -> affiliation -> user, in that nesting order
    pairs = ((user, lane)
             for lane in flowcell.lane_set.all()
             for affiliation in lane.library.affiliations.all()
             for user in affiliation.users.all())

    lanes_by_user = {}
    for user, lane in pairs:
        if user not in lanes_by_user:
            lanes_by_user[user] = []
        lanes_by_user[user].append(lane)
    return lanes_by_user
358
def getUsersForFlowcell(flowcell):
    """
    Return the set of users belonging to any affiliation of any library
    that is on a lane of the flowcell.
    """
    interested = set()
    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            interested.update(affiliation.users.all())
    return interested
368
def makeUserLibraryMap(libraries):
    """
    Given an iterable of libraries return a mapping of users to the
    libraries they are interested in.

    A user is interested in a library when the user belongs to one of
    its affiliations; the library is listed once per such affiliation.
    """
    libraries_by_user = {}
    for library in libraries:
        for affiliation in library.affiliations.all():
            for user in affiliation.users.all():
                if user in libraries_by_user:
                    libraries_by_user[user].append(library)
                else:
                    libraries_by_user[user] = [library]
    return libraries_by_user
382
def makeAffiliationLaneMap(flowcell):
    """
    Map each affiliation to the list of flowcell lanes whose library
    carries that affiliation.
    """
    lanes_by_affiliation = {}
    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            if affiliation not in lanes_by_affiliation:
                lanes_by_affiliation[affiliation] = []
            lanes_by_affiliation[affiliation].append(lane)
    return lanes_by_affiliation
391
def makeEmailLaneMap(flowcell):
    """
    Map email addresses to the set of flowcell lanes they are associated with.

    Addresses are taken both from the affiliations of each lane's library
    and from the users of those affiliations.  Addresses that are None or
    empty are skipped.
    """
    lanes_by_email = {}
    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            # the affiliation's own address first, then its users' addresses
            addresses = [affiliation.email]
            addresses.extend(user.email for user in affiliation.users.all())
            for address in addresses:
                if address:
                    if address not in lanes_by_email:
                        lanes_by_email[address] = set()
                    lanes_by_email[address].add(lane)
    return lanes_by_email