e9d12dee03bfc00aff72566b8ba6794ca433f54d
[htsworkflow.git] / experiments / experiments.py
1 from __future__ import absolute_import, print_function, unicode_literals
2
3 # some core functions of the exp tracker module
4 from datetime import datetime, timedelta
5 try:
6     import json
7 except ImportError as e:
8     import simplejson as json
9
10 import os
11 import re
12
13 from django.contrib.auth.decorators import login_required
14 from django.views.decorators.csrf import csrf_exempt
15 from django.core.exceptions import ObjectDoesNotExist
16 from django.core.mail import send_mail, mail_admins
17 from django.http import HttpResponse, Http404
18 from django.conf import settings
19 from django.utils import timezone
20
21 from htsworkflow.auth import require_api_key
22 from .models import FlowCell, DataRun, Lane, LANE_STATUS_MAP
23 from samples.models import Library, MultiplexIndex, HTSUser
24
def flowcell_information(flowcell_id):
    """
    Build a plain dictionary describing a flowcell.

    The flowcell is looked up by id prefix (flowcell_id__startswith).
    Returns None when no flowcell matches.  The 'lane_set' entry of the
    result maps each lane number to a list of per-lane dictionaries.
    """
    try:
        flowcell = FlowCell.objects.get(flowcell_id__startswith=flowcell_id)
    except FlowCell.DoesNotExist:
        return None

    lanes_by_number = {}
    for lane in flowcell.lane_set.all():
        library = lane.library
        description = {
            'cluster_estimate': lane.cluster_estimate,
            'comment': lane.comment,
            'experiment_type': library.experiment_type.name,
            'experiment_type_id': library.experiment_type_id,
            'flowcell': lane.flowcell.flowcell_id,
            'lane_number': lane.lane_number,
            'library_name': library.library_name,
            'library_id': library.id,
            'library_species': library.library_species.scientific_name,
            'pM': str(lane.pM),
            'read_length': lane.flowcell.read_length,
            'status_code': lane.status,
            'status': LANE_STATUS_MAP[lane.status]
        }
        # Only advertise index sequences when the library reports some.
        index_sequences = library.index_sequences()
        if index_sequences is not None:
            description['index_sequence'] = index_sequences

        # Several entries may share one lane number, so group them in a list.
        if lane.lane_number not in lanes_by_number:
            lanes_by_number[lane.lane_number] = []
        lanes_by_number[lane.lane_number].append(description)

    control_lane = None if flowcell.control_lane is None else int(flowcell.control_lane)

    return {
        'advanced_run': flowcell.advanced_run,
        'cluster_station_id': flowcell.cluster_station_id,
        'cluster_station': flowcell.cluster_station.name,
        'control_lane': control_lane,
        # 'datarun_set': how should this be represented?,
        'flowcell_id': flowcell.flowcell_id,
        'id': flowcell.id,
        'lane_set': lanes_by_number,
        'notes': flowcell.notes,
        'paired_end': flowcell.paired_end,
        'read_length': flowcell.read_length,
        'run_date': flowcell.run_date.isoformat(),
        'sequencer_id': flowcell.sequencer_id,
        'sequencer': flowcell.sequencer.name,
    }
80
@csrf_exempt
def flowcell_json(request, fc_id):
    """
    Serve the flowcell description as JSON, wrapped as {'result': ...}.

    The description carries enough information to generate a config file.
    Raises Http404 when flowcell_information finds no matching flowcell.
    """
    require_api_key(request)

    description = flowcell_information(fc_id)
    if description is not None:
        payload = json.dumps({'result': description})
        return HttpResponse(payload, content_type='application/json')

    raise Http404
95
def lanes_for(username=None):
    """
    Return lanes, newest run date first, as a list of dictionaries.

    When a username is given only lanes whose library is affiliated with
    that user are returned; otherwise every lane is listed.
    """
    filters = {}
    if username is not None:
        owner = HTSUser.objects.get(username=username)
        filters['library__affiliations__users__id'] = owner.id

    recent_lanes = Lane.objects.filter(**filters).order_by('-flowcell__run_date')

    return [
        {
            'flowcell': lane.flowcell.flowcell_id,
            'run_date': lane.flowcell.run_date.isoformat(),
            'lane_number': lane.lane_number,
            'library': lane.library.id,
            'library_name': lane.library.library_name,
            'comment': lane.comment,
            'affiliations': [(aff.id, aff.name)
                             for aff in lane.library.affiliations.all()],
        }
        for lane in recent_lanes
    ]
119
@csrf_exempt
def lanes_for_json(request, username):
    """
    Serve the lanes associated with a user as JSON, wrapped as {'result': ...}.

    Raises Http404 when lanes_for raises ObjectDoesNotExist.
    """
    require_api_key(request)

    try:
        lanes = lanes_for(username)
    except ObjectDoesNotExist:
        raise Http404

    # lanes is already a plain python structure, ready to serialize
    return HttpResponse(json.dumps({'result': lanes}),
                        content_type='application/json')
136
137
def updStatus(request):
    """
    Update the run status (and optionally the run note) of a DataRun.

    Request parameters, read from request.REQUEST:
      fcid  -- flowcell id; only echoed in the "entry not found" message
      runf  -- run folder used to look up the DataRun
      updst -- new value stored in run_status
      msg   -- optional text appended to run_note with a timestamp

    Only a superuser connecting from an address listed in
    settings.ALLOWED_IPS may update a run.  Every outcome (denied,
    missing parameter, not found, updated) is reported as the body of a
    plain HttpResponse.
    """
    client_ip = request.META['REMOTE_ADDR']
    # Requests without an attached user fall back to the placeholder 'none'.
    user = getattr(request, 'user', 'none')

    # Check access permission.  getattr() keeps the placeholder user, which
    # has no is_superuser attribute, on the "denied" path instead of raising
    # AttributeError.
    is_superuser = getattr(user, 'is_superuser', False)
    if not (is_superuser and client_ip in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, client_ip))

    # ~~~~~~ Parameters for the job ~~~~~~
    params = request.REQUEST
    if 'fcid' not in params:
        return HttpResponse('missing fcid')
    fcid = params['fcid']

    if 'runf' not in params:
        return HttpResponse('missing runf')
    runfolder = params['runf']

    if 'updst' not in params:
        return HttpResponse('missing status')
    new_status = params['updst']

    # Update Data Run status in DB; report when the run folder is unknown.
    try:
        rec = DataRun.objects.get(run_folder=runfolder)
    except ObjectDoesNotExist:
        return HttpResponse("entry not found: "+fcid+", "+runfolder)

    rec.run_status = new_status

    # Minute-resolution timestamp.  strftime is used rather than trimming
    # str(now) after its last colon, because for a timezone-aware datetime
    # the last colon belongs to the UTC offset, not to the seconds.
    timestamp = timezone.now().strftime('%Y-%m-%d %H:%M')

    # If there's a message, append it to the note as well.
    if 'msg' in params:
        # "or ''" tolerates a run_note that has never been set.
        rec.run_note = (rec.run_note or '') + ", " + params['msg'] + " (" + timestamp + ")"
    elif new_status == '1':
        rec.run_note = "Started (" + timestamp + ")"

    rec.save()

    # NOTE(review): the label is looked up by position in RUN_STATUS_CHOICES,
    # which presumably matches the status values -- confirm against the model.
    status_label = DataRun.RUN_STATUS_CHOICES[int(new_status)][1]
    output = "Hello %s. Updated to:'%s'" % (settings.ALLOWED_IPS[client_ip],
                                            status_label)

    # Notifying the researcher by email is not implemented: earlier attempts
    # with send_mail / mail_admins failed with
    # (49, "Can't assign requested address").
    return HttpResponse(output)
200
def generateConfile(request,fcid):
    """
    Generate the text of an analysis config file for a flowcell.

    request is accepted for interface compatibility and is not used.
    fcid is the exact flowcell id to look up.

    Returns the config lines joined with os.linesep, or a single-line
    'Entry not found ...' message when the lookup raises
    ObjectDoesNotExist.
    """
    config = [
        'READ_LENGTH 25',
        'ANALYSIS eland',
        'GENOME_FILE all_chr.fa',
        'ELAND_MULTIPLE_INSTANCES 8',
    ]
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            lane_prefix = str(lane.lane_number) + ":"
            species = lane.library.library_species.scientific_name
            config.append(lane_prefix + genome_dir + species)
            config.append(lane_prefix + eland_genome + species)
    except ObjectDoesNotExist:
        # Return the message directly: passing a bare string to
        # os.linesep.join() would iterate over it and put every character
        # on its own line.
        return 'Entry not found for fcid  = '+fcid

    return os.linesep.join(config)
227
def getConfile(req):
    """
    Return the stored config parameters of a data run as plain text.

    Expects 'fcid' and 'runf' request parameters and a client address
    listed in settings.ALLOWED_IPS.  When the stored parameters do not
    start with a READ_LENGTH line a config is generated with
    generateConfile and, if that looks valid, saved on the DataRun.
    """
    client_ip = req.META['REMOTE_ADDR']
    if client_ip not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: "+client_ip)

    params = req.REQUEST
    body = 'Nothing found'

    # Both parameters are required; otherwise the default body is returned.
    if 'fcid' in params and 'runf' in params:
        fcid = params['fcid']
        runfolder = params['runf']
        read_length_re = re.compile('^READ_LENGTH.+')
        try:
            run = DataRun.objects.get(run_folder=runfolder)
            body = run.config_params
            if not read_length_re.search(body):
                body = generateConfile(params, fcid)
                if read_length_re.search(body):
                    # re-read the record before storing the generated text
                    run = DataRun.objects.get(run_folder=runfolder)
                    run.config_params = body
                    run.save()
                else:
                    body = 'Failed generating config params for RunFolder = '+runfolder +', Flowcell id = '+ fcid+ ' Config Text:\n'+body
        except ObjectDoesNotExist:
            body = 'Entry not found for RunFolder = '+runfolder

    return HttpResponse(body, content_type='text/plain')
261
def getLaneLibs(req):
    """
    Return a SolexaResult XML document listing the libraries on lanes 1-8.

    Expects an 'fcid' request parameter and a client address listed in
    settings.ALLOWED_IPS.  The response is plain text: the XML document,
    or a short message when the parameter is missing or the lookup raises
    ObjectDoesNotExist.
    """
    client_ip = req.META['REMOTE_ADDR']
    if client_ip not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    params = req.REQUEST
    if 'fcid' not in params:
        return HttpResponse('Missing input: flowcell id', content_type='text/plain')

    fcid = params['fcid']
    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)
        # Two-digit year, month and day, e.g. 071211.  strftime replaces a
        # call to an undefined replace() helper that raised NameError.
        mydate = datetime.today().strftime('%y%m%d')
        lines = [
            '<?xml version="1.0" ?>',
            '<SolexaResult Date="%s" Flowcell="%s" Client="%s">' % (
                mydate, fcid, settings.ALLOWED_IPS[client_ip]),
        ]
        # NOTE(review): this reads per-lane attributes lane_1_library ..
        # lane_8_library, while other functions in this module go through
        # flowcell.lane_set -- confirm these attributes still exist on FlowCell.
        for index in range(1, 9):
            library = getattr(rec, 'lane_%d_library' % index)
            lines.append(
                '<Lane Index="%d" Name="%s" Library="%s" Genome="%s" PrimerName="" PrimerSeq=""/>' % (
                    index,
                    library.library_name,
                    library.id,
                    library.library_species.use_genome_build))
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: '+fcid

    return HttpResponse(outputfile, content_type='text/plain')
302
def estimateFlowcellDuration(flowcell):
    """
    Estimate how long sequencing plus analysis of a flowcell will take.

    The number of cycles is the flowcell's read_length, doubled for a
    paired end run.  Returns a timedelta.
    """
    # 1.5 hours of sequencing per cycle, expressed in seconds
    sequencing_seconds_per_cycle = 3600 * 1.5
    # 800 seconds of pipeline time per cycle is a rough guess
    pipeline_seconds_per_cycle = 800

    if flowcell.paired_end:
        cycle_count = flowcell.read_length * 2
    else:
        cycle_count = flowcell.read_length

    sequencing = timedelta(seconds=cycle_count * sequencing_seconds_per_cycle)
    analysis = timedelta(seconds=cycle_count * pipeline_seconds_per_cycle)
    return sequencing + analysis
321
def estimateFlowcellTimeRemaining(flowcell):
    """
    Estimate the time left for a flowcell: the estimated total duration
    minus the time elapsed since its run_date.  Returns a timedelta.
    """
    total_estimate = estimateFlowcellDuration(flowcell)
    # how long the flowcell has been running so far
    elapsed = timezone.now() - flowcell.run_date
    return total_estimate - elapsed
330
def roundToDays(estimate):
    """
    Bracket a timedelta estimate with whole days.

    Returns (low, high) where low keeps only the estimate's days
    component and high is exactly one day more than low.
    """
    whole_days = estimate.days
    return (timedelta(days=whole_days), timedelta(days=whole_days + 1))
341
342
def makeUserLaneMap(flowcell):
    """
    Map each user to the list of lanes on the flowcell they are
    interested in, following lane -> library -> affiliations -> users.

    A lane is listed once per affiliation through which the user is
    reached, so it may appear more than once for the same user.
    """
    lanes_by_user = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            for user in affiliation.users.all():
                if user not in lanes_by_user:
                    lanes_by_user[user] = []
                lanes_by_user[user].append(lane)

    return lanes_by_user
356
def getUsersForFlowcell(flowcell):
    """
    Return the set of users reachable from the flowcell's lanes through
    each lane's library affiliations.
    """
    return {
        user
        for lane in flowcell.lane_set.all()
        for affiliation in lane.library.affiliations.all()
        for user in affiliation.users.all()
    }
366
def makeUserLibraryMap(libraries):
    """
    Given an iterable of libraries, map each user to the list of
    libraries they are interested in (via the library affiliations).

    A library is listed once per affiliation through which the user is
    reached.
    """
    libraries_by_user = {}

    for library in libraries:
        for affiliation in library.affiliations.all():
            for user in affiliation.users.all():
                if user not in libraries_by_user:
                    libraries_by_user[user] = []
                libraries_by_user[user].append(library)

    return libraries_by_user
380
def makeAffiliationLaneMap(flowcell):
    """
    Map each affiliation to the list of lanes on the flowcell whose
    library belongs to that affiliation.
    """
    lanes_by_affiliation = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            if affiliation not in lanes_by_affiliation:
                lanes_by_affiliation[affiliation] = []
            lanes_by_affiliation[affiliation].append(lane)

    return lanes_by_affiliation
389
def makeEmailLaneMap(flowcell):
    """
    Map email addresses to the set of lanes associated with them.

    Addresses are collected from both the affiliations of each lane's
    library and the users of those affiliations; missing or empty
    addresses are skipped.
    """
    lanes_by_email = {}

    def remember(address, lane):
        # ignore contacts without a usable address
        if not address:
            return
        if address not in lanes_by_email:
            lanes_by_email[address] = set()
        lanes_by_email[address].add(lane)

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            remember(affiliation.email, lane)
            for user in affiliation.users.all():
                remember(user.email, lane)

    return lanes_by_email
404
405     return emails