Drop support for simplejson. We're only caring about Python 2.7+ and 3.3+.
[htsworkflow.git] / experiments / experiments.py
from __future__ import absolute_import, print_function, unicode_literals

# some core functions of the exp tracker module
from datetime import datetime, timedelta
import json
import os
import re
from xml.sax.saxutils import escape

from django.contrib.auth.decorators import login_required
from django.views.decorators.csrf import csrf_exempt
from django.core.exceptions import ObjectDoesNotExist
from django.core.mail import send_mail, mail_admins
from django.http import HttpResponse, Http404
from django.conf import settings
from django.utils import timezone

from htsworkflow.auth import require_api_key
from .models import FlowCell, DataRun, Lane, LANE_STATUS_MAP
from samples.models import Library, MultiplexIndex, HTSUser
19
def flowcell_information(flowcell_id):
    """
    Describe one flowcell as a plain dictionary.

    The flowcell is selected with a ``flowcell_id__startswith`` lookup on
    the given flowcell_id.  When that lookup raises FlowCell.DoesNotExist
    the function returns None.

    The returned dictionary holds the flowcell's own attributes plus a
    'lane_set' entry mapping each lane number to a list of per-lane
    dictionaries (one per Lane row carrying that lane number).
    """
    try:
        flowcell = FlowCell.objects.get(flowcell_id__startswith=flowcell_id)
    except FlowCell.DoesNotExist:
        return None

    lanes_by_number = {}
    for lane in flowcell.lane_set.all():
        description = {
            'cluster_estimate': lane.cluster_estimate,
            'comment': lane.comment,
            'experiment_type': lane.library.experiment_type.name,
            'experiment_type_id': lane.library.experiment_type_id,
            'flowcell': lane.flowcell.flowcell_id,
            'lane_number': lane.lane_number,
            'library_name': lane.library.library_name,
            'library_id': lane.library.id,
            'library_species': lane.library.library_species.scientific_name,
            'pM': str(lane.pM),
            'read_length': lane.flowcell.read_length,
            'status_code': lane.status,
            'status': LANE_STATUS_MAP[lane.status]
        }

        # index_sequence is only reported when the library provides one
        index_sequences = lane.library.index_sequences()
        if index_sequences is not None:
            description['index_sequence'] = index_sequences

        if lane.lane_number not in lanes_by_number:
            lanes_by_number[lane.lane_number] = []
        lanes_by_number[lane.lane_number].append(description)

    # control_lane is reported as an integer, or None when unset
    raw_control_lane = flowcell.control_lane
    control_lane = None if raw_control_lane is None else int(raw_control_lane)

    return {
        'advanced_run': flowcell.advanced_run,
        'cluster_station_id': flowcell.cluster_station_id,
        'cluster_station': flowcell.cluster_station.name,
        'control_lane': control_lane,
        # 'datarun_set': how should this be represented?,
        'flowcell_id': flowcell.flowcell_id,
        'id': flowcell.id,
        'lane_set': lanes_by_number,
        'notes': flowcell.notes,
        'paired_end': flowcell.paired_end,
        'read_length': flowcell.read_length,
        'run_date': flowcell.run_date.isoformat(),
        'sequencer_id': flowcell.sequencer_id,
        'sequencer': flowcell.sequencer.name,
    }
75
@csrf_exempt
def flowcell_json(request, fc_id):
    """
    Return a JSON blob containing enough information to generate a config file.

    request -- incoming HttpRequest; handed to require_api_key() first
               (presumably rejects callers without a valid key -- confirm
               in htsworkflow.auth)
    fc_id   -- flowcell id (or leading part of one) passed on to
               flowcell_information()

    The response body is ``{"result": <flowcell dictionary>}`` served as
    application/json.  Raises Http404 when flowcell_information() returns
    None, i.e. no flowcell matched.
    """
    require_api_key(request)

    fc_dict = flowcell_information(fc_id)
    if fc_dict is None:
        raise Http404

    # Serialised with the standard library json module.
    fc_json = json.dumps({'result': fc_dict})
    return HttpResponse(fc_json, content_type='application/json')
90
def lanes_for(username=None):
    """
    List lanes, most recent flowcell run first, as dictionaries.

    With a username, only lanes whose library has an affiliation that
    includes that user are returned; HTSUser.objects.get() raises if no
    such user exists.  Without a username every lane is returned.
    """
    filters = {}
    if username is not None:
        user = HTSUser.objects.get(username=username)
        filters['library__affiliations__users__id'] = user.id

    newest_first = Lane.objects.filter(**filters).order_by('-flowcell__run_date')

    def describe(lane):
        """Flatten one Lane into the dictionary reported to callers."""
        affiliation_pairs = [
            (affiliation.id, affiliation.name)
            for affiliation in lane.library.affiliations.all()
        ]
        return {
            'flowcell': lane.flowcell.flowcell_id,
            'run_date': lane.flowcell.run_date.isoformat(),
            'lane_number': lane.lane_number,
            'library': lane.library.id,
            'library_name': lane.library.library_name,
            'comment': lane.comment,
            'affiliations': affiliation_pairs,
        }

    return [describe(lane) for lane in newest_first]
114
@csrf_exempt
def lanes_for_json(request, username):
    """
    Format lanes for a user

    request  -- incoming HttpRequest; handed to require_api_key() first
                (presumably rejects callers without a valid key -- confirm
                in htsworkflow.auth)
    username -- user whose lanes should be listed, passed to lanes_for()

    The response body is ``{"result": [<lane dictionary>, ...]}`` served
    as application/json.  Raises Http404 when lanes_for() raises
    ObjectDoesNotExist (e.g. the user lookup fails).
    """
    require_api_key(request)

    try:
        result = lanes_for(username)
    except ObjectDoesNotExist:
        raise Http404

    # lanes_for() already returns plain lists/dicts, so the result can be
    # serialised directly with the standard library json module.
    result_json = json.dumps({'result': result})
    return HttpResponse(result_json, content_type='application/json')
131
132
def updStatus(request):
    """
    Update the run status (and optionally the run note) of a DataRun.

    Access is granted only when request.user is a superuser AND the
    client address (REMOTE_ADDR) is a key of settings.ALLOWED_IPS.

    Request parameters (looked up in POST first, then GET):
      fcid  -- flowcell id; only echoed back in the "not found" message
      runf  -- run folder, used to find the DataRun
      updst -- new status; also used as an integer index into
               DataRun.RUN_STATUS_CHOICES to build the reply
      msg   -- optional text appended to the run note with a timestamp

    Always answers with a short text HttpResponse describing the outcome
    (access denied, missing parameter, entry not found, or the update).
    """
    def param(name):
        """Return the POST value for name, else the GET value, else None."""
        # Same POST-before-GET precedence as the old request.REQUEST,
        # which was removed in Django 1.9.
        if name in request.POST:
            return request.POST[name]
        return request.GET.get(name)

    ClIP = request.META['REMOTE_ADDR']
    # 'none' is only a display placeholder for the denial message below.
    user = getattr(request, 'user', 'none')

    # Check access permission.  getattr keeps the placeholder string from
    # raising AttributeError when the request carries no user object.
    is_superuser = getattr(user, 'is_superuser', False)
    if not (is_superuser and ClIP in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, ClIP))

    # ~~~~~~ Parameters for the job ~~~~
    fcid = param('fcid')
    if fcid is None:
        return HttpResponse('missing fcid')

    runfolder = param('runf')
    if runfolder is None:
        return HttpResponse('missing runf')

    UpdatedStatus = param('updst')
    if UpdatedStatus is None:
        return HttpResponse('missing status')

    # Update Data Run status in DB
    try:
        rec = DataRun.objects.get(run_folder=runfolder)
    except ObjectDoesNotExist:
        return HttpResponse("entry not found: " + fcid + ", " + runfolder)

    rec.run_status = UpdatedStatus

    # Minute-resolution timestamp.  The previous regex trimmed everything
    # after the last ':' of str(now), which for a timezone-aware value cut
    # into the UTC offset instead of dropping the seconds.
    mytimestamp = timezone.now().strftime('%Y-%m-%d %H:%M')

    # if there's a message update the note too
    msg = param('msg')
    if msg is not None:
        rec.run_note += ", " + msg + " (" + mytimestamp + ")"
    elif UpdatedStatus == '1':
        rec.run_note = "Started (" + mytimestamp + ")"

    rec.save()

    status_label = DataRun.RUN_STATUS_CHOICES[int(UpdatedStatus)][1]
    output = "Hello %s. Updated to:'%s'" % (settings.ALLOWED_IPS[ClIP],
                                            status_label)

    # Email notification of the researcher (send_mail / mail_admins) was
    # tried here and left disabled: it failed with
    # (49, "Can't assign requested address").
    return HttpResponse(output)
195
def generateConfile(request, fcid):
    """
    Build the text of an eland analysis config file for a flowcell.

    request -- unused; kept so existing callers keep working
    fcid    -- exact flowcell_id of the FlowCell to describe

    Returns the config as one string, lines joined with os.linesep: four
    fixed header lines followed by a GENOME_DIR and an ELAND_GENOME line
    for every lane.  When the lookup raises ObjectDoesNotExist the
    returned string is an "Entry not found" message instead.

    No access check is done here; getConfile() checks the client address
    before calling this function.
    """
    config = [
        'READ_LENGTH 25',
        'ANALYSIS eland',
        'GENOME_FILE all_chr.fa',
        'ELAND_MULTIPLE_INSTANCES 8',
    ]
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            lane_prefix = str(lane.lane_number) + ":"
            species = lane.library.library_species.scientific_name
            config.append(lane_prefix + genome_dir + species)
            config.append(lane_prefix + eland_genome + species)
    except ObjectDoesNotExist:
        # Return the message as-is.  Previously it was assigned to
        # ``config`` and then passed through os.linesep.join(), which put
        # a line separator between every single character.
        return 'Entry not found for fcid  = ' + fcid

    return os.linesep.join(config)
222
def getConfile(req):
    """
    Return the stored (or freshly generated) config text for a data run.

    Only clients whose REMOTE_ADDR is a key of settings.ALLOWED_IPS are
    served; others get an "access denied" response.

    Request parameters (looked up in POST first, then GET):
      fcid -- flowcell id, used when a config has to be generated
      runf -- run folder identifying the DataRun

    If the DataRun's config_params does not start with a READ_LENGTH line,
    a config is generated with generateConfile() and, when that result
    looks valid, saved back to the DataRun.  The response is text/plain
    and carries either the config or a diagnostic message; when fcid or
    runf is missing it is 'Nothing found'.
    """
    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: " + ClIP)

    def param(name):
        """Return the POST value for name, else the GET value, else None."""
        # Same POST-before-GET precedence as the old req.REQUEST, which
        # was removed in Django 1.9.
        if name in req.POST:
            return req.POST[name]
        return req.GET.get(name)

    fcid = param('fcid')
    runfolder = param('runf')
    if fcid is None or runfolder is None:
        return HttpResponse('Nothing found', content_type='text/plain')

    try:
        rec = DataRun.objects.get(run_folder=runfolder)
    except ObjectDoesNotExist:
        return HttpResponse('Entry not found for RunFolder = ' + runfolder,
                            content_type='text/plain')

    # NOTE(review): ``or ''`` guards against config_params being None,
    # which would make the regex search raise -- confirm whether the
    # field is nullable.
    cnfgfile = rec.config_params or ''
    match_str = re.compile('^READ_LENGTH.+')
    if not match_str.search(cnfgfile):
        cnfgfile = generateConfile(req, fcid)
        if match_str.search(cnfgfile):
            # Cache the generated config on the record already in hand;
            # fetching the same DataRun a second time is unnecessary.
            rec.config_params = cnfgfile
            rec.save()
        else:
            cnfgfile = ('Failed generating config params for RunFolder = '
                        + runfolder + ', Flowcell id = ' + fcid
                        + ' Config Text:\n' + cnfgfile)

    return HttpResponse(cnfgfile, content_type='text/plain')
256
def getLaneLibs(req):
    """
    Return a SolexaResult XML description of the libraries on a flowcell.

    Only clients whose REMOTE_ADDR is a key of settings.ALLOWED_IPS are
    served; others get an "access denied." response.

    Request parameter (looked up in POST first, then GET):
      fcid -- exact flowcell_id of the FlowCell to describe

    The text/plain response holds one <Lane> element for each of lanes
    1-8, read from the flowcell's lane_<N>_library attributes, or a
    diagnostic message when fcid is missing or the lookup raises
    ObjectDoesNotExist.

    NOTE(review): the lane_<N>_library attributes are not defined in this
    file, and flowcell_information() reads lanes through lane_set instead
    -- confirm FlowCell still provides them.
    """
    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    # Same POST-before-GET precedence as the old req.REQUEST, which was
    # removed in Django 1.9.
    if 'fcid' in req.POST:
        fcid = req.POST['fcid']
    else:
        fcid = req.GET.get('fcid')
    if fcid is None:
        return HttpResponse('Missing input: flowcell id',
                            content_type='text/plain')

    def attr(value):
        """Render value as text that is safe inside a "..." XML attribute."""
        return escape('%s' % (value,), {'"': '&quot;'})

    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)

        # Ex: 071211 (two-digit year, month, day).  The previous code
        # called an undefined ``replace`` function here and so raised
        # NameError before any output could be produced.
        mydate = datetime.today().strftime('%y%m%d')

        lines = [
            '<?xml version="1.0" ?>',
            '<SolexaResult Date="%s" Flowcell="%s" Client="%s">' % (
                mydate, attr(fcid), attr(settings.ALLOWED_IPS[ClIP])),
        ]
        for lane_index in range(1, 9):
            library = getattr(rec, 'lane_%d_library' % lane_index)
            lines.append(
                '<Lane Index="%d" Name="%s" Library="%s" Genome="%s"'
                ' PrimerName="" PrimerSeq=""/>' % (
                    lane_index,
                    attr(library.library_name),
                    attr(library.id),
                    attr(library.library_species.use_genome_build)))
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: ' + fcid

    return HttpResponse(outputfile, content_type='text/plain')
297
def estimateFlowcellDuration(flowcell):
    """
    Estimate how long a flowcell takes to sequence and analyse.

    The cycle count is flowcell.read_length, doubled for paired-end runs.
    Each cycle is charged 1.5 hours of sequencing plus a rough 800 seconds
    of pipeline time.  Returns the total as a timedelta.
    """
    seconds_per_sequencing_cycle = 3600 * 1.5
    # 800 is a rough guess
    seconds_per_pipeline_cycle = 800

    read_length = flowcell.read_length
    total_cycles = read_length * 2 if flowcell.paired_end else read_length

    time_sequencing = timedelta(0, total_cycles * seconds_per_sequencing_cycle)
    time_analysing = timedelta(0, total_cycles * seconds_per_pipeline_cycle)
    return time_sequencing + time_analysing
316
def estimateFlowcellTimeRemaining(flowcell):
    """
    Estimate how much run time a flowcell has left.

    Takes the full estimate from estimateFlowcellDuration() and subtracts
    the time elapsed between flowcell.run_date and now.  The returned
    timedelta is negative once the elapsed time exceeds the estimate.
    """
    full_estimate = estimateFlowcellDuration(flowcell)

    # how long the run has been going already
    elapsed = timezone.now() - flowcell.run_date

    return full_estimate - elapsed
325
def roundToDays(estimate):
    """
    Bracket a time estimate between two whole-day timedeltas.

    Returns (low, high) where low is the estimate's ``days`` component
    with the sub-day part dropped, and high is exactly one day more.
    """
    whole_days = estimate.days
    lower_bound = timedelta(days=whole_days)
    upper_bound = timedelta(days=whole_days + 1)
    return (lower_bound, upper_bound)
336
337
def makeUserLaneMap(flowcell):
    """
    Map each interested user to the lanes of a flowcell.

    A user is associated with a lane when they belong to one of the
    affiliations of that lane's library.  A lane is listed once per
    affiliation that links it to the user.
    """
    user_lane_pairs = (
        (user, lane)
        for lane in flowcell.lane_set.all()
        for affiliation in lane.library.affiliations.all()
        for user in affiliation.users.all()
    )

    lanes_by_user = {}
    for user, lane in user_lane_pairs:
        if user not in lanes_by_user:
            lanes_by_user[user] = []
        lanes_by_user[user].append(lane)

    return lanes_by_user
351
def getUsersForFlowcell(flowcell):
    """
    Return the set of users belonging to any affiliation of any library
    that is on one of the flowcell's lanes.
    """
    return {
        user
        for lane in flowcell.lane_set.all()
        for affiliation in lane.library.affiliations.all()
        for user in affiliation.users.all()
    }
361
def makeUserLibraryMap(libraries):
    """
    Given an iterable of libraries, return a mapping of the users
    interested in those libraries to the list of libraries each one is
    linked to through an affiliation.
    """
    libraries_by_user = {}

    for library in libraries:
        for affiliation in library.affiliations.all():
            for user in affiliation.users.all():
                try:
                    bucket = libraries_by_user[user]
                except KeyError:
                    bucket = libraries_by_user[user] = []
                bucket.append(library)

    return libraries_by_user
375
def makeAffiliationLaneMap(flowcell):
    """
    Map each affiliation to the list of flowcell lanes whose library
    carries that affiliation.
    """
    lanes_by_affiliation = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            if affiliation in lanes_by_affiliation:
                lanes_by_affiliation[affiliation].append(lane)
            else:
                lanes_by_affiliation[affiliation] = [lane]

    return lanes_by_affiliation
384
def makeEmailLaneMap(flowcell):
    """
    Map email addresses to the set of flowcell lanes they are tied to.

    Addresses are collected both from each affiliation of a lane's library
    and from every user of those affiliations.  Missing (None) and empty
    addresses are skipped.
    """
    lanes_by_email = {}

    def remember(address, lane):
        """Record lane under address unless the address is None or empty."""
        if address is None or len(address) == 0:
            return
        if address not in lanes_by_email:
            lanes_by_email[address] = set()
        lanes_by_email[address].add(lane)

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            remember(affiliation.email, lane)
            for user in affiliation.users.all():
                remember(user.email, lane)

    return lanes_by_email