Flatten project hierarchy, moving Django applications out of htsworkflow.frontend...
[htsworkflow.git] / experiments / experiments.py
1 from __future__ import absolute_import, print_function
2
3 # some core functions of the exp tracker module
4 from datetime import datetime, timedelta
5 try:
6     import json
7 except ImportError, e:
8     import simplejson as json
9
10 import os
11 import re
12
13 from django.contrib.auth.decorators import login_required
14 from django.views.decorators.csrf import csrf_exempt
15 from django.core.exceptions import ObjectDoesNotExist
16 from django.core.mail import send_mail, mail_admins
17 from django.http import HttpResponse, Http404
18 from django.conf import settings
19 from django.utils import timezone
20
21 from htsworkflow.auth import require_api_key
22 from .models import FlowCell, DataRun, Lane, LANE_STATUS_MAP
23 from samples.models import Library, MultiplexIndex, HTSUser
24
def flowcell_information(flowcell_id):
    """
    Return a dictionary describing a flowcell

    ``flowcell_id`` is matched as a prefix of FlowCell.flowcell_id.
    Returns None when no flowcell matches.

    The 'lane_set' entry maps each lane number to a list of per-lane
    dictionaries, so Lane rows that share a lane number are all kept.

    NOTE: Django's get() raises MultipleObjectsReturned when the prefix
    matches more than one flowcell; that case is not handled here.
    """
    try:
        fc = FlowCell.objects.get(flowcell_id__startswith=flowcell_id)
    except FlowCell.DoesNotExist:
        return None

    lane_set = {}
    for lane in fc.lane_set.all():
        library = lane.library
        lane_item = {
            'cluster_estimate': lane.cluster_estimate,
            'comment': lane.comment,
            'experiment_type': library.experiment_type.name,
            'experiment_type_id': library.experiment_type_id,
            # every lane in fc.lane_set belongs to fc, so read the flowcell
            # fields from fc instead of dereferencing lane.flowcell again
            'flowcell': fc.flowcell_id,
            'lane_number': lane.lane_number,
            'library_name': library.library_name,
            'library_id': library.id,
            'library_species': library.library_species.scientific_name,
            # stringified; presumably pM is not JSON serializable as-is
            # -- TODO confirm
            'pM': unicode(lane.pM),
            'read_length': fc.read_length,
            'status_code': lane.status,
            'status': LANE_STATUS_MAP[lane.status],
        }
        sequences = library.index_sequences()
        if sequences is not None:
            lane_item['index_sequence'] = sequences

        lane_set.setdefault(lane.lane_number, []).append(lane_item)

    if fc.control_lane is None:
        control_lane = None
    else:
        control_lane = int(fc.control_lane)

    info = {
        'advanced_run': fc.advanced_run,
        'cluster_station_id': fc.cluster_station_id,
        'cluster_station': fc.cluster_station.name,
        'control_lane': control_lane,
        # 'datarun_set': how should this be represented?,
        'flowcell_id': fc.flowcell_id,
        'id': fc.id,
        'lane_set': lane_set,
        'notes': fc.notes,
        'paired_end': fc.paired_end,
        'read_length': fc.read_length,
        'run_date': fc.run_date.isoformat(),
        'sequencer_id': fc.sequencer_id,
        'sequencer': fc.sequencer.name,
    }

    return info
80
@csrf_exempt
def flowcell_json(request, fc_id):
    """
    Serve the description of flowcell ``fc_id`` as a JSON document.

    The payload is whatever flowcell_information() returns; it is meant to
    hold enough information to generate a config file.  Raises Http404
    when flowcell_information() finds no flowcell.
    """
    # NOTE(review): presumably aborts the request when the API key is
    # missing or wrong -- confirm against htsworkflow.auth
    require_api_key(request)

    description = flowcell_information(fc_id)
    if description is not None:
        return HttpResponse(json.dumps(description),
                            content_type = 'application/json')

    raise Http404
95
def lanes_for(username=None):
    """
    Return lanes, most recent flowcell run date first, as a list of
    dictionaries.

    When ``username`` is given only lanes whose library has an affiliation
    containing that user are returned; HTSUser.DoesNotExist propagates if
    there is no such user.  With no username every lane is returned.
    """
    filters = {}
    if username is not None:
        account = HTSUser.objects.get(username=username)
        filters['library__affiliations__users__id'] = account.id

    def describe(lane):
        # one dictionary per lane; affiliations become (id, name) pairs
        library = lane.library
        affiliation_pairs = [(aff.id, aff.name)
                             for aff in library.affiliations.all()]
        flowcell = lane.flowcell
        return {
            'flowcell': flowcell.flowcell_id,
            'run_date': flowcell.run_date.isoformat(),
            'lane_number': lane.lane_number,
            'library': library.id,
            'library_name': library.library_name,
            'comment': lane.comment,
            'affiliations': affiliation_pairs,
        }

    newest_first = Lane.objects.filter(**filters).order_by('-flowcell__run_date')
    return [describe(lane) for lane in newest_first]
120
@csrf_exempt
def lanes_for_json(request, username):
    """
    Return the lanes for a user as a JSON response.

    The payload is the list built by lanes_for(username).  Raises Http404
    when lanes_for() raises ObjectDoesNotExist.
    """
    require_api_key(request)

    try:
        result = lanes_for(username)
    except ObjectDoesNotExist:
        # no exception binding: it was unused, and the old
        # "except X, e" spelling only parses on Python 2
        raise Http404

    result_json = json.dumps(result)
    return HttpResponse(result_json, content_type='application/json')
137
138
def updStatus(request):
    """
    Update the status, and optionally the note, of the DataRun stored for
    a run folder.

    Parameters are read from request.REQUEST:
      fcid  - flowcell id; only used in the "entry not found" message
      runf  - run folder identifying the DataRun
      updst - new status; an integer index into DataRun.RUN_STATUS_CHOICES
      msg   - optional text appended to the run note

    Only superusers connecting from an address listed in
    settings.ALLOWED_IPS may update a run.

    Returns an HttpResponse whose body is a plain message describing the
    outcome: access denied, a missing parameter, an invalid status, entry
    not found, or the confirmation of the update.
    """
    ClIP = request.META['REMOTE_ADDR']

    # The request may carry no user attribute.  Keep the historical 'none'
    # placeholder for the message, but never call .is_superuser on it.
    user = getattr(request, 'user', 'none')

    #Check access permission
    is_superuser = getattr(user, 'is_superuser', False)
    if not (is_superuser and ClIP in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, ClIP))

    # ~~~~~~Parameters for the job ~~~~
    params = request.REQUEST
    if 'fcid' not in params:
        return HttpResponse('missing fcid')
    fcid = params['fcid']

    if 'runf' not in params:
        return HttpResponse('missing runf')
    runfolder = params['runf']

    if 'updst' not in params:
        return HttpResponse('missing status')
    UpdatedStatus = params['updst']
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    try:
        rec = DataRun.objects.get(run_folder=runfolder)
    except ObjectDoesNotExist:
        return HttpResponse("entry not found: "+fcid+", "+runfolder)

    # Resolve the status label before touching the record, so a bad value
    # is reported to the client instead of failing after the save.
    try:
        status_label = DataRun.RUN_STATUS_CHOICES[int(UpdatedStatus)][1]
    except (ValueError, IndexError):
        return HttpResponse('invalid status: %s' % (UpdatedStatus,))

    rec.run_status = UpdatedStatus

    # NOTE(review): the regex drops everything from the last ':' onward.
    # For a naive timestamp that removes the seconds; for an aware one
    # (whose str() ends in '+HH:MM') it only trims the UTC offset --
    # confirm which is intended.
    mytimestamp = str(timezone.now())
    mytimestamp = re.sub(pattern=":[^:]*$",repl="",string=mytimestamp)

    #if there's a message update the note too
    if 'msg' in params:
        # run_note may be unset -- TODO confirm the field's nullability
        previous_note = rec.run_note or ''
        rec.run_note = previous_note+", "+params['msg']+" ("+mytimestamp+")"
    elif UpdatedStatus == '1':
        rec.run_note = "Started ("+mytimestamp+")"

    rec.save()

    # Notifying the researcher by email was never functional (sending
    # failed with error 49, "Can't assign requested address"), so no mail
    # is sent here.
    output = "Hello "+settings.ALLOWED_IPS[ClIP]+". Updated to:'"+str(status_label)+"'"
    return HttpResponse(output)
201
def generateConfile(request,fcid):
    """
    Build the text of a config file for the flowcell ``fcid``.

    The text starts with four fixed settings (READ_LENGTH, ANALYSIS,
    GENOME_FILE, ELAND_MULTIPLE_INSTANCES) followed by a GENOME_DIR and an
    ELAND_GENOME line per lane, each prefixed with "<lane number>:" and
    ending in the scientific name of the lane's library species.

    ``request`` is not used; it is kept so existing callers keep working.

    Returns the lines joined with os.linesep, or the single-line message
    'Entry not found for fcid  = <fcid>' when the lookup fails.
    """
    config = [
        'READ_LENGTH 25',
        'ANALYSIS eland',
        'GENOME_FILE all_chr.fa',
        'ELAND_MULTIPLE_INSTANCES 8',
    ]
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            lane_prefix = str(lane.lane_number) + ":"
            species = lane.library.library_species.scientific_name
            config.append(lane_prefix + genome_dir + species)
            config.append(lane_prefix + eland_genome + species)
    except ObjectDoesNotExist:
        # Return the message untouched.  It used to be handed to
        # os.linesep.join(), which iterates a string character by
        # character and so put every letter on its own line.
        return 'Entry not found for fcid  = '+fcid

    return os.linesep.join(config)
228
def getConfile(req):
    """
    Return the config parameters stored for a run folder as plain text.

    Request parameters (from req.REQUEST): ``fcid`` and ``runf``.  Unless
    both are present the body is 'Nothing found'.

    When the stored parameters do not start with a READ_LENGTH line they
    are regenerated with generateConfile(); a successful result is saved
    back onto the DataRun, otherwise a failure message is returned.

    Only clients whose address is listed in settings.ALLOWED_IPS are
    served; everyone else gets an "access denied" response.
    """
    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: "+ClIP)

    request = req.REQUEST
    cnfgfile = 'Nothing found'

    if 'fcid' in request and 'runf' in request:
        fcid = request['fcid']
        runfolder = request['runf']
        match_str = re.compile('^READ_LENGTH.+')

        try:
            rec = DataRun.objects.get(run_folder=runfolder) #,flowcell_id=fcid)
        except ObjectDoesNotExist:
            cnfgfile = 'Entry not found for RunFolder = '+runfolder
        else:
            # config_params may be unset -- TODO confirm nullability;
            # treat that like an empty config so it gets regenerated
            cnfgfile = rec.config_params or ''
            if not match_str.search(cnfgfile):
                cnfgfile = generateConfile(request,fcid)
                if match_str.search(cnfgfile):
                    # rec is still the record fetched above, so it is
                    # saved directly instead of being queried again
                    rec.config_params = cnfgfile
                    rec.save()
                else:
                    cnfgfile = 'Failed generating config params for RunFolder = '+runfolder +', Flowcell id = '+ fcid+ ' Config Text:\n'+cnfgfile

    return HttpResponse(cnfgfile, content_type='text/plain')
262
def getLaneLibs(req):
    """
    Describe the libraries on the eight lanes of a flowcell as a
    SolexaResult XML document (served as text/plain).

    Request parameter (from req.REQUEST): ``fcid``, the flowcell id.

    Only clients whose address is listed in settings.ALLOWED_IPS are
    served.  When ``fcid`` is missing or matches no flowcell the body is
    a plain error message instead of XML.
    """
    # local import: only this function needs it
    from xml.sax.saxutils import quoteattr

    def attr(value):
        # quoted and escaped XML attribute value; '%s' also accepts
        # non-string values such as a numeric id
        return quoteattr('%s' % (value,))

    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    request = req.REQUEST
    if 'fcid' not in request:
        return HttpResponse('Missing input: flowcell id',
                            content_type='text/plain')

    fcid = request['fcid']
    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)

        # YYMMDD, e.g. 071211.  One strftime call replaces the old
        # piecewise formatting, which called an undefined replace() and
        # would have blanked the year 2020 by stripping every '20'.
        mydate = datetime.today().strftime('%y%m%d')

        lines = [
            '<?xml version="1.0" ?>',
            '<SolexaResult Date="%s" Flowcell=%s Client=%s>' % (
                mydate, attr(fcid), attr(settings.ALLOWED_IPS[ClIP])),
        ]
        # NOTE(review): other functions in this module reach libraries
        # through fc.lane_set; confirm FlowCell still exposes the
        # lane_<n>_library attributes used here.
        for lane_index in range(1, 9):
            library = getattr(rec, 'lane_%d_library' % lane_index)
            lines.append(
                '<Lane Index="%d" Name=%s Library=%s Genome=%s'
                ' PrimerName="" PrimerSeq=""/>' % (
                    lane_index,
                    attr(library.library_name),
                    attr(library.id),
                    attr(library.library_species.use_genome_build)))
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: '+fcid

    return HttpResponse(outputfile, content_type='text/plain')
303
def estimateFlowcellDuration(flowcell):
    """
    Estimate how long running a flowcell will take.

    The cycle count is the flowcell's read_length, doubled for paired end
    runs.  Each cycle is charged 1.5 hours of sequencing plus roughly 800
    seconds of pipeline analysis.  Returns a timedelta.
    """
    sequencing_per_cycle = 3600 * 1.5   # seconds: 1.5 hours per cycle
    pipeline_per_cycle = 800            # seconds: a rough guess

    read_length = flowcell.read_length
    cycles = read_length * 2 if flowcell.paired_end else read_length

    sequencing = timedelta(seconds=cycles * sequencing_per_cycle)
    analysis = timedelta(seconds=cycles * pipeline_per_cycle)
    return sequencing + analysis
322
def estimateFlowcellTimeRemaining(flowcell):
    """
    Estimate how much run time a flowcell still has ahead of it.

    This is the estimated total duration minus the time elapsed since
    flowcell.run_date, so the result is a timedelta that turns negative
    once the flowcell has been running longer than estimated.
    """
    total = estimateFlowcellDuration(flowcell)
    elapsed = timezone.now() - flowcell.run_date
    return total - elapsed
331
def roundToDays(estimate):
    """
    Bracket a time estimate between two whole-day timedeltas.

    Returns (low, high): low keeps only the days component of the
    estimate, high is exactly one day more.
    """
    whole_days = estimate.days
    low = timedelta(days=whole_days)
    high = timedelta(days=whole_days + 1)
    return low, high
342
343
def makeUserLaneMap(flowcell):
    """
    Map each interested user to the lanes of a flowcell.

    A user is interested in a lane when they belong to an affiliation of
    the lane's library.  A lane is listed once per affiliation that links
    it to the user.
    """
    user_lane_pairs = (
        (user, lane)
        for lane in flowcell.lane_set.all()
        for affiliation in lane.library.affiliations.all()
        for user in affiliation.users.all()
    )

    lanes_by_user = {}
    for user, lane in user_lane_pairs:
        if user not in lanes_by_user:
            lanes_by_user[user] = []
        lanes_by_user[user].append(lane)

    return lanes_by_user
357
def getUsersForFlowcell(flowcell):
    """
    Return the set of users who belong to an affiliation of any library
    on the flowcell's lanes.
    """
    return set(
        user
        for lane in flowcell.lane_set.all()
        for affiliation in lane.library.affiliations.all()
        for user in affiliation.users.all()
    )
367
def makeUserLibraryMap(libraries):
    """
    Given an iterable of libraries, return a mapping of users to the
    libraries they are interested in.

    A library is listed once per affiliation that links it to the user.
    """
    libraries_by_user = {}

    for library in libraries:
        members = [
            user
            for affiliation in library.affiliations.all()
            for user in affiliation.users.all()
        ]
        for user in members:
            libraries_by_user.setdefault(user, []).append(library)

    return libraries_by_user
381
def makeAffiliationLaneMap(flowcell):
    """
    Map each affiliation to the list of flowcell lanes whose library
    carries that affiliation.
    """
    lanes_by_affiliation = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            if affiliation in lanes_by_affiliation:
                lanes_by_affiliation[affiliation].append(lane)
            else:
                lanes_by_affiliation[affiliation] = [lane]

    return lanes_by_affiliation
390
def makeEmailLaneMap(flowcell):
    """
    Map email addresses to the set of flowcell lanes they are tied to.

    Addresses are taken from each affiliation of a lane's library and
    from every user of that affiliation; missing (None) and empty
    addresses are skipped.
    """
    lanes_by_email = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            # the affiliation's own address first, then its users'
            addresses = [affiliation.email]
            addresses.extend(user.email for user in affiliation.users.all())

            for address in addresses:
                if address is None or len(address) == 0:
                    continue
                lanes_by_email.setdefault(address, set()).add(lane)

    return lanes_by_email
405
406     return emails