remove extra blank line
[htsworkflow.git] / experiments / experiments.py
1 from __future__ import absolute_import, print_function, unicode_literals
2
3 # some core functions of the exp tracker module
4 from datetime import datetime, timedelta
5 import os
6 import re
7
8 from django.contrib.auth.decorators import login_required
9 from django.views.decorators.csrf import csrf_exempt
10 from django.core.exceptions import ObjectDoesNotExist
11 from django.core.mail import send_mail, mail_admins
12 from django.http import HttpResponse, Http404, JsonResponse
13 from django.conf import settings
14 from django.utils import timezone
15
16 from htsworkflow.auth import require_api_key
17 from .models import FlowCell, DataRun, Lane, LANE_STATUS_MAP
18 from samples.models import Library, MultiplexIndex, HTSUser
19
def flowcell_information(flowcell_id):
    """
    Describe one flowcell as a plain dictionary.

    The flowcell is found by a prefix match on its flowcell_id.  Returns
    None when no flowcell matches.  Otherwise returns a dictionary of
    flowcell attributes; its 'lane_set' entry maps each lane number to a
    list of per-lane dictionaries.
    """
    try:
        flowcell = FlowCell.objects.get(flowcell_id__startswith=flowcell_id)
    except FlowCell.DoesNotExist:
        return None

    lanes_by_number = {}
    for lane in flowcell.lane_set.all():
        description = {
            'cluster_estimate': lane.cluster_estimate,
            'comment': lane.comment,
            'experiment_type': lane.library.experiment_type.name,
            'experiment_type_id': lane.library.experiment_type_id,
            'flowcell': lane.flowcell.flowcell_id,
            'lane_number': lane.lane_number,
            'library_name': lane.library.library_name,
            'library_id': lane.library.id,
            'library_species': lane.library.library_species.scientific_name,
            'pM': str(lane.pM),
            'read_length': lane.flowcell.read_length,
            'status_code': lane.status,
            'status': LANE_STATUS_MAP[lane.status]
        }

        # Only report index sequences when the library provides them.
        index_sequences = lane.library.index_sequences()
        if index_sequences is not None:
            description['index_sequence'] = index_sequences

        # Several entries may share a lane number, so group them in a list.
        if lane.lane_number not in lanes_by_number:
            lanes_by_number[lane.lane_number] = []
        lanes_by_number[lane.lane_number].append(description)

    control_lane = None if flowcell.control_lane is None else int(flowcell.control_lane)

    return {
        'advanced_run': flowcell.advanced_run,
        'cluster_station_id': flowcell.cluster_station_id,
        'cluster_station': flowcell.cluster_station.name,
        'control_lane': control_lane,
        # 'datarun_set': how should this be represented?,
        'flowcell_id': flowcell.flowcell_id,
        'id': flowcell.id,
        'lane_set': lanes_by_number,
        'notes': flowcell.notes,
        'paired_end': flowcell.paired_end,
        'read_length': flowcell.read_length,
        'run_date': flowcell.run_date.isoformat(),
        'sequencer_id': flowcell.sequencer_id,
        'sequencer': flowcell.sequencer.name,
    }
75
@csrf_exempt
def flowcell_json(request, fc_id):
    """
    Serve the flowcell description as JSON, for generating a config file.

    The request is passed to require_api_key first.  Raises Http404 when
    flowcell_information finds no flowcell for fc_id.
    """
    require_api_key(request)

    description = flowcell_information(fc_id)
    if description is not None:
        return JsonResponse({'result': description})

    raise Http404
89
def lanes_for(username=None):
    """
    Return lanes, most recent run date first, as a list of dictionaries.

    When a username is given only lanes whose library is affiliated with
    that user are returned; HTSUser.DoesNotExist propagates if there is no
    such user.  Without a username every lane is returned.
    """
    def describe(lane):
        # Each affiliation is reported as an (id, name) pair.
        affiliation_pairs = [(aff.id, aff.name)
                             for aff in lane.library.affiliations.all()]
        return {
            'flowcell': lane.flowcell.flowcell_id,
            'run_date': lane.flowcell.run_date.isoformat(),
            'lane_number': lane.lane_number,
            'library': lane.library.id,
            'library_name': lane.library.library_name,
            'comment': lane.comment,
            'affiliations': affiliation_pairs,
        }

    filters = {}
    if username is not None:
        owner = HTSUser.objects.get(username=username)
        filters['library__affiliations__users__id'] = owner.id

    newest_first = Lane.objects.filter(**filters).order_by('-flowcell__run_date')
    return [describe(lane) for lane in newest_first]
113
@csrf_exempt
def lanes_for_json(request, username):
    """
    Serve the lanes belonging to a user as JSON.

    The request is passed to require_api_key first.  A lookup failure
    (ObjectDoesNotExist) inside lanes_for is reported as Http404.
    """
    require_api_key(request)

    try:
        lanes = lanes_for(username)
    except ObjectDoesNotExist:
        raise Http404

    # lanes_for already returns plain python structures
    return JsonResponse({'result': lanes})
129
130
def updStatus(request):
    """
    Update the run status, and optionally the run note, of a DataRun.

    Access is limited to superusers connecting from an address listed in
    settings.ALLOWED_IPS; anyone else receives an "access denied" message.

    Request parameters:
      fcid  -- flowcell id; only used in the "entry not found" message
      runf  -- run folder used to look up the DataRun record
      updst -- new value for the record's run_status
      msg   -- optional text appended (with a timestamp) to run_note

    Always returns an HttpResponse whose body is a short text message
    describing the outcome.
    """
    client_ip = request.META['REMOTE_ADDR']

    # When no user is attached to the request fall back to a placeholder
    # name; it only appears in the denial message below.
    user = getattr(request, 'user', 'none')

    # Check access permission.  The placeholder has no is_superuser
    # attribute, so read it defensively and deny instead of raising
    # AttributeError.
    is_superuser = getattr(user, 'is_superuser', False)
    if not (is_superuser and client_ip in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, client_ip))

    # HttpRequest.REQUEST no longer exists in newer Django releases; use it
    # when present, otherwise merge GET and POST with POST taking precedence.
    params = getattr(request, 'REQUEST', None)
    if params is None:
        params = request.GET.copy()
        params.update(request.POST)

    # ~~~~~~ Parameters for the job ~~~~
    if 'fcid' not in params:
        return HttpResponse('missing fcid')
    fcid = params['fcid']

    if 'runf' not in params:
        return HttpResponse('missing runf')
    runfolder = params['runf']

    if 'updst' not in params:
        return HttpResponse('missing status')
    UpdatedStatus = params['updst']

    # Update Data Run status in DB.
    # If the record is not found report 'entry not found' with the flowcell
    # id and run folder; otherwise update it and report the new status.
    try:
        rec = DataRun.objects.get(run_folder=runfolder)
        rec.run_status = UpdatedStatus

        # Timestamp with everything after the last ':' removed.
        # NOTE(review): for a timezone-aware value this strips the minutes
        # of the UTC offset rather than the seconds -- confirm intent.
        mytimestamp = str(timezone.now())
        mytimestamp = re.sub(pattern=":[^:]*$", repl="", string=mytimestamp)

        # If there's a message append it to the note; otherwise status '1'
        # resets the note to a "Started" marker.
        if 'msg' in params:
            rec.run_note += ", "+params['msg']+" ("+mytimestamp+")"
        elif UpdatedStatus == '1':
            rec.run_note = "Started ("+mytimestamp+")"

        rec.save()
        status_label = str(DataRun.RUN_STATUS_CHOICES[int(UpdatedStatus)][1])
        output = "Hello "+settings.ALLOWED_IPS[client_ip]+". Updated to:'"+status_label+"'"
    except ObjectDoesNotExist:
        output = "entry not found: "+fcid+", "+runfolder

    # TODO: notify the researcher by email.  Earlier attempts with
    # send_mail / mail_admins failed with
    # (49, "Can't assign requested address").
    return HttpResponse(output)
192
def generateConfile(request,fcid):
    """
    Build the text of a pipeline config file for a flowcell.

    request -- unused; kept so existing callers keep working
    fcid    -- exact flowcell_id of the FlowCell to describe

    Returns the config lines joined with os.linesep: four fixed header
    lines followed by a GENOME_DIR and an ELAND_GENOME line per lane.
    If the flowcell (or a related record) cannot be found, returns a
    one-line 'Entry not found' message instead.
    """
    # NOTE: no client-IP access check is performed here (the original check
    # was commented out); getConfile checks access before calling this.
    config = [
        'READ_LENGTH 25',
        'ANALYSIS eland',
        'GENOME_FILE all_chr.fa',
        'ELAND_MULTIPLE_INSTANCES 8',
    ]
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            lane_prefix = str(lane.lane_number) + ":"
            species = lane.library.library_species.scientific_name
            config.append(lane_prefix + genome_dir + species)
            config.append(lane_prefix + eland_genome + species)
    except ObjectDoesNotExist:
        # Return the message as-is.  Joining a plain string with
        # os.linesep would put a line break between every character.
        return 'Entry not found for fcid  = '+fcid

    return os.linesep.join(config)
219
def getConfile(req):
    """
    Return the stored config parameters for a run folder as plain text.

    Only clients whose address is listed in settings.ALLOWED_IPS are
    served.  Both the 'fcid' and 'runf' request parameters are needed;
    without them the response body is 'Nothing found'.

    If the DataRun's config_params does not start with a READ_LENGTH line,
    a config is generated with generateConfile, stored on the record when
    it looks valid, and returned.  Otherwise a failure message is returned.
    """
    client_ip = req.META['REMOTE_ADDR']
    if client_ip not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: "+client_ip)

    # HttpRequest.REQUEST no longer exists in newer Django releases; use it
    # when present, otherwise merge GET and POST with POST taking precedence.
    request = getattr(req, 'REQUEST', None)
    if request is None:
        request = req.GET.copy()
        request.update(req.POST)

    cnfgfile = 'Nothing found'
    if 'fcid' in request and 'runf' in request:
        fcid = request['fcid']
        runfolder = request['runf']
        # A usable config begins with a READ_LENGTH line.
        match_str = re.compile(r'^READ_LENGTH.+')
        try:
            rec = DataRun.objects.get(run_folder=runfolder) #,flowcell_id=fcid)
            # Treat an unset value as empty so the regex search cannot fail.
            cnfgfile = rec.config_params or ''
            if not match_str.search(cnfgfile):
                cnfgfile = generateConfile(request,fcid)
                if match_str.search(cnfgfile):
                    # Cache the generated config on the record just fetched.
                    rec.config_params = cnfgfile
                    rec.save()
                else:
                    cnfgfile = 'Failed generating config params for RunFolder = '+runfolder +', Flowcell id = '+ fcid+ ' Config Text:\n'+cnfgfile
        except ObjectDoesNotExist:
            cnfgfile = 'Entry not found for RunFolder = '+runfolder

    return HttpResponse(cnfgfile, content_type='text/plain')
253
def getLaneLibs(req):
    """
    Return an XML description of the libraries on a flowcell's eight lanes.

    Only clients whose address is listed in settings.ALLOWED_IPS are
    served.  The flowcell is selected with the 'fcid' request parameter.
    The response is always text/plain: the XML document on success, or a
    short message when the parameter is missing or the flowcell is unknown.
    """
    client_ip = req.META['REMOTE_ADDR']
    if client_ip not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    # HttpRequest.REQUEST no longer exists in newer Django releases; use it
    # when present, otherwise merge GET and POST with POST taking precedence.
    request = getattr(req, 'REQUEST', None)
    if request is None:
        request = req.GET.copy()
        request.update(req.POST)

    if 'fcid' not in request:
        return HttpResponse('Missing input: flowcell id', content_type='text/plain')

    fcid = request['fcid']
    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)
        # Two digit year, month and day.  Ex: 071211
        mydate = datetime.today().strftime('%y%m%d')
        lines = [
            '<?xml version="1.0" ?>',
            '<SolexaResult Date="'+mydate+'" Flowcell="'+fcid+'" Client="'+settings.ALLOWED_IPS[client_ip]+'">',
        ]
        # NOTE(review): this relies on FlowCell exposing lane_1_library ..
        # lane_8_library, while other helpers in this module walk
        # flowcell.lane_set -- confirm these attributes still exist.
        for index in range(1, 9):
            library = getattr(rec, 'lane_%d_library' % index)
            lines.append('<Lane Index="'+str(index)+'" Name="'+library.library_name+'" Library="'+library.id+'" Genome="'+library.library_species.use_genome_build+'" PrimerName="" PrimerSeq=""/>')
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: '+fcid

    return HttpResponse(outputfile, content_type='text/plain')
294
def estimateFlowcellDuration(flowcell):
    """
    Estimate how long sequencing plus analysis of a flowcell will take.

    The number of cycles is the flowcell's read_length, doubled for a
    paired end run.  Returns a timedelta.
    """
    # 1.5 hours of sequencing per cycle
    sequencing_rate = 3600 * 1.5
    # 800 seconds of pipeline time per cycle is a rough guess
    pipeline_rate = 800

    if flowcell.paired_end:
        total_cycles = flowcell.read_length * 2
    else:
        total_cycles = flowcell.read_length

    sequencing = timedelta(seconds=total_cycles * sequencing_rate)
    analysis = timedelta(seconds=total_cycles * pipeline_rate)
    return sequencing + analysis
313
def estimateFlowcellTimeRemaining(flowcell):
    """
    Estimate the time left before a flowcell finishes.

    This is the estimated total duration minus the time elapsed since the
    flowcell's run_date.  Returns a timedelta.
    """
    total_duration = estimateFlowcellDuration(flowcell)

    # how long the run has been going so far
    elapsed = timezone.now() - flowcell.run_date

    return total_duration - elapsed
322
def roundToDays(estimate):
    """
    Bracket a time estimate between two whole-day values.

    Returns a (low, high) tuple of timedeltas: the estimate's day count,
    and that count plus one day.
    """
    whole_days = estimate.days
    return (timedelta(days=whole_days), timedelta(days=whole_days + 1))
333
334
def makeUserLaneMap(flowcell):
    """
    Map each interested user to the lanes of a flowcell.

    A user is interested in a lane when they belong to an affiliation of
    the lane's library.  A lane is listed once per affiliation that links
    it to the user.
    """
    lanes_by_user = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            for member in affiliation.users.all():
                if member not in lanes_by_user:
                    lanes_by_user[member] = []
                lanes_by_user[member].append(lane)

    return lanes_by_user
348
def getUsersForFlowcell(flowcell):
    """
    Return the set of users affiliated with any library on the flowcell.
    """
    interested = set()

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            interested.update(affiliation.users.all())

    return interested
358
def makeUserLibraryMap(libraries):
    """
    Map each interested user to the libraries they are affiliated with.

    libraries is any iterable of libraries.  A library is listed once per
    affiliation that links it to the user.
    """
    libraries_by_user = {}

    for library in libraries:
        for affiliation in library.affiliations.all():
            for member in affiliation.users.all():
                if member not in libraries_by_user:
                    libraries_by_user[member] = []
                libraries_by_user[member].append(library)

    return libraries_by_user
372
def makeAffiliationLaneMap(flowcell):
    """
    Map each affiliation to the flowcell lanes whose library belongs to it.
    """
    lanes_by_affiliation = {}

    for lane in flowcell.lane_set.all():
        for group in lane.library.affiliations.all():
            if group not in lanes_by_affiliation:
                lanes_by_affiliation[group] = []
            lanes_by_affiliation[group].append(lane)

    return lanes_by_affiliation
381
def makeEmailLaneMap(flowcell):
    """
    Map email addresses to the set of flowcell lanes relevant to them.

    Addresses are taken from each affiliation of a lane's library and from
    every user of that affiliation.  Missing (None) or empty addresses are
    skipped.
    """
    lanes_by_email = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            # The affiliation's own address first, then its users'.
            addresses = [affiliation.email]
            addresses.extend(member.email for member in affiliation.users.all())

            for address in addresses:
                if address is None or len(address) == 0:
                    continue
                if address not in lanes_by_email:
                    lanes_by_email[address] = set()
                lanes_by_email[address].add(lane)

    return lanes_by_email