Add support for tracking the multiplex index sequence.
[htsworkflow.git] / htsworkflow / frontend / experiments / experiments.py
1 # some core functions of the exp tracker module
2 from datetime import datetime, timedelta
3 try:
4     import json
5 except ImportError, e:
6     import simplejson as json
7
8 import os
9 import re
10
11 from django.contrib.auth.decorators import login_required
12 from django.core.exceptions import ObjectDoesNotExist
13 from django.core.mail import send_mail, mail_admins
14 from django.http import HttpResponse, Http404
15 from django.conf import settings
16
17 from htsworkflow.frontend.auth import require_api_key
18 from htsworkflow.frontend.experiments.models import \
19     FlowCell, \
20     DataRun, \
21     Lane, \
22     LANE_STATUS_MAP
23 from htsworkflow.frontend.samples.models import Library, MultiplexIndex, HTSUser
24
def flowcell_information(flowcell_id):
    """
    Describe a flowcell as a dictionary of plain values.

    The flowcell is fetched with a ``flowcell_id__startswith`` lookup, so
    a prefix of the stored id is enough to select it.  When no flowcell
    matches, None is returned instead of a dictionary.

    The 'lane_set' entry maps each lane number to a list of per-lane
    dictionaries; a lane only carries an 'index_sequence' key when its
    library reports index sequences.
    """
    # NOTE(review): a prefix shared by several flowcells is not handled
    # here; presumably .get() raises in that case -- confirm.
    try:
        flowcell = FlowCell.objects.get(flowcell_id__startswith=flowcell_id)
    except FlowCell.DoesNotExist:
        return None

    lanes_by_number = {}
    for lane in flowcell.lane_set.all():
        library = lane.library
        description = {
            'cluster_estimate': lane.cluster_estimate,
            'comment': lane.comment,
            'experiment_type': library.experiment_type.name,
            'experiment_type_id': library.experiment_type_id,
            'flowcell': lane.flowcell.flowcell_id,
            'lane_number': lane.lane_number,
            'library_name': library.library_name,
            'library_id': library.id,
            'library_species': library.library_species.scientific_name,
            'pM': unicode(lane.pM),
            'read_length': lane.flowcell.read_length,
            'status_code': lane.status,
            'status': LANE_STATUS_MAP[lane.status]
        }

        # only multiplexed libraries report index sequences
        index_sequences = library.index_sequences()
        if index_sequences is not None:
            description['index_sequence'] = index_sequences

        # several entries may share one lane number, hence a list per lane
        lanes_by_number.setdefault(lane.lane_number, []).append(description)

    if flowcell.control_lane is None:
        control_lane = None
    else:
        control_lane = int(flowcell.control_lane)

    return {
        'advanced_run': flowcell.advanced_run,
        'cluster_station_id': flowcell.cluster_station_id,
        'cluster_station': flowcell.cluster_station.name,
        'control_lane': control_lane,
        # 'datarun_set': how should this be represented?,
        'flowcell_id': flowcell.flowcell_id,
        'id': flowcell.id,
        'lane_set': lanes_by_number,
        'notes': flowcell.notes,
        'paired_end': flowcell.paired_end,
        'read_length': flowcell.read_length,
        'run_date': flowcell.run_date.isoformat(),
        'sequencer_id': flowcell.sequencer_id,
        'sequencer': flowcell.sequencer.name,
    }
80
def flowcell_json(request, fc_id):
    """
    Serve the description of a flowcell as JSON.

    The request is first passed to require_api_key.  The dictionary built
    by flowcell_information(fc_id) is serialized and returned with the
    'application/json' mimetype; Http404 is raised when that function
    returns None.
    """
    require_api_key(request)

    description = flowcell_information(fc_id)
    if description is None:
        raise Http404

    return HttpResponse(json.dumps(description), mimetype = 'application/json')
94
def lanes_for(username=None):
    """
    Return lanes, newest flowcell run first, as a list of dictionaries.

    When username is given, only lanes whose library is affiliated with
    that HTSUser are returned; with username=None no filter is applied.
    A failed user lookup is not caught here and propagates to the caller.
    """
    criteria = {}
    if username is not None:
        user = HTSUser.objects.get(username=username)
        criteria['library__affiliations__users__id'] = user.id

    lanes = Lane.objects.filter(**criteria).order_by('-flowcell__run_date')

    def describe(lane):
        # flatten one lane into JSON-friendly values
        affiliation_pairs = [(aff.id, aff.name)
                             for aff in lane.library.affiliations.all()]
        return {'flowcell': lane.flowcell.flowcell_id,
                'run_date': lane.flowcell.run_date.isoformat(),
                'lane_number': lane.lane_number,
                'library': lane.library.id,
                'library_name': lane.library.library_name,
                'comment': lane.comment,
                'affiliations': affiliation_pairs}

    return [describe(lane) for lane in lanes]
119
def lanes_for_json(request, username):
    """
    Serve the lanes associated with a user as JSON.

    The request is first passed to require_api_key.  Http404 is raised
    when lanes_for(username) fails with ObjectDoesNotExist.
    """
    require_api_key(request)

    try:
        lanes = lanes_for(username)
    except ObjectDoesNotExist:
        raise Http404

    # lanes_for already returns plain python structures
    return HttpResponse(json.dumps(lanes), mimetype='application/json')
135
136
def updStatus(request):
    """
    Update the status (and optionally the note) of a DataRun.

    Request parameters, read from request.REQUEST:
      fcid  -- flowcell id; only echoed in the "entry not found" message
      runf  -- run folder used to look up the DataRun
      updst -- new value stored in run_status
      msg   -- optional text appended to run_note together with a timestamp

    Access is limited to superusers whose REMOTE_ADDR is a key of
    settings.ALLOWED_IPS.  Every outcome, including denial and missing
    parameters, is reported as the body of an HttpResponse.
    """
    ClIP = request.META['REMOTE_ADDR']

    # Requests that carry no user object fall back to the placeholder
    # string 'none', which has no is_superuser attribute; treat that as
    # "not a superuser" instead of failing with AttributeError.
    user = getattr(request, 'user', 'none')
    is_superuser = getattr(user, 'is_superuser', False)

    #Check access permission
    if not (is_superuser and ClIP in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, ClIP))

    # ~~~~~~Parameters for the job ~~~~
    params = request.REQUEST
    if 'fcid' not in params:
        return HttpResponse('missing fcid')
    fcid = params['fcid']

    if 'runf' not in params:
        return HttpResponse('missing runf')
    runfolder = params['runf']

    if 'updst' not in params:
        return HttpResponse('missing status')
    UpdatedStatus = params['updst']
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Update Data Run status in DB
    try:
        rec = DataRun.objects.get(run_folder=runfolder)
    except ObjectDoesNotExist:
        return HttpResponse("entry not found: "+fcid+", "+runfolder)

    rec.run_status = UpdatedStatus

    # timestamp with minute resolution, e.g. "2009-01-31 14:05"
    mytimestamp = datetime.now().strftime('%Y-%m-%d %H:%M')

    #if there's a message update that too
    if 'msg' in params:
        # tolerate a run_note that has never been set
        rec.run_note = (rec.run_note or '') + ", "+params['msg']+" ("+mytimestamp+")"
    elif UpdatedStatus == '1':
        rec.run_note = "Started ("+mytimestamp+")"

    rec.save()

    # NOTE(review): the label is looked up by position, which assumes the
    # status values are indexes into RUN_STATUS_CHOICES; a non-numeric or
    # out-of-range updst raises here, after the record was saved -- confirm
    # whether that should be validated up front.
    status_label = str(DataRun.RUN_STATUS_CHOICES[int(UpdatedStatus)][1])
    output = "Hello "+settings.ALLOWED_IPS[ClIP]+". Updated to:'"+status_label+"'"

    # Notifying the researcher by email (send_mail / mail_admins) was tried
    # at this point but did not work: it failed with
    # (49, "Can't assign requested address").
    return HttpResponse(output)
199
def generateConfile(request,fcid):
    """
    Build the text of an analysis config file for a flowcell.

    The result starts with four fixed settings (READ_LENGTH, ANALYSIS,
    GENOME_FILE, ELAND_MULTIPLE_INSTANCES), followed by a GENOME_DIR and
    an ELAND_GENOME line for every lane of the flowcell, each prefixed
    with the lane number.  Lines are joined with os.linesep.

    request is accepted but not used.

    When the lookup fails with ObjectDoesNotExist, a single-line
    'Entry not found ...' message is returned instead of config text.
    """
    # NOTE: no access check is performed here; the IP based check that
    # used to guard this function was disabled.

    config = [
        'READ_LENGTH 25',
        'ANALYSIS eland',
        'GENOME_FILE all_chr.fa',
        'ELAND_MULTIPLE_INSTANCES 8',
    ]
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            lane_prefix = str(lane.lane_number) + ":"
            species = lane.library.library_species.scientific_name
            config.append(lane_prefix + genome_dir + species)
            config.append(lane_prefix + eland_genome + species)
    except ObjectDoesNotExist:
        # Return the message directly: joining a plain string with
        # os.linesep would put every character on its own line.
        return 'Entry not found for fcid  = '+fcid

    return os.linesep.join(config)
226
def getConfile(req):
    """
    Serve the stored config parameters of a DataRun as plain text.

    Only clients whose REMOTE_ADDR is a key of settings.ALLOWED_IPS are
    served.  The DataRun is selected by the 'runf' request parameter; the
    'fcid' parameter is also required.  If the stored config_params has
    no READ_LENGTH setting at its start, a config is generated with
    generateConfile and, when that result does start with READ_LENGTH,
    saved back on the DataRun.

    If either parameter is missing the body is 'Nothing found'.
    """
    client_ip = req.META['REMOTE_ADDR']
    if client_ip not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: "+client_ip)

    params = req.REQUEST
    if 'fcid' not in params or 'runf' not in params:
        return HttpResponse('Nothing found', mimetype='text/plain')

    fcid = params['fcid']
    runfolder = params['runf']
    has_read_length = re.compile('^READ_LENGTH.+').search

    try:
        rec = DataRun.objects.get(run_folder=runfolder) #,flowcell_id=fcid)
        config_text = rec.config_params
        if not has_read_length(config_text):
            # nothing usable stored yet, try to build it from the flowcell
            config_text = generateConfile(params, fcid)
            if has_read_length(config_text):
                rec = DataRun.objects.get(run_folder=runfolder) #,flowcell_id=fcid)
                rec.config_params = config_text
                rec.save()
            else:
                config_text = 'Failed generating config params for RunFolder = '+runfolder +', Flowcell id = '+ fcid+ ' Config Text:\n'+config_text
    except ObjectDoesNotExist:
        config_text = 'Entry not found for RunFolder = '+runfolder

    return HttpResponse(config_text, mimetype='text/plain')
260
def getLaneLibs(req):
    """
    Serve an XML-like listing of the libraries on the lanes of a flowcell.

    Only clients whose REMOTE_ADDR is a key of settings.ALLOWED_IPS are
    served.  The flowcell is selected by the 'fcid' request parameter.
    The response is sent as text/plain and contains a SolexaResult
    element, stamped with today's date as YYMMDD (Ex: 071211), holding
    one Lane element for each of the lanes 1 through 8.

    Failures ('access denied.', missing parameter, unknown flowcell) are
    reported as the body of the response.
    """
    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    request = req.REQUEST
    if 'fcid' not in request:
        return HttpResponse('Missing input: flowcell id', mimetype='text/plain')

    fcid = request['fcid']
    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)

        # two digit year, month and day, all zero padded
        mydate = datetime.today().strftime('%y%m%d')

        lines = ['<?xml version="1.0" ?>']
        lines.append('<SolexaResult Date="'+mydate+'" Flowcell="'+fcid+'" Client="'+settings.ALLOWED_IPS[ClIP]+'">')
        for lane_index in range(1, 9):
            # NOTE(review): relies on the flowcell exposing attributes
            # lane_1_library .. lane_8_library -- confirm they still exist.
            library = getattr(rec, 'lane_%d_library' % lane_index)
            lines.append(
                '<Lane Index="%d" Name="%s" Library="%s" Genome="%s" PrimerName="" PrimerSeq=""/>' % (
                    lane_index,
                    library.library_name,
                    library.id,
                    library.library_species.use_genome_build))
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: '+fcid

    return HttpResponse(outputfile, mimetype='text/plain')
301
def estimateFlowcellDuration(flowcell):
    """
    Estimate how long running a flowcell will take.

    The number of cycles is flowcell.read_length, doubled when
    flowcell.paired_end is true.  Every cycle is charged a fixed amount
    of sequencing time plus a fixed amount of pipeline time.

    Returns the estimate as a timedelta.
    """
    # 1.5 hours per cycle, expressed in seconds
    sequencing_seconds_per_cycle = 3600 * 1.5
    # 800 is a rough guess
    pipeline_seconds_per_cycle = 800

    if flowcell.paired_end:
        cycle_count = flowcell.read_length * 2
    else:
        cycle_count = flowcell.read_length

    time_on_sequencer = timedelta(
        seconds=cycle_count * sequencing_seconds_per_cycle)
    time_in_pipeline = timedelta(
        seconds=cycle_count * pipeline_seconds_per_cycle)

    return time_on_sequencer + time_in_pipeline
320
def estimateFlowcellTimeRemaining(flowcell):
    """
    Estimate how much run time a flowcell still has ahead of it.

    This is the estimated total duration minus the time elapsed between
    flowcell.run_date and now, returned as a timedelta.  Nothing clamps
    the result, so it is negative once the elapsed time exceeds the
    estimate.
    """
    total_estimate = estimateFlowcellDuration(flowcell)

    # offset for how long we've been running
    elapsed = datetime.now() - flowcell.run_date

    return total_estimate - elapsed
329
def roundToDays(estimate):
    """
    Bracket a time estimate between two whole-day timedeltas.

    Returns a (low, high) tuple: low keeps only the days component of
    estimate, high is exactly one day more than low.
    """
    whole_days = estimate.days

    lower_bound = timedelta(days=whole_days)
    upper_bound = timedelta(days=whole_days + 1)

    return (lower_bound, upper_bound)
340
341
def makeUserLaneMap(flowcell):
    """
    Map each interested user to the lanes of a flowcell.

    A user is interested in a lane when the user belongs to one of the
    affiliations of the library on that lane.  A lane is listed once for
    every affiliation that links it to the user.
    """
    lanes_by_user = {}

    for lane in flowcell.lane_set.all():
        affiliations = lane.library.affiliations.all()
        for affiliation in affiliations:
            for user in affiliation.users.all():
                if user not in lanes_by_user:
                    lanes_by_user[user] = []
                lanes_by_user[user].append(lane)

    return lanes_by_user
355
def getUsersForFlowcell(flowcell):
    """
    Return the set of users reachable from a flowcell.

    The users of every affiliation of every library on the lanes of the
    flowcell are collected.
    """
    interested = set()

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            interested.update(affiliation.users.all())

    return interested
365
def makeUserLibraryMap(libraries):
    """
    Given an iterable of libraries return a mapping of the users
    interested in those libraries to the libraries themselves.

    A library is listed once for every affiliation that links it to
    the user.
    """
    libraries_by_user = {}

    for library in libraries:
        affiliations = library.affiliations.all()
        for affiliation in affiliations:
            for user in affiliation.users.all():
                if user not in libraries_by_user:
                    libraries_by_user[user] = []
                libraries_by_user[user].append(library)

    return libraries_by_user
379
def makeAffiliationLaneMap(flowcell):
    """
    Map each affiliation to the lanes of a flowcell whose library
    belongs to that affiliation.
    """
    lanes_by_affiliation = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            if affiliation not in lanes_by_affiliation:
                lanes_by_affiliation[affiliation] = []
            lanes_by_affiliation[affiliation].append(lane)

    return lanes_by_affiliation
388
def makeEmailLaneMap(flowcell):
    """
    Map email addresses to the set of lanes they are associated with.

    Addresses are taken both from the affiliations of the library on
    each lane and from the users of those affiliations.  Missing (None)
    and empty addresses are skipped.
    """
    lanes_by_email = {}

    def remember(address, lane):
        # record the lane under the address, ignoring blank addresses
        if address is None or len(address) == 0:
            return
        lanes_by_email.setdefault(address, set()).add(lane)

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            remember(affiliation.email, lane)
            for user in affiliation.users.all():
                remember(user.email, lane)

    return lanes_by_email