Adds a json api 'lanes_for' feature
[htsworkflow.git] / htsworkflow / frontend / experiments / experiments.py
1 # some core functions of the exp tracker module
2 from datetime import datetime, timedelta
3 try:
4     import json
5 except ImportError, e:
6     import simplejson as json
7     
8 import os
9 import re
10
11 from django.contrib.auth.decorators import login_required
12 from django.core.exceptions import ObjectDoesNotExist
13 from django.core.mail import send_mail, mail_admins
14 from django.http import HttpResponse, Http404
15
16 from htsworkflow.frontend.auth import require_api_key
17 from htsworkflow.frontend import settings
18 from htsworkflow.frontend.experiments.models import FlowCell, DataRun, Lane
19 from htsworkflow.frontend.samples.models import Library, HTSUser
20
def flowcell_information(flowcell_id):
    """
    Return a dictionary describing a flowcell

    The flowcell is looked up by its flowcell_id and flattened, together
    with its lanes, into plain values. The lanes are stored under the
    'lane_set' key as a dictionary keyed by lane number.

    Returns None when no flowcell has the requested id.
    """
    try:
        flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
    except FlowCell.DoesNotExist:
        return None

    lanes = {}
    for lane in flowcell.lane_set.all():
        library = lane.library
        lanes[lane.lane_number] = {
            'cluster_estimate': lane.cluster_estimate,
            'comment': lane.comment,
            'experiment_type': library.experiment_type.name,
            'experiment_type_id': library.experiment_type_id,
            'flowcell': lane.flowcell.flowcell_id,
            'lane_number': int(lane.lane_number),
            'library_name': library.library_name,
            'library_id': library.id,
            'library_species': library.library_species.scientific_name,
            # pM is converted to text rather than passed through as-is
            'pM': unicode(lane.pM),
            'read_length': lane.flowcell.read_length
        }

    # the control lane is optional; only convert it when it is set
    control_lane = flowcell.control_lane
    if control_lane is not None:
        control_lane = int(control_lane)

    return {
        'advanced_run': flowcell.advanced_run,
        'cluster_station_id': flowcell.cluster_station_id,
        'cluster_station': flowcell.cluster_station.name,
        'control_lane': control_lane,
        # 'datarun_set': how should this be represented?,
        'flowcell_id': flowcell.flowcell_id,
        'id': flowcell.id,
        'lane_set': lanes,
        'notes': flowcell.notes,
        'paired_end': flowcell.paired_end,
        'read_length': flowcell.read_length,
        'run_date': flowcell.run_date.isoformat(),
        'sequencer_id': flowcell.sequencer_id,
        'sequencer': flowcell.sequencer.name,
    }
69
def flowcell_json(request, fc_id):
    """
    Return a JSON blob containing enough information to generate a config file.

    require_api_key is called on the request before anything else.
    Raises Http404 when flowcell_information finds nothing for fc_id.
    """
    require_api_key(request)

    description = flowcell_information(fc_id)
    if description is None:
        raise Http404

    return HttpResponse(json.dumps(description), mimetype='application/json')
83
def lanes_for(username=None):
    """
    Return lanes as a list of dictionaries, newest flowcell run date first.

    When username is given, only lanes whose library has an affiliation
    containing that user are returned; without a username every lane is
    returned. The user is fetched with HTSUser.objects.get, and any
    exception from that lookup propagates to the caller.
    """
    filters = {}
    if username is not None:
        user = HTSUser.objects.get(username=username)
        filters['library__affiliations__users__id'] = user.id

    lanes = Lane.objects.filter(**filters).order_by('-flowcell__run_date')

    return [{'flowcell': lane.flowcell.flowcell_id,
             'run_date': lane.flowcell.run_date.isoformat(),
             'lane_number': lane.lane_number,
             'library': lane.library.id,
             'comment': lane.comment}
            for lane in lanes]
103
def lanes_for_json(request, username):
    """
    Return the lanes for a user as a JSON response

    require_api_key is called on the request first. An
    ObjectDoesNotExist raised by lanes_for is turned into Http404.
    """
    require_api_key(request)

    try:
        lanes = lanes_for(username)
    except ObjectDoesNotExist:
        raise Http404

    # lanes_for already returns plain dictionaries, so dump them directly
    return HttpResponse(json.dumps(lanes), mimetype='application/json')
119                  
def updStatus(request):
    """
    Update the run status of the DataRun identified by a run folder.

    Parameters read from request.REQUEST:
      fcid  - flowcell id; only echoed back in the "entry not found" message
      runf  - run folder used to look up the DataRun
      updst - new status; also used as an index into
              DataRun.RUN_STATUS_CHOICES to build the reply
      msg   - optional text appended to the run note with a timestamp

    Access is granted only to a superuser connecting from an address that
    is a key of settings.ALLOWED_IPS. Every handled outcome is reported as
    the body of an HttpResponse. A status that cannot be used as an index
    into RUN_STATUS_CHOICES raises before the record is modified.
    """
    client_ip = request.META['REMOTE_ADDR']

    # 'none' is only a placeholder for the denial message when the
    # request carries no user attribute.
    user = getattr(request, 'user', 'none')

    # Check access permission. getattr keeps the placeholder string from
    # raising AttributeError; a request without a user is simply denied.
    is_superuser = getattr(user, 'is_superuser', False)
    if not (is_superuser and client_ip in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, client_ip))

    # ~~~~~~ Parameters for the job ~~~~
    params = request.REQUEST
    if 'fcid' not in params:
        return HttpResponse('missing fcid')
    fcid = params['fcid']

    if 'runf' not in params:
        return HttpResponse('missing runf')
    runfolder = params['runf']

    if 'updst' not in params:
        return HttpResponse('missing status')
    updated_status = params['updst']

    # Update Data Run status in DB
    try:
        rec = DataRun.objects.get(run_folder=runfolder)
    except ObjectDoesNotExist:
        return HttpResponse("entry not found: "+fcid+", "+runfolder)

    # Resolve the label before touching the record so that an unusable
    # status fails here instead of after it has been saved.
    # NOTE(review): the status is used as a list position, which assumes
    # RUN_STATUS_CHOICES is ordered by status value -- confirm in the model.
    status_label = str(DataRun.RUN_STATUS_CHOICES[int(updated_status)][1])

    rec.run_status = updated_status

    # if there's a message update the note too; timestamp has minute resolution
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
    if 'msg' in params:
        rec.run_note += ", "+params['msg']+" ("+timestamp+")"
    elif updated_status == '1':
        rec.run_note = "Started ("+timestamp+")"

    rec.save()

    # Notifying the researcher by email (send_mail / mail_admins) was tried
    # here and is disabled: it failed with
    # (49, "Can't assign requested address")
    output = "Hello "+settings.ALLOWED_IPS[client_ip]+". Updated to:'"+status_label+"'"
    return HttpResponse(output)
182
def generateConfile(request,fcid):
    """
    Build the text of a config file for the flowcell with id fcid.

    The text starts with four fixed settings (READ_LENGTH, ANALYSIS,
    GENOME_FILE, ELAND_MULTIPLE_INSTANCES) and is followed by a GENOME_DIR
    and an ELAND_GENOME line for each lane, both pointing at the
    scientific name of the lane's library species under /Volumes/Genomes/.
    Lines are joined with os.linesep.

    request is accepted but not used.

    Returns the config text, or the message
    'Entry not found for fcid  = <fcid>' when a lookup raises
    ObjectDoesNotExist.
    """
    # NOTE: the ALLOWED_IPS access check that used to live here is
    # disabled; getConfile, the caller visible in this file, does its own.

    config = ['READ_LENGTH 25',
              'ANALYSIS eland',
              'GENOME_FILE all_chr.fa',
              'ELAND_MULTIPLE_INSTANCES 8']
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            lane_prefix = str(lane.lane_number) + ":"
            species = lane.library.library_species.scientific_name
            config.append(lane_prefix + genome_dir + species)
            config.append(lane_prefix + eland_genome + species)
    except ObjectDoesNotExist:
        # Return the message as-is. Joining it like the config list would
        # put a line separator between every character of the string.
        return 'Entry not found for fcid  = '+fcid

    return os.linesep.join(config)
209
def getConfile(req):
    """
    Return the config file text stored for a run folder as text/plain.

    Only clients whose REMOTE_ADDR is a key of settings.ALLOWED_IPS are
    served. Both 'fcid' and 'runf' must be present in req.REQUEST,
    otherwise the body is 'Nothing found'.

    The DataRun is looked up by run folder. If its stored config_params
    does not start with a READ_LENGTH line, a config is generated with
    generateConfile and, when that result starts with READ_LENGTH, saved
    back to the record. A generated text that still does not match is
    reported as a failure message instead of being saved.
    """
    client_ip = req.META['REMOTE_ADDR']
    if client_ip not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: "+client_ip)

    params = req.REQUEST
    cnfgfile = 'Nothing found'
    if 'fcid' in params and 'runf' in params:
        fcid = params['fcid']
        runfolder = params['runf']
        try:
            rec = DataRun.objects.get(run_folder=runfolder) #,flowcell_id=fcid)
        except ObjectDoesNotExist:
            cnfgfile = 'Entry not found for RunFolder = '+runfolder
        else:
            cnfgfile = rec.config_params
            # no MULTILINE flag: only matches READ_LENGTH at the very start
            has_read_length = re.compile('^READ_LENGTH.+')
            if not has_read_length.search(cnfgfile):
                cnfgfile = generateConfile(req, fcid)
                if has_read_length.search(cnfgfile):
                    # reuse the record fetched above rather than querying again
                    rec.config_params = cnfgfile
                    rec.save()
                else:
                    cnfgfile = 'Failed generating config params for RunFolder = '+runfolder +', Flowcell id = '+ fcid+ ' Config Text:\n'+cnfgfile

    return HttpResponse(cnfgfile, mimetype='text/plain')
243
def getLaneLibs(req):
    """
    Return a SolexaResult XML document listing the libraries of a flowcell.

    Only clients whose REMOTE_ADDR is a key of settings.ALLOWED_IPS are
    served. The flowcell id is read from the 'fcid' request parameter.

    The document carries today's date as YYMMDD, the flowcell id, the
    client name stored in ALLOWED_IPS, and one Lane element for each of
    the lanes 1 to 8 with the library's name, id and genome build.

    The response is text/plain. Its body is an error message when 'fcid'
    is missing or a lookup raises ObjectDoesNotExist.
    """
    client_ip = req.META['REMOTE_ADDR']
    if client_ip not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    params = req.REQUEST
    if 'fcid' not in params:
        return HttpResponse('Missing input: flowcell id', mimetype='text/plain')
    fcid = params['fcid']

    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)
        # Ex: 071211
        mydate = datetime.today().strftime('%y%m%d')
        lines = ['<?xml version="1.0" ?>',
                 '<SolexaResult Date="%s" Flowcell="%s" Client="%s">'
                 % (mydate, fcid, settings.ALLOWED_IPS[client_ip])]
        # NOTE(review): the rest of this file reaches lanes through
        # flowcell.lane_set; confirm FlowCell still has lane_N_library.
        for index in range(1, 9):
            library = getattr(rec, 'lane_%d_library' % index)
            lines.append(
                '<Lane Index="%d" Name="%s" Library="%s" Genome="%s" PrimerName="" PrimerSeq=""/>'
                % (index,
                   library.library_name,
                   library.id,
                   library.library_species.use_genome_build))
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: '+fcid

    return HttpResponse(outputfile, mimetype='text/plain')
284
def estimateFlowcellDuration(flowcell):
    """
    Estimate how long it will take to run a flowcell

    Uses flowcell.read_length as the cycle count, doubled when
    flowcell.paired_end is set, and returns a timedelta covering both
    sequencing and pipeline analysis.
    """
    # per-cycle cost in seconds: 1.5 hours of sequencing plus a rough
    # guess of 800 seconds for the pipeline
    seconds_sequencing = 3600 * 1.5
    seconds_pipeline = 800

    cycle_count = flowcell.read_length
    if flowcell.paired_end:
        cycle_count = cycle_count * 2

    time_sequencing = timedelta(seconds=cycle_count * seconds_sequencing)
    time_pipeline = timedelta(seconds=cycle_count * seconds_pipeline)
    return time_sequencing + time_pipeline
303
def estimateFlowcellTimeRemaining(flowcell):
    """
    Estimate how much of a flowcell's run is still to come

    Takes the estimateFlowcellDuration for the flowcell and subtracts the
    time elapsed between flowcell.run_date and now. The timedelta that is
    returned is negative once the elapsed time exceeds the estimate.
    """
    total = estimateFlowcellDuration(flowcell)
    # how long we've been running so far
    elapsed = datetime.now() - flowcell.run_date
    return total - elapsed
312
def roundToDays(estimate):
    """
    Bracket a time estimate between two whole-day values

    Returns a (low, high) tuple of timedeltas: low keeps only the days
    component of the estimate, high is exactly one day more than low.
    """
    whole_days = estimate.days
    return (timedelta(days=whole_days), timedelta(days=whole_days + 1))
323     
324
def makeUserLaneMap(flowcell):
    """
    Map each interested user to the lanes of a flowcell

    A user is interested in a lane when they belong to one of the
    affiliations of the lane's library. The result is a dictionary of
    user -> list of lanes; a lane is listed once for every affiliation
    through which the user is reached.
    """
    lanes_by_user = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            for user in affiliation.users.all():
                if user not in lanes_by_user:
                    lanes_by_user[user] = []
                lanes_by_user[user].append(lane)

    return lanes_by_user
338
def getUsersForFlowcell(flowcell):
    """
    Return the set of users reachable from a flowcell

    Collects the users of every affiliation of every library that is on
    one of the flowcell's lanes.
    """
    interested = set()

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            interested.update(affiliation.users.all())

    return interested
348     
def makeUserLibraryMap(libraries):
    """
    Given an iterable of libraries return a mapping of the users
    interested in those libraries.

    The result is a dictionary of user -> list of libraries; a library is
    listed once for every affiliation through which the user is reached.
    """
    libraries_by_user = {}

    for library in libraries:
        for affiliation in library.affiliations.all():
            for user in affiliation.users.all():
                if user not in libraries_by_user:
                    libraries_by_user[user] = []
                libraries_by_user[user].append(library)

    return libraries_by_user
362
def makeAffiliationLaneMap(flowcell):
    """
    Map each affiliation to the lanes of a flowcell

    An affiliation is associated with a lane when it is one of the
    affiliations of the lane's library. The result is a dictionary of
    affiliation -> list of lanes.
    """
    lanes_by_affiliation = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            if affiliation not in lanes_by_affiliation:
                lanes_by_affiliation[affiliation] = []
            lanes_by_affiliation[affiliation].append(lane)

    return lanes_by_affiliation
371
def makeEmailLaneMap(flowcell):
    """
    Map email addresses to the set of flowcell lanes they are associated with.

    Addresses are taken from the affiliations of each lane's library and
    from the users of those affiliations. Missing (None) and empty
    addresses are skipped.
    """
    lanes_by_email = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            # the affiliation's own address first, then its users'
            addresses = [affiliation.email]
            addresses.extend(user.email for user in affiliation.users.all())
            for address in addresses:
                if address is None or len(address) == 0:
                    continue
                lanes_by_email.setdefault(address, set()).add(lane)

    return lanes_by_email