Return affiliation, library name, and comment in the lanes_for json
[htsworkflow.git] / htsworkflow / frontend / experiments / experiments.py
1 # some core functions of the exp tracker module
2 from datetime import datetime, timedelta
3 try:
4     import json
5 except ImportError, e:
6     import simplejson as json
7     
8 import os
9 import re
10
11 from django.contrib.auth.decorators import login_required
12 from django.core.exceptions import ObjectDoesNotExist
13 from django.core.mail import send_mail, mail_admins
14 from django.http import HttpResponse, Http404
15
16 from htsworkflow.frontend.auth import require_api_key
17 from htsworkflow.frontend import settings
18 from htsworkflow.frontend.experiments.models import FlowCell, DataRun, Lane
19 from htsworkflow.frontend.samples.models import Library, HTSUser
20
def flowcell_information(flowcell_id):
    """
    Return a dictionary describing a flowcell

    The FlowCell is looked up by its flowcell_id.  Returns None when no
    such flowcell exists.

    In the returned dictionary 'lane_set' maps each lane's lane_number
    to a dictionary describing that lane and the library on it, and
    'run_date' is the ISO formatted run date.
    """
    try:
        fc = FlowCell.objects.get(flowcell_id=flowcell_id)
    except FlowCell.DoesNotExist:
        return None

    lane_set = {}
    for lane in fc.lane_set.all():
        library = lane.library
        lane_set[lane.lane_number] = {
            'cluster_estimate': lane.cluster_estimate,
            'comment': lane.comment,
            'experiment_type': library.experiment_type.name,
            'experiment_type_id': library.experiment_type_id,
            # every lane in fc.lane_set belongs to fc, so the flowcell
            # values are read from fc rather than through lane.flowcell
            'flowcell': fc.flowcell_id,
            'lane_number': int(lane.lane_number),
            'library_name': library.library_name,
            'library_id': library.id,
            'library_species': library.library_species.scientific_name,
            # converted to text; presumably the raw value is not JSON
            # serializable -- TODO confirm against the Lane model
            'pM': unicode(lane.pM),
            'read_length': fc.read_length
        }

    if fc.control_lane is None:
        control_lane = None
    else:
        control_lane = int(fc.control_lane)

    info = {
        'advanced_run': fc.advanced_run,
        'cluster_station_id': fc.cluster_station_id,
        'cluster_station': fc.cluster_station.name,
        'control_lane': control_lane,
        # 'datarun_set': how should this be represented?,
        'flowcell_id': fc.flowcell_id,
        'id': fc.id,
        'lane_set': lane_set,
        'notes': fc.notes,
        'paired_end': fc.paired_end,
        'read_length': fc.read_length,
        'run_date': fc.run_date.isoformat(),
        'sequencer_id': fc.sequencer_id,
        'sequencer': fc.sequencer.name,
    }

    return info
69
def flowcell_json(request, fc_id):
    """
    Serve the description of flowcell fc_id as a JSON document.

    require_api_key is called on the request before anything else.
    Raises Http404 when flowcell_information finds no such flowcell.
    """
    require_api_key(request)

    description = flowcell_information(fc_id)
    if description is None:
        raise Http404

    return HttpResponse(json.dumps(description),
                        mimetype = 'application/json')
83
def lanes_for(username=None):
    """
    Return lanes as a list of dictionaries, newest flowcell run first

    When username is given, only lanes whose library has an affiliation
    that includes that user are returned; looking up an unknown
    username lets the exception from HTSUser.objects.get propagate.
    With no username every lane is returned.
    """
    lane_filter = {}
    if username is not None:
        user = HTSUser.objects.get(username=username)
        lane_filter['library__affiliations__users__id'] = user.id

    matching = Lane.objects.filter(**lane_filter)
    matching = matching.order_by('-flowcell__run_date')

    def describe(lane):
        # flatten one lane into JSON friendly values
        affiliation_pairs = [(aff.id, aff.name)
                             for aff in lane.library.affiliations.all()]
        return {
            'flowcell': lane.flowcell.flowcell_id,
            'run_date': lane.flowcell.run_date.isoformat(),
            'lane_number': lane.lane_number,
            'library': lane.library.id,
            'library_name': lane.library.library_name,
            'comment': lane.comment,
            'affiliations': affiliation_pairs,
        }

    return [describe(lane) for lane in matching]
108
def lanes_for_json(request, username):
    """
    Format lanes for a user

    require_api_key is called on the request first.  The list built by
    lanes_for(username) is returned as a JSON response; if that lookup
    raises ObjectDoesNotExist the request ends in Http404.
    """
    require_api_key(request)

    try:
        result = lanes_for(username)
    except ObjectDoesNotExist:
        raise Http404

    result_json = json.dumps(result)
    return HttpResponse(result_json, mimetype='application/json')
124                  
def updStatus(request):
    """
    Update the run status, and optionally the run note, of a DataRun.

    Parameters read from request.REQUEST:
      fcid  -- flowcell id; only echoed in the "entry not found" message
      runf  -- run folder used to look up the DataRun
      updst -- new status, used as an index into
               DataRun.RUN_STATUS_CHOICES
      msg   -- optional text appended to the run note with a timestamp

    Access is limited to superusers connecting from an address listed
    in settings.ALLOWED_IPS.  Every outcome, including failures, is
    reported as the text of an HttpResponse.
    """
    ClIP = request.META['REMOTE_ADDR']
    user = getattr(request, 'user', 'none')

    #Check access permission
    # getattr: the 'none' placeholder used when the request carries no
    # user has no is_superuser attribute and must simply be refused
    is_superuser = getattr(user, 'is_superuser', False)
    if not (is_superuser and ClIP in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, ClIP))

    # ~~~~~~Parameters for the job ~~~~
    params = request.REQUEST
    if 'fcid' not in params:
        return HttpResponse('missing fcid')
    fcid = params['fcid']

    if 'runf' not in params:
        return HttpResponse('missing runf')
    runfolder = params['runf']

    if 'updst' not in params:
        return HttpResponse('missing status')
    UpdatedStatus = params['updst']
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Update Data Run status in DB
    try:
        rec = DataRun.objects.get(run_folder=runfolder)
    except ObjectDoesNotExist:
        return HttpResponse("entry not found: "+fcid+", "+runfolder)

    # Resolve the status label before touching the record so a bad
    # status can not be saved and then fail while building the reply.
    try:
        status_index = int(UpdatedStatus)
        if status_index < 0:
            # negative values would silently index from the end
            raise IndexError(status_index)
        status_label = DataRun.RUN_STATUS_CHOICES[status_index][1]
    except (ValueError, IndexError):
        return HttpResponse('invalid status')

    rec.run_status = UpdatedStatus

    #if there's a message update that too
    # timestamp with minute resolution, e.g. "2008-01-31 14:05"
    mytimestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
    if 'msg' in params:
        previous_note = rec.run_note or ''
        rec.run_note = previous_note+", "+params['msg']+" ("+mytimestamp+")"
    elif UpdatedStatus == '1':
        rec.run_note = "Started ("+mytimestamp+")"

    rec.save()
    output = "Hello "+settings.ALLOWED_IPS[ClIP]+". Updated to:'"+str(status_label)+"'"

    # NOTE: notifying the researcher by email is not implemented; an
    # earlier send_mail attempt failed with
    # (49, "Can't assign requested address")
    return HttpResponse(output)
187
def generateConfile(request,fcid):
    """
    Build the default analysis config text for flowcell fcid.

    The text starts with four fixed settings (READ_LENGTH, ANALYSIS,
    GENOME_FILE, ELAND_MULTIPLE_INSTANCES) followed by a GENOME_DIR and
    an ELAND_GENOME line per lane, each prefixed with the lane number
    and ending in the scientific name of the lane's library species.
    Lines are joined with os.linesep.

    request is accepted for interface compatibility and is not used.

    Returns the config text, or an 'Entry not found' message when the
    flowcell (or a related object followed while building the lines)
    does not exist.
    """
    # NOTE: no access check is made here; an IP based check was once
    # sketched for this function but never enabled.
    config = ['READ_LENGTH 25',
              'ANALYSIS eland',
              'GENOME_FILE all_chr.fa',
              'ELAND_MULTIPLE_INSTANCES 8']
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            lane_prefix = str(lane.lane_number) + ":"
            species = lane.library.library_species.scientific_name
            config.append(lane_prefix + genome_dir + species)
            config.append(lane_prefix + eland_genome + species)
    except ObjectDoesNotExist:
        # return the message as is: joining a string with os.linesep
        # would put a line separator between every character
        return 'Entry not found for fcid  = '+fcid

    return os.linesep.join(config)
214
def getConfile(req):
    """
    Return the config parameters stored for a run folder as plain text.

    Only clients whose address is listed in settings.ALLOWED_IPS are
    served.  Both the 'fcid' and 'runf' request parameters are needed;
    without them the response is 'Nothing found'.

    When the stored config_params of the DataRun do not start with a
    READ_LENGTH line, a config is generated with generateConfile and,
    if that one does start with READ_LENGTH, saved back to the DataRun.
    Failures are reported in the response text.
    """
    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: "+ClIP)

    cnfgfile = 'Nothing found'
    request = req.REQUEST
    if 'fcid' in request and 'runf' in request:
        fcid = request['fcid']
        runfolder = request['runf']
        try:
            rec = DataRun.objects.get(run_folder=runfolder) #,flowcell_id=fcid)
            # treat a missing value like an empty config so the regex
            # below is never handed None
            cnfgfile = rec.config_params or ''
            # no MULTILINE flag: READ_LENGTH must open the whole text
            match_str = re.compile(r'^READ_LENGTH.+')
            if not match_str.search(cnfgfile):
                cnfgfile = generateConfile(request,fcid)
                if match_str.search(cnfgfile):
                    # rec is still the record fetched above, no need to
                    # query for it a second time
                    rec.config_params = cnfgfile
                    rec.save()
                else:
                    cnfgfile = 'Failed generating config params for RunFolder = '+runfolder +', Flowcell id = '+ fcid+ ' Config Text:\n'+cnfgfile
        except ObjectDoesNotExist:
            cnfgfile = 'Entry not found for RunFolder = '+runfolder

    return HttpResponse(cnfgfile, mimetype='text/plain')
248
def getLaneLibs(req):
    """
    Return a SolexaResult XML description of the libraries on a flowcell.

    Only clients whose address is listed in settings.ALLOWED_IPS are
    served.  The flowcell is selected with the 'fcid' request parameter.
    The document holds one Lane element for each of lanes 1 to 8 with
    the library name, library id and genome build of that lane.

    The response is sent as text/plain; a missing parameter or unknown
    flowcell is reported in the response text.
    """
    from xml.sax.saxutils import escape

    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    request = req.REQUEST
    if 'fcid' not in request:
        return HttpResponse('Missing input: flowcell id', mimetype='text/plain')
    fcid = request['fcid']

    def attribute(value):
        # text safe to place between double quotes in an XML attribute
        return escape('%s' % (value,), {'"': '&quot;'})

    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)
        # two digit year, month and day.  Ex: 071211
        mydate = datetime.today().strftime('%y%m%d')
        lines = ['<?xml version="1.0" ?>',
                 '<SolexaResult Date="%s" Flowcell="%s" Client="%s">' % (
                     mydate,
                     attribute(fcid),
                     attribute(settings.ALLOWED_IPS[ClIP]))]
        # NOTE(review): other functions in this file reach lanes through
        # flowcell.lane_set; confirm FlowCell still has lane_N_library
        for lane_index in range(1, 9):
            library = getattr(rec, 'lane_%d_library' % (lane_index,))
            lines.append(
                '<Lane Index="%d" Name="%s" Library="%s" Genome="%s" PrimerName="" PrimerSeq=""/>' % (
                    lane_index,
                    attribute(library.library_name),
                    attribute(library.id),
                    attribute(library.library_species.use_genome_build)))
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: '+fcid

    return HttpResponse(outputfile, mimetype='text/plain')
289
def estimateFlowcellDuration(flowcell):
    """
    Estimate how long sequencing plus analysis of a flowcell will take

    The number of cycles is the flowcell's read_length, doubled for a
    paired end run.  Returns a timedelta.
    """
    # 1.5 hours of sequencing per cycle
    seconds_sequencing = 3600 * 1.5
    # pipeline time per cycle; 800 is a rough guess
    seconds_pipeline = 800

    if flowcell.paired_end:
        cycles = flowcell.read_length * 2
    else:
        cycles = flowcell.read_length

    time_sequencing = timedelta(seconds=cycles * seconds_sequencing)
    time_pipeline = timedelta(seconds=cycles * seconds_pipeline)
    return time_sequencing + time_pipeline
308
def estimateFlowcellTimeRemaining(flowcell):
    """
    Estimate how much longer a flowcell will take, as a timedelta

    This is the estimate from estimateFlowcellDuration less the time
    elapsed between flowcell.run_date and now.
    """
    total = estimateFlowcellDuration(flowcell)

    # how long the run has been going so far
    elapsed = datetime.now() - flowcell.run_date

    return total - elapsed
317
def roundToDays(estimate):
    """
    Bracket a time estimate between two whole numbers of days

    Returns (low, high) where low is the estimate cut down to its days
    component and high is exactly one day more than low.
    """
    whole_days = estimate.days
    return (timedelta(days=whole_days), timedelta(days=whole_days + 1))
328     
329
def makeUserLaneMap(flowcell):
    """
    Map each user to the lanes of a flowcell they are interested in.

    A user is interested in a lane when they belong to an affiliation
    of the library on that lane.  A lane is listed once for every
    affiliation through which the user reaches it.
    """
    lanes_by_user = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            for user in affiliation.users.all():
                if user not in lanes_by_user:
                    lanes_by_user[user] = []
                lanes_by_user[user].append(lane)

    return lanes_by_user
343
def getUsersForFlowcell(flowcell):
    """
    Return the set of users belonging to any affiliation of any
    library on the lanes of a flowcell.
    """
    interested = set()

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            interested.update(affiliation.users.all())

    return interested
353     
def makeUserLibraryMap(libraries):
    """
    Given an iterable of libraries return a mapping of the users
    interested in those libraries to the libraries themselves.

    A library is listed once for every affiliation through which the
    user reaches it.
    """
    libraries_by_user = {}

    for library in libraries:
        for affiliation in library.affiliations.all():
            for user in affiliation.users.all():
                if user in libraries_by_user:
                    libraries_by_user[user].append(library)
                else:
                    libraries_by_user[user] = [library]

    return libraries_by_user
367
def makeAffiliationLaneMap(flowcell):
    """
    Map each affiliation to the list of lanes on the flowcell whose
    library carries that affiliation.
    """
    lanes_by_affiliation = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            if affiliation not in lanes_by_affiliation:
                lanes_by_affiliation[affiliation] = []
            lanes_by_affiliation[affiliation].append(lane)

    return lanes_by_affiliation
376
def makeEmailLaneMap(flowcell):
    """
    Map email addresses to the set of lanes on a flowcell they relate to.

    Addresses are taken from the affiliations of each lane's library
    and from the users in those affiliations.  Addresses that are None
    or empty are skipped.
    """
    lanes_by_email = {}

    def remember(address, lane):
        # record the lane under the address unless the address is blank
        if address is None or len(address) == 0:
            return
        if address not in lanes_by_email:
            lanes_by_email[address] = set()
        lanes_by_email[address].add(lane)

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            remember(affiliation.email, lane)
            for user in affiliation.users.all():
                remember(user.email, lane)

    return lanes_by_email