Add flowcell/lane information for a library to the rest hts api.
[htsworkflow.git] / htsworkflow / frontend / experiments / experiments.py
1 # some core functions of the exp tracker module
2 from datetime import datetime, timedelta
3 try:
4     import json
5 except ImportError, e:
6     import simplejson as json
7     
8 import os
9 import re
10
11 from django.contrib.auth.decorators import login_required
12 from django.core.exceptions import ObjectDoesNotExist
13 from django.core.mail import send_mail, mail_admins
14 from django.http import HttpResponse, Http404
15
16 from htsworkflow.frontend import settings
17 from htsworkflow.frontend.experiments.models import FlowCell, DataRun
18 from htsworkflow.frontend.samples.models import Library
19 from htsworkflow.frontend.auth import require_api_key
20
def flowcell_information(flowcell_id):
    """
    Describe the flowcell named flowcell_id as a plain dictionary.

    Returns None when no FlowCell with that flowcell_id exists.
    Otherwise the result holds the flowcell's own fields plus a
    'lane_set' entry that maps each lane number to a dictionary of
    details about that lane and the library loaded on it.
    """
    try:
        flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
    except FlowCell.DoesNotExist:
        return None

    lanes = {}
    for lane in flowcell.lane_set.all():
        library = lane.library
        details = {}
        details['cluster_estimate'] = lane.cluster_estimate
        details['comment'] = lane.comment
        details['experiment_type'] = library.experiment_type.name
        details['experiment_type_id'] = library.experiment_type_id
        details['flowcell'] = lane.flowcell.flowcell_id
        details['lane_number'] = int(lane.lane_number)
        details['library_name'] = library.library_name
        details['library_id'] = library.id
        details['library_species'] = library.library_species.scientific_name
        # pM is passed through unicode() before being stored
        details['pM'] = unicode(lane.pM)
        details['read_length'] = lane.flowcell.read_length
        lanes[lane.lane_number] = details

    # the control lane is optional; only convert it when it is set
    control_lane = None
    if fc_has_control_lane(flowcell):
        control_lane = int(flowcell.control_lane)

    # NOTE: the flowcell's datarun_set is not included in the description
    return {
        'advanced_run': flowcell.advanced_run,
        'cluster_station_id': flowcell.cluster_station_id,
        'cluster_station': flowcell.cluster_station.name,
        'control_lane': control_lane,
        'flowcell_id': flowcell.flowcell_id,
        'id': flowcell.id,
        'lane_set': lanes,
        'notes': flowcell.notes,
        'paired_end': flowcell.paired_end,
        'read_length': flowcell.read_length,
        'run_date': flowcell.run_date.isoformat(),
        'sequencer_id': flowcell.sequencer_id,
        'sequencer': flowcell.sequencer.name,
    }


def fc_has_control_lane(flowcell):
    """
    Report whether the flowcell has a control lane recorded.
    """
    return flowcell.control_lane is not None
69
def flowcell_json(request, fc_id):
    """
    Serve the description of flowcell fc_id as a JSON document.

    require_api_key is called on the request first.  Raises Http404
    when flowcell_information has nothing to report for fc_id.
    """
    require_api_key(request)

    description = flowcell_information(fc_id)
    if description is None:
        raise Http404

    return HttpResponse(json.dumps(description),
                        mimetype = 'application/json')
83     
def updStatus(request):
    """
    Update the run status (and optionally the run note) of a DataRun.

    Request parameters, read from request.REQUEST:
      fcid  - flowcell id; only used in the "entry not found" reply
      runf  - run folder used to look up the DataRun
      updst - new status; stored in run_status and also used as an
              index into DataRun.RUN_STATUS_CHOICES for the reply text
      msg   - optional text appended to the existing run note

    Only superusers connecting from an address listed in
    settings.ALLOWED_IPS may update.  Every outcome, including refusals
    and missing parameters, is reported as the body of an HttpResponse.
    """
    ClIP = request.META['REMOTE_ADDR']
    user = getattr(request, 'user', 'none')

    # Check access permission.  When the request carries no user the
    # 'none' placeholder has no is_superuser attribute, so a missing
    # attribute is treated as "not a superuser" instead of raising
    # AttributeError.
    is_superuser = getattr(user, 'is_superuser', False)
    if not (is_superuser and ClIP in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, ClIP))

    # ~~~~~~ Parameters for the job ~~~~
    params = request.REQUEST
    if 'fcid' not in params:
        return HttpResponse('missing fcid')
    fcid = params['fcid']

    if 'runf' not in params:
        return HttpResponse('missing runf')
    runfolder = params['runf']

    if 'updst' not in params:
        return HttpResponse('missing status')
    UpdatedStatus = params['updst']

    # Update Data Run status in DB
    try:
        rec = DataRun.objects.get(run_folder=runfolder)
        rec.run_status = UpdatedStatus

        # timestamp with minute resolution, e.g. "2009-01-31 13:45"
        mytimestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
        if 'msg' in params:
            # if there's a message, append it to the note
            rec.run_note += ", " + params['msg'] + " (" + mytimestamp + ")"
        elif UpdatedStatus == '1':
            # status '1' without a message restarts the note
            rec.run_note = "Started (" + mytimestamp + ")"

        rec.save()
        status_label = str(DataRun.RUN_STATUS_CHOICES[int(UpdatedStatus)][1])
        output = ("Hello " + settings.ALLOWED_IPS[ClIP] +
                  ". Updated to:'" + status_label + "'")
    except ObjectDoesNotExist:
        output = "entry not found: " + fcid + ", " + runfolder

    # Emailing the researcher (send_mail / mail_admins) was tried here and
    # disabled: it failed with (49, "Can't assign requested address").
    return HttpResponse(output)
146
def generateConfile(request,fcid):
    """
    Build the text of a default pipeline config file for flowcell fcid.

    The result starts with fixed READ_LENGTH / ANALYSIS / GENOME_FILE /
    ELAND_MULTIPLE_INSTANCES settings, followed by a GENOME_DIR and an
    ELAND_GENOME line for every lane, each pointing below
    /Volumes/Genomes/ at the scientific name of the lane's library
    species.  Lines are joined with os.linesep.

    When the lookup raises ObjectDoesNotExist the returned text is the
    single line 'Entry not found for fcid  = <fcid>'.

    request is accepted for interface compatibility but is not used;
    no access check is performed here.
    """
    config = ['READ_LENGTH 25',
              'ANALYSIS eland',
              'GENOME_FILE all_chr.fa',
              'ELAND_MULTIPLE_INSTANCES 8']
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            lane_prefix = str(lane.lane_number) + ":"
            species = lane.library.library_species.scientific_name
            config.append(lane_prefix + genome_dir + species)
            config.append(lane_prefix + eland_genome + species)
    except ObjectDoesNotExist:
        # Keep config a list: joining a bare string would insert a line
        # separator between every character of the message.
        config = ['Entry not found for fcid  = ' + fcid]

    return os.linesep.join(config)
173
def getConfile(req):
    """
    Serve the pipeline config text recorded for a run folder.

    Clients whose address is not in settings.ALLOWED_IPS are refused.
    The reply is 'Nothing found' unless both 'fcid' and 'runf' are
    supplied.  If the DataRun's stored config_params does not begin
    with a READ_LENGTH setting, a config is generated with
    generateConfile and, when that result does begin with READ_LENGTH,
    saved back to the DataRun; otherwise a failure message containing
    the generated text is returned.
    """
    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: "+ClIP)

    params = req.REQUEST
    reply = 'Nothing found'

    if 'fcid' in params and 'runf' in params:
        fcid = params['fcid']
        runfolder = params['runf']
        starts_with_read_length = re.compile('^READ_LENGTH.+').search
        try:
            run = DataRun.objects.get(run_folder=runfolder)
            reply = run.config_params
            if not starts_with_read_length(reply):
                reply = generateConfile(params, fcid)
                if starts_with_read_length(reply):
                    # the record is fetched again before being updated
                    run = DataRun.objects.get(run_folder=runfolder)
                    run.config_params = reply
                    run.save()
                else:
                    reply = ('Failed generating config params for RunFolder = '
                             + runfolder + ', Flowcell id = ' + fcid
                             + ' Config Text:\n' + reply)
        except ObjectDoesNotExist:
            reply = 'Entry not found for RunFolder = '+runfolder

    return HttpResponse(reply, mimetype='text/plain')
207
def getLaneLibs(req):
    """
    Describe the libraries on the eight lanes of a flowcell as XML text.

    Clients whose address is not in settings.ALLOWED_IPS are refused.
    The flowcell is selected with the 'fcid' request parameter.  The
    reply is a <SolexaResult> element stamped with today's date as
    YYMMDD (e.g. 071211), the flowcell id and the client name from
    settings.ALLOWED_IPS, holding one <Lane> element per lane with the
    library name, library id and genome build.

    A missing 'fcid' or an unknown flowcell is reported as a plain
    text message instead.
    """
    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    request = req.REQUEST
    if 'fcid' not in request:
        return HttpResponse('Missing input: flowcell id',
                            mimetype='text/plain')

    fcid = request['fcid']
    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)
        # Two-digit year, month and day.  strftime replaces a call to an
        # undefined replace() helper, which also would have removed every
        # "20" from the year rather than just the century.
        mydate = datetime.today().strftime('%y%m%d')

        lines = ['<?xml version="1.0" ?>',
                 '<SolexaResult Date="%s" Flowcell="%s" Client="%s">' % (
                     mydate, fcid, settings.ALLOWED_IPS[ClIP])]
        for lane_index in range(1, 9):
            # NOTE(review): relies on FlowCell exposing lane_1_library ..
            # lane_8_library attributes -- confirm against the model.
            library = getattr(rec, 'lane_%d_library' % lane_index)
            # NOTE(review): attribute values are inserted without XML
            # escaping.
            lines.append(
                '<Lane Index="%d" Name="%s" Library="%s" Genome="%s" '
                'PrimerName="" PrimerSeq=""/>' % (
                    lane_index,
                    library.library_name,
                    library.id,
                    library.library_species.use_genome_build))
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: '+fcid

    return HttpResponse(outputfile, mimetype='text/plain')
248
def estimateFlowcellDuration(flowcell):
    """
    Estimate how long a flowcell takes to sequence and analyse.

    The number of cycles is the flowcell's read_length, doubled for a
    paired end run.  Each cycle is budgeted 1.5 hours of sequencing
    plus a rough guess of 800 seconds of pipeline time.

    Returns the total as a timedelta.
    """
    seconds_sequencing_one_cycle = 3600 * 1.5
    seconds_pipeline_one_cycle = 800   # rough guess

    cycle_count = flowcell.read_length
    if flowcell.paired_end:
        cycle_count = cycle_count * 2

    time_sequencing = timedelta(
        seconds=cycle_count * seconds_sequencing_one_cycle)
    time_analysing = timedelta(
        seconds=cycle_count * seconds_pipeline_one_cycle)

    return time_sequencing + time_analysing
267
def estimateFlowcellTimeRemaining(flowcell):
    """
    Estimate how much of a flowcell's run is still to come.

    Takes the estimated total duration of the flowcell and subtracts
    the time elapsed between its run_date and now.  Returns a
    timedelta.
    """
    total_duration = estimateFlowcellDuration(flowcell)

    # how long we've been running so far
    elapsed = datetime.now() - flowcell.run_date

    return total_duration - elapsed
276
def roundToDays(estimate):
    """
    Bracket a time estimate between whole numbers of days.

    Returns a (low, high) pair of timedeltas: low keeps only the days
    component of the estimate and high is exactly one day more.
    """
    whole_days = estimate.days
    lower_bound = timedelta(days=whole_days)
    upper_bound = timedelta(days=whole_days + 1)

    return (lower_bound, upper_bound)
287     
288
def makeUserLaneMap(flowcell):
    """
    Map each interested user to the lanes of a flowcell.

    A user is associated with a lane through the affiliations of the
    library on that lane.  Returns a dictionary of user -> list of
    lanes; a lane is listed once per affiliation that links it to the
    user.
    """
    user_lane_pairs = (
        (user, lane)
        for lane in flowcell.lane_set.all()
        for affiliation in lane.library.affiliations.all()
        for user in affiliation.users.all()
    )

    lanes_by_user = {}
    for user, lane in user_lane_pairs:
        if user not in lanes_by_user:
            lanes_by_user[user] = []
        lanes_by_user[user].append(lane)

    return lanes_by_user
302
def getUsersForFlowcell(flowcell):
    """
    Collect every user tied to a flowcell.

    Users are reached through the affiliations of the library on each
    lane.  Returns a set, so each user appears once.
    """
    interested = set()

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            interested.update(affiliation.users.all())

    return interested
312     
def makeUserLibraryMap(libraries):
    """
    Map each interested user to the libraries they are tied to.

    libraries is any iterable of libraries; users are reached through
    each library's affiliations.  Returns a dictionary of
    user -> list of libraries.
    """
    user_library_pairs = (
        (user, library)
        for library in libraries
        for affiliation in library.affiliations.all()
        for user in affiliation.users.all()
    )

    libraries_by_user = {}
    for user, library in user_library_pairs:
        if user not in libraries_by_user:
            libraries_by_user[user] = []
        libraries_by_user[user].append(library)

    return libraries_by_user
326
def makeAffiliationLaneMap(flowcell):
    """
    Map each affiliation to the lanes of a flowcell it is tied to.

    An affiliation is tied to a lane when it is one of the affiliations
    of the library on that lane.  Returns a dictionary of
    affiliation -> list of lanes.
    """
    lanes_by_affiliation = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            if affiliation not in lanes_by_affiliation:
                lanes_by_affiliation[affiliation] = []
            lanes_by_affiliation[affiliation].append(lane)

    return lanes_by_affiliation
335
def makeEmailLaneMap(flowcell):
    """
    Map email addresses to the lanes of a flowcell they should hear about.

    For every affiliation of the library on a lane, both the
    affiliation's own email and the emails of its users are considered;
    addresses that are None or empty are skipped.  Returns a dictionary
    of email address -> set of lanes.
    """
    lanes_by_email = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            # the affiliation's own address first, then its users'
            candidates = [affiliation.email]
            candidates.extend(user.email for user in affiliation.users.all())

            for address in candidates:
                if address is None or len(address) == 0:
                    continue
                lanes_by_email.setdefault(address, set()).add(lane)

    return lanes_by_email