Add a /config/<fcid>/json url that returns information about a flowcell
[htsworkflow.git] / htsworkflow / frontend / experiments / experiments.py
1 # some core functions of the exp tracker module
2 from datetime import datetime, timedelta
3 try:
4     import json
5 except ImportError, e:
6     import simplejson as json
7     
8 import os
9 import re
10
11 from django.contrib.auth.decorators import login_required
12 from django.core.exceptions import ObjectDoesNotExist
13 from django.core.mail import send_mail, mail_admins
14 from django.http import HttpResponse, Http404
15
16 from htsworkflow.frontend import settings
17 from htsworkflow.frontend.experiments.models import FlowCell, DataRun
18 from htsworkflow.frontend.samples.models import Library
19
def flowcell_information(flowcell_id):
    """
    Describe a flowcell and its lanes as a plain dictionary.

    The flowcell is looked up by its flowcell_id.  Returns None when no
    matching FlowCell exists; otherwise returns a dict of the flowcell's
    attributes plus a 'lane_set' entry that maps each lane number to a
    dict describing that lane.  The run date is rendered with isoformat()
    and lane numbers, the control lane and pM are coerced to plain
    int/float values.
    """
    try:
        flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
    except FlowCell.DoesNotExist:
        return None

    # One entry per lane, keyed by the lane number as stored on the lane.
    lanes = dict(
        (lane.lane_number, {
            'cluster_estimate': lane.cluster_estimate,
            'comment': lane.comment,
            'flowcell': lane.flowcell.flowcell_id,
            'lane_number': int(lane.lane_number),
            'library_name': lane.library.library_name,
            'library_id': lane.library_id,
            'pM': float(lane.pM),
        })
        for lane in flowcell.lane_set.all()
    )

    return {
        'advanced_run': flowcell.advanced_run,
        'cluster_station_id': flowcell.cluster_station_id,
        'cluster_station': flowcell.cluster_station.name,
        'control_lane': int(flowcell.control_lane),
        # 'datarun_set': how should this be represented?,
        'flowcell_id': flowcell.flowcell_id,
        'id': flowcell.id,
        'lane_set': lanes,
        'notes': flowcell.notes,
        'paired_end': flowcell.paired_end,
        'read_length': flowcell.read_length,
        'run_date': flowcell.run_date.isoformat(),
        'sequencer_id': flowcell.sequencer_id,
        'sequencer': flowcell.sequencer.name,
    }
58
@login_required
def flowcell_json(request, fc_id):
    """
    Serve the flowcell_information() description of a flowcell as JSON.

    Raises Http404 when flowcell_information() returns None for fc_id.
    """
    description = flowcell_information(fc_id)
    if description is None:
        raise Http404

    return HttpResponse(json.dumps(description),
                        mimetype = 'application/json')
71     
def updStatus(request):
    """
    Update the run status (and optionally the run note) of a DataRun.

    Parameters are read from request.REQUEST:
      fcid  - flowcell id; only echoed in the "entry not found" message
      runf  - run folder used to look up the DataRun
      updst - new status; also used as an integer index into
              DataRun.RUN_STATUS_CHOICES to build the confirmation text
      msg   - optional text appended to the run note with a timestamp

    Access is limited to superusers connecting from an address that is a
    key of settings.ALLOWED_IPS.  Every outcome (denied, missing
    parameter, invalid status, unknown run folder, success) is reported
    as the body of an HttpResponse.
    """
    client_ip = request.META['REMOTE_ADDR']
    # 'none' is the placeholder shown in the denial message when the
    # request carries no user object.
    user = getattr(request, 'user', 'none')

    # Check access permission.  getattr() keeps the string placeholder
    # from raising AttributeError; such a request is simply denied.
    is_superuser = getattr(user, 'is_superuser', False)
    if not (is_superuser and client_ip in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, client_ip))

    # ~~~~~~ Parameters for the job ~~~~~~
    params = request.REQUEST
    if 'fcid' not in params:
        return HttpResponse('missing fcid')
    fcid = params['fcid']

    if 'runf' not in params:
        return HttpResponse('missing runf')
    runfolder = params['runf']

    if 'updst' not in params:
        return HttpResponse('missing status')
    new_status = params['updst']

    # Update Data Run status in DB.
    try:
        rec = DataRun.objects.get(run_folder=runfolder)
    except ObjectDoesNotExist:
        return HttpResponse("entry not found: " + fcid + ", " + runfolder)

    # Resolve the status label BEFORE touching the record, so that a
    # non-numeric or out-of-range status is rejected instead of being
    # saved and then crashing the view.  The lookup is positional, as it
    # always has been.
    try:
        status_label = DataRun.RUN_STATUS_CHOICES[int(new_status)][1]
    except (ValueError, IndexError):
        return HttpResponse('invalid status')

    rec.run_status = new_status

    # Timestamp with minute resolution, e.g. "2009-01-31 14:05".
    timestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
    if 'msg' in params:
        # If there's a message, append it to the existing note.
        rec.run_note += ", " + params['msg'] + " (" + timestamp + ")"
    elif new_status == '1':
        rec.run_note = "Started (" + timestamp + ")"

    rec.save()
    output = ("Hello " + settings.ALLOWED_IPS[client_ip] +
              ". Updated to:'" + str(status_label) + "'")

    # Notifying the researcher by email is disabled: send_mail() and
    # mail_admins() failed with error (49, "Can't assign requested address").
    return HttpResponse(output)
134
def generateConfile(request,fcid):
    """
    Build the text of an analysis config file for a flowcell.

    request is accepted for interface compatibility but is not used.
    fcid is the flowcell_id to look up.

    Returns the config lines joined with os.linesep: four fixed header
    lines followed by a GENOME_DIR and an ELAND_GENOME line per lane.
    If the lookup raises ObjectDoesNotExist, returns the single line
    'Entry not found for fcid  = <fcid>' instead.
    """
    # NOTE: the ALLOWED_IPS access check that used to live here is
    # disabled; getConfile() performs that check before calling this.
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    config = [
        'READ_LENGTH 25',
        'ANALYSIS eland',
        'GENOME_FILE all_chr.fa',
        'ELAND_MULTIPLE_INSTANCES 8',
    ]

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            prefix = str(lane.lane_number) + ":"
            species = lane.library.library_species.scientific_name
            config.append(prefix + genome_dir + species)
            config.append(prefix + eland_genome + species)
    except ObjectDoesNotExist:
        # Return the message directly.  Joining a *string* with
        # os.linesep would put every character on its own line.
        return 'Entry not found for fcid  = ' + fcid

    return os.linesep.join(config)
162
def getConfile(req):
    """
    Return the stored config parameters for a run folder as text/plain.

    Parameters are read from req.REQUEST:
      fcid - flowcell id, used when the config has to be generated
      runf - run folder used to look up the DataRun

    Only clients whose REMOTE_ADDR is a key of settings.ALLOWED_IPS are
    served.  If the stored config_params has no line-initial READ_LENGTH
    entry, a config is generated with generateConfile() and, when that
    result looks valid, saved back onto the DataRun.  When either
    parameter is missing the body is 'Nothing found'.
    """
    client_ip = req.META['REMOTE_ADDR']
    if client_ip not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: " + client_ip)

    request = req.REQUEST
    cnfgfile = 'Nothing found'

    # Both parameters are required.  They are only indexed after this
    # presence check, so a missing one yields 'Nothing found' rather than
    # a KeyError.
    if 'fcid' in request and 'runf' in request:
        fcid = request['fcid']
        runfolder = request['runf']
        try:
            rec = DataRun.objects.get(run_folder=runfolder)
            cnfgfile = rec.config_params
            match_str = re.compile('^READ_LENGTH.+')
            if not match_str.search(cnfgfile):
                cnfgfile = generateConfile(request, fcid)
                if match_str.search(cnfgfile):
                    # Cache the generated config on the record fetched above.
                    rec.config_params = cnfgfile
                    rec.save()
                else:
                    cnfgfile = 'Failed generating config params for RunFolder = '+runfolder +', Flowcell id = '+ fcid+ ' Config Text:\n'+cnfgfile
        except ObjectDoesNotExist:
            cnfgfile = 'Entry not found for RunFolder = '+runfolder

    return HttpResponse(cnfgfile, mimetype='text/plain')
199
def getLaneLibs(req):
    """
    Return a SolexaResult XML document listing the libraries on the
    eight lanes of a flowcell, served as text/plain.

    The flowcell id is read from the 'fcid' entry of req.REQUEST.  Only
    clients whose REMOTE_ADDR is a key of settings.ALLOWED_IPS are
    served.  When 'fcid' is missing, or no FlowCell matches it, the body
    is a one-line plain text message instead of XML.
    """
    client_ip = req.META['REMOTE_ADDR']
    if client_ip not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    request = req.REQUEST
    if 'fcid' not in request:
        return HttpResponse('Missing input: flowcell id',
                            mimetype='text/plain')

    fcid = request['fcid']
    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)

        # Two-digit year, month and day, e.g. 071211.  A single
        # strftime() call reads the clock once and zero-pads each part.
        mydate = datetime.today().strftime('%y%m%d')

        lines = [
            '<?xml version="1.0" ?>',
            '<SolexaResult Date="'+mydate+'" Flowcell="'+fcid+'" Client="'+settings.ALLOWED_IPS[client_ip]+'">',
        ]
        # NOTE(review): other functions in this file read lanes through
        # flowcell.lane_set; confirm FlowCell still exposes the
        # lane_<n>_library attributes used here.
        # NOTE(review): attribute values are inserted without XML escaping.
        for index in range(1, 9):
            library = getattr(rec, 'lane_%d_library' % index)
            lines.append(''.join([
                '<Lane Index="', str(index),
                '" Name="', library.library_name,
                '" Library="', library.library_id,
                '" Genome="', library.library_species.use_genome_build,
                '" PrimerName="" PrimerSeq=""/>',
            ]))
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: '+fcid

    return HttpResponse(outputfile, mimetype='text/plain')
240
def estimateFlowcellDuration(flowcell):
    """
    Estimate, in whole days, how long a flowcell will take to run.

    The number of cycles is the flowcell's read_length, doubled for
    paired-end runs.  Returns a (low, high) tuple of timedeltas: the
    combined sequencing and pipeline time rounded down to whole days,
    and that value plus one day.
    """
    # 1.5 hours of sequencing per cycle
    seconds_sequencing = 3600 * 1.5
    # pipeline cost per cycle; 800 is a rough guess
    seconds_pipeline = 800

    cycle_count = flowcell.read_length
    if flowcell.paired_end:
        cycle_count = cycle_count * 2

    total = (timedelta(seconds=cycle_count * seconds_sequencing) +
             timedelta(seconds=cycle_count * seconds_pipeline))

    # Keep only the whole days of the total for the lower bound.
    whole_days = total.days
    return (timedelta(days=whole_days), timedelta(days=whole_days + 1))
263     
264
def makeUserLaneMap(flowcell):
    """
    Map each interested user to the lanes of a flowcell.

    A user is linked to a lane through every affiliation of that lane's
    library they belong to, so a lane is listed once per such
    affiliation.
    """
    user_lane_pairs = (
        (user, lane)
        for lane in flowcell.lane_set.all()
        for affiliation in lane.library.affiliations.all()
        for user in affiliation.users.all()
    )

    lanes_by_user = {}
    for user, lane in user_lane_pairs:
        if user not in lanes_by_user:
            lanes_by_user[user] = []
        lanes_by_user[user].append(lane)

    return lanes_by_user
278
def getUsersForFlowcell(flowcell):
    """
    Return the set of users belonging to any affiliation of any library
    on the flowcell's lanes.
    """
    interested = set()

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            interested.update(affiliation.users.all())

    return interested
288     
def makeUserLibraryMap(libraries):
    """
    Given an iterable of libraries, return a mapping of the users
    interested in those libraries to the list of libraries each one is
    linked to.

    A library is listed once per affiliation that links the user to it.
    """
    user_library_pairs = (
        (user, library)
        for library in libraries
        for affiliation in library.affiliations.all()
        for user in affiliation.users.all()
    )

    libraries_by_user = {}
    for user, library in user_library_pairs:
        if user not in libraries_by_user:
            libraries_by_user[user] = []
        libraries_by_user[user].append(library)

    return libraries_by_user
302
def makeAffiliationLaneMap(flowcell):
    """
    Map each affiliation to the list of flowcell lanes whose library
    carries that affiliation.
    """
    lanes_by_affiliation = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            if affiliation in lanes_by_affiliation:
                lanes_by_affiliation[affiliation].append(lane)
            else:
                lanes_by_affiliation[affiliation] = [lane]

    return lanes_by_affiliation
311
def makeEmailLaneMap(flowcell):
    """
    Map email addresses to the set of flowcell lanes they are tied to.

    Addresses are taken both from each affiliation of a lane's library
    and from the users of that affiliation; missing (None) or empty
    addresses are skipped.
    """
    lanes_by_email = {}
    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            # The affiliation's own address first, then its users'.
            addresses = [affiliation.email]
            addresses.extend(user.email for user in affiliation.users.all())
            for address in addresses:
                if address:
                    lanes_by_email.setdefault(address, set()).add(lane)

    return lanes_by_email
326
327     return emails