Turn the library_id back into the primary key for samples_library (SCHEMA CHANGE!)
[htsworkflow.git] / htsworkflow / frontend / experiments / experiments.py
1 # some core functions of the exp tracker module
2 from datetime import datetime, timedelta
3 try:
4     import json
5 except ImportError, e:
6     import simplejson as json
7     
8 import os
9 import re
10
11 from django.contrib.auth.decorators import login_required
12 from django.core.exceptions import ObjectDoesNotExist
13 from django.core.mail import send_mail, mail_admins
14 from django.http import HttpResponse, Http404
15
16 from htsworkflow.frontend import settings
17 from htsworkflow.frontend.experiments.models import FlowCell, DataRun
18 from htsworkflow.frontend.samples.models import Library
19 from htsworkflow.frontend.auth import require_api_key
20
def flowcell_information(flowcell_id):
    """
    Return a dictionary describing a flowcell

    The result holds the flowcell's own attributes plus a 'lane_set'
    entry that maps each lane number to a dictionary describing that
    lane and the library loaded on it.

    :param flowcell_id: flowcell_id value of the FlowCell to look up
    :return: the dictionary, or None when no FlowCell has that
             flowcell_id
    """
    try:
        fc = FlowCell.objects.get(flowcell_id=flowcell_id)
    except FlowCell.DoesNotExist:
        return None

    lane_set = {}
    for lane in fc.lane_set.all():
        library = lane.library
        lane_set[lane.lane_number] = {
            'cluster_estimate': lane.cluster_estimate,
            'comment': lane.comment,
            'experiment_type': library.experiment_type.name,
            'experiment_type_id': library.experiment_type_id,
            # every lane in fc.lane_set belongs to fc, so read the id
            # from fc instead of resolving lane.flowcell once per lane
            'flowcell': fc.flowcell_id,
            'lane_number': int(lane.lane_number),
            'library_name': library.library_name,
            'library_id': library.id,
            'library_species': library.library_species.scientific_name,
            'pM': float(lane.pM),
            'read_length': fc.read_length
        }

    info = {
        'advanced_run': fc.advanced_run,
        'cluster_station_id': fc.cluster_station_id,
        'cluster_station': fc.cluster_station.name,
        'control_lane': int(fc.control_lane),
        # 'datarun_set': how should this be represented?,
        'flowcell_id': fc.flowcell_id,
        'id': fc.id,
        'lane_set': lane_set,
        'notes': fc.notes,
        'paired_end': fc.paired_end,
        'read_length': fc.read_length,
        'run_date': fc.run_date.isoformat(),
        'sequencer_id': fc.sequencer_id,
        'sequencer': fc.sequencer.name,
    }

    return info
63
def flowcell_json(request, fc_id):
    """
    Serve the description of a flowcell as a JSON document.

    The request is first passed to require_api_key.  The dictionary
    built by flowcell_information is then serialized; it is meant to
    carry enough information to generate a config file.  Raises Http404
    when flowcell_information returns None for fc_id.
    """
    require_api_key(request)

    description = flowcell_information(fc_id)
    if description is None:
        raise Http404

    return HttpResponse(json.dumps(description), mimetype = 'application/json')
77     
def updStatus(request):
    """
    Update the status, and optionally the note, of a DataRun.

    Parameters are read from request.REQUEST:
      fcid  -- flowcell id; only used in the "entry not found" message
      runf  -- run folder identifying the DataRun to update
      updst -- new run_status; also used as an index into
               DataRun.RUN_STATUS_CHOICES to build the reply
      msg   -- optional text appended to run_note with a timestamp

    Only superusers connecting from an address listed in
    settings.ALLOWED_IPS get past the access check.  Returns an
    HttpResponse whose body is a short message describing the outcome.
    """
    ClIP = request.META['REMOTE_ADDR']
    # Fall back to a placeholder so the denial message can still be
    # rendered when the request carries no user attribute.
    user = getattr(request, 'user', 'none')

    #Check access permission
    # getattr keeps the placeholder string from raising AttributeError
    is_superuser = getattr(user, 'is_superuser', False)
    if not (is_superuser and ClIP in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, ClIP))

    # ~~~~~~Parameters for the job ~~~~
    params = request.REQUEST
    if 'fcid' not in params:
        return HttpResponse('missing fcid')
    fcid = params['fcid']

    if 'runf' not in params:
        return HttpResponse('missing runf')
    runfolder = params['runf']

    if 'updst' not in params:
        return HttpResponse('missing status')
    UpdatedStatus = params['updst']
    # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

    # Update Data Run status in DB
    try:
        rec = DataRun.objects.get(run_folder=runfolder)
    except ObjectDoesNotExist:
        return HttpResponse("entry not found: "+fcid+", "+runfolder)

    rec.run_status = UpdatedStatus

    #if there's a message update that too
    # minute resolution: seconds and microseconds are dropped
    mytimestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
    if 'msg' in params:
        # an unset (None) note is treated as empty instead of raising
        rec.run_note = "%s, %s (%s)" % (rec.run_note or '',
                                        params['msg'],
                                        mytimestamp)
    elif UpdatedStatus == '1':
        rec.run_note = "Started ("+mytimestamp+")"

    rec.save()

    # The record is already saved here, so a status that cannot be
    # turned into a label must not fail the request; echo the raw
    # value instead.
    try:
        status_label = DataRun.RUN_STATUS_CHOICES[int(UpdatedStatus)][1]
    except (ValueError, IndexError):
        status_label = UpdatedStatus
    output = "Hello %s. Updated to:'%s'" % (settings.ALLOWED_IPS[ClIP],
                                            status_label)

    # NOTE: notifying the researcher by email (send_mail / mail_admins)
    # was attempted here and disabled because it failed with
    # (49, "Can't assign requested address").
    return HttpResponse(output)
140
def generateConfile(request,fcid):
    """
    Build the text of a pipeline config file for a flowcell.

    The text starts with four fixed settings (READ_LENGTH, ANALYSIS,
    GENOME_FILE, ELAND_MULTIPLE_INSTANCES) followed by a GENOME_DIR and
    an ELAND_GENOME line for every lane, each pointing at the
    scientific name of the lane's library species under
    /Volumes/Genomes/.

    :param request: not used; kept so existing callers keep working
    :param fcid: flowcell_id of the FlowCell whose lanes are listed
    :return: the config lines joined with os.linesep, or an
             'Entry not found' message when the lookup fails
    """
    # NOTE: no access check is made here (an ALLOWED_IPS check used to
    # be sketched out in comments); getConfile checks the client
    # address before calling this function.
    config = ['READ_LENGTH 25',
              'ANALYSIS eland',
              'GENOME_FILE all_chr.fa',
              'ELAND_MULTIPLE_INSTANCES 8']
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            prefix = str(lane.lane_number) + ":"
            species = lane.library.library_species.scientific_name
            config.append(prefix + genome_dir + species)
            config.append(prefix + eland_genome + species)
    except ObjectDoesNotExist:
        # Return the message untouched: joining a plain string with
        # os.linesep would put every character on a line of its own.
        return 'Entry not found for fcid  = %s' % (fcid,)

    return os.linesep.join(config)
167
def getConfile(req):
    """
    Return the config file text stored for a run folder.

    Parameters are read from req.REQUEST:
      fcid -- flowcell id, used when the config has to be generated
      runf -- run folder identifying the DataRun

    Only clients whose address is listed in settings.ALLOWED_IPS are
    served.  When the stored config_params does not start with a
    READ_LENGTH line, a new config is produced by generateConfile and,
    if that one does start with READ_LENGTH, saved on the DataRun.

    Returns an HttpResponse; apart from the access-denied reply it is
    sent as text/plain.  The body is 'Nothing found' when either
    parameter is missing.
    """
    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: "+ClIP)

    cnfgfile = 'Nothing found'
    request = req.REQUEST
    if 'fcid' in request and 'runf' in request:
        fcid = request['fcid']
        runfolder = request['runf']
        try:
            rec = DataRun.objects.get(run_folder=runfolder) #,flowcell_id=fcid)
        except ObjectDoesNotExist:
            cnfgfile = 'Entry not found for RunFolder = '+runfolder
        else:
            # no MULTILINE flag: READ_LENGTH must open the whole text
            match_str = re.compile('^READ_LENGTH.+')
            # an unset (None) config counts as empty so it is generated
            # instead of making search() raise TypeError
            cnfgfile = rec.config_params or ''
            if not match_str.search(cnfgfile):
                cnfgfile = generateConfile(request,fcid)
                if match_str.search(cnfgfile):
                    # rec is still the record fetched above; there is
                    # no need to load it a second time before saving
                    rec.config_params = cnfgfile
                    rec.save()
                else:
                    cnfgfile = 'Failed generating config params for RunFolder = '+runfolder +', Flowcell id = '+ fcid+ ' Config Text:\n'+cnfgfile

    return HttpResponse(cnfgfile, mimetype='text/plain')
201
def getLaneLibs(req):
    """
    Return a SolexaResult XML document listing the library on each lane.

    Parameters are read from req.REQUEST:
      fcid -- flowcell_id of the FlowCell to describe

    Only clients whose address is listed in settings.ALLOWED_IPS are
    served.  The document carries today's date as YYMMDD (for example
    071211), the flowcell id, the client name registered for the
    calling address, and one Lane element for each of lanes 1 to 8.

    Returns an HttpResponse; apart from the access-denied reply it is
    sent as text/plain.  The body is a plain message when fcid is
    missing or matches no FlowCell.
    """
    from xml.sax.saxutils import escape

    def attr(value):
        # make a value safe inside a double-quoted XML attribute
        return escape('%s' % (value,), {'"': '&quot;'})

    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    request = req.REQUEST
    if 'fcid' not in request:
        return HttpResponse('Missing input: flowcell id', mimetype='text/plain')

    fcid = request['fcid']
    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)
        #Ex: 071211
        mydate = datetime.today().strftime('%y%m%d')
        lines = ['<?xml version="1.0" ?>']
        lines.append('<SolexaResult Date="%s" Flowcell="%s" Client="%s">' % (
            mydate, attr(fcid), attr(settings.ALLOWED_IPS[ClIP])))
        # NOTE(review): other functions in this module reach lanes via
        # flowcell.lane_set; confirm FlowCell still exposes the
        # lane_<n>_library attributes read here.
        for index in range(1, 9):
            library = getattr(rec, 'lane_%d_library' % (index,))
            lines.append(
                '<Lane Index="%d" Name="%s" Library="%s" Genome="%s"'
                ' PrimerName="" PrimerSeq=""/>' % (
                    index,
                    attr(library.library_name),
                    attr(library.id),
                    attr(library.library_species.use_genome_build)))
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: '+fcid

    return HttpResponse(outputfile, mimetype='text/plain')
242
def estimateFlowcellDuration(flowcell):
    """
    Estimate a range for how long a flowcell will take to run.

    One cycle is counted per unit of flowcell.read_length, doubled when
    flowcell.paired_end is set.  Every cycle is budgeted 1.5 hours of
    sequencing plus roughly 800 seconds of pipeline analysis.

    Returns a (low, high) tuple of timedeltas: the total rounded down
    to whole days, and that value plus one day.
    """
    # 1.5 hours per cycle on the sequencer
    per_cycle_sequencing = 3600 * 1.5
    # 800 is a rough guess
    per_cycle_pipeline = 800

    if flowcell.paired_end:
        cycles = flowcell.read_length * 2
    else:
        cycles = flowcell.read_length

    total = (timedelta(seconds=cycles * per_cycle_sequencing) +
             timedelta(seconds=cycles * per_cycle_pipeline))

    # keep only the whole days of the total for the lower bound
    whole_days = total.days
    return (timedelta(days=whole_days), timedelta(days=whole_days + 1))
265     
266
def makeUserLaneMap(flowcell):
    """
    Map every interested user to the lanes of a flowcell.

    A user is interested in a lane when the user belongs to one of the
    affiliations of the lane's library.  A lane is listed once per
    affiliation that links it to the user.
    """
    lanes_by_user = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            for user in affiliation.users.all():
                if user not in lanes_by_user:
                    lanes_by_user[user] = []
                lanes_by_user[user].append(lane)

    return lanes_by_user
280
def getUsersForFlowcell(flowcell):
    """
    Return the set of users interested in any lane of a flowcell.

    The users are those of every affiliation of every library loaded on
    the flowcell's lanes.
    """
    interested = set()

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            interested.update(affiliation.users.all())

    return interested
290     
def makeUserLibraryMap(libraries):
    """
    Given an iterable of libraries return a mapping of the users
    interested in those libraries to the libraries themselves.

    A library is listed once per affiliation that links it to the user.
    """
    libraries_by_user = {}

    for library in libraries:
        for affiliation in library.affiliations.all():
            for user in affiliation.users.all():
                known = libraries_by_user.get(user)
                if known is None:
                    libraries_by_user[user] = [library]
                else:
                    known.append(library)

    return libraries_by_user
304
def makeAffiliationLaneMap(flowcell):
    """
    Map every affiliation to the lanes of a flowcell whose library
    lists that affiliation.
    """
    lanes_by_affiliation = {}

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            try:
                lanes_by_affiliation[affiliation].append(lane)
            except KeyError:
                # first lane seen for this affiliation
                lanes_by_affiliation[affiliation] = [lane]

    return lanes_by_affiliation
313
def makeEmailLaneMap(flowcell):
    """
    Map email addresses to the set of lanes they are associated with.

    Addresses are taken both from the affiliations of each lane's
    library and from the users of those affiliations.  Addresses that
    are None or empty are skipped.
    """
    lanes_by_email = {}

    def remember(address, lane):
        # ignore missing and empty addresses
        if address is None or len(address) == 0:
            return
        if address not in lanes_by_email:
            lanes_by_email[address] = set()
        lanes_by_email[address].add(lane)

    for lane in flowcell.lane_set.all():
        for affiliation in lane.library.affiliations.all():
            remember(affiliation.email, lane)
            for user in affiliation.users.all():
                remember(user.email, lane)

    return lanes_by_email