Return species information as part of the flowcell json information.
[htsworkflow.git] / htsworkflow / frontend / experiments / experiments.py
1 # some core functions of the exp tracker module
2 from datetime import datetime, timedelta
3 try:
4     import json
5 except ImportError, e:
6     import simplejson as json
7     
8 import os
9 import re
10
11 from django.contrib.auth.decorators import login_required
12 from django.core.exceptions import ObjectDoesNotExist
13 from django.core.mail import send_mail, mail_admins
14 from django.http import HttpResponse, Http404
15
16 from htsworkflow.frontend import settings
17 from htsworkflow.frontend.experiments.models import FlowCell, DataRun
18 from htsworkflow.frontend.samples.models import Library
19 from htsworkflow.frontend.auth import require_api_key
20
def flowcell_information(flowcell_id):
    """
    Return a dictionary describing a flowcell

    The FlowCell is looked up by its flowcell_id.  The result holds the
    flowcell's own fields plus a 'lane_set' dictionary, keyed by
    lane.lane_number, with one dictionary of lane and library details
    (including the library's species) per lane.

    Returns None when no FlowCell matches flowcell_id.
    """
    try:
        fc = FlowCell.objects.get(flowcell_id=flowcell_id)
    except FlowCell.DoesNotExist:
        # The exception object itself is not needed; a missing flowcell
        # is reported to the caller as None.
        return None

    lane_set = {}
    for lane in fc.lane_set.all():
        library = lane.library
        lane_set[lane.lane_number] = {
            'cluster_estimate': lane.cluster_estimate,
            'comment': lane.comment,
            'flowcell': lane.flowcell.flowcell_id,
            'lane_number': int(lane.lane_number),
            'library_name': library.library_name,
            'library_id': library.library_id,
            'library_species': library.library_species.scientific_name,
            # converted to float so the value can be passed to json.dumps
            # by flowcell_json -- presumably stored as a Decimal; confirm
            'pM': float(lane.pM),
        }

    info = {
        'advanced_run': fc.advanced_run,
        'cluster_station_id': fc.cluster_station_id,
        'cluster_station': fc.cluster_station.name,
        'control_lane': int(fc.control_lane),
        # 'datarun_set': how should this be represented?,
        'flowcell_id': fc.flowcell_id,
        'id': fc.id,
        'lane_set': lane_set,
        'notes': fc.notes,
        'paired_end': fc.paired_end,
        'read_length': fc.read_length,
        # dates are emitted as ISO 8601 strings
        'run_date': fc.run_date.isoformat(),
        'sequencer_id': fc.sequencer_id,
        'sequencer': fc.sequencer.name,
    }

    return info
60
def flowcell_json(request, fc_id):
    """
    Serve the description of flowcell fc_id as a JSON document.

    require_api_key(request) is called first; its return value is not
    used here (presumably it raises when the key is missing -- confirm
    against htsworkflow.frontend.auth).  Raises Http404 when
    flowcell_information() finds no such flowcell.
    """
    require_api_key(request)

    description = flowcell_information(fc_id)
    if description is not None:
        body = json.dumps(description)
        return HttpResponse(body, mimetype = 'application/json')

    raise Http404
74     
def updStatus(request):
    """
    Update the status, and optionally the note, of a DataRun record.

    Request parameters (read from request.REQUEST):
      fcid  -- flowcell id; only echoed back in the "entry not found" message
      runf  -- run folder used to look up the DataRun
      updst -- new value stored in run_status
      msg   -- optional text appended, with a timestamp, to run_note

    The caller must be a superuser connecting from an address listed in
    settings.ALLOWED_IPS.  Every outcome, including each failure, is
    reported as the body of an HttpResponse.
    """
    ClIP = request.META['REMOTE_ADDR']

    # A request without a user attribute falls back to the placeholder
    # string 'none'.  That placeholder has no is_superuser attribute, so
    # it is looked up with a default and the request is denied below
    # instead of failing with AttributeError.
    user = getattr(request, 'user', 'none')

    # Check access permission
    is_superuser = getattr(user, 'is_superuser', False)
    if not (is_superuser and ClIP in settings.ALLOWED_IPS):
        return HttpResponse("%s access denied from %s." % (user, ClIP))

    # ~~~~~~ Parameters for the job ~~~~
    params = request.REQUEST
    if 'fcid' not in params:
        return HttpResponse('missing fcid')
    if 'runf' not in params:
        return HttpResponse('missing runf')
    if 'updst' not in params:
        return HttpResponse('missing status')

    fcid = params['fcid']
    runfolder = params['runf']
    UpdatedStatus = params['updst']

    # Update Data Run status in DB
    try:
        rec = DataRun.objects.get(run_folder=runfolder)
    except ObjectDoesNotExist:
        return HttpResponse("entry not found: " + fcid + ", " + runfolder)

    rec.run_status = UpdatedStatus

    # Timestamp with minute resolution, e.g. "2009-01-31 14:05"
    mytimestamp = datetime.now().strftime('%Y-%m-%d %H:%M')
    if 'msg' in params:
        # if there's a message update the note too; an unset note is
        # treated as empty so the append cannot fail on None
        rec.run_note = (rec.run_note or '') + ", " + params['msg'] + " (" + mytimestamp + ")"
    elif UpdatedStatus == '1':
        rec.run_note = "Started (" + mytimestamp + ")"

    rec.save()

    # The record is already saved at this point, so a status value that
    # cannot be mapped to a label must not turn the response into an
    # error; fall back to reporting the raw value.
    try:
        status_label = DataRun.RUN_STATUS_CHOICES[int(UpdatedStatus)][1]
    except (ValueError, IndexError):
        status_label = UpdatedStatus

    output = "Hello %s. Updated to:'%s'" % (settings.ALLOWED_IPS[ClIP], status_label)

    # Notifying the researcher by email is not implemented: earlier
    # attempts with send_mail / mail_admins failed with the error
    # (49, "Can't assign requested address").
    return HttpResponse(output)
137
def generateConfile(request, fcid):
    """
    Build the text of a pipeline config file for flowcell fcid.

    The text starts with four fixed settings (READ_LENGTH, ANALYSIS,
    GENOME_FILE, ELAND_MULTIPLE_INSTANCES) followed by a GENOME_DIR and
    an ELAND_GENOME line for every lane, both pointing at a directory
    named after the scientific name of the lane's library species.

    request is accepted for interface compatibility but is not used.

    Returns the lines joined with os.linesep.  When a lookup raises
    ObjectDoesNotExist the single line
    'Entry not found for fcid  = <fcid>' is returned instead.
    """
    config = [
        'READ_LENGTH 25',
        'ANALYSIS eland',
        'GENOME_FILE all_chr.fa',
        'ELAND_MULTIPLE_INSTANCES 8',
    ]
    genome_dir = 'GENOME_DIR /Volumes/Genomes/'
    eland_genome = 'ELAND_GENOME /Volumes/Genomes/'

    try:
        fc = FlowCell.objects.get(flowcell_id=fcid)
        for lane in fc.lane_set.all():
            lane_prefix = str(lane.lane_number) + ":"
            species_name = lane.library.library_species.scientific_name
            config += [lane_prefix + genome_dir + species_name]
            config += [lane_prefix + eland_genome + species_name]
    except ObjectDoesNotExist:
        # Keep config a list: joining a bare string would put every
        # character of the message on its own line.
        config = ['Entry not found for fcid  = ' + fcid]

    return os.linesep.join(config)
165
def getConfile(req):
    """
    Return the config file text stored for a data run, generating and
    storing it first when the stored text has no READ_LENGTH setting.

    Request parameters (read from req.REQUEST):
      fcid -- flowcell id, passed to generateConfile()
      runf -- run folder used to look up the DataRun

    Only clients whose address is listed in settings.ALLOWED_IPS are
    served.  The response body is 'Nothing found' when either parameter
    is missing, and an explanatory message when the DataRun does not
    exist or the config text could not be generated.
    """
    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied. IP: " + ClIP)

    request = req.REQUEST
    cnfgfile = 'Nothing found'

    # Parameters are only read after checking that they are present, so
    # a request lacking one gets 'Nothing found' rather than a KeyError.
    if 'fcid' in request and 'runf' in request:
        fcid = request['fcid']
        runfolder = request['runf']
        try:
            rec = DataRun.objects.get(run_folder=runfolder)
            # an unset config_params is treated like an empty one
            cnfgfile = rec.config_params or ''
            match_str = re.compile('^READ_LENGTH.+')
            if not match_str.search(cnfgfile):
                cnfgfile = generateConfile(request, fcid)
                if match_str.search(cnfgfile):
                    # The record is read again right before saving, as
                    # the code has always done -- presumably to avoid
                    # writing back stale field values; confirm before
                    # removing the extra query.
                    rec = DataRun.objects.get(run_folder=runfolder)
                    rec.config_params = cnfgfile
                    rec.save()
                else:
                    cnfgfile = 'Failed generating config params for RunFolder = '+runfolder +', Flowcell id = '+ fcid+ ' Config Text:\n'+cnfgfile
        except ObjectDoesNotExist:
            cnfgfile = 'Entry not found for RunFolder = ' + runfolder

    return HttpResponse(cnfgfile, mimetype='text/plain')
202
def getLaneLibs(req):
    """
    Return a SolexaResult XML document listing the library on each of
    the eight lanes of a flowcell.

    Request parameters (read from req.REQUEST):
      fcid -- flowcell id to look up

    Only clients whose address is listed in settings.ALLOWED_IPS are
    served.  The response body is a plain message instead of XML when
    fcid is missing or no matching FlowCell exists.
    """
    # Imported here so this function does not depend on a change to the
    # module's import block.
    from xml.sax.saxutils import escape

    def attr(value):
        # Escape &, <, > and the double quote so that a value cannot
        # break out of a double-quoted XML attribute.
        return escape(value, {'"': '&quot;'})

    ClIP = req.META['REMOTE_ADDR']
    if ClIP not in settings.ALLOWED_IPS:
        return HttpResponse("access denied.")

    request = req.REQUEST
    if 'fcid' not in request:
        return HttpResponse('Missing input: flowcell id', mimetype='text/plain')

    fcid = request['fcid']
    try:
        rec = FlowCell.objects.get(flowcell_id=fcid)
        # Two-digit year, month and day, e.g. 071211.  Taken from a
        # single timestamp so the parts cannot disagree around midnight.
        mydate = datetime.today().strftime('%y%m%d')

        lines = ['<?xml version="1.0" ?>']
        lines.append('<SolexaResult Date="%s" Flowcell="%s" Client="%s">' % (
            mydate, attr(fcid), attr(settings.ALLOWED_IPS[ClIP])))
        # NOTE(review): other functions in this module reach lanes
        # through flowcell.lane_set; confirm that the lane_N_library
        # attributes read here still exist on FlowCell.
        for index in range(1, 9):
            library = getattr(rec, 'lane_%d_library' % index)
            lines.append(
                '<Lane Index="%d" Name="%s" Library="%s" Genome="%s" PrimerName="" PrimerSeq=""/>' % (
                    index,
                    attr(library.library_name),
                    attr(library.library_id),
                    attr(library.library_species.use_genome_build)))
        lines.append('</SolexaResult>')
        outputfile = '\n'.join(lines)
    except ObjectDoesNotExist:
        outputfile = 'Flowcell entry not found for: ' + fcid

    return HttpResponse(outputfile, mimetype='text/plain')
243
def estimateFlowcellDuration(flowcell):
    """
    Estimate, in whole days, how long a flowcell will take to run.

    The cycle count is flowcell.read_length, doubled for a paired end
    run.  Each cycle is charged 1.5 hours of sequencing plus a rough
    guess of 800 seconds of pipeline analysis.

    Returns a (low, high) tuple of timedeltas: the total rounded down
    to whole days, and that value plus one day.
    """
    seconds_per_hour = 3600
    sequencing_rate = seconds_per_hour * 1.5   # seconds per cycle
    pipeline_rate = 800                        # seconds per cycle, rough guess

    total_cycles = flowcell.read_length
    if flowcell.paired_end:
        total_cycles = total_cycles * 2

    sequencing = timedelta(seconds=total_cycles * sequencing_rate)
    analysis = timedelta(seconds=total_cycles * pipeline_rate)
    whole_days = (sequencing + analysis).days

    lower_bound = timedelta(days=whole_days)
    upper_bound = timedelta(days=whole_days + 1)
    return (lower_bound, upper_bound)
266     
267
def makeUserLaneMap(flowcell):
    """
    Map each interested user to the lanes of a flowcell.

    A user is associated with a lane through the affiliations of the
    lane's library.  The lane is appended once per affiliation that
    lists the user, so it can appear more than once in a user's list.
    """
    # Flatten the lane -> affiliation -> user nesting into pairs.
    pairs = (
        (member, lane)
        for lane in flowcell.lane_set.all()
        for group in lane.library.affiliations.all()
        for member in group.users.all()
    )

    lanes_by_user = {}
    for member, lane in pairs:
        if member not in lanes_by_user:
            lanes_by_user[member] = []
        lanes_by_user[member].append(lane)

    return lanes_by_user
281
def getUsersForFlowcell(flowcell):
    """
    Return the set of users reachable from a flowcell through the
    affiliations of the libraries on its lanes.
    """
    return set(
        member
        for lane in flowcell.lane_set.all()
        for group in lane.library.affiliations.all()
        for member in group.users.all()
    )
291     
def makeUserLibraryMap(libraries):
    """
    Map each interested user to libraries from an iterable.

    A user is associated with a library through the library's
    affiliations.  The library is appended once per affiliation that
    lists the user, so it can appear more than once in a user's list.
    """
    # Flatten the library -> affiliation -> user nesting into pairs.
    pairs = (
        (member, entry)
        for entry in libraries
        for group in entry.affiliations.all()
        for member in group.users.all()
    )

    libraries_by_user = {}
    for member, entry in pairs:
        if member not in libraries_by_user:
            libraries_by_user[member] = []
        libraries_by_user[member].append(entry)

    return libraries_by_user
305
def makeAffiliationLaneMap(flowcell):
    """
    Map each affiliation to the lanes of a flowcell whose library
    lists that affiliation.
    """
    # Flatten the lane -> affiliation nesting into pairs.
    pairs = (
        (group, lane)
        for lane in flowcell.lane_set.all()
        for group in lane.library.affiliations.all()
    )

    lanes_by_affiliation = {}
    for group, lane in pairs:
        if group not in lanes_by_affiliation:
            lanes_by_affiliation[group] = []
        lanes_by_affiliation[group].append(lane)

    return lanes_by_affiliation
314
def makeEmailLaneMap(flowcell):
    """
    Map email addresses to the set of flowcell lanes they relate to.

    Addresses are taken both from each affiliation of a lane's library
    and from the users belonging to that affiliation.  Addresses that
    are None or empty are skipped.
    """
    emails = {}
    for lane in flowcell.lane_set.all():
        for group in lane.library.affiliations.all():
            # The affiliation's own address comes first, followed by
            # the addresses of its users.
            candidates = [group.email]
            candidates.extend(member.email for member in group.users.all())

            for address in candidates:
                if address is None or len(address) == 0:
                    continue
                if address not in emails:
                    emails[address] = set()
                emails[address].add(lane)

    return emails
329
330     return emails