Convert Rami's DataRun feature to something useful to us.
[htsworkflow.git] / htsworkflow / frontend / experiments / tests.py
1 import re
2 from BeautifulSoup import BeautifulSoup
# Prefer the standard library json module; fall back to simplejson on
# interpreters that predate it.  The exception instance is not needed, so it
# is not bound (this spelling is also valid on newer Python versions).
try:
    import json
except ImportError:
    import simplejson as json
7 import os
8 import shutil
9 import sys
10 import tempfile
11
12 from django.conf import settings
13 from django.core import mail
14 from django.core.exceptions import ObjectDoesNotExist
15 from django.test import TestCase
16 from htsworkflow.frontend.experiments import models
17 from htsworkflow.frontend.experiments import experiments
18 from htsworkflow.frontend.auth import apidata
19
20 from htsworkflow.pipelines.test.simulate_runfolder import TESTDATA_DIR
21
# Lane numbers 1 through 8 inclusive.
LANE_SET = range(1, 9)
23
class ExperimentsTestCases(TestCase):
    """Tests for the experiments helpers, JSON views and FlowCell model.

    Every test runs against the ``test_flowcells.json`` fixture and a
    temporary result directory created in ``setUp``.
    """
    fixtures = ['test_flowcells.json']

    def setUp(self):
        """Create a temporary result tree and point settings at it."""
        # Remember the configured value so tearDown can restore it; otherwise
        # later tests would keep seeing a directory that has been deleted.
        self._had_result_home_dir = hasattr(settings, 'RESULT_HOME_DIR')
        self._saved_result_home_dir = getattr(settings,
                                              'RESULT_HOME_DIR', None)

        self.tempdir = tempfile.mkdtemp(prefix='htsw-test-experiments-')
        settings.RESULT_HOME_DIR = self.tempdir

        # Flowcell 1: a run xml file plus one srf file per lane.
        self.fc1_id = 'FC12150'
        self.fc1_root = os.path.join(self.tempdir, self.fc1_id)
        os.mkdir(self.fc1_root)
        self.fc1_dir = os.path.join(self.fc1_root, 'C1-37')
        os.mkdir(self.fc1_dir)
        runxml = 'run_FC12150_2007-09-27.xml'
        shutil.copy(os.path.join(TESTDATA_DIR, runxml),
                    os.path.join(self.fc1_dir, runxml))
        # The same sample srf is copied under a per-lane name.
        srf_source = os.path.join(
            TESTDATA_DIR, 'woldlab_070829_USI-EAS44_0017_FC11055_1.srf')
        for lane_number in LANE_SET:
            srf_name = 'woldlab_070829_SERIAL_FC12150_%d.srf' % (lane_number,)
            shutil.copy(srf_source, os.path.join(self.fc1_dir, srf_name))

        # Flowcell 2: only empty cycle directories.
        self.fc2_dir = os.path.join(self.tempdir, '42JTNAAXX')
        os.mkdir(self.fc2_dir)
        os.mkdir(os.path.join(self.fc2_dir, 'C1-25'))
        os.mkdir(os.path.join(self.fc2_dir, 'C1-37'))
        os.mkdir(os.path.join(self.fc2_dir, 'C1-37', 'Plots'))

    def tearDown(self):
        """Remove the temporary tree and restore the original setting."""
        try:
            shutil.rmtree(self.tempdir)
        finally:
            # Only restore when there was a value to begin with; if the
            # setting did not exist before setUp it is left as it is.
            if self._had_result_home_dir:
                settings.RESULT_HOME_DIR = self._saved_result_home_dir

    def _check_flowcell(self, fc_data, fc_django, fc_id):
        """Compare the flowcell level fields of fc_data with the model."""
        self.assertEqual(fc_data['flowcell_id'], fc_id)
        self.assertEqual(fc_data['sequencer'], fc_django.sequencer.name)
        self.assertEqual(fc_data['read_length'], fc_django.read_length)
        self.assertEqual(fc_data['notes'], fc_django.notes)
        self.assertEqual(fc_data['cluster_station'],
                         fc_django.cluster_station.name)

    def _check_lane(self, lane_data, lane):
        """Compare one packed lane dictionary with its Lane model object."""
        self.assertEqual(lane_data['cluster_estimate'], lane.cluster_estimate)
        self.assertEqual(lane_data['comment'], lane.comment)
        self.assertEqual(lane_data['flowcell'], lane.flowcell.flowcell_id)
        self.assertEqual(lane_data['lane_number'], lane.lane_number)
        self.assertEqual(lane_data['library_name'], lane.library.library_name)
        self.assertEqual(lane_data['library_id'], lane.library.id)
        self.assertAlmostEqual(float(lane_data['pM']), float(lane.pM))
        self.assertEqual(lane_data['library_species'],
                         lane.library.library_species.scientific_name)

    def test_flowcell_information(self):
        """
        Check the code that packs the django objects into simple types.
        """
        for fc_id in [u'FC12150', u"42JTNAAXX", "42JU1AAXX"]:
            # First the in-process helper ...
            fc_dict = experiments.flowcell_information(fc_id)
            fc_django = models.FlowCell.objects.get(flowcell_id=fc_id)
            self.assertEqual(fc_django.flowcell_id, fc_id)
            self._check_flowcell(fc_dict, fc_django, fc_id)
            for lane in fc_django.lane_set.all():
                self._check_lane(fc_dict['lane_set'][lane.lane_number], lane)

            # ... then the same information fetched through the JSON view.
            response = self.client.get(
                '/experiments/config/%s/json' % (fc_id,), apidata)
            self.assertEqual(response.status_code, 200)
            # strptime isoformat string = '%Y-%m-%dT%H:%M:%S'
            fc_json = json.loads(response.content)
            self._check_flowcell(fc_json, fc_django, fc_id)
            for lane in fc_django.lane_set.all():
                # Lanes in the decoded JSON are looked up by string key.
                lane_key = unicode(lane.lane_number)
                self._check_lane(fc_json['lane_set'][lane_key], lane)

    def test_invalid_flowcell(self):
        """
        Make sure we get a 404 if we request an invalid flowcell ID
        """
        response = self.client.get('/experiments/config/nottheone/json',
                                   apidata)
        self.assertEqual(response.status_code, 404)

    def test_no_key(self):
        """
        Require logging in to retrieve meta data
        """
        response = self.client.get(u'/experiments/config/FC12150/json')
        self.assertEqual(response.status_code, 403)

    def test_library_id(self):
        """
        Library IDs should be flexible, so make sure we can retrieve a
        non-numeric ID
        """
        response = self.client.get('/experiments/config/FC12150/json', apidata)
        self.assertEqual(response.status_code, 200)
        flowcell = json.loads(response.content)

        self.assertEqual(flowcell['lane_set']['3']['library_id'], 'SL039')

        response = self.client.get('/samples/library/SL039/json', apidata)
        self.assertEqual(response.status_code, 200)
        library_sl039 = json.loads(response.content)

        self.assertEqual(library_sl039['library_id'], 'SL039')

    def test_raw_id_field(self):
        """
        Test ticket:147

        Libraries have IDs, libraries also have primary keys,
        we eventually had enough libraries that the drop down combo box was
        too hard to filter through, unfortunately we want a field that uses
        our library id and not the internal primary key, and raw_id_field
        uses primary keys.

        This tests to make sure that the value entered in the raw library id
        field matches the library id looked up.
        """
        expected_ids = [u'10981', u'11016', u'SL039', u'11060',
                        u'11061', u'11062', u'11063', u'11064']
        self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')
        response = self.client.get('/admin/experiments/flowcell/153/')
        soup = BeautifulSoup(response.content)
        for i, expected_id in enumerate(expected_ids):
            field_id = 'id_lane_set-%d-library' % (i,)
            input_field = soup.find(id=field_id)
            self.assertTrue(input_field is not None,
                            'missing form field %s' % (field_id,))
            library_field = input_field.findNext('strong')
            # Split on the first colon only, so a library name that itself
            # contains a colon does not break the two-way unpacking.
            library_id, library_name = library_field.string.split(':', 1)
            # strip leading '#' sign from the id
            library_id = library_id[1:]
            self.assertEqual(library_id, expected_id)
            self.assertEqual(input_field['value'], library_id)

    def test_lanes_for(self):
        """
        Check that lanes_for and its JSON view agree for a user with lanes.
        """
        user = 'test'
        lanes = experiments.lanes_for(user)
        self.assertEqual(len(lanes), 5)

        response = self.client.get('/experiments/lanes_for/%s/json' % (user,),
                                   apidata)
        self.assertEqual(response.status_code, 200)
        lanes_json = json.loads(response.content)
        self.assertEqual(len(lanes), len(lanes_json))
        for lane, lane_json in zip(lanes, lanes_json):
            self.assertEqual(lane['comment'], lane_json['comment'])
            self.assertEqual(lane['lane_number'], lane_json['lane_number'])
            self.assertEqual(lane['flowcell'], lane_json['flowcell'])
            self.assertEqual(lane['run_date'], lane_json['run_date'])

    def test_lanes_for_no_lanes(self):
        """
        Do we get something meaningful back when the user isn't attached to
        anything?
        """
        user = 'supertest'
        lanes = experiments.lanes_for(user)
        self.assertEqual(len(lanes), 0)

        response = self.client.get('/experiments/lanes_for/%s/json' % (user,),
                                   apidata)
        # Decoding fails the test if the body is not well-formed JSON.
        # NOTE(review): the decoded content itself is not checked; confirm
        # what the view is expected to return for a user with no lanes.
        json.loads(response.content)

    def test_lanes_for_no_user(self):
        """
        Do we get something meaningful back when its the wrong user
        """
        user = 'not a real user'
        self.assertRaises(ObjectDoesNotExist, experiments.lanes_for, user)

        response = self.client.get('/experiments/lanes_for/%s/json' % (user,),
                                   apidata)
        self.assertEqual(response.status_code, 404)

    def test_raw_data_dir(self):
        """Raw data path generator check"""
        flowcell_id = self.fc1_id
        raw_dir = os.path.join(settings.RESULT_HOME_DIR, flowcell_id)

        fc = models.FlowCell.objects.get(flowcell_id=flowcell_id)
        self.assertEqual(fc.get_raw_data_directory(), raw_dir)

        # The " (failed)" suffix must not change the directory returned.
        fc.flowcell_id = flowcell_id + " (failed)"
        self.assertEqual(fc.get_raw_data_directory(), raw_dir)

    def test_data_run_import(self):
        """Check the data run and data files created by update_data_runs."""
        srf_file_type = models.FileType.objects.get(name='SRF')
        runxml_file_type = models.FileType.objects.get(name='run_xml')
        flowcell_id = self.fc1_id
        flowcell = models.FlowCell.objects.get(flowcell_id=flowcell_id)
        flowcell.update_data_runs()
        self.assertEqual(len(flowcell.datarun_set.all()), 1)

        run = flowcell.datarun_set.all()[0]
        result_files = run.datafile_set.all()
        # Index the data files by relative path for direct lookup below.
        result_dict = dict((rf.relative_pathname, rf) for rf in result_files)

        srf4 = result_dict['FC12150/C1-37/woldlab_070829_SERIAL_FC12150_4.srf']
        self.assertEqual(srf4.file_type, srf_file_type)
        self.assertEqual(srf4.library_id, '11060')
        self.assertEqual(srf4.data_run.flowcell.flowcell_id, 'FC12150')
        self.assertEqual(
            srf4.data_run.flowcell.lane_set.get(lane_number=4).library_id,
            '11060')
        self.assertEqual(
            srf4.pathname,
            os.path.join(settings.RESULT_HOME_DIR, srf4.relative_pathname))

        lane_files = run.lane_files()
        self.assertEqual(lane_files[4]['srf'], srf4)

        runxml = result_dict['FC12150/C1-37/run_FC12150_2007-09-27.xml']
        self.assertEqual(runxml.file_type, runxml_file_type)
        self.assertEqual(runxml.library_id, None)

    def test_read_result_file(self):
        """make sure we can return a result file
        """
        flowcell_id = self.fc1_id
        flowcell = models.FlowCell.objects.get(flowcell_id=flowcell_id)
        flowcell.update_data_runs()

        # NOTE: no login is performed here; the files are requested by their
        # random key using an anonymous client.
        result_files = flowcell.datarun_set.all()[0].datafile_set.all()
        for f in result_files:
            url = '/experiments/file/%s' % (f.random_key,)
            response = self.client.get(url)
            self.assertEqual(response.status_code, 200)
            mimetype = f.file_type.mimetype
            if mimetype is None:
                # The test expects a generic binary type when the file type
                # declares no mimetype.
                mimetype = 'application/octet-stream'

            self.assertEqual(mimetype, response['content-type'])
261         
class TestFileType(TestCase):
    """Tests for the FileType model and filename based type detection.

    All FileType tests live in a single class: defining a second class with
    the same name rebinds the module attribute, so the tests of the earlier
    class would silently never be run.
    """

    def _check_file_type_cases(self, cases):
        """Check find_file_type_metadata_from_filename for every case.

        cases is a sequence of (filename, file type name, lane, end) tuples;
        lane and end are None when the filename is not expected to provide
        them.
        """
        file_type_objects = models.FileType.objects
        for filename, typename, lane, end in cases:
            result = models.find_file_type_metadata_from_filename(filename)
            self.assertEqual(result['file_type'],
                             file_type_objects.get(name=typename))
            self.assertEqual(result.get('lane', None), lane)
            self.assertEqual(result.get('end', None), end)

    def test_file_type_unicode(self):
        """Check the unicode representation of a FileType object."""
        file_type_objects = models.FileType.objects
        name = 'QSEQ tarfile'
        file_type_object = file_type_objects.get(name=name)
        self.assertEqual(u"<FileType: QSEQ tarfile>",
                         unicode(file_type_object))

    def test_find_file_type(self):
        """Detect the file type, lane and end from bare filenames."""
        cases = [('woldlab_090921_HWUSI-EAS627_0009_42FC3AAXX_l7_r1.tar.bz2',
                  'QSEQ tarfile', 7, 1),
                 ('woldlab_091005_HWUSI-EAS627_0010_42JT2AAXX_1.srf',
                  'SRF', 1, None),
                 ('s_1_eland_extended.txt.bz2', 'ELAND Extended', 1, None),
                 ('s_7_eland_multi.txt.bz2', 'ELAND Multi', 7, None),
                 ('s_3_eland_result.txt.bz2', 'ELAND Result', 3, None),
                 ('s_1_export.txt.bz2', 'ELAND Export', 1, None),
                 ('s_1_percent_call.png', 'IVC Percent Call', 1, None),
                 ('s_2_percent_base.png', 'IVC Percent Base', 2, None),
                 ('s_3_percent_all.png', 'IVC Percent All', 3, None),
                 ('s_4_call.png', 'IVC Call', 4, None),
                 ('s_5_all.png', 'IVC All', 5, None),
                 ('Summary.htm', 'Summary.htm', None, None),
                 ('run_42JT2AAXX_2009-10-07.xml', 'run_xml', None, None),
                 ]
        self._check_file_type_cases(cases)

    def test_assign_file_type_complex_path(self):
        """Detection must give the same answers with directories prefixed."""
        cases = [('/a/b/c/woldlab_090921_HWUSI-EAS627_0009_42FC3AAXX_l7_r1.tar.bz2',
                  'QSEQ tarfile', 7, 1),
                 ('foo/woldlab_091005_HWUSI-EAS627_0010_42JT2AAXX_1.srf',
                  'SRF', 1, None),
                 ('../s_1_eland_extended.txt.bz2', 'ELAND Extended', 1, None),
                 ('/bleem/s_7_eland_multi.txt.bz2', 'ELAND Multi', 7, None),
                 ('/qwer/s_3_eland_result.txt.bz2', 'ELAND Result', 3, None),
                 ('/ty///1/s_1_export.txt.bz2', 'ELAND Export', 1, None),
                 ('/help/s_1_percent_call.png', 'IVC Percent Call', 1, None),
                 ('/bored/s_2_percent_base.png', 'IVC Percent Base', 2, None),
                 ('/example1/s_3_percent_all.png', 'IVC Percent All', 3, None),
                 ('amonkey/s_4_call.png', 'IVC Call', 4, None),
                 ('fishie/s_5_all.png', 'IVC All', 5, None),
                 ('/random/Summary.htm', 'Summary.htm', None, None),
                 ('/notrandom/run_42JT2AAXX_2009-10-07.xml', 'run_xml',
                  None, None),
                 ]
        self._check_file_type_cases(cases)
320                              
class TestEmailNotify(TestCase):
    """Tests for the "flowcell started" email page of flowcell 153."""
    fixtures = ['test_flowcells.json']

    def _login(self, username):
        """Log the test client in as username with the fixture password."""
        self.client.login(username=username, password='BJOKL5kAj6aFZ6A5')

    def _get_started(self, *args):
        """GET the started-email page, passing any query data through."""
        return self.client.get('/experiments/started/153/', *args)

    def test_started_email_not_logged_in(self):
        """An anonymous request is redirected."""
        page = self._get_started()
        self.assertEqual(page.status_code, 302)

    def test_started_email_logged_in_user(self):
        """The 'test' account is redirected as well."""
        self._login('test')
        page = self._get_started()
        self.assertEqual(page.status_code, 302)

    def test_started_email_logged_in_staff(self):
        """The 'admintest' account gets the page."""
        self._login('admintest')
        page = self._get_started()
        self.assertEqual(page.status_code, 200)

    def test_started_email_send(self):
        """Preview the email, then send it and inspect the outbox."""
        self._login('admintest')
        preview = self._get_started()
        self.assertEqual(preview.status_code, 200)

        self.assertTrue('pk1@example.com' in preview.content)
        self.assertTrue(
            'Lane #8 : (11064) Paired ends 104' in preview.content)

        sent = self._get_started({'send':'1','bcc':'on'})
        self.assertEqual(sent.status_code, 200)
        self.assertEqual(len(mail.outbox), 4)
        for message in mail.outbox:
            self.assertTrue(len(message.body) > 0)

    def test_email_navigation(self):
        """
        Can we navigate between the flowcell and email forms properly?
        """
        self._login('supertest')
        page = self._get_started()
        self.assertEqual(page.status_code, 200)
        self.assertTrue(re.search('Flowcell FC12150', page.content))
        # require that navigation back to the admin page exists
        admin_link = '<a href="/admin/experiments/flowcell/153/">[^<]+</a>'
        self.assertTrue(re.search(admin_link, page.content))
362         
363