# Add support for tracking the multiplex index sequence.
# [htsworkflow.git] / htsworkflow / frontend / experiments / tests.py
import re
from BeautifulSoup import BeautifulSoup
try:
    import json
except ImportError:
    # The json module only joined the standard library in Python 2.6;
    # older interpreters need the external simplejson package instead.
    import simplejson as json
import os
import shutil
import sys
import tempfile

from django.conf import settings
from django.core import mail
from django.core.exceptions import ObjectDoesNotExist
from django.test import TestCase
from htsworkflow.frontend.experiments import models
from htsworkflow.frontend.experiments import experiments
from htsworkflow.frontend.auth import apidata

from htsworkflow.pipelines.test.simulate_runfolder import TESTDATA_DIR

# Lane numbers 1 through 8 inclusive.
LANE_SET = range(1, 9)
23
24
class ExperimentsTestCases(TestCase):
    """Tests for the flowcell, lane and result-file code of the experiments app.

    Every test runs against the ``test_flowcells.json`` fixture and a
    throw-away result directory that ``setUp`` creates.
    """
    fixtures = ['test_flowcells.json']

    def setUp(self):
        """Create a temporary result tree and point RESULT_HOME_DIR at it."""
        self.tempdir = tempfile.mkdtemp(prefix='htsw-test-experiments-')
        # Remember the configured value so tearDown can undo the override
        # instead of leaking a deleted directory into later tests.
        self._had_result_home = hasattr(settings, 'RESULT_HOME_DIR')
        self._old_result_home = getattr(settings, 'RESULT_HOME_DIR', None)
        settings.RESULT_HOME_DIR = self.tempdir

        self.fc1_id = 'FC12150'
        self.fc1_root = os.path.join(self.tempdir, self.fc1_id)
        os.mkdir(self.fc1_root)
        self.fc1_dir = os.path.join(self.fc1_root, 'C1-37')
        os.mkdir(self.fc1_dir)
        runxml = 'run_FC12150_2007-09-27.xml'
        shutil.copy(os.path.join(TESTDATA_DIR, runxml),
                    os.path.join(self.fc1_dir, runxml))
        # The same sample srf file is reused for every lane; only the
        # destination name changes.
        srf_source = os.path.join(
            TESTDATA_DIR, 'woldlab_070829_USI-EAS44_0017_FC11055_1.srf')
        for lane_number in LANE_SET:
            srf_name = 'woldlab_070829_SERIAL_FC12150_%d.srf' % (lane_number,)
            shutil.copy(srf_source, os.path.join(self.fc1_dir, srf_name))

        self.fc2_dir = os.path.join(self.tempdir, '42JTNAAXX')
        os.mkdir(self.fc2_dir)
        os.mkdir(os.path.join(self.fc2_dir, 'C1-25'))
        os.mkdir(os.path.join(self.fc2_dir, 'C1-37'))
        os.mkdir(os.path.join(self.fc2_dir, 'C1-37', 'Plots'))

    def tearDown(self):
        """Remove the temporary result tree and restore RESULT_HOME_DIR."""
        try:
            shutil.rmtree(self.tempdir)
        finally:
            if self._had_result_home:
                settings.RESULT_HOME_DIR = self._old_result_home

    def _check_lane_dict(self, lane_dict, lane):
        """Assert that a packed lane dictionary agrees with its django lane."""
        self.assertEqual(lane_dict['cluster_estimate'], lane.cluster_estimate)
        self.assertEqual(lane_dict['comment'], lane.comment)
        self.assertEqual(lane_dict['flowcell'], lane.flowcell.flowcell_id)
        self.assertEqual(lane_dict['lane_number'], lane.lane_number)
        self.assertEqual(lane_dict['library_name'], lane.library.library_name)
        self.assertEqual(lane_dict['library_id'], lane.library.id)
        self.assertAlmostEqual(float(lane_dict['pM']), float(lane.pM))
        self.assertEqual(lane_dict['library_species'],
                         lane.library.library_species.scientific_name)

    def _check_flowcell(self, fc_info, fc_django, fc_id, lane_key):
        """Assert that packed flowcell information agrees with the database.

        :param fc_info: dictionary describing the flowcell.
        :param fc_django: the FlowCell model instance to compare against.
        :param fc_id: the flowcell ID that was requested.
        :param lane_key: callable turning a lane number into the key used
                         by ``fc_info['lane_set']``.
        """
        self.assertEqual(fc_info['flowcell_id'], fc_id)
        self.assertEqual(fc_info['sequencer'], fc_django.sequencer.name)
        self.assertEqual(fc_info['read_length'], fc_django.read_length)
        self.assertEqual(fc_info['notes'], fc_django.notes)
        self.assertEqual(fc_info['cluster_station'],
                         fc_django.cluster_station.name)

        for lane in fc_django.lane_set.all():
            lane_contents = fc_info['lane_set'][lane_key(lane.lane_number)]
            lane_dict = multi_lane_to_dict(lane_contents)[lane.library_id]
            self._check_lane_dict(lane_dict, lane)

    def test_flowcell_information(self):
        """
        Check the code that packs the django objects into simple types.
        """
        for fc_id in [u'FC12150', u"42JTNAAXX", "42JU1AAXX"]:
            fc_dict = experiments.flowcell_information(fc_id)
            fc_django = models.FlowCell.objects.get(flowcell_id=fc_id)
            self.assertEqual(fc_django.flowcell_id, fc_id)
            # The in-process dictionary is indexed by the lane number itself.
            self._check_flowcell(fc_dict, fc_django, fc_id,
                                 lambda number: number)

            flowcell_url = '/experiments/config/%s/json'
            response = self.client.get(flowcell_url % (fc_id,), apidata)
            # strptime isoformat string = '%Y-%m-%dT%H:%M:%S'
            fc_json = json.loads(response.content)
            # The decoded JSON is indexed by the lane number as text.
            self._check_flowcell(fc_json, fc_django, fc_id, unicode)

    def test_invalid_flowcell(self):
        """
        Make sure we get a 404 if we request an invalid flowcell ID
        """
        flowcell_url = '/experiments/config/nottheone/json'
        response = self.client.get(flowcell_url, apidata)
        self.assertEqual(response.status_code, 404)

    def test_no_key(self):
        """
        Require logging in to retrieve meta data
        """
        response = self.client.get(u'/experiments/config/FC12150/json')
        self.assertEqual(response.status_code, 403)

    def test_library_id(self):
        """
        Library IDs should be flexible, retrieve a non-numeric ID
        """
        flowcell_url = '/experiments/config/FC12150/json'
        response = self.client.get(flowcell_url, apidata)
        self.assertEqual(response.status_code, 200)
        flowcell = json.loads(response.content)

        lane_contents = flowcell['lane_set']['3']
        lane_library = lane_contents[0]
        self.assertEqual(lane_library['library_id'], 'SL039')

        response = self.client.get('/samples/library/SL039/json', apidata)
        self.assertEqual(response.status_code, 200)
        library_sl039 = json.loads(response.content)

        self.assertEqual(library_sl039['library_id'], 'SL039')

    def test_raw_id_field(self):
        """
        Test ticket:147

        Libraries have IDs, libraries also have primary keys,
        we eventually had enough libraries that the drop down combo box was too
        hard to filter through, unfortunately we want a field that uses our
        library id and not the internal primary key, and raw_id_field uses
        primary keys.

        This tests to make sure that the value entered in the raw library id
        field matches the library id looked up.
        """
        expected_ids = [u'10981', u'11016', u'SL039', u'11060',
                        u'11061', u'11062', u'11063', u'11064']
        self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')
        response = self.client.get('/admin/experiments/flowcell/153/')
        soup = BeautifulSoup(response.content)
        for i, expected_id in enumerate(expected_ids):
            input_field = soup.find(id='id_lane_set-%d-library' % (i,))
            library_field = input_field.findNext('strong')
            # The label reads "#<library id>:<library name>"; only the ID
            # part is needed, so split once and keep the left-hand side.
            library_id = library_field.text.split(':', 1)[0]
            # strip leading '#' sign from the ID
            library_id = library_id[1:]
            self.assertEqual(library_id, expected_id)
            self.assertEqual(input_field['value'], library_id)

    def test_library_to_flowcell_link(self):
        """
        Make sure the library page includes links to the flowcell pages.
        That work with flowcell IDs that have parenthetical comments.
        """
        self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')
        response = self.client.get('/library/11070/')
        soup = BeautifulSoup(response.content)
        failed_fc_span = soup.find(text='30012AAXX (failed)')
        failed_fc_a = failed_fc_span.findPrevious('a')
        # make sure some of our RDF made it.
        self.assertEqual(failed_fc_a.get('rel'), 'libns:flowcell')
        self.assertEqual(failed_fc_a.get('href'), '/flowcell/30012AAXX/')
        fc_response = self.client.get(failed_fc_a.get('href'))
        self.assertEqual(fc_response.status_code, 200)
        fc_lane_response = self.client.get('/flowcell/30012AAXX/8/')
        self.assertEqual(fc_lane_response.status_code, 200)

    def test_pooled_multiplex_id(self):
        """
        A lane with two libraries reports the index sequences of each one.
        """
        fc_dict = experiments.flowcell_information('42JU1AAXX')
        lane_contents = fc_dict['lane_set'][3]
        self.assertEqual(len(lane_contents), 2)
        lane_dict = multi_lane_to_dict(lane_contents)

        self.assertEqual(lane_dict['12044']['index_sequence'],
                         {u'1': u'ATCACG',
                          u'2': u'CGATGT',
                          u'3': u'TTAGGC'})
        self.assertEqual(lane_dict['11045']['index_sequence'],
                         {u'1': u'ATCACG'})

    def test_lanes_for(self):
        """
        lanes_for must report the same lanes directly and through the JSON view.
        """
        user = 'test'
        lanes = experiments.lanes_for(user)
        self.assertEqual(len(lanes), 5)

        flowcell_url = '/experiments/lanes_for/%s/json'
        response = self.client.get(flowcell_url % (user,), apidata)
        lanes_json = json.loads(response.content)
        # Equal lengths are asserted first, so zip() cannot hide extra lanes.
        self.assertEqual(len(lanes), len(lanes_json))
        for lane, lane_json in zip(lanes, lanes_json):
            self.assertEqual(lane['comment'], lane_json['comment'])
            self.assertEqual(lane['lane_number'], lane_json['lane_number'])
            self.assertEqual(lane['flowcell'], lane_json['flowcell'])
            self.assertEqual(lane['run_date'], lane_json['run_date'])

    def test_lanes_for_no_lanes(self):
        """
        What happens to user who haven't submitted anything
        """
        user = 'supertest'
        lanes = experiments.lanes_for(user)
        self.assertEqual(len(lanes), 0)

        url = '/experiments/lanes_for/%s/json'
        response = self.client.get(url % (user,), apidata)
        # Nothing is asserted about the payload; decoding it only checks
        # that the view answered with parseable JSON.
        json.loads(response.content)

    def test_lanes_for_no_user(self):
        """
        Do we get something meaningful back when its the wrong user
        """
        user = 'not a real user'
        self.assertRaises(ObjectDoesNotExist, experiments.lanes_for, user)

        url = '/experiments/lanes_for/%s/json'
        response = self.client.get(url % (user,), apidata)
        self.assertEqual(response.status_code, 404)

    def test_raw_data_dir(self):
        """Raw data path generator check"""
        flowcell_id = self.fc1_id
        raw_dir = os.path.join(settings.RESULT_HOME_DIR, flowcell_id)

        fc = models.FlowCell.objects.get(flowcell_id=flowcell_id)
        self.assertEqual(fc.get_raw_data_directory(), raw_dir)

        # A trailing status note on the ID must not change the directory.
        fc.flowcell_id = flowcell_id + " (failed)"
        self.assertEqual(fc.get_raw_data_directory(), raw_dir)

    def test_data_run_import(self):
        """Importing the result tree records one data run with typed files."""
        srf_file_type = models.FileType.objects.get(name='SRF')
        runxml_file_type = models.FileType.objects.get(name='run_xml')
        flowcell_id = self.fc1_id
        flowcell = models.FlowCell.objects.get(flowcell_id=flowcell_id)
        flowcell.update_data_runs()
        self.assertEqual(len(flowcell.datarun_set.all()), 1)

        run = flowcell.datarun_set.all()[0]
        result_files = run.datafile_set.all()
        result_dict = dict((rf.relative_pathname, rf) for rf in result_files)

        srf4 = result_dict['FC12150/C1-37/woldlab_070829_SERIAL_FC12150_4.srf']
        self.assertEqual(srf4.file_type, srf_file_type)
        self.assertEqual(srf4.library_id, '11060')
        self.assertEqual(srf4.data_run.flowcell.flowcell_id, 'FC12150')
        self.assertEqual(
            srf4.data_run.flowcell.lane_set.get(lane_number=4).library_id,
            '11060')
        self.assertEqual(
            srf4.pathname,
            os.path.join(settings.RESULT_HOME_DIR, srf4.relative_pathname))

        lane_files = run.lane_files()
        self.assertEqual(lane_files[4]['srf'], srf4)

        runxml = result_dict['FC12150/C1-37/run_FC12150_2007-09-27.xml']
        self.assertEqual(runxml.file_type, runxml_file_type)
        self.assertEqual(runxml.library_id, None)

    def test_read_result_file(self):
        """make sure we can return a result file
        """
        flowcell_id = self.fc1_id
        flowcell = models.FlowCell.objects.get(flowcell_id=flowcell_id)
        flowcell.update_data_runs()

        # No login is performed here: each file is requested through its
        # random key alone and is still expected to be served.
        result_files = flowcell.datarun_set.all()[0].datafile_set.all()
        for f in result_files:
            url = '/experiments/file/%s' % (f.random_key, )
            response = self.client.get(url)
            self.assertEqual(response.status_code, 200)
            mimetype = f.file_type.mimetype
            if mimetype is None:
                mimetype = 'application/octet-stream'

            self.assertEqual(mimetype, response['content-type'])
319
320
class TestFileType(TestCase):
    """Tests for FileType objects and filename based file type detection.

    These tests used to live in two classes that were both named
    TestFileType; the second definition replaced the first, so the unicode
    test never ran.  Keeping them in a single class runs all of them.
    """

    def _check_file_type_cases(self, cases):
        """Run the filename detector over ``cases`` and verify each result.

        :param cases: sequence of (filename, file type name, lane, end)
                      tuples; lane and end are None when the detector is
                      not expected to report them.
        """
        file_type_objects = models.FileType.objects
        for filename, typename, lane, end in cases:
            metadata = models.find_file_type_metadata_from_filename(filename)
            self.assertEqual(metadata['file_type'],
                             file_type_objects.get(name=typename))
            self.assertEqual(metadata.get('lane', None), lane)
            self.assertEqual(metadata.get('end', None), end)

    def test_file_type_unicode(self):
        """The unicode form of a FileType includes its name."""
        file_type_objects = models.FileType.objects
        name = 'QSEQ tarfile'
        file_type_object = file_type_objects.get(name=name)
        self.assertEqual(u"<FileType: QSEQ tarfile>",
                         unicode(file_type_object))

    def test_find_file_type(self):
        """Bare filenames map to the expected file type, lane and end."""
        cases = [('woldlab_090921_HWUSI-EAS627_0009_42FC3AAXX_l7_r1.tar.bz2',
                  'QSEQ tarfile', 7, 1),
                 ('woldlab_091005_HWUSI-EAS627_0010_42JT2AAXX_1.srf',
                  'SRF', 1, None),
                 ('s_1_eland_extended.txt.bz2', 'ELAND Extended', 1, None),
                 ('s_7_eland_multi.txt.bz2', 'ELAND Multi', 7, None),
                 ('s_3_eland_result.txt.bz2', 'ELAND Result', 3, None),
                 ('s_1_export.txt.bz2', 'ELAND Export', 1, None),
                 ('s_1_percent_call.png', 'IVC Percent Call', 1, None),
                 ('s_2_percent_base.png', 'IVC Percent Base', 2, None),
                 ('s_3_percent_all.png', 'IVC Percent All', 3, None),
                 ('s_4_call.png', 'IVC Call', 4, None),
                 ('s_5_all.png', 'IVC All', 5, None),
                 ('Summary.htm', 'Summary.htm', None, None),
                 ('run_42JT2AAXX_2009-10-07.xml', 'run_xml', None, None),
                 ]
        self._check_file_type_cases(cases)

    def test_assign_file_type_complex_path(self):
        """Leading directory components must not change the detection."""
        cases = [('/a/b/c/woldlab_090921_HWUSI-EAS627_0009_42FC3AAXX_l7_r1.tar.bz2',
                  'QSEQ tarfile', 7, 1),
                 ('foo/woldlab_091005_HWUSI-EAS627_0010_42JT2AAXX_1.srf',
                  'SRF', 1, None),
                 ('../s_1_eland_extended.txt.bz2', 'ELAND Extended', 1, None),
                 ('/bleem/s_7_eland_multi.txt.bz2', 'ELAND Multi', 7, None),
                 ('/qwer/s_3_eland_result.txt.bz2', 'ELAND Result', 3, None),
                 ('/ty///1/s_1_export.txt.bz2', 'ELAND Export', 1, None),
                 ('/help/s_1_percent_call.png', 'IVC Percent Call', 1, None),
                 ('/bored/s_2_percent_base.png', 'IVC Percent Base', 2, None),
                 ('/example1/s_3_percent_all.png', 'IVC Percent All', 3, None),
                 ('amonkey/s_4_call.png', 'IVC Call', 4, None),
                 ('fishie/s_5_all.png', 'IVC All', 5, None),
                 ('/random/Summary.htm', 'Summary.htm', None, None),
                 ('/notrandom/run_42JT2AAXX_2009-10-07.xml', 'run_xml', None, None),
                 ]
        self._check_file_type_cases(cases)
380
381
class TestEmailNotify(TestCase):
    """Tests for the "flowcell started" notification page and its e-mails."""
    fixtures = ['test_flowcells.json']

    def test_started_email_not_logged_in(self):
        """A request without a login gets a redirect (302)."""
        response = self.client.get('/experiments/started/153/')
        self.assertEqual(response.status_code, 302)

    def test_started_email_logged_in_user(self):
        """The 'test' account also gets a redirect (302)."""
        self.client.login(username='test', password='BJOKL5kAj6aFZ6A5')
        response = self.client.get('/experiments/started/153/')
        self.assertEqual(response.status_code, 302)

    def test_started_email_logged_in_staff(self):
        """The 'admintest' account is allowed to see the page (200)."""
        self.client.login(username='admintest', password='BJOKL5kAj6aFZ6A5')
        response = self.client.get('/experiments/started/153/')
        self.assertEqual(response.status_code, 200)

    def test_started_email_send(self):
        """Previewing shows the recipients; sending fills the mail outbox."""
        self.client.login(username='admintest', password='BJOKL5kAj6aFZ6A5')
        response = self.client.get('/experiments/started/153/')
        self.assertEqual(response.status_code, 200)

        self.assertTrue('pk1@example.com' in response.content)
        self.assertTrue(
            'Lane #8 : (11064) Paired ends 104' in response.content)

        # Repeat the request with the send and bcc options switched on.
        response = self.client.get('/experiments/started/153/',
                                   {'send': '1', 'bcc': 'on'})
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(mail.outbox), 4)
        for message in mail.outbox:
            self.assertTrue(len(message.body) > 0)

    def test_email_navigation(self):
        """
        Can we navigate between the flowcell and email forms properly?
        """
        self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')
        response = self.client.get('/experiments/started/153/')
        self.assertEqual(response.status_code, 200)
        self.assertTrue(re.search('Flowcell FC12150', response.content))
        # require that navigation back to the admin page exists
        flowcell_a_re = '<a href="/admin/experiments/flowcell/153/">[^<]+</a>'
        self.assertTrue(re.search(flowcell_a_re, response.content))
425
426
def multi_lane_to_dict(lane):
    """Convert a list of lane entries into a dictionary indexed by library ID

    :param lane: iterable of mappings that each have a 'library_id' key.
    :return: dictionary mapping every entry's 'library_id' to that entry;
             if an ID occurs more than once the last entry with it is kept.
    """
    return dict((entry['library_id'], entry) for entry in lane)