e54d630497e2f3b9c412b7364f50d06dcf659cd3
[htsworkflow.git] / htsworkflow / frontend / experiments / tests.py
1 import re
2 from lxml.html import fromstring
3 try:
4     import json
5 except ImportError, e:
6     import simplejson as json
7 import os
8 import shutil
9 import sys
10 import tempfile
11
12 from django.conf import settings
13 from django.core import mail
14 from django.core.exceptions import ObjectDoesNotExist
15 from django.test import TestCase
16 from htsworkflow.frontend.experiments import models
17 from htsworkflow.frontend.experiments import experiments
18 from htsworkflow.frontend.auth import apidata
19
20 from htsworkflow.pipelines.test.simulate_runfolder import TESTDATA_DIR
21
22 LANE_SET = range(1,9)
23
24 NSMAP = {'libns':'http://jumpgate.caltech.edu/wiki/LibraryOntology#'}
25
class ExperimentsTestCases(TestCase):
    """Tests for flowcell packing, the JSON views, admin pages and data runs.

    Every test runs against a scratch ``settings.RESULT_HOME_DIR`` that is
    populated in ``setUp`` and removed again in ``tearDown``.
    """
    fixtures = ['test_flowcells.json',
                ]

    def setUp(self):
        """Create a temporary result tree containing two flowcell folders."""
        # Direct assignments to ``settings`` are not undone between tests,
        # so remember the configured value for tearDown to put back.
        self.saved_result_home_dir = getattr(settings, 'RESULT_HOME_DIR', None)
        self.tempdir = tempfile.mkdtemp(prefix='htsw-test-experiments-')
        settings.RESULT_HOME_DIR = self.tempdir

        # First flowcell: a run xml file plus one srf file per lane.
        self.fc1_id = 'FC12150'
        self.fc1_root = os.path.join(self.tempdir, self.fc1_id)
        os.mkdir(self.fc1_root)
        self.fc1_dir = os.path.join(self.fc1_root, 'C1-37')
        os.mkdir(self.fc1_dir)
        runxml = 'run_FC12150_2007-09-27.xml'
        shutil.copy(os.path.join(TESTDATA_DIR, runxml),
                    os.path.join(self.fc1_dir, runxml))
        srf_template = os.path.join(
            TESTDATA_DIR, 'woldlab_070829_USI-EAS44_0017_FC11055_1.srf')
        for lane_number in LANE_SET:
            # The same sample srf is reused for every lane, only renamed.
            shutil.copy(
                srf_template,
                os.path.join(self.fc1_dir,
                             'woldlab_070829_SERIAL_FC12150_%d.srf' % (
                                 lane_number,)))

        # Second flowcell: empty directory skeleton only.
        self.fc2_dir = os.path.join(self.tempdir, '42JTNAAXX')
        os.mkdir(self.fc2_dir)
        os.mkdir(os.path.join(self.fc2_dir, 'C1-25'))
        os.mkdir(os.path.join(self.fc2_dir, 'C1-37'))
        os.mkdir(os.path.join(self.fc2_dir, 'C1-37', 'Plots'))

    def tearDown(self):
        """Restore RESULT_HOME_DIR and delete the temporary result tree."""
        # Restore first so the setting never points at a deleted directory.
        settings.RESULT_HOME_DIR = self.saved_result_home_dir
        shutil.rmtree(self.tempdir)

    def _assert_lanes_match(self, fc_django, lane_set, key_for=None):
        """Compare every lane of ``fc_django`` with its packed entry.

        ``lane_set`` maps lane numbers to lists of per-library dictionaries.
        ``key_for`` converts a lane number into the key type used by
        ``lane_set``; decoded JSON has string keys, the python API has ints.
        """
        for lane in fc_django.lane_set.all():
            lane_key = lane.lane_number
            if key_for is not None:
                lane_key = key_for(lane_key)
            lane_contents = lane_set[lane_key]
            lane_dict = multi_lane_to_dict(lane_contents)[lane.library_id]

            self.assertEqual(lane_dict['cluster_estimate'],
                             lane.cluster_estimate)
            self.assertEqual(lane_dict['comment'], lane.comment)
            self.assertEqual(lane_dict['flowcell'], lane.flowcell.flowcell_id)
            self.assertEqual(lane_dict['lane_number'], lane.lane_number)
            self.assertEqual(lane_dict['library_name'],
                             lane.library.library_name)
            self.assertEqual(lane_dict['library_id'], lane.library.id)
            self.assertAlmostEqual(float(lane_dict['pM']), float(lane.pM))
            self.assertEqual(lane_dict['library_species'],
                             lane.library.library_species.scientific_name)

    def test_flowcell_information(self):
        """
        Check the code that packs the django objects into simple types.
        """
        for fc_id in [u'FC12150', u"42JTNAAXX", "42JU1AAXX"]:
            fc_dict = experiments.flowcell_information(fc_id)
            fc_django = models.FlowCell.objects.get(flowcell_id=fc_id)
            self.assertEqual(fc_dict['flowcell_id'], fc_id)
            self.assertEqual(fc_django.flowcell_id, fc_id)
            self.assertEqual(fc_dict['sequencer'], fc_django.sequencer.name)
            self.assertEqual(fc_dict['read_length'], fc_django.read_length)
            self.assertEqual(fc_dict['notes'], fc_django.notes)
            self.assertEqual(fc_dict['cluster_station'],
                             fc_django.cluster_station.name)
            self._assert_lanes_match(fc_django, fc_dict['lane_set'])

            # The JSON view should report the same values as the python API.
            response = self.client.get(
                '/experiments/config/%s/json' % (fc_id,), apidata)
            # strptime isoformat string = '%Y-%m-%dT%H:%M:%S'
            fc_json = json.loads(response.content)
            self.assertEqual(fc_json['flowcell_id'], fc_id)
            self.assertEqual(fc_json['sequencer'], fc_django.sequencer.name)
            self.assertEqual(fc_json['read_length'], fc_django.read_length)
            self.assertEqual(fc_json['notes'], fc_django.notes)
            self.assertEqual(fc_json['cluster_station'],
                             fc_django.cluster_station.name)
            # JSON object keys are always strings, so convert lane numbers.
            self._assert_lanes_match(fc_django, fc_json['lane_set'],
                                     key_for=unicode)

    def test_invalid_flowcell(self):
        """
        Make sure we get a 404 if we request an invalid flowcell ID
        """
        response = self.client.get('/experiments/config/nottheone/json',
                                   apidata)
        self.assertEqual(response.status_code, 404)

    def test_no_key(self):
        """
        Require logging in to retrieve meta data
        """
        response = self.client.get(u'/experiments/config/FC12150/json')
        self.assertEqual(response.status_code, 403)

    def test_library_id(self):
        """
        Library IDs should be flexible, so make sure we can retrieve a
        non-numeric ID
        """
        response = self.client.get('/experiments/config/FC12150/json',
                                   apidata)
        self.assertEqual(response.status_code, 200)
        flowcell = json.loads(response.content)

        lane_contents = flowcell['lane_set']['3']
        lane_library = lane_contents[0]
        self.assertEqual(lane_library['library_id'], 'SL039')

        response = self.client.get('/samples/library/SL039/json', apidata)
        self.assertEqual(response.status_code, 200)
        library_sl039 = json.loads(response.content)

        self.assertEqual(library_sl039['library_id'], 'SL039')

    def test_raw_id_field(self):
        """
        Test ticket:147

        Libraries have IDs, libraries also have primary keys,
        we eventually had enough libraries that the drop down combo box was
        too hard to filter through, unfortunately we want a field that uses
        our library id and not the internal primary key, and raw_id_field
        uses primary keys.

        This tests to make sure that the value entered in the raw library id
        field matches the library id looked up.
        """
        expected_ids = [u'10981', u'11016', u'SL039', u'11060',
                        u'11061', u'11062', u'11063', u'11064']
        self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')
        response = self.client.get('/admin/experiments/flowcell/153/')
        tree = fromstring(response.content)
        xpath_expression = '//input[@id="id_lane_set-%d-library"]'
        for i, expected_id in enumerate(expected_ids):
            input_field = tree.xpath(xpath_expression % (i,))[0]
            library_field = input_field.find('../strong')
            library_id, library_name = library_field.text.split(':')
            # strip leading '#' sign from the displayed library id
            library_id = library_id[1:]
            self.assertEqual(library_id, expected_id)
            self.assertEqual(input_field.attrib['value'], library_id)

    def test_library_to_flowcell_link(self):
        """
        Make sure the library page includes links to the flowcell pages
        that work with flowcell IDs that have parenthetical comments.
        """
        self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')
        response = self.client.get('/library/11070/')
        tree = fromstring(response.content)
        flowcell_spans = tree.xpath('//span[@property="libns:flowcell_id"]',
                                    namespaces=NSMAP)
        self.assertEqual(flowcell_spans[0].text, '30012AAXX (failed)')
        failed_fc_span = flowcell_spans[0]
        failed_fc_a = failed_fc_span.getparent()
        # make sure some of our RDF made it.
        self.assertEqual(failed_fc_a.get('rel'), 'libns:flowcell')
        self.assertEqual(failed_fc_a.get('href'), '/flowcell/30012AAXX/')
        fc_response = self.client.get(failed_fc_a.get('href'))
        self.assertEqual(fc_response.status_code, 200)
        fc_lane_response = self.client.get('/flowcell/30012AAXX/8/')
        self.assertEqual(fc_lane_response.status_code, 200)

    def test_pooled_multiplex_id(self):
        """Each library pooled in a lane reports its own index sequences."""
        fc_dict = experiments.flowcell_information('42JU1AAXX')
        lane_contents = fc_dict['lane_set'][3]
        self.assertEqual(len(lane_contents), 2)
        lane_dict = multi_lane_to_dict(lane_contents)

        self.assertEqual(lane_dict['12044']['index_sequence'],
                         {u'1': u'ATCACG',
                          u'2': u'CGATGT',
                          u'3': u'TTAGGC'})
        self.assertEqual(lane_dict['11045']['index_sequence'],
                         {u'1': u'ATCACG'})

    def test_lanes_for(self):
        """
        Check that lanes_for and its JSON view report the same lanes.
        """
        user = 'test'
        lanes = experiments.lanes_for(user)
        self.assertEqual(len(lanes), 5)

        response = self.client.get(
            '/experiments/lanes_for/%s/json' % (user,), apidata)
        lanes_json = json.loads(response.content)
        self.assertEqual(len(lanes), len(lanes_json))
        # Lengths were just checked, so zip pairs every entry.
        for lane, lane_json in zip(lanes, lanes_json):
            self.assertEqual(lane['comment'], lane_json['comment'])
            self.assertEqual(lane['lane_number'], lane_json['lane_number'])
            self.assertEqual(lane['flowcell'], lane_json['flowcell'])
            self.assertEqual(lane['run_date'], lane_json['run_date'])

    def test_lanes_for_no_lanes(self):
        """
        Do we get something meaningful back when the user isn't attached to
        anything?
        """
        user = 'supertest'
        lanes = experiments.lanes_for(user)
        self.assertEqual(len(lanes), 0)

        response = self.client.get(
            '/experiments/lanes_for/%s/json' % (user,), apidata)
        lanes_json = json.loads(response.content)
        # The view should agree with the python API: no lanes at all.
        self.assertEqual(len(lanes_json), 0)

    def test_lanes_for_no_user(self):
        """
        Do we get something meaningful back when it's the wrong user
        """
        user = 'not a real user'
        self.assertRaises(ObjectDoesNotExist, experiments.lanes_for, user)

        response = self.client.get(
            '/experiments/lanes_for/%s/json' % (user,), apidata)
        self.assertEqual(response.status_code, 404)

    def test_raw_data_dir(self):
        """Raw data path generator check"""
        flowcell_id = self.fc1_id
        raw_dir = os.path.join(settings.RESULT_HOME_DIR, flowcell_id)

        fc = models.FlowCell.objects.get(flowcell_id=flowcell_id)
        self.assertEqual(fc.get_raw_data_directory(), raw_dir)

        # A parenthetical status suffix must not change the directory.
        fc.flowcell_id = flowcell_id + " (failed)"
        self.assertEqual(fc.get_raw_data_directory(), raw_dir)

    def test_data_run_import(self):
        """update_data_runs should register the files created by setUp."""
        srf_file_type = models.FileType.objects.get(name='SRF')
        runxml_file_type = models.FileType.objects.get(name='run_xml')
        flowcell_id = self.fc1_id
        flowcell = models.FlowCell.objects.get(flowcell_id=flowcell_id)
        flowcell.update_data_runs()
        self.assertEqual(len(flowcell.datarun_set.all()), 1)

        run = flowcell.datarun_set.all()[0]
        result_files = run.datafile_set.all()
        result_dict = dict(((rf.relative_pathname, rf) for rf in result_files))

        srf4 = result_dict['FC12150/C1-37/woldlab_070829_SERIAL_FC12150_4.srf']
        self.assertEqual(srf4.file_type, srf_file_type)
        self.assertEqual(srf4.library_id, '11060')
        self.assertEqual(srf4.data_run.flowcell.flowcell_id, 'FC12150')
        self.assertEqual(
            srf4.data_run.flowcell.lane_set.get(lane_number=4).library_id,
            '11060')
        self.assertEqual(
            srf4.pathname,
            os.path.join(settings.RESULT_HOME_DIR, srf4.relative_pathname))

        lane_files = run.lane_files()
        self.assertEqual(lane_files[4]['srf'], srf4)

        runxml = result_dict['FC12150/C1-37/run_FC12150_2007-09-27.xml']
        self.assertEqual(runxml.file_type, runxml_file_type)
        self.assertEqual(runxml.library_id, None)

    def test_read_result_file(self):
        """make sure we can return a result file
        """
        flowcell_id = self.fc1_id
        flowcell = models.FlowCell.objects.get(flowcell_id=flowcell_id)
        flowcell.update_data_runs()

        # No login is performed here; each file is requested through the
        # URL built from its random_key.
        result_files = flowcell.datarun_set.all()[0].datafile_set.all()
        for f in result_files:
            url = '/experiments/file/%s' % (f.random_key,)
            response = self.client.get(url)
            self.assertEqual(response.status_code, 200)
            mimetype = f.file_type.mimetype
            if mimetype is None:
                # File types without a mimetype are expected to be served
                # as generic binary data.
                mimetype = 'application/octet-stream'

            self.assertEqual(mimetype, response['content-type'])
306
class TestFileType(TestCase):
    """Tests for FileType display and filename-based file type detection.

    The file previously declared ``TestFileType`` twice; the second class
    statement rebound the name, so ``test_file_type_unicode`` was never
    collected.  All three tests now live in this single class.
    """

    def test_file_type_unicode(self):
        """The unicode form of a FileType includes its name."""
        file_type_objects = models.FileType.objects
        name = 'QSEQ tarfile'
        file_type_object = file_type_objects.get(name=name)
        self.assertEqual(u"<FileType: QSEQ tarfile>",
                         unicode(file_type_object))

    def _assert_file_type_cases(self, cases):
        """Check ``find_file_type_metadata_from_filename`` for each case.

        ``cases`` is a sequence of ``(filename, typename, lane, end)``
        tuples; ``lane`` and ``end`` are None when the metadata is expected
        to omit that key.
        """
        file_type_objects = models.FileType.objects
        for filename, typename, lane, end in cases:
            metadata = models.find_file_type_metadata_from_filename(filename)
            self.assertEqual(metadata['file_type'],
                             file_type_objects.get(name=typename))
            self.assertEqual(metadata.get('lane', None), lane)
            self.assertEqual(metadata.get('end', None), end)

    def test_find_file_type(self):
        """Bare filenames are matched to the right type, lane and end."""
        cases = [('woldlab_090921_HWUSI-EAS627_0009_42FC3AAXX_l7_r1.tar.bz2',
                  'QSEQ tarfile', 7, 1),
                 ('woldlab_091005_HWUSI-EAS627_0010_42JT2AAXX_1.srf',
                  'SRF', 1, None),
                 ('s_1_eland_extended.txt.bz2', 'ELAND Extended', 1, None),
                 ('s_7_eland_multi.txt.bz2', 'ELAND Multi', 7, None),
                 ('s_3_eland_result.txt.bz2', 'ELAND Result', 3, None),
                 ('s_1_export.txt.bz2', 'ELAND Export', 1, None),
                 ('s_1_percent_call.png', 'IVC Percent Call', 1, None),
                 ('s_2_percent_base.png', 'IVC Percent Base', 2, None),
                 ('s_3_percent_all.png', 'IVC Percent All', 3, None),
                 ('s_4_call.png', 'IVC Call', 4, None),
                 ('s_5_all.png', 'IVC All', 5, None),
                 ('Summary.htm', 'Summary.htm', None, None),
                 ('run_42JT2AAXX_2009-10-07.xml', 'run_xml', None, None),
                 ]
        self._assert_file_type_cases(cases)

    def test_assign_file_type_complex_path(self):
        """Leading directory components do not affect the detected type."""
        cases = [('/a/b/c/woldlab_090921_HWUSI-EAS627_0009_42FC3AAXX_l7_r1.tar.bz2',
                  'QSEQ tarfile', 7, 1),
                 ('foo/woldlab_091005_HWUSI-EAS627_0010_42JT2AAXX_1.srf',
                  'SRF', 1, None),
                 ('../s_1_eland_extended.txt.bz2', 'ELAND Extended', 1, None),
                 ('/bleem/s_7_eland_multi.txt.bz2', 'ELAND Multi', 7, None),
                 ('/qwer/s_3_eland_result.txt.bz2', 'ELAND Result', 3, None),
                 ('/ty///1/s_1_export.txt.bz2', 'ELAND Export', 1, None),
                 ('/help/s_1_percent_call.png', 'IVC Percent Call', 1, None),
                 ('/bored/s_2_percent_base.png', 'IVC Percent Base', 2, None),
                 ('/example1/s_3_percent_all.png', 'IVC Percent All', 3, None),
                 ('amonkey/s_4_call.png', 'IVC Call', 4, None),
                 ('fishie/s_5_all.png', 'IVC All', 5, None),
                 ('/random/Summary.htm', 'Summary.htm', None, None),
                 ('/notrandom/run_42JT2AAXX_2009-10-07.xml', 'run_xml',
                  None, None),
                 ]
        self._assert_file_type_cases(cases)
365
class TestEmailNotify(TestCase):
    """Tests for the "flowcell started" notification page of flowcell 153."""
    fixtures = ['test_flowcells.json']

    # URL of the notification form and the password shared by the fixture
    # accounts used below.
    started_url = '/experiments/started/153/'
    password = 'BJOKL5kAj6aFZ6A5'

    def test_started_email_not_logged_in(self):
        """Anonymous requests are redirected."""
        response = self.client.get(self.started_url)
        self.assertEqual(response.status_code, 302)

    def test_started_email_logged_in_user(self):
        """The 'test' account is redirected as well."""
        self.client.login(username='test', password=self.password)
        response = self.client.get(self.started_url)
        self.assertEqual(response.status_code, 302)

    def test_started_email_logged_in_staff(self):
        """The 'admintest' account can view the page."""
        self.client.login(username='admintest', password=self.password)
        response = self.client.get(self.started_url)
        self.assertEqual(response.status_code, 200)

    def test_started_email_send(self):
        """Requesting a send places non-empty messages in the outbox."""
        self.client.login(username='admintest', password=self.password)
        response = self.client.get(self.started_url)
        self.assertEqual(response.status_code, 200)

        # The preview page lists a recipient and a per-lane summary.
        self.assertTrue('pk1@example.com' in response.content)
        self.assertTrue(
            'Lane #8 : (11064) Paired ends 104' in response.content)

        response = self.client.get(self.started_url,
                                   {'send': '1', 'bcc': 'on'})
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(mail.outbox), 4)
        for message in mail.outbox:
            self.assertTrue(len(message.body) > 0)

    def test_email_navigation(self):
        """
        Can we navigate between the flowcell and email forms properly?
        """
        self.client.login(username='supertest', password=self.password)
        response = self.client.get(self.started_url)
        self.assertEqual(response.status_code, 200)
        self.assertTrue(re.search('Flowcell FC12150', response.content))
        # require that navigation back to the admin page exists
        self.assertTrue(re.search(
            '<a href="/admin/experiments/flowcell/153/">[^<]+</a>',
            response.content))
407
def multi_lane_to_dict(lane):
    """Index the entries of one lane by their 'library_id' value.

    ``lane`` is an iterable of mappings that each have a 'library_id' key.
    When the same ID occurs more than once the later entry wins.
    """
    by_library_id = {}
    for entry in lane:
        by_library_id[entry['library_id']] = entry
    return by_library_id
412
class TestSequencer(TestCase):
    """Tests for the Sequencer model and its RDFa markup on flowcell pages."""
    fixtures = ['test_flowcells.json',
                ]

    def test_name_generation(self):
        """The unicode form combines the name and the instrument name."""
        seq = models.Sequencer()
        seq.name = "Seq1"
        seq.instrument_name = "HWI-SEQ1"
        seq.model = "Imaginary 5000"

        self.assertEqual(unicode(seq), "Seq1 (HWI-SEQ1)")

    def test_lookup(self):
        """A flowcell loaded from the fixture exposes its sequencer."""
        fc = models.FlowCell.objects.get(pk=153)
        self.assertEqual(fc.sequencer.model,
                         "Illumina Genome Analyzer IIx")
        self.assertEqual(fc.sequencer.instrument_name,
                         "ILLUMINA-EC5D15")

    def test_rdf(self):
        """The flowcell page describes its sequencer with libns properties."""
        response = self.client.get('/flowcell/FC12150/', apidata)
        tree = fromstring(response.content)
        divs = tree.xpath('//div[@rel="libns:sequenced_by"]',
                          namespaces=NSMAP)
        self.assertEqual(len(divs), 1)
        sequencer_div = divs[0]
        self.assertEqual(sequencer_div.attrib['rel'], 'libns:sequenced_by')
        self.assertEqual(sequencer_div.attrib['resource'], '/sequencer/2')

        # Each property must appear exactly once, as a direct child span.
        expected_spans = [
            ('libns:sequencer_name', 'Tardigrade'),
            ('libns:sequencer_instrument', 'ILLUMINA-EC5D15'),
            ('libns:sequencer_model', 'Illumina Genome Analyzer IIx'),
        ]
        for rdf_property, expected_text in expected_spans:
            spans = sequencer_div.xpath(
                './span[@property="%s"]' % (rdf_property,))
            self.assertEqual(len(spans), 1)
            self.assertEqual(spans[0].text, expected_text)