2 from lxml.html import fromstring
6 import simplejson as json
11 from urlparse import urljoin
13 from django.conf import settings
14 from django.core import mail
15 from django.core.exceptions import ObjectDoesNotExist
16 from django.test import TestCase
17 from htsworkflow.frontend.experiments import models
18 from htsworkflow.frontend.experiments import experiments
19 from htsworkflow.frontend.auth import apidata
21 from htsworkflow.pipelines.test.simulate_runfolder import TESTDATA_DIR
# XML namespace prefix map: binds the ``libns`` prefix to the LibraryOntology URI.
# NOTE(review): presumably handed to lxml ``xpath`` calls as ``namespaces=``;
# the argument lines of those calls are not visible in this excerpt -- confirm.
NSMAP = {'libns':'http://jumpgate.caltech.edu/wiki/LibraryOntology#'}
class ClusterStationTestCases(TestCase):
    """Check how ``models.ClusterStation`` reports and updates its default."""
    fixtures = ['test_flowcells.json']

    def _default_count(self):
        """Return the number of cluster stations flagged ``isdefault=True``."""
        return models.ClusterStation.objects.filter(isdefault=True).count()

    def test_default(self):
        """With the flag cleared on station 2, ``default()`` returns station 3."""
        c = models.ClusterStation.default()
        self.assertEqual(c.id, 2)

        # NOTE(review): the two statements below were missing from the
        # reviewed excerpt; some state change is required between the
        # surrounding assertions -- confirm against version control.
        c.isdefault = False
        c.save()

        self.assertEqual(self._default_count(), 0)

        other_default = models.ClusterStation.default()
        self.assertEqual(other_default.id, 3)

    def test_update_default(self):
        """Flagging another station as default replaces the old default."""
        old_default = models.ClusterStation.default()

        c = models.ClusterStation.objects.get(pk=3)
        # NOTE(review): flag + save were missing from the reviewed excerpt;
        # restored so that ``new_default == c`` can hold -- confirm.
        c.isdefault = True
        c.save()

        new_default = models.ClusterStation.default()

        self.assertNotEqual(old_default, new_default)
        self.assertEqual(new_default, c)

        # exactly one record may carry the flag after the switch
        self.assertEqual(self._default_count(), 1)

    def test_update_other(self):
        """Editing an unrelated field leaves the default untouched."""
        old_default = models.ClusterStation.default()
        self.assertEqual(self._default_count(), 1)

        c = models.ClusterStation.objects.get(pk=1)
        c.name = "Primary Key 1"
        # NOTE(review): the save was missing from the reviewed excerpt;
        # without it the rename never reaches the database -- confirm.
        c.save()

        self.assertEqual(self._default_count(), 1)

        new_default = models.ClusterStation.default()
        self.assertEqual(old_default, new_default)
class SequencerTestCases(TestCase):
    """Check how ``models.Sequencer`` reports and updates its default."""
    fixtures = ['test_flowcells.json']

    def _default_count(self):
        """Return the number of sequencers flagged ``isdefault=True``."""
        return models.Sequencer.objects.filter(isdefault=True).count()

    def test_default(self):
        """With the flag cleared on sequencer 2, ``default()`` returns sequencer 7."""
        # starting with no default
        s = models.Sequencer.default()
        self.assertEqual(s.id, 2)

        self.assertEqual(self._default_count(), 1)

        # NOTE(review): the two statements below were missing from the
        # reviewed excerpt; the count cannot drop from 1 to 0 without a
        # state change here -- confirm against version control.
        s.isdefault = False
        s.save()

        self.assertEqual(self._default_count(), 0)

        other_default = models.Sequencer.default()
        self.assertEqual(other_default.id, 7)

    def test_update_default(self):
        """Flagging another sequencer as default replaces the old default."""
        old_default = models.Sequencer.default()

        s = models.Sequencer.objects.get(pk=1)
        # NOTE(review): flag + save were missing from the reviewed excerpt;
        # restored so that ``new_default == s`` can hold -- confirm.
        s.isdefault = True
        s.save()

        new_default = models.Sequencer.default()

        self.assertNotEqual(old_default, new_default)
        self.assertEqual(new_default, s)

        # exactly one record may carry the flag after the switch
        self.assertEqual(self._default_count(), 1)

    def test_update_other(self):
        """Editing an unrelated field leaves the default untouched."""
        old_default = models.Sequencer.default()
        self.assertEqual(self._default_count(), 1)

        s = models.Sequencer.objects.get(pk=1)
        s.name = "Primary Key 1"
        # NOTE(review): the save was missing from the reviewed excerpt;
        # without it the rename never reaches the database -- confirm.
        s.save()

        self.assertEqual(self._default_count(), 1)

        new_default = models.Sequencer.default()
        self.assertEqual(old_default, new_default)
127 class ExperimentsTestCases(TestCase):
128 fixtures = ['test_flowcells.json',
132 self.tempdir = tempfile.mkdtemp(prefix='htsw-test-experiments-')
133 settings.RESULT_HOME_DIR = self.tempdir
135 self.fc1_id = 'FC12150'
136 self.fc1_root = os.path.join(self.tempdir, self.fc1_id)
137 os.mkdir(self.fc1_root)
138 self.fc1_dir = os.path.join(self.fc1_root, 'C1-37')
139 os.mkdir(self.fc1_dir)
140 runxml = 'run_FC12150_2007-09-27.xml'
141 shutil.copy(os.path.join(TESTDATA_DIR, runxml),
142 os.path.join(self.fc1_dir, runxml))
145 os.path.join(TESTDATA_DIR,
146 'woldlab_070829_USI-EAS44_0017_FC11055_1.srf'),
147 os.path.join(self.fc1_dir,
148 'woldlab_070829_SERIAL_FC12150_%d.srf' %(i,))
151 self.fc2_dir = os.path.join(self.tempdir, '42JTNAAXX')
152 os.mkdir(self.fc2_dir)
153 os.mkdir(os.path.join(self.fc2_dir, 'C1-25'))
154 os.mkdir(os.path.join(self.fc2_dir, 'C1-37'))
155 os.mkdir(os.path.join(self.fc2_dir, 'C1-37', 'Plots'))
158 shutil.rmtree(self.tempdir)
def test_flowcell_information(self):
    """
    Check the code that packs the django objects into simple types.

    Each flowcell is checked twice: once through the direct
    ``experiments.flowcell_information`` call and once through the JSON
    view, both against the same django ``FlowCell`` row.
    """
    for fc_id in [u'FC12150', u"42JTNAAXX", "42JU1AAXX"]:
        fc_dict = experiments.flowcell_information(fc_id)
        fc_django = models.FlowCell.objects.get(flowcell_id=fc_id)
        self.assertEqual(fc_django.flowcell_id, fc_id)
        self._assert_flowcell_matches(fc_dict, fc_django, fc_id)

        for lane in fc_django.lane_set.all():
            lane_contents = fc_dict['lane_set'][lane.lane_number]
            lane_dict = multi_lane_to_dict(lane_contents)[lane.library_id]
            self._assert_lane_matches(lane_dict, lane)

        response = self.client.get('/experiments/config/%s/json' % (fc_id,), apidata)
        # fail on the HTTP status rather than on an opaque JSON decode error
        self.assertEqual(response.status_code, 200)
        # strptime isoformat string = '%Y-%m-%dT%H:%M:%S'
        fc_json = json.loads(response.content)
        self._assert_flowcell_matches(fc_json, fc_django, fc_id)

        for lane in fc_django.lane_set.all():
            # JSON object keys are strings, so the lane number is converted
            lane_contents = fc_json['lane_set'][unicode(lane.lane_number)]
            lane_dict = multi_lane_to_dict(lane_contents)[lane.library_id]
            self._assert_lane_matches(lane_dict, lane)

def _assert_flowcell_matches(self, fc_info, fc_django, fc_id):
    """Assert the flowcell-level fields of ``fc_info`` agree with ``fc_django``."""
    self.assertEqual(fc_info['flowcell_id'], fc_id)
    self.assertEqual(fc_info['sequencer'], fc_django.sequencer.name)
    self.assertEqual(fc_info['read_length'], fc_django.read_length)
    self.assertEqual(fc_info['notes'], fc_django.notes)
    self.assertEqual(fc_info['cluster_station'], fc_django.cluster_station.name)

def _assert_lane_matches(self, lane_dict, lane):
    """Assert one packed lane entry agrees with its django lane object."""
    self.assertEqual(lane_dict['cluster_estimate'], lane.cluster_estimate)
    self.assertEqual(lane_dict['comment'], lane.comment)
    self.assertEqual(lane_dict['flowcell'], lane.flowcell.flowcell_id)
    self.assertEqual(lane_dict['lane_number'], lane.lane_number)
    self.assertEqual(lane_dict['library_name'], lane.library.library_name)
    self.assertEqual(lane_dict['library_id'], lane.library.id)
    # pM is compared numerically; both sides are coerced to float first
    self.assertAlmostEqual(float(lane_dict['pM']), float(lane.pM))
    self.assertEqual(lane_dict['library_species'],
                     lane.library.library_species.scientific_name)
def test_invalid_flowcell(self):
    """
    Requesting the config of a flowcell ID that does not exist yields a 404.
    """
    bad_url = '/experiments/config/nottheone/json'
    status = self.client.get(bad_url, apidata).status_code
    self.assertEqual(status, 404)
def test_no_key(self):
    """
    A metadata request sent without the ``apidata`` parameters is refused (403).
    """
    refused = self.client.get(u'/experiments/config/FC12150/json')
    self.assertEqual(refused.status_code, 403)
def test_library_id(self):
    """
    Library IDs need not be numeric: look up the ID ``SL039`` through the
    flowcell config view and then through the library view.
    """
    fc_response = self.client.get('/experiments/config/FC12150/json', apidata)
    self.assertEqual(fc_response.status_code, 200)

    # first library entry listed for lane 3 of the flowcell
    lane_three = json.loads(fc_response.content)['lane_set']['3']
    self.assertEqual(lane_three[0]['library_id'], 'SL039')

    lib_response = self.client.get('/samples/library/SL039/json', apidata)
    self.assertEqual(lib_response.status_code, 200)

    library = json.loads(lib_response.content)
    self.assertEqual(library['library_id'], 'SL039')
def test_raw_id_field(self):
    """
    Libraries have IDs, and libraries also have primary keys.

    We eventually had enough libraries that the drop down combo box was too
    hard to filter through. Unfortunately we want a field that uses our
    library id and not the internal primary key, and raw_id_field uses
    primary keys.

    This tests to make sure that the value entered in the raw library id
    field matches the library id looked up.
    """
    expected_ids = [u'10981',u'11016',u'SL039',u'11060',
                    u'11061',u'11062',u'11063',u'11064']
    self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')
    response = self.client.get('/admin/experiments/flowcell/153/')
    tree = fromstring(response.content)

    xpath_expression = '//input[@id="id_lane_set-%d-library"]'
    # NOTE(review): the loop header was missing from the reviewed excerpt;
    # the body indexes ``expected_ids[i]``, so one pass per expected ID is
    # assumed -- confirm against version control.
    for i, expected_id in enumerate(expected_ids):
        input_field = tree.xpath(xpath_expression % (i,))[0]
        library_field = input_field.find('../strong')
        # label text is "#<library id>:<library name>"; split only on the
        # first colon so a colon inside the name cannot break the parse
        library_id = library_field.text.split(':', 1)[0]
        # strip leading '#' sign from the id
        library_id = library_id[1:]
        self.assertEqual(library_id, expected_id)
        self.assertEqual(input_field.attrib['value'], library_id)
def test_library_to_flowcell_link(self):
    """
    Make sure the library page includes links to the flowcell pages,
    and that they work with flowcell IDs that have parenthetical comments.
    """
    self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')
    response = self.client.get('/library/11070/')
    tree = fromstring(response.content)
    # NOTE(review): the tail of this call was missing from the reviewed
    # excerpt; ``namespaces=NSMAP`` is the assumed second argument -- confirm.
    flowcell_spans = tree.xpath('//span[@property="libns:flowcell_id"]',
                                namespaces=NSMAP)
    # report a missing span as a test failure instead of an IndexError
    self.assertTrue(len(flowcell_spans) > 0)
    self.assertEqual(flowcell_spans[0].text, '30012AAXX (failed)')
    failed_fc_span = flowcell_spans[0]
    failed_fc_a = failed_fc_span.getparent()
    # make sure some of our RDF made it.
    self.assertEqual(failed_fc_a.get('typeof'), 'libns:IlluminaFlowcell')
    # the link target carries the bare ID, without the " (failed)" comment
    self.assertEqual(failed_fc_a.get('href'), '/flowcell/30012AAXX/')
    fc_response = self.client.get(failed_fc_a.get('href'))
    self.assertEqual(fc_response.status_code, 200)
    fc_lane_response = self.client.get('/flowcell/30012AAXX/8/')
    self.assertEqual(fc_lane_response.status_code, 200)
291 def test_pooled_multiplex_id(self):
292 fc_dict = experiments.flowcell_information('42JU1AAXX')
293 lane_contents = fc_dict['lane_set'][3]
294 self.assertEqual(len(lane_contents), 2)
295 lane_dict = multi_lane_to_dict(lane_contents)
297 self.assertEqual(lane_dict['12044']['index_sequence'],
301 self.assertEqual(lane_dict['11045']['index_sequence'],
306 def test_lanes_for(self):
308 Check the code that packs the django objects into simple types.
311 lanes = experiments.lanes_for(user)
312 self.assertEqual(len(lanes), 5)
314 response = self.client.get('/experiments/lanes_for/%s/json' % (user,), apidata)
315 lanes_json = json.loads(response.content)
316 self.assertEqual(len(lanes), len(lanes_json))
317 for i in range(len(lanes)):
318 self.assertEqual(lanes[i]['comment'], lanes_json[i]['comment'])
319 self.assertEqual(lanes[i]['lane_number'], lanes_json[i]['lane_number'])
320 self.assertEqual(lanes[i]['flowcell'], lanes_json[i]['flowcell'])
321 self.assertEqual(lanes[i]['run_date'], lanes_json[i]['run_date'])
323 def test_lanes_for_no_lanes(self):
325 Do we get something meaningful back when the user isn't attached to anything?
328 lanes = experiments.lanes_for(user)
329 self.assertEqual(len(lanes), 0)
331 response = self.client.get('/experiments/lanes_for/%s/json' % (user,), apidata)
332 lanes_json = json.loads(response.content)
def test_lanes_for_no_user(self):
    """
    An unknown user name raises ``ObjectDoesNotExist`` from the direct call
    and produces a 404 from the JSON view.
    """
    missing_user = 'not a real user'
    self.assertRaises(ObjectDoesNotExist, experiments.lanes_for, missing_user)

    url = '/experiments/lanes_for/%s/json' % (missing_user,)
    not_found = self.client.get(url, apidata)
    self.assertEqual(not_found.status_code, 404)
def test_raw_data_dir(self):
    """Raw data path generator check"""
    expected_dir = os.path.join(settings.RESULT_HOME_DIR, self.fc1_id)

    fc = models.FlowCell.objects.get(flowcell_id=self.fc1_id)
    self.assertEqual(fc.get_raw_data_directory(), expected_dir)

    # a " (failed)" suffix on the ID must not change the directory
    fc.flowcell_id = self.fc1_id + " (failed)"
    self.assertEqual(fc.get_raw_data_directory(), expected_dir)
357 def test_data_run_import(self):
358 srf_file_type = models.FileType.objects.get(name='SRF')
359 runxml_file_type = models.FileType.objects.get(name='run_xml')
360 flowcell_id = self.fc1_id
361 flowcell = models.FlowCell.objects.get(flowcell_id=flowcell_id)
362 flowcell.update_data_runs()
363 self.assertEqual(len(flowcell.datarun_set.all()), 1)
365 run = flowcell.datarun_set.all()[0]
366 result_files = run.datafile_set.all()
367 result_dict = dict(((rf.relative_pathname, rf) for rf in result_files))
369 srf4 = result_dict['FC12150/C1-37/woldlab_070829_SERIAL_FC12150_4.srf']
370 self.assertEqual(srf4.file_type, srf_file_type)
371 self.assertEqual(srf4.library_id, '11060')
372 self.assertEqual(srf4.data_run.flowcell.flowcell_id, 'FC12150')
374 srf4.data_run.flowcell.lane_set.get(lane_number=4).library_id,
378 os.path.join(settings.RESULT_HOME_DIR, srf4.relative_pathname))
380 lane_files = run.lane_files()
381 self.assertEqual(lane_files[4]['srf'], srf4)
383 runxml= result_dict['FC12150/C1-37/run_FC12150_2007-09-27.xml']
384 self.assertEqual(runxml.file_type, runxml_file_type)
385 self.assertEqual(runxml.library_id, None)
387 import1 = len(models.DataRun.objects.filter(result_dir='FC12150/C1-37'))
388 # what happens if we import twice?
389 flowcell.import_data_run('FC12150/C1-37',
390 'run_FC12150_2007-09-27.xml')
392 len(models.DataRun.objects.filter(result_dir='FC12150/C1-37')),
def test_read_result_file(self):
    """make sure we can return a result file
    """
    flowcell_id = self.fc1_id
    flowcell = models.FlowCell.objects.get(flowcell_id=flowcell_id)
    flowcell.update_data_runs()

    #self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')

    result_files = flowcell.datarun_set.all()[0].datafile_set.all()
    for f in result_files:
        url = '/experiments/file/%s' % ( f.random_key,)
        response = self.client.get(url)
        self.assertEqual(response.status_code, 200)
        mimetype = f.file_type.mimetype
        # NOTE(review): this guard was missing from the reviewed excerpt;
        # without it the fallback would overwrite every mimetype -- confirm.
        if mimetype is None:
            mimetype = 'application/octet-stream'

        self.assertEqual(mimetype, response['content-type'])
415 def test_flowcell_rdf(self):
417 from htsworkflow.util.rdfhelp import get_model, \
419 load_string_into_model, \
426 expected = {1: ['11034'],
428 3: ['12044','11045'],
429 4: ['11047','13044'],
434 url = '/flowcell/42JU1AAXX/'
435 response = self.client.get(url)
436 self.assertEqual(response.status_code, 200)
437 ns = urljoin('http://localhost', url)
438 load_string_into_model(model, 'rdfa', response.content, ns=ns)
439 body = """prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
440 prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
442 select ?flowcell ?flowcell_id ?lane_id ?library_id
444 ?flowcell a libns:IlluminaFlowcell ;
445 libns:flowcell_id ?flowcell_id ;
446 libns:has_lane ?lane .
447 ?lane libns:lane_number ?lane_id ;
448 libns:library ?library .
449 ?library libns:library_id ?library_id .
451 query = RDF.SPARQLQuery(body)
453 for r in query.execute(model):
455 self.assertEqual(fromTypedNode(r['flowcell_id']), u'42JU1AAXX')
456 lane_id = fromTypedNode(r['lane_id'])
457 library_id = fromTypedNode(r['library_id'])
458 self.assertTrue(library_id in expected[lane_id])
459 self.assertEqual(count, 10)
class TestFileType(TestCase):
    """Check ``models.FileType`` display and filename-based type detection.

    All file type tests live in this single class: defining a second class
    with the same name would rebind it and hide the earlier tests from the
    test runner.
    """

    def _check_filename_cases(self, cases):
        """Assert the detected file type, lane and end for each case.

        ``cases`` is a list of ``(filename, typename, lane, end)`` tuples;
        ``lane`` and ``end`` are ``None`` when the key is expected to be
        absent from (or ``None`` in) the detection result.
        """
        file_type_objects = models.FileType.objects
        for filename, typename, lane, end in cases:
            result = models.find_file_type_metadata_from_filename(filename)
            self.assertEqual(result['file_type'],
                             file_type_objects.get(name=typename))
            self.assertEqual(result.get('lane', None), lane)
            self.assertEqual(result.get('end', None), end)

    def test_file_type_unicode(self):
        """The unicode form of a file type includes its name."""
        file_type_objects = models.FileType.objects
        name = 'QSEQ tarfile'
        file_type_object = file_type_objects.get(name=name)
        self.assertEqual(u"<FileType: QSEQ tarfile>",
                         unicode(file_type_object))

    def test_find_file_type(self):
        """Bare filenames are mapped to the right type, lane and end."""
        # NOTE(review): the tail of the SRF entry was missing from the
        # reviewed excerpt; ('SRF', 1, None) is assumed from the filename
        # and the neighbouring entries -- confirm against version control.
        cases = [('woldlab_090921_HWUSI-EAS627_0009_42FC3AAXX_l7_r1.tar.bz2',
                  'QSEQ tarfile', 7, 1),
                 ('woldlab_091005_HWUSI-EAS627_0010_42JT2AAXX_1.srf',
                  'SRF', 1, None),
                 ('s_1_eland_extended.txt.bz2','ELAND Extended', 1, None),
                 ('s_7_eland_multi.txt.bz2', 'ELAND Multi', 7, None),
                 ('s_3_eland_result.txt.bz2','ELAND Result', 3, None),
                 ('s_1_export.txt.bz2','ELAND Export', 1, None),
                 ('s_1_percent_call.png', 'IVC Percent Call', 1, None),
                 ('s_2_percent_base.png', 'IVC Percent Base', 2, None),
                 ('s_3_percent_all.png', 'IVC Percent All', 3, None),
                 ('s_4_call.png', 'IVC Call', 4, None),
                 ('s_5_all.png', 'IVC All', 5, None),
                 ('Summary.htm', 'Summary.htm', None, None),
                 ('run_42JT2AAXX_2009-10-07.xml', 'run_xml', None, None),
                 ]
        self._check_filename_cases(cases)

    def test_assign_file_type_complex_path(self):
        """Leading directory components do not disturb the detection."""
        # NOTE(review): the tail of the SRF entry was missing from the
        # reviewed excerpt; ('SRF', 1, None) is assumed -- confirm.
        cases = [('/a/b/c/woldlab_090921_HWUSI-EAS627_0009_42FC3AAXX_l7_r1.tar.bz2',
                  'QSEQ tarfile', 7, 1),
                 ('foo/woldlab_091005_HWUSI-EAS627_0010_42JT2AAXX_1.srf',
                  'SRF', 1, None),
                 ('../s_1_eland_extended.txt.bz2','ELAND Extended', 1, None),
                 ('/bleem/s_7_eland_multi.txt.bz2', 'ELAND Multi', 7, None),
                 ('/qwer/s_3_eland_result.txt.bz2','ELAND Result', 3, None),
                 ('/ty///1/s_1_export.txt.bz2','ELAND Export', 1, None),
                 ('/help/s_1_percent_call.png', 'IVC Percent Call', 1, None),
                 ('/bored/s_2_percent_base.png', 'IVC Percent Base', 2, None),
                 ('/example1/s_3_percent_all.png', 'IVC Percent All', 3, None),
                 ('amonkey/s_4_call.png', 'IVC Call', 4, None),
                 ('fishie/s_5_all.png', 'IVC All', 5, None),
                 ('/random/Summary.htm', 'Summary.htm', None, None),
                 ('/notrandom/run_42JT2AAXX_2009-10-07.xml', 'run_xml', None, None),
                 ]
        self._check_filename_cases(cases)
class TestEmailNotify(TestCase):
    """Access control, content and sending of the flowcell "started" email page."""
    fixtures = ['test_flowcells.json']

    def test_started_email_not_logged_in(self):
        """Without a login the page answers with a redirect."""
        anonymous = self.client.get('/experiments/started/153/')
        self.assertEqual(anonymous.status_code, 302)

    def test_started_email_logged_in_user(self):
        """The account ``test`` is redirected as well."""
        self.client.login(username='test', password='BJOKL5kAj6aFZ6A5')
        page = self.client.get('/experiments/started/153/')
        self.assertEqual(page.status_code, 302)

    def test_started_email_logged_in_staff(self):
        """The account ``admintest`` is allowed to see the page."""
        self.client.login(username='admintest', password='BJOKL5kAj6aFZ6A5')
        page = self.client.get('/experiments/started/153/')
        self.assertEqual(page.status_code, 200)

    def test_started_email_send(self):
        """The preview lists recipient and lane; sending queues four mails."""
        self.client.login(username='admintest', password='BJOKL5kAj6aFZ6A5')
        preview = self.client.get('/experiments/started/153/')
        self.assertEqual(preview.status_code, 200)

        preview_html = preview.content
        self.assertTrue('pk1@example.com' in preview_html)
        self.assertTrue('Lane #8 : (11064) Paired ends 104' in preview_html)

        send_args = {'send':'1','bcc':'on'}
        sent = self.client.get('/experiments/started/153/', send_args)
        self.assertEqual(sent.status_code, 200)
        self.assertEqual(len(mail.outbox), 4)
        # every queued message must carry a non-empty body
        for message in mail.outbox:
            self.assertTrue(len(message.body) > 0)

    def test_email_navigation(self):
        """
        Can we navigate between the flowcell and email forms properly?
        """
        self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')
        page = self.client.get('/experiments/started/153/')
        self.assertEqual(page.status_code, 200)
        html = page.content
        self.assertTrue(re.search('Flowcell FC12150', html))
        # require that navigation back to the admin page exists
        admin_link = '<a href="/admin/experiments/flowcell/153/">[^<]+</a>'
        self.assertTrue(re.search(admin_link, html))
def multi_lane_to_dict(lane):
    """Convert a list of lane entries into a dictionary indexed by library ID

    :param lane: iterable of mappings, each providing a ``'library_id'`` key.
    :return: dict mapping each ``library_id`` to its entry (the same object,
        not a copy); if an ID occurs more than once the last entry wins.
    :raises KeyError: if an entry has no ``'library_id'`` key.
    """
    return dict((entry['library_id'], entry) for entry in lane)
568 class TestSequencer(TestCase):
569 fixtures = ['test_flowcells.json',
572 def test_name_generation(self):
573 seq = models.Sequencer()
575 seq.instrument_name = "HWI-SEQ1"
576 seq.model = "Imaginary 5000"
578 self.assertEqual(unicode(seq), "Seq1 (HWI-SEQ1)")
580 def test_lookup(self):
581 fc = models.FlowCell.objects.get(pk=153)
582 self.assertEqual(fc.sequencer.model,
583 "Illumina Genome Analyzer IIx")
584 self.assertEqual(fc.sequencer.instrument_name,
586 # well actually we let the browser tack on the host name
587 url = fc.get_absolute_url()
588 self.assertEqual(url, '/flowcell/FC12150/')
591 response = self.client.get('/flowcell/FC12150/', apidata)
592 tree = fromstring(response.content)
593 seq_by = tree.xpath('//div[@rel="libns:sequenced_by"]',
595 self.assertEqual(len(seq_by), 1)
596 self.assertEqual(seq_by[0].attrib['rel'], 'libns:sequenced_by')
597 seq = seq_by[0].getchildren()
598 self.assertEqual(len(seq), 1)
599 self.assertEqual(seq[0].attrib['about'], '/sequencer/2')
600 self.assertEqual(seq[0].attrib['typeof'], 'libns:Sequencer')
602 name = seq[0].xpath('./span[@property="libns:sequencer_name"]')
603 self.assertEqual(len(name), 1)
604 self.assertEqual(name[0].text, 'Tardigrade')
605 instrument = seq[0].xpath(
606 './span[@property="libns:sequencer_instrument"]')
607 self.assertEqual(len(instrument), 1)
608 self.assertEqual(instrument[0].text, 'ILLUMINA-EC5D15')
609 model = seq[0].xpath(
610 './span[@property="libns:sequencer_model"]')
611 self.assertEqual(len(model), 1)
612 self.assertEqual(model[0].text, 'Illumina Genome Analyzer IIx')