1 from __future__ import absolute_import, print_function
4 from lxml.html import fromstring
8 import simplejson as json
13 from urlparse import urljoin
15 from django.conf import settings
16 from django.core import mail
17 from django.core.exceptions import ObjectDoesNotExist
18 from django.test import TestCase
19 from django.test.utils import setup_test_environment, teardown_test_environment
20 from django.db import connection
21 from django.conf import settings
23 from .models import ClusterStation, DataRun, Sequencer, FlowCell, FileType, \
24 find_file_type_metadata_from_filename
25 from .experiments import flowcell_information, lanes_for
26 from htsworkflow.auth import apidata
27 from htsworkflow.util.ethelp import validate_xhtml
29 from htsworkflow.pipelines.test.simulate_runfolder import TESTDATA_DIR
33 NSMAP = {'libns':'http://jumpgate.caltech.edu/wiki/LibraryOntology#'}
35 from django.db import connection
38 class ExperimentsTestCases(TestCase):
39 fixtures = ['initial_data.json',
40 'test_flowcells.json',
44 self.tempdir = tempfile.mkdtemp(prefix='htsw-test-experiments-')
45 settings.RESULT_HOME_DIR = self.tempdir
47 self.fc1_id = 'FC12150'
48 self.fc1_root = os.path.join(self.tempdir, self.fc1_id)
49 os.mkdir(self.fc1_root)
50 self.fc1_dir = os.path.join(self.fc1_root, 'C1-37')
51 os.mkdir(self.fc1_dir)
52 runxml = 'run_FC12150_2007-09-27.xml'
53 shutil.copy(os.path.join(TESTDATA_DIR, runxml),
54 os.path.join(self.fc1_dir, runxml))
57 os.path.join(TESTDATA_DIR,
58 'woldlab_070829_USI-EAS44_0017_FC11055_1.srf'),
59 os.path.join(self.fc1_dir,
60 'woldlab_070829_SERIAL_FC12150_%d.srf' %(i,))
63 self.fc2_dir = os.path.join(self.tempdir, '42JTNAAXX')
64 os.mkdir(self.fc2_dir)
65 os.mkdir(os.path.join(self.fc2_dir, 'C1-25'))
66 os.mkdir(os.path.join(self.fc2_dir, 'C1-37'))
67 os.mkdir(os.path.join(self.fc2_dir, 'C1-37', 'Plots'))
70 shutil.rmtree(self.tempdir)
def test_flowcell_information(self):
    """
    Check the code that packs the django objects into simple types.

    For each fixture flowcell, both the dictionary returned by
    flowcell_information() and the 'result' payload of the
    /experiments/config/<id>/json view are compared, field by field,
    against the FlowCell row and its lanes.
    """
    def check_flowcell_fields(packed, flowcell):
        # Flowcell-level fields compared the same way for both forms.
        self.assertEqual(packed['sequencer'], flowcell.sequencer.name)
        self.assertEqual(packed['read_length'], flowcell.read_length)
        self.assertEqual(packed['notes'], flowcell.notes)
        self.assertEqual(packed['cluster_station'], flowcell.cluster_station.name)

    def check_lane_fields(packed_lane, lane):
        # Lane-level fields compared the same way for both forms.
        self.assertEqual(packed_lane['cluster_estimate'], lane.cluster_estimate)
        self.assertEqual(packed_lane['comment'], lane.comment)
        self.assertEqual(packed_lane['flowcell'], lane.flowcell.flowcell_id)
        self.assertEqual(packed_lane['lane_number'], lane.lane_number)
        self.assertEqual(packed_lane['library_name'], lane.library.library_name)
        self.assertEqual(packed_lane['library_id'], lane.library.id)
        # pM is compared as a float on both sides
        self.assertAlmostEqual(float(packed_lane['pM']), float(lane.pM))
        self.assertEqual(packed_lane['library_species'],
                         lane.library.library_species.scientific_name)

    for fc_id in [u'FC12150', u"42JTNAAXX", "42JU1AAXX"]:
        packed = flowcell_information(fc_id)
        flowcell = FlowCell.objects.get(flowcell_id=fc_id)
        self.assertEqual(packed['flowcell_id'], fc_id)
        self.assertEqual(flowcell.flowcell_id, fc_id)
        check_flowcell_fields(packed, flowcell)

        for lane in flowcell.lane_set.all():
            # the direct call is indexed with lane.lane_number as-is
            entries = packed['lane_set'][lane.lane_number]
            check_lane_fields(multi_lane_to_dict(entries)[lane.library_id], lane)

        response = self.client.get('/experiments/config/%s/json' % (fc_id,), apidata)
        # strptime isoformat string = '%Y-%m-%dT%H:%M:%S'
        payload = json.loads(response.content)['result']
        self.assertEqual(payload['flowcell_id'], fc_id)
        check_flowcell_fields(payload, flowcell)

        for lane in flowcell.lane_set.all():
            # the JSON payload is indexed with the unicode form of the number
            entries = payload['lane_set'][unicode(lane.lane_number)]
            check_lane_fields(multi_lane_to_dict(entries)[lane.library_id], lane)
def test_invalid_flowcell(self):
    """
    Requesting the JSON config of an unknown flowcell ID must give a 404.
    """
    missing_url = '/experiments/config/nottheone/json'
    reply = self.client.get(missing_url, apidata)
    self.assertEqual(reply.status_code, 404)
def test_no_key(self):
    """
    Metadata requests sent without the apidata credentials get a 403.
    """
    anonymous_reply = self.client.get(u'/experiments/config/FC12150/json')
    self.assertEqual(anonymous_reply.status_code, 403)
def test_library_id(self):
    """
    Library IDs should be flexible, so make sure we can retrieve a
    non-numeric ID, both through a flowcell's lane set and directly
    from the library JSON view.
    """
    fc_reply = self.client.get('/experiments/config/FC12150/json', apidata)
    self.assertEqual(fc_reply.status_code, 200)
    flowcell = json.loads(fc_reply.content)['result']

    # first library listed for lane '3' of the flowcell
    first_in_lane3 = flowcell['lane_set']['3'][0]
    self.assertEqual(first_in_lane3['library_id'], 'SL039')

    lib_reply = self.client.get('/samples/library/SL039/json', apidata)
    self.assertEqual(lib_reply.status_code, 200)
    library = json.loads(lib_reply.content)['result']

    self.assertEqual(library['library_id'], 'SL039')
155 def test_raw_id_field(self):
Libraries have IDs; libraries also have primary keys.
We eventually had enough libraries that the drop-down combo box was too
hard to filter through. Unfortunately we want a field that uses our library
ID and not the internal primary key, and raw_id_field uses primary keys.
164 This tests to make sure that the value entered in the raw library id field matches
165 the library id looked up.
167 expected_ids = [u'10981',u'11016',u'SL039',u'11060',
168 u'11061',u'11062',u'11063',u'11064']
169 self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')
170 response = self.client.get('/admin/experiments/flowcell/153/')
172 tree = fromstring(response.content)
174 xpath_expression = '//input[@id="id_lane_set-%d-library"]'
175 input_field = tree.xpath(xpath_expression % (i,))[0]
176 library_field = input_field.find('../strong')
177 library_id, library_name = library_field.text.split(':')
178 # strip leading '#' sign from name
179 library_id = library_id[1:]
180 self.assertEqual(library_id, expected_ids[i])
181 self.assertEqual(input_field.attrib['value'], library_id)
def test_library_to_flowcell_link(self):
    """
    Make sure the library page includes links to the flowcell pages,
    and that they work with flowcell IDs that have parenthetical comments.

    Three pages are fetched (library, linked flowcell, one flowcell lane).
    Each must return 200 and each page's own content is passed to
    validate_xhtml.
    """
    def assert_valid_xhtml(page):
        # A None status is skipped rather than treated as a failure;
        # any other status must be truthy.
        status = validate_xhtml(page.content)
        if status is not None:
            self.assertTrue(status)

    self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')
    response = self.client.get('/library/11070/')
    self.assertEqual(response.status_code, 200)
    assert_valid_xhtml(response)

    tree = fromstring(response.content)
    # NOTE(review): the continuation line of this call is not visible in
    # the reviewed listing; namespaces=NSMAP is assumed -- confirm.
    flowcell_spans = tree.xpath('//span[@property="libns:flowcell_id"]',
                                namespaces=NSMAP)
    self.assertEqual(flowcell_spans[1].text, '30012AAXX (failed)')
    failed_fc_span = flowcell_spans[1]
    failed_fc_a = failed_fc_span.getparent()
    # make sure some of our RDF made it.
    self.assertEqual(failed_fc_a.get('typeof'), 'libns:IlluminaFlowcell')
    self.assertEqual(failed_fc_a.get('href'), '/flowcell/30012AAXX/')

    fc_response = self.client.get(failed_fc_a.get('href'))
    self.assertEqual(fc_response.status_code, 200)
    # validate the flowcell page itself, not the library page again
    assert_valid_xhtml(fc_response)

    fc_lane_response = self.client.get('/flowcell/30012AAXX/8/')
    self.assertEqual(fc_lane_response.status_code, 200)
    # validate the lane page itself, not the library page again
    assert_valid_xhtml(fc_lane_response)
214 def test_pooled_multiplex_id(self):
215 fc_dict = flowcell_information('42JU1AAXX')
216 lane_contents = fc_dict['lane_set'][3]
217 self.assertEqual(len(lane_contents), 2)
218 lane_dict = multi_lane_to_dict(lane_contents)
220 self.assertEqual(lane_dict['12044']['index_sequence'],
224 self.assertEqual(lane_dict['11045']['index_sequence'],
229 def test_lanes_for(self):
231 Check the code that packs the django objects into simple types.
234 lanes = lanes_for(user)
235 self.assertEqual(len(lanes), 5)
237 response = self.client.get('/experiments/lanes_for/%s/json' % (user,), apidata)
238 lanes_json = json.loads(response.content)['result']
239 self.assertEqual(len(lanes), len(lanes_json))
240 for i in range(len(lanes)):
241 self.assertEqual(lanes[i]['comment'], lanes_json[i]['comment'])
242 self.assertEqual(lanes[i]['lane_number'], lanes_json[i]['lane_number'])
243 self.assertEqual(lanes[i]['flowcell'], lanes_json[i]['flowcell'])
244 self.assertEqual(lanes[i]['run_date'], lanes_json[i]['run_date'])
246 def test_lanes_for_no_lanes(self):
248 Do we get something meaningful back when the user isn't attached to anything?
251 lanes = lanes_for(user)
252 self.assertEqual(len(lanes), 0)
254 response = self.client.get('/experiments/lanes_for/%s/json' % (user,), apidata)
255 lanes_json = json.loads(response.content)
def test_lanes_for_no_user(self):
    """
    Do we get something meaningful back when it's the wrong user?

    lanes_for must raise ObjectDoesNotExist and the JSON view must
    answer with a 404.
    """
    user = 'not a real user'
    self.assertRaises(ObjectDoesNotExist, lanes_for, user)

    lanes_url = '/experiments/lanes_for/%s/json' % (user,)
    reply = self.client.get(lanes_url, apidata)
    self.assertEqual(reply.status_code, 404)
def test_raw_data_dir(self):
    """Raw data path generator check

    The directory is expected to be RESULT_HOME_DIR joined with the
    flowcell ID, and to stay the same after " (failed)" is appended
    to the flowcell ID.
    """
    expected_dir = os.path.join(settings.RESULT_HOME_DIR, self.fc1_id)

    flowcell = FlowCell.objects.get(flowcell_id=self.fc1_id)
    self.assertEqual(flowcell.get_raw_data_directory(), expected_dir)

    # only the in-memory object is changed here; nothing is saved
    flowcell.flowcell_id = self.fc1_id + " (failed)"
    self.assertEqual(flowcell.get_raw_data_directory(), expected_dir)
280 def test_data_run_import(self):
281 srf_file_type = FileType.objects.get(name='SRF')
282 runxml_file_type = FileType.objects.get(name='run_xml')
283 flowcell_id = self.fc1_id
284 flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
285 flowcell.update_data_runs()
286 self.assertEqual(len(flowcell.datarun_set.all()), 1)
288 run = flowcell.datarun_set.all()[0]
289 result_files = run.datafile_set.all()
290 result_dict = dict(((rf.relative_pathname, rf) for rf in result_files))
292 srf4 = result_dict['FC12150/C1-37/woldlab_070829_SERIAL_FC12150_4.srf']
293 self.assertEqual(srf4.file_type, srf_file_type)
294 self.assertEqual(srf4.library_id, '11060')
295 self.assertEqual(srf4.data_run.flowcell.flowcell_id, 'FC12150')
297 srf4.data_run.flowcell.lane_set.get(lane_number=4).library_id,
301 os.path.join(settings.RESULT_HOME_DIR, srf4.relative_pathname))
303 lane_files = run.lane_files()
304 self.assertEqual(lane_files[4]['srf'], srf4)
306 runxml= result_dict['FC12150/C1-37/run_FC12150_2007-09-27.xml']
307 self.assertEqual(runxml.file_type, runxml_file_type)
308 self.assertEqual(runxml.library_id, None)
310 import1 = len(DataRun.objects.filter(result_dir='FC12150/C1-37'))
311 # what happens if we import twice?
312 flowcell.import_data_run('FC12150/C1-37',
313 'run_FC12150_2007-09-27.xml')
315 len(DataRun.objects.filter(result_dir='FC12150/C1-37')),
318 def test_read_result_file(self):
319 """make sure we can return a result file
321 flowcell_id = self.fc1_id
322 flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
323 flowcell.update_data_runs()
325 #self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')
327 result_files = flowcell.datarun_set.all()[0].datafile_set.all()
328 for f in result_files:
329 url = '/experiments/file/%s' % ( f.random_key,)
330 response = self.client.get(url)
331 self.assertEqual(response.status_code, 200)
332 mimetype = f.file_type.mimetype
334 mimetype = 'application/octet-stream'
336 self.assertEqual(mimetype, response['content-type'])
338 def test_flowcell_rdf(self):
340 from htsworkflow.util.rdfhelp import get_model, \
342 load_string_into_model, \
349 expected = {'1': ['11034'],
351 '3': ['12044','11045'],
352 '4': ['11047','13044'],
357 url = '/flowcell/42JU1AAXX/'
358 response = self.client.get(url)
359 self.assertEqual(response.status_code, 200)
360 status = validate_xhtml(response.content)
361 if status is not None: self.assertTrue(status)
363 ns = urljoin('http://localhost', url)
364 load_string_into_model(model, 'rdfa', response.content, ns=ns)
365 body = """prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
366 prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
368 select ?flowcell ?flowcell_id ?lane_id ?library_id
370 ?flowcell a libns:IlluminaFlowcell ;
371 libns:flowcell_id ?flowcell_id ;
372 libns:has_lane ?lane .
373 ?lane libns:lane_number ?lane_id ;
374 libns:library ?library .
375 ?library libns:library_id ?library_id .
377 query = RDF.SPARQLQuery(body)
379 for r in query.execute(model):
381 self.assertEqual(fromTypedNode(r['flowcell_id']), u'42JU1AAXX')
382 lane_id = fromTypedNode(r['lane_id'])
383 library_id = fromTypedNode(r['library_id'])
384 self.assertTrue(library_id in expected[lane_id])
385 self.assertEqual(count, 10)
388 class TestFileType(TestCase):
389 fixtures = ['initial_data.json',
390 'test_flowcells.json',
def test_file_type_unicode(self):
    """unicode() of the 'QSEQ tarfile' FileType equals its name."""
    qseq_type = FileType.objects.get(name='QSEQ tarfile')
    self.assertEqual(u"QSEQ tarfile", unicode(qseq_type))
400 def test_find_file_type(self):
401 file_type_objects = FileType.objects
402 cases = [('woldlab_090921_HWUSI-EAS627_0009_42FC3AAXX_l7_r1.tar.bz2',
403 'QSEQ tarfile', 7, 1),
404 ('woldlab_091005_HWUSI-EAS627_0010_42JT2AAXX_1.srf',
406 ('s_1_eland_extended.txt.bz2','ELAND Extended', 1, None),
407 ('s_7_eland_multi.txt.bz2', 'ELAND Multi', 7, None),
408 ('s_3_eland_result.txt.bz2','ELAND Result', 3, None),
409 ('s_1_export.txt.bz2','ELAND Export', 1, None),
410 ('s_1_percent_call.png', 'IVC Percent Call', 1, None),
411 ('s_2_percent_base.png', 'IVC Percent Base', 2, None),
412 ('s_3_percent_all.png', 'IVC Percent All', 3, None),
413 ('s_4_call.png', 'IVC Call', 4, None),
414 ('s_5_all.png', 'IVC All', 5, None),
415 ('Summary.htm', 'Summary.htm', None, None),
416 ('run_42JT2AAXX_2009-10-07.xml', 'run_xml', None, None),
418 for filename, typename, lane, end in cases:
419 ft = find_file_type_metadata_from_filename(filename)
420 self.assertEqual(ft['file_type'],
421 file_type_objects.get(name=typename))
422 self.assertEqual(ft.get('lane', None), lane)
423 self.assertEqual(ft.get('end', None), end)
425 def test_assign_file_type_complex_path(self):
426 file_type_objects = FileType.objects
427 cases = [('/a/b/c/woldlab_090921_HWUSI-EAS627_0009_42FC3AAXX_l7_r1.tar.bz2',
428 'QSEQ tarfile', 7, 1),
429 ('foo/woldlab_091005_HWUSI-EAS627_0010_42JT2AAXX_1.srf',
431 ('../s_1_eland_extended.txt.bz2','ELAND Extended', 1, None),
432 ('/bleem/s_7_eland_multi.txt.bz2', 'ELAND Multi', 7, None),
433 ('/qwer/s_3_eland_result.txt.bz2','ELAND Result', 3, None),
434 ('/ty///1/s_1_export.txt.bz2','ELAND Export', 1, None),
435 ('/help/s_1_percent_call.png', 'IVC Percent Call', 1, None),
436 ('/bored/s_2_percent_base.png', 'IVC Percent Base', 2, None),
437 ('/example1/s_3_percent_all.png', 'IVC Percent All', 3, None),
438 ('amonkey/s_4_call.png', 'IVC Call', 4, None),
439 ('fishie/s_5_all.png', 'IVC All', 5, None),
440 ('/random/Summary.htm', 'Summary.htm', None, None),
441 ('/notrandom/run_42JT2AAXX_2009-10-07.xml', 'run_xml', None, None),
443 for filename, typename, lane, end in cases:
444 result = find_file_type_metadata_from_filename(filename)
445 self.assertEqual(result['file_type'],
446 file_type_objects.get(name=typename))
447 self.assertEqual(result.get('lane',None), lane)
448 self.assertEqual(result.get('end', None), end)
class TestEmailNotify(TestCase):
    """Tests for the '/experiments/started/153/' notification page."""
    fixtures = ['initial_data.json',
                'test_flowcells.json']

    def test_started_email_not_logged_in(self):
        """A request made without logging in is answered with a 302."""
        reply = self.client.get('/experiments/started/153/')
        self.assertEqual(reply.status_code, 302)

    def test_started_email_logged_in_user(self):
        """A request made as the 'test' user is answered with a 302."""
        self.client.login(username='test', password='BJOKL5kAj6aFZ6A5')
        reply = self.client.get('/experiments/started/153/')
        self.assertEqual(reply.status_code, 302)

    def test_started_email_logged_in_staff(self):
        """A request made as the 'admintest' user is answered with a 200."""
        self.client.login(username='admintest', password='BJOKL5kAj6aFZ6A5')
        reply = self.client.get('/experiments/started/153/')
        self.assertEqual(reply.status_code, 200)

    def test_started_email_send(self):
        """Preview the notification, then send it and inspect the outbox.

        Four messages are expected. Each must have a non-empty body and
        a bcc set equal to NOTIFICATION_BCC combined with MANAGERS.
        """
        self.client.login(username='admintest', password='BJOKL5kAj6aFZ6A5')
        preview = self.client.get('/experiments/started/153/')
        self.assertEqual(preview.status_code, 200)

        for fragment in ('pk1@example.com',
                         'Lane #8 : (11064) Paired ends 104'):
            self.assertTrue(fragment in preview.content)

        send_args = {'send':'1','bcc':'on'}
        sent = self.client.get('/experiments/started/153/', send_args)
        self.assertEqual(sent.status_code, 200)
        self.assertEqual(len(mail.outbox), 4)

        expected_bcc = set(settings.NOTIFICATION_BCC) | set(settings.MANAGERS)
        for message in mail.outbox:
            self.assertTrue(len(message.body) > 0)
            self.assertEqual(set(message.bcc), expected_bcc)

    def test_email_navigation(self):
        """
        Can we navigate between the flowcell and email forms properly?
        """
        self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')
        page = self.client.get('/experiments/started/153/')
        self.assertEqual(page.status_code, 200)
        self.assertTrue(re.search('Flowcell FC12150', page.content))
        # require that navigation back to the admin page exists
        admin_link = '<a href="/admin/experiments/flowcell/153/">[^<]+</a>'
        self.assertTrue(re.search(admin_link, page.content))
def multi_lane_to_dict(lane):
    """Index a list of lane entries by their 'library_id' value.

    :param lane: iterable of mappings, each having a 'library_id' key
    :return: dict mapping each library ID to its entry; if an ID occurs
             more than once, the last entry with that ID is kept
    """
    by_library_id = {}
    for entry in lane:
        by_library_id[entry['library_id']] = entry
    return by_library_id
501 class TestSequencer(TestCase):
502 fixtures = ['initial_data.json',
503 'test_flowcells.json',
506 def test_name_generation(self):
509 seq.instrument_name = "HWI-SEQ1"
510 seq.model = "Imaginary 5000"
512 self.assertEqual(unicode(seq), "Seq1 (HWI-SEQ1)")
514 def test_lookup(self):
515 fc = FlowCell.objects.get(pk=153)
516 self.assertEqual(fc.sequencer.model,
517 "Illumina Genome Analyzer IIx")
518 self.assertEqual(fc.sequencer.instrument_name,
520 # well actually we let the browser tack on the host name
521 url = fc.get_absolute_url()
522 self.assertEqual(url, '/flowcell/FC12150/')
525 response = self.client.get('/flowcell/FC12150/', apidata)
526 tree = fromstring(response.content)
527 seq_by = tree.xpath('//div[@rel="libns:sequenced_by"]',
529 self.assertEqual(len(seq_by), 1)
530 self.assertEqual(seq_by[0].attrib['rel'], 'libns:sequenced_by')
531 seq = seq_by[0].getchildren()
532 self.assertEqual(len(seq), 1)
533 self.assertEqual(seq[0].attrib['about'], '/sequencer/2')
534 self.assertEqual(seq[0].attrib['typeof'], 'libns:Sequencer')
536 name = seq[0].xpath('./span[@property="libns:sequencer_name"]')
537 self.assertEqual(len(name), 1)
538 self.assertEqual(name[0].text, 'Tardigrade')
539 instrument = seq[0].xpath(
540 './span[@property="libns:sequencer_instrument"]')
541 self.assertEqual(len(instrument), 1)
542 self.assertEqual(instrument[0].text, 'ILLUMINA-EC5D15')
543 model = seq[0].xpath(
544 './span[@property="libns:sequencer_model"]')
545 self.assertEqual(len(model), 1)
546 self.assertEqual(model[0].text, 'Illumina Genome Analyzer IIx')
548 def test_flowcell_with_rdf_validation(self):
549 from htsworkflow.util.rdfhelp import add_default_schemas, \
552 load_string_into_model
553 from htsworkflow.util.rdfinfer import Infer
556 add_default_schemas(model)
557 inference = Infer(model)
559 url ='/flowcell/FC12150/'
560 response = self.client.get(url)
561 self.assertEqual(response.status_code, 200)
562 status = validate_xhtml(response.content)
563 if status is not None: self.assertTrue(status)
565 load_string_into_model(model, 'rdfa', response.content)
567 errmsgs = list(inference.run_validation())
568 self.assertEqual(len(errmsgs), 0)
570 def test_lane_with_rdf_validation(self):
571 from htsworkflow.util.rdfhelp import add_default_schemas, \
574 load_string_into_model
575 from htsworkflow.util.rdfinfer import Infer
578 add_default_schemas(model)
579 inference = Infer(model)
582 response = self.client.get(url)
583 self.assertEqual(response.status_code, 200)
584 status = validate_xhtml(response.content)
585 if status is not None: self.assertTrue(status)
587 load_string_into_model(model, 'rdfa', response.content)
589 errmsgs = list(inference.run_validation())
590 self.assertEqual(len(errmsgs), 0)
593 from unittest import TestSuite, defaultTestLoader
595 for testcase in [ClusterStationTestCases,
601 suite.addTests(defaultTestLoader.loadTestsFromTestCase(testcase))
604 if __name__ == "__main__":
605 from unittest import main
606 main(defaultTest="suite")