1 from __future__ import absolute_import, print_function
4 from lxml.html import fromstring
7 except ImportError as e:
8 import simplejson as json
13 from urlparse import urljoin
15 from django.conf import settings
16 from django.core import mail
17 from django.core.exceptions import ObjectDoesNotExist
18 from django.test import TestCase
19 from django.test.utils import setup_test_environment, teardown_test_environment
20 from django.db import connection
21 from django.conf import settings
23 from .models import ClusterStation, cluster_station_default, \
24 DataRun, Sequencer, FlowCell, FileType
25 from samples.models import HTSUser
26 from .experiments import flowcell_information, lanes_for
27 from .experiments_factory import ClusterStationFactory, FlowCellFactory, LaneFactory
28 from samples.samples_factory import AffiliationFactory, HTSUserFactory, \
29 LibraryFactory, LibraryTypeFactory, MultiplexIndexFactory
30 from htsworkflow.auth import apidata
31 from htsworkflow.util.ethelp import validate_xhtml
33 from htsworkflow.pipelines.test.simulate_runfolder import TESTDATA_DIR
37 NSMAP = {'libns':'http://jumpgate.caltech.edu/wiki/LibraryOntology#'}
39 from django.db import connection
42 class ExperimentsTestCases(TestCase):
44 # Generate at least one fleshed out example flowcell
45 self.tempdir = tempfile.mkdtemp(prefix='htsw-test-experiments-')
46 settings.RESULT_HOME_DIR = self.tempdir
48 self.password = 'password'
49 self.user_odd = HTSUserFactory(username='user-odd')
50 self.user_odd.set_password(self.password)
51 self.affiliation_odd = AffiliationFactory(name='affiliation-odd', users=[self.user_odd])
52 self.user_even = HTSUserFactory(username='user-even')
53 self.user_even.set_password(self.password)
54 self.affiliation_even = AffiliationFactory(name='affiliation-even', users=[self.user_even])
55 self.admin = HTSUserFactory.create(username='admin', is_staff=True, is_superuser=True)
56 self.admin.set_password(self.password)
59 self.fc12150 = FlowCellFactory(flowcell_id='FC12150')
60 self.fc1_id = 'FC12150'
61 self.fc1_root = os.path.join(self.tempdir, self.fc1_id)
62 os.mkdir(self.fc1_root)
63 self.fc1_dir = os.path.join(self.fc1_root, 'C1-37')
64 os.mkdir(self.fc1_dir)
65 runxml = 'run_FC12150_2007-09-27.xml'
66 shutil.copy(os.path.join(TESTDATA_DIR, runxml),
67 os.path.join(self.fc1_dir, runxml))
69 affiliation = self.affiliation_odd if i % 2 == 1 else self.affiliation_even
70 library = LibraryFactory(id="1215" + str(i))
71 library.affiliations.add(affiliation)
72 lane = LaneFactory(flowcell=self.fc12150, lane_number=i, library=library)
74 os.path.join(TESTDATA_DIR,
75 'woldlab_070829_USI-EAS44_0017_FC11055_1.srf'),
76 os.path.join(self.fc1_dir,
77 'woldlab_070829_SERIAL_FC12150_%d.srf' %(i,))
81 self.fc42jtn = FlowCellFactory(flowcell_id='42JTNAAXX')
82 self.fc42jtn_lanes = []
84 affiliation = self.affiliation_odd if i % 2 == 1 else self.affiliation_even
85 library_type = LibraryTypeFactory(can_multiplex=True)
86 multiplex_index = MultiplexIndexFactory(adapter_type=library_type)
87 library = LibraryFactory(id="1300" + str(i),
88 library_type=library_type,
89 multiplex_id=multiplex_index.multiplex_id)
90 library.affiliations.add(affiliation)
91 lane = LaneFactory(flowcell=self.fc42jtn, lane_number=(i % 2) + 1, library=library)
92 self.fc42jtn_lanes.append(lane)
94 self.fc2_dir = os.path.join(self.tempdir, '42JTNAAXX')
95 os.mkdir(self.fc2_dir)
96 os.mkdir(os.path.join(self.fc2_dir, 'C1-25'))
97 os.mkdir(os.path.join(self.fc2_dir, 'C1-37'))
98 os.mkdir(os.path.join(self.fc2_dir, 'C1-37', 'Plots'))
101 shutil.rmtree(self.tempdir)
def test_flowcell_information(self):
    """Check the code that packs the django objects into simple types.

    Every flowcell is compared with its database record twice: once
    using the dictionary returned by flowcell_information() and once
    using the 'result' payload of the config JSON view.
    """
    def check_packed(packed, fc_django, fc_id, lane_key):
        """Compare one packed flowcell against its FlowCell record.

        lane_key converts a lane number into the key type used by
        packed['lane_set'].
        """
        self.assertEqual(packed['flowcell_id'], fc_id)
        self.assertEqual(packed['sequencer'], fc_django.sequencer.name)
        self.assertEqual(packed['read_length'], fc_django.read_length)
        self.assertEqual(packed['notes'], fc_django.notes)
        self.assertEqual(packed['cluster_station'], fc_django.cluster_station.name)

        for lane in fc_django.lane_set.all():
            lane_contents = packed['lane_set'][lane_key(lane.lane_number)]
            lane_dict = multi_lane_to_dict(lane_contents)[lane.library_id]

            self.assertEqual(lane_dict['cluster_estimate'], lane.cluster_estimate)
            self.assertEqual(lane_dict['comment'], lane.comment)
            self.assertEqual(lane_dict['flowcell'], lane.flowcell.flowcell_id)
            self.assertEqual(lane_dict['lane_number'], lane.lane_number)
            self.assertEqual(lane_dict['library_name'], lane.library.library_name)
            self.assertEqual(lane_dict['library_id'], lane.library.id)
            self.assertAlmostEqual(float(lane_dict['pM']), float(lane.pM))
            self.assertEqual(lane_dict['library_species'],
                             lane.library.library_species.scientific_name)

    # The loop looks every flowcell up by id, so the new object does not
    # need a local name.  The other two flowcells are expected to exist
    # already as self.fc12150 and self.fc42jtn.
    FlowCellFactory(flowcell_id='42JU1AAXX')

    for fc_id in [u'FC12150', u"42JTNAAXX", "42JU1AAXX"]:
        fc_dict = flowcell_information(fc_id)
        fc_django = FlowCell.objects.get(flowcell_id=fc_id)
        self.assertEqual(fc_django.flowcell_id, fc_id)

        # Direct call: lane_set is indexed by the lane number itself.
        check_packed(fc_dict, fc_django, fc_id, lambda number: number)

        response = self.client.get('/experiments/config/%s/json' % (fc_id,), apidata)
        fc_json = json.loads(response.content)['result']

        # JSON view: lane_set is indexed by the lane number as text.
        check_packed(fc_json, fc_django, fc_id, unicode)
def test_invalid_flowcell(self):
    """Requesting the config JSON of an unknown flowcell ID must give a 404."""
    unknown_url = '/experiments/config/nottheone/json'
    reply = self.client.get(unknown_url, apidata)
    self.assertEqual(reply.status_code, 404)
def test_no_key(self):
    """A config JSON request sent without the API data must be refused (403)."""
    keyless_url = u'/experiments/config/FC12150/json'
    reply = self.client.get(keyless_url)
    self.assertEqual(reply.status_code, 403)
def test_library_id(self):
    """Library IDs are compared as strings in both JSON views.

    The ID reported for lane 3 of FC12150 must be usable to retrieve the
    same library through the samples JSON view.
    """
    fc_reply = self.client.get('/experiments/config/FC12150/json', apidata)
    self.assertEqual(fc_reply.status_code, 200)
    lane_set = json.loads(fc_reply.content)['result']['lane_set']

    # The expected id is "1215" followed by the lane number, so lane 3
    # carries library 12153.
    first_library = lane_set['3'][0]
    self.assertEqual(first_library['library_id'], '12153')

    library_reply = self.client.get('/samples/library/12153/json', apidata)
    self.assertEqual(library_reply.status_code, 200)
    library = json.loads(library_reply.content)['result']

    self.assertEqual(library['library_id'], '12153')
191 def test_raw_id_field(self):
195 Library's have IDs, libraries also have primary keys,
196 we eventually had enough libraries that the drop down combo box was too
197 hard to filter through, unfortnately we want a field that uses our library
198 id and not the internal primary key, and raw_id_field uses primary keys.
200 This tests to make sure that the value entered in the raw library id field matches
201 the library id looked up.
203 expected_ids = [ u'1215{}'.format(i) for i in range(1,9) ]
204 self.assertTrue(self.client.login(username=self.admin.username, password=self.password))
205 response = self.client.get('/admin/experiments/flowcell/{}/'.format(self.fc12150.id))
207 tree = fromstring(response.content)
209 xpath_expression = '//input[@id="id_lane_set-%d-library"]'
210 input_field = tree.xpath(xpath_expression % (i,))[0]
211 library_field = input_field.find('../strong')
212 library_id, library_name = library_field.text.split(':')
213 # strip leading '#' sign from name
214 library_id = library_id[1:]
215 self.assertEqual(library_id, expected_ids[i])
216 self.assertEqual(input_field.attrib['value'], library_id)
def test_library_to_flowcell_link(self):
    """Make sure the library page includes links to the flowcell pages.

    The flowcell link found on the library page is followed, and the
    library page, the flowcell page and a flowcell lane page are each
    passed through validate_xhtml.
    """
    def check_xhtml(page):
        """Validate the body of the response that was just fetched."""
        status = validate_xhtml(page.content)
        # A None status is skipped rather than counted as a failure.
        if status is not None:
            self.assertTrue(status)

    self.assertTrue(self.client.login(username=self.admin.username, password=self.password))
    response = self.client.get('/library/12151/')
    self.assertEqual(response.status_code, 200)
    check_xhtml(response)

    tree = fromstring(response.content)
    flowcell_spans = tree.xpath('//span[@property="libns:flowcell_id"]',
                                namespaces=NSMAP)
    fc_span = flowcell_spans[1]
    self.assertEqual(fc_span.text, 'FC12150')
    fc_link = fc_span.getparent()
    # make sure some of our RDF made it.
    self.assertEqual(fc_link.get('typeof'), 'libns:IlluminaFlowcell')
    self.assertEqual(fc_link.get('href'), '/flowcell/FC12150/')

    fc_response = self.client.get(fc_link.get('href'))
    self.assertEqual(fc_response.status_code, 200)
    # Validate the flowcell page itself, not the library page again.
    check_xhtml(fc_response)

    fc_lane_response = self.client.get('/flowcell/FC12150/8/')
    self.assertEqual(fc_lane_response.status_code, 200)
    # Likewise validate the lane page that was just requested.
    check_xhtml(fc_lane_response)
def test_pooled_multiplex_id(self):
    """Check the pooled libraries reported for lane 2 of self.fc42jtn.

    Lane 2 must hold half as many entries as self.fc42jtn_lanes, and the
    index_sequence of libraries 13001 and 13003 must contain the
    multiplex id of the matching fixture lane's library.
    """
    packed = flowcell_information(self.fc42jtn.flowcell_id)

    lane_two = packed['lane_set'][2]
    self.assertEqual(len(lane_two), len(self.fc42jtn_lanes) / 2)
    by_library = multi_lane_to_dict(lane_two)

    for position, library_id in ((0, '13001'), (2, '13003')):
        multiplex_id = self.fc42jtn_lanes[position].library.multiplex_id
        self.assertIn(multiplex_id, by_library[library_id]['index_sequence'])
def test_lanes_for(self):
    """lanes_for() and the lanes_for JSON view must agree for a user.

    self.user_odd is expected to have 8 lanes; comment, lane_number,
    flowcell and run_date are compared entry by entry.
    """
    username = self.user_odd.username
    direct = lanes_for(username)
    self.assertEqual(len(direct), 8)

    reply = self.client.get('/experiments/lanes_for/%s/json' % (username,), apidata)
    via_json = json.loads(reply.content)['result']
    self.assertEqual(len(direct), len(via_json))
    for expected, received in zip(direct, via_json):
        for field in ('comment', 'lane_number', 'flowcell', 'run_date'):
            self.assertEqual(expected[field], received[field])
def test_lanes_for_no_lanes(self):
    """A freshly created user has no lanes, and the JSON view answers 404."""
    user = HTSUserFactory.create(username='supertest')
    self.assertEqual(len(lanes_for(user.username)), 0)

    # NOTE(review): the URL is formatted with the user object rather than
    # user.username, so it contains the user's string form - confirm this
    # is the request the test is meant to make.
    lanes_url = '/experiments/lanes_for/%s/json' % (user,)
    reply = self.client.get(lanes_url, apidata)
    self.assertEqual(reply.status_code, 404)
def test_lanes_for_no_user(self):
    """An unknown username raises ObjectDoesNotExist and gives a 404 view."""
    missing_user = 'not a real user'
    with self.assertRaises(ObjectDoesNotExist):
        lanes_for(missing_user)

    lanes_url = '/experiments/lanes_for/%s/json' % (missing_user,)
    reply = self.client.get(lanes_url, apidata)
    self.assertEqual(reply.status_code, 404)
def test_raw_data_dir(self):
    """Raw data path generator check"""
    expected_dir = os.path.join(settings.RESULT_HOME_DIR, self.fc1_id)

    flowcell = FlowCell.objects.get(flowcell_id=self.fc1_id)
    self.assertEqual(flowcell.get_raw_data_directory(), expected_dir)

    # Changed on the in-memory object only: a " (failed)" suffix on the
    # flowcell id must leave the directory untouched.
    flowcell.flowcell_id = self.fc1_id + " (failed)"
    self.assertEqual(flowcell.get_raw_data_directory(), expected_dir)
311 def test_data_run_import(self):
312 srf_file_type = FileType.objects.get(name='SRF')
313 runxml_file_type = FileType.objects.get(name='run_xml')
314 flowcell_id = self.fc1_id
315 flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
316 flowcell.update_data_runs()
317 self.assertEqual(len(flowcell.datarun_set.all()), 1)
319 run = flowcell.datarun_set.all()[0]
320 result_files = run.datafile_set.all()
321 result_dict = dict(((rf.relative_pathname, rf) for rf in result_files))
323 srf4 = result_dict['FC12150/C1-37/woldlab_070829_SERIAL_FC12150_4.srf']
324 self.assertEqual(srf4.file_type, srf_file_type)
325 self.assertEqual(srf4.library_id, '12154')
326 self.assertEqual(srf4.data_run.flowcell.flowcell_id, 'FC12150')
328 srf4.data_run.flowcell.lane_set.get(lane_number=4).library_id,
332 os.path.join(settings.RESULT_HOME_DIR, srf4.relative_pathname))
334 lane_files = run.lane_files()
335 self.assertEqual(lane_files[4]['srf'], srf4)
337 runxml= result_dict['FC12150/C1-37/run_FC12150_2007-09-27.xml']
338 self.assertEqual(runxml.file_type, runxml_file_type)
339 self.assertEqual(runxml.library_id, None)
341 import1 = len(DataRun.objects.filter(result_dir='FC12150/C1-37'))
342 # what happens if we import twice?
343 flowcell.import_data_run('FC12150/C1-37',
344 'run_FC12150_2007-09-27.xml')
346 len(DataRun.objects.filter(result_dir='FC12150/C1-37')),
349 def test_read_result_file(self):
350 """make sure we can return a result file
352 flowcell_id = self.fc1_id
353 flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
354 flowcell.update_data_runs()
356 #self.client.login(username='supertest', password='BJOKL5kAj6aFZ6A5')
358 result_files = flowcell.datarun_set.all()[0].datafile_set.all()
359 for f in result_files:
360 url = '/experiments/file/%s' % ( f.random_key,)
361 response = self.client.get(url)
362 self.assertEqual(response.status_code, 200)
363 mimetype = f.file_type.mimetype
365 mimetype = 'application/octet-stream'
367 self.assertEqual(mimetype, response['content-type'])
369 def test_flowcell_rdf(self):
371 from htsworkflow.util.rdfhelp import get_model, \
373 load_string_into_model, \
380 expected = {'1': ['12151'],
388 url = '/flowcell/{}/'.format(self.fc12150.flowcell_id)
389 response = self.client.get(url)
390 self.assertEqual(response.status_code, 200)
391 status = validate_xhtml(response.content)
392 if status is not None: self.assertTrue(status)
394 ns = urljoin('http://localhost', url)
395 load_string_into_model(model, 'rdfa', response.content, ns=ns)
396 body = """prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
397 prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
399 select ?flowcell ?flowcell_id ?lane_id ?library_id
401 ?flowcell a libns:IlluminaFlowcell ;
402 libns:flowcell_id ?flowcell_id ;
403 libns:has_lane ?lane .
404 ?lane libns:lane_number ?lane_id ;
405 libns:library ?library .
406 ?library libns:library_id ?library_id .
408 query = RDF.SPARQLQuery(body)
410 for r in query.execute(model):
412 self.assertEqual(fromTypedNode(r['flowcell_id']), u'FC12150')
413 lane_id = fromTypedNode(r['lane_id'])
414 library_id = fromTypedNode(r['library_id'])
415 self.assertTrue(library_id in expected[lane_id])
416 self.assertEqual(count, 8)
418 class TestEmailNotify(TestCase):
420 self.password = 'foo27'
421 self.user = HTSUserFactory.create(username='test')
422 self.user.set_password(self.password)
424 self.admin = HTSUserFactory.create(username='admintest', is_staff=True)
425 self.admin.set_password(self.password)
427 self.super = HTSUserFactory.create(username='supertest', is_staff=True, is_superuser=True)
428 self.super.set_password(self.password)
431 self.library = LibraryFactory.create()
432 self.affiliation = AffiliationFactory()
433 self.affiliation.users.add(self.user)
434 self.library.affiliations.add(self.affiliation)
435 self.fc = FlowCellFactory.create()
436 self.lane = LaneFactory(flowcell=self.fc, lane_number=1, library=self.library)
438 self.url = '/experiments/started/{}/'.format(self.fc.id)
def test_started_email_not_logged_in(self):
    """Without a login the started-email page answers with a redirect (302)."""
    anonymous_reply = self.client.get(self.url)
    self.assertEqual(anonymous_reply.status_code, 302)
def test_started_email_logged_in_user(self):
    """Logged in as self.user, the started-email page still answers 302."""
    logged_in = self.client.login(username=self.user.username,
                                  password=self.password)
    self.assertTrue(logged_in)
    user_reply = self.client.get(self.url)
    self.assertEqual(user_reply.status_code, 302)
def test_started_email_logged_in_staff(self):
    """Logged in as self.admin, the started-email page is served (200).

    The staff flag and password are first checked on the in-memory
    object and on a copy re-read from the database.
    """
    self.assertTrue(self.admin.is_staff)
    stored_admin = HTSUser.objects.get(username=self.admin.username)
    self.assertTrue(stored_admin.is_staff)
    self.assertTrue(stored_admin.check_password(self.password))

    logged_in = self.client.login(username=self.admin.username,
                                  password=self.password)
    self.assertTrue(logged_in)
    staff_reply = self.client.get(self.url)
    self.assertEqual(staff_reply.status_code, 200)
def test_started_email_send(self):
    """Preview the started email, then send it and inspect the outbox.

    The preview page must mention the affiliation email and the library
    name.  Sending with bcc enabled must leave two messages in
    mail.outbox, each with a non-empty body and a bcc equal to the union
    of settings.NOTIFICATION_BCC and settings.MANAGERS.
    """
    logged_in = self.client.login(username=self.admin.username,
                                  password=self.password)
    self.assertTrue(logged_in)
    preview = self.client.get(self.url)
    self.assertEqual(preview.status_code, 200)

    self.assertIn(self.affiliation.email, preview.content)
    self.assertIn(self.library.library_name, preview.content)

    sent = self.client.get(self.url, {'send':'1','bcc':'on'})
    self.assertEqual(sent.status_code, 200)
    self.assertEqual(len(mail.outbox), 2)
    expected_bcc = set(settings.NOTIFICATION_BCC) | set(settings.MANAGERS)
    for message in mail.outbox:
        self.assertGreater(len(message.body), 0)
        self.assertEqual(set(message.bcc), expected_bcc)
def test_email_navigation(self):
    """Can we navigate between the flowcell and email forms properly?

    The email page must show the flowcell id and contain a link back to
    the flowcell's admin page.
    """
    admin_url = '/admin/experiments/flowcell/{}/'.format(self.fc.id)
    self.client.login(username=self.admin.username, password=self.password)
    page = self.client.get(self.url)
    self.assertEqual(page.status_code, 200)

    flowcell_match = re.search(self.fc.flowcell_id, page.content)
    self.assertTrue(flowcell_match)

    # require that navigation back to the admin page exists
    back_link = '<a href="{}">[^<]+</a>'.format(admin_url)
    self.assertTrue(re.search(back_link, page.content))
def multi_lane_to_dict(lane):
    """Index a list of lane entries by their library ID.

    Each entry is looked up by its 'library_id' key; when two entries
    share an ID the later one replaces the earlier one.
    """
    return {entry['library_id']: entry for entry in lane}
493 class TestSequencer(TestCase):
495 self.fc12150 = FlowCellFactory(flowcell_id='FC12150')
496 self.library = LibraryFactory(id="12150")
497 self.lane = LaneFactory(flowcell=self.fc12150, lane_number=1, library=self.library)
499 def test_name_generation(self):
502 seq.instrument_name = "HWI-SEQ1"
503 seq.model = "Imaginary 5000"
505 self.assertEqual(unicode(seq), "Seq1 (HWI-SEQ1)")
507 def test_lookup(self):
509 self.assertEqual(fc.sequencer.model, 'HiSeq 1')
510 self.assertTrue(fc.sequencer.instrument_name.startswith('instrument name')),
511 # well actually we let the browser tack on the host name
512 url = fc.get_absolute_url()
513 self.assertEqual(url, '/flowcell/FC12150/')
516 response = self.client.get('/flowcell/FC12150/', apidata)
517 tree = fromstring(response.content)
518 seq_by = tree.xpath('//div[@rel="libns:sequenced_by"]',
520 self.assertEqual(len(seq_by), 1)
521 self.assertEqual(seq_by[0].attrib['rel'], 'libns:sequenced_by')
522 seq = seq_by[0].getchildren()
523 self.assertEqual(len(seq), 1)
524 sequencer = '/sequencer/' + str(self.fc12150.sequencer.id)
525 self.assertEqual(seq[0].attrib['about'], sequencer)
526 self.assertEqual(seq[0].attrib['typeof'], 'libns:Sequencer')
528 name = seq[0].xpath('./span[@property="libns:sequencer_name"]')
529 self.assertEqual(len(name), 1)
530 self.assertTrue(name[0].text.startswith('sequencer '))
531 instrument = seq[0].xpath(
532 './span[@property="libns:sequencer_instrument"]')
533 self.assertEqual(len(instrument), 1)
534 self.assertTrue(instrument[0].text.startswith('instrument name'))
535 model = seq[0].xpath(
536 './span[@property="libns:sequencer_model"]')
537 self.assertEqual(len(model), 1)
538 self.assertEqual(model[0].text, 'HiSeq 1')
540 def test_flowcell_with_rdf_validation(self):
541 from htsworkflow.util.rdfhelp import add_default_schemas, \
544 load_string_into_model
545 from htsworkflow.util.rdfinfer import Infer
548 add_default_schemas(model)
549 inference = Infer(model)
551 url ='/flowcell/FC12150/'
552 response = self.client.get(url)
553 self.assertEqual(response.status_code, 200)
554 status = validate_xhtml(response.content)
555 if status is not None: self.assertTrue(status)
557 load_string_into_model(model, 'rdfa', response.content)
559 errmsgs = list(inference.run_validation())
560 self.assertEqual(len(errmsgs), 0)
562 def test_lane_with_rdf_validation(self):
563 from htsworkflow.util.rdfhelp import add_default_schemas, \
566 load_string_into_model
567 from htsworkflow.util.rdfinfer import Infer
570 add_default_schemas(model)
571 inference = Infer(model)
573 url = '/lane/{}'.format(self.lane.id)
574 response = self.client.get(url)
575 self.assertEqual(response.status_code, 200)
576 status = validate_xhtml(response.content)
577 if status is not None: self.assertTrue(status)
579 load_string_into_model(model, 'rdfa', response.content)
581 errmsgs = list(inference.run_validation())
582 self.assertEqual(len(errmsgs), 0)
585 from unittest import TestSuite, defaultTestLoader
587 for testcase in [ExerimentsTestCases,
590 suite.addTests(defaultTestLoader.loadTestsFromTestCase(testcase))
# Running this module as a script executes the tests collected by suite().
if __name__ == "__main__":
    import unittest
    unittest.main(defaultTest="suite")