from __future__ import absolute_import, print_function, unicode_literals

import json
import os
import re
import shutil
import tempfile

from lxml.html import fromstring
from six.moves.urllib.parse import urljoin

from django.conf import settings
from django.core import mail
from django.core.exceptions import ObjectDoesNotExist
from django.db import connection
from django.test import TestCase
from django.test.utils import setup_test_environment, teardown_test_environment
from django.utils.encoding import smart_text

from .models import ClusterStation, cluster_station_default, \
    DataRun, Sequencer, FlowCell, FileType
from samples.models import HTSUser
from .experiments import flowcell_information, lanes_for
from .experiments_factory import ClusterStationFactory, FlowCellFactory, LaneFactory
from samples.samples_factory import AffiliationFactory, HTSUserFactory, \
    LibraryFactory, LibraryTypeFactory, MultiplexIndexFactory
from htsworkflow.auth import apidata
from htsworkflow.util.ethelp import validate_xhtml

from htsworkflow.pipelines.test.simulate_runfolder import TESTDATA_DIR
# Prefix-to-URI map for the "libns" (LibraryOntology) namespace.
NSMAP = {
    'libns': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
}
37 from django.db import connection
class ExperimentsTestCases(TestCase):
    """Tests for the flowcell/lane helpers, their JSON and HTML views,
    and the data-run import code.
    """

    def setUp(self):
        """Create two flowcells with lanes and a temporary result directory."""
        # Generate at least one fleshed out example flowcell
        self.tempdir = tempfile.mkdtemp(prefix='htsw-test-experiments-')
        settings.RESULT_HOME_DIR = self.tempdir

        self.password = 'password'
        self.user_odd = HTSUserFactory(username='user-odd')
        self.user_odd.set_password(self.password)
        self.affiliation_odd = AffiliationFactory(
            name='affiliation-odd', users=[self.user_odd])
        self.user_even = HTSUserFactory(username='user-even')
        self.user_even.set_password(self.password)
        self.affiliation_even = AffiliationFactory(
            name='affiliation-even', users=[self.user_even])
        self.admin = HTSUserFactory.create(
            username='admin', is_staff=True, is_superuser=True)
        self.admin.set_password(self.password)
        # Persist the hashed password; several tests log in as admin.
        self.admin.save()

        self.fc12150 = FlowCellFactory(flowcell_id='FC12150')
        self.fc1_id = 'FC12150'
        self.fc1_root = os.path.join(self.tempdir, self.fc1_id)
        os.mkdir(self.fc1_root)
        self.fc1_dir = os.path.join(self.fc1_root, 'C1-37')
        os.mkdir(self.fc1_dir)
        runxml = 'run_FC12150_2007-09-27.xml'
        shutil.copy(os.path.join(TESTDATA_DIR, runxml),
                    os.path.join(self.fc1_dir, runxml))
        # One library per lane: odd lanes belong to affiliation_odd,
        # even lanes to affiliation_even.
        for i in range(1, 9):
            affiliation = self.affiliation_odd if i % 2 == 1 else self.affiliation_even
            library = LibraryFactory(id="1215" + str(i))
            library.affiliations.add(affiliation)
            LaneFactory(flowcell=self.fc12150, lane_number=i, library=library)
            # Every lane gets a copy of the same sample SRF file under a
            # lane-specific name.
            shutil.copy(
                os.path.join(TESTDATA_DIR,
                             'woldlab_070829_USI-EAS44_0017_FC11055_1.srf'),
                os.path.join(self.fc1_dir,
                             'woldlab_070829_SERIAL_FC12150_%d.srf' % (i,))
            )

        self.fc42jtn = FlowCellFactory(flowcell_id='42JTNAAXX')
        self.fc42jtn_lanes = []
        # NOTE(review): loop bounds restored as 1..8; this matches
        # test_lanes_for expecting 8 lanes for the "odd" user (4 + 4).
        for i in range(1, 9):
            affiliation = self.affiliation_odd if i % 2 == 1 else self.affiliation_even
            library_type = LibraryTypeFactory(can_multiplex=True)
            multiplex_index = MultiplexIndexFactory(adapter_type=library_type)
            library = LibraryFactory(id="1300" + str(i),
                                     library_type=library_type,
                                     multiplex_id=multiplex_index.multiplex_id)
            library.affiliations.add(affiliation)
            # Pool the libraries onto two lanes: odd i -> lane 2, even i -> lane 1.
            lane = LaneFactory(flowcell=self.fc42jtn,
                               lane_number=(i % 2) + 1,
                               library=library)
            self.fc42jtn_lanes.append(lane)

        self.fc2_dir = os.path.join(self.tempdir, '42JTNAAXX')
        os.mkdir(self.fc2_dir)
        os.mkdir(os.path.join(self.fc2_dir, 'C1-25'))
        os.mkdir(os.path.join(self.fc2_dir, 'C1-37'))
        os.mkdir(os.path.join(self.fc2_dir, 'C1-37', 'Plots'))

    def tearDown(self):
        """Remove the temporary result directory created by setUp."""
        shutil.rmtree(self.tempdir)

    def _assert_valid_xhtml(self, content):
        """Assert that validate_xhtml's status is truthy.

        A status of None is not treated as a failure.
        """
        status = validate_xhtml(content)
        if status is not None:
            self.assertTrue(status)

    def _assert_lane_matches(self, lane_dict, lane):
        """Compare one packed lane dictionary against its Lane model object."""
        self.assertEqual(lane_dict['cluster_estimate'], lane.cluster_estimate)
        self.assertEqual(lane_dict['comment'], lane.comment)
        self.assertEqual(lane_dict['flowcell'], lane.flowcell.flowcell_id)
        self.assertEqual(lane_dict['lane_number'], lane.lane_number)
        self.assertEqual(lane_dict['library_name'], lane.library.library_name)
        self.assertEqual(lane_dict['library_id'], lane.library.id)
        # Compare as floats so the two representations need not be the same type.
        self.assertAlmostEqual(float(lane_dict['pM']), float(lane.pM))
        self.assertEqual(lane_dict['library_species'],
                         lane.library.library_species.scientific_name)

    def test_flowcell_information(self):
        """
        Check the code that packs the django objects into simple types.
        """
        # A third flowcell, looked up by ID in the loop below.
        FlowCellFactory(flowcell_id='42JU1AAXX')

        for fc_id in ['FC12150', '42JTNAAXX', '42JU1AAXX']:
            fc_dict = flowcell_information(fc_id)
            fc_django = FlowCell.objects.get(flowcell_id=fc_id)
            self.assertEqual(fc_dict['flowcell_id'], fc_id)
            self.assertEqual(fc_django.flowcell_id, fc_id)
            self.assertEqual(fc_dict['sequencer'], fc_django.sequencer.name)
            self.assertEqual(fc_dict['read_length'], fc_django.read_length)
            self.assertEqual(fc_dict['notes'], fc_django.notes)
            self.assertEqual(fc_dict['cluster_station'], fc_django.cluster_station.name)

            for lane in fc_django.lane_set.all():
                lane_contents = fc_dict['lane_set'][lane.lane_number]
                lane_dict = multi_lane_to_dict(lane_contents)[lane.library_id]
                self._assert_lane_matches(lane_dict, lane)

            response = self.client.get('/experiments/config/%s/json' % (fc_id,), apidata)
            # strptime isoformat string = '%Y-%m-%dT%H:%M:%S'
            fc_json = json.loads(smart_text(response.content))['result']
            self.assertEqual(fc_json['flowcell_id'], fc_id)
            self.assertEqual(fc_json['sequencer'], fc_django.sequencer.name)
            self.assertEqual(fc_json['read_length'], fc_django.read_length)
            self.assertEqual(fc_json['notes'], fc_django.notes)
            self.assertEqual(fc_json['cluster_station'], fc_django.cluster_station.name)

            for lane in fc_django.lane_set.all():
                # JSON object keys are strings, so the lane number must be too.
                lane_contents = fc_json['lane_set'][str(lane.lane_number)]
                lane_dict = multi_lane_to_dict(lane_contents)[lane.library_id]
                self._assert_lane_matches(lane_dict, lane)

    def test_invalid_flowcell(self):
        """
        Make sure we get a 404 if we request an invalid flowcell ID
        """
        response = self.client.get('/experiments/config/nottheone/json', apidata)
        self.assertEqual(response.status_code, 404)

    def test_no_key(self):
        """
        Require logging in to retrieve meta data
        """
        response = self.client.get('/experiments/config/FC12150/json')
        self.assertEqual(response.status_code, 403)

    def test_library_id(self):
        """
        Library IDs should be flexible, so make sure we can retrieve a non-numeric ID
        """
        response = self.client.get('/experiments/config/FC12150/json', apidata)
        self.assertEqual(response.status_code, 200)
        flowcell = json.loads(smart_text(response.content))['result']

        # library id is 12150 + lane number (1-8), so 12153
        lane_contents = flowcell['lane_set']['3']
        lane_library = lane_contents[0]
        self.assertEqual(lane_library['library_id'], '12153')

        response = self.client.get('/samples/library/12153/json', apidata)
        self.assertEqual(response.status_code, 200)
        library_12153 = json.loads(smart_text(response.content))['result']

        self.assertEqual(library_12153['library_id'], '12153')

    def test_raw_id_field(self):
        """
        Libraries have IDs, libraries also have primary keys,
        we eventually had enough libraries that the drop down combo box was too
        hard to filter through, unfortunately we want a field that uses our library
        id and not the internal primary key, and raw_id_field uses primary keys.

        This tests to make sure that the value entered in the raw library id field matches
        the library id looked up.
        """
        expected_ids = ['1215{}'.format(i) for i in range(1, 9)]
        self.assertTrue(self.client.login(username=self.admin.username, password=self.password))
        response = self.client.get('/admin/experiments/flowcell/{}/'.format(self.fc12150.id))

        tree = fromstring(response.content)
        xpath_expression = '//input[@id="id_lane_set-%d-library"]'
        # The input ids are looked up with a 0-based index, matching expected_ids.
        for i, expected_id in enumerate(expected_ids):
            input_field = tree.xpath(xpath_expression % (i,))[0]
            library_field = input_field.find('../strong')
            library_id, library_name = library_field.text.split(':')
            # strip leading '#' sign from name
            library_id = library_id[1:]
            self.assertEqual(library_id, expected_id)
            self.assertEqual(input_field.attrib['value'], library_id)

    def test_library_to_flowcell_link(self):
        """
        Make sure the library page includes links to the flowcell pages.
        That work with flowcell IDs that have parenthetical comments.
        """
        self.assertTrue(self.client.login(username=self.admin.username, password=self.password))
        response = self.client.get('/library/12151/')
        self.assertEqual(response.status_code, 200)
        self._assert_valid_xhtml(response.content)

        tree = fromstring(response.content)
        flowcell_spans = tree.xpath('//span[@property="libns:flowcell_id"]',
                                    namespaces=NSMAP)
        self.assertEqual(flowcell_spans[1].text, 'FC12150')
        failed_fc_span = flowcell_spans[1]
        failed_fc_a = failed_fc_span.getparent()
        # make sure some of our RDF made it.
        self.assertEqual(failed_fc_a.get('typeof'), 'libns:IlluminaFlowcell')
        self.assertEqual(failed_fc_a.get('href'), '/flowcell/FC12150/')
        fc_response = self.client.get(failed_fc_a.get('href'))
        self.assertEqual(fc_response.status_code, 200)
        # Validate the page that was just fetched, not the library page again.
        self._assert_valid_xhtml(fc_response.content)

        fc_lane_response = self.client.get('/flowcell/FC12150/8/')
        self.assertEqual(fc_lane_response.status_code, 200)
        self._assert_valid_xhtml(fc_lane_response.content)

    def test_pooled_multiplex_id(self):
        """Libraries pooled onto one lane each report their multiplex index."""
        fc_dict = flowcell_information(self.fc42jtn.flowcell_id)

        lane_contents = fc_dict['lane_set'][2]
        # setUp spreads the libraries evenly over two lanes.
        self.assertEqual(len(lane_contents), len(self.fc42jtn_lanes) // 2)
        lane_dict = multi_lane_to_dict(lane_contents)

        self.assertIn(self.fc42jtn_lanes[0].library.multiplex_id,
                      lane_dict['13001']['index_sequence'])
        self.assertIn(self.fc42jtn_lanes[2].library.multiplex_id,
                      lane_dict['13003']['index_sequence'])

    def test_lanes_for(self):
        """
        Check the code that packs the django objects into simple types.
        """
        user = self.user_odd.username
        lanes = lanes_for(user)
        self.assertEqual(len(lanes), 8)

        response = self.client.get('/experiments/lanes_for/%s/json' % (user,), apidata)
        lanes_json = json.loads(smart_text(response.content))['result']
        self.assertEqual(len(lanes), len(lanes_json))
        for expected, actual in zip(lanes, lanes_json):
            self.assertEqual(expected['comment'], actual['comment'])
            self.assertEqual(expected['lane_number'], actual['lane_number'])
            self.assertEqual(expected['flowcell'], actual['flowcell'])
            self.assertEqual(expected['run_date'], actual['run_date'])

    def test_lanes_for_no_lanes(self):
        """
        Do we get something meaningful back when the user isn't attached to anything?
        """
        user = HTSUserFactory.create(username='supertest')
        lanes = lanes_for(user.username)
        self.assertEqual(len(lanes), 0)

        # NOTE(review): this formats the user object, not user.username, into
        # the URL; confirm which one the 404 expectation depends on.
        response = self.client.get('/experiments/lanes_for/%s/json' % (user,), apidata)
        self.assertEqual(response.status_code, 404)

    def test_lanes_for_no_user(self):
        """
        Do we get something meaningful back when its the wrong user
        """
        user = 'not a real user'
        self.assertRaises(ObjectDoesNotExist, lanes_for, user)

        response = self.client.get('/experiments/lanes_for/%s/json' % (user,), apidata)
        self.assertEqual(response.status_code, 404)

    def test_raw_data_dir(self):
        """Raw data path generator check"""
        flowcell_id = self.fc1_id
        raw_dir = os.path.join(settings.RESULT_HOME_DIR, flowcell_id)

        fc = FlowCell.objects.get(flowcell_id=flowcell_id)
        self.assertEqual(fc.get_raw_data_directory(), raw_dir)

        # A status suffix on the flowcell ID must not change the directory.
        fc.flowcell_id = flowcell_id + " (failed)"
        self.assertEqual(fc.get_raw_data_directory(), raw_dir)

    def test_data_run_import(self):
        """Importing a run directory records its files; re-importing adds no run."""
        srf_file_type = FileType.objects.get(name='SRF')
        runxml_file_type = FileType.objects.get(name='run_xml')
        flowcell_id = self.fc1_id
        flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
        flowcell.update_data_runs()
        self.assertEqual(len(flowcell.datarun_set.all()), 1)

        run = flowcell.datarun_set.all()[0]
        result_files = run.datafile_set.all()
        result_dict = {rf.relative_pathname: rf for rf in result_files}

        srf4 = result_dict['FC12150/C1-37/woldlab_070829_SERIAL_FC12150_4.srf']
        self.assertEqual(srf4.file_type, srf_file_type)
        self.assertEqual(srf4.library_id, '12154')
        self.assertEqual(srf4.data_run.flowcell.flowcell_id, 'FC12150')
        self.assertEqual(
            srf4.data_run.flowcell.lane_set.get(lane_number=4).library_id,
            '12154')
        # NOTE(review): first argument restored as srf4.pathname; confirm
        # against the original file.
        self.assertEqual(
            srf4.pathname,
            os.path.join(settings.RESULT_HOME_DIR, srf4.relative_pathname))

        lane_files = run.lane_files()
        self.assertEqual(lane_files[4]['srf'], srf4)

        runxml = result_dict['FC12150/C1-37/run_FC12150_2007-09-27.xml']
        self.assertEqual(runxml.file_type, runxml_file_type)
        self.assertEqual(runxml.library_id, None)

        import1 = len(DataRun.objects.filter(result_dir='FC12150/C1-37'))
        # what happens if we import twice?
        flowcell.import_data_run('FC12150/C1-37',
                                 'run_FC12150_2007-09-27.xml')
        self.assertEqual(
            len(DataRun.objects.filter(result_dir='FC12150/C1-37')),
            import1)

    def test_read_result_file(self):
        """make sure we can return a result file
        """
        flowcell_id = self.fc1_id
        flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
        flowcell.update_data_runs()

        result_files = flowcell.datarun_set.all()[0].datafile_set.all()
        for f in result_files:
            url = '/experiments/file/%s' % (f.random_key,)
            response = self.client.get(url)
            self.assertEqual(response.status_code, 200)
            mimetype = f.file_type.mimetype
            if mimetype is None:
                # Expected content type for file types with no mimetype set.
                mimetype = 'application/octet-stream'

            self.assertEqual(mimetype, response['content-type'])

    def test_flowcell_rdf(self):
        """The flowcell page's RDFa links every lane to its library."""
        import RDF
        from htsworkflow.util.rdfhelp import get_model, \
            fromTypedNode, \
            load_string_into_model

        model = get_model()

        # lane number -> library IDs expected on that lane
        expected = {'1': ['12151'],
                    '2': ['12152'],
                    '3': ['12153'],
                    '4': ['12154'],
                    '5': ['12155'],
                    '6': ['12156'],
                    '7': ['12157'],
                    '8': ['12158']}
        url = '/flowcell/{}/'.format(self.fc12150.flowcell_id)
        response = self.client.get(url)
        self.assertEqual(response.status_code, 200)
        self._assert_valid_xhtml(response.content)

        ns = urljoin('http://localhost', url)
        load_string_into_model(model, 'rdfa', smart_text(response.content), ns=ns)
        body = """prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>

        select ?flowcell ?flowcell_id ?lane_id ?library_id
        where {
          ?flowcell a libns:IlluminaFlowcell ;
                    libns:flowcell_id ?flowcell_id ;
                    libns:has_lane ?lane .
          ?lane libns:lane_number ?lane_id ;
                libns:library ?library .
          ?library libns:library_id ?library_id .
        }"""
        query = RDF.SPARQLQuery(body)
        count = 0
        for r in query.execute(model):
            count += 1
            self.assertEqual(fromTypedNode(r['flowcell_id']), 'FC12150')
            lane_id = fromTypedNode(r['lane_id'])
            library_id = fromTypedNode(r['library_id'])
            self.assertIn(library_id, expected[lane_id])
        self.assertEqual(count, 8)
class TestEmailNotify(TestCase):
    """Tests for the '/experiments/started/<id>/' notification e-mail view."""

    def setUp(self):
        """Create a regular, a staff and a superuser account plus one flowcell."""
        self.password = 'foo27'
        self.user = HTSUserFactory.create(username='test')
        self.user.set_password(self.password)
        # Persist the hashed passwords; the tests below log in with them.
        self.user.save()
        self.admin = HTSUserFactory.create(username='admintest', is_staff=True)
        self.admin.set_password(self.password)
        self.admin.save()
        self.super = HTSUserFactory.create(username='supertest', is_staff=True, is_superuser=True)
        self.super.set_password(self.password)
        self.super.save()

        self.library = LibraryFactory.create()
        self.affiliation = AffiliationFactory()
        self.affiliation.users.add(self.user)
        self.library.affiliations.add(self.affiliation)
        self.fc = FlowCellFactory.create()
        self.lane = LaneFactory(flowcell=self.fc, lane_number=1, library=self.library)

        self.url = '/experiments/started/{}/'.format(self.fc.id)

    def test_started_email_not_logged_in(self):
        """Anonymous requests are redirected."""
        response = self.client.get(self.url)
        self.assertEqual(response.status_code, 302)

    def test_started_email_logged_in_user(self):
        """A logged-in non-staff user is redirected as well."""
        self.assertTrue(self.client.login(username=self.user.username, password=self.password))
        response = self.client.get(self.url)
        self.assertEqual(response.status_code, 302)

    def test_started_email_logged_in_staff(self):
        """A staff member can open the page."""
        self.assertTrue(self.admin.is_staff)
        # Re-read the account from the database to check what was persisted.
        admin = HTSUser.objects.get(username=self.admin.username)
        self.assertTrue(admin.is_staff)
        self.assertTrue(admin.check_password(self.password))
        self.assertTrue(self.client.login(username=self.admin.username, password=self.password))
        response = self.client.get(self.url)
        self.assertEqual(response.status_code, 200)

    def test_started_email_send(self):
        """Requesting the page with send=1 puts messages in the test outbox."""
        self.assertTrue(self.client.login(username=self.admin.username, password=self.password))
        response = self.client.get(self.url)
        self.assertEqual(response.status_code, 200)

        page = smart_text(response.content)
        self.assertIn(self.affiliation.email, page)
        self.assertIn(self.library.library_name, page)

        response = self.client.get(self.url, {'send': '1', 'bcc': 'on'})
        self.assertEqual(response.status_code, 200)
        self.assertEqual(len(mail.outbox), 2)
        # Each message is expected to be bcc'ed to both configured lists.
        bcc = set(settings.NOTIFICATION_BCC) | set(settings.MANAGERS)
        for m in mail.outbox:
            self.assertTrue(len(m.body) > 0)
            self.assertEqual(set(m.bcc), bcc)

    def test_email_navigation(self):
        """
        Can we navigate between the flowcell and email forms properly?
        """
        admin_url = '/admin/experiments/flowcell/{}/'.format(self.fc.id)
        self.assertTrue(self.client.login(username=self.admin.username, password=self.password))
        response = self.client.get(self.url)
        self.assertEqual(response.status_code, 200)
        page = smart_text(response.content)
        # The flowcell ID is literal text, so test containment rather than
        # treating it as a regular expression.
        self.assertIn(self.fc.flowcell_id, page)
        # require that navigation back to the admin page exists
        link_pattern = '<a href="{}">[^<]+</a>'.format(re.escape(admin_url))
        self.assertTrue(re.search(link_pattern, page))
def multi_lane_to_dict(lane):
    """Convert a list of lane entries into a dictionary indexed by library ID

    Each entry must be a mapping with a 'library_id' key.  When several
    entries share a library ID the last one wins.
    """
    return {entry['library_id']: entry for entry in lane}
class TestSequencer(TestCase):
    """Tests for the Sequencer model and the RDFa on flowcell and lane pages."""

    def setUp(self):
        """Create flowcell FC12150 with one lane holding library 12150."""
        self.fc12150 = FlowCellFactory(flowcell_id='FC12150')
        self.library = LibraryFactory(id="12150")
        self.lane = LaneFactory(flowcell=self.fc12150, lane_number=1, library=self.library)

    def _assert_valid_xhtml(self, content):
        """Assert that validate_xhtml's status is truthy.

        A status of None is not treated as a failure.
        """
        status = validate_xhtml(content)
        if status is not None:
            self.assertTrue(status)

    def _assert_rdfa_validates(self, body):
        """Load the RDFa in *body* into a fresh model and expect no errors."""
        from htsworkflow.util.rdfhelp import add_default_schemas, \
            get_model, \
            load_string_into_model
        from htsworkflow.util.rdfinfer import Infer

        model = get_model()
        add_default_schemas(model)
        inference = Infer(model)

        load_string_into_model(model, 'rdfa', body)

        errmsgs = list(inference.run_validation())
        self.assertEqual(len(errmsgs), 0)

    def test_name_generation(self):
        """str() of a sequencer reads '<name> (<instrument name>)'."""
        seq = Sequencer()
        seq.name = "Seq1"
        seq.instrument_name = "HWI-SEQ1"
        seq.model = "Imaginary 5000"

        self.assertEqual(str(seq), "Seq1 (HWI-SEQ1)")

    def test_lookup(self):
        """The flowcell from setUp has a sequencer and an absolute URL."""
        fc = self.fc12150
        self.assertEqual(fc.sequencer.model, 'HiSeq 1')
        self.assertTrue(fc.sequencer.instrument_name.startswith('instrument name'))
        # well actually we let the browser tack on the host name
        url = fc.get_absolute_url()
        self.assertEqual(url, '/flowcell/FC12150/')

    # NOTE(review): restored as a separate test method named test_rdf;
    # confirm the name against the original file.
    def test_rdf(self):
        """The flowcell page describes its sequencer in RDFa."""
        response = self.client.get('/flowcell/FC12150/', apidata)
        self.assertEqual(response.status_code, 200)
        tree = fromstring(response.content)
        seq_by = tree.xpath('//div[@rel="libns:sequenced_by"]',
                            namespaces=NSMAP)
        self.assertEqual(len(seq_by), 1)
        self.assertEqual(seq_by[0].attrib['rel'], 'libns:sequenced_by')
        seq = seq_by[0].getchildren()
        self.assertEqual(len(seq), 1)
        sequencer = '/sequencer/' + str(self.fc12150.sequencer.id)
        self.assertEqual(seq[0].attrib['about'], sequencer)
        self.assertEqual(seq[0].attrib['typeof'], 'libns:Sequencer')

        name = seq[0].xpath('./span[@property="libns:sequencer_name"]')
        self.assertEqual(len(name), 1)
        self.assertTrue(name[0].text.startswith('sequencer '))
        instrument = seq[0].xpath(
            './span[@property="libns:sequencer_instrument"]')
        self.assertEqual(len(instrument), 1)
        self.assertTrue(instrument[0].text.startswith('instrument name'))
        model = seq[0].xpath(
            './span[@property="libns:sequencer_model"]')
        self.assertEqual(len(model), 1)
        self.assertEqual(model[0].text, 'HiSeq 1')

    def test_flowcell_with_rdf_validation(self):
        """The flowcell page's RDFa passes schema validation."""
        url = '/flowcell/FC12150/'
        response = self.client.get(url)
        self.assertEqual(response.status_code, 200)
        self._assert_valid_xhtml(response.content)

        self._assert_rdfa_validates(smart_text(response.content))

    def test_lane_with_rdf_validation(self):
        """The lane page's RDFa passes schema validation."""
        url = '/lane/{}'.format(self.lane.id)
        response = self.client.get(url)
        rdfbody = smart_text(response.content)
        self.assertEqual(response.status_code, 200)
        self._assert_valid_xhtml(rdfbody)

        self._assert_rdfa_validates(rdfbody)
def suite():
    """Return a TestSuite holding every test case class in this module."""
    from unittest import TestSuite, defaultTestLoader

    tests = TestSuite()
    for testcase in [ExperimentsTestCases,
                     TestEmailNotify,
                     TestSequencer]:
        tests.addTests(defaultTestLoader.loadTestsFromTestCase(testcase))
    return tests
# Run the module's suite() when this file is executed as a script.
if __name__ == "__main__":
    import unittest
    unittest.main(defaultTest="suite")