537e9cb2f94b1c2cd975de3c0d7c489dceea20ed
[htsworkflow.git] / samples / test_samples.py
1 from __future__ import absolute_import, print_function
2
3 import datetime
4 import json
5 from unittest import skipUnless
6
7 from django.core.exceptions import ValidationError
8 from django.test import TestCase, RequestFactory
9 from django.utils.encoding import smart_text, smart_str, smart_bytes
10
11 from .models import Affiliation, ExperimentType, Species, Library
12 from .views import library_dict
13 from .samples_factory import (
14     AffiliationFactory,
15     LibraryAccessionFactory,
16     LibraryFactory,
17     SpeciesFactory,
18 )
19 from htsworkflow.auth import apidata
20 from htsworkflow.util.conversion import str_or_none
21 from htsworkflow.util.ethelp import validate_xhtml
22
23 try:
24     import RDF
25     HAVE_RDF = True
26
27     rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
28     xsdNS = RDF.NS("http://www.w3.org/2001/XMLSchema#")
29     libNS = RDF.NS("http://jumpgate.caltech.edu/wiki/LibraryOntology#")
30
31     from htsworkflow.util.rdfhelp import get_model, \
32         add_default_schemas, \
33         fromTypedNode, \
34         load_string_into_model
35     from htsworkflow.util.rdfinfer import Infer
36 except ImportError as e:
37     HAVE_RDF = False
38
39
class LibraryAccessionTestCase(TestCase):
    """Tests for library accessions: validation, url layout and RDFa output."""

    def test_validator(self):
        """Accessions with markup-sensitive characters must fail validation."""
        library = LibraryFactory()
        acc = LibraryAccessionFactory(library_id=library.id)
        # The unmodified factory accession has to validate cleanly first,
        # otherwise the failures below would prove nothing.
        acc.clean_fields()
        accession = acc.accession
        # test a variety of escape characters one at a time
        for c in "<>'\"&;":
            acc.accession = accession + c
            self.assertRaises(ValidationError, acc.clean_fields)

    def test_library_save_hook(self):
        """The accession url is the agency homepage plus /library/<accession>."""
        library = LibraryFactory()
        acc = LibraryAccessionFactory(library_id=library.id)

        homepage = acc.agency.homepage
        # assertEqual replaces the assertEquals alias removed in Python 3.12.
        self.assertEqual(acc.url[:len(homepage)], homepage)
        self.assertEqual(acc.url[len(homepage):],
                         '/library/' + acc.accession)

    @skipUnless(HAVE_RDF, "No RDF Support")
    def test_have_accession(self):
        """The library page exposes exactly one accession, equal to acc.url."""
        library = LibraryFactory()
        acc = LibraryAccessionFactory(library_id=library.id)
        lib_response = self.client.get(library.get_absolute_url())
        lib_content = smart_text(lib_response.content)

        # Parse the RDFa embedded in the rendered page into a model.
        model = get_model()
        load_string_into_model(model, 'rdfa', lib_content)

        body = """prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>

        select ?library ?accession
        where {
           ?library libns:accession ?accession
        }"""
        query = RDF.SPARQLQuery(body)
        accessions = [str(row['accession']) for row in query.execute(model)]
        self.assertEqual(len(accessions), 1)
        self.assertEqual(accessions[0], acc.url)
83
84
class LibraryTestCase(TestCase):
    """Tests for the Library model's organism() and affiliation() helpers."""

    def testOrganism(self):
        """organism() reports the common name of the library's species."""
        human = SpeciesFactory(common_name='human')
        # assertEqual replaces the assertEquals alias removed in Python 3.12.
        self.assertEqual(human.common_name, 'human')
        library = LibraryFactory(library_species=human)
        self.assertEqual(library.organism(), 'human')

    def testAddingOneAffiliation(self):
        """A single affiliation is rendered as 'name (contact name)'."""
        affiliation = AffiliationFactory.create(name='Alice')
        library = LibraryFactory()
        library.affiliations.add(affiliation)

        self.assertEqual(len(library.affiliations.all()), 1)
        self.assertEqual(library.affiliation(), 'Alice (contact name)')

    def testMultipleAffiliations(self):
        """Several affiliations are joined with ', ' by affiliation()."""
        alice = AffiliationFactory.create(name='Alice')
        bob = AffiliationFactory.create(name='Bob')

        library = LibraryFactory()
        library.affiliations.add(alice, bob)

        self.assertEqual(len(library.affiliations.all()), 2)
        self.assertEqual(library.affiliation(),
                         'Alice (contact name), Bob (contact name)')
110
111
class SampleWebTestCase(TestCase):
    """
    Test returning data from our database in rest like ways.
    (like returning json objects)
    """

    def test_library_dict(self):
        """library_dict() and the JSON endpoint both mirror the Library row."""
        library = LibraryFactory.create()
        lib_dict = library_dict(library.id)
        url = '/samples/library/%s/json' % (library.id,)
        lib_response = self.client.get(url, apidata)
        lib_json = json.loads(smart_text(lib_response.content))['result']

        # Keys whose value must equal the same-named attribute on the model.
        direct_fields = (
            'antibody_id',
            'cell_line_id',
            'experiment_type_id',
            'gel_cut_size',
            'hidden',
            'id',
            'insert_size',
            'library_name',
            'library_species_id',
            'library_type_id',
            'made_for',
            'made_by',
            'notes',
            'replicate',
            'stopping_point',
            'successful_pM',
        )

        for d in [lib_dict, lib_json]:
            # amplified_from_sample is a link to the library table,
            # I want to use the "id" for the data lookups not
            # the embedded primary key.
            # It gets slightly confusing on how to implement sending the
            # right id since amplified_from_sample can be null, so that key
            # is deliberately not compared here.
            for field in direct_fields:
                self.assertEqual(d[field], getattr(library, field), field)

            # Keys that are derived from, not copied from, the model.
            self.assertEqual(d['cell_line'], str_or_none(library.cell_line))
            self.assertEqual(d['experiment_type'],
                             library.experiment_type.name)
            self.assertEqual(d['library_species'],
                             library.library_species.scientific_name)
            self.assertTrue(d['library_type'].startswith('library type'))
            self.assertEqual(d['undiluted_concentration'],
                             str(library.undiluted_concentration))

        # NOTE(review): this nested function is defined but never called, so
        # the fixture-specific lane_set checks below do not run. It is kept
        # because it records the expected lane_set layout.
        def junk(self):
            # some specific tests
            if library.id == '10981':
                # test a case where there is no known status
                lane_set = {u'status': u'Unknown',
                            u'paired_end': True,
                            u'read_length': 75,
                            u'lane_number': 1,
                            u'lane_id': 1193,
                            u'flowcell': u'303TUAAXX',
                            u'status_code': None}
                self.assertEqual(len(d['lane_set']), 1)
                self.assertEqual(d['lane_set'][0], lane_set)
            elif library.id == '11016':
                # test a case where there is a status
                lane_set = {u'status': 'Good',
                            u'paired_end': True,
                            u'read_length': 75,
                            u'lane_number': 5,
                            u'lane_id': 1197,
                            u'flowcell': u'303TUAAXX',
                            u'status_code': 2}
                self.assertEqual(len(d['lane_set']), 1)
                self.assertEqual(d['lane_set'][0], lane_set)

    def test_invalid_library_json(self):
        """
        Make sure we get a 404 if we request an invalid library id
        """
        response = self.client.get('/samples/library/nottheone/json', apidata)
        self.assertEqual(response.status_code, 404)

    def test_invalid_library(self):
        """The HTML library page also answers 404 for an unknown id."""
        response = self.client.get('/library/nottheone/')
        self.assertEqual(response.status_code, 404)

    def test_library_no_key(self):
        """
        Make sure we get a 403 if we're not logged in
        """
        library = LibraryFactory.create()

        url = '/samples/library/{}/json'.format(library.id)
        # With the api credentials the request succeeds ...
        response = self.client.get(url, apidata)
        self.assertEqual(response.status_code, 200)
        # ... and without them it is refused.
        response = self.client.get(url)
        self.assertEqual(response.status_code, 403)

    @skipUnless(HAVE_RDF, "No RDF Support")
    def test_library_rdf(self):
        """The library detail page carries valid XHTML and consistent RDFa."""
        library = LibraryFactory.create()

        model = get_model()

        response = self.client.get(library.get_absolute_url())
        self.assertEqual(response.status_code, 200)
        load_string_into_model(model, 'rdfa', smart_text(response.content))

        # NOTE(review): this query matches `libns:library` (lower case) while
        # test_library_index_rdfa matches `libns:Library`. If no row matches,
        # the assertions inside the loop never execute -- confirm the class
        # name against the ontology.
        body = """prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>

        select ?library ?name ?library_id ?gel_cut ?made_by
        where {
           ?library a libns:library ;
                    libns:name ?name ;
                    libns:library_id ?library_id ;
                    libns:gel_cut ?gel_cut ;
                    libns:made_by ?made_by
        }"""
        query = RDF.SPARQLQuery(body)
        for r in query.execute(model):
            # NOTE(review): test_library_dict reads library.library_name and
            # library.gel_cut_size; verify that library.name and
            # library.gel_cut exist on the model.
            self.assertEqual(fromTypedNode(r['library_id']),
                             library.id)
            self.assertEqual(fromTypedNode(r['name']),
                             library.name)
            self.assertEqual(fromTypedNode(r['gel_cut']),
                             library.gel_cut)
            self.assertEqual(fromTypedNode(r['made_by']),
                             library.made_by)

        # validate_xhtml may return None; only a non-None result is asserted.
        state = validate_xhtml(smart_bytes(response.content))
        if state is not None:
            self.assertTrue(state)

        # validate a library page.
        add_default_schemas(model)
        inference = Infer(model)
        errmsgs = list(inference.run_validation())
        self.assertEqual(len(errmsgs), 0)

    @skipUnless(HAVE_RDF, "No RDF Support")
    def test_library_index_rdfa(self):
        """The library index lists every non-hidden library as valid RDFa."""
        model = get_model()
        add_default_schemas(model)
        inference = Infer(model)

        response = self.client.get('/library/')
        self.assertEqual(response.status_code, 200)
        load_string_into_model(model, 'rdfa', smart_text(response.content))

        errmsgs = list(inference.run_validation())
        self.assertEqual(len(errmsgs), 0)

        body = """prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>

        select ?library ?library_id ?name ?species ?species_name
        where {
           ?library a libns:Library .
           OPTIONAL { ?library libns:library_id ?library_id . }
           OPTIONAL { ?library libns:species ?species .
                      ?species libns:species_name ?species_name . }
           OPTIONAL { ?library libns:name ?name . }
        }"""
        bindings = {'library', 'library_id', 'name', 'species', 'species_name'}
        query = RDF.SPARQLQuery(body)
        count = 0
        for r in query.execute(model):
            count += 1
            for name, value in r.items():
                self.assertIn(name, bindings)
                self.assertIsNotNone(value)

        # One result row is expected per visible library.
        self.assertEqual(count, Library.objects.filter(hidden=False).count())

        # validate_xhtml may return None; only a non-None result is asserted.
        state = validate_xhtml(smart_bytes(response.content))
        if state is not None:
            self.assertTrue(state)
286
287
288 # The django test runner flushes the database between test suites not cases,
289 # so to be more compatible with running via nose we flush the database tables
290 # of interest before creating our sample data.
def create_db(obj):
    """Attach reference rows to ``obj`` and rebuild two sample libraries.

    Looks up one Species, one ExperimentType and two Affiliation rows by
    hard-coded primary key and stores them as attributes on ``obj``. It then
    deletes every Library row and saves two fresh libraries ("10001" and
    "10002"), which are also stored on ``obj`` as ``library_10001`` and
    ``library_10002``.

    NOTE(review): the primary keys are presumably provided by a fixture
    loaded elsewhere -- verify against the caller.
    """
    reference_rows = (
        ('species_human', Species, 8),
        ('experiment_rna_seq', ExperimentType, 4),
        ('affiliation_alice', Affiliation, 1),
        ('affiliation_bob', Affiliation, 2),
    )
    for attribute, model_class, primary_key in reference_rows:
        setattr(obj, attribute, model_class.objects.get(pk=primary_key))

    Library.objects.all().delete()

    # The two libraries differ only in id and name.
    sample_libraries = (
        ("10001", 'C2C12 named poorly'),
        ("10002", 'Worm named poorly'),
    )
    for library_id, library_name in sample_libraries:
        record = Library(
            id=library_id,
            library_name=library_name,
            library_species=obj.species_human,
            experiment_type=obj.experiment_rna_seq,
            creation_date=datetime.datetime.now(),
            made_for='scientist unit 2007',
            made_by='microfludics system 7321',
            stopping_point='2A',
            undiluted_concentration='5.01',
            hidden=False,
        )
        setattr(obj, 'library_' + library_id, record)
        record.save()
324
@skipUnless(HAVE_RDF, "No RDF Support")
class TestRDFaLibrary(TestCase):
    """Parse the RDFa of a library page and check the literals it exposes."""

    def setUp(self):
        self.request = RequestFactory()

    def test_parse_rdfa(self):
        """A rendered library page yields the expected library triples."""
        model = get_rdf_memory_model()
        parser = RDF.Parser(name='rdfa')

        bob = AffiliationFactory.create(name='Bob')

        lib_object = LibraryFactory()
        lib_object.affiliations.add(bob)
        url = '/library/{}/'.format(lib_object.id)
        lib_response = self.client.get(url)
        lib_body = smart_str(lib_response.content)
        self.assertNotEqual(len(lib_body), 0)
        # The page used to be dumped unconditionally to the fixed path
        # /tmp/body.html. That is not portable and a fixed name in a shared
        # directory is unsafe; dump lib_body to a tempfile when debugging.

        parser.parse_string_into_model(model,
                                       lib_body,
                                       'http://localhost' + url)

        # Each check expects exactly one literal per listed value for the
        # given LibraryOntology predicate.
        self.check_literal_object(model, ['Bob'], p=libNS['affiliation'])
        self.check_literal_object(model,
                                  ['experiment type name'],
                                  p=libNS['experiment_type'])
        self.check_literal_object(model, ['400'], p=libNS['gel_cut'])
        self.check_literal_object(model,
                                  ['microfluidics bot 7321'],
                                  p=libNS['made_by'])
        self.check_literal_object(model,
                                  [lib_object.library_name],
                                  p=libNS['name'])
        self.check_literal_object(model,
                                  [lib_object.library_species.scientific_name],
                                  p=libNS['species_name'])

    def _matching_statements(self, model, values, s, p, o):
        """Return statements matching (s, p, o); assert one per value."""
        statements = list(model.find_statements(RDF.Statement(s, p, o)))
        self.assertEqual(len(statements), len(values),
                         "Couln't find %s %s %s" % (s, p, o))
        return statements

    def check_literal_object(self, model, values, s=None, p=None, o=None):
        """Assert every statement matching (s, p, o) has a literal in values.

        ``None`` for s, p or o is passed straight to ``RDF.Statement``. The
        number of matching statements must equal ``len(values)``.
        """
        for statement in self._matching_statements(model, values, s, p, o):
            self.assertTrue(
                statement.object.literal_value['string'] in values)

    def check_uri_object(self, model, values, s=None, p=None, o=None):
        """Assert every statement matching (s, p, o) has a uri in values.

        ``None`` for s, p or o is passed straight to ``RDF.Statement``. The
        number of matching statements must equal ``len(values)``.
        """
        for statement in self._matching_statements(model, values, s, p, o):
            self.assertTrue(str(statement.object.uri) in values)
388
389
390
def get_rdf_memory_model():
    """Return a new RDF.Model backed by a fresh RDF.MemoryStorage."""
    memory_store = RDF.MemoryStorage()
    return RDF.Model(memory_store)
395
def suite():
    """Return a TestSuite holding the tests of the four cases listed below."""
    from unittest import TestSuite, defaultTestLoader

    case_classes = (
        LibraryAccessionTestCase,
        LibraryTestCase,
        SampleWebTestCase,
        TestRDFaLibrary,
    )
    collected = TestSuite()
    for case_class in case_classes:
        collected.addTests(defaultTestLoader.loadTestsFromTestCase(case_class))
    return collected
404
# Run the tests collected by suite() when this file is executed directly.
if __name__ == "__main__":
    import unittest

    unittest.main(defaultTest="suite")