elpy+flake8 suggested some formatting improvements
[htsworkflow.git] / samples / test_samples.py
1 from __future__ import absolute_import, print_function
2
3 import datetime
4 import json
5 from unittest import skipUnless
6
7 from django.core.exceptions import ValidationError
8 from django.test import TestCase, RequestFactory
9 from django.utils.encoding import smart_text, smart_str
10
11 from .models import Affiliation, ExperimentType, Species, Library
12 from .views import library_dict
13 from .samples_factory import (
14     AffiliationFactory,
15     LibraryAccessionFactory,
16     LibraryFactory,
17     SpeciesFactory,
18 )
19 from htsworkflow.auth import apidata
20 from htsworkflow.util.conversion import str_or_none
21 from htsworkflow.util.ethelp import validate_xhtml
22
# RDF support is optional.  When the RDF module (or the htsworkflow helpers
# imported alongside it) cannot be imported, HAVE_RDF is set to False and the
# tests decorated with skipUnless(HAVE_RDF, ...) are skipped.
try:
    import RDF
    HAVE_RDF = True

    # Namespace helpers used when building/inspecting RDF statements.
    rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
    xsdNS = RDF.NS("http://www.w3.org/2001/XMLSchema#")
    libNS = RDF.NS("http://jumpgate.caltech.edu/wiki/LibraryOntology#")

    from htsworkflow.util.rdfhelp import (
        add_default_schemas,
        fromTypedNode,
        get_model,
        load_string_into_model,
    )
    from htsworkflow.util.rdfinfer import Infer
except ImportError:
    HAVE_RDF = False
38
39
class LibraryAccessionTestCase(TestCase):
    """Tests for library accessions: validation, url and RDFa output."""

    def test_validator(self):
        """Appending any single escape character must fail validation."""
        library = LibraryFactory()
        acc = LibraryAccessionFactory(library_id=library.id)
        # The unmodified factory accession has to validate cleanly first.
        acc.clean_fields()
        accession = acc.accession
        # test a variety of escape characters one at a time
        for c in "<>'\"&;":
            acc.accession = accession + c
            self.assertRaises(ValidationError, acc.clean_fields)

    def test_library_save_hook(self):
        """acc.url is the agency homepage followed by /library/<accession>."""
        library = LibraryFactory()
        acc = LibraryAccessionFactory(library_id=library.id)

        homepage = acc.agency.homepage
        self.assertEqual(acc.url[:len(homepage)], homepage)
        self.assertEqual(acc.url[len(homepage):],
                         '/library/' + acc.accession)

    @skipUnless(HAVE_RDF, "No RDF Support")
    def test_have_accession(self):
        """The library page exposes exactly one accession, equal to acc.url."""
        library = LibraryFactory()
        acc = LibraryAccessionFactory(library_id=library.id)
        lib_response = self.client.get(library.get_absolute_url())
        lib_content = smart_text(lib_response.content)

        # Parse the RDFa embedded in the rendered page into an RDF model.
        model = get_model()
        load_string_into_model(model, 'rdfa', lib_content)

        body = """prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>

        select ?library ?accession
        where {
           ?library libns:accession ?accession
        }"""
        query = RDF.SPARQLQuery(body)
        accessions = [str(row['accession']) for row in query.execute(model)]
        self.assertEqual(len(accessions), 1)
        self.assertEqual(accessions[0], acc.url)
83
84
class LibraryTestCase(TestCase):
    """Tests for the Library helpers organism() and affiliation()."""

    def testOrganism(self):
        """organism() returns the common name of the library's species."""
        human = SpeciesFactory(common_name='human')
        self.assertEqual(human.common_name, 'human')
        library = LibraryFactory(library_species=human)
        self.assertEqual(library.organism(), 'human')

    def testAddingOneAffiliation(self):
        """A single affiliation is rendered as 'name (contact name)'."""
        affiliation = AffiliationFactory.create(name='Alice')
        library = LibraryFactory()
        library.affiliations.add(affiliation)

        self.assertEqual(len(library.affiliations.all()), 1)
        self.assertEqual(library.affiliation(), 'Alice (contact name)')

    def testMultipleAffiliations(self):
        """Several affiliations are joined with ', ' by affiliation()."""
        alice = AffiliationFactory.create(name='Alice')
        bob = AffiliationFactory.create(name='Bob')

        library = LibraryFactory()
        library.affiliations.add(alice, bob)

        self.assertEqual(len(library.affiliations.all()), 2)
        self.assertEqual(library.affiliation(),
                         'Alice (contact name), Bob (contact name)')
110
111
class SampleWebTestCase(TestCase):
    """
    Test returning data from our database in rest like ways.
    (like returning json objects)
    """
    def test_library_dict(self):
        """library_dict() and the json view both mirror the Library row."""
        library = LibraryFactory.create()
        lib_dict = library_dict(library.id)
        url = '/samples/library/%s/json' % (library.id,)
        lib_response = self.client.get(url, apidata)
        lib_json = json.loads(smart_text(lib_response.content))['result']

        # Run the same comparisons against the python dict and the decoded
        # json payload.
        for d in [lib_dict, lib_json]:
            # amplified_from_sample is a link to the library table,
            # I want to use the "id" for the data lookups not
            # the embedded primary key.
            # It gets slightly confusing on how to implement sending the
            # right id since amplified_from_sample can be null, so that
            # field is not compared here.
            self.assertEqual(d['antibody_id'], library.antibody_id)
            self.assertEqual(d['cell_line_id'], library.cell_line_id)
            self.assertEqual(d['cell_line'], str_or_none(library.cell_line))
            self.assertEqual(d['experiment_type'],
                             library.experiment_type.name)
            self.assertEqual(d['experiment_type_id'],
                             library.experiment_type_id)
            self.assertEqual(d['gel_cut_size'], library.gel_cut_size)
            self.assertEqual(d['hidden'], library.hidden)
            self.assertEqual(d['id'], library.id)
            self.assertEqual(d['insert_size'], library.insert_size)
            self.assertEqual(d['library_name'], library.library_name)
            self.assertEqual(d['library_species'],
                             library.library_species.scientific_name)
            self.assertEqual(d['library_species_id'],
                             library.library_species_id)
            self.assertEqual(d['library_type_id'], library.library_type_id)
            self.assertTrue(d['library_type'].startswith('library type'))
            self.assertEqual(d['made_for'], library.made_for)
            self.assertEqual(d['made_by'], library.made_by)
            self.assertEqual(d['notes'], library.notes)
            self.assertEqual(d['replicate'], library.replicate)
            self.assertEqual(d['stopping_point'], library.stopping_point)
            self.assertEqual(d['successful_pM'], library.successful_pM)
            self.assertEqual(d['undiluted_concentration'],
                             str(library.undiluted_concentration))
            # TODO: add assertions for d['lane_set'] (lanes with and without
            # a known status) once the factories create lanes.

    def test_invalid_library_json(self):
        """
        Make sure we get a 404 if we request an invalid library id
        """
        response = self.client.get('/samples/library/nottheone/json', apidata)
        self.assertEqual(response.status_code, 404)

    def test_invalid_library(self):
        """The html library page also returns 404 for an unknown id."""
        response = self.client.get('/library/nottheone/')
        self.assertEqual(response.status_code, 404)

    def test_library_no_key(self):
        """
        Make sure we get a 403 if we're not logged in
        """
        library = LibraryFactory.create()

        url = '/samples/library/{}/json'.format(library.id)
        # With the api credentials the request succeeds ...
        response = self.client.get(url, apidata)
        self.assertEqual(response.status_code, 200)
        # ... and without them it is refused.
        response = self.client.get(url)
        self.assertEqual(response.status_code, 403)

    @skipUnless(HAVE_RDF, "No RDF Support")
    def test_library_rdf(self):
        """The library detail page carries valid xhtml and valid RDFa."""
        library = LibraryFactory.create()

        model = get_model()

        response = self.client.get(library.get_absolute_url())
        self.assertEqual(response.status_code, 200)
        content = smart_text(response.content)
        load_string_into_model(model, 'rdfa', content)

        body = """prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>

        select ?library ?name ?library_id ?gel_cut ?made_by
        where {
           ?library a libns:library ;
                    libns:name ?name ;
                    libns:library_id ?library_id ;
                    libns:gel_cut ?gel_cut ;
                    libns:made_by ?made_by
        }"""
        query = RDF.SPARQLQuery(body)
        # NOTE(review): this query matches `libns:library` while
        # test_library_index_rdfa matches `libns:Library`, and the loop reads
        # library.name / library.gel_cut whereas test_library_dict reads
        # library_name / gel_cut_size.  If the query returns no rows these
        # assertions never execute -- confirm against the ontology and model.
        for r in query.execute(model):
            self.assertEqual(fromTypedNode(r['library_id']),
                             library.id)
            self.assertEqual(fromTypedNode(r['name']),
                             library.name)
            self.assertEqual(fromTypedNode(r['gel_cut']),
                             library.gel_cut)
            self.assertEqual(fromTypedNode(r['made_by']),
                             library.made_by)

        # validate_xhtml may return None; only a non-None result is asserted.
        state = validate_xhtml(content)
        if state is not None:
            self.assertTrue(state)

        # validate a library page.
        add_default_schemas(model)
        inference = Infer(model)
        errmsgs = list(inference.run_validation())
        self.assertEqual(len(errmsgs), 0)

    @skipUnless(HAVE_RDF, "No RDF Support")
    def test_library_index_rdfa(self):
        """The library index lists every non-hidden library as valid RDFa."""
        model = get_model()
        add_default_schemas(model)
        inference = Infer(model)

        response = self.client.get('/library/')
        self.assertEqual(response.status_code, 200)
        load_string_into_model(model, 'rdfa', smart_text(response.content))

        errmsgs = list(inference.run_validation())
        self.assertEqual(len(errmsgs), 0)

        body = """prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
        prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>

        select ?library ?library_id ?name ?species ?species_name
        where {
           ?library a libns:Library .
           OPTIONAL { ?library libns:library_id ?library_id . }
           OPTIONAL { ?library libns:species ?species .
                      ?species libns:species_name ?species_name . }
           OPTIONAL { ?library libns:name ?name . }
        }"""
        bindings = {'library', 'library_id', 'name', 'species',
                    'species_name'}
        query = RDF.SPARQLQuery(body)
        count = 0
        for r in query.execute(model):
            count += 1
            # Every returned binding must be one we selected and be bound.
            for name, value in r.items():
                self.assertIn(name, bindings)
                self.assertIsNotNone(value)

        self.assertEqual(count, len(Library.objects.filter(hidden=False)))

        # validate_xhtml may return None; only a non-None result is asserted.
        state = validate_xhtml(response.content)
        if state is not None:
            self.assertTrue(state)
287
288
289 # The django test runner flushes the database between test suites not cases,
290 # so to be more compatible with running via nose we flush the database tables
291 # of interest before creating our sample data.
def create_db(obj):
    """Populate ``obj`` with reference rows and two freshly saved libraries.

    Looks up the species, experiment type and affiliation rows by primary
    key, deletes every existing Library, then creates, attaches (as
    ``obj.library_10001`` / ``obj.library_10002``) and saves two libraries
    that differ only in id and name.
    """
    obj.species_human = Species.objects.get(pk=8)
    obj.experiment_rna_seq = ExperimentType.objects.get(pk=4)
    obj.affiliation_alice = Affiliation.objects.get(pk=1)
    obj.affiliation_bob = Affiliation.objects.get(pk=2)

    Library.objects.all().delete()

    sample_libraries = (
        ("10001", 'C2C12 named poorly'),
        ("10002", 'Worm named poorly'),
    )
    for library_id, library_name in sample_libraries:
        record = Library(
            id=library_id,
            library_name=library_name,
            library_species=obj.species_human,
            experiment_type=obj.experiment_rna_seq,
            creation_date=datetime.datetime.now(),
            made_for='scientist unit 2007',
            made_by='microfludics system 7321',
            stopping_point='2A',
            undiluted_concentration='5.01',
            hidden=False,
        )
        # Attach before saving so the attribute exists even if save() fails.
        setattr(obj, 'library_' + library_id, record)
        record.save()
325
@skipUnless(HAVE_RDF, "No RDF Support")
class TestRDFaLibrary(TestCase):
    """Parse the RDFa of a library page and check the emitted statements."""

    def setUp(self):
        self.request = RequestFactory()

    def test_parse_rdfa(self):
        """The library page RDFa contains the expected literal objects."""
        model = get_rdf_memory_model()
        parser = RDF.Parser(name='rdfa')

        bob = AffiliationFactory.create(name='Bob')

        lib_object = LibraryFactory()
        lib_object.affiliations.add(bob)
        url = '/library/{}/'.format(lib_object.id)
        lib_response = self.client.get(url)
        lib_body = smart_str(lib_response.content)
        self.assertNotEqual(len(lib_body), 0)

        parser.parse_string_into_model(model,
                                       lib_body,
                                       'http://localhost' + url)
        # To help debug rdf errors, dump lib_body or the model to a
        # temporary file here.
        # http://jumpgate.caltech.edu/wiki/LibraryOntology#affiliation>
        self.check_literal_object(model, ['Bob'], p=libNS['affiliation'])
        self.check_literal_object(model,
                                  ['experiment type name'],
                                  p=libNS['experiment_type'])
        self.check_literal_object(model, ['400'], p=libNS['gel_cut'])
        self.check_literal_object(model,
                                  ['microfluidics bot 7321'],
                                  p=libNS['made_by'])
        self.check_literal_object(model,
                                  [lib_object.library_name],
                                  p=libNS['name'])
        self.check_literal_object(model,
                                  [lib_object.library_species.scientific_name],
                                  p=libNS['species_name'])

    def check_literal_object(self, model, values, s=None, p=None, o=None):
        """Assert the statements matching (s, p, o) have literals in values.

        The number of matching statements must equal ``len(values)`` and the
        string value of each statement's literal object must be in ``values``.
        """
        statements = list(model.find_statements(
            RDF.Statement(s, p, o)))
        self.assertEqual(len(statements), len(values),
                         "Couln't find %s %s %s" % (s, p, o))
        for statement in statements:
            self.assertIn(statement.object.literal_value['string'], values)

    def check_uri_object(self, model, values, s=None, p=None, o=None):
        """Assert the statements matching (s, p, o) have uris in values.

        The number of matching statements must equal ``len(values)`` and the
        stringified uri of each statement's object must be in ``values``.
        """
        statements = list(model.find_statements(
            RDF.Statement(s, p, o)))
        self.assertEqual(len(statements), len(values),
                         "Couln't find %s %s %s" % (s, p, o))
        for statement in statements:
            self.assertIn(str(statement.object.uri), values)
389
390
391
def get_rdf_memory_model():
    """Return a new RDF.Model backed by a fresh RDF.MemoryStorage."""
    return RDF.Model(RDF.MemoryStorage())
396
def suite():
    """Return a TestSuite with the tests of the four case classes below."""
    from unittest import TestSuite, defaultTestLoader

    test_cases = (
        LibraryAccessionTestCase,
        LibraryTestCase,
        SampleWebTestCase,
        TestRDFaLibrary,
    )
    collected = TestSuite()
    for case in test_cases:
        collected.addTests(defaultTestLoader.loadTestsFromTestCase(case))
    return collected
405
if __name__ == "__main__":
    # Executed as a script: let unittest run the aggregate suite() above.
    import unittest
    unittest.main(defaultTest="suite")