update url to new standard url
[htsworkflow.git] / htsworkflow / submission / test / test_encoded.py
1 from __future__ import absolute_import, print_function
2
3 import json
4 import os
5 from pprint import pprint
6 from unittest import TestCase, TestSuite, defaultTestLoader, skip
7
8 from htsworkflow.submission.encoded import (ENCODED,
9      ENCODED_CONTEXT,
10      ENCODED_NAMESPACES
11 )
12
class TestEncoded(TestCase):
    """Tests for the ENCODED helper class.

    Every test builds its own ``ENCODED('www.encodeproject.org')`` instance
    and works on literal fixture dictionaries defined in the test itself.
    """

    def test_prepare_url(self):
        """Check prepare_url against relative paths and an absolute URL.

        Paths starting with '/' are expected to come back prefixed with
        'https://www.encodeproject.org'; an already absolute URL is expected
        to come back unchanged.
        """
        encode = ENCODED('www.encodeproject.org')

        tests = [
            ('/experiments', 'https://www.encodeproject.org/experiments'),
            ('/experiments/ENCLB045ZZZ',
             'https://www.encodeproject.org/experiments/ENCLB045ZZZ'),
            ('https://www.encodeproject.org/experiments/ENCLB045ZZZ',
             'https://www.encodeproject.org/experiments/ENCLB045ZZZ'),
        ]
        for url, result in tests:
            self.assertEqual(encode.prepare_url(url), result)

    def test_validate(self):
        """Test validation

        The library schema is read from 'library.json' in the same directory
        as this test module and installed into encode.schemas by hand before
        validate() is called on a sample library object.
        """
        schema_file = os.path.join(os.path.dirname(__file__), 'library.json')
        # Use a context manager so the schema file handle is always closed.
        with open(schema_file, 'r') as schema_stream:
            schema = json.load(schema_stream)

        obj = {
            u'@id': u'/libraries/ENCLB045ZZZ/',
            u'@type': [u'Library', u'Item'],
            u'accession': u'ENCLB045ZZZ',
            u'aliases': [],
            u'alternate_accessions': [],
            u'award': u'/awards/U54HG006998/',
            u'biosample': u'/biosamples/ENCBS089RNA/',
            u'date_created': u'2014-01-14T19:44:51.061770+00:00',
            u'depleted_in_term_id': [],
            u'depleted_in_term_name': [],
            u'documents': [],
            u'extraction_method': u'Ambion mirVana',
            u'fragmentation_method': u'chemical (Nextera tagmentation)',
            u'lab': u'/labs/barbara-wold/',
            u'library_size_selection_method': u'SPRI beads',
            u'lysis_method': u'Ambion mirVana',
            u'nucleic_acid_term_id': u'SO:0000871',
            u'nucleic_acid_term_name': u'polyadenylated mRNA',
            u'schema_version': u'2',
            u'size_range': u'>200',
            u'status': u'released',
            u'strand_specificity': False,
            u'submitted_by': u'/users/0e3dde9b-aaf9-42dd-87f7-975a85072ed2/',
            u'treatments': [],
            u'uuid': u'42c46028-708f-4347-a3df-2c82dfb021c4',
        }
        encode = ENCODED('www.encodeproject.org')
        encode.schemas[u'library'] = schema
        encode.validate(obj)
        # The '@id' key must still be present after validation.
        self.assertIn('@id', obj)

    def test_create_context(self):
        """Check the JSON-LD context created for a minimal Library object.

        Namespaces are expected to be missing from the freshly created
        context and present once add_jsonld_namespaces() has been called.
        """
        linked_id = {'@type': '@id'}
        library = {'@id': '/libraries/1234', '@type': ['Library', 'Item']}

        encode = ENCODED('www.encodeproject.org')
        url = encode.prepare_url(library['@id'])
        context = encode.create_jsonld_context(library, url)
        self.assertEqual(
            context['@vocab'],
            'https://www.encodeproject.org/profiles/Library.json#')
        self.assertEqual(context['award'], linked_id)
        self._verify_context(context, 'Library')
        # namespaces not added yet.
        self.assertRaises(AssertionError, self._verify_namespaces, context)
        encode.add_jsonld_namespaces(context)
        self._verify_namespaces(context)

    def test_add_context(self):
        """Checking to make sure nested @base and @vocab urls are set correctly
        """
        obj = {
            "nucleic_acid_term_name": "RNA",
            "accession": "ENCLB044ZZZ",
            "@id": "/libraries/ENCLB044ZZZ/",
            "schema_version": "1",
            "@type": [
                "Library",
                "Item"
            ],
            "lysis_method": "Ambion mirVana",
            "nucleic_acid_term_id": "SO:0000356",
            "biosample": {
                "biosample_term_name": "GM12878",
                "description": "B-lymphocyte, lymphoblastoid, International HapMap Project - CEPH/Utah - European Caucasion, Epstein-Barr Virus",
                "accession": "ENCBS090RNA",
                "date_created": "2013-10-29T21:15:29.144260+00:00",
                "@id": "/biosamples/ENCBS090RNA/",
                "aliases": [
                    "brenton-graveley:GM12878-2",
                    "thomas-gingeras:191WC"
                ],
                "organism": "/organisms/human/",
                "@type": [
                    "Biosample",
                    "Item"
                ]
            },
        }

        encode = ENCODED('www.encodeproject.org')
        bio_base = encode.prepare_url(obj['biosample']['@id'])

        url = encode.prepare_url('/libraries/ENCLB044ZZZ/?format=json&embed=False')
        obj_type = encode.get_object_type(obj)
        schema_url = encode.get_schema_url(obj_type)
        encode.add_jsonld_context(obj, url)

        # The embedded biosample gets its own context based on its own @id.
        self.assertEqual(obj['biosample']['@context']['@base'], bio_base)
        self.assertEqual(obj['@context']['@vocab'], schema_url)
        self._verify_context(obj['@context'], 'Library')
        self._verify_namespaces(obj['@context'])
        self._verify_context(obj['biosample']['@context'], 'Biosample')
        self.assertEqual(obj['@context']['rdf'], 'http://www.w3.org/1999/02/22-rdf-syntax-ns#')
        self.assertEqual(obj['@context']['OBO'], 'http://purl.obolibrary.org/obo/')

    def test_convert_search_to_jsonld(self):
        """Check that converted search results no longer carry 'award.rfa'.

        The fixture is a search response whose '@graph' holds two biosample
        rows with dotted keys such as 'award.rfa'.
        """
        example = {
            'count': {'biosamples': 2},
            'portal_title': 'ENCODE',
            'title': 'Search',
            'notification': 'Success',
            'filters': [],
            '@id': '/search/?searchTerm=wold',
            '@type': ['search'],
            'facets': [],
            '@graph': [
                {
                    u'@id': u'/biosamples/ENCBS125ENC/',
                    u'@type': [u'Biosample', u'Item'],
                    u'accession': u'ENCBS125ENC',
                    u'award.rfa': u'ENCODE2-Mouse',
                    u'biosample_term_name': u'myocyte',
                    u'biosample_type': u'in vitro differentiated cells',
                    u'characterizations.length': [],
                    u'constructs.length': [],
                    u'lab.title': u'Barbara Wold, Caltech',
                    u'life_stage': u'unknown',
                    u'organism.name': u'mouse',
                    u'source.title': u'Barbara Wold',
                    u'status': u'CURRENT',
                    u'treatments.length': [],
                },
                {
                    u'@id': u'/biosamples/ENCBS126ENC/',
                    u'@type': [u'Biosample', u'Item'],
                    u'accession': u'ENCBS126ENC',
                    u'award.rfa': u'ENCODE2-Mouse',
                    u'biosample_term_name': u'myocyte',
                    u'biosample_type': u'in vitro differentiated cells',
                    u'characterizations.length': [],
                    u'constructs.length': [],
                    u'lab.title': u'Barbara Wold, Caltech',
                    u'life_stage': u'unknown',
                    u'organism.name': u'mouse',
                    u'source.title': u'Barbara Wold',
                    u'status': u'CURRENT',
                    u'treatments.length': [],
                },
            ],
        }

        encode = ENCODED('www.encodeproject.org')
        result = encode.convert_search_to_jsonld(example)
        for obj in result['@graph']:
            self.assertNotIn('award.rfa', obj)

    def _verify_context(self, context, obj_type):
        """Assert context contains the expected ENCODED_CONTEXT entries.

        Both the entries stored under the None key and the entries stored
        under obj_type must be present in context with equal values.
        """
        for context_key in [None, obj_type]:
            for k in ENCODED_CONTEXT[context_key]:
                self.assertIn(k, context)
                self.assertEqual(ENCODED_CONTEXT[context_key][k], context[k])

    def _verify_namespaces(self, context):
        """Assert every ENCODED_NAMESPACES entry is in context, value equal."""
        for k in ENCODED_NAMESPACES:
            self.assertIn(k, context)
            self.assertEqual(ENCODED_NAMESPACES[k], context[k])
182
def suite():
    """Return a TestSuite holding all tests loaded from TestEncoded."""
    collected = TestSuite()
    encoded_tests = defaultTestLoader.loadTestsFromTestCase(TestEncoded)
    collected.addTests(encoded_tests)
    return collected
188
# Run the suite() defined in this module when executed as a script.
if __name__ == "__main__":
    import unittest
    unittest.main(defaultTest='suite')