Initial port to python3
[htsworkflow.git] / htsworkflow / submission / test / test_daf.py
1 from contextlib import contextmanager
2 import os
3 from io import StringIO
4 import shutil
5 import tempfile
6 from unittest import TestCase, TestSuite, defaultTestLoader
7
8 from htsworkflow.submission import daf, results
9 from htsworkflow.util.rdfhelp import \
10      dafTermOntology, \
11      fromTypedNode, \
12      rdfNS, \
13      submissionLog, \
14      submissionOntology, \
15      get_model, \
16      get_serializer
17
18 from htsworkflow.submission.test import test_results
19 import RDF
20
# DAF fixture text with two views, FastqRd1 and Signal, both declared with
# "hasReplicates yes".  It is the default input of load_daf_mapper() and is
# compared verbatim against the linked file in test_link_daf, so its text
# must not be reformatted.
test_daf = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    yes
required         no

view             Signal
longLabelPrefix  Caltech Histone Signal
type             bigWig
hasReplicates    yes
required         no
"""
44
# DAF fixture text with a single FastqRd1 view declared with
# "hasReplicates no"; used by test_daf_without_replicate.
test_daf_no_rep = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
62
# DAF fixture text that adds an "extraVariables" line (controlId, treatment)
# on top of five regular variables; used by test_daf_with_extra.
test_daf_extra = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell,antibody,sex,age,strain
extraVariables    controlId,treatment
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
81
82
class TestDAF(TestCase):
    """Tests for DAF parsing, its RDF export and view-namespace lookup.

    Only the ``assert*`` method names are used: the ``failUnless*`` aliases
    were deprecated in Python 3.1 and removed in Python 3.12.
    """

    def test_parse(self):
        """Parse the two-view fixture and check the resulting mapping."""
        parsed = daf.fromstring(test_daf)

        self.assertEqual(parsed['assembly'], 'mm9')
        self.assertEqual(parsed['grant'], 'Hardison')
        self.assertEqual(len(parsed['variables']), 6)
        self.assertEqual(len(parsed['views']), 2)
        self.assertEqual(len(parsed['views']['FastqRd1']), 5)
        self.assertEqual(len(parsed['views']['Signal']), 5)
        signal = parsed['views']['Signal']
        self.assertEqual(signal['required'], False)
        self.assertEqual(signal['longLabelPrefix'],
                         'Caltech Histone Signal')

    def test_rdf(self):
        """Add a parsed DAF to an RDF model and look up the Signal view."""
        parsed = daf.fromstring(test_daf)
        #mem = RDF.Storage(storage_name='hashes',
        #                  options_string='hash-type="memory"'),
        mem = RDF.MemoryStorage()
        model = RDF.Model(mem)

        name = 'cursub'
        subNS = RDF.NS(str(submissionLog[name].uri))
        daf.add_to_model(model, parsed, submissionLog[name].uri)

        signal_view_node = RDF.Node(subNS['/view/Signal'].uri)

        writer = get_serializer()
        turtle = writer.serialize_model_to_string(model)

        # The view URI has to show up in the serialized output.
        self.assertIn(str(signal_view_node.uri), turtle)

        statements = list(model.find_statements(
            RDF.Statement(signal_view_node, None, None)))
        self.assertEqual(len(statements), 6)
        # Separate local so the submission name string above is not rebound
        # to an RDF node.
        view_name = model.get_target(signal_view_node,
                                     dafTermOntology['name'])
        self.assertEqual(fromTypedNode(view_name), 'Signal')

    def test_get_view_namespace_from_string(self):
        """A URL string ending in '/' gets 'view/' appended."""
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub/"
        target = RDF.NS(url + 'view/')
        view_namespace = daf.get_view_namespace(url)
        self.assertEqual(view_namespace[''], target[''])

    def test_get_view_namespace_from_string_no_trailing_slash(self):
        """A URL string without a trailing '/' gets '/view/' appended."""
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub"
        target = RDF.NS(url + '/view/')
        view_namespace = daf.get_view_namespace(url)
        self.assertEqual(view_namespace[''], target[''])

    def test_get_view_namespace_from_uri_node(self):
        """An RDF URI node is accepted in place of a URL string."""
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub/"
        node = RDF.Node(RDF.Uri(url))
        target = RDF.NS(url + 'view/')
        view_namespace = daf.get_view_namespace(node)
        self.assertEqual(view_namespace[''], target[''])
144
145
def load_daf_mapper(name, extra_statements=None, ns=None, test_daf=test_daf):
    """Build a ``daf.UCSCSubmission`` over a fresh model for the tests.

    :param name: submission name handed to ``daf.UCSCSubmission``.
    :param extra_statements: optional turtle text parsed into the model
        before the submission object is created.
    :param ns: third argument to the turtle parser (presumably the base
        URI); falls back to ``"http://extra"`` when omitted.
    :param test_daf: DAF text wrapped in a ``StringIO`` and passed as
        ``daf_file``.
    :returns: the newly created ``daf.UCSCSubmission``.
    """
    rdf_model = get_model()
    base = "http://extra" if ns is None else ns

    if extra_statements is not None:
        turtle_parser = RDF.Parser(name='turtle')
        turtle_parser.parse_string_into_model(rdf_model, extra_statements, base)

    return daf.UCSCSubmission(name,
                              daf_file=StringIO(test_daf),
                              model=rdf_model)
161
def dump_model(model):
    """Debugging aid: print *model* as serialized by ``get_serializer()``."""
    print(get_serializer().serialize_model_to_string(model))
166
167
class TestUCSCSubmission(TestCase):
    """Tests for ``daf.UCSCSubmission`` objects built by ``load_daf_mapper``.

    Only the ``assert*`` method names are used: the ``failUnless*`` aliases
    were deprecated in Python 3.1 and removed in Python 3.12.
    """

    def setUp(self):
        # Presumably this is what sets self.tempdir and self.sourcedir,
        # which tearDown and test_link_daf read -- confirm in test_results.
        test_results.generate_sample_results_tree(self)

    def tearDown(self):
        # see things created by test_results.generate_sample_results_tree
        shutil.rmtree(self.tempdir)

    def test_create_mapper_add_pattern(self):
        """add_pattern() stores one filename_re statement on the view."""
        name = 'testsub'
        mapper = load_daf_mapper(name)
        # Raw string: same characters as before, but no invalid "\Z" escape
        # in the literal.
        # NOTE(review): Python 3.11+ `re` rejects global flags that are not
        # at the start of a pattern; confirm add_pattern never compiles this.
        pattern = r'.bam\Z(?ms)'
        mapper.add_pattern('Signal', pattern)

        s = RDF.Statement(mapper.viewNS['Signal'],
                          dafTermOntology['filename_re'],
                          None)
        search = list(mapper.model.find_statements(s))
        self.assertEqual(len(search), 1)
        self.assertEqual(str(search[0].subject),
                         str(submissionLog['testsub/view/Signal']))
        self.assertEqual(str(search[0].predicate),
                         str(dafTermOntology['filename_re']))
        #self.assertEqual(search[0].object.literal_value['string'], pattern)

    def test_find_one_view(self):
        """A filename matching exactly one view pattern resolves to it."""
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        view = daf_mapper.find_view('filename_r1.fastq')

        # dump_model(daf_mapper.model)
        view_root = 'http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/'
        view_root = view_root.format(name)
        self.assertEqual(str(view.uri),
                         '{0}{1}'.format(view_root, 'FastqRd1'))

    def test_find_overlapping_view(self):
        """A filename matching two view patterns raises ModelException."""
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:fastq dafTerm:filename_re ".*\\\\.fastq" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        self.assertRaises(daf.ModelException,
                          daf_mapper.find_view,
                          'filename_r1.fastq')

    def test_find_attributes(self):
        """Library attributes and the submission/view nodes for one file."""
        lib_id = '11204'
        lib_url = 'http://jumpgate.caltech.edu/library/%s/' % (lib_id,)
        extra = '''@prefix dafTerm: <http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/view/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" ;
      submissionOntology:view_name "Signal" .
thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
        submissionOntology:view_name "FastqRd1" .
<%(libUrl)s> <%(libraryOntology)sgel_cut> "100"^^xsd:decimal .
''' % {'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
       'libUrl': lib_url}

        daf_mapper = load_daf_mapper('testfind', extra)
        libNode = RDF.Node(RDF.Uri(lib_url))
        daf_mapper._add_library_details_to_model(libNode)
        gel_cut = daf_mapper._get_library_attribute(libNode, 'gel_cut')
        # make sure we can override attributes, the value in our
        # server is 500 for this library
        self.assertEqual(gel_cut, 100)

        species = daf_mapper._get_library_attribute(libNode, 'species_name')
        self.assertEqual(species, "Homo sapiens")

        with mktempdir('analysis') as analysis_dir:
            # Only the final path component is needed for the URI below.
            analysis_name = os.path.basename(analysis_dir)
            with mktempfile('.bam', dir=analysis_dir) as filename:
                daf_mapper.construct_track_attributes(analysis_dir,
                                                      libNode,
                                                      filename)

        #dump_model(daf_mapper.model)

        sub_root = "http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/"
        submission_name = sub_root + analysis_name
        source = daf_mapper.model.get_source(rdfNS['type'],
                                             submissionOntology['submission'])
        self.assertEqual(str(source.uri), submission_name)

        view_name = submission_name + '/Signal'
        view = daf_mapper.model.get_target(source,
                                           submissionOntology['has_view'])
        self.assertEqual(str(view.uri), view_name)

    def test_library_url(self):
        """library_url has a default value and can be reassigned."""
        daf_mapper = load_daf_mapper('urltest')

        self.assertEqual(daf_mapper.library_url,
                         'http://jumpgate.caltech.edu/library/')
        daf_mapper.library_url = 'http://google.com'
        self.assertEqual(daf_mapper.library_url, 'http://google.com')

    def test_daf_with_replicate(self):
        """The default fixture requires a replicate variable."""
        daf_mapper = load_daf_mapper('test_rep')
        self.assertEqual(daf_mapper.need_replicate(), True)
        self.assertIn('replicate', daf_mapper.get_daf_variables())

    def test_daf_without_replicate(self):
        """The no-replicate fixture does not add a replicate variable."""
        daf_mapper = load_daf_mapper('test_rep', test_daf=test_daf_no_rep)
        self.assertEqual(daf_mapper.need_replicate(), False)
        self.assertNotIn('replicate', daf_mapper.get_daf_variables())

    def test_daf_with_extra(self):
        """extraVariables entries are included in the DAF variables."""
        daf_mapper = load_daf_mapper('test_rep', test_daf=test_daf_extra)
        variables = daf_mapper.get_daf_variables()
        self.assertEqual(len(variables), 11)
        self.assertIn('treatment', variables)
        self.assertIn('controlId', variables)

    def test_link_daf(self):
        """link_daf() writes <name>.daf, with the DAF text, in the result dir."""
        name = 'testsub'
        submission = load_daf_mapper(name, test_daf=test_daf)
        result_map = results.ResultMap()
        result_dir = os.path.join(self.sourcedir,
                                  test_results.S1_NAME)
        result_map['1000'] = result_dir

        submission.link_daf(result_map)

        # make sure daf gets linked
        created_daf = os.path.join(result_dir, name + '.daf')
        self.assertTrue(os.path.exists(created_daf))
        # The with-block closes the file even if read() raises.
        with open(created_daf, 'r') as stream:
            daf_body = stream.read()

        self.assertEqual(test_daf, daf_body)
316
317
@contextmanager
def mktempdir(prefix='tmp'):
    """Context manager yielding the path of a fresh temporary directory.

    :param prefix: name prefix passed to ``tempfile.mkdtemp``.

    The directory and its contents are removed when the ``with`` block
    exits, including when the block raises.
    """
    d = tempfile.mkdtemp(prefix=prefix)
    try:
        yield d
    finally:
        # Without try/finally an exception in the with-body is re-raised at
        # the yield and the cleanup below would never run.
        shutil.rmtree(d)
323
324
@contextmanager
def mktempfile(suffix='', prefix='tmp', dir=None):
    """Context manager yielding the path of a fresh temporary file.

    :param suffix: filename suffix passed to ``tempfile.mkstemp``.
    :param prefix: filename prefix passed to ``tempfile.mkstemp``.
    :param dir: directory to create the file in; ``None`` lets
        ``tempfile.mkstemp`` choose.

    The file is deleted when the ``with`` block exits, including when the
    block raises.
    """
    fd, pathname = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
    # Only the path is handed to the caller, so the descriptor can be
    # released immediately rather than held open for the whole block.
    os.close(fd)
    try:
        yield pathname
    finally:
        # Without try/finally an exception in the with-body would leave the
        # file behind.
        os.unlink(pathname)
331
def suite():
    """Return a ``TestSuite`` with the tests of both test cases in this module.

    The previous body loaded ``TestUCSCInfo``, a name this module neither
    defines nor imports, so calling it raised ``NameError``.
    """
    # Named `tests` so the local does not shadow this function's own name.
    tests = TestSuite()
    for case in (TestDAF, TestUCSCSubmission):
        tests.addTests(defaultTestLoader.loadTestsFromTestCase(case))
    return tests
336
# Script entry point: run the tests collected by suite() in this module.
if __name__ == "__main__":
    import unittest
    unittest.main(defaultTest='suite')