b992af95ea3bacfda98fa1570c57f769b519e893
[htsworkflow.git] / htsworkflow / submission / test / test_daf.py
1 from contextlib import contextmanager
2 import logging
3 import os
4 from six.moves import StringIO
5 import shutil
6 import tempfile
7 from unittest import TestCase, TestSuite, defaultTestLoader
8
9 from rdflib import Graph, Namespace, URIRef
10 from rdflib.namespace import RDF
11
12 from htsworkflow.submission import daf, results
13 from htsworkflow.util.rdfns import (
14      dafTermOntology,
15      submissionLog,
16      submissionOntology
17 )
18
19 from htsworkflow.submission.test import test_results
20
# Sample DAF text shared by most tests in this module.  It declares six
# variables and two views (FastqRd1 and Signal), both with
# hasReplicates set to "yes".
test_daf = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    yes
required         no

view             Signal
longLabelPrefix  Caltech Histone Signal
type             bigWig
hasReplicates    yes
required         no
"""
44
# Variant of the sample DAF with a single view (FastqRd1) whose
# hasReplicates is "no"; used by the test that expects no replicate
# variable to be required.
test_daf_no_rep = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
62
# Variant of the sample DAF that lists five entries under "variables"
# and adds an "extraVariables" line (controlId, treatment); it has a
# single FastqRd1 view with hasReplicates "no".
test_daf_extra = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell,antibody,sex,age,strain
extraVariables    controlId,treatment
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
81
82
83 class TestDAF(TestCase):
84     def test_parse(self):
85
86         parsed = daf.fromstring(test_daf)
87
88         self.failUnlessEqual(parsed['assembly'], 'mm9')
89         self.failUnlessEqual(parsed['grant'], 'Hardison')
90         self.failUnlessEqual(len(parsed['variables']), 6)
91         self.failUnlessEqual(len(parsed['views']), 2)
92         self.failUnlessEqual(len(parsed['views']['FastqRd1']), 5)
93         self.failUnlessEqual(len(parsed['views']['Signal']), 5)
94         signal = parsed['views']['Signal']
95         self.failUnlessEqual(signal['required'], False)
96         self.failUnlessEqual(signal['longLabelPrefix'],
97                              'Caltech Histone Signal')
98
99
100     def test_rdf(self):
101
102         parsed = daf.fromstring(test_daf)
103         model = Graph()
104
105         name = 'cursub'
106         subNS = Namespace(str(submissionLog[name]))
107         daf.add_to_model(model, parsed, submissionLog[name])
108
109         signal_view_node = subNS['/view/Signal']
110
111         turtle = str(model.serialize(format='turtle'))
112
113         self.failUnless(str(signal_view_node) in turtle)
114
115         statements = list(model.triples((signal_view_node, None, None)))
116         self.failUnlessEqual(len(statements), 6)
117         names = list(model.objects(signal_view_node, dafTermOntology['name']))
118         self.assertEqual(len(names), 1)
119         self.failUnlessEqual(names[0].toPython(), u'Signal')
120
121     def test_get_view_namespace_from_string(self):
122         url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub/"
123         target = Namespace(url + 'view/')
124         view_namespace = daf.get_view_namespace(url)
125         self.assertEqual(view_namespace[''], target[''])
126
127     def test_get_view_namespace_from_string_no_trailing_slash(self):
128         url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub"
129         target = Namespace(url + '/view/')
130         view_namespace = daf.get_view_namespace(url)
131         self.assertEqual(view_namespace[''], target[''])
132
133     def test_get_view_namespace_from_uri_node(self):
134         url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub/"
135         node = URIRef(url)
136         target = Namespace(url + 'view/')
137         view_namespace = daf.get_view_namespace(node)
138         self.assertEqual(view_namespace[''], target[''])
139
140
def load_daf_mapper(name, extra_statements=None, ns=None, test_daf=test_daf):
    """Build a ``daf.UCSCSubmission`` backed by a fresh rdflib ``Graph``.

    :param name: submission name passed to ``daf.UCSCSubmission``.
    :param extra_statements: optional turtle text parsed into the graph
        before the mapper is constructed.
    :param ns: publicID used when parsing ``extra_statements``;
        ``"http://extra"`` is used when this is None.
    :param test_daf: DAF text handed to the mapper as a file-like stream.
    :returns: the constructed mapper.
    """
    graph = Graph()
    public_id = "http://extra" if ns is None else ns

    if extra_statements is not None:
        graph.parse(data=extra_statements, format='turtle', publicID=public_id)

    return daf.UCSCSubmission(name,
                              daf_file=StringIO(test_daf),
                              model=graph)
154
155
156 class TestUCSCSubmission(TestCase):
157     def setUp(self):
158         test_results.generate_sample_results_tree(self, 'daf_results')
159
160     def tearDown(self):
161         # see things created by temp_results.generate_sample_results_tree
162         shutil.rmtree(self.tempdir)
163
164     def test_create_mapper_add_pattern(self):
165         name = 'testsub'
166         mapper = load_daf_mapper(name)
167         pattern = '.bam\Z(?ms)'
168         mapper.add_pattern('Signal', pattern)
169
170         s = (mapper.viewNS['Signal'],
171              dafTermOntology['filename_re'],
172              None)
173         search = list(mapper.model.triples(s))
174         self.failUnlessEqual(len(search), 1)
175         self.failUnlessEqual(str(search[0][0]),
176                              str(submissionLog['testsub/view/Signal']))
177         self.failUnlessEqual(str(search[0][1]),
178                              str(dafTermOntology['filename_re']))
179         #self.failUnlessEqual(search[0].object.literal_value['string'], pattern)
180
181
182     def test_find_one_view(self):
183         name='testfind'
184         extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
185 @prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .
186
187 thisView:Signal dafTerm:filename_re ".*\\\\.bam" .
188 thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
189 '''.format(name)
190         daf_mapper = load_daf_mapper(name, extra_statements = extra)
191
192         view = daf_mapper.find_view('filename_r1.fastq')
193
194         view_root = 'http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/'
195         view_root = view_root.format(name)
196         self.failUnlessEqual(str(view),
197                              '{0}{1}'.format(view_root,'FastqRd1'))
198
199     def test_find_overlapping_view(self):
200         name = 'testfind'
201         extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
202 @prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .
203
204 thisView:fastq dafTerm:filename_re ".*\\\\.fastq" .
205 thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
206 '''.format(name)
207         daf_mapper = load_daf_mapper(name, extra_statements = extra)
208
209         self.failUnlessRaises(daf.ModelException,
210                               daf_mapper.find_view,
211                               'filename_r1.fastq')
212
213     def test_find_attributes(self):
214         lib_id = '11204'
215         lib_url = 'http://jumpgate.caltech.edu/library/%s/' %(lib_id)
216         extra = '''@prefix dafTerm: <http://jumpgate.caltech.edu/wiki/UcscDaf#> .
217 @prefix submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#> .
218 @prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/view/> .
219 @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
220
221 thisView:Signal dafTerm:filename_re ".*\\\\.bam" ;
222       submissionOntology:view_name "Signal" .
223 thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
224         submissionOntology:view_name "FastqRd1" .
225 <%(libUrl)s> <%(libraryOntology)sgel_cut> "100"^^xsd:decimal .
226 ''' % {'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
227        'libUrl': lib_url}
228
229         daf_mapper = load_daf_mapper('testfind', extra)
230         libNode = URIRef(lib_url)
231         daf_mapper._add_library_details_to_model(libNode)
232         gel_cut = daf_mapper._get_library_attribute(libNode, 'gel_cut')
233         # make sure we can override attributes, the value in our
234         # server is 500 for this library
235         self.failUnlessEqual(gel_cut, 100)
236
237         species = daf_mapper._get_library_attribute(libNode, 'species_name')
238         self.failUnlessEqual(species, "Homo sapiens")
239
240         with mktempdir('analysis') as analysis_dir:
241             path, analysis_name = os.path.split(analysis_dir)
242             with mktempfile('.bam', dir=analysis_dir) as filename:
243                 daf_mapper.construct_track_attributes(analysis_dir,
244                                                       libNode,
245                                                       filename)
246
247         sub_root = "http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/"
248         submission_name = sub_root + analysis_name
249         sources = list(daf_mapper.model.subjects(RDF['type'], submissionOntology['submission']))
250         self.assertEqual(len(sources), 1)
251         source = sources[0]
252         self.failUnlessEqual(str(source), submission_name)
253
254         view_name = submission_name + '/Signal'
255         views = list(daf_mapper.model.objects(source, submissionOntology['has_view']))
256         self.assertEqual(len(views), 1)
257         self.failUnlessEqual(str(views[0]), view_name)
258
259
260     def test_library_url(self):
261         daf_mapper = load_daf_mapper('urltest')
262
263         self.failUnlessEqual(daf_mapper.library_url,
264                              'http://jumpgate.caltech.edu/library/')
265         daf_mapper.library_url = 'http://google.com'
266         self.failUnlessEqual(daf_mapper.library_url, 'http://google.com' )
267
268     def test_daf_with_replicate(self):
269         daf_mapper = load_daf_mapper('test_rep')
270         self.failUnlessEqual(daf_mapper.need_replicate(), True)
271         self.failUnless('replicate' in daf_mapper.get_daf_variables())
272
273     def test_daf_without_replicate(self):
274         daf_mapper = load_daf_mapper('test_rep',test_daf=test_daf_no_rep)
275         self.failUnlessEqual(daf_mapper.need_replicate(), False)
276         self.failUnless('replicate' not in daf_mapper.get_daf_variables())
277
278     def test_daf_with_extra(self):
279         daf_mapper = load_daf_mapper('test_rep',test_daf=test_daf_extra)
280         variables = daf_mapper.get_daf_variables()
281
282         self.assertEqual(len(variables), 11)
283         self.failUnless('treatment' in variables)
284         self.failUnless('controlId' in variables)
285
286
287     def test_link_daf(self):
288         name = 'testsub'
289         submission = load_daf_mapper(name, test_daf=test_daf)
290         result_map = results.ResultMap()
291         result_dir = os.path.join(self.sourcedir,
292                                   test_results.S1_NAME)
293         result_map['1000'] = result_dir
294
295         submission.link_daf(result_map)
296
297         # make sure daf gets linked
298         created_daf = os.path.join(result_dir, name+'.daf')
299         self.failUnless(os.path.exists(created_daf))
300         stream = open(created_daf,'r')
301         daf_body = stream.read()
302         stream.close()
303
304         self.failUnlessEqual(test_daf, daf_body)
305
306
@contextmanager
def mktempdir(prefix='tmp'):
    """Context manager yielding a freshly created temporary directory.

    The directory is created with ``tempfile.mkdtemp`` and removed with
    ``shutil.rmtree`` when the ``with`` block exits, including when the
    block raises.

    :param prefix: prefix for the generated directory name.
    """
    d = tempfile.mkdtemp(prefix=prefix)
    try:
        yield d
    finally:
        # Clean up even if the with-body raised.
        shutil.rmtree(d)
312
313
@contextmanager
def mktempfile(suffix='', prefix='tmp', dir=None):
    """Context manager yielding the path of a new temporary file.

    The file is created with ``tempfile.mkstemp``.  On exit the file
    descriptor is closed and the file is unlinked, including when the
    ``with`` block raises.

    :param suffix: suffix for the generated file name.
    :param prefix: prefix for the generated file name.
    :param dir: directory to create the file in; passed to ``mkstemp``.
    """
    fd, pathname = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
    try:
        yield pathname
    finally:
        # Always release the descriptor, and remove the file even if
        # closing fails.
        try:
            os.close(fd)
        finally:
            os.unlink(pathname)
320
def suite():
    """Return a TestSuite with the tests of TestDAF and TestUCSCSubmission."""
    collected = TestSuite()
    for case in (TestDAF, TestUCSCSubmission):
        collected.addTests(defaultTestLoader.loadTestsFromTestCase(case))
    return collected
326
if __name__ == "__main__":
    # When executed as a script: turn on debug-level logging and run
    # the tests returned by suite().
    from unittest import main

    logging.basicConfig(level=logging.DEBUG)
    main(defaultTest='suite')