Add support for new "extraVariables" term in DAF.
[htsworkflow.git] / htsworkflow / submission / test / test_daf.py
1 from contextlib import contextmanager
2 import os
3 from StringIO import StringIO
4 import shutil
5 import tempfile
6 import unittest
7
8 from htsworkflow.submission import daf
9 from htsworkflow.util.rdfhelp import \
10      dafTermOntology, \
11      fromTypedNode, \
12      rdfNS, \
13      submissionLog, \
14      submissionOntology, \
15      get_model, \
16      get_serializer
17
18 import RDF
19
# DAF fixture with two views, FastqRd1 and Signal, both declaring
# "hasReplicates yes".  It is the default ``test_daf`` argument of
# load_daf_mapper() and is parsed directly by the TestDAF cases.
test_daf = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    yes
required         no

view             Signal
longLabelPrefix  Caltech Histone Signal
type             bigWig
hasReplicates    yes
required         no
"""
43
# DAF fixture with a single FastqRd1 view that declares "hasReplicates no";
# passed to load_daf_mapper() by test_daf_without_replicate.
test_daf_no_rep = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
61
# DAF fixture exercising the "extraVariables" term: five entries under
# "variables" plus "controlId,treatment" under "extraVariables", and a single
# FastqRd1 view with "hasReplicates no".  Used by test_daf_with_extra.
test_daf_extra = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell,antibody,sex,age,strain
extraVariables    controlId,treatment
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
80
81
class TestDAF(unittest.TestCase):
    """Tests for daf.fromstring, daf.add_to_model and daf.get_view_namespace.

    Uses the ``assert*`` method names throughout; the ``failUnless*``
    spellings are deprecated aliases of the same checks.
    """

    def test_parse(self):
        # Parse the two-view fixture and check header terms and view settings.
        parsed = daf.fromstring(test_daf)

        self.assertEqual(parsed['assembly'], 'mm9')
        self.assertEqual(parsed['grant'], 'Hardison')
        self.assertEqual(len(parsed['variables']), 6)
        self.assertEqual(len(parsed['views']), 2)
        self.assertEqual(len(parsed['views']['FastqRd1']), 5)
        self.assertEqual(len(parsed['views']['Signal']), 5)
        signal = parsed['views']['Signal']
        self.assertEqual(signal['required'], False)
        self.assertEqual(signal['longLabelPrefix'],
                         'Caltech Histone Signal')

    def test_rdf(self):
        # Load the parsed fixture into an in-memory model and inspect the
        # statements attached to the Signal view node.
        parsed = daf.fromstring(test_daf)
        #mem = RDF.Storage(storage_name='hashes',
        #                  options_string='hash-type="memory"'),
        mem = RDF.MemoryStorage()
        model = RDF.Model(mem)

        name = 'cursub'
        subNS = RDF.NS(str(submissionLog[name].uri))
        daf.add_to_model(model, parsed, submissionLog[name].uri)

        signal_view_node = RDF.Node(subNS['/view/Signal'].uri)

        writer = get_serializer()
        turtle = writer.serialize_model_to_string(model)

        self.assertTrue(str(signal_view_node.uri) in turtle)

        statements = list(model.find_statements(
            RDF.Statement(
                signal_view_node, None, None)))
        self.assertEqual(len(statements), 6)
        # Separate local so the submission name string above is not rebound
        # to an RDF node.
        name_node = model.get_target(signal_view_node,
                                     dafTermOntology['name'])
        self.assertEqual(fromTypedNode(name_node), u'Signal')

    def test_get_view_namespace_from_string(self):
        # A submission URL string that already ends in '/'.
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub/"
        target = RDF.NS(url + 'view/')
        view_namespace = daf.get_view_namespace(url)
        self.assertEqual(view_namespace[''], target[''])

    def test_get_view_namespace_from_string_no_trailing_slash(self):
        # Same expectation when the URL string lacks the trailing '/'.
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub"
        target = RDF.NS(url + '/view/')
        view_namespace = daf.get_view_namespace(url)
        self.assertEqual(view_namespace[''], target[''])

    def test_get_view_namespace_from_uri_node(self):
        # The submission may also be given as an RDF URI node.
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub/"
        node = RDF.Node(RDF.Uri(url))
        target = RDF.NS(url + 'view/')
        view_namespace = daf.get_view_namespace(node)
        self.assertEqual(view_namespace[''], target[''])
143
144
def load_daf_mapper(name, extra_statements=None, ns=None, test_daf=test_daf):
    """Build a daf.DAFMapper for ``name`` backed by a fresh get_model() model.

    name -- passed through as the first argument of daf.DAFMapper.
    extra_statements -- optional turtle text parsed into the model before
        the mapper is created.
    ns -- base URI handed to the turtle parser; "http://extra" when None.
    test_daf -- DAF text wrapped in a StringIO and given to the mapper as
        its ``daf_file``.

    Returns the constructed mapper.
    """
    model = get_model()
    base_uri = "http://extra" if ns is None else ns

    if extra_statements is not None:
        turtle_parser = RDF.Parser(name='turtle')
        turtle_parser.parse_string_into_model(model, extra_statements,
                                              base_uri)

    return daf.DAFMapper(name, daf_file=StringIO(test_daf), model=model)
160
def dump_model(model):
    """Print ``model`` as serialized by the get_serializer() writer.

    Within this file it is only referenced from commented-out calls in
    the tests.
    """
    writer = get_serializer()
    turtle = writer.serialize_model_to_string(model)
    # Parenthesised single argument: identical output under the Python 2
    # print statement, and also valid where print is a function.
    print(turtle)
165
166
class TestDAFMapper(unittest.TestCase):
    """Tests for daf.DAFMapper instances built through load_daf_mapper().

    Uses the ``assert*`` method names throughout; the ``failUnless*``
    spellings are deprecated aliases of the same checks.
    """

    def test_create_mapper_add_pattern(self):
        # add_pattern() should leave exactly one filename_re statement on
        # the Signal view node.
        name = 'testsub'
        mapper = load_daf_mapper(name)
        # Raw string: same characters as the previous literal, without
        # depending on an unrecognised backslash escape being kept as-is.
        pattern = r'.bam\Z(?ms)'
        mapper.add_pattern('Signal', pattern)

        s = RDF.Statement(mapper.viewNS['Signal'],
                          dafTermOntology['filename_re'],
                          None)
        search = list(mapper.model.find_statements(s))
        self.assertEqual(len(search), 1)
        self.assertEqual(str(search[0].subject),
                         str(submissionLog['testsub/view/Signal']))
        self.assertEqual(str(search[0].predicate),
                         str(dafTermOntology['filename_re']))
        #self.assertEqual(search[0].object.literal_value['string'], pattern)

    def test_find_one_view(self):
        # Only the FastqRd1 pattern matches the filename, so find_view()
        # should return that view.
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        view = daf_mapper.find_view('filename_r1.fastq')

        # dump_model(daf_mapper.model)
        view_root = 'http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/'
        view_root = view_root.format(name)
        # str(view)[1:-1] drops the first and last character of the node's
        # string form before comparing with the bare URL.
        self.assertEqual(str(view)[1:-1],
                         '{0}{1}'.format(view_root, 'FastqRd1'))

    def test_find_overlapping_view(self):
        # Both patterns match the filename; find_view() is expected to
        # raise ModelException.
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:fastq dafTerm:filename_re ".*\\\\.fastq" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        self.assertRaises(daf.ModelException,
                          daf_mapper.find_view,
                          'filename_r1.fastq')

    def test_find_attributes(self):
        lib_id = '11204'
        lib_url = 'http://jumpgate.caltech.edu/library/%s/' % (lib_id)
        extra = '''@prefix dafTerm: <http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/view/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" ;
      submissionOntology:view_name "Signal" .
thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
        submissionOntology:view_name "FastqRd1" .
<%(libUrl)s> <%(libraryOntology)sgel_cut> "100"^^xsd:decimal .
''' % {'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
       'libUrl': lib_url}

        daf_mapper = load_daf_mapper('testfind', extra)
        libNode = RDF.Node(RDF.Uri(lib_url))
        daf_mapper._add_library_details_to_model(libNode)
        gel_cut = daf_mapper._get_library_attribute(libNode, 'gel_cut')
        # make sure we can override attributes, the value in our
        # server is 500 for this library
        self.assertEqual(gel_cut, 100)

        species = daf_mapper._get_library_attribute(libNode, 'species')
        self.assertEqual(species, "Homo sapiens")

        with mktempdir('analysis') as analysis_dir:
            # Only the final path component is needed for the expected
            # submission URL checked below.
            analysis_name = os.path.basename(analysis_dir)
            with mktempfile('.bam', dir=analysis_dir) as filename:
                daf_mapper.construct_track_attributes(analysis_dir,
                                                      libNode,
                                                      filename)

        #dump_model(daf_mapper.model)

        sub_root = "http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/"
        submission_name = sub_root + analysis_name
        source = daf_mapper.model.get_source(rdfNS['type'],
                                             submissionOntology['submission'])
        self.assertEqual(str(source.uri), submission_name)

        view_name = submission_name + '/Signal'
        view = daf_mapper.model.get_target(source,
                                           submissionOntology['has_view'])
        self.assertEqual(str(view.uri), view_name)

    def test_library_url(self):
        # library_url has a default value and can be reassigned.
        daf_mapper = load_daf_mapper('urltest')

        self.assertEqual(daf_mapper.library_url,
                         'http://jumpgate.caltech.edu/library/')
        daf_mapper.library_url = 'http://google.com'
        self.assertEqual(daf_mapper.library_url, 'http://google.com')

    def test_daf_with_replicate(self):
        # Default fixture: views declare "hasReplicates yes".
        daf_mapper = load_daf_mapper('test_rep')
        self.assertEqual(daf_mapper.need_replicate(), True)
        self.assertTrue('replicate' in daf_mapper.get_daf_variables())

    def test_daf_without_replicate(self):
        # Fixture whose only view declares "hasReplicates no".
        daf_mapper = load_daf_mapper('test_rep', test_daf=test_daf_no_rep)
        self.assertEqual(daf_mapper.need_replicate(), False)
        self.assertTrue('replicate' not in daf_mapper.get_daf_variables())

    def test_daf_with_extra(self):
        # Names listed under "extraVariables" must show up in the mapper's
        # variable list alongside the regular ones.
        daf_mapper = load_daf_mapper('test_rep', test_daf=test_daf_extra)
        variables = daf_mapper.get_daf_variables()
        self.assertEqual(len(variables), 9)
        self.assertTrue('treatment' in variables)
        self.assertTrue('controlId' in variables)
288
289
@contextmanager
def mktempdir(prefix='tmp'):
    """Context manager yielding the path of a new temporary directory.

    prefix -- passed to tempfile.mkdtemp as the directory name prefix.

    The directory tree is removed when the ``with`` block exits, including
    when the block raises.
    """
    d = tempfile.mkdtemp(prefix=prefix)
    try:
        yield d
    finally:
        # Without try/finally an exception in the with-body would skip the
        # cleanup and leave the directory behind.
        shutil.rmtree(d)
295
296
@contextmanager
def mktempfile(suffix='', prefix='tmp', dir=None):
    """Context manager yielding the path of a new temporary file.

    suffix, prefix, dir -- passed straight to tempfile.mkstemp.

    The descriptor returned by mkstemp stays open for the duration of the
    ``with`` block; on exit it is closed and the file is unlinked, including
    when the block raises.
    """
    fd, pathname = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
    try:
        yield pathname
    finally:
        # Without try/finally an exception in the with-body would leak both
        # the open descriptor and the file on disk.
        os.close(fd)
        os.unlink(pathname)
303
304
def suite():
    """Return a TestSuite holding the TestDAF and TestDAFMapper cases.

    Built with the default TestLoader rather than unittest.makeSuite, which
    is deprecated; the loader's default method prefix is the same 'test'
    that was previously passed explicitly.
    """
    loader = unittest.defaultTestLoader
    tests = loader.loadTestsFromTestCase(TestDAF)
    tests.addTest(loader.loadTestsFromTestCase(TestDAFMapper))
    return tests
309
# When executed as a script, run the tests returned by suite() above
# (unittest.main resolves the 'suite' name in this module).
if __name__ == "__main__":
    unittest.main(defaultTest='suite')