Change unittest2 back into unittest.
[htsworkflow.git] / htsworkflow / submission / test / test_daf.py
1 from contextlib import contextmanager
2 import logging
3 import os
4 from StringIO import StringIO
5 import shutil
6 import tempfile
7 from unittest import TestCase, TestSuite, defaultTestLoader
8
9 from htsworkflow.submission import daf, results
10 from htsworkflow.util.rdfhelp import \
11      dafTermOntology, \
12      fromTypedNode, \
13      rdfNS, \
14      submissionLog, \
15      submissionOntology, \
16      get_model, \
17      get_serializer
18
19 from htsworkflow.submission.test import test_results
20 import RDF
21
# Sample DAF text used as the default fixture: parsed directly by TestDAF and
# used as the default ``test_daf`` argument of load_daf_mapper.  It declares
# two views (FastqRd1 and Signal), both with ``hasReplicates yes``.
test_daf = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    yes
required         no

view             Signal
longLabelPrefix  Caltech Histone Signal
type             bigWig
hasReplicates    yes
required         no
"""
45
# DAF fixture with a single FastqRd1 view whose ``hasReplicates`` is ``no``;
# used by TestUCSCSubmission.test_daf_without_replicate.
test_daf_no_rep = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
63
# DAF fixture that adds an ``extraVariables`` line (controlId, treatment) on
# top of a five-entry ``variables`` line; used by
# TestUCSCSubmission.test_daf_with_extra.
test_daf_extra = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell,antibody,sex,age,strain
extraVariables    controlId,treatment
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
82
83
class TestDAF(TestCase):
    """Tests for the module-level DAF helpers in htsworkflow.submission.daf.

    Covers daf.fromstring, daf.add_to_model and daf.get_view_namespace.
    """

    def test_parse(self):
        """daf.fromstring exposes top-level fields and per-view mappings."""
        parsed = daf.fromstring(test_daf)

        self.assertEqual(parsed['assembly'], 'mm9')
        self.assertEqual(parsed['grant'], 'Hardison')
        self.assertEqual(len(parsed['variables']), 6)
        self.assertEqual(len(parsed['views']), 2)
        self.assertEqual(len(parsed['views']['FastqRd1']), 5)
        self.assertEqual(len(parsed['views']['Signal']), 5)
        signal = parsed['views']['Signal']
        self.assertEqual(signal['required'], False)
        self.assertEqual(signal['longLabelPrefix'],
                         'Caltech Histone Signal')

    def test_rdf(self):
        """daf.add_to_model writes the Signal view into the RDF model."""
        parsed = daf.fromstring(test_daf)
        #mem = RDF.Storage(storage_name='hashes',
        #                  options_string='hash-type="memory"'),
        mem = RDF.MemoryStorage()
        model = RDF.Model(mem)

        name = 'cursub'
        subNS = RDF.NS(str(submissionLog[name].uri))
        daf.add_to_model(model, parsed, submissionLog[name].uri)

        signal_view_node = RDF.Node(subNS['/view/Signal'].uri)

        writer = get_serializer()
        turtle = writer.serialize_model_to_string(model)

        # The view's URI must show up in the serialized model.
        self.assertIn(str(signal_view_node.uri), turtle)

        statements = list(model.find_statements(
            RDF.Statement(
                signal_view_node, None, None)))
        self.assertEqual(len(statements), 6)
        # Kept separate from ``name`` above, which is the submission name.
        name_node = model.get_target(signal_view_node,
                                     dafTermOntology['name'])
        self.assertEqual(fromTypedNode(name_node), u'Signal')

    def test_get_view_namespace_from_string(self):
        """A URL string with a trailing slash maps to <url>view/."""
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub/"
        target = RDF.NS(url + 'view/')
        view_namespace = daf.get_view_namespace(url)
        self.assertEqual(view_namespace[''], target[''])

    def test_get_view_namespace_from_string_no_trailing_slash(self):
        """A URL string without a trailing slash maps to <url>/view/."""
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub"
        target = RDF.NS(url + '/view/')
        view_namespace = daf.get_view_namespace(url)
        self.assertEqual(view_namespace[''], target[''])

    def test_get_view_namespace_from_uri_node(self):
        """An RDF URI node is accepted as well as a plain string."""
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub/"
        node = RDF.Node(RDF.Uri(url))
        target = RDF.NS(url + 'view/')
        view_namespace = daf.get_view_namespace(node)
        self.assertEqual(view_namespace[''], target[''])


145
146
def load_daf_mapper(name, extra_statements=None, ns=None, test_daf=test_daf):
    """Build a daf.UCSCSubmission for tests from DAF text.

    :param name: submission name handed to daf.UCSCSubmission.
    :param extra_statements: optional turtle text parsed into the model
        before the submission object is created.
    :param ns: third argument given to the turtle parser's
        parse_string_into_model (presumably the base URI -- confirm against
        the Redland bindings); "http://extra" is used when None.
    :param test_daf: DAF text wrapped in a StringIO and passed as daf_file.
    :returns: the daf.UCSCSubmission instance.
    """
    base = "http://extra" if ns is None else ns
    rdf_model = get_model()

    if extra_statements is not None:
        turtle_parser = RDF.Parser(name='turtle')
        turtle_parser.parse_string_into_model(rdf_model, extra_statements,
                                              base)

    return daf.UCSCSubmission(name,
                              daf_file=StringIO(test_daf),
                              model=rdf_model)

162
def dump_model(model):
    """Debugging aid: print ``model`` serialized by get_serializer().

    :param model: RDF model passed to serialize_model_to_string.
    """
    writer = get_serializer()
    turtle = writer.serialize_model_to_string(model)
    # Parenthesized single-argument form prints the same text under the
    # Python 2 print statement and the Python 3 print function.
    print(turtle)
167
168
class TestUCSCSubmission(TestCase):
    """Tests for daf.UCSCSubmission built through load_daf_mapper."""

    def setUp(self):
        # Presumably populates self.tempdir and self.sourcedir, which
        # tearDown and test_link_daf read -- confirm in test_results.
        test_results.generate_sample_results_tree(self, 'daf_results')

    def tearDown(self):
        # see things created by test_results.generate_sample_results_tree
        shutil.rmtree(self.tempdir)

    def test_create_mapper_add_pattern(self):
        """add_pattern stores one filename_re statement on the view."""
        name = 'testsub'
        mapper = load_daf_mapper(name)
        # Raw string: same value as before, without relying on Python
        # passing the unrecognized ``\Z`` escape through unchanged.
        pattern = r'.bam\Z(?ms)'
        mapper.add_pattern('Signal', pattern)

        s = RDF.Statement(mapper.viewNS['Signal'],
                          dafTermOntology['filename_re'],
                          None)
        search = list(mapper.model.find_statements(s))
        self.assertEqual(len(search), 1)
        self.assertEqual(str(search[0].subject),
                         str(submissionLog['testsub/view/Signal']))
        self.assertEqual(str(search[0].predicate),
                         str(dafTermOntology['filename_re']))
        #self.assertEqual(search[0].object.literal_value['string'], pattern)

    def test_find_one_view(self):
        """find_view returns the single view whose filename_re matches."""
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        view = daf_mapper.find_view('filename_r1.fastq')

        # dump_model(daf_mapper.model)
        view_root = 'http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/'
        view_root = view_root.format(name)
        self.assertEqual(str(view.uri),
                         '{0}{1}'.format(view_root, 'FastqRd1'))

    def test_find_overlapping_view(self):
        """find_view raises ModelException when two patterns both match."""
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:fastq dafTerm:filename_re ".*\\\\.fastq" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        self.assertRaises(daf.ModelException,
                          daf_mapper.find_view,
                          'filename_r1.fastq')

    def test_find_attributes(self):
        """Library attributes and track attributes end up in the model."""
        lib_id = '11204'
        lib_url = 'http://jumpgate.caltech.edu/library/%s/' %(lib_id)
        extra = '''@prefix dafTerm: <http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/view/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" ;
      submissionOntology:view_name "Signal" .
thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
        submissionOntology:view_name "FastqRd1" .
<%(libUrl)s> <%(libraryOntology)sgel_cut> "100"^^xsd:decimal .
''' % {'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
       'libUrl': lib_url}

        daf_mapper = load_daf_mapper('testfind', extra)
        libNode = RDF.Node(RDF.Uri(lib_url))
        daf_mapper._add_library_details_to_model(libNode)
        gel_cut = daf_mapper._get_library_attribute(libNode, 'gel_cut')
        # make sure we can override attributes, the value in our
        # server is 500 for this library
        self.assertEqual(gel_cut, 100)

        species = daf_mapper._get_library_attribute(libNode, 'species_name')
        self.assertEqual(species, "Homo sapiens")

        with mktempdir('analysis') as analysis_dir:
            # Only the final path component is needed for the URI below.
            analysis_name = os.path.basename(analysis_dir)
            with mktempfile('.bam', dir=analysis_dir) as filename:
                daf_mapper.construct_track_attributes(analysis_dir,
                                                      libNode,
                                                      filename)

        #dump_model(daf_mapper.model)

        sub_root = "http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/"
        submission_name = sub_root + analysis_name
        source = daf_mapper.model.get_source(rdfNS['type'],
                                             submissionOntology['submission'])
        self.assertEqual(str(source.uri), submission_name)

        view_name = submission_name + '/Signal'
        view = daf_mapper.model.get_target(source,
                                           submissionOntology['has_view'])
        self.assertEqual(str(view.uri), view_name)

    def test_library_url(self):
        """library_url has a default and can be reassigned."""
        daf_mapper = load_daf_mapper('urltest')

        self.assertEqual(daf_mapper.library_url,
                         'http://jumpgate.caltech.edu/library/')
        daf_mapper.library_url = 'http://google.com'
        self.assertEqual(daf_mapper.library_url, 'http://google.com')

    def test_daf_with_replicate(self):
        """The default fixture requires a replicate variable."""
        daf_mapper = load_daf_mapper('test_rep')
        self.assertEqual(daf_mapper.need_replicate(), True)
        self.assertIn('replicate', daf_mapper.get_daf_variables())

    def test_daf_without_replicate(self):
        """The no-replicate fixture does not add a replicate variable."""
        daf_mapper = load_daf_mapper('test_rep', test_daf=test_daf_no_rep)
        self.assertEqual(daf_mapper.need_replicate(), False)
        self.assertNotIn('replicate', daf_mapper.get_daf_variables())

    def test_daf_with_extra(self):
        """extraVariables entries are reported by get_daf_variables."""
        daf_mapper = load_daf_mapper('test_rep', test_daf=test_daf_extra)
        variables = daf_mapper.get_daf_variables()
        self.assertEqual(len(variables), 11)
        self.assertIn('treatment', variables)
        self.assertIn('controlId', variables)

    def test_link_daf(self):
        """link_daf writes <name>.daf into each result directory."""
        name = 'testsub'
        submission = load_daf_mapper(name, test_daf=test_daf)
        result_map = results.ResultMap()
        result_dir = os.path.join(self.sourcedir,
                                  test_results.S1_NAME)
        result_map['1000'] = result_dir

        submission.link_daf(result_map)

        # make sure daf gets linked
        created_daf = os.path.join(result_dir, name + '.daf')
        self.assertTrue(os.path.exists(created_daf))
        # ``with`` closes the file even if the read fails.
        with open(created_daf, 'r') as stream:
            daf_body = stream.read()

        self.assertEqual(test_daf, daf_body)

317
318
@contextmanager
def mktempdir(prefix='tmp'):
    """Context manager yielding the path of a fresh temporary directory.

    :param prefix: passed to tempfile.mkdtemp as the directory name prefix.

    The directory and its contents are removed on exit, including when the
    ``with`` body raises.
    """
    d = tempfile.mkdtemp(prefix=prefix)
    try:
        yield d
    finally:
        # Runs on both normal exit and exceptions so nothing is left behind.
        shutil.rmtree(d)
324
325
@contextmanager
def mktempfile(suffix='', prefix='tmp', dir=None):
    """Context manager yielding the path of a fresh temporary file.

    :param suffix: file name suffix passed to tempfile.mkstemp.
    :param prefix: file name prefix passed to tempfile.mkstemp.
    :param dir: directory passed to tempfile.mkstemp.

    The descriptor returned by mkstemp stays open for the duration of the
    ``with`` body; on exit it is closed and the file is unlinked, including
    when the body raises.
    """
    fd, pathname = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
    try:
        yield pathname
    finally:
        try:
            os.close(fd)
        finally:
            # Still remove the file if closing the descriptor failed.
            os.unlink(pathname)

332
def suite():
    """Return a TestSuite holding the tests of both test cases in this module."""
    collected = TestSuite()
    for case in (TestDAF, TestUCSCSubmission):
        collected.addTests(defaultTestLoader.loadTestsFromTestCase(case))
    return collected
338
if __name__ == "__main__":
    # Executed as a script: turn on debug logging, then run suite().
    from unittest import main as run_tests

    logging.basicConfig(level=logging.DEBUG)
    run_tests(defaultTest='suite')