remove some commented out code
[htsworkflow.git] / htsworkflow / submission / test / test_daf.py
1 from contextlib import contextmanager
2 import logging
3 import os
4 from six.moves import StringIO
5 import shutil
6 import tempfile
7 from unittest import TestCase, TestSuite, defaultTestLoader
8
9 from htsworkflow.submission import daf, results
10 from htsworkflow.util.rdfhelp import \
11      dafTermOntology, \
12      fromTypedNode, \
13      rdfNS, \
14      submissionLog, \
15      submissionOntology, \
16      get_model, \
17      get_serializer
18
19 from htsworkflow.submission.test import test_results
20 import RDF
21
# DAF fixture text with two views (FastqRd1 and Signal); both views declare
# "hasReplicates yes".  test_link_daf compares this text against the linked
# file's contents, so the body must stay exactly as written.
test_daf = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    yes
required         no

view             Signal
longLabelPrefix  Caltech Histone Signal
type             bigWig
hasReplicates    yes
required         no
"""
45
# DAF fixture text with a single FastqRd1 view that declares
# "hasReplicates no"; used by test_daf_without_replicate.
test_daf_no_rep = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
63
# DAF fixture text that adds an "extraVariables" line (controlId, treatment)
# next to five regular variables, with a single FastqRd1 view declaring
# "hasReplicates no"; used by test_daf_with_extra.
test_daf_extra = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell,antibody,sex,age,strain
extraVariables    controlId,treatment
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
82
83
84 class TestDAF(TestCase):
85     def test_parse(self):
86
87         parsed = daf.fromstring(test_daf)
88
89         self.failUnlessEqual(parsed['assembly'], 'mm9')
90         self.failUnlessEqual(parsed['grant'], 'Hardison')
91         self.failUnlessEqual(len(parsed['variables']), 6)
92         self.failUnlessEqual(len(parsed['views']), 2)
93         self.failUnlessEqual(len(parsed['views']['FastqRd1']), 5)
94         self.failUnlessEqual(len(parsed['views']['Signal']), 5)
95         signal = parsed['views']['Signal']
96         self.failUnlessEqual(signal['required'], False)
97         self.failUnlessEqual(signal['longLabelPrefix'],
98                              'Caltech Histone Signal')
99
100
101     def test_rdf(self):
102
103         parsed = daf.fromstring(test_daf)
104         #mem = RDF.Storage(storage_name='hashes',
105         #                  options_string='hash-type="memory"'),
106         mem = RDF.MemoryStorage()
107         model = RDF.Model(mem)
108
109         name = 'cursub'
110         subNS = RDF.NS(str(submissionLog[name].uri))
111         daf.add_to_model(model, parsed, submissionLog[name].uri)
112
113         signal_view_node = RDF.Node(subNS['/view/Signal'].uri)
114
115         writer = get_serializer()
116         turtle =  writer.serialize_model_to_string(model)
117
118         self.failUnless(str(signal_view_node.uri) in turtle)
119
120         statements = list(model.find_statements(
121             RDF.Statement(
122                 signal_view_node, None, None)))
123         self.failUnlessEqual(len(statements), 6)
124         name = model.get_target(signal_view_node, dafTermOntology['name'])
125         self.failUnlessEqual(fromTypedNode(name), u'Signal')
126
127     def test_get_view_namespace_from_string(self):
128         url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub/"
129         target = RDF.NS(url + 'view/')
130         view_namespace = daf.get_view_namespace(url)
131         self.assertEqual(view_namespace[''], target[''])
132
133     def test_get_view_namespace_from_string_no_trailing_slash(self):
134         url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub"
135         target = RDF.NS(url + '/view/')
136         view_namespace = daf.get_view_namespace(url)
137         self.assertEqual(view_namespace[''], target[''])
138
139     def test_get_view_namespace_from_uri_node(self):
140         url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub/"
141         node = RDF.Node(RDF.Uri(url))
142         target = RDF.NS(url + 'view/')
143         view_namespace = daf.get_view_namespace(node)
144         self.assertEqual(view_namespace[''], target[''])
145
146
def load_daf_mapper(name, extra_statements=None, ns=None, test_daf=test_daf):
    """Build a daf.UCSCSubmission backed by a fresh model for use in tests.

    :param name: submission name handed to daf.UCSCSubmission.
    :param extra_statements: optional turtle text parsed into the model
        before the submission object is created.
    :param ns: third argument given to the turtle parser (presumably the
        base URI -- confirm against the RDF bindings); defaults to
        "http://extra" when None.
    :param test_daf: DAF text, wrapped in a StringIO and passed as daf_file.
    :returns: the constructed daf.UCSCSubmission instance.
    """
    base = "http://extra" if ns is None else ns
    model = get_model()

    if extra_statements is not None:
        RDF.Parser(name='turtle').parse_string_into_model(
            model, extra_statements, base)

    return daf.UCSCSubmission(name,
                              daf_file=StringIO(test_daf),
                              model=model)
162
163
164 class TestUCSCSubmission(TestCase):
165     def setUp(self):
166         test_results.generate_sample_results_tree(self, 'daf_results')
167
168     def tearDown(self):
169         # see things created by temp_results.generate_sample_results_tree
170         shutil.rmtree(self.tempdir)
171
172     def test_create_mapper_add_pattern(self):
173         name = 'testsub'
174         mapper = load_daf_mapper(name)
175         pattern = '.bam\Z(?ms)'
176         mapper.add_pattern('Signal', pattern)
177
178         s = RDF.Statement(mapper.viewNS['Signal'],
179                           dafTermOntology['filename_re'],
180                           None)
181         search = list(mapper.model.find_statements(s))
182         self.failUnlessEqual(len(search), 1)
183         self.failUnlessEqual(str(search[0].subject),
184                              str(submissionLog['testsub/view/Signal']))
185         self.failUnlessEqual(str(search[0].predicate),
186                              str(dafTermOntology['filename_re']))
187         #self.failUnlessEqual(search[0].object.literal_value['string'], pattern)
188
189
190     def test_find_one_view(self):
191         name='testfind'
192         extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
193 @prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .
194
195 thisView:Signal dafTerm:filename_re ".*\\\\.bam" .
196 thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
197 '''.format(name)
198         daf_mapper = load_daf_mapper(name, extra_statements = extra)
199
200         view = daf_mapper.find_view('filename_r1.fastq')
201
202         view_root = 'http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/'
203         view_root = view_root.format(name)
204         self.failUnlessEqual(str(view.uri),
205                              '{0}{1}'.format(view_root,'FastqRd1'))
206
207     def test_find_overlapping_view(self):
208         name = 'testfind'
209         extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
210 @prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .
211
212 thisView:fastq dafTerm:filename_re ".*\\\\.fastq" .
213 thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
214 '''.format(name)
215         daf_mapper = load_daf_mapper(name, extra_statements = extra)
216
217         self.failUnlessRaises(daf.ModelException,
218                               daf_mapper.find_view,
219                               'filename_r1.fastq')
220
221     def test_find_attributes(self):
222         lib_id = '11204'
223         lib_url = 'http://jumpgate.caltech.edu/library/%s/' %(lib_id)
224         extra = '''@prefix dafTerm: <http://jumpgate.caltech.edu/wiki/UcscDaf#> .
225 @prefix submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#> .
226 @prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/view/> .
227 @prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
228
229 thisView:Signal dafTerm:filename_re ".*\\\\.bam" ;
230       submissionOntology:view_name "Signal" .
231 thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
232         submissionOntology:view_name "FastqRd1" .
233 <%(libUrl)s> <%(libraryOntology)sgel_cut> "100"^^xsd:decimal .
234 ''' % {'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
235        'libUrl': lib_url}
236
237         daf_mapper = load_daf_mapper('testfind', extra)
238         libNode = RDF.Node(RDF.Uri(lib_url))
239         daf_mapper._add_library_details_to_model(libNode)
240         gel_cut = daf_mapper._get_library_attribute(libNode, 'gel_cut')
241         # make sure we can override attributes, the value in our
242         # server is 500 for this library
243         self.failUnlessEqual(gel_cut, 100)
244
245         species = daf_mapper._get_library_attribute(libNode, 'species_name')
246         self.failUnlessEqual(species, "Homo sapiens")
247
248         with mktempdir('analysis') as analysis_dir:
249             path, analysis_name = os.path.split(analysis_dir)
250             with mktempfile('.bam', dir=analysis_dir) as filename:
251                 daf_mapper.construct_track_attributes(analysis_dir,
252                                                       libNode,
253                                                       filename)
254
255         sub_root = "http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/"
256         submission_name = sub_root + analysis_name
257         source = daf_mapper.model.get_source(rdfNS['type'], submissionOntology['submission'])
258         self.failUnlessEqual(str(source.uri), submission_name)
259
260         view_name = submission_name + '/Signal'
261         view = daf_mapper.model.get_target(source, submissionOntology['has_view'])
262         self.failUnlessEqual(str(view.uri), view_name)
263
264
265     def test_library_url(self):
266         daf_mapper = load_daf_mapper('urltest')
267
268         self.failUnlessEqual(daf_mapper.library_url,
269                              'http://jumpgate.caltech.edu/library/')
270         daf_mapper.library_url = 'http://google.com'
271         self.failUnlessEqual(daf_mapper.library_url, 'http://google.com' )
272
273     def test_daf_with_replicate(self):
274         daf_mapper = load_daf_mapper('test_rep')
275         self.failUnlessEqual(daf_mapper.need_replicate(), True)
276         self.failUnless('replicate' in daf_mapper.get_daf_variables())
277
278     def test_daf_without_replicate(self):
279         daf_mapper = load_daf_mapper('test_rep',test_daf=test_daf_no_rep)
280         self.failUnlessEqual(daf_mapper.need_replicate(), False)
281         self.failUnless('replicate' not in daf_mapper.get_daf_variables())
282
283     def test_daf_with_extra(self):
284         daf_mapper = load_daf_mapper('test_rep',test_daf=test_daf_extra)
285         variables = daf_mapper.get_daf_variables()
286         self.assertEqual(len(variables), 11)
287         self.failUnless('treatment' in variables)
288         self.failUnless('controlId' in variables)
289
290
291     def test_link_daf(self):
292         name = 'testsub'
293         submission = load_daf_mapper(name, test_daf=test_daf)
294         result_map = results.ResultMap()
295         result_dir = os.path.join(self.sourcedir,
296                                   test_results.S1_NAME)
297         result_map['1000'] = result_dir
298
299         submission.link_daf(result_map)
300
301         # make sure daf gets linked
302         created_daf = os.path.join(result_dir, name+'.daf')
303         self.failUnless(os.path.exists(created_daf))
304         stream = open(created_daf,'r')
305         daf_body = stream.read()
306         stream.close()
307
308         self.failUnlessEqual(test_daf, daf_body)
309
310
@contextmanager
def mktempdir(prefix='tmp'):
    """Yield the path of a new temporary directory, removing it on exit.

    :param prefix: name prefix passed to tempfile.mkdtemp.

    The directory tree is removed when the ``with`` block ends, including
    when the block exits by raising an exception.
    """
    d = tempfile.mkdtemp(prefix=prefix)
    try:
        yield d
    finally:
        # Runs on both normal exit and exceptions, so a failing test body
        # does not leave the directory behind.
        shutil.rmtree(d)
316
317
@contextmanager
def mktempfile(suffix='', prefix='tmp', dir=None):
    """Yield the path of a new temporary file, deleting it on exit.

    :param suffix: filename suffix passed to tempfile.mkstemp.
    :param prefix: filename prefix passed to tempfile.mkstemp.
    :param dir: directory passed to tempfile.mkstemp.

    The descriptor returned by mkstemp stays open for the duration of the
    ``with`` block; it is closed and the file unlinked when the block ends,
    including when the block exits by raising an exception.
    """
    fd, pathname = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
    try:
        yield pathname
    finally:
        # Runs on both normal exit and exceptions, so neither the
        # descriptor nor the file is leaked by a failing test body.
        os.close(fd)
        os.unlink(pathname)
324
def suite():
    """Return a TestSuite containing the tests of TestDAF and
    TestUCSCSubmission, in that order.
    """
    collected = TestSuite()
    for case in (TestDAF, TestUCSCSubmission):
        collected.addTests(defaultTestLoader.loadTestsFromTestCase(case))
    return collected
330
if __name__ == "__main__":
    # Script entry point: enable debug logging, then let unittest run the
    # module-level suite() function.
    import unittest
    logging.basicConfig(level=logging.DEBUG)
    unittest.main(defaultTest='suite')