whitespace fixes
[htsworkflow.git] / htsworkflow / submission / test / test_daf.py
1 from contextlib import contextmanager
2 import logging
3 import os
4 from six.moves import StringIO
5 import shutil
6 import tempfile
7 from unittest import TestCase, TestSuite, defaultTestLoader
8
9 from rdflib import Graph, Namespace, URIRef
10 from rdflib.namespace import RDF
11
12 from htsworkflow.submission import daf, results
13 from htsworkflow.util.rdfns import (
14      dafTermOntology,
15      submissionLog,
16      submissionOntology
17 )
18
19 from htsworkflow.submission.test import test_results
20
# Sample DAF text used as the default fixture by load_daf_mapper() and
# parsed directly in TestDAF.  It declares two views, FastqRd1 and Signal,
# both with "hasReplicates yes".  test_link_daf compares a written file
# against this string, so its whitespace is significant.
test_daf = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    yes
required         no

view             Signal
longLabelPrefix  Caltech Histone Signal
type             bigWig
hasReplicates    yes
required         no
"""
44
# Variant of the sample DAF with a single FastqRd1 view whose
# "hasReplicates" is "no"; used by test_daf_without_replicate.
test_daf_no_rep = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
62
# Variant of the sample DAF that adds an "extraVariables" line
# (controlId, treatment) and drops "control" from "variables"; used by
# test_daf_with_extra.
test_daf_extra = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell,antibody,sex,age,strain
extraVariables    controlId,treatment
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
81
82
class TestDAF(TestCase):
    """Tests for parsing DAF text and loading it into an RDF graph.

    Uses ``assert*`` methods rather than the ``failUnless*`` aliases,
    which were deprecated for years and removed in Python 3.12.
    """

    def test_parse(self):
        """daf.fromstring exposes the general fields and both views."""
        parsed = daf.fromstring(test_daf)

        self.assertEqual(parsed['assembly'], 'mm9')
        self.assertEqual(parsed['grant'], 'Hardison')
        self.assertEqual(len(parsed['variables']), 6)
        self.assertEqual(len(parsed['views']), 2)
        self.assertEqual(len(parsed['views']['FastqRd1']), 5)
        self.assertEqual(len(parsed['views']['Signal']), 5)
        signal = parsed['views']['Signal']
        # Compare against False explicitly so a falsy non-bool (e.g. 'no'
        # left unparsed) would still fail the check.
        self.assertEqual(signal['required'], False)
        self.assertEqual(signal['longLabelPrefix'],
                         'Caltech Histone Signal')

    def test_rdf(self):
        """daf.add_to_model records the Signal view under the submission."""
        parsed = daf.fromstring(test_daf)
        model = Graph()

        name = 'cursub'
        subNS = Namespace(str(submissionLog[name]))
        daf.add_to_model(model, parsed, submissionLog[name])

        signal_view_node = subNS['/view/Signal']

        turtle = str(model.serialize(format='turtle'))

        self.assertIn(str(signal_view_node), turtle)

        statements = list(model.triples((signal_view_node, None, None)))
        self.assertEqual(len(statements), 6)
        names = list(model.objects(signal_view_node, dafTermOntology['name']))
        self.assertEqual(len(names), 1)
        self.assertEqual(names[0].toPython(), u'Signal')

    def test_get_view_namespace_from_string(self):
        """A URL string ending in '/' gets 'view/' appended."""
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub/"
        target = Namespace(url + 'view/')
        view_namespace = daf.get_view_namespace(url)
        self.assertEqual(view_namespace[''], target[''])

    def test_get_view_namespace_from_string_no_trailing_slash(self):
        """A URL string without a trailing '/' gets '/view/' appended."""
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub"
        target = Namespace(url + '/view/')
        view_namespace = daf.get_view_namespace(url)
        self.assertEqual(view_namespace[''], target[''])

    def test_get_view_namespace_from_uri_node(self):
        """A URIRef node is accepted in place of a plain URL string."""
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub/"
        node = URIRef(url)
        target = Namespace(url + 'view/')
        view_namespace = daf.get_view_namespace(node)
        self.assertEqual(view_namespace[''], target[''])
138
def load_daf_mapper(name, extra_statements=None, ns=None, test_daf=test_daf):
    """Build a daf.UCSCSubmission backed by a fresh in-memory graph.

    :param name: submission name handed to daf.UCSCSubmission.
    :param extra_statements: optional turtle text parsed into the graph
        before the mapper is created.
    :param ns: publicID used when parsing ``extra_statements``; falls back
        to "http://extra" when None.
    :param test_daf: DAF text wrapped in a StringIO and passed as the
        mapper's ``daf_file``.
    :returns: the constructed daf.UCSCSubmission.
    """
    public_id = "http://extra" if ns is None else ns

    graph = Graph()
    if extra_statements is not None:
        graph.parse(data=extra_statements, format='turtle', publicID=public_id)

    return daf.UCSCSubmission(name, daf_file=StringIO(test_daf), model=graph)
152
153
class TestUCSCSubmission(TestCase):
    """Tests for daf.UCSCSubmission as built by load_daf_mapper().

    Uses ``assert*`` methods rather than the ``failUnless*`` aliases,
    which were removed from unittest in Python 3.12.
    """

    def setUp(self):
        # NOTE(review): generate_sample_results_tree is expected to set
        # self.tempdir and self.sourcedir, both of which are read below.
        test_results.generate_sample_results_tree(self, 'daf_results')

    def tearDown(self):
        # see things created by test_results.generate_sample_results_tree
        shutil.rmtree(self.tempdir)

    def test_create_mapper_add_pattern(self):
        """add_pattern stores one filename_re statement on the view node."""
        name = 'testsub'
        mapper = load_daf_mapper(name)
        # Raw string: same value as before, without the invalid '\Z' escape.
        # NOTE(review): re.compile rejects global flags that are not at the
        # start of the pattern on Python 3.11+; confirm whether add_pattern
        # compiles this value.
        pattern = r'.bam\Z(?ms)'
        mapper.add_pattern('Signal', pattern)

        s = (mapper.viewNS['Signal'],
             dafTermOntology['filename_re'],
             None)
        search = list(mapper.model.triples(s))
        self.assertEqual(len(search), 1)
        self.assertEqual(str(search[0][0]),
                         str(submissionLog['testsub/view/Signal']))
        self.assertEqual(str(search[0][1]),
                         str(dafTermOntology['filename_re']))
        # NOTE(review): the stored object (the pattern itself) is not
        # verified; the original check below was already disabled.
        #self.failUnlessEqual(search[0].object.literal_value['string'], pattern)

    def test_find_one_view(self):
        """find_view returns the single view whose filename_re matches."""
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        view = daf_mapper.find_view('filename_r1.fastq')

        view_root = 'http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/'
        view_root = view_root.format(name)
        self.assertEqual(str(view),
                         '{0}{1}'.format(view_root, 'FastqRd1'))

    def test_find_overlapping_view(self):
        """find_view raises ModelException when two patterns match."""
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:fastq dafTerm:filename_re ".*\\\\.fastq" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        self.assertRaises(daf.ModelException,
                          daf_mapper.find_view,
                          'filename_r1.fastq')

    def test_find_attributes(self):
        """Library attributes and track attributes end up in the model."""
        lib_id = '11204'
        lib_url = 'http://jumpgate.caltech.edu/library/%s/' %(lib_id)
        extra = '''@prefix dafTerm: <http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/view/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" ;
      submissionOntology:view_name "Signal" .
thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
        submissionOntology:view_name "FastqRd1" .
<%(libUrl)s> <%(libraryOntology)sgel_cut> "100"^^xsd:decimal .
''' % {'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
       'libUrl': lib_url}

        daf_mapper = load_daf_mapper('testfind', extra)
        libNode = URIRef(lib_url)
        daf_mapper._add_library_details_to_model(libNode)
        gel_cut = daf_mapper._get_library_attribute(libNode, 'gel_cut')
        # make sure we can override attributes, the value in our
        # server is 500 for this library
        self.assertEqual(gel_cut, 100)

        species = daf_mapper._get_library_attribute(libNode, 'species_name')
        self.assertEqual(species, "Homo sapiens")

        with mktempdir('analysis') as analysis_dir:
            # Only the final path component is needed to build the
            # expected submission URL below.
            analysis_name = os.path.basename(analysis_dir)
            with mktempfile('.bam', dir=analysis_dir) as filename:
                daf_mapper.construct_track_attributes(analysis_dir,
                                                      libNode,
                                                      filename)

        sub_root = "http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/"
        submission_name = sub_root + analysis_name
        sources = list(daf_mapper.model.subjects(
            RDF['type'], submissionOntology['submission']))
        self.assertEqual(len(sources), 1)
        source = sources[0]
        self.assertEqual(str(source), submission_name)

        view_name = submission_name + '/Signal'
        views = list(daf_mapper.model.objects(
            source, submissionOntology['has_view']))
        self.assertEqual(len(views), 1)
        self.assertEqual(str(views[0]), view_name)

    def test_library_url(self):
        """library_url has a default and can be reassigned."""
        daf_mapper = load_daf_mapper('urltest')

        self.assertEqual(daf_mapper.library_url,
                         'http://jumpgate.caltech.edu/library/')
        daf_mapper.library_url = 'http://google.com'
        self.assertEqual(daf_mapper.library_url, 'http://google.com')

    def test_daf_with_replicate(self):
        """A DAF with replicated views needs the 'replicate' variable."""
        daf_mapper = load_daf_mapper('test_rep')
        self.assertEqual(daf_mapper.need_replicate(), True)
        self.assertIn('replicate', daf_mapper.get_daf_variables())

    def test_daf_without_replicate(self):
        """A DAF without replicated views omits the 'replicate' variable."""
        daf_mapper = load_daf_mapper('test_rep', test_daf=test_daf_no_rep)
        self.assertEqual(daf_mapper.need_replicate(), False)
        self.assertNotIn('replicate', daf_mapper.get_daf_variables())

    def test_daf_with_extra(self):
        """extraVariables entries are included in get_daf_variables()."""
        daf_mapper = load_daf_mapper('test_rep', test_daf=test_daf_extra)
        variables = daf_mapper.get_daf_variables()

        self.assertEqual(len(variables), 11)
        self.assertIn('treatment', variables)
        self.assertIn('controlId', variables)

    def test_link_daf(self):
        """link_daf writes <name>.daf with the DAF text into a result dir."""
        name = 'testsub'
        submission = load_daf_mapper(name, test_daf=test_daf)
        result_map = results.ResultMap()
        result_dir = os.path.join(self.sourcedir,
                                  test_results.S1_NAME)
        result_map['1000'] = result_dir

        submission.link_daf(result_map)

        # make sure daf gets linked
        created_daf = os.path.join(result_dir, name+'.daf')
        self.assertTrue(os.path.exists(created_daf))
        # Context manager so the handle is closed even if read() fails.
        with open(created_daf, 'r') as stream:
            daf_body = stream.read()

        self.assertEqual(test_daf, daf_body)
301
302
@contextmanager
def mktempdir(prefix='tmp'):
    """Yield a freshly created temporary directory, removing it on exit.

    :param prefix: name prefix passed to tempfile.mkdtemp.
    :returns: context manager yielding the directory path.

    The directory tree is removed in a ``finally`` clause so it is cleaned
    up even when the body of the ``with`` statement raises.
    """
    d = tempfile.mkdtemp(prefix=prefix)
    try:
        yield d
    finally:
        shutil.rmtree(d)
308
309
@contextmanager
def mktempfile(suffix='', prefix='tmp', dir=None):
    """Yield the path of a new temporary file, deleting it on exit.

    :param suffix: filename suffix passed to tempfile.mkstemp.
    :param prefix: filename prefix passed to tempfile.mkstemp.
    :param dir: directory to create the file in (mkstemp default if None).
    :returns: context manager yielding the file's pathname.

    The descriptor returned by mkstemp stays open while the ``with`` body
    runs.  Cleanup happens in ``finally`` clauses so neither the descriptor
    nor the file is leaked when the body raises.
    """
    fd, pathname = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
    try:
        yield pathname
    finally:
        try:
            os.close(fd)
        finally:
            # Remove the file even if closing the descriptor failed.
            os.unlink(pathname)
316
def suite():
    """Return a TestSuite holding the tests of both cases in this module."""
    collected = TestSuite()
    for case in (TestDAF, TestUCSCSubmission):
        collected.addTests(defaultTestLoader.loadTestsFromTestCase(case))
    return collected
322
if __name__ == "__main__":
    # Script entry point: enable debug logging, then run the tests
    # collected by suite().
    from unittest import main

    logging.basicConfig(level=logging.DEBUG)
    main(defaultTest='suite')