Make a test more robust to different versions of librdf
[htsworkflow.git] / htsworkflow / submission / test / test_daf.py
1 from contextlib import contextmanager
2 import os
3 from StringIO import StringIO
4 import shutil
5 import tempfile
6 import unittest
7
8 from htsworkflow.submission import daf
9 from htsworkflow.util.rdfhelp import \
10      dafTermOntology, \
11      fromTypedNode, \
12      rdfNS, \
13      submissionLog, \
14      submissionOntology, \
15      get_model, \
16      get_serializer
17
18 import RDF
19
# DAF fixture text used by the tests below: a general-info section followed
# by two view definitions (FastqRd1 and Signal), both declared with
# "hasReplicates yes".
test_daf = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq 
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    yes
required         no

view             Signal
longLabelPrefix  Caltech Histone Signal
type             bigWig
hasReplicates    yes
required         no
"""
43
# DAF fixture text with the same general-info section as test_daf but only a
# single view (FastqRd1), declared with "hasReplicates no".
test_daf_no_rep = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq 
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
61
class TestDAF(unittest.TestCase):
    """Tests for parsing DAF text and loading the result into an RDF model."""

    def test_parse(self):
        """Parse the two-view fixture and check general and per-view values."""
        parsed = daf.fromstring(test_daf)

        self.assertEqual(parsed['assembly'], 'mm9')
        self.assertEqual(parsed['grant'], 'Hardison')
        self.assertEqual(len(parsed['variables']), 6)
        self.assertEqual(len(parsed['views']), 2)
        self.assertEqual(len(parsed['views']['FastqRd1']), 5)
        self.assertEqual(len(parsed['views']['Signal']), 5)

        signal = parsed['views']['Signal']
        self.assertEqual(signal['required'], False)
        self.assertEqual(signal['longLabelPrefix'],
                         'Caltech Histone Signal')

    def test_rdf(self):
        """Add the parsed fixture to a memory-backed model and inspect Signal."""
        parsed = daf.fromstring(test_daf)
        mem = RDF.MemoryStorage()
        model = RDF.Model(mem)

        name = 'cursub'
        subNS = RDF.NS(str(submissionLog[name].uri))
        daf.add_to_model(model, parsed, name)

        signal_view_node = RDF.Node(subNS['/view/Signal'].uri)

        writer = get_serializer()
        turtle = writer.serialize_model_to_string(model)

        # The serialized model must mention the Signal view URI.
        self.assertTrue(str(signal_view_node.uri) in turtle)

        statements = list(model.find_statements(
            RDF.Statement(signal_view_node, None, None)))
        self.assertEqual(len(statements), 6)

        # Use a distinct local so the submission name above is not rebound
        # to an RDF node.
        view_name = model.get_target(signal_view_node,
                                     dafTermOntology['name'])
        self.assertEqual(fromTypedNode(view_name), u'Signal')
103
def load_daf_mapper(name, extra_statements=None, ns=None, test_daf=test_daf):
    """Build a DAFMapper for the tests from DAF text and optional turtle.

    :param name: submission name handed to ``daf.DAFMapper``.
    :param extra_statements: optional turtle text parsed into the model
        before the mapper is created.
    :param ns: base URI used when parsing ``extra_statements``; defaults to
        ``"http://extra"`` when None.
    :param test_daf: DAF text wrapped in a StringIO and passed to the mapper
        as its ``daf_file``.
    :returns: the constructed ``daf.DAFMapper``.
    """
    if ns is None:
        ns = "http://extra"

    model = get_model()
    if extra_statements is not None:
        turtle_parser = RDF.Parser(name='turtle')
        turtle_parser.parse_string_into_model(model, extra_statements, ns)

    return daf.DAFMapper(name, daf_file=StringIO(test_daf), model=model)
119
def dump_model(model):
    """Print *model* as serialized by the serializer from get_serializer()."""
    serialized = get_serializer().serialize_model_to_string(model)
    # Single parenthesized argument: prints the same text under the
    # Python 2 print statement.
    print(serialized)
124     
class TestDAFMapper(unittest.TestCase):
    """Tests for daf.DAFMapper built through load_daf_mapper()."""

    def test_create_mapper_add_pattern(self):
        """add_pattern() stores one filename_re statement on the view."""
        name = 'testsub'
        mapper = load_daf_mapper(name)
        # Raw string: same characters as the plain literal, without relying
        # on the unrecognized '\Z' escape being passed through unchanged.
        pattern = r'.bam\Z(?ms)'
        mapper.add_pattern('Signal', pattern)

        s = RDF.Statement(daf.get_view_namespace(name)['Signal'],
                          dafTermOntology['filename_re'],
                          None)
        search = list(mapper.model.find_statements(s))
        self.assertEqual(len(search), 1)
        self.assertEqual(str(search[0].subject),
                         str(submissionLog['testsub/view/Signal']))
        self.assertEqual(str(search[0].predicate),
                         str(dafTermOntology['filename_re']))
        # NOTE(review): the stored object literal is deliberately not
        # compared against `pattern`; the original check on
        # search[0].object.literal_value['string'] was disabled, presumably
        # because that accessor differs between librdf versions. Confirm
        # before re-enabling.

    def test_find_one_view(self):
        """find_view() returns the single view whose pattern matches."""
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        view = daf_mapper.find_view('filename_r1.fastq')

        view_root = 'http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/'
        view_root = view_root.format(name)
        # str(view) carries one delimiter character on each side of the URI
        # (presumably '<' and '>'), so drop the first and last character.
        self.assertEqual(str(view)[1:-1],
                         '{0}{1}'.format(view_root, 'FastqRd1'))

    def test_find_overlapping_view(self):
        """find_view() raises ModelException when two view patterns match."""
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:fastq dafTerm:filename_re ".*\\\\.fastq" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        self.assertRaises(daf.ModelException,
                          daf_mapper.find_view,
                          'filename_r1.fastq')

    def test_find_attributes(self):
        """Check library attribute lookup and constructed file attributes."""
        lib_id = '11204'
        lib_url = 'http://jumpgate.caltech.edu/library/%s/' % (lib_id,)
        extra = '''@prefix dafTerm: <http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/view/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" ;
      submissionOntology:view_name "Signal" .
thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
        submissionOntology:view_name "FastqRd1" .
<%(libUrl)s> <%(libraryOntology)sgel_cut> "100"^^xsd:decimal . 
''' % {'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
       'libUrl': lib_url}

        daf_mapper = load_daf_mapper('testfind', extra)
        libNode = RDF.Node(RDF.Uri(lib_url))
        daf_mapper._add_library_details_to_model(libNode)
        gel_cut = daf_mapper._get_library_attribute(libNode, 'gel_cut')
        # make sure we can override attributes, the value in our
        # server is 500 for this library
        self.assertEqual(gel_cut, 100)

        species = daf_mapper._get_library_attribute(libNode, 'species')
        self.assertEqual(species, "Homo sapiens")

        with mktempdir('analysis') as analysis_dir:
            # Only the final path component is needed for the submission URI.
            analysis_name = os.path.basename(analysis_dir)
            with mktempfile('.bam', dir=analysis_dir) as filename:
                daf_mapper.construct_file_attributes(analysis_dir,
                                                     libNode,
                                                     filename)

        # Uncomment to inspect the model while debugging:
        # dump_model(daf_mapper.model)

        sub_root = "http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/"
        submission_name = sub_root + analysis_name
        source = daf_mapper.model.get_source(
            rdfNS['type'], submissionOntology['submission'])
        self.assertEqual(str(source.uri), submission_name)

        view_name = submission_name + '/Signal'
        view = daf_mapper.model.get_target(
            source, submissionOntology['has_view'])
        self.assertEqual(str(view.uri), view_name)

    def test_library_url(self):
        """library_url has the jumpgate default and can be reassigned."""
        daf_mapper = load_daf_mapper('urltest')

        self.assertEqual(daf_mapper.library_url,
                         'http://jumpgate.caltech.edu/library/')
        daf_mapper.library_url = 'http://google.com'
        self.assertEqual(daf_mapper.library_url, 'http://google.com')

    def test_daf_with_replicate(self):
        """A DAF whose views have replicates needs a 'replicate' variable."""
        daf_mapper = load_daf_mapper('test_rep')
        self.assertEqual(daf_mapper.need_replicate(), True)
        self.assertTrue('replicate' in daf_mapper.get_daf_variables())

    def test_daf_without_replicate(self):
        """A DAF with no replicated views has no 'replicate' variable."""
        daf_mapper = load_daf_mapper('test_rep', test_daf=test_daf_no_rep)
        self.assertEqual(daf_mapper.need_replicate(), False)
        self.assertTrue('replicate' not in daf_mapper.get_daf_variables())
240         
@contextmanager
def mktempdir(prefix='tmp'):
    """Context manager yielding a new temporary directory, removed on exit.

    :param prefix: name prefix passed to ``tempfile.mkdtemp``.
    :returns: yields the path of the created directory.

    The directory tree is removed when the ``with`` block ends, including
    when the block raises.
    """
    d = tempfile.mkdtemp(prefix=prefix)
    # Single pre-formatted argument keeps the output identical whether
    # print is the Python 2 statement or a function.
    print("made %s" % (d,))
    try:
        yield d
    finally:
        # Clean up even when the body of the with-block raises; otherwise
        # a failing test would leave the directory behind.
        shutil.rmtree(d)
        print("unmade %s" % (d,))
248
@contextmanager
def mktempfile(suffix='', prefix='tmp', dir=None):
    """Context manager yielding a new temporary file path, removed on exit.

    :param suffix: filename suffix passed to ``tempfile.mkstemp``.
    :param prefix: filename prefix passed to ``tempfile.mkstemp``.
    :param dir: directory in which to create the file, passed to
        ``tempfile.mkstemp``.
    :returns: yields the pathname of the created file.

    The descriptor returned by ``mkstemp`` stays open for the duration of
    the ``with`` block. It is closed and the file unlinked when the block
    ends, including when the block raises.
    """
    fd, pathname = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
    # Report creation when it happens rather than at teardown.
    print("made %s" % (pathname,))
    try:
        yield pathname
    finally:
        # Release the descriptor and remove the file even when the body of
        # the with-block raises.
        os.close(fd)
        os.unlink(pathname)
        print("unmade %s" % (pathname,))
257
258     
def suite():
    """Return a TestSuite with the tests of TestDAF and TestDAFMapper.

    Uses ``TestLoader.loadTestsFromTestCase`` rather than the deprecated
    ``unittest.makeSuite``; the loader's default method prefix is 'test',
    so the same test methods are collected.
    """
    loader = unittest.TestLoader()
    all_tests = loader.loadTestsFromTestCase(TestDAF)
    all_tests.addTest(loader.loadTestsFromTestCase(TestDAFMapper))
    return all_tests
263
# When executed as a script, run the tests collected by suite() above.
if __name__ == "__main__":
    unittest.main(defaultTest='suite')