Attempt to download DAF data for an encodesubmit submission
[htsworkflow.git] / htsworkflow / submission / test / test_daf.py
1 from contextlib import contextmanager
2 import os
3 from StringIO import StringIO
4 import shutil
5 import tempfile
6 import unittest
7
8 from htsworkflow.submission import daf
9 from htsworkflow.util.rdfhelp import \
10      dafTermOntology, \
11      fromTypedNode, \
12      rdfNS, \
13      submissionLog, \
14      submissionOntology, \
15      get_model, \
16      get_serializer
17
18 import RDF
19
# Sample DAF text used as the default fixture: general submission settings
# followed by two view definitions (FastqRd1 and Signal), both of which
# declare "hasReplicates yes".
test_daf = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    yes
required         no

view             Signal
longLabelPrefix  Caltech Histone Signal
type             bigWig
hasReplicates    yes
required         no
"""
43
# Variant DAF fixture with a single view (FastqRd1) that declares
# "hasReplicates no"; same general settings as the fixture above.
test_daf_no_rep = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
61
class TestDAF(unittest.TestCase):
    """Tests for DAF parsing, RDF export and view-namespace construction.

    Assertions use the ``assert*`` method names throughout; the
    ``failUnless*`` spellings are deprecated aliases of the same methods.
    """

    def test_parse(self):
        # Parse the two-view sample DAF and check top-level keys and views.
        parsed = daf.fromstring(test_daf)

        self.assertEqual(parsed['assembly'], 'mm9')
        self.assertEqual(parsed['grant'], 'Hardison')
        self.assertEqual(len(parsed['variables']), 6)
        self.assertEqual(len(parsed['views']), 2)
        self.assertEqual(len(parsed['views']['FastqRd1']), 5)
        self.assertEqual(len(parsed['views']['Signal']), 5)
        signal = parsed['views']['Signal']
        # "required no" in the DAF text is expected to come back as False.
        self.assertEqual(signal['required'], False)
        self.assertEqual(signal['longLabelPrefix'],
                         'Caltech Histone Signal')

    def test_rdf(self):
        # Load the parsed DAF into an in-memory RDF model and inspect the
        # statements attached to the Signal view node.
        parsed = daf.fromstring(test_daf)
        #mem = RDF.Storage(storage_name='hashes',
        #                  options_string='hash-type="memory"'),
        mem = RDF.MemoryStorage()
        model = RDF.Model(mem)

        name = 'cursub'
        subNS = RDF.NS(str(submissionLog[name].uri))
        daf.add_to_model(model, parsed, submissionLog[name].uri)

        signal_view_node = RDF.Node(subNS['/view/Signal'].uri)

        writer = get_serializer()
        turtle = writer.serialize_model_to_string(model)

        # The serialized model must mention the Signal view URI.
        self.assertTrue(str(signal_view_node.uri) in turtle)

        statements = list(model.find_statements(
            RDF.Statement(
                signal_view_node, None, None)))
        self.assertEqual(len(statements), 6)
        # Distinct local name: ``name`` above is the submission name string.
        view_name = model.get_target(signal_view_node,
                                     dafTermOntology['name'])
        self.assertEqual(fromTypedNode(view_name), u'Signal')

    def test_get_view_namespace_from_string(self):
        # URL already ends with a slash: 'view/' is appended directly.
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub/"
        target = RDF.NS(url + 'view/')
        view_namespace = daf.get_view_namespace(url)
        self.assertEqual(view_namespace[''], target[''])

    def test_get_view_namespace_from_string_no_trailing_slash(self):
        # URL lacks the trailing slash: the result must still be '<url>/view/'.
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub"
        target = RDF.NS(url + '/view/')
        view_namespace = daf.get_view_namespace(url)
        self.assertEqual(view_namespace[''], target[''])

    def test_get_view_namespace_from_uri_node(self):
        # Same as the string case, but passing an RDF URI node instead.
        url = "http://jumpgate.caltech.edu/wiki/SubmissionLog/cursub/"
        node = RDF.Node(RDF.Uri(url))
        target = RDF.NS(url + 'view/')
        view_namespace = daf.get_view_namespace(node)
        self.assertEqual(view_namespace[''], target[''])
122
123
def load_daf_mapper(name, extra_statements=None, ns=None, test_daf=test_daf):
    """Build a DAFMapper for tests on top of a fresh model.

    name             -- submission name handed to daf.DAFMapper
    extra_statements -- optional turtle text parsed into the model before
                        the mapper is created
    ns               -- base URI used when parsing extra_statements;
                        "http://extra" when not supplied
    test_daf         -- DAF text wrapped in a StringIO and passed as the
                        mapper's daf_file
    """
    base_uri = "http://extra" if ns is None else ns
    model = get_model()

    if extra_statements is not None:
        turtle_parser = RDF.Parser(name='turtle')
        turtle_parser.parse_string_into_model(model, extra_statements,
                                              base_uri)

    return daf.DAFMapper(name, daf_file=StringIO(test_daf), model=model)
139
def dump_model(model):
    """Debugging aid: serialize the whole model and print it to stdout."""
    serialized = get_serializer().serialize_model_to_string(model)
    print(serialized)
144
class TestDAFMapper(unittest.TestCase):
    """Tests for daf.DAFMapper built through load_daf_mapper().

    Assertions use the ``assert*`` method names; the ``failUnless*``
    spellings are deprecated aliases of the same methods.
    """

    def test_create_mapper_add_pattern(self):
        # Adding a filename pattern to a view must create exactly one
        # filename_re statement on that view's node.
        name = 'testsub'
        mapper = load_daf_mapper(name)
        # Raw string: same characters as before, without relying on the
        # unrecognised escape sequence "\Z" being passed through.
        pattern = r'.bam\Z(?ms)'
        mapper.add_pattern('Signal', pattern)

        s = RDF.Statement(mapper.viewNS['Signal'],
                          dafTermOntology['filename_re'],
                          None)
        search = list(mapper.model.find_statements(s))
        self.assertEqual(len(search), 1)
        self.assertEqual(str(search[0].subject),
                         str(submissionLog['testsub/view/Signal']))
        self.assertEqual(str(search[0].predicate),
                         str(dafTermOntology['filename_re']))
        # Disabled check on the stored literal value:
        #self.assertEqual(search[0].object.literal_value['string'], pattern)

    def test_find_one_view(self):
        # Only the FastqRd1 pattern matches 'filename_r1.fastq'.
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        view = daf_mapper.find_view('filename_r1.fastq')

        # dump_model(daf_mapper.model)
        view_root = 'http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/'
        view_root = view_root.format(name)
        # [1:-1] drops the first and last character of str(view)
        # (presumably the delimiters around the URI -- confirm).
        self.assertEqual(str(view)[1:-1],
                         '{0}{1}'.format(view_root, 'FastqRd1'))

    def test_find_overlapping_view(self):
        # Both patterns match 'filename_r1.fastq', so the lookup is
        # ambiguous and must raise ModelException.
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:fastq dafTerm:filename_re ".*\\\\.fastq" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        self.assertRaises(daf.ModelException,
                          daf_mapper.find_view,
                          'filename_r1.fastq')

    def test_find_attributes(self):
        # NOTE(review): the comments below refer to values "in our server",
        # so this test appears to need access to the library server --
        # confirm before running offline.
        lib_id = '11204'
        lib_url = 'http://jumpgate.caltech.edu/library/%s/' % (lib_id)
        extra = '''@prefix dafTerm: <http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/view/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" ;
      submissionOntology:view_name "Signal" .
thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
        submissionOntology:view_name "FastqRd1" .
<%(libUrl)s> <%(libraryOntology)sgel_cut> "100"^^xsd:decimal .
''' % {'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
       'libUrl': lib_url}

        daf_mapper = load_daf_mapper('testfind', extra)
        libNode = RDF.Node(RDF.Uri(lib_url))
        daf_mapper._add_library_details_to_model(libNode)
        gel_cut = daf_mapper._get_library_attribute(libNode, 'gel_cut')
        # make sure we can override attributes, the value in our
        # server is 500 for this library
        self.assertEqual(gel_cut, 100)

        species = daf_mapper._get_library_attribute(libNode, 'species')
        self.assertEqual(species, "Homo sapiens")

        with mktempdir('analysis') as analysis_dir:
            # Only the final path component is needed for the submission URI.
            analysis_name = os.path.basename(analysis_dir)
            with mktempfile('.bam', dir=analysis_dir) as filename:
                daf_mapper.construct_track_attributes(analysis_dir,
                                                      libNode,
                                                      filename)

        #dump_model(daf_mapper.model)

        sub_root = "http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/"
        submission_name = sub_root + analysis_name
        source = daf_mapper.model.get_source(rdfNS['type'],
                                             submissionOntology['submission'])
        self.assertEqual(str(source.uri), submission_name)

        view_name = submission_name + '/Signal'
        view = daf_mapper.model.get_target(source,
                                           submissionOntology['has_view'])
        self.assertEqual(str(view.uri), view_name)

    def test_library_url(self):
        # library_url has a default value and can be reassigned.
        daf_mapper = load_daf_mapper('urltest')

        self.assertEqual(daf_mapper.library_url,
                         'http://jumpgate.caltech.edu/library/')
        daf_mapper.library_url = 'http://google.com'
        self.assertEqual(daf_mapper.library_url, 'http://google.com')

    def test_daf_with_replicate(self):
        # Default fixture: every view says "hasReplicates yes".
        daf_mapper = load_daf_mapper('test_rep')
        self.assertEqual(daf_mapper.need_replicate(), True)
        self.assertTrue('replicate' in daf_mapper.get_daf_variables())

    def test_daf_without_replicate(self):
        # Fixture whose only view says "hasReplicates no".
        daf_mapper = load_daf_mapper('test_rep', test_daf=test_daf_no_rep)
        self.assertEqual(daf_mapper.need_replicate(), False)
        self.assertTrue('replicate' not in daf_mapper.get_daf_variables())
259
@contextmanager
def mktempdir(prefix='tmp'):
    """Context manager yielding a freshly created temporary directory.

    prefix -- passed to tempfile.mkdtemp as the directory name prefix.

    The directory tree is removed when the ``with`` block exits, including
    when the block raises (for example on a failed test assertion), so a
    failing test no longer leaves the directory behind.
    """
    d = tempfile.mkdtemp(prefix=prefix)
    # A single pre-formatted argument prints the same text whether print
    # is the Python 2 statement or the print function.
    print("made %s" % (d,))
    try:
        yield d
    finally:
        shutil.rmtree(d)
        print("unmade %s" % (d,))
267
@contextmanager
def mktempfile(suffix='', prefix='tmp', dir=None):
    """Context manager yielding the path of a new temporary file.

    suffix, prefix, dir -- forwarded unchanged to tempfile.mkstemp.

    The file is created before the ``with`` body runs; on exit the
    descriptor returned by mkstemp is closed and the file is unlinked.
    Cleanup runs in a ``finally`` clause so it also happens when the body
    raises, and the "made" message is reported at creation time rather
    than at teardown.
    """
    fd, pathname = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
    # A single pre-formatted argument prints the same text whether print
    # is the Python 2 statement or the print function.
    print("made %s" % (pathname,))
    try:
        yield pathname
    finally:
        os.close(fd)
        os.unlink(pathname)
        print("unmade %s" % (pathname,))
276
277
def suite():
    """Return a TestSuite holding the TestDAF and TestDAFMapper tests.

    Uses TestLoader.loadTestsFromTestCase instead of the deprecated
    unittest.makeSuite helper; the loader's default method prefix is
    'test', matching the prefix that was passed explicitly before.
    """
    loader = unittest.TestLoader()
    # Distinct local name so the function's own name is not shadowed.
    tests = loader.loadTestsFromTestCase(TestDAF)
    tests.addTest(loader.loadTestsFromTestCase(TestDAFMapper))
    return tests
282
# When executed as a script, run the tests collected by suite() above.
if __name__ == "__main__":
    unittest.main(defaultTest='suite')