# gitweb blob 913b0813c8bffcf1c37aa33b082ab8f1c69902b2
# [htsworkflow.git] / htsworkflow / submission / test / test_daf.py
1 from contextlib import contextmanager
2 import os
3 from StringIO import StringIO
4 import shutil
5 import tempfile
6 import unittest
7
8 from htsworkflow.submission import daf
9 from htsworkflow.util.rdfhelp import \
10      dafTermOntology, \
11      fromTypedNode, \
12      rdfNS, \
13      submissionLog, \
14      submissionOntology, \
15      get_model, \
16      get_serializer
17
18 import RDF
19
# DAF fixture text shared by the tests in this module.  It declares the
# top-level submission attributes (grant, lab, variables, assembly, ...)
# followed by two view definitions, FastqRd1 and Signal.
test_daf = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq 
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    yes
required         no

view             Signal
longLabelPrefix  Caltech Histone Signal
type             bigWig
hasReplicates    yes
required         no
"""
43
class TestDAF(unittest.TestCase):
    """Tests for parsing the DAF fixture and loading it into an RDF model."""

    def test_parse(self):
        """daf.fromstring exposes top-level attributes and per-view settings."""
        parsed = daf.fromstring(test_daf)

        self.assertEqual(parsed['assembly'], 'mm9')
        self.assertEqual(parsed['grant'], 'Hardison')
        self.assertEqual(len(parsed['variables']), 6)
        self.assertEqual(len(parsed['views']), 2)
        self.assertEqual(len(parsed['views']['FastqRd1']), 5)
        self.assertEqual(len(parsed['views']['Signal']), 5)
        signal = parsed['views']['Signal']
        # The fixture says "required  no"; the parsed value is compared
        # against the boolean False.
        self.assertEqual(signal['required'], False)
        self.assertEqual(signal['longLabelPrefix'],
                         'Caltech Histone Signal')

    def test_rdf(self):
        """daf.add_to_model stores the Signal view under the submission URI."""
        parsed = daf.fromstring(test_daf)
        #mem = RDF.Storage(storage_name='hashes',
        #                  options_string='hash-type="memory"'),
        mem = RDF.MemoryStorage()
        model = RDF.Model(mem)

        name = 'cursub'
        subNS = RDF.NS(str(submissionLog[name].uri))
        daf.add_to_model(model, parsed, name)

        signal_view_node = RDF.Node(subNS['/view/Signal'].uri)

        writer = get_serializer()
        turtle = writer.serialize_model_to_string(model)

        # The serialized model must mention the Signal view's URI.
        self.assertTrue(str(signal_view_node.uri) in turtle)

        statements = list(model.find_statements(
            RDF.Statement(signal_view_node, None, None)))
        self.assertEqual(len(statements), 6)
        # Kept separate from ``name`` above, which holds the submission name.
        view_name = model.get_target(signal_view_node,
                                     dafTermOntology['name'])
        self.assertEqual(fromTypedNode(view_name), u'Signal')
85
def load_daf_mapper(name, extra_statements=None, ns=None):
    """Build a daf.DAFMapper for *name* backed by the module's DAF fixture.

    A model is obtained from get_model().  When *extra_statements* is not
    None it is parsed as Turtle into that model, with *ns* (defaulting to
    "http://extra") handed to the parser as its third argument.  The mapper
    is then created from a StringIO wrapping ``test_daf`` and returned.
    """
    model = get_model()
    base_uri = "http://extra" if ns is None else ns

    if extra_statements is not None:
        turtle_parser = RDF.Parser(name='turtle')
        turtle_parser.parse_string_into_model(model, extra_statements,
                                              base_uri)

    return daf.DAFMapper(name, daf_file=StringIO(test_daf), model=model)
101
def dump_model(model):
    """Print *model*, serialized via get_serializer(), to standard output.

    Debugging aid: the tests only reference it from commented-out calls.
    """
    writer = get_serializer()
    turtle = writer.serialize_model_to_string(model)
    # Parenthesised single-argument print behaves the same on Python 2 and 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print(turtle)
106     
class TestDAFMapper(unittest.TestCase):
    """Tests for daf.DAFMapper instances built by load_daf_mapper()."""

    def test_create_mapper_add_pattern(self):
        """add_pattern stores one filename_re statement on the Signal view."""
        name = 'testsub'
        mapper = load_daf_mapper(name)
        # Raw string: identical characters to the old literal, but no longer
        # dependent on Python passing the unrecognised "\Z" escape through.
        pattern = r'.bam\Z(?ms)'
        mapper.add_pattern('Signal', pattern)

        s = RDF.Statement(daf.get_view_namespace(name)['Signal'],
                          dafTermOntology['filename_re'],
                          None)
        search = list(mapper.model.find_statements(s))
        self.assertEqual(len(search), 1)
        self.assertEqual(str(search[0].subject),
                         str(submissionLog['testsub/view/Signal']))
        self.assertEqual(str(search[0].predicate),
                         str(dafTermOntology['filename_re']))
        #self.assertEqual(search[0].object.literal_value['string'], pattern)

    def test_find_one_view(self):
        """find_view returns the single view whose filename_re matches."""
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        view = daf_mapper.find_view('filename_r1.fastq')

        # dump_model(daf_mapper.model)
        view_root = 'http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/'
        view_root = view_root.format(name)
        self.assertEqual(str(view), '<{0}{1}>'.format(view_root, 'FastqRd1'))

    def test_find_overlapping_view(self):
        """find_view raises ModelException when two view patterns match."""
        name = 'testfind'
        # Both patterns below match 'filename_r1.fastq'.
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:fastq dafTerm:filename_re ".*\\\\.fastq" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        self.assertRaises(daf.ModelException,
                          daf_mapper.find_view,
                          'filename_r1.fastq')

    def test_find_attributes(self):
        """Library attributes resolve and file attributes land in the model.

        NOTE(review): _add_library_details_to_model presumably contacts the
        jumpgate server (see the "value in our server" remark below), so this
        test likely needs network access -- confirm.
        """
        lib_id = '11204'
        lib_url = 'http://jumpgate.caltech.edu/library/%s/' % (lib_id)
        extra = '''@prefix dafTerm: <http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/view/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" ;
      submissionOntology:view_name "Signal" .
thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
        submissionOntology:view_name "FastqRd1" .
<%(libUrl)s> <%(libraryOntology)sgel_cut> "100"^^xsd:decimal . 
''' % {'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
       'libUrl': lib_url}

        daf_mapper = load_daf_mapper('testfind', extra)
        libNode = RDF.Node(RDF.Uri(lib_url))
        daf_mapper._add_library_details_to_model(libNode)
        gel_cut = daf_mapper._get_library_attribute(libNode, 'gel_cut')
        # make sure we can override attributes, the value in our
        # server is 500 for this library
        self.assertEqual(gel_cut, 100)

        species = daf_mapper._get_library_attribute(libNode, 'species')
        self.assertEqual(species, "Homo sapiens")

        with mktempdir('analysis') as analysis_dir:
            # Only the directory's own name is needed for the expected URI.
            analysis_name = os.path.basename(analysis_dir)
            with mktempfile('.bam', dir=analysis_dir) as filename:
                print('dir %s' % (os.listdir(analysis_dir),))
                daf_mapper.construct_file_attributes(analysis_dir,
                                                     libNode,
                                                     filename)

        #dump_model(daf_mapper.model)

        sub_root = "http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/"
        submission_name = sub_root + analysis_name
        source = daf_mapper.model.get_source(rdfNS['type'],
                                             submissionOntology['submission'])
        self.assertEqual(str(source.uri), submission_name)

        view_name = submission_name + '/Signal'
        view = daf_mapper.model.get_target(source,
                                           submissionOntology['has_view'])
        self.assertEqual(str(view.uri), view_name)

    def test_library_url(self):
        """library_url has the jumpgate default and can be reassigned."""
        daf_mapper = load_daf_mapper('urltest')

        self.assertEqual(daf_mapper.library_url,
                         'http://jumpgate.caltech.edu/library/')
        daf_mapper.library_url = 'http://google.com'
        self.assertEqual(daf_mapper.library_url, 'http://google.com')
210
@contextmanager
def mktempdir(prefix='tmp'):
    """Context manager yielding the path of a new temporary directory.

    The directory is created with tempfile.mkdtemp using *prefix* and is
    removed together with its contents when the with-block exits, whether
    it finishes normally or raises.
    """
    d = tempfile.mkdtemp(prefix=prefix)
    print("made %s" % (d,))
    try:
        yield d
    finally:
        # Clean up even if the with-body raised, so failing tests do not
        # leave directories behind.
        shutil.rmtree(d)
        print("unmade %s" % (d,))
218
@contextmanager
def mktempfile(suffix='', prefix='tmp', dir=None):
    """Context manager yielding the path of a new temporary file.

    The file is created with tempfile.mkstemp using *suffix*, *prefix* and
    *dir*, and is unlinked when the with-block exits, whether it finishes
    normally or raises.
    """
    fd, pathname = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
    # Only the path is handed to the caller, so release the descriptor
    # straight away instead of holding it open for the whole with-body.
    os.close(fd)
    print("made %s" % (pathname,))
    try:
        yield pathname
    finally:
        # Remove the file even if the with-body raised.
        os.unlink(pathname)
        print("unmade %s" % (pathname,))
227
228     
def suite():
    """Return a test suite containing the TestDAF and TestDAFMapper tests."""
    # unittest.makeSuite is deprecated and removed in Python 3.13; a
    # TestLoader collects the same 'test'-prefixed methods by default.
    loader = unittest.TestLoader()
    tests = loader.loadTestsFromTestCase(TestDAF)
    tests.addTest(loader.loadTestsFromTestCase(TestDAFMapper))
    return tests
233
# When executed as a script, run the combined suite built by suite().
if __name__ == "__main__":
    unittest.main(defaultTest='suite')