5d647f6c72c6e254e34acb652cb3ed36fd26ab4f
[htsworkflow.git] / htsworkflow / submission / test / test_daf.py
1 from contextlib import contextmanager
2 import os
3 from StringIO import StringIO
4 import shutil
5 import tempfile
6 import unittest
7
8 from htsworkflow.submission import daf
9 from htsworkflow.util.rdfhelp import \
10      dafTermOntology, \
11      fromTypedNode, \
12      rdfNS, \
13      submissionLog, \
14      submissionOntology, \
15      get_model, \
16      get_serializer
17
18 import RDF
19
# DAF fixture shared by the tests in this module: a block of general
# submission settings followed by two view definitions (FastqRd1 and Signal).
test_daf = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq 
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    yes
required         no

view             Signal
longLabelPrefix  Caltech Histone Signal
type             bigWig
hasReplicates    yes
required         no
"""
43
class TestDAF(unittest.TestCase):
    """Check parsing of the DAF fixture and loading it into an RDF model."""

    def test_parse(self):
        # Parse the module-level fixture and spot-check the resulting mapping.
        parsed = daf.fromstring(test_daf)

        self.assertEqual(parsed['assembly'], 'mm9')
        self.assertEqual(parsed['grant'], 'Hardison')
        # The fixture lists six comma-separated variables and two views,
        # each view carrying five attributes.
        self.assertEqual(len(parsed['variables']), 6)
        self.assertEqual(len(parsed['views']), 2)
        self.assertEqual(len(parsed['views']['FastqRd1']), 5)
        self.assertEqual(len(parsed['views']['Signal']), 5)

        signal = parsed['views']['Signal']
        # "required no" in the fixture is expected back as a boolean False.
        self.assertEqual(signal['required'], False)
        self.assertEqual(signal['longLabelPrefix'],
                         'Caltech Histone Signal')

    def test_rdf(self):
        # Load the parsed fixture into a fresh in-memory RDF model.
        parsed = daf.fromstring(test_daf)
        model = RDF.Model(RDF.MemoryStorage())

        submission_name = 'cursub'
        subNS = RDF.NS(str(submissionLog[submission_name].uri))
        daf.add_to_model(model, parsed, submission_name)

        signal_view_node = RDF.Node(subNS['/view/Signal'].uri)

        # The Signal view URI must show up in the serialized model.
        turtle = get_serializer().serialize_model_to_string(model)
        self.assertTrue(str(signal_view_node.uri) in turtle)

        # Six statements are expected with the Signal view as their subject.
        statements = list(model.find_statements(
            RDF.Statement(signal_view_node, None, None)))
        self.assertEqual(len(statements), 6)

        view_name = model.get_target(signal_view_node,
                                     dafTermOntology['name'])
        self.assertEqual(fromTypedNode(view_name), u'Signal')
85
def load_daf_mapper(name, extra_statements=None):
    """Build a daf.DAFMapper for ``name`` from the module's DAF fixture.

    A model is obtained from get_model().  When ``extra_statements`` is
    given it is parsed as turtle into that model (with base URI
    'http://extra.extra') before the mapper is constructed.  The mapper
    reads the DAF text from an in-memory stream wrapping ``test_daf``.
    """
    rdf_model = get_model()

    if extra_statements is not None:
        RDF.Parser(name='turtle').parse_string_into_model(
            rdf_model, extra_statements, 'http://extra.extra')

    return daf.DAFMapper(name,
                         daf_file=StringIO(test_daf),
                         model=rdf_model)
98
def dump_model(model):
    """Print ``model`` to stdout, serialized with get_serializer().

    Debugging aid only; nothing is returned.
    """
    # A single parenthesized argument prints the same text whether print
    # is a statement or a function.
    print(get_serializer().serialize_model_to_string(model))
103     
class TestDAFMapper(unittest.TestCase):
    """Tests for daf.DAFMapper built from the module's DAF fixture."""

    # Base URL used when writing submission/view URIs into turtle snippets.
    submission_log_url = 'http://jumpgate.caltech.edu/wiki/SubmissionsLog'

    def test_create_mapper_add_pattern(self):
        name = 'testsub'
        mapper = load_daf_mapper(name)
        # Raw string: the same characters as the previous literal, without
        # depending on an unrecognised backslash escape being left intact.
        pattern = r'.bam\Z(?ms)'
        mapper.add_pattern('Signal', pattern)

        # Exactly one filename_re statement should now hang off the view.
        query = RDF.Statement(daf.get_view_namespace(name)['Signal'],
                              dafTermOntology['filename_re'],
                              None)
        search = list(mapper.model.find_statements(query))
        self.assertEqual(len(search), 1)
        self.assertEqual(str(search[0].subject),
                         str(submissionLog['testsub/view/Signal']))
        self.assertEqual(str(search[0].predicate),
                         str(dafTermOntology['filename_re']))
        # TODO: also compare the stored literal with ``pattern``; that
        # check was disabled in the original test.

    def test_find_one_view(self):
        # Only the FastqRd1 pattern matches the filename below.
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .

<%(submissionLog)s/testfind/view/Signal> dafTerm:filename_re ".*\\\\.bam" .
<%(submissionLog)s/testfind/view/FastqRd1> dafTerm:filename_re ".*_r1\\\\.fastq" .
''' % {'submissionLog': self.submission_log_url}

        daf_mapper = load_daf_mapper('testfind', extra_statements=extra)

        view = daf_mapper.find_view('filename_r1.fastq')
        self.assertEqual(str(view),
                         str(submissionLog['testfind/view/FastqRd1']))

    def test_find_overlapping_view(self):
        # Both patterns match the same filename, so the lookup must fail.
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .

<%(submissionLog)s/testfind/view/fastq> dafTerm:filename_re ".*\\\\.fastq" .
<%(submissionLog)s/testfind/view/FastqRd1> dafTerm:filename_re ".*_r1\\\\.fastq" .
''' % {'submissionLog': self.submission_log_url}

        daf_mapper = load_daf_mapper('testfind', extra_statements=extra)

        self.assertRaises(daf.ModelException,
                          daf_mapper.find_view,
                          'filename_r1.fastq')

    def test_find_attributes(self):
        lib_id = '11204'
        lib_url = 'http://jumpgate.caltech.edu/library/%s' % (lib_id)
        extra = '''@prefix dafTerm: <http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<%(submissionLog)s/testfind/view/Signal> dafTerm:filename_re ".*\\\\.bam" .
<%(submissionLog)s/testfind/view/FastqRd1> dafTerm:filename_re ".*\\\\.fastq" .
<%(libUrl)s> <%(libraryOntology)sgel_cut> "100"^^xsd:decimal . 
''' % {'submissionLog': self.submission_log_url,
       'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
       'libUrl': lib_url}

        daf_mapper = load_daf_mapper('testfind', extra)
        libNode = RDF.Node(RDF.Uri(lib_url))
        daf_mapper._add_library_details_to_model(libNode)

        # The locally supplied gel_cut (100) must take precedence; per the
        # original author's note the server holds 500 for this library.
        # NOTE(review): this and the species check appear to depend on
        # library details obtained elsewhere -- confirm whether the test
        # needs access to that server.
        gel_cut = daf_mapper._get_library_attribute(libNode, 'gel_cut')
        self.assertEqual(gel_cut, 100)

        species = daf_mapper._get_library_attribute(libNode, 'species')
        self.assertEqual(species, "Homo sapiens")

        with mktempdir('analysis') as analysis_dir:
            # Only the final path component is needed for the URI check.
            analysis_name = os.path.basename(analysis_dir)
            with mktempfile('.bam', dir=analysis_dir) as filename:
                print('dir %s' % (os.listdir(analysis_dir),))
                daf_mapper.construct_file_attributes(analysis_dir,
                                                     libNode,
                                                     filename)

        # The submission node is expected to be named after the analysis
        # directory, with the Signal view hanging off it.
        sub_root = self.submission_log_url + "/testfind/"
        submission_name = sub_root + analysis_name
        source = daf_mapper.model.get_source(rdfNS['type'],
                                             submissionOntology['submission'])
        self.assertEqual(str(source.uri), submission_name)

        view_name = submission_name + '/Signal'
        view = daf_mapper.model.get_target(source,
                                           submissionOntology['has_view'])
        self.assertEqual(str(view.uri), view_name)

    def test_library_url(self):
        daf_mapper = load_daf_mapper('urltest')

        # Default value first, then confirm the attribute can be reassigned.
        self.assertEqual(daf_mapper.library_url,
                         'http://jumpgate.caltech.edu/library/')
        daf_mapper.library_url = 'http://google.com'
        self.assertEqual(daf_mapper.library_url, 'http://google.com')
201
@contextmanager
def mktempdir(prefix='tmp'):
    """Context manager that yields a newly created temporary directory.

    The directory is created with tempfile.mkdtemp(prefix=prefix) and is
    removed, together with its contents, when the ``with`` block exits --
    including when the block raises.  Progress is reported on stdout.
    """
    d = tempfile.mkdtemp(prefix=prefix)
    print("made %s" % (d,))
    try:
        yield d
    finally:
        # Clean up even if the body of the with-statement failed.
        shutil.rmtree(d)
        print("unmade %s" % (d,))
209
@contextmanager
def mktempfile(suffix='', prefix='tmp', dir=None):
    """Context manager that yields the path of a new, empty temporary file.

    The file is created with tempfile.mkstemp(); ``suffix``, ``prefix`` and
    ``dir`` are passed straight through.  The descriptor returned by
    mkstemp is closed immediately, so only the path is handed to the
    caller.  The file is deleted when the ``with`` block exits, including
    when the block raises.  Progress is reported on stdout.
    """
    fd, pathname = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
    # Callers only receive the path, so there is no reason to keep the
    # descriptor open while the body runs.
    os.close(fd)
    print("made %s" % (pathname,))
    try:
        yield pathname
    finally:
        # Remove the file even if the body of the with-statement failed.
        os.unlink(pathname)
        print("unmade %s" % (pathname,))
218     
def suite():
    """Return a TestSuite holding the tests of TestDAF and TestDAFMapper.

    Each test case class contributes its own sub-suite, collected with the
    default loader (methods whose names start with 'test').
    """
    loader = unittest.TestLoader()
    # One sub-suite per test case class, in the same order as before.
    return unittest.TestSuite(
        [loader.loadTestsFromTestCase(case)
         for case in (TestDAF, TestDAFMapper)])
223
# When run as a script, hand control to unittest and have it execute the
# tests returned by this module's suite() function.
if __name__ == "__main__":
    unittest.main(defaultTest='suite')