Examine the DAF to determine if the DDF needs to include replicate information
[htsworkflow.git] / htsworkflow / submission / test / test_daf.py
1 from contextlib import contextmanager
2 import os
3 from StringIO import StringIO
4 import shutil
5 import tempfile
6 import unittest
7
8 from htsworkflow.submission import daf
9 from htsworkflow.util.rdfhelp import \
10      dafTermOntology, \
11      fromTypedNode, \
12      rdfNS, \
13      submissionLog, \
14      submissionOntology, \
15      get_model, \
16      get_serializer
17
18 import RDF
19
# Sample DAF text used as the default fixture in this module: general lab
# settings followed by two view definitions (FastqRd1 and Signal), each of
# which declares "hasReplicates yes".
test_daf = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq 
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    yes
required         no

view             Signal
longLabelPrefix  Caltech Histone Signal
type             bigWig
hasReplicates    yes
required         no
"""
43
# Variant DAF fixture with a single view (FastqRd1) that declares
# "hasReplicates no"; the general lab settings match those of test_daf.
test_daf_no_rep = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq 
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    no
required         no
"""
61
class TestDAF(unittest.TestCase):
    """Check DAF parsing and the loading of a parsed DAF into an RDF model."""

    def test_parse(self):
        """daf.fromstring exposes the header fields and both views."""
        parsed = daf.fromstring(test_daf)

        self.assertEqual(parsed['assembly'], 'mm9')
        self.assertEqual(parsed['grant'], 'Hardison')
        self.assertEqual(len(parsed['variables']), 6)
        self.assertEqual(len(parsed['views']), 2)
        self.assertEqual(len(parsed['views']['FastqRd1']), 5)
        self.assertEqual(len(parsed['views']['Signal']), 5)

        signal = parsed['views']['Signal']
        # The DAF text says "required no"; the parsed value is compared
        # against the boolean False.
        self.assertEqual(signal['required'], False)
        self.assertEqual(signal['longLabelPrefix'],
                         'Caltech Histone Signal')

    def test_rdf(self):
        """daf.add_to_model stores statements about the Signal view."""
        parsed = daf.fromstring(test_daf)
        model = RDF.Model(RDF.MemoryStorage())

        name = 'cursub'
        subNS = RDF.NS(str(submissionLog[name].uri))
        daf.add_to_model(model, parsed, name)

        signal_view_node = RDF.Node(subNS['/view/Signal'].uri)

        # The view URI must show up in the serialized form of the model.
        serialized = get_serializer().serialize_model_to_string(model)
        self.assertTrue(str(signal_view_node.uri) in serialized)

        # Every statement whose subject is the Signal view node.
        statements = list(model.find_statements(
            RDF.Statement(signal_view_node, None, None)))
        self.assertEqual(len(statements), 6)

        # Use a dedicated variable rather than rebinding ``name``, which
        # holds the submission name string above.
        view_name_node = model.get_target(signal_view_node,
                                          dafTermOntology['name'])
        self.assertEqual(fromTypedNode(view_name_node), u'Signal')
103
def load_daf_mapper(name, extra_statements=None, ns=None, test_daf=test_daf):
    """Build a daf.DAFMapper backed by a fresh model for use in tests.

    name is handed to DAFMapper unchanged.  When extra_statements is
    given it is parsed as turtle into the model before the mapper is
    created, using ns as the base URI ("http://extra" if ns is None).
    test_daf is the DAF text, wrapped in a StringIO and passed to the
    mapper as its daf_file.
    """
    model = get_model()

    if extra_statements is not None:
        base_uri = "http://extra" if ns is None else ns
        turtle_parser = RDF.Parser(name='turtle')
        turtle_parser.parse_string_into_model(model, extra_statements,
                                              base_uri)

    return daf.DAFMapper(name, daf_file=StringIO(test_daf), model=model)
119
def dump_model(model):
    """Print model as serialized by the writer from get_serializer().

    Debugging helper; it returns nothing.
    """
    print(get_serializer().serialize_model_to_string(model))
124     
class TestDAFMapper(unittest.TestCase):
    """Tests for daf.DAFMapper built through load_daf_mapper."""

    def test_create_mapper_add_pattern(self):
        """add_pattern records one filename_re statement for the view."""
        name = 'testsub'
        mapper = load_daf_mapper(name)
        # Raw string: same value as before, without relying on "\Z" being
        # an unrecognized (and therefore preserved) escape sequence.
        pattern = r'.bam\Z(?ms)'
        mapper.add_pattern('Signal', pattern)

        query = RDF.Statement(daf.get_view_namespace(name)['Signal'],
                              dafTermOntology['filename_re'],
                              None)
        search = list(mapper.model.find_statements(query))
        self.assertEqual(len(search), 1)
        self.assertEqual(str(search[0].subject),
                         str(submissionLog['testsub/view/Signal']))
        self.assertEqual(str(search[0].predicate),
                         str(dafTermOntology['filename_re']))
        # TODO: the stored literal is not compared against ``pattern``;
        # that check was already disabled in this test.

    def test_find_one_view(self):
        """find_view returns the single view whose pattern matches."""
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        view = daf_mapper.find_view('filename_r1.fastq')

        view_root = 'http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/'
        view_root = view_root.format(name)
        self.assertEqual(str(view), '<{0}{1}>'.format(view_root, 'FastqRd1'))

    def test_find_overlapping_view(self):
        """find_view raises ModelException when two patterns both match."""
        name = 'testfind'
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/{0}/view/> .

thisView:fastq dafTerm:filename_re ".*\\\\.fastq" .
thisView:FastqRd1 dafTerm:filename_re ".*_r1\\\\.fastq" .
'''.format(name)
        daf_mapper = load_daf_mapper(name, extra_statements=extra)

        self.assertRaises(daf.ModelException,
                          daf_mapper.find_view,
                          'filename_r1.fastq')

    def test_find_attributes(self):
        """Library attributes and file attributes end up in the model."""
        lib_id = '11204'
        lib_url = 'http://jumpgate.caltech.edu/library/%s/' % (lib_id)
        extra = '''@prefix dafTerm: <http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#> .
@prefix thisView: <http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/view/> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

thisView:Signal dafTerm:filename_re ".*\\\\.bam" ;
      submissionOntology:view_name "Signal" .
thisView:FastqRd1 dafTerm:filename_re ".*\\\\.fastq" ;
        submissionOntology:view_name "FastqRd1" .
<%(libUrl)s> <%(libraryOntology)sgel_cut> "100"^^xsd:decimal . 
''' % {'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
       'libUrl': lib_url}

        daf_mapper = load_daf_mapper('testfind', extra)
        libNode = RDF.Node(RDF.Uri(lib_url))
        # NOTE(review): presumably this pulls the library's details from
        # the server at lib_url -- confirm; if so this test needs network
        # access.
        daf_mapper._add_library_details_to_model(libNode)
        gel_cut = daf_mapper._get_library_attribute(libNode, 'gel_cut')
        # make sure we can override attributes, the value in our
        # server is 500 for this library
        self.assertEqual(gel_cut, 100)

        species = daf_mapper._get_library_attribute(libNode, 'species')
        self.assertEqual(species, "Homo sapiens")

        with mktempdir('analysis') as analysis_dir:
            # Only the final path component is needed for the checks below.
            analysis_name = os.path.basename(analysis_dir)
            with mktempfile('.bam', dir=analysis_dir) as filename:
                print('dir %s' % (os.listdir(analysis_dir),))
                daf_mapper.construct_file_attributes(analysis_dir,
                                                     libNode,
                                                     filename)

        sub_root = "http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/"
        submission_name = sub_root + analysis_name
        source = daf_mapper.model.get_source(rdfNS['type'],
                                             submissionOntology['submission'])
        self.assertEqual(str(source.uri), submission_name)

        view_name = submission_name + '/Signal'
        view = daf_mapper.model.get_target(source,
                                           submissionOntology['has_view'])
        self.assertEqual(str(view.uri), view_name)

    def test_library_url(self):
        """library_url has a default value and can be reassigned."""
        daf_mapper = load_daf_mapper('urltest')

        self.assertEqual(daf_mapper.library_url,
                         'http://jumpgate.caltech.edu/library/')
        daf_mapper.library_url = 'http://google.com'
        self.assertEqual(daf_mapper.library_url, 'http://google.com')

    def test_daf_with_replicate(self):
        """A DAF whose views have replicates reports need_replicate()."""
        daf_mapper = load_daf_mapper('test_rep')
        self.assertEqual(daf_mapper.need_replicate(), True)
        self.assertTrue('replicate' in daf_mapper.get_daf_variables())

    def test_daf_without_replicate(self):
        """A DAF whose only view has no replicates omits 'replicate'."""
        daf_mapper = load_daf_mapper('test_rep', test_daf=test_daf_no_rep)
        self.assertEqual(daf_mapper.need_replicate(), False)
        self.assertTrue('replicate' not in daf_mapper.get_daf_variables())
239         
@contextmanager
def mktempdir(prefix='tmp'):
    """Context manager yielding the path of a new temporary directory.

    The directory is created with tempfile.mkdtemp(prefix=prefix) and is
    removed, together with its contents, when the ``with`` block exits.
    Removal happens in a ``finally`` clause so the directory does not
    leak when the body raises (e.g. on a failing assertion).
    """
    d = tempfile.mkdtemp(prefix=prefix)
    # Single-argument print call: same output under Python 2 and 3.
    print("made %s" % (d,))
    try:
        yield d
    finally:
        shutil.rmtree(d)
        print("unmade %s" % (d,))
247
@contextmanager
def mktempfile(suffix='', prefix='tmp', dir=None):
    """Context manager yielding the path of a new temporary file.

    The file is created with tempfile.mkstemp using the given suffix,
    prefix and dir.  On exit the descriptor returned by mkstemp is closed
    and the file is unlinked; this happens in a ``finally`` clause so
    neither leaks when the body raises.
    """
    fd, pathname = tempfile.mkstemp(suffix=suffix, prefix=prefix, dir=dir)
    # Report creation when it happens, before the body runs.
    print("made %s" % (pathname,))
    try:
        yield pathname
    finally:
        try:
            os.close(fd)
        finally:
            # Remove the file even if closing the descriptor failed.
            os.unlink(pathname)
        print("unmade %s" % (pathname,))
256
257     
def suite():
    """Return a suite of the TestDAF tests plus the TestDAFMapper tests.

    Both groups are collected with the 'test' method prefix; the mapper
    tests are added to the TestDAF suite as a nested suite.
    """
    combined = unittest.makeSuite(TestDAF, 'test')
    mapper_tests = unittest.makeSuite(TestDAFMapper, 'test')
    combined.addTest(mapper_tests)
    return combined
262
# When executed as a script, run the tests returned by the module-level
# suite() function (looked up by name through defaultTest).
if __name__ == "__main__":
    unittest.main(defaultTest='suite')