5da4dc2faff3217fdf7386849add99c089ccc2d8
[htsworkflow.git] / htsworkflow / submission / test / test_daf.py
1 from StringIO import StringIO
2 import unittest
3
4 from htsworkflow.submission import daf
5 from htsworkflow.util.rdfhelp import \
6      dafTermOntology, \
7      fromTypedNode, \
8      rdfNS, \
9      submissionLog, \
10      submissionOntology, \
11      get_model, \
12      get_serializer
13
14 import RDF
15
# Sample DAF text shared by the tests in this module: a block of general
# settings followed by two view definitions (FastqRd1 and Signal).
# The text is fed to the parser as-is, so its whitespace is left untouched.
test_daf = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq 
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    yes
required         no

view             Signal
longLabelPrefix  Caltech Histone Signal
type             bigWig
hasReplicates    yes
required         no
"""
39
class TestDAF(unittest.TestCase):
    """Check parsing of the sample DAF text and loading it into an RDF model."""

    def test_parse(self):
        """Parse the sample DAF and verify general settings and both views."""
        parsed = daf.fromstring(test_daf)

        self.assertEqual(parsed['assembly'], 'mm9')
        self.assertEqual(parsed['grant'], 'Hardison')
        self.assertEqual(len(parsed['variables']), 6)
        self.assertEqual(len(parsed['views']), 2)
        # Each view is expected to expose five entries.
        self.assertEqual(len(parsed['views']['FastqRd1']), 5)
        self.assertEqual(len(parsed['views']['Signal']), 5)
        signal = parsed['views']['Signal']
        # "required no" in the DAF text must come back as a boolean False.
        self.assertEqual(signal['required'], False)
        self.assertEqual(signal['longLabelPrefix'],
                         'Caltech Histone Signal')

    def test_rdf(self):
        """Add the parsed DAF to an in-memory model and inspect the Signal view."""
        parsed = daf.fromstring(test_daf)
        mem = RDF.MemoryStorage()
        model = RDF.Model(mem)

        name = 'cursub'
        subNS = RDF.NS(str(submissionLog[name].uri))
        daf.add_to_model(model, parsed, name)

        signal_view_node = RDF.Node(subNS['/view/Signal'].uri)

        writer = get_serializer()
        turtle = writer.serialize_model_to_string(model)

        # The serialized model must mention the Signal view URI.
        self.assertTrue(str(signal_view_node.uri) in turtle)

        statements = list(model.find_statements(
            RDF.Statement(signal_view_node, None, None)))
        self.assertEqual(len(statements), 6)
        # Separate local so the submission name above is not rebound to a node.
        view_name = model.get_target(signal_view_node,
                                     dafTermOntology['name'])
        self.assertEqual(fromTypedNode(view_name), u'Signal')
81
def load_daf_mapper(name, extra_statements=None):
    """Create a DAFMapper for submission ``name`` from the sample DAF text.

    A fresh model is obtained from get_model().  When ``extra_statements``
    is not None it is parsed as turtle into that model (with
    'http://extra.extra' passed as the parser's third argument) before the
    mapper is constructed.  The mapper reads the DAF from an in-memory
    stream wrapping ``test_daf``.

    Returns the constructed daf.DAFMapper.
    """
    rdf_model = get_model()

    if extra_statements is not None:
        turtle_parser = RDF.Parser(name='turtle')
        turtle_parser.parse_string_into_model(
            rdf_model, extra_statements, 'http://extra.extra')

    daf_stream = StringIO(test_daf)
    return daf.DAFMapper(name, daf_file=daf_stream, model=rdf_model)
94
def dump_model(model):
    """Print ``model`` serialized to a string; a debugging aid for tests.

    The text is produced by the serializer returned from get_serializer().
    """
    writer = get_serializer()
    turtle = writer.serialize_model_to_string(model)
    # Parenthesized form: prints the same text under the Python 2 print
    # statement and is also valid as a Python 3 function call.
    print(turtle)
99     
class TestDAFMapper(unittest.TestCase):
    """Exercise DAFMapper: view patterns, view lookup and library attributes."""

    def test_create_mapper_add_pattern(self):
        """add_pattern records exactly one filename_re statement on a view."""
        name = 'testsub'
        mapper = load_daf_mapper(name)
        # Raw string: identical characters to the previous literal, without
        # depending on an unrecognized backslash escape being passed through.
        pattern = r'.bam\Z(?ms)'
        mapper.add_pattern('Signal', pattern)

        s = RDF.Statement(daf.get_view_namespace(name)['Signal'],
                          dafTermOntology['filename_re'],
                          None)
        search = list(mapper.model.find_statements(s))
        self.assertEqual(len(search), 1)
        self.assertEqual(str(search[0].subject),
                         str(submissionLog['testsub/view/Signal']))
        self.assertEqual(str(search[0].predicate),
                         str(dafTermOntology['filename_re']))
        # The stored literal itself is not verified; this check was already
        # disabled in the test:
        #self.assertEqual(search[0].object.literal_value['string'], pattern)

    def test_find_one_view(self):
        """A filename matching a single view pattern resolves to that view."""
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .

<%(submissionLog)s/testfind/view/Signal> dafTerm:filename_re ".*\\\\.bam" .
<%(submissionLog)s/testfind/view/FastqRd1> dafTerm:filename_re ".*_r1\\\\.fastq" .
''' % {'submissionLog': 'http://jumpgate.caltech.edu/wiki/SubmissionsLog'}

        daf_mapper = load_daf_mapper('testfind', extra_statements=extra)

        view = daf_mapper.find_view('filename_r1.fastq')
        self.assertEqual(str(view),
                         str(submissionLog['testfind/view/FastqRd1']))

    def test_find_overlapping_view(self):
        """A filename matching two view patterns raises daf.ModelException."""
        extra = '''@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .

<%(submissionLog)s/testfind/view/fastq> dafTerm:filename_re ".*\\\\.fastq" .
<%(submissionLog)s/testfind/view/FastqRd1> dafTerm:filename_re ".*_r1\\\\.fastq" .
''' % {'submissionLog': 'http://jumpgate.caltech.edu/wiki/SubmissionsLog'}

        daf_mapper = load_daf_mapper('testfind', extra_statements=extra)

        self.assertRaises(daf.ModelException,
                          daf_mapper.find_view,
                          'filename_r1.fastq')

    def test_find_attributes(self):
        """Library attributes can be overridden and file attributes built."""
        lib_id = '11204'
        lib_url = 'http://jumpgate.caltech.edu/library/%s' % (lib_id)
        extra = '''@prefix dafTerm: <http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<%(submissionLog)s/testfind/view/Signal> dafTerm:filename_re ".*\\\\.bam" .
<%(submissionLog)s/testfind/view/FastqRd1> dafTerm:filename_re ".*\\\\.fastq" .
<%(libUrl)s> <%(libraryOntology)sgel_cut> "100"^^xsd:decimal . 
''' % {'submissionLog': 'http://jumpgate.caltech.edu/wiki/SubmissionsLog',
       'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
       'libUrl': lib_url}

        daf_mapper = load_daf_mapper('testfind', extra)
        libNode = RDF.Node(RDF.Uri(lib_url))
        # NOTE(review): this presumably pulls the library record from the
        # jumpgate server, so the test needs that server -- confirm.
        daf_mapper._add_library_details_to_model(libNode)
        gel_cut = daf_mapper._get_library_attribute(libNode, 'gel_cut')
        # make sure we can override attributes, the value in our
        # server is 500 for this library
        self.assertEqual(gel_cut, 100)

        species = daf_mapper._get_library_attribute(libNode, 'species')
        self.assertEqual(species, "Homo sapiens")

        daf_mapper.construct_file_attributes(
            '/tmp/analysis1', libNode, 'filename.bam')
        source = daf_mapper.model.get_source(
            rdfNS['type'], submissionOntology['submission'])
        self.assertEqual(
            str(source.uri),
            "http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/analysis1")
        view = daf_mapper.model.get_target(
            source, submissionOntology['has_view'])
        self.assertEqual(
            str(view.uri),
            "http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/analysis1/Signal")

    def test_library_url(self):
        """library_url has a default value and can be reassigned."""
        daf_mapper = load_daf_mapper('urltest')

        self.assertEqual(daf_mapper.library_url,
                         'http://jumpgate.caltech.edu/library/')
        daf_mapper.library_url = 'http://google.com'
        self.assertEqual(daf_mapper.library_url, 'http://google.com')
185         
def suite():
    """Return a TestSuite holding the tests of TestDAF and TestDAFMapper.

    Test methods are collected with the loader's default 'test' prefix,
    the same prefix previously passed to unittest.makeSuite.
    """
    # loadTestsFromTestCase replaces the deprecated unittest.makeSuite.
    loader = unittest.TestLoader()
    # Local name differs from the function name so the function is not shadowed.
    tests = loader.loadTestsFromTestCase(TestDAF)
    tests.addTest(loader.loadTestsFromTestCase(TestDAFMapper))
    return tests
190
# When executed as a script, run the tests collected by suite().
if __name__ == '__main__':
    unittest.main(defaultTest="suite")