Rework ucsc gather to use RDF models for gathering and storing track metadata.
[htsworkflow.git] / htsworkflow / submission / test / test_daf.py
1 from StringIO import StringIO
2 import unittest
3
4 from htsworkflow.submission import daf
5 from htsworkflow.util.rdfhelp import \
6      dafTermOntology, \
7      rdfNS, \
8      submissionLog, \
9      submissionOntology, \
10      get_model, \
11      get_serializer
12
13 import RDF
14
# Sample DAF text shared by the tests in this module: general settings
# followed by two view definitions (FastqRd1 and Signal).
test_daf = """# Lab and general info
grant             Hardison
lab               Caltech-m
dataType          ChipSeq 
variables         cell, antibody,sex,age,strain,control
compositeSuffix   CaltechHistone
assembly          mm9
dafVersion        2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view             FastqRd1
longLabelPrefix  Caltech Fastq Read 1
type             fastq
hasReplicates    yes
required         no

view             Signal
longLabelPrefix  Caltech Histone Signal
type             bigWig
hasReplicates    yes
required         no
"""
38
class TestDAF(unittest.TestCase):
    """Check DAF parsing and loading a parsed DAF into an RDF model."""

    def test_parse(self):
        """Parse the sample DAF and verify general fields and both views."""
        parsed = daf.fromstring(test_daf)

        self.assertEqual(parsed['assembly'], 'mm9')
        self.assertEqual(parsed['grant'], 'Hardison')
        # The sample 'variables' line lists six comma separated names.
        self.assertEqual(len(parsed['variables']), 6)
        self.assertEqual(len(parsed['views']), 2)
        # Each view block in the sample has five lines, counting 'view'.
        self.assertEqual(len(parsed['views']['FastqRd1']), 5)
        self.assertEqual(len(parsed['views']['Signal']), 5)

        signal = parsed['views']['Signal']
        # 'required no' in the sample is expected to come back as False.
        self.assertEqual(signal['required'], False)
        self.assertEqual(signal['longLabelPrefix'],
                         'Caltech Histone Signal')

    def test_rdf(self):
        """Add the parsed DAF to an in-memory model and inspect Signal."""
        parsed = daf.fromstring(test_daf)
        mem = RDF.MemoryStorage()
        model = RDF.Model(mem)

        name = 'cursub'
        subNS = RDF.NS(str(submissionLog[name].uri))
        daf.add_to_model(model, parsed, name)

        signal_view_node = RDF.Node(subNS['/view/Signal'].uri)
        writer = get_serializer()
        turtle = writer.serialize_model_to_string(model)

        # The Signal view node must show up in the serialized model.
        self.assertTrue(str(signal_view_node) in turtle)

        # Any statement whose subject is the Signal view node.
        statements = list(model.find_statements(
            RDF.Statement(signal_view_node, None, None)))
        self.assertEqual(len(statements), 5)
78
79
def dump_model(model):
    """Serialize model with rdfhelp's get_serializer() and print the text.

    Debugging helper; it is not called by the tests visible in this module.
    """
    writer = get_serializer()
    turtle = writer.serialize_model_to_string(model)
    # Function-call form prints the same text on Python 2 and also parses
    # on Python 3, unlike the bare print statement.
    print(turtle)
84     
class TestDAFMapper(unittest.TestCase):
    """Exercise daf.DAFMapper: adding patterns, finding views, attributes."""

    def test_create_mapper_add_pattern(self):
        """A pattern added to a view is stored as one filename_re statement."""
        name = 'testsub'
        test_daf_stream = StringIO(test_daf)
        mapper = daf.DAFMapper(name, daf_file=test_daf_stream)
        # Raw string keeps the exact same characters without depending on
        # Python leaving an unrecognised backslash escape untouched.
        pattern = r'.bam\Z(?ms)'
        mapper.add_pattern('Signal', pattern)

        s = RDF.Statement(daf.get_view_namespace(name)['Signal'],
                          dafTermOntology['filename_re'],
                          None)
        search = list(mapper.model.find_statements(s))
        self.assertEqual(len(search), 1)
        self.assertEqual(str(search[0].subject),
                         str(submissionLog['testsub/view/Signal']))
        self.assertEqual(str(search[0].predicate),
                         str(dafTermOntology['filename_re']))
        # TODO: the original also had a disabled check comparing
        # search[0].object.literal_value['string'] against pattern.

    def test_find_one_view(self):
        """A filename matching exactly one filename_re maps to that view."""
        model = get_model()

        parser = RDF.Parser(name='turtle')
        parser.parse_string_into_model(model, '''
@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .

<%(submissionLog)s/testfind/view/Signal> dafTerm:filename_re ".*\\\\.bam" .
<%(submissionLog)s/testfind/view/FastqRd1> dafTerm:filename_re ".*_r1\\\\.fastq" .
''' % {'submissionLog': 'http://jumpgate.caltech.edu/wiki/SubmissionsLog'},
        'http://blank')
        name = 'testfind'
        test_stream = StringIO(test_daf)
        daf_mapper = daf.DAFMapper(name, daf_file=test_stream, model=model)

        view = daf_mapper.find_view('filename_r1.fastq')
        self.assertEqual(str(view),
                         str(submissionLog['testfind/view/FastqRd1']))

    def test_find_overlapping_view(self):
        """A filename matched by two view patterns raises ModelException."""
        model = get_model()

        parser = RDF.Parser(name='turtle')
        parser.parse_string_into_model(model, '''
@prefix dafTerm:<http://jumpgate.caltech.edu/wiki/UcscDaf#> .

<%(submissionLog)s/testfind/view/fastq> dafTerm:filename_re ".*\\\\.fastq" .
<%(submissionLog)s/testfind/view/FastqRd1> dafTerm:filename_re ".*_r1\\\\.fastq" .
''' % {'submissionLog': 'http://jumpgate.caltech.edu/wiki/SubmissionsLog'},
        'http://blank')
        name = 'testfind'
        test_stream = StringIO(test_daf)
        daf_mapper = daf.DAFMapper(name, daf_file=test_stream, model=model)

        self.assertRaises(daf.ModelException,
                          daf_mapper.find_view,
                          'filename_r1.fastq')

    def test_find_attributes(self):
        """Library attributes can be read and preloaded values win."""
        lib_id = '11204'
        lib_url = 'http://jumpgate.caltech.edu/library/%s' %(lib_id)
        model = get_model()

        parser = RDF.Parser(name='turtle')
        parser.parse_string_into_model(model, '''
@prefix dafTerm: <http://jumpgate.caltech.edu/wiki/UcscDaf#> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

<%(submissionLog)s/testfind/view/Signal> dafTerm:filename_re ".*\\\\.bam" .
<%(submissionLog)s/testfind/view/FastqRd1> dafTerm:filename_re ".*\\\\.fastq" .
<%(libUrl)s> <%(libraryOntology)sgel_cut> "100"^^xsd:decimal . 
''' % {'submissionLog': 'http://jumpgate.caltech.edu/wiki/SubmissionsLog',
       'libraryOntology': 'http://jumpgate.caltech.edu/wiki/LibraryOntology#',
       'libUrl': lib_url},
       'http://blank')
        name = 'testfind'
        test_stream = StringIO(test_daf)
        daf_mapper = daf.DAFMapper(name, daf_file=test_stream, model=model)
        libNode = RDF.Node(RDF.Uri(lib_url))
        # NOTE(review): the remark below suggests library details are
        # fetched from a live server -- confirm whether this test needs
        # network access.
        daf_mapper._add_library_details_to_model(libNode)
        gel_cut = daf_mapper._get_library_attribute(libNode, 'gel_cut')
        # make sure we can override attributes, the value in our
        # server is 500 for this library
        self.assertEqual(gel_cut, 100)

        species = daf_mapper._get_library_attribute(libNode, 'species')
        self.assertEqual(species, "Homo sapiens")

        daf_mapper.construct_file_attributes('/tmp/analysis1',
                                             libNode,
                                             'filename.bam')
        source = daf_mapper.model.get_source(
            rdfNS['type'], submissionOntology['submission'])
        self.assertEqual(
            str(source),
            "<http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/analysis1>")
        view = daf_mapper.model.get_target(
            source, submissionOntology['has_view'])
        self.assertEqual(
            str(view),
            "<http://jumpgate.caltech.edu/wiki/SubmissionsLog/testfind/view/Signal>")
181
def suite():
    """Return a TestSuite with the tests from TestDAF and TestDAFMapper.

    Uses the default loader rather than unittest.makeSuite, which is
    deprecated and no longer available in recent Python releases; the
    default loader collects methods with the same 'test' prefix.
    """
    loader = unittest.defaultTestLoader
    # Named 'tests' so the local does not shadow this function's own name.
    tests = loader.loadTestsFromTestCase(TestDAF)
    tests.addTest(loader.loadTestsFromTestCase(TestDAFMapper))
    return tests


if __name__ == "__main__":
    # Run the combined suite when the module is executed as a script.
    unittest.main(defaultTest='suite')