--- /dev/null
+"""Parse UCSC DAF File
+"""
+import logging
+import re
+import string
+from StringIO import StringIO
+import types
+
+from htsworkflow.util.rdfhelp import blankOrUri, toTypedNode
+
+logger = logging.getLogger(__name__)
+
# STATES for the parse_stream state machine: reading file-level header
# attributes vs. reading the attributes of the current 'view' block.
DAF_HEADER = 1
DAF_VIEW = 2
+
+
def parse(filename):
    """Parse the named UCSC DAF file.

    Returns the attribute dictionary produced by parse_stream.
    (Bug fixed: previously returned the closed stream object instead
    of the parsed attributes.)
    """
    stream = open(filename, 'r')
    try:
        return parse_stream(stream)
    finally:
        # make sure the file is closed even if parsing raises
        stream.close()
+
def fromstring(daf_string):
    """Parse DAF attributes out of an in-memory string."""
    return parse_stream(StringIO(daf_string))
+
def parse_stream(stream):
    """Parse a DAF file from an open stream into a nested dictionary.

    File-level attributes go directly into the returned dict; each
    'view' block becomes its own dict stored under the 'views' key,
    keyed by view name.  'yes'/'no' values are coerced to booleans and
    the 'variables' header is split on commas into a list.
    """
    strip_comment = re.compile("#.*$")

    state = DAF_HEADER
    attributes = {'views': {}}
    current_view = None
    current_view_attributes = {}
    for line in stream:
        # drop trailing comments before tokenizing
        line = strip_comment.sub("", line)
        name_end = _extract_name_index(line)
        key = line[:name_end]
        value_start = _consume_whitespace(line, start=name_end)
        value_end = _extract_value_index(line, start=value_start)
        value = line[value_start:value_end]

        lowered = value.lower()
        if lowered == 'yes':
            value = True
        elif lowered == 'no':
            value = False

        if not key:
            # a blank (or comment-only) line terminates the current view
            if current_view is not None:
                attributes['views'][current_view] = current_view_attributes
                current_view = None
                current_view_attributes = {}
            state = DAF_HEADER
        elif state == DAF_VIEW:
            current_view_attributes[key] = value
        elif key == 'variables':
            attributes[key] = [x.strip() for x in value.split(',')]
        elif key == 'view':
            current_view = value
            current_view_attributes = {'view': value}
            state = DAF_VIEW
        else:
            attributes[key] = value

    # flush the final view block (no trailing blank line required)
    if current_view is not None:
        attributes['views'][current_view] = current_view_attributes

    return attributes
+
+def _consume_whitespace(line, start=0):
+ for i in xrange(start, len(line)):
+ if line[i] not in string.whitespace:
+ return i
+
+ return len(line)
+
+def _extract_name_index(line, start=0):
+ for i in xrange(start, len(line)):
+ if line[i] in string.whitespace:
+ return i
+
+ return len(line)
+
+def _extract_value_index(line, start=0):
+ shortline = line.rstrip()
+ return len(shortline)
+
# librdf (the RDF module) is an optional dependency: when it imports we
# define real converters, otherwise stubs that raise NotImplementedError.
try:
    import RDF
    def convert_to_rdf_statements(attributes, source=None):
        """Convert a parsed DAF attribute dict into RDF statements.

        source, if given, becomes the subject node (URI); otherwise a
        blank node is used.  Returns a list of RDF.Statement objects.
        """
        ddfNS = RDF.NS("http://encodesubmit.ucsc.edu/pipeline/download_ddf#")

        subject = blankOrUri(source)

        statements = []
        for name in attributes:
            predicate = ddfNS[name]
            if name == 'views':
                # each view becomes a fresh blank node hung off the
                # subject; its attributes are converted recursively
                predicate = ddfNS['views']
                for view_name in attributes.get('views', []):
                    view = attributes['views'][view_name]
                    viewNode = RDF.Node()
                    statements.append(RDF.Statement(subject, predicate, viewNode))
                    statements.extend(convert_to_rdf_statements(view, viewNode))
            elif name == 'variables':
                # one statement per variable in the list
                predicate = ddfNS['variables']
                for var in attributes.get('variables', []):
                    obj = toTypedNode(var)
                    statements.append(RDF.Statement(subject, predicate, obj))
            else:
                # scalar attribute -> single typed-literal statement
                value = attributes[name]
                obj = toTypedNode(value)
                statements.append(RDF.Statement(subject,predicate,obj))

        return statements


    def add_to_model(model, attributes, source=None):
        """Add the RDF statements for *attributes* to *model*."""
        for statement in convert_to_rdf_statements(attributes, source):
            model.add_statement(statement)

# NOTE(review): "except ImportError, e" is Python-2-only syntax (and e is
# unused); under Python 3 this would need "except ImportError:".
except ImportError, e:
    def convert_to_rdf_statements(attributes, source=None):
        # stub used when librdf is not available
        raise NotImplementedError("librdf not installed")
    def add_to_model(model, attributes, source=None):
        # stub used when librdf is not available
        raise NotImplementedError("librdf not installed")
+
--- /dev/null
+import unittest
+
+from htsworkflow.submission import daf
+
# Sample DAF text used as a fixture: exercises plain header attributes,
# the comma-separated 'variables' list, yes/no boolean coercion, and two
# blank-line-separated view blocks.
test_daf = """# Lab and general info
grant Hardison
lab Caltech-m
dataType ChipSeq
variables cell, antibody,sex,age,strain,control
compositeSuffix CaltechHistone
assembly mm9
dafVersion 2.0
validationSettings validateFiles.bam:mismatches=2,bamPercent=99.9;validateFiles.fastq:quick=1000

# Track/view definition
view Peaks
longLabelPrefix Caltech Histone Peaks
type narrowPeak
hasReplicates yes
required no

view Signal
longLabelPrefix Caltech Histone Signal
type bigWig
hasReplicates yes
required no
"""
+
class TestDAF(unittest.TestCase):
    def test_parse(self):
        """Check fromstring handles headers, variables and view blocks."""
        parsed = daf.fromstring(test_daf)

        # failUnlessEqual is a long-deprecated alias; use assertEqual
        self.assertEqual(parsed['assembly'], 'mm9')
        self.assertEqual(parsed['grant'], 'Hardison')
        self.assertEqual(len(parsed['variables']), 6)
        self.assertEqual(len(parsed['views']), 2)
        self.assertEqual(len(parsed['views']['Peaks']), 5)
        self.assertEqual(len(parsed['views']['Signal']), 5)
        signal = parsed['views']['Signal']
        # 'required no' should have been coerced to boolean False
        self.assertEqual(signal['required'], False)
        self.assertEqual(signal['longLabelPrefix'],
                         'Caltech Histone Signal')

    def test_rdf(self):
        """Smoke-test RDF conversion; skipped when librdf is absent."""
        # keep the try body minimal: previously the whole test body was
        # inside it, so any ImportError raised downstream was silently
        # treated as "librdf missing"
        try:
            import RDF
        except ImportError:
            print("Skipped test_rdf")
            return

        parsed = daf.fromstring(test_daf)
        mem = RDF.MemoryStorage()
        model = RDF.Model(mem)

        daf.add_to_model(model, parsed)

        writer = RDF.Serializer(name='turtle')
        print(writer.serialize_model_to_string(model))
+
def suite():
    """Return this module's tests (referenced by defaultTest below)."""
    # unittest.makeSuite is deprecated (removed in Python 3.13);
    # TestLoader.loadTestsFromTestCase is the supported equivalent
    return unittest.TestLoader().loadTestsFromTestCase(TestDAF)

if __name__ == "__main__":
    unittest.main(defaultTest='suite')
--- /dev/null
+import unittest
+
+from htsworkflow.util.rdfhelp import toTypedNode, blankOrUri
+
class TestRDFHelp(unittest.TestCase):
    def test_typed_node_boolean(self):
        """True converts to an xsd:boolean literal with string '1'."""
        node = toTypedNode(True)
        # failUnlessEqual is a long-deprecated alias; use assertEqual
        self.assertEqual(node.literal_value['string'], u'1')
        self.assertEqual(str(node.literal_value['datatype']),
                         'http://www.w3.org/2001/XMLSchema#boolean')

    def test_typed_node_string(self):
        """Plain strings convert to xsd:string literals."""
        node = toTypedNode('hello')
        self.assertEqual(node.literal_value['string'], u'hello')
        self.assertEqual(str(node.literal_value['datatype']),
                         'http://www.w3.org/2001/XMLSchema#string')

    def test_blank_or_uri_blank(self):
        """No argument yields a blank node."""
        node = blankOrUri()
        self.assertEqual(node.is_blank(), True)

    def test_blank_or_uri_url(self):
        """A string argument yields a resource node with that URI."""
        s = 'http://google.com'
        node = blankOrUri(s)
        self.assertEqual(node.is_resource(), True)
        self.assertEqual(str(node.uri), s)
+
def suite():
    """Return this module's tests (referenced by defaultTest below)."""
    # Bug fix: the original passed the undefined name 'testRdfHelp' to
    # unittest.makeSuite (a NameError at runtime); the test case class
    # is TestRDFHelp.  Also use the non-deprecated TestLoader API.
    return unittest.TestLoader().loadTestsFromTestCase(TestRDFHelp)

if __name__ == "__main__":
    unittest.main(defaultTest='suite')