cdc93129b5900ed1596af0953ece31465231c4be
[htsworkflow.git] / htsworkflow / submission / daf.py
1 """Parse UCSC DAF File
2 """
3 import logging
4 import re
5 import string
6 from StringIO import StringIO
7 import types
8
9 from htsworkflow.util.rdfhelp import blankOrUri, toTypedNode
10
11 logger = logging.getLogger(__name__)
12
# STATES
# Parser states used by parse_stream while walking the lines of a DAF file.
DAF_HEADER = 1  # outside any view block: attributes go to the top level
DAF_VIEW = 2  # inside a view block: attributes go to the current view
16
17
def parse(filename):
    """Parse the DAF file stored at ``filename``.

    :param filename: path of the DAF file to read.
    :returns: the attribute dictionary produced by ``parse_stream``.
    """
    # The context manager closes the file even when parsing raises.
    with open(filename, 'r') as stream:
        # Return the parsed attributes, not the (closed) file object.
        return parse_stream(stream)
23
def fromstring(daf_string):
    """Parse DAF content supplied as an in-memory string.

    The string is wrapped in a ``StringIO`` so that it can be handed to
    ``parse_stream`` exactly like an open file.

    :param daf_string: text of a DAF file.
    :returns: the attribute dictionary produced by ``parse_stream``.
    """
    return parse_stream(StringIO(daf_string))
27
def parse_stream(stream):
    """Parse DAF text from an iterable of lines into a dictionary.

    Each line is read as ``name<whitespace>value`` after anything from a
    ``#`` to the end of the line has been removed.  Values equal to
    ``yes`` / ``no`` (case-insensitive) become ``True`` / ``False``.

    Top-level attributes are stored directly in the result.  The
    ``variables`` attribute is split on commas into a list of stripped
    strings.  A ``view`` attribute opens a view block; the attributes
    that follow are collected into a per-view dictionary stored under
    ``result['views'][view_name]``.  A line without a name (empty, or
    starting with whitespace) closes the current view block.

    :param stream: iterable yielding the lines of a DAF file.
    :returns: dictionary of attributes, always containing a ``views`` key.
    """
    strip_comment = re.compile("#.*$")

    result = {'views': {}}
    mode = DAF_HEADER
    current_view = None
    current_view_attrs = {}

    for raw_line in stream:
        text = strip_comment.sub("", raw_line)

        # Locate the name, the gap after it and the end of the value.
        name_end = _extract_name_index(text)
        value_start = _consume_whitespace(text, start=name_end)
        value_end = _extract_value_index(text, start=value_start)
        key = text[:name_end]
        val = text[value_start:value_end]

        # Translate yes/no flags into booleans.
        lowered = val.lower()
        if lowered == 'yes':
            val = True
        elif lowered == 'no':
            val = False

        if not key:
            # A nameless line ends whatever view block is open.
            if current_view is not None:
                result['views'][current_view] = current_view_attrs
                current_view = None
                current_view_attrs = {}
            mode = DAF_HEADER
            continue

        if mode == DAF_VIEW:
            current_view_attrs[key] = val
        elif mode == DAF_HEADER:
            if key == 'variables':
                result[key] = [item.strip() for item in val.split(',')]
            elif key == 'view':
                current_view = val
                current_view_attrs['view'] = val
                mode = DAF_VIEW
            else:
                result[key] = val

    # The file may end without a trailing blank line; keep the last view.
    if current_view is not None:
        result['views'][current_view] = current_view_attrs

    return result
71
72 def _consume_whitespace(line, start=0):
73     for i in xrange(start, len(line)):
74         if line[i] not in string.whitespace:
75             return i
76         
77     return len(line)
78
79 def _extract_name_index(line, start=0):
80     for i in xrange(start, len(line)):
81         if line[i] in string.whitespace:
82             return i
83         
84     return len(line)
85
86 def _extract_value_index(line, start=0):
87     shortline = line.rstrip()
88     return len(shortline)
89
90 try:
91     import RDF
92     def convert_to_rdf_statements(attributes, source=None):
93         ddfNS = RDF.NS("http://encodesubmit.ucsc.edu/pipeline/download_ddf#")
94     
95         subject = blankOrUri(source)
96         
97         statements = []
98         for name in attributes:
99             predicate = ddfNS[name]
100             if name == 'views':
101                 predicate = ddfNS['views']
102                 for view_name in attributes.get('views', []):
103                     view = attributes['views'][view_name]
104                     viewNode = RDF.Node()
105                     statements.append(RDF.Statement(subject, predicate, viewNode))
106                     statements.extend(convert_to_rdf_statements(view, viewNode))
107             elif name == 'variables':
108                 predicate = ddfNS['variables']
109                 for var in attributes.get('variables', []):
110                     obj = toTypedNode(var)
111                     statements.append(RDF.Statement(subject, predicate, obj))
112             else:
113                 value = attributes[name]
114                 obj = toTypedNode(value)
115                 statements.append(RDF.Statement(subject,predicate,obj))
116     
117         return statements
118     
119     
120     def add_to_model(model, attributes, source=None):
121         for statement in convert_to_rdf_statements(attributes, source):
122             model.add_statement(statement)
123             
124 except ImportError, e:
125     def convert_to_rdf_statements(attributes, source=None):
126         raise NotImplementedError("librdf not installed")
127     def add_to_model(model, attributes, source=None):
128         raise NotImplementedError("librdf not installed")
129