1 """Utilities for extracting information from the ENCODE DCC
# Base URL that the *_url helpers in this module join their path fragments
# onto. The trailing slash is significant: urljoin would otherwise replace
# the final "pipeline" path segment instead of appending to it.
UCSCEncodePipeline = "http://encodesubmit.ucsc.edu/pipeline/"
def ddf_download_url(submission_id):
    """Build the URL from which a submission's DDF can be downloaded

    The submission id is appended to the ``download_ddf/`` path, which is
    then resolved against ``UCSCEncodePipeline``.

    >>> ddf_download_url(1234)
    'http://encodesubmit.ucsc.edu/pipeline/download_ddf/1234'
    """
    return urlparse.urljoin(
        UCSCEncodePipeline,
        'download_ddf/%s' % (submission_id,),
    )
def daf_download_url(submission_id):
    """Build the URL from which a submission's DAF can be downloaded

    The submission id is substituted into the ``download_daf/`` path, which
    is then resolved against ``UCSCEncodePipeline``.

    >>> daf_download_url(1234)
    'http://encodesubmit.ucsc.edu/pipeline/download_daf/1234'
    """
    path_template = 'download_daf/%s'
    relative_path = path_template % (submission_id,)
    return urlparse.urljoin(UCSCEncodePipeline, relative_path)
def submission_view_url(submission_id):
    """Return url of the pipeline page that shows a submission

    Unlike the DDF/DAF helpers this does not point at a download; it joins
    ``show/<submission_id>`` onto ``UCSCEncodePipeline``.

    >>> submission_view_url(1234)
    'http://encodesubmit.ucsc.edu/pipeline/show/1234'
    """
    fragment = 'show/%s' % (submission_id,)
    return urlparse.urljoin(UCSCEncodePipeline, fragment)
def get_ucsc_file_index(base_url):
    """Get index of files for a ENCODE collection

    Fetches ``files.txt`` from directly beneath ``base_url`` and hands the
    open response to ``parse_ucsc_file_index``.

    :param base_url: URL of the directory containing ``files.txt``; a
        trailing slash is appended when it is missing.
    :returns: the value produced by ``parse_ucsc_file_index`` for the
        downloaded index.
    """
    # endswith() also copes with an empty string, where indexing [-1] would
    # raise IndexError before any request was attempted.
    if not base_url.endswith('/'):
        base_url += '/'
    request = urllib2.urlopen(base_url + 'files.txt')
    try:
        file_index = parse_ucsc_file_index(request)
    finally:
        # Release the connection even when parsing raises.
        request.close()
    return file_index
def parse_ucsc_file_index(stream):
    """Turn a UCSC DCC files.txt index into a dictionary of name-value pairs

    Each non-blank line is read as ``filename<TAB>name=value; name=value``.
    Names and values are stripped of surrounding whitespace.

    :param stream: iterable yielding the lines of the index as text.
    :returns: dict mapping each filename to a dict of its attributes.
    :raises ValueError: if a non-blank line has no tab, or a non-empty
        attribute has no ``=``.
    """
    file_index = {}
    for line in stream:
        line = line.rstrip('\r\n')
        if not line.strip():
            # Blank lines carry no entry; skip them instead of failing the
            # filename/attributes unpack below.
            continue
        # Split only on the first tab so a tab inside the attribute text
        # cannot break the two-way unpack.
        filename, attribute_line = line.split('\t', 1)
        attributes = {}
        for assignment in attribute_line.split(';'):
            if not assignment.strip():
                # A trailing ';' leaves an empty piece behind.
                continue
            # Values may themselves contain '=' (e.g. URLs with a query).
            name, value = assignment.split('=', 1)
            attributes[name.strip()] = value.strip()
        file_index[filename] = attributes
    return file_index