Try to figure out where UCSC is hosting our submitted files
[htsworkflow.git] / htsworkflow / submission / ucsc.py
1 """Utilities for extracting information from the ENCODE DCC
2 """
3 import urlparse
4 import urllib2
5
6 UCSCEncodePipeline = "http://encodesubmit.ucsc.edu/pipeline/"
7
8
def ddf_download_url(submission_id):
    """Build the pipeline URL from which a submission's DDF can be fetched

    The submission id is interpolated with ``%s`` and the resulting
    relative path is joined onto ``UCSCEncodePipeline``.

    >>> ddf_download_url(1234)
    'http://encodesubmit.ucsc.edu/pipeline/download_ddf/1234'
    """
    relative_path = 'download_ddf/%s' % (submission_id,)
    ddf_url = urlparse.urljoin(UCSCEncodePipeline, relative_path)
    return ddf_url
17
18
def daf_download_url(submission_id):
    """Build the pipeline URL from which a submission's DAF can be fetched

    The submission id is interpolated with ``%s`` and the resulting
    relative path is joined onto ``UCSCEncodePipeline``.

    >>> daf_download_url(1234)
    'http://encodesubmit.ucsc.edu/pipeline/download_daf/1234'
    """
    return urlparse.urljoin(
        UCSCEncodePipeline,
        'download_daf/%s' % (submission_id,))
27
28
def submission_view_url(submission_id):
    """Build the pipeline URL of the page that shows a submission

    The submission id is interpolated with ``%s`` into a ``show/``
    relative path, which is joined onto ``UCSCEncodePipeline``.

    >>> submission_view_url(1234)
    'http://encodesubmit.ucsc.edu/pipeline/show/1234'
    """
    show_path = 'show/%s' % (submission_id,)
    view_url = urlparse.urljoin(UCSCEncodePipeline, show_path)
    return view_url
37
38
def get_ucsc_file_index(base_url):
    """Get index of files for a ENCODE collection

    Downloads ``files.txt`` from below ``base_url`` and parses it with
    ``parse_ucsc_file_index``.

    :param base_url: URL of the collection directory; a trailing '/' is
        appended when missing.
    :returns: whatever ``parse_ucsc_file_index`` returns for the
        downloaded index.
    """
    # endswith() also copes with an empty string, where indexing [-1]
    # would raise IndexError before the URL is even attempted.
    if not base_url.endswith('/'):
        base_url += '/'
    response = urllib2.urlopen(base_url + 'files.txt')
    try:
        return parse_ucsc_file_index(response)
    finally:
        # Always release the HTTP connection, even if parsing fails.
        response.close()
46
47
def parse_ucsc_file_index(stream):
    """Turn a UCSC DCC files.txt index into a dictionary of name-value pairs

    Each non-blank line is expected to look like
    ``filename<TAB>name=value; name=value; ...``.

    :param stream: iterable yielding the lines of the index (for example
        an open file or a list of strings).
    :returns: dict mapping each filename to a dict of its attributes;
        attribute names and values have surrounding whitespace stripped.
    :raises ValueError: if a non-blank line contains no tab, or a
        non-empty attribute fragment contains no '='.
    """
    file_index = {}
    for line in stream:
        # Tolerate blank lines, e.g. an empty line at the end of the file.
        if not line.strip():
            continue
        # Split on the first tab only so that a stray tab in the attribute
        # text does not break the two-way unpacking.
        filename, attribute_line = line.split('\t', 1)
        attributes = {}
        for assignment in attribute_line.split(';'):
            # A trailing ';' leaves an empty fragment with nothing to record.
            if not assignment.strip():
                continue
            # Split on the first '=' only so values may contain '='.
            name, value = assignment.split('=', 1)
            attributes[name.strip()] = value.strip()

        file_index[filename] = attributes
    return file_index