3 from BeautifulSoup import BeautifulSoup
4 from datetime import datetime
6 from operator import attrgetter
7 from optparse import OptionParser
# RDF namespace helpers and endpoint URL templates for the UCSC ENCODE
# submission-pipeline scraper.
# NOTE(review): this chunk is a sampled/garbled extract -- each line carries
# its original file line number and indentation has been stripped; some
# original lines are missing between the visible ones.
# RDF.NS presumably comes from the Redland librdf Python bindings
# (`import RDF`), whose import is not visible in this extract -- confirm.
18 libraryNS = RDF.NS("http://jumpgate.caltech.edu/library/")
19 submissionNS = RDF.NS("http://encodesubmit.ucsc.edu/pipeline/show/")
20 submitNS = RDF.NS("http://jumpgate.caltech.edu/wiki/EncodeSubmit#")
21 dublinCoreNS = RDF.NS("http://purl.org/dc/elements/1.1/")
22 rdfNS = RDF.NS("http://www.w3.org/1999/02/22-rdf-syntax-ns#")
23 rdfsNS= RDF.NS("http://www.w3.org/2000/01/rdf-schema#")
# Pipeline endpoints; DETAIL_URL / LIBRARY_URL contain a {0} placeholder
# filled via str.format (see Submission._get_detail and library_to_freeze).
25 LOGIN_URL = 'http://encodesubmit.ucsc.edu/account/login'
26 USER_URL = 'http://encodesubmit.ucsc.edu/pipeline/show_user'
27 DETAIL_URL = 'http://encodesubmit.ucsc.edu/pipeline/show/{0}'
28 LIBRARY_URL = 'http://jumpgate.caltech.edu/library/{0}'
# Entry point: parse command-line options, log in to the encodesubmit site
# (cookie-based session), then fetch the current user's submissions.
# NOTE(review): original lines 34-36 and 38 are missing from this extract,
# so the login call, the failure condition guarding the print, and the
# assignment of `cookie` are not visible here.
31 def main(cmdline=None):
32 parser = make_parser()
33 opts, args = parser.parse_args(cmdline)
# Python 2 print statement; reached when login did not succeed
# (the guarding condition is on a missing line).
37 print "Failed to login"
39 submissions = my_submissions(cookie)
# Fragment of make_parser(): builds the optparse.OptionParser used by main.
# NOTE(review): the enclosing `def make_parser()` line (around original
# line 44) and the rest of the body/return are missing from this extract.
45 parser = OptionParser()
# Fragment of the login helper: reads the password for USERNAME from the
# system keyring, POSTs form-encoded credentials to LOGIN_URL via httplib2,
# and captures the session cookie from the 'set-cookie' response header.
# NOTE(review): the enclosing def line and several body lines (original
# lines 49, 53, 57-58, 61-62, 64-65, including the method argument to
# http.request and the return) are missing from this extract.  USERNAME is
# presumably a module-level constant defined on a missing line -- confirm.
50 keys = keyring.get_keyring()
51 password = keys.get_password(LOGIN_URL, USERNAME)
52 credentials = {'login': USERNAME,
54 headers = {'Content-type': 'application/x-www-form-urlencoded'}
55 http = httplib2.Http()
56 response, content = http.request(LOGIN_URL,
59 body=urllib.urlencode(credentials))
60 logging.debug("Login to {0}, status {1}".format(LOGIN_URL,
# None here means no session cookie was issued (login not accepted).
63 cookie = response.get('set-cookie', None)
# Scrape the logged-in user's project table from the show_user page and
# build Submission objects from its rows.
# NOTE(review): original lines 69, 71-74, 82, 84 and 86-87 are missing, so
# the `tr` initialization, the loop header, the list that accumulates the
# Submission objects, and the return statement are not visible here.
66 def my_submissions(cookie):
67 soup = get_url_as_soup(USER_URL, 'GET', cookie)
68 p = soup.find('table', attrs={'id':'projects'})
70 # first record is header
75 if td is not None and len(td) > 1:
# Cell layout (even indices only): 0=submission-id link, 2=species,
# 4=name, 6=status, 8=last-modified date, 10=age.
76 subid = td[0].contents[0].contents[0]
77 species = get_contents(td[2])
78 name = get_contents(td[4])
79 status = get_contents(td[6]).strip()
80 date = get_date_contents(td[8])
81 age = get_contents(td[10])
# The constructed Submission is presumably appended to a result list on
# the missing line 82/84 -- confirm.
83 Submission(subid, species, name, status, date, age, cookie)
85 tr = tr.findNext('tr')
# Return the first child of a BeautifulSoup element, or None for an empty
# element.
# NOTE(review): original lines 90 and 92-97 are missing -- presumably the
# docstring close and the branch that returns None when the element has no
# contents -- confirm.
88 def get_contents(element):
89 """Return contents or none.
91 if len(element.contents) == 0:
98 return element.contents[0]
# Parse a table cell's text as a naive datetime, e.g. "2011-01-07 15:30".
# NOTE(review): original line 102 is missing -- likely a guard for
# data being None (get_contents can presumably return None) -- confirm.
100 def get_date_contents(element):
101 data = get_contents(element)
103 return datetime.strptime(data, "%Y-%m-%d %H:%M")
# Manual (submission-name substring -> library id) fallback table for
# submissions whose names do not embed a parseable library id; consulted
# by Submission._get_library_id after its regex search fails.
107 SUBMISSIONS_LACKING_LIBID = [
108 ('1x75-Directional-HeLa-Rep1', '11208'),
109 ('1x75-Directional-HeLa-Rep2', '11207'),
110 ('1x75-Directional-HepG2-Rep1', '11210'),
111 ('1x75-Directional-HepG2-Rep2', '11209'),
112 ('1x75-Directional-H1-hESC-Rep1', '10947'),
113 ('1x75-Directional-H1-hESC-Rep2', '11009'),
114 ('1x75-Directional-HUVEC-Rep1', '11206'),
115 ('1x75-Directional-HUVEC-Rep2', '11205'),
116 ('1x75-Directional-K562-Rep1', '11008'),
117 ('1x75-Directional-K562-Rep2', '11007'),
118 ('1x75-Directional-NHEK-Rep1', '11204'),
# NOTE(review): original lines 119-120 (any remaining entries and the
# closing bracket) are missing from this extract.
# One row of the user's submission table, with lazy lookups of the
# associated library id and the submission's creation date (the latter
# fetched from the per-submission detail page).
# NOTE(review): many interior lines of this class are missing from this
# extract (originals 123-124, 126-129, 132-133, 141, 148-151, 157,
# 161-162, 164, 166, 170, 172, 174, 177, 180, 183-184, 186-187).
121 class Submission(object):
122 def __init__(self, subid, species, name, status, date, age, cookie=None):
# Assignments for subid/name/status/date/age/cookie are on missing lines.
125 self.species = species
# Caches for the lazy library_id / created_date properties below.
130 self._library_id = None
131 self._created_date = None
# The following lines appear to be the body of a method (its def line,
# original 133, is missing) that serializes this submission as a list of
# RDF.Statement triples -- confirm against the full file.
134 subNode = submissionNS[self.subid.encode('utf-8')]
135 dateNode = self.date.strftime("%Y-%m-%d")
136 s = [RDF.Statement(subNode, submitNS['name'],
137 self.name.encode('utf-8')),
138 RDF.Statement(subNode, submitNS['status'],
139 self.status.encode('utf-8')),
140 RDF.Statement(subNode, submitNS['last_modify_date'], dateNode),
# Optional triples: only emitted when the source data is present.
142 if self.species is not None:
143 s.append(RDF.Statement(subNode, submitNS['species'],
144 self.species.encode('utf-8')))
145 if self.library_id is not None:
146 libId = libraryNS[self.library_id.encode('utf-8')]
# rdfs:seeAlso links the submission node to its library page.
147 s.append(RDF.Statement(subNode, rdfsNS['seeAlso'], libId))
# Lazily derive the library id: first try to find a 5-digit or SLnnnn
# token in the submission name, then fall back to the manual
# SUBMISSIONS_LACKING_LIBID substring table.
152 def _get_library_id(self):
153 if self._library_id is None:
154 match = re.search(r"[ -](?P<id>([\d]{5})|(SL[\d]{4}))", self.name)
155 if match is not None:
156 self._library_id = match.group('id')
158 for dir_lib_name, lib_id in SUBMISSIONS_LACKING_LIBID:
159 if dir_lib_name in self.name:
160 self._library_id = lib_id
163 return self._library_id
165 library_id = property(_get_library_id)
# Fetch this submission's detail page and cache its "Created: " date.
167 def _get_detail(self):
168 detail = DETAIL_URL.format(self.subid)
169 soup = get_url_as_soup(detail, 'GET', self.cookie)
# Locate the text node after the "Created: " label.
171 created_label = soup.find(text="Created: ")
173 self._created_date = get_date_contents(created_label.next)
# Lazy property: triggers the detail-page fetch on first access
# (the call to _get_detail is presumably on missing line 177 -- confirm).
175 def _get_created_date(self):
176 if self._created_date is None:
178 return self._created_date
179 created_date = property(_get_created_date)
# Tab-separated one-line summary (Python 2 unicode protocol).
181 def __unicode__(self):
182 return u"{0}\t{1}\t{2}".format(self.subid, self.library_id, self.name)
# Appears to be the body of __repr__/__str__ (def line missing).
185 return u"<Submission ({0}) '{1}'>".format(self.subid, self.name)
# Group submissions by library id (dropping those without one), sorting
# each library's submissions newest-first by date.
# NOTE(review): original lines 190, 193 and 196-198 are missing -- the
# `libraries` dict initialization and the return statement are presumably
# there -- confirm.
188 def select_by_library_id(submission_list):
189 subl = [ (x.library_id, x) for x in submission_list if x.library_id ]
191 for lib_id, subobj in subl:
192 libraries.setdefault(lib_id, []).append(subobj)
194 for submission in libraries.values():
# In-place sort: most recent submission first within each library.
195 submission.sort(key=attrgetter('date'), reverse=True)
# Render an HTML report table: one row per library, one column per ENCODE
# data freeze, each cell listing the submissions (with status) whose date
# falls in that freeze.  Returns the report as a single string.
# NOTE(review): original lines 203-204, 207-211, 214, 225, 230 and 236 are
# missing -- among them the loop headers over `freezes` and the per-freeze
# column loop, plus the `batched = {}` initialization -- confirm.
199 def library_to_freeze(selected_libraries):
200 freezes = ['2010-Jan', '2010-Jul', '2011-Jan']
201 lib_ids = sorted(selected_libraries.keys())
202 report = ['<html><table border="1">']
# Inline CSS for the table cells (part of a multi-line string literal
# whose delimiters are on missing lines).
205 <style type="text/css">
206 td {border-width:0 0 1px 1px; border-style:solid;}
# Header row: library id, name, then one column per freeze label.
212 report.append('<thead>')
213 report.append('<tr><td>Library ID</td><td>Name</td>')
215 report.append('<td>{0}</td>'.format(f))
216 report.append('</tr>')
217 report.append('</thead>')
218 report.append('<tbody>')
219 for lib_id in lib_ids:
220 report.append('<tr>')
221 lib_url = LIBRARY_URL.format(lib_id)
222 report.append('<td><a href="{0}">{1}</a></td>'.format(lib_url, lib_id))
223 submissions = selected_libraries[lib_id]
# Submissions are sorted newest-first (see select_by_library_id), so
# index 0 is the most recent name.
224 report.append('<td>{0}</td>'.format(submissions[0].name))
# Bucket this library's submissions by the freeze their date maps to.
226 for sub in submissions:
227 date = date_to_freeze(sub.date)
228 batched.setdefault(date, []).append(sub)
# Python 2 print statement -- debugging output left in; consider removing.
229 print lib_id, batched
231 report.append('<td>')
232 for s in batched.get(d, []):
233 subid = '<a href="http://encodesubmit.ucsc.edu/pipeline/show/{0}">{0}</a>'.format(s.subid)
234 report.append("{0}:{1}".format(subid, s.status))
235 report.append('</td>')
# Trailing empty cell, then close out the row/table.
237 report.append('<td></td>')
238 report.append("</tr>")
239 report.append('</tbody>')
240 report.append("</table></html>")
241 return "\n".join(report)
# Map a datetime to the label of the ENCODE data freeze it belongs to,
# by comparing against each freeze's end date in order.
# NOTE(review): original lines 248 and 250-255 are missing -- the list's
# closing bracket, the comparison/return inside the loop, and whatever is
# returned when the date falls after the last freeze -- confirm.
244 def date_to_freeze(d):
245 freezes = [ (datetime(2010, 1, 30), '2010-Jan'),
246 (datetime(2010, 7, 30), '2010-Jul'),
247 (datetime(2011, 1, 30), '2011-Jan'),
249 for end, name in freezes:
# Fetch a URL with httplib2 (optionally sending a session cookie) and
# parse the 200 response body with BeautifulSoup 3; on any other status
# an HttpLib2ErrorWithResponse is constructed.
# NOTE(review): original lines 258, 266-268 and 272 are missing -- the
# `headers` dict initialization, the `return soup` / else transition, and
# the `raise e` statement are presumably there -- confirm.
256 def get_url_as_soup(url, method, cookie=None):
257 http = httplib2.Http()
259 if cookie is not None:
260 headers['Cookie'] = cookie
261 response, content = http.request(url, method, headers=headers)
262 if response['status'] == '200':
# BeautifulSoup 3 API: fromEncoding forces the decode charset and
# convertEntities expands HTML entities to unicode characters.
263 soup = BeautifulSoup(content,
264 fromEncoding="utf-8", # should read from header
265 convertEntities=BeautifulSoup.HTML_ENTITIES
# Non-200: build a descriptive error carrying the response and body.
269 msg = "error accessing {0}, status {1}"
270 msg = msg.format(url, response['status'])
271 e = httplib2.HttpLib2ErrorWithResponse(msg, response, content)
# Script entry guard; the call to main() (original line 274+) is missing
# from this extract.
273 if __name__ == "__main__":