encode_submission/encode_find.py

   1 #!/usr/bin/env python
   2 """
   3 Gather information about our submissions into a single RDF store
   4 """
   5
   6 from datetime import datetime
   7 import hashlib
   8 import httplib2
   9 import keyring
  10 import logging
  11 from lxml.html import fromstring
  12 from operator import attrgetter
  13 from optparse import OptionParser, OptionGroup
  14 # python keyring
  15 import os
  16 import re
  17 # redland rdf lib
  18 import RDF
  19 import sys
  20 import urllib
  21 import urlparse
  22
  23 from htsworkflow.submission import daf, ucsc
  24
  25 from htsworkflow.util import api
  26 from htsworkflow.util.rdfhelp import \
  27      dafTermOntology, \
  28      dublinCoreNS, \
  29      get_model, \
  30      get_serializer, \
  31      sparql_query, \
  32      submissionOntology, \
  33      libraryOntology, \
  34      load_into_model, \
  35      rdfNS, \
  36      rdfsNS, \
  37      xsdNS
  38 TYPE_N = rdfNS['type']
  39 CREATION_DATE = libraryOntology['date']
  40
  41 # URL mappings
  42 LIBRARY_NS = RDF.NS("http://jumpgate.caltech.edu/library/")
  43
  44 from htsworkflow.submission.ucsc import \
  45      daf_download_url, \
  46      ddf_download_url, \
  47      get_encodedcc_file_index, \
  48      submission_view_url, \
  49      UCSCEncodePipeline
  50
  51 DCC_NS = RDF.NS(UCSCEncodePipeline + 'download_ddf#')
  52
  53 DBDIR = os.path.expanduser("~diane/proj/submission")
  54
  55 LOGGER = logging.getLogger("encode_find")
  56
  57 LOGIN_URL = 'http://encodesubmit.ucsc.edu/account/login'
  58 USER_URL = 'http://encodesubmit.ucsc.edu/pipeline/show_user'
  59
  60 USERNAME = 'detrout'
  61 CHARSET = 'utf-8'
  62
  63 SL_MAP = {'SL2970': '02970',
  64           'SL2971': '02971',
  65           'SL2973': '02973',}
  66
  67 def main(cmdline=None):
  68     """
  69     Parse command line arguments
  70
  71     Takes a list of arguments (assuming arg[0] is the program name) or None
  72     If None, it looks at sys.argv
  73     """
  74     parser = make_parser()
  75     opts, args = parser.parse_args(cmdline)
  76
  77     if opts.debug:
  78         logging.basicConfig(level=logging.DEBUG)
  79     elif opts.verbose:
  80         logging.basicConfig(level=logging.INFO)
  81     else:
  82         logging.basicConfig(level=logging.ERROR)
  83
  84     htsw_authdata = api.make_auth_from_opts(opts, parser)
  85     htswapi = api.HtswApi(opts.host, htsw_authdata)
  86
  87     cookie = None
  88     model = get_model(opts.model, DBDIR)
  89
  90     if opts.load_rdf is not None:
  91         ns_uri = submissionOntology[''].uri
  92         load_into_model(model, opts.rdf_parser_name, opts.load_rdf, ns_uri)
  93
  94     if len(args) == 0:
  95         limit = None
  96     else:
  97         limit = args
  98
  99     if opts.reload_libraries:
 100         reload_libraries(model, args)
 101         return
 102
 103     if opts.update:
 104         opts.update_submission = True
 105         opts.update_libraries = True
 106         opts.update_ucsc_downloads = True
 107
 108     if opts.update_submission:
 109         cookie = login(cookie=cookie)
 110         load_my_submissions(model, limit=limit, cookie=cookie)
 111
 112     if opts.update_libraries:
 113         load_encode_assigned_libraries(model, htswapi)
 114         load_unassigned_submitted_libraries(model)
 115
 116     if opts.update_ucsc_downloads:
 117         our_tracks = [
 118             {'genome':'hg19', 'composite':'wgEncodeCaltechRnaSeq'},
 119             {'genome':'mm9',  'composite':'wgEncodeCaltechHist'},
 120             #{'genome':'mm9',  'composite':'wgEncodeCaltechHistone'},
 121             {'genome':'mm9',  'composite':'wgEncodeCaltechTfbs'}
 122         ]
 123         for track_info in our_tracks:
 124             load_encodedcc_files(model, **track_info )
 125
 126     if opts.sparql is not None:
 127         sparql_query(model, opts.sparql)
 128
 129     if opts.find_submission_with_no_library:
 130         report_submissions_with_no_library(model)
 131
 132     if opts.print_rdf:
 133         serializer = get_serializer(name=opts.rdf_parser_name)
 134         print serializer.serialize_model_to_string(model)
 135
 136
 137 def make_parser():
 138     """Construct option parser
 139     """
 140     parser = OptionParser()
 141     commands = OptionGroup(parser, "Commands")
 142     commands.add_option('--model', default=None,
 143       help="Load model database")
 144     commands.add_option('--load-rdf', default=None,
 145       help="load rdf statements into model")
 146     commands.add_option('--print-rdf', action="store_true", default=False,
 147       help="print ending model state")
 148     commands.add_option('--update', action="store_true", default=False,
 149       help="Do all updates")
 150     commands.add_option('--update-submission', action="store_true",
 151                         default=False,
 152       help="download status from ucsc")
 153     commands.add_option('--update-ucsc-downloads', action="store_true",
 154                         default=False,
 155       help="Update download locations from UCSC")
 156     commands.add_option('--update-libraries', action="store_true",
 157                         default=False,
 158       help="download library info from htsw")
 159     commands.add_option('--reload-libraries', action="store_true",
 160                         default=False,
 161                         help="Delete and redownload library information. "\
 162                              "Optionally list specific library IDs.")
 163     parser.add_option_group(commands)
 164
 165     queries = OptionGroup(parser, "Queries")
 166     queries.add_option('--sparql', default=None,
 167       help="execute arbitrary sparql query")
 168     queries.add_option('--find-submission-with-no-library', default=False,
 169       action="store_true",
 170       help="find submissions with no library ID")
 171     parser.add_option_group(queries)
 172
 173     options = OptionGroup(parser, "Options")
 174     options.add_option("--rdf-parser-name", default="turtle",
 175       help="set rdf file parser type")
 176     options.add_option("-v", "--verbose", action="store_true", default=False)
 177     options.add_option("--debug", action="store_true", default=False)
 178     parser.add_option_group(options)
 179
 180     api.add_auth_options(parser)
 181
 182     return parser
 183
 184
 185 def load_my_submissions(model, limit=None, cookie=None):
 186     """Parse all the submissions from UCSC into model
 187     It will look at the global USER_URL to figure out who to scrape
 188     cookie contains the session cookie, if none, will attempt to login
 189     """
 190     if cookie is None:
 191         cookie = login()
 192
 193     tree = get_url_as_tree(USER_URL, 'GET', cookie)
 194     table_rows = tree.xpath('//table[@id="projects"]/tr')
 195     # first record is header
 196     name_n = submissionOntology['name']
 197     species_n = submissionOntology['species']
 198     library_urn = submissionOntology['library_urn']
 199
 200     # skip header
 201     for row in table_rows[1:]:
 202         cell = row.xpath('td')
 203         if cell is not None and len(cell) > 1:
 204             submission_id = str(cell[0].text_content())
 205             if limit is None or submission_id in limit:
 206                 subUrn = RDF.Uri(submission_view_url(submission_id))
 207
 208                 add_stmt(model,
 209                          subUrn,
 210                          TYPE_N,
 211                          submissionOntology['Submission'])
 212                 add_stmt(model,
 213                          subUrn,
 214                          DCC_NS['subId'],
 215                          RDF.Node(submission_id))
 216
 217                 name = str(cell[4].text_content())
 218                 add_stmt(model, subUrn, name_n, name)
 219
 220                 species = str(cell[2].text_content())
 221                 if species is not None:
 222                     add_stmt(model, subUrn, species_n, species)
 223
 224                 library_id = get_library_id(name)
 225                 if library_id is not None:
 226                     add_submission_to_library_urn(model,
 227                                                   subUrn,
 228                                                   library_urn,
 229                                                   library_id)
 230                 else:
 231                     errmsg = 'Unable to find library id in {0} for {1}'
 232                     LOGGER.warn(errmsg.format(name, str(subUrn)))
 233
 234                 add_submission_creation_date(model, subUrn, cookie)
 235
 236                 # grab changing atttributes
 237                 status = str(cell[6].text_content()).strip()
 238                 last_mod_datetime = get_date_contents(cell[8])
 239                 last_mod = last_mod_datetime.isoformat()
 240
 241                 update_submission_detail(model, subUrn, status, last_mod,
 242                                          cookie=cookie)
 243
 244                 LOGGER.info("Processed {0}".format(subUrn))
 245
 246
 247 def add_submission_to_library_urn(model, submissionUrn, predicate, library_id):
 248     """Add a link from a UCSC submission to woldlab library if needed
 249     """
 250     libraryUrn = LIBRARY_NS[library_id + '/']
 251     query = RDF.Statement(submissionUrn, predicate, libraryUrn)
 252     if not model.contains_statement(query):
 253         link = RDF.Statement(submissionUrn, predicate, libraryUrn)
 254         LOGGER.info("Adding Sub -> Lib link: {0}".format(link))
 255         model.add_statement(link)
 256     else:
 257         LOGGER.debug("Found: {0}".format(str(query)))
 258
 259
 260 def report_submissions_with_no_library(model):
 261     missing = find_submissions_with_no_library(model)
 262     for row in results:
 263         subid = row['subid']
 264         name = row['name']
 265         print "# {0}".format(name)
 266         print "<{0}>".format(subid.uri)
 267         print "  encodeSubmit:library_urn "\
 268               "<http://jumpgate.caltech.edu/library/> ."
 269         print ""
 270
 271 def find_submissions_with_no_library(model):
 272     missing_lib_query_text = """
 273 PREFIX submissionOntology:<{submissionOntology}>
 274
 275 SELECT
 276  ?subid ?name
 277 WHERE {{
 278   ?subid submissionOntology:name ?name
 279   OPTIONAL {{ ?subid submissionOntology:library_urn ?libid }}
 280   FILTER  (!bound(?libid))
 281 }}""".format(submissionOntology=submissionOntology[''].uri)
 282     missing_lib_query = RDF.SPARQLQuery(missing_lib_query_text)
 283
 284     return missing_lib_query.execute(model)
 285
 286
 287 def find_unscanned_submitted_libraries(model):
 288     """Scan model for libraries that don't have library details loaded
 289     """
 290     unscanned_libraries = """
 291 PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
 292 PREFIX submissionOntology:<{submissionOntology}>
 293
 294 SELECT distinct ?submission ?library_urn
 295 WHERE {{
 296   ?submission submissionOntology:library_urn ?library_urn .
 297   OPTIONAL {{ ?library_urn rdf:type ?library_type  }}
 298   FILTER(!BOUND(?library_type))
 299 }}""".format(submissionOntology=submissionOntology[''].uri)
 300     query = RDF.SPARQLQuery(unscanned_libraries)
 301     return query.execute(model)
 302
 303 def find_all_libraries(model):
 304     """Scan model for every library marked as
 305     """
 306     libraries = """
 307 PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
 308 PREFIX libraryOntology:<{libraryOntology}>
 309
 310 SELECT distinct ?library_urn
 311 WHERE {{
 312   ?library_urn rdf:type ?library_type .
 313   FILTER(regex(?libray
 314 }}""".format(libraryOntology=libraryOntology[''].uri)
 315     query = RDF.SPARQLQuery(libraries)
 316     return query.execute(model)
 317
 318
 319 def add_submission_creation_date(model, subUrn, cookie):
 320     # in theory the submission page might have more information on it.
 321     creation_dates = get_creation_dates(model, subUrn)
 322     if len(creation_dates) == 0:
 323         LOGGER.info("Getting creation date for: {0}".format(str(subUrn)))
 324         submissionTree = get_url_as_tree(str(subUrn), 'GET', cookie)
 325         parse_submission_page(model, submissionTree, subUrn)
 326     else:
 327         LOGGER.debug("Found creation date for: {0}".format(str(subUrn)))
 328
 329
 330 def get_creation_dates(model, subUrn):
 331     query = RDF.Statement(subUrn, CREATION_DATE, None)
 332     creation_dates = list(model.find_statements(query))
 333     return creation_dates
 334
 335
 336 def parse_submission_page(model, submissionTree, subUrn):
 337     cells = submissionTree.findall('.//td')
 338     dateTimeType = xsdNS['dateTime']
 339     created_label = [x for x in cells
 340                      if x.text_content().startswith('Created')]
 341     if len(created_label) == 1:
 342         created_date = get_date_contents(created_label[0].getnext())
 343         created_date_node = RDF.Node(literal=created_date.isoformat(),
 344                                      datatype=dateTimeType.uri)
 345         add_stmt(model, subUrn, CREATION_DATE, created_date_node)
 346     else:
 347         msg = 'Unable to find creation date for {0}'.format(str(subUrn))
 348         LOGGER.warn(msg)
 349         raise Warning(msg)
 350
 351
 352 def update_submission_detail(model, subUrn, status, recent_update, cookie):
 353     HasStatusN = submissionOntology['has_status']
 354     StatusN = submissionOntology['status']
 355     LastModifyN = submissionOntology['last_modify_date']
 356
 357     status_nodes_query = RDF.Statement(subUrn, HasStatusN, None)
 358     status_nodes = list(model.find_statements(status_nodes_query))
 359
 360     if len(status_nodes) == 0:
 361         # has no status node, add one
 362         LOGGER.info("Adding status node to {0}".format(subUrn))
 363         status_node = create_status_node(subUrn, recent_update)
 364         add_stmt(model, subUrn, HasStatusN, status_node)
 365         add_stmt(model, status_node, rdfNS['type'], StatusN)
 366         add_stmt(model, status_node, StatusN, status)
 367         add_stmt(model, status_node, LastModifyN, recent_update)
 368         update_ddf(model, subUrn, status_node, cookie=cookie)
 369         update_daf(model, subUrn, status_node, cookie=cookie)
 370     else:
 371         LOGGER.info("Found {0} status blanks".format(len(status_nodes)))
 372         for status_statement in status_nodes:
 373             status_node = status_statement.object
 374             last_modified_query = RDF.Statement(status_node,
 375                                                 LastModifyN,
 376                                                 None)
 377             last_mod_nodes = model.find_statements(last_modified_query)
 378             for last_mod_statement in last_mod_nodes:
 379                 last_mod_date = str(last_mod_statement.object)
 380                 if recent_update == str(last_mod_date):
 381                     update_ddf(model, subUrn, status_node, cookie=cookie)
 382                     update_daf(model, subUrn, status_node, cookie=cookie)
 383                     break
 384
 385
 386 def update_daf(model, submission_url, status_node, cookie):
 387     download_daf_uri = str(submission_url).replace('show', 'download_daf')
 388     daf_uri = RDF.Uri(download_daf_uri)
 389
 390     status_is_daf = RDF.Statement(status_node, TYPE_N, dafTermOntology[''])
 391     if not model.contains_statement(status_is_daf):
 392         LOGGER.info('Adding daf to {0}, {1}'.format(submission_url,
 393                                                      status_node))
 394         daf_text = get_url_as_text(download_daf_uri, 'GET', cookie)
 395         daf_hash = hashlib.md5(daf_text).hexdigest()
 396         daf_hash_stmt = RDF.Statement(status_node,
 397                                       dafTermOntology['md5sum'],
 398                                       daf_hash)
 399         model.add_statement(daf_hash_stmt)
 400         daf.fromstring_into_model(model, status_node, daf_text)
 401
 402
 403 def update_ddf(model, subUrn, statusNode, cookie):
 404     download_ddf_url = str(subUrn).replace('show', 'download_ddf')
 405     ddfUrn = RDF.Uri(download_ddf_url)
 406
 407     status_is_ddf = RDF.Statement(statusNode, TYPE_N, DCC_NS[''])
 408     if not model.contains_statement(status_is_ddf):
 409         LOGGER.info('Adding ddf to {0}, {1}'.format(subUrn, statusNode))
 410         ddf_text = get_url_as_text(download_ddf_url, 'GET', cookie)
 411         add_ddf_statements(model, statusNode, ddf_text)
 412         model.add_statement(status_is_ddf)
 413
 414
 415 def add_ddf_statements(model, statusNode, ddf_string):
 416     """Convert a ddf text file into RDF Statements
 417     """
 418     ddf_lines = ddf_string.split('\n')
 419     # first line is header
 420     header = ddf_lines[0].split()
 421     attributes = [DCC_NS[x] for x in header]
 422
 423     for ddf_line in ddf_lines[1:]:
 424         ddf_line = ddf_line.strip()
 425         if len(ddf_line) == 0:
 426             continue
 427         if ddf_line.startswith("#"):
 428             continue
 429
 430         ddf_record = ddf_line.split('\t')
 431         files = ddf_record[0].split(',')
 432         file_attributes = ddf_record[1:]
 433
 434         for f in files:
 435             fileNode = RDF.Node()
 436             add_stmt(model,
 437                      statusNode,
 438                      submissionOntology['has_file'],
 439                      fileNode)
 440             add_stmt(model, fileNode, rdfNS['type'], DCC_NS['file'])
 441             add_stmt(model, fileNode, DCC_NS['filename'], f)
 442
 443             for predicate, object in zip(attributes[1:], file_attributes):
 444                 add_stmt(model, fileNode, predicate, object)
 445
 446
 447 def load_encode_assigned_libraries(model, htswapi):
 448     """Get libraries associated with encode.
 449     """
 450     encodeFilters = ["/library/?affiliations__id__exact=44",
 451                      "/library/?affiliations__id__exact=80",
 452                     ]
 453
 454     encodeUrls = [os.path.join(htswapi.root_url + u) for u in encodeFilters]
 455     rdfaParser = RDF.Parser(name='rdfa')
 456     for encodeUrl in encodeUrls:
 457         LOGGER.info("Scanning library url {0}".format(encodeUrl))
 458         rdfaParser.parse_into_model(model, encodeUrl)
 459         query = RDF.Statement(None, libraryOntology['library_id'], None)
 460         libraries = model.find_statements(query)
 461         for statement in libraries:
 462             libraryUrn = statement.subject
 463             load_library_detail(model, libraryUrn)
 464
 465
 466 def load_unassigned_submitted_libraries(model):
 467     unassigned = find_unscanned_submitted_libraries(model)
 468     for query_record in unassigned:
 469         library_urn = query_record['library_urn']
 470         LOGGER.warn("Unassigned, submitted library: {0}".format(library_urn))
 471         load_library_detail(model, library_urn)
 472
 473 def reload_libraries(model, library_list):
 474     if len(library_list) == 0:
 475         # reload everything.
 476         queryset = find_all_libraries(model)
 477         libraries = ( str(s['library_urn']) for s in queryset )
 478     else:
 479         libraries = ( user_library_id_to_library_urn(l) for l in library_list )
 480
 481     for library_urn in libraries:
 482         delete_library(model, library_urn)
 483         load_library_detail(model, library_urn)
 484
 485 def user_library_id_to_library_urn(library_id):
 486     split_url = urlparse.urlsplit(library_id)
 487     if len(split_url.scheme) == 0:
 488         return LIBRARY_NS[library_id]
 489     else:
 490         return library_id
 491
 492 def delete_library(model, library_urn):
 493     if not isinstance(library_urn, RDF.Node):
 494         raise ValueError("library urn must be a RDF.Node")
 495
 496     LOGGER.info("Deleting {0}".format(str(library_urn.uri)))
 497     lane_query = RDF.Statement(library_urn, libraryOntology['has_lane'],None)
 498     for lane in model.find_statements(lane_query):
 499         delete_lane(model, lane.object)
 500     library_attrib_query = RDF.Statement(library_urn, None, None)
 501     for library_attrib in model.find_statements(library_attrib_query):
 502         LOGGER.debug("Deleting {0}".format(str(library_attrib)))
 503         del model[library_attrib]
 504
 505
 506 def delete_lane(model, lane_urn):
 507     if not isinstance(lane_urn, RDF.Node):
 508         raise ValueError("lane urn must be a RDF.Node")
 509
 510     delete_lane_mapping(model, lane_urn)
 511     lane_attrib_query = RDF.Statement(lane_urn,None,None)
 512     for lane_attrib in model.find_statements(lane_attrib_query):
 513         LOGGER.debug("Deleting {0}".format(str(lane_attrib)))
 514         del model[lane_attrib]
 515
 516
 517 def delete_lane_mapping(model, lane_urn):
 518     if not isinstance(lane_urn, RDF.Node):
 519         raise ValueError("lane urn must be a RDF.Node")
 520
 521     lane_mapping_query = RDF.Statement(lane_urn,
 522                                        libraryOntology['has_mappings'],
 523                                        None)
 524     for lane_mapping in model.find_statements(lane_mapping_query):
 525         mapping_attrib_query = RDF.Statement(lane_mapping.object,
 526                                              None,
 527                                              None)
 528         for mapping_attrib in model.find_statements(mapping_attrib_query):
 529             LOGGER.debug("Deleting {0}".format(str(mapping_attrib)))
 530             del model[mapping_attrib]
 531
 532
 533 def load_encodedcc_files(model, genome, composite):
 534     file_index = ucsc.get_encodedcc_file_index(genome, composite)
 535     if file_index is None:
 536         return
 537
 538     for filename, attributes in file_index.items():
 539         s = RDF.Node(RDF.Uri(filename))
 540         for name, value in attributes.items():
 541             p = RDF.Node(DCC_NS[name])
 542             o = RDF.Node(value)
 543             model.add_statement(RDF.Statement(s,p,o))
 544
 545
 546 def load_library_detail(model, libraryUrn):
 547     """Grab detail information from library page
 548     """
 549     rdfaParser = RDF.Parser(name='rdfa')
 550     query = RDF.Statement(libraryUrn, libraryOntology['date'], None)
 551     results = list(model.find_statements(query))
 552     log_message = "Found {0} statements for {1}"
 553     LOGGER.debug(log_message.format(len(results), libraryUrn))
 554     if len(results) == 0:
 555         LOGGER.info("Loading {0}".format(str(libraryUrn)))
 556         try:
 557             body = get_url_as_text(str(libraryUrn.uri), 'GET')
 558             rdfaParser.parse_string_into_model(model, body, libraryUrn.uri)
 559         except httplib2.HttpLib2ErrorWithResponse, e:
 560             LOGGER.error(str(e))
 561     elif len(results) == 1:
 562         pass  # Assuming that a loaded dataset has one record
 563     else:
 564         LOGGER.warning("Many dates for {0}".format(libraryUrn))
 565
 566
 567 def get_library_id(name):
 568     """Guess library ID from library name
 569
 570     >>> get_library_id('2x75-GM12892-rep1-11039 20110217 elements')
 571     '11039'
 572     >>> get_library_id('10150 C2C12-24h-myogenin-2PCR-Rep1.32mers')
 573     '10150'
 574     >>> get_library_id('2x75-GM12892-rep2-SL2970')
 575     '02970'
 576     """
 577     match = re.search(r"([ -]|^)(?P<id>([\d]{5})|(SL[\d]{4}))", name)
 578     library_id = None
 579     if match is not None:
 580         library_id = match.group('id')
 581     if library_id in SL_MAP:
 582         library_id = SL_MAP[library_id]
 583     return library_id
 584
 585
 586 def get_contents(element):
 587     """Return contents or none.
 588     """
 589     if len(element.contents) == 0:
 590         return None
 591
 592     a = element.find('a')
 593     if a is not None:
 594         return a.contents[0].encode(CHARSET)
 595
 596     return element.contents[0].encode(CHARSET)
 597
 598
 599 def create_status_node(submission_uri, timestamp):
 600     submission_uri = daf.submission_uri_to_string(submission_uri)
 601     if submission_uri[-1] != '/':
 602         sumbission_uri += '/'
 603     status_uri = submission_uri + timestamp
 604     return RDF.Node(RDF.Uri(status_uri))
 605
 606
 607 def get_date_contents(element):
 608     data = element.text_content()
 609     if data:
 610         return datetime.strptime(data, "%Y-%m-%d %H:%M")
 611     else:
 612         return None
 613
 614
 615 def add_stmt(model, subject, predicate, rdf_object):
 616     """Convienence create RDF Statement and add to a model
 617     """
 618     return model.add_statement(
 619         RDF.Statement(subject, predicate, rdf_object))
 620
 621
 622 def login(cookie=None):
 623     """Login if we don't have a cookie
 624     """
 625     if cookie is not None:
 626         return cookie
 627
 628     keys = keyring.get_keyring()
 629     password = keys.get_password(LOGIN_URL, USERNAME)
 630     credentials = {'login': USERNAME,
 631                    'password': password}
 632     headers = {'Content-type': 'application/x-www-form-urlencoded'}
 633     http = httplib2.Http()
 634     response, content = http.request(LOGIN_URL,
 635                                      'POST',
 636                                      headers=headers,
 637                                      body=urllib.urlencode(credentials))
 638     LOGGER.debug("Login to {0}, status {1}".format(LOGIN_URL,
 639                                                     response['status']))
 640
 641     cookie = response.get('set-cookie', None)
 642     if cookie is None:
 643         raise RuntimeError("Wasn't able to log into: {0}".format(LOGIN_URL))
 644     return cookie
 645
 646
 647 def get_url_as_tree(url, method, cookie=None):
 648     http = httplib2.Http()
 649     headers = {}
 650     if cookie is not None:
 651         headers['Cookie'] = cookie
 652     response, content = http.request(url, method, headers=headers)
 653     if response['status'] == '200':
 654         tree = fromstring(content, base_url=url)
 655         return tree
 656     else:
 657         msg = "error accessing {0}, status {1}"
 658         msg = msg.format(url, response['status'])
 659         e = httplib2.HttpLib2ErrorWithResponse(msg, response, content)
 660         raise e
 661
 662
 663 def get_url_as_text(url, method, cookie=None):
 664     http = httplib2.Http()
 665     headers = {}
 666     if cookie is not None:
 667         headers['Cookie'] = cookie
 668     response, content = http.request(url, method, headers=headers)
 669     if response['status'] == '200':
 670         return content
 671     else:
 672         msg = "error accessing {0}, status {1}"
 673         msg = msg.format(url, response['status'])
 674         e = httplib2.HttpLib2ErrorWithResponse(msg, response, content)
 675         raise e
 676
 677 ################
 678 #  old stuff
 679 SUBMISSIONS_LACKING_LIBID = [
 680     ('1x75-Directional-HeLa-Rep1',    '11208'),
 681     ('1x75-Directional-HeLa-Rep2',    '11207'),
 682     ('1x75-Directional-HepG2-Rep1',   '11210'),
 683     ('1x75-Directional-HepG2-Rep2',   '11209'),
 684     ('1x75-Directional-H1-hESC-Rep1', '10947'),
 685     ('1x75-Directional-H1-hESC-Rep2', '11009'),
 686     ('1x75-Directional-HUVEC-Rep1',   '11206'),
 687     ('1x75-Directional-HUVEC-Rep2',   '11205'),
 688     ('1x75-Directional-K562-Rep1',    '11008'),
 689     ('1x75-Directional-K562-Rep2',    '11007'),
 690     ('1x75-Directional-NHEK-Rep1',    '11204'),
 691     ('1x75-Directional-GM12878-Rep1', '11011'),
 692     ('1x75-Directional-GM12878-Rep2', '11010'),
 693     ]
 694
 695
 696 def select_by_library_id(submission_list):
 697     subl = [(x.library_id, x) for x in submission_list if x.library_id]
 698     libraries = {}
 699     for lib_id, subobj in subl:
 700         libraries.setdefault(lib_id, []).append(subobj)
 701
 702     for submission in libraries.values():
 703         submission.sort(key=attrgetter('date'), reverse=True)
 704
 705     return libraries
 706
 707
 708 def library_to_freeze(selected_libraries):
 709     freezes = ['2010-Jan', '2010-Jul', '2011-Jan']
 710     lib_ids = sorted(selected_libraries.keys())
 711     report = ['<html><table border="1">']
 712     report = ["""<html>
 713 <head>
 714 <style type="text/css">
 715  td {border-width:0 0 1px 1px; border-style:solid;}
 716 </style>
 717 </head>
 718 <body>
 719 <table>
 720 """]
 721     report.append('<thead>')
 722     report.append('<tr><td>Library ID</td><td>Name</td>')
 723     for f in freezes:
 724         report.append('<td>{0}</td>'.format(f))
 725     report.append('</tr>')
 726     report.append('</thead>')
 727     report.append('<tbody>')
 728     for lib_id in lib_ids:
 729         report.append('<tr>')
 730         lib_url = LIBRARY_NS[lib_id].uri
 731         report.append('<td><a href="{0}">{1}</a></td>'.format(lib_url, lib_id))
 732         submissions = selected_libraries[lib_id]
 733         report.append('<td>{0}</td>'.format(submissions[0].name))
 734         batched = {}
 735         for sub in submissions:
 736             date = date_to_freeze(sub.date)
 737             batched.setdefault(date, []).append(sub)
 738         for d in freezes:
 739             report.append('<td>')
 740             for s in batched.get(d, []):
 741                 show_url = submission_view_url(s.subid)
 742                 subid = '<a href="{0}">{1}</a>'.format(show_url, s.subid)
 743                 report.append("{0}:{1}".format(subid, s.status))
 744             report.append('</td>')
 745         else:
 746             report.append('<td></td>')
 747         report.append("</tr>")
 748     report.append('</tbody>')
 749     report.append("</table></html>")
 750     return "\n".join(report)
 751
 752
 753 def date_to_freeze(d):
 754     freezes = [(datetime(2010, 1, 30), '2010-Jan'),
 755                (datetime(2010, 7, 30), '2010-Jul'),
 756                (datetime(2011, 1, 30), '2011-Jan'),
 757                ]
 758     for end, name in freezes:
 759         if d < end:
 760             return name
 761     else:
 762         return None
 763
 764 if __name__ == "__main__":
 765     main()