3 Gather information about our submissions into a single RDF store
6 from datetime import datetime
11 from lxml.html import fromstring
12 from operator import attrgetter
13 from optparse import OptionParser, OptionGroup
23 from htsworkflow.submission import daf, ucsc
25 from htsworkflow.util import api
26 from htsworkflow.util.rdfhelp import \
38 TYPE_N = rdfNS['type']
39 CREATION_DATE = libraryOntology['date']
42 LIBRARY_NS = RDF.NS("http://jumpgate.caltech.edu/library/")
44 from htsworkflow.submission.ucsc import \
47 get_encodedcc_file_index, \
48 submission_view_url, \
51 DCC_NS = RDF.NS(UCSCEncodePipeline + 'download_ddf#')
53 DBDIR = os.path.expanduser("~diane/proj/submission")
55 LOGGER = logging.getLogger("encode_find")
57 LOGIN_URL = 'http://encodesubmit.ucsc.edu/account/login'
58 USER_URL = 'http://encodesubmit.ucsc.edu/pipeline/show_user'
63 SL_MAP = {'SL2970': '02970',
def main(cmdline=None):
    """Sync submission and library information into the RDF model.

    Takes a list of arguments (assuming arg[0] is the program name) or None
    If None, it looks at sys.argv
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)
    # NOTE(review): log level is presumably selected by opts.debug/opts.verbose
    # guards -- the conditionals are not visible in this view; confirm.
    logging.basicConfig(level=logging.DEBUG)
    logging.basicConfig(level=logging.INFO)
    logging.basicConfig(level=logging.ERROR)
    # build an authenticated htsworkflow API client from the auth options
    htsw_authdata = api.make_auth_from_opts(opts, parser)
    htswapi = api.HtswApi(opts.host, htsw_authdata)
    model = get_model(opts.model, DBDIR)
    if opts.load_rdf is not None:
        # seed the model from an RDF file before doing any updates
        ns_uri = submissionOntology[''].uri
        load_into_model(model, opts.rdf_parser_name, opts.load_rdf, ns_uri)
    if opts.reload_libraries:
        reload_libraries(model, args)
    # --update implies all three specific update flags
    # NOTE(review): these three lines appear to sit under an `if opts.update:`
    # guard that is not visible in this view; confirm.
    opts.update_submission = True
    opts.update_libraries = True
    opts.update_ucsc_downloads = True
    if opts.update_submission:
        # log in to encodesubmit and scrape our submission status pages
        cookie = login(cookie=cookie)
        load_my_submissions(model, limit=limit, cookie=cookie)
    if opts.update_libraries:
        load_encode_assigned_libraries(model, htswapi)
        load_unassigned_submitted_libraries(model)
    if opts.update_ucsc_downloads:
        # composites whose UCSC download indexes we track
        # NOTE(review): `our_tracks = [` opener not visible in this view
        {'genome':'hg19', 'composite':'wgEncodeCaltechRnaSeq'},
        {'genome':'mm9', 'composite':'wgEncodeCaltechHist'},
        #{'genome':'mm9', 'composite':'wgEncodeCaltechHistone'},
        {'genome':'mm9', 'composite':'wgEncodeCaltechTfbs'}
        for track_info in our_tracks:
            load_encodedcc_files(model, **track_info )
    if opts.sparql is not None:
        sparql_query(model, opts.sparql)
    if opts.find_submission_with_no_library:
        report_submissions_with_no_library(model)
    # dump the final model state (presumably guarded by opts.print_rdf)
    serializer = get_serializer(name=opts.rdf_parser_name)
    print serializer.serialize_model_to_string(model)
    """Construct option parser

    Returns an OptionParser with Commands, Queries and Options groups
    plus the shared htsworkflow auth options.
    """
    parser = OptionParser()

    # commands: actions that modify or dump the model
    commands = OptionGroup(parser, "Commands")
    commands.add_option('--model', default=None,
                        help="Load model database")
    commands.add_option('--load-rdf', default=None,
                        help="load rdf statements into model")
    commands.add_option('--print-rdf', action="store_true", default=False,
                        help="print ending model state")
    commands.add_option('--update', action="store_true", default=False,
                        help="Do all updates")
    commands.add_option('--update-submission', action="store_true",
                        help="download status from ucsc")
    commands.add_option('--update-ucsc-downloads', action="store_true",
                        help="Update download locations from UCSC")
    commands.add_option('--update-libraries', action="store_true",
                        help="download library info from htsw")
    commands.add_option('--reload-libraries', action="store_true",
                        help="Delete and redownload library information. "\
                             "Optionally list specific library IDs.")
    parser.add_option_group(commands)

    # queries: read-only reporting against the model
    queries = OptionGroup(parser, "Queries")
    queries.add_option('--sparql', default=None,
                       help="execute arbitrary sparql query")
    queries.add_option('--find-submission-with-no-library', default=False,
                       help="find submissions with no library ID")
    parser.add_option_group(queries)

    # misc behavior tweaks
    options = OptionGroup(parser, "Options")
    options.add_option("--rdf-parser-name", default="turtle",
                       help="set rdf file parser type")
    options.add_option("-v", "--verbose", action="store_true", default=False)
    options.add_option("--debug", action="store_true", default=False)
    parser.add_option_group(options)

    api.add_auth_options(parser)
def load_my_submissions(model, limit=None, cookie=None):
    """Parse all the submissions from UCSC into model
    It will look at the global USER_URL to figure out who to scrape
    cookie contains the session cookie, if none, will attempt to login

    :param limit: optional collection of submission ids; others are skipped
    """
    tree = get_url_as_tree(USER_URL, 'GET', cookie)
    table_rows = tree.xpath('//table[@id="projects"]/tr')
    # first record is header
    name_n = submissionOntology['name']
    species_n = submissionOntology['species']
    library_urn = submissionOntology['library_urn']

    for row in table_rows[1:]:
        cell = row.xpath('td')
        if cell is not None and len(cell) > 1:
            submission_id = str(cell[0].text_content())
            if limit is None or submission_id in limit:
                subUrn = RDF.Uri(submission_view_url(submission_id))
                # NOTE(review): the add_stmt calls these argument fragments
                # belong to are not visible in this view; confirm.
                submissionOntology['Submission'])
                RDF.Node(submission_id))
                name = str(cell[4].text_content())
                add_stmt(model, subUrn, name_n, name)
                species = str(cell[2].text_content())
                if species is not None:
                    add_stmt(model, subUrn, species_n, species)
                # link the submission to its woldlab library when the name
                # contains a parsable library id
                library_id = get_library_id(name)
                if library_id is not None:
                    add_submission_to_library_urn(model,
                    errmsg = 'Unable to find library id in {0} for {1}'
                    LOGGER.warn(errmsg.format(name, str(subUrn)))
                add_submission_creation_date(model, subUrn, cookie)

                # grab changing attributes
                status = str(cell[6].text_content()).strip()
                last_mod_datetime = get_date_contents(cell[8])
                last_mod = last_mod_datetime.isoformat()
                update_submission_detail(model, subUrn, status, last_mod,
                LOGGER.info("Processed {0}".format(subUrn))
def add_submission_to_library_urn(model, submissionUrn, predicate, library_id):
    """Add a link from a UCSC submission to woldlab library if needed.

    :param model: RDF model to update
    :param submissionUrn: subject node for the UCSC submission
    :param predicate: predicate linking a submission to its library
    :param library_id: woldlab library id, resolved against LIBRARY_NS
    """
    libraryUrn = LIBRARY_NS[library_id + '/']
    # build the candidate statement once and reuse it for both the
    # containment test and the insert (the original built it twice)
    link = RDF.Statement(submissionUrn, predicate, libraryUrn)
    if not model.contains_statement(link):
        LOGGER.info("Adding Sub -> Lib link: {0}".format(link))
        model.add_statement(link)
    else:
        LOGGER.debug("Found: {0}".format(str(link)))
def report_submissions_with_no_library(model):
    """Print turtle stubs for submissions lacking a library_urn link."""
    missing = find_submissions_with_no_library(model)
    # NOTE(review): `name` and `subid` come from a loop over `missing`
    # whose header is not visible in this view; confirm.
    print "# {0}".format(name)
    print "<{0}>".format(subid.uri)
    print " encodeSubmit:library_urn "\
          "<http://jumpgate.caltech.edu/library/> ."
def find_submissions_with_no_library(model):
    """Run a SPARQL query for submissions with a name but no library_urn.

    Returns the query result iterator of bindings with ?subid and ?name.
    """
    missing_lib_query_text = """
PREFIX submissionOntology:<{submissionOntology}>
?subid submissionOntology:name ?name
OPTIONAL {{ ?subid submissionOntology:library_urn ?libid }}
FILTER (!bound(?libid))
}}""".format(submissionOntology=submissionOntology[''].uri)
    missing_lib_query = RDF.SPARQLQuery(missing_lib_query_text)
    return missing_lib_query.execute(model)
def find_unscanned_submitted_libraries(model):
    """Scan model for libraries that don't have library details loaded

    A library is "unscanned" when it is referenced by a submission but has
    no rdf:type statement of its own yet.
    """
    unscanned_libraries = """
PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX submissionOntology:<{submissionOntology}>
SELECT distinct ?submission ?library_urn
?submission submissionOntology:library_urn ?library_urn .
OPTIONAL {{ ?library_urn rdf:type ?library_type }}
FILTER(!BOUND(?library_type))
}}""".format(submissionOntology=submissionOntology[''].uri)
    query = RDF.SPARQLQuery(unscanned_libraries)
    return query.execute(model)
def find_all_libraries(model):
    """Scan model for every library marked as

    NOTE(review): the `libraries = """ + '"""' + """` assignment opening the
    query string is not visible in this view; confirm against the full file.
    """
PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX libraryOntology:<{libraryOntology}>
SELECT distinct ?library_urn
?library_urn rdf:type ?library_type .
}}""".format(libraryOntology=libraryOntology[''].uri)
    query = RDF.SPARQLQuery(libraries)
    return query.execute(model)
def add_submission_creation_date(model, subUrn, cookie):
    """Ensure subUrn has a creation date, scraping its page if needed."""
    # in theory the submission page might have more information on it.
    creation_dates = get_creation_dates(model, subUrn)
    if len(creation_dates) == 0:
        # not recorded yet: fetch the submission detail page and parse it
        LOGGER.info("Getting creation date for: {0}".format(str(subUrn)))
        submissionTree = get_url_as_tree(str(subUrn), 'GET', cookie)
        parse_submission_page(model, submissionTree, subUrn)
        # NOTE(review): this debug line appears to belong to an else branch
        # whose keyword is not visible in this view; confirm.
        LOGGER.debug("Found creation date for: {0}".format(str(subUrn)))
def get_creation_dates(model, subUrn):
    """Return every creation-date statement recorded for a submission."""
    date_query = RDF.Statement(subUrn, CREATION_DATE, None)
    return [statement for statement in model.find_statements(date_query)]
def parse_submission_page(model, submissionTree, subUrn):
    """Extract the 'Created' date from a submission page into the model."""
    cells = submissionTree.findall('.//td')
    dateTimeType = xsdNS['dateTime']
    # find the table cell labeled 'Created'; the date is in the next cell
    created_label = [x for x in cells
                     if x.text_content().startswith('Created')]
    if len(created_label) == 1:
        created_date = get_date_contents(created_label[0].getnext())
        # store as a typed xsd:dateTime literal
        created_date_node = RDF.Node(literal=created_date.isoformat(),
                                     datatype=dateTimeType.uri)
        add_stmt(model, subUrn, CREATION_DATE, created_date_node)
        # NOTE(review): this msg appears to belong to an else branch whose
        # keyword (and the warning call) are not visible in this view.
        msg = 'Unable to find creation date for {0}'.format(str(subUrn))
def update_submission_detail(model, subUrn, status, recent_update, cookie):
    """Create or refresh the status node attached to a submission.

    :param status: current status string scraped from the UCSC page
    :param recent_update: isoformat last-modified timestamp from the page
    """
    HasStatusN = submissionOntology['has_status']
    StatusN = submissionOntology['status']
    LastModifyN = submissionOntology['last_modify_date']

    status_nodes_query = RDF.Statement(subUrn, HasStatusN, None)
    status_nodes = list(model.find_statements(status_nodes_query))

    if len(status_nodes) == 0:
        # has no status node, add one
        LOGGER.info("Adding status node to {0}".format(subUrn))
        status_node = create_status_node(subUrn, recent_update)
        add_stmt(model, subUrn, HasStatusN, status_node)
        add_stmt(model, status_node, rdfNS['type'], StatusN)
        add_stmt(model, status_node, StatusN, status)
        add_stmt(model, status_node, LastModifyN, recent_update)
        update_ddf(model, subUrn, status_node, cookie=cookie)
        update_daf(model, subUrn, status_node, cookie=cookie)
        # NOTE(review): the following appears to be the else branch (existing
        # status nodes); the else keyword is not visible in this view.
        LOGGER.info("Found {0} status blanks".format(len(status_nodes)))
        for status_statement in status_nodes:
            status_node = status_statement.object
            # look up the stored last-modified date for this status node
            last_modified_query = RDF.Statement(status_node,
            last_mod_nodes = model.find_statements(last_modified_query)
            for last_mod_statement in last_mod_nodes:
                last_mod_date = str(last_mod_statement.object)
                # only refresh ddf/daf when the stored date matches the one
                # scraped from the status page
                if recent_update == str(last_mod_date):
                    update_ddf(model, subUrn, status_node, cookie=cookie)
                    update_daf(model, subUrn, status_node, cookie=cookie)
def update_daf(model, submission_url, status_node, cookie):
    """Download and record a submission's DAF if not already present.

    The daf url is derived by replacing 'show' with 'download_daf' in the
    submission url; its md5 is stored so later changes can be detected.
    """
    download_daf_uri = str(submission_url).replace('show', 'download_daf')
    daf_uri = RDF.Uri(download_daf_uri)

    status_is_daf = RDF.Statement(status_node, TYPE_N, dafTermOntology[''])
    if not model.contains_statement(status_is_daf):
        LOGGER.info('Adding daf to {0}, {1}'.format(submission_url,
        daf_text = get_url_as_text(download_daf_uri, 'GET', cookie)
        # record a checksum of the daf text alongside the parsed statements
        daf_hash = hashlib.md5(daf_text).hexdigest()
        daf_hash_stmt = RDF.Statement(status_node,
                                      dafTermOntology['md5sum'],
        model.add_statement(daf_hash_stmt)
        daf.fromstring_into_model(model, status_node, daf_text)
def update_ddf(model, subUrn, statusNode, cookie):
    """Download and load a submission's DDF if not already recorded.

    The ddf url is derived by replacing 'show' with 'download_ddf' in the
    submission url.  Once loaded, statusNode is typed as a ddf so we don't
    re-download it on later runs.

    :param model: RDF model to update
    :param subUrn: submission uri the ddf belongs to
    :param statusNode: status node the ddf statements hang off of
    :param cookie: UCSC session cookie for the download request
    """
    download_ddf_url = str(subUrn).replace('show', 'download_ddf')
    # (removed unused local `ddfUrn = RDF.Uri(download_ddf_url)`)

    status_is_ddf = RDF.Statement(statusNode, TYPE_N, DCC_NS[''])
    if not model.contains_statement(status_is_ddf):
        LOGGER.info('Adding ddf to {0}, {1}'.format(subUrn, statusNode))
        ddf_text = get_url_as_text(download_ddf_url, 'GET', cookie)
        add_ddf_statements(model, statusNode, ddf_text)
        model.add_statement(status_is_ddf)
def add_ddf_statements(model, statusNode, ddf_string):
    """Convert a ddf text file into RDF Statements

    The first line is a whitespace-separated header naming each column;
    subsequent tab-separated records describe one or more files each.
    """
    ddf_lines = ddf_string.split('\n')
    # first line is header
    header = ddf_lines[0].split()
    attributes = [DCC_NS[x] for x in header]

    for ddf_line in ddf_lines[1:]:
        ddf_line = ddf_line.strip()
        # skip blank lines and comments (continue statements not visible
        # in this view)
        if len(ddf_line) == 0:
        if ddf_line.startswith("#"):

        ddf_record = ddf_line.split('\t')
        files = ddf_record[0].split(',')
        file_attributes = ddf_record[1:]

        # the first column may name several files; each gets a blank node
        # carrying the remaining columns as attributes
        fileNode = RDF.Node()
        submissionOntology['has_file'],
        add_stmt(model, fileNode, rdfNS['type'], DCC_NS['file'])
        add_stmt(model, fileNode, DCC_NS['filename'], f)

        for predicate, object in zip(attributes[1:], file_attributes):
            add_stmt(model, fileNode, predicate, object)
def load_encode_assigned_libraries(model, htswapi):
    """Get libraries associated with encode.

    Scrapes the RDFa from the htsworkflow library-list pages filtered by the
    ENCODE affiliation ids, then loads each found library's detail page.
    """
    encodeFilters = ["/library/?affiliations__id__exact=44",
                     "/library/?affiliations__id__exact=80",

    encodeUrls = [os.path.join(htswapi.root_url + u) for u in encodeFilters]
    rdfaParser = RDF.Parser(name='rdfa')
    for encodeUrl in encodeUrls:
        LOGGER.info("Scanning library url {0}".format(encodeUrl))
        rdfaParser.parse_into_model(model, encodeUrl)

    # every subject with a library_id gets its detail page loaded
    query = RDF.Statement(None, libraryOntology['library_id'], None)
    libraries = model.find_statements(query)
    for statement in libraries:
        libraryUrn = statement.subject
        load_library_detail(model, libraryUrn)
def load_unassigned_submitted_libraries(model):
    """Load details for submitted libraries that haven't been scanned yet."""
    for record in find_unscanned_submitted_libraries(model):
        urn = record['library_urn']
        LOGGER.warn("Unassigned, submitted library: {0}".format(urn))
        load_library_detail(model, urn)
def reload_libraries(model, library_list):
    """Delete and re-fetch library details.

    With an empty library_list every library in the model is reloaded;
    otherwise only the listed user-supplied library ids are.
    """
    if len(library_list) == 0:
        queryset = find_all_libraries(model)
        libraries = ( str(s['library_urn']) for s in queryset )
        # NOTE(review): the following appears to be the else branch for an
        # explicit id list; the else keyword is not visible in this view.
        libraries = ( user_library_id_to_library_urn(l) for l in library_list )

    for library_urn in libraries:
        delete_library(model, library_urn)
        load_library_detail(model, library_urn)
def user_library_id_to_library_urn(library_id):
    """Convert a user-supplied library id to a library URN.

    Bare ids (no url scheme) are resolved against LIBRARY_NS; the branch
    handling fully-qualified urls is not visible in this view.
    """
    split_url = urlparse.urlsplit(library_id)
    if len(split_url.scheme) == 0:
        return LIBRARY_NS[library_id]
def delete_library(model, library_urn):
    """Remove a library and all of its lanes from the model.

    Raises ValueError when library_urn is not an RDF.Node.
    """
    if not isinstance(library_urn, RDF.Node):
        raise ValueError("library urn must be a RDF.Node")

    LOGGER.info("Deleting {0}".format(str(library_urn.uri)))
    # first tear down every lane hanging off this library...
    has_lane = RDF.Statement(library_urn, libraryOntology['has_lane'], None)
    for lane_statement in model.find_statements(has_lane):
        delete_lane(model, lane_statement.object)
    # ...then drop every remaining statement about the library itself
    about_library = RDF.Statement(library_urn, None, None)
    for statement in model.find_statements(about_library):
        LOGGER.debug("Deleting {0}".format(str(statement)))
        del model[statement]
def delete_lane(model, lane_urn):
    """Remove a lane node, its mappings, and all its attributes.

    Raises ValueError when lane_urn is not an RDF.Node.
    """
    if not isinstance(lane_urn, RDF.Node):
        raise ValueError("lane urn must be a RDF.Node")

    # mappings first, then the lane's own statements
    delete_lane_mapping(model, lane_urn)
    about_lane = RDF.Statement(lane_urn, None, None)
    for statement in model.find_statements(about_lane):
        LOGGER.debug("Deleting {0}".format(str(statement)))
        del model[statement]
def delete_lane_mapping(model, lane_urn):
    """Delete the mapping nodes (and their attributes) attached to a lane."""
    if not isinstance(lane_urn, RDF.Node):
        raise ValueError("lane urn must be a RDF.Node")

    # find each mapping object hanging off the lane, then delete every
    # statement about that mapping (Statement call tails not visible here)
    lane_mapping_query = RDF.Statement(lane_urn,
                                       libraryOntology['has_mappings'],
    for lane_mapping in model.find_statements(lane_mapping_query):
        mapping_attrib_query = RDF.Statement(lane_mapping.object,
        for mapping_attrib in model.find_statements(mapping_attrib_query):
            LOGGER.debug("Deleting {0}".format(str(mapping_attrib)))
            del model[mapping_attrib]
def load_encodedcc_files(model, genome, composite):
    """Record UCSC encodedcc download-index entries for one composite."""
    file_index = ucsc.get_encodedcc_file_index(genome, composite)
    # bail out when the index couldn't be fetched (body not visible here)
    if file_index is None:

    for filename, attributes in file_index.items():
        # each downloadable file becomes a ucsc_track resource carrying its
        # index attributes in the DCC namespace
        s = RDF.Node(RDF.Uri(filename))
        RDF.Statement(s, TYPE_N, submissionOntology['ucsc_track']))
        for name, value in attributes.items():
            p = RDF.Node(DCC_NS[name])
            model.add_statement(RDF.Statement(s,p,o))
def load_library_detail(model, libraryUrn):
    """Grab detail information from library page

    Skips the fetch when the library already has a date statement loaded.
    """
    rdfaParser = RDF.Parser(name='rdfa')
    query = RDF.Statement(libraryUrn, libraryOntology['date'], None)
    results = list(model.find_statements(query))
    log_message = "Found {0} statements for {1}"
    LOGGER.debug(log_message.format(len(results), libraryUrn))
    if len(results) == 0:
        LOGGER.info("Loading {0}".format(str(libraryUrn)))
        # scrape the library page's RDFa into the model; the try: opening
        # this handler is not visible in this view
        body = get_url_as_text(str(libraryUrn.uri), 'GET')
        rdfaParser.parse_string_into_model(model, body, libraryUrn.uri)
        except httplib2.HttpLib2ErrorWithResponse, e:
    elif len(results) == 1:
        pass  # Assuming that a loaded dataset has one record
        # NOTE(review): warning appears to belong to a final else branch
        LOGGER.warning("Many dates for {0}".format(libraryUrn))
def get_library_id(name):
    """Guess library ID from library name

    >>> get_library_id('2x75-GM12892-rep1-11039 20110217 elements')
    >>> get_library_id('10150 C2C12-24h-myogenin-2PCR-Rep1.32mers')
    >>> get_library_id('2x75-GM12892-rep2-SL2970')
    """
    # accept either a 5-digit library id or an SLnnnn solexa id, at the
    # start of the name or after a space/hyphen
    match = re.search(r"([ -]|^)(?P<id>([\d]{5})|(SL[\d]{4}))", name)
    if match is not None:
        library_id = match.group('id')
        # SL ids are translated via SL_MAP to their jumpgate library ids
        if library_id in SL_MAP:
            library_id = SL_MAP[library_id]
def get_contents(element):
    """Return contents or none.

    NOTE(review): guards between these statements (empty-contents return,
    anchor presence check) are not visible in this view; confirm.
    """
    if len(element.contents) == 0:

    # prefer the text of a nested anchor when one is present
    a = element.find('a')
    return a.contents[0].encode(CHARSET)

    return element.contents[0].encode(CHARSET)
def create_status_node(submission_uri, timestamp):
    """Build the URI node naming a status record for a submission.

    The status uri is <submission_uri>/<timestamp>; a trailing slash is
    appended to the submission uri when missing.

    :param submission_uri: submission node/uri, normalized via daf helper
    :param timestamp: string timestamp appended to form the status uri
    :return: RDF.Node wrapping the status uri
    """
    submission_uri = daf.submission_uri_to_string(submission_uri)
    if submission_uri[-1] != '/':
        # BUG FIX: was misspelled `sumbission_uri`, which bound a new unused
        # variable and silently left the slash off the generated uri
        submission_uri += '/'
    status_uri = submission_uri + timestamp
    return RDF.Node(RDF.Uri(status_uri))
def get_date_contents(element):
    """Parse the element's text as a 'YYYY-MM-DD HH:MM' datetime.

    NOTE(review): handling of empty or unparsable text is not visible in
    this view -- confirm against the full file.
    """
    data = element.text_content()
    return datetime.strptime(data, "%Y-%m-%d %H:%M")
def add_stmt(model, subject, predicate, rdf_object):
    """Convenience helper: build an RDF.Statement and add it to model."""
    statement = RDF.Statement(subject, predicate, rdf_object)
    return model.add_statement(statement)
def login(cookie=None):
    """Login if we don't have a cookie

    Returns a session cookie usable on later encodesubmit requests
    (early-return for an existing cookie is not visible in this view).
    """
    if cookie is not None:

    # pull the password for USERNAME out of the system keyring
    keys = keyring.get_keyring()
    password = keys.get_password(LOGIN_URL, USERNAME)
    credentials = {'login': USERNAME,
                   'password': password}
    headers = {'Content-type': 'application/x-www-form-urlencoded'}
    http = httplib2.Http()
    response, content = http.request(LOGIN_URL,
                                     body=urllib.urlencode(credentials))
    LOGGER.debug("Login to {0}, status {1}".format(LOGIN_URL,
    # the session cookie from the response proves we're logged in
    cookie = response.get('set-cookie', None)
    # NOTE(review): this raise appears to be guarded by a cookie-is-None
    # check not visible in this view
    raise RuntimeError("Wasn't able to log into: {0}".format(LOGIN_URL))
def get_url_as_tree(url, method, cookie=None):
    """Fetch url and return it parsed as an lxml html tree.

    Raises httplib2.HttpLib2ErrorWithResponse on non-200 responses
    (the raise statement itself is not visible in this view).
    """
    http = httplib2.Http()
    if cookie is not None:
        headers['Cookie'] = cookie
    response, content = http.request(url, method, headers=headers)
    if response['status'] == '200':
        tree = fromstring(content, base_url=url)
        # non-200: build an error carrying the response for the caller
        msg = "error accessing {0}, status {1}"
        msg = msg.format(url, response['status'])
        e = httplib2.HttpLib2ErrorWithResponse(msg, response, content)
def get_url_as_text(url, method, cookie=None):
    """Fetch url and return the raw response body.

    Raises httplib2.HttpLib2ErrorWithResponse on non-200 responses
    (the return/raise statements are not visible in this view).
    """
    http = httplib2.Http()
    if cookie is not None:
        headers['Cookie'] = cookie
    response, content = http.request(url, method, headers=headers)
    if response['status'] == '200':
        # non-200: build an error carrying the response for the caller
        msg = "error accessing {0}, status {1}"
        msg = msg.format(url, response['status'])
        e = httplib2.HttpLib2ErrorWithResponse(msg, response, content)
# (submission name, library id) pairs for old submissions whose names do
# not contain a parsable library id
SUBMISSIONS_LACKING_LIBID = [
    ('1x75-Directional-HeLa-Rep1', '11208'),
    ('1x75-Directional-HeLa-Rep2', '11207'),
    ('1x75-Directional-HepG2-Rep1', '11210'),
    ('1x75-Directional-HepG2-Rep2', '11209'),
    ('1x75-Directional-H1-hESC-Rep1', '10947'),
    ('1x75-Directional-H1-hESC-Rep2', '11009'),
    ('1x75-Directional-HUVEC-Rep1', '11206'),
    ('1x75-Directional-HUVEC-Rep2', '11205'),
    ('1x75-Directional-K562-Rep1', '11008'),
    ('1x75-Directional-K562-Rep2', '11007'),
    ('1x75-Directional-NHEK-Rep1', '11204'),
    ('1x75-Directional-GM12878-Rep1', '11011'),
    ('1x75-Directional-GM12878-Rep2', '11010'),
def select_by_library_id(submission_list):
    """Group submissions by library id, newest first within each group.

    NOTE(review): the `libraries = {}` initialization and the return
    statement are not visible in this view; confirm.
    """
    subl = [(x.library_id, x) for x in submission_list if x.library_id]
    for lib_id, subobj in subl:
        libraries.setdefault(lib_id, []).append(subobj)

    for submission in libraries.values():
        # newest submission first within each library's list
        submission.sort(key=attrgetter('date'), reverse=True)
def library_to_freeze(selected_libraries):
    """Render an HTML table of libraries against the ENCODE freeze dates."""
    freezes = ['2010-Jan', '2010-Jul', '2011-Jan']
    lib_ids = sorted(selected_libraries.keys())
    report = ['<html><table border="1">']
    # NOTE(review): the following css lines sit inside a multi-line string
    # append whose delimiters are not visible in this view
    <style type="text/css">
    td {border-width:0 0 1px 1px; border-style:solid;}
    report.append('<thead>')
    report.append('<tr><td>Library ID</td><td>Name</td>')
    # one column header per freeze (loop header not visible here)
    report.append('<td>{0}</td>'.format(f))
    report.append('</tr>')
    report.append('</thead>')
    report.append('<tbody>')
    for lib_id in lib_ids:
        report.append('<tr>')
        lib_url = LIBRARY_NS[lib_id].uri
        report.append('<td><a href="{0}">{1}</a></td>'.format(lib_url, lib_id))
        submissions = selected_libraries[lib_id]
        report.append('<td>{0}</td>'.format(submissions[0].name))
        # bucket each submission into its freeze period
        for sub in submissions:
            date = date_to_freeze(sub.date)
            batched.setdefault(date, []).append(sub)
        report.append('<td>')
        for s in batched.get(d, []):
            show_url = submission_view_url(s.subid)
            subid = '<a href="{0}">{1}</a>'.format(show_url, s.subid)
            report.append("{0}:{1}".format(subid, s.status))
        report.append('</td>')
        report.append('<td></td>')
        report.append("</tr>")
    report.append('</tbody>')
    report.append("</table></html>")
    return "\n".join(report)
def date_to_freeze(d):
    """Map a datetime to the name of the ENCODE freeze period it falls in.

    NOTE(review): the list terminator and the return logic comparing d to
    each freeze end date are not visible in this view; confirm.
    """
    freezes = [(datetime(2010, 1, 30), '2010-Jan'),
               (datetime(2010, 7, 30), '2010-Jul'),
               (datetime(2011, 1, 30), '2011-Jan'),
    for end, name in freezes:
766 if __name__ == "__main__":