#!/usr/bin/env python
"""
Gather information about our submissions into a single RDF store
"""
from __future__ import print_function

from datetime import datetime
import hashlib
import httplib2
import keyring
import logging
from lxml.html import fromstring
from operator import attrgetter
from optparse import OptionParser, OptionGroup
import os
import re
import sys
from six.moves import urllib

from rdflib import BNode, Graph, Literal, Namespace, URIRef
from rdflib.namespace import RDF, RDFS, XSD

if 'DJANGO_SETTINGS_MODULE' not in os.environ:
    os.environ['DJANGO_SETTINGS_MODULE'] = 'htsworkflow.settings'

from htsworkflow.submission import daf, ucsc

from htsworkflow.util import api
from htsworkflow.util.rdfns import (
     dafTermOntology,
     submissionOntology,
     libraryOntology,
)
# model helpers; assumed to be the rdflib-backed versions from rdfhelp
from htsworkflow.util.rdfhelp import (
     get_model,
     load_into_model,
     sparql_query,
)

TYPE_N = RDF['type']
CREATION_DATE = libraryOntology['date']

# URL mappings
LIBRARY_NS = Namespace("http://jumpgate.caltech.edu/library/")

from htsworkflow.submission.ucsc import \
     daf_download_url, \
     ddf_download_url, \
     get_encodedcc_file_index, \
     submission_view_url, \
     UCSCEncodePipeline

DCC_NS = Namespace(UCSCEncodePipeline + 'download_ddf#')

DBDIR = os.path.expanduser("~diane/proj/submission")

LOGGER = logging.getLogger("encode_find")

LOGIN_URL = 'http://encodesubmit.ucsc.edu/account/login'
USER_URL = 'http://encodesubmit.ucsc.edu/pipeline/show_user'

USERNAME = 'detrout'
CHARSET = 'utf-8'

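# Libraries submitted under a legacy SL-prefixed name rather than a
# jumpgate library id; map them back to their five digit library ids.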
SL_MAP = {'SL2970': '02970',
          'SL2971': '02971',
          'SL2973': '02973'}

def main(cmdline=None):
    """
    Parse command line arguments

    Takes a list of command line arguments (without the program name)
    or None; if None, optparse falls back to sys.argv[1:].
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)

    if opts.debug:
        logging.basicConfig(level=logging.DEBUG)
    elif opts.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.ERROR)

    htsw_authdata = api.make_auth_from_opts(opts, parser)
    htswapi = api.HtswApi(opts.host, htsw_authdata)

    cookie = None
    model = get_model(opts.model, DBDIR)

    if opts.load_rdf is not None:
        ns_uri = submissionOntology[''].uri
        load_into_model(model, opts.rdf_parser_name, opts.load_rdf, ns_uri)

    if len(args) == 0:
        limit = None
    else:
        limit = args

    if opts.reload_libraries:
        reload_libraries(model, args)
        return

    if opts.update:
        opts.update_submission = True
        opts.update_libraries = True
        opts.update_ucsc_downloads = True

    if opts.update_submission:
        cookie = login(cookie=cookie)
        load_my_submissions(model, limit=limit, cookie=cookie)

    if opts.update_libraries:
        load_encode_assigned_libraries(model, htswapi)
        load_unassigned_submitted_libraries(model)

    if opts.update_ucsc_downloads:
        our_tracks = [
            {'genome': 'hg19', 'composite': 'wgEncodeCaltechRnaSeq'},
            {'genome': 'mm9',  'composite': 'wgEncodeCaltechHist'},
            #{'genome': 'mm9',  'composite': 'wgEncodeCaltechHistone'},
            {'genome': 'mm9',  'composite': 'wgEncodeCaltechTfbs'}
        ]
        for track_info in our_tracks:
            load_encodedcc_files(model, **track_info)

    if opts.sparql is not None:
        sparql_query(model, opts.sparql, 'html')

    if opts.find_submission_with_no_library:
        report_submissions_with_no_library(model)

    if opts.print_rdf:
        print(model.serialize(format=opts.rdf_parser_name))


def make_parser():
    """Construct option parser
    """
    parser = OptionParser()
    commands = OptionGroup(parser, "Commands")
    commands.add_option('--model', default=None,
                        help="Load model database")
    commands.add_option('--load-rdf', default=None,
                        help="load rdf statements into model")
    commands.add_option('--print-rdf', action="store_true", default=False,
                        help="print ending model state")
    commands.add_option('--update', action="store_true", default=False,
                        help="Do all updates")
    commands.add_option('--update-submission', action="store_true",
                        default=False,
                        help="download status from ucsc")
    commands.add_option('--update-ucsc-downloads', action="store_true",
                        default=False,
                        help="Update download locations from UCSC")
    commands.add_option('--update-libraries', action="store_true",
                        default=False,
                        help="download library info from htsw")
    commands.add_option('--reload-libraries', action="store_true",
                        default=False,
                        help="Delete and redownload library information. "
                             "Optionally list specific library IDs.")
    parser.add_option_group(commands)

    queries = OptionGroup(parser, "Queries")
    queries.add_option('--sparql', default=None,
                       help="execute arbitrary sparql query")
    queries.add_option('--find-submission-with-no-library', default=False,
                       action="store_true",
                       help="find submissions with no library ID")
    parser.add_option_group(queries)

    options = OptionGroup(parser, "Options")
    options.add_option("--rdf-parser-name", default="turtle",
                       help="set rdf file parser type")
    options.add_option("-v", "--verbose", action="store_true", default=False)
    options.add_option("--debug", action="store_true", default=False)
    parser.add_option_group(options)

    api.add_auth_options(parser)

    return parser


def load_my_submissions(model, limit=None, cookie=None):
    """Parse all of my submissions from encodesubmit into model.
    It looks at the global USER_URL to figure out who to scrape.
    cookie contains the session cookie; if None, it will attempt to login.
    """
    if cookie is None:
        cookie = login()

    tree = get_url_as_tree(USER_URL, 'GET', cookie)
    table_rows = tree.xpath('//table[@id="projects"]/tr')
    name_n = submissionOntology['name']
    species_n = submissionOntology['species']
    library_urn = submissionOntology['library_urn']

    # the first record is the header, so skip it
    for row in table_rows[1:]:
        cell = row.xpath('td')
        if cell is not None and len(cell) > 1:
            submission_id = str(cell[0].text_content())
            if limit is None or submission_id in limit:
                subUrn = URIRef(submission_view_url(submission_id))

                add_stmt(model,
                         subUrn,
                         TYPE_N,
                         submissionOntology['Submission'])
                add_stmt(model,
                         subUrn,
                         DCC_NS['subId'],
                         Literal(submission_id))

                name = str(cell[4].text_content())
                add_stmt(model, subUrn, name_n, name)

                species = str(cell[2].text_content())
                if species:
                    add_stmt(model, subUrn, species_n, species)

                library_id = get_library_id(name)
                if library_id is not None:
                    add_submission_to_library_urn(model,
                                                  subUrn,
                                                  library_urn,
                                                  library_id)
                else:
                    errmsg = 'Unable to find library id in {0} for {1}'
                    LOGGER.warning(errmsg.format(name, str(subUrn)))

                add_submission_creation_date(model, subUrn, cookie)

                # grab changing attributes
                status = str(cell[6].text_content()).strip()
                last_mod_datetime = get_date_contents(cell[8])
                last_mod = last_mod_datetime.isoformat()

                update_submission_detail(model, subUrn, status, last_mod,
                                         cookie=cookie)

                LOGGER.info("Processed {0}".format(subUrn))


def add_submission_to_library_urn(model, submissionUrn, predicate, library_id):
    """Add a link from a UCSC submission to a woldlab library if needed
    """
    libraryUrn = LIBRARY_NS[library_id + '/']
    link = (submissionUrn, predicate, libraryUrn)
    if link not in model:
        LOGGER.info("Adding Sub -> Lib link: {0}".format(link))
        model.add(link)
    else:
        LOGGER.debug("Found: {0}".format(str(link)))


def report_submissions_with_no_library(model):
    missing = find_submissions_with_no_library(model)
    for row in missing:
        subid = row['subid']
        name = row['name']
        print("# {0}".format(name))
        print("<{0}>".format(subid))
        print("  encodeSubmit:library_urn "
              "<http://jumpgate.caltech.edu/library/> .")
        print("")

def find_submissions_with_no_library(model):
    missing_lib_query_text = """
PREFIX submissionOntology:<{submissionOntology}>

SELECT
 ?subid ?name
WHERE {{
  ?subid submissionOntology:name ?name
  OPTIONAL {{ ?subid submissionOntology:library_urn ?libid }}
  FILTER  (!bound(?libid))
}}""".format(submissionOntology=submissionOntology[''].uri)

    return model.query(missing_lib_query_text)


def find_unscanned_submitted_libraries(model):
    """Scan model for libraries that don't have library details loaded
    """
    unscanned_libraries = """
PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX submissionOntology:<{submissionOntology}>

SELECT distinct ?submission ?library_urn
WHERE {{
  ?submission submissionOntology:library_urn ?library_urn .
  OPTIONAL {{ ?library_urn rdf:type ?library_type }}
  FILTER(!BOUND(?library_type))
}}""".format(submissionOntology=submissionOntology[''].uri)
    return model.query(unscanned_libraries)

def find_all_libraries(model):
    """Scan model for every resource typed as a library
    """
    # Assumes a library is any subject whose rdf:type falls inside the
    # libraryOntology namespace.
    libraries = """
PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX libraryOntology:<{libraryOntology}>

SELECT distinct ?library_urn
WHERE {{
  ?library_urn rdf:type ?library_type .
  FILTER(regex(str(?library_type), "^{libraryOntology}"))
}}""".format(libraryOntology=libraryOntology[''].uri)
    return model.query(libraries)


def add_submission_creation_date(model, subUrn, cookie):
    # in theory the submission page might have more information on it.
    creation_dates = get_creation_dates(model, subUrn)
    if len(creation_dates) == 0:
        LOGGER.info("Getting creation date for: {0}".format(str(subUrn)))
        submissionTree = get_url_as_tree(str(subUrn), 'GET', cookie)
        parse_submission_page(model, submissionTree, subUrn)
    else:
        LOGGER.debug("Found creation date for: {0}".format(str(subUrn)))


def get_creation_dates(model, subUrn):
    query = (subUrn, CREATION_DATE, None)
    creation_dates = list(model.triples(query))
    return creation_dates


def parse_submission_page(model, submissionTree, subUrn):
    cells = submissionTree.findall('.//td')
    created_label = [x for x in cells
                     if x.text_content().startswith('Created')]
    if len(created_label) == 1:
        created_date = get_date_contents(created_label[0].getnext())
        created_date_node = Literal(created_date.isoformat(),
                                    datatype=XSD.dateTime)
        add_stmt(model, subUrn, CREATION_DATE, created_date_node)
    else:
        msg = 'Unable to find creation date for {0}'.format(str(subUrn))
        LOGGER.warning(msg)
        raise Warning(msg)


def update_submission_detail(model, subUrn, status, recent_update, cookie):
    HasStatusN = submissionOntology['has_status']
    StatusN = submissionOntology['status']
    LastModifyN = submissionOntology['last_modify_date']

    status_nodes_query = (subUrn, HasStatusN, None)
    status_nodes = list(model.triples(status_nodes_query))

    if len(status_nodes) == 0:
        # has no status node, add one
        LOGGER.info("Adding status node to {0}".format(subUrn))
        status_node = create_status_node(subUrn, recent_update)
        add_stmt(model, subUrn, HasStatusN, status_node)
        add_stmt(model, status_node, RDF['type'], StatusN)
        add_stmt(model, status_node, StatusN, status)
        add_stmt(model, status_node, LastModifyN, recent_update)
        update_ddf(model, subUrn, status_node, cookie=cookie)
        update_daf(model, subUrn, status_node, cookie=cookie)
    else:
        LOGGER.info("Found {0} status blanks".format(len(status_nodes)))
        for status_statement in status_nodes:
            status_node = status_statement[2]
            last_modified_query = (status_node, LastModifyN, None)
            last_mod_nodes = model.triples(last_modified_query)
            for last_mod_statement in last_mod_nodes:
                last_mod_date = str(last_mod_statement[2])
                if recent_update == str(last_mod_date):
                    update_ddf(model, subUrn, status_node, cookie=cookie)
                    update_daf(model, subUrn, status_node, cookie=cookie)
                    break


def update_daf(model, submission_url, status_node, cookie):
    download_daf_uri = str(submission_url).replace('show', 'download_daf')
    daf_uri = URIRef(download_daf_uri)

    status_is_daf = (status_node, TYPE_N, dafTermOntology[''])
    if status_is_daf not in model:
        LOGGER.info('Adding daf to {0}, {1}'.format(submission_url,
                                                    status_node))
        daf_text = get_url_as_text(download_daf_uri, 'GET', cookie)
        daf_hash = hashlib.md5(daf_text.encode(CHARSET)).hexdigest()
        daf_hash_stmt = (status_node, dafTermOntology['md5sum'],
                         Literal(daf_hash))
        model.add(daf_hash_stmt)
        daf.fromstring_into_model(model, status_node, daf_text)


def update_ddf(model, subUrn, statusNode, cookie):
    download_ddf_url = str(subUrn).replace('show', 'download_ddf')
    ddfUrn = URIRef(download_ddf_url)

    status_is_ddf = (statusNode, TYPE_N, DCC_NS[''])
    # only fetch the ddf if this status node hasn't been marked yet
    if status_is_ddf not in model:
        LOGGER.info('Adding ddf to {0}, {1}'.format(subUrn, statusNode))
        ddf_text = get_url_as_text(download_ddf_url, 'GET', cookie)
        add_ddf_statements(model, statusNode, ddf_text)
        model.add(status_is_ddf)


def add_ddf_statements(model, statusNode, ddf_string):
    """Convert a ddf text file into RDF Statements
    """
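    # A ddf is tab separated: the first column lists one or more comma
    # separated file names and later columns carry per-file attributes
    # named by the header row, e.g. (illustrative):
    #   filename     cell  replicate
    #   a.bam,b.bam  K562  1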
    ddf_lines = ddf_string.split('\n')
    # first line is the header
    header = ddf_lines[0].split()
    attributes = [DCC_NS[x] for x in header]

    for ddf_line in ddf_lines[1:]:
        ddf_line = ddf_line.strip()
        if len(ddf_line) == 0:
            continue
        if ddf_line.startswith("#"):
            continue

        ddf_record = ddf_line.split('\t')
        files = ddf_record[0].split(',')
        file_attributes = ddf_record[1:]

        for f in files:
            fileNode = BNode()
            add_stmt(model,
                     statusNode,
                     submissionOntology['has_file'],
                     fileNode)
            add_stmt(model, fileNode, RDF['type'], DCC_NS['file'])
            add_stmt(model, fileNode, DCC_NS['filename'], f)

            for predicate, obj in zip(attributes[1:], file_attributes):
                add_stmt(model, fileNode, predicate, obj)


def load_encode_assigned_libraries(model, htswapi):
    """Get libraries associated with encode.
    """
    encodeFilters = ["/library/?affiliations__id__exact=44",
                     "/library/?affiliations__id__exact=80",
                    ]

    encodeUrls = [htswapi.root_url + u for u in encodeFilters]
    for encodeUrl in encodeUrls:
        LOGGER.info("Scanning library url {0}".format(encodeUrl))
        model.parse(source=encodeUrl, format='rdfa')
        query = (None, libraryOntology['library_id'], None)
        # materialize the result so loading details can't mutate the
        # graph while we iterate over it
        libraries = list(model.triples(query))
        for statement in libraries:
            libraryUrn = statement[0]
            load_library_detail(model, libraryUrn)


def load_unassigned_submitted_libraries(model):
    unassigned = find_unscanned_submitted_libraries(model)
    for query_record in unassigned:
        library_urn = query_record['library_urn']
        LOGGER.warning("Unassigned, submitted library: {0}".format(library_urn))
        load_library_detail(model, library_urn)

def reload_libraries(model, library_list):
    if len(library_list) == 0:
        # reload everything.
        queryset = find_all_libraries(model)
        # keep the URIRef nodes; delete_library refuses plain strings
        libraries = (s['library_urn'] for s in queryset)
    else:
        libraries = (user_library_id_to_library_urn(l) for l in library_list)

    for library_urn in libraries:
        delete_library(model, library_urn)
        load_library_detail(model, library_urn)

def user_library_id_to_library_urn(library_id):
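    """Convert a user supplied library id to a library URN.

    Bare ids are resolved against LIBRARY_NS; full URLs pass through
    (illustrative examples):

    >>> str(user_library_id_to_library_urn('11039'))
    'http://jumpgate.caltech.edu/library/11039'
    >>> str(user_library_id_to_library_urn('http://example.org/lib/1'))
    'http://example.org/lib/1'
    """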
    split_url = urllib.parse.urlsplit(library_id)
    if len(split_url.scheme) == 0:
        return LIBRARY_NS[library_id]
    else:
        # wrap in URIRef so downstream type checks accept it
        return URIRef(library_id)

def delete_library(model, library_urn):
    if not isinstance(library_urn, (Literal, URIRef)):
        raise ValueError("library urn must be a Literal or URIRef")

    LOGGER.info("Deleting {0}".format(str(library_urn)))
    lane_query = (library_urn, libraryOntology['has_lane'], None)
    for lane in list(model.triples(lane_query)):
        delete_lane(model, lane[2])
    library_attrib_query = (library_urn, None, None)
    for library_attrib in list(model.triples(library_attrib_query)):
        LOGGER.debug("Deleting {0}".format(str(library_attrib)))
        model.remove(library_attrib)


def delete_lane(model, lane_urn):
    if not isinstance(lane_urn, (Literal, URIRef)):
        raise ValueError("lane urn must be a Literal or URIRef")

    delete_lane_mapping(model, lane_urn)
    lane_attrib_query = (lane_urn, None, None)
    for lane_attrib in list(model.triples(lane_attrib_query)):
        LOGGER.debug("Deleting {0}".format(str(lane_attrib)))
        model.remove(lane_attrib)


def delete_lane_mapping(model, lane_urn):
    if not isinstance(lane_urn, (Literal, URIRef)):
        raise ValueError("lane urn must be a Literal or URIRef")

    lane_mapping_query = (lane_urn,
                          libraryOntology['has_mappings'],
                          None)
    for lane_mapping in list(model.triples(lane_mapping_query)):
        mapping_attrib_query = (lane_mapping[2], None, None)
        for mapping_attrib in list(model.triples(mapping_attrib_query)):
            LOGGER.debug("Deleting {0}".format(str(mapping_attrib)))
            model.remove(mapping_attrib)


def load_encodedcc_files(model, genome, composite):
    file_index = ucsc.get_encodedcc_file_index(genome, composite)
    if file_index is None:
        return

    lib_term = submissionOntology['library_urn']
    sub_term = submissionOntology['submission_urn']
    for filename, attributes in file_index.items():
        s = URIRef(filename)
        model.add((s, TYPE_N, submissionOntology['ucsc_track']))
        for name, value in attributes.items():
            p = DCC_NS[name]
            o = Literal(value)
            model.add((s, p, o))
            if name.lower() == 'labexpid':
                model.add((s, lib_term, LIBRARY_NS[value + '/']))
            elif name.lower() == 'subid':
                sub_url = URIRef(submission_view_url(value))
                model.add((s, sub_term, sub_url))


def load_library_detail(model, libraryUrn):
    """Grab detail information from library page
    """
    query = (libraryUrn, libraryOntology['date'], None)
    results = list(model.triples(query))
    log_message = "Found {0} statements for {1}"
    LOGGER.debug(log_message.format(len(results), libraryUrn))
    if len(results) == 0:
        LOGGER.info("Loading {0}".format(str(libraryUrn)))
        try:
            body = get_url_as_text(str(libraryUrn), 'GET')
            model.parse(data=body, format='rdfa', publicID=libraryUrn)
        except httplib2.HttpLib2ErrorWithResponse as e:
            LOGGER.error(str(e))
    elif len(results) == 1:
        pass  # Assuming that a loaded dataset has one record
    else:
        LOGGER.warning("Many dates for {0}".format(libraryUrn))


def get_library_id(name):
    """Guess library ID from library name

    >>> get_library_id('2x75-GM12892-rep1-11039 20110217 elements')
    '11039'
    >>> get_library_id('10150 C2C12-24h-myogenin-2PCR-Rep1.32mers')
    '10150'
    >>> get_library_id('2x75-GM12892-rep2-SL2970')
    '02970'
    """
    match = re.search(r"([ -]|^)(?P<id>([\d]{5})|(SL[\d]{4}))", name)
    library_id = None
    if match is not None:
        library_id = match.group('id')
    if library_id in SL_MAP:
        library_id = SL_MAP[library_id]
    return library_id


def get_contents(element):
    """Return text contents of an lxml element, or None.
    """
    text = element.text_content()
    if not text:
        return None

    # prefer the text of an embedded link if there is one
    a = element.find('a')
    if a is not None:
        return a.text_content().encode(CHARSET)

    return text.encode(CHARSET)


def create_status_node(submission_uri, timestamp):
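    """Build a URIRef naming one status snapshot of a submission.

    The node is the submission URI with the timestamp appended, e.g.
    (illustrative) <.../pipeline/show/1234/2011-01-30T12:00:00>.
    """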
    submission_uri = daf.submission_uri_to_string(submission_uri)
    if submission_uri[-1] != '/':
        submission_uri += '/'
    status_uri = submission_uri + timestamp
    return URIRef(status_uri)


def get_date_contents(element):
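    """Parse an element's text as a timestamp like '2011-01-30 12:00'.

    Returns a datetime, or None if the element has no text.
    """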
    data = element.text_content()
    if data:
        return datetime.strptime(data, "%Y-%m-%d %H:%M")
    else:
        return None


def add_stmt(model, subject, predicate, rdf_object):
    """Convenience function to create an RDF statement and add it to a model
    """
    # callers sometimes pass plain python values; coerce them to Literals
    if not isinstance(rdf_object, (BNode, Literal, URIRef)):
        rdf_object = Literal(rdf_object)
    return model.add((subject, predicate, rdf_object))


def login(cookie=None):
    """Login if we don't have a cookie
    """
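    # The password is read from the system keyring under (LOGIN_URL,
    # USERNAME); seed it beforehand with e.g.
    #   keyring.set_password(LOGIN_URL, USERNAME, 'your-password')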
    if cookie is not None:
        return cookie

    keys = keyring.get_keyring()
    password = keys.get_password(LOGIN_URL, USERNAME)
    credentials = {'login': USERNAME,
                   'password': password}
    headers = {'Content-type': 'application/x-www-form-urlencoded'}
    http = httplib2.Http()
    response, content = http.request(LOGIN_URL,
                                     'POST',
                                     headers=headers,
                                     body=urllib.parse.urlencode(credentials))
    LOGGER.debug("Login to {0}, status {1}".format(LOGIN_URL,
                                                   response['status']))

    cookie = response.get('set-cookie', None)
    if cookie is None:
        raise RuntimeError("Wasn't able to log into: {0}".format(LOGIN_URL))
    return cookie


def get_url_as_tree(url, method, cookie=None):
    http = httplib2.Http()
    headers = {}
    if cookie is not None:
        headers['Cookie'] = cookie
    response, content = http.request(url, method, headers=headers)
    if response['status'] == '200':
        tree = fromstring(content, base_url=url)
        return tree
    else:
        msg = "error accessing {0}, status {1}"
        msg = msg.format(url, response['status'])
        e = httplib2.HttpLib2ErrorWithResponse(msg, response, content)
        raise e


def get_url_as_text(url, method, cookie=None):
    http = httplib2.Http()
    headers = {}
    if cookie is not None:
        headers['Cookie'] = cookie
    response, content = http.request(url, method, headers=headers)
    if response['status'] == '200':
        # httplib2 returns bytes; decode so callers get text
        return content.decode(CHARSET)
    else:
        msg = "error accessing {0}, status {1}"
        msg = msg.format(url, response['status'])
        e = httplib2.HttpLib2ErrorWithResponse(msg, response, content)
        raise e

################
#  old stuff
SUBMISSIONS_LACKING_LIBID = [
    ('1x75-Directional-HeLa-Rep1',    '11208'),
    ('1x75-Directional-HeLa-Rep2',    '11207'),
    ('1x75-Directional-HepG2-Rep1',   '11210'),
    ('1x75-Directional-HepG2-Rep2',   '11209'),
    ('1x75-Directional-H1-hESC-Rep1', '10947'),
    ('1x75-Directional-H1-hESC-Rep2', '11009'),
    ('1x75-Directional-HUVEC-Rep1',   '11206'),
    ('1x75-Directional-HUVEC-Rep2',   '11205'),
    ('1x75-Directional-K562-Rep1',    '11008'),
    ('1x75-Directional-K562-Rep2',    '11007'),
    ('1x75-Directional-NHEK-Rep1',    '11204'),
    ('1x75-Directional-GM12878-Rep1', '11011'),
    ('1x75-Directional-GM12878-Rep2', '11010'),
    ]


def select_by_library_id(submission_list):
    subl = [(x.library_id, x) for x in submission_list if x.library_id]
    libraries = {}
    for lib_id, subobj in subl:
        libraries.setdefault(lib_id, []).append(subobj)

    for submissions in libraries.values():
        submissions.sort(key=attrgetter('date'), reverse=True)

    return libraries


def library_to_freeze(selected_libraries):
    freezes = ['2010-Jan', '2010-Jul', '2011-Jan']
    lib_ids = sorted(selected_libraries.keys())
    report = ["""<html>
<head>
<style type="text/css">
 td {border-width:0 0 1px 1px; border-style:solid;}
</style>
</head>
<body>
<table>
"""]
    report.append('<thead>')
    report.append('<tr><td>Library ID</td><td>Name</td>')
    for f in freezes:
        report.append('<td>{0}</td>'.format(f))
    report.append('</tr>')
    report.append('</thead>')
    report.append('<tbody>')
    for lib_id in lib_ids:
        report.append('<tr>')
        lib_url = LIBRARY_NS[lib_id]
        report.append('<td><a href="{0}">{1}</a></td>'.format(lib_url, lib_id))
        submissions = selected_libraries[lib_id]
        report.append('<td>{0}</td>'.format(submissions[0].name))
        batched = {}
        for sub in submissions:
            date = date_to_freeze(sub.date)
            batched.setdefault(date, []).append(sub)
        for d in freezes:
            report.append('<td>')
            for s in batched.get(d, []):
                show_url = submission_view_url(s.subid)
                subid = '<a href="{0}">{1}</a>'.format(show_url, s.subid)
                report.append("{0}:{1}".format(subid, s.status))
            report.append('</td>')
        report.append("</tr>")
    report.append('</tbody>')
    report.append("</table></body></html>")
    return "\n".join(report)


def date_to_freeze(d):
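    """Map a submission date to the name of the ENCODE freeze it precedes.

    >>> date_to_freeze(datetime(2010, 3, 1))
    '2010-Jul'
    >>> date_to_freeze(datetime(2011, 6, 1)) is None
    True
    """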
    freezes = [(datetime(2010, 1, 30), '2010-Jan'),
               (datetime(2010, 7, 30), '2010-Jul'),
               (datetime(2011, 1, 30), '2011-Jan'),
               ]
    for end, name in freezes:
        if d < end:
            return name
    else:
        return None

if __name__ == "__main__":
    main()