[htsworkflow.git] / encode_submission / encode_find.py
#!/usr/bin/env python
"""
Gather information about our submissions into a single RDF store
"""

from datetime import datetime
import hashlib
import httplib2
# python keyring
import keyring
import logging
from lxml.html import fromstring
from operator import attrgetter
from optparse import OptionParser, OptionGroup
import os
import re
# redland rdf lib
import RDF
import sys
import urllib
import urlparse

from htsworkflow.submission import daf, ucsc
from htsworkflow.submission.ucsc import \
     daf_download_url, \
     ddf_download_url, \
     get_encodedcc_file_index, \
     submission_view_url, \
     UCSCEncodePipeline

from htsworkflow.util import api
from htsworkflow.util.rdfhelp import \
     dafTermOntology, \
     dublinCoreNS, \
     get_model, \
     get_serializer, \
     sparql_query, \
     submissionOntology, \
     libraryOntology, \
     load_into_model, \
     rdfNS, \
     rdfsNS, \
     xsdNS

TYPE_N = rdfNS['type']
CREATION_DATE = libraryOntology['date']

# URL mappings
LIBRARY_NS = RDF.NS("http://jumpgate.caltech.edu/library/")
DCC_NS = RDF.NS(UCSCEncodePipeline + 'download_ddf#')

DBDIR = os.path.expanduser("~diane/proj/submission")

LOGGER = logging.getLogger("encode_find")

LOGIN_URL = 'http://encodesubmit.ucsc.edu/account/login'
USER_URL = 'http://encodesubmit.ucsc.edu/pipeline/show_user'

USERNAME = 'detrout'
CHARSET = 'utf-8'

SL_MAP = {'SL2970': '02970',
          'SL2971': '02971',
          'SL2973': '02973'}

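# Example invocations (the model name is illustrative; see make_parser()
# for the full option list):
#   encode_find.py --update --model encode_status
#   encode_find.py --model encode_status --print-rdf
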
def main(cmdline=None):
    """
    Parse command line arguments

    Takes a list of arguments (assuming arg[0] is the program name) or None.
    If None, it looks at sys.argv.
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)

    if opts.debug:
        logging.basicConfig(level=logging.DEBUG)
    elif opts.verbose:
        logging.basicConfig(level=logging.INFO)
    else:
        logging.basicConfig(level=logging.ERROR)

    htsw_authdata = api.make_auth_from_opts(opts, parser)
    htswapi = api.HtswApi(opts.host, htsw_authdata)

    cookie = None
    model = get_model(opts.model, DBDIR)

    if opts.load_rdf is not None:
        ns_uri = submissionOntology[''].uri
        load_into_model(model, opts.rdf_parser_name, opts.load_rdf, ns_uri)

    if len(args) == 0:
        limit = None
    else:
        limit = args

    if opts.update:
        opts.update_submission = True
        opts.update_libraries = True
        opts.update_ucsc_downloads = True

    if opts.update_submission:
        cookie = login(cookie=cookie)
        load_my_submissions(model, limit=limit, cookie=cookie)

    if opts.update_libraries:
        load_encode_assigned_libraries(model, htswapi)
        load_unassigned_submitted_libraries(model)

    if opts.update_ucsc_downloads:
        our_tracks = [
            {'genome': 'hg19', 'composite': 'wgEncodeCaltechRnaSeq'},
            {'genome': 'mm9',  'composite': 'wgEncodeCaltechHist'},
            #{'genome': 'mm9',  'composite': 'wgEncodeCaltechHistone'},
            {'genome': 'mm9',  'composite': 'wgEncodeCaltechTfbs'}
        ]
        for track_info in our_tracks:
            load_encodedcc_files(model, **track_info)

    if opts.sparql is not None:
        sparql_query(model, opts.sparql)

    if opts.find_submission_with_no_library:
        report_submissions_with_no_library(model)

    if opts.print_rdf:
        serializer = get_serializer(name=opts.rdf_parser_name)
        print serializer.serialize_model_to_string(model)


def make_parser():
    """Construct option parser
    """
    parser = OptionParser()
    commands = OptionGroup(parser, "Commands")
    commands.add_option('--model', default=None,
      help="Load model database")
    commands.add_option('--load-rdf', default=None,
      help="load rdf statements into model")
    commands.add_option('--print-rdf', action="store_true", default=False,
      help="print ending model state")
    commands.add_option('--update', action="store_true", default=False,
      help="Do all updates")
    commands.add_option('--update-submission', action="store_true",
                        default=False,
      help="download status from ucsc")
    commands.add_option('--update-ucsc-downloads', action="store_true",
                        default=False,
      help="Update download locations from UCSC")
    commands.add_option('--update-libraries', action="store_true",
                        default=False,
      help="download library info from htsw")
    parser.add_option_group(commands)

    queries = OptionGroup(parser, "Queries")
    queries.add_option('--sparql', default=None,
      help="execute arbitrary sparql query")
    queries.add_option('--find-submission-with-no-library', default=False,
      action="store_true",
      help="find submissions with no library ID")
    parser.add_option_group(queries)

    options = OptionGroup(parser, "Options")
    options.add_option("--rdf-parser-name", default="turtle",
      help="set rdf file parser type")
    options.add_option("-v", "--verbose", action="store_true", default=False)
    options.add_option("--debug", action="store_true", default=False)
    parser.add_option_group(options)

    api.add_auth_options(parser)

    return parser


def load_my_submissions(model, limit=None, cookie=None):
    """Parse all the submissions from UCSC into the model.

    Looks at the global USER_URL to determine whose submissions to scrape.
    cookie holds the session cookie; if None, we will attempt to log in.
    """
    if cookie is None:
        cookie = login()

    tree = get_url_as_tree(USER_URL, 'GET', cookie)
    table_rows = tree.xpath('//table[@id="projects"]/tr')
    # first record is the header
    name_n = submissionOntology['name']
    species_n = submissionOntology['species']
    library_urn = submissionOntology['library_urn']

    # skip header
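    # column layout assumed from the cell indexes used below:
    #   0 submission id, 2 species, 4 name, 6 status, 8 last modified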
    for row in table_rows[1:]:
        cell = row.xpath('td')
        if cell is not None and len(cell) > 1:
            submission_id = str(cell[0].text_content())
            if limit is None or submission_id in limit:
                subUrn = RDF.Uri(submission_view_url(submission_id))

                add_stmt(model,
                         subUrn,
                         TYPE_N,
                         submissionOntology['Submission'])
                add_stmt(model,
                         subUrn,
                         DCC_NS['subId'],
                         RDF.Node(submission_id))

                name = str(cell[4].text_content())
                add_stmt(model, subUrn, name_n, name)

                species = str(cell[2].text_content())
                if species is not None:
                    add_stmt(model, subUrn, species_n, species)

                library_id = get_library_id(name)
                if library_id is not None:
                    add_submission_to_library_urn(model,
                                                  subUrn,
                                                  library_urn,
                                                  library_id)
                else:
                    errmsg = 'Unable to find library id in {0} for {1}'
                    LOGGER.warn(errmsg.format(name, str(subUrn)))

                add_submission_creation_date(model, subUrn, cookie)

                # grab changing attributes
                status = str(cell[6].text_content()).strip()
                last_mod_datetime = get_date_contents(cell[8])
                last_mod = last_mod_datetime.isoformat()

                update_submission_detail(model, subUrn, status, last_mod,
                                         cookie=cookie)

                LOGGER.info("Processed {0}".format(subUrn))


def add_submission_to_library_urn(model, submissionUrn, predicate, library_id):
    """Add a link from a UCSC submission to a woldlab library if needed
    """
    libraryUrn = LIBRARY_NS[library_id + '/']
    link = RDF.Statement(submissionUrn, predicate, libraryUrn)
    if not model.contains_statement(link):
        LOGGER.info("Adding Sub -> Lib link: {0}".format(link))
        model.add_statement(link)
    else:
        LOGGER.debug("Found: {0}".format(str(link)))


def report_submissions_with_no_library(model):
    missing = find_submissions_with_no_library(model)
    for row in missing:
        subid = row['subid']
        name = row['name']
        print "# {0}".format(name)
        print "<{0}>".format(subid.uri)
        print "  encodeSubmit:library_urn "\
              "<http://jumpgate.caltech.edu/library/> ."
        print ""

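# The OPTIONAL { ... } plus FILTER(!bound(...)) pattern in the query below
# is the standard SPARQL 1.0 idiom for negation: select submissions that
# have a name but no library_urn link.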
def find_submissions_with_no_library(model):
    missing_lib_query_text = """
PREFIX submissionOntology:<{submissionOntology}>

SELECT
 ?subid ?name
WHERE {{
  ?subid submissionOntology:name ?name
  OPTIONAL {{ ?subid submissionOntology:library_urn ?libid }}
  FILTER  (!bound(?libid))
}}""".format(submissionOntology=submissionOntology[''].uri)
    missing_lib_query = RDF.SPARQLQuery(missing_lib_query_text)

    return missing_lib_query.execute(model)


def find_unscanned_submitted_libraries(model):
    """Scan model for libraries that don't have library details loaded
    """
    unscanned_libraries = """
PREFIX rdf:<http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX submissionOntology:<{submissionOntology}>

SELECT distinct ?submission ?library_urn
WHERE {{
  ?submission submissionOntology:library_urn ?library_urn .
  OPTIONAL {{ ?library_urn rdf:type ?library_type  }}
  FILTER(!BOUND(?library_type))
}}""".format(submissionOntology=submissionOntology[''].uri)
    query = RDF.SPARQLQuery(unscanned_libraries)
    return query.execute(model)


def add_submission_creation_date(model, subUrn, cookie):
    # in theory the submission page might have more information on it.
    creation_dates = get_creation_dates(model, subUrn)
    if len(creation_dates) == 0:
        LOGGER.info("Getting creation date for: {0}".format(str(subUrn)))
        submissionTree = get_url_as_tree(str(subUrn), 'GET', cookie)
        parse_submission_page(model, submissionTree, subUrn)
    else:
        LOGGER.debug("Found creation date for: {0}".format(str(subUrn)))


def get_creation_dates(model, subUrn):
    query = RDF.Statement(subUrn, CREATION_DATE, None)
    creation_dates = list(model.find_statements(query))
    return creation_dates


def parse_submission_page(model, submissionTree, subUrn):
    cells = submissionTree.findall('.//td')
    dateTimeType = xsdNS['dateTime']
    created_label = [x for x in cells
                     if x.text_content().startswith('Created')]
    if len(created_label) == 1:
        created_date = get_date_contents(created_label[0].getnext())
        created_date_node = RDF.Node(literal=created_date.isoformat(),
                                     datatype=dateTimeType.uri)
        add_stmt(model, subUrn, CREATION_DATE, created_date_node)
    else:
        msg = 'Unable to find creation date for {0}'.format(str(subUrn))
        LOGGER.warn(msg)
        raise Warning(msg)


def update_submission_detail(model, subUrn, status, recent_update, cookie):
    HasStatusN = submissionOntology['has_status']
    StatusN = submissionOntology['status']
    LastModifyN = submissionOntology['last_modify_date']

    status_nodes_query = RDF.Statement(subUrn, HasStatusN, None)
    status_nodes = list(model.find_statements(status_nodes_query))

    if len(status_nodes) == 0:
        # has no status node, add one
        LOGGER.info("Adding status node to {0}".format(subUrn))
        status_node = create_status_node(subUrn, recent_update)
        add_stmt(model, subUrn, HasStatusN, status_node)
        add_stmt(model, status_node, rdfNS['type'], StatusN)
        add_stmt(model, status_node, StatusN, status)
        add_stmt(model, status_node, LastModifyN, recent_update)
        update_ddf(model, subUrn, status_node, cookie=cookie)
        update_daf(model, subUrn, status_node, cookie=cookie)
    else:
        LOGGER.info("Found {0} status blanks".format(len(status_nodes)))
        for status_statement in status_nodes:
            status_node = status_statement.object
            last_modified_query = RDF.Statement(status_node,
                                                LastModifyN,
                                                None)
            last_mod_nodes = model.find_statements(last_modified_query)
            for last_mod_statement in last_mod_nodes:
                last_mod_date = str(last_mod_statement.object)
                if recent_update == str(last_mod_date):
                    update_ddf(model, subUrn, status_node, cookie=cookie)
                    update_daf(model, subUrn, status_node, cookie=cookie)
                    break


def update_daf(model, submission_url, status_node, cookie):
    download_daf_uri = str(submission_url).replace('show', 'download_daf')
    daf_uri = RDF.Uri(download_daf_uri)

    status_is_daf = RDF.Statement(status_node, TYPE_N, dafTermOntology[''])
    if not model.contains_statement(status_is_daf):
        LOGGER.info('Adding daf to {0}, {1}'.format(submission_url,
                                                    status_node))
        daf_text = get_url_as_text(download_daf_uri, 'GET', cookie)
        daf_hash = hashlib.md5(daf_text).hexdigest()
        daf_hash_stmt = RDF.Statement(status_node,
                                      dafTermOntology['md5sum'],
                                      daf_hash)
        model.add_statement(daf_hash_stmt)
        daf.fromstring_into_model(model, status_node, daf_text)


def update_ddf(model, subUrn, statusNode, cookie):
    download_ddf_url = str(subUrn).replace('show', 'download_ddf')
    ddfUrn = RDF.Uri(download_ddf_url)

    status_is_ddf = RDF.Statement(statusNode, TYPE_N, DCC_NS[''])
    if not model.contains_statement(status_is_ddf):
        LOGGER.info('Adding ddf to {0}, {1}'.format(subUrn, statusNode))
        ddf_text = get_url_as_text(download_ddf_url, 'GET', cookie)
        add_ddf_statements(model, statusNode, ddf_text)
        model.add_statement(status_is_ddf)


def add_ddf_statements(model, statusNode, ddf_string):
    """Convert a ddf text file into RDF Statements
    """
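    # DDF layout sketch (field names are illustrative; the real columns
    # are defined by the submission's DAF):
    #   files<TAB>cell<TAB>antibody...      <- header row
    #   a.bam,b.bam<TAB>GM12878<TAB>Input   <- data rows, files comma separated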
    ddf_lines = ddf_string.split('\n')
    # first line is header
    header = ddf_lines[0].split()
    attributes = [DCC_NS[x] for x in header]

    for ddf_line in ddf_lines[1:]:
        ddf_line = ddf_line.strip()
        if len(ddf_line) == 0:
            continue
        if ddf_line.startswith("#"):
            continue

        ddf_record = ddf_line.split('\t')
        files = ddf_record[0].split(',')
        file_attributes = ddf_record[1:]

        for f in files:
            fileNode = RDF.Node()
            add_stmt(model,
                     statusNode,
                     submissionOntology['has_file'],
                     fileNode)
            add_stmt(model, fileNode, rdfNS['type'], DCC_NS['file'])
            add_stmt(model, fileNode, DCC_NS['filename'], f)

            for predicate, value in zip(attributes[1:], file_attributes):
                add_stmt(model, fileNode, predicate, value)


def load_encode_assigned_libraries(model, htswapi):
    """Get libraries associated with encode.
    """
    encodeFilters = ["/library/?affiliations__id__exact=44",
                     "/library/?affiliations__id__exact=80",
                    ]

    encodeUrls = [htswapi.root_url + u for u in encodeFilters]
    rdfaParser = RDF.Parser(name='rdfa')
    for encodeUrl in encodeUrls:
        LOGGER.info("Scanning library url {0}".format(encodeUrl))
        rdfaParser.parse_into_model(model, encodeUrl)
        query = RDF.Statement(None, libraryOntology['library_id'], None)
        libraries = model.find_statements(query)
        for statement in libraries:
            libraryUrn = statement.subject
            load_library_detail(model, libraryUrn)


def load_unassigned_submitted_libraries(model):
    unassigned = find_unscanned_submitted_libraries(model)
    for query_record in unassigned:
        library_urn = query_record['library_urn']
        LOGGER.warn("Unassigned, submitted library: {0}".format(library_urn))
        load_library_detail(model, library_urn)


def load_encodedcc_files(model, genome, composite):
    file_index = ucsc.get_encodedcc_file_index(genome, composite)
    if file_index is None:
        return

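    # file_index is assumed to map each file name to a dict of its DCC
    # attributes, e.g. (illustrative values only):
    #   {'wgEncodeCaltechRnaSeqFoo.fastq.gz': {'md5sum': '...', 'size': '1K'}}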
    for filename, attributes in file_index.items():
        s = RDF.Node(RDF.Uri(filename))
        for name, value in attributes.items():
            p = RDF.Node(DCC_NS[name])
            o = RDF.Node(value)
            model.add_statement(RDF.Statement(s, p, o))


def load_library_detail(model, libraryUrn):
    """Grab detail information from the library page
    """
    rdfaParser = RDF.Parser(name='rdfa')
    query = RDF.Statement(libraryUrn, libraryOntology['date'], None)
    results = list(model.find_statements(query))
    log_message = "Found {0} statements for {1}"
    LOGGER.debug(log_message.format(len(results), libraryUrn))
    if len(results) == 0:
        LOGGER.info("Loading {0}".format(str(libraryUrn)))
        try:
            body = get_url_as_text(str(libraryUrn.uri), 'GET')
            rdfaParser.parse_string_into_model(model, body, libraryUrn.uri)
        except httplib2.HttpLib2ErrorWithResponse, e:
            LOGGER.error(str(e))
    elif len(results) == 1:
        pass  # assume a loaded library has exactly one date record
    else:
        LOGGER.warning("Many dates for {0}".format(libraryUrn))


def get_library_id(name):
    """Guess the library ID from a library name

    >>> get_library_id('2x75-GM12892-rep1-11039 20110217 elements')
    '11039'
    >>> get_library_id('10150 C2C12-24h-myogenin-2PCR-Rep1.32mers')
    '10150'
    >>> get_library_id('2x75-GM12892-rep2-SL2970')
    '02970'
    """
    match = re.search(r"([ -]|^)(?P<id>([\d]{5})|(SL[\d]{4}))", name)
    library_id = None
    if match is not None:
        library_id = match.group('id')
    if library_id in SL_MAP:
        library_id = SL_MAP[library_id]
    return library_id


def get_contents(element):
    """Return contents or None.
    """
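    # NOTE: this helper is written against the BeautifulSoup element API
    # (.contents); nothing in this module calls it against the lxml trees
    # used above.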
    if len(element.contents) == 0:
        return None

    a = element.find('a')
    if a is not None:
        return a.contents[0].encode(CHARSET)

    return element.contents[0].encode(CHARSET)


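# create_status_node appends the timestamp to the submission URI, e.g.
# (assuming daf.submission_uri_to_string returns the URI text unchanged)
# .../pipeline/show/1234 plus '2011-02-17T10:30:00' yields the status node
# <.../pipeline/show/1234/2011-02-17T10:30:00>.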
def create_status_node(submission_uri, timestamp):
    submission_uri = daf.submission_uri_to_string(submission_uri)
    if submission_uri[-1] != '/':
        submission_uri += '/'
    status_uri = submission_uri + timestamp
    return RDF.Node(RDF.Uri(status_uri))


def get_date_contents(element):
    data = element.text_content()
    if data:
        return datetime.strptime(data, "%Y-%m-%d %H:%M")
    else:
        return None


def add_stmt(model, subject, predicate, rdf_object):
    """Convenience function: create an RDF Statement and add it to a model
    """
    return model.add_statement(
        RDF.Statement(subject, predicate, rdf_object))


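# login() reads the password from the system keyring.  One way to store it
# ahead of time (standard python-keyring API; 'secret' is a placeholder):
#   >>> import keyring
#   >>> keyring.set_password(LOGIN_URL, USERNAME, 'secret')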
def login(cookie=None):
    """Login if we don't have a cookie
    """
    if cookie is not None:
        return cookie

    keys = keyring.get_keyring()
    password = keys.get_password(LOGIN_URL, USERNAME)
    credentials = {'login': USERNAME,
                   'password': password}
    headers = {'Content-type': 'application/x-www-form-urlencoded'}
    http = httplib2.Http()
    response, content = http.request(LOGIN_URL,
                                     'POST',
                                     headers=headers,
                                     body=urllib.urlencode(credentials))
    LOGGER.debug("Login to {0}, status {1}".format(LOGIN_URL,
                                                   response['status']))

    cookie = response.get('set-cookie', None)
    if cookie is None:
        raise RuntimeError("Wasn't able to log into: {0}".format(LOGIN_URL))
    return cookie


def get_url_as_tree(url, method, cookie=None):
    http = httplib2.Http()
    headers = {}
    if cookie is not None:
        headers['Cookie'] = cookie
    response, content = http.request(url, method, headers=headers)
    if response['status'] == '200':
        tree = fromstring(content, base_url=url)
        return tree
    else:
        msg = "error accessing {0}, status {1}"
        msg = msg.format(url, response['status'])
        raise httplib2.HttpLib2ErrorWithResponse(msg, response, content)


def get_url_as_text(url, method, cookie=None):
    http = httplib2.Http()
    headers = {}
    if cookie is not None:
        headers['Cookie'] = cookie
    response, content = http.request(url, method, headers=headers)
    if response['status'] == '200':
        return content
    else:
        msg = "error accessing {0}, status {1}"
        msg = msg.format(url, response['status'])
        raise httplib2.HttpLib2ErrorWithResponse(msg, response, content)

################
#  old stuff
SUBMISSIONS_LACKING_LIBID = [
    ('1x75-Directional-HeLa-Rep1',    '11208'),
    ('1x75-Directional-HeLa-Rep2',    '11207'),
    ('1x75-Directional-HepG2-Rep1',   '11210'),
    ('1x75-Directional-HepG2-Rep2',   '11209'),
    ('1x75-Directional-H1-hESC-Rep1', '10947'),
    ('1x75-Directional-H1-hESC-Rep2', '11009'),
    ('1x75-Directional-HUVEC-Rep1',   '11206'),
    ('1x75-Directional-HUVEC-Rep2',   '11205'),
    ('1x75-Directional-K562-Rep1',    '11008'),
    ('1x75-Directional-K562-Rep2',    '11007'),
    ('1x75-Directional-NHEK-Rep1',    '11204'),
    ('1x75-Directional-GM12878-Rep1', '11011'),
    ('1x75-Directional-GM12878-Rep2', '11010'),
    ]


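# select_by_library_id returns a dict keyed by library id, with each value
# a list of that library's submissions sorted newest first, e.g. (sketch):
#   {'11039': [sub_newer, sub_older]}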
def select_by_library_id(submission_list):
    subl = [(x.library_id, x) for x in submission_list if x.library_id]
    libraries = {}
    for lib_id, subobj in subl:
        libraries.setdefault(lib_id, []).append(subobj)

    for submission in libraries.values():
        submission.sort(key=attrgetter('date'), reverse=True)

    return libraries


def library_to_freeze(selected_libraries):
    freezes = ['2010-Jan', '2010-Jul', '2011-Jan']
    lib_ids = sorted(selected_libraries.keys())
    report = ["""<html>
<head>
<style type="text/css">
 td {border-width:0 0 1px 1px; border-style:solid;}
</style>
</head>
<body>
<table>
"""]
    report.append('<thead>')
    report.append('<tr><td>Library ID</td><td>Name</td>')
    for f in freezes:
        report.append('<td>{0}</td>'.format(f))
    report.append('</tr>')
    report.append('</thead>')
    report.append('<tbody>')
    for lib_id in lib_ids:
        report.append('<tr>')
        lib_url = LIBRARY_NS[lib_id].uri
        report.append('<td><a href="{0}">{1}</a></td>'.format(lib_url, lib_id))
        submissions = selected_libraries[lib_id]
        report.append('<td>{0}</td>'.format(submissions[0].name))
        batched = {}
        for sub in submissions:
            date = date_to_freeze(sub.date)
            batched.setdefault(date, []).append(sub)
        for d in freezes:
            report.append('<td>')
            for s in batched.get(d, []):
                show_url = submission_view_url(s.subid)
                subid = '<a href="{0}">{1}</a>'.format(show_url, s.subid)
                report.append("{0}:{1}".format(subid, s.status))
            report.append('</td>')
        report.append("</tr>")
    report.append('</tbody>')
    report.append("</table></body></html>")
    return "\n".join(report)


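# date_to_freeze maps a datetime onto the name of the first ENCODE freeze
# ending after it, e.g. datetime(2010, 3, 1) -> '2010-Jul'; dates past the
# last freeze window fall through to None.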
def date_to_freeze(d):
    freezes = [(datetime(2010, 1, 30), '2010-Jan'),
               (datetime(2010, 7, 30), '2010-Jul'),
               (datetime(2011, 1, 30), '2011-Jan'),
               ]
    for end, name in freezes:
        if d < end:
            return name
    return None


if __name__ == "__main__":
    main()