# Provenance (taken from the git patch header this file was delivered in):
#   Commit:  a6984cfa2e80ef3f70255e3e26c5954210ef2dac
#   Author:  Diane Trout
#   Date:    Tue, 21 Dec 2010 15:53:59 -0800
#   Subject: Add utility to fix the off-by-3 error in the amplified from
#            field.
#   File:    extra/fix_ob3.py (new file)
"""
Fix off by 3 error in our database

Background (from the original commit message): the bug appeared because
the library name and the library primary key drifted apart after some
accidental deletions.  The project eventually moved back to using raw
primary keys, as that made constructing django urls simpler and easier,
but the "amplified from" field was never adjusted to match.

For every library that has an ``amplified_from_sample``, this script looks
up the library whose primary key is ``ID_OFFSET`` lower than the currently
referenced one.  If that candidate's name equals the library's own name
with the trailing amplified marker removed, the reference is considered
wrong and (only when ``--run`` is given) is re-pointed at the candidate.
"""
from optparse import OptionParser
import os
import re
import sys

# The Django environment must be configured before the models are imported.
from django.core.management import setup_environ
from htsworkflow.frontend import settings
setup_environ(settings)

import htsworkflow.frontend.samples.models as samples

# How far the amplified_from references have drifted from the right key.
ID_OFFSET = 3


def main(cmdline=None):
    """Parse the command line and run the fix.

    :param cmdline: list of arguments to parse; ``None`` lets optparse
                    fall back to ``sys.argv[1:]``.
    :returns: 0, suitable for passing to ``sys.exit``.
    """
    parser = make_parser()
    opts, args = parser.parse_args(cmdline)

    # The database is only modified when --run is given explicitly.
    fix_ob3(dry_run=not opts.run)

    return 0


def make_parser():
    """Build the option parser; ``--run`` enables database changes."""
    parser = OptionParser("%prog: fix off by 3 error that creeped in")
    parser.add_option("--run", default=False, action="store_true",
                      help="change the database")
    return parser


def fix_ob3(dry_run=True):
    """Report, and optionally repair, shifted amplified_from references.

    Libraries are visited in ``id`` order.  Libraries without an
    ``amplified_from_sample`` are ignored.  For the rest, the library
    ``ID_OFFSET`` keys below the current reference is fetched; when
    ``is_alt_sample_right`` accepts it, the library is reported and, unless
    ``dry_run`` is true, re-pointed at it and saved.

    A library is skipped (and counted) instead of aborting the whole run
    when the candidate cannot be looked up: either the referenced id is not
    an integer, or no library exists at the shifted key.

    :param dry_run: when true (the default) nothing is written.
    """
    libraries = samples.Library.objects.order_by('id')

    # NOTE: mismatch detection is not implemented, so this is always
    # reported as 0; the summary line is kept so the output stays stable.
    mismatch = 0
    wrong_amp = 0
    wrong_amp_ids = set()
    skipped = 0
    for lib in libraries:
        amp_sample = lib.amplified_from_sample
        if amp_sample is None:
            continue

        try:
            alt_sample = samples.Library.objects.get(
                pk=int(amp_sample.id) - ID_OFFSET)
        except (ValueError, samples.Library.DoesNotExist):
            # No usable candidate at the shifted key; nothing to compare.
            skipped += 1
            continue

        if not is_alt_sample_right(lib, alt_sample):
            continue

        wrong_amp += 1
        wrong_amp_ids.add(int(lib.id))
        print("--- wrong lib ---")
        display_names(lib, amp_sample, alt_sample)
        if not dry_run:
            lib.amplified_from_sample = alt_sample
            lib.save()

    print("-----")
    print("{0} mismatches".format(mismatch))
    print("{0} obviously wrong libs".format(wrong_amp))
    if wrong_amp_ids:
        print(" {0} - {1}".format(min(wrong_amp_ids), max(wrong_amp_ids)))
    if skipped:
        print("{0} libs skipped (no candidate library found)".format(skipped))


def clean_lib_name(lib):
    """Strip trailing amplified marker character

    Removes a final lowercase ``a`` together with any spaces directly in
    front of it.  Note that this applies to every name ending in ``a``.
    """
    return re.sub(r" *a$", "", lib.library_name)


def is_alt_sample_right(lib, alt_sample):
    """Check to see if the alt sample is the right sample

    :returns: True when ``lib``'s name, with the amplified marker removed,
              equals ``alt_sample.library_name``.
    """
    return clean_lib_name(lib) == alt_sample.library_name


def display_names(lib, amp_from_sample, alt_from_sample):
    """Print the library, its current source and the proposed source.

    Four ``label id|name|`` lines are written: the library with its cleaned
    name, the library with its stored name, the currently referenced
    sample, and the candidate replacement.
    """
    lib_name = clean_lib_name(lib)
    # Unicode format strings accept both text and integer ids, and avoid
    # the implicit ASCII encoding a byte-string format would do on Python 2.
    print(u"NonA: {0}|{1}|".format(lib.id, lib_name))
    print(u" A: {0}|{1}|".format(lib.id, lib.library_name))
    print(u"AmpF: {0}|{1}|".format(amp_from_sample.id,
                                   amp_from_sample.library_name))
    print(u"FixF: {0}|{1}|".format(alt_from_sample.id,
                                   alt_from_sample.library_name))


if __name__ == "__main__":
    sys.exit(main())