From: Diane Trout Date: Mon, 30 Jan 2012 23:23:44 +0000 (-0800) Subject: Add ability to refresh library data from our htsw server X-Git-Tag: v0.5.5~67 X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=132c7eb4453aa691f052282a19a7ff33cf2fa60e Add ability to refresh library data from our htsw server (Delete & reload) --- diff --git a/encode_submission/encode_find.py b/encode_submission/encode_find.py index 14d5bdf..eebc3bc 100644 --- a/encode_submission/encode_find.py +++ b/encode_submission/encode_find.py @@ -96,6 +96,10 @@ def main(cmdline=None): else: limit = args + if opts.reload_libraries: + reload_libraries(model, args) + return + if opts.update: opts.update_submission = True opts.update_libraries = True @@ -152,6 +156,10 @@ def make_parser(): commands.add_option('--update-libraries', action="store_true", default=False, help="download library info from htsw") + commands.add_option('--reload-libraries', action="store_true", + default=False, + help="Delete and redownload library information. "\ + "Optionally list specific library IDs.") parser.add_option_group(commands) queries = OptionGroup(parser, "Queries") @@ -292,6 +300,21 @@ WHERE {{ query = RDF.SPARQLQuery(unscanned_libraries) return query.execute(model) +def find_all_libraries(model): + """Scan model for every library marked as + """ + libraries = """ +PREFIX rdf: +PREFIX libraryOntology:<{libraryOntology}> + +SELECT distinct ?library_urn +WHERE {{ + ?library_urn rdf:type ?library_type . + FILTER(regex(?libray +}}""".format(libraryOntology=libraryOntology[''].uri) + query = RDF.SPARQLQuery(libraries) + return query.execute(model) + def add_submission_creation_date(model, subUrn, cookie): # in theory the submission page might have more information on it. @@ -447,6 +470,65 @@ def load_unassigned_submitted_libraries(model): LOGGER.warn("Unassigned, submitted library: {0}".format(library_urn)) load_library_detail(model, library_urn) +def reload_libraries(model, library_list): + if len(library_list) == 0: + # reload everything. + queryset = find_all_libraries(model) + libraries = ( str(s['library_urn']) for s in queryset ) + else: + libraries = ( user_library_id_to_library_urn(l) for l in library_list ) + + for library_urn in libraries: + delete_library(model, library_urn) + load_library_detail(model, library_urn) + +def user_library_id_to_library_urn(library_id): + split_url = urlparse.urlsplit(library_id) + if len(split_url.scheme) == 0: + return LIBRARY_NS[library_id] + else: + return library_id + +def delete_library(model, library_urn): + if not isinstance(library_urn, RDF.Node): + raise ValueError("library urn must be a RDF.Node") + + LOGGER.info("Deleting {0}".format(str(library_urn.uri))) + lane_query = RDF.Statement(library_urn, libraryOntology['has_lane'],None) + for lane in model.find_statements(lane_query): + delete_lane(model, lane.object) + library_attrib_query = RDF.Statement(library_urn, None, None) + for library_attrib in model.find_statements(library_attrib_query): + LOGGER.debug("Deleting {0}".format(str(library_attrib))) + del model[library_attrib] + + +def delete_lane(model, lane_urn): + if not isinstance(lane_urn, RDF.Node): + raise ValueError("lane urn must be a RDF.Node") + + delete_lane_mapping(model, lane_urn) + lane_attrib_query = RDF.Statement(lane_urn,None,None) + for lane_attrib in model.find_statements(lane_attrib_query): + LOGGER.debug("Deleting {0}".format(str(lane_attrib))) + del model[lane_attrib] + + +def delete_lane_mapping(model, lane_urn): + if not isinstance(lane_urn, RDF.Node): + raise ValueError("lane urn must be a RDF.Node") + + lane_mapping_query = RDF.Statement(lane_urn, + libraryOntology['has_mappings'], + None) + for lane_mapping in model.find_statements(lane_mapping_query): + mapping_attrib_query = RDF.Statement(lane_mapping.object, + None, + None) + for mapping_attrib in model.find_statements(mapping_attrib_query): + LOGGER.debug("Deleting {0}".format(str(mapping_attrib))) + del model[mapping_attrib] + def load_encodedcc_files(model, genome, composite): file_index = ucsc.get_encodedcc_file_index(genome, composite) diff --git a/encode_submission/test_encode_find.py b/encode_submission/test_encode_find.py index 9beb102..a02e6b7 100644 --- a/encode_submission/test_encode_find.py +++ b/encode_submission/test_encode_find.py @@ -44,6 +44,101 @@ class TestEncodeFind(unittest.TestCase): object_date = fromTypedNode(dates[0].object) self.assertEqual(object_date, datetime(2011,12,7,15,23,0)) + def test_delete_simple_lane(self): + model = get_model() + parser = RDF.Parser(name='turtle') + parser.parse_string_into_model(model, '''@prefix rdf: . +@prefix : . +@prefix rdfs: . +@prefix xsd: . +@prefix libns: . + + + libns:flowcell ; + libns:total_unique_locations 5789938 . + +''', 'http://jumpgate.caltech.edu/library/') + urn = RDF.Node(RDF.Uri('http://jumpgate.caltech.edu/lane/1232')) + encode_find.delete_lane(model, urn) + self.failUnlessEqual(len(model), 0) + + def test_delete_lane_with_mapping(self): + model = get_model() + parser = RDF.Parser(name='turtle') + parser.parse_string_into_model(model, '''@prefix rdf: . +@prefix : . +@prefix rdfs: . +@prefix xsd: . +@prefix libns: . + + + libns:flowcell ; + libns:has_mappings _:bnode110110 ; + libns:total_unique_locations 5789938 . + +_:bnode110110 + libns:mapped_to "newcontam_UK.fa"@en ; + libns:reads 42473 . +''', 'http://jumpgate.caltech.edu/library/') + self.failUnlessEqual(len(model), 5) + urn = RDF.Node(RDF.Uri('http://jumpgate.caltech.edu/lane/1232')) + encode_find.delete_lane(model, urn) + self.failUnlessEqual(len(model), 0) + + def test_delete_library(self): + model = get_model() + parser = RDF.Parser(name='turtle') + parser.parse_string_into_model(model, '''@prefix rdf: . +@prefix : . +@prefix rdfs: . +@prefix xsd: . +@prefix libns: . + + + libns:flowcell ; + libns:has_mappings _:bnode110110 ; + libns:total_unique_locations 5789938 . + + + libns:affiliation "ENCODE"@en, "ENCODE_Tier1"@en, "Georgi Marinov"@en ; + libns:has_lane ; + libns:library_id "11011"@en ; + libns:library_type "None"@en ; + a "libns:library"@en ; + , . + +_:bnode110110 + libns:mapped_to "newcontam_UK.fa"@en ; + libns:reads 42473 . + + + libns:flowcell ; + libns:has_mappings _:bnode120970 ; + libns:total_unique_locations 39172114 . + + + libns:has_lane ; + libns:library_id "12097"@en ; + libns:library_type "Paired End (non-multiplexed)"@en ; + a "libns:library"@en ; + +_:bnode120970 + libns:mapped_to "newcontam_UK.fa"@en ; + libns:reads 64 . +''', 'http://jumpgate.caltech.edu/library') + urn = RDF.Node(RDF.Uri('http://jumpgate.caltech.edu/library/11011/')) + encode_find.delete_library(model, urn) + q = RDF.Statement(None, encode_find.libraryOntology['reads'], None) + stmts = list(model.find_statements(q)) + self.failUnlessEqual(len(stmts), 1) + self.failUnlessEqual(fromTypedNode(stmts[0].object), + 64) + + q = RDF.Statement(None, encode_find.libraryOntology['library_id'], None) + stmts = list(model.find_statements(q)) + self.failUnlessEqual(len(stmts), 1) + self.failUnlessEqual(fromTypedNode(stmts[0].object), + '12097') def suite(): return unittest.makeSuite(TestEncodeFind, "test") diff --git a/htsworkflow/util/rdfhelp.py b/htsworkflow/util/rdfhelp.py index 3dd199f..1f9ec61 100644 --- a/htsworkflow/util/rdfhelp.py +++ b/htsworkflow/util/rdfhelp.py @@ -101,6 +101,8 @@ def fromTypedNode(node): return False else: raise ValueError("Unrecognized boolean %s" % (literal,)) + elif value_type == 'integer': + return int(literal) elif value_type == 'decimal' and literal.find('.') == -1: return int(literal) elif value_type in ('decimal', 'float', 'double'):