From: Diane Trout Date: Tue, 18 Sep 2012 17:55:36 +0000 (-0700) Subject: Merge branch 'master' of mus.cacr.caltech.edu:htsworkflow X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=5d3908c830a1cded1b3bfde11c8293c05e997ac1;hp=630705a8fe7ae64edbc8452036a125513256f078 Merge branch 'master' of mus.cacr.caltech.edu:htsworkflow --- diff --git a/encode_submission/geo_gather.py b/encode_submission/geo_gather.py index 5d6bf11..c67edef 100644 --- a/encode_submission/geo_gather.py +++ b/encode_submission/geo_gather.py @@ -84,6 +84,8 @@ def main(cmdline=None): extractor.create_scripts(results) if opts.scan_submission: + if opts.name is None: + parser.error("Please define a submission name") mapper.scan_submission_dirs(results) if opts.make_soft: diff --git a/htsworkflow/submission/daf.py b/htsworkflow/submission/daf.py index a74d71a..09b285b 100644 --- a/htsworkflow/submission/daf.py +++ b/htsworkflow/submission/daf.py @@ -361,7 +361,7 @@ class UCSCSubmission(object): rdfNS['type'], submissionOntology['submission'])) self.model.add_statement(RDF.Statement(submissionNode, - submissionOntology['library'], + libraryOntology['library'], libNode)) LOGGER.debug("Adding statements to {0}".format(str(submissionView))) diff --git a/htsworkflow/submission/geo.py b/htsworkflow/submission/geo.py index 6137875..413d2c3 100644 --- a/htsworkflow/submission/geo.py +++ b/htsworkflow/submission/geo.py @@ -31,9 +31,13 @@ class GEOSubmission(Submission): for lib_id, result_dir in result_map.items(): an_analysis = self.get_submission_node(result_dir) metadata = self.get_sample_metadata(an_analysis) - if len(metadata) > 1: + if len(metadata) == 0: + errmsg = 'No metadata found for {0}' + LOGGER.error(errmsg.format(str(an_analysis),)) + continue + elif len(metadata) > 1: errmsg = 'Confused there are more than one samples for %s' - LOGGER.debug(errmsg % (str(an_analysis,))) + LOGGER.debug(errmsg % (str(an_analysis),)) metadata = metadata[0] metadata['raw'] = self.get_raw_files(an_analysis) metadata['supplimental'] = self.get_sample_files(an_analysis) diff --git a/htsworkflow/submission/submission.py b/htsworkflow/submission/submission.py index 6dd630a..18fa3b2 100644 --- a/htsworkflow/submission/submission.py +++ b/htsworkflow/submission/submission.py @@ -83,7 +83,7 @@ class Submission(object): rdfNS['type']) if file_classification is None: errmsg = 'Could not find class for {0}' - logger.warning(errmsg.format(str(file_type))) + LOGGER.warning(errmsg.format(str(file_type))) return self.model.add_statement( diff --git a/htsworkflow/templates/geo_samples.sparql b/htsworkflow/templates/geo_samples.sparql index 850d99a..b4d4b0b 100644 --- a/htsworkflow/templates/geo_samples.sparql +++ b/htsworkflow/templates/geo_samples.sparql @@ -7,18 +7,22 @@ PREFIX cells: select distinct ?name ?cell ?antibody ?sex ?control ?strain ?controlId ?library_id ?treatment ?protocol ?readType ?insertLength ?replicate, ?mapAlgorithm ?species_name ?taxon_id ?extractMolecule ?growthProtocol ?extractProtocol ?dataProtocol ?experiment_type ?library_selection ?library_source WHERE { - <{{submission}}> a submissionOntology:submission . + <{{submission}}> a submissionOntology:submission ; + submissionOntology:library ?library ; + submissionOntology:name ?name . OPTIONAL { <{{submission}}> ucscDaf:control ?control } OPTIONAL { <{{submission}}> ucscDaf:controlId ?controlId } OPTIONAL { ?library libraryOntology:antibody ?antibody } OPTIONAL { ?library libraryOntology:cell_line ?cell . - ?cell_line cells:cell ?cell ; - cells:documents ?growthProtocol . } + OPTIONAL { ?cell_line cells:cell ?cell ; + cells:documents ?growthProtocol . }} OPTIONAL { ?library ucscDaf:sex ?sex } OPTIONAL { ?library libraryOntology:library_id ?library_id } OPTIONAL { ?library libraryOntology:replicate ?replicate } - OPTIONAL { ?library libraryOntology:species ?species_name } + OPTIONAL { ?library libraryOntology:species ?species_name . + ?species libraryOntology:species ?species_name ; + libraryOntology:taxon_id ?taxon_id . } OPTIONAL { ?library libraryOntology:condition_term ?treatment } OPTIONAL { ?library libraryOntology:experiment_type ?experiment_type } OPTIONAL { ?library libraryOntology:librarySelection ?library_selection } @@ -32,8 +36,6 @@ WHERE { OPTIONAL { ?library libraryOntology:insert_size ?insertLength } OPTIONAL { ?library ucscDaf:mapAlgorithm ?mapAlgorithm } - <{{submission}}> submissionOntology:library ?library ; - submissionOntology:name ?name . ?species libraryOntology:species ?species_name ; libraryOntology:taxon_id ?taxon_id . diff --git a/htsworkflow/util/rdfhelp.py b/htsworkflow/util/rdfhelp.py index fda8772..93b7ada 100644 --- a/htsworkflow/util/rdfhelp.py +++ b/htsworkflow/util/rdfhelp.py @@ -258,13 +258,14 @@ def load_into_model(model, parser_name, path, ns=None): if len(url_parts[0]) == 0 or url_parts[0] == 'file': url_parts[0] = 'file' url_parts[2] = os.path.abspath(url_parts[2]) - if parser_name is None or parser_name == 'guess': - parser_name = guess_parser_by_extension(path) + if parser_name is None or parser_name == 'guess': + parser_name = guess_parser_by_extension(path) url = urlunparse(url_parts) logger.info("Opening {0} with parser {1}".format(url, parser_name)) rdf_parser = RDF.Parser(name=parser_name) + statements = [] retries = 3 while retries > 0: try: @@ -273,7 +274,7 @@ def load_into_model(model, parser_name, path, ns=None): retries = 0 except RDF.RedlandError, e: errmsg = "RDF.RedlandError: {0} {1} tries remaining" - logger.error(errmsg.format(str(e), tries)) + logger.error(errmsg.format(str(e), retries)) for s in statements: conditionally_add_statement(model, s, ns) @@ -384,16 +385,16 @@ def guess_parser(content_type, pathname): return 'turtle' elif content_type in ('text/html',): return 'rdfa' - elif content_type is None: + elif content_type is None or content_type in ('text/plain',): return guess_parser_by_extension(pathname) def guess_parser_by_extension(pathname): _, ext = os.path.splitext(pathname) if ext in ('.xml', '.rdf'): return 'rdfxml' - elif ext in ('.html'): + elif ext in ('.html',): return 'rdfa' - elif ext in ('.turtle'): + elif ext in ('.turtle',): return 'turtle' return 'guess' diff --git a/htsworkflow/util/test/test_rdfhelp.py b/htsworkflow/util/test/test_rdfhelp.py index 9a31ca9..948bcf4 100644 --- a/htsworkflow/util/test/test_rdfhelp.py +++ b/htsworkflow/util/test/test_rdfhelp.py @@ -207,7 +207,8 @@ _:a owl:imports "{loc}extra.turtle" . ('/a/b/c.rdf', 'rdfxml'), ('/a/b/c.xml', 'rdfxml'), ('/a/b/c.html', 'rdfa'), - ('/a/b/c.turtle', 'turtle')] + ('/a/b/c.turtle', 'turtle'), + ('http://foo.bar/bleem.turtle', 'turtle')] for path, parser in DATA: self.assertEqual(guess_parser_by_extension(path), parser) self.assertEqual(guess_parser(None, path), parser) @@ -215,7 +216,10 @@ _:a owl:imports "{loc}extra.turtle" . DATA = [ ('application/rdf+xml', 'http://a.org/b/c', 'rdfxml'), ('application/x-turtle', 'http://a.org/b/c', 'turtle'), - ('text/html', 'http://a.org/b/c', 'rdfa') + ('text/html', 'http://a.org/b/c', 'rdfa'), + ('text/html', 'http://a.org/b/c.html', 'rdfa'), + ('text/plain', 'http://a.org/b/c.turtle', 'turtle'), + ('text/plain', 'http://a.org/b/c', 'guess') ] for contenttype, url, parser in DATA: self.assertEqual(guess_parser(contenttype, url), parser)