Merge branch 'master' of mus.cacr.caltech.edu:htsworkflow
author: Diane Trout <diane@caltech.edu>
Tue, 18 Sep 2012 17:55:36 +0000 (10:55 -0700)
committer: Diane Trout <diane@caltech.edu>
Tue, 18 Sep 2012 17:55:36 +0000 (10:55 -0700)
encode_submission/geo_gather.py
htsworkflow/submission/daf.py
htsworkflow/submission/geo.py
htsworkflow/submission/submission.py
htsworkflow/templates/geo_samples.sparql
htsworkflow/util/rdfhelp.py
htsworkflow/util/test/test_rdfhelp.py

index 5d6bf1142f193c3c88a566270ed6579b4f58273c..c67edefc7d45d0e95e7a798dd015a31befbbbac2 100644 (file)
@@ -84,6 +84,8 @@ def main(cmdline=None):
         extractor.create_scripts(results)
 
     if opts.scan_submission:
+        if opts.name is None:
+            parser.error("Please define a submission name")
         mapper.scan_submission_dirs(results)
 
     if opts.make_soft:
index a74d71a667d1cb788cbdaab24cdf8661738c9413..09b285b095ee2a15cfabee5bc5de3d866522c4f9 100644 (file)
@@ -361,7 +361,7 @@ class UCSCSubmission(object):
                           rdfNS['type'],
                           submissionOntology['submission']))
         self.model.add_statement(RDF.Statement(submissionNode,
-                                               submissionOntology['library'],
+                                               libraryOntology['library'],
                                                libNode))
 
         LOGGER.debug("Adding statements to {0}".format(str(submissionView)))
index 6137875b1ad86048b0c08080f6fb947d090e9471..413d2c3ae5e99daa7025a6be3101edb50d96da85 100644 (file)
@@ -31,9 +31,13 @@ class GEOSubmission(Submission):
         for lib_id, result_dir in result_map.items():
             an_analysis = self.get_submission_node(result_dir)
             metadata = self.get_sample_metadata(an_analysis)
-            if len(metadata) > 1:
+            if len(metadata) == 0:
+                errmsg = 'No metadata found for {0}'
+                LOGGER.error(errmsg.format(str(an_analysis),))
+                continue
+            elif len(metadata) > 1:
                 errmsg = 'Confused there are more than one samples for %s'
-                LOGGER.debug(errmsg % (str(an_analysis,)))
+                LOGGER.debug(errmsg % (str(an_analysis),))
             metadata = metadata[0]
             metadata['raw'] = self.get_raw_files(an_analysis)
             metadata['supplimental'] = self.get_sample_files(an_analysis)
index 6dd630aeda90fe5ad08a96da8be610f628642b04..18fa3b2bc9f487b63915ca02a3c060adbfeedf3b 100644 (file)
@@ -83,7 +83,7 @@ class Submission(object):
                                                     rdfNS['type'])
         if file_classification is None:
             errmsg = 'Could not find class for {0}'
-            logger.warning(errmsg.format(str(file_type)))
+            LOGGER.warning(errmsg.format(str(file_type)))
             return
 
         self.model.add_statement(
index 850d99a944ee51009675a381aa5c8a023d521a9d..b4d4b0bc8102db05b84f16472b4508a82db48afa 100644 (file)
@@ -7,18 +7,22 @@ PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
 
 select distinct ?name ?cell ?antibody ?sex ?control ?strain ?controlId ?library_id ?treatment ?protocol ?readType ?insertLength ?replicate, ?mapAlgorithm ?species_name ?taxon_id ?extractMolecule ?growthProtocol ?extractProtocol ?dataProtocol ?experiment_type ?library_selection ?library_source
 WHERE {
-  <{{submission}}> a submissionOntology:submission .
+  <{{submission}}> a submissionOntology:submission ;
+                   submissionOntology:library ?library ;
+                   submissionOntology:name ?name .
 
   OPTIONAL { <{{submission}}> ucscDaf:control ?control }
   OPTIONAL { <{{submission}}> ucscDaf:controlId ?controlId }
   OPTIONAL { ?library libraryOntology:antibody ?antibody }
   OPTIONAL { ?library libraryOntology:cell_line ?cell .
-             ?cell_line cells:cell ?cell ;
-                        cells:documents ?growthProtocol . }
+             OPTIONAL { ?cell_line cells:cell ?cell ;
+                                   cells:documents ?growthProtocol . }}
   OPTIONAL { ?library ucscDaf:sex ?sex }
   OPTIONAL { ?library libraryOntology:library_id ?library_id }
   OPTIONAL { ?library libraryOntology:replicate ?replicate }
-  OPTIONAL { ?library libraryOntology:species ?species_name }
+  OPTIONAL { ?library libraryOntology:species ?species_name .
+             ?species libraryOntology:species ?species_name ;
+                      libraryOntology:taxon_id ?taxon_id . }
   OPTIONAL { ?library libraryOntology:condition_term ?treatment }
   OPTIONAL { ?library libraryOntology:experiment_type ?experiment_type }
   OPTIONAL { ?library libraryOntology:librarySelection ?library_selection }
@@ -32,8 +36,6 @@ WHERE {
   OPTIONAL { ?library libraryOntology:insert_size ?insertLength }
   OPTIONAL { ?library ucscDaf:mapAlgorithm ?mapAlgorithm }
 
-  <{{submission}}> submissionOntology:library ?library ;
-                   submissionOntology:name ?name .
   ?species libraryOntology:species ?species_name ;
            libraryOntology:taxon_id ?taxon_id .
 
index fda8772f858601a0f488c61248da3d39d67c7be3..93b7ada645e876834319236359b3121c73f94c74 100644 (file)
@@ -258,13 +258,14 @@ def load_into_model(model, parser_name, path, ns=None):
     if len(url_parts[0]) == 0 or url_parts[0] == 'file':
         url_parts[0] = 'file'
         url_parts[2] = os.path.abspath(url_parts[2])
-        if parser_name is None or parser_name == 'guess':
-            parser_name = guess_parser_by_extension(path)
+    if parser_name is None or parser_name == 'guess':
+        parser_name = guess_parser_by_extension(path)
     url = urlunparse(url_parts)
     logger.info("Opening {0} with parser {1}".format(url, parser_name))
 
     rdf_parser = RDF.Parser(name=parser_name)
 
+    statements = []
     retries = 3
     while retries > 0:
         try:
@@ -273,7 +274,7 @@ def load_into_model(model, parser_name, path, ns=None):
             retries = 0
         except RDF.RedlandError, e:
             errmsg = "RDF.RedlandError: {0} {1} tries remaining"
-            logger.error(errmsg.format(str(e), tries))
+            logger.error(errmsg.format(str(e), retries))
 
     for s in statements:
         conditionally_add_statement(model, s, ns)
@@ -384,16 +385,16 @@ def guess_parser(content_type, pathname):
         return 'turtle'
     elif content_type in ('text/html',):
         return 'rdfa'
-    elif content_type is None:
+    elif content_type is None or content_type in ('text/plain',):
         return guess_parser_by_extension(pathname)
 
 def guess_parser_by_extension(pathname):
     _, ext = os.path.splitext(pathname)
     if ext in ('.xml', '.rdf'):
         return 'rdfxml'
-    elif ext in ('.html'):
+    elif ext in ('.html',):
         return 'rdfa'
-    elif ext in ('.turtle'):
+    elif ext in ('.turtle',):
         return 'turtle'
     return 'guess'
 
index 9a31ca90e382370b0b5a838cf8fd61fd8b96926a..948bcf407cf976eea74c44b0ff095b475f02770e 100644 (file)
@@ -207,7 +207,8 @@ _:a owl:imports "{loc}extra.turtle" .
                 ('/a/b/c.rdf', 'rdfxml'),
                 ('/a/b/c.xml', 'rdfxml'),
                 ('/a/b/c.html', 'rdfa'),
-                ('/a/b/c.turtle', 'turtle')]
+                ('/a/b/c.turtle', 'turtle'),
+                ('http://foo.bar/bleem.turtle', 'turtle')]
             for path, parser in DATA:
                 self.assertEqual(guess_parser_by_extension(path), parser)
                 self.assertEqual(guess_parser(None, path), parser)
@@ -215,7 +216,10 @@ _:a owl:imports "{loc}extra.turtle" .
             DATA = [
                 ('application/rdf+xml', 'http://a.org/b/c', 'rdfxml'),
                 ('application/x-turtle', 'http://a.org/b/c', 'turtle'),
-                ('text/html', 'http://a.org/b/c', 'rdfa')
+                ('text/html', 'http://a.org/b/c', 'rdfa'),
+                ('text/html', 'http://a.org/b/c.html', 'rdfa'),
+                ('text/plain', 'http://a.org/b/c.turtle', 'turtle'),
+                ('text/plain', 'http://a.org/b/c', 'guess')
             ]
             for contenttype, url, parser in DATA:
                 self.assertEqual(guess_parser(contenttype, url), parser)