Merge branch 'master' of mus.cacr.caltech.edu:htsworkflow

author Diane Trout <diane@caltech.edu>
Tue, 18 Sep 2012 17:55:36 +0000 (10:55 -0700)
committer Diane Trout <diane@caltech.edu>
Tue, 18 Sep 2012 17:55:36 +0000 (10:55 -0700)
diff --git a/encode_submission/geo_gather.py b/encode_submission/geo_gather.py

index 5d6bf1142f193c3c88a566270ed6579b4f58273c..c67edefc7d45d0e95e7a798dd015a31befbbbac2 100644 (file)
--- a/encode_submission/geo_gather.py
+++ b/encode_submission/geo_gather.py
@@ -84,6 +84,8 @@ def main(cmdline=None):
          extractor.create_scripts(results)
  
      if opts.scan_submission:
+        if opts.name is None:
+            parser.error("Please define a submission name")
          mapper.scan_submission_dirs(results)
  
      if opts.make_soft:
diff --git a/htsworkflow/submission/daf.py b/htsworkflow/submission/daf.py

index a74d71a667d1cb788cbdaab24cdf8661738c9413..09b285b095ee2a15cfabee5bc5de3d866522c4f9 100644 (file)
--- a/htsworkflow/submission/daf.py
+++ b/htsworkflow/submission/daf.py
@@ -361,7 +361,7 @@ class UCSCSubmission(object):
                            rdfNS['type'],
                            submissionOntology['submission']))
          self.model.add_statement(RDF.Statement(submissionNode,
-                                               submissionOntology['library'],
+                                               libraryOntology['library'],
                                                 libNode))
  
          LOGGER.debug("Adding statements to {0}".format(str(submissionView)))
diff --git a/htsworkflow/submission/geo.py b/htsworkflow/submission/geo.py

index 6137875b1ad86048b0c08080f6fb947d090e9471..413d2c3ae5e99daa7025a6be3101edb50d96da85 100644 (file)
--- a/htsworkflow/submission/geo.py
+++ b/htsworkflow/submission/geo.py
@@ -31,9 +31,13 @@ class GEOSubmission(Submission):
          for lib_id, result_dir in result_map.items():
              an_analysis = self.get_submission_node(result_dir)
              metadata = self.get_sample_metadata(an_analysis)
-            if len(metadata) > 1:
+            if len(metadata) == 0:
+                errmsg = 'No metadata found for {0}'
+                LOGGER.error(errmsg.format(str(an_analysis),))
+                continue
+            elif len(metadata) > 1:
                  errmsg = 'Confused there are more than one samples for %s'
-                LOGGER.debug(errmsg % (str(an_analysis,)))
+                LOGGER.debug(errmsg % (str(an_analysis),))
              metadata = metadata[0]
              metadata['raw'] = self.get_raw_files(an_analysis)
              metadata['supplimental'] = self.get_sample_files(an_analysis)
diff --git a/htsworkflow/submission/submission.py b/htsworkflow/submission/submission.py

index 6dd630aeda90fe5ad08a96da8be610f628642b04..18fa3b2bc9f487b63915ca02a3c060adbfeedf3b 100644 (file)
--- a/htsworkflow/submission/submission.py
+++ b/htsworkflow/submission/submission.py
@@ -83,7 +83,7 @@ class Submission(object):
                                                      rdfNS['type'])
          if file_classification is None:
              errmsg = 'Could not find class for {0}'
-            logger.warning(errmsg.format(str(file_type)))
+            LOGGER.warning(errmsg.format(str(file_type)))
              return
  
          self.model.add_statement(
diff --git a/htsworkflow/templates/geo_samples.sparql b/htsworkflow/templates/geo_samples.sparql

index 850d99a944ee51009675a381aa5c8a023d521a9d..b4d4b0bc8102db05b84f16472b4508a82db48afa 100644 (file)
--- a/htsworkflow/templates/geo_samples.sparql
+++ b/htsworkflow/templates/geo_samples.sparql
@@ -7,18 +7,22 @@ PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
  
  select distinct ?name ?cell ?antibody ?sex ?control ?strain ?controlId ?library_id ?treatment ?protocol ?readType ?insertLength ?replicate, ?mapAlgorithm ?species_name ?taxon_id ?extractMolecule ?growthProtocol ?extractProtocol ?dataProtocol ?experiment_type ?library_selection ?library_source
  WHERE {
-  <{{submission}}> a submissionOntology:submission .
+  <{{submission}}> a submissionOntology:submission ;
+                   submissionOntology:library ?library ;
+                   submissionOntology:name ?name .
  
    OPTIONAL { <{{submission}}> ucscDaf:control ?control }
    OPTIONAL { <{{submission}}> ucscDaf:controlId ?controlId }
    OPTIONAL { ?library libraryOntology:antibody ?antibody }
    OPTIONAL { ?library libraryOntology:cell_line ?cell .
-             ?cell_line cells:cell ?cell ;
-                        cells:documents ?growthProtocol . }
+             OPTIONAL { ?cell_line cells:cell ?cell ;
+                                   cells:documents ?growthProtocol . }}
    OPTIONAL { ?library ucscDaf:sex ?sex }
    OPTIONAL { ?library libraryOntology:library_id ?library_id }
    OPTIONAL { ?library libraryOntology:replicate ?replicate }
-  OPTIONAL { ?library libraryOntology:species ?species_name }
+  OPTIONAL { ?library libraryOntology:species ?species_name .
+             ?species libraryOntology:species ?species_name ;
+                      libraryOntology:taxon_id ?taxon_id . }
    OPTIONAL { ?library libraryOntology:condition_term ?treatment }
    OPTIONAL { ?library libraryOntology:experiment_type ?experiment_type }
    OPTIONAL { ?library libraryOntology:librarySelection ?library_selection }
@@ -32,8 +36,6 @@ WHERE {
    OPTIONAL { ?library libraryOntology:insert_size ?insertLength }
    OPTIONAL { ?library ucscDaf:mapAlgorithm ?mapAlgorithm }
  
-  <{{submission}}> submissionOntology:library ?library ;
-                   submissionOntology:name ?name .
    ?species libraryOntology:species ?species_name ;
             libraryOntology:taxon_id ?taxon_id .
  
diff --git a/htsworkflow/util/rdfhelp.py b/htsworkflow/util/rdfhelp.py

index fda8772f858601a0f488c61248da3d39d67c7be3..93b7ada645e876834319236359b3121c73f94c74 100644 (file)
--- a/htsworkflow/util/rdfhelp.py
+++ b/htsworkflow/util/rdfhelp.py
@@ -258,13 +258,14 @@ def load_into_model(model, parser_name, path, ns=None):
      if len(url_parts[0]) == 0 or url_parts[0] == 'file':
          url_parts[0] = 'file'
          url_parts[2] = os.path.abspath(url_parts[2])
-        if parser_name is None or parser_name == 'guess':
-            parser_name = guess_parser_by_extension(path)
+    if parser_name is None or parser_name == 'guess':
+        parser_name = guess_parser_by_extension(path)
      url = urlunparse(url_parts)
      logger.info("Opening {0} with parser {1}".format(url, parser_name))
  
      rdf_parser = RDF.Parser(name=parser_name)
  
+    statements = []
      retries = 3
      while retries > 0:
          try:
@@ -273,7 +274,7 @@ def load_into_model(model, parser_name, path, ns=None):
              retries = 0
          except RDF.RedlandError, e:
              errmsg = "RDF.RedlandError: {0} {1} tries remaining"
-            logger.error(errmsg.format(str(e), tries))
+            logger.error(errmsg.format(str(e), retries))
  
      for s in statements:
          conditionally_add_statement(model, s, ns)
@@ -384,16 +385,16 @@ def guess_parser(content_type, pathname):
          return 'turtle'
      elif content_type in ('text/html',):
          return 'rdfa'
-    elif content_type is None:
+    elif content_type is None or content_type in ('text/plain',):
          return guess_parser_by_extension(pathname)
  
  def guess_parser_by_extension(pathname):
      _, ext = os.path.splitext(pathname)
      if ext in ('.xml', '.rdf'):
          return 'rdfxml'
-    elif ext in ('.html'):
+    elif ext in ('.html',):
          return 'rdfa'
-    elif ext in ('.turtle'):
+    elif ext in ('.turtle',):
          return 'turtle'
      return 'guess'
  
diff --git a/htsworkflow/util/test/test_rdfhelp.py b/htsworkflow/util/test/test_rdfhelp.py

index 9a31ca90e382370b0b5a838cf8fd61fd8b96926a..948bcf407cf976eea74c44b0ff095b475f02770e 100644 (file)
--- a/htsworkflow/util/test/test_rdfhelp.py
+++ b/htsworkflow/util/test/test_rdfhelp.py
@@ -207,7 +207,8 @@ _:a owl:imports "{loc}extra.turtle" .
                  ('/a/b/c.rdf', 'rdfxml'),
                  ('/a/b/c.xml', 'rdfxml'),
                  ('/a/b/c.html', 'rdfa'),
-                ('/a/b/c.turtle', 'turtle')]
+                ('/a/b/c.turtle', 'turtle'),
+                ('http://foo.bar/bleem.turtle', 'turtle')]
              for path, parser in DATA:
                  self.assertEqual(guess_parser_by_extension(path), parser)
                  self.assertEqual(guess_parser(None, path), parser)
@@ -215,7 +216,10 @@ _:a owl:imports "{loc}extra.turtle" .
              DATA = [
                  ('application/rdf+xml', 'http://a.org/b/c', 'rdfxml'),
                  ('application/x-turtle', 'http://a.org/b/c', 'turtle'),
-                ('text/html', 'http://a.org/b/c', 'rdfa')
+                ('text/html', 'http://a.org/b/c', 'rdfa'),
+                ('text/html', 'http://a.org/b/c.html', 'rdfa'),
+                ('text/plain', 'http://a.org/b/c.turtle', 'turtle'),
+                ('text/plain', 'http://a.org/b/c', 'guess')
              ]
              for contenttype, url, parser in DATA:
                  self.assertEqual(guess_parser(contenttype, url), parser)
author	Diane Trout <diane@caltech.edu>
	Tue, 18 Sep 2012 17:55:36 +0000 (10:55 -0700)
committer	Diane Trout <diane@caltech.edu>
	Tue, 18 Sep 2012 17:55:36 +0000 (10:55 -0700)
encode_submission/geo_gather.py		patch | blob | history
htsworkflow/submission/daf.py		patch | blob | history
htsworkflow/submission/geo.py		patch | blob | history
htsworkflow/submission/submission.py		patch | blob | history
htsworkflow/templates/geo_samples.sparql		patch | blob | history
htsworkflow/util/rdfhelp.py		patch | blob | history
htsworkflow/util/test/test_rdfhelp.py		patch | blob | history