Fix namespace issues in encode_find after the conversion
authorDiane Trout <diane@caltech.edu>
Thu, 7 Jul 2011 00:42:56 +0000 (17:42 -0700)
committerDiane Trout <diane@caltech.edu>
Thu, 7 Jul 2011 00:42:56 +0000 (17:42 -0700)
for ucsc_gather.

Also, I really need to implement a function to compute the library
URN, e.g. <http://jumpgate.caltech.edu/library/12345/>

extra/ucsc_encode_submission/dt-overrides.turtle
extra/ucsc_encode_submission/encode_find.py
extra/ucsc_encode_submission/find-lib-by-cell.sparql

index fde9addf3ade3503fa71e93359b934c195d896ae..30f372c114954463e3fbc1fa950d0ea089e6aa25 100644 (file)
@@ -5,7 +5,7 @@
 ##
 
 @base <file:///home/diane/proj/solexa/htsworkflow/extra/ucsc_encode_submission/no-lib.sparql> .
-@prefix encodeSubmit:<http://jumpgate.caltech.edu/wiki/UCSCSubmissionOntology#> .
+@prefix encodeSubmit:<http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#> .
 
 # woldlab-hepg2-rnaseq-2009dec
 <http://encodesubmit.ucsc.edu/pipeline/show/805>
index 2a1890afd4a44ba426f7befe895727b4f4309139..9955e296a8451316424d218daf3036f81e293e2d 100644 (file)
@@ -22,7 +22,7 @@ from htsworkflow.util.rdfhelp import \
      get_model, \
      get_serializer, \
      sparql_query, \
-     submitOntology, \
+     submissionOntology, \
      libraryOntology, \
      load_into_model, \
      rdfNS, \
@@ -34,7 +34,7 @@ libraryNS = RDF.NS("http://jumpgate.caltech.edu/library/")
 
 
 from htsworkflow.submission.ucsc import submission_view_url, UCSCEncodePipeline
-download_ddf = urlparse.urljoin(UCSCEncodePipeline, "download_ddf#", allow_fragments=True)
+download_ddf = UCSCEncodePipeline+"download_ddf#"
 ddfNS = RDF.NS(download_ddf)
                
 DBDIR = os.path.expanduser("~diane/proj/submission")
@@ -61,7 +61,7 @@ def main(cmdline=None):
     model = get_model(opts.load_model, DBDIR)
     
     if opts.load_rdf is not None:
-        ns_uri = submitOntology[''].uri
+        ns_uri = submissionOntology[''].uri
         load_into_model(model, opts.rdf_parser_name, opts.load_rdf, ns_uri)
         
     if opts.update:
@@ -127,9 +127,9 @@ def load_my_submissions(model, cookie=None):
     # first record is header
     tr = tr.findNext()
     TypeN = rdfsNS['type']
-    NameN = submitOntology['name']
-    SpeciesN = submitOntology['species']
-    LibraryURN = submitOntology['library_urn']
+    NameN = submissionOntology['name']
+    SpeciesN = submissionOntology['species']
+    LibraryURN = submissionOntology['library_urn']
 
     while tr is not None:
         td = tr.findAll('td')
@@ -137,7 +137,7 @@ def load_my_submissions(model, cookie=None):
             subUrnText = td[0].contents[0].contents[0].encode(CHARSET)
             subUrn = RDF.Uri(submission_view_url(subUrnText))
 
-            add_stmt(model, subUrn, TypeN, submitOntology['Submission'])
+            add_stmt(model, subUrn, TypeN, submissionOntology['Submission'])
                 
             name = get_contents(td[4])
             add_stmt(model, subUrn, NameN, name)
@@ -170,10 +170,10 @@ def load_my_submissions(model, cookie=None):
 def add_submission_to_library_urn(model, submissionUrn, predicate, library_id):
     """Add a link from a UCSC submission to woldlab library if needed
     """
-    libraryUrn = libraryNS[library_id]
+    libraryUrn = libraryNS[library_id+'/']
     query = RDF.Statement(submissionUrn, predicate, libraryUrn)
     if not model.contains_statement(query):
-        link = RDF.Statement(submissionUrn, predicate, libraryNS[library_id])
+        link = RDF.Statement(submissionUrn, predicate, libraryUrn)
         logger.info("Adding Sub -> Lib link: {0}".format(link))
         model.add_statement(link)
     else:
@@ -190,7 +190,7 @@ WHERE {{
   ?subid submissionOntology:name ?name
   OPTIONAL {{ ?subid submissionOntology:library_urn ?libid }}
   FILTER  (!bound(?libid))
-}}""".format(submissionOntology=submitOntology[''].uri)
+}}""".format(submissionOntology=submissionOntology[''].uri)
 )    
 
     results = missing_lib_query.execute(model)
@@ -211,7 +211,7 @@ def add_submission_creation_date(model, subUrn, cookie):
     creation_dates = list(model.find_statements(query))
     if len(creation_dates) == 0:
         logger.info("Getting creation date for: {0}".format(str(subUrn)))
-        soup = get_url_as_soup(str(subUrn.uri), 'GET', cookie)
+        soup = get_url_as_soup(str(subUrn), 'GET', cookie)
         created_label = soup.find(text="Created: ")
         if created_label:
             created_date = get_date_contents(created_label.next)
@@ -222,9 +222,9 @@ def add_submission_creation_date(model, subUrn, cookie):
         logger.debug("Found creation date for: {0}".format(str(subUrn)))
 
 def update_submission_detail(model, subUrn, status, recent_update, cookie):
-    HasStatusN = submitOntology['has_status']
-    StatusN = submitOntology['status']
-    LastModifyN = submitOntology['last_modify_date']
+    HasStatusN = submissionOntology['has_status']
+    StatusN = submissionOntology['status']
+    LastModifyN = submissionOntology['last_modify_date']
 
     status_nodes_query = RDF.Statement(subUrn, HasStatusN, None)
     status_nodes = list(model.find_statements(status_nodes_query))
@@ -258,7 +258,7 @@ def update_ddf(model, subUrn, statusNode, cookie):
     download_ddf_url = str(subUrn).replace('show', 'download_ddf')
     ddfUrn = RDF.Uri(download_ddf_url)
     
-    status_is_ddf = RDF.Statement(statusNode, TypeN, ddfNS['ddf'])
+    status_is_ddf = RDF.Statement(statusNode, TypeN, ddfNS[''])
     if not model.contains_statement(status_is_ddf):
         logging.info('Adding ddf to {0}, {1}'.format(subUrn, statusNode))
         ddf_text = get_url_as_text(download_ddf_url, 'GET', cookie)
@@ -288,7 +288,7 @@ def add_ddf_statements(model, statusNode, ddf_string):
 
         for f in files:
             fileNode = RDF.Node()
-            add_stmt(model, statusNode, submitOntology['has_file'], fileNode)
+            add_stmt(model, statusNode, submissionOntology['has_file'], fileNode)
             add_stmt(model, fileNode, rdfsNS['type'], ddfNS['file'])
             add_stmt(model, fileNode, ddfNS['filename'], f)
 
@@ -355,14 +355,6 @@ def get_date_contents(element):
         return None
 
         
-def load_into_model(model, parser_name, filename):
-    if not os.path.exists(filename):
-        raise IOError("Can't find {0}".format(filename))
-    
-    data = open(filename, 'r').read()
-    rdf_parser = RDF.Parser(name=parser_name)
-    rdf_parser.parse_string_into_model(model, data, ns_uri)
-
 def add_stmt(model, subject, predicate, object):
     """Convienence create RDF Statement and add to a model
     """
@@ -370,6 +362,7 @@ def add_stmt(model, subject, predicate, object):
         RDF.Statement(subject, predicate, object)
     )
 
+
 def login(cookie=None):
     """Login if we don't have a cookie
     """
index 1342dac7278d283c37148c05a1c5c5a5143d85c3..ca1615ed9d1b02a56f3d6487870398c236cd9652 100644 (file)
@@ -1,14 +1,14 @@
 # Produce list of submissions associated with a cell/replicate
 
 PREFIX rdfs:<http://www.w3.org/2000/01/rdf-schema#>
-PREFIX encodeSubmit:<http://jumpgate.caltech.edu/wiki/UCSCSubmissionOntology#>
+PREFIX ucscSubmission:<http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
 PREFIX libraryOntology:<http://jumpgate.caltech.edu/wiki/LibraryOntology#>
 
 SELECT distinct ?liburn ?cell ?replicate ?subid
 WHERE {
-    ?subid encodeSubmit:library_urn ?liburn ;
-           encodeSubmit:name ?name .
+    ?subid ucscSubmission:library_urn ?liburn ;
+           ucscSubmission:name ?name .
     ?liburn libraryOntology:cell_line ?cell ;
-            libraryOntology:replicate ?replicate
+            libraryOntology:replicate ?replicate .
 }
 ORDER BY ?cell ?replicate ?liburn