This might actually generate a SOFT file with raw & supplemental data.

author Diane Trout <diane@caltech.edu>

Tue, 25 Sep 2012 23:18:42 +0000 (16:18 -0700)

committer Diane Trout <diane@caltech.edu>

Tue, 25 Sep 2012 23:18:42 +0000 (16:18 -0700)
author Diane Trout <diane@caltech.edu>
Tue, 25 Sep 2012 23:18:42 +0000 (16:18 -0700)
committer Diane Trout <diane@caltech.edu>
Tue, 25 Sep 2012 23:18:42 +0000 (16:18 -0700)
diff --git a/encode_submission/geo_gather.py b/encode_submission/geo_gather.py

index 9db286d37b6326e1b576e1045a3c04b5e92beabe..f9b07ab775a4491226db9c9b97215ea82f7cc54e 100644 (file)
--- a/encode_submission/geo_gather.py
+++ b/encode_submission/geo_gather.py
@@ -58,9 +58,7 @@ def main(cmdline=None):
      model = get_model(opts.model, opts.db_path)
      mapper = None
      if opts.name:
      model = get_model(opts.model, opts.db_path)
      mapper = None
      if opts.name:
-        mapper = GEOSubmission(opts.name,  model)
-        if opts.library_url is not None:
-            mapper.library_url = opts.library_url
+        mapper = GEOSubmission(opts.name,  model, host=opts.host)
          submission_uri = get_submission_uri(opts.name)
  
  
          submission_uri = get_submission_uri(opts.name)
  
  
diff --git a/htsworkflow/submission/condorfastq.py b/htsworkflow/submission/condorfastq.py

index 01fe6c5a19274869b22e41821011135ee8ccb6ae..f78a964c978da660d88f5ac8f263cc7f54f15149 100644 (file)
--- a/htsworkflow/submission/condorfastq.py
+++ b/htsworkflow/submission/condorfastq.py
@@ -195,7 +195,6 @@ WHERE {
              imported = False
              a_lane = self.model.get_target(r['flowcell'],
                                             libraryOntology['has_lane'])
              imported = False
              a_lane = self.model.get_target(r['flowcell'],
                                             libraryOntology['has_lane'])
-            print a_lane
              if a_lane is None:
                  imported = True
                  # we lack information about which lanes were on this flowcell
              if a_lane is None:
                  imported = True
                  # we lack information about which lanes were on this flowcell
diff --git a/htsworkflow/submission/geo.py b/htsworkflow/submission/geo.py

index 85947158390f699afa8a76cd719a0b1fadf26ebf..097bea1083af8c1ffc39b3edea9a6e94e0a63fcd 100644 (file)
--- a/htsworkflow/submission/geo.py
+++ b/htsworkflow/submission/geo.py
@@ -17,8 +17,8 @@ from django.template import Context, loader
  LOGGER = logging.getLogger(__name__)
  
  class GEOSubmission(Submission):
  LOGGER = logging.getLogger(__name__)
  
  class GEOSubmission(Submission):
-    def __init__(self, name, model):
-        super(GEOSubmission, self).__init__(name, model)
+    def __init__(self, name, model, host):
+        super(GEOSubmission, self).__init__(name, model, host)
  
      def make_soft(self, result_map):
          samples = []
  
      def make_soft(self, result_map):
          samples = []
@@ -36,7 +36,7 @@ class GEOSubmission(Submission):
                  LOGGER.error(errmsg.format(str(an_analysis),))
                  continue
              elif len(metadata) > 1:
                  LOGGER.error(errmsg.format(str(an_analysis),))
                  continue
              elif len(metadata) > 1:
-                errmsg = 'Confused there are more than one samples for %s'
+                errmsg = 'Confused there are more than one sample for %s'
                  LOGGER.debug(errmsg % (str(an_analysis),))
              metadata = metadata[0]
              metadata['raw'] = self.get_raw_files(an_analysis)
                  LOGGER.debug(errmsg % (str(an_analysis),))
              metadata = metadata[0]
              metadata['raw'] = self.get_raw_files(an_analysis)
diff --git a/htsworkflow/submission/submission.py b/htsworkflow/submission/submission.py

index 2b04ff43a05ce6e70d6ccb7b20f7f17d54165699..c944b7612d127266ce62c973cda4a54356b0129d 100644 (file)
--- a/htsworkflow/submission/submission.py
+++ b/htsworkflow/submission/submission.py
@@ -27,13 +27,13 @@ from htsworkflow.submission.daf import \
  LOGGER = logging.getLogger(__name__)
  
  class Submission(object):
  LOGGER = logging.getLogger(__name__)
  
  class Submission(object):
-    def __init__(self, name, model):
+    def __init__(self, name, model, host):
          self.name = name
          self.model = model
  
          self.submissionSet = get_submission_uri(self.name)
          self.submissionSetNS = RDF.NS(str(self.submissionSet) + '#')
          self.name = name
          self.model = model
  
          self.submissionSet = get_submission_uri(self.name)
          self.submissionSetNS = RDF.NS(str(self.submissionSet) + '#')
-        self.libraryNS = RDF.NS('http://jumpgate.caltech.edu/library/')
+        self.libraryNS = RDF.NS('{0}/library/'.format(host))
  
          self.__view_map = None
  
  
          self.__view_map = None
  
@@ -57,7 +57,8 @@ class Submission(object):
  
          submission_files = os.listdir(analysis_dir)
          for filename in submission_files:
  
          submission_files = os.listdir(analysis_dir)
          for filename in submission_files:
-            self.construct_file_attributes(analysis_dir, libNode, filename)
+            pathname = os.path.abspath(os.path.join(analysis_dir, filename))
+            self.construct_file_attributes(analysis_dir, libNode, pathname)
  
      def construct_file_attributes(self, analysis_dir, libNode, pathname):
          """Looking for the best extension
  
      def construct_file_attributes(self, analysis_dir, libNode, pathname):
          """Looking for the best extension
@@ -113,7 +114,7 @@ class Submission(object):
                            an_analysis))
  
          # add file specific information
                            an_analysis))
  
          # add file specific information
-        fileNode = self.link_file_to_classes(filename,
+        fileNode = self.link_file_to_classes(pathname,
                                               an_analysis,
                                               an_analysis_uri,
                                               analysis_dir)
                                               an_analysis,
                                               an_analysis_uri,
                                               analysis_dir)
@@ -124,9 +125,10 @@ class Submission(object):
                            file_type))
          LOGGER.debug("Done.")
  
                            file_type))
          LOGGER.debug("Done.")
  
-    def link_file_to_classes(self, filename, submissionNode, submission_uri, analysis_dir):
+    def link_file_to_classes(self, pathname, submissionNode, submission_uri, analysis_dir):
          # add file specific information
          # add file specific information
-        fileNode = RDF.Node(RDF.Uri('file://'+ os.path.abspath(filename)))
+        path, filename = os.path.split(pathname)
+        fileNode = RDF.Node(RDF.Uri('file://'+ os.path.abspath(pathname)))
          self.model.add_statement(
              RDF.Statement(submissionNode,
                            dafTermOntology['has_file'],
          self.model.add_statement(
              RDF.Statement(submissionNode,
                            dafTermOntology['has_file'],
diff --git a/htsworkflow/templates/geo_fastqs.sparql b/htsworkflow/templates/geo_fastqs.sparql

index de9097ba9962849dde661bbd9ebd38521573f781..428cef7933fb196b1b69760125ecead4b511fdcb 100644 (file)
--- a/htsworkflow/templates/geo_fastqs.sparql
+++ b/htsworkflow/templates/geo_fastqs.sparql
@@ -3,21 +3,23 @@ PREFIX submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntol
  PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
  PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
  PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
  PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
  PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
  PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
+PREFIX dc: <http://purl.org/dc/elements/1.1/>
  
  select distinct ?lane ?filename, ?md5sum, ?file_type ?file_type_label ?flowcell_id ?read_length ?flowcell_type ?image_software ?image_version ?basecall_software ?basecall_version ?sequencer_model
  WHERE {
  
  select distinct ?lane ?filename, ?md5sum, ?file_type ?file_type_label ?flowcell_id ?read_length ?flowcell_type ?image_software ?image_version ?basecall_software ?basecall_version ?sequencer_model
  WHERE {
-  <{{submission}}> submissionOntology:library ?library ;
+  <{{submission}}> ucscDaf:has_file ?file ;
+                   submissionOntology:library ?library ;
                     a submissionOntology:submission .
  
    ?file ucscDaf:filename ?filename ;
          ucscDaf:md5sum ?md5sum ;
                     a submissionOntology:submission .
  
    ?file ucscDaf:filename ?filename ;
          ucscDaf:md5sum ?md5sum ;
-        libraryOntology:library ?library ;
+        dc:source ?source ;
          a ?file_type .
    ?file_type a <{{file_class}}> ;
               geoSoft:fileTypeLabel ?file_type_label .
  
          a ?file_type .
    ?file_type a <{{file_class}}> ;
               geoSoft:fileTypeLabel ?file_type_label .
  
-  ?library libraryOntology:has_lane ?lane .
-  ?lane libraryOntology:flowcell ?flowcell .
+  ?source libraryOntology:flowcell ?flowcell .
+
    ?flowcell libraryOntology:flowcell_id ?flowcell_id ;
              libraryOntology:read_length ?read_length ;
              libraryOntology:flowcell_type ?flowcell_type ;
    ?flowcell libraryOntology:flowcell_id ?flowcell_id ;
              libraryOntology:read_length ?read_length ;
              libraryOntology:flowcell_type ?flowcell_type ;
diff --git a/htsworkflow/templates/geo_files.sparql b/htsworkflow/templates/geo_files.sparql

index e3fcb9d8d4028c57f752e254019227f6e61b85bf..6fd7cac6ad92a92517b38240c0cf1b7ccbc57d70 100644 (file)
--- a/htsworkflow/templates/geo_files.sparql
+++ b/htsworkflow/templates/geo_files.sparql
@@ -3,20 +3,22 @@ PREFIX submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntol
  PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
  PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
  PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
  PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
  PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
  PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
+PREFIX dc: <http://purl.org/dc/elements/1.1/>
  
  select distinct ?filename, ?md5sum, ?file_type ?file_type_label ?flowcell_id ?read_length ?flowcell_type ?image_software ?image_version ?basecall_software ?basecall_version ?sequencer_model
  WHERE {
    <{{submission}}> ucscDaf:has_file ?file ;
  
  select distinct ?filename, ?md5sum, ?file_type ?file_type_label ?flowcell_id ?read_length ?flowcell_type ?image_software ?image_version ?basecall_software ?basecall_version ?sequencer_model
  WHERE {
    <{{submission}}> ucscDaf:has_file ?file ;
+                   submissionOntology:library ?library ;
                     a submissionOntology:submission .
  
    ?file ucscDaf:filename ?filename ;
          ucscDaf:md5sum ?md5sum ;
                     a submissionOntology:submission .
  
    ?file ucscDaf:filename ?filename ;
          ucscDaf:md5sum ?md5sum ;
-        libraryOntology:has_lane ?lane ;
          a ?file_type .
    ?file_type a <{{file_class}}> ;
               geoSoft:fileTypeLabel ?file_type_label .
  
          a ?file_type .
    ?file_type a <{{file_class}}> ;
               geoSoft:fileTypeLabel ?file_type_label .
  
-  OPTIONAL { ?lane libraryOntology:flowcell ?flowcell .
+  OPTIONAL { ?file dc:source ?source_file .
+             ?source_file libraryOntology:flowcell ?flowcell .
               ?flowcell libraryOntology:flowcell_id ?flowcell_id ;
                         libraryOntology:read_length ?read_length ;
                         libraryOntology:flowcell_type ?flowcell_type ;
               ?flowcell libraryOntology:flowcell_id ?flowcell_id ;
                         libraryOntology:read_length ?read_length ;
                         libraryOntology:flowcell_type ?flowcell_type ;
author	Diane Trout <diane@caltech.edu>
	Tue, 25 Sep 2012 23:18:42 +0000 (16:18 -0700)
committer	Diane Trout <diane@caltech.edu>
	Tue, 25 Sep 2012 23:18:42 +0000 (16:18 -0700)
encode_submission/geo_gather.py		patch \| blob \| history
htsworkflow/submission/condorfastq.py		patch \| blob \| history
htsworkflow/submission/geo.py		patch \| blob \| history
htsworkflow/submission/submission.py		patch \| blob \| history
htsworkflow/templates/geo_fastqs.sparql		patch \| blob \| history
htsworkflow/templates/geo_files.sparql		patch \| blob \| history