This might actually generate a soft file with raw & supplemental data.
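
Roughly how the revised Submission constructor might be used after this change; the host value, storage setup, and submission name below are illustrative assumptions, not part of the commit:

    import RDF
    from htsworkflow.submission.submission import Submission

    # hypothetical in-memory Redland model; the commit only adds the host argument
    model = RDF.Model(RDF.MemoryStorage())

    # host is assumed here; with it, libraryNS becomes
    # RDF.NS('http://jumpgate.caltech.edu/library/') instead of the old hard-coded value
    submission = Submission('example-submission', model, 'http://jumpgate.caltech.edu')
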
[htsworkflow.git] / htsworkflow/submission/submission.py
index e4ce90c73b073287913ec2f76f5f1d5f0d9bd887..c944b7612d127266ce62c973cda4a54356b0129d 100644 (file)
@@ -27,13 +27,13 @@ from htsworkflow.submission.daf import \
 LOGGER = logging.getLogger(__name__)
 
 class Submission(object):
-    def __init__(self, name, model):
+    def __init__(self, name, model, host):
         self.name = name
         self.model = model
 
         self.submissionSet = get_submission_uri(self.name)
         self.submissionSetNS = RDF.NS(str(self.submissionSet) + '#')
-        self.libraryNS = RDF.NS('http://jumpgate.caltech.edu/library/')
+        self.libraryNS = RDF.NS('{0}/library/'.format(host))
 
         self.__view_map = None
 
@@ -57,7 +57,8 @@ class Submission(object):
 
         submission_files = os.listdir(analysis_dir)
         for filename in submission_files:
-            self.construct_file_attributes(analysis_dir, libNode, filename)
+            pathname = os.path.abspath(os.path.join(analysis_dir, filename))
+            self.construct_file_attributes(analysis_dir, libNode, pathname)
 
     def construct_file_attributes(self, analysis_dir, libNode, pathname):
         """Looking for the best extension
@@ -83,7 +84,7 @@ class Submission(object):
                                                     rdfNS['type'])
         if file_classification is None:
             errmsg = 'Could not find class for {0}'
-            logger.warning(errmsg.format(str(file_type)))
+            LOGGER.warning(errmsg.format(str(file_type)))
             return
 
         self.model.add_statement(
@@ -113,7 +114,7 @@ class Submission(object):
                           an_analysis))
 
         # add file specific information
-        fileNode = self.link_file_to_classes(filename,
+        fileNode = self.link_file_to_classes(pathname,
                                              an_analysis,
                                              an_analysis_uri,
                                              analysis_dir)
@@ -124,9 +125,10 @@ class Submission(object):
                           file_type))
         LOGGER.debug("Done.")
 
-    def link_file_to_classes(self, filename, submissionNode, submission_uri, analysis_dir):
+    def link_file_to_classes(self, pathname, submissionNode, submission_uri, analysis_dir):
         # add file specific information
-        fileNode = RDF.Node(RDF.Uri(submission_uri + '/' + filename))
+        path, filename = os.path.split(pathname)
+        fileNode = RDF.Node(RDF.Uri('file://'+ os.path.abspath(pathname)))
         self.model.add_statement(
             RDF.Statement(submissionNode,
                           dafTermOntology['has_file'],
@@ -149,13 +151,44 @@ class Submission(object):
                 RDF.Statement(fileNode, dafTermOntology['md5sum'], md5))
 
     def _add_library_details_to_model(self, libNode):
+        # attributes that can have multiple values
+        set_attributes = set((libraryOntology['has_lane'],
+                              libraryOntology['has_mappings'],
+                              dafTermOntology['has_file']))
         parser = RDF.Parser(name='rdfa')
         new_statements = parser.parse_as_stream(libNode.uri)
+        toadd = []
         for s in new_statements:
+            # always add "collections"
+            if s.predicate in set_attributes:
+                toadd.append(s)
+                continue
             # don't override things we already have in the model
             targets = list(self.model.get_targets(s.subject, s.predicate))
             if len(targets) == 0:
-                self.model.append(s)
+                toadd.append(s)
+
+        for s in toadd:
+            self.model.append(s)
+
+        self._add_lane_details(libNode)
+
+    def _add_lane_details(self, libNode):
+        """Import lane details
+        """
+        query = RDF.Statement(libNode, libraryOntology['has_lane'], None)
+        lanes = []
+        for lane_stmt in self.model.find_statements(query):
+            lanes.append(lane_stmt.object)
+
+        parser = RDF.Parser(name='rdfa')
+        for lane in lanes:
+            LOGGER.debug("Importing %s" % (lane.uri,))
+            try:
+                parser.parse_into_model(self.model, lane.uri)
+            except RDF.RedlandError, e:
+                LOGGER.error("Error accessing %s" % (lane.uri,))
+                raise e
 
 
     def find_best_match(self, filename):
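
For reference, a minimal sketch of the Redland RDFa import pattern the new _add_library_details_to_model and _add_lane_details code relies on; the library page URL and the model setup are assumptions for illustration, not values taken from the commit:

    import RDF

    model = RDF.Model(RDF.MemoryStorage())
    parser = RDF.Parser(name='rdfa')

    # hypothetical library page carrying RDFa markup
    lib_uri = RDF.Uri('http://jumpgate.caltech.edu/library/12345/')

    # stream the page's triples and add them only if the model has no value yet,
    # as _add_library_details_to_model does for non-collection predicates
    for s in parser.parse_as_stream(lib_uri):
        if len(list(model.get_targets(s.subject, s.predicate))) == 0:
            model.append(s)

    # or pull a whole page directly into the model, as _add_lane_details does per lane
    parser.parse_into_model(model, lib_uri)
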