Properly constructing the geo soft file really needed multiple sparql queries.
[htsworkflow.git] / htsworkflow / submission / geo.py
index 737b1bb353458ceb0d961adf551a680cc1f9399f..8ff349fe9ae7444c5cc82f20c15e8458d6cc0599 100644 (file)
@@ -1,4 +1,5 @@
 import logging
+import os
 
 import RDF
 
@@ -6,6 +7,8 @@ from htsworkflow.submission.submission import Submission
 
 from htsworkflow.util.rdfhelp import \
      fromTypedNode, \
+     geoSoftNS, \
+     simplifyUri, \
      submissionOntology
 
 from django.conf import settings
@@ -19,13 +22,33 @@ class GEOSubmission(Submission):
 
     def make_soft(self, result_map):
         samples = []
+        platform = self.get_platform_metadata()
+        platform_attribs = dict(platform)
+        platform_id = platform_attribs['^platform']
+        series = self.get_series_metadata()
+        series_attribs = dict(series)
+        series_id = series_attribs['^series']
         for lib_id, result_dir in result_map.items():
             an_analysis = self.get_submission_node(result_dir)
-            samples.append(self.get_sample_metadata(an_analysis))
+            metadata = self.get_sample_metadata(an_analysis)
+            if len(metadata) > 1:
+                errmsg = 'Confused there are more than one samples for %s'
+                LOGGER.debug(errmsg % (str(an_analysis,)))
+            metadata = metadata[0]
+            metadata['raw'] = self.get_sample_files(an_analysis,
+                                                    geoSoftNS['raw'])
+            metadata['supplimental'] = self.get_sample_files(
+                an_analysis,
+                geoSoftNS['supplemental'])
+            samples.append(metadata)
 
         soft_template = loader.get_template('geo_submission.soft')
         context = Context({
-            'samples': samples
+            'platform': platform,
+            'series': series,
+            'samples': samples,
+            'platform_id': platform_id,
+            'series_id': series_id,
         })
         print str(soft_template.render(context))
 
@@ -39,19 +62,69 @@ class GEOSubmission(Submission):
         else:
             return True
 
+    def get_platform_metadata(self):
+        """Gather information for filling out sample section of a SOFT file
+        """
+        query_template = loader.get_template('geo_platform.sparql')
+        submission = str(self.submissionSetNS[''].uri)
+        context = Context({
+            'submission': submission,
+            })
+
+        results = self.execute_query(query_template, context)
+        return self.query_to_soft_dictionary(results, 'platform')
+
+    def get_series_metadata(self):
+        """Gather information for filling out sample section of a SOFT file
+        """
+        query_template = loader.get_template('geo_series.sparql')
+        submission = str(self.submissionSetNS[''].uri)
+        context = Context({
+            'submission': submission,
+            })
+
+        results = self.execute_query(query_template, context)
+        return self.query_to_soft_dictionary(results, 'series')
+
     def get_sample_metadata(self, analysis_node):
         """Gather information for filling out sample section of a SOFT file
         """
-        query_template = loader.get_template('geo_submission.sparql')
+        query_template = loader.get_template('geo_samples.sparql')
 
         context = Context({
             'submission': str(analysis_node.uri),
+            'submissionSet': str(self.submissionSetNS[''].uri),
             })
 
-        formatted_query = query_template.render(context)
-        query = RDF.SPARQLQuery(str(formatted_query))
-        rdfstream = query.execute(self.model)
-        results = []
-        for r in rdfstream:
-            results.append(r)
+        results = self.execute_query(query_template, context)
+        for r in results:
+
+            r['dataProtocol'] = str(r['dataProtocol']).replace('\n', ' ')
         return results
+
+    def get_sample_files(self, analysis_node, file_class):
+        """Gather files
+        """
+        query_template = loader.get_template('geo_files.sparql')
+
+        context = Context({
+            'submission': str(analysis_node.uri),
+            'file_class': str(file_class)
+            })
+
+        return self.execute_query(query_template, context)
+
+    def query_to_soft_dictionary(self, results, heading):
+        attributes = []
+        for r in results:
+            name = simplifyUri(geoSoftNS, r['name'])
+            if name is not None:
+                if name.lower() == heading.lower():
+                    name = '^' + name
+                else:
+                    name = '!' + name
+                for v in fromTypedNode(r['value']).split(os.linesep):
+                    v = v.strip()
+                    if len(v) > 0:
+                        attributes.append((name, v))
+        return attributes