metadata['supplimental'] = self.get_sample_files(
an_analysis,
geoSoftNS['supplemental'])
+ metadata['run'] = self.get_run_details(an_analysis)
samples.append(metadata)
soft_template = loader.get_template('geo_submission.soft')
results = self.execute_query(query_template, context)
for r in results:
-
r['dataProtocol'] = str(r['dataProtocol']).replace('\n', ' ')
+
return results
def get_sample_files(self, analysis_node, file_class):
return self.execute_query(query_template, context)
+ def get_run_details(self, analysis_node):
+ """Get information about runs
+ """
+ query_template = loader.get_template('geo_run_details.sparql')
+
+ context = Context({
+ 'submission': str(analysis_node.uri),
+ })
+
+ return self.execute_query(query_template, context)
+
def query_to_soft_dictionary(self, results, heading):
attributes = []
for r in results:
RDF.Statement(fileNode, dafTermOntology['md5sum'], md5))
def _add_library_details_to_model(self, libNode):
+ # predicates that may legitimately have several objects for one subject
+ set_attributes = set((libraryOntology['has_lane'],
+ libraryOntology['has_mappings'],
+ dafTermOntology['has_file']))
parser = RDF.Parser(name='rdfa')
new_statements = parser.parse_as_stream(libNode.uri)
+ toadd = []  # statements are buffered here and appended only after the loop, so the lookups below never see statements from this parse
for s in new_statements:
+ # statements with a multi-valued predicate are always added, without checking the model
+ if s.predicate in set_attributes:
+ toadd.append(s)
+ continue
# don't override things we already have in the model
targets = list(self.model.get_targets(s.subject, s.predicate))
if len(targets) == 0:
- self.model.append(s)
+ toadd.append(s)
+
+ for s in toadd:
+ self.model.append(s)
+
+ self._add_lane_details(libNode)  # runs after the library's own statements have been appended to the model
+
+ def _add_lane_details(self, libNode):
+ """Import lane details
+ """
+ query = RDF.Statement(libNode, libraryOntology['has_lane'], None)
+ lanes = []
+ for lane_stmt in self.model.find_statements(query):
+ lanes.append(lane_stmt.object)
+
+ parser = RDF.Parser(name='rdfa')
+ for lane in lanes:
+ LOGGER.debug("Importing %s" % (lane.uri,))
+ try:
+ parser.parse_into_model(self.model, lane.uri)
+ except RDF.RedlandError, e:
+ LOGGER.error("Error accessing %s" % (lane.uri,))
+ raise e
def find_best_match(self, filename):
--- /dev/null
+PREFIX libraryOntology: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+PREFIX submissionOntology: <http://jumpgate.caltech.edu/wiki/UcscSubmissionOntology#>
+PREFIX ucscDaf: <http://jumpgate.caltech.edu/wiki/UcscDaf#>
+PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
+PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
+PREFIX cells: <http://encodewiki.ucsc.edu/EncodeDCC/index.php/Cell_lines#>
+
+# Right now we only select the sequencer model.
+# It might make sense to also report the per-flowcell details (image and basecall software) listed in the commented-out select below.
+#select distinct ?flowcell ?image_software ?image_version ?basecall_software ?basecall_version ?sequencer_model
+select distinct ?sequencer_model
+where {
+ <{{submission}}> submissionOntology:library ?library ;
+ a submissionOntology:submission .
+
+ ?library libraryOntology:library_id ?library_id ;
+ libraryOntology:has_lane ?lane ;
+ a libraryOntology:library .
+ OPTIONAL { ?flowcell libraryOntology:has_lane ?lane .  # the group is optional as a whole: if any pattern in it fails to match, ?sequencer_model is left unbound
+ ?flowcell libraryOntology:image_software ?image_software ;
+ libraryOntology:image_version ?image_version ;
+ libraryOntology:basecall_software ?basecall_software ;
+ libraryOntology:basecall_version ?basecall_version ;
+ libraryOntology:sequenced_by ?sequencer .
+ ?sequencer libraryOntology:sequencer_model ?sequencer_model
+ }
+}
^SAMPLE={{row.name}}
!Sample_type=SRA
!Sample_title={{row.name}}
-!Sample_series_id = {{ series_id }}
-!Sample_instrument_model = Illumina Genome Analyzer
-!Sample_instrument_model = Illumina Genome Analyzer II
-!Sample_instrument_model = Illumina Genome Analyzer IIx
-!Sample_instrument_model = Illumina HiSeq 2000
+!Sample_series_id = {{ series_id }}{% for run in row.run %}
+!Sample_instrument_model = {{ run.sequencer_model }}{% endfor %}
!Sample_channel_count = 1
!Sample_organism_ch1 = {{ row.species_name }}
!Sample_taxid_ch1 = {{ row.taxon_id }}