From 8cdbaeb1a2f28dc6760f18050044d0639b577f5c Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Tue, 24 Jul 2012 12:39:34 -0700 Subject: [PATCH] Get actual list of sequencers used for a library. I do have pipeline version numbers available, but with the current version of the query it would end up duplicating the sequencer model number. (there are a lot more version combinations than there are sequencers) --- htsworkflow/submission/geo.py | 14 ++++++++- htsworkflow/submission/submission.py | 33 +++++++++++++++++++- htsworkflow/templates/geo_run_details.sparql | 27 ++++++++++++++++ htsworkflow/templates/geo_submission.soft | 7 ++--- 4 files changed, 74 insertions(+), 7 deletions(-) create mode 100644 htsworkflow/templates/geo_run_details.sparql diff --git a/htsworkflow/submission/geo.py b/htsworkflow/submission/geo.py index 8ff349f..ad66422 100644 --- a/htsworkflow/submission/geo.py +++ b/htsworkflow/submission/geo.py @@ -40,6 +40,7 @@ class GEOSubmission(Submission): metadata['supplimental'] = self.get_sample_files( an_analysis, geoSoftNS['supplemental']) + metadata['run'] = self.get_run_details(an_analysis) samples.append(metadata) soft_template = loader.get_template('geo_submission.soft') @@ -98,8 +99,8 @@ class GEOSubmission(Submission): results = self.execute_query(query_template, context) for r in results: - r['dataProtocol'] = str(r['dataProtocol']).replace('\n', ' ') + return results def get_sample_files(self, analysis_node, file_class): @@ -114,6 +115,17 @@ class GEOSubmission(Submission): return self.execute_query(query_template, context) + def get_run_details(self, analysis_node): + """Get information about runs + """ + query_template = loader.get_template('geo_run_details.sparql') + + context = Context({ + 'submission': str(analysis_node.uri), + }) + + return self.execute_query(query_template, context) + def query_to_soft_dictionary(self, results, heading): attributes = [] for r in results: diff --git a/htsworkflow/submission/submission.py 
b/htsworkflow/submission/submission.py index e4ce90c..6dd630a 100644 --- a/htsworkflow/submission/submission.py +++ b/htsworkflow/submission/submission.py @@ -149,13 +149,44 @@ class Submission(object): RDF.Statement(fileNode, dafTermOntology['md5sum'], md5)) def _add_library_details_to_model(self, libNode): + # attributes that can have multiple values + set_attributes = set((libraryOntology['has_lane'], + libraryOntology['has_mappings'], + dafTermOntology['has_file'])) parser = RDF.Parser(name='rdfa') new_statements = parser.parse_as_stream(libNode.uri) + toadd = [] for s in new_statements: + # always add "collections" + if s.predicate in set_attributes: + toadd.append(s) + continue # don't override things we already have in the model targets = list(self.model.get_targets(s.subject, s.predicate)) if len(targets) == 0: - self.model.append(s) + toadd.append(s) + + for s in toadd: + self.model.append(s) + + self._add_lane_details(libNode) + + def _add_lane_details(self, libNode): + """Import lane details + """ + query = RDF.Statement(libNode, libraryOntology['has_lane'], None) + lanes = [] + for lane_stmt in self.model.find_statements(query): + lanes.append(lane_stmt.object) + + parser = RDF.Parser(name='rdfa') + for lane in lanes: + LOGGER.debug("Importing %s" % (lane.uri,)) + try: + parser.parse_into_model(self.model, lane.uri) + except RDF.RedlandError, e: + LOGGER.error("Error accessing %s" % (lane.uri,)) + raise e def find_best_match(self, filename): diff --git a/htsworkflow/templates/geo_run_details.sparql b/htsworkflow/templates/geo_run_details.sparql new file mode 100644 index 0000000..aa7e438 --- /dev/null +++ b/htsworkflow/templates/geo_run_details.sparql @@ -0,0 +1,27 @@ +PREFIX libraryOntology: +PREFIX submissionOntology: +PREFIX ucscDaf: +PREFIX ncbiTaxon: +PREFIX geoSoft: +PREFIX cells: + +# right now we're just grabbing the sequencer model +# it might make sense to report each of the flowcell/image software. 
+#select distinct ?flowcell ?image_software ?image_version ?basecall_software ?basecall_version ?sequencer_model +select distinct ?sequencer_model +where { + <{{submission}}> submissionOntology:library ?library ; + a submissionOntology:submission . + + ?library libraryOntology:library_id ?library_id ; + libraryOntology:has_lane ?lane ; + a libraryOntology:library . + OPTIONAL { ?flowcell libraryOntology:has_lane ?lane . + ?flowcell libraryOntology:image_software ?image_software ; + libraryOntology:image_version ?image_version ; + libraryOntology:basecall_software ?basecall_software ; + libraryOntology:basecall_version ?basecall_version ; + libraryOntology:sequenced_by ?sequencer . + ?sequencer libraryOntology:sequencer_model ?sequencer_model + } +} diff --git a/htsworkflow/templates/geo_submission.soft b/htsworkflow/templates/geo_submission.soft index 00c0b4d..3541e56 100644 --- a/htsworkflow/templates/geo_submission.soft +++ b/htsworkflow/templates/geo_submission.soft @@ -4,11 +4,8 @@ ^SAMPLE={{row.name}} !Sample_type=SRA !Sample_title={{row.name}} -!Sample_series_id = {{ series_id }} -!Sample_instrument_model = Illumina Genome Analyzer -!Sample_instrument_model = Illumina Genome Analyzer II -!Sample_instrument_model = Illumina Genome Analyzer IIx -!Sample_instrument_model = Illumina HiSeq 2000 +!Sample_series_id = {{ series_id }}{% for run in row.run %} +!Sample_instrument_model = {{ run.sequencer_model }}{% endfor %} !Sample_channel_count = 1 !Sample_organism_ch1 = {{ row.species_name }} !Sample_taxid_ch1 = {{ row.taxon_id }} -- 2.30.2