When collecting files for a geo submission, group on library id
author: Diane Trout <diane@caltech.edu>
Thu, 27 Sep 2012 17:37:56 +0000 (10:37 -0700)
committer: Diane Trout <diane@caltech.edu>
Thu, 27 Sep 2012 17:37:56 +0000 (10:37 -0700)
instead of the previous grouping on lane.

htsworkflow/submission/geo.py
htsworkflow/templates/geo_fastqs.sparql

index 097bea1083af8c1ffc39b3edea9a6e94e0a63fcd..ef8d9457218a76ce53f0872f44efafd6d478dea1 100644 (file)
@@ -131,10 +131,10 @@ class GEOSubmission(Submission):
             data = {}
             for k, v in row.items():
                 data[k] = v
-            lane = str(data['lane'])
-            lanes.setdefault(lane, []).append(data)
+            library = str(data['library'])
+            lanes.setdefault(library, []).append(data)
         result = []
-        for lane, files in lanes.items():
+        for library, files in lanes.items():
             if len(files) > 2:
                 errmsg = "Don't know what to do with more than 2 raw files"
                 raise ValueError(errmsg)
@@ -143,7 +143,7 @@ class GEOSubmission(Submission):
             elif len(files) == 1:
                 is_paired = False
             elif len(files) == 0:
-                raise RuntimeError("Empty lane list discovered")
+                raise RuntimeError("Empty library list discovered")
             files = self._format_filename(files, is_paired)
             files = self._format_flowcell_type(files, is_paired)
             files = self._format_read_length(files, is_paired)
index 428cef7933fb196b1b69760125ecead4b511fdcb..8f19c994bcf6b1ad93eb662ad9cfc53a7c5cda55 100644 (file)
@@ -5,7 +5,7 @@ PREFIX ncbiTaxon: <http://www.ncbi.nlm.nih.gov/Taxonomy/Browser/wwwtax.cgi?id=>
 PREFIX geoSoft: <http://www.ncbi.nlm.nih.gov/geo/info/soft2.html#>
 PREFIX dc: <http://purl.org/dc/elements/1.1/>
 
-select distinct ?lane ?filename, ?md5sum, ?file_type ?file_type_label ?flowcell_id ?read_length ?flowcell_type ?image_software ?image_version ?basecall_software ?basecall_version ?sequencer_model
+select distinct ?library ?filename, ?md5sum, ?file_type ?file_type_label ?flowcell_id ?read_length ?flowcell_type ?image_software ?image_version ?basecall_software ?basecall_version ?sequencer_model
 WHERE {
   <{{submission}}> ucscDaf:has_file ?file ;
                    submissionOntology:library ?library ;