Use the same model for building fastqs as for building soft file
author Diane Trout <diane@caltech.edu>
Mon, 10 Sep 2012 21:16:08 +0000 (14:16 -0700)
committer Diane Trout <diane@caltech.edu>
Mon, 10 Sep 2012 21:16:08 +0000 (14:16 -0700)
(And let the user specify a base filename on the command line)

encode_submission/geo_gather.py
htsworkflow/submission/condorfastq.py

index a2901058371ecec871224076f94362a3eaf6ce43..5d6bf1142f193c3c88a566270ed6579b4f58273c 100644 (file)
@@ -79,6 +79,7 @@ def main(cmdline=None):
     if opts.fastq:
         flowcells = os.path.join(opts.sequence, 'flowcells')
         extractor = CondorFastqExtract(opts.host, flowcells,
+                                       model=opts.model,
                                        force=opts.force)
         extractor.create_scripts(results)
 
index aab906b8e2212916bbc716dccc76b2225d97d3eb..e48afd5869b48a9bbc69059aa5ebcad90608312f 100644 (file)
@@ -31,6 +31,7 @@ LOGGER = logging.getLogger(__name__)
 class CondorFastqExtract(object):
     def __init__(self, host, sequences_path,
                  log_path='log',
+                 model=None,
                  force=False):
         """Extract fastqs from results archive
 
@@ -42,7 +43,7 @@ class CondorFastqExtract(object):
           force (bool): do we force overwriting current files?
         """
         self.host = host
-        self.model = get_model()
+        self.model = get_model(model)
         self.sequences_path = sequences_path
         self.log_path = log_path
         self.force = force
@@ -179,7 +180,7 @@ class CondorFastqExtract(object):
         flowcell_query =RDF.SPARQLQuery("""
 prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
 
-select distinct ?library ?flowcell ?flowcell_id
+select distinct ?flowcell ?flowcell_id
 WHERE {
   ?library a libns:library ;
            libns:has_lane ?lane .
@@ -193,9 +194,9 @@ WHERE {
             flowcell_test = RDF.Statement(r['flowcell'],
                                           rdfNS['type'],
                                           libraryOntology['illumina_flowcell'])
-        if not self.model.contains_statement(flowcell_test):
-            # we probably lack full information about the flowcell.
-            load_into_model(self.model, 'rdfa', r['flowcell'])
+            if not self.model.contains_statement(flowcell_test):
+                # we probably lack full information about the flowcell.
+                load_into_model(self.model, 'rdfa', r['flowcell'])
         return flowcell_ids
 
     def import_sequences(self, flowcell_ids):