Merge changing lane_number to string and sequence finding code changes
author Diane Trout <diane@caltech.edu>
Mon, 24 Sep 2012 23:43:33 +0000 (16:43 -0700)
committer Diane Trout <diane@caltech.edu>
Mon, 24 Sep 2012 23:43:33 +0000 (16:43 -0700)
I started using actual file paths instead of synthetic submission
paths for naming where my sequence files are.

This one still won't generate GEO submissions correctly, as I'm
pretty sure not all of the queries have been updated yet.

htsworkflow/pipelines/sequences.py
htsworkflow/pipelines/test/test_sequences.py
htsworkflow/submission/condorfastq.py
htsworkflow/submission/geo.py
htsworkflow/submission/submission.py
htsworkflow/submission/test/test_condorfastq.py

index 479ce3ddcd330fa5fe4285215ed08edb81e1c710..acd100575744c94984d7b4b1ab5074188145844e 100644 (file)
@@ -164,7 +164,7 @@ class SequenceFile(object):
         # a bit unreliable... assumes filesystem is encoded in utf-8
         path = os.path.abspath(self.path.encode('utf-8'))
         fileNode = RDF.Node(RDF.Uri('file://' + path))
         # a bit unreliable... assumes filesystem is encoded in utf-8
         path = os.path.abspath(self.path.encode('utf-8'))
         fileNode = RDF.Node(RDF.Uri('file://' + path))
-        add(model, fileNode, rdfNS['type'], libNS['illumina_result'])
+        add(model, fileNode, rdfNS['type'], libNS['IlluminaResult'])
         add_lit(model, fileNode, libNS['flowcell_id'], self.flowcell)
         add_lit(model, fileNode, libNS['lane_number'], self.lane)
         if self.read is not None:
         add_lit(model, fileNode, libNS['flowcell_id'], self.flowcell)
         add_lit(model, fileNode, libNS['lane_number'], self.lane)
         if self.read is not None:
@@ -217,7 +217,7 @@ class SequenceFile(object):
             seq_id = RDF.Node(RDF.Uri(seq_id))
         result_statement = RDF.Statement(seq_id,
                                          rdfNS['type'],
             seq_id = RDF.Node(RDF.Uri(seq_id))
         result_statement = RDF.Statement(seq_id,
                                          rdfNS['type'],
-                                         libNS['illumina_result'])
+                                         libNS['IlluminaResult'])
         if not model.contains_statement(result_statement):
             raise KeyError(u"%s not found" % (unicode(seq_id),))
 
         if not model.contains_statement(result_statement):
             raise KeyError(u"%s not found" % (unicode(seq_id),))
 
@@ -275,7 +275,7 @@ def parse_srf(path, filename):
     basename, ext = os.path.splitext(filename)
     records = basename.split('_')
     flowcell = records[4]
     basename, ext = os.path.splitext(filename)
     records = basename.split('_')
     flowcell = records[4]
-    lane = int(records[5][0])
+    lane = records[5][0]
     fullpath = os.path.join(path, filename)
 
     if flowcell_dir != flowcell:
     fullpath = os.path.join(path, filename)
 
     if flowcell_dir != flowcell:
@@ -290,7 +290,7 @@ def parse_qseq(path, filename):
     records = basename.split('_')
     fullpath = os.path.join(path, filename)
     flowcell = records[4]
     records = basename.split('_')
     fullpath = os.path.join(path, filename)
     flowcell = records[4]
-    lane = int(records[5][1])
+    lane = records[5][1]
     read = int(records[6][1])
 
     if flowcell_dir != flowcell:
     read = int(records[6][1])
 
     if flowcell_dir != flowcell:
@@ -309,7 +309,7 @@ def parse_fastq(path, filename):
     if project is not None:
         # demultiplexed sample!
         flowcell = flowcell_dir
     if project is not None:
         # demultiplexed sample!
         flowcell = flowcell_dir
-        lane = int(records[2][-1])
+        lane = records[2][-1]
         read = int(records[3][-1])
         pf = True # as I understand it hiseq runs toss the ones that fail filter
         index = records[1]
         read = int(records[3][-1])
         pf = True # as I understand it hiseq runs toss the ones that fail filter
         index = records[1]
@@ -318,7 +318,7 @@ def parse_fastq(path, filename):
         sequence_type = 'split_fastq'
     else:
         flowcell = records[4]
         sequence_type = 'split_fastq'
     else:
         flowcell = records[4]
-        lane = int(records[5][1])
+        lane = records[5][1]
         read = int(records[6][1])
         pf = parse_fastq_pf_flag(records)
         index = None
         read = int(records[6][1])
         pf = parse_fastq_pf_flag(records)
         index = None
@@ -362,7 +362,7 @@ def parse_eland(path, filename, eland_match=None):
     fullpath = os.path.join(path, filename)
     flowcell, start, stop, project = get_flowcell_cycle(path)
     if eland_match.group('lane'):
     fullpath = os.path.join(path, filename)
     flowcell, start, stop, project = get_flowcell_cycle(path)
     if eland_match.group('lane'):
-        lane = int(eland_match.group('lane'))
+        lane = eland_match.group('lane')
     else:
         lane = None
     if eland_match.group('read'):
     else:
         lane = None
     if eland_match.group('read'):
@@ -415,15 +415,15 @@ def update_model_sequence_library(model, base_url):
     """Find sequence objects and add library information if its missing
     """
     file_body = """
     """Find sequence objects and add library information if its missing
     """
     file_body = """
-    prefix libNS: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+    prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
     select ?filenode ?flowcell_id ?lane_id ?library_id ?flowcell ?library
     where {
     select ?filenode ?flowcell_id ?lane_id ?library_id ?flowcell ?library
     where {
-       ?filenode a libNS:illumina_result ;
-                 libNS:flowcell_id ?flowcell_id ;
-                 libNS:lane_number ?lane_id .
-       OPTIONAL { ?filenode libNS:flowcell ?flowcell . }
-       OPTIONAL { ?filenode libNS:library ?library .}
-       OPTIONAL { ?filenode libNS:library_id ?library_id .}
+       ?filenode a libns:IlluminaResult ;
+                 libns:flowcell_id ?flowcell_id ;
+                 libns:lane_number ?lane_id .
+       OPTIONAL { ?filenode libns:flowcell ?flowcell . }
+       OPTIONAL { ?filenode libns:library ?library .}
+       OPTIONAL { ?filenode libns:library_id ?library_id .}
     }
     """
     LOGGER.debug("update_model_sequence_library query %s", file_body)
     }
     """
     LOGGER.debug("update_model_sequence_library query %s", file_body)
@@ -470,16 +470,16 @@ def guess_library_from_model(model, base_url, flowcell, lane_id):
     flowcellNode = RDF.Node(flowcell)
     flowcell = str(flowcell.uri)
     lane_body = """
     flowcellNode = RDF.Node(flowcell)
     flowcell = str(flowcell.uri)
     lane_body = """
-    prefix libNS: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+    prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
     prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
     prefix xsd: <http://www.w3.org/2001/XMLSchema#>
 
     select ?library ?lane
     where {{
     prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
     prefix xsd: <http://www.w3.org/2001/XMLSchema#>
 
     select ?library ?lane
     where {{
-      <{flowcell}> libNS:has_lane ?lane ;
-                   a libNS:IlluminaFlowcell .
-      ?lane libNS:lane_number {lane_id} ;
-            libNS:library ?library .
+      <{flowcell}> libns:has_lane ?lane ;
+                   a libns:IlluminaFlowcell .
+      ?lane libns:lane_number "{lane_id}" ;
+            libns:library ?library .
     }}
     """
     lane_body = lane_body.format(flowcell=flowcell, lane_id=lane_id)
     }}
     """
     lane_body = lane_body.format(flowcell=flowcell, lane_id=lane_id)
index e621875231d60a0da1207b1e2fb53d0e5c121fbe..accd43ecf8786b4d25a96683bf861940006cf44f 100644 (file)
@@ -74,7 +74,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<srf 42BW9AAXX 4 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<srf 42BW9AAXX 4 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
-        self.assertEqual(f0.lane, 4)
+        self.assertEqual(f0.lane, '4')
         self.assertEqual(f0.read, None)
         self.assertEqual(f0.pf, None)
         self.assertEqual(f0.cycle, 38)
         self.assertEqual(f0.read, None)
         self.assertEqual(f0.pf, None)
         self.assertEqual(f0.cycle, 38)
@@ -99,7 +99,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<qseq 42BW9AAXX 4 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<qseq 42BW9AAXX 4 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
-        self.assertEqual(f0.lane, 4)
+        self.assertEqual(f0.lane, '4')
         self.assertEqual(f0.read, 1)
         self.assertEqual(f0.pf, None)
         self.assertEqual(f0.cycle, 36)
         self.assertEqual(f0.read, 1)
         self.assertEqual(f0.pf, None)
         self.assertEqual(f0.cycle, 36)
@@ -121,7 +121,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(f0.path, pathname)
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<qseq ilmn200901 1 %s>" %(pathname,))
         self.assertEqual(f0.path, pathname)
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<qseq ilmn200901 1 %s>" %(pathname,))
-        self.assertEqual(f0.lane, 1)
+        self.assertEqual(f0.lane, '1')
         self.assertEqual(f0.read, 1)
         self.assertEqual(f0.pf, None)
         self.assertEqual(f0.cycle, 202)
         self.assertEqual(f0.read, 1)
         self.assertEqual(f0.pf, None)
         self.assertEqual(f0.cycle, 202)
@@ -145,7 +145,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<fastq 42BW9AAXX 4 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<fastq 42BW9AAXX 4 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
-        self.assertEqual(f0.lane, 4)
+        self.assertEqual(f0.lane, '4')
         self.assertEqual(f0.read, 1)
         self.assertEqual(f0.pf, True)
         self.assertEqual(f0.cycle, 38)
         self.assertEqual(f0.read, 1)
         self.assertEqual(f0.pf, True)
         self.assertEqual(f0.cycle, 38)
@@ -167,7 +167,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<fastq 42BW9AAXX 4 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<fastq 42BW9AAXX 4 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
-        self.assertEqual(f0.lane, 4)
+        self.assertEqual(f0.lane, '4')
         self.assertEqual(f0.read, 2)
         self.assertEqual(f0.pf, False)
         self.assertEqual(f0.cycle, 38)
         self.assertEqual(f0.read, 2)
         self.assertEqual(f0.pf, False)
         self.assertEqual(f0.cycle, 38)
@@ -191,7 +191,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<split_fastq 42BW9AAXX 1 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<split_fastq 42BW9AAXX 1 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
-        self.assertEqual(f0.lane, 1)
+        self.assertEqual(f0.lane, '1')
         self.assertEqual(f0.read, 1)
         self.assertEqual(f0.pf, True)
         self.assertEqual(f0.project, '11111')
         self.assertEqual(f0.read, 1)
         self.assertEqual(f0.pf, True)
         self.assertEqual(f0.project, '11111')
@@ -215,7 +215,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<split_fastq 42BW9AAXX 1 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<split_fastq 42BW9AAXX 1 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
-        self.assertEqual(f0.lane, 1)
+        self.assertEqual(f0.lane, '1')
         self.assertEqual(f0.read, 2)
         self.assertEqual(f0.pf, True)
         self.assertEqual(f0.project, '11112')
         self.assertEqual(f0.read, 2)
         self.assertEqual(f0.pf, True)
         self.assertEqual(f0.project, '11112')
@@ -275,7 +275,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(f.filetype, 'eland')
         self.assertEqual(f.path, pathname)
         self.assertEqual(f.flowcell, '42BW9AAXX')
         self.assertEqual(f.filetype, 'eland')
         self.assertEqual(f.path, pathname)
         self.assertEqual(f.flowcell, '42BW9AAXX')
-        self.assertEqual(f.lane, 4)
+        self.assertEqual(f.lane, '4')
         self.assertEqual(f.read, None)
         self.assertEqual(f.pf, None)
         self.assertEqual(f.cycle, 38)
         self.assertEqual(f.read, None)
         self.assertEqual(f.pf, None)
         self.assertEqual(f.cycle, 38)
@@ -290,7 +290,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(f.filetype, 'eland')
         self.assertEqual(f.path, pathname)
         self.assertEqual(f.flowcell, '42BW9AAXX')
         self.assertEqual(f.filetype, 'eland')
         self.assertEqual(f.path, pathname)
         self.assertEqual(f.flowcell, '42BW9AAXX')
-        self.assertEqual(f.lane, 4)
+        self.assertEqual(f.lane, '4')
         self.assertEqual(f.read, 1)
         self.assertEqual(f.pf, None)
         self.assertEqual(f.cycle, 152)
         self.assertEqual(f.read, 1)
         self.assertEqual(f.pf, None)
         self.assertEqual(f.cycle, 152)
@@ -346,7 +346,7 @@ class SequenceFileTests(unittest.TestCase):
         files = list(model.find_statements(
             RDF.Statement(None,
                           rdfNS['type'],
         files = list(model.find_statements(
             RDF.Statement(None,
                           rdfNS['type'],
-                          libraryOntology['illumina_result'])))
+                          libraryOntology['IlluminaResult'])))
         self.assertEqual(len(files), 5)
         files = list(model.find_statements(
             RDF.Statement(None,
         self.assertEqual(len(files), 5)
         files = list(model.find_statements(
             RDF.Statement(None,
@@ -411,13 +411,13 @@ class SequenceFileTests(unittest.TestCase):
     a libns:IlluminaFlowcell .
 
 <{base}/lane/1169>
     a libns:IlluminaFlowcell .
 
 <{base}/lane/1169>
-    libns:lane_number 1 ; libns:library <{base}/library/10923/> .
+    libns:lane_number "1" ; libns:library <{base}/library/10923/> .
 <{base}/lane/1170>
 <{base}/lane/1170>
-    libns:lane_number 2 ; libns:library <{base}/library/10924/> .
+    libns:lane_number "2" ; libns:library <{base}/library/10924/> .
 <{base}/lane/1171>
 <{base}/lane/1171>
-    libns:lane_number 3 ; libns:library <{base}/library/12345/> .
+    libns:lane_number "3" ; libns:library <{base}/library/12345/> .
 <{base}/lane/1172>
 <{base}/lane/1172>
-    libns:lane_number 3 ; libns:library <{base}/library/10930/> .
+    libns:lane_number "3" ; libns:library <{base}/library/10930/> .
 """.format(base=base_url)
         model = get_model()
         load_string_into_model(model, 'turtle', flowcellInfo)
 """.format(base=base_url)
         model = get_model()
         load_string_into_model(model, 'turtle', flowcellInfo)
@@ -472,7 +472,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(seq.filetype, seq2.filetype)
         self.assertEqual(seq2.filetype, 'split_fastq')
         self.assertEqual(seq.lane, seq2.lane)
         self.assertEqual(seq.filetype, seq2.filetype)
         self.assertEqual(seq2.filetype, 'split_fastq')
         self.assertEqual(seq.lane, seq2.lane)
-        self.assertEqual(seq2.lane, 3)
+        self.assertEqual(seq2.lane, '3')
         self.assertEqual(seq.read, seq2.read)
         self.assertEqual(seq2.read, 1)
         self.assertEqual(seq.project, seq2.project)
         self.assertEqual(seq.read, seq2.read)
         self.assertEqual(seq2.read, 1)
         self.assertEqual(seq.project, seq2.project)
@@ -491,7 +491,7 @@ class SequenceFileTests(unittest.TestCase):
         file_types_seen = set()
         file_types_to_see = set(['fastq', 'srf', 'eland', 'qseq'])
         lanes = set()
         file_types_seen = set()
         file_types_to_see = set(['fastq', 'srf', 'eland', 'qseq'])
         lanes = set()
-        lanes_to_see = set((1,2,3))
+        lanes_to_see = set(('1','2','3'))
         with SimulateSimpleTree() as tree:
             seqs = sequences.scan_for_sequences([tree.root, '/a/b/c/98345'])
             for s in seqs:
         with SimulateSimpleTree() as tree:
             seqs = sequences.scan_for_sequences([tree.root, '/a/b/c/98345'])
             for s in seqs:
@@ -512,7 +512,7 @@ class SequenceFileTests(unittest.TestCase):
         file_types_seen = set()
         file_types_to_see = set(['split_fastq'])
         lanes = set()
         file_types_seen = set()
         file_types_to_see = set(['split_fastq'])
         lanes = set()
-        lanes_to_see = set((1,2))
+        lanes_to_see = set(('1','2'))
         projects_seen = set()
         projects_to_see = set(('11111', '21111', '31111'))
         with SimulateHiSeqTree() as tree:
         projects_seen = set()
         projects_to_see = set(('11111', '21111', '31111'))
         with SimulateHiSeqTree() as tree:
index 9f4f1368176fa1aa1bccec47b4c302831e56e30b..01fe6c5a19274869b22e41821011135ee8ccb6ae 100644 (file)
@@ -117,7 +117,7 @@ class CondorFastqExtract(object):
         Find archived sequence files associated with our results.
         """
         self.import_libraries(result_map)
         Find archived sequence files associated with our results.
         """
         self.import_libraries(result_map)
-        flowcell_ids = self.find_relavant_flowcell_ids()
+        flowcell_ids = self.find_relevant_flowcell_ids()
         self.import_sequences(flowcell_ids)
 
         query_text = """
         self.import_sequences(flowcell_ids)
 
         query_text = """
@@ -138,7 +138,7 @@ class CondorFastqExtract(object):
                       libns:library ?library ;
                       libns:library_id ?library_id ;
                       libns:file_type ?filetype ;
                       libns:library ?library ;
                       libns:library_id ?library_id ;
                       libns:file_type ?filetype ;
-                      a libns:illumina_result .
+                      a libns:IlluminaResult .
             ?flowcell libns:read_length ?read_length ;
                       libns:flowcell_type ?flowcell_type .
             OPTIONAL { ?flowcell libns:flowcell_status ?flowcell_status }
             ?flowcell libns:read_length ?read_length ;
                       libns:flowcell_type ?flowcell_type .
             OPTIONAL { ?flowcell libns:flowcell_status ?flowcell_status }
@@ -174,12 +174,12 @@ class CondorFastqExtract(object):
         if not self.model.contains_statement(q):
             present = True
             load_into_model(self.model, 'rdfa', library)
         if not self.model.contains_statement(q):
             present = True
             load_into_model(self.model, 'rdfa', library)
-        LOGGER.debug("Did we import %s: %s", library, present)
+        LOGGER.debug("Did we import %s: %s", library.uri, present)
 
 
-    def find_relavant_flowcell_ids(self):
+    def find_relevant_flowcell_ids(self):
         """Generate set of flowcell ids that had samples of interest on them
         """
         """Generate set of flowcell ids that had samples of interest on them
         """
-        flowcell_query =RDF.SPARQLQuery("""
+        flowcell_query = RDF.SPARQLQuery("""
 prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
 
 select distinct ?flowcell ?flowcell_id
 prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
 
 select distinct ?flowcell ?flowcell_id
@@ -192,13 +192,17 @@ WHERE {
         flowcell_ids = set()
         for r in flowcell_query.execute(self.model):
             flowcell_ids.add( fromTypedNode(r['flowcell_id']) )
         flowcell_ids = set()
         for r in flowcell_query.execute(self.model):
             flowcell_ids.add( fromTypedNode(r['flowcell_id']) )
-            LOGGER.debug("Flowcells = %s" %(unicode(flowcell_ids)))
-            flowcell_test = RDF.Statement(r['flowcell'],
-                                          rdfNS['type'],
-                                          libraryOntology['IlluminaFlowcell'])
-            if not self.model.contains_statement(flowcell_test):
-                # we probably lack full information about the flowcell.
+            imported = False
+            a_lane = self.model.get_target(r['flowcell'],
+                                           libraryOntology['has_lane'])
+            print a_lane
+            if a_lane is None:
+                imported = True
+                # we lack information about which lanes were on this flowcell
                 load_into_model(self.model, 'rdfa', r['flowcell'])
                 load_into_model(self.model, 'rdfa', r['flowcell'])
+            LOGGER.debug("Did we imported %s: %s" % (r['flowcell'].uri,
+                                                     imported))
+
         return flowcell_ids
 
     def import_sequences(self, flowcell_ids):
         return flowcell_ids
 
     def import_sequences(self, flowcell_ids):
index 413d2c3ae5e99daa7025a6be3101edb50d96da85..85947158390f699afa8a76cd719a0b1fadf26ebf 100644 (file)
@@ -111,7 +111,7 @@ class GEOSubmission(Submission):
 
         context = Context({
             'submission': str(analysis_node.uri),
 
         context = Context({
             'submission': str(analysis_node.uri),
-            'file_class': str(geoSoftNS['supplemental'])
+            'file_class': str(geoSoftNS['supplemental'].uri)
             })
 
         return self.execute_query(query_template, context)
             })
 
         return self.execute_query(query_template, context)
@@ -123,7 +123,7 @@ class GEOSubmission(Submission):
 
         context = Context({
             'submission': str(analysis_node.uri),
 
         context = Context({
             'submission': str(analysis_node.uri),
-            'file_class': str(geoSoftNS['raw']),
+            'file_class': str(geoSoftNS['raw'].uri),
             })
 
         lanes = {}
             })
 
         lanes = {}
index 18fa3b2bc9f487b63915ca02a3c060adbfeedf3b..2b04ff43a05ce6e70d6ccb7b20f7f17d54165699 100644 (file)
@@ -126,7 +126,7 @@ class Submission(object):
 
     def link_file_to_classes(self, filename, submissionNode, submission_uri, analysis_dir):
         # add file specific information
 
     def link_file_to_classes(self, filename, submissionNode, submission_uri, analysis_dir):
         # add file specific information
-        fileNode = RDF.Node(RDF.Uri(submission_uri + '/' + filename))
+        fileNode = RDF.Node(RDF.Uri('file://'+ os.path.abspath(filename)))
         self.model.add_statement(
             RDF.Statement(submissionNode,
                           dafTermOntology['has_file'],
         self.model.add_statement(
             RDF.Statement(submissionNode,
                           dafTermOntology['has_file'],
index f4d2e2c71aef39fc8f32dbb6058fa8db71510af9..dd41c61aab348d3877ebd2f3fb81dbc9c957ad35 100644 (file)
@@ -122,22 +122,22 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
-        libns:lane_number 1 .
+        libns:lane_number "1" .
 <http://localhost/lane/3402>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
 <http://localhost/lane/3402>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
-        libns:lane_number 2 .
+        libns:lane_number "2" .
 <http://localhost/lane/3403>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
 <http://localhost/lane/3403>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
-        libns:lane_number 3 .
+        libns:lane_number "3" .
 <http://localhost/lane/3404>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/11154/> ;
 <http://localhost/lane/3404>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/11154/> ;
-        libns:lane_number 4 .
+        libns:lane_number "4" .
         # paired_end 1;
         # read_length 33;
         # status "Unknown"@en .
         # paired_end 1;
         # read_length 33;
         # status "Unknown"@en .
@@ -145,22 +145,22 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
-        libns:lane_number 5 .
+        libns:lane_number "5" .
 <http://localhost/lane/3406>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
 <http://localhost/lane/3406>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
-        libns:lane_number 6 .
+        libns:lane_number "6" .
 <http://localhost/lane/3407>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
 <http://localhost/lane/3407>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
-        libns:lane_number 7 .
+        libns:lane_number "7" .
 <http://localhost/lane/3408>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
 <http://localhost/lane/3408>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
-        libns:lane_number 8 .
+        libns:lane_number "8" .
 
 <http://localhost/flowcell/42JUYAAXX/>
         a libns:IlluminaFlowcell ;
 
 <http://localhost/flowcell/42JUYAAXX/>
         a libns:IlluminaFlowcell ;
@@ -181,27 +181,27 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
-        libns:lane_number 1 .
+        libns:lane_number "1" .
 <http://localhost/lane/4202>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
 <http://localhost/lane/4202>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
-        libns:lane_number 2 .
+        libns:lane_number "2" .
 <http://localhost/lane/4203>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
 <http://localhost/lane/4203>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
-        libns:lane_number 3 .
+        libns:lane_number "3" .
 <http://localhost/lane/4204>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
 <http://localhost/lane/4204>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
-        libns:lane_number 4 .
+        libns:lane_number "4" .
 <http://localhost/lane/4205>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/11154/> ;
 <http://localhost/lane/4205>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/11154/> ;
-        libns:lane_number 5 .
+        libns:lane_number "5" .
         # paired_end 1;
         # read_length 76;
         # status "Unknown"@en .
         # paired_end 1;
         # read_length 76;
         # status "Unknown"@en .
@@ -209,17 +209,17 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
-        libns:lane_number 6 .
+        libns:lane_number "6" .
 <http://localhost/lane/4207>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
 <http://localhost/lane/4207>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
-        libns:lane_number 7 .
+        libns:lane_number "7" .
 <http://localhost/lane/4208>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
 <http://localhost/lane/4208>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
-        libns:lane_number 8 .
+        libns:lane_number "8" .
 
 <http://localhost/flowcell/61MJTAAXX/>
         a libns:IlluminaFlowcell ;
 
 <http://localhost/flowcell/61MJTAAXX/>
         a libns:IlluminaFlowcell ;
@@ -240,32 +240,32 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
-        libns:lane_number 1 .
+        libns:lane_number "1" .
 <http://localhost/lane/6602>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
 <http://localhost/lane/6602>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
-        libns:lane_number 2 .
+        libns:lane_number "2" .
 <http://localhost/lane/6603>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
 <http://localhost/lane/6603>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
-        libns:lane_number 3 .
+        libns:lane_number "3" .
 <http://localhost/lane/6604>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
 <http://localhost/lane/6604>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
-        libns:lane_number 4 .
+        libns:lane_number "4" .
 <http://localhost/lane/6605>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
 <http://localhost/lane/6605>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
-        libns:lane_number 5 .
+        libns:lane_number "5" .
 <http://localhost/lane/6606>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/11154/> ;
 <http://localhost/lane/6606>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/11154/> ;
-        libns:lane_number 6 .
+        libns:lane_number "6" .
         # paired_end 1;
         # read_length 76;
         # status "Unknown"@en .
         # paired_end 1;
         # read_length 76;
         # status "Unknown"@en .
@@ -273,12 +273,12 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
-        libns:lane_number 7 .
+        libns:lane_number "7" .
 <http://localhost/lane/6608>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
 <http://localhost/lane/6608>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
-        libns:lane_number 8 .
+        libns:lane_number "8" .
 
 <http://localhost/flowcell/30DY0AAXX/>
         a libns:IlluminaFlowcell ;
 
 <http://localhost/flowcell/30DY0AAXX/>
         a libns:IlluminaFlowcell ;
@@ -299,42 +299,42 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
-        libns:lane_number 1 .
+        libns:lane_number "1" .
 <http://localhost/lane/3802>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
 <http://localhost/lane/3802>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
-        libns:lane_number 2 .
+        libns:lane_number "2" .
 <http://localhost/lane/3803>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
 <http://localhost/lane/3803>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
-        libns:lane_number 3 .
+        libns:lane_number "3" .
 <http://localhost/lane/3804>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
 <http://localhost/lane/3804>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
-        libns:lane_number 4 .
+        libns:lane_number "4" .
 <http://localhost/lane/3805>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
 <http://localhost/lane/3805>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
-        libns:lane_number 5 .
+        libns:lane_number "5" .
 <http://localhost/lane/3806>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
 <http://localhost/lane/3806>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
-        libns:lane_number 6 .
+        libns:lane_number "6" .
 <http://localhost/lane/3807>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
 <http://localhost/lane/3807>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
-        libns:lane_number 7 .
+        libns:lane_number "7" .
 <http://localhost/lane/3808>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/11154/> ;
 <http://localhost/lane/3808>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/11154/> ;
-        libns:lane_number 8 .
+        libns:lane_number "8" .
         # paired_end 1;
         # read_length 76;
         # status "Unknown"@en .
         # paired_end 1;
         # read_length 76;
         # status "Unknown"@en .
@@ -352,7 +352,7 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
         libns:library <http://localhost/library/12345/> ;
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
         libns:library <http://localhost/library/12345/> ;
-        libns:lane_number 3 .
+        libns:lane_number "3" .
         # paired_end 1;
         # read_length 101;
         # status "Unknown"@en .
         # paired_end 1;
         # read_length 101;
         # status "Unknown"@en .
@@ -361,7 +361,7 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
         libns:library <http://localhost/library/11154/> ;
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
         libns:library <http://localhost/library/11154/> ;
-        libns:lane_number 3 .
+        libns:lane_number "3" .
         # paired_end 1;
         # read_length 101;
         # status "Unknown"@en .
         # paired_end 1;
         # read_length 101;
         # status "Unknown"@en .
@@ -446,39 +446,40 @@ class TestCondorFastq(unittest.TestCase):
         inference = Infer(self.extract.model)
         errmsgs = list(inference.run_validation())
         self.assertEqual(len(errmsgs), 0)
         inference = Infer(self.extract.model)
         errmsgs = list(inference.run_validation())
         self.assertEqual(len(errmsgs), 0)
+        os.chdir(self.tempdir)
 
     def tearDown(self):
         shutil.rmtree(self.tempdir)
         os.chdir(self.cwd)
 
 
     def tearDown(self):
         shutil.rmtree(self.tempdir)
         os.chdir(self.cwd)
 
-    def test_find_relavant_flowcell_ids(self):
+    def test_find_relevant_flowcell_ids(self):
         expected = set(('30221AAXX',
                         '42JUYAAXX',
                         '61MJTAAXX',
                         '30DY0AAXX',
                         'C02F9ACXX'))
         expected = set(('30221AAXX',
                         '42JUYAAXX',
                         '61MJTAAXX',
                         '30DY0AAXX',
                         'C02F9ACXX'))
-        flowcell_ids = self.extract.find_relavant_flowcell_ids()
+        flowcell_ids = self.extract.find_relevant_flowcell_ids()
         self.assertEqual(flowcell_ids, expected)
 
     def test_find_archive_sequence(self):
         seqs = self.extract.find_archive_sequence_files(self.result_map)
 
         expected = set([
         self.assertEqual(flowcell_ids, expected)
 
     def test_find_archive_sequence(self):
         seqs = self.extract.find_archive_sequence_files(self.result_map)
 
         expected = set([
-            (u'11154', u'42JUYAAXX', 5, 1, 76, True, 'qseq'),
-            (u'11154', u'42JUYAAXX', 5, 2, 76, True, 'qseq'),
-            (u'11154', u'61MJTAAXX', 6, 1, 76, False, 'qseq'),
-            (u'11154', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
-            (u'11154', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
-            (u'11154', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
-            (u'11154', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
-            (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
-            (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
-            (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
-            (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
-            (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
-            (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
-            (u'11154', u'30221AAXX', 4, 1, 33, False, 'srf'),
-            (u'11154', u'30DY0AAXX', 8, 1, 151, True, 'srf')
+            (u'11154', u'42JUYAAXX', '5', 1, 76, True, 'qseq'),
+            (u'11154', u'42JUYAAXX', '5', 2, 76, True, 'qseq'),
+            (u'11154', u'61MJTAAXX', '6', 1, 76, False, 'qseq'),
+            (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+            (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+            (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+            (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+            (u'11154', u'30221AAXX', '4', 1, 33, False, 'srf'),
+            (u'11154', u'30DY0AAXX', '8', 1, 151, True, 'srf')
         ])
         found = set([(l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle, l.ispaired, l.filetype) for l in seqs])
         self.assertEqual(expected, found)
         ])
         found = set([(l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle, l.ispaired, l.filetype) for l in seqs])
         self.assertEqual(expected, found)