Merge changing lane_number to string and sequence finding code changes
author: Diane Trout <diane@caltech.edu>
Mon, 24 Sep 2012 23:43:33 +0000 (16:43 -0700)
committer: Diane Trout <diane@caltech.edu>
Mon, 24 Sep 2012 23:43:33 +0000 (16:43 -0700)
I started using actual file paths instead of synthetic submission
paths to identify where my sequence files are.

This one still won't generate GEO submissions correctly, as I'm
pretty sure not all of the queries have been updated yet.

htsworkflow/pipelines/sequences.py
htsworkflow/pipelines/test/test_sequences.py
htsworkflow/submission/condorfastq.py
htsworkflow/submission/geo.py
htsworkflow/submission/submission.py
htsworkflow/submission/test/test_condorfastq.py

index 479ce3ddcd330fa5fe4285215ed08edb81e1c710..acd100575744c94984d7b4b1ab5074188145844e 100644 (file)
@@ -164,7 +164,7 @@ class SequenceFile(object):
         # a bit unreliable... assumes filesystem is encoded in utf-8
         path = os.path.abspath(self.path.encode('utf-8'))
         fileNode = RDF.Node(RDF.Uri('file://' + path))
-        add(model, fileNode, rdfNS['type'], libNS['illumina_result'])
+        add(model, fileNode, rdfNS['type'], libNS['IlluminaResult'])
         add_lit(model, fileNode, libNS['flowcell_id'], self.flowcell)
         add_lit(model, fileNode, libNS['lane_number'], self.lane)
         if self.read is not None:
@@ -217,7 +217,7 @@ class SequenceFile(object):
             seq_id = RDF.Node(RDF.Uri(seq_id))
         result_statement = RDF.Statement(seq_id,
                                          rdfNS['type'],
-                                         libNS['illumina_result'])
+                                         libNS['IlluminaResult'])
         if not model.contains_statement(result_statement):
             raise KeyError(u"%s not found" % (unicode(seq_id),))
 
@@ -275,7 +275,7 @@ def parse_srf(path, filename):
     basename, ext = os.path.splitext(filename)
     records = basename.split('_')
     flowcell = records[4]
-    lane = int(records[5][0])
+    lane = records[5][0]
     fullpath = os.path.join(path, filename)
 
     if flowcell_dir != flowcell:
@@ -290,7 +290,7 @@ def parse_qseq(path, filename):
     records = basename.split('_')
     fullpath = os.path.join(path, filename)
     flowcell = records[4]
-    lane = int(records[5][1])
+    lane = records[5][1]
     read = int(records[6][1])
 
     if flowcell_dir != flowcell:
@@ -309,7 +309,7 @@ def parse_fastq(path, filename):
     if project is not None:
         # demultiplexed sample!
         flowcell = flowcell_dir
-        lane = int(records[2][-1])
+        lane = records[2][-1]
         read = int(records[3][-1])
         pf = True # as I understand it hiseq runs toss the ones that fail filter
         index = records[1]
@@ -318,7 +318,7 @@ def parse_fastq(path, filename):
         sequence_type = 'split_fastq'
     else:
         flowcell = records[4]
-        lane = int(records[5][1])
+        lane = records[5][1]
         read = int(records[6][1])
         pf = parse_fastq_pf_flag(records)
         index = None
@@ -362,7 +362,7 @@ def parse_eland(path, filename, eland_match=None):
     fullpath = os.path.join(path, filename)
     flowcell, start, stop, project = get_flowcell_cycle(path)
     if eland_match.group('lane'):
-        lane = int(eland_match.group('lane'))
+        lane = eland_match.group('lane')
     else:
         lane = None
     if eland_match.group('read'):
@@ -415,15 +415,15 @@ def update_model_sequence_library(model, base_url):
     """Find sequence objects and add library information if its missing
     """
     file_body = """
-    prefix libNS: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+    prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
     select ?filenode ?flowcell_id ?lane_id ?library_id ?flowcell ?library
     where {
-       ?filenode a libNS:illumina_result ;
-                 libNS:flowcell_id ?flowcell_id ;
-                 libNS:lane_number ?lane_id .
-       OPTIONAL { ?filenode libNS:flowcell ?flowcell . }
-       OPTIONAL { ?filenode libNS:library ?library .}
-       OPTIONAL { ?filenode libNS:library_id ?library_id .}
+       ?filenode a libns:IlluminaResult ;
+                 libns:flowcell_id ?flowcell_id ;
+                 libns:lane_number ?lane_id .
+       OPTIONAL { ?filenode libns:flowcell ?flowcell . }
+       OPTIONAL { ?filenode libns:library ?library .}
+       OPTIONAL { ?filenode libns:library_id ?library_id .}
     }
     """
     LOGGER.debug("update_model_sequence_library query %s", file_body)
@@ -470,16 +470,16 @@ def guess_library_from_model(model, base_url, flowcell, lane_id):
     flowcellNode = RDF.Node(flowcell)
     flowcell = str(flowcell.uri)
     lane_body = """
-    prefix libNS: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+    prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
     prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
     prefix xsd: <http://www.w3.org/2001/XMLSchema#>
 
     select ?library ?lane
     where {{
-      <{flowcell}> libNS:has_lane ?lane ;
-                   a libNS:IlluminaFlowcell .
-      ?lane libNS:lane_number {lane_id} ;
-            libNS:library ?library .
+      <{flowcell}> libns:has_lane ?lane ;
+                   a libns:IlluminaFlowcell .
+      ?lane libns:lane_number "{lane_id}" ;
+            libns:library ?library .
     }}
     """
     lane_body = lane_body.format(flowcell=flowcell, lane_id=lane_id)
index e621875231d60a0da1207b1e2fb53d0e5c121fbe..accd43ecf8786b4d25a96683bf861940006cf44f 100644 (file)
@@ -74,7 +74,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<srf 42BW9AAXX 4 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
-        self.assertEqual(f0.lane, 4)
+        self.assertEqual(f0.lane, '4')
         self.assertEqual(f0.read, None)
         self.assertEqual(f0.pf, None)
         self.assertEqual(f0.cycle, 38)
@@ -99,7 +99,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<qseq 42BW9AAXX 4 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
-        self.assertEqual(f0.lane, 4)
+        self.assertEqual(f0.lane, '4')
         self.assertEqual(f0.read, 1)
         self.assertEqual(f0.pf, None)
         self.assertEqual(f0.cycle, 36)
@@ -121,7 +121,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(f0.path, pathname)
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<qseq ilmn200901 1 %s>" %(pathname,))
-        self.assertEqual(f0.lane, 1)
+        self.assertEqual(f0.lane, '1')
         self.assertEqual(f0.read, 1)
         self.assertEqual(f0.pf, None)
         self.assertEqual(f0.cycle, 202)
@@ -145,7 +145,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<fastq 42BW9AAXX 4 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
-        self.assertEqual(f0.lane, 4)
+        self.assertEqual(f0.lane, '4')
         self.assertEqual(f0.read, 1)
         self.assertEqual(f0.pf, True)
         self.assertEqual(f0.cycle, 38)
@@ -167,7 +167,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<fastq 42BW9AAXX 4 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
-        self.assertEqual(f0.lane, 4)
+        self.assertEqual(f0.lane, '4')
         self.assertEqual(f0.read, 2)
         self.assertEqual(f0.pf, False)
         self.assertEqual(f0.cycle, 38)
@@ -191,7 +191,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<split_fastq 42BW9AAXX 1 %s>" %(pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
-        self.assertEqual(f0.lane, 1)
+        self.assertEqual(f0.lane, '1')
         self.assertEqual(f0.read, 1)
         self.assertEqual(f0.pf, True)
         self.assertEqual(f0.project, '11111')
@@ -215,7 +215,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(unicode(f0), unicode(pathname))
         self.assertEqual(repr(f0), "<split_fastq 42BW9AAXX 1 %s>" % (pathname,))
         self.assertEqual(f0.flowcell, '42BW9AAXX')
-        self.assertEqual(f0.lane, 1)
+        self.assertEqual(f0.lane, '1')
         self.assertEqual(f0.read, 2)
         self.assertEqual(f0.pf, True)
         self.assertEqual(f0.project, '11112')
@@ -275,7 +275,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(f.filetype, 'eland')
         self.assertEqual(f.path, pathname)
         self.assertEqual(f.flowcell, '42BW9AAXX')
-        self.assertEqual(f.lane, 4)
+        self.assertEqual(f.lane, '4')
         self.assertEqual(f.read, None)
         self.assertEqual(f.pf, None)
         self.assertEqual(f.cycle, 38)
@@ -290,7 +290,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(f.filetype, 'eland')
         self.assertEqual(f.path, pathname)
         self.assertEqual(f.flowcell, '42BW9AAXX')
-        self.assertEqual(f.lane, 4)
+        self.assertEqual(f.lane, '4')
         self.assertEqual(f.read, 1)
         self.assertEqual(f.pf, None)
         self.assertEqual(f.cycle, 152)
@@ -346,7 +346,7 @@ class SequenceFileTests(unittest.TestCase):
         files = list(model.find_statements(
             RDF.Statement(None,
                           rdfNS['type'],
-                          libraryOntology['illumina_result'])))
+                          libraryOntology['IlluminaResult'])))
         self.assertEqual(len(files), 5)
         files = list(model.find_statements(
             RDF.Statement(None,
@@ -411,13 +411,13 @@ class SequenceFileTests(unittest.TestCase):
     a libns:IlluminaFlowcell .
 
 <{base}/lane/1169>
-    libns:lane_number 1 ; libns:library <{base}/library/10923/> .
+    libns:lane_number "1" ; libns:library <{base}/library/10923/> .
 <{base}/lane/1170>
-    libns:lane_number 2 ; libns:library <{base}/library/10924/> .
+    libns:lane_number "2" ; libns:library <{base}/library/10924/> .
 <{base}/lane/1171>
-    libns:lane_number 3 ; libns:library <{base}/library/12345/> .
+    libns:lane_number "3" ; libns:library <{base}/library/12345/> .
 <{base}/lane/1172>
-    libns:lane_number 3 ; libns:library <{base}/library/10930/> .
+    libns:lane_number "3" ; libns:library <{base}/library/10930/> .
 """.format(base=base_url)
         model = get_model()
         load_string_into_model(model, 'turtle', flowcellInfo)
@@ -472,7 +472,7 @@ class SequenceFileTests(unittest.TestCase):
         self.assertEqual(seq.filetype, seq2.filetype)
         self.assertEqual(seq2.filetype, 'split_fastq')
         self.assertEqual(seq.lane, seq2.lane)
-        self.assertEqual(seq2.lane, 3)
+        self.assertEqual(seq2.lane, '3')
         self.assertEqual(seq.read, seq2.read)
         self.assertEqual(seq2.read, 1)
         self.assertEqual(seq.project, seq2.project)
@@ -491,7 +491,7 @@ class SequenceFileTests(unittest.TestCase):
         file_types_seen = set()
         file_types_to_see = set(['fastq', 'srf', 'eland', 'qseq'])
         lanes = set()
-        lanes_to_see = set((1,2,3))
+        lanes_to_see = set(('1','2','3'))
         with SimulateSimpleTree() as tree:
             seqs = sequences.scan_for_sequences([tree.root, '/a/b/c/98345'])
             for s in seqs:
@@ -512,7 +512,7 @@ class SequenceFileTests(unittest.TestCase):
         file_types_seen = set()
         file_types_to_see = set(['split_fastq'])
         lanes = set()
-        lanes_to_see = set((1,2))
+        lanes_to_see = set(('1','2'))
         projects_seen = set()
         projects_to_see = set(('11111', '21111', '31111'))
         with SimulateHiSeqTree() as tree:
index 9f4f1368176fa1aa1bccec47b4c302831e56e30b..01fe6c5a19274869b22e41821011135ee8ccb6ae 100644 (file)
@@ -117,7 +117,7 @@ class CondorFastqExtract(object):
         Find archived sequence files associated with our results.
         """
         self.import_libraries(result_map)
-        flowcell_ids = self.find_relavant_flowcell_ids()
+        flowcell_ids = self.find_relevant_flowcell_ids()
         self.import_sequences(flowcell_ids)
 
         query_text = """
@@ -138,7 +138,7 @@ class CondorFastqExtract(object):
                       libns:library ?library ;
                       libns:library_id ?library_id ;
                       libns:file_type ?filetype ;
-                      a libns:illumina_result .
+                      a libns:IlluminaResult .
             ?flowcell libns:read_length ?read_length ;
                       libns:flowcell_type ?flowcell_type .
             OPTIONAL { ?flowcell libns:flowcell_status ?flowcell_status }
@@ -174,12 +174,12 @@ class CondorFastqExtract(object):
         if not self.model.contains_statement(q):
             present = True
             load_into_model(self.model, 'rdfa', library)
-        LOGGER.debug("Did we import %s: %s", library, present)
+        LOGGER.debug("Did we import %s: %s", library.uri, present)
 
-    def find_relavant_flowcell_ids(self):
+    def find_relevant_flowcell_ids(self):
         """Generate set of flowcell ids that had samples of interest on them
         """
-        flowcell_query =RDF.SPARQLQuery("""
+        flowcell_query = RDF.SPARQLQuery("""
 prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
 
 select distinct ?flowcell ?flowcell_id
@@ -192,13 +192,17 @@ WHERE {
         flowcell_ids = set()
         for r in flowcell_query.execute(self.model):
             flowcell_ids.add( fromTypedNode(r['flowcell_id']) )
-            LOGGER.debug("Flowcells = %s" %(unicode(flowcell_ids)))
-            flowcell_test = RDF.Statement(r['flowcell'],
-                                          rdfNS['type'],
-                                          libraryOntology['IlluminaFlowcell'])
-            if not self.model.contains_statement(flowcell_test):
-                # we probably lack full information about the flowcell.
+            imported = False
+            a_lane = self.model.get_target(r['flowcell'],
+                                           libraryOntology['has_lane'])
+            print a_lane
+            if a_lane is None:
+                imported = True
+                # we lack information about which lanes were on this flowcell
                 load_into_model(self.model, 'rdfa', r['flowcell'])
+            LOGGER.debug("Did we imported %s: %s" % (r['flowcell'].uri,
+                                                     imported))
+
         return flowcell_ids
 
     def import_sequences(self, flowcell_ids):
index 413d2c3ae5e99daa7025a6be3101edb50d96da85..85947158390f699afa8a76cd719a0b1fadf26ebf 100644 (file)
@@ -111,7 +111,7 @@ class GEOSubmission(Submission):
 
         context = Context({
             'submission': str(analysis_node.uri),
-            'file_class': str(geoSoftNS['supplemental'])
+            'file_class': str(geoSoftNS['supplemental'].uri)
             })
 
         return self.execute_query(query_template, context)
@@ -123,7 +123,7 @@ class GEOSubmission(Submission):
 
         context = Context({
             'submission': str(analysis_node.uri),
-            'file_class': str(geoSoftNS['raw']),
+            'file_class': str(geoSoftNS['raw'].uri),
             })
 
         lanes = {}
index 18fa3b2bc9f487b63915ca02a3c060adbfeedf3b..2b04ff43a05ce6e70d6ccb7b20f7f17d54165699 100644 (file)
@@ -126,7 +126,7 @@ class Submission(object):
 
     def link_file_to_classes(self, filename, submissionNode, submission_uri, analysis_dir):
         # add file specific information
-        fileNode = RDF.Node(RDF.Uri(submission_uri + '/' + filename))
+        fileNode = RDF.Node(RDF.Uri('file://'+ os.path.abspath(filename)))
         self.model.add_statement(
             RDF.Statement(submissionNode,
                           dafTermOntology['has_file'],
index f4d2e2c71aef39fc8f32dbb6058fa8db71510af9..dd41c61aab348d3877ebd2f3fb81dbc9c957ad35 100644 (file)
@@ -122,22 +122,22 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
-        libns:lane_number 1 .
+        libns:lane_number "1" .
 <http://localhost/lane/3402>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
-        libns:lane_number 2 .
+        libns:lane_number "2" .
 <http://localhost/lane/3403>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
-        libns:lane_number 3 .
+        libns:lane_number "3" .
 <http://localhost/lane/3404>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/11154/> ;
-        libns:lane_number 4 .
+        libns:lane_number "4" .
         # paired_end 1;
         # read_length 33;
         # status "Unknown"@en .
@@ -145,22 +145,22 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
-        libns:lane_number 5 .
+        libns:lane_number "5" .
 <http://localhost/lane/3406>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
-        libns:lane_number 6 .
+        libns:lane_number "6" .
 <http://localhost/lane/3407>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
-        libns:lane_number 7 .
+        libns:lane_number "7" .
 <http://localhost/lane/3408>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
         libns:library <http://localhost/library/10000/> ;
-        libns:lane_number 8 .
+        libns:lane_number "8" .
 
 <http://localhost/flowcell/42JUYAAXX/>
         a libns:IlluminaFlowcell ;
@@ -181,27 +181,27 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
-        libns:lane_number 1 .
+        libns:lane_number "1" .
 <http://localhost/lane/4202>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
-        libns:lane_number 2 .
+        libns:lane_number "2" .
 <http://localhost/lane/4203>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
-        libns:lane_number 3 .
+        libns:lane_number "3" .
 <http://localhost/lane/4204>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
-        libns:lane_number 4 .
+        libns:lane_number "4" .
 <http://localhost/lane/4205>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/11154/> ;
-        libns:lane_number 5 .
+        libns:lane_number "5" .
         # paired_end 1;
         # read_length 76;
         # status "Unknown"@en .
@@ -209,17 +209,17 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
-        libns:lane_number 6 .
+        libns:lane_number "6" .
 <http://localhost/lane/4207>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
-        libns:lane_number 7 .
+        libns:lane_number "7" .
 <http://localhost/lane/4208>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
         libns:library <http://localhost/library/1421/> ;
-        libns:lane_number 8 .
+        libns:lane_number "8" .
 
 <http://localhost/flowcell/61MJTAAXX/>
         a libns:IlluminaFlowcell ;
@@ -240,32 +240,32 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
-        libns:lane_number 1 .
+        libns:lane_number "1" .
 <http://localhost/lane/6602>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
-        libns:lane_number 2 .
+        libns:lane_number "2" .
 <http://localhost/lane/6603>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
-        libns:lane_number 3 .
+        libns:lane_number "3" .
 <http://localhost/lane/6604>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
-        libns:lane_number 4 .
+        libns:lane_number "4" .
 <http://localhost/lane/6605>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
-        libns:lane_number 5 .
+        libns:lane_number "5" .
 <http://localhost/lane/6606>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/11154/> ;
-        libns:lane_number 6 .
+        libns:lane_number "6" .
         # paired_end 1;
         # read_length 76;
         # status "Unknown"@en .
@@ -273,12 +273,12 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
-        libns:lane_number 7 .
+        libns:lane_number "7" .
 <http://localhost/lane/6608>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
         libns:library <http://localhost/library/1661/> ;
-        libns:lane_number 8 .
+        libns:lane_number "8" .
 
 <http://localhost/flowcell/30DY0AAXX/>
         a libns:IlluminaFlowcell ;
@@ -299,42 +299,42 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
-        libns:lane_number 1 .
+        libns:lane_number "1" .
 <http://localhost/lane/3802>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
-        libns:lane_number 2 .
+        libns:lane_number "2" .
 <http://localhost/lane/3803>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
-        libns:lane_number 3 .
+        libns:lane_number "3" .
 <http://localhost/lane/3804>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
-        libns:lane_number 4 .
+        libns:lane_number "4" .
 <http://localhost/lane/3805>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
-        libns:lane_number 5 .
+        libns:lane_number "5" .
 <http://localhost/lane/3806>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
-        libns:lane_number 6 .
+        libns:lane_number "6" .
 <http://localhost/lane/3807>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/1331/> ;
-        libns:lane_number 7 .
+        libns:lane_number "7" .
 <http://localhost/lane/3808>
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
         libns:library <http://localhost/library/11154/> ;
-        libns:lane_number 8 .
+        libns:lane_number "8" .
         # paired_end 1;
         # read_length 76;
         # status "Unknown"@en .
@@ -352,7 +352,7 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
         libns:library <http://localhost/library/12345/> ;
-        libns:lane_number 3 .
+        libns:lane_number "3" .
         # paired_end 1;
         # read_length 101;
         # status "Unknown"@en .
@@ -361,7 +361,7 @@ lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
         a libns:IlluminaLane ;
         libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
         libns:library <http://localhost/library/11154/> ;
-        libns:lane_number 3 .
+        libns:lane_number "3" .
         # paired_end 1;
         # read_length 101;
         # status "Unknown"@en .
@@ -446,39 +446,40 @@ class TestCondorFastq(unittest.TestCase):
         inference = Infer(self.extract.model)
         errmsgs = list(inference.run_validation())
         self.assertEqual(len(errmsgs), 0)
+        os.chdir(self.tempdir)
 
     def tearDown(self):
         shutil.rmtree(self.tempdir)
         os.chdir(self.cwd)
 
-    def test_find_relavant_flowcell_ids(self):
+    def test_find_relevant_flowcell_ids(self):
         expected = set(('30221AAXX',
                         '42JUYAAXX',
                         '61MJTAAXX',
                         '30DY0AAXX',
                         'C02F9ACXX'))
-        flowcell_ids = self.extract.find_relavant_flowcell_ids()
+        flowcell_ids = self.extract.find_relevant_flowcell_ids()
         self.assertEqual(flowcell_ids, expected)
 
     def test_find_archive_sequence(self):
         seqs = self.extract.find_archive_sequence_files(self.result_map)
 
         expected = set([
-            (u'11154', u'42JUYAAXX', 5, 1, 76, True, 'qseq'),
-            (u'11154', u'42JUYAAXX', 5, 2, 76, True, 'qseq'),
-            (u'11154', u'61MJTAAXX', 6, 1, 76, False, 'qseq'),
-            (u'11154', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
-            (u'11154', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
-            (u'11154', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
-            (u'11154', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
-            (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
-            (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
-            (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
-            (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
-            (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
-            (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
-            (u'11154', u'30221AAXX', 4, 1, 33, False, 'srf'),
-            (u'11154', u'30DY0AAXX', 8, 1, 151, True, 'srf')
+            (u'11154', u'42JUYAAXX', '5', 1, 76, True, 'qseq'),
+            (u'11154', u'42JUYAAXX', '5', 2, 76, True, 'qseq'),
+            (u'11154', u'61MJTAAXX', '6', 1, 76, False, 'qseq'),
+            (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+            (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+            (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+            (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+            (u'11154', u'30221AAXX', '4', 1, 33, False, 'srf'),
+            (u'11154', u'30DY0AAXX', '8', 1, 151, True, 'srf')
         ])
         found = set([(l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle, l.ispaired, l.filetype) for l in seqs])
         self.assertEqual(expected, found)