Fix matching scanned sequence files to library IDs work for hiseq runs.
authorDiane Trout <diane@caltech.edu>
Fri, 31 Aug 2012 19:29:59 +0000 (12:29 -0700)
committerDiane Trout <diane@caltech.edu>
Fri, 31 Aug 2012 19:29:59 +0000 (12:29 -0700)
The previous version was keying off of flowcell/lane so if you
had multiple libraries from the same flowcell/lane all the sequences
would end up in one of the libraries.

Hopefully this fixes that. Though to do this I ended up changing
the whole structure of condorfastq to be based on updating an RDF model.
This depends on the sequence.py module changes of saving things to
rdf models -- and the new code to infer library ids at that layer.

htsworkflow/submission/condorfastq.py
htsworkflow/submission/test/test_condorfastq.py

index 64eb6a1892a93ab086adb377552db4a828a2a974..5ed9243096b9061c5ec789651fca472d0268fdf9 100644 (file)
@@ -2,26 +2,34 @@
 """
 import logging
 import os
 """
 import logging
 import os
-from pprint import pformat
+from pprint import pformat,pprint
 import sys
 import types
 import sys
 import types
+from urlparse import urljoin, urlparse
 
 
-from htsworkflow.pipelines.sequences import scan_for_sequences
+from htsworkflow.pipelines.sequences import scan_for_sequences, \
+     update_model_sequence_library
 from htsworkflow.pipelines.samplekey import SampleKey
 from htsworkflow.pipelines import qseq2fastq
 from htsworkflow.pipelines import srf2fastq
 from htsworkflow.pipelines import desplit_fastq
 from htsworkflow.pipelines.samplekey import SampleKey
 from htsworkflow.pipelines import qseq2fastq
 from htsworkflow.pipelines import srf2fastq
 from htsworkflow.pipelines import desplit_fastq
-from htsworkflow.util.api import HtswApi
+from htsworkflow.util.rdfhelp import get_model, dump_model, load_into_model, \
+     fromTypedNode, \
+     libraryOntology, \
+     stripNamespace, \
+     rdfNS
 from htsworkflow.util.conversion import parse_flowcell_id
 
 from django.conf import settings
 from django.template import Context, loader
 
 from htsworkflow.util.conversion import parse_flowcell_id
 
 from django.conf import settings
 from django.template import Context, loader
 
+import RDF
+
 LOGGER = logging.getLogger(__name__)
 
 
 class CondorFastqExtract(object):
 LOGGER = logging.getLogger(__name__)
 
 
 class CondorFastqExtract(object):
-    def __init__(self, host, apidata, sequences_path,
+    def __init__(self, host, sequences_path,
                  log_path='log',
                  force=False):
         """Extract fastqs from results archive
                  log_path='log',
                  force=False):
         """Extract fastqs from results archive
@@ -33,7 +41,8 @@ class CondorFastqExtract(object):
           log_path (str): where to put condor log files
           force (bool): do we force overwriting current files?
         """
           log_path (str): where to put condor log files
           force (bool): do we force overwriting current files?
         """
-        self.api = HtswApi(host, apidata)
+        self.host = host
+        self.model = get_model()
         self.sequences_path = sequences_path
         self.log_path = log_path
         self.force = force
         self.sequences_path = sequences_path
         self.log_path = log_path
         self.force = force
@@ -62,7 +71,7 @@ class CondorFastqExtract(object):
                          'logdir': self.log_path,
                          'env': env,
                          'args': condor_entries[script_type],
                          'logdir': self.log_path,
                          'env': env,
                          'args': condor_entries[script_type],
-                         'root_url': self.api.root_url,
+                         'root_url': self.host,
                          }
             context = Context(variables)
 
                          }
             context = Context(variables)
 
@@ -79,8 +88,8 @@ class CondorFastqExtract(object):
                             'split_fastq': self.condor_desplit_fastq
                             }
         by_sample = {}
                             'split_fastq': self.condor_desplit_fastq
                             }
         by_sample = {}
-        lib_db = self.find_archive_sequence_files(result_map)
-        needed_targets = self.find_missing_targets(result_map, lib_db)
+        sequences = self.find_archive_sequence_files(result_map)
+        needed_targets = self.find_missing_targets(result_map, sequences)
 
         for target_pathname, available_sources in needed_targets.items():
             LOGGER.debug(' target : %s' % (target_pathname,))
 
         for target_pathname, available_sources in needed_targets.items():
             LOGGER.debug(' target : %s' % (target_pathname,))
@@ -98,7 +107,7 @@ class CondorFastqExtract(object):
                     condor_entries.setdefault(condor_type, []).append(
                         conversion(sources, target_pathname))
                     for s in sources:
                     condor_entries.setdefault(condor_type, []).append(
                         conversion(sources, target_pathname))
                     for s in sources:
-                        by_sample.setdefault(s.lane_id,[]).append(
+                        by_sample.setdefault(s.lane_number,[]).append(
                             target_pathname)
             else:
                 print " need file", target_pathname
                             target_pathname)
             else:
                 print " need file", target_pathname
@@ -110,41 +119,83 @@ class CondorFastqExtract(object):
         """
         Find archived sequence files associated with our results.
         """
         """
         Find archived sequence files associated with our results.
         """
-        LOGGER.debug("Searching for sequence files in: %s" %(self.sequences_path,))
-
-        lib_db = {}
-        seq_dirs = set()
-        candidate_lanes = {}
+        self.import_libraries(result_map)
+        flowcell_ids = self.find_relavant_flowcell_ids()
+        self.import_sequences(flowcell_ids)
+
+        query = RDF.SPARQLQuery("""
+        prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+        prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+        prefix xsd: <http://www.w3.org/2001/XMLSchema#>
+
+        select ?filenode ?filetype ?cycle ?lane_number ?read
+               ?library  ?library_id
+               ?flowcell ?flowcell_id ?read_length
+               ?flowcell_type ?flowcell_status
+        where {
+            ?filenode libns:cycle ?cycle ;
+                      libns:lane_number ?lane_number ;
+                      libns:read ?read ;
+                      libns:flowcell ?flowcell ;
+                      libns:flowcell_id ?flowcell_id ;
+                      libns:library ?library ;
+                      libns:library_id ?library_id ;
+                      rdf:type ?filetype ;
+                      a libns:raw_file .
+            ?flowcell libns:read_length ?read_length ;
+                      libns:flowcell_type ?flowcell_type .
+            OPTIONAL { ?flowcell libns:flowcell_status ?flowcell_status }
+            FILTER(?filetype != libns:raw_file)
+        }
+        """)
+        results = []
+        for r in query.execute(self.model):
+            library_id = fromTypedNode(r['library_id'])
+            if library_id in result_map:
+                results.append(SequenceResult(r))
+        return results
+
+    def import_libraries(self, result_map):
         for lib_id in result_map.keys():
         for lib_id in result_map.keys():
-            lib_info = self.api.get_library(lib_id)
-            lib_info['lanes'] = {}
-            lib_db[lib_id] = lib_info
-
-            for lane in lib_info['lane_set']:
-                lane_key = (lane['flowcell'], lane['lane_number'])
-                candidate_lanes[lane_key] = (lib_id, lane['lane_id'])
-                seq_dirs.add(os.path.join(self.sequences_path,
-                                             'flowcells',
-                                             lane['flowcell']))
-        LOGGER.debug("Seq_dirs = %s" %(unicode(seq_dirs)))
-        candidate_seq_list = scan_for_sequences(seq_dirs)
-
-        # at this point we have too many sequences as scan_for_sequences
-        # returns all the sequences in a flowcell directory
-        # so lets filter out the extras
-
-        for seq in candidate_seq_list:
-            lane_key = (seq.flowcell, seq.lane)
-            candidate_key = candidate_lanes.get(lane_key, None)
-            if candidate_key is not None:
-                lib_id, lane_id = candidate_key
-                seq.lane_id = lane_id
-                lib_info = lib_db[lib_id]
-                lib_info['lanes'].setdefault(lane_key, set()).add(seq)
-
-        return lib_db
-
-    def find_missing_targets(self, result_map, lib_db):
+            liburl = urljoin(self.host, 'library/%s/' % (lib_id,))
+            library = RDF.Node(RDF.Uri(liburl))
+            self.import_library(library)
+
+    def import_library(self, library):
+        """Import library data into our model if we don't have it already
+        """
+        q = RDF.Statement(library, rdfNS['type'], libraryOntology['library'])
+        if not self.model.contains_statement(q):
+            load_into_model(self.model, 'rdfa', library)
+
+    def find_relavant_flowcell_ids(self):
+        """Generate set of flowcell ids that had samples of interest on them
+        """
+        flowcell_query =RDF.SPARQLQuery("""
+prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#>
+
+select distinct ?library ?flowcell ?flowcell_id
+WHERE {
+  ?library a libns:library ;
+           libns:has_lane ?lane .
+  ?lane libns:flowcell ?flowcell .
+  ?flowcell libns:flowcell_id ?flowcell_id .
+}""")
+        flowcell_ids = set()
+        for r in flowcell_query.execute(self.model):
+            flowcell_ids.add( fromTypedNode(r['flowcell_id']) )
+        LOGGER.debug("Flowcells = %s" %(unicode(flowcell_ids)))
+        return flowcell_ids
+
+    def import_sequences(self, flowcell_ids):
+        seq_dirs = ( os.path.join(self.sequences_path, f)
+                     for f in flowcell_ids )
+        sequences = scan_for_sequences(seq_dirs)
+        for seq in sequences:
+            seq.save_to_model(self.model)
+        update_model_sequence_library(self.model, self.host)
+
+    def find_missing_targets(self, result_map, raw_files):
         """
         Check if the sequence file exists.
         This requires computing what the sequence name is and checking
         """
         Check if the sequence file exists.
         This requires computing what the sequence name is and checking
@@ -156,61 +207,48 @@ class CondorFastqExtract(object):
         fastq_single_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s.fastq'
         # find what targets we're missing
         needed_targets = {}
         fastq_single_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s.fastq'
         # find what targets we're missing
         needed_targets = {}
-        for lib_id in result_map.keys():
-            result_dir = result_map[lib_id]
-            lib = lib_db[lib_id]
-            lane_dict = make_lane_dict(lib_db, lib_id)
-
-            for lane_key, sequences in lib['lanes'].items():
-                for seq in sequences:
-                    seq.paired = lane_dict[seq.flowcell]['paired_end']
-                    lane_status = lane_dict[seq.flowcell]['status']
-
-                    if seq.paired and seq.read is None:
-                        seq.read = 1
-                    filename_attributes = {
-                        'flowcell': seq.flowcell,
-                        'lib_id': lib_id,
-                        'lane': seq.lane,
-                        'read': seq.read,
-                        'cycle': seq.cycle
-                        }
-                    # skip bad runs
-                    if lane_status == 'Failed':
-                        continue
-                    if seq.flowcell == '30DY0AAXX':
-                        # 30DY0 only ran for 151 bases instead of 152
-                        # it is actually 76 1st read, 75 2nd read
-                        seq.mid_point = 76
-
-                    # end filters
-                    if seq.paired:
-                        target_name = fastq_paired_template % \
-                                      filename_attributes
-                    else:
-                        target_name = fastq_single_template % \
-                                      filename_attributes
-
-                    target_pathname = os.path.join(result_dir, target_name)
-                    if self.force or not os.path.exists(target_pathname):
-                        t = needed_targets.setdefault(target_pathname, {})
-                        t.setdefault(seq.filetype, []).append(seq)
+        for seq in raw_files:
+            if not seq.isgood:
+                continue
+            filename_attributes = {
+                'flowcell': seq.flowcell_id,
+                'lib_id': seq.library_id,
+                'lane': seq.lane_number,
+                'read': seq.read,
+                'cycle': seq.cycle
+            }
+
+            if seq.ispaired:
+                target_name = fastq_paired_template % \
+                              filename_attributes
+            else:
+                target_name = fastq_single_template % \
+                              filename_attributes
 
 
-        return needed_targets
+            result_dir = result_map[seq.library_id]
+            target_pathname = os.path.join(result_dir, target_name)
+            if self.force or not os.path.exists(target_pathname):
+                t = needed_targets.setdefault(target_pathname, {})
+                t.setdefault(seq.filetype, []).append(seq)
 
 
+        return needed_targets
 
     def condor_srf_to_fastq(self, sources, target_pathname):
         if len(sources) > 1:
             raise ValueError("srf to fastq can only handle one file")
 
 
     def condor_srf_to_fastq(self, sources, target_pathname):
         if len(sources) > 1:
             raise ValueError("srf to fastq can only handle one file")
 
+        mid_point = None
+        if sources[0].flowcell_id == '30DY0AAXX':
+            mid_point = 76
+
         return {
         return {
-            'sources': [os.path.abspath(sources[0].path)],
+            'sources': [sources[0].path],
             'pyscript': srf2fastq.__file__,
             'pyscript': srf2fastq.__file__,
-            'flowcell': sources[0].flowcell,
-            'ispaired': sources[0].paired,
+            'flowcell': sources[0].flowcell_id,
+            'ispaired': sources[0].ispaired,
             'target': target_pathname,
             'target_right': target_pathname.replace('_r1.fastq', '_r2.fastq'),
             'target': target_pathname,
             'target_right': target_pathname.replace('_r1.fastq', '_r2.fastq'),
-            'mid': getattr(sources[0], 'mid_point', None),
+            'mid': mid_point,
             'force': self.force,
         }
 
             'force': self.force,
         }
 
@@ -221,10 +259,10 @@ class CondorFastqExtract(object):
         paths.sort()
         return {
             'pyscript': qseq2fastq.__file__,
         paths.sort()
         return {
             'pyscript': qseq2fastq.__file__,
-            'flowcell': sources[0].flowcell,
+            'flowcell': sources[0].flowcell_id,
             'target': target_pathname,
             'sources': paths,
             'target': target_pathname,
             'sources': paths,
-            'ispaired': sources[0].paired,
+            'ispaired': sources[0].ispaired,
             'istar': len(sources) == 1,
         }
 
             'istar': len(sources) == 1,
         }
 
@@ -237,11 +275,9 @@ class CondorFastqExtract(object):
             'pyscript': desplit_fastq.__file__,
             'target': target_pathname,
             'sources': paths,
             'pyscript': desplit_fastq.__file__,
             'target': target_pathname,
             'sources': paths,
-            'ispaired': sources[0].paired,
+            'ispaired': sources[0].ispaired,
         }
 
         }
 
-    def lane_rdf(self, sources, target_pathname):
-        pass
 
 def make_lane_dict(lib_db, lib_id):
     """
 
 def make_lane_dict(lib_db, lib_id):
     """
@@ -255,3 +291,48 @@ def make_lane_dict(lib_db, lib_id):
         result.append((lane['flowcell'], lane))
     return dict(result)
 
         result.append((lane['flowcell'], lane))
     return dict(result)
 
+class SequenceResult(object):
+    """Convert the sparql query result from find_archive_sequence_files
+    """
+    def __init__(self, result):
+        self.filenode = result['filenode']
+        self._filetype = result['filetype']
+        self.cycle = fromTypedNode(result['cycle'])
+        self.lane_number = fromTypedNode(result['lane_number'])
+        self.read = fromTypedNode(result['read'])
+        if type(self.read) in types.StringTypes:
+            self.read = 1
+        self.library = result['library']
+        self.library_id = fromTypedNode(result['library_id'])
+        self.flowcell = result['flowcell']
+        self.flowcell_id = fromTypedNode(result['flowcell_id'])
+        self.flowcell_type = fromTypedNode(result['flowcell_type'])
+        self.flowcell_status = fromTypedNode(result['flowcell_status'])
+
+    def _is_good(self):
+        """is this sequence / flowcell 'good enough'"""
+        if self.flowcell_status is not None and \
+           self.flowcell_status.lower() == "failed":
+            return False
+        return True
+    isgood = property(_is_good)
+
+    def _get_ispaired(self):
+        if self.flowcell_type.lower() == "paired":
+            return True
+        else:
+            return False
+    ispaired = property(_get_ispaired)
+
+    def _get_filetype(self):
+        return stripNamespace(libraryOntology, self._filetype)
+    filetype = property(_get_filetype)
+
+    def _get_path(self):
+        url = urlparse(str(self.filenode.uri))
+        if url.scheme == 'file':
+            return url.path
+        else:
+            errmsg = u"Unsupported scheme {0} for {1}"
+            raise ValueError(errmsg.format(url.scheme, unicode(url)))
+    path = property(_get_path)
index 6aaf5846de8f3eb86e4f73145e487c73bc2ae914..24d6e4982018dd23ac8e8746d7ade1856ae3d4e7 100644 (file)
@@ -7,8 +7,9 @@ import shutil
 import tempfile
 import unittest
 
 import tempfile
 import unittest
 
-from htsworkflow.submission import condorfastq
+from htsworkflow.submission.condorfastq import CondorFastqExtract
 from htsworkflow.submission.results import ResultMap
 from htsworkflow.submission.results import ResultMap
+from htsworkflow.util.rdfhelp import load_string_into_model, dump_model
 
 FCDIRS = [
     'C02F9ACXX',
 
 FCDIRS = [
     'C02F9ACXX',
@@ -16,6 +17,7 @@ FCDIRS = [
     'C02F9ACXX/C1-202/Project_11154',
     'C02F9ACXX/C1-202/Project_12342_Index1',
     'C02F9ACXX/C1-202/Project_12342_Index2',
     'C02F9ACXX/C1-202/Project_11154',
     'C02F9ACXX/C1-202/Project_12342_Index1',
     'C02F9ACXX/C1-202/Project_12342_Index2',
+    'C02F9ACXX/C1-202/Project_12345',
     '42JUYAAXX',
     '42JUYAAXX/C1-76',
     '30221AAXX',
     '42JUYAAXX',
     '42JUYAAXX/C1-76',
     '30221AAXX',
@@ -31,9 +33,18 @@ DATAFILES = [
     'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_002.fastq.gz',
     'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_001.fastq.gz',
     'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_002.fastq.gz',
     'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_002.fastq.gz',
     'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_001.fastq.gz',
     'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_002.fastq.gz',
-    'C02F9ACXX/C1-202/Project_12342_Index1/11114_GCCAAT_L004_R1_001.fastq.gz',
-    'C02F9ACXX/C1-202/Project_12342_Index2/11119_CGATGT_L007_R1_001.fastq.gz',
-    'C02F9ACXX/C1-202/Project_12342_Index2/11119_CGATGT_L005_R1_001.fastq.gz',
+    'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R1_001.fastq.gz',
+    'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R2_001.fastq.gz',
+    'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R1_001.fastq.gz',
+    'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R2_001.fastq.gz',
+    'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R1_001.fastq.gz',
+    'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R2_001.fastq.gz',
+    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_001.fastq.gz',
+    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_002.fastq.gz',
+    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_003.fastq.gz',
+    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_001.fastq.gz',
+    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_002.fastq.gz',
+    'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_003.fastq.gz',
     '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r1.tar.bz2',
     '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r1.tar.bz2',
     '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r1.tar.bz2',
     '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r1.tar.bz2',
     '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r1.tar.bz2',
     '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r1.tar.bz2',
@@ -77,77 +88,288 @@ DATAFILES = [
     '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l8_r1.tar.bz2',
 ]
 
     '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l8_r1.tar.bz2',
 ]
 
-LIBDATA = {
-    '11154':{u'antibody_id': None,
-             u'cell_line': u'Unknown',
-             u'cell_line_id': 1,
-             u'experiment_type': u'RNA-seq',
-             u'experiment_type_id': 4,
-             u'gel_cut_size': 300,
-             u'hidden': False,
-             u'id': u'11154',
-             u'insert_size': 200,
-             u'lane_set': [{u'flowcell': u'30221AAXX',
-                            u'lane_number': 4,
-                            u'lane_id': 3400,
-                            u'paired_end': False,
-                            u'read_length': 33,
-                            u'status': u'Unknown',
-                            u'status_code': None},
-                           {u'flowcell': u'42JUYAAXX',
-                            u'lane_number': 5,
-                            u'lane_id': 4200,
-                            u'paired_end': True,
-                            u'read_length': 76,
-                            u'status': u'Unknown',
-                            u'status_code': None},
-                           {u'flowcell': u'61MJTAAXX',
-                            u'lane_number': 6,
-                            u'lane_id': 6600,
-                            u'paired_end': False,
-                            u'read_length': 76,
-                            u'status': u'Unknown',
-                            u'status_code': None},
-                           {u'flowcell': u'30DY0AAXX',
-                            u'lane_number': 8,
-                            u'lane_id': 3800,
-                            u'paired_end': True,
-                            u'read_length': 76,
-                            u'status': u'Unknown',
-                            u'status_code': None},
-                           {u'flowcell': u'C02F9ACXX',
-                            u'lane_number': 3,
-                            u'lane_id': 12300,
-                            u'paired_end': True,
-                            u'read_length': 101,
-                            u'status': u'Unknown',
-                            u'status_code': None}],
-             u'library_id': u'11154',
-             u'library_name': u'Paired ends ASDF ',
-             u'library_species': u'Mus musculus',
-             u'library_species_id': 9,
-             u'library_type': u'Paired End (non-multiplexed)',
-             u'library_type_id': 2,
-             u'made_by': u'Gary Gygax',
-             u'made_for': u'TSR',
-             u'notes': u'300 bp gel fragment',
-             u'replicate': 1,
-             u'stopping_point': u'1Aa',
-             u'successful_pM': None,
-             u'undiluted_concentration': u'29.7'}
-    }
-
-FAKE_APIDATA = {'apiid':0, 'apikey': 'foo'}
-
-class FakeApi(object):
-    def __init__(self, *args, **kwargs):
-        self.root_url = 'http://localhost'
-
-    def get_library(self, libid):
-        lib_data = LIBDATA[libid]
-        return copy.deepcopy(lib_data)
-
-
+lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix dc: <http://purl.org/dc/elements/1.1/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
+@prefix seqns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
+@prefix invns: <http://jumpgate.caltech.edu/wiki/InventoryOntology#> .
+
+<http://localhost/flowcell/30221AAXX/>
+        a libns:illumina_flowcell ;
+        libns:read_length 33 ;
+        libns:flowcell_type "Single"@en ;
+        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+        libns:has_lane <http://localhost/lane/3401> ;
+        libns:has_lane <http://localhost/lane/3402> ;
+        libns:has_lane <http://localhost/lane/3403> ;
+        libns:has_lane <http://localhost/lane/3404> ;
+        libns:has_lane <http://localhost/lane/3405> ;
+        libns:has_lane <http://localhost/lane/3406> ;
+        libns:has_lane <http://localhost/lane/3407> ;
+        libns:has_lane <http://localhost/lane/3408> ;
+        libns:flowcell_id "30221AAXX"@en .
+
+<http://localhost/lane/3401>
+        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+        libns:library <http://localhost/library/10000/> ;
+        libns:lane_number 1 .
+<http://localhost/lane/3402>
+        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+        libns:library <http://localhost/library/10000/> ;
+        libns:lane_number 2 .
+<http://localhost/lane/3403>
+        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+        libns:library <http://localhost/library/10000/> ;
+        libns:lane_number 3 .
+<http://localhost/lane/3404>
+        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+        libns:library <http://localhost/library/11154/> ;
+        libns:lane_number 4 .
+        # paired_end 1;
+        # read_length 33;
+        # status "Unknown"@en .
+<http://localhost/lane/3405>
+        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+        libns:library <http://localhost/library/10000/> ;
+        libns:lane_number 5 .
+<http://localhost/lane/3406>
+        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+        libns:library <http://localhost/library/10000/> ;
+        libns:lane_number 6 .
+<http://localhost/lane/3407>
+        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+        libns:library <http://localhost/library/10000/> ;
+        libns:lane_number 7 .
+<http://localhost/lane/3408>
+        libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+        libns:library <http://localhost/library/10000/> ;
+        libns:lane_number 8 .
+
+<http://localhost/flowcell/42JUYAAXX/>
+        a libns:illumina_flowcell ;
+        libns:read_length 76 ;
+        libns:flowcell_type "Paired"@en ;
+        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+        libns:has_lane <http://localhost/lane/4201> ;
+        libns:has_lane <http://localhost/lane/4202> ;
+        libns:has_lane <http://localhost/lane/4203> ;
+        libns:has_lane <http://localhost/lane/4204> ;
+        libns:has_lane <http://localhost/lane/4205> ;
+        libns:has_lane <http://localhost/lane/4206> ;
+        libns:has_lane <http://localhost/lane/4207> ;
+        libns:has_lane <http://localhost/lane/4208> ;
+        libns:flowcell_id "42JUYAAXX"@en .
+
+<http://localhost/lane/4201>
+        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+        libns:library <http://localhost/library/1421/> ;
+        libns:lane_number 1 .
+<http://localhost/lane/4202>
+        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+        libns:library <http://localhost/library/1421/> ;
+        libns:lane_number 2 .
+<http://localhost/lane/4203>
+        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+        libns:library <http://localhost/library/1421/> ;
+        libns:lane_number 3 .
+<http://localhost/lane/4204>
+        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+        libns:library <http://localhost/library/1421/> ;
+        libns:lane_number 4 .
+<http://localhost/lane/4205>
+        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+        libns:library <http://localhost/library/11154/> ;
+        libns:lane_number 5 .
+        # paired_end 1;
+        # read_length 76;
+        # status "Unknown"@en .
+<http://localhost/lane/4206>
+        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+        libns:library <http://localhost/library/1421/> ;
+        libns:lane_number 6 .
+<http://localhost/lane/4207>
+        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+        libns:library <http://localhost/library/1421/> ;
+        libns:lane_number 7 .
+<http://localhost/lane/4208>
+        libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+        libns:library <http://localhost/library/1421/> ;
+        libns:lane_number 8 .
+
+<http://localhost/flowcell/61MJTAAXX/>
+        a libns:illumina_flowcell ;
+        libns:read_length 76 ;
+        libns:flowcell_type "Single"@en ;
+        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+        libns:has_lane <http://localhost/lane/6601> ;
+        libns:has_lane <http://localhost/lane/6602> ;
+        libns:has_lane <http://localhost/lane/6603> ;
+        libns:has_lane <http://localhost/lane/6604> ;
+        libns:has_lane <http://localhost/lane/6605> ;
+        libns:has_lane <http://localhost/lane/6606> ;
+        libns:has_lane <http://localhost/lane/6607> ;
+        libns:has_lane <http://localhost/lane/6608> ;
+        libns:flowcell_id "61MJTAAXX"@en .
+
+<http://localhost/lane/6601>
+        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+        libns:library <http://localhost/library/1661/> ;
+        libns:lane_number 1 .
+<http://localhost/lane/6602>
+        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+        libns:library <http://localhost/library/1661/> ;
+        libns:lane_number 2 .
+<http://localhost/lane/6603>
+        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+        libns:library <http://localhost/library/1661/> ;
+        libns:lane_number 3 .
+<http://localhost/lane/6604>
+        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+        libns:library <http://localhost/library/1661/> ;
+        libns:lane_number 4 .
+<http://localhost/lane/6605>
+        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+        libns:library <http://localhost/library/1661/> ;
+        libns:lane_number 5 .
+<http://localhost/lane/6606>
+        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+        libns:library <http://localhost/library/11154/> ;
+        libns:lane_number 6 .
+        # paired_end 1;
+        # read_length 76;
+        # status "Unknown"@en .
+<http://localhost/lane/6607>
+        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+        libns:library <http://localhost/library/1661/> ;
+        libns:lane_number 7 .
+<http://localhost/lane/6608>
+        libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+        libns:library <http://localhost/library/1661/> ;
+        libns:lane_number 8 .
+
+<http://localhost/flowcell/30DY0AAXX/>
+        a libns:illumina_flowcell ;
+        libns:read_length 76 ;
+        libns:flowcell_type "Paired"@en ;
+        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+        libns:has_lane <http://localhost/lane/3801> ;
+        libns:has_lane <http://localhost/lane/3802> ;
+        libns:has_lane <http://localhost/lane/3803> ;
+        libns:has_lane <http://localhost/lane/3804> ;
+        libns:has_lane <http://localhost/lane/3805> ;
+        libns:has_lane <http://localhost/lane/3806> ;
+        libns:has_lane <http://localhost/lane/3807> ;
+        libns:has_lane <http://localhost/lane/3808> ;
+        libns:flowcell_id "30DY0AAXX"@en .
+
+<http://localhost/lane/3801>
+        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+        libns:library <http://localhost/library/1331/> ;
+        libns:lane_number 1 .
+<http://localhost/lane/3802>
+        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+        libns:library <http://localhost/library/1331/> ;
+        libns:lane_number 2 .
+<http://localhost/lane/3803>
+        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+        libns:library <http://localhost/library/1331/> ;
+        libns:lane_number 3 .
+<http://localhost/lane/3804>
+        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+        libns:library <http://localhost/library/1331/> ;
+        libns:lane_number 4 .
+<http://localhost/lane/3805>
+        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+        libns:library <http://localhost/library/1331/> ;
+        libns:lane_number 5 .
+<http://localhost/lane/3806>
+        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+        libns:library <http://localhost/library/1331/> ;
+        libns:lane_number 6 .
+<http://localhost/lane/3807>
+        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+        libns:library <http://localhost/library/1331/> ;
+        libns:lane_number 7 .
+<http://localhost/lane/3808>
+        libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+        libns:library <http://localhost/library/11154/> ;
+        libns:lane_number 8 .
+        # paired_end 1;
+        # read_length 76;
+        # status "Unknown"@en .
+
+<http://localhost/flowcell/C02F9ACXX/>
+        a libns:illumina_flowcell ;
+        libns:read_length 101 ;
+        libns:flowcell_type "Paired"@en ;
+        libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+        libns:has_lane <http://localhost/lane/12300> ;
+        libns:has_lane <http://localhost/lane/12500> ;
+        libns:flowcell_id "C02F9ACXX"@en .
+
+<http://localhost/lane/12300>
+        libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
+        libns:library <http://localhost/library/12345/> ;
+        libns:lane_number 3 .
+        # paired_end 1;
+        # read_length 101;
+        # status "Unknown"@en .
+
+<http://localhost/lane/12500>
+        libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
+        libns:library <http://localhost/library/11154/> ;
+        libns:lane_number 3 .
+        # paired_end 1;
+        # read_length 101;
+        # status "Unknown"@en .
+
+<http://localhost/library/11154/>
+        a libns:library ;
+        libns:affiliation "TSR"@en;
+        libns:concentration "29.7";
+        libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
+        libns:experiment_type "RNA-seq"@en ;
+        libns:gel_cut 300 ;
+        libns:has_lane <http://localhost/lane/3404> ;
+        libns:has_lane <http://localhost/lane/4205> ;
+        libns:has_lane <http://localhost/lane/6606> ;
+        libns:has_lane <http://localhost/lane/3808> ;
+        libns:has_lane <http://localhost/lane/12500> ;
+        libns:insert_size 2000 ;
+        libns:library_id "11154"@en ;
+        libns:library_type "Paired End (Multiplexed)"@en ;
+        libns:made_by "Gary Gygax"@en ;
+        libns:name "Paired Ends ASDF"@en ;
+        libns:replicate "1"@en;
+        libns:species "Mus musculus"@en ;
+        libns:stopping_point "Completed"@en ;
+        libns:total_unique_locations 8841201 .
+        # cell_line
+
+
+<http://localhost/library/12345/>
+        a libns:library ;
+        libns:affiliation "TSR"@en;
+        libns:concentration "12.345";
+        libns:cell_line "Unknown"@en ;
+        libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
+        libns:experiment_type "RNA-seq"@en ;
+        libns:gel_cut 300 ;
+        libns:has_lane <http://localhost/lane/12300> ;
+        libns:insert_size 2000 ;
+        libns:library_id "12345"@en ;
+        libns:library_type "Paired End (Multiplexed)"@en ;
+        libns:made_by "Gary Gygax"@en ;
+        libns:name "Paired Ends THING"@en ;
+        libns:replicate "1"@en;
+        libns:species "Mus musculus"@en ;
+        libns:stopping_point "Completed"@en ;
+        libns:total_unique_locations 8841201 .
+        # cell_line
+"""
+HOST = "http://localhost"
 
 class TestCondorFastq(unittest.TestCase):
     def setUp(self):
 
 class TestCondorFastq(unittest.TestCase):
     def setUp(self):
@@ -168,141 +390,146 @@ class TestCondorFastq(unittest.TestCase):
             with open(filename, 'w') as stream:
                 stream.write('testfile')
 
             with open(filename, 'w') as stream:
                 stream.write('testfile')
 
-        self.subname = unicode('sub-11154')
-        self.subdir = os.path.join(self.tempdir, self.subname)
-        os.mkdir(self.subdir)
-
         self.result_map = ResultMap()
         self.result_map = ResultMap()
-        self.result_map['11154'] = self.subname
+        for lib_id in [u'11154', u'12345']:
+            subname = 'sub-%s' % (lib_id,)
+            sub_dir = os.path.join(self.tempdir, subname)
+            os.mkdir(sub_dir)
+            self.result_map[lib_id] =  sub_dir
+
+        self.extract = CondorFastqExtract(HOST,
+                                          self.flowcelldir,
+                                          self.logdir)
+        load_string_into_model(self.extract.model, 'turtle', lib_turtle)
 
     def tearDown(self):
         shutil.rmtree(self.tempdir)
         os.chdir(self.cwd)
 
 
     def tearDown(self):
         shutil.rmtree(self.tempdir)
         os.chdir(self.cwd)
 
+    def test_find_relavant_flowcell_ids(self):
+        expected = set(('30221AAXX',
+                        '42JUYAAXX',
+                        '61MJTAAXX',
+                        '30DY0AAXX',
+                        'C02F9ACXX'))
+        flowcell_ids = self.extract.find_relavant_flowcell_ids()
+        self.assertEqual(flowcell_ids, expected)
+
     def test_find_archive_sequence(self):
     def test_find_archive_sequence(self):
-        extract = condorfastq.CondorFastqExtract('host',
-                                                 FAKE_APIDATA,
-                                                 self.tempdir,
-                                                 self.logdir)
-        extract.api = FakeApi()
-
-        lib_db = extract.find_archive_sequence_files(self.result_map)
-
-        self.failUnlessEqual(len(lib_db['11154']['lanes']), 5)
-        lanes = [
-            lib_db['11154']['lanes'][(u'30221AAXX', 4)],
-            lib_db['11154']['lanes'][(u'42JUYAAXX', 5)],
-            lib_db['11154']['lanes'][(u'61MJTAAXX', 6)],
-            lib_db['11154']['lanes'][(u'30DY0AAXX', 8)],
-            lib_db['11154']['lanes'][(u'C02F9ACXX', 3)],
-        ]
-        self.failUnlessEqual(len(lanes[0]), 1)
-        self.failUnlessEqual(len(lanes[1]), 2)
-        self.failUnlessEqual(len(lanes[2]), 1)
-        self.failUnlessEqual(len(lanes[3]), 1)
-        self.failUnlessEqual(len(lanes[4]), 4)
+        seqs = self.extract.find_archive_sequence_files(self.result_map)
+
+        expected = set([
+            (u'11154', u'42JUYAAXX', 5, 1, 76, True, 'qseq'),
+            (u'11154', u'42JUYAAXX', 5, 2, 76, True, 'qseq'),
+            (u'11154', u'61MJTAAXX', 6, 1, 76, False, 'qseq'),
+            (u'11154', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
+            (u'11154', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
+            (u'11154', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
+            (u'11154', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', 3, 1, 202, True, 'split_fastq'),
+            (u'12345', u'C02F9ACXX', 3, 2, 202, True, 'split_fastq'),
+            (u'11154', u'30221AAXX', 4, 1, 33, False, 'srf'),
+            (u'11154', u'30DY0AAXX', 8, 1, 151, True, 'srf')
+        ])
+        found = set([(l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle, l.ispaired, l.filetype) for l in seqs])
+        self.assertEqual(expected, found)
 
     def test_find_needed_targets(self):
 
     def test_find_needed_targets(self):
+        lib_db = self.extract.find_archive_sequence_files(self.result_map)
 
 
-        extract = condorfastq.CondorFastqExtract('host',
-                                                 FAKE_APIDATA,
-                                                 self.tempdir,
-                                                 self.logdir)
-        extract.api = FakeApi()
-        lib_db = extract.find_archive_sequence_files(self.result_map)
-
-        needed_targets = extract.find_missing_targets(self.result_map,
-                                                      lib_db)
-        self.failUnlessEqual(len(needed_targets), 7)
+        needed_targets = self.extract.find_missing_targets(self.result_map,
+                                                           lib_db)
+        self.assertEqual(len(needed_targets), 9)
         srf_30221 = needed_targets[
         srf_30221 = needed_targets[
-            self.subname + u'/11154_30221AAXX_c33_l4.fastq']
+            self.result_map['11154'] + u'/11154_30221AAXX_c33_l4.fastq']
         qseq_42JUY_r1 = needed_targets[
         qseq_42JUY_r1 = needed_targets[
-            self.subname + u'/11154_42JUYAAXX_c76_l5_r1.fastq']
+            self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r1.fastq']
         qseq_42JUY_r2 = needed_targets[
         qseq_42JUY_r2 = needed_targets[
-            self.subname + u'/11154_42JUYAAXX_c76_l5_r2.fastq']
+            self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r2.fastq']
         qseq_61MJT = needed_targets[
         qseq_61MJT = needed_targets[
-            self.subname + u'/11154_61MJTAAXX_c76_l6.fastq']
+            self.result_map['11154'] + u'/11154_61MJTAAXX_c76_l6.fastq']
         split_C02F9_r1 = needed_targets[
         split_C02F9_r1 = needed_targets[
-            self.subname + u'/11154_C02F9ACXX_c202_l3_r1.fastq']
+            self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r1.fastq']
         split_C02F9_r2 = needed_targets[
         split_C02F9_r2 = needed_targets[
-            self.subname + u'/11154_C02F9ACXX_c202_l3_r2.fastq']
+            self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r2.fastq']
 
 
-        self.failUnlessEqual(len(srf_30221['srf']), 1)
-        self.failUnlessEqual(len(qseq_42JUY_r1['qseq']), 1)
-        self.failUnlessEqual(len(qseq_42JUY_r2['qseq']), 1)
-        self.failUnlessEqual(len(qseq_61MJT['qseq']), 1)
-        self.failUnlessEqual(len(split_C02F9_r1['split_fastq']), 2)
-        self.failUnlessEqual(len(split_C02F9_r2['split_fastq']), 2)
-
-        #print '-------needed targets---------'
-        #pprint(needed_targets)
+        self.assertEqual(len(srf_30221['srf']), 1)
+        self.assertEqual(len(qseq_42JUY_r1['qseq']), 1)
+        self.assertEqual(len(qseq_42JUY_r2['qseq']), 1)
+        self.assertEqual(len(qseq_61MJT['qseq']), 1)
+        self.assertEqual(len(split_C02F9_r1['split_fastq']), 2)
+        self.assertEqual(len(split_C02F9_r2['split_fastq']), 2)
 
     def test_generate_fastqs(self):
 
     def test_generate_fastqs(self):
-        extract = condorfastq.CondorFastqExtract('host',
-                                                 FAKE_APIDATA,
-                                                 self.tempdir,
-                                                 self.logdir)
-        extract.api = FakeApi()
-        commands = extract.build_condor_arguments(self.result_map)
+        commands = self.extract.build_condor_arguments(self.result_map)
 
         srf = commands['srf']
         qseq = commands['qseq']
         split = commands['split_fastq']
 
 
         srf = commands['srf']
         qseq = commands['qseq']
         split = commands['split_fastq']
 
-        self.failUnlessEqual(len(srf), 2)
-        self.failUnlessEqual(len(qseq), 3)
-        self.failUnlessEqual(len(split), 2)
+        self.assertEqual(len(srf), 2)
+        self.assertEqual(len(qseq), 3)
+        self.assertEqual(len(split), 4)
 
         srf_data = {
 
         srf_data = {
-            os.path.join(self.subname, '11154_30221AAXX_c33_l4.fastq'): {
+            os.path.join(self.result_map['11154'],
+                         '11154_30221AAXX_c33_l4.fastq'): {
                 'mid': None,
                 'ispaired': False,
                 'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
                 'flowcell': u'30221AAXX',
                 'mid': None,
                 'ispaired': False,
                 'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
                 'flowcell': u'30221AAXX',
-                'target': os.path.join(self.subname,
+                'target': os.path.join(self.result_map['11154'],
                                        u'11154_30221AAXX_c33_l4.fastq'),
             },
                                        u'11154_30221AAXX_c33_l4.fastq'),
             },
-            os.path.join(self.subname, '11154_30DY0AAXX_c151_l8_r1.fastq'): {
+            os.path.join(self.result_map['11154'],
+                         '11154_30DY0AAXX_c151_l8_r1.fastq'): {
                 'mid': None,
                 'ispaired': True,
                 'flowcell': u'30DY0AAXX',
                 'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
                 'mid': 76,
                 'target':
                 'mid': None,
                 'ispaired': True,
                 'flowcell': u'30DY0AAXX',
                 'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
                 'mid': 76,
                 'target':
-                    os.path.join(self.subname,
+                    os.path.join(self.result_map['11154'],
                                  u'11154_30DY0AAXX_c151_l8_r1.fastq'),
                 'target_right':
                                  u'11154_30DY0AAXX_c151_l8_r1.fastq'),
                 'target_right':
-                    os.path.join(self.subname,
+                    os.path.join(self.result_map['11154'],
                                  u'11154_30DY0AAXX_c151_l8_r2.fastq'),
             }
         }
         for args in srf:
             expected = srf_data[args['target']]
                                  u'11154_30DY0AAXX_c151_l8_r2.fastq'),
             }
         }
         for args in srf:
             expected = srf_data[args['target']]
-            self.failUnlessEqual(args['ispaired'], expected['ispaired'])
-            self.failUnlessEqual(len(args['sources']), 1)
+            self.assertEqual(args['ispaired'], expected['ispaired'])
+            self.assertEqual(len(args['sources']), 1)
             _, source_filename = os.path.split(args['sources'][0])
             _, source_filename = os.path.split(args['sources'][0])
-            self.failUnlessEqual(source_filename, expected['sources'][0])
-            self.failUnlessEqual(args['target'], expected['target'])
+            self.assertEqual(source_filename, expected['sources'][0])
+            self.assertEqual(args['target'], expected['target'])
             if args['ispaired']:
             if args['ispaired']:
-                self.failUnlessEqual(args['target_right'],
+                self.assertEqual(args['target_right'],
                                      expected['target_right'])
             if 'mid' in expected:
                                      expected['target_right'])
             if 'mid' in expected:
-                self.failUnlessEqual(args['mid'], expected['mid'])
+                self.assertEqual(args['mid'], expected['mid'])
 
         qseq_data = {
 
         qseq_data = {
-            os.path.join(self.subname, '11154_42JUYAAXX_c76_l5_r1.fastq'): {
+            os.path.join(self.result_map['11154'],
+                         '11154_42JUYAAXX_c76_l5_r1.fastq'): {
                 'istar': True,
                 'ispaired': True,
                 'sources': [
                     u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
             },
                 'istar': True,
                 'ispaired': True,
                 'sources': [
                     u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
             },
-            os.path.join(self.subname, '11154_42JUYAAXX_c76_l5_r2.fastq'): {
+            os.path.join(self.result_map['11154'],
+                         '11154_42JUYAAXX_c76_l5_r2.fastq'): {
                 'istar': True,
                 'ispaired': True,
                 'sources': [
                     u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
             },
                 'istar': True,
                 'ispaired': True,
                 'sources': [
                     u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
             },
-            os.path.join(self.subname, '11154_61MJTAAXX_c76_l6.fastq'): {
+            os.path.join(self.result_map['11154'],
+                         '11154_61MJTAAXX_c76_l6.fastq'): {
                 'istar': True,
                 'ispaired': False,
                 'sources': [
                 'istar': True,
                 'ispaired': False,
                 'sources': [
@@ -311,11 +538,11 @@ class TestCondorFastq(unittest.TestCase):
         }
         for args in qseq:
             expected = qseq_data[args['target']]
         }
         for args in qseq:
             expected = qseq_data[args['target']]
-            self.failUnlessEqual(args['istar'], expected['istar'])
-            self.failUnlessEqual(args['ispaired'], expected['ispaired'])
+            self.assertEqual(args['istar'], expected['istar'])
+            self.assertEqual(args['ispaired'], expected['ispaired'])
             for i in range(len(expected['sources'])):
                 _, filename = os.path.split(args['sources'][i])
             for i in range(len(expected['sources'])):
                 _, filename = os.path.split(args['sources'][i])
-                self.failUnlessEqual(filename, expected['sources'][i])
+                self.assertEqual(filename, expected['sources'][i])
 
 
         split_test = dict((( x['target'], x) for x in
 
 
         split_test = dict((( x['target'], x) for x in
@@ -326,64 +553,82 @@ class TestCondorFastq(unittest.TestCase):
             {'sources': [u'11154_NoIndex_L003_R2_001.fastq.gz',
                          u'11154_NoIndex_L003_R2_002.fastq.gz'],
              'pyscript': 'desplit_fastq.pyc',
             {'sources': [u'11154_NoIndex_L003_R2_001.fastq.gz',
                          u'11154_NoIndex_L003_R2_002.fastq.gz'],
              'pyscript': 'desplit_fastq.pyc',
-             'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'}]
+             'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'},
+            {'sources': [u'12345_CGATGT_L003_R1_001.fastq.gz',
+                         u'12345_CGATGT_L003_R1_002.fastq.gz',
+                         u'12345_CGATGT_L003_R1_003.fastq.gz',
+                         ],
+             'pyscript': 'desplit_fastq.pyc',
+             'target': u'12345_C02F9ACXX_c202_l3_r1.fastq'},
+            {'sources': [u'12345_CGATGT_L003_R2_001.fastq.gz',
+                         u'12345_CGATGT_L003_R2_002.fastq.gz',
+                         u'12345_CGATGT_L003_R2_003.fastq.gz',
+                         ],
+             'pyscript': 'desplit_fastq.pyc',
+             'target': u'12345_C02F9ACXX_c202_l3_r2.fastq'}
+             ]
          ))
         for arg in split:
             _, target = os.path.split(arg['target'])
             pyscript = split_test[target]['pyscript']
          ))
         for arg in split:
             _, target = os.path.split(arg['target'])
             pyscript = split_test[target]['pyscript']
-            self.failUnless(arg['pyscript'].endswith(pyscript))
+            self.assertTrue(arg['pyscript'].endswith(pyscript))
             filename = split_test[target]['target']
             filename = split_test[target]['target']
-            self.failUnless(arg['target'].endswith(filename))
+            self.assertTrue(arg['target'].endswith(filename))
             for s_index in range(len(arg['sources'])):
                 s1 = arg['sources'][s_index]
                 s2 = split_test[target]['sources'][s_index]
             for s_index in range(len(arg['sources'])):
                 s1 = arg['sources'][s_index]
                 s2 = split_test[target]['sources'][s_index]
-                self.failUnless(s1.endswith(s2))
-
-        #print '-------commands---------'
-        #pprint (commands)
+                self.assertTrue(s1.endswith(s2))
 
     def test_create_scripts(self):
 
     def test_create_scripts(self):
-        os.chdir(self.tempdir)
-        extract = condorfastq.CondorFastqExtract('host',
-                                                 FAKE_APIDATA,
-                                                 self.tempdir,
-                                                 self.logdir)
-        extract.api = FakeApi()
-        extract.create_scripts(self.result_map)
-
-        self.failUnless(os.path.exists('srf.condor'))
+        self.extract.create_scripts(self.result_map)
+
+        self.assertTrue(os.path.exists('srf.condor'))
         with open('srf.condor', 'r') as srf:
             arguments = [ l for l in srf if l.startswith('argument') ]
             arguments.sort()
         with open('srf.condor', 'r') as srf:
             arguments = [ l for l in srf if l.startswith('argument') ]
             arguments.sort()
-            self.failUnlessEqual(len(arguments), 2)
-            self.failUnless('--single sub-11154/11154_30221AAXX_c33_l4.fastq'
+            self.assertEqual(len(arguments), 2)
+            self.assertTrue('sub-11154/11154_30221AAXX_c33_l4.fastq'
                             in arguments[0])
                             in arguments[0])
-            self.failUnless(
-                '--right sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq' in
+            self.assertTrue(
+                'sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq' in
                 arguments[1])
 
                 arguments[1])
 
-        self.failUnless(os.path.exists('qseq.condor'))
+        self.assertTrue(os.path.exists('qseq.condor'))
         with open('qseq.condor', 'r') as srf:
             arguments = [ l for l in srf if l.startswith('argument') ]
             arguments.sort()
         with open('qseq.condor', 'r') as srf:
             arguments = [ l for l in srf if l.startswith('argument') ]
             arguments.sort()
-            self.failUnlessEqual(len(arguments), 3)
-            self.failUnless('-o sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ' in
+            self.assertEqual(len(arguments), 3)
+            self.assertTrue('sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ' in
                             arguments[0])
                             arguments[0])
-            self.failUnless(
+            self.assertTrue(
                 'C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2' in
                 arguments[1])
                 'C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2' in
                 arguments[1])
-            self.failUnless('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX' in
+            self.assertTrue('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX' in
                             arguments[2])
 
                             arguments[2])
 
-        self.failUnless(os.path.exists('split_fastq.condor'))
+        self.assertTrue(os.path.exists('split_fastq.condor'))
         with open('split_fastq.condor', 'r') as split:
             arguments = [ l for l in split if l.startswith('argument') ]
             arguments.sort()
         with open('split_fastq.condor', 'r') as split:
             arguments = [ l for l in split if l.startswith('argument') ]
             arguments.sort()
-            self.failUnlessEqual(len(arguments), 2)
-            self.failUnless('11154_NoIndex_L003_R1_001.fastq.gz' in \
+            self.assertEqual(len(arguments), 4)
+            # Lane 3 Read 1
+            self.assertTrue('11154_NoIndex_L003_R1_001.fastq.gz' in \
                             arguments[0])
                             arguments[0])
-            self.failUnless('11154_NoIndex_L003_R2_002.fastq.gz' in \
+            # Lane 3 Read 2
+            self.assertTrue('11154_NoIndex_L003_R2_002.fastq.gz' in \
                             arguments[1])
                             arguments[1])
+            # Lane 3 Read 1
+            self.assertTrue('12345_CGATGT_L003_R1_001.fastq.gz' in arguments[2])
+            self.assertTrue('12345_CGATGT_L003_R1_002.fastq.gz' in arguments[2])
+            self.assertTrue('12345_CGATGT_L003_R1_003.fastq.gz' in arguments[2])
+            self.assertTrue('12345_C02F9ACXX_c202_l3_r1.fastq' in arguments[2])
+
+            # Lane 3 Read 2
+            self.assertTrue('12345_CGATGT_L003_R2_001.fastq.gz' in arguments[3])
+            self.assertTrue('12345_CGATGT_L003_R2_002.fastq.gz' in arguments[3])
+            self.assertTrue('12345_CGATGT_L003_R2_003.fastq.gz' in arguments[3])
+            self.assertTrue('12345_C02F9ACXX_c202_l3_r2.fastq' in arguments[3])
+
 
 def suite():
     suite = unittest.makeSuite(TestCondorFastq, 'test')
 
 def suite():
     suite = unittest.makeSuite(TestCondorFastq, 'test')