from pprint import pprint
import shutil
import tempfile
-import unittest
-from htsworkflow.submission import condorfastq
+from django.test import TestCase
+from django.test.utils import setup_test_environment, \
+ teardown_test_environment
+from django.db import connection
+from django.conf import settings
+
+from htsworkflow.submission.condorfastq import CondorFastqExtract
+from htsworkflow.submission.results import ResultMap
+from htsworkflow.util.rdfhelp import \
+ add_default_schemas, load_string_into_model, dump_model
+from htsworkflow.util.rdfinfer import Infer
FCDIRS = [
'C02F9ACXX',
'C02F9ACXX/C1-202/Project_11154',
'C02F9ACXX/C1-202/Project_12342_Index1',
'C02F9ACXX/C1-202/Project_12342_Index2',
+ 'C02F9ACXX/C1-202/Project_12345',
'42JUYAAXX',
'42JUYAAXX/C1-76',
'30221AAXX',
'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_002.fastq.gz',
'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_001.fastq.gz',
'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_002.fastq.gz',
- 'C02F9ACXX/C1-202/Project_12342_Index1/11114_GCCAAT_L004_R1_001.fastq.gz',
- 'C02F9ACXX/C1-202/Project_12342_Index2/11119_CGATGT_L007_R1_001.fastq.gz',
- 'C02F9ACXX/C1-202/Project_12342_Index2/11119_CGATGT_L005_R1_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R1_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R2_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R1_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R2_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R1_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R2_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_002.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_003.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_001.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_002.fastq.gz',
+ 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_003.fastq.gz',
'42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r1.tar.bz2',
'42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r1.tar.bz2',
'42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r1.tar.bz2',
'61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l8_r1.tar.bz2',
]
-LIBDATA = {
- '11154':{u'antibody_id': None,
- u'cell_line': u'Unknown',
- u'cell_line_id': 1,
- u'experiment_type': u'RNA-seq',
- u'experiment_type_id': 4,
- u'gel_cut_size': 300,
- u'hidden': False,
- u'id': u'11154',
- u'insert_size': 200,
- u'lane_set': [{u'flowcell': u'30221AAXX',
- u'lane_number': 4,
- u'paired_end': False,
- u'read_length': 33,
- u'status': u'Unknown',
- u'status_code': None},
- {u'flowcell': u'42JUYAAXX',
- u'lane_number': 5,
- u'paired_end': True,
- u'read_length': 76,
- u'status': u'Unknown',
- u'status_code': None},
- {u'flowcell': u'61MJTAAXX',
- u'lane_number': 6,
- u'paired_end': False,
- u'read_length': 76,
- u'status': u'Unknown',
- u'status_code': None},
- {u'flowcell': u'30DY0AAXX',
- u'lane_number': 8,
- u'paired_end': True,
- u'read_length': 76,
- u'status': u'Unknown',
- u'status_code': None},
- {u'flowcell': u'C02F9ACXX',
- u'lane_number': 3,
- u'paired_end': True,
- u'read_length': 101,
- u'status': u'Unknown',
- u'status_code': None}],
- u'library_id': u'11154',
- u'library_name': u'Paired ends ASDF ',
- u'library_species': u'Mus musculus',
- u'library_species_id': 9,
- u'library_type': u'Paired End (non-multiplexed)',
- u'library_type_id': 2,
- u'made_by': u'Gary Gygax',
- u'made_for': u'TSR',
- u'notes': u'300 bp gel fragment',
- u'replicate': 1,
- u'stopping_point': u'1Aa',
- u'successful_pM': None,
- u'undiluted_concentration': u'29.7'}
- }
-
-FAKE_APIDATA = {'apiid':0, 'apikey': 'foo'}
-
-class FakeApi(object):
- def __init__(self, *args, **kwargs):
- pass
-
- def get_library(self, libid):
- lib_data = LIBDATA[libid]
- return copy.deepcopy(lib_data)
-
-class TestCondorFastq(unittest.TestCase):
+lib_turtle = """@prefix : <http://www.w3.org/1999/xhtml> .
+@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
+@prefix dc: <http://purl.org/dc/elements/1.1/> .
+@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .
+@prefix libns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
+@prefix seqns: <http://jumpgate.caltech.edu/wiki/LibraryOntology#> .
+@prefix invns: <http://jumpgate.caltech.edu/wiki/InventoryOntology#> .
+
+<http://localhost/library/10000/> a libns:Library .
+<http://localhost/library/1331/> a libns:Library .
+<http://localhost/library/1421/> a libns:Library .
+<http://localhost/library/1661/> a libns:Library .
+
+<http://localhost/flowcell/30221AAXX/>
+ a libns:IlluminaFlowcell ;
+ libns:read_length 33 ;
+ libns:flowcell_type "Single"@en ;
+ libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+ libns:has_lane <http://localhost/lane/3401> ;
+ libns:has_lane <http://localhost/lane/3402> ;
+ libns:has_lane <http://localhost/lane/3403> ;
+ libns:has_lane <http://localhost/lane/3404> ;
+ libns:has_lane <http://localhost/lane/3405> ;
+ libns:has_lane <http://localhost/lane/3406> ;
+ libns:has_lane <http://localhost/lane/3407> ;
+ libns:has_lane <http://localhost/lane/3408> ;
+ libns:flowcell_id "30221AAXX"@en .
+
+<http://localhost/lane/3401>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/10000/> ;
+ libns:lane_number "1" .
+<http://localhost/lane/3402>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/10000/> ;
+ libns:lane_number "2" .
+<http://localhost/lane/3403>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/10000/> ;
+ libns:lane_number "3" .
+<http://localhost/lane/3404>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/11154/> ;
+ libns:lane_number "4" .
+ # paired_end 1;
+ # read_length 33;
+ # status "Unknown"@en .
+<http://localhost/lane/3405>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/10000/> ;
+ libns:lane_number "5" .
+<http://localhost/lane/3406>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/10000/> ;
+ libns:lane_number "6" .
+<http://localhost/lane/3407>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/10000/> ;
+ libns:lane_number "7" .
+<http://localhost/lane/3408>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30221AAXX/> ;
+ libns:library <http://localhost/library/10000/> ;
+ libns:lane_number "8" .
+
+<http://localhost/flowcell/42JUYAAXX/>
+ a libns:IlluminaFlowcell ;
+ libns:read_length 76 ;
+ libns:flowcell_type "Paired"@en ;
+ libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+ libns:has_lane <http://localhost/lane/4201> ;
+ libns:has_lane <http://localhost/lane/4202> ;
+ libns:has_lane <http://localhost/lane/4203> ;
+ libns:has_lane <http://localhost/lane/4204> ;
+ libns:has_lane <http://localhost/lane/4205> ;
+ libns:has_lane <http://localhost/lane/4206> ;
+ libns:has_lane <http://localhost/lane/4207> ;
+ libns:has_lane <http://localhost/lane/4208> ;
+ libns:flowcell_id "42JUYAAXX"@en .
+
+<http://localhost/lane/4201>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/1421/> ;
+ libns:lane_number "1" .
+<http://localhost/lane/4202>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/1421/> ;
+ libns:lane_number "2" .
+<http://localhost/lane/4203>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/1421/> ;
+ libns:lane_number "3" .
+<http://localhost/lane/4204>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/1421/> ;
+ libns:lane_number "4" .
+<http://localhost/lane/4205>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/11154/> ;
+ libns:lane_number "5" .
+ # paired_end 1;
+ # read_length 76;
+ # status "Unknown"@en .
+<http://localhost/lane/4206>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/1421/> ;
+ libns:lane_number "6" .
+<http://localhost/lane/4207>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/1421/> ;
+ libns:lane_number "7" .
+<http://localhost/lane/4208>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/42JUYAAXX/> ;
+ libns:library <http://localhost/library/1421/> ;
+ libns:lane_number "8" .
+
+<http://localhost/flowcell/61MJTAAXX/>
+ a libns:IlluminaFlowcell ;
+ libns:read_length 76 ;
+ libns:flowcell_type "Single"@en ;
+ libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+ libns:has_lane <http://localhost/lane/6601> ;
+ libns:has_lane <http://localhost/lane/6602> ;
+ libns:has_lane <http://localhost/lane/6603> ;
+ libns:has_lane <http://localhost/lane/6604> ;
+ libns:has_lane <http://localhost/lane/6605> ;
+ libns:has_lane <http://localhost/lane/6606> ;
+ libns:has_lane <http://localhost/lane/6607> ;
+ libns:has_lane <http://localhost/lane/6608> ;
+ libns:flowcell_id "61MJTAAXX"@en .
+
+<http://localhost/lane/6601>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/1661/> ;
+ libns:lane_number "1" .
+<http://localhost/lane/6602>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/1661/> ;
+ libns:lane_number "2" .
+<http://localhost/lane/6603>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/1661/> ;
+ libns:lane_number "3" .
+<http://localhost/lane/6604>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/1661/> ;
+ libns:lane_number "4" .
+<http://localhost/lane/6605>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/1661/> ;
+ libns:lane_number "5" .
+<http://localhost/lane/6606>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/11154/> ;
+ libns:lane_number "6" .
+ # paired_end 1;
+ # read_length 76;
+ # status "Unknown"@en .
+<http://localhost/lane/6607>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/1661/> ;
+ libns:lane_number "7" .
+<http://localhost/lane/6608>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/61MJTAAXX/> ;
+ libns:library <http://localhost/library/1661/> ;
+ libns:lane_number "8" .
+
+<http://localhost/flowcell/30DY0AAXX/>
+ a libns:IlluminaFlowcell ;
+ libns:read_length 76 ;
+ libns:flowcell_type "Paired"@en ;
+ libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+ libns:has_lane <http://localhost/lane/3801> ;
+ libns:has_lane <http://localhost/lane/3802> ;
+ libns:has_lane <http://localhost/lane/3803> ;
+ libns:has_lane <http://localhost/lane/3804> ;
+ libns:has_lane <http://localhost/lane/3805> ;
+ libns:has_lane <http://localhost/lane/3806> ;
+ libns:has_lane <http://localhost/lane/3807> ;
+ libns:has_lane <http://localhost/lane/3808> ;
+ libns:flowcell_id "30DY0AAXX"@en .
+
+<http://localhost/lane/3801>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/1331/> ;
+ libns:lane_number "1" .
+<http://localhost/lane/3802>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/1331/> ;
+ libns:lane_number "2" .
+<http://localhost/lane/3803>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/1331/> ;
+ libns:lane_number "3" .
+<http://localhost/lane/3804>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/1331/> ;
+ libns:lane_number "4" .
+<http://localhost/lane/3805>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/1331/> ;
+ libns:lane_number "5" .
+<http://localhost/lane/3806>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/1331/> ;
+ libns:lane_number "6" .
+<http://localhost/lane/3807>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/1331/> ;
+ libns:lane_number "7" .
+<http://localhost/lane/3808>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/30DY0AAXX/> ;
+ libns:library <http://localhost/library/11154/> ;
+ libns:lane_number "8" .
+ # paired_end 1;
+ # read_length 76;
+ # status "Unknown"@en .
+
+<http://localhost/flowcell/C02F9ACXX/>
+ a libns:IlluminaFlowcell ;
+ libns:read_length 101 ;
+ libns:flowcell_type "Paired"@en ;
+ libns:date "2012-01-19T20:23:26"^^xsd:dateTime;
+ libns:has_lane <http://localhost/lane/12300> ;
+ libns:has_lane <http://localhost/lane/12500> ;
+ libns:flowcell_id "C02F9ACXX"@en .
+
+<http://localhost/lane/12300>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
+ libns:library <http://localhost/library/12345/> ;
+ libns:lane_number "3" .
+ # paired_end 1;
+ # read_length 101;
+ # status "Unknown"@en .
+
+<http://localhost/lane/12500>
+ a libns:IlluminaLane ;
+ libns:flowcell <http://localhost/flowcell/C02F9ACXX/> ;
+ libns:library <http://localhost/library/11154/> ;
+ libns:lane_number "3" .
+ # paired_end 1;
+ # read_length 101;
+ # status "Unknown"@en .
+
+<http://localhost/library/11154/>
+ a libns:Library ;
+ libns:affiliation "TSR"@en;
+ libns:concentration "29.7";
+ libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
+ libns:experiment_type "RNA-seq"@en ;
+ libns:gel_cut 300 ;
+ libns:has_lane <http://localhost/lane/3404> ;
+ libns:has_lane <http://localhost/lane/4205> ;
+ libns:has_lane <http://localhost/lane/6606> ;
+ libns:has_lane <http://localhost/lane/3808> ;
+ libns:has_lane <http://localhost/lane/12500> ;
+ libns:insert_size 2000 ;
+ libns:library_id "11154"@en ;
+ libns:library_type "Paired End (Multiplexed)"@en ;
+ libns:made_by "Gary Gygax"@en ;
+ libns:name "Paired Ends ASDF"@en ;
+ libns:replicate "1"@en;
+ libns:species_name "Mus musculus"@en ;
+ libns:stopping_point "Completed"@en ;
+ libns:total_unique_locations 8841201 .
+ # cell_line
+
+<http://localhost/library/12345/>
+ a libns:Library ;
+ libns:affiliation "TSR"@en;
+ libns:concentration "12.345";
+ libns:cell_line "Unknown"@en ;
+ libns:date "2012-12-28T00:00:00"^^xsd:dateTime ;
+ libns:experiment_type "RNA-seq"@en ;
+ libns:gel_cut 300 ;
+ libns:has_lane <http://localhost/lane/12300> ;
+ libns:insert_size 2000 ;
+ libns:library_id "12345"@en ;
+ libns:library_type "Paired End (Multiplexed)"@en ;
+ libns:made_by "Gary Gygax"@en ;
+ libns:name "Paired Ends THING"@en ;
+ libns:replicate "1"@en;
+ libns:species_name "Mus musculus"@en ;
+ libns:stopping_point "Completed"@en ;
+ libns:total_unique_locations 8841201 .
+ # cell_line
+"""
+HOST = "http://localhost"
+
+class TestCondorFastq(TestCase):
def setUp(self):
self.cwd = os.getcwd()
with open(filename, 'w') as stream:
stream.write('testfile')
- self.subname = unicode('sub-11154')
- self.subdir = os.path.join(self.tempdir, self.subname)
- os.mkdir(self.subdir)
+ self.result_map = ResultMap()
+ for lib_id in [u'11154', u'12345']:
+ subname = 'sub-%s' % (lib_id,)
+ sub_dir = os.path.join(self.tempdir, subname)
+ os.mkdir(sub_dir)
+ self.result_map[lib_id] = sub_dir
+
+ self.extract = CondorFastqExtract(HOST,
+ self.flowcelldir,
+ self.logdir)
+ load_string_into_model(self.extract.model, 'turtle', lib_turtle)
+ add_default_schemas(self.extract.model)
+ inference = Infer(self.extract.model)
+ errmsgs = list(inference.run_validation())
+ self.assertEqual(len(errmsgs), 0)
+ os.chdir(self.tempdir)
def tearDown(self):
shutil.rmtree(self.tempdir)
os.chdir(self.cwd)
+ def test_find_relevant_flowcell_ids(self):
+ expected = set(('30221AAXX',
+ '42JUYAAXX',
+ '61MJTAAXX',
+ '30DY0AAXX',
+ 'C02F9ACXX'))
+ flowcell_ids = self.extract.find_relevant_flowcell_ids()
+ self.assertEqual(flowcell_ids, expected)
+
def test_find_archive_sequence(self):
- extract = condorfastq.CondorFastqExtract('host',
- FAKE_APIDATA,
- self.tempdir,
- self.logdir)
- extract.api = FakeApi()
- result_map = [('11154', self.subname)]
- lib_db = extract.find_archive_sequence_files(result_map)
-
- self.failUnlessEqual(len(lib_db['11154']['lanes']), 5)
- lanes = [
- lib_db['11154']['lanes'][(u'30221AAXX', 4)],
- lib_db['11154']['lanes'][(u'42JUYAAXX', 5)],
- lib_db['11154']['lanes'][(u'61MJTAAXX', 6)],
- lib_db['11154']['lanes'][(u'30DY0AAXX', 8)],
- lib_db['11154']['lanes'][(u'C02F9ACXX', 3)],
- ]
- self.failUnlessEqual(len(lanes[0]), 1)
- self.failUnlessEqual(len(lanes[1]), 2)
- self.failUnlessEqual(len(lanes[2]), 1)
- self.failUnlessEqual(len(lanes[3]), 1)
- self.failUnlessEqual(len(lanes[4]), 4)
+ seqs = self.extract.find_archive_sequence_files(self.result_map)
+
+ expected = set([
+ (u'11154', u'42JUYAAXX', '5', 1, 76, True, 'qseq'),
+ (u'11154', u'42JUYAAXX', '5', 2, 76, True, 'qseq'),
+ (u'11154', u'61MJTAAXX', '6', 1, 76, False, 'qseq'),
+ (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+ (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+ (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+ (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+ (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+ (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+ (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+ (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+ (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'),
+ (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'),
+ (u'11154', u'30221AAXX', '4', 1, 33, False, 'srf'),
+ (u'11154', u'30DY0AAXX', '8', 1, 151, True, 'srf')
+ ])
+ found = set([(l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle, l.ispaired, l.filetype) for l in seqs])
+ self.assertEqual(expected, found)
def test_find_needed_targets(self):
+ lib_db = self.extract.find_archive_sequence_files(self.result_map)
- extract = condorfastq.CondorFastqExtract('host',
- FAKE_APIDATA,
- self.tempdir,
- self.logdir)
- extract.api = FakeApi()
- result_map = [('11154', self.subname)]
- lib_db = extract.find_archive_sequence_files(result_map)
-
- needed_targets = extract.find_missing_targets(result_map,
- lib_db)
- self.failUnlessEqual(len(needed_targets), 7)
+ needed_targets = self.extract.update_fastq_targets(self.result_map,
+ lib_db)
+ self.assertEqual(len(needed_targets), 9)
srf_30221 = needed_targets[
- self.subname + u'/11154_30221AAXX_c33_l4.fastq']
+ self.result_map['11154'] + u'/11154_30221AAXX_c33_l4.fastq']
qseq_42JUY_r1 = needed_targets[
- self.subname + u'/11154_42JUYAAXX_c76_l5_r1.fastq']
+ self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r1.fastq']
qseq_42JUY_r2 = needed_targets[
- self.subname + u'/11154_42JUYAAXX_c76_l5_r2.fastq']
+ self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r2.fastq']
qseq_61MJT = needed_targets[
- self.subname + u'/11154_61MJTAAXX_c76_l6.fastq']
+ self.result_map['11154'] + u'/11154_61MJTAAXX_c76_l6.fastq']
split_C02F9_r1 = needed_targets[
- self.subname + u'/11154_C02F9ACXX_c202_l3_r1.fastq']
+ self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r1.fastq']
split_C02F9_r2 = needed_targets[
- self.subname + u'/11154_C02F9ACXX_c202_l3_r2.fastq']
+ self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r2.fastq']
- self.failUnlessEqual(len(srf_30221['srf']), 1)
- self.failUnlessEqual(len(qseq_42JUY_r1['qseq']), 1)
- self.failUnlessEqual(len(qseq_42JUY_r2['qseq']), 1)
- self.failUnlessEqual(len(qseq_61MJT['qseq']), 1)
- self.failUnlessEqual(len(split_C02F9_r1['split_fastq']), 2)
- self.failUnlessEqual(len(split_C02F9_r2['split_fastq']), 2)
-
- #print '-------needed targets---------'
- #pprint(needed_targets)
+ self.assertEqual(len(srf_30221['srf']), 1)
+ self.assertEqual(len(qseq_42JUY_r1['qseq']), 1)
+ self.assertEqual(len(qseq_42JUY_r2['qseq']), 1)
+ self.assertEqual(len(qseq_61MJT['qseq']), 1)
+ self.assertEqual(len(split_C02F9_r1['split_fastq']), 2)
+ self.assertEqual(len(split_C02F9_r2['split_fastq']), 2)
def test_generate_fastqs(self):
- extract = condorfastq.CondorFastqExtract('host',
- FAKE_APIDATA,
- self.tempdir,
- self.logdir)
- extract.api = FakeApi()
- result_map = [('11154', self.subdir)]
- commands = extract.build_condor_arguments(result_map)
+ commands = self.extract.build_condor_arguments(self.result_map)
srf = commands['srf']
qseq = commands['qseq']
split = commands['split_fastq']
- self.failUnlessEqual(len(srf), 2)
- self.failUnlessEqual(len(qseq), 3)
- self.failUnlessEqual(len(split), 2)
+ self.assertEqual(len(srf), 2)
+ self.assertEqual(len(qseq), 3)
+ self.assertEqual(len(split), 4)
srf_data = {
- os.path.join(self.subdir, '11154_30221AAXX_c33_l4.fastq'): {
+ os.path.join(self.result_map['11154'],
+ '11154_30221AAXX_c33_l4.fastq'): {
'mid': None,
'ispaired': False,
'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
'flowcell': u'30221AAXX',
- 'target': os.path.join(self.subdir,
+ 'target': os.path.join(self.result_map['11154'],
u'11154_30221AAXX_c33_l4.fastq'),
},
- os.path.join(self.subdir, '11154_30DY0AAXX_c151_l8_r1.fastq'): {
+ os.path.join(self.result_map['11154'],
+ '11154_30DY0AAXX_c151_l8_r1.fastq'): {
'mid': None,
'ispaired': True,
'flowcell': u'30DY0AAXX',
'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
'mid': 76,
'target':
- os.path.join(self.subdir,
+ os.path.join(self.result_map['11154'],
u'11154_30DY0AAXX_c151_l8_r1.fastq'),
'target_right':
- os.path.join(self.subdir,
+ os.path.join(self.result_map['11154'],
u'11154_30DY0AAXX_c151_l8_r2.fastq'),
}
}
for args in srf:
expected = srf_data[args['target']]
- self.failUnlessEqual(args['ispaired'], expected['ispaired'])
- self.failUnlessEqual(len(args['sources']), 1)
+ self.assertEqual(args['ispaired'], expected['ispaired'])
+ self.assertEqual(len(args['sources']), 1)
_, source_filename = os.path.split(args['sources'][0])
- self.failUnlessEqual(source_filename, expected['sources'][0])
- self.failUnlessEqual(args['target'], expected['target'])
+ self.assertEqual(source_filename, expected['sources'][0])
+ self.assertEqual(args['target'], expected['target'])
if args['ispaired']:
- self.failUnlessEqual(args['target_right'],
+ self.assertEqual(args['target_right'],
expected['target_right'])
if 'mid' in expected:
- self.failUnlessEqual(args['mid'], expected['mid'])
+ self.assertEqual(args['mid'], expected['mid'])
qseq_data = {
- os.path.join(self.subdir, '11154_42JUYAAXX_c76_l5_r1.fastq'): {
+ os.path.join(self.result_map['11154'],
+ '11154_42JUYAAXX_c76_l5_r1.fastq'): {
'istar': True,
'ispaired': True,
'sources': [
u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
},
- os.path.join(self.subdir, '11154_42JUYAAXX_c76_l5_r2.fastq'): {
+ os.path.join(self.result_map['11154'],
+ '11154_42JUYAAXX_c76_l5_r2.fastq'): {
'istar': True,
'ispaired': True,
'sources': [
u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
},
- os.path.join(self.subdir, '11154_61MJTAAXX_c76_l6.fastq'): {
+ os.path.join(self.result_map['11154'],
+ '11154_61MJTAAXX_c76_l6.fastq'): {
'istar': True,
'ispaired': False,
'sources': [
}
for args in qseq:
expected = qseq_data[args['target']]
- self.failUnlessEqual(args['istar'], expected['istar'])
- self.failUnlessEqual(args['ispaired'], expected['ispaired'])
+ self.assertEqual(args['istar'], expected['istar'])
+ self.assertEqual(args['ispaired'], expected['ispaired'])
for i in range(len(expected['sources'])):
_, filename = os.path.split(args['sources'][i])
- self.failUnlessEqual(filename, expected['sources'][i])
+ self.assertEqual(filename, expected['sources'][i])
- split_test = { x['target']: x for x in
+ split_test = dict((( x['target'], x) for x in
[{'sources': [u'11154_NoIndex_L003_R1_001.fastq.gz',
u'11154_NoIndex_L003_R1_002.fastq.gz'],
'pyscript': 'desplit_fastq.pyc',
{'sources': [u'11154_NoIndex_L003_R2_001.fastq.gz',
u'11154_NoIndex_L003_R2_002.fastq.gz'],
'pyscript': 'desplit_fastq.pyc',
- 'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'}]
- }
+ 'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'},
+ {'sources': [u'12345_CGATGT_L003_R1_001.fastq.gz',
+ u'12345_CGATGT_L003_R1_002.fastq.gz',
+ u'12345_CGATGT_L003_R1_003.fastq.gz',
+ ],
+ 'pyscript': 'desplit_fastq.pyc',
+ 'target': u'12345_C02F9ACXX_c202_l3_r1.fastq'},
+ {'sources': [u'12345_CGATGT_L003_R2_001.fastq.gz',
+ u'12345_CGATGT_L003_R2_002.fastq.gz',
+ u'12345_CGATGT_L003_R2_003.fastq.gz',
+ ],
+ 'pyscript': 'desplit_fastq.pyc',
+ 'target': u'12345_C02F9ACXX_c202_l3_r2.fastq'}
+ ]
+ ))
for arg in split:
_, target = os.path.split(arg['target'])
pyscript = split_test[target]['pyscript']
- self.failUnless(arg['pyscript'].endswith(pyscript))
+ self.assertTrue(arg['pyscript'].endswith(pyscript))
filename = split_test[target]['target']
- self.failUnless(arg['target'].endswith(filename))
+ self.assertTrue(arg['target'].endswith(filename))
for s_index in range(len(arg['sources'])):
s1 = arg['sources'][s_index]
s2 = split_test[target]['sources'][s_index]
- self.failUnless(s1.endswith(s2))
-
- #print '-------commands---------'
- #pprint (commands)
+ self.assertTrue(s1.endswith(s2))
def test_create_scripts(self):
- os.chdir(self.tempdir)
- extract = condorfastq.CondorFastqExtract('host',
- FAKE_APIDATA,
- self.tempdir,
- self.logdir)
- extract.api = FakeApi()
- result_map = [('11154', self.subname)]
- extract.create_scripts(result_map)
-
- self.failUnless(os.path.exists('srf.condor'))
+ self.extract.create_scripts(self.result_map)
+
+ self.assertTrue(os.path.exists('srf.condor'))
with open('srf.condor', 'r') as srf:
arguments = [ l for l in srf if l.startswith('argument') ]
arguments.sort()
- self.failUnlessEqual(len(arguments), 2)
- self.failUnless('--single sub-11154/11154_30221AAXX_c33_l4.fastq'
+ self.assertEqual(len(arguments), 2)
+ self.assertTrue('sub-11154/11154_30221AAXX_c33_l4.fastq'
in arguments[0])
- self.failUnless(
- '--right sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq' in
+ self.assertTrue(
+ 'sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq' in
arguments[1])
- self.failUnless(os.path.exists('qseq.condor'))
+ self.assertTrue(os.path.exists('qseq.condor'))
with open('qseq.condor', 'r') as srf:
arguments = [ l for l in srf if l.startswith('argument') ]
arguments.sort()
- self.failUnlessEqual(len(arguments), 3)
- self.failUnless('-o sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ' in
+ self.assertEqual(len(arguments), 3)
+ self.assertTrue('sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ' in
arguments[0])
- self.failUnless(
+ self.assertTrue(
'C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2' in
arguments[1])
- self.failUnless('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX' in
+ self.assertTrue('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX' in
arguments[2])
- self.failUnless(os.path.exists('split_fastq.condor'))
+ self.assertTrue(os.path.exists('split_fastq.condor'))
with open('split_fastq.condor', 'r') as split:
arguments = [ l for l in split if l.startswith('argument') ]
arguments.sort()
- self.failUnlessEqual(len(arguments), 2)
- self.failUnless('11154_NoIndex_L003_R1_001.fastq.gz' in \
+ self.assertEqual(len(arguments), 4)
+ # Lane 3 Read 1
+ self.assertTrue('11154_NoIndex_L003_R1_001.fastq.gz' in \
arguments[0])
- self.failUnless('11154_NoIndex_L003_R2_002.fastq.gz' in \
+ # Lane 3 Read 2
+ self.assertTrue('11154_NoIndex_L003_R2_002.fastq.gz' in \
arguments[1])
+ # Lane 3 Read 1
+ self.assertTrue('12345_CGATGT_L003_R1_001.fastq.gz' in arguments[2])
+ self.assertTrue('12345_CGATGT_L003_R1_002.fastq.gz' in arguments[2])
+ self.assertTrue('12345_CGATGT_L003_R1_003.fastq.gz' in arguments[2])
+ self.assertTrue('12345_C02F9ACXX_c202_l3_r1.fastq' in arguments[2])
+
+ # Lane 3 Read 2
+ self.assertTrue('12345_CGATGT_L003_R2_001.fastq.gz' in arguments[3])
+ self.assertTrue('12345_CGATGT_L003_R2_002.fastq.gz' in arguments[3])
+ self.assertTrue('12345_CGATGT_L003_R2_003.fastq.gz' in arguments[3])
+ self.assertTrue('12345_C02F9ACXX_c202_l3_r2.fastq' in arguments[3])
def suite():
- suite = unittest.makeSuite(TestCondorFastq, 'test')
+ from unittest2 import TestSuite, defaultTestLoader
+ suite = TestSuite()
+ suite.addTests(defaultTestLoader.loadTestsFromTestCase(TestCondorFastq))
return suite
if __name__ == "__main__":
- unittest.main(defaultTest='suite')
-
+ from unittest2 import main
+ main(defaultTest='suite')