X-Git-Url: http://woldlab.caltech.edu/gitweb/?a=blobdiff_plain;f=htsworkflow%2Fsubmission%2Ftest%2Ftest_condorfastq.py;h=1e1c2d9bc56677cdef09b5b2efca48d296e5fdb3;hb=6b0392dd1788ee645fe4a467fce28d8640d2c8f2;hp=74893eb5facfde2a8e2154816e500f7278cbef14;hpb=3698d4d80f43f34308e723218b7e1ead845f92f1;p=htsworkflow.git diff --git a/htsworkflow/submission/test/test_condorfastq.py b/htsworkflow/submission/test/test_condorfastq.py index 74893eb..1e1c2d9 100644 --- a/htsworkflow/submission/test/test_condorfastq.py +++ b/htsworkflow/submission/test/test_condorfastq.py @@ -5,9 +5,18 @@ import os from pprint import pprint import shutil import tempfile -import unittest -from htsworkflow.submission import condorfastq +from django.test import TestCase +from django.test.utils import setup_test_environment, \ + teardown_test_environment +from django.db import connection +from django.conf import settings + +from htsworkflow.submission.condorfastq import CondorFastqExtract +from htsworkflow.submission.results import ResultMap +from htsworkflow.util.rdfhelp import \ + add_default_schemas, load_string_into_model, dump_model +from htsworkflow.util.rdfinfer import Infer FCDIRS = [ 'C02F9ACXX', @@ -15,6 +24,7 @@ FCDIRS = [ 'C02F9ACXX/C1-202/Project_11154', 'C02F9ACXX/C1-202/Project_12342_Index1', 'C02F9ACXX/C1-202/Project_12342_Index2', + 'C02F9ACXX/C1-202/Project_12345', '42JUYAAXX', '42JUYAAXX/C1-76', '30221AAXX', @@ -30,9 +40,18 @@ DATAFILES = [ 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_002.fastq.gz', 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_001.fastq.gz', 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_002.fastq.gz', - 'C02F9ACXX/C1-202/Project_12342_Index1/11114_GCCAAT_L004_R1_001.fastq.gz', - 'C02F9ACXX/C1-202/Project_12342_Index2/11119_CGATGT_L007_R1_001.fastq.gz', - 'C02F9ACXX/C1-202/Project_12342_Index2/11119_CGATGT_L005_R1_001.fastq.gz', + 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R1_001.fastq.gz', + 'C02F9ACXX/C1-202/Project_12342_Index1/12342_GCCAAT_L004_R2_001.fastq.gz', + 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R1_001.fastq.gz', + 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L007_R2_001.fastq.gz', + 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R1_001.fastq.gz', + 'C02F9ACXX/C1-202/Project_12342_Index2/12342_CGATGT_L005_R2_001.fastq.gz', + 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_001.fastq.gz', + 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_002.fastq.gz', + 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R1_003.fastq.gz', + 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_001.fastq.gz', + 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_002.fastq.gz', + 'C02F9ACXX/C1-202/Project_12345/12345_CGATGT_L003_R2_003.fastq.gz', '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r1.tar.bz2', '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r1.tar.bz2', '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r1.tar.bz2', @@ -76,72 +95,328 @@ DATAFILES = [ '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l8_r1.tar.bz2', ] -LIBDATA = { - '11154':{u'antibody_id': None, - u'cell_line': u'Unknown', - u'cell_line_id': 1, - u'experiment_type': u'RNA-seq', - u'experiment_type_id': 4, - u'gel_cut_size': 300, - u'hidden': False, - u'id': u'11154', - u'insert_size': 200, - u'lane_set': [{u'flowcell': u'30221AAXX', - u'lane_number': 4, - u'paired_end': False, - u'read_length': 33, - u'status': u'Unknown', - u'status_code': None}, - {u'flowcell': u'42JUYAAXX', - u'lane_number': 5, - u'paired_end': True, - u'read_length': 76, - u'status': u'Unknown', - u'status_code': None}, - {u'flowcell': u'61MJTAAXX', - u'lane_number': 6, - u'paired_end': False, - u'read_length': 76, - u'status': u'Unknown', - u'status_code': None}, - {u'flowcell': u'30DY0AAXX', - u'lane_number': 8, - u'paired_end': True, - u'read_length': 76, - u'status': u'Unknown', - u'status_code': None}, - {u'flowcell': u'C02F9ACXX', - u'lane_number': 3, - u'paired_end': True, - u'read_length': 101, - u'status': u'Unknown', - u'status_code': None}], - u'library_id': u'11154', - u'library_name': u'Paired ends ASDF ', - u'library_species': u'Mus musculus', - u'library_species_id': 9, - u'library_type': u'Paired End (non-multiplexed)', - u'library_type_id': 2, - u'made_by': u'Gary Gygax', - u'made_for': u'TSR', - u'notes': u'300 bp gel fragment', - u'replicate': 1, - u'stopping_point': u'1Aa', - u'successful_pM': None, - u'undiluted_concentration': u'29.7'} - } - -FAKE_APIDATA = {'apiid':0, 'apikey': 'foo'} - -class FakeApi(object): - def __init__(self, *args, **kwargs): - pass - - def get_library(self, libid): - lib_data = LIBDATA[libid] - return copy.deepcopy(lib_data) - -class TestCondorFastq(unittest.TestCase): +lib_turtle = """@prefix : . +@prefix rdfs: . +@prefix dc: . +@prefix xsd: . +@prefix libns: . +@prefix seqns: . +@prefix invns: . + + a libns:Library . + a libns:Library . + a libns:Library . + a libns:Library . + + + a libns:IlluminaFlowcell ; + libns:read_length 33 ; + libns:flowcell_type "Single"@en ; + libns:date "2012-01-19T20:23:26"^^xsd:dateTime; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:flowcell_id "30221AAXX"@en . + + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "1" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "2" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "3" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "4" . + # paired_end 1; + # read_length 33; + # status "Unknown"@en . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "5" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "6" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "7" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "8" . + + + a libns:IlluminaFlowcell ; + libns:read_length 76 ; + libns:flowcell_type "Paired"@en ; + libns:date "2012-01-19T20:23:26"^^xsd:dateTime; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:flowcell_id "42JUYAAXX"@en . + + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "1" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "2" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "3" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "4" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "5" . + # paired_end 1; + # read_length 76; + # status "Unknown"@en . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "6" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "7" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "8" . + + + a libns:IlluminaFlowcell ; + libns:read_length 76 ; + libns:flowcell_type "Single"@en ; + libns:date "2012-01-19T20:23:26"^^xsd:dateTime; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:flowcell_id "61MJTAAXX"@en . + + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "1" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "2" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "3" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "4" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "5" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "6" . + # paired_end 1; + # read_length 76; + # status "Unknown"@en . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "7" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "8" . + + + a libns:IlluminaFlowcell ; + libns:read_length 76 ; + libns:flowcell_type "Paired"@en ; + libns:date "2012-01-19T20:23:26"^^xsd:dateTime; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:flowcell_id "30DY0AAXX"@en . + + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "1" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "2" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "3" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "4" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "5" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "6" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "7" . + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "8" . + # paired_end 1; + # read_length 76; + # status "Unknown"@en . + + + a libns:IlluminaFlowcell ; + libns:read_length 101 ; + libns:flowcell_type "Paired"@en ; + libns:date "2012-01-19T20:23:26"^^xsd:dateTime; + libns:has_lane ; + libns:has_lane ; + libns:flowcell_id "C02F9ACXX"@en . + + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "3" . + # paired_end 1; + # read_length 101; + # status "Unknown"@en . + + + a libns:IlluminaLane ; + libns:flowcell ; + libns:library ; + libns:lane_number "3" . + # paired_end 1; + # read_length 101; + # status "Unknown"@en . + + + a libns:Library ; + libns:affiliation "TSR"@en; + libns:concentration "29.7"; + libns:date "2012-12-28T00:00:00"^^xsd:dateTime ; + libns:experiment_type "RNA-seq"@en ; + libns:gel_cut 300 ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:has_lane ; + libns:insert_size 2000 ; + libns:library_id "11154"@en ; + libns:library_type "Paired End (Multiplexed)"@en ; + libns:made_by "Gary Gygax"@en ; + libns:name "Paired Ends ASDF"@en ; + libns:replicate "1"@en; + libns:species_name "Mus musculus"@en ; + libns:stopping_point "Completed"@en ; + libns:total_unique_locations 8841201 . + # cell_line + + + a libns:Library ; + libns:affiliation "TSR"@en; + libns:concentration "12.345"; + libns:cell_line "Unknown"@en ; + libns:date "2012-12-28T00:00:00"^^xsd:dateTime ; + libns:experiment_type "RNA-seq"@en ; + libns:gel_cut 300 ; + libns:has_lane ; + libns:insert_size 2000 ; + libns:library_id "12345"@en ; + libns:library_type "Paired End (Multiplexed)"@en ; + libns:made_by "Gary Gygax"@en ; + libns:name "Paired Ends THING"@en ; + libns:replicate "1"@en; + libns:species_name "Mus musculus"@en ; + libns:stopping_point "Completed"@en ; + libns:total_unique_locations 8841201 . + # cell_line +""" +HOST = "http://localhost" + +class TestCondorFastq(TestCase): def setUp(self): self.cwd = os.getcwd() @@ -160,140 +435,151 @@ class TestCondorFastq(unittest.TestCase): with open(filename, 'w') as stream: stream.write('testfile') - self.subname = unicode('sub-11154') - self.subdir = os.path.join(self.tempdir, self.subname) - os.mkdir(self.subdir) + self.result_map = ResultMap() + for lib_id in [u'11154', u'12345']: + subname = 'sub-%s' % (lib_id,) + sub_dir = os.path.join(self.tempdir, subname) + os.mkdir(sub_dir) + self.result_map[lib_id] = sub_dir + + self.extract = CondorFastqExtract(HOST, + self.flowcelldir, + self.logdir) + load_string_into_model(self.extract.model, 'turtle', lib_turtle) + add_default_schemas(self.extract.model) + inference = Infer(self.extract.model) + errmsgs = list(inference.run_validation()) + self.assertEqual(len(errmsgs), 0) + os.chdir(self.tempdir) def tearDown(self): shutil.rmtree(self.tempdir) os.chdir(self.cwd) + def test_find_relevant_flowcell_ids(self): + expected = set(('30221AAXX', + '42JUYAAXX', + '61MJTAAXX', + '30DY0AAXX', + 'C02F9ACXX')) + flowcell_ids = self.extract.find_relevant_flowcell_ids() + self.assertEqual(flowcell_ids, expected) + def test_find_archive_sequence(self): - extract = condorfastq.CondorFastqExtract('host', - FAKE_APIDATA, - self.tempdir, - self.logdir) - extract.api = FakeApi() - result_map = [('11154', self.subname)] - lib_db = extract.find_archive_sequence_files(result_map) - - self.failUnlessEqual(len(lib_db['11154']['lanes']), 5) - lanes = [ - lib_db['11154']['lanes'][(u'30221AAXX', 4)], - lib_db['11154']['lanes'][(u'42JUYAAXX', 5)], - lib_db['11154']['lanes'][(u'61MJTAAXX', 6)], - lib_db['11154']['lanes'][(u'30DY0AAXX', 8)], - lib_db['11154']['lanes'][(u'C02F9ACXX', 3)], - ] - self.failUnlessEqual(len(lanes[0]), 1) - self.failUnlessEqual(len(lanes[1]), 2) - self.failUnlessEqual(len(lanes[2]), 1) - self.failUnlessEqual(len(lanes[3]), 1) - self.failUnlessEqual(len(lanes[4]), 4) + seqs = self.extract.find_archive_sequence_files(self.result_map) + + expected = set([ + (u'11154', u'42JUYAAXX', '5', 1, 76, True, 'qseq'), + (u'11154', u'42JUYAAXX', '5', 2, 76, True, 'qseq'), + (u'11154', u'61MJTAAXX', '6', 1, 76, False, 'qseq'), + (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'11154', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'11154', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 1, 202, True, 'split_fastq'), + (u'12345', u'C02F9ACXX', '3', 2, 202, True, 'split_fastq'), + (u'11154', u'30221AAXX', '4', 1, 33, False, 'srf'), + (u'11154', u'30DY0AAXX', '8', 1, 151, True, 'srf') + ]) + found = set([(l.library_id, l.flowcell_id, l.lane_number, l.read, l.cycle, l.ispaired, l.filetype) for l in seqs]) + self.assertEqual(expected, found) def test_find_needed_targets(self): + lib_db = self.extract.find_archive_sequence_files(self.result_map) - extract = condorfastq.CondorFastqExtract('host', - FAKE_APIDATA, - self.tempdir, - self.logdir) - extract.api = FakeApi() - result_map = [('11154', self.subname)] - lib_db = extract.find_archive_sequence_files(result_map) - - needed_targets = extract.find_missing_targets(result_map, - lib_db) - self.failUnlessEqual(len(needed_targets), 7) + needed_targets = self.extract.update_fastq_targets(self.result_map, + lib_db) + self.assertEqual(len(needed_targets), 9) srf_30221 = needed_targets[ - self.subname + u'/11154_30221AAXX_c33_l4.fastq'] + self.result_map['11154'] + u'/11154_30221AAXX_c33_l4.fastq'] qseq_42JUY_r1 = needed_targets[ - self.subname + u'/11154_42JUYAAXX_c76_l5_r1.fastq'] + self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r1.fastq'] qseq_42JUY_r2 = needed_targets[ - self.subname + u'/11154_42JUYAAXX_c76_l5_r2.fastq'] + self.result_map['11154'] + u'/11154_42JUYAAXX_c76_l5_r2.fastq'] qseq_61MJT = needed_targets[ - self.subname + u'/11154_61MJTAAXX_c76_l6.fastq'] + self.result_map['11154'] + u'/11154_61MJTAAXX_c76_l6.fastq'] split_C02F9_r1 = needed_targets[ - self.subname + u'/11154_C02F9ACXX_c202_l3_r1.fastq'] + self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r1.fastq'] split_C02F9_r2 = needed_targets[ - self.subname + u'/11154_C02F9ACXX_c202_l3_r2.fastq'] + self.result_map['11154'] + u'/11154_C02F9ACXX_c202_l3_r2.fastq'] - self.failUnlessEqual(len(srf_30221['srf']), 1) - self.failUnlessEqual(len(qseq_42JUY_r1['qseq']), 1) - self.failUnlessEqual(len(qseq_42JUY_r2['qseq']), 1) - self.failUnlessEqual(len(qseq_61MJT['qseq']), 1) - self.failUnlessEqual(len(split_C02F9_r1['split_fastq']), 2) - self.failUnlessEqual(len(split_C02F9_r2['split_fastq']), 2) - - #print '-------needed targets---------' - #pprint(needed_targets) + self.assertEqual(len(srf_30221['srf']), 1) + self.assertEqual(len(qseq_42JUY_r1['qseq']), 1) + self.assertEqual(len(qseq_42JUY_r2['qseq']), 1) + self.assertEqual(len(qseq_61MJT['qseq']), 1) + self.assertEqual(len(split_C02F9_r1['split_fastq']), 2) + self.assertEqual(len(split_C02F9_r2['split_fastq']), 2) def test_generate_fastqs(self): - extract = condorfastq.CondorFastqExtract('host', - FAKE_APIDATA, - self.tempdir, - self.logdir) - extract.api = FakeApi() - result_map = [('11154', self.subdir)] - commands = extract.build_condor_arguments(result_map) + commands = self.extract.build_condor_arguments(self.result_map) srf = commands['srf'] qseq = commands['qseq'] split = commands['split_fastq'] - self.failUnlessEqual(len(srf), 2) - self.failUnlessEqual(len(qseq), 3) - self.failUnlessEqual(len(split), 2) + self.assertEqual(len(srf), 2) + self.assertEqual(len(qseq), 3) + self.assertEqual(len(split), 4) srf_data = { - os.path.join(self.subdir, '11154_30221AAXX_c33_l4.fastq'): { + os.path.join(self.result_map['11154'], + '11154_30221AAXX_c33_l4.fastq'): { 'mid': None, 'ispaired': False, 'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'], 'flowcell': u'30221AAXX', - 'target': os.path.join(self.subdir, + 'target': os.path.join(self.result_map['11154'], u'11154_30221AAXX_c33_l4.fastq'), }, - os.path.join(self.subdir, '11154_30DY0AAXX_c151_l8_r1.fastq'): { + os.path.join(self.result_map['11154'], + '11154_30DY0AAXX_c151_l8_r1.fastq'): { 'mid': None, 'ispaired': True, 'flowcell': u'30DY0AAXX', 'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'], 'mid': 76, 'target': - os.path.join(self.subdir, + os.path.join(self.result_map['11154'], u'11154_30DY0AAXX_c151_l8_r1.fastq'), 'target_right': - os.path.join(self.subdir, + os.path.join(self.result_map['11154'], u'11154_30DY0AAXX_c151_l8_r2.fastq'), } } for args in srf: expected = srf_data[args['target']] - self.failUnlessEqual(args['ispaired'], expected['ispaired']) - self.failUnlessEqual(len(args['sources']), 1) + self.assertEqual(args['ispaired'], expected['ispaired']) + self.assertEqual(len(args['sources']), 1) _, source_filename = os.path.split(args['sources'][0]) - self.failUnlessEqual(source_filename, expected['sources'][0]) - self.failUnlessEqual(args['target'], expected['target']) + self.assertEqual(source_filename, expected['sources'][0]) + self.assertEqual(args['target'], expected['target']) if args['ispaired']: - self.failUnlessEqual(args['target_right'], + self.assertEqual(args['target_right'], expected['target_right']) if 'mid' in expected: - self.failUnlessEqual(args['mid'], expected['mid']) + self.assertEqual(args['mid'], expected['mid']) qseq_data = { - os.path.join(self.subdir, '11154_42JUYAAXX_c76_l5_r1.fastq'): { + os.path.join(self.result_map['11154'], + '11154_42JUYAAXX_c76_l5_r1.fastq'): { 'istar': True, 'ispaired': True, 'sources': [ u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2'] }, - os.path.join(self.subdir, '11154_42JUYAAXX_c76_l5_r2.fastq'): { + os.path.join(self.result_map['11154'], + '11154_42JUYAAXX_c76_l5_r2.fastq'): { 'istar': True, 'ispaired': True, 'sources': [ u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2'] }, - os.path.join(self.subdir, '11154_61MJTAAXX_c76_l6.fastq'): { + os.path.join(self.result_map['11154'], + '11154_61MJTAAXX_c76_l6.fastq'): { 'istar': True, 'ispaired': False, 'sources': [ @@ -302,14 +588,14 @@ class TestCondorFastq(unittest.TestCase): } for args in qseq: expected = qseq_data[args['target']] - self.failUnlessEqual(args['istar'], expected['istar']) - self.failUnlessEqual(args['ispaired'], expected['ispaired']) + self.assertEqual(args['istar'], expected['istar']) + self.assertEqual(args['ispaired'], expected['ispaired']) for i in range(len(expected['sources'])): _, filename = os.path.split(args['sources'][i]) - self.failUnlessEqual(filename, expected['sources'][i]) + self.assertEqual(filename, expected['sources'][i]) - split_test = { x['target']: x for x in + split_test = dict((( x['target'], x) for x in [{'sources': [u'11154_NoIndex_L003_R1_001.fastq.gz', u'11154_NoIndex_L003_R1_002.fastq.gz'], 'pyscript': 'desplit_fastq.pyc', @@ -317,71 +603,89 @@ class TestCondorFastq(unittest.TestCase): {'sources': [u'11154_NoIndex_L003_R2_001.fastq.gz', u'11154_NoIndex_L003_R2_002.fastq.gz'], 'pyscript': 'desplit_fastq.pyc', - 'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'}] - } + 'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'}, + {'sources': [u'12345_CGATGT_L003_R1_001.fastq.gz', + u'12345_CGATGT_L003_R1_002.fastq.gz', + u'12345_CGATGT_L003_R1_003.fastq.gz', + ], + 'pyscript': 'desplit_fastq.pyc', + 'target': u'12345_C02F9ACXX_c202_l3_r1.fastq'}, + {'sources': [u'12345_CGATGT_L003_R2_001.fastq.gz', + u'12345_CGATGT_L003_R2_002.fastq.gz', + u'12345_CGATGT_L003_R2_003.fastq.gz', + ], + 'pyscript': 'desplit_fastq.pyc', + 'target': u'12345_C02F9ACXX_c202_l3_r2.fastq'} + ] + )) for arg in split: _, target = os.path.split(arg['target']) pyscript = split_test[target]['pyscript'] - self.failUnless(arg['pyscript'].endswith(pyscript)) + self.assertTrue(arg['pyscript'].endswith(pyscript)) filename = split_test[target]['target'] - self.failUnless(arg['target'].endswith(filename)) + self.assertTrue(arg['target'].endswith(filename)) for s_index in range(len(arg['sources'])): s1 = arg['sources'][s_index] s2 = split_test[target]['sources'][s_index] - self.failUnless(s1.endswith(s2)) - - #print '-------commands---------' - #pprint (commands) + self.assertTrue(s1.endswith(s2)) def test_create_scripts(self): - os.chdir(self.tempdir) - extract = condorfastq.CondorFastqExtract('host', - FAKE_APIDATA, - self.tempdir, - self.logdir) - extract.api = FakeApi() - result_map = [('11154', self.subname)] - extract.create_scripts(result_map) - - self.failUnless(os.path.exists('srf.condor')) + self.extract.create_scripts(self.result_map) + + self.assertTrue(os.path.exists('srf.condor')) with open('srf.condor', 'r') as srf: arguments = [ l for l in srf if l.startswith('argument') ] arguments.sort() - self.failUnlessEqual(len(arguments), 2) - self.failUnless('--single sub-11154/11154_30221AAXX_c33_l4.fastq' + self.assertEqual(len(arguments), 2) + self.assertTrue('sub-11154/11154_30221AAXX_c33_l4.fastq' in arguments[0]) - self.failUnless( - '--right sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq' in + self.assertTrue( + 'sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq' in arguments[1]) - self.failUnless(os.path.exists('qseq.condor')) + self.assertTrue(os.path.exists('qseq.condor')) with open('qseq.condor', 'r') as srf: arguments = [ l for l in srf if l.startswith('argument') ] arguments.sort() - self.failUnlessEqual(len(arguments), 3) - self.failUnless('-o sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ' in + self.assertEqual(len(arguments), 3) + self.assertTrue('sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ' in arguments[0]) - self.failUnless( + self.assertTrue( 'C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2' in arguments[1]) - self.failUnless('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX' in + self.assertTrue('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX' in arguments[2]) - self.failUnless(os.path.exists('split_fastq.condor')) + self.assertTrue(os.path.exists('split_fastq.condor')) with open('split_fastq.condor', 'r') as split: arguments = [ l for l in split if l.startswith('argument') ] arguments.sort() - self.failUnlessEqual(len(arguments), 2) - self.failUnless('11154_NoIndex_L003_R1_001.fastq.gz' in \ + self.assertEqual(len(arguments), 4) + # Lane 3 Read 1 + self.assertTrue('11154_NoIndex_L003_R1_001.fastq.gz' in \ arguments[0]) - self.failUnless('11154_NoIndex_L003_R2_002.fastq.gz' in \ + # Lane 3 Read 2 + self.assertTrue('11154_NoIndex_L003_R2_002.fastq.gz' in \ arguments[1]) + # Lane 3 Read 1 + self.assertTrue('12345_CGATGT_L003_R1_001.fastq.gz' in arguments[2]) + self.assertTrue('12345_CGATGT_L003_R1_002.fastq.gz' in arguments[2]) + self.assertTrue('12345_CGATGT_L003_R1_003.fastq.gz' in arguments[2]) + self.assertTrue('12345_C02F9ACXX_c202_l3_r1.fastq' in arguments[2]) + + # Lane 3 Read 2 + self.assertTrue('12345_CGATGT_L003_R2_001.fastq.gz' in arguments[3]) + self.assertTrue('12345_CGATGT_L003_R2_002.fastq.gz' in arguments[3]) + self.assertTrue('12345_CGATGT_L003_R2_003.fastq.gz' in arguments[3]) + self.assertTrue('12345_C02F9ACXX_c202_l3_r2.fastq' in arguments[3]) def suite(): - suite = unittest.makeSuite(TestCondorFastq, 'test') + from unittest2 import TestSuite, defaultTestLoader + suite = TestSuite() + suite.addTests(defaultTestLoader.loadTestsFromTestCase(TestCondorFastq)) return suite if __name__ == "__main__": - unittest.main(defaultTest='suite') - + from unittest2 import main + main(defaultTest='suite')