5 from pprint import pprint
10 from htsworkflow.submission import condorfastq
# Project directories under C02F9ACXX/C1-202.
# NOTE(review): the opening of this list (and any entries before these) is not
# visible in this extract; presumably these are the directories the test
# fixture creates beneath its flowcell directory -- TODO confirm.
15 'C02F9ACXX/C1-202/Project_11154',
16 'C02F9ACXX/C1-202/Project_12342_Index1',
17 'C02F9ACXX/C1-202/Project_12342_Index2',
# Relative paths of placeholder sequence files, grouped by flowcell.
# NOTE(review): the opening of this list is not visible in this extract;
# presumably the test fixture writes a small dummy file at each of these
# paths -- TODO confirm.
#
# C02F9ACXX: gzipped fastq pieces. Library 11154 has two pieces (_001, _002)
# for each of R1 and R2; the tests expect two split_fastq sources per read.
27 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_001.fastq.gz',
28 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_002.fastq.gz',
29 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_001.fastq.gz',
30 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_002.fastq.gz',
# Files that belong to other library ids (11114, 11119).
31 'C02F9ACXX/C1-202/Project_12342_Index1/11114_GCCAAT_L004_R1_001.fastq.gz',
32 'C02F9ACXX/C1-202/Project_12342_Index2/11119_CGATGT_L007_R1_001.fastq.gz',
33 'C02F9ACXX/C1-202/Project_12342_Index2/11119_CGATGT_L005_R1_001.fastq.gz',
# 42JUYAAXX: .tar.bz2 archives for l1-l8, both r1 and r2 (the tests treat
# these as 'qseq' sources).
34 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r1.tar.bz2',
35 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r1.tar.bz2',
36 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r1.tar.bz2',
37 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r1.tar.bz2',
38 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2',
39 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r1.tar.bz2',
40 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r1.tar.bz2',
41 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r1.tar.bz2',
# NOTE(review): the l1_r2 entry below appears twice in a row; the second copy
# looks redundant.
42 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
43 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
44 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r2.tar.bz2',
45 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r2.tar.bz2',
46 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r2.tar.bz2',
47 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2',
48 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r2.tar.bz2',
49 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r2.tar.bz2',
50 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r2.tar.bz2',
# 30221AAXX: one .srf file per numbered suffix 1-8.
51 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_1.srf',
52 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_2.srf',
53 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_3.srf',
54 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf',
55 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_5.srf',
56 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_6.srf',
57 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_7.srf',
58 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_8.srf',
# 61MJTAAXX: .tar.bz2 archives for l1-l8, r1 only.
59 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l1_r1.tar.bz2',
60 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l2_r1.tar.bz2',
61 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l3_r1.tar.bz2',
62 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l4_r1.tar.bz2',
63 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l5_r1.tar.bz2',
64 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2',
65 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l7_r1.tar.bz2',
66 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l8_r1.tar.bz2',
# Canned library record for id '11154'.
# NOTE(review): the opening of the enclosing mapping is not visible in this
# extract; presumably it is the LIBDATA dict that FakeApi.get_library indexes
# by library id -- TODO confirm. Several fields of the record (including the
# lane number of each lane_set entry) are also missing from this view.
70 '11154':{u'antibody_id': None,
71 u'cell_line': u'Unknown',
73 u'experiment_type': u'RNA-seq',
74 u'experiment_type_id': 4,
# One lane_set entry per flowcell. The tests look these flowcells up as
# (flowcell, lane) keys with lanes 4, 5, 6 and 3 respectively.
79 u'lane_set': [{u'flowcell': u'30221AAXX',
83 u'status': u'Unknown',
84 u'status_code': None},
85 {u'flowcell': u'42JUYAAXX',
89 u'status': u'Unknown',
90 u'status_code': None},
91 {u'flowcell': u'61MJTAAXX',
95 u'status': u'Unknown',
96 u'status_code': None},
97 {u'flowcell': u'C02F9ACXX',
101 u'status': u'Unknown',
102 u'status_code': None}],
103 u'library_id': u'11154',
104 u'library_name': u'Paired ends ASDF ',
105 u'library_species': u'Mus musculus',
106 u'library_species_id': 9,
107 u'library_type': u'Paired End (non-multiplexed)',
108 u'library_type_id': 2,
109 u'made_by': u'Gary Gygax',
111 u'notes': u'300 bp gel fragment',
113 u'stopping_point': u'1Aa',
114 u'successful_pM': None,
115 u'undiluted_concentration': u'29.7'}
# Dummy API credentials (id 0, key 'foo').
# NOTE(review): presumably passed to CondorFastqExtract by the tests; the
# remaining constructor arguments are not visible in this extract -- TODO confirm.
118 FAKE_APIDATA = {'apiid':0, 'apikey': 'foo'}
# Stand-in API object; the tests assign an instance to `extract.api` so that
# library records come from the in-file LIBDATA fixture.
120 class FakeApi(object):
# Accepts arbitrary positional and keyword arguments.
# NOTE(review): the constructor body is not visible in this extract.
121 def __init__(self, *args, **kwargs):
# Return a deep copy of the LIBDATA entry for `libid`, so a caller mutating
# the result does not alter the shared fixture. An unknown `libid` fails at
# the LIBDATA lookup (KeyError if LIBDATA is a dict).
124 def get_library(self, libid):
125 lib_data = LIBDATA[libid]
126 return copy.deepcopy(lib_data)
# Tests for condorfastq.CondorFastqExtract, run against a scratch directory
# tree and the FakeApi stand-in.
# NOTE(review): this listing is a sparse extract -- each line carries its
# original file line number, and some lines (fixture method headers, loop
# headers, some call arguments) are missing. Comments describe only what is
# visible.
# NOTE(review): failUnlessEqual/failUnless are deprecated unittest aliases
# (removed in Python 3.12); assertEqual/assertTrue are the current names.
128 class TestCondorFastq(unittest.TestCase):
# Fixture setup (method header not visible): create a scratch directory with
# 'flowcells' and 'log' subdirectories.
130 self.tempdir = tempfile.mkdtemp(prefix='condorfastq_test')
131 self.flowcelldir = os.path.join(self.tempdir, 'flowcells')
132 os.mkdir(self.flowcelldir)
134 self.logdir = os.path.join(self.tempdir, 'log')
135 os.mkdir(self.logdir)
# Create directory `d` under the flowcell dir (loop header not visible).
138 os.mkdir(os.path.join(self.flowcelldir, d))
# Write the placeholder text 'testfile' to path `f` under the flowcell dir
# (loop header not visible).
141 filename = os.path.join(self.flowcelldir, f)
142 with open(filename, 'w') as stream:
143 stream.write('testfile')
# Teardown (method header not visible): remove the whole scratch tree.
146 shutil.rmtree(self.tempdir)
# find_archive_sequence_files should report four lanes for library 11154, with
# the expected number of entries under each (flowcell, lane) key.
148 def test_find_archive_sequence(self):
149 extract = condorfastq.CondorFastqExtract('host',
# Swap in the fake API object; presumably so library metadata comes from
# LIBDATA rather than a live server.
153 extract.api = FakeApi()
154 result_map = [('11154', '/notarealplace')]
155 lib_db = extract.find_archive_sequence_files(result_map)
157 self.failUnlessEqual(len(lib_db['11154']['lanes']), 4)
# Elements of the `lanes` sequence checked below (its opening line is not
# visible), one per (flowcell, lane number) key.
159 lib_db['11154']['lanes'][(u'30221AAXX', 4)],
160 lib_db['11154']['lanes'][(u'42JUYAAXX', 5)],
161 lib_db['11154']['lanes'][(u'61MJTAAXX', 6)],
162 lib_db['11154']['lanes'][(u'C02F9ACXX', 3)],
# Expected entry counts, in the same order as the keys listed above.
164 self.failUnlessEqual(len(lanes[0]), 1)
165 self.failUnlessEqual(len(lanes[1]), 2)
166 self.failUnlessEqual(len(lanes[2]), 1)
167 self.failUnlessEqual(len(lanes[3]), 4)
# find_missing_targets should yield six target fastq paths for library 11154,
# each listing the expected number of source files under its source type.
169 def test_find_needed_targets(self):
171 extract = condorfastq.CondorFastqExtract('host',
175 extract.api = FakeApi()
176 result_map = [('11154', '/notarealplace')]
177 lib_db = extract.find_archive_sequence_files(result_map)
# The remaining argument(s) of this call are not visible in this extract.
179 needed_targets = extract.find_missing_targets(result_map,
181 self.failUnlessEqual(len(needed_targets), 6)
# Look up each expected target by its output path; a missing key fails the test
# with KeyError if needed_targets is a dict.
182 srf_30221 = needed_targets[
183 u'/notarealplace/11154_30221AAXX_c33_l4.fastq']
184 qseq_42JUY_r1 = needed_targets[
185 u'/notarealplace/11154_42JUYAAXX_c76_l5_r1.fastq']
186 qseq_42JUY_r2 = needed_targets[
187 u'/notarealplace/11154_42JUYAAXX_c76_l5_r2.fastq']
188 qseq_61MJT = needed_targets[
189 u'/notarealplace/11154_61MJTAAXX_c76_l6.fastq']
190 split_C02F9_r1 = needed_targets[
191 u'/notarealplace/11154_C02F9ACXX_c202_l3_r1.fastq']
192 split_C02F9_r2 = needed_targets[
193 u'/notarealplace/11154_C02F9ACXX_c202_l3_r2.fastq']
# One source file for each srf/qseq target; two split fastq pieces per read
# for the C02F9ACXX targets.
195 self.failUnlessEqual(len(srf_30221['srf']), 1)
196 self.failUnlessEqual(len(qseq_42JUY_r1['qseq']), 1)
197 self.failUnlessEqual(len(qseq_42JUY_r2['qseq']), 1)
198 self.failUnlessEqual(len(qseq_61MJT['qseq']), 1)
199 self.failUnlessEqual(len(split_C02F9_r1['split_fastq']), 2)
200 self.failUnlessEqual(len(split_C02F9_r2['split_fastq']), 2)
202 #print '-------needed targets---------'
203 #pprint(needed_targets)
# build_condor_arguments should group commands under 'srf', 'qseq' and
# 'split_fastq', with the expected count and contents for each group.
205 def test_generate_fastqs(self):
206 extract = condorfastq.CondorFastqExtract('host',
210 extract.api = FakeApi()
211 result_map = [('11154', '/notarealplace')]
212 commands = extract.build_condor_arguments(result_map)
214 srf = commands['srf']
215 qseq = commands['qseq']
216 split = commands['split_fastq']
218 self.failUnlessEqual(len(srf), 1)
219 self.failUnlessEqual(len(qseq), 3)
220 self.failUnlessEqual(len(split), 2)
# Expected substrings for the srf command, keyed by target fastq path (part of
# this literal and the loop over the commands are not visible).
222 srf_data = {u'/notarealplace/11154_30221AAXX_c33_l4.fastq':
224 u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
# extract_argument_list turns the command string into an argument list. The
# positions indexed below depend on that helper and on the command layout,
# neither of which is fully visible here.
227 args = extract_argument_list(args)
228 expected = srf_data[args[3]]
229 self.failUnless(expected[0] in args[5])
230 self.failUnless(expected[1] in args[0])
# Expected substrings for the three qseq commands, keyed by target fastq path
# (first element of each value list is not visible).
232 qseq_data = {u'/notarealplace/11154_42JUYAAXX_c76_l5_r1.fastq':
234 u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2'],
235 u'/notarealplace/11154_61MJTAAXX_c76_l6.fastq':
237 'woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2'],
238 u'/notarealplace/11154_42JUYAAXX_c76_l5_r2.fastq':
240 'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2'],
243 args = extract_argument_list(args)
244 expected = qseq_data[args[1]]
245 self.failUnless(expected[0] in args[3])
246 self.failUnless(expected[1] in args[5])
# Expected split fastq pieces per target: the _001 piece must appear in
# argument 2 and the _002 piece in argument 3.
248 split_data ={u'/notarealplace/11154_C02F9ACXX_c202_l3_r2.fastq':
249 [u'11154_NoIndex_L003_R2_001.fastq.gz',
250 u'11154_NoIndex_L003_R2_002.fastq.gz'],
251 u'/notarealplace/11154_C02F9ACXX_c202_l3_r1.fastq':
252 [u'11154_NoIndex_L003_R1_001.fastq.gz',
253 u'11154_NoIndex_L003_R1_002.fastq.gz'],
256 args = extract_argument_list(args)
257 expected = split_data[args[1]]
258 self.failUnless(expected[0] in args[2])
259 self.failUnless(expected[1] in args[3])
261 #print '-------commands---------'
# Split a condor argument string into a list of whitespace-separated tokens.
# NOTE(review): the lines after the comment below are not visible in this
# extract; presumably they drop the first and last tokens, as that comment
# describes, and return the rest -- TODO confirm.
264 def extract_argument_list(condor_argument):
265 args = condor_argument.split()
266 # eat the command name, and the trailing queue
# Collect the TestCondorFastq methods whose names start with 'test'.
# NOTE(review): the enclosing function header is not visible in this extract.
# NOTE(review): unittest.makeSuite is deprecated since Python 3.11 and removed
# in 3.13; unittest.TestLoader.loadTestsFromTestCase is the replacement.
270 suite = unittest.makeSuite(TestCondorFastq, 'test')
# When executed as a script, run the tests named by the module-level 'suite'.
273 if __name__ == "__main__":
274 unittest.main(defaultTest='suite')