5 from pprint import pprint
10 from htsworkflow.submission import condorfastq
11 from htsworkflow.submission.results import ResultMap
# Directory entries of the form <flowcell>/C1-<n>/Project_<id>. The opening
# line of the enclosing literal is not part of this excerpt.
16 'C02F9ACXX/C1-202/Project_11154',
17 'C02F9ACXX/C1-202/Project_12342_Index1',
18 'C02F9ACXX/C1-202/Project_12342_Index2',
# File entries, relative to the same root as the directories above:
#   * C02F9ACXX  -> split, gzipped fastq files (R1/R2, numbered chunks)
#   * 42JUYAAXX, 61MJTAAXX -> per-lane .tar.bz2 archives
#   * 30221AAXX, 30DY0AAXX -> per-lane .srf files
# NOTE(review): presumably these are the placeholder files the test fixture
# creates on disk -- the loop that consumes them is not visible here.
30 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_001.fastq.gz',
31 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R1_002.fastq.gz',
32 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_001.fastq.gz',
33 'C02F9ACXX/C1-202/Project_11154/11154_NoIndex_L003_R2_002.fastq.gz',
34 'C02F9ACXX/C1-202/Project_12342_Index1/11114_GCCAAT_L004_R1_001.fastq.gz',
35 'C02F9ACXX/C1-202/Project_12342_Index2/11119_CGATGT_L007_R1_001.fastq.gz',
36 'C02F9ACXX/C1-202/Project_12342_Index2/11119_CGATGT_L005_R1_001.fastq.gz',
37 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r1.tar.bz2',
38 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r1.tar.bz2',
39 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r1.tar.bz2',
40 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r1.tar.bz2',
41 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2',
42 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r1.tar.bz2',
43 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r1.tar.bz2',
44 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r1.tar.bz2',
45 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
# NOTE(review): the next entry is identical to the previous one (l1_r2 is
# listed twice) -- confirm whether the duplicate is intentional.
46 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l1_r2.tar.bz2',
47 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l2_r2.tar.bz2',
48 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l3_r2.tar.bz2',
49 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l4_r2.tar.bz2',
50 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2',
51 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l6_r2.tar.bz2',
52 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l7_r2.tar.bz2',
53 '42JUYAAXX/C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l8_r2.tar.bz2',
54 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_1.srf',
55 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_2.srf',
56 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_3.srf',
57 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf',
58 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_5.srf',
59 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_6.srf',
60 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_7.srf',
61 '30221AAXX/C1-33/woldlab_090425_HWI-EAS229_0110_30221AAXX_8.srf',
62 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_1.srf',
63 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_2.srf',
64 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_3.srf',
65 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_4.srf',
66 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_5.srf',
67 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_6.srf',
68 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_7.srf',
69 '30DY0AAXX/C1-151/woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf',
70 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l1_r1.tar.bz2',
71 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l2_r1.tar.bz2',
72 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l3_r1.tar.bz2',
73 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l4_r1.tar.bz2',
74 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l5_r1.tar.bz2',
75 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2',
76 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l7_r1.tar.bz2',
77 '61MJTAAXX/C1-76/woldlab_100826_HSI-123_0001_61MJTAAXX_l8_r1.tar.bz2',
# Canned library record keyed by library id '11154'. Presumably this is an
# entry of the LIBDATA mapping that FakeApi.get_library() reads -- the line
# that opens the mapping is not part of this excerpt, and several fields of
# the record are missing from it as well.
81 '11154':{u'antibody_id': None,
82 u'cell_line': u'Unknown',
84 u'experiment_type': u'RNA-seq',
85 u'experiment_type_id': 4,
# lane_set: one dict per lane, on flowcells 30221AAXX, 42JUYAAXX, 61MJTAAXX,
# 30DY0AAXX and C02F9ACXX (five lanes in total).
90 u'lane_set': [{u'flowcell': u'30221AAXX',
95 u'status': u'Unknown',
96 u'status_code': None},
97 {u'flowcell': u'42JUYAAXX',
102 u'status': u'Unknown',
103 u'status_code': None},
104 {u'flowcell': u'61MJTAAXX',
107 u'paired_end': False,
109 u'status': u'Unknown',
110 u'status_code': None},
111 {u'flowcell': u'30DY0AAXX',
116 u'status': u'Unknown',
117 u'status_code': None},
118 {u'flowcell': u'C02F9ACXX',
123 u'status': u'Unknown',
124 u'status_code': None}],
125 u'library_id': u'11154',
126 u'library_name': u'Paired ends ASDF ',
127 u'library_species': u'Mus musculus',
128 u'library_species_id': 9,
129 u'library_type': u'Paired End (non-multiplexed)',
130 u'library_type_id': 2,
131 u'made_by': u'Gary Gygax',
133 u'notes': u'300 bp gel fragment',
135 u'stopping_point': u'1Aa',
136 u'successful_pM': None,
137 u'undiluted_concentration': u'29.7'}
# Dummy API id/key pair; presumably handed to code that expects API
# credentials -- the place where it is consumed is not visible here.
FAKE_APIDATA = dict(apiid=0, apikey='foo')
class FakeApi(object):
    """Stand-in API client that serves library records from LIBDATA."""

    def __init__(self, *args, **kwargs):
        """Accept and ignore any arguments; only ``root_url`` is set."""
        self.root_url = 'http://localhost'

    def get_library(self, libid):
        """Return a deep copy of the LIBDATA record stored under ``libid``.

        A copy is returned so callers can mutate the result without
        altering the shared LIBDATA entry.  Raises ``KeyError`` when
        ``libid`` is not a key of LIBDATA.
        """
        return copy.deepcopy(LIBDATA[libid])
# Test case exercising condorfastq.CondorFastqExtract against a temporary
# directory tree, with FakeApi substituted for the extractor's ``api``.
152 class TestCondorFastq(unittest.TestCase):
# Fixture setup (the method header is not part of this excerpt): remember the
# starting working directory, then create <tempdir>/flowcells and
# <tempdir>/log.
154 self.cwd = os.getcwd()
156 self.tempdir = tempfile.mkdtemp(prefix='condorfastq_test')
157 self.flowcelldir = os.path.join(self.tempdir, 'flowcells')
158 os.mkdir(self.flowcelldir)
160 self.logdir = os.path.join(self.tempdir, 'log')
161 os.mkdir(self.logdir)
# Create a directory ``d`` and a small placeholder file ``f`` under the
# flowcell directory; the loop headers binding ``d`` and ``f`` are not shown.
164 os.mkdir(os.path.join(self.flowcelldir, d))
167 filename = os.path.join(self.flowcelldir, f)
168 with open(filename, 'w') as stream:
169 stream.write('testfile')
# Submission directory 'sub-11154', registered in the ResultMap under
# library id '11154'.
171 self.subname = unicode('sub-11154')
172 self.subdir = os.path.join(self.tempdir, self.subname)
173 os.mkdir(self.subdir)
175 self.result_map = ResultMap()
176 self.result_map['11154'] = self.subname
# Cleanup (method header not shown): remove the whole temporary tree.
179 shutil.rmtree(self.tempdir)
# find_archive_sequence_files() should report five lanes for library 11154,
# addressed below by (flowcell, lane number) keys.
182 def test_find_archive_sequence(self):
183 extract = condorfastq.CondorFastqExtract('host',
187 extract.api = FakeApi()
189 lib_db = extract.find_archive_sequence_files(self.result_map)
191 self.failUnlessEqual(len(lib_db['11154']['lanes']), 5)
193 lib_db['11154']['lanes'][(u'30221AAXX', 4)],
194 lib_db['11154']['lanes'][(u'42JUYAAXX', 5)],
195 lib_db['11154']['lanes'][(u'61MJTAAXX', 6)],
196 lib_db['11154']['lanes'][(u'30DY0AAXX', 8)],
197 lib_db['11154']['lanes'][(u'C02F9ACXX', 3)],
# Expected number of entries per lane; assumes ``lanes`` is built from the
# five lookups above in that order -- TODO confirm (its assignment is not
# shown in this excerpt).
199 self.failUnlessEqual(len(lanes[0]), 1)
200 self.failUnlessEqual(len(lanes[1]), 2)
201 self.failUnlessEqual(len(lanes[2]), 1)
202 self.failUnlessEqual(len(lanes[3]), 1)
203 self.failUnlessEqual(len(lanes[4]), 4)
# find_missing_targets() should return seven targets; six of them are looked
# up below by '<submission dir>/<fastq name>'.
205 def test_find_needed_targets(self):
207 extract = condorfastq.CondorFastqExtract('host',
211 extract.api = FakeApi()
212 lib_db = extract.find_archive_sequence_files(self.result_map)
214 needed_targets = extract.find_missing_targets(self.result_map,
216 self.failUnlessEqual(len(needed_targets), 7)
217 srf_30221 = needed_targets[
218 self.subname + u'/11154_30221AAXX_c33_l4.fastq']
219 qseq_42JUY_r1 = needed_targets[
220 self.subname + u'/11154_42JUYAAXX_c76_l5_r1.fastq']
221 qseq_42JUY_r2 = needed_targets[
222 self.subname + u'/11154_42JUYAAXX_c76_l5_r2.fastq']
223 qseq_61MJT = needed_targets[
224 self.subname + u'/11154_61MJTAAXX_c76_l6.fastq']
225 split_C02F9_r1 = needed_targets[
226 self.subname + u'/11154_C02F9ACXX_c202_l3_r1.fastq']
227 split_C02F9_r2 = needed_targets[
228 self.subname + u'/11154_C02F9ACXX_c202_l3_r2.fastq']
# Each target maps a source type ('srf', 'qseq', 'split_fastq') to its source
# files; only the split fastq targets are expected to have two sources.
230 self.failUnlessEqual(len(srf_30221['srf']), 1)
231 self.failUnlessEqual(len(qseq_42JUY_r1['qseq']), 1)
232 self.failUnlessEqual(len(qseq_42JUY_r2['qseq']), 1)
233 self.failUnlessEqual(len(qseq_61MJT['qseq']), 1)
234 self.failUnlessEqual(len(split_C02F9_r1['split_fastq']), 2)
235 self.failUnlessEqual(len(split_C02F9_r2['split_fastq']), 2)
237 #print '-------needed targets---------'
238 #pprint(needed_targets)
# build_condor_arguments() groups the generated commands by source type;
# expected counts are 2 srf, 3 qseq and 2 split_fastq.
240 def test_generate_fastqs(self):
241 extract = condorfastq.CondorFastqExtract('host',
245 extract.api = FakeApi()
246 commands = extract.build_condor_arguments(self.result_map)
248 srf = commands['srf']
249 qseq = commands['qseq']
250 split = commands['split_fastq']
252 self.failUnlessEqual(len(srf), 2)
253 self.failUnlessEqual(len(qseq), 3)
254 self.failUnlessEqual(len(split), 2)
257 os.path.join(self.subname, '11154_30221AAXX_c33_l4.fastq'): {
260 'sources': [u'woldlab_090425_HWI-EAS229_0110_30221AAXX_4.srf'],
261 'flowcell': u'30221AAXX',
262 'target': os.path.join(self.subname,
263 u'11154_30221AAXX_c33_l4.fastq'),
265 os.path.join(self.subname, '11154_30DY0AAXX_c151_l8_r1.fastq'): {
268 'flowcell': u'30DY0AAXX',
269 'sources': [u'woldlab_090725_HWI-EAS229_0110_30DY0AAXX_8.srf'],
272 os.path.join(self.subname,
273 u'11154_30DY0AAXX_c151_l8_r1.fastq'),
275 os.path.join(self.subname,
276 u'11154_30DY0AAXX_c151_l8_r2.fastq'),
280 expected = srf_data[args['target']]
281 self.failUnlessEqual(args['ispaired'], expected['ispaired'])
282 self.failUnlessEqual(len(args['sources']), 1)
283 _, source_filename = os.path.split(args['sources'][0])
284 self.failUnlessEqual(source_filename, expected['sources'][0])
285 self.failUnlessEqual(args['target'], expected['target'])
287 self.failUnlessEqual(args['target_right'],
288 expected['target_right'])
289 if 'mid' in expected:
290 self.failUnlessEqual(args['mid'], expected['mid'])
293 os.path.join(self.subname, '11154_42JUYAAXX_c76_l5_r1.fastq'): {
297 u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r1.tar.bz2']
299 os.path.join(self.subname, '11154_42JUYAAXX_c76_l5_r2.fastq'): {
303 u'woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2']
305 os.path.join(self.subname, '11154_61MJTAAXX_c76_l6.fastq'): {
309 u'woldlab_100826_HSI-123_0001_61MJTAAXX_l6_r1.tar.bz2'],
313 expected = qseq_data[args['target']]
314 self.failUnlessEqual(args['istar'], expected['istar'])
315 self.failUnlessEqual(args['ispaired'], expected['ispaired'])
316 for i in range(len(expected['sources'])):
317 _, filename = os.path.split(args['sources'][i])
318 self.failUnlessEqual(filename, expected['sources'][i])
321 split_test = dict((( x['target'], x) for x in
322 [{'sources': [u'11154_NoIndex_L003_R1_001.fastq.gz',
323 u'11154_NoIndex_L003_R1_002.fastq.gz'],
324 'pyscript': 'desplit_fastq.pyc',
325 'target': u'11154_C02F9ACXX_c202_l3_r1.fastq'},
326 {'sources': [u'11154_NoIndex_L003_R2_001.fastq.gz',
327 u'11154_NoIndex_L003_R2_002.fastq.gz'],
328 'pyscript': 'desplit_fastq.pyc',
329 'target': u'11154_C02F9ACXX_c202_l3_r2.fastq'}]
332 _, target = os.path.split(arg['target'])
333 pyscript = split_test[target]['pyscript']
334 self.failUnless(arg['pyscript'].endswith(pyscript))
335 filename = split_test[target]['target']
336 self.failUnless(arg['target'].endswith(filename))
337 for s_index in range(len(arg['sources'])):
338 s1 = arg['sources'][s_index]
339 s2 = split_test[target]['sources'][s_index]
340 self.failUnless(s1.endswith(s2))
342 #print '-------commands---------'
# create_scripts() is run from inside the temporary directory and is expected
# to leave srf.condor, qseq.condor and split_fastq.condor there; the lines
# starting with 'argument' in each file are then counted and inspected.
345 def test_create_scripts(self):
346 os.chdir(self.tempdir)
347 extract = condorfastq.CondorFastqExtract('host',
351 extract.api = FakeApi()
352 extract.create_scripts(self.result_map)
354 self.failUnless(os.path.exists('srf.condor'))
355 with open('srf.condor', 'r') as srf:
356 arguments = [ l for l in srf if l.startswith('argument') ]
358 self.failUnlessEqual(len(arguments), 2)
359 self.failUnless('--single sub-11154/11154_30221AAXX_c33_l4.fastq'
362 '--right sub-11154/11154_30DY0AAXX_c151_l8_r2.fastq' in
365 self.failUnless(os.path.exists('qseq.condor'))
366 with open('qseq.condor', 'r') as srf:
367 arguments = [ l for l in srf if l.startswith('argument') ]
369 self.failUnlessEqual(len(arguments), 3)
370 self.failUnless('-o sub-11154/11154_42JUYAAXX_c76_l5_r1.fastq ' in
373 'C1-76/woldlab_100826_HSI-123_0001_42JUYAAXX_l5_r2.tar.bz2' in
375 self.failUnless('61MJTAAXX_c76_l6.fastq -f 61MJTAAXX' in
378 self.failUnless(os.path.exists('split_fastq.condor'))
379 with open('split_fastq.condor', 'r') as split:
380 arguments = [ l for l in split if l.startswith('argument') ]
382 self.failUnlessEqual(len(arguments), 2)
383 self.failUnless('11154_NoIndex_L003_R1_001.fastq.gz' in \
385 self.failUnless('11154_NoIndex_L003_R2_002.fastq.gz' in \
# Collect the TestCondorFastq methods whose names start with 'test' into one
# suite. NOTE(review): the enclosing definition, if any, is not part of this
# excerpt.
389 suite = unittest.makeSuite(TestCondorFastq, 'test')
if __name__ == "__main__":
    # Executed as a script: hand control to unittest, which resolves the
    # module-level name 'suite' to decide what to run.
    unittest.main(defaultTest='suite')