TEST_CODE_DIR = os.path.split(__file__)[0]
TESTDATA_DIR = os.path.join(TEST_CODE_DIR, 'testdata')
-LANE_LIST = range(1,9)
-TILE_LIST = range(1,101)
+LANE_LIST = list(range(1,9))
+TILE_LIST = list(range(1,101))
HISEQ_TILE_LIST = [1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108,
1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208,
2101, 2102, 2103, 2104, 2105, 2106, 2107, 2108,
f.write(config)
f.close()
+def make_runinfo(runfolder_dir, flowcell_id):
+ """Simulate a RunInfo.xml file as created by RTA >= 1.9
+ """
+ xml = '''<?xml version="1.0"?>
+<RunInfo xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Version="2">
+ <Run Id="{runfolder}" Number="101">
+ <Flowcell>{flowcell}</Flowcell>
+ <Instrument>SN787</Instrument>
+ <Date>110815</Date>
+ <Reads>
+ <Read Number="1" NumCycles="50" IsIndexedRead="N" />
+ <Read Number="2" NumCycles="7" IsIndexedRead="Y" />
+ </Reads>
+ <FlowcellLayout LaneCount="8" SurfaceCount="2" SwathCount="3" TileCount="8" />
+ <AlignToPhiX />
+ </Run>
+</RunInfo>
+'''
+ path, runfolder = os.path.split(runfolder_dir)
+ runinfo = os.path.join(runfolder_dir, 'RunInfo.xml')
+ stream = open(runinfo, 'w')
+ stream.write(xml.format(runfolder=runfolder, flowcell=flowcell_id))
+ stream.close()
+ return runinfo
+
def make_bustard_config132(image_dir):
source = os.path.join(TESTDATA_DIR, 'bustard-config132.xml')
destination = os.path.join(image_dir, 'config.xml')
def make_aligned_config_1_12(aligned_dir):
"""This is roughly equivalent to the old gerald file"""
- source = os.path.join(TESTDATA_DIR, 'aligned_config_1_12.xml')
+ source = os.path.join(TESTDATA_DIR, '1_12', 'aligned_config_1_12.xml')
destination = os.path.join(aligned_dir, 'config.xml')
shutil.copy(source, destination)
def make_unaligned_config_1_12(unaligned_dir):
demultiplex_pairs = [ # (src,
# dest),
- (os.path.join(TESTDATA_DIR, 'demultiplex_1.12.4.2.xml'),
- os.path.join(unaligned_dir, 'DemultiplexConfig.xml')),
- (os.path.join(TESTDATA_DIR, 'demultiplexed_bustard_1.12.4.2.xml'),
- os.path.join(unaligned_dir, 'DemultiplexConfig.xml')),
- (os.path.join(TESTDATA_DIR, 'demultiplexed_summary_1.12.4.2.xml'),
+ (os.path.join(TESTDATA_DIR, '1_12', 'demultiplex_1.12.4.2.xml'),
os.path.join(unaligned_dir, 'DemultiplexConfig.xml')),
+ (os.path.join(TESTDATA_DIR, '1_12',
+ 'demultiplexed_bustard_1.12.4.2.xml'),
+ os.path.join(unaligned_dir, 'DemultiplexedBustardConfig.xml')),
+ (os.path.join(TESTDATA_DIR, '1_12',
+ 'demultiplexed_summary_1.12.4.2.xml'),
+ os.path.join(unaligned_dir, 'DemultiplexedBustardSummary.xml')),
]
for src, dest in demultiplex_pairs:
shutil.copy(src, dest)
+
+def make_unaligned_status_1_12(unaligned_dir, flowcell_id):
+ basecall_status = ['All.htm', 'Demultiplex_Stats.htm', 'IVC.htm']
+ test_data_root = os.path.join(TESTDATA_DIR, '1_12', 'basecall_stats')
+ basecall_stats = os.path.join(unaligned_dir,
+ 'Basecall_Stats_{0}'.format(flowcell_id))
+ os.mkdir(basecall_stats)
+ for filename in basecall_status:
+ source = os.path.join(test_data_root, filename)
+ destination = os.path.join(basecall_stats, filename)
+ shutil.copy(source, destination)
def make_rta_intensities_1460(data_dir, version='1.4.6.0'):
"""
if not os.path.exists(intensities_dir):
os.mkdir(intensities_dir)
- param_file = os.path.join(TESTDATA_DIR, 'rta_intensities_config_1.12.4.2.xml')
+ param_file = os.path.join(TESTDATA_DIR, '1_12',
+ 'rta_intensities_config_1.12.4.2.xml')
shutil.copy(param_file, os.path.join(intensities_dir, 'RTAConfig.xml'))
return intensities_dir
os.mkdir(basecalls_dir)
make_qseqs(basecalls_dir, basecall_info=ABXX_BASE_CALL_INFO)
- param_file = os.path.join(TESTDATA_DIR, 'rta_basecalls_config_1.12.4.2.xml')
+ param_file = os.path.join(TESTDATA_DIR, '1_12',
+ 'rta_basecalls_config_1.12.4.2.xml')
shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml'))
return basecalls_dir
destination = os.path.join(gerald_dir, 'Summary.xml')
shutil.copy(source, destination)
+def make_status_rta1_12(datadir):
+ sourcedir = os.path.join(TESTDATA_DIR, '1_12')
+ status_htm = os.path.join(sourcedir, 'Status.htm')
+ destination = os.path.join(datadir, 'Status.htm')
+ shutil.copy(status_htm, destination)
+
+ status_dir = os.path.join(datadir, 'Status_Files')
+ status_source_dir = os.path.join(sourcedir, 'Status_Files')
+ shutil.copytree(status_source_dir, status_dir)
+
+ report_source_dir = os.path.join(sourcedir, 'reports')
+ report_dir = os.path.join(datadir, 'reports')
+ shutil.copytree(report_source_dir, report_dir)
def make_eland_results(gerald_dir):
eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
summary_dest = os.path.join(paths.summary_dir, 'Sample_Summary.htm')
shutil.copy(summary_source, summary_dest)
- body = get_unaligned_sample_export(lane, index_seq)
+ body = get_aligned_sample_export(lane, index_seq)
for split in ['001','002']:
for read in UNALIGNED_READS:
suffix = 'R{0}_{1}_export.txt.gz'.format(read, split)
self.sample_dir = os.path.join(self.project_dir, self.sample_dir)
self.summary_dir = 'Summary_Stats_{0}'.format(self.flowcell_id)
self.summary_dir = os.path.join(self.project_dir, self.summary_dir)
- print "HI:", self.summary_dir
def make_sample_dirs(self):
os.mkdir(self.sample_dir)
def make_summary_dirs(self):
- print "HI:", self.summary_dir
if not os.path.isdir(self.summary_dir):
os.mkdir(self.summary_dir)
filename = self.rootname + suffix
pathname = os.path.join(self.sample_dir, filename)
return pathname
+
def dump(self):
- print ('index seq: {0}'.format(self.index_seq))
+ print(('index seq: {0}'.format(self.index_seq)))
- print ('project dir: {0}'.format(self.project_dir))
- print ('sample dir: {0}'.format(self.sample_dir))
- print ('rootname: {0}'.format(self.rootname))
- print ('path: {0}'.format(
+ print(('project dir: {0}'.format(self.project_dir)))
+ print(('sample dir: {0}'.format(self.sample_dir)))
+ print(('rootname: {0}'.format(self.rootname)))
+ print(('path: {0}'.format(
os.path.join(self.project_dir,
self.sample_dir,
- self.rootname+'R1_001.fastq.gz')))
+ self.rootname+'R1_001.fastq.gz'))))
def get_unaligned_sample_fastq_data(flowcell_id, lane, index_seq):
""".format(flowcell=flowcell_id, lane=lane, index=index_seq)
return seq
-def get_unaligned_sample_export(lane, index_seq):
+def get_aligned_sample_export(lane, index_seq):
body = """HWI-ST0787\t102\t{lane}\t1101\t1207\t1993\t{index}\t1\tAANGGATTCGATCCGGCTTAAGAGATGAAAACCGAAAGGGCCGACCGAA\taaBS`ccceg[`ae[dRR_[[SPPPP__ececfYYWaegh^\\ZLLY\\X`\tNM\t\t\t\t\t\t
-HWI-ST0787\t102 {lane} 1101 1478 1997 {index} 1 CAAGAACCCCGGGGGGGGGGGGGCAGAGAGGGGGAATTTTTTTTTTGTT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB NM N
-HWI-ST0787 102 {lane} 1101 1625 1994 {index} 1 AANAATGCTACAGAGACAAAACAAAACTGATATGAAAGTTGAGAATAAA \^BS\cccgegg[Q[QQQ[`egdgffbeggfgh^^YcfgfhXaHY^O^c chrII.fa
+HWI-ST0787\t102\t{lane}\t1101\t1478\t1997\t{index}\t1\tCAAGAACCCCGGGGGGGGGGGGGCAGAGAGGGGGAATTTTTTTTTTGTT\tBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\tNM\t\t\t\t\t\t\t\t\t\t\tN
+HWI-ST0787\t102\t{lane}\t1101\t1625\t1994\t{index}\t1\tAANAATGCTACAGAGACAAAACAAAACTGATATGAAAGTTGAGAATAAA\tB^BS\cccgegg[Q[QQQ[`egdgffbeggfgh^^YcfgfhXaHY^O^c\tchr9.fa\t67717938\tR\t99\t72
+HWI-ST0787\t102\t{lane}\t1101\t1625\t1994\t{index}\t1\tAANAATGCTACAGAGACAAAACAAAACTGATATGAAAGTTGAGAATAAA\tB^BS\cccgegg[Q[QQQ[`egdgffbeggfgh^^YcfgfhXaHY^O^c\t3:4:3\t\t\t\t\t\t\t\t\t\t\tY
""".format(lane=lane, index=index_seq)
return body
-def ls_tree(root):
+def print_ls_tree(root):
+ """Print the path of every file under root, useful for debugging.
+ """
for dirpath, dirnames, filenames in os.walk(root):
for filename in filenames:
- print os.path.join(dirpath, filename)
+ print(os.path.join(dirpath, filename))
class BaseCallInfo(object):