"""
Create simulated solexa/illumina runfolders for testing
"""
-
+import gzip
import os
+import shutil
+
+# Directory holding this module; the fixture files copied by the helpers
+# below are read from its 'testdata' subdirectory.
+TEST_CODE_DIR = os.path.split(__file__)[0]
+TESTDATA_DIR = os.path.join(TEST_CODE_DIR, 'testdata')
+# Lane numbers 1 through 8 and tile numbers 1 through 100.
+LANE_LIST = list(range(1,9))
+TILE_LIST = list(range(1,101))
+# Four-digit tile numbers; presumably the HiSeq tile naming scheme -- TODO confirm.
+HISEQ_TILE_LIST = [1101, 1102, 1103, 1104, 1105, 1106, 1107, 1108,
+ 1201, 1202, 1203, 1204, 1205, 1206, 1207, 1208,
+ 2101, 2102, 2103, 2104, 2105, 2106, 2107, 2108,
+ 2201, 2202, 2203, 2204, 2205, 2206, 2207, 2208,]
def make_firecrest_dir(data_dir, version="1.9.2", start=1, stop=37):
+ """
+ Create an empty Firecrest directory under data_dir and return its path.
+
+ The directory is named 'C<start>-<stop>_Firecrest<version>_12-04-2008_diane'.
+ os.mkdir is used, so data_dir must already exist and the new directory
+ must not.
+ """
- firecrest_dir = os.path.join(data_dir,
+ firecrest_dir = os.path.join(data_dir,
'C%d-%d_Firecrest%s_12-04-2008_diane' % (start, stop, version)
)
os.mkdir(firecrest_dir)
return firecrest_dir
-
-def make_ipar_dir(data_dir):
+
+def make_ipar_dir(data_dir, version='1.01'):
"""
Construct an artificial ipar parameter file and directory
"""
- params = """<?xml version="1.0"?>
-<ImageAnalysis>
- <Run Name="IPAR_1.01">
- <Software Name="IPAR" Version="2.01.192.0" />
- <Cycles First="1" Last="37" Number="37" />
- <RunParameters>
- <ImagingReads Index="1">
- <FirstCycle>1</FirstCycle>
- <LastCycle>37</LastCycle>
- <RunFolder>081021_HWI-EAS229_0063_30HKUAAXX</RunFolder>
- </ImagingReads>
- <Reads Index="1">
- <FirstCycle>1</FirstCycle>
- <LastCycle>37</LastCycle>
- <RunFolder>081021_HWI-EAS229_0063_30HKUAAXX</RunFolder>
- </Reads>
- <Compression>gzip</Compression>
- <CompressionSuffix>.p.gz</CompressionSuffix>
- <Instrument>HWI-EAS229</Instrument>
- <RunFolder>081021_HWI-EAS229_0063_30HKUAAXX</RunFolder>
- </RunParameters>
- <ImageParameters>
- <AutoOffsetFlag>1</AutoOffsetFlag>
- <Fwhm>2.7</Fwhm>
- <RemappingDistance>1.5</RemappingDistance>
- <Threshold>4</Threshold>
- </ImageParameters>
- <TileSelection>
- <Lane Index="1">
- <Sample>s</Sample>
- <TileRange Max="100" Min="1" />
- </Lane>
- <Lane Index="2">
- <Sample>s</Sample>
- <TileRange Max="100" Min="1" />
- </Lane>
- <Lane Index="3">
- <Sample>s</Sample>
- <TileRange Max="100" Min="1" />
- </Lane>
- <Lane Index="4">
- <Sample>s</Sample>
- <TileRange Max="100" Min="1" />
- </Lane>
- <Lane Index="5">
- <Sample>s</Sample>
- <TileRange Max="100" Min="1" />
- </Lane>
- <Lane Index="6">
- <Sample>s</Sample>
- <TileRange Max="100" Min="1" />
- </Lane>
- <Lane Index="7">
- <Sample>s</Sample>
- <TileRange Max="100" Min="1" />
- </Lane>
- <Lane Index="8">
- <Sample>s</Sample>
- <TileRange Max="100" Min="1" />
- </Lane>
- </TileSelection>
- </Run>
-</ImageAnalysis>
-"""
- f = open(os.path.join(data_dir, '.params'),'w')
- f.write(params)
- f.close()
- ipar_dir = os.path.join(data_dir, 'IPAR_1.01')
+ # The .params file is always copied from the IPAR1.01 fixture; `version`
+ # only changes the name of the IPAR_<version> directory created below.
+ ipar1_01_file = os.path.join(TESTDATA_DIR, 'IPAR1.01.params')
+ shutil.copy(ipar1_01_file, os.path.join(data_dir, '.params'))
+
+ ipar_dir = os.path.join(data_dir, 'IPAR_%s' % (version,))
if not os.path.exists(ipar_dir):
os.mkdir(ipar_dir)
return ipar_dir
f.write(config)
f.close()
-def make_matrix(matrix_dir):
+def make_runinfo(runfolder_dir, flowcell_id):
+ """Simulate a RunInfo.xml file created by >= RTA 1.9
+
+ Writes <runfolder_dir>/RunInfo.xml. The Run Id attribute is filled with
+ the last path component of runfolder_dir (empty if runfolder_dir ends
+ with a path separator) and the Flowcell element with flowcell_id; every
+ other value in the document is fixed.
+
+ Returns the path of the file that was written.
+ """
+ xml = '''<?xml version="1.0"?>
+<RunInfo xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" Version="2">
+ <Run Id="{runfolder}" Number="101">
+ <Flowcell>{flowcell}</Flowcell>
+ <Instrument>SN787</Instrument>
+ <Date>110815</Date>
+ <Reads>
+ <Read Number="1" NumCycles="50" IsIndexedRead="N" />
+ <Read Number="2" NumCycles="7" IsIndexedRead="Y" />
+ </Reads>
+ <FlowcellLayout LaneCount="8" SurfaceCount="2" SwathCount="3" TileCount="8" />
+ <AlignToPhiX />
+ </Run>
+</RunInfo>
+'''
+ # Only the folder name is used; the parent path is discarded.
+ path, runfolder = os.path.split(runfolder_dir)
+ runinfo = os.path.join(runfolder_dir, 'RunInfo.xml')
+ stream = open(runinfo, 'w')
+ stream.write(xml.format(runfolder=runfolder, flowcell=flowcell_id))
+ stream.close()
+ return runinfo
+
+def make_bustard_config132(image_dir):
+ """Copy the bustard-config132.xml fixture to <image_dir>/config.xml."""
+ source = os.path.join(TESTDATA_DIR, 'bustard-config132.xml')
+ destination = os.path.join(image_dir, 'config.xml')
+ shutil.copy(source, destination)
+
+def make_aligned_config_1_12(aligned_dir):
+ """Copy the 1_12 aligned config fixture to <aligned_dir>/config.xml.
+
+ This is roughly equivalent to the old gerald file.
+ """
+ source = os.path.join(TESTDATA_DIR, '1_12', 'aligned_config_1_12.xml')
+ destination = os.path.join(aligned_dir, 'config.xml')
+ shutil.copy(source, destination)
+
+def make_unaligned_config_1_12(unaligned_dir):
+ """Copy the three 1.12.4.2 demultiplex fixtures into unaligned_dir.
+
+ The files are written as DemultiplexConfig.xml,
+ DemultiplexedBustardConfig.xml and DemultiplexedBustardSummary.xml.
+ """
+ demultiplex_pairs = [ # (src,
+ # dest),
+ (os.path.join(TESTDATA_DIR, '1_12', 'demultiplex_1.12.4.2.xml'),
+ os.path.join(unaligned_dir, 'DemultiplexConfig.xml')),
+ (os.path.join(TESTDATA_DIR, '1_12',
+ 'demultiplexed_bustard_1.12.4.2.xml'),
+ os.path.join(unaligned_dir, 'DemultiplexedBustardConfig.xml')),
+ (os.path.join(TESTDATA_DIR, '1_12',
+ 'demultiplexed_summary_1.12.4.2.xml'),
+ os.path.join(unaligned_dir, 'DemultiplexedBustardSummary.xml')),
+ ]
+ for src, dest in demultiplex_pairs:
+ shutil.copy(src, dest)
+
+def make_unaligned_status_1_12(unaligned_dir, flowcell_id):
+ """Create <unaligned_dir>/Basecall_Stats_<flowcell_id> and fill it.
+
+ All.htm, Demultiplex_Stats.htm and IVC.htm are copied from the
+ 1_12/basecall_stats fixture directory. The stats directory is made with
+ os.mkdir, so it must not already exist.
+ """
+ basecall_status = ['All.htm', 'Demultiplex_Stats.htm', 'IVC.htm']
+ test_data_root = os.path.join(TESTDATA_DIR, '1_12', 'basecall_stats')
+ basecall_stats = os.path.join(unaligned_dir,
+ 'Basecall_Stats_{0}'.format(flowcell_id))
+ os.mkdir(basecall_stats)
+ for filename in basecall_status:
+ source = os.path.join(test_data_root, filename)
+ destination = os.path.join(basecall_stats, filename)
+ shutil.copy(source, destination)
+
+def make_rta_intensities_1460(data_dir, version='1.4.6.0'):
+ """
+ Construct an artificial RTA Intensities parameter file and directory
+
+ Creates <data_dir>/Intensities if needed, copies the
+ rta_intensities_config.xml fixture into it as config.xml and returns
+ the Intensities path. The version argument is not used by this function.
+ """
+ intensities_dir = os.path.join(data_dir, 'Intensities')
+ if not os.path.exists(intensities_dir):
+ os.mkdir(intensities_dir)
+
+ param_file = os.path.join(TESTDATA_DIR, 'rta_intensities_config.xml')
+ shutil.copy(param_file, os.path.join(intensities_dir, 'config.xml'))
+
+ return intensities_dir
+
+def make_rta_basecalls_1460(intensities_dir):
+ """
+ Construct an artificial RTA BaseCalls parameter file and directory
+
+ Creates <intensities_dir>/BaseCalls if needed, copies the
+ rta_basecalls_config.xml fixture into it as config.xml and returns the
+ BaseCalls path.
+ """
+ basecalls_dir = os.path.join(intensities_dir, 'BaseCalls')
+ if not os.path.exists(basecalls_dir):
+ os.mkdir(basecalls_dir)
+
+ param_file = os.path.join(TESTDATA_DIR, 'rta_basecalls_config.xml')
+ shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml'))
+
+ return basecalls_dir
+
+def make_rta_intensities_1870(data_dir, version='1.8.70.0'):
+ """
+ Construct an artificial RTA Intensities parameter file and directory
+
+ Creates <data_dir>/Intensities if needed, copies the
+ rta_intensities_config_1870.xml fixture into it as config.xml and
+ returns the Intensities path. The version argument is not used by this
+ function.
+ """
+ intensities_dir = os.path.join(data_dir, 'Intensities')
+ if not os.path.exists(intensities_dir):
+ os.mkdir(intensities_dir)
+
+ param_file = os.path.join(TESTDATA_DIR, 'rta_intensities_config_1870.xml')
+ shutil.copy(param_file, os.path.join(intensities_dir, 'config.xml'))
+
+ return intensities_dir
+
+def make_rta_intensities_1_10(data_dir, version='1.10.36.0'):
+ """
+ Construct an artificial RTA Intensities parameter file and directory
+
+ Creates <data_dir>/Intensities if needed, copies the
+ rta_intensities_config_1.10.xml fixture into it as config.xml and
+ returns the Intensities path. The version argument is not used by this
+ function.
+ """
+ intensities_dir = os.path.join(data_dir, 'Intensities')
+ if not os.path.exists(intensities_dir):
+ os.mkdir(intensities_dir)
+
+ param_file = os.path.join(TESTDATA_DIR, 'rta_intensities_config_1.10.xml')
+ shutil.copy(param_file, os.path.join(intensities_dir, 'config.xml'))
+
+ return intensities_dir
+
+def make_rta_intensities_1_12(data_dir, version='1.12.4.2'):
+ """
+ Construct an artificial RTA Intensities parameter file and directory
+
+ Creates <data_dir>/Intensities if needed and copies the 1_12
+ rta_intensities_config_1.12.4.2.xml fixture into it. Unlike the other
+ make_rta_intensities_* helpers the copy is named RTAConfig.xml rather
+ than config.xml. Returns the Intensities path. The version argument is
+ not used by this function.
+ """
+ intensities_dir = os.path.join(data_dir, 'Intensities')
+ if not os.path.exists(intensities_dir):
+ os.mkdir(intensities_dir)
+
+ param_file = os.path.join(TESTDATA_DIR, '1_12',
+ 'rta_intensities_config_1.12.4.2.xml')
+ shutil.copy(param_file, os.path.join(intensities_dir, 'RTAConfig.xml'))
+
+ return intensities_dir
+
+def make_rta_basecalls_1870(intensities_dir):
+ """
+ Construct an artificial RTA BaseCalls parameter file and directory
+
+ Creates <intensities_dir>/BaseCalls if needed, copies the
+ rta_basecalls_config_1870.xml fixture into it as config.xml and returns
+ the BaseCalls path.
+ """
+ basecalls_dir = os.path.join(intensities_dir, 'BaseCalls')
+ if not os.path.exists(basecalls_dir):
+ os.mkdir(basecalls_dir)
+
+ param_file = os.path.join(TESTDATA_DIR, 'rta_basecalls_config_1870.xml')
+ shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml'))
+
+ return basecalls_dir
+
+def make_rta_basecalls_1_10(intensities_dir):
+ """
+ Construct an artificial RTA BaseCalls directory with qseq files
+
+ Creates <intensities_dir>/BaseCalls if needed, fills it via make_qseqs
+ and copies the rta_basecalls_config_1.10.xml fixture into it as
+ config.xml. Returns the BaseCalls path.
+ """
+ basecalls_dir = os.path.join(intensities_dir, 'BaseCalls')
+ if not os.path.exists(basecalls_dir):
+ os.mkdir(basecalls_dir)
+
+ # NOTE(review): ABXX_BASE_CALL_INFO is not defined in the part of the
+ # module shown here -- confirm it exists at module level.
+ make_qseqs(basecalls_dir, basecall_info=ABXX_BASE_CALL_INFO)
+ param_file = os.path.join(TESTDATA_DIR, 'rta_basecalls_config_1.10.xml')
+ shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml'))
+
+ return basecalls_dir
+
+def make_rta_basecalls_1_12(intensities_dir):
+ """
+ Construct an artificial RTA BaseCalls directory with qseq files
+
+ Creates <intensities_dir>/BaseCalls if needed, fills it via make_qseqs
+ and copies the 1_12 rta_basecalls_config_1.12.4.2.xml fixture into it
+ as config.xml. Returns the BaseCalls path.
+ """
+ basecalls_dir = os.path.join(intensities_dir, 'BaseCalls')
+ if not os.path.exists(basecalls_dir):
+ os.mkdir(basecalls_dir)
+
+ # NOTE(review): ABXX_BASE_CALL_INFO is not defined in the part of the
+ # module shown here -- confirm it exists at module level.
+ make_qseqs(basecalls_dir, basecall_info=ABXX_BASE_CALL_INFO)
+ param_file = os.path.join(TESTDATA_DIR, '1_12',
+ 'rta_basecalls_config_1.12.4.2.xml')
+ shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml'))
+
+ return basecalls_dir
+
+
+def make_qseqs(bustard_dir, basecall_info=None):
+ """
+ Fill a bustard directory with qseq files
+
+ One copy of the source qseq file is written for every lane in LANE_LIST
+ and every tile in the tile list, named s_<lane>_1_<tile>_qseq.txt with
+ the tile zero-padded to four digits. make_matrix_dir and
+ make_phasing_dir are then called on the same directory and a summary
+ file is copied to BustardSummary.xml.
+
+ When basecall_info is None the 42BRJAAXX fixtures and TILE_LIST are
+ used; otherwise the qseq_file, tile_list and basecall_summary
+ attributes of basecall_info are used. Returns bustard_dir.
+ """
+ if basecall_info is None:
+ qseq_file = '42BRJAAXX_8_1_0039_qseq.txt'
+ tile_list = TILE_LIST
+ summary_file = '42BRJAAXX_BustardSummary.xml'
+ else:
+ qseq_file = basecall_info.qseq_file
+ tile_list = basecall_info.tile_list
+ summary_file = basecall_info.basecall_summary
+
+ # 42BRJ 8 1 0039 happened to be a better than usual tile, in that there
+ # was actually sequence at the start
+ source = os.path.join(TESTDATA_DIR, qseq_file)
+ destdir = bustard_dir
+ if not os.path.isdir(destdir):
+ os.mkdir(destdir)
+
+ for lane in LANE_LIST:
+ for tile in tile_list:
+ destination = os.path.join(bustard_dir, 's_%d_1_%04d_qseq.txt' % (lane, tile))
+ shutil.copy(source, destination)
+
+ make_matrix_dir(bustard_dir)
+ make_phasing_dir(bustard_dir)
+
+ summary_source = os.path.join(TESTDATA_DIR, summary_file)
+ summary_dest = os.path.join(bustard_dir, 'BustardSummary.xml')
+ shutil.copy(summary_source, summary_dest)
+
+ return destdir
+
+def make_scores(gerald_dir, in_temp=True):
+ """
+ Fill gerald directory with score temp files
+ will create the directory if it doesn't exist.
+
+ The files go into <gerald_dir>/Temp when in_temp is true, otherwise
+ directly into gerald_dir. The s_1_0001_score.txt fixture is copied to
+ s_<lane>_<tile>_score.txt for each lane in LANE_LIST and tile in
+ TILE_LIST. Only the final directory is created (os.mkdir), so its parent
+ must already exist. Returns the directory that was filled.
+ """
+ source = os.path.join(TESTDATA_DIR, 's_1_0001_score.txt')
+ destdir = gerald_dir
+ if in_temp:
+ destdir = os.path.join(destdir, 'Temp')
+ if not os.path.isdir(destdir):
+ os.mkdir(destdir)
+
+ for lane in LANE_LIST:
+ for tile in TILE_LIST:
+ destination = os.path.join(destdir, 's_%d_%04d_score.txt' % (lane, tile))
+ shutil.copy(source, destination)
+
+ return destdir
+
+def make_matrix_dir(bustard_dir):
+ """
+ Create several matrix files in <bustard_dir>/Matrix/
+
+ from pipeline 1.4
+
+ The Matrix directory is created if missing, then the
+ 42BRJAAXX_8_02_matrix.txt fixture is copied to s_<lane>_02_matrix.txt
+ for each lane in LANE_LIST.
+ """
+ destdir = os.path.join(bustard_dir, 'Matrix')
+ if not os.path.isdir(destdir):
+ os.mkdir(destdir)
+
+ source = os.path.join(TESTDATA_DIR, '42BRJAAXX_8_02_matrix.txt')
+ for lane in LANE_LIST:
+ destination = os.path.join(destdir, 's_%d_02_matrix.txt' % ( lane, ))
+ shutil.copy(source, destination)
+
+def make_matrix(matrix_filename):
+ """Write a fixed frequency response matrix text to matrix_filename.
+
+ The file is opened in 'w' mode, so an existing file is overwritten.
+ """
contents = """# Auto-generated frequency response matrix
> A
> C
-0.10 -0.10 1.17 -0.03
-0.13 -0.12 0.80 1.27
"""
- s_matrix = os.path.join(matrix_dir, 's_matrix.txt')
- f = open(s_matrix, 'w')
+ f = open(matrix_filename, 'w')
f.write(contents)
f.close()
+def make_matrix_dir_rta160(bustard_dir):
+ """
+ Create several matrix files in <bustard_dir>/Matrix/
+
+ The Matrix directory is created if missing, then the
+ 61MMFAAXX_4_1_matrix.txt fixture is copied to s_<lane>_1_matrix.txt for
+ each lane in LANE_LIST.
+ """
+ destdir = os.path.join(bustard_dir, 'Matrix')
+ if not os.path.isdir(destdir):
+ os.mkdir(destdir)
+
+ source = os.path.join(TESTDATA_DIR, '61MMFAAXX_4_1_matrix.txt')
+ lane_fragments = [ "_%d" % (l,) for l in LANE_LIST]
+ for fragment in lane_fragments:
+ destination = os.path.join(destdir, 's%s_1_matrix.txt' % ( fragment, ))
+ shutil.copy(source, destination)
+
+def make_matrix_dir_rta_1_10(bustard_dir):
+ """Create the Matrix directory; delegates to make_matrix_dir_rta160."""
+ make_matrix_dir_rta160(bustard_dir)
+
+def make_matrix_dir_rta_1_12(bustard_dir):
+ """Create the Matrix directory; delegates to make_matrix_dir_rta160."""
+ make_matrix_dir_rta160(bustard_dir)
+
+def make_phasing_dir(bustard_dir):
+ """
+ Create several phasing files in <bustard_dir>/Phasing/
+
+ from pipeline 1.4
+
+ The Phasing directory is created if missing, then the
+ 42BRJAAXX_8_01_phasing.xml fixture is copied to s_<lane>_01_phasing.xml
+ for each lane in LANE_LIST.
+ """
+ destdir = os.path.join(bustard_dir, 'Phasing')
+ if not os.path.isdir(destdir):
+ os.mkdir(destdir)
+
+ source = os.path.join(TESTDATA_DIR, '42BRJAAXX_8_01_phasing.xml')
+ for lane in LANE_LIST:
+ destination = os.path.join(destdir, 's_%d_01_phasing.xml' % ( lane, ))
+ shutil.copy(source, destination)
+
def make_phasing_params(bustard_dir):
+ """Write a params<lane>.xml file into bustard_dir for each lane in LANE_LIST."""
- for lane in range(1,9):
+ for lane in LANE_LIST:
pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane))
f = open(pathname, 'w')
f.write("""<Parameters>
""")
f.close()
-def make_gerald_config(gerald_dir):
- config_xml = """<RunParameters>
-<ChipWideRunParameters>
- <ANALYSIS>default</ANALYSIS>
- <BAD_LANES></BAD_LANES>
- <BAD_TILES></BAD_TILES>
- <CONTAM_DIR></CONTAM_DIR>
- <CONTAM_FILE></CONTAM_FILE>
- <ELAND_GENOME>Need_to_specify_ELAND_genome_directory</ELAND_GENOME>
- <ELAND_MULTIPLE_INSTANCES>8</ELAND_MULTIPLE_INSTANCES>
- <ELAND_REPEAT></ELAND_REPEAT>
- <EMAIL_DOMAIN>domain.com</EMAIL_DOMAIN>
- <EMAIL_LIST>diane</EMAIL_LIST>
- <EMAIL_SERVER>localhost:25</EMAIL_SERVER>
- <EXPT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane</EXPT_DIR>
- <EXPT_DIR_ROOT>/home/diane/gec</EXPT_DIR_ROOT>
- <FORCE>1</FORCE>
- <GENOME_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald/../../Genomes</GENOME_DIR>
- <GENOME_FILE>Need_to_specify_genome_file_name</GENOME_FILE>
- <HAMSTER_FLAG>genome</HAMSTER_FLAG>
- <OUT_DIR>/home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane/GERALD_19-04-2008_diane</OUT_DIR>
- <POST_RUN_COMMAND></POST_RUN_COMMAND>
- <PRB_FILE_SUFFIX>_prb.txt</PRB_FILE_SUFFIX>
- <PURE_BASES>12</PURE_BASES>
- <QF_PARAMS>'((CHASTITY>=0.6))'</QF_PARAMS>
- <QHG_FILE_SUFFIX>_qhg.txt</QHG_FILE_SUFFIX>
- <QUALITY_FORMAT>--symbolic</QUALITY_FORMAT>
- <READ_LENGTH>32</READ_LENGTH>
- <SEQUENCE_FORMAT>--scarf</SEQUENCE_FORMAT>
- <SEQ_FILE_SUFFIX>_seq.txt</SEQ_FILE_SUFFIX>
- <SIG_FILE_SUFFIX_DEPHASED>_sig2.txt</SIG_FILE_SUFFIX_DEPHASED>
- <SIG_FILE_SUFFIX_NOT_DEPHASED>_sig.txt</SIG_FILE_SUFFIX_NOT_DEPHASED>
- <SOFTWARE_VERSION>@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp</SOFTWARE_VERSION>
- <TILE_REGEX>s_[1-8]_[0-9][0-9][0-9][0-9]</TILE_REGEX>
- <TILE_ROOT>s</TILE_ROOT>
- <TIME_STAMP>Sat Apr 19 19:08:30 2008</TIME_STAMP>
- <TOOLS_DIR>/home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald</TOOLS_DIR>
- <USE_BASES>all</USE_BASES>
- <WEB_DIR_ROOT>http://host.domain.com/yourshare/</WEB_DIR_ROOT>
-</ChipWideRunParameters>
-<LaneSpecificRunParameters>
- <ANALYSIS>
- <s_1>eland</s_1>
- <s_2>eland</s_2>
- <s_3>eland</s_3>
- <s_4>eland</s_4>
- <s_5>eland</s_5>
- <s_6>eland</s_6>
- <s_7>eland</s_7>
- <s_8>eland</s_8>
- </ANALYSIS>
- <ELAND_GENOME>
- <s_1>/g/dm3</s_1>
- <s_2>/g/equcab1</s_2>
- <s_3>/g/equcab1</s_3>
- <s_4>/g/canfam2</s_4>
- <s_5>/g/hg18</s_5>
- <s_6>/g/hg18</s_6>
- <s_7>/g/hg18</s_7>
- <s_8>/g/hg18</s_8>
- </ELAND_GENOME>
- <READ_LENGTH>
- <s_1>32</s_1>
- <s_2>32</s_2>
- <s_3>32</s_3>
- <s_4>32</s_4>
- <s_5>32</s_5>
- <s_6>32</s_6>
- <s_7>32</s_7>
- <s_8>32</s_8>
- </READ_LENGTH>
- <USE_BASES>
- <s_1>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_1>
- <s_2>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_2>
- <s_3>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_3>
- <s_4>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_4>
- <s_5>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_5>
- <s_6>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_6>
- <s_7>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_7>
- <s_8>YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY</s_8>
- </USE_BASES>
-</LaneSpecificRunParameters>
-</RunParameters>
-"""
- pathname = os.path.join(gerald_dir, 'config.xml')
- f = open(pathname,'w')
- f.write(config_xml)
- f.close()
+def make_gerald_config_026(gerald_dir):
+ """Copy the gerald_config_0.2.6.xml fixture to <gerald_dir>/config.xml."""
+ source = os.path.join(TESTDATA_DIR, 'gerald_config_0.2.6.xml')
+ destination = os.path.join(gerald_dir, 'config.xml')
+ shutil.copy(source, destination)
-def make_summary100_htm(gerald_dir):
- summary_htm="""<!--RUN_TIME Wed Jul 2 06:47:44 2008 -->
-<!--SOFTWARE_VERSION @(#) $Id: jerboa.pl,v 1.94 2007/12/04 09:59:07 rshaw Exp $-->
-<html>
-<body>
-
-<a name="Top"><h2><title>080627_HWI-EAS229_0036_3055HAXX Summary</title></h2></a>
-<h1>Summary Information For Experiment 080627_HWI-EAS229_0036_3055HAXX on Machine HWI-EAS229</h1>
-<h2><br></br>Chip Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr><td>Machine</td><td>HWI-EAS229</td></tr>
-<tr><td>Run Folder</td><td>080627_HWI-EAS229_0036_3055HAXX</td></tr>
-<tr><td>Chip ID</td><td>unknown</td></tr>
-</table>
-<h2><br></br>Chip Results Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-<td>Clusters</td>
-<td>Clusters (PF)</td>
-<td>Yield (kbases)</td>
-</tr>
-<tr><td>80933224</td>
-<td>43577803</td>
-<td>1133022</td>
-</tr>
-</table>
-<h2><br></br>Lane Parameter Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane</td>
-<td>Sample ID</td>
-<td>Sample Target</td>
-<td>Sample Type</td>
-<td>Length</td>
-<td>Filter</td>
-<td>Num Tiles</td>
-<td>Tiles</td>
-</tr>
-<tr>
-<td>1</td>
-<td>unknown</td>
-<td>mm9</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane1">Lane 1</a></td>
-</tr>
-<tr>
-<td>2</td>
-<td>unknown</td>
-<td>mm9</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane2">Lane 2</a></td>
-</tr>
-<tr>
-<td>3</td>
-<td>unknown</td>
-<td>mm9</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane3">Lane 3</a></td>
-</tr>
-<tr>
-<td>4</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane4">Lane 4</a></td>
-</tr>
-<tr>
-<td>5</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane5">Lane 5</a></td>
-</tr>
-<tr>
-<td>6</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane6">Lane 6</a></td>
-</tr>
-<tr>
-<td>7</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane7">Lane 7</a></td>
-</tr>
-<tr>
-<td>8</td>
-<td>unknown</td>
-<td>elegans170</td>
-<td>ELAND</td>
-<td>26</td>
-<td>'((CHASTITY>=0.6))'</td>
-<td>100</td>
-<td><a href="#Lane8">Lane 8</a></td>
-</tr>
-</table>
-<h2><br></br>Lane Results Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-<td colspan="2">Lane Info</td>
-<td colspan="8">Tile Mean +/- SD for Lane</td>
-</tr>
-<tr>
-<td>Lane </td>
-<td>Lane Yield (kbases) </td>
-<td>Clusters (raw)</td>
-<td>Clusters (PF) </td>
-<td>1st Cycle Int (PF) </td>
-<td>% intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Alignment Score (PF) </td>
-<td> % Error Rate (PF) </td>
-</tr>
-<tr>
-<td>1</td>
-<td>158046</td>
-<td>96483 +/- 9074</td>
-<td>60787 +/- 4240</td>
-<td>329 +/- 35</td>
-<td>101.88 +/- 6.03</td>
-<td>63.21 +/- 3.29</td>
-<td>70.33 +/- 0.24</td>
-<td>9054.08 +/- 59.16</td>
-<td>0.46 +/- 0.18</td>
-</tr>
-<tr>
-<td>2</td>
-<td>156564</td>
-<td>133738 +/- 7938</td>
-<td>60217 +/- 1926</td>
-<td>444 +/- 39</td>
-<td>92.62 +/- 7.58</td>
-<td>45.20 +/- 3.31</td>
-<td>51.98 +/- 0.74</td>
-<td>6692.04 +/- 92.49</td>
-<td>0.46 +/- 0.09</td>
-</tr>
-<tr>
-<td>3</td>
-<td>185818</td>
-<td>152142 +/- 10002</td>
-<td>71468 +/- 2827</td>
-<td>366 +/- 36</td>
-<td>91.53 +/- 8.66</td>
-<td>47.19 +/- 3.80</td>
-<td>82.24 +/- 0.44</td>
-<td>10598.68 +/- 64.13</td>
-<td>0.41 +/- 0.04</td>
-</tr>
-<tr>
-<td>4</td>
-<td>34953</td>
-<td>15784 +/- 2162</td>
-<td>13443 +/- 1728</td>
-<td>328 +/- 40</td>
-<td>97.53 +/- 9.87</td>
-<td>85.29 +/- 1.91</td>
-<td>80.02 +/- 0.53</td>
-<td>10368.82 +/- 71.08</td>
-<td>0.15 +/- 0.05</td>
-</tr>
-<tr>
-<td>5</td>
-<td>167936</td>
-<td>119735 +/- 8465</td>
-<td>64590 +/- 2529</td>
-<td>417 +/- 37</td>
-<td>88.69 +/- 14.79</td>
-<td>54.10 +/- 2.59</td>
-<td>76.95 +/- 0.32</td>
-<td>9936.47 +/- 65.75</td>
-<td>0.28 +/- 0.02</td>
-</tr>
-<tr>
-<td>6</td>
-<td>173463</td>
-<td>152177 +/- 8146</td>
-<td>66716 +/- 2493</td>
-<td>372 +/- 39</td>
-<td>87.06 +/- 9.86</td>
-<td>43.98 +/- 3.12</td>
-<td>78.80 +/- 0.43</td>
-<td>10162.28 +/- 49.65</td>
-<td>0.38 +/- 0.03</td>
-</tr>
-<tr>
-<td>7</td>
-<td>149287</td>
-<td>84649 +/- 7325</td>
-<td>57418 +/- 3617</td>
-<td>295 +/- 28</td>
-<td>89.40 +/- 8.23</td>
-<td>67.97 +/- 1.82</td>
-<td>33.38 +/- 0.25</td>
-<td>4247.92 +/- 32.37</td>
-<td>1.00 +/- 0.03</td>
-</tr>
-<tr>
-<td>8</td>
-<td>106953</td>
-<td>54622 +/- 4812</td>
-<td>41136 +/- 3309</td>
-<td>284 +/- 37</td>
-<td>90.21 +/- 9.10</td>
-<td>75.39 +/- 2.27</td>
-<td>48.33 +/- 0.29</td>
-<td>6169.21 +/- 169.50</td>
-<td>0.86 +/- 1.22</td>
-</tr>
-<tr><td colspan="13">Tile mean across chip</td></tr>
-<tr>
-<td>Av.</td>
-<td></td>
-<td>101166</td>
-<td>54472</td>
-<td>354</td>
-<td>92.36</td>
-<td>60.29</td>
-<td>65.25</td>
-<td>8403.69</td>
-<td>0.50</td>
-</tr>
-</table>
-<h2><br></br>Expanded Lane Summary<br></br></h2>
-<table border="1" cellpadding="5">
-<tr>
-
-<tr><td colspan="2">Lane Info</td>
-<td colspan="2">Phasing Info</td>
-<td colspan="2">Raw Data (tile mean)</td>
-<td colspan="7">Filtered Data (tile mean)</td></tr>
-<td>Lane </td>
-<td>Clusters (tile mean) (raw)</td>
-<td>% Phasing </td>
-<td>% Prephasing </td>
-<td>% Error Rate (raw) </td>
-<td> Equiv Perfect Clusters (raw) </td>
-<td>% retained </td>
-<td>Cycle 2-4 Av Int (PF) </td>
-<td>Cycle 2-10 Av % Loss (PF) </td>
-<td>Cycle 10-20 Av % Loss (PF) </td>
-<td>% Align (PF) </td>
-<td>% Error Rate (PF) </td>
-<td> Equiv Perfect Clusters (PF) </td>
-</tr>
-<tr>
-<td>1</td>
-<td>96483</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.00</td>
-<td>49676</td>
-<td>63.21</td>
-<td>317 +/- 32</td>
-<td>0.13 +/- 0.44</td>
-<td>-1.14 +/- 0.34</td>
-<td>70.33</td>
-<td>0.46</td>
-<td>41758</td>
-</tr>
-<tr>
-<td>2</td>
-<td>133738</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.22</td>
-<td>40467</td>
-<td>45.20</td>
-<td>415 +/- 33</td>
-<td>0.29 +/- 0.40</td>
-<td>-0.79 +/- 0.35</td>
-<td>51.98</td>
-<td>0.46</td>
-<td>30615</td>
-</tr>
-<tr>
-<td>3</td>
-<td>152142</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.30</td>
-<td>78588</td>
-<td>47.19</td>
-<td>344 +/- 26</td>
-<td>0.68 +/- 0.51</td>
-<td>-0.77 +/- 0.42</td>
-<td>82.24</td>
-<td>0.41</td>
-<td>57552</td>
-</tr>
-<tr>
-<td>4</td>
-<td>15784</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>0.29</td>
-<td>11095</td>
-<td>85.29</td>
-<td>306 +/- 34</td>
-<td>0.20 +/- 0.69</td>
-<td>-1.28 +/- 0.66</td>
-<td>80.02</td>
-<td>0.15</td>
-<td>10671</td>
-</tr>
-<tr>
-<td>5</td>
-<td>119735</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>0.85</td>
-<td>60335</td>
-<td>54.10</td>
-<td>380 +/- 32</td>
-<td>0.34 +/- 0.49</td>
-<td>-1.55 +/- 4.69</td>
-<td>76.95</td>
-<td>0.28</td>
-<td>49015</td>
-</tr>
-<tr>
-<td>6</td>
-<td>152177</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.21</td>
-<td>70905</td>
-<td>43.98</td>
-<td>333 +/- 27</td>
-<td>0.57 +/- 0.50</td>
-<td>-0.91 +/- 0.39</td>
-<td>78.80</td>
-<td>0.38</td>
-<td>51663</td>
-</tr>
-<tr>
-<td>7</td>
-<td>84649</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.38</td>
-<td>21069</td>
-<td>67.97</td>
-<td>272 +/- 20</td>
-<td>1.15 +/- 0.52</td>
-<td>-0.84 +/- 0.58</td>
-<td>33.38</td>
-<td>1.00</td>
-<td>18265</td>
-</tr>
-<tr>
-<td>8</td>
-<td>54622</td>
-<td>0.7700</td>
-<td>0.3100</td>
-<td>1.17</td>
-<td>21335</td>
-<td>75.39</td>
-<td>262 +/- 31</td>
-<td>1.10 +/- 0.59</td>
-<td>-1.01 +/- 0.47</td>
-<td>48.33</td>
-<td>0.86</td>
-<td>19104</td>
-</tr>
-</table>
-<b><br></br>IVC Plots</b>
-<p> <a href='IVC.htm' target="_blank"> IVC.htm
- </a></p>
-<b><br></br>All Intensity Plots</b>
-<p> <a href='All.htm' target="_blank"> All.htm
- </a></p>
-<b><br></br>Error graphs: </b>
-<p> <a href='Error.htm' target="_blank"> Error.htm
- </a></p>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane1"><h2><br></br>Lane 1<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>1</td>
-<td>0001</td>
-<td>114972</td>
-<td>326.48</td>
-<td>94.39</td>
-<td>57.44</td>
-<td>70.2</td>
-<td>9038.6</td>
-<td>0.44</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane2"><h2><br></br>Lane 2<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>2</td>
-<td>0001</td>
-<td>147793</td>
-<td>448.12</td>
-<td>83.68</td>
-<td>38.57</td>
-<td>53.7</td>
-<td>6905.4</td>
-<td>0.54</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane3"><h2><br></br>Lane 3<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>3</td>
-<td>0001</td>
-<td>167904</td>
-<td>374.05</td>
-<td>86.91</td>
-<td>40.36</td>
-<td>81.3</td>
-<td>10465.0</td>
-<td>0.47</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane4"><h2><br></br>Lane 4<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>4</td>
-<td>0001</td>
-<td>20308</td>
-<td>276.85</td>
-<td>92.87</td>
-<td>84.26</td>
-<td>80.4</td>
-<td>10413.8</td>
-<td>0.16</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane5"><h2><br></br>Lane 5<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane6"><h2><br></br>Lane 6<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>6</td>
-<td>0001</td>
-<td>166844</td>
-<td>348.12</td>
-<td>77.59</td>
-<td>38.13</td>
-<td>79.7</td>
-<td>10264.4</td>
-<td>0.44</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane7"><h2><br></br>Lane 7<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>7</td>
-<td>0001</td>
-<td>98913</td>
-<td>269.90</td>
-<td>86.66</td>
-<td>64.55</td>
-<td>33.2</td>
-<td>4217.5</td>
-<td>1.02</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-<a name="Lane8"><h2><br></br>Lane 8<br></br></h2></a>
-<table border="1" cellpadding="5">
-<tr>
-<td>Lane </td>
-<td>Tile </td>
-<td>Clusters (raw)</td>
-<td>Av 1st Cycle Int (PF) </td>
-<td>Av % intensity after 20 cycles (PF) </td>
-<td>% PF Clusters </td>
-<td>% Align (PF) </td>
-<td>Av Alignment Score (PF) </td>
-<td>% Error Rate (PF) </td>
-</tr>
-<tr>
-<td>8</td>
-<td>0001</td>
-<td>64972</td>
-<td>243.60</td>
-<td>89.40</td>
-<td>73.17</td>
-<td>48.3</td>
-<td>6182.8</td>
-<td>0.71</td>
-</tr>
-</table>
-<td><a href="#Top">Back to top</a></td>
-</body>
-</html>
-"""
- pathname = os.path.join(gerald_dir, 'Summary.htm')
- f = open(pathname, 'w')
- f.write(summary_htm)
- f.close()
+def make_gerald_config_100(gerald_dir):
+ """Copy the gerald_config_1.0.xml fixture to <gerald_dir>/config.xml."""
+ source = os.path.join(TESTDATA_DIR, 'gerald_config_1.0.xml')
+ destination = os.path.join(gerald_dir, 'config.xml')
+ shutil.copy(source, destination)
+
+def make_gerald_config_1_7(gerald_dir):
+ """CASAVA 1.7 gerald config
+
+ Copies the gerald_config_1.7.xml fixture to <gerald_dir>/config.xml.
+ """
+ source = os.path.join(TESTDATA_DIR, 'gerald_config_1.7.xml')
+ destination = os.path.join(gerald_dir, 'config.xml')
+ shutil.copy(source, destination)
+
+def make_summary_htm_100(gerald_dir):
+ """Copy the Summary-pipeline100.htm fixture to <gerald_dir>/Summary.htm."""
+ source = os.path.join(TESTDATA_DIR, 'Summary-pipeline100.htm')
+ destination = os.path.join(gerald_dir, 'Summary.htm')
+ shutil.copy(source, destination)
+
+def make_summary_htm_110(gerald_dir):
+ """Copy the Summary-pipeline110.htm fixture to <gerald_dir>/Summary.htm."""
+ source = os.path.join(TESTDATA_DIR, 'Summary-pipeline110.htm')
+ destination = os.path.join(gerald_dir, 'Summary.htm')
+ shutil.copy(source, destination)
def make_summary_paired_htm(gerald_dir):
    """Copy the 'Summary-paired-pipeline110.htm' test data file to Summary.htm in gerald_dir.

    The summary page is read from the test data directory instead of being
    embedded in this module as a string literal.
    """
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-paired-pipeline110.htm'),
                os.path.join(gerald_dir, 'Summary.htm'))
+
def make_summary_ipar130_htm(gerald_dir):
    """Copy the 'Summary-ipar130.htm' test data file to Summary.htm in gerald_dir."""
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-ipar130.htm'),
                os.path.join(gerald_dir, 'Summary.htm'))
+
def make_summary_rta160_xml(gerald_dir):
    """Copy the 'Summary-rta160.xml' test data file to Summary.xml in gerald_dir."""
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-rta160.xml'),
                os.path.join(gerald_dir, 'Summary.xml'))
+
+
def make_summary_casava1_7_xml(gerald_dir):
    """Copy the 'Summary-casava1.7.xml' test data file to Summary.xml in gerald_dir."""
    shutil.copy(os.path.join(TESTDATA_DIR, 'Summary-casava1.7.xml'),
                os.path.join(gerald_dir, 'Summary.xml'))
+
def make_status_rta1_12(datadir):
    """Copy the status fixtures from the '1_12' test data directory into datadir.

    Status.htm is copied as a file; the 'Status_Files' and 'reports'
    directories are copied recursively. shutil.copytree is called with its
    defaults, so those two destination directories must not exist yet.
    """
    sourcedir = os.path.join(TESTDATA_DIR, '1_12')

    shutil.copy(os.path.join(sourcedir, 'Status.htm'),
                os.path.join(datadir, 'Status.htm'))

    # Same order as before: status files first, then the reports tree.
    for dirname in ('Status_Files', 'reports'):
        shutil.copytree(os.path.join(sourcedir, dirname),
                        os.path.join(datadir, dirname))
def make_eland_results(gerald_dir):
    """Write an s_<lane>_eland_result.txt file into gerald_dir for every lane in LANE_LIST.

    Every lane receives the same three-record eland result text.
    """
    eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D.
>HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0
>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T
"""
    for lane in LANE_LIST:
        pathname = os.path.join(gerald_dir,
                                's_%d_eland_result.txt' % (lane,))
        # The context manager closes the file even if the write fails.
        with open(pathname, 'w') as stream:
            stream.write(eland_result)
def make_eland_multi(gerald_dir, paired=False, lane_list=LANE_LIST):
    """Write eland multi files into gerald_dir for each lane in lane_list.

    When paired is false one s_<lane>_eland_multi.txt file is written per
    lane using the first fixture text. When paired is true two files per
    lane are written, s_<lane>_1_eland_multi.txt and
    s_<lane>_2_eland_multi.txt, holding the first and second fixture text
    respectively.
    """
    eland_multi = [""">HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0
>HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1,chr7.fa:22516603F1,chr9.fa:134886204R
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample1:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample2:55269838R0
""", """>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM
>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 NNNNNNNNNNNNNNGTGGTATGGCGGTGTCTGGTCGT QC
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0,chr7.fa:22516603F1,chr9.fa:134886204R
>HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample1:55269838R0
>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample2:55269838R0
"""]
    # Work out (filename, contents) pairs first so one loop does the writing.
    if paired:
        targets = [('s_%d_%d_eland_multi.txt' % (lane, end), eland_multi[end - 1])
                   for end in [1, 2]
                   for lane in lane_list]
    else:
        targets = [('s_%d_eland_multi.txt' % (lane,), eland_multi[0])
                   for lane in lane_list]

    for filename, contents in targets:
        # The context manager closes the file even if the write fails.
        with open(os.path.join(gerald_dir, filename), 'w') as stream:
            stream.write(contents)
+
def make_eland_export(gerald_dir, paired=False, lane_list=LANE_LIST):
    """Copy the 'casava_1.7_export.txt' test data file to s_<lane>_export.txt for each lane.

    paired is accepted but is not used by this function.
    """
    source = os.path.join(TESTDATA_DIR, 'casava_1.7_export.txt')

    for lane in lane_list:
        destination = os.path.join(gerald_dir, 's_%d_export.txt' % (lane,))
        shutil.copy(source, destination)
+
+
def make_scarf(gerald_dir, lane_list=LANE_LIST):
    """Write an s_<lane>_sequence.txt file of colon-separated read records for each lane.

    Each record ends with a 37-base read and a 37-character quality string.
    """
    # Raw string: several quality strings contain a literal backslash
    # followed by 'a' or 'b'. In a normal string literal those are the BEL
    # and backspace escapes, which silently shortened the quality strings
    # to 36 characters.
    seq = r"""HWI-EAS229_92_30VNBAAXX:1:1:0:161:NCAATTACACGACGCTAGCCCTAAAGCTATTTCGAGG:E[aaaabb^a\a_^^a[S`ba_WZUXaaaaaaUKPER
HWI-EAS229_92_30VNBAAXX:1:1:0:447:NAGATGCGCATTTGAAGTAGGAGCAAAAGATCAAGGT:EUabaab^baabaaaaaaaa^^Uaaaaa\aaaa__`a
HWI-EAS229_92_30VNBAAXX:1:1:0:1210:NATAGCCTCTATAGAAGCCACTATTATTTTTTTCTTA:EUa`]`baaaaa^XQU^a`S``S_`J_aaaaaabb^V
HWI-EAS229_92_30VNBAAXX:1:1:0:1867:NTGGAGCAGATATAAAAACAGATGGTGACGTTGAAGT:E[^UaaaUaba^aaa^aa^XV\baaLaLaaaaQVXV^
HWI-EAS229_92_30VNBAAXX:1:1:0:1898:NAGCTCGTGTCGTGAGATGTTAGGTTAAGTCCTGCAA:EK_aaaaaaaaaaaUZaaZaXM[aaaXSM\aaZ]URE
"""
    for lane in lane_list:
        pathname = os.path.join(gerald_dir, 's_%d_sequence.txt' % (lane,))
        with open(pathname, 'w') as stream:
            stream.write(seq)
+
def make_fastq(gerald_dir, lane_list=LANE_LIST):
    """Write an s_<lane>_sequence.txt file holding three fastq records for each lane.

    Every record has a 37-base read and a 37-character quality string.
    """
    # Raw string: the first quality line contains two literal backslashes.
    # Previously only the first was escaped, so the second one ('\b') was
    # written as a backspace character and the line came out one short.
    seq = r"""@HWI-EAS229:1:2:182:712#0/1
AAAAAAAAAAAAAAAAAAAAANAAAAAAAAAAAAAAA
+HWI-EAS229:1:2:182:712#0/1
\bab_bbaabbababbaaa]]D]bb_baabbab\baa
@HWI-EAS229:1:2:198:621#0/1
CCCCCCCCCCCCCCCCCCCCCNCCCCCCCCCCCCCCC
+HWI-EAS229:1:2:198:621#0/1
[aaaaaaa`_`aaaaaaa[`ZDZaaaaaaaaaaaaaa
@HWI-EAS229:1:2:209:1321#0/1
AAAAAAAAAAAAAAAAAAAAANAAAAAAAAAAAAAAA
+HWI-EAS229:1:2:209:1321#0/1
_bbbbbaaababaabbbbab]D]aaaaaaaaaaaaaa
"""
    for lane in lane_list:
        pathname = os.path.join(gerald_dir, 's_%d_sequence.txt' % (lane,))
        with open(pathname, 'w') as stream:
            stream.write(seq)
+
# Read numbers generated for every sample.
UNALIGNED_READS = [1, 2]

# One entry per simulated sample:
#   (lane, reads, project_id, index_id, index_seq)
# Entries whose project_id is None are treated by DemultiplexedPaths as the
# "Undetermined_indices" output for that lane; entries with a project_id but
# no index_seq are named with "NoIndex".
UNALIGNED_SAMPLES = [
    (1, UNALIGNED_READS, '11111', None, None),
    (2, UNALIGNED_READS, '11112', None, None),
    (3, UNALIGNED_READS, '11113', 1, 'ATCACG'),
    (3, UNALIGNED_READS, '11113', 2, 'CGATGT'),
    (3, UNALIGNED_READS, '11113', 3, 'TTAGGC'),
    (4, UNALIGNED_READS, '11114', 6, 'GCCAAT'),
    (5, UNALIGNED_READS, '11115', 1, 'ATCACG'),
    (5, UNALIGNED_READS, '11116', 7, 'ACTTGA'),
    (5, UNALIGNED_READS, '11117', 9, 'GATCAG'),
    (6, UNALIGNED_READS, '11118', 1, 'ATCACG'),
    (7, UNALIGNED_READS, '11119', 2, 'CGATGT'),
    (8, UNALIGNED_READS, '11120', 3, 'TTAGGC'),
    (1, UNALIGNED_READS, None, None, None),
    (2, UNALIGNED_READS, None, None, None),
    (3, UNALIGNED_READS, None, None, None),
    (4, UNALIGNED_READS, None, None, None),
    (5, UNALIGNED_READS, None, None, None),
]
+
+
def make_aligned_eland_export(aligned_dir, flowcell_id):
    """Create export files and a sample summary for every entry in UNALIGNED_SAMPLES.

    For each sample the project, sample and summary directories are created
    under aligned_dir, 'sample_summary_1_12.htm' from the test data
    directory is copied in as Sample_Summary.htm, and gzip-compressed
    R<read>_<split>_export.txt.gz files are written for splits 001 and 002
    and for every read in UNALIGNED_READS.
    """
    summary_source = os.path.join(TESTDATA_DIR, 'sample_summary_1_12.htm')
    # The per-sample read list is not used here; files are always written
    # for UNALIGNED_READS, so it is unpacked into a throwaway name instead
    # of being shadowed by the inner loop variable.
    for lane, _reads, project_id, index_id, index_seq in UNALIGNED_SAMPLES:
        paths = DemultiplexedPaths(aligned_dir,
                                   flowcell_id,
                                   lane,
                                   project_id,
                                   index_id,
                                   index_seq)
        paths.make_sample_dirs()
        paths.make_summary_dirs()
        summary_dest = os.path.join(paths.summary_dir, 'Sample_Summary.htm')
        shutil.copy(summary_source, summary_dest)

        # gzip.open(..., 'wb') is a binary stream, so the text must be
        # encoded; writing a str to it raises TypeError on Python 3.
        body = get_aligned_sample_export(lane, index_seq).encode('utf-8')
        for split in ['001', '002']:
            for read in UNALIGNED_READS:
                suffix = 'R{0}_{1}_export.txt.gz'.format(read, split)
                pathname = paths.make_test_filename(suffix)
                with gzip.open(pathname, 'wb') as stream:
                    stream.write(body)
+
+
def make_unaligned_fastqs_1_12(unaligned_dir, flowcell_id):
    """Create a default mix of unaligned sample files.

    Calls make_unaligned_fastq_sample_1_12 once for every entry in
    UNALIGNED_SAMPLES.
    """
    for lane, reads, project_id, index_id, index_seq in UNALIGNED_SAMPLES:
        make_unaligned_fastq_sample_1_12(unaligned_dir,
                                         flowcell_id,
                                         lane,
                                         reads,
                                         project_id,
                                         index_id,
                                         index_seq)
+
def make_unaligned_fastq_sample_1_12(unaligned_dir,
                                     flowcell_id,
                                     lane,
                                     reads,
                                     project_id,
                                     index_id=None,
                                     index_seq=None):
    """Create the fastq files and sample sheet for one demultiplexed sample.

    The project and sample directories are created under unaligned_dir,
    gzip-compressed R<read>_<split>.fastq.gz files are written for splits
    001 and 002 and for every entry of reads, and a one-row SampleSheet.csv
    is written into the sample directory.
    """
    paths = DemultiplexedPaths(unaligned_dir,
                               flowcell_id,
                               lane,
                               project_id,
                               index_id,
                               index_seq)
    paths.make_sample_dirs()

    # gzip.open(..., 'wb') is a binary stream, so the text must be encoded;
    # writing a str to it raises TypeError on Python 3.
    sample_seq = get_unaligned_sample_fastq_data(
        flowcell_id, lane, index_seq).encode('utf-8')
    for split in ['001', '002']:
        for read in reads:
            suffix = 'R{0}_{1}.fastq.gz'.format(read, split)
            pathname = paths.make_test_filename(suffix)
            with gzip.open(pathname, 'wb') as stream:
                stream.write(sample_seq)

    header = ('FCID,Lane,SampleID,SampleRef,Index,Description,Control,'
              'Recipe,Operator,SampleProject')
    template = ('{flowcell},{lane},{id},mm9,{index},Sample #{id},N,'
                'PR_indexing,Operator,{sample_project}')
    sheetname = os.path.join(paths.sample_dir, 'SampleSheet.csv')
    with open(sheetname, 'w') as stream:
        # Text-mode files translate '\n' to the platform line ending
        # themselves; writing os.linesep would double the carriage return
        # on Windows.
        stream.write(header + '\n')
        stream.write(template.format(flowcell=flowcell_id,
                                     lane=lane,
                                     id=paths.sample_id,
                                     index=paths.index_seq,
                                     sample_project=paths.sample_project)
                     + '\n')
+
+
class DemultiplexedPaths(object):
    """Compute directory and file names for one demultiplexed sample.

    Three naming schemes are used, chosen from the constructor arguments:

    * project_id is None: the "Undetermined_indices" output for the lane.
    * index_seq is None: a sample with no index ("NoIndex" file names).
    * otherwise: an indexed sample, with the index id in the project name
      and the index sequence in the file names.

    Attributes set by the constructor: basedir, flowcell_id, lane,
    index_seq, sample_id, sample_project, rootname (file name prefix),
    project_dir, sample_dir and summary_dir (all three joined under basedir).
    """

    def __init__(self, basedir, flowcell_id, lane, project_id, index_id, index_seq):
        """Derive all names; raises ValueError if lane is not in LANE_LIST."""
        if lane not in LANE_LIST:
            raise ValueError("Invalid lane ID: {0}".format(lane))
        self.basedir = basedir
        self.flowcell_id = flowcell_id
        self.lane = lane

        if project_id is None:
            # undetermined
            self.index_seq = ''
            self.sample_id = 'lane{0}'.format(lane)
            self.sample_project = 'Undetermined_indices'
            self.rootname = 'lane{lane}_Undetermined_L00{lane}_'.format(
                lane=lane)
            project_name = 'Undetermined_indices'
            sample_name = 'Sample_lane{lane}'.format(lane=lane)
        elif index_seq is None:
            self.index_seq = ''
            self.sample_id = project_id
            self.sample_project = '{project_id}'.format(project_id=project_id)
            self.rootname = '{project_id}_NoIndex_L00{lane}_'.format(
                project_id=project_id,
                lane=lane)
            project_name = 'Project_' + self.sample_project
            sample_name = 'Sample_{project_id}'.format(
                project_id=project_id)
        else:
            self.index_seq = index_seq
            self.sample_id = project_id
            self.sample_project = '{project_id}_Index{index_id}'.format(
                project_id=project_id,
                index_id=index_id)
            self.rootname = '{project_id}_{index}_L00{lane}_'.format(
                project_id=project_id,
                index=index_seq,
                lane=lane)
            project_name = 'Project_' + self.sample_project
            sample_name = 'Sample_{project_id}'.format(
                project_id=project_id)

        # The sample and summary directories both live inside the project
        # directory.
        self.project_dir = os.path.join(self.basedir, project_name)
        self.sample_dir = os.path.join(self.project_dir, sample_name)
        self.summary_dir = os.path.join(
            self.project_dir,
            'Summary_Stats_{0}'.format(self.flowcell_id))

    def make_sample_dirs(self):
        """Create the project and sample directories if they do not exist."""
        if not os.path.isdir(self.project_dir):
            os.mkdir(self.project_dir)
        if not os.path.isdir(self.sample_dir):
            os.mkdir(self.sample_dir)

    def make_summary_dirs(self):
        """Create the summary directory if it does not exist."""
        if not os.path.isdir(self.summary_dir):
            os.mkdir(self.summary_dir)

    def make_test_filename(self, suffix):
        """Return the path of rootname + suffix inside the sample directory."""
        return os.path.join(self.sample_dir, self.rootname + suffix)

    def dump(self):
        """Print the computed names, for debugging."""
        print('index seq: {0}'.format(self.index_seq))

        print('project dir: {0}'.format(self.project_dir))
        print('sample dir: {0}'.format(self.sample_dir))
        print('rootname: {0}'.format(self.rootname))
        # sample_dir already contains project_dir, so it must not be joined
        # with project_dir again (that duplicated the project directory for
        # relative paths).
        print('path: {0}'.format(self.make_test_filename('R1_001.fastq.gz')))
+
+
def get_unaligned_sample_fastq_data(flowcell_id, lane, index_seq):
    """Return four fastq records as one string, ending with a newline.

    flowcell_id, lane and index_seq are substituted into each record's
    header line; the reads and quality strings are fixed.
    """
    template_lines = (
        '@HWI-ST0787:101:{flowcell}:{lane}:1101:2416:3469 1:Y:0:{index}',
        'TCCTTCATTCCACCGGAGTCTGTGGAATTCTCGGGTGCCAAGGAACTCCA',
        '+',
        'CCCFFFFFHHHHHJJJJJJJJJIJJJJJJJJJJJJJJJJIIJJIIJJJJJ',
        '@HWI-ST0787:101:{flowcell}:{lane}:1101:2677:3293 1:Y:0:{index}',
        'TGGAAATCCATTGGGGTTTCCCCTGGAATTCTCGGGTGCCAAGGAACTCC',
        '+',
        '@CCFF3BDHHHHHIIIIIHHIIIDIIIGIIIEGIIIIIIIIIIIIIIIHH',
        '@HWI-ST0787:101:{flowcell}:{lane}:1101:2616:3297 1:Y:0:{index}',
        'TAATACTGCCGGGTAATGATGGCTGGAATTCTCGGGTGCCAAGGAACTCC',
        '+',
        'CCCFFFFFHHHHHCGHJJJJJJJJJJJJJJJJJIIJJJJJJJJJIHJJJI',
        '@HWI-ST0787:101:{flowcell}:{lane}:1101:2545:3319 1:N:0:{index}',
        'TCCTTCATTCCACCGGAGTCTGCTGGAATTCTCGGGTGCCAAGGAACTCC',
        '+',
        'CCCFFFFFHHHFHJGIGHIJHIIGHIGIGIGEHFIJJJIHIJHJIIJJIH',
        '',  # keeps the trailing newline after the last record
    )
    template = '\n'.join(template_lines)
    return template.format(flowcell=flowcell_id, lane=lane, index=index_seq)
+
def get_aligned_sample_export(lane, index_seq):
    """Return four tab-separated export records as one string, ending with a newline.

    lane and index_seq are substituted into the third and seventh fields of
    every record; everything else is fixed.
    """
    # Backslashes inside the quality strings are written as '\\' so the
    # literal contains no invalid escape sequence ('\c' triggers a warning
    # on current Python versions). The resulting text is unchanged.
    body = """HWI-ST0787\t102\t{lane}\t1101\t1207\t1993\t{index}\t1\tAANGGATTCGATCCGGCTTAAGAGATGAAAACCGAAAGGGCCGACCGAA\taaBS`ccceg[`ae[dRR_[[SPPPP__ececfYYWaegh^\\ZLLY\\X`\tNM\t\t\t\t\t\t
HWI-ST0787\t102\t{lane}\t1101\t1478\t1997\t{index}\t1\tCAAGAACCCCGGGGGGGGGGGGGCAGAGAGGGGGAATTTTTTTTTTGTT\tBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\tNM\t\t\t\t\t\t\t\t\t\t\tN
HWI-ST0787\t102\t{lane}\t1101\t1625\t1994\t{index}\t1\tAANAATGCTACAGAGACAAAACAAAACTGATATGAAAGTTGAGAATAAA\tB^BS\\cccgegg[Q[QQQ[`egdgffbeggfgh^^YcfgfhXaHY^O^c\tchr9.fa\t67717938\tR\t99\t72
HWI-ST0787\t102\t{lane}\t1101\t1625\t1994\t{index}\t1\tAANAATGCTACAGAGACAAAACAAAACTGATATGAAAGTTGAGAATAAA\tB^BS\\cccgegg[Q[QQQ[`egdgffbeggfgh^^YcfgfhXaHY^O^c\t3:4:3\t\t\t\t\t\t\t\t\t\t\tY
""".format(lane=lane, index=index_seq)
    return body
+
def print_ls_tree(root):
    """List tree contents, useful for debugging.

    Prints the path of every file found by walking root; directories are
    not printed on their own.
    """
    for dirpath, _dirnames, filenames in os.walk(root):
        for filename in filenames:
            print(os.path.join(dirpath, filename))
+
+
class BaseCallInfo(object):
    """Provide customization for how to setup the base call mock data.

    A plain record of three values, stored unchanged as attributes:
    qseq_file, tile_list and basecall_summary.
    """

    def __init__(self, qseq_file, tile_list, basecall_summary):
        self.qseq_file = qseq_file
        self.tile_list = tile_list
        self.basecall_summary = basecall_summary
+
# First generation HiSeq Flowcell
ABXX_BASE_CALL_INFO = BaseCallInfo(
    qseq_file='AA01CCABXX_8_2_2207_qseq.txt',
    tile_list=HISEQ_TILE_LIST,
    basecall_summary='AA01CCABXX_BustardSummary.xml',
)