From: Diane Trout Date: Fri, 10 Sep 2010 22:35:20 +0000 (+0000) Subject: Add support for CASAVA 1.7 X-Git-Tag: 0.4.5~14 X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=6c636058f0e4ec88f4acf6b8a77f4b99b929f16f Add support for CASAVA 1.7 They stopped generating eland_extended files so I needed to parse the export files instead. Also more carefully test how I'm computing the U0-2 and R0-2 scores. --- diff --git a/htsworkflow/pipelines/bustard.py b/htsworkflow/pipelines/bustard.py index 85b1d4c..25b2737 100644 --- a/htsworkflow/pipelines/bustard.py +++ b/htsworkflow/pipelines/bustard.py @@ -85,14 +85,19 @@ class CrosstalkMatrix(object): def _initialize_from_file(self, pathname): data = open(pathname).readlines() auto_header = '# Auto-generated frequency response matrix' - if data[0].strip() != auto_header or len(data) != 9: + if data[0].strip() == auto_header and len(data) == 9: + # skip over lines 1,2,3,4 which contain the 4 bases + self.base['A'] = [ float(v) for v in data[5].split() ] + self.base['C'] = [ float(v) for v in data[6].split() ] + self.base['G'] = [ float(v) for v in data[7].split() ] + self.base['T'] = [ float(v) for v in data[8].split() ] + elif len(data) == 16: + self.base['A'] = [ float(v) for v in data[:4] ] + self.base['C'] = [ float(v) for v in data[4:8] ] + self.base['G'] = [ float(v) for v in data[8:12] ] + self.base['T'] = [ float(v) for v in data[12:16] ] + else: raise RuntimeError("matrix file %s is unusual" % (pathname,)) - # skip over lines 1,2,3,4 which contain the 4 bases - self.base['A'] = [ float(v) for v in data[5].split() ] - self.base['C'] = [ float(v) for v in data[6].split() ] - self.base['G'] = [ float(v) for v in data[7].split() ] - self.base['T'] = [ float(v) for v in data[8].split() ] - def get_elements(self): root = ElementTree.Element(CrosstalkMatrix.CROSSTALK) root.tail = os.linesep @@ -162,7 +167,12 @@ def crosstalk_matrix_from_bustard_config(bustard_path, bustard_config_tree): if matrix_auto_flag: # we estimated the matrix from something in this run. # though we don't really care which lane it was - matrix_path = os.path.join(bustard_path, 'Matrix', 's_02_matrix.txt') + if matrix_auto_lane == 0: + matrix_path = os.path.join(bustard_path, + 'Matrix', 's_02_matrix.txt') + else: + matrix_path = os.path.join(bustard_path, 'Matrix', + 's_%d_1_matrix.txt' % (matrix_auto_lane,)) crosstalk = CrosstalkMatrix(matrix_path) else: matrix_elements = call_parameters.find('MatrixElements') diff --git a/htsworkflow/pipelines/eland.py b/htsworkflow/pipelines/eland.py index 5f3a345..f291f0f 100644 --- a/htsworkflow/pipelines/eland.py +++ b/htsworkflow/pipelines/eland.py @@ -7,6 +7,7 @@ import logging import os import re import stat +import sys from htsworkflow.pipelines.runfolder import ElementTree, LANE_LIST from htsworkflow.util.ethelp import indent, flatten @@ -30,7 +31,6 @@ ELAND_MULTI = 1 ELAND_EXTENDED = 2 ELAND_EXPORT = 3 - class ResultLane(object): """ Base class for result lanes @@ -82,6 +82,12 @@ class ElandLane(ResultLane): """ XML_VERSION = 2 LANE = "ElandLane" + MATCH_COUNTS_RE = re.compile("([\d]+):([\d]+):([\d]+)") + DESCRIPTOR_MISMATCH_RE = re.compile("[AGCT]") + DESCRIPTOR_INDEL_RE = re.compile("^[\dAGCT]$") + SCORE_UNRECOGNIZED = 0 + SCORE_QC = 1 + SCORE_READ = 2 def __init__(self, pathname=None, lane_id=None, end=None, genome_map=None, eland_type=None, xml=None): super(ElandLane, self).__init__(pathname, lane_id, end) @@ -125,16 +131,19 @@ class ElandLane(ResultLane): logging.info("summarizing results for %s" % (self.pathname)) + stream = autoopen(self.pathname, 'r') if self.eland_type == ELAND_SINGLE: - result = self._update_eland_result(self.pathname) + result = self._update_eland_result(stream) elif self.eland_type == ELAND_MULTI or \ self.eland_type == ELAND_EXTENDED: - result = self._update_eland_multi(self.pathname) + result = self._update_eland_multi(stream) + elif self.eland_type == ELAND_EXPORT: + result = self._update_eland_export(stream) else: raise NotImplementedError("Only support single/multi/extended eland files") self._match_codes, self._mapped_reads, self._reads = result - def _update_eland_result(self, pathname): + def _update_eland_result(self, instream): reads = 0 mapped_reads = {} @@ -142,7 +151,7 @@ class ElandLane(ResultLane): 'U0':0, 'U1':0, 'U2':0, 'R0':0, 'R1':0, 'R2':0, } - for line in autoopen(pathname,'r'): + for line in instream: reads += 1 fields = line.split() # code = fields[2] @@ -156,7 +165,10 @@ class ElandLane(ResultLane): mapped_reads[fasta] = mapped_reads.setdefault(fasta, 0) + 1 return match_codes, mapped_reads, reads - def _update_eland_multi(self, pathname): + def _update_eland_multi(self, instream): + """Summarize an eland_extend.""" + MATCH_INDEX = 2 + LOCATION_INDEX = 3 reads = 0 mapped_reads = {} @@ -164,51 +176,134 @@ class ElandLane(ResultLane): 'U0':0, 'U1':0, 'U2':0, 'R0':0, 'R1':0, 'R2':0, } - match_counts_re = re.compile("([\d]+):([\d]+):([\d]+)") - for line in autoopen(pathname,'r'): + for line in instream: reads += 1 fields = line.split() # fields[2] = QC/NM/or number of matches - groups = match_counts_re.match(fields[2]) - if groups is None: - match_codes[fields[2]] += 1 - else: - # when there are too many hit, eland writes a - where + score_type = self._score_mapped_mismatches(fields[MATCH_INDEX], + match_codes) + if score_type == ElandLane.SCORE_READ: + # when there are too many hits, eland writes a - where # it would have put the list of hits. # or in a different version of eland, it just leaves # that column blank, and only outputs 3 fields. - if len(fields) < 4 or fields[3] == '-': + if len(fields) < 4 or fields[LOCATION_INDEX] == '-': continue - zero_mismatches = int(groups.group(1)) - if zero_mismatches == 1: - match_codes['U0'] += 1 - elif zero_mismatches < 255: - match_codes['R0'] += zero_mismatches - - one_mismatches = int(groups.group(2)) - if one_mismatches == 1: - match_codes['U1'] += 1 - elif one_mismatches < 255: - match_codes['R1'] += one_mismatches - - two_mismatches = int(groups.group(3)) - if two_mismatches == 1: - match_codes['U2'] += 1 - elif two_mismatches < 255: - match_codes['R2'] += two_mismatches - - chromo = None - for match in fields[3].split(','): - match_fragment = match.split(':') - if len(match_fragment) == 2: - chromo = match_fragment[0] - pos = match_fragment[1] - - fasta = self.genome_map.get(chromo, chromo) - assert fasta is not None - mapped_reads[fasta] = mapped_reads.setdefault(fasta, 0) + 1 + + self._count_mapped_multireads(mapped_reads, fields[LOCATION_INDEX]) + return match_codes, mapped_reads, reads + def _update_eland_export(self, instream): + """Summarize a gerald export file.""" + MATCH_INDEX = 10 + LOCATION_INDEX = 10 + DESCRIPTOR_INDEX= 13 + reads = 0 + mapped_reads = {} + + match_codes = {'NM':0, 'QC':0, 'RM':0, + 'U0':0, 'U1':0, 'U2':0, + 'R0':0, 'R1':0, 'R2':0, + } + + for line in instream: + reads += 1 + fields = line.split() + # fields[2] = QC/NM/or number of matches + score_type = self._score_mapped_mismatches(fields[MATCH_INDEX], + match_codes) + if score_type == ElandLane.SCORE_UNRECOGNIZED: + # export files have three states for the match field + # QC code, count of multi-reads, or a single + # read location. The score_mapped_mismatches function + # only understands the first two types. + # if we get unrecognized, that implies the field is probably + # a location. + code = self._count_mapped_export(mapped_reads, + fields[LOCATION_INDEX], + fields[DESCRIPTOR_INDEX]) + match_codes[code] += 1 + + return match_codes, mapped_reads, reads + + + def _score_mapped_mismatches(self, match, match_codes): + """Update match_codes with eland map counts, or failure code. + + Returns True if the read mapped, false if it was an error code. + """ + groups = ElandLane.MATCH_COUNTS_RE.match(match) + if groups is None: + # match is not of the form [\d]+:[\d]+:[\d]+ + if match_codes.has_key(match): + # match is one quality control codes QC/NM etc + match_codes[match] += 1 + return ElandLane.SCORE_QC + else: + return ElandLane.SCORE_UNRECOGNIZED + else: + # match is of the form [\d]+:[\d]+:[\d]+ + # AKA Multiread + zero_mismatches = int(groups.group(1)) + one_mismatches = int(groups.group(2)) + two_mismatches = int(groups.group(3)) + + if zero_mismatches == 1: + match_codes['U0'] += 1 + elif zero_mismatches < 255: + match_codes['R0'] += zero_mismatches + + if one_mismatches == 1: + match_codes['U1'] += 1 + elif one_mismatches < 255: + match_codes['R1'] += one_mismatches + + if two_mismatches == 1: + match_codes['U2'] += 1 + elif two_mismatches < 255: + match_codes['R2'] += two_mismatches + + return ElandLane.SCORE_READ + + + def _count_mapped_multireads(self, mapped_reads, match_string): + chromo = None + for match in match_string.split(','): + match_fragment = match.split(':') + if len(match_fragment) == 2: + chromo = match_fragment[0] + pos = match_fragment[1] + + fasta = self.genome_map.get(chromo, chromo) + assert fasta is not None + mapped_reads[fasta] = mapped_reads.setdefault(fasta, 0) + 1 + + + def _count_mapped_export(self, mapped_reads, match_string, descriptor): + """Count a read as defined in an export file + + match_string contains the chromosome + descriptor contains the an ecoding of bases that match, mismatch, + and have indels. + returns the "best" match code + + Currently "best" match code is ignoring the possibility of in-dels + """ + chromo = match_string + fasta = self.genome_map.get(chromo, chromo) + assert fasta is not None + mapped_reads[fasta] = mapped_reads.setdefault(fasta, 0) + 1 + + mismatch_bases = ElandLane.DESCRIPTOR_MISMATCH_RE.findall(descriptor) + if len(mismatch_bases) == 0: + return 'U0' + elif len(mismatch_bases) == 1: + return 'U1' + else: + return 'U2' + + def _get_mapped_reads(self): if self._mapped_reads is None: self._update() @@ -482,6 +577,7 @@ def check_for_eland_file(basedir, pattern, lane_id, end): full_lane_id = "%d_%d" % ( lane_id, end ) basename = pattern % (full_lane_id,) + logging.info("Eland pattern: %s" %(basename,)) pathname = os.path.join(basedir, basename) if os.path.exists(pathname): logging.info('found eland file in %s' % (pathname,)) @@ -558,6 +654,9 @@ def eland(gerald_dir, gerald=None, genome_maps=None): ('s_%s_eland_multi.txt', MAPPED_ELAND), ('s_%s_eland_multi.txt.bz2', MAPPED_ELAND), ('s_%s_eland_multi.txt.gz', MAPPED_ELAND), + ('s_%s_export.txt', MAPPED_ELAND), + ('s_%s_export.txt.bz2', MAPPED_ELAND), + ('s_%s_export.txt.gz', MAPPED_ELAND), ('s_%s_sequence.txt', SEQUENCE),] for basedir in basedirs: @@ -608,3 +707,21 @@ def extract_eland_sequence(instream, outstream, start, end): result = [record[0][start:end]] outstream.write("\t".join(result)) outstream.write(os.linesep) + + +def main(cmdline=None): + """Run eland extraction against the specified gerald directory""" + from optparse import OptionParser + parser = OptionParser("%prog: +") + opts, args = parser.parse_args(cmdline) + logging.basicConfig(level=logging.DEBUG) + for a in args: + logging.info("Starting scan of %s" % (a,)) + e = eland(a) + print e.get_elements() + + return + + +if __name__ == "__main__": + main(sys.argv[1:]) diff --git a/htsworkflow/pipelines/test/simulate_runfolder.py b/htsworkflow/pipelines/test/simulate_runfolder.py index 855912d..50fca0a 100644 --- a/htsworkflow/pipelines/test/simulate_runfolder.py +++ b/htsworkflow/pipelines/test/simulate_runfolder.py @@ -77,6 +77,32 @@ def make_rta_basecalls_1460(intensities_dir): return basecalls_dir +def make_rta_intensities_1870(data_dir, version='1.8.70.0'): + """ + Construct an artificial RTA Intensities parameter file and directory + """ + intensities_dir = os.path.join(data_dir, 'Intensities') + if not os.path.exists(intensities_dir): + os.mkdir(intensities_dir) + + param_file = os.path.join(TESTDATA_DIR, 'rta_intensities_config_1870.xml') + shutil.copy(param_file, os.path.join(intensities_dir, 'config.xml')) + + return intensities_dir + +def make_rta_basecalls_1870(intensities_dir): + """ + Construct an artificial RTA Intensities parameter file and directory + """ + basecalls_dir = os.path.join(intensities_dir, 'BaseCalls') + if not os.path.exists(basecalls_dir): + os.mkdir(basecalls_dir) + + param_file = os.path.join(TESTDATA_DIR, 'rta_basecalls_config_1870.xml') + shutil.copy(param_file, os.path.join(basecalls_dir, 'config.xml')) + + return basecalls_dir + def make_qseqs(bustard_dir, in_temp=True): """ Fill gerald directory with qseq files @@ -151,6 +177,19 @@ def make_matrix(matrix_filename): f.write(contents) f.close() +def make_matrix_dir_rta160(bustard_dir): + """ + Create several matrix files in /Matrix/ + """ + destdir = os.path.join(bustard_dir, 'Matrix') + if not os.path.isdir(destdir): + os.mkdir(destdir) + + source = os.path.join(TESTDATA_DIR, '61MMFAAXX_4_1_matrix.txt') + for lane in LANE_LIST: + destination = os.path.join(destdir, 's_%d_1_matrix.txt' % ( lane, )) + shutil.copy(source, destination) + def make_phasing_dir(bustard_dir): """ Create several phasing files in /Phasing/ @@ -256,6 +295,15 @@ def make_eland_multi(gerald_dir, paired=False, lane_list=LANE_LIST): f.write(eland_multi[0]) f.close() +def make_eland_export(gerald_dir, paired=False, lane_list=LANE_LIST): + source = os.path.join(TESTDATA_DIR, 'casava_1.7_export.txt') + + for i in lane_list: + destination = os.path.join(gerald_dir, + 's_%d_export.txt' % (i,)) + shutil.copy(source, destination) + + def make_scarf(gerald_dir, lane_list=LANE_LIST): seq = """HWI-EAS229_92_30VNBAAXX:1:1:0:161:NCAATTACACGACGCTAGCCCTAAAGCTATTTCGAGG:E[aaaabb^a\a_^^a[S`ba_WZUXaaaaaaUKPER HWI-EAS229_92_30VNBAAXX:1:1:0:447:NAGATGCGCATTTGAAGTAGGAGCAAAAGATCAAGGT:EUabaab^baabaaaaaaaa^^Uaaaaa\aaaa__`a diff --git a/htsworkflow/pipelines/test/test_eland.py b/htsworkflow/pipelines/test/test_eland.py new file mode 100644 index 0000000..bd215c1 --- /dev/null +++ b/htsworkflow/pipelines/test/test_eland.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python +"""More direct synthetic test cases for the eland output file processing +""" +from StringIO import StringIO +import unittest + +from htsworkflow.pipelines.eland import ElandLane + +class ElandTests(unittest.TestCase): + """Test specific Eland modules + """ + def compare_match_array(self, current, expected): + for key in expected.keys(): + self.failUnlessEqual(current[key], expected[key], + "Key %s: %s != %s" % (key,current[key],expected[key])) + + def test_eland_score_mapped_mismatches(self): + eland = ElandLane() + match_codes = {'NM':0, 'QC':0, 'RM':0, + 'U0':0, 'U1':0, 'U2':0, + 'R0':0, 'R1':0, 'R2':0, + } + r = eland._score_mapped_mismatches("QC", match_codes) + self.failUnlessEqual(r, ElandLane.SCORE_QC) + self.compare_match_array(match_codes, + {'NM':0, 'QC':1, 'RM':0, + 'U0':0, 'U1':0, 'U2':0, + 'R0':0, 'R1':0, 'R2':0, + }) + + r = eland._score_mapped_mismatches("NM", match_codes) + self.failUnlessEqual(r, ElandLane.SCORE_QC) + self.compare_match_array(match_codes, + {'NM':1, 'QC':1, 'RM':0, + 'U0':0, 'U1':0, 'U2':0, + 'R0':0, 'R1':0, 'R2':0, + }) + + r = eland._score_mapped_mismatches("1:0:0", match_codes) + self.failUnlessEqual(r, ElandLane.SCORE_READ) + self.compare_match_array(match_codes, + {'NM':1, 'QC':1, 'RM':0, + 'U0':1, 'U1':0, 'U2':0, + 'R0':0, 'R1':0, 'R2':0, + }) + + r = eland._score_mapped_mismatches("2:4:16", match_codes) + self.failUnlessEqual(r, ElandLane.SCORE_READ) + self.compare_match_array(match_codes, + {'NM':1, 'QC':1, 'RM':0, + 'U0':1, 'U1':0, 'U2':0, + 'R0':2, 'R1':4, 'R2':16, + }) + + r = eland._score_mapped_mismatches("1:1:1", match_codes) + self.failUnlessEqual(r, ElandLane.SCORE_READ) + self.compare_match_array(match_codes, + {'NM':1, 'QC':1, 'RM':0, + 'U0':2, 'U1':1, 'U2':1, + 'R0':2, 'R1':4, 'R2':16, + }) + + r = eland._score_mapped_mismatches("1:0:0", match_codes) + self.failUnlessEqual(r, ElandLane.SCORE_READ) + self.compare_match_array(match_codes, + {'NM':1, 'QC':1, 'RM':0, + 'U0':3, 'U1':1, 'U2':1, + 'R0':2, 'R1':4, 'R2':16, + }) + + r = eland._score_mapped_mismatches("0:0:1", match_codes) + self.failUnlessEqual(r, ElandLane.SCORE_READ) + self.compare_match_array(match_codes, + {'NM':1, 'QC':1, 'RM':0, + 'U0':3, 'U1':1, 'U2':2, + 'R0':2, 'R1':4, 'R2':16, + }) + + r = eland._score_mapped_mismatches("chr3.fa", match_codes) + self.failUnlessEqual(r, ElandLane.SCORE_UNRECOGNIZED) + self.compare_match_array(match_codes, + {'NM':1, 'QC':1, 'RM':0, + 'U0':3, 'U1':1, 'U2':2, + 'R0':2, 'R1':4, 'R2':16, + }) + + def test_count_mapped_export(self): + eland = ElandLane() + mapped_reads = {} + r = eland._count_mapped_export(mapped_reads, "chr3.fa", "38") + self.failUnlessEqual(mapped_reads['chr3.fa'], 1) + self.failUnlessEqual(r, 'U0') + + mapped_reads = {} + r = eland._count_mapped_export(mapped_reads, "chr3.fa", "36A4") + self.failUnlessEqual(mapped_reads['chr3.fa'], 1) + self.failUnlessEqual(r, 'U1') + + mapped_reads = {} + r = eland._count_mapped_export(mapped_reads, "chr3.fa", "30A2T2") + self.failUnlessEqual(mapped_reads['chr3.fa'], 1) + self.failUnlessEqual(r, 'U2') + + mapped_reads = {} + r = eland._count_mapped_export(mapped_reads, "chr3.fa", "26AG2T2") + self.failUnlessEqual(mapped_reads['chr3.fa'], 1) + self.failUnlessEqual(r, 'U2') + + # deletion + mapped_reads = {} + r = eland._count_mapped_export(mapped_reads, "chr3.fa", "26^AG$4") + self.failUnlessEqual(mapped_reads['chr3.fa'], 1) + self.failUnlessEqual(r, 'U2') + + # insertion + mapped_reads = {} + r = eland._count_mapped_export(mapped_reads, "chr3.fa", "26^2$4") + self.failUnlessEqual(mapped_reads['chr3.fa'], 1) + self.failUnlessEqual(r, 'U0') + + def test_update_eland_export(self): + """Test scoring the pipeline export file""" + eland = ElandLane() + qc_read = StringIO("ILLUMINA-33A494 1 1 1 3291 1036 0 1 GANNTCCTCACCCGACANNNNNNNANNNCGGGNNACTC \XBB]^^^^[`````BBBBBBBBBBBBBBBBBBBBBBB QC") + one_read_exact = StringIO("ILLUMINA-33A494 1 1 1 2678 1045 0 1 AAGGTGAAGAAGGAGATGNNGATGATGACGACGATAGA ]]WW[[W]W]]R\WWZ[RBBS^\XVa____]W[]]___ chrX.fa 148341829 F 38 45") + one_read_mismatch = StringIO("ILLUMINA-33A494 1 1 1 2678 1045 0 1 AAGGTGAAGAAGGAGATGNNGATGATGACGACGATAGA ]]WW[[W]W]]R\WWZ[RBBS^\XVa____]W[]]___ chrX.fa 148341829 F 18AA15G1T 45") + multi_read = StringIO("ILLUMINA-33A494 1 1 1 4405 1046 0 1 GTGGTTTCGCTGGATAGTNNGTAGGGACAGTGGGAATC ``````````__a__V^XBB^SW^^a_____a______ 9:2:1") + + match_codes, match_reads, reads = eland._update_eland_export(qc_read) + self.compare_match_array(match_codes, + {'NM':0, 'QC':1, 'RM':0, + 'U0':0, 'U1':0, 'U2':0, + 'R0':0, 'R1':0, 'R2':0, + }) + self.failUnlessEqual(len(match_reads), 0) + self.failUnlessEqual(reads, 1) + + match_codes, match_reads, reads = eland._update_eland_export(one_read_exact) + self.compare_match_array(match_codes, + {'NM':0, 'QC':0, 'RM':0, + 'U0':1, 'U1':0, 'U2':0, + 'R0':0, 'R1':0, 'R2':0, + }) + self.failUnlessEqual(match_reads['chrX.fa'], 1) + self.failUnlessEqual(reads, 1) + + match_codes, match_reads, reads = eland._update_eland_export(one_read_mismatch) + self.compare_match_array(match_codes, + {'NM':0, 'QC':0, 'RM':0, + 'U0':0, 'U1':0, 'U2':1, + 'R0':0, 'R1':0, 'R2':0, + }) + self.failUnlessEqual(match_reads['chrX.fa'], 1) + self.failUnlessEqual(reads, 1) + + match_codes, match_reads, reads = eland._update_eland_export(multi_read) + self.compare_match_array(match_codes, + {'NM':0, 'QC':0, 'RM':0, + 'U0':0, 'U1':0, 'U2':1, + 'R0':9, 'R1':2, 'R2':0, + }) + self.failUnlessEqual(len(match_reads), 0) + self.failUnlessEqual(reads, 1) + + +def suite(): + return unittest.makeSquite(ElandTests, 'test') + +if __name__ == "__main__": + unittest.main(defaultTest="suite") diff --git a/htsworkflow/pipelines/test/test_runfolder_rta180.py b/htsworkflow/pipelines/test/test_runfolder_rta180.py new file mode 100644 index 0000000..7475b36 --- /dev/null +++ b/htsworkflow/pipelines/test/test_runfolder_rta180.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python + +from datetime import datetime, date +import os +import tempfile +import shutil +import unittest + +from htsworkflow.pipelines import eland +from htsworkflow.pipelines import ipar +from htsworkflow.pipelines import bustard +from htsworkflow.pipelines import gerald +from htsworkflow.pipelines import runfolder +from htsworkflow.pipelines.runfolder import ElementTree + +from htsworkflow.pipelines.test.simulate_runfolder import * + + +def make_runfolder(obj=None): + """ + Make a fake runfolder, attach all the directories to obj if defined + """ + # make a fake runfolder directory + temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_') + + runfolder_dir = os.path.join(temp_dir, + '090608_HWI-EAS229_0117_4286GAAXX') + os.mkdir(runfolder_dir) + + data_dir = os.path.join(runfolder_dir, 'Data') + os.mkdir(data_dir) + + intensities_dir = make_rta_intensities_1870(data_dir) + + basecalls_dir = make_rta_basecalls_1870(intensities_dir) + make_matrix_dir_rta160(basecalls_dir) + + gerald_dir = os.path.join(basecalls_dir, + 'GERALD_07-09-2010_diane') + os.mkdir(gerald_dir) + make_gerald_config_100(gerald_dir) + make_summary_rta160_xml(gerald_dir) + make_eland_export(gerald_dir, lane_list=[1,2,3,4,5,6,]) + make_scarf(gerald_dir, lane_list=[7,]) + make_fastq(gerald_dir, lane_list=[8,]) + + if obj is not None: + obj.temp_dir = temp_dir + obj.runfolder_dir = runfolder_dir + obj.data_dir = data_dir + obj.image_analysis_dir = intensities_dir + obj.bustard_dir = basecalls_dir + obj.gerald_dir = gerald_dir + + +class RunfolderTests(unittest.TestCase): + """ + Test components of the runfolder processing code + which includes firecrest, bustard, and gerald + """ + def setUp(self): + # attaches all the directories to the object passed in + make_runfolder(self) + + def tearDown(self): + shutil.rmtree(self.temp_dir) + + def test_bustard(self): + """Construct a bustard object""" + b = bustard.bustard(self.bustard_dir) + self.failUnlessEqual(b.version, '1.8.70.0') + self.failUnlessEqual(b.date, None) + self.failUnlessEqual(b.user, None) + self.failUnlessEqual(len(b.phasing), 0) + + xml = b.get_elements() + b2 = bustard.Bustard(xml=xml) + self.failUnlessEqual(b.version, b2.version) + self.failUnlessEqual(b.date, b2.date ) + self.failUnlessEqual(b.user, b2.user) + + def test_gerald(self): + # need to update gerald and make tests for it + g = gerald.gerald(self.gerald_dir) + + self.failUnlessEqual(g.version, + '@(#) Id: GERALD.pl,v 1.171 2008/05/19 17:36:14 mzerara Exp') + self.failUnlessEqual(g.date, datetime(2009,2,22,21,15,59)) + self.failUnlessEqual(len(g.lanes), len(g.lanes.keys())) + self.failUnlessEqual(len(g.lanes), len(g.lanes.items())) + + + # list of genomes, matches what was defined up in + # make_gerald_config. + # the first None is to offset the genomes list to be 1..9 + # instead of pythons default 0..8 + genomes = [None, + '/g/mm9', + '/g/mm9', + '/g/elegans190', + '/g/arabidopsis01222004', + '/g/mm9', + '/g/mm9', + '/g/mm9', + '/g/mm9', ] + + # test lane specific parameters from gerald config file + for i in range(1,9): + cur_lane = g.lanes[i] + self.failUnlessEqual(cur_lane.analysis, 'eland_extended') + self.failUnlessEqual(cur_lane.eland_genome, genomes[i]) + self.failUnlessEqual(cur_lane.read_length, '37') + self.failUnlessEqual(cur_lane.use_bases, 'Y'*37) + + # I want to be able to use a simple iterator + for l in g.lanes.values(): + self.failUnlessEqual(l.analysis, 'eland_extended') + self.failUnlessEqual(l.read_length, '37') + self.failUnlessEqual(l.use_bases, 'Y'*37) + + # test data extracted from summary file + clusters = [None, + (281331, 11169), (203841, 13513), + (220889, 15653), (137294, 14666), + (129388, 14525), (262092, 10751), + (185754, 13503), (233765, 9537),] + + self.failUnlessEqual(len(g.summary), 1) + for i in range(1,9): + summary_lane = g.summary[0][i] + self.failUnlessEqual(summary_lane.cluster, clusters[i]) + self.failUnlessEqual(summary_lane.lane, i) + + xml = g.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + g2 = gerald.Gerald(xml=xml) + return + + # do it all again after extracting from the xml file + self.failUnlessEqual(g.version, g2.version) + self.failUnlessEqual(g.date, g2.date) + self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys())) + self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items())) + + # test lane specific parameters from gerald config file + for i in range(1,9): + g_lane = g.lanes[i] + g2_lane = g2.lanes[i] + self.failUnlessEqual(g_lane.analysis, g2_lane.analysis) + self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome) + self.failUnlessEqual(g_lane.read_length, g2_lane.read_length) + self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases) + + # test (some) summary elements + self.failUnlessEqual(len(g.summary), 1) + for i in range(1,9): + g_summary = g.summary[0][i] + g2_summary = g2.summary[0][i] + self.failUnlessEqual(g_summary.cluster, g2_summary.cluster) + self.failUnlessEqual(g_summary.lane, g2_summary.lane) + + g_eland = g.eland_results + g2_eland = g2.eland_results + for lane in g_eland.results[0].keys(): + g_results = g_eland.results[0][lane] + g2_results = g2_eland.results[0][lane] + self.failUnlessEqual(g_results.reads, + g2_results.reads) + if isinstance(g_results, eland.ElandLane): + self.failUnlessEqual(len(g_results.mapped_reads), + len(g2_results.mapped_reads)) + for k in g_results.mapped_reads.keys(): + self.failUnlessEqual(g_results.mapped_reads[k], + g2_results.mapped_reads[k]) + + self.failUnlessEqual(len(g_results.match_codes), + len(g2_results.match_codes)) + for k in g_results.match_codes.keys(): + self.failUnlessEqual(g_results.match_codes[k], + g2_results.match_codes[k]) + + + def test_eland(self): + return + hg_map = {'Lambda.fa': 'Lambda.fa'} + for i in range(1,22): + short_name = 'chr%d.fa' % (i,) + long_name = 'hg18/chr%d.fa' % (i,) + hg_map[short_name] = long_name + + genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map, + 5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map } + eland_container = gerald.eland(self.gerald_dir, genome_maps=genome_maps) + + # I added sequence lanes to the last 2 lanes of this test case + for i in range(1,7): + lane = eland_container.results[0][i] + self.failUnlessEqual(lane.reads, 6) + self.failUnlessEqual(lane.sample_name, "s") + self.failUnlessEqual(lane.lane_id, i) + self.failUnlessEqual(len(lane.mapped_reads), 17) + self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4) + self.failUnlessEqual(lane.match_codes['U0'], 3) + self.failUnlessEqual(lane.match_codes['R0'], 2) + self.failUnlessEqual(lane.match_codes['U1'], 1) + self.failUnlessEqual(lane.match_codes['R1'], 9) + self.failUnlessEqual(lane.match_codes['U2'], 0) + self.failUnlessEqual(lane.match_codes['R2'], 12) + self.failUnlessEqual(lane.match_codes['NM'], 1) + self.failUnlessEqual(lane.match_codes['QC'], 0) + + # test scarf + lane = eland_container.results[0][7] + self.failUnlessEqual(lane.reads, 5) + self.failUnlessEqual(lane.sample_name, 's') + self.failUnlessEqual(lane.lane_id, 7) + self.failUnlessEqual(lane.sequence_type, eland.SequenceLane.SCARF_TYPE) + + # test fastq + lane = eland_container.results[0][8] + self.failUnlessEqual(lane.reads, 3) + self.failUnlessEqual(lane.sample_name, 's') + self.failUnlessEqual(lane.lane_id, 8) + self.failUnlessEqual(lane.sequence_type, eland.SequenceLane.FASTQ_TYPE) + + xml = eland_container.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + e2 = gerald.ELAND(xml=xml) + + for i in range(1,9): + l1 = eland_container.results[0][i] + l2 = e2.results[0][i] + self.failUnlessEqual(l1.reads, l2.reads) + self.failUnlessEqual(l1.sample_name, l2.sample_name) + self.failUnlessEqual(l1.lane_id, l2.lane_id) + if isinstance(l1, eland.ElandLane): + self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads)) + self.failUnlessEqual(len(l1.mapped_reads), 17) + for k in l1.mapped_reads.keys(): + self.failUnlessEqual(l1.mapped_reads[k], + l2.mapped_reads[k]) + + self.failUnlessEqual(len(l1.match_codes), 9) + self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes)) + for k in l1.match_codes.keys(): + self.failUnlessEqual(l1.match_codes[k], + l2.match_codes[k]) + elif isinstance(l1, eland.SequenceLane): + self.failUnlessEqual(l1.sequence_type, l2.sequence_type) + + def test_runfolder(self): + return + runs = runfolder.get_runs(self.runfolder_dir) + + # do we get the flowcell id from the filename? + self.failUnlessEqual(len(runs), 1) + name = 'run_4286GAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) + self.failUnlessEqual(runs[0].name, name) + + # do we get the flowcell id from the FlowcellId.xml file + make_flowcell_id(self.runfolder_dir, '207BTAAXY') + runs = runfolder.get_runs(self.runfolder_dir) + self.failUnlessEqual(len(runs), 1) + name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) + self.failUnlessEqual(runs[0].name, name) + + r1 = runs[0] + xml = r1.get_elements() + xml_str = ElementTree.tostring(xml) + + r2 = runfolder.PipelineRun(xml=xml) + self.failUnlessEqual(r1.name, r2.name) + self.failIfEqual(r2.image_analysis, None) + self.failIfEqual(r2.bustard, None) + self.failIfEqual(r2.gerald, None) + + +def suite(): + return unittest.makeSuite(RunfolderTests,'test') + +if __name__ == "__main__": + unittest.main(defaultTest="suite") + diff --git a/htsworkflow/pipelines/test/testdata/61MMFAAXX_4_1_matrix.txt b/htsworkflow/pipelines/test/testdata/61MMFAAXX_4_1_matrix.txt new file mode 100755 index 0000000..45bef90 --- /dev/null +++ b/htsworkflow/pipelines/test/testdata/61MMFAAXX_4_1_matrix.txt @@ -0,0 +1,16 @@ +1.617195 +0.220814 +0 +0 +1.009254 +0.8633183 +0 +0 +0 +0 +1.319025 +0.05441951 +0 +0 +0.4378621 +0.676676 diff --git a/htsworkflow/pipelines/test/testdata/casava_1.7_export.txt b/htsworkflow/pipelines/test/testdata/casava_1.7_export.txt new file mode 100644 index 0000000..f1d1f90 --- /dev/null +++ b/htsworkflow/pipelines/test/testdata/casava_1.7_export.txt @@ -0,0 +1,142 @@ +ILLUMINA-33A494 1 1 1 2112 1040 0 1 CCNCATCTCAAAGAAGTNNNNNNNCNNNAAGTTTAATG \ZBZ^]]]]W__a_BBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 2130 1046 0 1 CTAGACCTGCAGGTGAGCNNGGTGGCCTATACATAGGC ^Z]PXG^^[Q^BBBBBBBBBBBBBBBBBBBBBBBBBBB 3:0:0 N +ILLUMINA-33A494 1 1 1 2152 1043 0 1 CTTCCTCTTCACTCGGCCNNGANGGCNGCCGCCATAAG ^]]TU\[^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 2181 1040 0 1 CTNGGCTCTGGTATTTANNNNNNNTNNNTATTACATGG \LBSQ[T]XQY__BBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 2220 1045 0 1 GTGTTGTTGCCATGGTAANNCTGCTCAGTACGAGAGGA ``````````_____UXUBBX]YXX`T```____T___ 2:0:0 N +ILLUMINA-33A494 1 1 1 2398 1045 0 1 CCACCAACTAAGCTTGGCNNTCCACCACCACCCACGGA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB NM N +ILLUMINA-33A494 1 1 1 2468 1041 0 1 CTNCCTTTCTATGTGCCNNNNNNCTGNNAGGCCAGCCG ^^BZ\\]^]]____aBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 2537 1044 0 1 GACTCCCTCTCCATCGCCNNCGNGCCGCCTGGCCCATC ^H[^BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 2678 1045 0 1 AAGGTGAAGAAGGAGATGNNGATGATGACGACGATAGA ]]WW[[W]W]]R\WWZ[RBBS^\XVa____]W[]]___ chrX.fa 148341829 F 18AA15G1T 45 N +ILLUMINA-33A494 1 1 1 3023 1046 0 1 AGGCTCTGCCCAGCCCCGNNGAGGCCGCCGCCGCCGCC ```````````````[R^BBWZXPU_a___```````` chr7.fa 148513191 R 18AT18 117 N +ILLUMINA-33A494 1 1 1 3042 1039 0 1 CCNGGCACTTGGGGTGGNNNNNNNGNNNGAGCNCAAGA \ZBZ^aaaa_`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 3089 1043 0 1 GGACTCTCTTCTTCTTTTNNAANAGTNTTTTGATGCAC ```````````````]YYBB\]BZ[OBTX[^^\^]``` QC N +ILLUMINA-33A494 1 1 1 3134 1041 0 1 ATNGTTCAGTAGATTTTNNNNNNAGANNCCATATTAGG ^^B^^^^]\[`````[[BBBBBBY\[BB][]^[Z^``` QC N +ILLUMINA-33A494 1 1 1 3161 1041 0 1 TCNTAGCACCCACCATTGNNNNNAACNGCCACCACATT [YBTVZ\^Z[___^[\\UBBBBBRVOBVVVOUZRW___ QC N +ILLUMINA-33A494 1 1 1 3186 1037 0 1 GGNNGGTTTAAAAAAAANNNNNNNANNNTTTGNNCAGT ]WBBVZRV[WW___BBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 3291 1036 0 1 GANNTCCTCACCCGACANNNNNNNANNNCGGGNNACTC \XBB]^^^^[`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 3338 1043 0 1 AGGGCGTATTGGTTCTTTNNNANATANTTTTATGGCGT `````_______a_aVROBBBZB[W[BUKOX^^W\a__ QC N +ILLUMINA-33A494 1 1 1 3458 1037 0 1 GGNNGCTTGTTGGTGGANNNNNNNTNNNCTTCNNGGAC ]\BB][^^^[`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 3577 1045 0 1 CCGCGTCCAACCCCAACCNNTGCTTGTTGTCATAAACT ```````````````^]^BB^^[^^````````````` chr2.fa 126187548 F 18GC18 117 N +ILLUMINA-33A494 1 1 1 3615 1038 0 1 ATNNGATCCACTCTCTCNNNNNNNCNNNAAACNNAATC BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 3710 1045 0 1 CAAAGACCTTGTCATCGCNNGGTAACACAAGTCCCTTG ```````````````\ZZBB][TYV_________a___ chr19.fa 12878540 F 18AG18 117 N +ILLUMINA-33A494 1 1 1 3751 1038 0 1 GANNGCAAATAAACAAANNNNNNNANNNGAAGNTGTTT ZUBBZ\]\^^`^^[`^^BBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 3791 1038 0 1 CGNNTCTGCCCCTGTTGNNNNNNNGNNNGTCANNATTG [ZBBYZXWZ]`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 3822 1037 0 1 GCNNATTATGGCCACGGNNNNNNNTNNNGGACNNTGTA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 3842 1041 0 1 GGNCAGTGTGCATATTANNNNNNAGANNAAACTCCTCT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 3997 1037 0 1 ATNNCAGTCATTCCCTANNNNNNNCNNNGATGNNAGAT VKBBZVOUV[WX___BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 4021 1037 0 1 GGNNGCGGGTCGTGATGNNNNNNNANNNCAACNNTCCT [[BBSV[ZUS___aBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 4071 1045 0 1 TGAGGGAAGAATGTTATGNNTACTCCTACGAATATGAT ```````````````[^^BB^^Z^X````````````` 2:1:0 N +ILLUMINA-33A494 1 1 1 4107 1038 0 1 CGNCCGATTGGCGCCTCNNNNNNNCNNNTACANCAAAT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 4138 1036 0 1 AGNNTCACAGCTTGTTGNNNNNNNTNNNTGTTNNAGCT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 4349 1045 0 1 CTGTGAGCAGTAGTGGCTNNTGTTATGTCAACATAGGG aX__a___a_aa___ZZ\BBZV[YHUU[[\aX___X^^ chr6.fa 35461795 R 18GC18 116 N +ILLUMINA-33A494 1 1 1 4372 1040 0 1 GANGTGTAGTAGGCAGANNNNNNNGNNNACTGGAGCTG ]LB]VZSP]P_____BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 4405 1046 0 1 GTGGTTTCGCTGGATAGTNNGTAGGGACAGTGGGAATC ``````````__a__V^XBB^SW^^a_____a______ 9:2:0 N +ILLUMINA-33A494 1 1 1 4622 1040 0 1 GCNGGTACCAGACTTGCNNNNNNNTNNNTCCTCGTTAA \^B\Z^]]^^```W`BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 4651 1038 0 1 AANAGAGTCATGACGGANNNNNNNCNNNAGCANTTTGA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 4674 1040 0 1 GANATCCGCCTGCCTCANNNNNNNGNNNGCTGGGATTA ^XB[^\]^[\`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 4753 1040 0 1 CANTATTTGCTAACTCTNNNNNNNANNNCACCTCATCT ^^B\^^^^^^`````[[BBBBBBB^BBBY\[Z[^X``` QC N +ILLUMINA-33A494 1 1 1 4826 1040 0 1 AGNGGCAGCAATAAGATNNNNNNNGNNNTTAAAAAGCC BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 4859 1045 0 1 CATTATTGTTATACCAAANNTCATATCCTATAATCTGG a_XX_[[__^`````^^^BB^\^SZ`````____a^__ Lambda_1-1_11936nts.fa 11716 F 18TG18 117 N +ILLUMINA-33A494 1 1 1 4900 1043 0 1 AAAACCCCTTGTTTTATANNGANTTTNATCTTATTACC MU[OXUXMZZ^]^U^OOOBBWPBVWTBOVVZZ^Z^``` QC N +ILLUMINA-33A494 1 1 1 5046 1046 0 1 AGTCGGTTGTAATGATACNNAACTCCAGGACACCAGTG ___a______````_Y[ZBB^^^^^````````````` chr10.fa 85365712 F CA16AG18 46 N +ILLUMINA-33A494 1 1 1 5090 1040 0 1 CANACTGTAAGAGTCACNNNNNNNANNNCAATGCCTTT ^^B^^___aa`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 5154 1038 0 1 ACNNGCATAGTTCACCANNNNNNNGNNNCTAANNCGTA YYBBVVYVTU__aaaBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 5208 1037 0 1 TGNNTTTTTTTGTTTTGNNNNNNNTNNNTGGTNNCATT ]]BB\^^^^^^^BBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 5268 1037 0 1 GTNNAGGGTGGGGGGCANNNNNNNTNNNATAGNNTCTC V]BBR[Y[Q[[^[^^BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 5332 1038 0 1 GGNNGGGGGGCGGACGGNNNNNNNGNNNTGGANGGGGG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 5546 1044 0 1 CGCTGTGCTCTCGTCAGGNNATNCTGNGATTTGTTGGC ```````````````^]^BB\]B]\]B[[^]]^]^[`` QC N +ILLUMINA-33A494 1 1 1 5603 1036 0 1 GGNNGAGCGATTTGTCTNNNNNNNTNNNATAANNAACN YOBB[LSOXOaaa__BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 5710 1039 0 1 GCNGAATTAACCCATTGNNNNNNNCNNNTAGANTTTGG \\B\U\^\[\___aaBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 5765 1043 0 1 CAGGGCGCCCGGGATCACNNGGNCTGNTAGGTCGATGG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 5811 1045 0 1 GGGCTACTAAGGTGAACANNCGACTTCTTGTGAAGGGT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB NM N +ILLUMINA-33A494 1 1 1 5834 1037 0 1 CCNNGAGCATCTATGATNNNNNNNANNNTGATNNGCTG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 5865 1037 0 1 GGNNGGAGCCGCCGCAGNNNNNNNTNNNGGTGNNAGTA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 5892 1043 0 1 GTCGCTGTCTTTAGAGCTNNGCNTGCNTGCATGTCTTC ``````````_____^X^BB[ZBZ[]B[^[[^^\\``` QC N +ILLUMINA-33A494 1 1 1 5939 1046 0 1 AAGACAGGATGTCTGTGTNNAATTGAACCTCTCTTGCC ```````````````][]BBZ\\Z]````````````` chr7.fa 75376418 F 18AA18 117 N +ILLUMINA-33A494 1 1 1 5965 1041 0 1 TGNGTCCCACTGCAGAGNNNNNNGCCNNACCGGCCGCG ^^BXX[^[[X`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 6000 1041 0 1 CTNTCTATTTTTTAATTNNNNNNTTTNNCCATGGCTGG ^TBV^\]T\\XX[[aWTBBBBBBV[RBB]]^^^[^``` QC N +ILLUMINA-33A494 1 1 1 6083 1041 0 1 CTNAGATGGCCGGGGCTNNNNNNGCNNNACACTGACTT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 6416 1036 0 1 GCNNACTTCTTTTTCCANNNNNNNGNNNGATGNNTAAA \]BB\^^^^^`[^BBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 6536 1036 0 1 GCNNGTATTTTATTTCTNNNNNNNCNNNAGGCNNTGAN ][BBV\LZ[[^T^BBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 6641 1046 0 1 CATAAGGAAGTAGGTTTCNNAGAAACAGACCCCACTGG `````__I_____W_][[BBVYXYX`````````[[__ chr7.fa 148154014 F 18TG18 36 N +ILLUMINA-33A494 1 1 1 6669 1037 0 1 GTNNACCTCTACATGCANNNNNNNCNNNCCCANNTCCC ]]BBY^X^^^`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 6707 1037 0 1 GGNNAGGGAAGGGAAGGNNNNNNNANNNAAGGNNAGGG [[BBR\\[NV_a___BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 6936 1035 0 1 GCNNCTTATCCCTCAGCNNNNNNNGNNNACCCNNGGTN [VBB]^^^^^`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 6973 1040 0 1 GTNCTGCAGTCCTCCCTNNNNNNNCNNNCCTGGGCAAC ^[B[^^][^[`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 7024 1038 0 1 TGNNAGTGAGAGCCCACNNNNNNNCNNNAGGGNNAGCC ]\BBZ^W^X^`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 7062 1044 0 1 ACCTGCCTTTCTATGTGCNNTTNTGCNGCGAGGCCAGC aa____a_aa`````XZZBB\]B[ZZB\^^[]^][`_` QC N +ILLUMINA-33A494 1 1 1 7151 1037 0 1 CGNNCCCGGTCATGACANNNNNNNGNNNACAGNNGACG [[BB[\[ZY]`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 7444 1038 0 1 GCNNTTGTCTTGCTTAGNNNNNNNTNNNTTCCNNGTTA ]XBB]^^[[^___a_BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 7491 1038 0 1 CTNNCCTTCAGTGACTGNNNNNNNGNNNCACCNNTGCT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 7520 1036 0 1 ATNNTGGCCAAATCTAANNNNNNNCNNNAGAANNGGCT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 7563 1044 0 1 GGAGAACCACCTGAACTTNNCANAGCNCGACGTCATCA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 7611 1041 0 1 AANAAGAAATCCTGCCTNNNNNNATANNAAGATGTATG YZBU[S[[XV_____BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 7672 1037 0 1 CCNNGCTGGGGTGGCATNNNNNNNTNNNATGTNNATCC []BBU^XVVV__[[^BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 7702 1043 0 1 GGGTGCACAGGAGCTGGGNNNANCCANAGTGGCCACTC _______aXaa____BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 7728 1036 0 1 CCNNGCCATGAGTTTTTNNNNNNNCNNNTTCTNNTGGN ]]BBV^Z^XQ__a__BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 7849 1041 0 1 CTNCTCAGCATCACTACNNNNNNCTTNNGTCTTCCTCC ^\B^^\^^]]`````[ZBBBBBB^^[BB[ZZ[[X]``` QC N +ILLUMINA-33A494 1 1 1 7919 1046 0 1 GTTCTGTTTTCCTTATTGNNGATGTGTTGTTGTTATGG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB NM N +ILLUMINA-33A494 1 1 1 8052 1037 0 1 GCNNTTAACTATGGGACNNNNNNNANNNAGTCNNACGT Y]BBW]UUU\_____BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 8243 1042 0 1 GTNGTAATTCTCTGATTTNNNTNTTTNCTTGCTGGATT ^]B^\ZZ^^Z___a_\ZZBBB[B[[[BO]]^V\^^``` QC N +ILLUMINA-33A494 1 1 1 8319 1036 0 1 GTNNATGTATTCCACTANNNNNNNANNNCTGTNNTCTT YZBBWYZ[Q[___X_BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 8400 1039 0 1 GCNGGGGGGTTCCGTGGNNNNNNNGNNNGTGTNTGTAG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 8445 1038 0 1 GGNNCTGTAGGAAGGACNNNNNNNGNNNTACTNNGGTG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 8644 1038 0 1 CTNNGCAGTGACCTCATNNNNNNNGNNNCCCANNCCTA WSBBRZYX[R_a___BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 8662 1045 0 1 TGCCATTGTGAGGAACCTNNAGNGCATCATCTCCTAGG ]S]]S]T\^Xaa___[VVBBVWBXO\XMX\]^W][T^[ QC N +ILLUMINA-33A494 1 1 1 8747 1044 0 1 AGAGGAAGCCCCCGACTCNNATNGAGNGGTGGATGGTG ```````````````^^[BBQYBZR[B[[P^[R^\___ QC N +ILLUMINA-33A494 1 1 1 8831 1040 0 1 GGNGGAGGGAGGTCCAGNNNNNNNTNNNACATNTGTGT BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 9122 1039 0 1 GGNGATCCACTTAGTCCNNNNNNNGNNNGAGANAAAGG ][BUJW\XUZ____aBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 9142 1039 0 1 CANAGATGTGTTTGGCTNNNNNNNCNNNACCANTTCAG ]\BXX[[^W^`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 9169 1045 0 1 GTTTTAAAGTTATTCCCTNNATNCTTTACTGAGACCTT _a____a_a_`````ZX^BBR[BSZ^^[]^```````` QC N +ILLUMINA-33A494 1 1 1 9195 1036 0 1 GCNNCTTCTAAAGTATGNNNNNNNGNNNTTCGNNGGGN Y]BBW\W]Y[aa___BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 9322 1040 0 1 GGNGTTTTAAATCTCCANNNNNNNANNNTACCNATATC ^]B^]^^^^]`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 9393 1041 0 1 CANCACATTTATTTATCNNNNNNGTANNCTTACATCTC BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 9520 1042 0 1 TTNCCCAGTTGCCAACTNNNNNNAGGNGACACGGTACC ^[B^^^^^\^`````^BBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 9542 1044 0 1 CTGGTGGTGTACTATTCANNTGNTGANATCTGTACCAG a_________a__a_Z[]BBVWBVWNBUZZWZSZUa__ QC N +ILLUMINA-33A494 1 1 1 9637 1040 0 1 AGNGCATGTCAAAGGACNNNNNNNTNNNCACANAGGTG YZB[FY[X[[___aaBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 9731 1037 0 1 TCNNCTTGGAGTTTGAANNNNNNNTNNNGGAGNNCGAC ]]BBYZX\]X`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 9760 1041 0 1 AGNCAGAACAACTCGCTNNNNNNCCTNNAGGGTATGCT \]B\YZ^]^^`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 9785 1038 0 1 GCNNTGGTCCAGCTGCANNNNNNNCNNNACTCNNGACC \]BBV^Z[^^`````^BBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 9817 1036 0 1 TGNNCTTTCCTTTCAGGNNNNNNNGNNNAATCNNTTCN VWBBVOVVXY_____BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 9888 1044 0 1 GCACATCCCAGGCTGACTNNGCNGTCNTGATTGTTGCT ```````````````\^[BB\\B\]^B^\U[[^][``` QC N +ILLUMINA-33A494 1 1 1 9910 1037 0 1 GTNNTTTTAACTTGGTTNNNNNNNCNNNCTCCNNTAAA ]ZBB]\X[UY_aa_aBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 10024 1036 0 1 GGNNTGCATGGCTGGGCNNNNNNNCNNNAGCANNTTGN [\BB\\[^^[`````^BBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 10089 1038 0 1 CGNNAGTACTCTGTCAGNNNNNNNGNNNGCTCNNTTTC ]]BB]\\Z\^`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 10220 1045 0 1 CTCTACTATTTCTACCTGNNCCNTCCTGGACTGTTGAA ```````````````^^]BB[]BX]^^^^\`````__a QC N +ILLUMINA-33A494 1 1 1 10251 1044 0 1 ACCCCCTCTATTCGTCCCNNTCNTCTNGGAAATGCTAA ```````^^``````^^^BB]]B]\[B]Z[]^^^]``` QC N +ILLUMINA-33A494 1 1 1 10567 1043 0 1 CTNGGCTTCTTGTGACCCNNNTNTTTNGCTTCCTTCCT ^^B^^^^\^^`````]^^BBB[BX[[B]\]_aaaa``` QC N +ILLUMINA-33A494 1 1 1 10861 1043 0 1 GTNGGGTGATGATGAAACNNNTNCCANATTCTACCGCA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 10889 1044 0 1 CCGGCGCTGGCGGTTGTANNTCNCCTNGTGGTGACCGC BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 11038 1041 0 1 GTNGTGTGTGAGACTTTNNNNNNNGNNNTATTGAGGAC YUB]R[X^XZ[]H^^BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 11128 1041 0 1 CANGTACCGCCCTGATCNNNNNNNGNNNGCCATCCGCA ^[B^[^^^\]`[^``BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 11338 1039 0 1 GGNNACCAACACAATCTNNNNNNNTNNNTTCANNCTCC [YBB]ZZ^^Z`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 11402 1044 0 1 GGACCAGCCATAACATGCNNACNCCANTCGCCTCTGTA ```````````````^[[BBZUB[Z[B[\\\^][^___ QC N +ILLUMINA-33A494 1 1 1 11626 1038 0 1 CTNNCGTAGCTCCCGGANNNNNNNGNNNGCCTNNGACA VSBBVZYYZY`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 11737 1039 0 1 ATNNGTTCCTTGGTTCTNNNNNNNCNNNCCCTNNTGTA BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 11819 1038 0 1 ATNNGCGTTGCTGTGGTNNNNNNNGNNNACAANNAGCT ]QBB]\T[\X`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 11929 1046 0 1 GGGAATGCTATATCTGGGNNTCCGATTATTAGTGGGAC ```````````````^^^BB^]\^X``````````___ 2:0:0 N +ILLUMINA-33A494 1 1 1 11981 1036 0 1 TTNNAATCAATGGGCTGNNNNNNNCNNNCTCANNGCTN ]]BB[Y][YV`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 12039 1044 0 1 GTCACTTCTTACTCTAAGNNGGNGTCNCGCAGCTAGAG ```````````````^^^BB\]B[[XB[[[[^Y^[__a QC N +ILLUMINA-33A494 1 1 1 12226 1037 0 1 CCNNTTTCACCCAACCGNNNNNNNTNNNGTGGNNCATG ]VBBY[YVVU`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 12257 1045 0 1 GGCCTTAGCAACCAGGGTNNCCNTTCNTTCAATATTTG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 12291 1046 0 1 CCGAAGTGGAGAAGGGTTNNATGTGAACAGCAGTTGAA a____a_________^X^BBZ^ZX[_a_______aa__ 2:0:1 N +ILLUMINA-33A494 1 1 1 12315 1043 0 1 GANATTTTGAAGAAAAAGNNNNNTGGNAGATACTAGTC ^WBW^^^^^[````[QUUBBBBBX]]BS]X][[^^``` QC N +ILLUMINA-33A494 1 1 1 12339 1041 0 1 GANTAATCGAACCATCTNNNNNNNGNNNCCCTCCGAAG BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 12376 1037 0 1 ATNNTATATCTTGAAGANNNNNNNCNNNAGCANNGTTN BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 12569 1038 0 1 CCNNCCTCAACAACCTCNNNNNNNCNNNTCCCNNCCAG ]WBBYZZUTZ`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 12662 1040 0 1 GGNTAGCTTTGATGACANNNNNNNCNNNGAGTNCCTCC YYBVQXX\^Z_____BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 12850 1037 0 1 TGNNCCTTGCTTTTCTTNNNNNNNTNNNACTTNNGATC YYBBY\]\Y\`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 12895 1039 0 1 GGNNTGTGTTTGGTCATNNNNNNNANNNATTTNNTAGA \^BB^^]^^[`````\^BBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 12949 1043 0 1 GGNGTTACGTGGAGGATGNNNGNGTGNCCTACATTTCC ^^B^^aaaa_`````I]]BBB[B[U^B^^^a_aa_``` QC N +ILLUMINA-33A494 1 1 1 13225 1039 0 1 GTNNAAAAACTGGTGCANNNNNNNTNNNCGTTNNCCAG YXBBT[XVU[_____BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 13301 1035 0 1 CTNNGCAGGGCGAAGCCNNNNNNNANNNTGGTNNAGGN ]]BB][Z^^^`````^^BBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 13612 1047 0 1 AGGAGGACGAGGACACAGNNCTGGTAGACATGGCTTCG X`````````____aXZSBB[ZZZPX]]\]`````___ chr2.fa 13496052 R 18AC18 117 N +ILLUMINA-33A494 1 1 1 13661 1041 0 1 CCNTGGCCCTAGTGACANNNNNNNCNNNAGAAGTTCTC ]VB]][^[[XaX__BBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 13705 1043 0 1 AGNGCAGCTCAGACAGAGNNNNNTCANCTGAGAGCTCT ^^B^^]^^]^`````^ZZBBBBBZ]ZB\]Z\][]Y``` QC N +ILLUMINA-33A494 1 1 1 14048 1044 0 1 CCTGTGGATGTCATGCGGNNGGNCGGNATCAAAGTCAC ]]]Y]]US]]^BBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 14338 1036 0 1 GGNNAACCTCTTTCCATNNNNNNNGNNNACAANNGGGN BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 14371 1042 0 1 GGNTGGAAAAGGTTCCGNNNNNNTGNNNAGGGAGTAGT [ZBUY[[\[^_a_a_BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 14614 1037 0 1 CCNNAGCAGTTCGATTTNNNNNNNGNNNCCCANNTACA ^^BB^^^]^^`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 14633 1039 0 1 GANNGATAGTCAACAAGNNNNNNNANNNAAAGNNGAAA [ZBB]XX[^\`````BBBBBBBBBBBBBBBBBBBBBBB QC N +ILLUMINA-33A494 1 1 1 14668 1042 0 1 AGNCCCTGATAGTTGGANNNNNNGATNCACTTGAGTAG ^^B^^][^[^`````\\BBBBBBYZ]B]]W^^\X[___ QC N +ILLUMINA-33A494 1 1 1 14687 1045 0 1 GGAGAAGGAACTTACAAANNGTNTAGNAGATCAAAAAG aa_____a_a`````T[VBB]\BXWZB]]S^]Y]]``` QC N diff --git a/htsworkflow/pipelines/test/testdata/rta_basecalls_config_1870.xml b/htsworkflow/pipelines/test/testdata/rta_basecalls_config_1870.xml new file mode 100755 index 0000000..70d31c3 --- /dev/null +++ b/htsworkflow/pipelines/test/testdata/rta_basecalls_config_1870.xml @@ -0,0 +1,887 @@ + + + + + 0.6 + + 1 + 8 + 1 + 0 + 1 + 38 + 1 + + + 1 + 8 + 2 + 1 + 1 + 38 + 1 + 0 + 0 + + 0 + failed-chastity + le + 1.0 + + + + + 0 + 0 + 0 + 0 + 0 + + 1 + 38 + + ILLUMINA-33A494 + 0 + 0 + 0 + 0 + + 1 + 38 + + 100903_ILLUMINA-33A494_00001_61MMFAAXX + 100903 + 00001 + PL14 + + + + + s + 28 + 41 + 42 + 44 + 45 + 31 + 48 + 47 + 34 + 51 + 37 + 50 + 54 + 53 + 40 + 57 + 56 + 43 + 46 + 59 + 60 + 49 + 62 + 63 + 52 + 65 + 66 + 55 + 68 + 69 + 58 + 71 + 72 + 61 + 74 + 75 + 64 + 78 + 77 + 67 + 81 + 80 + 70 + 84 + 83 + 73 + 76 + 87 + 86 + 79 + 90 + 89 + 82 + 93 + 92 + 85 + 95 + 96 + 88 + 98 + 99 + 91 + 94 + 97 + 100 + 3 + 2 + 6 + 5 + 9 + 8 + 12 + 11 + 15 + 14 + 18 + 17 + 1 + 21 + 20 + 4 + 24 + 23 + 7 + 26 + 27 + 10 + 29 + 30 + 13 + 32 + 33 + 16 + 35 + 36 + 19 + 38 + 39 + 22 + 25 + + + s + 2 + 3 + 5 + 6 + 8 + 9 + 11 + 12 + 14 + 1 + 15 + 17 + 18 + 4 + 20 + 21 + 7 + 24 + 10 + 23 + 27 + 13 + 26 + 30 + 16 + 29 + 33 + 19 + 32 + 36 + 22 + 35 + 39 + 38 + 25 + 42 + 41 + 28 + 45 + 44 + 31 + 48 + 47 + 34 + 51 + 50 + 37 + 54 + 53 + 40 + 57 + 56 + 43 + 60 + 59 + 46 + 63 + 62 + 49 + 66 + 65 + 52 + 69 + 68 + 72 + 55 + 71 + 58 + 75 + 74 + 61 + 78 + 77 + 64 + 81 + 80 + 84 + 67 + 83 + 70 + 87 + 86 + 73 + 90 + 89 + 76 + 93 + 92 + 79 + 96 + 95 + 82 + 99 + 98 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 3 + 2 + 6 + 5 + 9 + 8 + 12 + 11 + 15 + 14 + 18 + 17 + 1 + 21 + 20 + 24 + 4 + 23 + 27 + 7 + 26 + 30 + 10 + 29 + 33 + 32 + 13 + 36 + 35 + 16 + 38 + 39 + 19 + 41 + 42 + 22 + 44 + 45 + 25 + 47 + 48 + 28 + 51 + 50 + 31 + 34 + 54 + 53 + 56 + 57 + 37 + 60 + 59 + 40 + 63 + 43 + 62 + 66 + 46 + 65 + 49 + 69 + 68 + 52 + 72 + 71 + 55 + 75 + 74 + 58 + 77 + 78 + 61 + 81 + 80 + 64 + 84 + 83 + 67 + 86 + 87 + 70 + 89 + 90 + 73 + 93 + 92 + 76 + 96 + 95 + 79 + 99 + 98 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 3 + 2 + 6 + 5 + 9 + 8 + 12 + 11 + 15 + 14 + 18 + 17 + 21 + 20 + 24 + 23 + 27 + 26 + 30 + 1 + 29 + 33 + 4 + 32 + 36 + 7 + 35 + 10 + 39 + 38 + 13 + 42 + 41 + 16 + 45 + 44 + 19 + 48 + 47 + 51 + 22 + 50 + 54 + 25 + 53 + 57 + 28 + 56 + 60 + 31 + 59 + 63 + 34 + 62 + 66 + 37 + 65 + 69 + 40 + 68 + 72 + 43 + 71 + 75 + 46 + 74 + 49 + 78 + 77 + 52 + 81 + 80 + 55 + 84 + 83 + 58 + 87 + 86 + 61 + 90 + 89 + 64 + 93 + 92 + 67 + 96 + 95 + 70 + 99 + 98 + 73 + 76 + 79 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 3 + 2 + 6 + 5 + 9 + 8 + 12 + 11 + 15 + 14 + 18 + 17 + 1 + 20 + 21 + 4 + 23 + 24 + 7 + 26 + 27 + 10 + 29 + 30 + 13 + 32 + 33 + 16 + 35 + 36 + 19 + 39 + 38 + 22 + 41 + 42 + 25 + 44 + 45 + 28 + 47 + 48 + 31 + 50 + 51 + 53 + 34 + 54 + 56 + 37 + 57 + 59 + 60 + 40 + 62 + 63 + 43 + 65 + 66 + 46 + 69 + 68 + 49 + 72 + 52 + 71 + 75 + 74 + 55 + 78 + 58 + 77 + 81 + 61 + 80 + 84 + 64 + 83 + 87 + 67 + 86 + 90 + 70 + 89 + 73 + 93 + 92 + 76 + 96 + 95 + 79 + 99 + 98 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 2 + 3 + 6 + 5 + 9 + 8 + 11 + 12 + 14 + 15 + 17 + 18 + 1 + 20 + 21 + 4 + 24 + 23 + 7 + 26 + 27 + 10 + 29 + 30 + 13 + 33 + 32 + 16 + 36 + 35 + 19 + 39 + 38 + 22 + 42 + 41 + 25 + 45 + 44 + 28 + 47 + 48 + 31 + 51 + 50 + 34 + 54 + 53 + 37 + 57 + 56 + 40 + 60 + 59 + 43 + 63 + 62 + 46 + 66 + 65 + 49 + 69 + 68 + 52 + 72 + 71 + 55 + 75 + 74 + 58 + 78 + 77 + 61 + 81 + 80 + 64 + 84 + 83 + 67 + 86 + 87 + 70 + 89 + 90 + 73 + 92 + 93 + 76 + 95 + 79 + 96 + 98 + 82 + 99 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 2 + 3 + 5 + 6 + 8 + 9 + 11 + 12 + 14 + 15 + 18 + 17 + 1 + 21 + 20 + 4 + 24 + 23 + 7 + 27 + 26 + 10 + 29 + 30 + 13 + 32 + 33 + 16 + 35 + 36 + 19 + 38 + 39 + 22 + 41 + 42 + 25 + 44 + 45 + 28 + 48 + 47 + 31 + 51 + 50 + 34 + 54 + 53 + 37 + 56 + 57 + 40 + 59 + 60 + 43 + 62 + 63 + 46 + 65 + 66 + 49 + 52 + 68 + 69 + 55 + 71 + 72 + 58 + 74 + 75 + 61 + 77 + 78 + 64 + 80 + 81 + 83 + 67 + 84 + 86 + 70 + 87 + 89 + 73 + 90 + 92 + 76 + 93 + 95 + 79 + 96 + 98 + 82 + 99 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 2 + 3 + 6 + 5 + 9 + 8 + 12 + 11 + 15 + 14 + 18 + 17 + 21 + 1 + 20 + 24 + 23 + 4 + 27 + 26 + 7 + 30 + 29 + 10 + 33 + 32 + 13 + 36 + 35 + 16 + 39 + 38 + 42 + 19 + 41 + 22 + 45 + 44 + 48 + 25 + 47 + 51 + 28 + 50 + 54 + 31 + 53 + 57 + 34 + 56 + 60 + 37 + 59 + 63 + 40 + 62 + 66 + 43 + 65 + 69 + 46 + 68 + 72 + 49 + 71 + 75 + 52 + 74 + 78 + 55 + 77 + 81 + 58 + 80 + 84 + 61 + 83 + 87 + 64 + 86 + 90 + 67 + 89 + 93 + 70 + 92 + 96 + 73 + 95 + 99 + 76 + 98 + 79 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + + \ No newline at end of file diff --git a/htsworkflow/pipelines/test/testdata/rta_intensities_config_1870.xml b/htsworkflow/pipelines/test/testdata/rta_intensities_config_1870.xml new file mode 100755 index 0000000..471a0f3 --- /dev/null +++ b/htsworkflow/pipelines/test/testdata/rta_intensities_config_1870.xml @@ -0,0 +1,868 @@ + + + + + + 0 + 0 + 0 + 0 + 0 + + + 0 + 0 + 0 + 0 + 0 + + 1 + 38 + 100903_ILLUMINA-33A494_00001_61MMFAAXX + + ILLUMINA-33A494 + 0 + 0 + 0 + 0 + + 1 + 38 + 100903_ILLUMINA-33A494_00001_61MMFAAXX + + 100903_ILLUMINA-33A494_00001_61MMFAAXX + 100903 + 00001 + PL14 + + + + + s + 2 + 5 + 8 + 11 + 14 + 17 + 3 + 20 + 6 + 23 + 9 + 26 + 12 + 29 + 15 + 32 + 18 + 35 + 21 + 38 + 24 + 41 + 27 + 44 + 30 + 47 + 33 + 1 + 50 + 36 + 4 + 53 + 39 + 7 + 56 + 42 + 10 + 59 + 45 + 13 + 62 + 48 + 16 + 65 + 51 + 19 + 68 + 54 + 22 + 71 + 57 + 25 + 74 + 60 + 28 + 77 + 63 + 31 + 80 + 66 + 34 + 83 + 69 + 37 + 86 + 72 + 40 + 89 + 75 + 43 + 92 + 78 + 46 + 95 + 81 + 49 + 98 + 84 + 52 + 87 + 55 + 90 + 58 + 93 + 61 + 96 + 64 + 99 + 67 + 70 + 73 + 76 + 79 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 2 + 5 + 8 + 11 + 14 + 17 + 3 + 20 + 6 + 23 + 9 + 26 + 12 + 29 + 15 + 32 + 18 + 35 + 21 + 38 + 24 + 41 + 27 + 44 + 30 + 47 + 33 + 50 + 36 + 1 + 53 + 39 + 4 + 56 + 42 + 7 + 59 + 45 + 10 + 62 + 48 + 13 + 65 + 51 + 16 + 68 + 54 + 19 + 71 + 57 + 22 + 74 + 60 + 25 + 77 + 63 + 28 + 80 + 66 + 31 + 83 + 69 + 34 + 86 + 72 + 37 + 89 + 75 + 40 + 92 + 78 + 43 + 95 + 81 + 46 + 98 + 84 + 49 + 87 + 52 + 90 + 55 + 93 + 58 + 96 + 61 + 99 + 64 + 67 + 70 + 73 + 76 + 79 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 2 + 5 + 8 + 11 + 14 + 17 + 3 + 20 + 6 + 23 + 9 + 26 + 12 + 29 + 15 + 32 + 18 + 35 + 21 + 38 + 24 + 41 + 27 + 44 + 30 + 47 + 33 + 50 + 36 + 53 + 39 + 1 + 56 + 42 + 4 + 59 + 45 + 7 + 62 + 48 + 10 + 65 + 51 + 13 + 68 + 54 + 16 + 71 + 57 + 19 + 74 + 60 + 22 + 77 + 63 + 25 + 80 + 66 + 28 + 83 + 69 + 31 + 86 + 72 + 34 + 89 + 75 + 37 + 92 + 78 + 40 + 95 + 81 + 43 + 98 + 84 + 46 + 87 + 49 + 90 + 52 + 93 + 55 + 96 + 58 + 99 + 61 + 64 + 67 + 70 + 73 + 76 + 79 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 2 + 5 + 8 + 11 + 14 + 17 + 3 + 20 + 6 + 23 + 9 + 26 + 12 + 29 + 15 + 32 + 18 + 35 + 21 + 38 + 24 + 41 + 27 + 44 + 30 + 47 + 33 + 50 + 36 + 53 + 39 + 56 + 42 + 1 + 59 + 45 + 4 + 62 + 48 + 7 + 65 + 51 + 10 + 68 + 54 + 13 + 71 + 57 + 16 + 74 + 60 + 19 + 77 + 63 + 22 + 80 + 66 + 25 + 83 + 69 + 28 + 86 + 72 + 31 + 89 + 75 + 34 + 92 + 78 + 37 + 95 + 81 + 98 + 40 + 84 + 87 + 43 + 90 + 46 + 93 + 49 + 96 + 52 + 99 + 55 + 58 + 61 + 64 + 67 + 70 + 73 + 76 + 79 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 2 + 5 + 8 + 11 + 14 + 17 + 3 + 20 + 6 + 23 + 9 + 26 + 12 + 29 + 15 + 32 + 18 + 35 + 21 + 38 + 24 + 41 + 27 + 44 + 30 + 47 + 33 + 50 + 36 + 53 + 39 + 56 + 42 + 59 + 45 + 62 + 48 + 65 + 51 + 68 + 1 + 54 + 71 + 4 + 57 + 74 + 7 + 60 + 77 + 10 + 63 + 80 + 13 + 66 + 83 + 16 + 69 + 86 + 19 + 72 + 89 + 22 + 75 + 92 + 25 + 78 + 95 + 28 + 81 + 98 + 31 + 84 + 34 + 87 + 37 + 90 + 40 + 93 + 43 + 96 + 46 + 99 + 49 + 52 + 55 + 58 + 61 + 64 + 67 + 70 + 73 + 76 + 79 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 2 + 5 + 8 + 11 + 14 + 17 + 3 + 20 + 6 + 23 + 9 + 26 + 12 + 29 + 15 + 32 + 18 + 35 + 21 + 38 + 24 + 41 + 27 + 44 + 30 + 47 + 33 + 50 + 36 + 53 + 39 + 56 + 42 + 59 + 45 + 62 + 48 + 65 + 51 + 68 + 54 + 71 + 1 + 57 + 74 + 4 + 60 + 77 + 7 + 63 + 80 + 10 + 66 + 83 + 13 + 69 + 86 + 16 + 72 + 89 + 19 + 75 + 92 + 22 + 78 + 95 + 25 + 81 + 98 + 28 + 84 + 31 + 87 + 34 + 90 + 37 + 93 + 40 + 96 + 43 + 99 + 46 + 49 + 52 + 55 + 58 + 61 + 64 + 67 + 70 + 73 + 76 + 79 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 2 + 5 + 8 + 11 + 14 + 17 + 3 + 20 + 6 + 23 + 9 + 26 + 12 + 29 + 15 + 32 + 18 + 35 + 21 + 38 + 24 + 41 + 27 + 44 + 30 + 47 + 33 + 50 + 36 + 53 + 39 + 56 + 42 + 59 + 45 + 62 + 48 + 65 + 51 + 68 + 54 + 71 + 57 + 74 + 1 + 60 + 77 + 4 + 63 + 80 + 7 + 66 + 83 + 10 + 69 + 86 + 13 + 72 + 89 + 16 + 75 + 92 + 19 + 78 + 95 + 22 + 81 + 98 + 25 + 84 + 28 + 87 + 31 + 90 + 34 + 93 + 37 + 96 + 40 + 99 + 43 + 46 + 49 + 52 + 55 + 58 + 61 + 64 + 67 + 70 + 73 + 76 + 79 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + s + 2 + 5 + 8 + 11 + 14 + 17 + 3 + 20 + 6 + 23 + 9 + 26 + 12 + 29 + 15 + 32 + 18 + 35 + 21 + 38 + 24 + 41 + 27 + 44 + 30 + 47 + 33 + 50 + 36 + 53 + 39 + 56 + 42 + 59 + 45 + 62 + 48 + 65 + 51 + 68 + 54 + 71 + 57 + 74 + 60 + 77 + 1 + 63 + 80 + 4 + 66 + 83 + 7 + 69 + 86 + 10 + 72 + 89 + 13 + 75 + 92 + 16 + 78 + 95 + 19 + 81 + 98 + 22 + 84 + 25 + 87 + 28 + 90 + 31 + 93 + 34 + 96 + 37 + 99 + 40 + 43 + 46 + 49 + 52 + 55 + 58 + 61 + 64 + 67 + 70 + 73 + 76 + 79 + 82 + 85 + 88 + 91 + 94 + 97 + 100 + + + + \ No newline at end of file