6 # Convert a pair of FASTQ files to a single file in a one-pair-per-line
7 # format, where each line has these five tab-delimited fields:
10 # 2. Sequence of mate 1
11 # 3. Qualities of mate 1
12 # 4. Sequence of mate 2
13 # 5. Qualities of mate 2
15 # This script comes in handy if (a) you'd just like to store your
16 # paired-end data in a less awkward format than the usual pair of
17 # parallel FASTQ files, or (b) you'd like to use Bowtie with gzipped
18 # paired-end input without unzipping it first. In that case, you can
19 # pipe the output of this script (which handles gzipped inputs by
20 # piping them through gzip -dc) to Bowtie and use '--12 -'.
22 # Note that this script can also handle unpaired input (with -u), which
23 # Bowtie handles appropriately in --12 mode even when it's intermingled
24 # with paired-end input.
31 # Courtesy: http://www.perlmonks.org/?node_id=625977
38 (${$a[$n]}, $a[$n] = $a[$i])[0];
47 GetOptions ("u=s" => \$unpaired,
50 "shuffle" => \$shuffle) || die "Bad option";
53 my @unpaireds = split(/,/, $unpaired);
55 for my $f (@unpaireds) {
56 open(UNP, "$f") or die "Could not open unpaired file '$f': $!"; # 'or' + parens: the old '||' bound to "$f", so a failed open was never caught
60 $name = substr($name, 1);
66 push @output, "$name\t$seq\t$qual\n";
71 my @mate1s = split(/,/, $mate1);
72 my @mate2s = split(/,/, $mate2);
74 for(my $i = 0; $i <= $#mate1s; $i++) {
75 if($mate1s[$i] =~ /\.gz$/) {
76 open(M1, "gzip -dc $mate1s[$i] |") or die "Could not start gzip for mate 1 file '$mate1s[$i]': $!"; # 'or' + parens: the old '||' bound to the command string, not open()
78 open(M1, "$mate1s[$i]") or die "Could not open mate 1 file '$mate1s[$i]': $!"; # 'or' + parens: the old '||' bound to the filename, not open()
80 if($mate2s[$i] =~ /\.gz$/) {
81 open(M2, "gzip -dc $mate2s[$i] |") or die "Could not start gzip for mate 2 file '$mate2s[$i]': $!"; # 'or' + parens: the old '||' bound to the command string, not open()
83 open(M2, "$mate2s[$i]") or die "Could not open mate 2 file '$mate2s[$i]': $!"; # 'or' + parens: the old '||' bound to the filename, not open()
88 $name1 = substr($name1, 1, -2);
91 $name2 = substr($name2, 1);
102 print "$name1\t$seq1\t$qual1\t$seq2\t$qual2\n" unless $shuffle;
103 push @output, "$name1\t$seq1\t$qual1\t$seq2\t$qual2\n" if $shuffle;
110 @output = shuffle(@output);
111 print join("", @output);