7 # Transform Align.txt records into TagAlign format. Example input record: TTTTTCTTTCTTTTCTCTCTTTCTT 12500 1 chr9:19863256 F TTTTTCTTTTCTTTCTCTCTTTCTT 11453
9 # ENCODE "TagAlign" format: chrom | chromStart | chromEnd | Sequence | Score | Strand (+/-)|
10 # See online documentation of ENCODE data submission formats at http://encodewiki.ucsc.edu/EncodeDCC/index.php/File_Formats
# Open the input file named by the first command-line argument for reading.
# Three-argument open keeps characters in the file name (a leading '<', '>' or
# '|', surrounding whitespace) from being interpreted as part of the open mode,
# and $! reports why the open failed. The bareword handles IFILE/OFILE are kept
# because the read loop and the print statement below refer to them by name.
12 open (IFILE, '<', $ARGV[0]) or die "Can't open file $ARGV[0]: $!";
# The converted records are written to "<input name>.TagAlign".
14 open (OFILE, '>', "$ARGV[0].TagAlign") or die "Can't open output file $ARGV[0].TagAlign: $!";
# Main conversion loop: read IFILE one line at a time, split each line into its
# fields, and write one tab-separated record per accepted line to OFILE.
# NOTE(review): the embedded line numbers skip values (19, 21, 23, 24, 28, ...),
# so some lines of the script - including the braces of this loop and of the
# if-block, and the definition of $delimit - are not visible here.
19 # print "\nchr\tstart\tend\tsbjseq\tmismatched\tstrand\treps";
# Each iteration reads the next input line into $_. The trailing "###" comment
# is a disabled debugging limit on the number of records.
21 while(<IFILE>) ### && $i < 10)
23 # print "\nRead record $i: $_";
# NOTE(review): while(<FH>) already stops when the read returns undef, so this
# branch is not expected to run; it is also the only visible place $i changes.
24 if(!defined($_)) { $i++; print "\nRecord $i not defined."; next; }
# Splits the line on $delimit into @testArray; no later use of @testArray is
# visible in this excerpt.
28 @testArray = split(/$delimit/,$_);
# Field order of an input record: subject sequence, score, repeat count,
# "chr:position", strand, genomic sequence, second score.
# NOTE(review): no chomp is visible; if none happens on a hidden line, the last
# field ($score2) still carries the line terminator - confirm.
31 my($sbjseq,$score,$reps,$chr_pos,$strand,$genseq,$score2) = split(/$delimit/,$_);
# Only records whose position field begins with "chr" are converted.
32 if($chr_pos =~ /^chr/)
# Split "chrN:position" into chromosome name and start position.
34 my($chr,$pos) = split(/:/,$chr_pos);
# End coordinate = start position plus the length of the subject sequence.
35 my $end = $pos + length($sbjseq);
# Map a score value to a mismatch count: 12500 -> 0, 11453 -> 1, 10406 -> 2.
# NOTE(review): the switch header is not visible, so which variable is tested
# ($score or $score2) cannot be told from here; "case" is not core Perl and
# presumably comes from the Switch module - confirm. No default case is visible.
39 case 12500 {$mismatches = 0}
40 case 11453 {$mismatches = 1}
41 case 10406 {$mismatches = 2}
# Output columns: chromosome, start, end, subject sequence, mismatch count,
# strand. NOTE(review): $strand is written exactly as read in the visible lines
# (the example record carries "F"), while the header comment lists Strand as
# +/-; confirm whether a conversion happens on a hidden line.
45 $BEDrec = "$chr\t$pos\t$end\t$sbjseq\t$mismatches\t$strand";
46 # print "\nBED rec: $BEDrec";
47 print OFILE "$BEDrec\n";
# Summary printed to standard output after processing.
# NOTE(review): the reported count relies on $i being incremented on a line not
# shown here - confirm.
55 print "\n==== F I N I S H E D processing $i records ===========";
56 print "\n==== INPUT FILE NAME: $ARGV[0] ===============";
57 print "\n==== OUTPUT FILE NAME: $ARGV[0].TagAlign ===============\n";