--- /dev/null
+#!/usr/bin/perl -w
+
+use strict;
+use warnings;
+use Switch;
+
+# Transform Align.txt format, example: TTTTTCTTTCTTTTCTCTCTTTCTT 12500 1 chr9:19863256 F TTTTTCTTTTCTTTCTCTCTTTCTT 11453
+# to
+# ENCODE "TagAlign" format: chrom | chromStart | chromEnd | Sequence | Score | Strand (+/-)|
+# See online documentation of ENCODE data submission formats at http://encodewiki.ucsc.edu/EncodeDCC/index.php/File_Formats
+
+# Convert the Align.txt file named by the first command-line argument into
+# TagAlign rows, written to "<input>.TagAlign" in the same location.
+#
+# Input lines that do not split into exactly seven whitespace-separated
+# fields, whose fourth field does not start with "chr", or whose fourth
+# field lacks a numeric ":position" part are skipped without a message.
+my $in_path = $ARGV[0];
+if (!defined $in_path) {
+    die "Usage: $0 <Align.txt file>\n";
+}
+my $out_path = "$in_path.TagAlign";
+
+# Three-argument open with lexical handles: the file name is never parsed
+# for a mode prefix or a pipe character, and $! says why an open failed.
+open(my $in_fh, '<', $in_path) or die "Can't open file $in_path: $!";
+open(my $out_fh, '>', $out_path) or die "Can't open output file $out_path: $!";
+
+# Alignment score => mismatch count. A score that is not listed here is
+# written with 0 mismatches, which is what the previous switch/case did.
+# NOTE(review): an unknown score is therefore indistinguishable from a
+# perfect match in the output -- confirm that this is intended.
+my %mismatches_for = (
+    12500 => 0,
+    11453 => 1,
+    10406 => 2,
+);
+
+my $record_count = 0;    # number of rows written to the output file
+
+while (my $line = <$in_fh>) {
+    chomp $line;
+
+    # Split once. A leading blank makes split return an empty first field,
+    # so such a line has eight fields and is rejected, as before.
+    my @fields = split /\s+/, $line;
+    next if @fields != 7;
+
+    # Only fields 1, 2, 4 and 5 are used: read sequence, score,
+    # "chr:position" and strand letter.
+    my ($sbjseq, $score, $chr_pos, $strand) = @fields[0, 1, 3, 4];
+    next if $chr_pos !~ /^chr/;
+
+    my ($chr, $pos) = split /:/, $chr_pos;
+
+    # Without a numeric position the row would be malformed (empty start
+    # column, end computed from undef), so drop it.
+    next if !defined $pos;
+    next if $pos !~ /\A\d+\z/;
+
+    # NOTE(review): the start is copied through unchanged and the end is
+    # start + read length. TagAlign is BED-like; confirm whether Align.txt
+    # positions need a base shift before submission.
+    my $end = $pos + length($sbjseq);
+
+    my $mismatches = 0;
+    if (exists $mismatches_for{$score}) {
+        $mismatches = $mismatches_for{$score};
+    }
+
+    # F (forward) becomes "+", R (reverse) becomes "-"; case-insensitive.
+    $strand =~ s/F/+/i;
+    $strand =~ s/R/-/i;
+
+    print {$out_fh} "$chr\t$pos\t$end\t$sbjseq\t$mismatches\t$strand\n";
+    $record_count++;
+}
+
+close $in_fh;
+# Buffered write errors (for example a full disk) only show up at close.
+close($out_fh) or die "Can't close output file $out_path: $!";
+
+print "\n==== F I N I S H E D processing $record_count records ===========";
+print "\n==== INPUT FILE NAME: $in_path ===============";
+print "\n==== OUTPUT FILE NAME: $out_path ===============\n";
+
+exit;
+