#!/usr/bin/perl -w
#
# align2TagAlign.pm - converts the HTSW align format (derived from Eland
# format) into ENCODE 'TagAlign' format.
#
# Provenance: htswanalysis/scripts/align2TagAlign.pm, added in commit
# 053e754d7b60c8387c1dd6ae22ec6f5db7712f56 by Rami Rauch,
# Wed, 19 Nov 2008 19:01:53 +0000.
#
# Transform Align.txt format, example:
#   TTTTTCTTTCTTTTCTCTCTTTCTT 12500 1 chr9:19863256 F TTTTTCTTTTCTTTCTCTCTTTCTT 11453
# to
# ENCODE "TagAlign" format:
#   chrom | chromStart | chromEnd | Sequence | Score | Strand (+/-)
# See online documentation of ENCODE data submission formats at
# http://encodewiki.ucsc.edu/EncodeDCC/index.php/File_Formats
#
# Usage:  align2TagAlign.pm <align file>
# Output: "<align file>.TagAlign", one tab-separated record per accepted line.
#
# The file is written as a modulino: run directly it converts the file named
# on the command line; loaded with require/do it only defines the subs below.

use strict;
use warnings;

# Alignment score => number of mismatches.  Any score that is not listed is
# reported as 0 mismatches.
my %MISMATCHES_FOR_SCORE = (
    12500 => 0,
    11453 => 1,
    10406 => 2,
);

# Input columns are separated by runs of whitespace.
my $FIELD_SEPARATOR = qr/\s+/;

# An align record has exactly this many columns:
# sequence, score, repeats, chr:pos, strand, genomic sequence, second score.
my $EXPECTED_FIELDS = 7;

# align_to_tagalign($line)
#
# Converts one line of the align file into a TagAlign record.
#
# Parameters:
#   $line - one input line, with or without its trailing newline.
#
# Returns:
#   The tab-separated record "chrom, start, end, sequence, mismatches, strand"
#   (without a trailing newline), or undef when the line is not a usable
#   alignment: wrong number of columns, a position column that does not start
#   with "chr", or no numeric position after the colon.
#
# NOTE(review): the position is copied unchanged into the start column and the
# mismatch count is written in the score column; confirm both against the
# TagAlign specification the consumers expect.
sub align_to_tagalign {
    my ($line) = @_;

    return unless defined $line;
    chomp $line;

    my @fields = split $FIELD_SEPARATOR, $line;
    return unless @fields == $EXPECTED_FIELDS;

    # Only four of the seven columns contribute to the output.
    my ($sbjseq, $score, $chr_pos, $strand) = @fields[ 0, 1, 3, 4 ];

    # Reads without a chromosome hit are skipped.
    return unless $chr_pos =~ /^chr/;

    my ($chr, $pos) = split /:/, $chr_pos;
    return unless defined $pos && $pos =~ /\A\d+\z/;

    my $end = $pos + length $sbjseq;

    my $mismatches
        = exists $MISMATCHES_FOR_SCORE{$score}
        ? $MISMATCHES_FOR_SCORE{$score}
        : 0;

    # F(orward) becomes '+', R(everse) becomes '-'.
    $strand =~ s/F/+/i;
    $strand =~ s/R/-/i;

    return join "\t", $chr, $pos, $end, $sbjseq, $mismatches, $strand;
}

# main(@args)
#
# Converts the align file named by $args[0] into "$args[0].TagAlign" and
# prints a short summary to standard output.
#
# Returns 0 on success; dies with a message when the argument is missing or
# when a file cannot be opened, written or closed.
sub main {
    my @args = @_;

    die "Usage: $0 <align file>\n"
        unless @args && defined $args[0] && length $args[0];

    my $in_name  = $args[0];
    my $out_name = "$in_name.TagAlign";

    open my $in, '<', $in_name
        or die "Can't open file $in_name: $!";
    open my $out, '>', $out_name
        or die "Can't open output file $out_name: $!";

    # Counts the records actually written to the output file.
    my $written = 0;

    while ( my $line = <$in> ) {
        my $record = align_to_tagalign($line);
        next unless defined $record;

        print {$out} "$record\n"
            or die "Can't write to $out_name: $!";
        $written++;
    }

    close $in or die "Can't close file $in_name: $!";

    # Buffered write errors only surface when the handle is closed.
    close $out or die "Can't close output file $out_name: $!";

    print "\n==== F I N I S H E D processing $written records ===========";
    print "\n==== INPUT FILE NAME: $in_name ===============";
    print "\n==== OUTPUT FILE NAME: $out_name ===============\n";

    return 0;
}

exit main(@ARGV) unless caller;

1;