Added script front-end for primer-design code
[htsworkflow.git] / htswanalysis / scripts / primer_design.pm
1 #!/usr/bin/perl -w
2
3 use strict;
4 use File::Basename;
5 use File::Temp qw(tempfile);
6
7 my $bed_file = shift;
8 my $library = shift;
9 my $root_dir = shift;
10 my $data_dir = shift;
11
12 if(!defined($library) || $library eq "") {
13   die "Error: no library indicated\n";
14 }
15 if( -z "~Data/Libraries/$library.txt") {
16   die "Error: library $library does not exist\n";
17 }
18
19 my @data = `$root_dir/bin/refine_peaks $bed_file $data_dir/Libraries/$library.txt $root_dir/reference_data/hg18_chrom_list.txt 2> $bed_file.err`;
20
21 unless(-z "$bed_file.err") {
22   warn "Errors found: check $tmp_filename.err\n";
23 }
24
25 my $designed = "";
26 my $missed = "";
27
28 my($total,$no_match) = (0,0);
29 my($id,$fseq,$rseq,$ftm,$rtm,$len);
30 my @primers;
31 for(@data) {
32   if($_ =~ /^PRIMER_SEQUENCE_ID=(.*)$/) { $id = $1; }
33   if($_ =~ /^PRIMER_LEFT_SEQUENCE=(.*)$/) { $fseq = $1; }
34   if($_ =~ /^PRIMER_RIGHT_SEQUENCE=(.*)$/) { $rseq = $1; }
35   if($_ =~ /^PRIMER_LEFT_TM=(.*)$/) { $ftm = $1; }
36   if($_ =~ /^PRIMER_RIGHT_TM=(.*)$/) { $rtm = $1; }
37   if($_ =~ /^PRIMER_PRODUCT_SIZE=(.*)$/) { 
38     $len = $1;
39     my %primer;
40
41     my($chr,$start,$stop,$score,$count,$call_start,$call_end,$call_count) = split(/_/,$id);
42     $primer{chr} = $chr;
43     $primer{start} = $start;
44     $primer{stop} = $stop;
45     $primer{score} = $score;
46     $primer{count} = $count;
47     $primer{call_start} = $call_start;
48     $primer{call_end} = $call_end;
49     $primer{call_count} = $call_count;
50     $primer{F}{seq} = $fseq; 
51     $primer{R}{seq} = $rseq; 
52     $primer{F}{tm} = $ftm;
53     $primer{R}{tm} = $rtm;
54     $primer{len} = $len;
55     push(@primers, \%primer);
56   }
57   if($_ =~ /^=$/) {
58     if(!defined($fseq)) {
59       $no_match++;
60     }
61     $total++;
62     ($id,$fseq,$rseq,$ftm,$rtm) = (undef,undef,undef,undef,undef);
63   }
64 }
65
66 # sort primers, get min and max score
67 my @sorted_primers = sort { my %c = %{$a}; my %d = %{$b}; return $c{score} <=> $d{score}; } @primers;
68 my $min_score = $sorted_primers[0]{score};
69 my $max_score = $sorted_primers[scalar(@sorted_primers)-1]{score};
70
71 #my @sorted_primers = sort { my %c = %{$a}; my %d = %{$b}; return $c{call_count} <=> $d{call_count}; } @primers;
72 #my $min_score = $sorted_primers[0]{call_count};
73 #my $max_score = $sorted_primers[scalar(@sorted_primers)-1]{call_count};
74
75 my $num_primers = 44;
76 my $score_range = $max_score - $min_score;
77 my $score_increment = $score_range / ($num_primers+1);
78
79 print '
80 <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "DTD/xhtml1-strict.dtd">
81 <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
82  <head>
83    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
84    <title>Results</title>
85    <style type="text/css">
86      img {border: none;}
87    </style>
88  </head>
89  <body>
90  ';
91 print "<H3>Score range ($min_score,$max_score)</H3><BR>\n";
92 print "<TABLE BORDER=1>\n";
93
94 print "<TR><TD><EM>Score</EM></TD><TD><EM>Count</EM></TD><TD><EM>Call</EM></TD><TD><EM>Call Count</EM></TD><TD><EM>Sequence</EM></TD><TD><EM>Tm (&deg;C)</EM></TD><TD><EM>Amplicon Size (bp)</EM></TR>\n";
95
96 my %plate;
97 $plate{A1} = "TGCTTTGGTTTGGTTTGATTTCC";  #neg2-F
98 $plate{A2} = "AGCCAAAAGAAGGAAGCAACAGC";  #neg5-F
99 $plate{C1} = "TGCTTTGGTTTGGTTTGATTTCC";  #neg2-F
100 $plate{C2} = "AGCCAAAAGAAGGAAGCAACAGC";  #neg5-F
101 $plate{E1} = "GCTCCAAAATTCAGGTGAAATGG";  #neg2-R
102 $plate{E2} = "AGGGCTGGAAGCAATGAGAAAAG";  #neg5-R
103 $plate{G1} = "GCTCCAAAATTCAGGTGAAATGG";  #neg2-R
104 $plate{G2} = "AGGGCTGGAAGCAATGAGAAAAG";  #neg5-R
105
106 my @f_output_order = (
107 "A3","A4","A5","A6","A7","A8","A9","A10","A11","A12",
108 "B1","B2","B3","B4","B5","B6","B7","B8","B9","B10","B11","B12",
109 "C3","C4","C5","C6","C7","C8","C9","C10","C11","C12",
110 "D1","D2","D3","D4","D5","D6","D7","D8","D9","D10","D11","D12"
111 );
112
113 my @r_output_order = (
114 "E3","E4","E5","E6","E7","E8","E9","E10","E11","E12",
115 "F1","F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12",
116 "G3","G4","G5","G6","G7","G8","G9","G10","G11","G12",
117 "H1","H2","H3","H4","H5","H6","H7","H8","H9","H10","H11","H12"
118 );
119
120 my @plate_output_order = (
121 "A1","A2","A3","A4","A5","A6","A7","A8","A9","A10","A11","A12",
122 "B1","B2","B3","B4","B5","B6","B7","B8","B9","B10","B11","B12",
123 "C1","C2","C3","C4","C5","C6","C7","C8","C9","C10","C11","C12",
124 "D1","D2","D3","D4","D5","D6","D7","D8","D9","D10","D11","D12",
125 "E1","E2","E3","E4","E5","E6","E7","E8","E9","E10","E11","E12",
126 "F1","F2","F3","F4","F5","F6","F7","F8","F9","F10","F11","F12",
127 "G1","G2","G3","G4","G5","G6","G7","G8","G9","G10","G11","G12",
128 "H1","H2","H3","H4","H5","H6","H7","H8","H9","H10","H11","H12"
129 );
130
131 my @chosen_primers;
132 my $primer_index = 0;
133 my $score_thresh = $min_score;
134
135 my %bins;
136 my $score_low = $min_score;
137 my $score_high = $min_score;
138 my $bin_id;
139 for(0..$num_primers-1) {
140   $score_low = $score_high;
141   $score_high = $score_high + $score_increment;
142   $bin_id = $_;
143   my @a; $bins{$bin_id} = \@a;
144  
145   while($sorted_primers[$primer_index]{score} < $score_high) { 
146     push @{$bins{$bin_id}}, $primer_index; 
147     $primer_index++; 
148     if($primer_index >= scalar(@sorted_primers)) { last; } 
149   } 
150 }
151
152 for(0..$num_primers-1) {
153   $score_thresh += $score_increment;
154   my $bin_idx = $_;
155   my $bin_size;
156   
157   $bin_size = scalar(@{$bins{$bin_idx}});
158   while($bin_size == 0) { 
159     $bin_idx--; 
160     $bin_size = scalar(@{$bins{$bin_idx}});
161   }
162   $primer_index = pop(@{$bins{$bin_idx}});
163   print "<TR><TD ROWSPAN=2>$sorted_primers[$primer_index]{score}</TD><TD ROWSPAN=2>$sorted_primers[$primer_index]{chr}:$sorted_primers[$primer_index]{call_start}-$sorted_primers[$primer_index]{call_end}<TD ROWSPAN=2>$sorted_primers[$primer_index]{count}</TD><TD ROWSPAN=2>$sorted_primers[$primer_index]{call_count}</TD><TD>$sorted_primers[$primer_index]{F}{seq}</TD><TD>$sorted_primers[$primer_index]{F}{tm}</TD><TD ROWSPAN=2>$sorted_primers[$primer_index]{len}</TD></TR>\n";
164   print "<TR><TD>$sorted_primers[$primer_index]{R}{seq}</TD><TD>$sorted_primers[$primer_index]{R}{tm}</TD></TR>\n";
165   $plate{$f_output_order[$_]} = $sorted_primers[$primer_index]{F}{seq};
166   $plate{$r_output_order[$_]} = $sorted_primers[$primer_index]{R}{seq};
167 }
168
169 print '</table><BR><BR><H2>Plate Order</H2><TABLE BORDER=1>';
170 for (@plate_output_order) {
171   print "<TR><TD>$_</TD><TD>$plate{$_}</TD></TR>\n";
172 }
173 print '</TABLE></body></html>';