projects
/
htsworkflow.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
61f5644
)
Bug-fixes in sequence repository code and projects code
author
Tim Reddy Tim
<treddy@hudsonalpha.org>
Wed, 3 Dec 2008 00:55:36 +0000
(
00:55
+0000)
committer
Tim Reddy Tim
<treddy@hudsonalpha.org>
Wed, 3 Dec 2008 00:55:36 +0000
(
00:55
+0000)
htswanalysis/conf/TSSProfileFormat.gnuplot
patch
|
blob
|
history
htswanalysis/conf/config_tasks.plist
patch
|
blob
|
history
htswanalysis/conf/projectsummary.plist
patch
|
blob
|
history
htswanalysis/scripts/CollectLibraries.pm
patch
|
blob
|
history
htswanalysis/scripts/ConfigureTasks.pm
patch
|
blob
|
history
htswanalysis/scripts/LibrariesMakefile
patch
|
blob
|
history
htswanalysis/scripts/SummarizeProject2.pm
patch
|
blob
|
history
htswanalysis/scripts/count_bases.pm
patch
|
blob
|
history
diff --git
a/htswanalysis/conf/TSSProfileFormat.gnuplot
b/htswanalysis/conf/TSSProfileFormat.gnuplot
index 79ad9f1b68feddb64aedfb005eb0481d453bf8ed..a5f05eb3608cacc33221b8ee16656765afbb3572 100644
(file)
--- a/
htswanalysis/conf/TSSProfileFormat.gnuplot
+++ b/
htswanalysis/conf/TSSProfileFormat.gnuplot
@@
-1,4
+1,5
@@
set terminal png
set terminal png
+set autoscale
set linestyle lw 3
set linestyle ps 3
set xtics 250
set linestyle lw 3
set linestyle ps 3
set xtics 250
@@
-7,4
+8,4
@@
set size 0.6,0.6
set xlabel "cycle"
set ylabel "percent"
set arrow 1 from first "0",graph 0.0 to first "0",graph 1.0 nohead front lw 3 linecolor rgb "#222222"
set xlabel "cycle"
set ylabel "percent"
set arrow 1 from first "0",graph 0.0 to first "0",graph 1.0 nohead front lw 3 linecolor rgb "#222222"
-plot "FILENAME" using 1:2 title '' smooth cspline with filledcurve y1=0;
+plot "FILENAME" using 1:2 title '' smooth bezier with filledcurve y1=0;
diff --git
a/htswanalysis/conf/config_tasks.plist
b/htswanalysis/conf/config_tasks.plist
index c764a934717ff7630410221e4e52125c3f554e7c..4df6906bd00688a0355bdce3dea91f951804f2cb 100644
(file)
--- a/
htswanalysis/conf/config_tasks.plist
+++ b/
htswanalysis/conf/config_tasks.plist
@@
-12,10
+12,17
@@
<string>/Users/Data/Projects/config_tasks.log</string>
<key>StandardErrorPath</key>
<string>/Users/Data/Projects/config_tasks.err</string>
<string>/Users/Data/Projects/config_tasks.log</string>
<key>StandardErrorPath</key>
<string>/Users/Data/Projects/config_tasks.err</string>
+ <key>EnvironmentVariables</key>
+ <dict>
+ <key>MACS</key>
+ <string>/usr/local/bin/macs</string>
+ </dict>
<key>ProgramArguments</key>
<array>
<string>/usr/bin/perl</string>
<key>ProgramArguments</key>
<array>
<string>/usr/bin/perl</string>
- <string>ConfigureTasks.pm</string>
+ <string>/Users/ENCODE/htsworkflow/htswanalysis/scripts/ConfigureTasks.pm</string>
+ <string>/Users/ENCODE/htsworkflow/htswanalysis</string>
+ <string>/Users/Data</string>
<string>all</string>
</array>
<key>KeepAlive</key>
<string>all</string>
</array>
<key>KeepAlive</key>
diff --git
a/htswanalysis/conf/projectsummary.plist
b/htswanalysis/conf/projectsummary.plist
index 897d7aff383fffa95378219f67ba69f73b4f0ccf..b3b1c27026f285841a85da6c2f78dca19ee43563 100644
(file)
--- a/
htswanalysis/conf/projectsummary.plist
+++ b/
htswanalysis/conf/projectsummary.plist
@@
-17,7
+17,7
@@
<string>-j</string>
<string>8</string>
<string>-f</string>
<string>-j</string>
<string>8</string>
<string>-f</string>
- <string>ProjectMakefile</string>
+ <string>/Users/Data/Projects/ProjectMakefile</string>
</array>
<key>StartInterval</key>
<integer>600</integer>
</array>
<key>StartInterval</key>
<integer>600</integer>
diff --git
a/htswanalysis/scripts/CollectLibraries.pm
b/htswanalysis/scripts/CollectLibraries.pm
index f3d6aed41f5b5db1382369ceaa7a4a595a00a886..fcaa7928d348f8f15f19a934574fb7d869dc715d 100755
(executable)
--- a/
htswanalysis/scripts/CollectLibraries.pm
+++ b/
htswanalysis/scripts/CollectLibraries.pm
@@
-14,12
+14,13
@@
for my $filename (@ARGV) {
my($date,$flowcell,$lanes,$lib) = ($1,$2,$3,$4);
open(COUNT,$filename.".count");
my($date,$flowcell,$lanes,$lib) = ($1,$2,$3,$4);
open(COUNT,$filename.".count");
- my $count = <COUNT>; chomp $count;
+ my $count = <COUNT>; chomp $count; $count =~ s/\s//g;
if(!defined($count)) { print STDERR $filename,"\n"; }
close(COUNT);
if(!defined($lib)) {
print STDERR "MISSING LIB: ", $filename,"\n";
if(!defined($count)) { print STDERR $filename,"\n"; }
close(COUNT);
if(!defined($lib)) {
print STDERR "MISSING LIB: ", $filename,"\n";
+ next;
}
if(!exists($libraries{$lib})) { my @a; $libraries{$lib} = \@a; }
}
if(!exists($libraries{$lib})) { my @a; $libraries{$lib} = \@a; }
diff --git
a/htswanalysis/scripts/ConfigureTasks.pm
b/htswanalysis/scripts/ConfigureTasks.pm
index ba7a4723e2b92888581aaaa4eb3e657482c82665..b250f39a5601eb0b76aea04acfd58d4ce9c94ca0 100755
(executable)
--- a/
htswanalysis/scripts/ConfigureTasks.pm
+++ b/
htswanalysis/scripts/ConfigureTasks.pm
@@
-30,7
+30,7
@@
my %libs;
my $BIOP = "$root_dir/bin/BioProspector.mac";
my $QUESTDIR = "$root_dir/bin/QuEST";
my $BIOP = "$root_dir/bin/BioProspector.mac";
my $QUESTDIR = "$root_dir/bin/QuEST";
-my $MACS = `which macs | perl -e '\$l = <>; chomp \$l; print \$l;'` || die "MACS not found in PATH. Install and put in path";
+my $MACS = $ENV{'MACS'} || `which macs | perl -e '\$l = <>; chomp \$l; print \$l;'` || die "MACS not found in PATH. Install and put in path";
my $WINGPEAKSDIR = "$root_dir/bin";
my $WINGPEAKSGENOMEDIR = "$root_dir/reference_data";
my $PROFILEDIR = "$root_dir/bin";
my $WINGPEAKSDIR = "$root_dir/bin";
my $WINGPEAKSGENOMEDIR = "$root_dir/reference_data";
my $PROFILEDIR = "$root_dir/bin";
@@
-317,11
+317,11
@@
sub WritePeakCallingTasks {
$outfile .= "peak_caller.ChIP.out peak_caller.ChIP.out.bedgraph peak_caller.ChIP.out.fasta";
$cmd .= "peak_caller.ChIP.out: $data_dir/Libraries/$signal.txt $data_dir/Libraries/$bg.txt\n";
$outfile .= "peak_caller.ChIP.out peak_caller.ChIP.out.bedgraph peak_caller.ChIP.out.fasta";
$cmd .= "peak_caller.ChIP.out: $data_dir/Libraries/$signal.txt $data_dir/Libraries/$bg.txt\n";
- $cmd .= "\trm -f background_RX_noIP.align.txt\n";
- $cmd .= "\trm -f pseudo_ChIP_RX_noIP.align.txt\n";
+ $cmd .= "\trm -f *.align.txt\n";
$cmd .= "\t".$QUESTDIR.'/generate_QuEST_parameters.pl -rp '.$GENOMEDIR.'/QuEST_'.$genome.' -solexa_align_ChIP '.$data_dir.'/Libraries/'.$signal.'.txt -solexa_align_RX_noIP '.$data_dir.'/Libraries/'.$bg.'.txt -ap '.$data_dir.'/Tasks/'.$task.' -silent > '.$name.'.QuEST.log;'."\n";
$cmd .= "\t".$QUESTDIR.'/run_QuEST_with_param_file.pl -p QuEST.batch.pars >> '.$name.'.QuEST.log;'."\n";
$cmd .= "\t".$QUESTDIR.'/generate_QuEST_parameters.pl -rp '.$GENOMEDIR.'/QuEST_'.$genome.' -solexa_align_ChIP '.$data_dir.'/Libraries/'.$signal.'.txt -solexa_align_RX_noIP '.$data_dir.'/Libraries/'.$bg.'.txt -ap '.$data_dir.'/Tasks/'.$task.' -silent > '.$name.'.QuEST.log;'."\n";
$cmd .= "\t".$QUESTDIR.'/run_QuEST_with_param_file.pl -p QuEST.batch.pars >> '.$name.'.QuEST.log;'."\n";
- $cmd .= "\t".'rm -rf scores;'."\n\n";
+ $cmd .= "\t".'rm -rf scores;'."\n";
+ $cmd .= "\trm -f *.align.txt\n\n";
$cmd .= "peak_caller.ChIP.out.tab: peak_caller.ChIP.out\n";
$cmd .= "\t$root_dir/scripts/tabify_quest.sh \$< > \$@\n\n";
$cmd .= "peak_caller.ChIP.out.tab: peak_caller.ChIP.out\n";
$cmd .= "\t$root_dir/scripts/tabify_quest.sh \$< > \$@\n\n";
@@
-353,6
+353,7
@@
sub WritePeakCallingTasks {
$cmd .= "\tcat $data_dir/Libraries/$bg.txt | $root_dir/scripts/align_to_bed.pm | grep -v 'contam' | grep -v 'humRibosomal' > $bg.bed\n";
$cmd .= "\t$MACS -t $signal.bed -c ./$bg.bed --name=$name --pvalue=1e-10 --mfold=20 > $name.log 2> $name.err\n";
$cmd .= "\trm -f $signal.bed $bg.bed\n";
$cmd .= "\tcat $data_dir/Libraries/$bg.txt | $root_dir/scripts/align_to_bed.pm | grep -v 'contam' | grep -v 'humRibosomal' > $bg.bed\n";
$cmd .= "\t$MACS -t $signal.bed -c ./$bg.bed --name=$name --pvalue=1e-10 --mfold=20 > $name.log 2> $name.err\n";
$cmd .= "\trm -f $signal.bed $bg.bed\n";
+ $cmd .= "\trm -f Background.ELAND.pos $name.ELAND.pos $name.R0.*.bar Background.ELAND.sorted $name.ELAND.sorted\n";
$cmd .= "\t".'exit `grep -c "^CRITICAL" '.$name.'.err`'."\n\n";
$cmd .= "\n".$name."_peaks.bed: ".$name."_peaks.xls\n";
$cmd .= "\t".'exit `grep -c "^CRITICAL" '.$name.'.err`'."\n\n";
$cmd .= "\n".$name."_peaks.bed: ".$name."_peaks.xls\n";
diff --git
a/htswanalysis/scripts/LibrariesMakefile
b/htswanalysis/scripts/LibrariesMakefile
index 7305f6af2240cd55329c8f3fc2f4108aab1cda0f..3a30810f2f3ee8d96dcf96bab517cf7496d863de 100644
(file)
--- a/
htswanalysis/scripts/LibrariesMakefile
+++ b/
htswanalysis/scripts/LibrariesMakefile
@@
-18,20
+18,22
@@
HTML_DIR=/Library/WebServer/Documents/SequencingSummaries
# Error messages are collected so as not to bug the user. (if there are no matching files, ls errors.)
FILES=$(shell ls -1d $(DATA_DIR)/Flowcells/**/*.align*.txt 2>> LibrariesMakefile.err)
# Error messages are collected so as not to bug the user. (if there are no matching files, ls errors.)
FILES=$(shell ls -1d $(DATA_DIR)/Flowcells/**/*.align*.txt 2>> LibrariesMakefile.err)
-QC_FILES=$(shell ls -1d ~Data/Flowcells/**/ | awk -F/ '{print $$0"/"$$(NF-1)"_QC_Summary.html"}' )
+QC_FILES=$(shell ls -1d $(DATA_DIR)/Flowcells/**/ | awk -F/ '{print $$0"/"$$(NF-1)"_QC_Summary.html"}' )
LIBFILES=$(shell ls -1d $(DATA_DIR)/Libraries/.*.config 2>> LibrariesMakefile.err | sed -e s/config/txt/ -e "s/\/\./\//")
all: $(QC_FILES) $(FILES) $(DATA_DIR)/LibraryInfo.xml $(LIBFILES) $(DATA_DIR)/SequencingSummary.html Distribute
LIBFILES=$(shell ls -1d $(DATA_DIR)/Libraries/.*.config 2>> LibrariesMakefile.err | sed -e s/config/txt/ -e "s/\/\./\//")
all: $(QC_FILES) $(FILES) $(DATA_DIR)/LibraryInfo.xml $(LIBFILES) $(DATA_DIR)/SequencingSummary.html Distribute
+# TODO: Add error handling if flowcell_qc fails for some reason. Think about how we'd like this reported. Email?
$(QC_FILES):
cd $(DATA_DIR)/Flowcells/`basename $@ | awk -F_ '{print $$1}'` && $(MAKE) -f $(ROOT_DIR)/scripts/Flowcell_QC_Makefile
$(QC_FILES):
cd $(DATA_DIR)/Flowcells/`basename $@ | awk -F_ '{print $$1}'` && $(MAKE) -f $(ROOT_DIR)/scripts/Flowcell_QC_Makefile
-$(DATA_DIR)/Libraries/%.txt: $(DATA_DIR)/Libraries/.%.config | LibraryInfo.xml
+$(DATA_DIR)/Libraries/%.txt: $(DATA_DIR)/Libraries/.%.config | $(DATA_DIR)/LibraryInfo.xml
cat `cat $<` > $@;
$(DATA_DIR)/LibraryInfo.xml: $(QC_FILES)
$(ROOT_DIR)/scripts/CollectLibraries.pm `ls $(DATA_DIR)/Flowcells/**/*.align*.txt` > $@;
$(ROOT_DIR)/scripts/RecompileLibraries.pm $@ $(DATA_DIR)
cat `cat $<` > $@;
$(DATA_DIR)/LibraryInfo.xml: $(QC_FILES)
$(ROOT_DIR)/scripts/CollectLibraries.pm `ls $(DATA_DIR)/Flowcells/**/*.align*.txt` > $@;
$(ROOT_DIR)/scripts/RecompileLibraries.pm $@ $(DATA_DIR)
+ $(ROOT_DIR)/scripts/analys_track_main.py updLibInfo
$(DATA_DIR)/SequencingSummary.html: $(DATA_DIR)/LibraryInfo.xml
$(ROOT_DIR)/scripts/SummarizeLibrary.pm $< > $@;
$(DATA_DIR)/SequencingSummary.html: $(DATA_DIR)/LibraryInfo.xml
$(ROOT_DIR)/scripts/SummarizeLibrary.pm $< > $@;
diff --git
a/htswanalysis/scripts/SummarizeProject2.pm
b/htswanalysis/scripts/SummarizeProject2.pm
index ead12c8c184b90705e883d531a0a458cfd6df323..b509ac32480c326114883337b0d2ad5b501907d5 100755
(executable)
--- a/
htswanalysis/scripts/SummarizeProject2.pm
+++ b/
htswanalysis/scripts/SummarizeProject2.pm
@@
-41,6
+41,7
@@
for my $i (0..scalar(@{$xml->{PeakCalling}})-1) {
$desc{Summary} = `$root_dir/scripts/SummarizeMACS.pm $peakfile $negpeakfile`;
$desc{outfile} = "$caller_dir/".$name."_peaks.bed";
$desc{fasta} = "$caller_dir/".$name."_peaks.fasta";
$desc{Summary} = `$root_dir/scripts/SummarizeMACS.pm $peakfile $negpeakfile`;
$desc{outfile} = "$caller_dir/".$name."_peaks.bed";
$desc{fasta} = "$caller_dir/".$name."_peaks.fasta";
+ $desc{primer_design} = "$caller_dir/ValidationPrimers.html";
}
if($genome eq "scer") { $genome = "sacCer1"; }
}
if($genome eq "scer") { $genome = "sacCer1"; }
@@
-196,6
+197,7
@@
for(@peak_calling) {
print "<TR BGCOLOR=$color><TD><B>$hash{Name}</B></TD>";
print "<TD>$hash{Caller}</TD><TD>$hash{Summary}</TD>\n";
print "<TD><A HREF=$hash{outfile}>BED file</A><BR><A HREF=http://genome.ucsc.edu/cgi-bin/hgTracks?db=$hash{Genome}&hgt.customText=http://171.65.76.194/Tasks/$hash{Task}/$hash{outfile}>View in Genome Browser</A></TD><TD><A HREF=$hash{fasta}>FASTA</A></TD>\n";
print "<TR BGCOLOR=$color><TD><B>$hash{Name}</B></TD>";
print "<TD>$hash{Caller}</TD><TD>$hash{Summary}</TD>\n";
print "<TD><A HREF=$hash{outfile}>BED file</A><BR><A HREF=http://genome.ucsc.edu/cgi-bin/hgTracks?db=$hash{Genome}&hgt.customText=http://171.65.76.194/Tasks/$hash{Task}/$hash{outfile}>View in Genome Browser</A></TD><TD><A HREF=$hash{fasta}>FASTA</A></TD>\n";
+ if(exists($hash{primer_design})) { print "<TD><A HREF=$hash{primer_design}>Validation Primers</A></TD>\n"; }
print "</TR>\n";
}
}
print "</TR>\n";
}
}
diff --git
a/htswanalysis/scripts/count_bases.pm
b/htswanalysis/scripts/count_bases.pm
index 672718bf48573579d681281105b5baf67eea1116..a4f357da9f1835bcd927fb81cc021b635a16073a 100755
(executable)
--- a/
htswanalysis/scripts/count_bases.pm
+++ b/
htswanalysis/scripts/count_bases.pm
@@
-19,8
+19,7
@@
my $length;
while( my $read = <>) {
chomp $read;
while( my $read = <>) {
chomp $read;
- if(!defined($length)) { $length = length($read); }
- if($read =~ /\./) { next; }
+ $length = length($read);
$count++;
for(0..$length-1) {
my $base = uc(substr($read,$_,1));
$count++;
for(0..$length-1) {
my $base = uc(substr($read,$_,1));