From 092953969dddabc2b80d784b70a19683d43937d7 Mon Sep 17 00:00:00 2001 From: Tim Reddy Tim Date: Fri, 12 Dec 2008 00:33:50 +0000 Subject: [PATCH] Final installation of methylseq analysis. --- htswanalysis/reference_data/qPCR_Tests/RNA | 6 ++++ htswanalysis/scripts/ConfigureTasks.pm | 11 ++++--- htswanalysis/scripts/SummarizeProject2.pm | 12 ++++---- htswanalysis/scripts/Summarize_Methylseq.pm | 33 ++++++++++++++------- 4 files changed, 40 insertions(+), 22 deletions(-) create mode 100755 htswanalysis/reference_data/qPCR_Tests/RNA diff --git a/htswanalysis/reference_data/qPCR_Tests/RNA b/htswanalysis/reference_data/qPCR_Tests/RNA new file mode 100755 index 0000000..de12e2c --- /dev/null +++ b/htswanalysis/reference_data/qPCR_Tests/RNA @@ -0,0 +1,6 @@ +1 chr19 18251563 18253432 1 +2 chr12 6517076 6517400 1 +3 chr7 5534457 5534627 1 +4 chrX 153412800 153413450 1 +5 chr22 39902800 39904900 1 +6 chr6 44327720 44327950 1 diff --git a/htswanalysis/scripts/ConfigureTasks.pm b/htswanalysis/scripts/ConfigureTasks.pm index 8fe851e..d4348c8 100755 --- a/htswanalysis/scripts/ConfigureTasks.pm +++ b/htswanalysis/scripts/ConfigureTasks.pm @@ -52,8 +52,7 @@ my $SUMMARIZE_PROJECT="$root_dir/scripts/SummarizeProject2.pm"; ### ### Important: get the xml file form the server. ### -#my $projects_file = getProjectsXML($parm); -my $projects_file = "/Users/Data/Projects.xml"; +my $projects_file = getProjectsXML($parm); my $xmldoc = XML::Simple->new(); @@ -101,7 +100,7 @@ close(MAKE); print STDERR "Project makefile compete. Starting to build tasks\n"; -#`cd $data_dir/Projects && make -j 4 -f TaskMakefile > make_tasks.log 2> make_tasks.err`; +`cd $data_dir/Projects && make -j 4 -f TaskMakefile > make_tasks.log 2> make_tasks.err`; sub writeProject { my $project = shift; @@ -187,7 +186,7 @@ sub WriteMethylseqTasks { $seqcheck .= "\tif [ ! -e $root_dir/reference_data/".$genome."_methylseq_regions ]; then $root_dir/scripts/analys_track_main.py updsts $task \"No methylseq regions available for $genome.\"; fi;\n"; my $cmd = "$outfile: $data_dir/Libraries/$msp1.txt $data_dir/Libraries/$hpa2.txt\n"; - $cmd .= "\t$METHYLSEQDIR/Methylseq_Analysis $data_dir/Libraries/$msp1.txt $data_dir/Libraries/$hpa2.txt $root_dir/reference_data/".$genome."_methylseq_regions > \$@\n"; + $cmd .= "\t$METHYLSEQDIR/Methylseq_Analysis $data_dir/Libraries/$msp1.txt $data_dir/Libraries/$hpa2.txt $root_dir/reference_data/".$genome."_methylseq_regions $name > \$@\n"; writeTask($mseq, $root, $outfile, $cmd, $seqcheck); $tasks .= $task." "; @@ -381,8 +380,8 @@ sub WritePeakCallingTasks { $cmd .= "\n.PRECIOUS: ".$name.'_peaks.xls '.$name.'_peaks.bed '.$name."_peaks.fasta\n\n"; $cmd .= "\n".$name."_peaks.xls: $data_dir/Libraries/$signal.txt $data_dir/Libraries/$bg.txt\n"; - $cmd .= "\tcat $data_dir/Libraries/$signal.txt | $root_dir/scripts/align_to_bed.pm | grep -v 'contam' | grep -v 'humRibosomal' > $signal.bed\n"; - $cmd .= "\tcat $data_dir/Libraries/$bg.txt | $root_dir/scripts/align_to_bed.pm | grep -v 'contam' | grep -v 'humRibosomal' > $bg.bed\n"; + $cmd .= "\tcat $data_dir/Libraries/$signal.txt | $root_dir/scripts/align_to_bed.pm | grep -v 'splice' | grep -v 'contam' | grep -v 'humRibosomal' > $signal.bed\n"; + $cmd .= "\tcat $data_dir/Libraries/$bg.txt | $root_dir/scripts/align_to_bed.pm | grep -v 'splice' | grep -v 'contam' | grep -v 'humRibosomal' > $bg.bed\n"; $cmd .= "\t$MACS -t $signal.bed -c ./$bg.bed --name=$name --pvalue=1e-10 --mfold=20 > $name.log 2> $name.err\n"; $cmd .= "\trm -f $signal.bed $bg.bed\n"; $cmd .= "\trm -f Background.ELAND.pos $name.ELAND.pos $name.R0.*.bar Background.ELAND.sorted $name.ELAND.sorted\n"; diff --git a/htswanalysis/scripts/SummarizeProject2.pm b/htswanalysis/scripts/SummarizeProject2.pm index 25ffa02..1ba8334 100755 --- a/htswanalysis/scripts/SummarizeProject2.pm +++ b/htswanalysis/scripts/SummarizeProject2.pm @@ -7,7 +7,7 @@ my $library_info = shift; my %libs; my %quest; my $expxml = XML::Simple->new(); -my $xml = $expxml->XMLin("Project.xml", ForceArray => [ qw(ComparePeakCalls CompareLibraries PeakCalling ProfileReads qPCR MotifFinding)]); +my $xml = $expxml->XMLin("Project.xml", ForceArray => [ qw(ComparePeakCalls CompareLibraries PeakCalling ProfileReads qPCR MotifFinding Methylseq)]); my @peak_calling = (); my $project_name = $xml->{Name}; @@ -76,8 +76,10 @@ $qPCR_Summary .= "\n"; my $methylseq_summary = ""; if(exists($xml->{Methylseq})) { - $methylseq_sumary "\n"; + $methylseq_summary .= "

Methylseq Analysis

\n"; + $methylseq_summary .= "
\n"; $methylseq_summary .= "\n"; + $methylseq_summary .= "\n"; $methylseq_summary .= "\n"; $methylseq_summary .= "\n"; $methylseq_summary .= "\n"; @@ -93,7 +95,7 @@ if(exists($xml->{Methylseq})) { my $msp1 = $xml->{Methylseq}->[$i]->{Msp1}->{Library}; my $hpa2 = $xml->{Methylseq}->[$i]->{Hpa2}->{Library}; - $methylseq_summary .= `$root_dir/scripts/SummarizeMethylseq.pm $name $task $msp1 $hpa2 $genome`; + $methylseq_summary .= `$root_dir/scripts/Summarize_Methylseq.pm $name $task ../../Tasks $msp1 $hpa2 $genome`; } $methylseq_summary .= "
Taskmsp1hpa2Assayed
\n"; @@ -112,7 +114,7 @@ if(exists($xml->{ProfileReads})) { $libs{$lib} = 0; if($genome eq "scer") { $genome = "sacCer1"; } - $profile_summary .= "$name ($lib)$lib.wig.gz
View in UCSC Genome Browser\n"; + $profile_summary .= "$name ($lib)$lib.wig.gz
View in UCSC Genome Browser\n"; $profile_summary .= "\n"; } $profile_summary .= "\n"; @@ -223,7 +225,7 @@ for(@peak_calling) { my $color = "#EEEEFF"; print "$hash{Name}"; print "$hash{Caller}$hash{Summary}\n"; - print "BED file
View in Genome BrowserFASTA\n"; + print "BED file
View in Genome BrowserFASTA\n"; if(exists($hash{primer_design})) { print "Validation Primers\n"; } print "\n"; } diff --git a/htswanalysis/scripts/Summarize_Methylseq.pm b/htswanalysis/scripts/Summarize_Methylseq.pm index 044f6c9..d5f84ef 100755 --- a/htswanalysis/scripts/Summarize_Methylseq.pm +++ b/htswanalysis/scripts/Summarize_Methylseq.pm @@ -7,21 +7,33 @@ my $name = shift; if(!defined($name)) { print STDERR "Error: No name provided to summarize methylseq\n"; exit(1); } my $task = shift; if(!defined($task)) { print STDERR "Error: No task provided to summarize methylseq\n"; exit(1); } +my $tasks_root = shift; +if(!defined($tasks_root)) { print STDERR "Error: No root task directory provided to summarize methylseq\n"; exit(1); } my $msp1_lib = shift; if(!defined($msp1_lib)) { print STDERR "Error: No msp1 library provided to summarize methylseq\n"; exit(1); } my $hpa2_lib = shift; if(!defined($hpa2_lib)) { print STDERR "Error: No hpa2 library provided to summarize methylseq\n"; exit(1); } my $genome = shift; -my $assayed_bed = "Assayed.bed"; -my $methylated_bed = "Methylated.bed"; +my $methylseq_bed = "MethylseqCalls.bed"; my $methylseq_calls = "MethylseqCalls.tab"; my $summary = "methylseq_summary"; -if(!defined($summary)) { return; } -open(FILE,$summary); +if(! -e $tasks_root."/".$task."/".$summary) { + printf "Processing...\n"; + exit; +} -my $tag_count = ; chomp $tag_count; +open(FILE,$tasks_root."/".$task."/".$summary); + +my $tag_count = ; + +if(!defined($tag_count)) { + printf "Processing...\n"; + exit; +} + +chomp $tag_count; $tag_count =~ /^(\d+) MSP1 tags, (\d+) HPA2 tags$/; my($msp_count,$hpa_count) = ($1,$2); @@ -37,6 +49,7 @@ my $error_count = ; chomp $error_count; $error_count =~ /^(\d+) of \d+ regions with error/; my $n_error = $1; + my $pcnt_assayed = $n_assayed / $n_regions; my $pcnt_methylated = $n_methylated / $n_assayed; my $pcnt_error = $n_error / $n_regions; @@ -57,10 +70,8 @@ elsif($pcnt_error > 0.01) { $error_color = "4444FF"; } else {$error_color= "66FF66"; } print ""; -printf "$msp1_lib$hpa2_lib$n_assayed (%.4f)$n_methylated (%0.4f)$n_error (%0.4f)",$pcnt_assayed,$pcnt_methylated,$pcnt_error; -print "Calls
\n"; -print "Assayed Regions"; -print "(Genome Browser)
\n"; -print "Methylated Regions "; -print "(Genome Browser)\n"; +printf "$name$msp1_lib$hpa2_lib$n_assayed (%.4f)$n_methylated (%0.4f)$n_error (%0.4f)",$pcnt_assayed,$pcnt_methylated,$pcnt_error; +print "Results
\n"; +print "(Genome Browser)
\n"; +print ""; print ""; -- 2.30.2