From: Tim Reddy Tim Date: Tue, 25 Nov 2008 19:53:23 +0000 (+0000) Subject: Re-factored the repository to automatically calculate QC on every flowcell. I also... X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=f6d0f74c3a14b5591a58a951024338db748e810c Re-factored the repository to automatically calculate QC on every flowcell. I also split the library code from the flowcell_qc code into two different makefiles to make things cleaner. --- diff --git a/htswanalysis/scripts/Flowcell_QC_Makefile b/htswanalysis/scripts/Flowcell_QC_Makefile index 8f91e43..1c94fd0 100644 --- a/htswanalysis/scripts/Flowcell_QC_Makefile +++ b/htswanalysis/scripts/Flowcell_QC_Makefile @@ -5,11 +5,15 @@ FLOWCELL=$(shell pwd | awk -F/ '{print $$NF}') FILES=$(shell ls -1d *.align*.txt) QPCR_FILES=$(shell ls -1d *.align*.txt | sed -e s/txt/txt.qPCR/) COUNT_FILES=$(shell ls -1d *.align*.txt | sed -e s/txt/txt.count/) +CMPLX_FILES=$(shell ls -1d *.align*.txt | sed -e s/txt/txt.complexity/) PROFILE_FILES=$(shell ls -1d *.align*.txt | sed -e s/txt/txt.profile/) PROFILE_IMAGES=$(shell ls -1d *.align*.txt | sed -e s/txt/txt.profile.png/) PERCENT_BASE_IMAGES=$(shell ls -1d *.pf.txt.gz | sed -e s/pf.txt.gz/percent_base.png/) -all: $(QPCR_FILES) $(PROFILE_FILES) $(PROFILE_IMAGES) $(PERCENT_BASE_IMAGES) $(COUNT_FILES) $(FILES) $(FLOWCELL)_qPCR_summary.txt $(FLOWCELL)_qPCR_summary.html $(FLOWCELL)_LibraryInfo.xml $(FLOWCELL)_SequencingSummary.html $(FLOWCELL)_QC_Summary.html +all: $(QPCR_FILES) $(PROFILE_FILES) $(CMPLX_FILES) $(PROFILE_IMAGES) $(PERCENT_BASE_IMAGES) $(COUNT_FILES) $(FILES) $(FLOWCELL)_qPCR_summary.txt $(FLOWCELL)_qPCR_summary.html $(FLOWCELL)_LibraryInfo.xml $(FLOWCELL)_SequencingSummary.html $(FLOWCELL)_QC_Summary.html + +%.txt.complexity: %.txt + $(ROOT_DIR)/bin/complexity_count `basename $<` $< > $@ %.txt.count: %.txt grep -v contam $< | awk '{if(NF > 3) {print $$1} }' | wc -l > $@; @@ -44,5 +48,5 @@ $(FLOWCELL)_LibraryInfo.xml: $(COUNT_FILES) 
$(FLOWCELL)_SequencingSummary.html: $(FLOWCELL)_LibraryInfo.xml $(EXPTRACK_DIR)/scripts/SummarizeLibrary.pm $< > $@ -$(FLOWCELL)_QC_Summary.html: $(FLOWCELL)_SequencingSummary.html $(FLOWCELL)_qPCR_summary.html $(PROFILE_FILES) +$(FLOWCELL)_QC_Summary.html: $(FLOWCELL)_SequencingSummary.html $(FLOWCELL)_qPCR_summary.html $(PROFILE_IMAGES) $(PERCENT_BASE_IMAGES) $(EXPTRACK_DIR)/scripts/WriteQCSummary.pm $(FLOWCELL)_LibraryInfo.xml $(FLOWCELL)_qPCR_summary.txt > $@; diff --git a/htswanalysis/scripts/LibrariesMakefile b/htswanalysis/scripts/LibrariesMakefile index 0305e8f..71abc37 100644 --- a/htswanalysis/scripts/LibrariesMakefile +++ b/htswanalysis/scripts/LibrariesMakefile @@ -1,11 +1,5 @@ # -# Makefile to bring new flowcells ointo the repository. A few bits of computation are done upfront: -# -# qPCR -- checks in-silico to see if the lane looks like a factor we know about -# count -- counts the number of (not adapter) aligned reads in the lane -# complexity -- looks for isolated peaks that are a symptom of low-complexity libraries (or sequencing of a restriction enzyme) -# profile -- profile of read density relative to the TSS -# libfiles -- the configure files for the libraries +# Makefile to bring new flowcells into the repository, calculating QC for each of them. # # Once the makefile has run once (please, for your sanity, use make -j N (where N is the number of CPU cores you have), # then you need to run it again to actually make the libraries proper. This will be (needs to be fixed). @@ -24,43 +18,25 @@ HTML_DIR=/Library/WebServer/Documents/SequencingSummaries # Error messages are collected so as not to bug the user. (if there are no matching files, ls errors.) 
FILES=$(shell ls -1d $(DATA_DIR)/Flowcells/**/*.align*.txt 2>> LibrariesMakefile.err) -QPCR_FILES=$(shell ls -1d $(DATA_DIR)/Flowcells/**/*.align*.txt 2>> LibrariesMakefile.err | sed -e s/txt/txt.qPCR/) -COUNT_FILES=$(shell ls -1d $(DATA_DIR)/Flowcells/**/*.align*.txt 2>> LibrariesMakefile.err | sed -e s/txt/txt.count/) -CMPLX_FILES=$(shell ls -1d $(DATA_DIR)/Flowcells/**/*.align*.txt 2>> LibrariesMakefile.err | sed -e s/txt/txt.complexity/) -PROFILE_FILES=$(shell ls -1d $(DATA_DIR)/Flowcells/**/*.align*.txt 2>> LibrariesMakefile.err | sed -e s/txt/txt.profile/) +QC_FILES=$(shell ls -1d ~Data/Flowcells/**/ | awk -F/ '{print $$0"/"$$(NF-1)"_QC_Summary.html"}' ) LIBFILES=$(shell ls -1d $(DATA_DIR)/Libraries/.*.config 2>> LibrariesMakefile.err | sed -e s/config/txt/ -e "s/\/\./\//") -all: $(QPCR_FILES) $(COUNT_FILES) $(FILES) $(DATA_DIR)/qPCR_summary.txt $(DATA_DIR)/LibraryInfo.xml $(LIBFILES) $(DATA_DIR)/SequencingSummary.html Distribute - -%.txt.complexity: %.txt - $(ROOT_DIR)/bin/complexity_count `basename $<` $< > $@ - -%.txt.count: %.txt - grep -v contam $< | awk '{if(NF > 3) {print $$1} }' | wc -l > $@; +all: $(QC_FILES) $(FILES) $(DATA_DIR)/LibraryInfo.xml $(LIBFILES) $(DATA_DIR)/SequencingSummary.html Distribute -%.txt.qPCR: %.txt - $(ROOT_DIR)/bin/qPCR $(subst .txt.qPCR,.txt,$@) $(ROOT_DIR)/reference_data/GenericBackground $(ROOT_DIR)/reference_data/qPCR_Tests/ | sort -k 2 -g -r > $@ - -%.txt.profile: %.txt - $(ROOT_DIR)/profile_reads/profile_reads_against_features `echo $@ | sed -e s/\.profile//` $(ROOT_DIR)/reference_data/`basename $@ | awk -F\. 
'{ print $$3 }'`_tx_start_sites > $@ - $(ROOT_DIR)/profile_reads/profile_to_svg.pm $@ > $@.svg +$(QC_FILES): + cd $(DATA_DIR)/Flowcells/`basename $@ | awk -F_ '{print $$1}'` && $(MAKE) -f $(ROOT_DIR)/scripts/Flowcell_QC_Makefile $(DATA_DIR)/Libraries/%.txt: $(DATA_DIR)/Libraries/.%.config | LibraryInfo.xml cat `cat $<` > $@; -$(DATA_DIR)/qPCR_summary.txt: $(QPCR_FILES) - rm -f $@; - for f in $^; do echo `echo $$f` `cat $$f | head -n 1` >> $@; done; - cat $@ | sort -k 2,1 -g -r > t && mv t $@; - -$(DATA_DIR)/LibraryInfo.xml: $(COUNT_FILES) $(CMPLX_FILES) +$(DATA_DIR)/LibraryInfo.xml: $(QC_FILES) $(ROOT_DIR)/scripts/CollectLibraries.pm `ls $(DATA_DIR)/Flowcells/**/*.align*.txt` > $@; $(ROOT_DIR)/scripts/RecompileLibraries.pm $@ $(DATA_DIR) $(DATA_DIR)/SequencingSummary.html: $(DATA_DIR)/LibraryInfo.xml $(ROOT_DIR)/scripts/SummarizeLibrary.pm $< > $@; -Distribute: SequencingSummary.html qPCR_summary.txt +Distribute: SequencingSummary.html cp $^ $(HTML_DIR); Libraries: $(LIBFILES)