From c95c72775743c6cdcda457e4ccc3328b204d2fbf Mon Sep 17 00:00:00 2001 From: Brandon King Date: Tue, 16 Jun 2009 22:57:33 +0000 Subject: [PATCH] Library view now shows when a library has archived data. --- trunk/TODO.txt | 49 + trunk/docs/Conv_CaltechDB_Nov112008.txt | 76 + trunk/docs/Conv_StanfordDB_2009Jan20.txt | 21 + trunk/docs/conv_caltech_v0.1_made_for.py | 108 + trunk/docs/conv_caltech_v0.1_to_htsw.py | 188 + trunk/docs/gaworkflow.xmi | 452 + trunk/docs/htsworkflow.ini.example | 22 + trunk/htsworkflow/__init__.py | 0 trunk/htsworkflow/automation/__init__.py | 0 trunk/htsworkflow/automation/copier.py | 288 + trunk/htsworkflow/automation/runner.py | 224 + trunk/htsworkflow/automation/spoolwatcher.py | 319 + .../automation/test/test_runner.py | 46 + trunk/htsworkflow/frontend/__init__.py | 0 .../htsworkflow/frontend/analysis/__init__.py | 0 trunk/htsworkflow/frontend/analysis/admin.py | 31 + trunk/htsworkflow/frontend/analysis/main.py | 118 + trunk/htsworkflow/frontend/analysis/models.py | 101 + trunk/htsworkflow/frontend/analysis/urls.py | 6 + .../frontend/eland_config/__init__.py | 0 .../frontend/eland_config/admin.py | 4 + .../frontend/eland_config/forms.py | 163 + .../frontend/eland_config/models.py | 3 + .../htsworkflow/frontend/eland_config/urls.py | 10 + .../frontend/eland_config/views.py | 415 + .../frontend/experiments/__init__.py | 0 .../htsworkflow/frontend/experiments/admin.py | 80 + .../frontend/experiments/experiments.py | 199 + .../frontend/experiments/models.py | 161 + .../htsworkflow/frontend/experiments/urls.py | 12 + .../htsworkflow/frontend/experiments/views.py | 34 + .../frontend/inventory/__init__.py | 0 trunk/htsworkflow/frontend/inventory/admin.py | 35 + .../htsworkflow/frontend/inventory/models.py | 126 + trunk/htsworkflow/frontend/inventory/urls.py | 5 + trunk/htsworkflow/frontend/inventory/views.py | 117 + trunk/htsworkflow/frontend/manage.py | 11 + .../frontend/reports/LibraryInfo.xml | 1214 +++ .../htsworkflow/frontend/reports/__init__.py | 0 trunk/htsworkflow/frontend/reports/admin.py | 10 + .../frontend/reports/libinfopar.py | 103 + trunk/htsworkflow/frontend/reports/models.py | 238 + trunk/htsworkflow/frontend/reports/reports.py | 308 + trunk/htsworkflow/frontend/reports/urls.py | 9 + trunk/htsworkflow/frontend/reports/utils.py | 61 + .../htsworkflow/frontend/samples/__init__.py | 0 trunk/htsworkflow/frontend/samples/admin.py | 148 + .../frontend/samples/changelist.py | 239 + trunk/htsworkflow/frontend/samples/models.py | 249 + trunk/htsworkflow/frontend/samples/results.py | 134 + trunk/htsworkflow/frontend/samples/tests.py | 102 + trunk/htsworkflow/frontend/samples/views.py | 393 + trunk/htsworkflow/frontend/settings.py | 175 + .../htsworkflow/frontend/static/css/base.css | 14 + .../frontend/static/css/changelists.css | 50 + .../frontend/static/css/click-table.css | 19 + .../frontend/static/css/data-browse-index.css | 2 + .../htsworkflow/frontend/static/css/forms.css | 84 + .../frontend/static/css/global.css | 142 + .../frontend/static/css/layout.css | 29 + .../htsworkflow/frontend/static/css/null.css | 1 + .../frontend/static/css/patch-iewin.css | 8 + .../frontend/static/img/changelist-bg.gif | Bin 0 -> 58 bytes .../frontend/static/img/default-bg.gif | Bin 0 -> 844 bytes .../frontend/static/img/hdd_unmount.png | Bin 0 -> 916 bytes .../frontend/static/img/icon_searchbox.png | Bin 0 -> 667 bytes .../frontend/static/img/nav-bg-reverse.gif | Bin 0 -> 186 bytes .../frontend/static/img/nav-bg.gif | Bin 0 -> 273 bytes .../frontend/static/img/readme.txt | 11 + 
.../frontend/templates/admin/base_site.html | 16 + .../frontend/templates/admin/index.html | 135 + .../htsworkflow/frontend/templates/base.html | 56 + .../frontend/templates/base_site.html | 10 + .../templates/experiments/detail.html | 7 + .../templates/experiments/flowcellSheet.html | 117 + .../frontend/templates/experiments/index.html | 9 + .../frontend/templates/reports/report.html | 11 + .../templates/samples/library_detail.html | 140 + .../templates/samples/library_index.html | 59 + .../frontend/templates/search_form.html | 18 + trunk/htsworkflow/frontend/urls.py | 50 + trunk/htsworkflow/pipelines/__init__.py | 6 + trunk/htsworkflow/pipelines/bustard.py | 331 + trunk/htsworkflow/pipelines/configure_run.py | 608 ++ trunk/htsworkflow/pipelines/eland.py | 605 ++ trunk/htsworkflow/pipelines/firecrest.py | 144 + trunk/htsworkflow/pipelines/genome_mapper.py | 141 + trunk/htsworkflow/pipelines/gerald.py | 208 + trunk/htsworkflow/pipelines/ipar.py | 239 + trunk/htsworkflow/pipelines/recipe_parser.py | 48 + .../htsworkflow/pipelines/retrieve_config.py | 186 + trunk/htsworkflow/pipelines/run_status.py | 454 + trunk/htsworkflow/pipelines/runfolder.py | 499 + trunk/htsworkflow/pipelines/summary.py | 302 + trunk/htsworkflow/pipelines/test/__init__.py | 0 .../pipelines/test/simulate_runfolder.py | 186 + .../pipelines/test/test_genome_mapper.py | 33 + .../pipelines/test/test_runfolder026.py | 474 + .../pipelines/test/test_runfolder030.py | 898 ++ .../pipelines/test/test_runfolder110.py | 302 + .../pipelines/test/test_runfolder_ipar100.py | 301 + .../pipelines/test/test_runfolder_ipar130.py | 343 + .../pipelines/test/test_runfolder_pair.py | 327 + .../pipelines/test/testdata/IPAR1.01.params | 63 + .../test/testdata/Summary-ipar130.htm | 9325 +++++++++++++++++ .../testdata/Summary-paired-pipeline110.htm | 662 ++ .../test/testdata/Summary-pipeline100.htm | 598 ++ .../test/testdata/Summary-pipeline110.htm | 400 + .../test/testdata/bustard-config132.xml | 129 + .../test/testdata/gerald_config_0.2.6.xml | 82 + .../test/testdata/gerald_config_1.0.xml | 156 + trunk/htsworkflow/util/__init__.py | 0 trunk/htsworkflow/util/alphanum.py | 69 + trunk/htsworkflow/util/ethelp.py | 32 + trunk/htsworkflow/util/fctracker.py | 201 + trunk/htsworkflow/util/hdquery.py | 25 + trunk/htsworkflow/util/makebed.py | 170 + trunk/htsworkflow/util/mount.py | 64 + trunk/htsworkflow/util/opener.py | 57 + trunk/htsworkflow/util/queuecommands.py | 99 + trunk/htsworkflow/util/test/test_alphanum.py | 39 + trunk/htsworkflow/util/test/test_ethelp.py | 35 + trunk/htsworkflow/util/test/test_makebed.py | 55 + .../util/test/test_queuecommands.py | 58 + trunk/scripts/configure_pipeline | 88 + trunk/scripts/copier | 6 + trunk/scripts/elandseq | 51 + trunk/scripts/gerald2bed.py | 96 + trunk/scripts/library.py | 39 + trunk/scripts/make-library-tree | 241 + trunk/scripts/makebed | 113 + trunk/scripts/mark_archived_data | 102 + trunk/scripts/rerun_eland.py | 158 + trunk/scripts/retrieve_config | 52 + trunk/scripts/runfolder | 138 + trunk/scripts/runner | 6 + trunk/scripts/spoolwatcher | 6 + trunk/scripts/srf | 183 + trunk/setup.py | 34 + trunk/templates/config_form.html | 17 + trunk/test/test_copier.py | 69 + trunk/test/tree.py | 81 + 142 files changed, 28229 insertions(+) create mode 100644 trunk/TODO.txt create mode 100644 trunk/docs/Conv_CaltechDB_Nov112008.txt create mode 100644 trunk/docs/Conv_StanfordDB_2009Jan20.txt create mode 100644 trunk/docs/conv_caltech_v0.1_made_for.py create mode 100644 trunk/docs/conv_caltech_v0.1_to_htsw.py create 
mode 100644 trunk/docs/gaworkflow.xmi create mode 100644 trunk/docs/htsworkflow.ini.example create mode 100644 trunk/htsworkflow/__init__.py create mode 100644 trunk/htsworkflow/automation/__init__.py create mode 100644 trunk/htsworkflow/automation/copier.py create mode 100644 trunk/htsworkflow/automation/runner.py create mode 100644 trunk/htsworkflow/automation/spoolwatcher.py create mode 100644 trunk/htsworkflow/automation/test/test_runner.py create mode 100644 trunk/htsworkflow/frontend/__init__.py create mode 100644 trunk/htsworkflow/frontend/analysis/__init__.py create mode 100644 trunk/htsworkflow/frontend/analysis/admin.py create mode 100644 trunk/htsworkflow/frontend/analysis/main.py create mode 100644 trunk/htsworkflow/frontend/analysis/models.py create mode 100644 trunk/htsworkflow/frontend/analysis/urls.py create mode 100644 trunk/htsworkflow/frontend/eland_config/__init__.py create mode 100644 trunk/htsworkflow/frontend/eland_config/admin.py create mode 100644 trunk/htsworkflow/frontend/eland_config/forms.py create mode 100644 trunk/htsworkflow/frontend/eland_config/models.py create mode 100644 trunk/htsworkflow/frontend/eland_config/urls.py create mode 100644 trunk/htsworkflow/frontend/eland_config/views.py create mode 100755 trunk/htsworkflow/frontend/experiments/__init__.py create mode 100644 trunk/htsworkflow/frontend/experiments/admin.py create mode 100755 trunk/htsworkflow/frontend/experiments/experiments.py create mode 100755 trunk/htsworkflow/frontend/experiments/models.py create mode 100755 trunk/htsworkflow/frontend/experiments/urls.py create mode 100755 trunk/htsworkflow/frontend/experiments/views.py create mode 100644 trunk/htsworkflow/frontend/inventory/__init__.py create mode 100644 trunk/htsworkflow/frontend/inventory/admin.py create mode 100644 trunk/htsworkflow/frontend/inventory/models.py create mode 100644 trunk/htsworkflow/frontend/inventory/urls.py create mode 100644 trunk/htsworkflow/frontend/inventory/views.py create mode 100644 trunk/htsworkflow/frontend/manage.py create mode 100644 trunk/htsworkflow/frontend/reports/LibraryInfo.xml create mode 100644 trunk/htsworkflow/frontend/reports/__init__.py create mode 100644 trunk/htsworkflow/frontend/reports/admin.py create mode 100644 trunk/htsworkflow/frontend/reports/libinfopar.py create mode 100644 trunk/htsworkflow/frontend/reports/models.py create mode 100755 trunk/htsworkflow/frontend/reports/reports.py create mode 100644 trunk/htsworkflow/frontend/reports/urls.py create mode 100644 trunk/htsworkflow/frontend/reports/utils.py create mode 100644 trunk/htsworkflow/frontend/samples/__init__.py create mode 100644 trunk/htsworkflow/frontend/samples/admin.py create mode 100644 trunk/htsworkflow/frontend/samples/changelist.py create mode 100644 trunk/htsworkflow/frontend/samples/models.py create mode 100644 trunk/htsworkflow/frontend/samples/results.py create mode 100644 trunk/htsworkflow/frontend/samples/tests.py create mode 100644 trunk/htsworkflow/frontend/samples/views.py create mode 100644 trunk/htsworkflow/frontend/settings.py create mode 100644 trunk/htsworkflow/frontend/static/css/base.css create mode 100644 trunk/htsworkflow/frontend/static/css/changelists.css create mode 100644 trunk/htsworkflow/frontend/static/css/click-table.css create mode 100644 trunk/htsworkflow/frontend/static/css/data-browse-index.css create mode 100644 trunk/htsworkflow/frontend/static/css/forms.css create mode 100644 trunk/htsworkflow/frontend/static/css/global.css create mode 100644 
trunk/htsworkflow/frontend/static/css/layout.css create mode 100644 trunk/htsworkflow/frontend/static/css/null.css create mode 100644 trunk/htsworkflow/frontend/static/css/patch-iewin.css create mode 100644 trunk/htsworkflow/frontend/static/img/changelist-bg.gif create mode 100644 trunk/htsworkflow/frontend/static/img/default-bg.gif create mode 100755 trunk/htsworkflow/frontend/static/img/hdd_unmount.png create mode 100644 trunk/htsworkflow/frontend/static/img/icon_searchbox.png create mode 100644 trunk/htsworkflow/frontend/static/img/nav-bg-reverse.gif create mode 100644 trunk/htsworkflow/frontend/static/img/nav-bg.gif create mode 100644 trunk/htsworkflow/frontend/static/img/readme.txt create mode 100644 trunk/htsworkflow/frontend/templates/admin/base_site.html create mode 100644 trunk/htsworkflow/frontend/templates/admin/index.html create mode 100644 trunk/htsworkflow/frontend/templates/base.html create mode 100644 trunk/htsworkflow/frontend/templates/base_site.html create mode 100644 trunk/htsworkflow/frontend/templates/experiments/detail.html create mode 100644 trunk/htsworkflow/frontend/templates/experiments/flowcellSheet.html create mode 100644 trunk/htsworkflow/frontend/templates/experiments/index.html create mode 100644 trunk/htsworkflow/frontend/templates/reports/report.html create mode 100644 trunk/htsworkflow/frontend/templates/samples/library_detail.html create mode 100644 trunk/htsworkflow/frontend/templates/samples/library_index.html create mode 100644 trunk/htsworkflow/frontend/templates/search_form.html create mode 100644 trunk/htsworkflow/frontend/urls.py create mode 100644 trunk/htsworkflow/pipelines/__init__.py create mode 100644 trunk/htsworkflow/pipelines/bustard.py create mode 100644 trunk/htsworkflow/pipelines/configure_run.py create mode 100644 trunk/htsworkflow/pipelines/eland.py create mode 100644 trunk/htsworkflow/pipelines/firecrest.py create mode 100644 trunk/htsworkflow/pipelines/genome_mapper.py create mode 100644 trunk/htsworkflow/pipelines/gerald.py create mode 100644 trunk/htsworkflow/pipelines/ipar.py create mode 100644 trunk/htsworkflow/pipelines/recipe_parser.py create mode 100644 trunk/htsworkflow/pipelines/retrieve_config.py create mode 100644 trunk/htsworkflow/pipelines/run_status.py create mode 100644 trunk/htsworkflow/pipelines/runfolder.py create mode 100644 trunk/htsworkflow/pipelines/summary.py create mode 100644 trunk/htsworkflow/pipelines/test/__init__.py create mode 100644 trunk/htsworkflow/pipelines/test/simulate_runfolder.py create mode 100644 trunk/htsworkflow/pipelines/test/test_genome_mapper.py create mode 100644 trunk/htsworkflow/pipelines/test/test_runfolder026.py create mode 100644 trunk/htsworkflow/pipelines/test/test_runfolder030.py create mode 100644 trunk/htsworkflow/pipelines/test/test_runfolder110.py create mode 100644 trunk/htsworkflow/pipelines/test/test_runfolder_ipar100.py create mode 100644 trunk/htsworkflow/pipelines/test/test_runfolder_ipar130.py create mode 100644 trunk/htsworkflow/pipelines/test/test_runfolder_pair.py create mode 100644 trunk/htsworkflow/pipelines/test/testdata/IPAR1.01.params create mode 100644 trunk/htsworkflow/pipelines/test/testdata/Summary-ipar130.htm create mode 100644 trunk/htsworkflow/pipelines/test/testdata/Summary-paired-pipeline110.htm create mode 100644 trunk/htsworkflow/pipelines/test/testdata/Summary-pipeline100.htm create mode 100644 trunk/htsworkflow/pipelines/test/testdata/Summary-pipeline110.htm create mode 100644 trunk/htsworkflow/pipelines/test/testdata/bustard-config132.xml create 
mode 100644 trunk/htsworkflow/pipelines/test/testdata/gerald_config_0.2.6.xml
create mode 100644 trunk/htsworkflow/pipelines/test/testdata/gerald_config_1.0.xml
create mode 100644 trunk/htsworkflow/util/__init__.py
create mode 100644 trunk/htsworkflow/util/alphanum.py
create mode 100644 trunk/htsworkflow/util/ethelp.py
create mode 100644 trunk/htsworkflow/util/fctracker.py
create mode 100644 trunk/htsworkflow/util/hdquery.py
create mode 100755 trunk/htsworkflow/util/makebed.py
create mode 100644 trunk/htsworkflow/util/mount.py
create mode 100644 trunk/htsworkflow/util/opener.py
create mode 100644 trunk/htsworkflow/util/queuecommands.py
create mode 100644 trunk/htsworkflow/util/test/test_alphanum.py
create mode 100644 trunk/htsworkflow/util/test/test_ethelp.py
create mode 100644 trunk/htsworkflow/util/test/test_makebed.py
create mode 100644 trunk/htsworkflow/util/test/test_queuecommands.py
create mode 100644 trunk/scripts/configure_pipeline
create mode 100644 trunk/scripts/copier
create mode 100755 trunk/scripts/elandseq
create mode 100644 trunk/scripts/gerald2bed.py
create mode 100644 trunk/scripts/library.py
create mode 100644 trunk/scripts/make-library-tree
create mode 100755 trunk/scripts/makebed
create mode 100755 trunk/scripts/mark_archived_data
create mode 100644 trunk/scripts/rerun_eland.py
create mode 100644 trunk/scripts/retrieve_config
create mode 100644 trunk/scripts/runfolder
create mode 100644 trunk/scripts/runner
create mode 100644 trunk/scripts/spoolwatcher
create mode 100644 trunk/scripts/srf
create mode 100644 trunk/setup.py
create mode 100644 trunk/templates/config_form.html
create mode 100644 trunk/test/test_copier.py
create mode 100644 trunk/test/tree.py
diff --git a/trunk/TODO.txt b/trunk/TODO.txt
new file mode 100644
index 0000000..27f2fdd
--- /dev/null
+++ b/trunk/TODO.txt
@@ -0,0 +1,49 @@
+Improvements:
+
+* Auto restart spoolwatcher's watch
+
+  * use dbus to detect a newly mounted drive (hard to do)
+  * spoolwatcher hangs out on jabber and accepts a "make new drive" command
+    (which runs a script to format/mount/watch the drive) (security hole)
+  * a script is added to jumpgate which does the drive formatting/mounting/etc
+    and then when it finishes sends an xml-rpc message to spoolwatcher to
+    start watching again.
+
+* Change the umask for the rsync so files are group-writable for multiple
+  users; it really, really needs to be 002
+* the directory also needs to be setgid
+* need to make sure that there's a final rsync that finishes without copying any files
+* daemon processes need to restart
+
+  * most likely solution: they should detect if there's a currently running
+    version and stop running. This'd allow a cron script to restart them
+
+* The machine to machine communication needs to be via xml-rpc
+
+  * Update benderjab and/or xmppy to send/receive xml-rpc messages
+
+* Add logging to everything
+
+  * high priority messages go out as jabber messages
+  * low priority go to disk
+  * some subset of recent messages should be stored in ram so they can
+    be retrieved by a user jabber status message (or a web page view)
+
+* Generate config files
+
+  * For goat (from brandon's web interface)
+  * For bed file generator
+
+* Need longish term storage to make generating bed and mapped read files
+  from multiple lanes on multiple flow cells
+
+* View current status page.
+
+  * once xml-rpc is working it should be easier to update the jumpgate
+    web page to report the current status of a run.
+
+  * sequencing
+  * waiting on copy (final rsync)
+  * running goat
+  * running pipeline
+  * ... (more processing)
diff --git a/trunk/docs/Conv_CaltechDB_Nov112008.txt b/trunk/docs/Conv_CaltechDB_Nov112008.txt
new file mode 100644
index 0000000..9d6997a
--- /dev/null
+++ b/trunk/docs/Conv_CaltechDB_Nov112008.txt
@@ -0,0 +1,76 @@
+Conversion SQLs for Caltech DB (schema of 2008Aug08 @ 5:06 PM)
+______________________________________________________________
+
+
+Step by step, do the following:
+
+''' Note: Instead of '?' you can put one of your machine names.
+sqlite> ALTER TABLE fctracker_flowcell ADD cluster_mac_id varchar(50) NOT NULL DEFAULT '?';
+sqlite> ALTER TABLE fctracker_flowcell ADD seq_mac_id varchar(50) NOT NULL DEFAULT '?';
+sqlite> ALTER TABLE fctracker_library RENAME TO PREV_fctracker_library;
+
+Now, do a syncdb. The output should look like this (assuming you have migrated to the new models.py):
+sh-3.2# pym syncdb
+Creating table fctracker_cellline
+Creating table fctracker_library
+Creating table fctracker_primer
+Creating table fctracker_antibody
+Creating table fctracker_condition
+Creating table exp_track_datarun
+Creating table exp_track_flowcell
+Creating table analys_track_project
+Creating table analys_track_task
+Creating table htsw_reports_progressreport
+Installing index for fctracker.Library model
+Failed to install index for fctracker.Library model: index fctracker_library_library_species_id already exists
+Installing index for fctracker.Primer model
+Installing index for fctracker.Antibody model
+Installing index for exp_track.DataRun model
+Installing index for exp_track.FlowCell model
+Installing index for analys_track.Task model
+Installing index for htsw_reports.ProgressReport model
+sh-3.2#
+
+''' Copy all records from the "fctracker_flowcell" table to the "exp_track_flowcell" table. (Why? Because the Flowcell table now moves from the "fctracker" component to the "exp_track" component.)
+sqlite> insert into experiments_flowcell select * from fctracker_flowcell;
+
+''' Now on to fctracker_library, a somewhat more complex case.
+
+'''Back to the sqlite prompt..
+sqlite> insert into samples_cellline (cellline_name,notes) values('Unknown','Unknown');
+sqlite> insert into samples_condition (condition_name,notes) values('Unknown','Unknown');
+''' Now we can put 1 in these fields for the Library insert.
+''' Note: the avg_lib_size field is missing in the Caltech DB (although it's in the models.py in Trac), so I put in a default value of 225.
+
+''' Now the actual migration to the new fctracker_library table.
+''' (This version loses data: the current Nov 11, 2008 schema has made_for as a one-to-many
+''' relationship to the auth_user table, instead of being a text field. Here I just assigned
+''' the made_for to a (semi-)random user.
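+''' Note: the INSERT below hard-codes made_for_id = 12; when converting your own
+''' database, substitute the id of an appropriate row from your auth_user table.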
+sqlite> INSERT INTO samples_library (library_id,library_name,library_species_id,experiment_type,cell_line_id,condition_id,replicate,made_by,creation_date,made_for_id,stopping_point,amplified_from_sample_id,undiluted_concentration,ten_nM_dilution,successful_pM,avg_lib_size,notes) select library_id,library_name,library_species_id,'unknown',1,1,1,made_by,creation_date,12,stopping_point,amplified_from_sample_id,undiluted_concentration,ten_nM_dilution,successful_pM,0,notes from PREV_fctracker_library;
+
+''' Set the right values for "experiment_type"
+sqlite> update samples_library set experiment_type = "RNA-seq" where library_id in (select library_id from prev_fctracker_library where RNASeq = 1);
+''' YOU CAN ADD A SIMILAR SQL CMD TO SET THE VALUE FOR THE "avg_lib_size" FIELD (WHICH IS NOW SET TO 0) ...
+
+----------------------------------------------------------------------------------------
+THAT SHOULD BE IT --- NOW YOUR WEB SITE SHOULD SUCCESSFULLY LOAD THE NEW DB WITH YOUR DATA.
+
+2009 Jan 13
+
+I had a working database and then merged in a few more changes from
+Stanford. I ended up needing to do the following:
+
+alter table analysis_task add task_params varchar(200) null;
+alter table samples_cellline add nickname varchar(20) null;
+alter table samples_condition add nickname varchar(20) null;
+
+Those changes might happen automatically when reconverting from our
+original database, or they might not.
+
+CREATE TABLE "samples_library_tags" (
+    "id" integer NOT NULL PRIMARY KEY,
+    "library_id" varchar(30) NOT NULL REFERENCES "samples_library" ("library_id"),
+    "tag_id" integer NOT NULL REFERENCES "samples_tag" ("id"),
+    UNIQUE ("library_id", "tag_id")
+)
+;
+
diff --git a/trunk/docs/Conv_StanfordDB_2009Jan20.txt b/trunk/docs/Conv_StanfordDB_2009Jan20.txt
new file mode 100644
index 0000000..aed4bfa
--- /dev/null
+++ b/trunk/docs/Conv_StanfordDB_2009Jan20.txt
@@ -0,0 +1,21 @@
+# mostly I just renamed tables
+#
+
+alter table analys_track_projects rename to analysis_projects;
+alter table analys_track_project rename to analysis_project;
+alter table analys_track_project_tasks rename to analysis_project_tasks;
+alter table analys_track_task rename to analysis_task;
+alter table exp_track_datarun rename to experiments_datarun;
+alter table exp_track_flowcell rename to experiments_flowcell;
+alter table fctracker_affiliation rename to samples_affiliation;
+alter table fctracker_antibody rename to samples_antibody;
+alter table fctracker_cellline rename to samples_cellline;
+alter table fctracker_condition rename to samples_condition;
+alter table fctracker_flowcell rename to samples_flowcell;
+alter table fctracker_library rename to samples_library;
+alter table fctracker_library_affiliations rename to samples_library_affiliations;
+alter table fctracker_library_tags rename to samples_library_tags;
+alter table fctracker_primary rename to samples_primer;
+alter table fctracker_species rename to samples_species;
+alter table fctracker_tag rename to samples_tag;
+alter table htsw_reports_progressreport rename to reports_progressreport;
diff --git a/trunk/docs/conv_caltech_v0.1_made_for.py b/trunk/docs/conv_caltech_v0.1_made_for.py
new file mode 100644
index 0000000..206de00
--- /dev/null
+++ b/trunk/docs/conv_caltech_v0.1_made_for.py
@@ -0,0 +1,108 @@
+"""
+Read the made-for field and split it up into different affiliations,
+while fixing the different spellings used for some of our users.
+"""
+import os
+
+script_dir = os.path.split(__file__)[0]
+settings_path = os.path.join(script_dir, 'htsworkflow','frontend')
+os.environ['DJANGO_SETTINGS_MODULE'] = 'htsworkflow.frontend.settings'
+
+from htsworkflow.frontend.samples import models as samples
+
+def main():
+    # canonical affiliation names; name_map below maps each old made_for
+    # spelling onto one or more of these names
+    names = [
+        'Unknown',
+        'Adam Rosenthal',
+        'Adler Dillman',
+        'Ali',
+        'Ali/EHD',
+        'Ali/PWS',
+        'Andrew Medina-Marino',
+        'Brian Williams',
+        'Davidson',
+        'Elowitz',
+        'Erich Schwarz',
+        'Georgi Warinov',
+        'Gilberto Desalvo',
+        'Gilberto Hernandez',
+        'Gordon Kwan',
+        'Hudson-Alpha',
+        'James Puckett',
+        'Jingli Zhang',
+        'Ellen Rothenberg',
+        'Jose Luis',
+        'Katherine Fisher',
+        'Meyerowitz',
+        'Ryan',
+        'Demo',
+        'Angela Stathopoulos',
+        'Steven Kuntz',
+        'Tony',
+        'Tristan',
+        'Yuling Jiao',
+        u'Anil Ozdemir',
+    ]
+
+    name_map = {
+        '': ('Unknown',),
+        'Adam Rosenthal': ('Adam Rosenthal',),
+        'Adler Dillman': ('Adler Dillman',),
+        'Ali': ('Ali',),
+        'Ali/EHD': ('Ali/EHD',),
+        'Ali/PWS': ('Ali/PWS',),
+        'Andrew Medina-Marina': ('Andrew Medina-Marino',),
+        'Andrew Medina-Marino': ('Andrew Medina-Marino',),
+        'Brian': ('Brian Williams',),
+        'Brian Williams': ('Brian Williams',),
+        'Davidson': ('Davidson',),
+        'Elowitz': ('Elowitz',),
+        'Erich Schwarz': ('Erich Schwarz',),
+        'Erich Schwartz': ('Erich Schwarz',),
+        'Georgi Warinov': ('Georgi Warinov',),
+        'Gilberto Desalvo': ('Gilberto Desalvo',),
+        'Gilberto Hernandez': ('Gilberto Hernandez',),
+        'Gordon Kwan': ('Gordon Kwan',),
+        'Gordon': ('Gordon Kwan',),
+        'Alpha-Hudson': ('Hudson-Alpha',),
+        'Hudson-Alpha': ('Hudson-Alpha',),
+        'James Puckett': ('James Puckett',),
+        'Jingli Zhang, Rothenberg': ('Jingli Zhang', 'Ellen Rothenberg',),
+        'Jingli Zhang': ('Jingli Zhang',),
+        'Jose Luis': ('Jose Luis',),
+        'Katherine Fisher': ('Katherine Fisher',),
+        'Katherine, Gigio': ('Katherine Fisher', 'Gilberto Desalvo',),
+        'Meyerowitz': ('Meyerowitz',),
+        'Ryan, Demo': ('Ryan', 'Demo',),
+        'Stathopoulos': ('Angela Stathopoulos',),
+        'Steve Kuntz': ('Steven Kuntz',),
+        'Steven Kuntz': ('Steven Kuntz',),
+        'Tony': ('Tony',),
+        'Tristan': ('Tristan',),
+        'Yuling Jiao': ('Yuling Jiao',),
+        u'Anil Ozdemir': (u'Anil Ozdemir',),
+    }
+
+    affiliations = {}
+    for name in names:
+        aff = samples.Affiliation(name=name)
+        affiliations[name] = aff
+        aff.save()
+
+    for lib in samples.Library.objects.all():
+        made_list = name_map[lib.made_for]
+        assert type(made_list) == type((None,))
+        for n in made_list:
+            lib.affiliations.add(affiliations[n])
+        lib.save()
+
+if __name__ == "__main__":
+    print "don't run this unless you know what it's for;"
+    print "it converts the caltech 'made_for' field into a set of"
+    print "affiliations."
+    print ""
+    print "The user lists are hard coded and exist mostly for my"
+    print "convenience."
+ main() diff --git a/trunk/docs/conv_caltech_v0.1_to_htsw.py b/trunk/docs/conv_caltech_v0.1_to_htsw.py new file mode 100644 index 0000000..e69d9fa --- /dev/null +++ b/trunk/docs/conv_caltech_v0.1_to_htsw.py @@ -0,0 +1,188 @@ +import shutil +import sqlite3 +import sys + +def main(cmdline=None): + if len(cmdline) == 1: + dest='/tmp/fctracker.db' + else: + dest = cmdline[1] + shutil.copy(cmdline[0], dest) + conn = sqlite3.connect(dest) + c = conn.cursor() + c.execute('drop table fctracker_elandresult'); + c.execute('''CREATE TABLE "experiments_clusterstation" ( + "id" integer NOT NULL PRIMARY KEY, + "name" varchar(50) NOT NULL UNIQUE);''') + c.execute('''INSERT INTO experiments_clusterstation (name) values ("station");''') + c.execute('''CREATE TABLE "experiments_sequencer" ( + "id" integer NOT NULL PRIMARY KEY, + "name" varchar(50) NOT NULL UNIQUE);''') + c.execute('''INSERT INTO experiments_sequencer (name) values ("HWI-EAS229");''') + + c.execute('''CREATE TABLE "experiments_flowcell" ( + "id" integer NOT NULL PRIMARY KEY, + "flowcell_id" varchar(20) NOT NULL UNIQUE, + "run_date" datetime NOT NULL, + "advanced_run" bool NOT NULL, + "paired_end" bool NOT NULL, + "read_length" integer NOT NULL, + "lane_1_library_id" integer NOT NULL REFERENCES "samples_library" ("id"), + "lane_2_library_id" integer NOT NULL REFERENCES "samples_library" ("id"), + "lane_3_library_id" integer NOT NULL REFERENCES "samples_library" ("id"), + "lane_4_library_id" integer NOT NULL REFERENCES "samples_library" ("id"), + "lane_5_library_id" integer NOT NULL REFERENCES "samples_library" ("id"), + "lane_6_library_id" integer NOT NULL REFERENCES "samples_library" ("id"), + "lane_7_library_id" integer NOT NULL REFERENCES "samples_library" ("id"), + "lane_8_library_id" integer NOT NULL REFERENCES "samples_library" ("id"), + "lane_1_pM" decimal NOT NULL, + "lane_2_pM" decimal NOT NULL, + "lane_3_pM" decimal NOT NULL, + "lane_4_pM" decimal NOT NULL, + "lane_5_pM" decimal NOT NULL, + "lane_6_pM" decimal NOT NULL, + "lane_7_pM" decimal NOT NULL, + "lane_8_pM" decimal NOT NULL, + "lane_1_cluster_estimate" integer NULL, + "lane_2_cluster_estimate" integer NULL, + "lane_3_cluster_estimate" integer NULL, + "lane_4_cluster_estimate" integer NULL, + "lane_5_cluster_estimate" integer NULL, + "lane_6_cluster_estimate" integer NULL, + "lane_7_cluster_estimate" integer NULL, + "lane_8_cluster_estimate" integer NULL, + "cluster_station_id" integer NOT NULL REFERENCES "experiments_clusterstation" ("id"), + "sequencer_id" integer NOT NULL REFERENCES "experiments_sequencer" ("id"), + "notes" text NOT NULL +);''') + c.execute('''insert into experiments_flowcell + (id, flowcell_id, run_date, advanced_run, paired_end, read_length, + lane_1_library_id, lane_2_library_id, lane_3_library_id, + lane_4_library_id, lane_5_library_id, lane_6_library_id, + lane_7_library_id, lane_8_library_id, lane_1_pm, + lane_2_pM, lane_3_pM, lane_4_pM, lane_5_pM, lane_6_pM, + lane_7_pM, lane_8_pM, lane_1_cluster_estimate, + lane_2_cluster_estimate, lane_3_cluster_estimate, + lane_4_cluster_estimate, lane_5_cluster_estimate, + lane_6_cluster_estimate, lane_7_cluster_estimate, + lane_8_cluster_estimate, cluster_station_id, sequencer_id, + notes) + select + id, flowcell_id, run_date, advanced_run, paired_end, read_length, + lane_1_library_id, lane_2_library_id, lane_3_library_id, + lane_4_library_id, lane_5_library_id, lane_6_library_id, + lane_7_library_id, lane_8_library_id, lane_1_pm, + lane_2_pM, lane_3_pM, lane_4_pM, lane_5_pM, lane_6_pM, + lane_7_pM, lane_8_pM, 
lane_1_cluster_estimate, + lane_2_cluster_estimate, lane_3_cluster_estimate, + lane_4_cluster_estimate, lane_5_cluster_estimate, + lane_6_cluster_estimate, lane_7_cluster_estimate, + lane_8_cluster_estimate, 1, 1, + notes from fctracker_flowcell;''') + c.execute('''drop table fctracker_flowcell;''') + + # create samples.cellline + c.execute('''CREATE TABLE "samples_cellline" ( + "id" integer NOT NULL PRIMARY KEY, + "cellline_name" varchar(100) NOT NULL UNIQUE, + "nickname" varchar(20) NULL, + "notes" text NOT NULL);''') + c.execute('''insert into samples_cellline (cellline_name,notes) values("Unknown","Unknown");''') + + # Create samples.condition + c.execute('''CREATE TABLE "samples_condition" ( + "id" integer NOT NULL PRIMARY KEY, + "condition_name" varchar(2000) NOT NULL UNIQUE, + "nickname" varchar(20) NULL, + "notes" text NOT NULL);''') + c.execute('''insert into samples_condition (condition_name,notes) values("Unknown","Unknown");''') + + # create samples.experiment type + c.execute('''CREATE TABLE "samples_experimenttype" ( + "id" integer NOT NULL PRIMARY KEY, + "name" varchar(50) NOT NULL UNIQUE);''') + for et in [ ('Unknown',), + ('ChIP-seq',), + ('Sheared',), + ('RNA-seq',), + ('Methyl-seq',), + ('DIP-seq',), + ('De Novo',)]: + c.execute('insert into samples_experimenttype (name) values (?)', et) + + # create samples.library + c.execute('''CREATE TABLE "samples_library" ( + "id" integer NOT NULL PRIMARY KEY, + "library_id" varchar(30) NOT NULL, + "library_name" varchar(100) NOT NULL UNIQUE, + "library_species_id" integer NOT NULL REFERENCES "samples_species" ("id"), + "hidden" bool NOT NULL, + "cell_line_id" integer NOT NULL REFERENCES "samples_cellline" ("id"), + "condition_id" integer NOT NULL REFERENCES "samples_condition" ("id"), + "antibody_id" integer NULL REFERENCES "samples_antibody" ("id"), + "replicate" smallint unsigned NOT NULL, + "experiment_type_id" NOT NULL REFERENCES "samples_experimenttype" ("id"), + "creation_date" date NULL, + "made_for" varchar(50) NOT NULL, + "made_by" varchar(50) NOT NULL, + "stopping_point" varchar(25) NOT NULL, + "amplified_from_sample_id" integer NULL, + "undiluted_concentration" decimal NULL, + "successful_pM" decimal NULL, + "ten_nM_dilution" bool NOT NULL, + "avg_lib_size" integer NULL, + "notes" text NOT NULL);''') + c.execute('''INSERT INTO samples_library + (id,library_id,library_name,library_species_id, hidden, experiment_type_id, + cell_line_id,condition_id,replicate,made_by,creation_date, + made_for,stopping_point,amplified_from_sample_id, + undiluted_concentration,ten_nM_dilution,successful_pM, + avg_lib_size,notes) +select library_id,library_id,library_name,library_species_id, 0, 1, + 1, 1, 1, made_by,creation_date, + made_for,stopping_point,amplified_from_sample_id, + undiluted_concentration,ten_nM_dilution,successful_pM, + 225,notes from fctracker_library;'''); + + # mark gel isolates as "hidden" + c.execute('''update samples_library set hidden=1 + where stopping_point = "1A" or stopping_point = "1Ab";'''); + + # get pk for RNA-seq experiment type + c.execute('select id from samples_experimenttype where name = "RNA-seq";') + rna_seq_id = list(c)[0] + # change everything marked as rnaseq to experiment_type rnaseq + c.execute('''update samples_library set experiment_type_id=? 
where library_id in (select library_id from fctracker_library where RNASeq = 1);''', rna_seq_id)
+    #c.execute('''drop table fctracker_library;''')
+
+    # add affiliation linking table
+    c.execute('''CREATE TABLE "samples_library_affiliations" (
+    "id" integer NOT NULL PRIMARY KEY,
+    "library_id" integer NOT NULL REFERENCES "samples_library" ("id"),
+    "affiliation_id" integer NOT NULL REFERENCES "samples_affiliation" ("id"),
+    UNIQUE ("library_id", "affiliation_id"));''')
+
+    # add library to tags linking table
+    c.execute('''CREATE TABLE "samples_library_tags" (
+    "id" integer NOT NULL PRIMARY KEY,
+    "library_id" integer NOT NULL REFERENCES "samples_library" ("id"),
+    "tag_id" integer NOT NULL REFERENCES "samples_tag" ("id"),
+    UNIQUE ("library_id", "tag_id"));''')
+
+
+
+    #
+    c.execute('''CREATE TABLE "samples_species" (
+    "id" integer NOT NULL PRIMARY KEY,
+    "scientific_name" varchar(256) NOT NULL,
+    "common_name" varchar(256) NOT NULL);''')
+    c.execute('''insert into samples_species
+    (id, scientific_name, common_name)
+    select
+    id, scientific_name, common_name
+    from fctracker_species;''')
+    c.execute('''drop table fctracker_species''')
+    conn.commit()
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/trunk/docs/gaworkflow.xmi b/trunk/docs/gaworkflow.xmi
new file mode 100644
index 0000000..41c227a
--- /dev/null
+++ b/trunk/docs/gaworkflow.xmi
@@ -0,0 +1,452 @@
+[gaworkflow.xmi: a 452-line UML model written by Umbrello UML Modeller 1.5.8 (http://uml.sf.net), encoding UnicodeUTF8; the XML markup was stripped during extraction and is not recoverable here.]
diff --git a/trunk/docs/htsworkflow.ini.example b/trunk/docs/htsworkflow.ini.example
new file mode 100644
index 0000000..e00b8ca
--- /dev/null
+++ b/trunk/docs/htsworkflow.ini.example
@@ -0,0 +1,22 @@
+[frontend]
+; database engine, currently only sqlite3 will work right
+database_engine=sqlite3
+; location of the sqlite3 database
+database_name=/htsworkflow/htswfrontend/dev_tracker.db
+
+; settings for what email server to use
+email_host = localhost
+email_port = 25
+
+; default timezone for this server
+time_zone = America/Los_Angeles
+
+; set the default picomolarity when creating a new flowcell
+default_pm = 5
+
+
+[allowed_hosts]
+localhost=localhost
+
+[allowed_analysis_hosts]
+localhost=localhost
diff --git a/trunk/htsworkflow/__init__.py b/trunk/htsworkflow/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/trunk/htsworkflow/automation/__init__.py b/trunk/htsworkflow/automation/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/trunk/htsworkflow/automation/copier.py b/trunk/htsworkflow/automation/copier.py
new file mode 100644
index 0000000..9ec1e2b
--- /dev/null
+++ b/trunk/htsworkflow/automation/copier.py
@@ -0,0 +1,288 @@
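+"""
+Copy runfolders from the sequencer hosts to analysis storage with rsync.
+
+CopierBot listens for sequencingFinished messages over jabber/XML-RPC
+(via benderjab), starts an rsync per runfolder, and forwards
+sequencingFinished once the copy has completed.
+"""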
+import ConfigParser
+import copy
+import logging
+import logging.handlers
+import os
+import re
+import shlex
+import subprocess
+import sys
+import time
+import traceback
+
+from benderjab import bot   # assumed home of JIDMissingResource, used in read_config below
+from benderjab import rpc
+
+def runfolder_validate(fname):
+    """
+    Return True if fname looks like a runfolder name
+    """
+    if re.match("^[0-9]{6}_[-A-Za-z0-9_]*$", fname):
+        return True
+    else:
+        return False
+
+class rsync(object):
+    def __init__(self, sources, dest, pwfile):
+        self.cmd = ['/usr/bin/rsync', ]
+        self.pwfile = os.path.expanduser(pwfile)
+        self.cmd.append('--password-file=%s' % (self.pwfile))
+        self.source_base_list = [ self._normalize_rsync_source(x) for x in sources]
+        self.dest_base = dest
+        self.processes = {}
+        self.exit_code = None
+
+    def list(self):
+        """
+        Get a directory listing for all our sources
+        """
+        logging.debug("searching for entries in: %s" % (self.source_base_list,))
+        entries = []
+        for source in self.source_base_list:
+            logging.debug("Scanning %s" % (source,))
+            args = copy.copy(self.cmd)
+            args.append(source)
+
+            logging.debug("Rsync cmd:" + " ".join(args))
+            short_process = subprocess.Popen(args, stdout=subprocess.PIPE)
+            exit_code = short_process.wait()
+            stdout = short_process.stdout
+            # We made sure source ends in a / earlier
+            cur_list = [ source+subdir for subdir in self.list_filter(stdout)]
+            entries.extend(cur_list)
+        logging.debug(u"Found the following: %s" % (unicode(entries)))
+        return entries
+
+    def list_filter(self, lines):
+        """
+        parse rsync directory listing
+        """
+        dirs_to_copy = []
+        direntries = [ x[0:42].split() + [x[43:-1]] for x in lines ]
+        logging.debug(u'direntries: %s' % (unicode(direntries),))
+        for permissions, size, filedate, filetime, filename in direntries:
+            if permissions[0] == 'd':
+                # hey, it's a directory, the first step to being something
+                # we want to copy
+                if re.match("[0-9]{6}", filename):
+                    # it starts with something that looks like a 6 digit date
+                    # aka good enough for me
+                    dirs_to_copy.append(filename)
+        return dirs_to_copy
+
+    def create_copy_process(self, urlname):
+        args = copy.copy(self.cmd)
+        # args.append('--dry-run') # Makes testing easier
+        # we want to copy everything
+        args.append('-rlt')
+        # from here
+        args.append(urlname)
+        # to here
+        args.append(self.dest_base)
+        logging.debug("Rsync cmd:" + " ".join(args))
+        return subprocess.Popen(args)
+
+    def copy(self):
+        """
+        copy any interesting looking directories over
+        return list of items that we started copying.
+        """
+        # clean up any lingering non-running processes
+        self.poll()
+
+        # what's available to copy?
+        dirs_to_copy = self.list()
+
+        # lets start copying
+        started = []
+        for d in dirs_to_copy:
+            process = self.processes.get(d, None)
+
+            if process is None:
+                # we don't have a process, so make one
+                logging.info("rsyncing %s" % (d))
+                self.processes[d] = self.create_copy_process(d)
+                started.append(d)
+        return started
+
+    def _normalize_rsync_source(self, source):
+        """
+        Make sure that we have a reasonable looking source;
+        a source must be a directory/collection.
+        """
+        # we must be a directory
+        if source[-1] != '/':
+            source += '/'
+        # I suppose we could check to see if we start with rsync:// or something
+        return source
+
+    def poll(self):
+        """
+        check currently running processes to see if they're done
+
+        finished processes are logged and removed from self.processes.
+ """ + for dir_key, proc_value in self.processes.items(): + retcode = proc_value.poll() + if retcode is None: + # process hasn't finished yet + pass + elif retcode == 0: + logging.info("finished rsyncing %s, exitcode %d" %( dir_key, retcode)) + del self.processes[dir_key] + else: + logging.error("rsync failed for %s, exit code %d" % (dir_key, retcode)) + + def __len__(self): + """ + Return how many active rsync processes we currently have + + Call poll first to close finished processes. + """ + return len(self.processes) + + def keys(self): + """ + Return list of current run folder names + """ + return self.processes.keys() + +class CopierBot(rpc.XmlRpcBot): + def __init__(self, section=None, configfile=None): + #if configfile is None: + # configfile = '~/.htsworkflow' + + super(CopierBot, self).__init__(section, configfile) + + # options for rsync command + self.cfg['rsync_password_file'] = None + self.cfg['rsync_source'] = None + self.cfg['rsync_destination'] = None + + # options for reporting we're done + self.cfg['notify_users'] = None + self.cfg['notify_runner'] = None + + self.pending = [] + self.rsync = None + self.notify_users = None + self.notify_runner = None + + self.register_function(self.startCopy) + self.register_function(self.sequencingFinished) + self.eventTasks.append(self.update) + + def _init_rsync(self): + """ + Initalize rsync class + + This is only accessible for test purposes. + """ + # we can't call any logging function until after start finishes. + # this got moved to a seperate function from run to help with test code + if self.rsync is None: + self.rsync = rsync(self.sources, self.destination, self.password) + + def read_config(self, section=None, configfile=None): + """ + read the config file + """ + super(CopierBot, self).read_config(section, configfile) + + self.sources = shlex.split(self._check_required_option('rsync_sources')) + self.password = self._check_required_option('rsync_password_file') + self.destination = self._check_required_option('rsync_destination') + + self.notify_users = self._parse_user_list(self.cfg['notify_users']) + try: + self.notify_runner = \ + self._parse_user_list(self.cfg['notify_runner'], + require_resource=True) + except bot.JIDMissingResource: + msg = 'need a full jabber ID + resource for xml-rpc destinations' + print >>sys.stderr, msg + raise bot.JIDMissingResource(msg) + + def run(self): + """ + Start application + """ + self._init_rsync() + super(CopierBot, self).run() + + def startCopy(self, *args): + """ + start our copy + """ + logging.info("starting copy scan, %s" % (args,)) + started = self.rsync.copy() + logging.info("copying:" + " ".join(started)+".") + return started + + def sequencingFinished(self, runDir, *args): + """ + The run was finished, if we're done copying, pass the message on + """ + # close any open processes + self.rsync.poll() + + # see if we're still copying + if runfolder_validate(runDir): + logging.info("recevied sequencing finshed for %s" % (runDir)) + self.pending.append(runDir) + self.startCopy() + return "PENDING" + else: + errmsg = "received bad runfolder name (%s)" % (runDir) + logging.warning(errmsg) + # maybe I should use a different error message + raise RuntimeError(errmsg) + + def reportSequencingFinished(self, runDir): + """ + Send the sequencingFinished message to the interested parties + """ + if self.notify_users is not None: + for u in self.notify_users: + self.send(u, 'Sequencing run %s finished' % (runDir)) + if self.notify_runner is not None: + for r in self.notify_runner: + 
+                self.rpc_send(r, (runDir,), 'sequencingFinished')
+        logging.info("forwarding sequencingFinished message for %s" % (runDir))
+
+    def update(self, *args):
+        """
+        Update our current status.
+        Report if we've finished copying files.
+        """
+        self.rsync.poll()
+        for p in self.pending:
+            if p not in self.rsync.keys():
+                self.reportSequencingFinished(p)
+                self.pending.remove(p)
+
+    def _parser(self, msg, who):
+        """
+        Parse xmpp chat messages
+        """
+        help = u"I can [copy], or report current [status]"
+        if re.match(u"help", msg):
+            reply = help
+        elif re.match("copy", msg):
+            started = self.startCopy()
+            reply = u"started copying " + ", ".join(started)
+        elif re.match(u"status", msg):
+            msg = [u"Currently %d rsync processes are running." % (len(self.rsync))]
+            for d in self.rsync.keys():
+                msg.append(u"  " + d)
+            reply = os.linesep.join(msg)
+        else:
+            reply = u"I didn't understand '%s'" % (unicode(msg))
+        return reply
+
+def main(args=None):
+    bot = CopierBot()
+    bot.main(args)
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
+
diff --git a/trunk/htsworkflow/automation/runner.py b/trunk/htsworkflow/automation/runner.py
new file mode 100644
index 0000000..45d4ffc
--- /dev/null
+++ b/trunk/htsworkflow/automation/runner.py
@@ -0,0 +1,224 @@
+#!/usr/bin/env python
+from glob import glob
+import logging
+import os
+import re
+import sys
+import time
+import threading
+
+from benderjab import rpc
+
+from htsworkflow.pipelines.configure_run import *
+
+#s_fc = re.compile('FC[0-9]+')
+s_fc = re.compile('_[0-9a-zA-Z]*$')
+
+
+def _get_flowcell_from_rundir(run_dir):
+    """
+    Returns flowcell string based on run_dir.
+    Returns None and logs an error if the flowcell can't be found.
+    """
+    junk, dirname = os.path.split(run_dir)
+    mo = s_fc.search(dirname)
+    if not mo:
+        logging.error('RunDir 2 FlowCell error: %s' % (run_dir))
+        return None
+
+    return dirname[mo.start()+1:]
+
+
+
+class Runner(rpc.XmlRpcBot):
+    """
+    Manage running pipeline jobs.
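+
+    Waits for a sequencingFinished message, then runs the retrieve-config,
+    configure, and run_pipeline steps for that flowcell (see _runner below).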
+ """ + def __init__(self, section=None, configfile=None): + #if configfile is None: + # self.configfile = "~/.htsworkflow" + super(Runner, self).__init__(section, configfile) + + self.cfg['notify_users'] = None + self.cfg['genome_dir'] = None + self.cfg['base_analysis_dir'] = None + + self.cfg['notify_users'] = None + self.cfg['notify_postanalysis'] = None + + self.conf_info_dict = {} + + self.register_function(self.sequencingFinished) + #self.eventTasks.append(self.update) + + + def read_config(self, section=None, configfile=None): + super(Runner, self).read_config(section, configfile) + + self.genome_dir = self._check_required_option('genome_dir') + self.base_analysis_dir = self._check_required_option('base_analysis_dir') + + self.notify_users = self._parse_user_list(self.cfg['notify_users']) + #FIXME: process notify_postpipeline cfg + + + def _parser(self, msg, who): + """ + Parse xmpp chat messages + """ + help = u"I can send [start] a run, or report [status]" + if re.match(u"help", msg): + reply = help + elif re.match("status", msg): + words = msg.split() + if len(words) == 2: + reply = self.getStatusReport(words[1]) + else: + reply = u"Status available for: %s" \ + % (', '.join([k for k in self.conf_info_dict.keys()])) + elif re.match(u"start", msg): + words = msg.split() + if len(words) == 2: + self.sequencingFinished(words[1]) + reply = u"starting run for %s" % (words[1]) + else: + reply = u"need runfolder name" + elif re.match(u"path", msg): + reply = u"My path is: " + unicode(os.environ['PATH']) + else: + reply = u"I didn't understand '%s'" %(msg) + + logging.debug("reply: " + str(reply)) + return reply + + + def getStatusReport(self, fc_num): + """ + Returns text status report for flow cell number + """ + if fc_num not in self.conf_info_dict: + return "No record of a %s run." % (fc_num) + + status = self.conf_info_dict[fc_num].status + + if status is None: + return "No status information for %s yet." \ + " Probably still in configure step. Try again later." % (fc_num) + + output = status.statusReport() + + return '\n'.join(output) + + + def sequencingFinished(self, run_dir): + """ + Sequenceing (and copying) is finished, time to start pipeline + """ + logging.debug("received sequencing finished message") + + # Setup config info object + ci = ConfigInfo() + ci.base_analysis_dir = self.base_analysis_dir + ci.analysis_dir = os.path.join(self.base_analysis_dir, run_dir) + + # get flowcell from run_dir name + flowcell = _get_flowcell_from_rundir(run_dir) + + # Store ci object in dictionary + self.conf_info_dict[flowcell] = ci + + + # Launch the job in it's own thread and turn. + self.launchJob(run_dir, flowcell, ci) + return "started" + + + def pipelineFinished(self, run_dir): + # need to strip off self.watch_dir from rundir I suspect. + logging.info("pipeline finished in" + str(run_dir)) + #pattern = self.watch_dir + #if pattern[-1] != os.path.sep: + # pattern += os.path.sep + #stripped_run_dir = re.sub(pattern, "", run_dir) + #logging.debug("stripped to " + stripped_run_dir) + + # Notify each user that the run has finished. 
+        if self.notify_users is not None:
+            for u in self.notify_users:
+                self.send(u, 'Pipeline run %s finished' % (run_dir))
+
+        #if self.notify_runner is not None:
+        #    for r in self.notify_runner:
+        #        self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished')
+
+    def reportMsg(self, msg):
+
+        if self.notify_users is not None:
+            for u in self.notify_users:
+                self.send(u, msg)
+
+
+    def _runner(self, run_dir, flowcell, conf_info):
+
+        # retrieve config step
+        cfg_filepath = os.path.join(conf_info.analysis_dir,
+                                    'config-auto.txt')
+        status_retrieve_cfg = retrieve_config(conf_info,
+                                              flowcell,
+                                              cfg_filepath,
+                                              self.genome_dir)
+        if status_retrieve_cfg:
+            logging.info("Runner: Retrieve config: success")
+            self.reportMsg("Retrieve config (%s): success" % (run_dir))
+        else:
+            logging.error("Runner: Retrieve config: failed")
+            self.reportMsg("Retrieve config (%s): FAILED" % (run_dir))
+
+
+        # configure step
+        if status_retrieve_cfg:
+            status = configure(conf_info)
+            if status:
+                logging.info("Runner: Configure: success")
+                self.reportMsg("Configure (%s): success" % (run_dir))
+                self.reportMsg(
+                    os.linesep.join(glob(os.path.join(run_dir,'Data','C*')))
+                )
+            else:
+                logging.error("Runner: Configure: failed")
+                self.reportMsg("Configure (%s): FAILED" % (run_dir))
+
+            # if successful, continue
+            if status:
+                # Setup status cmdline status monitor
+                #startCmdLineStatusMonitor(ci)
+
+                # running step
+                print 'Running pipeline now!'
+                run_status = run_pipeline(conf_info)
+                if run_status is True:
+                    logging.info('Runner: Pipeline: success')
+                    self.reportMsg("Pipeline run (%s): Finished" % (run_dir,))
+                else:
+                    logging.info('Runner: Pipeline: failed')
+                    self.reportMsg("Pipeline run (%s): FAILED" % (run_dir))
+
+
+    def launchJob(self, run_dir, flowcell, conf_info):
+        """
+        Starts up a thread for running the pipeline
+        """
+        t = threading.Thread(target=self._runner,
+                             args=[run_dir, flowcell, conf_info])
+        t.setDaemon(True)
+        t.start()
+
+
+
+def main(args=None):
+    bot = Runner()
+    return bot.main(args)
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv[1:]))
+
diff --git a/trunk/htsworkflow/automation/spoolwatcher.py b/trunk/htsworkflow/automation/spoolwatcher.py
new file mode 100644
index 0000000..25c012d
--- /dev/null
+++ b/trunk/htsworkflow/automation/spoolwatcher.py
@@ -0,0 +1,319 @@
+#!/usr/bin/env python
+import logging
+import os
+import re
+import shlex
+import sys
+import time
+
+from htsworkflow.util import mount
+
+# this uses pyinotify
+import pyinotify
+from pyinotify import EventsCodes
+
+from benderjab import bot   # assumed home of JIDMissingResource, used in read_config below
+from benderjab import rpc
+
+def get_top_dir(root, path):
+    """
+    Return the directory in path that is a subdirectory of root.
+    e.g.
+
+    >>> print get_top_dir('/a/b/c', '/a/b/c/d/e/f')
+    d
+    >>> print get_top_dir('/a/b/c/', '/a/b/c/d/e/f')
+    d
+    >>> print get_top_dir('/a/b/c', '/g/e/f')
+    None
+    >>> print get_top_dir('/a/b/c', '/a/b/c')
+    <BLANKLINE>
+    """
+    if path.startswith(root):
+        subpath = path[len(root):]
+        if subpath.startswith('/'):
+            subpath = subpath[1:]
+        return subpath.split(os.path.sep)[0]
+    else:
+        return None
+
+class WatcherEvents(object):
+    # two events need to be tracked
+    # one to send startCopy
+    # one to send OMG its broken
+    # OMG its broken needs to stop when we've seen enough
+    # cycles
+    # this should be per runfolder.
+    # read the xml files
+    def __init__(self):
+        pass
+
+
+class Handler(pyinotify.ProcessEvent):
+    def __init__(self, watchmanager, bot, ipar=False):
+        """
+        ipar flag indicates we should wait for ipar to finish, instead of
+        just the run finishing
+        """
+        self.last_event = {}
+        self.watchmanager = watchmanager
+        self.bot = bot
+        self.ipar_mode = ipar
+        if self.ipar_mode:
+            self.last_file = 'IPAR_Netcopy_Complete.txt'.lower()
+        else:
+            self.last_file = "run.completed".lower()
+
+    def process_IN_CREATE(self, event):
+        for wdd in self.bot.wdds:
+            for watch_path in self.bot.watchdirs:
+                if event.path.startswith(watch_path):
+                    target = get_top_dir(watch_path, event.path)
+                    self.last_event.setdefault(watch_path, {})[target] = time.time()
+
+                    msg = "Create: %s %s %s" % (event.path, event.name, target)
+
+                    if event.name.lower() == self.last_file:
+                        try:
+                            self.bot.sequencingFinished(event.path)
+                        except IOError, e:
+                            logging.error("Couldn't send sequencingFinished")
+                    logging.debug(msg)
+
+    def process_IN_DELETE(self, event):
+        logging.debug("Remove: %s" % os.path.join(event.path, event.name))
+        pass
+
+    def process_IN_UNMOUNT(self, event):
+        pathname = os.path.join(event.path, event.name)
+        logging.debug("IN_UNMOUNT: %s" % (pathname,))
+        self.bot.unmount_watch(event.path)
+
+class SpoolWatcher(rpc.XmlRpcBot):
+    """
+    Watch a directory and send a message when another process is done writing.
+
+    This monitors a directory tree using inotify (linux specific) and,
+    after some files have been written, will send a message once no file
+    has been written for write_timeout seconds.
+
+    (Basically, when the solexa machine finishes dumping a round of data,
+    this'll hopefully send out a message saying, hey, look, there's data
+    available.)
+
+    """
+    # these params need to be in the config file
+    # I wonder where I should put the documentation
+    #:Parameters:
+    #    `watchdirs` - list of directories to monitor for modifications
+    #    `profile` - specify which .htsworkflow profile to use
+    #    `write_timeout` - how many seconds to wait for writes to finish to
+    #                      the spool
+    #    `notify_timeout` - how often to timeout from notify
+
+    def __init__(self, section=None, configfile=None):
+        #if configfile is None:
+        #    self.configfile = "~/.htsworkflow"
+        super(SpoolWatcher, self).__init__(section, configfile)
+
+        self.cfg['watchdirs'] = None
+        self.cfg['write_timeout'] = 10
+        self.cfg['notify_users'] = None
+        self.cfg['notify_runner'] = None
+        self.cfg['wait_for_ipar'] = 0
+
+        self.watchdirs = []
+        self.watchdir_url_map = {}
+        self.notify_timeout = 0.001
+
+        self.wm = None
+        self.notify_users = None
+        self.notify_runner = None
+        self.wdds = []
+
+        # keep track if the specified mount point is currently mounted
+        self.mounted_points = {}
+        # keep track of which mount points tie to which watch directories,
+        # so maybe we can remount them.
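+        # (maps a mount point to the list of watch directories on it;
+        # filled in by add_watch below)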
+        self.mounts_to_watches = {}
+
+        self.eventTasks.append(self.process_notify)
+
+    def read_config(self, section=None, configfile=None):
+        # Don't give in to the temptation to use logging functions here;
+        # need to wait until after we detach in start
+        super(SpoolWatcher, self).read_config(section, configfile)
+
+        self.watchdirs = shlex.split(self._check_required_option('watchdirs'))
+        # see if there's an alternate url that should be used for the watchdir
+        for watchdir in self.watchdirs:
+            self.watchdir_url_map[watchdir] = self.cfg.get(watchdir, watchdir)
+
+        self.write_timeout = int(self.cfg['write_timeout'])
+        self.wait_for_ipar = int(self.cfg['wait_for_ipar'])
+
+        self.notify_users = self._parse_user_list(self.cfg['notify_users'])
+        try:
+            self.notify_runner = \
+                self._parse_user_list(self.cfg['notify_runner'],
+                                      require_resource=True)
+        except bot.JIDMissingResource:
+            msg = 'need a full jabber ID + resource for xml-rpc destinations'
+            raise bot.JIDMissingResource(msg)
+
+        self.handler = None
+        self.notifier = None
+
+    def add_watch(self, watchdirs=None):
+        """
+        start watching watchdir or self.watchdir
+        we're currently limited to watching one directory tree.
+        """
+        # create the watch managers if we need them
+        if self.wm is None:
+            self.wm = pyinotify.WatchManager()
+            self.handler = Handler(self.wm, self, self.wait_for_ipar)
+            self.notifier = pyinotify.Notifier(self.wm, self.handler)
+
+        # the one tree limit is mostly because self.wdd is a single item
+        # but managing it as a list might be a bit more annoying
+        if watchdirs is None:
+            watchdirs = self.watchdirs
+
+        mask = EventsCodes.IN_CREATE | EventsCodes.IN_UNMOUNT
+        # rec traverses the tree and adds all the directories that are there
+        # at the start.
+        # auto_add will add in new directories as they are created
+        for w in watchdirs:
+            mount_location = mount.find_mount_point_for(w)
+            self.mounted_points[mount_location] = True
+            mounts = self.mounts_to_watches.get(mount_location, [])
+            if w not in mounts:
+                mounts.append(w)
+                self.mounts_to_watches[mount_location] = mounts
+
+            logging.info(u"Watching:"+unicode(w))
+            self.wdds.append(self.wm.add_watch(w, mask, rec=True, auto_add=True))
+
+    def unmount_watch(self, event_path):
+        # remove backwards so we don't get weirdness from
+        # the list getting shorter
+        for i in range(len(self.wdds)-1, -1, -1):
+            wdd = self.wdds[i]
+            logging.info(u'unmounting: '+unicode(wdd.items()))
+            self.wm.rm_watch(wdd.values())
+            del self.wdds[i]
+        self.mounted = False
+
+    def make_copy_url(self, watchdir, list_event_dir):
+        root_copy_url = self.watchdir_url_map[watchdir]
+        if root_copy_url[-1] != '/':
+            root_copy_url += '/'
+        copy_url = root_copy_url + list_event_dir
+        logging.debug('Copy url: %s' % (copy_url,))
+        return copy_url
+
+    def process_notify(self, *args):
+        if self.notifier is None:
+            # nothing to do yet
+            return
+        # process the queue of events as explained above
+        self.notifier.process_events()
+        # check_events waits timeout
+        if self.notifier.check_events(self.notify_timeout):
+            # read notified events and enqueue them
+            self.notifier.read_events()
+        # should we do something?
+        # has something happened?
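+        # if a watched directory has gone write_timeout seconds with no
+        # new events, assume the writer is done and start a copy for it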
+ for watchdir, last_events in self.handler.last_event.items():
+ for last_event_dir, last_event_time in last_events.items():
+ time_delta = time.time() - last_event_time
+ if time_delta > self.write_timeout:
+ copy_url = self.make_copy_url(watchdir, last_event_dir)
+ self.startCopy(copy_url)
+ self.handler.last_event[watchdir] = {}
+ # handle unmounted filesystems
+ for mount_point, was_mounted in self.mounted_points.items():
+ if not was_mounted and mount.is_mounted(mount_point):
+ # we've been remounted. Huzzah!
+ # restart the watch
+ for watch in self.mounts_to_watches[mount_point]:
+ # add_watch expects a list of directories
+ self.add_watch([watch])
+ logging.info(
+ "%s was remounted, restarting watch" % \
+ (mount_point)
+ )
+ self.mounted_points[mount_point] = True
+
+ def _parser(self, msg, who):
+ """
+ Parse xmpp chat messages
+ """
+ help = u"I can send [copy] message, or sequencer [finished]"
+ if re.match(u"help", msg):
+ reply = help
+ elif re.match("copy", msg):
+ self.startCopy()
+ reply = u"sent copy message"
+ elif re.match(u"finished", msg):
+ words = msg.split()
+ if len(words) == 2:
+ self.sequencingFinished(words[1])
+ reply = u"sending sequencing finished for %s" % (words[1])
+ else:
+ reply = u"need runfolder name"
+ else:
+ reply = u"I didn't understand '%s'" % (msg)
+ return reply
+
+ def run(self):
+ """
+ Start application
+ """
+ # we have to configure pyinotify after BenderJab.start is called
+ # as weird things happen to pyinotify if the stdio is closed
+ # after it's initialized.
+ self.add_watch()
+ super(SpoolWatcher, self).run()
+
+ def stop(self):
+ """
+ shutdown application
+ """
+ # destroy the inotify's instance on this interrupt (stop monitoring)
+ if self.notifier is not None:
+ self.notifier.stop()
+ super(SpoolWatcher, self).stop()
+
+ def startCopy(self, copy_url=None):
+ logging.debug("writes seem to have stopped")
+ if self.notify_runner is not None:
+ for r in self.notify_runner:
+ self.rpc_send(r, tuple(), 'startCopy')
+ if self.notify_users is not None:
+ for u in self.notify_users:
+ self.send(u, 'startCopy %s.' % (copy_url,))
+
+ def sequencingFinished(self, run_dir):
+ # need to strip off self.watchdirs from rundir I suspect.
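+ # e.g. a hypothetical run_dir '/mnt/spool/080909_HWI-EAS229_0052_1234ABCD'
+ # under a watchdir of '/mnt/spool' should be reported as just
+ # '080909_HWI-EAS229_0052_1234ABCD' (paths illustrative only)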
+ logging.info("run.completed in " + str(run_dir)) + pattern = self.watch_dir + if pattern[-1] != os.path.sep: + pattern += os.path.sep + stripped_run_dir = re.sub(pattern, "", run_dir) + logging.debug("stripped to " + stripped_run_dir) + if self.notify_users is not None: + for u in self.notify_users: + self.send(u, 'Sequencing run %s finished' % (stripped_run_dir)) + if self.notify_runner is not None: + for r in self.notify_runner: + self.rpc_send(r, (stripped_run_dir,), 'sequencingFinished') + +def main(args=None): + bot = SpoolWatcher() + return bot.main(args) + +if __name__ == "__main__": + ret = main(sys.argv[1:]) + #sys.exit(ret) + +# TODO: +# send messages to copier specifying which mount to copy diff --git a/trunk/htsworkflow/automation/test/test_runner.py b/trunk/htsworkflow/automation/test/test_runner.py new file mode 100644 index 0000000..6c3b9df --- /dev/null +++ b/trunk/htsworkflow/automation/test/test_runner.py @@ -0,0 +1,46 @@ +import unittest + + +import os +from htsworkflow.automation.copier import runfolder_validate + +def extract_runfolder_path(watchdir, event): + runfolder_path = watchdir + path = event.path + if not path.startswith(watchdir): + return None + + fragments = path[len(watchdir):].split(os.path.sep) + for f in fragments: + runfolder_path = os.path.join(runfolder_path, f) + if runfolder_validate(f): + return runfolder_path + return None + +class Event(object): + def __init__(self, path=None, name=None): + self.path = path + self.name = name + +class testRunner(unittest.TestCase): + + def test_extract_runfolder(self): + watchdir = os.path.join('root', 'server', 'mount') + runfolder = os.path.join(watchdir, '080909_HWI-EAS229_0052_1234ABCD') + ipar = os.path.join(runfolder, 'Data', 'IPAR_1.01') + other = os.path.join(watchdir, 'other') + + event = Event( path=runfolder ) + self.failUnlessEqual(extract_runfolder_path(watchdir, event), runfolder) + + event = Event( path=ipar ) + self.failUnlessEqual(extract_runfolder_path(watchdir, event), runfolder) + + event = Event( path=other) + self.failUnlessEqual(extract_runfolder_path(watchdir, event), None ) + +def suite(): + return unittest.makeSuite(testRunner,'test') + +if __name__ == "__main__": + unittest.main(defaultTest="suite") diff --git a/trunk/htsworkflow/frontend/__init__.py b/trunk/htsworkflow/frontend/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/trunk/htsworkflow/frontend/analysis/__init__.py b/trunk/htsworkflow/frontend/analysis/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/trunk/htsworkflow/frontend/analysis/admin.py b/trunk/htsworkflow/frontend/analysis/admin.py new file mode 100644 index 0000000..d3f903e --- /dev/null +++ b/trunk/htsworkflow/frontend/analysis/admin.py @@ -0,0 +1,31 @@ +from htsworkflow.frontend.analysis.models import Task, Project +from django.contrib import admin +from django.utils.translation import ugettext_lazy as _ + +class ProjectOptions(admin.ModelAdmin): + list_display = ('ProjTitle','ProjectTasks') + list_filter = () + search_fieldsets = ['project_name','=tasks__subject1__library_id','=tasks__subject2__library_id','tasks__subject1__library_name','tasks__subject2__library_name','project_notes'] + fieldsets = ( + (None, { + 'fields': (('project_name'),('tasks'),('project_notes'))}), + ) + filter_horizontal = ('tasks',) + +class TaskOptions(admin.ModelAdmin): + list_display = ('task_name','apply_calc','subject1','subject2','task_params','InProjects','submitted_on','task_status') + list_filter = ('apply_calc',) + search_fieldsets = 
['task_name','id','=subject1__library_id','=subject2__library_id'] + fieldsets = ( + (None, { + 'fields': (('task_name'),('apply_calc'),('subject1'),('subject2'),('task_params')) + }), + ('system fields', { + 'classes': ('collapse',), + 'fields': (('submitted_on'),('task_status','run_note')) + }), + ) + +admin.site.register(Project, ProjectOptions) +admin.site.register(Task, TaskOptions) + diff --git a/trunk/htsworkflow/frontend/analysis/main.py b/trunk/htsworkflow/frontend/analysis/main.py new file mode 100644 index 0000000..b5217dc --- /dev/null +++ b/trunk/htsworkflow/frontend/analysis/main.py @@ -0,0 +1,118 @@ +# some core functions of analysis manager module +from django.http import HttpResponse +from datetime import datetime +from string import * +import re +from htsworkflow.frontend import settings +from htsworkflow.frontend.analysis.models import Task, Project +from django.core.exceptions import ObjectDoesNotExist + +def updStatus(request): + ClIP = request.META['REMOTE_ADDR'] + #Check client access permission + granted = False + if (settings.ALLOWED_ANALYS_IPS.has_key(ClIP)): granted = True + if not granted: return HttpResponse("access denied.") + + output='' + taskid=-1; + # Check required param + if request.has_key('taskid'): taskid = request['taskid'] + else: return HttpResponse('missing param task id') + + try: + rec = Task.objects.get(id=taskid) + mytimestamp = datetime.now().__str__() + mytimestamp = re.sub(pattern=":[^:]*$",repl="",string=mytimestamp) + if request.has_key('msg'): + rec.task_status += ", "+request['msg']+" ("+mytimestamp+")" + else : + rec.task_status = "Registered ("+mytimestamp+")" + rec.save() + output = "Hello "+settings.ALLOWED_ANALYS_IPS[ClIP]+". Updated status for task "+taskid + except ObjectDoesNotExist: + output = "entry not found: taskid="+taskid + + return HttpResponse(output) + + +def getProjects(request): + ClIP = request.META['REMOTE_ADDR'] + #Check client access permission + granted = False + if (settings.ALLOWED_ANALYS_IPS.has_key(ClIP)): granted = True + if not granted: return HttpResponse("access denied.") + + outputfile = '' + + All=False + if (request.has_key('mode')): + if request['mode']=='all': + All=True + + try: + if(All): + rec = Project.objects.all().distinct() + else: + rec = Project.objects.filter(tasks__task_status__exact='defined').distinct() + + outputfile = '' + outputfile += '\n' + for p in rec: + outputfile += '\n' + outputfile += '\n' + prj_tasks = p.tasks.all() + for t in prj_tasks: + outputfile += '\n' + if (t.apply_calc == 'QuEST' or t.apply_calc == 'WingPeaks' or t.apply_calc == 'MACS'): + outputfile += '\n' + if t.subject1: + outputfile += '\n' + if t.subject2: + outputfile += '\n' + else: + outputfile += '\nBackground Library Missing' + else: + outputfile += '\nSignal Library Missing' + outputfile += '\n'+t.task_params.__str__()+'' + outputfile += '\n' + + if (t.apply_calc == 'Methylseq'): + outputfile += '\n' + if t.subject1: + outputfile += '\n' + if t.subject2: + outputfile += '\n' + else: + outputfile += '\nMsp1 Library Missing' + else: + outputfile += '\nHpa2 Library Missing' + outputfile += '\n'+t.task_params.__str__()+'' + outputfile += '\n' + + if (t.apply_calc == 'ProfileReads' or t.apply_calc == 'qPCR'): + outputfile += '\n<'+t.apply_calc+' TaskId="'+t.id.__str__()+'" Name="'+t.task_name+'" Genome="'+t.subject1.library_species.use_genome_build+'" Library="'+t.subject1.library_id+'"/>' + + if (t.apply_calc == 'CompareLibs'): + outputfile += '\n' + if t.subject1: + outputfile += '\n' + else: + outputfile += 
'\nLibrary Missing'
+ if t.subject2:
+ outputfile += '\n'
+ else:
+ outputfile += '\nLibrary Missing'
+ outputfile += '\n'+t.task_params.__str__()+''
+ outputfile += '\n'
+
+ #if (t.apply_calc == 'ComparePeakCalls'):
+ #
+ # outputfile += '\n'
+ # TO DO: Define these new fields in Task: PCaller1 (QuEST,WingPeaks), PCaller2, Set1 (FK to self), Set2 (FK..) ALL NULL=TRUE
+ outputfile += '\n'
+ outputfile += '\n'
+ except ObjectDoesNotExist:
+ outputfile = ""
+
+ return HttpResponse(outputfile, mimetype='text/plain')
diff --git a/trunk/htsworkflow/frontend/analysis/models.py b/trunk/htsworkflow/frontend/analysis/models.py
new file mode 100644
index 0000000..e2ddff4
--- /dev/null
+++ b/trunk/htsworkflow/frontend/analysis/models.py
@@ -0,0 +1,101 @@
+from django.db import models
+from datetime import datetime
+from htsworkflow.frontend import settings
+from htsworkflow.frontend.samples.models import Library
+from string import *
+
+class Task(models.Model):
+ task_name = models.CharField(max_length=50,unique=True, db_index=True)
+ subject1 = models.ForeignKey(Library,related_name='sbj1_library',verbose_name="Subject1 (Signal/Hpa2)")
+ subject2 = models.ForeignKey(Library,related_name='sbj2_library',verbose_name="Subject2 (Control/Msp1)",blank=True,null=True)
+ CALCS = (
+ ('QuEST', 'QuEST Peak Calling'),
+ ('WingPeaks', 'Wing Peak Calling'),
+ ('MACS', 'MACS Peak Calling'),
+ ('qPCR', 'In Silico qPCR'),
+ ('CompareLibs', 'Compare Libraries'),
+ ('ComparePeakCalls','Compare Peak Calls'),
+ ('ProfileReads','Profile Reads'),
+ ('Methylseq','Methylseq'),
+ )
+ apply_calc = models.CharField(max_length=50,choices=CALCS,verbose_name='Applied Calculation')
+ ## userid = # logged in user
+ task_params = models.CharField(max_length=200,blank=True,null=True,default="")
+ task_status = models.CharField(max_length=500,blank=True,null=True,default='defined')
+ results_location = models.CharField(max_length=2000,blank=True,null=True)
+ # pass the callable, not datetime.now(), so the default is evaluated
+ # at save time rather than once at import time
+ submitted_on = models.DateTimeField(default=datetime.now)
+ run_note = models.CharField(max_length=500,blank=True,null=True)
+
+ def __str__(self):
+ return '"%s" - %s on [%s]/[%s]' % (self.task_name,self.apply_calc,self.subject1,self.subject2)
+
+ def InProjects(self):
+ # short-circuited for now; the full listing below was unreachable
+ # dead code, kept commented out until it is actually wanted
+ # ps = self.project_set.all()
+ # pstr = 'In '
+ # for p in ps:
+ # pstr += '%s, ' % (p.project_name)
+ # return pstr
+ return '...'
+
+class Project(models.Model):
+ project_name = models.CharField(max_length=50,unique=True, db_index=True)
+ tasks = models.ManyToManyField(Task,related_name='project_tasks',null=True)
+ project_notes = models.CharField(max_length=500,blank=True,null=True)
+
+ def __str__(self):
+ return '%s' % (self.project_name)
+
+ def ProjectTasks(self):
+ ptasks = self.tasks.all().order_by('id')
+ surl = settings.TASKS_PROJS_SERVER+'/projects/'
+ tstr = ''
+ Style = ''
+ if len(ptasks) > 8: Style = ' style="height:200px;overflow:auto" '
+ tstr += '
' + tstr += '' + isregistered = False + for t in ptasks: + taskdesc = t.task_name+'
Details: '+t.apply_calc+' on '+t.subject1.library_id + if t.subject2 is not None: + taskdesc += ' and '+t.subject2.library_id + taskdesc += ' (TaskId:'+t.id.__str__()+')' + tstr += '
' % (taskdesc,replace(t.task_status,'Complete','Complete')) + if t.task_status != 'defined': isregistered = True + + tstr += '
TasksJob Status
%s%s
' + tstr += '
' + tstr += '
' + tstr += '
' + if isregistered: + tstr += 'VIEW PROJECT RESULTS' + tstr += '(view in new window)' + else: + tstr += 'REGISTERING ...' + + tstr += '
' + tstr += '' + tstr += '
' + return tstr + + ProjectTasks.allow_tags = True + + def ProjTitle(self): + ptasks = self.tasks.all().order_by('id') + tasks_counter = '('+len(ptasks).__str__() + ' tasks)' + htmlstr = '%s
%s' % (self.project_name,tasks_counter) + return htmlstr + + ProjTitle.allow_tags = True + diff --git a/trunk/htsworkflow/frontend/analysis/urls.py b/trunk/htsworkflow/frontend/analysis/urls.py new file mode 100644 index 0000000..b1be3d2 --- /dev/null +++ b/trunk/htsworkflow/frontend/analysis/urls.py @@ -0,0 +1,6 @@ +from django.conf.urls.defaults import * + +urlpatterns = patterns('', + (r'^updStatus$', 'htsworkflow.frontend.analysis.main.updStatus'), + (r'^getProjects/$', 'htsworkflow.frontend.analysis.main.getProjects'), +) diff --git a/trunk/htsworkflow/frontend/eland_config/__init__.py b/trunk/htsworkflow/frontend/eland_config/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/trunk/htsworkflow/frontend/eland_config/admin.py b/trunk/htsworkflow/frontend/eland_config/admin.py new file mode 100644 index 0000000..56107ff --- /dev/null +++ b/trunk/htsworkflow/frontend/eland_config/admin.py @@ -0,0 +1,4 @@ +from django.contrib import admin +from django.utils.translation import ugettext_lazy as _ + + diff --git a/trunk/htsworkflow/frontend/eland_config/forms.py b/trunk/htsworkflow/frontend/eland_config/forms.py new file mode 100644 index 0000000..a2245f9 --- /dev/null +++ b/trunk/htsworkflow/frontend/eland_config/forms.py @@ -0,0 +1,163 @@ +from django import forms +from django.forms.util import ErrorList + + +SPECIES_LIST = [#('--choose--', '--Choose--'), + ('hg18', 'Homo sapiens (Hg18)'), + ('Mm8', 'Mus musculus (Mm8)'), + ('arabv6', 'Arabadopsis Thaliana v6'), + ('other', 'Other species (Include in description)')] + + +class DivErrorList(ErrorList): + def __unicode__(self): + return self.as_divs() + + def as_divs(self): + if not self: return u'' + return u'
<div class="errorlist">%s</div>' % (''.join([u'<div class="error">%s</div>
' % e for e in self])) + + + +class ConfigForm(forms.Form): + + flow_cell_number = forms.CharField(min_length=2) + run_date = forms.DateTimeField() + advanced_run = forms.BooleanField(required=False) + read_length = forms.IntegerField(min_value=1, initial=32) + #eland_repeat = forms.BooleanField() + + #needs a for loop or something to allow for n configurations + #analysis_type = forms.ChoiceField(choices=[('eland','eland')]) + lane1_species = forms.ChoiceField(choices=SPECIES_LIST) + lane1_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'})) + + lane2_species = forms.ChoiceField(choices=SPECIES_LIST) + lane2_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'})) + + lane3_species = forms.ChoiceField(choices=SPECIES_LIST) + lane3_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'})) + + lane4_species = forms.ChoiceField(choices=SPECIES_LIST) + lane4_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'})) + + lane5_species = forms.ChoiceField(choices=SPECIES_LIST) + lane5_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'})) + + lane6_species = forms.ChoiceField(choices=SPECIES_LIST) + lane6_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'})) + + lane7_species = forms.ChoiceField(choices=SPECIES_LIST) + lane7_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'})) + + lane8_species = forms.ChoiceField(choices=SPECIES_LIST) + lane8_description = forms.CharField(widget=forms.TextInput(attrs={'size':'60'})) + + notes = forms.CharField(widget=forms.Textarea(attrs={'cols':'70'}), required=False) + + #lane_specific_read_length = forms.IntegerField(min_value=1) + + #eland_genome_lanes = forms.MultipleChoiceField(choices=[('lane1','1'), + # ('lane2','2'), + # ('lane3','3'), + # ('lane4','4'), + # ('lane5','5'), + # ('lane6','6'), + # ('lane7','7'), + # ('lane8','8') ]) + + #eland_genome = forms.ChoiceField(choices=) + + #use_bases_lanes = forms.MultipleChoiceField(choices=[('lane1','1'), + # ('lane2','2'), + # ('lane3','3'), + # ('lane4','4'), + # ('lane5','5'), + # ('lane6','6'), + # ('lane7','7'), + # ('lane8','8') ]) + + #use_bases_mask = forms.CharField() + + #sequence_format = forms.ChoiceField(choices=[('scarf', 'scarf')]) + + + + #subject = forms.CharField(max_length=100) + #message = forms.CharField() + #sender = forms.EmailField() + #cc_myself = forms.BooleanField() + + def as_custom(self): + """ + Displays customized html output + """ + html = [] + + fcn = self['flow_cell_number'] + + html.append(fcn.label_tag() + ': ' + str(fcn) + str(fcn.errors) + '
<br/>')
+
+ run_date = self['run_date']
+ html.append(run_date.label_tag() + ': ' + str(run_date) + str(run_date.errors) + '<br/>')
+
+ arun = self['advanced_run']
+ html.append(arun.label_tag() + ': ' + str(arun) + str(arun.errors) + '<br/>')
+
+ rl = self['read_length']
+ html.append(rl.label_tag() + ': ' + str(rl) + str(rl.errors) + '<br/><br/>')
+
+ html.append('<table>')
+ html.append('<tr><td>%s</td><td>%s</td><td>%s</td></tr>' \
+ % ('Lane', 'Species', 'Description'))
+
+ l1s = self['lane1_species']
+ l1d = self['lane1_description']
+ html.append('<tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('1', str(l1s), str(l1s.errors), str(l1d), str(l1d.errors)))
+
+ l2s = self['lane2_species']
+ l2d = self['lane2_description']
+ html.append('<tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('2', str(l2s), str(l2s.errors), str(l2d), str(l2d.errors)))
+
+ l3s = self['lane3_species']
+ l3d = self['lane3_description']
+ html.append('<tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('3', str(l3s), str(l3s.errors), str(l3d), str(l3d.errors)))
+
+ l4s = self['lane4_species']
+ l4d = self['lane4_description']
+ html.append('<tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('4', str(l4s), str(l4s.errors), str(l4d), str(l4d.errors)))
+
+ l5s = self['lane5_species']
+ l5d = self['lane5_description']
+ html.append('<tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('5', str(l5s), str(l5s.errors), str(l5d), str(l5d.errors)))
+
+ l6s = self['lane6_species']
+ l6d = self['lane6_description']
+ html.append('<tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('6', str(l6s), str(l6s.errors), str(l6d), str(l6d.errors)))
+
+ l7s = self['lane7_species']
+ l7d = self['lane7_description']
+ html.append('<tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('7', str(l7s), str(l7s.errors), str(l7d), str(l7d.errors)))
+
+ l8s = self['lane8_species']
+ l8d = self['lane8_description']
+ html.append('<tr><td>%s</td><td>%s %s</td><td>%s %s</td></tr>' \
+ % ('8', str(l8s), str(l8s.errors), str(l8d), str(l8d.errors)))
+
+ html.append('</table><br/>')
+
+ notes = self['notes']
+ html.append('<br/><br/>Notes:<br/>')
+ html.append(' %s<br/>
' % (str(notes))) + + return '\n'.join(html) + + + diff --git a/trunk/htsworkflow/frontend/eland_config/models.py b/trunk/htsworkflow/frontend/eland_config/models.py new file mode 100644 index 0000000..71a8362 --- /dev/null +++ b/trunk/htsworkflow/frontend/eland_config/models.py @@ -0,0 +1,3 @@ +from django.db import models + +# Create your models here. diff --git a/trunk/htsworkflow/frontend/eland_config/urls.py b/trunk/htsworkflow/frontend/eland_config/urls.py new file mode 100644 index 0000000..129f57c --- /dev/null +++ b/trunk/htsworkflow/frontend/eland_config/urls.py @@ -0,0 +1,10 @@ +from django.conf.urls.defaults import * + +urlpatterns = patterns('', + # Example: + + (r'^(?P\w+)/$', 'htsworkflow.frontend.eland_config.views.config'), + (r'^$', 'htsworkflow.frontend.eland_config.views.config'), + #(r'^$', 'htsworkflow.frontend.eland_config.views.index') + +) diff --git a/trunk/htsworkflow/frontend/eland_config/views.py b/trunk/htsworkflow/frontend/eland_config/views.py new file mode 100644 index 0000000..02b3f13 --- /dev/null +++ b/trunk/htsworkflow/frontend/eland_config/views.py @@ -0,0 +1,415 @@ +from django.http import HttpResponse +from django.shortcuts import render_to_response +from django.core.exceptions import ObjectDoesNotExist + +from htsworkflow.frontend.eland_config import forms +from htsworkflow.frontend import settings +from htsworkflow.frontend.experiments import models + +import os +import glob +# Create your views here. + + +def _validate_input(data): + #if data.find('..') == -1 or data.find('/') == -1 or data.find('\\') == -1: + return data.replace('..', '').replace('/', '_').replace('\\', '_') + +#def contact(request): +# if request.method == 'POST': +# form = ContactForm(request.POST) +# if form.is_valid(): +# # Do form processing here... +# return HttpResponseRedirect('/url/on_success/') +# else: +# form = ContactForm() +# return + + + +#def _saveConfigFile(form): +# """ +# Given a valid form, save eland config to file based on flowcell number. +# """ +# assert form.is_valid() +# +# clean_data = form.cleaned_data +# flowcell = clean_data['flow_cell_number'].replace('/','_').replace('..', '__') +# +# file_path = os.path.join(settings.UPLOADTO_CONFIG_FILE, flowcell) +# +# f = open(file_path, 'w') +# cfg = generateElandConfig(form) +# f.write(cfg) +# f.close() +# +# +#def _saveToDb(form): +# """ +# Save info to the database. 
+# """ +# clean_data = form.cleaned_data +# +# fc_id = clean_data['flow_cell_number'] +# +# try: +# fc = models.FlowCell.objects.get(flowcell_id=fc_id) +# except models.FlowCell.DoesNotExist: +# fc = models.FlowCell() +# +# fc.flowcell_id = fc_id +# fc.run_date = clean_data['run_date'] +# +# #LANE 1 +# fc.lane1_sample = clean_data['lane1_description'] +# species_name = clean_data['lane1_species'] +# try: +# specie = models.Specie.objects.get(scientific_name=species_name) +# except models.Specie.DoesNotExist: +# specie = models.Specie(scientific_name=species_name) +# specie.save() +# fc.lane1_species = specie +# +# #LANE 2 +# fc.lane2_sample = clean_data['lane2_description'] +# species_name = clean_data['lane2_species'] +# try: +# specie = models.Specie.objects.get(scientific_name=species_name) +# except models.Specie.DoesNotExist: +# specie = models.Specie(scientific_name=species_name) +# specie.save() +# fc.lane2_species = specie +# +# #LANE 3 +# fc.lane3_sample = clean_data['lane3_description'] +# species_name = clean_data['lane3_species'] +# try: +# specie = models.Specie.objects.get(scientific_name=species_name) +# except models.Specie.DoesNotExist: +# specie = models.Specie(scientific_name=species_name) +# specie.save() +# fc.lane3_species = specie +# +# #LANE 4 +# fc.lane4_sample = clean_data['lane4_description'] +# species_name = clean_data['lane4_species'] +# try: +# specie = models.Specie.objects.get(scientific_name=species_name) +# except models.Specie.DoesNotExist: +# specie = models.Specie(scientific_name=species_name) +# specie.save() +# fc.lane4_species = specie +# +# #LANE 5 +# fc.lane5_sample = clean_data['lane5_description'] +# species_name = clean_data['lane5_species'] +# try: +# specie = models.Specie.objects.get(scientific_name=species_name) +# except models.Specie.DoesNotExist: +# specie = models.Specie(scientific_name=species_name) +# specie.save() +# fc.lane5_species = specie +# +# #LANE 6 +# fc.lane6_sample = clean_data['lane6_description'] +# species_name = clean_data['lane6_species'] +# try: +# specie = models.Specie.objects.get(scientific_name=species_name) +# except models.Specie.DoesNotExist: +# specie = models.Specie(scientific_name=species_name) +# specie.save() +# fc.lane6_species = specie +# +# #LANE 7 +# fc.lane7_sample = clean_data['lane7_description'] +# species_name = clean_data['lane7_species'] +# try: +# specie = models.Specie.objects.get(scientific_name=species_name) +# except models.Specie.DoesNotExist: +# specie = models.Specie(scientific_name=species_name) +# specie.save() +# fc.lane7_species = specie +# +# #LANE 8 +# fc.lane8_sample = clean_data['lane8_description'] +# species_name = clean_data['lane8_species'] +# try: +# specie = models.Specie.objects.get(scientific_name=species_name) +# except models.Specie.DoesNotExist: +# specie = models.Specie(scientific_name=species_name) +# specie.save() +# fc.lane8_species = specie +# +# fc.notes = clean_data['notes'] +# +# fc.save() +# +# return fc +# +# +#def generateElandConfig(form): +# data = [] +# +# form = form.cleaned_data +# +# BASE_DIR = '/data-store01/compbio/genomes' +# +# data.append("# FLOWCELL: %s" % (form['flow_cell_number'])) +# data.append("#") +# +# notes = form['notes'].replace('\r\n', '\n').replace('\r', '\n') +# notes = notes.replace('\n', '\n# ') +# data.append("# NOTES:") +# data.append("# %s\n#" % (notes)) +# +# #Convert all newline conventions to unix style +# l1d = form['lane1_description'].replace('\r\n', '\n').replace('\r', '\n') +# l2d = 
form['lane2_description'].replace('\r\n', '\n').replace('\r', '\n') +# l3d = form['lane3_description'].replace('\r\n', '\n').replace('\r', '\n') +# l4d = form['lane4_description'].replace('\r\n', '\n').replace('\r', '\n') +# l5d = form['lane5_description'].replace('\r\n', '\n').replace('\r', '\n') +# l6d = form['lane6_description'].replace('\r\n', '\n').replace('\r', '\n') +# l7d = form['lane7_description'].replace('\r\n', '\n').replace('\r', '\n') +# l8d = form['lane8_description'].replace('\r\n', '\n').replace('\r', '\n') +# +# # Turn new lines into indented commented newlines +# l1d = l1d.replace('\n', '\n# ') +# l2d = l2d.replace('\n', '\n# ') +# l3d = l3d.replace('\n', '\n# ') +# l4d = l4d.replace('\n', '\n# ') +# l5d = l5d.replace('\n', '\n# ') +# l6d = l6d.replace('\n', '\n# ') +# l7d = l7d.replace('\n', '\n# ') +# l8d = l8d.replace('\n', '\n# ') +# +# data.append("# Lane1: %s" % (l1d)) +# data.append("# Lane2: %s" % (l2d)) +# data.append("# Lane3: %s" % (l3d)) +# data.append("# Lane4: %s" % (l4d)) +# data.append("# Lane5: %s" % (l5d)) +# data.append("# Lane6: %s" % (l6d)) +# data.append("# Lane7: %s" % (l7d)) +# data.append("# Lane8: %s" % (l8d)) +# +# #data.append("GENOME_DIR %s" % (BASE_DIR)) +# #data.append("CONTAM_DIR %s" % (BASE_DIR)) +# read_length = form['read_length'] +# data.append("READ_LENGTH %d" % (read_length)) +# #data.append("ELAND_REPEAT") +# data.append("ELAND_MULTIPLE_INSTANCES 8") +# +# #Construct genome dictionary to figure out what lanes to put +# # in the config file. +# genome_dict = {} +# l1s = form['lane1_species'] +# genome_dict.setdefault(l1s, []).append('1') +# l2s = form['lane2_species'] +# genome_dict.setdefault(l2s, []).append('2') +# l3s = form['lane3_species'] +# genome_dict.setdefault(l3s, []).append('3') +# l4s = form['lane4_species'] +# genome_dict.setdefault(l4s, []).append('4') +# l5s = form['lane5_species'] +# genome_dict.setdefault(l5s, []).append('5') +# l6s = form['lane6_species'] +# genome_dict.setdefault(l6s, []).append('6') +# l7s = form['lane7_species'] +# genome_dict.setdefault(l7s, []).append('7') +# l8s = form['lane8_species'] +# genome_dict.setdefault(l8s, []).append('8') +# +# genome_list = genome_dict.keys() +# genome_list.sort() +# +# #Loop through and create entries for each species. +# for genome in genome_list: +# lanes = ''.join(genome_dict[genome]) +# data.append('%s:ANALYSIS eland' % (lanes)) +# data.append('%s:READ_LENGTH %s' % (lanes, read_length)) +# data.append('%s:ELAND_GENOME %s' % (lanes, os.path.join(BASE_DIR, genome))) +# data.append('%s:USE_BASES %s' % (lanes, 'Y'*int(read_length))) +# +# data.append('SEQUENCE_FORMAT --scarf') +# +# return '\n'.join(data) + + +def getElandConfig(flowcell, regenerate=False): + + file_path = os.path.join(settings.UPLOADTO_CONFIG_FILE, flowcell) + + #If we are regenerating the config file, skip + # reading of existing file. If the file doesn't + # exist, try to generate it form the DB. 
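+ # In other words file_path acts as a simple cache keyed on the
+ # flowcell name: regenerate=False returns the cached file when it
+ # exists, regenerate=True always rebuilds it from the database.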
+ if not regenerate and os.path.isfile(file_path): + f = open(file_path, 'r') + data = f.read() + f.close() + return data + + try: + fcObj = models.FlowCell.objects.get(flowcell_id__iexact=flowcell) + except ObjectDoesNotExist: + return None + + data = [] + + #form = form.cleaned_data + + BASE_DIR = '/data-store01/compbio/genomes' + + data.append("# FLOWCELL: %s" % (fcObj.flowcell_id)) + data.append("#") + + notes = fcObj.notes.replace('\r\n', '\n').replace('\r', '\n') + notes = notes.replace('\n', '\n# ') + data.append("# NOTES:") + data.append("# %s\n#" % (notes)) + + #Convert all newline conventions to unix style + l1d = str(fcObj.lane_1_library.library_id) + '|' \ + + fcObj.lane_1_library.library_name.replace('\r\n', '\n').replace('\r', '\n').replace('%', '%%') + l2d = str(fcObj.lane_2_library.library_id) + '|' \ + + fcObj.lane_2_library.library_name.replace('\r\n', '\n').replace('\r', '\n').replace('%', '%%') + l3d = str(fcObj.lane_3_library.library_id) + '|' \ + + fcObj.lane_3_library.library_name.replace('\r\n', '\n').replace('\r', '\n').replace('%', '%%') + l4d = str(fcObj.lane_4_library.library_id) + '|' \ + + fcObj.lane_4_library.library_name.replace('\r\n', '\n').replace('\r', '\n').replace('%', '%%') + + l5d = str(fcObj.lane_5_library.library_id) + '|' \ + + fcObj.lane_5_library.library_name.replace('\r\n', '\n').replace('\r', '\n').replace('%', '%%') + l6d = str(fcObj.lane_6_library.library_id) + '|' \ + + fcObj.lane_6_library.library_name.replace('\r\n', '\n').replace('\r', '\n').replace('%', '%%') + l7d = str(fcObj.lane_7_library.library_id) + '|' \ + + fcObj.lane_7_library.library_name.replace('\r\n', '\n').replace('\r', '\n').replace('%', '%%') + l8d = str(fcObj.lane_8_library.library_id) + '|' \ + + fcObj.lane_8_library.library_name.replace('\r\n', '\n').replace('\r', '\n').replace('%', '%%') + + # Turn new lines into indented commented newlines + l1d = l1d.replace('\n', '\n# ') + l2d = l2d.replace('\n', '\n# ') + l3d = l3d.replace('\n', '\n# ') + l4d = l4d.replace('\n', '\n# ') + l5d = l5d.replace('\n', '\n# ') + l6d = l6d.replace('\n', '\n# ') + l7d = l7d.replace('\n', '\n# ') + l8d = l8d.replace('\n', '\n# ') + + data.append("# Lane1: %s" % (l1d)) + data.append("# Lane2: %s" % (l2d)) + data.append("# Lane3: %s" % (l3d)) + data.append("# Lane4: %s" % (l4d)) + data.append("# Lane5: %s" % (l5d)) + data.append("# Lane6: %s" % (l6d)) + data.append("# Lane7: %s" % (l7d)) + data.append("# Lane8: %s" % (l8d)) + + #data.append("GENOME_DIR %s" % (BASE_DIR)) + #data.append("CONTAM_DIR %s" % (BASE_DIR)) + read_length = fcObj.read_length + #data.append("ELAND_REPEAT") + data.append("ELAND_MULTIPLE_INSTANCES 8") + + #Construct genome dictionary to figure out what lanes to put + # in the config file. 
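+ # e.g. (species names illustrative only) lanes 1-4 'Homo sapiens'
+ # and lanes 5-8 'Mus musculus' yields
+ # {'Homo sapiens': ['1','2','3','4'], 'Mus musculus': ['5','6','7','8']},
+ # so each genome gets a single shared config stanza below.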
+ genome_dict = {} + + #l1s = form['lane1_species'] + l1s = fcObj.lane_1_library.library_species.scientific_name #+ '|' + \ + #fcObj.lane_1_library.library_species.use_genome_build + genome_dict.setdefault(l1s, []).append('1') + l2s = fcObj.lane_2_library.library_species.scientific_name #+ '|' + \ + #fcObj.lane_2_library.library_species.use_genome_build + genome_dict.setdefault(l2s, []).append('2') + l3s = fcObj.lane_3_library.library_species.scientific_name #+ '|' + \ + #fcObj.lane_3_library.library_species.use_genome_build + genome_dict.setdefault(l3s, []).append('3') + l4s = fcObj.lane_4_library.library_species.scientific_name #+ '|' + \ + #fcObj.lane_4_library.library_species.use_genome_build + genome_dict.setdefault(l4s, []).append('4') + l5s = fcObj.lane_5_library.library_species.scientific_name #+ '|' + \ + #fcObj.lane_5_library.library_species.use_genome_build + genome_dict.setdefault(l5s, []).append('5') + l6s = fcObj.lane_6_library.library_species.scientific_name #+ '|' + \ + #fcObj.lane_6_library.library_species.use_genome_build + genome_dict.setdefault(l6s, []).append('6') + l7s = fcObj.lane_7_library.library_species.scientific_name #+ '|' + \ + #fcObj.lane_7_library.library_species.use_genome_build + genome_dict.setdefault(l7s, []).append('7') + l8s = fcObj.lane_8_library.library_species.scientific_name #+ '|' + \ + #fcObj.lane_8_library.library_species.use_genome_build + genome_dict.setdefault(l8s, []).append('8') + + genome_list = genome_dict.keys() + genome_list.sort() + + #Loop through and create entries for each species. + for genome in genome_list: + lanes = ''.join(genome_dict[genome]) + if fcObj.paired_end: + data.append('%s:ANALYSIS eland_pair' % (lanes)) + else: + data.append('%s:ANALYSIS eland_extended' % (lanes)) + data.append('%s:READ_LENGTH %s' % (lanes, read_length)) + data.append('%s:ELAND_GENOME %s' % (lanes, '%%(%s)s' % (genome))) + data.append('%s:USE_BASES %s' % (lanes, 'Y'*int(read_length))) + + data.append('SEQUENCE_FORMAT --fastq') + + data = '\n'.join(data) + + f = open(file_path, 'w') + f.write(data) + f.close() + + return data + + + +def config(request, flowcell=None): + """ + Returns eland config file for a given flowcell number, + or returns a list of available flowcell numbers. + """ + + # Provide INDEX of available Flowcell config files. + if flowcell is None: + #Find all FC* config files and report an index html file + #fc_list = [ os.path.split(file_path)[1] for file_path in glob.glob(os.path.join(settings.UPLOADTO_CONFIG_FILE, 'FC*')) ] + fc_list = [ fc.flowcell_id for fc in models.FlowCell.objects.all() ] + + #Convert FC* list to html links + fc_html = [ '%s' % (fc_name, fc_name) for fc_name in fc_list ] + + return HttpResponse('
<br/>'.join(fc_html))
+
+ #FIXME: Should validate flowcell input before using.
+ flowcell = _validate_input(flowcell)
+ cfg = getElandConfig(flowcell, regenerate=True)
+
+ if not cfg:
+ return HttpResponse("Hmm, config file for %s does not seem to exist." % (flowcell))
+
+ return HttpResponse(cfg, mimetype="text/plain")
+
+
+#def index(request):
+# """
+# Return a form for filling out information about the flowcell
+# """
+# if request.method == 'POST':
+# form = forms.ConfigForm(request.POST, error_class=forms.DivErrorList)
+# if form.is_valid():
+# #cfg = generateElandConfig(form)
+# _saveConfigFile(form)
+# _saveToDb(form)
+# return HttpResponse("Eland Config Saved!", mimetype="text/plain")
+# else:
+# return render_to_response('config_form.html', {'form': form })
+#
+# else:
+# fm = forms.ConfigForm(error_class=forms.DivErrorList)
+# return render_to_response('config_form.html', {'form': fm })
diff --git a/trunk/htsworkflow/frontend/experiments/__init__.py b/trunk/htsworkflow/frontend/experiments/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/trunk/htsworkflow/frontend/experiments/admin.py b/trunk/htsworkflow/frontend/experiments/admin.py
new file mode 100644
index 0000000..4e9074b
--- /dev/null
+++ b/trunk/htsworkflow/frontend/experiments/admin.py
@@ -0,0 +1,80 @@
+from htsworkflow.frontend.experiments.models import FlowCell, DataRun, ClusterStation, Sequencer
+from django.contrib import admin
+from django.utils.translation import ugettext_lazy as _
+
+class DataRunOptions(admin.ModelAdmin):
+ search_fields = [
+ 'run_folder',
+ 'run_note',
+ 'config_params',
+ '=fcid__lane_1_library__library_id',
+ '=fcid__lane_2_library__library_id',
+ '=fcid__lane_3_library__library_id',
+ '=fcid__lane_4_library__library_id',
+ '=fcid__lane_5_library__library_id',
+ '=fcid__lane_6_library__library_id',
+ '=fcid__lane_7_library__library_id',
+ '=fcid__lane_8_library__library_id',
+ 'fcid__lane_1_library__library_name',
+ 'fcid__lane_2_library__library_name',
+ 'fcid__lane_3_library__library_name',
+ 'fcid__lane_4_library__library_name',
+ 'fcid__lane_5_library__library_name',
+ 'fcid__lane_6_library__library_name',
+ 'fcid__lane_7_library__library_name',
+ 'fcid__lane_8_library__library_name' ]
+ list_display = [
+ 'run_folder',
+ 'Flowcell_Info',
+ 'run_start_time',
+ 'main_status',
+ 'run_note',
+ ]
+ list_filter = ('run_status', 'run_start_time')
+
+class FlowCellOptions(admin.ModelAdmin):
+ date_hierarchy = "run_date"
+ save_on_top = True
+ search_fields = ('flowcell_id',
+ 'sequencer__name',
+ 'cluster_station__name',
+ '=lane_1_library__library_id',
+ '=lane_2_library__library_id',
+ '=lane_3_library__library_id',
+ '=lane_4_library__library_id',
+ '=lane_5_library__library_id',
+ '=lane_6_library__library_id',
+ '=lane_7_library__library_id',
+ '=lane_8_library__library_id',
+ 'lane_1_library__library_name',
+ 'lane_2_library__library_name',
+ 'lane_3_library__library_name',
+ 'lane_4_library__library_name',
+ 'lane_5_library__library_name',
+ 'lane_6_library__library_name',
+ 'lane_7_library__library_name',
+ 'lane_8_library__library_name')
+ list_display = ('flowcell_id','run_date','Lanes')
+ list_filter = ('sequencer','cluster_station')
+ fieldsets = (
+ (None, {
+ 'fields': ('run_date', ('flowcell_id','cluster_station','sequencer'), ('read_length', 'paired_end'),)
+ }),
+ ('Lanes:', {
+ 'fields' : (('lane_1_library', 'lane_1_pM', 'lane_1_cluster_estimate'), ('lane_2_library', 'lane_2_pM', 'lane_2_cluster_estimate'), ('lane_3_library', 'lane_3_pM', 
'lane_3_cluster_estimate'), ('lane_4_library', 'lane_4_pM', 'lane_4_cluster_estimate'), ('lane_5_library', 'lane_5_pM', 'lane_5_cluster_estimate'), ('lane_6_library', 'lane_6_pM', 'lane_6_cluster_estimate'), ('lane_7_library', 'lane_7_pM', 'lane_7_cluster_estimate'), ('lane_8_library', 'lane_8_pM', 'lane_8_cluster_estimate'),) + }), + ('Notes:', { 'fields': ('notes',),}), + ) + +class ClusterStationOptions(admin.ModelAdmin): + list_display = ('name', ) + fieldsets = ( ( None, { 'fields': ( 'name', ) } ), ) + +class SequencerOptions(admin.ModelAdmin): + list_display = ('name', ) + fieldsets = ( ( None, { 'fields': ( 'name', ) } ), ) + +admin.site.register(DataRun, DataRunOptions) +admin.site.register(FlowCell, FlowCellOptions) +admin.site.register(ClusterStation, ClusterStationOptions) +admin.site.register(Sequencer, SequencerOptions) diff --git a/trunk/htsworkflow/frontend/experiments/experiments.py b/trunk/htsworkflow/frontend/experiments/experiments.py new file mode 100755 index 0000000..ca224a9 --- /dev/null +++ b/trunk/htsworkflow/frontend/experiments/experiments.py @@ -0,0 +1,199 @@ +# some core functions of the exp tracker module +from django.http import HttpResponse +from datetime import datetime +from string import * +import re +from htsworkflow.frontend import settings +from htsworkflow.frontend.experiments.models import FlowCell, DataRun +from htsworkflow.frontend.samples.models import Library +from django.core.exceptions import ObjectDoesNotExist +from django.core.mail import send_mail, mail_admins + +def updStatus(request): + output='' + user = 'none' + pswd = '' + UpdatedStatus = 'unknown' + fcid = 'none' + runfolder = 'unknown' + ClIP = request.META['REMOTE_ADDR'] + granted = False + + if request.has_key('user'): + user = request['user'] + + #Check access permission + if (user == 'rami' and settings.ALLOWED_IPS.has_key(ClIP)): granted = True + if not granted: return HttpResponse("access denied.") + + + # ~~~~~~Parameters for the job ~~~~ + if request.has_key('fcid'): + fcid = request['fcid'] + else: + return HttpResponse('missing fcid') + + if request.has_key('runf'): + runfolder = request['runf'] + else: + return HttpResponse('missing runf') + + + if request.has_key('updst'): + UpdatedStatus = request['updst'] + else: + return HttpResponse('missing status') + + # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + # Update Data Run status in DB + # Try get rec. If not found return 'entry not found + ', if found try update and return updated + try: + rec = DataRun.objects.get(run_folder=runfolder) + rec.run_status = UpdatedStatus + + #if there's a message update that too + mytimestamp = datetime.now().__str__() + mytimestamp = re.sub(pattern=":[^:]*$",repl="",string=mytimestamp) + if request.has_key('msg'): + rec.run_note += ", "+request['msg']+" ("+mytimestamp+")" + else : + if UpdatedStatus == '1': + rec.run_note = "Started ("+mytimestamp+")" + + rec.save() + output = "Hello "+settings.ALLOWED_IPS[ClIP]+". 
Updated to:'"+DataRun.RUN_STATUS_CHOICES[int(UpdatedStatus)][1].__str__()+"'" + except ObjectDoesNotExist: + output = "entry not found: "+fcid+", "+runfolder + + + #Notify researcher by email + # Doesn't work + #send_mail('Exp Tracker', 'Data Run Status '+output, 'rrauch@stanford.edu', ['rrrami@gmail.com'], fail_silently=False) + #mail_admins("test subject", "testing , testing", fail_silently=False) + # gives error: (49, "Can't assign requested address") + return HttpResponse(output) + +def generateConfile(request,fcid): + #granted = False + #ClIP = request.META['REMOTE_ADDR'] + #if (settings.ALLOWED_IPS.has_key(ClIP)): granted = True + + #if not granted: return HttpResponse("access denied.") + + cnfgfile = 'READ_LENGTH 25\n' + cnfgfile += 'ANALYSIS eland\n' + cnfgfile += 'GENOME_FILE all_chr.fa\n' + cnfgfile += 'ELAND_MULTIPLE_INSTANCES 8\n' + genome_dir = 'GENOME_DIR /Volumes/Genomes/' + eland_genome = 'ELAND_GENOME /Volumes/Genomes/' + + try: + rec = FlowCell.objects.get(flowcell_id=fcid) + + cnfgfile += '1:'+genome_dir+rec.lane_1_library.library_species.use_genome_build+'\n' + cnfgfile += '1:'+eland_genome+rec.lane_1_library.library_species.use_genome_build+'\n' + + cnfgfile += '2:'+genome_dir+rec.lane_2_library.library_species.use_genome_build+'\n' + cnfgfile += '2:'+eland_genome+rec.lane_2_library.library_species.use_genome_build+'\n' + + cnfgfile += '3:'+genome_dir+rec.lane_3_library.library_species.use_genome_build+'\n' + cnfgfile += '3:'+eland_genome+rec.lane_3_library.library_species.use_genome_build+'\n' + + cnfgfile += '4:'+genome_dir+rec.lane_4_library.library_species.use_genome_build+'\n' + cnfgfile += '4:'+eland_genome+rec.lane_4_library.library_species.use_genome_build+'\n' + + cnfgfile += '5:'+genome_dir+rec.lane_5_library.library_species.use_genome_build+'\n' + cnfgfile += '5:'+eland_genome+rec.lane_5_library.library_species.use_genome_build+'\n' + + cnfgfile += '6:'+genome_dir+rec.lane_6_library.library_species.use_genome_build+'\n' + cnfgfile += '6:'+eland_genome+rec.lane_6_library.library_species.use_genome_build+'\n' + + cnfgfile += '7:'+genome_dir+rec.lane_7_library.library_species.use_genome_build+'\n' + cnfgfile += '7:'+eland_genome+rec.lane_7_library.library_species.use_genome_build+'\n' + + cnfgfile += '8:'+genome_dir+rec.lane_8_library.library_species.use_genome_build+'\n' + cnfgfile += '8:'+eland_genome+rec.lane_8_library.library_species.use_genome_build + + except ObjectDoesNotExist: + cnfgfile = 'Entry not found for fcid = '+fcid + + return cnfgfile + +def getConfile(req): + granted = False + ClIP = req.META['REMOTE_ADDR'] + if (settings.ALLOWED_IPS.has_key(ClIP)): granted = True + + if not granted: return HttpResponse("access denied. 
IP: "+ClIP) + + fcid = 'none' + cnfgfile = 'Nothing found' + runfolder = 'unknown' + request = req.REQUEST + print request, dir(request) + print request['fcid'], request.has_key('fcid') + print request['runf'] + if request.has_key('fcid'): + fcid = request['fcid'] + if request.has_key('runf'): + runfolder = request['runf'] + try: + rec = DataRun.objects.get(run_folder=runfolder) #,flowcell_id=fcid) + cnfgfile = rec.config_params + #match_str = re.compile(r"READ_LENGTH.+$") + match_str = re.compile('^READ_LENGTH.+') + if not match_str.search(cnfgfile): + cnfgfile = generateConfile(request,fcid) + if match_str.search(cnfgfile): + rec = DataRun.objects.get(run_folder=runfolder) #,flowcell_id=fcid) + rec.config_params = cnfgfile + rec.save() + else: + cnfgfile = 'Failed generating config params for RunFolder = '+runfolder +', Flowcell id = '+ fcid+ ' Config Text:\n'+cnfgfile + + except ObjectDoesNotExist: + cnfgfile = 'Entry not found for RunFolder = '+runfolder + + return HttpResponse(cnfgfile, mimetype='text/plain') + +def getLaneLibs(req): + granted = False + ClIP = req.META['REMOTE_ADDR'] + if (settings.ALLOWED_IPS.has_key(ClIP)): granted = True + + if not granted: return HttpResponse("access denied.") + + request = req.REQUEST + fcid = 'none' + outputfile = '' + if request.has_key('fcid'): + fcid = request['fcid'] + try: + rec = FlowCell.objects.get(flowcell_id=fcid) + #Ex: 071211 + year = datetime.today().year.__str__() + year = replace(year,'20','') + month = datetime.today().month + if month < 10: month = "0"+month.__str__() + else: month = month.__str__() + day = datetime.today().day + if day < 10: day = "0"+day.__str__() + else: day = day.__str__() + mydate = year+month+day + outputfile = '' + outputfile += '\n' + outputfile += '\n' + outputfile += '\n' + outputfile += '\n' + outputfile += '\n' + outputfile += '\n' + outputfile += '\n' + outputfile += '\n' + outputfile += '\n' + outputfile += '\n' + except ObjectDoesNotExist: + outputfile = 'Flowcell entry not found for: '+fcid + else: outputfile = 'Missing input: flowcell id' + + return HttpResponse(outputfile, mimetype='text/plain') diff --git a/trunk/htsworkflow/frontend/experiments/models.py b/trunk/htsworkflow/frontend/experiments/models.py new file mode 100755 index 0000000..1ea00d9 --- /dev/null +++ b/trunk/htsworkflow/frontend/experiments/models.py @@ -0,0 +1,161 @@ +from django.db import models +from htsworkflow.frontend.samples.models import * +from htsworkflow.frontend.settings import options +from django.core.exceptions import ObjectDoesNotExist +import logging + +class ClusterStation(models.Model): + name = models.CharField(max_length=50, unique=True) + + def __unicode__(self): + return unicode(self.name) + +class Sequencer(models.Model): + name = models.CharField(max_length=50, unique=True) + + def __unicode__(self): + return unicode(self.name) + +default_pM = 5 +try: + default_pM = int(options.get('frontend', 'default_pm')) +except ValueError,e: + logging.error("invalid value for frontend.default_pm") + +class FlowCell(models.Model): + + flowcell_id = models.CharField(max_length=20, unique=True, db_index=True) + run_date = models.DateTimeField() + advanced_run = models.BooleanField(default=False) + paired_end = models.BooleanField(default=False) + read_length = models.IntegerField(default=32) #Stanford is currenlty 25 + + lane_1_library = models.ForeignKey(Library, related_name="lane_1_library") + lane_2_library = models.ForeignKey(Library, related_name="lane_2_library") + lane_3_library = models.ForeignKey(Library, 
related_name="lane_3_library") + lane_4_library = models.ForeignKey(Library, related_name="lane_4_library") + lane_5_library = models.ForeignKey(Library, related_name="lane_5_library") + lane_6_library = models.ForeignKey(Library, related_name="lane_6_library") + lane_7_library = models.ForeignKey(Library, related_name="lane_7_library") + lane_8_library = models.ForeignKey(Library, related_name="lane_8_library") + + lane_1_pM = models.DecimalField(max_digits=5, decimal_places=2,blank=False, null=False,default=default_pM) + lane_2_pM = models.DecimalField(max_digits=5, decimal_places=2,blank=False, null=False,default=default_pM) + lane_3_pM = models.DecimalField(max_digits=5, decimal_places=2,blank=False, null=False,default=default_pM) + lane_4_pM = models.DecimalField(max_digits=5, decimal_places=2,blank=False, null=False,default=default_pM) + lane_5_pM = models.DecimalField(max_digits=5, decimal_places=2,blank=False, null=False,default=default_pM) + lane_6_pM = models.DecimalField(max_digits=5, decimal_places=2,blank=False, null=False,default=default_pM) + lane_7_pM = models.DecimalField(max_digits=5, decimal_places=2,blank=False, null=False,default=default_pM) + lane_8_pM = models.DecimalField(max_digits=5, decimal_places=2,blank=False, null=False,default=default_pM) + + lane_1_cluster_estimate = models.IntegerField(blank=True, null=True) + lane_2_cluster_estimate = models.IntegerField(blank=True, null=True) + lane_3_cluster_estimate = models.IntegerField(blank=True, null=True) + lane_4_cluster_estimate = models.IntegerField(blank=True, null=True) + lane_5_cluster_estimate = models.IntegerField(blank=True, null=True) + lane_6_cluster_estimate = models.IntegerField(blank=True, null=True) + lane_7_cluster_estimate = models.IntegerField(blank=True, null=True) + lane_8_cluster_estimate = models.IntegerField(blank=True, null=True) + + # lane_1_primer = models.ForeignKey(Primer,blank=True,null=True,related_name="lane_1_primer") + # lane_2_primer = models.ForeignKey(Primer,blank=True,null=True,related_name="lane_2_primer") + # lane_3_primer = models.ForeignKey(Primer,blank=True,null=True,related_name="lane_3_primer") + # lane_4_primer = models.ForeignKey(Primer,blank=True,null=True,related_name="lane_4_primer") + # lane_5_primer = models.ForeignKey(Primer,blank=True,null=True,related_name="lane_5_primer") + # lane_6_primer = models.ForeignKey(Primer,blank=True,null=True,related_name="lane_6_primer") + # lane_7_primer = models.ForeignKey(Primer,blank=True,null=True,related_name="lane_7_primer") + # lane_8_primer = models.ForeignKey(Primer,blank=True,null=True,related_name="lane_8_primer") + + cluster_station = models.ForeignKey(ClusterStation, default=1) + sequencer = models.ForeignKey(Sequencer, default=1) + + notes = models.TextField(blank=True) + + def __unicode__(self): + return unicode(self.flowcell_id) + + def Create_LOG(self): + str = '' + str +='Create LOG' + try: + t = DataRun.objects.get(fcid=self.id) + str +='
DataRun ..' + except ObjectDoesNotExist: + str += '
not sequenced' + return str + Create_LOG.allow_tags = True + + def Lanes(self): + library_url = '/admin/samples/library/%s' + html = [''] + for i in range(1,9): + cluster_estimate = getattr(self, 'lane_%d_cluster_estimate' % (i,)) + if cluster_estimate is not None: + cluster_estimate = "%s k" % ((int(cluster_estimate)/1000), ) + else: + cluster_estimate = 'None' + library_id = getattr(self, 'lane_%d_library_id' % (i,)) + library = getattr(self, 'lane_%d_library' % i) + element = '' + expanded_library_url = library_url %(library_id,) + html.append(element % (i, expanded_library_url, library, cluster_estimate)) + html.append('
%d%s%s
')
+ return "\n".join(html)
+ Lanes.allow_tags = True
+
+ class Meta:
+ ordering = ["-run_date"]
+
+### -----------------------
+class DataRun(models.Model):
+ ConfTemplate = "CONFIG PARAMS WILL BE GENERATED BY THE PIPELINE SCRIPT.\nYOU'LL BE ABLE TO EDIT AFTER IF NEEDED."
+ run_folder = models.CharField(max_length=50,unique=True, db_index=True)
+ fcid = models.ForeignKey(FlowCell,verbose_name="Flowcell Id")
+ config_params = models.TextField(default=ConfTemplate)
+ run_start_time = models.DateTimeField()
+ RUN_STATUS_CHOICES = (
+ (0, 'Sequencer running'), ##Solexa Data Pipeline Not Yet Started'),
+ (1, 'Data Pipeline Started'),
+ (2, 'Data Pipeline Interrupted'),
+ (3, 'Data Pipeline Finished'),
+ (4, 'CollectReads Started'),
+ (5, 'CollectReads Finished'),
+ (6, 'QC Finished'),
+ (7, 'DONE'),
+ )
+ run_status = models.IntegerField(choices=RUN_STATUS_CHOICES, default=0)
+ run_note = models.TextField(blank=True)
+
+ def main_status(self):
+ str = '<div'
+ if self.run_status >= 5:
+ str += ' style="color:green">'
+ str += '<b>'+self.RUN_STATUS_CHOICES[self.run_status][1]+'</b>'
+ str += '<br/><br/>' # New!
+ str += 'View QC Page'
+ else:
+ str += '>'+self.RUN_STATUS_CHOICES[self.run_status][1]
+
+ str += '</div>'
+ return str
+ main_status.allow_tags = True
+
+ def Flowcell_Info(self):
+ str = '<b>'+self.fcid.__str__()+'</b>'
+ # FlowCell defines cluster_station and sequencer foreign keys;
+ # there are no cluster_mac_id / seq_mac_id fields
+ str += ' (c: '+self.fcid.cluster_station.__str__()+', s: '+self.fcid.sequencer.__str__()+')'
+ str += '
' + str +='View/hide lanes' + str += '
' + LanesList = '1: '+self.fcid.lane_1_library.__str__()+' ('+self.fcid.lane_1_library.library_species.use_genome_build+')
<br/>2: '+self.fcid.lane_2_library.__str__()+' ('+self.fcid.lane_2_library.library_species.use_genome_build+')<br/>3: '+self.fcid.lane_3_library.__str__()+' ('+self.fcid.lane_3_library.library_species.use_genome_build+')<br/>4: '+self.fcid.lane_4_library.__str__()+' ('+self.fcid.lane_4_library.library_species.use_genome_build+')<br/>5: '+self.fcid.lane_5_library.__str__()+' ('+self.fcid.lane_5_library.library_species.use_genome_build+')<br/>6: '+self.fcid.lane_6_library.__str__()+' ('+self.fcid.lane_6_library.library_species.use_genome_build+')<br/>7: '+self.fcid.lane_7_library.__str__()+' ('+self.fcid.lane_7_library.library_species.use_genome_build+')<br/>8: '+self.fcid.lane_8_library.__str__()+' ('+self.fcid.lane_8_library.library_species.use_genome_build+')'
+ str += LanesList ## self.fcid.Lanes()
+ str += '
' + str += '
Edit Flowcell record' + #str += 'New!' + str +='GA LOG Page' + str += '
' + str += '
' + return str + Flowcell_Info.allow_tags = True diff --git a/trunk/htsworkflow/frontend/experiments/urls.py b/trunk/htsworkflow/frontend/experiments/urls.py new file mode 100755 index 0000000..c4df6a8 --- /dev/null +++ b/trunk/htsworkflow/frontend/experiments/urls.py @@ -0,0 +1,12 @@ +from django.conf.urls.defaults import * + +urlpatterns = patterns('', + + (r'^$', 'htsworkflow.frontend.experiments.views.index'), + #(r'^liblist$', 'htsworkflow.frontend.experiments.views.test_Libs'), + #(r'^(?P.+)/$', 'gaworkflow.frontend.experiments.views.detail'), + (r'^(?P.+)/$', 'htsworkflow.frontend.experiments.views.makeFCSheet'), + (r'^updStatus$', 'htsworkflow.frontend.experiments.experiments.updStatus'), + (r'^getConfile$', 'htsworkflow.frontend.experiments.experiments.getConfile'), + (r'^getLanesNames$', 'htsworkflow.frontend.experiments.experiments.getLaneLibs') +) diff --git a/trunk/htsworkflow/frontend/experiments/views.py b/trunk/htsworkflow/frontend/experiments/views.py new file mode 100755 index 0000000..a2d14bb --- /dev/null +++ b/trunk/htsworkflow/frontend/experiments/views.py @@ -0,0 +1,34 @@ +# Create your views here. +#from django.template import Context, loader +#shortcut to the above modules +from django.shortcuts import render_to_response, get_object_or_404 +from htsworkflow.frontend.experiments.models import * +from django.http import HttpResponse +from django.core.exceptions import ObjectDoesNotExist + +def index(request): + all_runs = DataRun.objects.order_by('-run_start_time') + #t = loader.get_template('experiments/index.html') + #c = Context({ + # 'data_run_list': all_runs, + #}) + #return HttpResponse(t.render(c)) + # shortcut to the above module usage + return render_to_response('experiments/index.html',{'data_run_list': all_runs}) + +def detail(request, run_folder): + html_str = '

Exp Track Details Page

' + html_str += 'Run Folder: '+run_folder + r = get_object_or_404(DataRun,run_folder=run_folder) + return render_to_response('experiments/detail.html',{'run_f': r}) + +def makeFCSheet(request,fcid): + # get Flowcell by input fcid + # ... + rec = None + try: + rec = FlowCell.objects.get(flowcell_id=fcid) + except ObjectDoesNotExist: + pass + lanes = ['1','2','3','4','5','6','7','8'] + return render_to_response('experiments/flowcellSheet.html',{'fc': rec}) diff --git a/trunk/htsworkflow/frontend/inventory/__init__.py b/trunk/htsworkflow/frontend/inventory/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/trunk/htsworkflow/frontend/inventory/admin.py b/trunk/htsworkflow/frontend/inventory/admin.py new file mode 100644 index 0000000..4e17177 --- /dev/null +++ b/trunk/htsworkflow/frontend/inventory/admin.py @@ -0,0 +1,35 @@ +from django.contrib import admin + +from htsworkflow.frontend.inventory.models import Item, ItemInfo, ItemType, Vendor, Location, LongTermStorage, ItemStatus + +class ItemAdmin(admin.ModelAdmin): + list_display = ('uuid', 'barcode_id','item_type', 'item_info', 'location', 'force_use_uuid', 'creation_date') + list_filter = ( + 'item_type', + ) + +class ItemInfoAdmin(admin.ModelAdmin): + pass + +class ItemTypeAdmin(admin.ModelAdmin): + pass + +class VendorAdmin(admin.ModelAdmin): + pass + +class LocationAdmin(admin.ModelAdmin): + pass + +class LongTermStorageAdmin(admin.ModelAdmin): + pass + +class ItemStatusAdmin(admin.ModelAdmin): + pass + +admin.site.register(Item, ItemAdmin) +admin.site.register(ItemInfo, ItemInfoAdmin) +admin.site.register(ItemType, ItemTypeAdmin) +admin.site.register(Vendor, VendorAdmin) +admin.site.register(Location, LocationAdmin) +admin.site.register(LongTermStorage, LongTermStorageAdmin) +admin.site.register(ItemStatus, ItemStatusAdmin) diff --git a/trunk/htsworkflow/frontend/inventory/models.py b/trunk/htsworkflow/frontend/inventory/models.py new file mode 100644 index 0000000..8f54fc3 --- /dev/null +++ b/trunk/htsworkflow/frontend/inventory/models.py @@ -0,0 +1,126 @@ +from django.db import models +from django.db.models.signals import pre_save + +from htsworkflow.frontend.samples.models import Library +from htsworkflow.frontend.experiments.models import FlowCell + + +import uuid + +def _assign_uuid(sender, instance, **kwargs): + """ + Assigns a UUID to model on save + """ + print 'Entered _assign_uuid' + if instance.uuid is None or len(instance.uuid) != 32: + instance.uuid = uuid.uuid1().hex + + +class Vendor(models.Model): + name = models.CharField(max_length=256) + url = models.URLField(blank=True, null=True) + + def __unicode__(self): + return u"%s" % (self.name) + + +class Location(models.Model): + + name = models.CharField(max_length=256, unique=True) + location_description = models.TextField() + + uuid = models.CharField(max_length=32, blank=True, help_text="Leave blank for automatic UUID generation") + + notes = models.TextField(blank=True, null=True) + + def __unicode__(self): + if len(self.location_description) > 16: + return u"%s: %s" % (self.name, self.location_description[0:16]+u"...") + else: + return u"%s: %s" % (self.name, self.location_description) + +pre_save.connect(_assign_uuid, sender=Location) + +class ItemInfo(models.Model): + model_id = models.CharField(max_length=256, blank=True, null=True) + part_number = models.CharField(max_length=256, blank=True, null=True) + lot_number = models.CharField(max_length=256, blank=True, null=True) + + url = models.URLField(blank=True, null=True) + + qty_purchased = 
models.IntegerField(default=1) + + vendor = models.ForeignKey(Vendor) + purchase_date = models.DateField(blank=True, null=True) + warranty_months = models.IntegerField(blank=True, null=True) + + notes = models.TextField(blank=True, null=True) + + def __unicode__(self): + name = u'' + if self.model_id: + name += u"model:%s " % (self.model_id) + if self.part_number: + name += u"part:%s " % (self.part_number) + if self.lot_number: + name += u"lot:%s " % (self.lot_number) + + return u"%s: %s" % (name, self.purchase_date) + + +class ItemType(models.Model): + + name = models.CharField(max_length=64, unique=True) + description = models.TextField(blank=True, null=True) + + def __unicode__(self): + return u"%s" % (self.name) + +class ItemStatus(models.Model): + name = models.CharField(max_length=64, unique=True) + notes = models.TextField(blank=True, null=True) + + def __unicode__(self): + return self.name + +class Item(models.Model): + + item_type = models.ForeignKey(ItemType) + + #Automatically assigned uuid; used for barcode if one is not provided in + # barcode_id + uuid = models.CharField(max_length=32, blank=True, help_text="Leave blank for automatic UUID generation") + + # field for existing barcodes; used instead of uuid if provided + barcode_id = models.CharField(max_length=256, blank=True, null=True) + force_use_uuid = models.BooleanField(default=False) + + item_info = models.ForeignKey(ItemInfo) + + location = models.ForeignKey(Location) + + status = models.ForeignKey(ItemStatus, blank=True, null=True) + + creation_date = models.DateTimeField(auto_now_add=True) + modified_date = models.DateTimeField(auto_now=True) + + notes = models.TextField(blank=True, null=True) + + def __unicode__(self): + if self.barcode_id is None or len(self.barcode_id) == 0: + return u"invu|%s" % (self.uuid) + else: + return u"invb|%s" % (self.barcode_id) + +pre_save.connect(_assign_uuid, sender=Item) + + +class LongTermStorage(models.Model): + + flowcell = models.ForeignKey(FlowCell) + libraries = models.ManyToManyField(Library) + + storage_devices = models.ManyToManyField(Item) + + def __unicode__(self): + return u"%s: %s" % (str(self.flowcell), ', '.join([ str(s) for s in self.storage_devices.iterator() ])) \ No newline at end of file diff --git a/trunk/htsworkflow/frontend/inventory/urls.py b/trunk/htsworkflow/frontend/inventory/urls.py new file mode 100644 index 0000000..844208e --- /dev/null +++ b/trunk/htsworkflow/frontend/inventory/urls.py @@ -0,0 +1,5 @@ +from django.conf.urls.defaults import * + +urlpatterns = patterns('', + (r'^lts/link/(?P.+)/(?P.+)/$', 'htsworkflow.frontend.inventory.views.link_flowcell_and_device'), + ) diff --git a/trunk/htsworkflow/frontend/inventory/views.py b/trunk/htsworkflow/frontend/inventory/views.py new file mode 100644 index 0000000..19bcb0b --- /dev/null +++ b/trunk/htsworkflow/frontend/inventory/views.py @@ -0,0 +1,117 @@ +from htsworkflow.frontend.inventory.models import Item, LongTermStorage +from htsworkflow.frontend.experiments.models import FlowCell + +from django.core.exceptions import ObjectDoesNotExist +from django.http import HttpResponse + + +def link_flowcell_and_device(request, flowcell, serial): + """ + Updates database records of a flowcell being archived on a device with a particular serial # + """ + assert flowcell is not None + assert serial is not None + + LTS_UPDATED = False + SD_UPDATED = False + LIBRARY_UPDATED = False + + ########################################### + # Retrieve Storage Device + try: + sd = Item.objects.get(barcode_id=serial) + 
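+        # Note: the URL pattern in inventory/urls.py above captures two groups,
+        # flowcell and serial, so a request like lts/link/FC123/SN456/ (hypothetical
+        # ids) lands here with both values as keyword arguments; the serial must
+        # match an existing Item.barcode_id.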
except ObjectDoesNotExist, e: + msg = "Item with barcode_id of %s not found." % (serial) + raise ObjectDoesNotExist(msg) + + ########################################### + # Retrieve FlowCell + try: + fc = FlowCell.objects.get(flowcell_id=flowcell) + except ObjectDoesNotExist, e: + msg = "FlowCell with flowcell_id of %s not found." % (flowcell) + raise ObjectDoesNotExist(msg) + + ########################################### + # Retrieve or create LongTermStorage Object + count = fc.longtermstorage_set.count() + lts = None + if count > 1: + msg = "There really should only be one longtermstorage object per flowcell" + raise ValueError, msg + elif count == 1: + # lts already attached to flowcell + lts = fc.longtermstorage_set.all()[0] + else: + lts = LongTermStorage() + # Attach flowcell + lts.flowcell = fc + # Need a primary keey before linking to storage devices + lts.save() + LTS_UPDATED = True + + + ############################################ + # Link Storage to Flowcell + + # Add a link to this storage device if it is not already linked. + if sd not in lts.storage_devices.all(): + lts.storage_devices.add(sd) + SD_UPDATED = True + + ########################################### + # Add Library Links to LTS + + if fc.lane_1_library not in lts.libraries.all(): + lts.libraries.add(fc.lane_1_library) + LIBRARY_UPDATED = True + print 1 + + if fc.lane_2_library not in lts.libraries.all(): + lts.libraries.add(fc.lane_2_library) + LIBRARY_UPDATED = True + print 2 + + if fc.lane_3_library not in lts.libraries.all(): + lts.libraries.add(fc.lane_3_library) + LIBRARY_UPDATED = True + print 3 + + if fc.lane_4_library not in lts.libraries.all(): + lts.libraries.add(fc.lane_4_library) + LIBRARY_UPDATED = True + print 4 + + + if fc.lane_5_library not in lts.libraries.all(): + lts.libraries.add(fc.lane_5_library) + LIBRARY_UPDATED = True + print 5 + + if fc.lane_6_library not in lts.libraries.all(): + lts.libraries.add(fc.lane_6_library) + LIBRARY_UPDATED = True + print 6 + + if fc.lane_7_library not in lts.libraries.all(): + lts.libraries.add(fc.lane_7_library) + LIBRARY_UPDATED = True + print 7 + + if fc.lane_8_library not in lts.libraries.all(): + lts.libraries.add(fc.lane_8_library) + LIBRARY_UPDATED = True + print 8 + + # Save Changes + lts.save() + + msg = ['Success:'] + if LTS_UPDATED or SD_UPDATED or LIBRARY_UPDATED: + msg.append(' LongTermStorage (LTS) Created: %s' % (LTS_UPDATED)) + msg.append(' Storage Device Linked to LTS: %s' % (SD_UPDATED)) + msg.append(' Libraries updated in LTS: %s' % (LIBRARY_UPDATED)) + else: + msg.append(' No Updates Needed.') + + return HttpResponse('\n'.join(msg)) \ No newline at end of file diff --git a/trunk/htsworkflow/frontend/manage.py b/trunk/htsworkflow/frontend/manage.py new file mode 100644 index 0000000..5e78ea9 --- /dev/null +++ b/trunk/htsworkflow/frontend/manage.py @@ -0,0 +1,11 @@ +#!/usr/bin/env python +from django.core.management import execute_manager +try: + import settings # Assumed to be in the same directory. +except ImportError: + import sys + sys.stderr.write("Error: Can't find the file 'settings.py' in the directory containing %r. 
It appears you've customized things.\nYou'll have to run django-admin.py, passing it your settings module.\n(If the file settings.py does indeed exist, it's causing an ImportError somehow.)\n" % __file__) + sys.exit(1) + +if __name__ == "__main__": + execute_manager(settings) diff --git a/trunk/htsworkflow/frontend/reports/LibraryInfo.xml b/trunk/htsworkflow/frontend/reports/LibraryInfo.xml new file mode 100644 index 0000000..ec630b5 --- /dev/null +++ b/trunk/htsworkflow/frontend/reports/LibraryInfo.xml @@ -0,0 +1,1214 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/trunk/htsworkflow/frontend/reports/__init__.py b/trunk/htsworkflow/frontend/reports/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/trunk/htsworkflow/frontend/reports/admin.py b/trunk/htsworkflow/frontend/reports/admin.py new file mode 100644 index 0000000..b5a0eb2 --- /dev/null +++ b/trunk/htsworkflow/frontend/reports/admin.py @@ -0,0 +1,10 @@ +from htsworkflow.frontend.reports.models import ProgressReport +from django.contrib import admin +from django.utils.translation import ugettext_lazy as _ + +class ProgressReportOptions(admin.ModelAdmin): + 
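+    # Most of these columns are computed HTML methods on ProgressReport
+    # (Study, ab_batch, sequencing, ...) rather than database fields; only
+    # QPCR and interactome_complete are plain model fields.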
list_display = ('Study','ab_batch','cell_line','library','sequencing','aligned_reads','QPCR','submit_to_DCC','submit_to_NCBI','interactome_complete') + ## list_filter = ('interactome_complete') + +admin.site.register(ProgressReport, ProgressReportOptions) + diff --git a/trunk/htsworkflow/frontend/reports/libinfopar.py b/trunk/htsworkflow/frontend/reports/libinfopar.py new file mode 100644 index 0000000..954fce6 --- /dev/null +++ b/trunk/htsworkflow/frontend/reports/libinfopar.py @@ -0,0 +1,103 @@ +from htsworkflow.frontend import settings +from django.http import HttpResponse +from datetime import datetime +from string import * +import re +from xml.sax import make_parser +from xml.sax.handler import ContentHandler +import urllib +import urllib2 +import os + +''' +Example library node from LibraryInfo.xml: + + + + + +''' +class LibInfoHandler(ContentHandler): + + def __init__ (self, searchTerm): + self.searchTerm= searchTerm + self.currlibid = '' + self.LanesCount, self.ReadsCount = 0, 0 + self.Msg = 'OK' + + def startElement(self, name, attrs): + try: + if name == 'Library': + self.currlibid = attrs.get('Name',"") + elif name == 'Track' and self.searchTerm == self.currlibid: + self.LanesCount += len(attrs.get('Lane',"")) + self.ReadsCount += int(attrs.get('Count',"")) + #else: + # self.Msg += ' | name = '+name+', currlibid = '+ self.currlibid + except: + self.Msg = 'failed parsing xml file' + return + + #def characters (self, ch): + # return .. + + #def endElement(self, name): + # return .. + + +## TO DO: Change this to read the LibraryInfo.xml only ONCE per ReoprtRequest (do it in the models.py). + Read it directly from the analysis_server + +def getLibReads(libid): + searchTerm= libid + parser = make_parser() + curHandler = LibInfoHandler(searchTerm) + parser.setContentHandler(curHandler) + reports_dir = os.path.split(__file__)[0] + library_info = os.path.join(reports_dir, 'LibraryInfo.xml') + parser.parse(open(library_info)) + arRes = [] + arRes.append(curHandler.LanesCount) + arRes.append(curHandler.ReadsCount) + arRes.append(curHandler.Msg) + + return arRes + +def getWebPage(url,params): + pdata = urllib.urlencode(params) + req = urllib2.Request(url,pdata) + wpage = urllib2.urlopen(req) + restext = wpage.read() + wpage.close() + return restext + +def refreshLibInfoFile(request): + varStatus = 'getting conf file from exp trac server' + url = settings.TASKS_PROJS_SERVER+'/LibraryInfo.xml' + params = {} + readw = getWebPage(url,params) + # make sure file content starts as xml + match_str = re.compile('^<\?xml.+') + if match_str.search(readw): ##tempstr): + # Rename current file with timestamp + year = datetime.today().year.__str__() + year = replace(year,'20','') + month = datetime.today().month + if month < 10: month = "0"+month.__str__() + else: month = month.__str__() + day = datetime.today().day + if day < 10: day = "0"+day.__str__() + else: day = day.__str__() + mydate = year+month+day + folder_loc = '/htsworkflow/htswfrontend/htswfrontend' # DEV + #folder_loc = '/Library/WebServer/gaworkflow/gaworkflow/frontend' # PROD + folder = folder_loc+'/htsw_reports/LibInfo/' + os.rename(folder+'LibraryInfo.xml',folder+mydate+'_LibraryInfo.xml') + # create file in curret folder + file_path = os.path.join(folder,'LibraryInfo.xml') + f = open(file_path, 'w') + f.write(readw) + f.close() + varStatus = 'OK. LibraryInfo.xml refreshed at Web server.' 
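+        # By this point the previous LibraryInfo.xml has been renamed with a
+        # YYMMDD_ prefix and the freshly downloaded copy written in its place.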
+ else: + varStatus = 'Failed reading valid LibraryInfo.xml server reply:\n'+readw + return HttpResponse(varStatus) diff --git a/trunk/htsworkflow/frontend/reports/models.py b/trunk/htsworkflow/frontend/reports/models.py new file mode 100644 index 0000000..b898537 --- /dev/null +++ b/trunk/htsworkflow/frontend/reports/models.py @@ -0,0 +1,238 @@ +from django.db import models +from django.db.models import Q +from django.core.exceptions import ObjectDoesNotExist +from datetime import datetime +from htsworkflow.frontend.samples.models import * +from htsworkflow.frontend.analysis.models import * +from htsworkflow.frontend.experiments.models import * +from string import * +from htsworkflow.frontend.reports.utils import * +import re +##from p1 import LibInfo +from libinfopar import * + +## This is a table based REPORT generator. The goal is to display a Progress Report for all the ENCODE projects, based on Study Name (e.g. NRSF, FOXP2, Methy-Seq on .. etc). + +class ProgressReport(models.Model): + st_sbj = models.ForeignKey(Project,limit_choices_to = Q(project_name__startswith='ENCODE '),related_name='project',db_index=True,verbose_name="Studied Subject") + interactome_complete = models.BooleanField(default=False) + + def Study(self): + str = self.st_sbj.__str__() + str += '

' + str += '<a href="...">Edit Project</a>' + return str + Study.allow_tags = True + + def submit_to_DCC(self): + varText = '' + if self.note_about_DCC: + varText += '<br/>Note:<br/>'+self.note_about_DCC + return '%s<br/>%s' % (self.submitted_to_DCC,varText) + submit_to_DCC.allow_tags = True + + def submit_to_NCBI(self): + varText = '' + if self.note_about_NCBI: + varText += '<br/>Note:<br/>'+self.note_about_NCBI + return '%s<br/>
%s' % (self.submitted_to_NCBI,varText) + submit_to_NCBI.allow_tags = True + + ## -- Utility functions <-- This method was transfered to untils.py + + ## --- LIBARAY PREPARATION SECTION + def getLibIds(self): + ptasks = self.st_sbj.tasks.distinct() + arLibs = [] + for t in ptasks: + if t.subject1 is not None: + arLibs.append(t.subject1.library_id) + if t.subject2 is not None: + arLibs.append(t.subject2.library_id) + arLibs = unique(arLibs) + return arLibs #.sort() + + def getFCInfo(self,libid): ## This is the haviest function + arFCLanes = [] + ##Test return arFCLanes + # can't get this to work: FC_L1 = FlowCell.objects.filter(lane_5_library__exact=libid) + allFCs = FlowCell.objects.all() + for f in allFCs: + entry = '' + lanes = [] + #found = False +# for i in range(1,9): +# if eval('f.lane_'+i.__str__()+'_library.library_id==libid'): +# lanes.append(i.__str__()) +# found = True + +# maybe a bit faster this way: + if f.lane_1_library.library_id==libid: + lanes.append('1') + #found = True + if f.lane_2_library.library_id==libid: + lanes.append('2') + #found = True + if f.lane_3_library.library_id==libid: + lanes.append('3') + #found = True + if f.lane_4_library.library_id==libid: + lanes.append('4') + #found = True + if f.lane_5_library.library_id==libid: + lanes.append('5') + #found = True + if f.lane_6_library.library_id==libid: + lanes.append('6') + #found = True + if f.lane_7_library.library_id==libid: + lanes.append('7') + #found = True + if f.lane_8_library.library_id==libid: + lanes.append('8') + #found = True + + + #if found: + if len(lanes)>0: + rundate = re.sub(pattern="\s.*$",repl="",string=f.run_date.__str__()) + entry = ''+f.flowcell_id + ' Lanes No.: '+','.join(lanes)+' ('+rundate+')' + arFCLanes.append(entry) + if len(arFCLanes)==0: + arFCLanes.append('Flowcell not found.') + return arFCLanes + + def ab_batch(self): + ## To have the Company's lot number, apearing on the (source) tube, we need to add new Field in Library. + arlibs = self.getLibIds() + tstr = '
<br/>    ' ##Ab from '+len(arlibs).__str__()+' libs: ' + arRows = [] + for l in arlibs: + try: + rec = Library.objects.get(library_id=l,antibody__isnull=False) + arRows.append('<br/>  • '+rec.antibody.antibodies+' for '+rec.antibody.antigene+' (src:'+rec.antibody.source+', cat:'+rec.antibody.catalog+')<br/>') + except ObjectDoesNotExist: + tstr += "" + tstr += "".join(unique(arRows))+'<br/>' + return tstr + ab_batch.allow_tags = True + + def cell_line(self): + arlibs = self.getLibIds() + tstr = '<br/>    ' + arRows = [] + for l in arlibs: + try: + rec = Library.objects.get(library_id=l) + arRows.append('<br/>  • '+rec.cell_line.cellline_name+' ('+rec.condition.condition_name+')<br/>') + except ObjectDoesNotExist: + tstr += "" + tstr += "".join(unique(arRows))+'<br/>' + return tstr + cell_line.allow_tags = True + + def cell_harvest_batch(self): # <- data now displayed in "cell_line" + ## name + date + arlibs = self.getLibIds() + tstr = '<br/>    ' + arRows = [] + for l in arlibs: + try: + rec = Library.objects.get(library_id=l) + arRows.append('<br/>  • '+rec.condition.condition_name+'<br/>') + except ObjectDoesNotExist: + tstr += "" + tstr += "".join(unique(arRows))+'<br/>' + return tstr + cell_harvest_batch.allow_tags = True + + def ChIP_made(self): + ## person + date + return '...' + + def library(self): + ## Lib Id + Date + Person + tstr = '' + arlibs = self.getLibIds() ##.sort() + arlibs = arlibs + tstr +='<a href="...">view /hide</a>' + tstr += '<br/>    ' + arRows = [] + for l in arlibs: + try: + rec = Library.objects.get(library_id=l) + arRows.append('<br/>  • '+rec.library_id+': '+rec.library_name+'.<br/>    Made By: '+rec.made_by+', On: '+ rec.creation_date.__str__()+'<br/>') + except ObjectDoesNotExist: + tstr += "" + tstr += "".join(unique(arRows))+'<br/>' + return tstr + library.allow_tags = True + + + ## -- SEQUENCING SECTION + def sequencing(self): + ## FCId + Lane + Date + arlibs = self.getLibIds() + tstr ='<a href="...">view /hide</a>' + tstr += '<br/>    ' + for l in arlibs: + tstr += '<br/>  • '+l+':<br/>    '+(' / '.join(self.getFCInfo(l)))+'<br/>' + tstr += '<br/>' + return tstr + sequencing.allow_tags = True + + def aligned_reads(self): + ## Mega reads/lane + arlibs = self.getLibIds() + tstr = '<a href="...">view /hide</a>' + tstr += '<br/>' + tstr += '<table><tr><td>Library Id</td><td>Total Lanes</td><td>M Reads</td></tr>' + LanesCnt, ReadsCnt = 0, 0 + for l in arlibs: + res = getLibReads(l) + LanesCnt += res[0] + ReadsCnt += res[1] + rc = "%1.2f" % (res[1]/1000000.0) + tstr += '<tr>' + tstr += '<td>'+l+'</td><td>'+res[0].__str__()+'</td><td>'+rc+'</td></tr>' + #tstr += '<a href="...">Project results page</a>' + tstr += '</table>' + myNum = (ReadsCnt/1000000.0) + myNum = "%1.2f" % (myNum) + tstr += '<br/>Total: '+LanesCnt.__str__()+' lanes and '+myNum+' M Reads<br/>
' + tstr += 'Project results page' + return tstr + aligned_reads.allow_tags = True + + def peak_calling(self): + # date + what etc.. + return 'coming up ..' + + QPCR = models.CharField(max_length=500,blank=True,null=True) + submitted_to_DCC = models.DateTimeField(blank=True,null=True) + submitted_to_NCBI = models.DateTimeField(blank=True,null=True) + note_about_DCC = models.TextField(blank=True) + note_about_NCBI = models.TextField(blank=True) + + def __str__(self): + return '"%s" - %s' % (self.st_sbj,self.interactome_complete) + + class Meta: + #verbose_name_plural = "Reports" + ordering = ["id"] + + class Admin: + list_display = ('Study','ab_batch','cell_line','library','sequencing','aligned_reads','QPCR','submit_to_DCC','submit_to_NCBI','interactome_complete') + ## list_filter = ('interactome_complete') + + +############################################# diff --git a/trunk/htsworkflow/frontend/reports/reports.py b/trunk/htsworkflow/frontend/reports/reports.py new file mode 100755 index 0000000..30d9ad4 --- /dev/null +++ b/trunk/htsworkflow/frontend/reports/reports.py @@ -0,0 +1,308 @@ +from htsworkflow.frontend.experiments.models import * +from django.http import HttpResponse +from django.core.exceptions import ObjectDoesNotExist +from django.shortcuts import render_to_response, get_object_or_404 + +def getBgColor(reads_cnt,exp_type): + # Color Scheme: green is more than 12M, blue is more than 5M, orange is more than 3M and red is less. For RNAseq, all those thresholds are ~ double + bgcolor = '#ff3300' # Red is the color for minimum read counts + rc_thr = [12000000,5000000,3000000] # Default for ChIP-Seq and Methyl-Seq + if exp_type == 'RNA-seq': + rc_thr = [20000000,10000000,6000000] + + if reads_cnt > rc_thr[0]: + bgcolor = '#66ff66' # Green + else: + if reads_cnt > rc_thr[1]: + bgcolor ='#00ccff' # Blue + else: + if reads_cnt > rc_thr[2]: + bgcolor ='#ffcc33' # Orange + #tstr = '
<div style="background-color:'+bgcolor+'">' + #tstr += res[0].__str__()+' Lanes, '+rc+' M Reads' + #tstr += '</div>
' + + return bgcolor + +def report1(request): + EXP = 'ChIP-seq' + + if request.GET.has_key('aflid'): + AFL_Id = request.GET['aflid'] + try: + AFL = Affiliation.objects.get(id=AFL_Id).name + AFL_CNT = Affiliation.objects.get(id=AFL_Id).contact + except ObjectDoesNotExist: + return HttpResponse("ERROR: Affiliation Record Not Found for: '"+AFL_ID+"'") + else: + AFL = 'ENCODE_Tier1' + AFL_CNT = '' + try: + AFL_Id = Affiliation.objects.get(name=AFL,contact=AFL_CNT).id.__str__() + except ObjectDoesNotExist: + return HttpResponse("ERROR: Affiliation Record Not Found for: '"+AFL+"'") + + TFall = Library.objects.values('antibody').order_by('antibody').distinct() + CLLall = Library.objects.values('cell_line').order_by('cell_line').distinct() + + TFs = TFall.filter(experiment_type=EXP,affiliations__name=AFL,affiliations__contact=AFL_CNT) + CLLs = CLLall.filter(experiment_type=EXP,affiliations__name=AFL,affiliations__contact=AFL_CNT) + + # Check Replicate numbers + Reps = 1 + RepRecs = Library.objects.filter(experiment_type=EXP,affiliations__name=AFL,affiliations__contact=AFL_CNT).order_by('-replicate') + if len(RepRecs) > 0: Reps = RepRecs[0].replicate + + ######## + str = '' + str += 'Main Page' + ##str += 'Max Replicates: '+MaxRep.replicate.__str__()+'' + str += 'Select another '+EXP+' Report: ' + + str += 'color scheme: > 12 M > 5 M > 3 M < 3 M' + + str += '' + str += 'Switch to: '+AFL+' '+AFL_CNT+' RNA-Seq Report' + str += ' | ' + str += 'Methyl-Seq Report' + + bgc = '#ffffff' + pbgc = '#f7f7f7' + str += '

' + str += '' + str += '' + for H in CLLs: + str += '' + tbgc = bgc + bgc = pbgc + pbgc = tbgc + str += '' + bgc = '#ffffff' + pbgc = '#f7f7f7' + for H in CLLs: + for r in range(1,Reps+1): + str += '' + tbgc = bgc + bgc = pbgc + pbgc = tbgc + str += '' + str += '' + bgc = '#ffffff' + pbgc = '#f7f7f7' + for H in CLLs: + for r in range(1,Reps+1): + repReads = Library.objects.filter(experiment_type='INPUT_RXLCh',affiliations__name=AFL,affiliations__contact=AFL_CNT,cell_line=H['cell_line'].__str__(),replicate=r) + str += "' + tbgc = bgc + bgc = pbgc + pbgc = tbgc + str += '' + + for T in TFs: + str += '' + try: + if T['antibody']: + str += '' + except Antibody.DoesNotExist: + str += '' + + bgc = '#ffffff' + pbgc = '#f7f7f7' + for H in CLLs: + for r in range(1,Reps+1): + repReads = Library.objects.filter(experiment_type=EXP,affiliations__name=AFL,affiliations__contact=AFL_CNT,cell_line=H['cell_line'].__str__(),antibody=T['antibody'].__str__(),replicate=r) + str += "' + tbgc = bgc + bgc = pbgc + pbgc = tbgc + str += '' + str += '
PROJECT'+AFL+' '+AFL_CNT+' '+EXP+'
CELL LINE'+Cellline.objects.get(id=H['cell_line']).cellline_name+'
TFRep. '+r.__str__()+'
Total Chromatin" + if len(repReads) == 0: + str += 'No Libraries' + else: + cnt = 0 + for R1 in repReads: + rres = R1.aligned_m_reads() + # Check data sanity + if rres[2] != 'OK': + str += '<br/>'+rres[2] + else: + cnt = rres[1] + if cnt > 0: + str += "<td bgcolor='"+getBgColor(cnt,EXP)+"'>" + str += "%1.2f" % (cnt/1000000.0)+" M" + else: str += "<td>0 Reads" + str += "<br/>"+R1.library_id+", "+R1.condition.nickname+"<br/>" + str += "</td>" + str += '
'+Antibody.objects.get(id=T['antibody']).nickname+'n/a" + if len(repReads) == 0: + str += 'No Libraries' + else: + cnt = 0 + for R1 in repReads: + rres = R1.aligned_m_reads() + # Check data sanlty + if rres[2] != 'OK': + str += '
<br/>'+rres[2] + else: + cnt = rres[1] + if cnt > 0: + str += "<td bgcolor='"+getBgColor(cnt,EXP)+"'>" + str += "%1.2f" % (cnt/1000000.0)+" M" + else: str += "<td>0 Reads" + str += "<br/>"+R1.library_id+", "+R1.condition.nickname+"<br/>" + str += "</td>" + str += '
' + + return render_to_response('reports/report.html',{'main': str}) + + +def report_RM(request): #for RNA-Seq and Methyl-Seq + EXP = 'RNA-seq' + + if request.GET.has_key('exp'): + EXP = request.GET['exp'] # Methyl-seq + + if request.GET.has_key('aflid'): + AFL_Id = request.GET['aflid'] + try: + AFL = Affiliation.objects.get(id=AFL_Id).name + AFL_CNT = Affiliation.objects.get(id=AFL_Id).contact + except ObjectDoesNotExist: + return HttpResponse("ERROR: Affiliation Record Not Found for: '"+AFL_ID+"'") + else: + AFL = 'ENCODE_Tier1' + AFL_CNT = '' + try: + AFL_Id = Affiliation.objects.get(name=AFL,contact=AFL_CNT).id.__str__() + except ObjectDoesNotExist: + return HttpResponse("ERROR: Affiliation Record Not Found for: '"+AFL+"'") + + CLLall = Library.objects.values('cell_line').order_by('cell_line').distinct() + CLLs = CLLall.filter(experiment_type=EXP,affiliations__name=AFL,affiliations__contact=AFL_CNT) + + ######## + # Check Replicate numbers + Reps = 1 + RepRecs = Library.objects.filter(experiment_type=EXP,affiliations__name=AFL,affiliations__contact=AFL_CNT).order_by('-replicate') + if len(RepRecs) > 0: Reps = RepRecs[0].replicate + + str = '' + str += 'Main Page' + str += 'Select another '+EXP+' Report: ' + + if EXP == 'RNA-seq': + str += 'color scheme: > 20 M > 10 M > 6 M < 6 M' + str += '' + str += 'Switch to: '+AFL+' '+AFL_CNT+' ChIP-Seq Report' + str += ' | ' + str += 'Methyl-Seq Report' + else: + str += 'color scheme: > 12 M > 5 M > 3 M < 3 M' + str += '' + str += 'Switch to: '+AFL+' '+AFL_CNT+' ChIP-Seq Report' + str += ' | ' + str += 'RNA-Seq Report' + + str += '

' + str += '' + str += '' + bgc = '#ffffff' + pbgc = '#f7f7f7' + for H in CLLs: + str += '' + tbgc = bgc + bgc = pbgc + pbgc = tbgc + str += '' + bgc = '#ffffff' + pbgc = '#f7f7f7' + for H in CLLs: + for r in range(1,Reps+1): + str += '' + tbgc = bgc + bgc = pbgc + pbgc = tbgc + str += '' + + str += '' + bgc = '#ffffff' + pbgc = '#f7f7f7' + for H in CLLs: + for r in range(1,Reps+1): + repReads = Library.objects.filter(experiment_type=EXP,affiliations__name=AFL,affiliations__contact=AFL_CNT,cell_line=H['cell_line'],replicate=r) + str += "' + str += "
'+AFL+' '+AFL_CNT+' '+EXP+'
'+Cellline.objects.get(id=H['cell_line']).cellline_name+'
Rep. '+r.__str__()+'
" + if len(repReads) == 0: + str += 'No Libraries' + else: + cnt = 0 + for R1 in repReads: + rres = R1.aligned_m_reads() + # Check data sanlty + if rres[2] != 'OK': + str += '
'+rres[2] + else: + cnt = rres[1] + if cnt > 0: + str += "
" + str += "%1.2f" % (cnt/1000000.0)+" M" + else: str += "
0 Reads" + str += "
"+R1.library_id+", "+R1.condition.nickname+", "+R1.library_species.common_name+"
" + str += "
\""+R1.library_name+"\"
'+f.Lanes()+''+f.run_date.__str__()+'
" + return render_to_response('reports/report.html',{'main':str}) + +def test_Libs(request): + str = '' + str += '' + allLibs = Library.objects.all() + #allLibs = Library.objects.filter(antibody__isnull=False) + for L in allLibs: + str += '' + str += '' + str += '' + + str += '
Lib ID</td><td>Current Library Name (Free Text)</td><td>Auto-composed Library Name (antibody + celline + libid + species + [replicate])
'+L.library_id+'</td><td>'+L.library_name+'</td><td>' + str += L.experiment_type+'_' + if L.cell_line.cellline_name != 'Unknown': + str += L.cell_line.cellline_name+'_' + + try: + if L.antibody is not None: + str += L.antibody.nickname + '_' + except Antibody.DoesNotExist: + pass + + str += 'Rep'+L.replicate.__str__() + str += '
' + return HttpResponse(str) diff --git a/trunk/htsworkflow/frontend/reports/urls.py b/trunk/htsworkflow/frontend/reports/urls.py new file mode 100644 index 0000000..5a004f2 --- /dev/null +++ b/trunk/htsworkflow/frontend/reports/urls.py @@ -0,0 +1,9 @@ +from django.conf.urls.defaults import * + +urlpatterns = patterns('', + (r'^updLibInfo$', 'htsworkflow.frontend.reports.libinfopar.refreshLibInfoFile'), + (r'^report$', 'htsworkflow.frontend.reports.reports.report1'), + (r'^report_RM$', 'htsworkflow.frontend.reports.reports.report_RM'), + (r'^report_FCs$', 'htsworkflow.frontend.reports.reports.getNotRanFCs'), + (r'^liblist$', 'htsworkflow.frontend.reports.reports.test_Libs') +) diff --git a/trunk/htsworkflow/frontend/reports/utils.py b/trunk/htsworkflow/frontend/reports/utils.py new file mode 100644 index 0000000..7b2d1b8 --- /dev/null +++ b/trunk/htsworkflow/frontend/reports/utils.py @@ -0,0 +1,61 @@ +def unique(s): + """Return a list of the elements in s, but without duplicates. + For example, unique([1,2,3,1,2,3]) is some permutation of [1,2,3], + unique("abcabc") some permutation of ["a", "b", "c"], and + unique(([1, 2], [2, 3], [1, 2])) some permutation of + [[2, 3], [1, 2]]. + For best speed, all sequence elements should be hashable. Then + unique() will usually work in linear time. + If not possible, the sequence elements should enjoy a total + ordering, and if list(s).sort() doesn't raise TypeError it's + assumed that they do enjoy a total ordering. Then unique() will + usually work in O(N*log2(N)) time. + If that's not possible either, the sequence elements must support + equality-testing. Then unique() will usually work in quadratic + time. + """ + + n = len(s) + if n == 0: + return [] + + # Try using a dict first, as that's the fastest and will usually + # work. If it doesn't work, it will usually fail quickly, so it + # usually doesn't cost much to *try* it. It requires that all the + # sequence elements be hashable, and support equality comparison. + u = {} + try: + for x in s: + u[x] = 1 + except TypeError: + del u # move on to the next method + else: + return u.keys() + # We can't hash all the elements. Second fastest is to sort, + # which brings the equal elements together; then duplicates are + # easy to weed out in a single pass. + # NOTE: Python's list.sort() was designed to be efficient in the + # presence of many duplicate elements. This isn't true of all + # sort functions in all languages or libraries, so this approach + # is more effective in Python than it may be elsewhere. + try: + t = list(s) + t.sort() + except TypeError: + del t # move on to the next method + else: + assert n > 0 + last = t[0] + lasti = i = 1 + while i < n: + if t[i] != last: + t[lasti] = last = t[i] + lasti += 1 + i += 1 + return t[:lasti] + # Brute force is all that's left. 
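+    # e.g. unique(['12287','12287','12005']) -> ['12287', '12005'] on this
+    # brute-force path (hypothetical library ids); the dict and sort fast
+    # paths above may return the same elements in any order.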
+ u = [] + for x in s: + if x not in u: + u.append(x) + return u diff --git a/trunk/htsworkflow/frontend/samples/__init__.py b/trunk/htsworkflow/frontend/samples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/trunk/htsworkflow/frontend/samples/admin.py b/trunk/htsworkflow/frontend/samples/admin.py new file mode 100644 index 0000000..643413f --- /dev/null +++ b/trunk/htsworkflow/frontend/samples/admin.py @@ -0,0 +1,148 @@ +from django.contrib import admin +from django.contrib.admin import widgets +from django.db import models +from django.utils.translation import ugettext_lazy as _ + +from htsworkflow.frontend.samples.models import Antibody, Cellline, Condition, ExperimentType, Species, Affiliation, Library, Tag + +class Library_Inline(admin.TabularInline): + model = Library + +class CelllineOptions(admin.ModelAdmin): + list_display = ('cellline_name', 'nickname', 'notes') + search_fields = ('cellline_name', 'nickname', 'notes') + fieldsets = ( + (None, { + 'fields': (('cellline_name'),('notes'),) + }), + ) + +class ExperimentTypeOptions(admin.ModelAdmin): + model = ExperimentType + #list_display = ('name',) + #fieldsets = ( (None, { 'fields': ('name',) }), ) + +class LibraryOptions(admin.ModelAdmin): + date_hierarchy = "creation_date" + save_as = True + save_on_top = True + search_fields = ( + 'library_id', + 'library_name', + 'cell_line__cellline_name', + 'library_species__scientific_name', + 'library_species__common_name', + ) + list_display = ( + 'library_id', + #'aligned_reads', + #'DataRun', + 'library_name', + 'public', + #'experiment_type', + #'organism', + #'antibody_name', + #'cell_line', + #'libtags', + #'made_for', + 'affiliation', + #'made_by', + 'undiluted_concentration', + 'creation_date', + 'stopping_point', + #'condition', + + ) + list_filter = ( + 'experiment_type', + 'library_species', + 'tags', + #'made_for', + 'affiliations', + 'made_by', + 'antibody', + 'cell_line', + 'condition', + 'stopping_point', + 'hidden') + list_display_links = ('library_id', 'library_name',) + fieldsets = ( + (None, { + 'fields': ( + ('library_id','library_name','hidden'), + ('library_species'), + ('experiment_type', 'replicate'), + ('cell_line','condition','antibody'),) + }), + ('Creation Information:', { + 'fields' : (('made_for', 'made_by', 'creation_date'), ('stopping_point', 'amplified_from_sample'), ('avg_lib_size','undiluted_concentration', 'ten_nM_dilution', 'successful_pM'), 'notes',) + }), + ('Library/Project Affiliation:', { + 'fields' : (('affiliations'), ('tags'),) + }), + ) + + # some post 1.0.2 version of django has formfield_overrides + # which would replace this code with: + # formfield_overrids = { + # models.ManyToMany: { 'widget': widgets.FilteredSelectMultiple } + # } + def formfield_for_dbfield(self, db_field, **kwargs): + if db_field.name == 'affiliations': + kwargs['widget'] = widgets.FilteredSelectMultiple(db_field.verbose_name, (db_field.name in self.filter_vertical)) + rv = super(LibraryOptions, self).formfield_for_dbfield(db_field, **kwargs) + print db_field.name, kwargs + return rv + +class AffiliationOptions(admin.ModelAdmin): + list_display = ('name','contact','email') + fieldsets = ( + (None, { + 'fields': (('name','contact','email')) + }), + ) + +# class UserOptions(admin.ModelAdmin): +# inlines = [Library_Inline] + +class AntibodyOptions(admin.ModelAdmin): + search_fields = ('antigene','nickname','catalog','antibodies','source','biology','notes') + list_display = ('antigene','nickname','antibodies','catalog','source','biology','notes') + 
list_filter = ('antibodies','source') + fieldsets = ( + (None, { + 'fields': (('antigene','nickname','antibodies'),('catalog','source'),('biology'),('notes')) + }), + ) + +class SpeciesOptions(admin.ModelAdmin): + fieldsets = ( + (None, { + 'fields': (('scientific_name', 'common_name'),) + }), + ) + +class ConditionOptions(admin.ModelAdmin): + list_display = (('condition_name'), ('notes'),) + fieldsets = ( + (None, { + 'fields': (('condition_name'),('nickname'),('notes'),) + }), + ) + +class TagOptions(admin.ModelAdmin): + list_display = ('tag_name', 'context') + fieldsets = ( + (None, { + 'fields': ('tag_name', 'context') + }), + ) + +admin.site.register(Affiliation, AffiliationOptions) +admin.site.register(Antibody, AntibodyOptions) +admin.site.register(Cellline, CelllineOptions) +admin.site.register(Condition, ConditionOptions) +admin.site.register(ExperimentType, ExperimentTypeOptions) +admin.site.register(Library, LibraryOptions) +admin.site.register(Species, SpeciesOptions) +admin.site.register(Tag, TagOptions) diff --git a/trunk/htsworkflow/frontend/samples/changelist.py b/trunk/htsworkflow/frontend/samples/changelist.py new file mode 100644 index 0000000..dccba27 --- /dev/null +++ b/trunk/htsworkflow/frontend/samples/changelist.py @@ -0,0 +1,239 @@ +""" +Slightly modified version of the django admin component that handles filters and searches +""" +from django.contrib.admin.filterspecs import FilterSpec +from django.contrib.admin.options import IncorrectLookupParameters +from django.core.paginator import Paginator, InvalidPage, EmptyPage +from django.db import models +from django.db.models.query import QuerySet +from django.utils.encoding import smart_str +from django.utils.http import urlencode + +import operator + +MAX_SHOW_ALL_ALLOWED = 20000 + +#change list settings +ALL_VAR = 'all' +ORDER_VAR = 'o' +ORDER_TYPE_VAR = 'ot' +PAGE_VAR = 'p' +SEARCH_VAR = 'q' +IS_POPUP_VAR = 'pop' +ERROR_FLAG = 'e' + +class ChangeList(object): + def __init__(self, request, model, list_filter, search_fields, list_per_page, queryset=None): + self.model = model + self.opts = model._meta + self.lookup_opts = self.opts + if queryset is None: + self.root_query_set = model.objects + else: + self.root_query_set = queryset + self.list_display = [] + self.list_display_links = None + self.list_filter = list_filter + + self.search_fields = search_fields + self.list_select_related = None + self.list_per_page = list_per_page + self.model_admin = None + + try: + self.page_num = int(request.GET.get(PAGE_VAR,'0')) + except ValueError: + self.page_num = 0 + self.show_all = 'all' in request.GET + self.params = dict(request.GET.items()) + if PAGE_VAR in self.params: + del self.params[PAGE_VAR] + if ERROR_FLAG in self.params: + del self.params[ERROR_FLAG] + + self.multi_page = True + self.can_show_all = False + + self.order_field, self.order_type = self.get_ordering() + self.query = request.GET.get(SEARCH_VAR, '') + self.query_set = self.get_query_set() + self.get_results(request) + self.filter_specs, self.has_filters = self.get_filters(request) + + #self.result_count = 'result count' + #self.full_result_count = 'full result count' + + def get_filters(self, request): + filter_specs = [] + if self.list_filter: + filter_fields = [self.lookup_opts.get_field(field_name) for field_name in self.list_filter] + for f in filter_fields: + spec = FilterSpec.create(f, request, self.params, self.model, self.model_admin) + if spec and spec.has_output(): + filter_specs.append(spec) + return filter_specs, bool(filter_specs) + + def 
get_query_string(self, new_params=None, remove=None): + if new_params is None: new_params = {} + if remove is None: remove = [] + p = self.params.copy() + for r in remove: + for k in p.keys(): + if k.startswith(r): + del p[k] + for k, v in new_params.items(): + if v is None: + if k in p: + del p[k] + else: + p[k] = v + return '?%s' % urlencode(p) + + def get_results(self, request): + paginator = Paginator(self.query_set, self.list_per_page) + # Get the number of objects, with admin filters applied. + result_count = paginator.count + + # Get the total number of objects, with no admin filters applied. + # Perform a slight optimization: Check to see whether any filters were + # given. If not, use paginator.hits to calculate the number of objects, + # because we've already done paginator.hits and the value is cached. + if not self.query_set.query.where: + full_result_count = result_count + else: + full_result_count = self.root_query_set.count() + + can_show_all = result_count <= MAX_SHOW_ALL_ALLOWED + multi_page = result_count > self.list_per_page + + # Get the list of objects to display on this page. + if (self.show_all and can_show_all) or not multi_page: + result_list = list(self.query_set) + else: + try: + result_list = paginator.page(self.page_num+1).object_list + except InvalidPage: + result_list = () + + self.result_count = result_count + self.full_result_count = full_result_count + self.result_list = result_list + self.can_show_all = can_show_all + self.multi_page = multi_page + self.paginator = paginator + + def get_ordering(self): + lookup_opts, params = self.lookup_opts, self.params + # For ordering, first check the "ordering" parameter in the admin + # options, then check the object's default ordering. If neither of + # those exist, order descending by ID by default. Finally, look for + # manually-specified ordering from the query string. + ordering = lookup_opts.ordering or ['-' + lookup_opts.pk.name] + + if ordering[0].startswith('-'): + order_field, order_type = ordering[0][1:], 'desc' + else: + order_field, order_type = ordering[0], 'asc' + if ORDER_VAR in params: + try: + field_name = self.list_display[int(params[ORDER_VAR])] + try: + f = lookup_opts.get_field(field_name) + except models.FieldDoesNotExist: + # See whether field_name is a name of a non-field + # that allows sorting. + try: + if callable(field_name): + attr = field_name + elif hasattr(self.model_admin, field_name): + attr = getattr(self.model_admin, field_name) + else: + attr = getattr(self.model, field_name) + order_field = attr.admin_order_field + except AttributeError: + pass + else: + order_field = f.name + except (IndexError, ValueError): + pass # Invalid ordering specified. Just use the default. + if ORDER_TYPE_VAR in params and params[ORDER_TYPE_VAR] in ('asc', 'desc'): + order_type = params[ORDER_TYPE_VAR] + return order_field, order_type + + def get_query_set(self): + qs = self.root_query_set + lookup_params = self.params.copy() # a dictionary of the query string + for i in (ALL_VAR, ORDER_VAR, ORDER_TYPE_VAR, SEARCH_VAR, IS_POPUP_VAR): + if i in lookup_params: + del lookup_params[i] + for key, value in lookup_params.items(): + if not isinstance(key, str): + # 'key' will be used as a keyword argument later, so Python + # requires it to be a string. + del lookup_params[key] + lookup_params[smart_str(key)] = value + + # if key ends with __in, split parameter into separate values + if key.endswith('__in'): + lookup_params[key] = value.split(',') + + # Apply lookup parameters from the query string. 
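+        # Everything left in lookup_params at this point came straight off the
+        # user's query string, hence the blanket exception handling below.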
+ try: + qs = qs.filter(**lookup_params) + # Naked except! Because we don't have any other way of validating "params". + # They might be invalid if the keyword arguments are incorrect, or if the + # values are not in the correct type, so we might get FieldError, ValueError, + # ValicationError, or ? from a custom field that raises yet something else + # when handed impossible data. + except Exception, e: + print e + raise IncorrectLookupParameters + + # Use select_related() if one of the list_display options is a field + # with a relationship. + if self.list_select_related: + qs = qs.select_related() + else: + for field_name in self.list_display: + try: + f = self.lookup_opts.get_field(field_name) + except models.FieldDoesNotExist: + pass + else: + if isinstance(f.rel, models.ManyToOneRel): + qs = qs.select_related() + break + + # Set ordering. + if self.order_field: + qs = qs.order_by('%s%s' % ((self.order_type == 'desc' and '-' or ''), self.order_field)) + + # Apply keyword searches. + def construct_search(field_name): + if field_name.startswith('^'): + return "%s__istartswith" % field_name[1:] + elif field_name.startswith('='): + return "%s__iexact" % field_name[1:] + elif field_name.startswith('@'): + return "%s__search" % field_name[1:] + else: + return "%s__icontains" % field_name + + if self.search_fields and self.query: + for bit in self.query.split(): + or_queries = [models.Q(**{construct_search(field_name): bit}) for field_name in self.search_fields] + other_qs = QuerySet(self.model) + other_qs.dup_select_related(qs) + other_qs = other_qs.filter(reduce(operator.or_, or_queries)) + qs = qs & other_qs + for field_name in self.search_fields: + if '__' in field_name: + qs = qs.distinct() + break + + if self.opts.one_to_one_field: + qs = qs.complex_filter(self.opts.one_to_one_field.rel.limit_choices_to) + + return qs + + diff --git a/trunk/htsworkflow/frontend/samples/models.py b/trunk/htsworkflow/frontend/samples/models.py new file mode 100644 index 0000000..f51d250 --- /dev/null +++ b/trunk/htsworkflow/frontend/samples/models.py @@ -0,0 +1,249 @@ +import urlparse +from django.db import models +from django.contrib.auth.models import User +from htsworkflow.frontend import settings +from htsworkflow.frontend.reports.libinfopar import * + +# Create your models here. 
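+# Rough usage sketch for the helpers defined below (library id hypothetical):
+#   lib = Library.objects.get(library_id='10001')
+#   lib.aligned_m_reads()  # [lane_count, read_count, msg] via libinfopar.getLibReads
+#   lib.is_archived()      # True once a LongTermStorage record references it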
+ +class Antibody(models.Model): + antigene = models.CharField(max_length=500, db_index=True) + # New field Aug/20/08 + # SQL to add column: + # alter table fctracker_antibody add column "nickname" varchar(20) NULL; + nickname = models.CharField( + max_length=20, + blank=True, + null=True, + db_index=True, + verbose_name = 'Short Name' + ) + catalog = models.CharField(max_length=50, unique=True, db_index=True) + antibodies = models.CharField(max_length=500, db_index=True) + source = models.CharField(max_length=500, blank=True, db_index=True) + biology = models.TextField(blank=True) + notes = models.TextField(blank=True) + def __unicode__(self): + return u'%s - %s (%s)' % (self.antigene, self.antibodies, self.catalog) + class Meta: + verbose_name_plural = "antibodies" + ordering = ["antigene"] + +class Cellline(models.Model): + cellline_name = models.CharField(max_length=100, unique=True, db_index=True) + nickname = models.CharField(max_length=20, + blank=True, + null=True, + db_index=True, + verbose_name = 'Short Name') + notes = models.TextField(blank=True) + def __unicode__(self): + return unicode(self.cellline_name) + + class Meta: + ordering = ["cellline_name"] + +class Condition(models.Model): + condition_name = models.CharField( + max_length=2000, unique=True, db_index=True) + nickname = models.CharField(max_length=20, + blank=True, + null=True, + db_index=True, + verbose_name = 'Short Name') + notes = models.TextField(blank=True) + + def __unicode__(self): + return unicode(self.condition_name) + + class Meta: + ordering = ["condition_name"] + +class ExperimentType(models.Model): + name = models.CharField(max_length=50, unique=True) + + def __unicode__(self): + return unicode(self.name) + +class Tag(models.Model): + tag_name = models.CharField(max_length=100, db_index=True,blank=False,null=False) + TAG_CONTEXT = ( + #('Antibody','Antibody'), + #('Cellline', 'Cellline'), + #('Condition', 'Condition'), + ('Library', 'Library'), + ('ANY','ANY'), + ) + context = models.CharField(max_length=50, + choices=TAG_CONTEXT, default='Library') + + def __unicode__(self): + return u'%s' % (self.tag_name) + + class Meta: + ordering = ["context","tag_name"] + +class Species(models.Model): + scientific_name = models.CharField(max_length=256, + unique=False, + db_index=True + ) + common_name = models.CharField(max_length=256, blank=True) + #use_genome_build = models.CharField(max_length=100, blank=False, null=False) + + def __unicode__(self): + return u'%s (%s)' % (self.scientific_name, self.common_name) + + class Meta: + verbose_name_plural = "species" + ordering = ["scientific_name"] + +class Affiliation(models.Model): + name = models.CharField(max_length=256, db_index=True, verbose_name='Name') + contact = models.CharField(max_length=256, null=True, blank=True,verbose_name='Lab Name') + email = models.EmailField(null=True,blank=True) + + def __unicode__(self): + str = unicode(self.name) + if self.contact is not None and len(self.contact) > 0: + str += u' ('+self.contact+u')' + return str + + class Meta: + ordering = ["name","contact"] + unique_together = (("name", "contact"),) + +class Library(models.Model): + id = models.AutoField(primary_key=True) + library_id = models.CharField(max_length=30, db_index=True, unique=True) + library_name = models.CharField(max_length=100, unique=True) + library_species = models.ForeignKey(Species) + # new field 2008 Mar 5, alter table samples_library add column "hidden" NOT NULL default 0; + hidden = models.BooleanField() + cell_line = models.ForeignKey(Cellline, 
null=True) + condition = models.ForeignKey(Condition, null=True) + antibody = models.ForeignKey(Antibody,blank=True,null=True) + # New field Aug/25/08. SQL: alter table fctracker_library add column "lib_affiliation" varchar(256) NULL; + affiliations = models.ManyToManyField(Affiliation,related_name='library_affiliations',null=True) + # new field Nov/14/08 + tags = models.ManyToManyField(Tag,related_name='library_tags',blank=True,null=True) + # New field Aug/19/08 + # SQL to add column: alter table fctracker_library add column "replicate" smallint unsigned NULL; + REPLICATE_NUM = ((1,1),(2,2),(3,3),(4,4)) + replicate = models.PositiveSmallIntegerField(choices=REPLICATE_NUM,default=1) + experiment_type = models.ForeignKey(ExperimentType) + creation_date = models.DateField(blank=True, null=True) + made_for = models.CharField(max_length=50, blank=True, + verbose_name='ChIP/DNA/RNA Made By') + made_by = models.CharField(max_length=50, blank=True, default="Lorian") + + PROTOCOL_END_POINTS = ( + ('?', 'Unknown'), + ('Sample', 'Raw sample'), + ('Progress', 'In progress'), + ('1A', 'Ligation, then gel'), + ('PCR', 'Ligation, then PCR'), + ('1Ab', 'Ligation, PCR, then gel'), + ('1Aa', 'Ligation, gel, then PCR'), + ('2A', 'Ligation, PCR, gel, PCR'), + ('Done', 'Completed'), + ) + stopping_point = models.CharField(max_length=25, choices=PROTOCOL_END_POINTS, default='Done') + amplified_from_sample = models.ForeignKey('self', blank=True, null=True, related_name='amplified_into_sample') + + undiluted_concentration = models.DecimalField("Concentration", + max_digits=5, decimal_places=2, blank=True, null=True, + help_text=u"Undiluted concentration (ng/\u00b5l)") + # note \u00b5 is the micro symbol in unicode + successful_pM = models.DecimalField(max_digits=9, decimal_places=1, blank=True, null=True) + ten_nM_dilution = models.BooleanField() + avg_lib_size = models.IntegerField(default=225, blank=True, null=True) + notes = models.TextField(blank=True) + + def __unicode__(self): + return u'#%s: %s' % (self.library_id, self.library_name) + + class Meta: + verbose_name_plural = "libraries" + #ordering = ["-creation_date"] + ordering = ["-library_id"] + + def antibody_name(self): + str =''+self.antibody.nickname+'' + return str + antibody_name.allow_tags = True + + def organism(self): + return self.library_species.common_name + + def affiliation(self): + affs = self.affiliations.all().order_by('name') + tstr = '' + ar = [] + for t in affs: + ar.append(t.__unicode__()) + return '%s' % (", ".join(ar)) + + def is_archived(self): + """ + returns True if archived else False + """ + if self.longtermstorage_set.count() > 0: + return True + else: + return False + + def libtags(self): + affs = self.tags.all().order_by('tag_name') + ar = [] + for t in affs: + ar.append(t.__unicode__()) + return u'%s' % ( ", ".join(ar)) + + def DataRun(self): + str ='Data Run' + return str + DataRun.allow_tags = True + + def aligned_m_reads(self): + return getLibReads(self.library_id) + + def aligned_reads(self): + res = getLibReads(self.library_id) + + # Check data sanity + if res[2] != "OK": + return u'
<font color="red">'+res[2]+'</font>' + + rc = "%1.2f" % (res[1]/1000000.0) + # Color Scheme: green is more than 10M, blue is more than 5M, orange is more than 3M and red is less. For RNAseq, all those thresholds should be doubled + if res[0] > 0: + bgcolor = '#ff3300' # Red + rc_thr = [10000000,5000000,3000000] + if self.experiment_type == 'RNA-seq': + rc_thr = [20000000,10000000,6000000] + + if res[1] > rc_thr[0]: + bgcolor = '#66ff66' # Green + else: + if res[1] > rc_thr[1]: + bgcolor ='#00ccff' # Blue + else: + if res[1] > rc_thr[2]: + bgcolor ='#ffcc33' # Orange + tstr = '<div style="background-color:'+bgcolor+'">' + tstr += res[0].__unicode__()+' Lanes, '+rc+' M Reads' + tstr += '</div>
' + else: tstr = 'not processed yet' + return tstr + aligned_reads.allow_tags = True + + def public(self): + SITE_ROOT = '/' + summary_url = self.get_absolute_url() + return 'S' % (summary_url,) + public.allow_tags = True + + @models.permalink + def get_absolute_url(self): + return ('htsworkflow.frontend.samples.views.library_to_flowcells', [str(self.library_id)]) diff --git a/trunk/htsworkflow/frontend/samples/results.py b/trunk/htsworkflow/frontend/samples/results.py new file mode 100644 index 0000000..c2419cd --- /dev/null +++ b/trunk/htsworkflow/frontend/samples/results.py @@ -0,0 +1,134 @@ +from htsworkflow.frontend import settings + +import glob +import os +import re + +s_paren = re.compile("^\w+") + +def get_flowcell_result_dict(flowcell_id): + """ + returns a dictionary following the following pattern for + a given flowcell_id: + + + d['C1-33']['summary'] # Summary.htm file path + d['C1-33']['eland_results'][5] # C1-33 lane 5 file eland results file path + d['C1-33']['run_xml'] # run_*.xml file path + d['C1-33']['scores'] # scores.tar.gz file path + """ + flowcell_id = flowcell_id.strip() + + d = {} + + ################################ + # Flowcell Directory + fc_dir = glob.glob(os.path.join(settings.RESULT_HOME_DIR, flowcell_id)) + + # Not found + if len(fc_dir) == 0: + return None + + # No duplicates! + assert len(fc_dir) <= 1 + + # Found fc dir + fc_dir = fc_dir[0] + + ################################ + # C#-## dirs + c_dir_list = glob.glob(os.path.join(fc_dir, 'C*')) + + # Not found + if len(c_dir_list) == 0: + return d + + for c_dir_path in c_dir_list: + summary_file = glob.glob(os.path.join(c_dir_path, 'Summary.htm')) + pathdir, c_dir = os.path.split(c_dir_path) + + # Create sub-dictionary + d[c_dir] = {} + + + ############################### + # Summary.htm file + + # Not found + if len(summary_file) == 0: + d[c_dir]['summary'] = None + + # Found + else: + # No duplicates! + assert len(summary_file) == 1 + + summary_file = summary_file[0] + d[c_dir]['summary'] = summary_file + + ############################### + # Result files + + d[c_dir]['eland_results'] = {} + + result_filepaths = glob.glob(os.path.join(c_dir_path, 's_*_eland_*')) + + for filepath in result_filepaths: + + junk, result_name = os.path.split(filepath) + + #lanes 1-8, single digit, therefore s_#; # == index 2 + lane = int(result_name[2]) + d[c_dir]['eland_results'][lane] = filepath + + ############################### + # run*.xml file + run_xml_filepath = glob.glob(os.path.join(c_dir_path, 'run_*.xml')) + + if len(run_xml_filepath) == 0: + d[c_dir]['run_xml'] = None + else: + # No duplicates + assert len(run_xml_filepath) == 1 + + d[c_dir]['run_xml'] = run_xml_filepath[0] + + ############################### + # scores.tar.gz + scores_filepath = glob.glob(os.path.join(c_dir_path, 'scores*')) + + if len(scores_filepath) == 0: + d[c_dir]['scores'] = None + else: + # No duplicates + assert len(scores_filepath) == 1 + + d[c_dir]['scores'] = scores_filepath[0] + + return d + + +def cn_mTobp(cn_m): + """ + Converts CN-M (i.e. C1-33, C1-26, C4-28) cycle information into + number of base pairs. + """ + pass + + +def parse_flowcell_id(flowcell_id): + """ + Return flowcell id and any status encoded in the id + + We stored the status information in the flowcell id name. + this was dumb, but database schemas are hard to update. 
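+    Example (hypothetical id): '30BCAA (failed)' parses to ('30BCAA', '(failed)'),
+    while a bare '30BCAA' gives ('30BCAA', None).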
+ """ + fields = flowcell_id.split() + fcid = None + status = None + if len(fields) > 0: + fcid = fields[0] + if len(fields) > 1: + status = fields[1] + return fcid, status + diff --git a/trunk/htsworkflow/frontend/samples/tests.py b/trunk/htsworkflow/frontend/samples/tests.py new file mode 100644 index 0000000..9e8a2a3 --- /dev/null +++ b/trunk/htsworkflow/frontend/samples/tests.py @@ -0,0 +1,102 @@ +import datetime +import unittest +from htsworkflow.frontend.samples.models import \ + Affiliation, \ + ExperimentType, \ + Species, \ + Library + +# The django test runner flushes the database between test suites not cases, +# so to be more compatible with running via nose we flush the database tables +# of interest before creating our sample data. +def create_db(obj): + Species.objects.all().delete() + obj.species_human = Species( + scientific_name = 'Homo Sapeins', + common_name = 'human', + ) + obj.species_human.save() + obj.species_worm = Species( + scientific_name = 'C. Elegans', + common_name = 'worm', + ) + obj.species_worm.save() + obj.species_phix = Species( + scientific_name = 'PhiX', + common_name = 'PhiX' + ) + obj.species_phix.save() + + ExperimentType.objects.all().delete() + obj.experiment_de_novo = ExperimentType( + name = 'De Novo', + ) + obj.experiment_de_novo.save() + obj.experiment_chip_seq = ExperimentType( + name = 'ChIP-Seq' + ) + obj.experiment_chip_seq.save() + obj.experiment_rna_seq = ExperimentType( + name = 'RNA-Seq' + ) + obj.experiment_rna_seq.save() + + Affiliation.objects.all().delete() + obj.affiliation_alice = Affiliation( + name = 'Alice', + contact = 'Lab Boss', + email = 'alice@some.where.else.' + ) + obj.affiliation_alice.save() + obj.affiliation_bob = Affiliation( + name = 'Bob', + contact = 'Other Lab Boss', + email = 'bob@some.where.else', + ) + obj.affiliation_bob.save() + + Library.objects.all().delete() + obj.library_10001 = Library( + library_id = 10001, + library_name = 'C2C12 named poorly', + library_species = obj.species_human, + experiment_type = obj.experiment_rna_seq, + creation_date = datetime.datetime.now(), + made_for = 'scientist unit 2007', + made_by = 'microfludics system 7321', + stopping_point = '2A', + undiluted_concentration = '5.01', + ) + obj.library_10001.save() + obj.library_10002 = Library( + library_id = 10002, + library_name = 'Worm named poorly', + library_species = obj.species_human, + experiment_type = obj.experiment_rna_seq, + creation_date = datetime.datetime.now(), + made_for = 'scientist unit 2007', + made_by = 'microfludics system 7321', + stopping_point = '2A', + undiluted_concentration = '5.01', + ) + obj.library_10002.save() + +class LibraryTestCase(unittest.TestCase): + def setUp(self): + create_db(self) + + def testOrganism(self): + self.assertEquals(self.library_10001.organism(), 'human') + + def testAffiliations(self): + self.library_10001.affiliations.add(self.affiliation_alice) + self.library_10002.affiliations.add( + self.affiliation_alice, + self.affiliation_bob + ) + self.failUnless(len(self.library_10001.affiliations.all()), 1) + self.failUnless(self.library_10001.affiliation(), 'Alice') + + self.failUnless(len(self.library_10002.affiliations.all()), 2) + self.failUnless(self.library_10001.affiliation(), 'Alice, Bob') + diff --git a/trunk/htsworkflow/frontend/samples/views.py b/trunk/htsworkflow/frontend/samples/views.py new file mode 100644 index 0000000..b8956d5 --- /dev/null +++ b/trunk/htsworkflow/frontend/samples/views.py @@ -0,0 +1,393 @@ +# Create your views here. 
+from htsworkflow.frontend.experiments.models import FlowCell
+from htsworkflow.frontend.samples.changelist import ChangeList
+from htsworkflow.frontend.samples.models import Library
+from htsworkflow.frontend.samples.results import get_flowcell_result_dict, parse_flowcell_id
+from htsworkflow.pipelines.runfolder import load_pipeline_run_xml
+from htsworkflow.pipelines import runfolder
+from htsworkflow.frontend import settings
+from htsworkflow.util import makebed
+from htsworkflow.util import opener
+
+from django.core.exceptions import ObjectDoesNotExist
+from django.http import HttpResponse, HttpResponseRedirect
+from django.shortcuts import render_to_response
+from django.template import RequestContext
+from django.template.loader import get_template
+
+import StringIO
+import logging
+import os
+
+LANE_LIST = [1,2,3,4,5,6,7,8]
+
+def create_library_context(cl):
+    """
+    Create a list of libraries that includes how many lanes were run
+    """
+    records = []
+    #for lib in library_items.object_list:
+    for lib in cl.result_list:
+        summary = {}
+        summary['library_id'] = lib.library_id
+        summary['library_name'] = lib.library_name
+        summary['species_name'] = lib.library_species.scientific_name
+        if lib.amplified_from_sample is not None:
+            summary['amplified_from'] = lib.amplified_from_sample.library_id
+        else:
+            summary['amplified_from'] = ''
+        lanes_run = 0
+        for lane_id in LANE_LIST:
+            lane = getattr(lib, 'lane_%d_library' % (lane_id,))
+            lanes_run += len( lane.all() )
+        summary['lanes_run'] = lanes_run
+        summary['is_archived'] = lib.is_archived()
+        records.append(summary)
+    cl.result_count = unicode(cl.paginator._count) + u" libraries"
+    return {'library_list': records }
+
+def library(request):
+    # build changelist
+    fcl = ChangeList(request, Library,
+        list_filter=['affiliations', 'library_species'],
+        search_fields=['library_id', 'library_name', 'amplified_from_sample__library_id'],
+        list_per_page=200,
+        queryset=Library.objects.filter(hidden__exact=0)
+    )
+
+    context = { 'cl': fcl, 'title': 'Library Index'}
+    context.update(create_library_context(fcl))
+    t = get_template('samples/library_index.html')
+    c = RequestContext(request, context)
+    return HttpResponse( t.render(c) )
+
+def library_to_flowcells(request, lib_id):
+    """
+    Display information about all the flowcells a library has been run on.
+    """
+
+    try:
+        lib = Library.objects.get(library_id=lib_id)
+    except ObjectDoesNotExist:
+        return HttpResponse("Library %s does not exist" % (lib_id))
+
+    flowcell_list = []
+    interesting_flowcells = {}  # aka flowcells we're looking at
+    for lane in LANE_LIST:
+        lane_library = getattr(lib, 'lane_%d_library' % (lane,))
+        for fc in lane_library.all():
+            flowcell_id, status = parse_flowcell_id(fc.flowcell_id)
+            if flowcell_id not in interesting_flowcells:
+                interesting_flowcells[flowcell_id] = get_flowcell_result_dict(flowcell_id)
+            flowcell_list.append((fc.flowcell_id, lane))
+
+    flowcell_list.sort()
+
+    lane_summary_list = []
+    eland_results = []
+    for fc, lane in flowcell_list:
+        lane_summary, err_list = _summary_stats(fc, lane)
+
+        eland_results.extend(_make_eland_results(fc, lane, interesting_flowcells))
+        lane_summary_list.extend(lane_summary)
+
+    return render_to_response(
+        'samples/library_detail.html',
+        {'lib': lib,
+         'eland_results': eland_results,
+         'lane_summary_list': lane_summary_list,
+        },
+        context_instance = RequestContext(request))
+
+def summaryhtm_fc_cnm(request, flowcell_id, cnm):
+    """
+    returns a Summary.htm file if it exists.
+ """ + fc_id, status = parse_flowcell_id(flowcell_id) + d = get_flowcell_result_dict(fc_id) + + if d is None: + return HttpResponse('Results for Flowcell %s not found.' % (fc_id)) + + if cnm not in d: + return HttpResponse('Results for Flowcell %s; %s not found.' % (fc_id, cnm)) + + summary_filepath = d[cnm]['summary'] + + if summary_filepath is None: + return HttpResponse('Summary.htm for Flowcell %s; %s not found.' % (fc_id, cnm)) + + f = open(summary_filepath, 'r') + + return HttpResponse(f) + + +def result_fc_cnm_eland_lane(request, flowcell_id, cnm, lane): + """ + returns an eland_file upon calling. + """ + fc_id, status = parse_flowcell_id(flowcell_id) + d = get_flowcell_result_dict(fc_id) + + if d is None: + return HttpResponse('Results for Flowcell %s not found.' % (fc_id)) + + if cnm not in d: + return HttpResponse('Results for Flowcell %s; %s not found.' % (fc_id, cnm)) + + erd = d[cnm]['eland_results'] + lane = int(lane) + + if lane not in erd: + return HttpResponse('Results for Flowcell %s; %s; lane %s not found.' % (fc_id, cnm, lane)) + + filepath = erd[lane] + + #f = opener.autoopen(filepath, 'r') + # return HttpResponse(f, mimetype="application/x-elandresult") + + f = open(filepath, 'r') + return HttpResponse(f, mimetype='application/x-bzip2') + + + +def bedfile_fc_cnm_eland_lane_ucsc(request, fc_id, cnm, lane): + """ + returns a bed file for a given flowcell, CN-M (i.e. C1-33), and lane (ucsc compatible) + """ + return bedfile_fc_cnm_eland_lane(request, fc_id, cnm, lane, ucsc_compatible=True) + + +def bedfile_fc_cnm_eland_lane(request, flowcell_id, cnm, lane, ucsc_compatible=False): + """ + returns a bed file for a given flowcell, CN-M (i.e. C1-33), and lane + """ + fc_id, status = parse_flowcell_id(flowcell_id) + d = get_flowcell_result_dict(fc_id) + + if d is None: + return HttpResponse('Results for Flowcell %s not found.' % (fc_id)) + + if cnm not in d: + return HttpResponse('Results for Flowcell %s; %s not found.' % (fc_id, cnm)) + + erd = d[cnm]['eland_results'] + lane = int(lane) + + if lane not in erd: + return HttpResponse('Results for Flowcell %s; %s; lane %s not found.' % (fc_id, cnm, lane)) + + filepath = erd[lane] + + # Eland result file + fi = opener.autoopen(filepath, 'r') + # output memory file + + name, description = makebed.make_description( fc_id, lane ) + + bedgen = makebed.make_bed_from_eland_generator(fi, name, description) + + if ucsc_compatible: + return HttpResponse(bedgen) + else: + return HttpResponse(bedgen, mimetype="application/x-bedfile") + + +def _summary_stats(flowcell_id, lane_id): + """ + Return the summary statistics for a given flowcell, lane, and end. + """ + fc_id, status = parse_flowcell_id(flowcell_id) + fc_result_dict = get_flowcell_result_dict(fc_id) + + summary_list = [] + err_list = [] + + if fc_result_dict is None: + err_list.append('Results for Flowcell %s not found.' % (fc_id)) + return (summary_list, err_list) + + for cycle_width in fc_result_dict: + xmlpath = fc_result_dict[cycle_width]['run_xml'] + + if xmlpath is None: + err_list.append('Run xml for Flowcell %s(%s) not found.' 
+                            % (fc_id, cycle_width))
+            continue
+
+        run = load_pipeline_run_xml(xmlpath)
+        gerald_summary = run.gerald.summary.lane_results
+        for end in range(len(gerald_summary)):
+            eland_summary = run.gerald.eland_results.results[end][lane_id]
+            # add information to lane_summary
+            eland_summary.flowcell_id = flowcell_id
+            eland_summary.clusters = gerald_summary[end][lane_id].cluster
+            eland_summary.cycle_width = cycle_width
+            if hasattr(eland_summary, 'genome_map'):
+                eland_summary.summarized_reads = runfolder.summarize_mapped_reads(
+                                                   eland_summary.genome_map,
+                                                   eland_summary.mapped_reads)
+
+            # grab some more information out of the flowcell db
+            flowcell = FlowCell.objects.get(flowcell_id=flowcell_id)
+            pm_field = 'lane_%d_pM' % (lane_id)
+            eland_summary.successful_pm = getattr(flowcell, pm_field)
+
+            summary_list.append(eland_summary)
+
+        #except Exception, e:
+        #    summary_list.append("Summary report needs to be updated.")
+        #    logging.error("Exception: " + str(e))
+
+    return (summary_list, err_list)
+
+def _summary_stats_old(flowcell_id, lane):
+    """
+    return a dictionary of summary stats for a given flowcell_id & lane.
+    """
+    fc_id, status = parse_flowcell_id(flowcell_id)
+    fc_result_dict = get_flowcell_result_dict(fc_id)
+
+    dict_list = []
+    err_list = []
+    summary_list = []
+
+    if fc_result_dict is None:
+        err_list.append('Results for Flowcell %s not found.' % (fc_id))
+        return (dict_list, err_list, summary_list)
+
+    for cnm in fc_result_dict:
+
+        xmlpath = fc_result_dict[cnm]['run_xml']
+
+        if xmlpath is None:
+            err_list.append('Run xml for Flowcell %s(%s) not found.' % (fc_id, cnm))
+            continue
+
+        # ElementTree is not imported directly in this module, so go
+        # through the runfolder module, which re-exports it.
+        tree = runfolder.ElementTree.parse(xmlpath).getroot()
+        results = runfolder.PipelineRun(pathname='', xml=tree)
+        try:
+            lane_report = runfolder.summarize_lane(results.gerald, lane)
+            summary_list.append(os.linesep.join(lane_report))
+        except Exception, e:
+            summary_list.append("Summary report needs to be updated.")
+            logging.error("Exception: " + str(e))
+
+        print "----------------------------------"
+        print "-- DOES NOT SUPPORT PAIRED END ---"
+        print "----------------------------------"
+        lane_results = results.gerald.summary[0][lane]
+        lrs = lane_results
+
+        d = {}
+
+        d['average_alignment_score'] = lrs.average_alignment_score
+        d['average_first_cycle_intensity'] = lrs.average_first_cycle_intensity
+        d['cluster'] = lrs.cluster
+        d['lane'] = lrs.lane
+        d['flowcell'] = flowcell_id
+        d['cnm'] = cnm
+        d['percent_error_rate'] = lrs.percent_error_rate
+        d['percent_intensity_after_20_cycles'] = lrs.percent_intensity_after_20_cycles
+        d['percent_pass_filter_align'] = lrs.percent_pass_filter_align
+        d['percent_pass_filter_clusters'] = lrs.percent_pass_filter_clusters
+
+        # FIXME: this works, but we may need to take in a list of lanes
+        # so we only have to load the xml file once per flowcell rather
+        # than once per lane.
+        dict_list.append(d)
+
+    return (dict_list, err_list, summary_list)
+
+
+def get_eland_result_type(pathname):
+    """
+    Guess the eland result file type from the filename
+    """
+    path, filename = os.path.split(pathname)
+    if 'extended' in filename:
+        return 'extended'
+    elif 'multi' in filename:
+        return 'multi'
+    elif 'result' in filename:
+        return 'result'
+    else:
+        return 'unknown'
+
+def _make_eland_results(flowcell_id, lane, interesting_flowcells):
+    fc_id, status = parse_flowcell_id(flowcell_id)
+    cur_fc = interesting_flowcells.get(fc_id, None)
+    if cur_fc is None:
+        return []
+
+    results = []
+    for cycle in cur_fc.keys():
+        result_path = cur_fc[cycle]['eland_results'].get(lane, None)
+        result_link = make_result_link(fc_id, cycle, lane, result_path)
+        results.append({'flowcell_id': fc_id,
+                        'cycle': cycle,
+                        'lane': lane,
+                        'summary_url': make_summary_url(flowcell_id, cycle),
+                        'result_url': result_link[0],
+                        'result_label': result_link[1],
+                        'bed_url': result_link[2],
+                       })
+    return results
+
+def make_summary_url(flowcell_id, cycle_name):
+    url = '/results/%s/%s/summary/' % (flowcell_id, cycle_name)
+    return url
+
+def make_result_link(flowcell_id, cycle_name, lane, eland_result_path):
+    if eland_result_path is None:
+        return ("", "", "")
+
+    result_type = get_eland_result_type(eland_result_path)
+    result_url = '/results/%s/%s/eland_result/%s' % (flowcell_id, cycle_name, lane)
+    result_label = 'eland %s' % (result_type,)
+    bed_url = None
+    if result_type == 'result':
+        bed_url_pattern = '/results/%s/%s/bedfile/%s'
+        bed_url = bed_url_pattern % (flowcell_id, cycle_name, lane)
+
+    return (result_url, result_label, bed_url)
+
+def _files(flowcell_id, lane):
+    """
+    Sets up available files for download
+    """
+    lane = int(lane)
+
+    flowcell_id, status = parse_flowcell_id(flowcell_id)
+    d = get_flowcell_result_dict(flowcell_id)
+
+    if d is None:
+        return ''
+
+    output = []
+
+    # c_name == 'CN-M' (e.g. C1-33)
+    for c_name in d:
+
+        if d[c_name]['summary'] is not None:
+            output.append('<a href="/results/%s/%s/summary/">summary(%s)</a>' \
+                          % (flowcell_id, c_name, c_name))
+
+        erd = d[c_name]['eland_results']
+        if lane in erd:
+            result_type = get_eland_result_type(erd[lane])
+            result_url_pattern = '<a href="/results/%s/%s/eland_result/%s">eland %s(%s)</a>'
+            output.append(result_url_pattern % (flowcell_id, c_name, lane, result_type, c_name))
+            if result_type == 'result':
+                bed_url_pattern = '<a href="/results/%s/%s/bedfile/%s">bedfile(%s)</a>'
+                output.append(bed_url_pattern % (flowcell_id, c_name, lane, c_name))
+
+    if len(output) == 0:
+        return ''
+
+    return '(' + '|'.join(output) + ')'
+
+def library_id_to_admin_url(request, lib_id):
+    lib = Library.objects.get(library_id=lib_id)
+    return HttpResponseRedirect('/admin/samples/library/%s' % (lib.id,))
+
diff --git a/trunk/htsworkflow/frontend/settings.py b/trunk/htsworkflow/frontend/settings.py
new file mode 100644
index 0000000..d971bbc
--- /dev/null
+++ b/trunk/htsworkflow/frontend/settings.py
@@ -0,0 +1,175 @@
+"""
+Generate settings for the Django application.
+
+To make it easier to customize the application the settings can be
+defined in a configuration file read by ConfigParser.
+
+The options understood by this module are (with their defaults):
+
+  [frontend]
+  email_host=localhost
+  email_port=25
+  database_engine=sqlite3
+  database_name=/path/to/db
+
+  [admins]
+  #name1=email1
+
+  [allowed_hosts]
+  #name1=ip
+  localhost=127.0.0.1
+
+  [allowed_analysis_hosts]
+  #name1=ip
+  localhost=127.0.0.1
+
+"""
+import ConfigParser
+import os
+
+# make epydoc happy
+__docformat__ = "restructuredtext en"
+
+def options_to_list(dest, section_name):
+    """
+    Load options from section_name and append their values to the list dest
+    """
+    if options.has_section(section_name):
+        for name in options.options(section_name):
+            dest.append( options.get(section_name, name) )
+
+def options_to_dict(dest, section_name):
+    """
+    Load options from section_name and store them in the dictionary dest
+    """
+    if options.has_section(section_name):
+        for name in options.options(section_name):
+            dest[name] = options.get(section_name, name)
+
+# define your defaults here
+options = ConfigParser.SafeConfigParser(
+           { 'email_host': 'localhost',
+             'email_port': '25',
+             'database_engine': 'sqlite3',
+             'database_name':
+               os.path.abspath('/htsworkflow/htswfrontend/dev_fctracker.db'),
+             'time_zone': 'America/Los_Angeles',
+             'default_pm': '5',
+             'link_flowcell_storage_device_url': "http://localhost:8000/inventory/lts/link/"
+           })
+
+options.read([os.path.expanduser("~/.htsworkflow.ini"),
+              '/etc/htsworkflow.ini',])
+
+# Django settings for the htsworkflow frontend (formerly the elandifier project).
+
+DEBUG = True
+TEMPLATE_DEBUG = DEBUG
+
+ADMINS = []
+options_to_list(ADMINS, 'admins')
+
+MANAGERS = ADMINS
+
+EMAIL_HOST = options.get('frontend', 'email_host')
+EMAIL_PORT = int(options.get('frontend', 'email_port'))
+
+# 'postgresql_psycopg2', 'postgresql', 'mysql', 'sqlite3' or 'ado_mssql'.
+DATABASE_ENGINE = options.get('frontend', 'database_engine')
+
+# Or path to database file if using sqlite3.
+DATABASE_NAME = options.get('frontend', 'database_name' )
+DATABASE_USER = ''             # Not used with sqlite3.
+DATABASE_PASSWORD = ''         # Not used with sqlite3.
+DATABASE_HOST = ''             # Set to empty string for localhost. Not used with sqlite3.
+DATABASE_PORT = ''             # Set to empty string for default. Not used with sqlite3.
+
+# Local time zone for this installation. Choices can be found here:
+# http://www.postgresql.org/docs/8.1/static/datetime-keywords.html#DATETIME-TIMEZONE-SET-TABLE
+# although not all variations may be possible on all operating systems.
+# If running in a Windows environment this must be set to the same as your
+# system time zone.
+TIME_ZONE = options.get('frontend', 'time_zone')
+
+# Language code for this installation. All choices can be found here:
+# http://www.w3.org/TR/REC-html40/struct/dirlang.html#langcodes
+# http://blogs.law.harvard.edu/tech/stories/storyReader$15
+LANGUAGE_CODE = 'en-us'
+
+SITE_ID = 1
+
+# If you set this to False, Django will make some optimizations so as not
+# to load the internationalization machinery.
+USE_I18N = True
+
+# Absolute path to the directory that holds media.
+# Example: "/home/media/media.lawrence.com/"
+MEDIA_ROOT = os.path.abspath(os.path.split(__file__)[0]) + '/static/'
+
+# URL that handles the media served from MEDIA_ROOT.
+# Example: "http://media.lawrence.com"
+MEDIA_URL = '/static/'
+
+# URL prefix for admin media -- CSS, JavaScript and images. Make sure to use a
+# trailing slash.
+# Examples: "http://foo.com/media/", "/media/".
+ADMIN_MEDIA_PREFIX = '/media/'
+
+# Make this unique, and don't share it with anybody.
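+# (A deployment note, not project code: regenerate this key for any real
+# install; a stdlib-only Python 2 sketch for doing so:
+#   python -c "import random, string; print ''.join(random.choice(string.letters + string.digits) for i in range(50))"
+# )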
+SECRET_KEY = '(ekv^=gf(j9f(x25@a7r+8)hqlz%&_1!tw^75l%^041#vi=@4n' + +# List of callables that know how to import templates from various sources. +TEMPLATE_LOADERS = ( + 'django.template.loaders.filesystem.load_template_source', + 'django.template.loaders.app_directories.load_template_source', +# 'django.template.loaders.eggs.load_template_source', +) + +MIDDLEWARE_CLASSES = ( + 'django.middleware.common.CommonMiddleware', + 'django.contrib.sessions.middleware.SessionMiddleware', + 'django.contrib.auth.middleware.AuthenticationMiddleware', + 'django.middleware.doc.XViewMiddleware', +) + +ROOT_URLCONF = 'htsworkflow.frontend.urls' + +TEMPLATE_DIRS = ( + # Put strings here, like "/home/html/django_templates" or "C:/www/django/templates". + # Always use forward slashes, even on Windows. + # Don't forget to use absolute paths, not relative paths. + os.path.join(os.path.split(__file__)[0], 'templates'), +) + +INSTALLED_APPS = ( + 'django.contrib.admin', + 'django.contrib.auth', + 'django.contrib.contenttypes', + 'django.contrib.humanize', + 'django.contrib.sessions', + 'django.contrib.sites', + 'htsworkflow.frontend.eland_config', + 'htsworkflow.frontend.samples', + # modules from htsworkflow branch + 'htsworkflow.frontend.experiments', + 'htsworkflow.frontend.analysis', + 'htsworkflow.frontend.reports', + 'htsworkflow.frontend.inventory', + 'django.contrib.databrowse', +) + +# Project specific settings + +ALLOWED_IPS={'127.0.0.1': '127.0.0.1'} +options_to_dict(ALLOWED_IPS, 'allowed_hosts') + +ALLOWED_ANALYS_IPS = {'127.0.0.1': '127.0.0.1'} +options_to_dict(ALLOWED_ANALYS_IPS, 'allowed_analysis_hosts') +#UPLOADTO_HOME = os.path.abspath('../../uploads') +#UPLOADTO_CONFIG_FILE = os.path.join(UPLOADTO_HOME, 'eland_config') +#UPLOADTO_ELAND_RESULT_PACKS = os.path.join(UPLOADTO_HOME, 'eland_results') +#UPLOADTO_BED_PACKS = os.path.join(UPLOADTO_HOME, 'bed_packs') +RESULT_HOME_DIR='/Users/diane/proj/solexa/results/flowcells' + +LINK_FLOWCELL_STORAGE_DEVICE_URL = options.get('frontend', 'link_flowcell_storage_device_url') + diff --git a/trunk/htsworkflow/frontend/static/css/base.css b/trunk/htsworkflow/frontend/static/css/base.css new file mode 100644 index 0000000..9760d67 --- /dev/null +++ b/trunk/htsworkflow/frontend/static/css/base.css @@ -0,0 +1,14 @@ +/* + DJANGO Admin + by Wilson Miner wilson@lawrence.com +*/ + +/* Block IE 5 */ +@import "null.css?\"\{"; + +/* Import other styles */ +@import url('global.css'); +@import url('layout.css'); + +/* Import patch for IE 6 Windows */ +/*\*/ @import "patch-iewin.css"; /**/ diff --git a/trunk/htsworkflow/frontend/static/css/changelists.css b/trunk/htsworkflow/frontend/static/css/changelists.css new file mode 100644 index 0000000..a156c54 --- /dev/null +++ b/trunk/htsworkflow/frontend/static/css/changelists.css @@ -0,0 +1,50 @@ +@import url('base.css'); + +/* CHANGELISTS */ +#changelist { position:relative; width:100%; } +#changelist table { width:100%; } +.change-list .filtered table { border-right:1px solid #ddd; } +.change-list .filtered { min-height:400px; } +.change-list .filtered { background:white url(../img/changelist-bg.gif) top right repeat-y !important; } +.change-list .filtered table, .change-list .filtered .paginator, .filtered #toolbar, .filtered div.xfull { margin-right:160px !important; width:auto !important; } +.change-list .filtered table tbody th { padding-right:1em; } +#changelist .toplinks { border-bottom:1px solid #ccc !important; } +#changelist .paginator { color:#666; border-top:1px solid #eee; border-bottom:1px solid #eee; 
background:white url(../img/nav-bg.gif) 0 180% repeat-x; overflow:hidden; } +.change-list .filtered .paginator { border-right:1px solid #ddd; } + +/* CHANGELIST TABLES */ +#changelist table thead th { white-space:nowrap; } +#changelist table tbody td { border-left: 1px solid #ddd; } +#changelist table tfoot { color: #666; } + +/* TOOLBAR */ +#changelist #toolbar { padding:3px; border-bottom:1px solid #ddd; background:#e1e1e1 url(../img/nav-bg.gif) top left repeat-x; color:#666; } +#changelist #toolbar form input { font-size:11px; padding:1px 2px; } +#changelist #toolbar form #searchbar { padding:2px; } +#changelist #changelist-search img { vertical-align:middle; } + +/* FILTER COLUMN */ +#changelist-filter { position:absolute; top:0; right:0; z-index:1000; width:160px; border-left:1px solid #ddd; background:#efefef; margin:0; } +#changelist-filter h2 { font-size:11px; padding:2px 5px; border-bottom:1px solid #ddd; } +#changelist-filter h3 { font-size:12px; margin-bottom:0; } +#changelist-filter ul { padding-left:0;margin-left:10px; } +#changelist-filter li { list-style-type:none; margin-left:0; padding-left:0; } +#changelist-filter a { color:#999; } +#changelist-filter a:hover { color:#036; } +#changelist-filter li.selected { border-left:5px solid #ccc; padding-left:5px;margin-left:-10px; } +#changelist-filter li.selected a { color:#5b80b2 !important; } + +/* DATE DRILLDOWN */ +.change-list ul.toplinks { display:block; background:white url(../img/nav-bg-reverse.gif) 0 -10px repeat-x; border-top:1px solid white; float:left; padding:0 !important; margin:0 !important; width:100%; } +.change-list ul.toplinks li { float: left; width: 9em; padding:3px 6px; font-weight: bold; list-style-type:none; } +.change-list ul.toplinks .date-back a { color:#999; } +.change-list ul.toplinks .date-back a:hover { color:#036; } + +/* PAGINATOR */ +.paginator { font-size:11px; padding-top:10px; padding-bottom:10px; line-height:22px; margin:0; border-top:1px solid #ddd; } +.paginator a:link, .paginator a:visited { padding:2px 6px; border:solid 1px #ccc; background:white; text-decoration:none; } +.paginator a.showall { padding:0 !important; border:none !important; } +.paginator a.showall:hover { color:#036 !important; background:transparent !important; } +.paginator .end { border-width:2px !important; margin-right:6px; } +.paginator .this-page { padding:2px 6px; font-weight:bold; font-size:13px; vertical-align:top; } +.paginator a:hover { color:white; background:#5b80b2; border-color:#036; } diff --git a/trunk/htsworkflow/frontend/static/css/click-table.css b/trunk/htsworkflow/frontend/static/css/click-table.css new file mode 100644 index 0000000..1d48412 --- /dev/null +++ b/trunk/htsworkflow/frontend/static/css/click-table.css @@ -0,0 +1,19 @@ +table, td { + border-style: solid; +} +table { + border-width: 0 0 1px 1px; + border-spacing: 0; + border-collapse: collapse; +} +thead { + text-align: center; +} +td { + margin: 0; + padding: 4px; + border-width: 1px 1px 0 0; +} +td a { + display: block; +} diff --git a/trunk/htsworkflow/frontend/static/css/data-browse-index.css b/trunk/htsworkflow/frontend/static/css/data-browse-index.css new file mode 100644 index 0000000..59119bf --- /dev/null +++ b/trunk/htsworkflow/frontend/static/css/data-browse-index.css @@ -0,0 +1,2 @@ +@import url('changelists.css'); +@import url('click-table.css'); diff --git a/trunk/htsworkflow/frontend/static/css/forms.css b/trunk/htsworkflow/frontend/static/css/forms.css new file mode 100644 index 0000000..a4b145f --- /dev/null +++ 
b/trunk/htsworkflow/frontend/static/css/forms.css @@ -0,0 +1,84 @@ +@import url('base.css'); +@import url('widgets.css'); + +/* FORM ROWS */ +.form-row { overflow:hidden; padding:8px 12px; font-size:11px; border-bottom:1px solid #eee; } +.form-row img, .form-row input { vertical-align:middle; } +form .form-row p { padding-left:0; font-size:11px; } + +/* FORM LABELS */ +form h4 { margin:0 !important; padding:0 !important; border:none !important; } +label { font-weight:normal !important; color:#666; font-size:12px; } +.required label, label.required { font-weight:bold !important; color:#333 !important; } + +/* RADIO BUTTONS */ +form ul.radiolist li { list-style-type:none; } +form ul.radiolist label { float:none; display:inline; } +form ul.inline { margin-left:0; padding:0; } +form ul.inline li { float:left; padding-right:7px; } + +/* ALIGNED FIELDSETS */ +.aligned label { display:block; padding:3px 10px 0 0; float:left; width:8em; } +.colMS .aligned .vLargeTextField, .colMS .aligned .vXMLLargeTextField { width:350px; } +form .aligned p, form .aligned ul { margin-left:7em; padding-left:30px; } +form .aligned table p { margin-left:0; padding-left:0; } +form .aligned p.help { padding-left:38px; } +.aligned .vCheckboxLabel { float:none !important; display:inline; padding-left:4px; } +.colM .aligned .vLargeTextField, .colM .aligned .vXMLLargeTextField { width:610px; } +.checkbox-row p.help { margin-left:0; padding-left:0 !important; } +fieldset .field-box { float:left; margin-right: 20px; } + +/* WIDE FIELDSETS */ +.wide label { width:15em !important; } +form .wide p { margin-left:15em; } +form .wide p.help { padding-left:38px; } +.colM fieldset.wide .vLargeTextField, .colM fieldset.wide .vXMLLargeTextField { width:450px; } + +/* COLLAPSED FIELDSETS */ +fieldset.collapsed * { display:none; } +fieldset.collapsed h2, fieldset.collapsed { display:block !important; } +fieldset.collapsed h2 { background-image:url(../img/nav-bg.gif); background-position:bottom left; color:#999; } +fieldset.collapsed .collapse-toggle { padding:3px 5px !important; background:transparent; display:inline !important;} + +/* MONOSPACE TEXTAREAS */ +fieldset.monospace textarea { font-family:"Bitstream Vera Sans Mono",Monaco,"Courier New",Courier,monospace; } + +/* SUBMIT ROW */ +.submit-row { padding:5px 7px; text-align:right; background:white url(../img/nav-bg.gif) 0 100% repeat-x; border:1px solid #ccc; margin:5px 0; overflow:hidden; } +.submit-row input { margin:0 0 0 5px; } +.submit-row p { margin:0.3em; } +.submit-row p.deletelink-box { float: left; } +.submit-row .deletelink { background:url(../img/icon_deletelink.gif) 0 50% no-repeat; padding-left:14px; } + +/* CUSTOM FORM FIELDS */ +.vSelectMultipleField { vertical-align:top !important; } +.vCheckboxField { border:none; } +.vDateField, .vTimeField { margin-right:2px; } +.vURLField { width:30em; } +.vLargeTextField, .vXMLLargeTextField { width:48em; } +.flatpages-flatpage #id_content { height:40.2em; } +.module table .vPositiveSmallIntegerField { width:2.2em; } +.vTextField { width:20em; } +.vIntegerField { width:5em; } +.vForeignKeyRawIdAdminField { width: 5em; } + +/* INLINES */ +.inline-group {padding:0; border:1px solid #ccc; margin:10px 0;} +.inline-group .aligned label { width: 8em; } + +.inline-related {position:relative;} +.inline-related h3 {margin: 0; color:#666; padding:3px 5px; font-size:11px; background:#e1e1e1 url(../img/nav-bg.gif) top left repeat-x; border-bottom:1px solid #ddd;} +.inline-related h3 span.delete {padding-left:20px; position:absolute; 
top:2px; right:10px;} +.inline-related h3 span.delete label {margin-left:2px; font-size: 11px;} +.inline-related fieldset {margin: 0; background:#fff; border: none; } +.inline-related fieldset.module h3 { margin:0; padding:2px 5px 3px 5px; font-size:11px; text-align:left; font-weight:bold; background:#bcd; color:#fff; } +.inline-related.tabular fieldset.module table {width:100%;} +.last-related fieldset {border: none;} + +.inline-group .tabular tr.has_original td {padding-top:2em;} +.inline-group .tabular tr td.original { padding:2px 0 0 0; width:0; _position:relative; } +.inline-group .tabular th.original {width:0px; padding:0;} +.inline-group .tabular td.original p {position:absolute; left:0; height:1.1em; padding:2px 7px; overflow:hidden; font-size:9px; font-weight:bold; color:#666; _width:700px; } +.inline-group ul.tools {padding:0; margin: 0; list-style:none;} +.inline-group ul.tools li {display:inline; padding:0 5px;} +.inline-group ul.tools a.add {background:url(../img/icon_addlink.gif) 0 50% no-repeat; padding-left:14px;} diff --git a/trunk/htsworkflow/frontend/static/css/global.css b/trunk/htsworkflow/frontend/static/css/global.css new file mode 100644 index 0000000..fd84060 --- /dev/null +++ b/trunk/htsworkflow/frontend/static/css/global.css @@ -0,0 +1,142 @@ +body { margin:0; padding:0; font-size:12px; font-family:"Lucida Grande","DejaVu Sans","Bitstream Vera Sans",Verdana,Arial,sans-serif; color:#333; background:#fff; } + +/* LINKS */ +a:link, a:visited { color: #5b80b2; text-decoration:none; } +a:hover { color: #036; } +a img { border:none; } +a.section:link, a.section:visited { color: white; text-decoration:none; } + +/* GLOBAL DEFAULTS */ +p, ol, ul, dl { margin:.2em 0 .8em 0; } +p { padding:0; line-height:140%; } + +h1,h2,h3,h4,h5 { font-weight:bold; } +h1 { font-size:18px; color:#666; padding:0 6px 0 0; margin:0 0 .2em 0; } +h2 { font-size:16px; margin:1em 0 .5em 0; } +h2.subhead { font-weight:normal;margin-top:0; } +h3 { font-size:14px; margin:.8em 0 .3em 0; color:#666; font-weight:bold; } +h4 { font-size:12px; margin:1em 0 .8em 0; padding-bottom:3px; } +h5 { font-size:10px; margin:1.5em 0 .5em 0; color:#666; text-transform:uppercase; letter-spacing:1px; } + +ul li { list-style-type:square; padding:1px 0; } +ul.plainlist { margin-left:0 !important; } +ul.plainlist li { list-style-type:none; } +li ul { margin-bottom:0; } +li, dt, dd { font-size:11px; line-height:14px; } +dt { font-weight:bold; margin-top:4px; } +dd { margin-left:0; } + +form { margin:0; padding:0; } +fieldset { margin:0; padding:0; } + +blockquote { font-size:11px; color:#777; margin-left:2px; padding-left:10px; border-left:5px solid #ddd; } +code, pre { font-family:"Bitstream Vera Sans Mono", Monaco, "Courier New", Courier, monospace; background:inherit; color:#666; font-size:11px; } +pre.literal-block { margin:10px; background:#eee; padding:6px 8px; } +code strong { color:#930; } +hr { clear:both; color:#eee; background-color:#eee; height:1px; border:none; margin:0; padding:0; font-size:1px; line-height:1px; } + +/* TEXT STYLES & MODIFIERS */ +.small { font-size:11px; } +.tiny { font-size:10px; } +p.tiny { margin-top:-2px; } +.mini { font-size:9px; } +p.mini { margin-top:-3px; } +.help, p.help { font-size:10px !important; color:#999; } +p img, h1 img, h2 img, h3 img, h4 img, td img { vertical-align:middle; } +.quiet, a.quiet:link, a.quiet:visited { color:#999 !important;font-weight:normal !important; } +.quiet strong { font-weight:bold !important; } +.float-right { float:right; } +.float-left { 
float:left; } +.clear { clear:both; } +.align-left { text-align:left; } +.align-right { text-align:right; } +.example { margin:10px 0; padding:5px 10px; background:#efefef; } +.nowrap { white-space:nowrap; } + +/* TABLES */ +table { border-collapse:collapse; border-color:#ccc; } +td, th { font-size:11px; line-height:13px; border-bottom:1px solid #eee; vertical-align:top; padding:5px; font-family:"Lucida Grande", Verdana, Arial, sans-serif; } +th { text-align:left; font-size:12px; font-weight:bold; } +thead th, +tfoot td { color:#666; padding:2px 5px; font-size:11px; background:#e1e1e1 url(../img/nav-bg.gif) top left repeat-x; border-left:1px solid #ddd; border-bottom:1px solid #ddd; } +tfoot td { border-bottom:none; border-top:1px solid #ddd; } +thead th:first-child, +tfoot td:first-child { border-left:none !important; } +thead th.optional { font-weight:normal !important; } +fieldset table { border-right:1px solid #eee; } +tr.row-label td { font-size:9px; padding-top:2px; padding-bottom:0; border-bottom:none; color:#666; margin-top:-1px; } +tr.alt { background:#f6f6f6; } +.row1 { background:#EDF3FE; } +.row2 { background:white; } + +/* SORTABLE TABLES */ +thead th a:link, thead th a:visited { color:#666; display:block; } +table thead th.sorted { background-position:bottom left !important; } +table thead th.sorted a { padding-right:13px; } +table thead th.ascending a { background:url(../img/arrow-down.gif) right .4em no-repeat; } +table thead th.descending a { background:url(../img/arrow-up.gif) right .4em no-repeat; } + +/* ORDERABLE TABLES */ +table.orderable tbody tr td:hover { cursor:move; } +table.orderable tbody tr td:first-child { padding-left:14px; background-image:url(../img/nav-bg-grabber.gif); background-repeat:repeat-y; } +table.orderable-initalized .order-cell, body>tr>td.order-cell { display:none; } + +/* FORM DEFAULTS */ +input, textarea, select { margin:2px 0; padding:2px 3px; vertical-align:middle; font-family:"Lucida Grande", Verdana, Arial, sans-serif; font-weight:normal; font-size:11px; } +textarea { vertical-align:top !important; } +input[type=text], input[type=password], textarea, select, .vTextField { border:1px solid #ccc; } + +/* FORM BUTTONS */ +.button, input[type=submit], input[type=button], .submit-row input { background:white url(../img/nav-bg.gif) bottom repeat-x; padding:3px; color:black; border:1px solid #bbb; border-color:#ddd #aaa #aaa #ddd; } +.button:active, input[type=submit]:active, input[type=button]:active { background-image:url(../img/nav-bg-reverse.gif); background-position:top; } +.button.default, input[type=submit].default, .submit-row input.default { border:2px solid #5b80b2; background:#7CA0C7 url(../img/default-bg.gif) bottom repeat-x; font-weight:bold; color:white; float:right; } +.button.default:active, input[type=submit].default:active { background-image:url(../img/default-bg-reverse.gif); background-position:top; } + +/* MODULES */ +.module { border:1px solid #ccc; margin-bottom:5px; background:white; } +.module p, .module ul, .module h3, .module h4, .module dl, .module pre { padding-left:10px; padding-right:10px; } +.module blockquote { margin-left:12px; } +.module ul, .module ol { margin-left:1.5em; } +.module h3 { margin-top:.6em; } +.module h2, .module caption, .inline-group h2 { margin:0; padding:2px 5px 3px 5px; font-size:11px; text-align:left; font-weight:bold; background:#7CA0C7 url(../img/default-bg.gif) top left repeat-x; color:white; } +.module table { border-collapse: collapse; } + +/* MESSAGES & ERRORS */ +ul.messagelist { 
padding:0 0 5px 0; margin:0; } +ul.messagelist li { font-size:12px; display:block; padding:4px 5px 4px 25px; margin:0 0 3px 0; border-bottom:1px solid #ddd; color:#666; background:#ffc url(../img/icon_success.gif) 5px .3em no-repeat; } +.errornote { font-size:12px !important; display:block; padding:4px 5px 4px 25px; margin:0 0 3px 0; border:1px solid red; color:red;background:#ffc url(../img/icon_error.gif) 5px .3em no-repeat; } +ul.errorlist { margin:0 !important; padding:0 !important; } +.errorlist li { font-size:12px !important; display:block; padding:4px 5px 4px 25px; margin:0 0 3px 0; border:1px solid red; color:white; background:red url(../img/icon_alert.gif) 5px .3em no-repeat; } +td ul.errorlist { margin:0 !important; padding:0 !important; } +td ul.errorlist li { margin:0 !important; } +.errors { background:#ffc; } +.errors input, .errors select { border:1px solid red; } +div.system-message { background: #ffc; margin: 10px; padding: 6px 8px; font-size: .8em; } +div.system-message p.system-message-title { padding:4px 5px 4px 25px; margin:0; color:red; background:#ffc url(../img/icon_error.gif) 5px .3em no-repeat; } +.description { font-size:12px; padding:5px 0 0 12px; } + +/* BREADCRUMBS */ +div.breadcrumbs { background:white url(../img/nav-bg-reverse.gif) 0 -10px repeat-x; padding:2px 8px 3px 8px; font-size:11px; color:#999; border-top:1px solid white; border-bottom:1px solid #ccc; text-align:left; } + +/* ACTION ICONS */ +.addlink { padding-left:12px; background:url(../img/icon_addlink.gif) 0 .2em no-repeat; } +.changelink { padding-left:12px; background:url(../img/icon_changelink.gif) 0 .2em no-repeat; } +.deletelink { padding-left:12px; background:url(../img/icon_deletelink.gif) 0 .25em no-repeat; } +a.deletelink:link, a.deletelink:visited { color:#CC3434; } +a.deletelink:hover { color:#993333; } + +/* OBJECT TOOLS */ +.object-tools { font-size:10px; font-weight:bold; font-family:Arial,Helvetica,sans-serif; padding-left:0; float:right; position:relative; margin-top:-2.4em; margin-bottom:-2em; } +.form-row .object-tools { margin-top:5px; margin-bottom:5px; float:none; height:2em; padding-left:3.5em; } +.object-tools li { display:block; float:left; background:url(../img/tool-left.gif) 0 0 no-repeat; padding:0 0 0 8px; margin-left:2px; height:16px; } +.object-tools li:hover { background:url(../img/tool-left_over.gif) 0 0 no-repeat; } +.object-tools a:link, .object-tools a:visited { display:block; float:left; color:white; padding:.1em 14px .1em 8px; height:14px; background:#999 url(../img/tool-right.gif) 100% 0 no-repeat; } +.object-tools a:hover, .object-tools li:hover a { background:#5b80b2 url(../img/tool-right_over.gif) 100% 0 no-repeat; } +.object-tools a.viewsitelink, .object-tools a.golink { background:#999 url(../img/tooltag-arrowright.gif) top right no-repeat; padding-right:28px; } +.object-tools a.viewsitelink:hover, .object-tools a.golink:hover { background:#5b80b2 url(../img/tooltag-arrowright_over.gif) top right no-repeat; } +.object-tools a.addlink { background:#999 url(../img/tooltag-add.gif) top right no-repeat; padding-right:28px; } +.object-tools a.addlink:hover { background:#5b80b2 url(../img/tooltag-add_over.gif) top right no-repeat; } + +/* OBJECT HISTORY */ +table#change-history { width:100%; } +table#change-history tbody th { width:16em; } diff --git a/trunk/htsworkflow/frontend/static/css/layout.css b/trunk/htsworkflow/frontend/static/css/layout.css new file mode 100644 index 0000000..bab2121 --- /dev/null +++ 
b/trunk/htsworkflow/frontend/static/css/layout.css @@ -0,0 +1,29 @@ +/* PAGE STRUCTURE */ +#container { position:relative; width:100%; min-width:760px; padding:0; } +#content { margin:10px 15px; } +#header { width:100%; } +#content-main { float:left; width:100%; } +#content-related { float:right; width:18em; position:relative; margin-right:-19em; } +#footer { clear:both; padding:10px; } + +/* COLUMN TYPES */ +.colMS { margin-right:20em !important; } +.colSM { margin-left:20em !important; } +.colSM #content-related { float:left; margin-right:0; margin-left:-19em; } +.colSM #content-main { float:right; } +.popup .colM { width:95%; } +.subcol { float:left; width:46%; margin-right:15px; } +.dashboard #content { width:500px; } + +/* HEADER */ +#header { background:#417690; color:#ffc; overflow:hidden; } +#header a:link, #header a:visited { color:white; } +#header a:hover { text-decoration:underline; } +#branding h1 { padding:0 10px; font-size:18px; margin:8px 0; font-weight:normal; color:#f4f379; } +#branding h2 { padding:0 10px; font-size:14px; margin:-8px 0 8px 0; font-weight:normal; color:#ffc; } +#user-tools { position:absolute; top:0; right:0; padding:1.2em 10px; font-size:11px; text-align:right; } + +/* SIDEBAR */ +#content-related h3 { font-size:12px; color:#666; margin-bottom:3px; } +#content-related h4 { font-size:11px; } +#content-related .module h2 { background:#eee url(../img/nav-bg.gif) bottom left repeat-x; color:#666; } diff --git a/trunk/htsworkflow/frontend/static/css/null.css b/trunk/htsworkflow/frontend/static/css/null.css new file mode 100644 index 0000000..1a93f22 --- /dev/null +++ b/trunk/htsworkflow/frontend/static/css/null.css @@ -0,0 +1 @@ +/* Nothing to see here. Dummy file to feed to the high pass filter which hides CSS from IE5/win. 
Details: http://tantek.com/CSS/Examples/highpass.html */ \ No newline at end of file diff --git a/trunk/htsworkflow/frontend/static/css/patch-iewin.css b/trunk/htsworkflow/frontend/static/css/patch-iewin.css new file mode 100644 index 0000000..2de1305 --- /dev/null +++ b/trunk/htsworkflow/frontend/static/css/patch-iewin.css @@ -0,0 +1,8 @@ +* html #container { position:static; } /* keep header from flowing off the page */ +* html .colMS #content-related { margin-right:0; margin-left:10px; position:static; } /* put the right sidebars back on the page */ +* html .colSM #content-related { margin-right:10px; margin-left:-115px; position:static; } /* put the left sidebars back on the page */ +* html .form-row { height:1%; } +* html .dashboard #content { width:768px; } /* proper fixed width for dashboard in IE6 */ +* html .dashboard #content-main { width:535px; } /* proper fixed width for dashboard in IE6 */ +* html #changelist-filter ul { margin-right:-10px; } /* fix right margin for changelist filters in IE6 */ +* html .change-list .filtered { height:400px; } /* IE ignores min-height, but treats height as if it were min-height */ \ No newline at end of file diff --git a/trunk/htsworkflow/frontend/static/img/changelist-bg.gif b/trunk/htsworkflow/frontend/static/img/changelist-bg.gif new file mode 100644 index 0000000000000000000000000000000000000000..7f4699470adc8c021740023dcd4f0a1bd5f50b84 GIT binary patch literal 58 zcmZ?wbhEHbT*$!0(7?d({{8#Acke3xWMKdS9S{MMVPF#E>05dFE&t*Wc((SfDPyV2;h%;kux&VHiEgQv@Yq{*Sa z+l{W!qQ2X>&*yuf#)zuThN#S(y4aey*L$GHimcC{yxF9{+^)ysgs01&yxO+S=DE=5 zq`=*$!rrpW%Ia#o)Ef<&3V-tHt57%Hx^1)~CYVyVB@=p~tMo;ew{h zv&!VG#o?8+)Qqmrnz+`6sm*|+$&#|ts>I*7&E~4Z;GetNkg(B_veJR3%8#$nw9VzU z&E=W6*PXi9oVwYCsLX|_%&NoRhN#WD(ddt_(zwp%xX$ON!QH9E;Dn~jv&-bS&gP%H z*|yE)hN;YZp~kn*=6s;Xx6kLIz1xql(y__nrNG^btIvL;$&0MdA^8LV00000EC2ui z01*Hm000O7fB=GngoT0x1WZIsh=@c4X^)Hq8yhz_C@7YhmN%6fC~FO)b|)tdr>LlS zYz-PAudpFmS#fh3xFN7$Mg_iMy zEiEN27bWH6=HM3>ML{o4NKx(YFF{dAFGWZyZdf27DgX-9f+e7qGeUGM>CmP_hlM&i z=nx`A;t~!VesHj0A)^I895r&}pnw5`k|<3oSa~uJg9;fkY^eZKW(JxS@T~!jKv(R@CsZ0tXHsK5$_Dsx`$288T9sm2&pO z+7vRWKDg+SBgd59aXPJ|Fhl0-Zn$ExzP z{p-)D^-VOkous6>N$>vI^6#AtaO<#qUe&K(&M#0y?(v&2vAX>*LHVzSL?b?DwQIW$uKxLNJmEpON)!l-n+-m$&a~x z?FuKIMTTB`xvJ9RF^(drnpXe(Gi}O=wl;>&o#WO1es*?t5JJ$?)5G}qI1>{Se0TRQ zpG;2jN`Jr9)zw`OJd}hGjEs!D-S9;H_iqjj@={+PGcz+7hJk6CXqrZyWl>#IL!nUM z_N_0NpZ^ir;kdN0u<(rx;B-2h&%QChsoq{P=`;fa11O3@EEXdgjk0k6KGA5DTsF(4 z(NQj6eh;6|7dYzRbl>T=GiP4MKCmei3fQ(yDwQG}4zskh#9lm(EX!oFS!6}#*^cL^ zsi|oPkUSob>qaoRxVE=!~)oX?z(9(c0R|#dj|rtMqs* zhpy}1=+>5}&a$X8O>P8(Ajwoz=%}hjazBY8%M^=648x$cr3IH$XFC@28-`Km$mMdm za5$XV+T5(J@>enc(>(cnp2p*i_^bTYE7IiDdb1yL40>F>qsOL zd#kIfzwalK4asCOUlsyMmPN5tlJfb-sH%!XQE)k3>?ilJ4{Yp>oTjiFiET%>B_RZE zx7#~4HD$H8w|l%^ug~pv`xQm;DT-2|s;Vl45T#Pdwrx9O+jcUS%O&Hx@yPiL=l=ox qPYR$NuGDe(Y5|miB2YMV8Q^cq@lDR!W-CSj0000`~3X;@bK{O@9+Kn{rLF!_xJbl@$pSfP5S!!_4W1n`T6YZ?B3qqPft(K z(9rAa>(gwwA^YilZ@=HrgUS3{FNlEth_IP-BSy@@<=jZ6? 
z=$e|EaBy(V&CR>JyVcdz;o;$EXlUQx-&9moTU%RKS68&OwCU;T!NJ+t*-}zc&)b92AH zzpJaOZfvlTPVlRhn*D zzYj!ubf8e2}%QyRfsr*SWe%Rb`=>R1ae%6%2~5|TAxeL zzADQysO81>*ZZYCyl6@$BdV$biI?uic&0bzMrFb%Aq1#ZUv-+v+w0VD#)3jAg-{rn zQK{SBuPmhC5$}W{D84q?yXp06=7SUzUxd5@c^_-Jtgg3OcgI0-bjam_F)Z9$Uf=5< z*hz6@lo{ZZu*P|f^TenPFmQjgIBP%6a@>yq0{~})9l-cU)g%A_002ovPDHLkV1oH8 BY;phq literal 0 HcmV?d00001 diff --git a/trunk/htsworkflow/frontend/static/img/nav-bg-reverse.gif b/trunk/htsworkflow/frontend/static/img/nav-bg-reverse.gif new file mode 100644 index 0000000000000000000000000000000000000000..f11029f90fc965141b8815a78ac2651759099475 GIT binary patch literal 186 zcmV;r07d^tNk%w1VG#fy0J8u9|NsB^`1t<*{`U6v@$vEd`}^(f?dRv`;^N}{{r&3d z>hA9D(@t*9{u_A=gpfpKY#xG_U+rVXV3oq`}hC||i=Q&8(m$()z5YF*CjUEw001<4;4tjrTwH+&%FA_x~E6rskH`CdQ7= zuI>)uzWxal`(>Zw+Pr1!Chi?O LckSMx$Y2cs9wnNk literal 0 HcmV?d00001 diff --git a/trunk/htsworkflow/frontend/static/img/readme.txt b/trunk/htsworkflow/frontend/static/img/readme.txt new file mode 100644 index 0000000..81c803e --- /dev/null +++ b/trunk/htsworkflow/frontend/static/img/readme.txt @@ -0,0 +1,11 @@ +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +This copyright and license notice covers the following images: + * hdd_unmount.png +************************************************************************ + +TITLE: Crystal Project Icons +AUTHOR: Everaldo Coelho +SITE: http://www.everaldo.com +CONTACT: everaldo@everaldo.com + +Copyright (c) 2006-2007 Everaldo Coelho. diff --git a/trunk/htsworkflow/frontend/templates/admin/base_site.html b/trunk/htsworkflow/frontend/templates/admin/base_site.html new file mode 100644 index 0000000..c212302 --- /dev/null +++ b/trunk/htsworkflow/frontend/templates/admin/base_site.html @@ -0,0 +1,16 @@ +{% extends "admin/base.html" %} +{% load i18n %} + +{% block title %} +{{ title }}|{%trans "dev site admin" %} +{% endblock %} + +{% block branding %} +

+ {%block sitename %} + {% trans 'HTS Workflow Dev Server' %} + {%endblock%} +

+{% endblock %} + +{% block nav-global %}{% endblock %} diff --git a/trunk/htsworkflow/frontend/templates/admin/index.html b/trunk/htsworkflow/frontend/templates/admin/index.html new file mode 100644 index 0000000..e712275 --- /dev/null +++ b/trunk/htsworkflow/frontend/templates/admin/index.html @@ -0,0 +1,135 @@ +{% extends "admin/base_site.html" %} +{% load i18n %} + +{% block stylesheet %}{% load adminmedia %}{% admin_media_prefix %}css/dashboard.css{% endblock %} + +{% block coltype %}colMS{% endblock %} + +{% block bodyclass %}dashboard{% endblock %} + +{% block breadcrumbs %}{% endblock %} + +{% block content %} +
+ +{% if app_list %} + {% for app in app_list %} +
+ + + {% for model in app.models %} + + {% if model.perms.change %} + + {% else %} + + {% endif %} + + {% if model.perms.add %} + + {% else %} + + {% endif %} + + {% if model.perms.change %} + + {% else %} + + {% endif %} + + {% endfor %} +
{% blocktrans with app.name as name %}{{ name }}{% endblocktrans %}
{{ model.name }}{{ model.name }}{% trans 'Add' %} {% trans 'Change' %} 
+
+ {% endfor %} +{% else %} +

{% trans "You don't have permission to edit anything." %}

+{% endif %} +
+ +
+ + + + + + + + + + + + + + +
New! Reports (Note: Some features currently work only on Firefox)
ChIP-Seq Reports  
RNA-Seq Reports  
Methyl-Seq Reports  
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Docs & Tools
Sequencing Summary Page  
ChIP QC Primer Design   + ChIP Validation Design +   
Protocols - Myers Lab  
Protocols - Wold Lab  
QuEST  
+
+ +{% endblock %} + + +{% block sidebar %} + +{% endblock %} diff --git a/trunk/htsworkflow/frontend/templates/base.html b/trunk/htsworkflow/frontend/templates/base.html new file mode 100644 index 0000000..5899e59 --- /dev/null +++ b/trunk/htsworkflow/frontend/templates/base.html @@ -0,0 +1,56 @@ + + + +{% block title %}{% endblock %} + +{% if LANGUAGE_BIDI %}{% endif %} +{% block extrastyle %}{% endblock %} +{% block extrahead %}{% endblock %} +{% block blockbots %}{% endblock %} + +{% load i18n %} + + + + +
+ + {% if not is_popup %} + + + + {% block breadcrumbs %}{% endblock %} + {% endif %} + + {% if messages %} +
    {% for message in messages %}
  • {{ message }}
  • {% endfor %}
+ {% endif %} + + +
+ {% block pretitle %}{% endblock %} + {% block content_title %}{% if title %}

{{ title }}

{% endif %}{% endblock %} + {% block content %} + {% block object-tools %}{% endblock %} + {{ content }} + {% endblock %} + {% block sidebar %}{% endblock %} +
+
+ + + {% block footer %}{% endblock %} +
+ + + + diff --git a/trunk/htsworkflow/frontend/templates/base_site.html b/trunk/htsworkflow/frontend/templates/base_site.html new file mode 100644 index 0000000..3cf8738 --- /dev/null +++ b/trunk/htsworkflow/frontend/templates/base_site.html @@ -0,0 +1,10 @@ +{% extends "admin/base.html" %} +{% load i18n %} + +{% block title %}{{ sitename }}{% endblock %} + +{% block branding %} +

HTS Workflow

+{% endblock %} + +{% block nav-global %}{% endblock %} diff --git a/trunk/htsworkflow/frontend/templates/experiments/detail.html b/trunk/htsworkflow/frontend/templates/experiments/detail.html new file mode 100644 index 0000000..65d3c00 --- /dev/null +++ b/trunk/htsworkflow/frontend/templates/experiments/detail.html @@ -0,0 +1,7 @@ +{% if run_f %} +
    + RUN FOLDER:
  • {{ run_f.run_folder }}
  • +
+{% else %} +

Run folder not found.

+{% endif %} diff --git a/trunk/htsworkflow/frontend/templates/experiments/flowcellSheet.html b/trunk/htsworkflow/frontend/templates/experiments/flowcellSheet.html new file mode 100644 index 0000000..66b7b4d --- /dev/null +++ b/trunk/htsworkflow/frontend/templates/experiments/flowcellSheet.html @@ -0,0 +1,117 @@ +{{ fc.flowcell_id }} - GA SEQUENCING (SOLEXA) LOG + + +{% if fc %} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GA SEQUENCING (SOLEXA) LOG       
         
Date Run Started{{ fc.run_date }}      
         
Cluster station used{{ fc.cluster_mac_id }}      
GA used{{ fc.seq_mac_id }}      
         
Flowcell number{{ fc.flowcell_id }}      
Number of Tiles per Lane100       
Number of Cycles{{ fc.read_length }}       
         
         
SAMPLE INFORMATION        
 FC#     FC bar code
Lane12345678
Solexa Library Number{{ fc.lane_1_library.library_id }}{{ fc.lane_2_library.library_id }}{{ fc.lane_3_library.library_id }}{{ fc.lane_4_library.library_id }}{{ fc.lane_5_library.library_id }}{{ fc.lane_6_library.library_id }}{{ fc.lane_7_library.library_id }}{{ fc.lane_8_library.library_id }}
Sample Name{{ fc.lane_1_library.library_name }}{{ fc.lane_2_library.library_name }}{{ fc.lane_3_library.library_name }}{{ fc.lane_4_library.library_name }}{{ fc.lane_5_library.library_name }}{{ fc.lane_6_library.library_name }}{{ fc.lane_7_library.library_name }}{{ fc.lane_8_library.library_name }}
Organism{{ fc.lane_1_library.library_species.common_name }}{{ fc.lane_2_library.library_species.common_name }}{{ fc.lane_3_library.library_species.common_name }}{{ fc.lane_4_library.library_species.common_name }}{{ fc.lane_5_library.library_species.common_name }}{{ fc.lane_6_library.library_species.common_name }}{{ fc.lane_7_library.library_species.common_name }}{{ fc.lane_8_library.library_species.common_name }}
Submitter{{ fc.lane_1_library.made_by }}{{ fc.lane_2_library.made_by }}{{ fc.lane_3_library.made_by }}{{ fc.lane_4_library.made_by }}{{ fc.lane_5_library.made_by }}{{ fc.lane_6_library.made_by }}{{ fc.lane_7_library.made_by }}{{ fc.lane_8_library.made_by }}
First time run?
Average Library Size (bp){{ fc.lane_1_library.avg_lib_size }}{{ fc.lane_2_library.avg_lib_size }}{{ fc.lane_3_library.avg_lib_size }}{{ fc.lane_4_library.avg_lib_size }}{{ fc.lane_5_library.avg_lib_size }}{{ fc.lane_6_library.avg_lib_size }}{{ fc.lane_7_library.avg_lib_size }}{{ fc.lane_8_library.avg_lib_size }}
Template Concentration (ng/ul){{ fc.lane_1_library.undiluted_concentration }}{{ fc.lane_2_library.undiluted_concentration }}{{ fc.lane_3_library.undiluted_concentration }}{{ fc.lane_4_library.undiluted_concentration }}{{ fc.lane_5_library.undiluted_concentration }}{{ fc.lane_6_library.undiluted_concentration }}{{ fc.lane_7_library.undiluted_concentration }}{{ fc.lane_8_library.undiluted_concentration }}
Run Concentration (pM){{ fc.lane_1_pM }}{{ fc.lane_2_pM }}{{ fc.lane_3_pM }}{{ fc.lane_4_pM }}{{ fc.lane_5_pM }}{{ fc.lane_6_pM }}{{ fc.lane_7_pM }}{{ fc.lane_8_pM }}
+ +{% else %} +

Flowcell object missing. Can't create sheet.

+{% endif %} + + + {% for run in data_run_list %} +
  • {{ run.run_folder }}
  • + {% endfor %} + +{% else %} +

    No data runs are available.

    +{% endif %} diff --git a/trunk/htsworkflow/frontend/templates/reports/report.html b/trunk/htsworkflow/frontend/templates/reports/report.html new file mode 100644 index 0000000..c134377 --- /dev/null +++ b/trunk/htsworkflow/frontend/templates/reports/report.html @@ -0,0 +1,11 @@ +{% extends "admin/base_site.html" %} + +{% if main %} +{% block content %} + + {{ main|safe }} + +{% endblock %} +{% else %} +

    No content. Can't create report.

    +{% endif %} diff --git a/trunk/htsworkflow/frontend/templates/samples/library_detail.html b/trunk/htsworkflow/frontend/templates/samples/library_detail.html new file mode 100644 index 0000000..4c5c2b8 --- /dev/null +++ b/trunk/htsworkflow/frontend/templates/samples/library_detail.html @@ -0,0 +1,140 @@ +{% load humanize %} + + +

    About this library

    +Library ID: {{ lib.library_id }}
    +Name: {{ lib.library_name }}
    +Species: {{ lib.library_species.scientific_name }}
    +Affiliations: +
      + {% for individual in lib.affiliations.all %} +
    • {{ individual.name }} ( {{ individual.contact }} )
    • + {% endfor %} +
    + +

    Raw Result Files

    + + + + + + + + + + +{% for result in eland_results %} + + + + + + + + +{% endfor %} +
    CycleFlowcellLaneSummaryElandBed
    {{ result.cycle }}{{ result.flowcell_id }}{{ result.lane }}Summary{{ result.result_label }} + {% if result.bed_url %} + Bed + {% endif %} +
    + +

    Lane Summary Statistics

    +{% block summary_stats %} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {% for lane in lane_summary_list %} + + + + + + + + + + + + + + + + + + + + + + {% endfor %} + +
    No MatchQC FailedUniqueRepeat
    CyclesFlowcellLaneEndCluster / TilepMRaw Readstotal%total%0 mismatch1 mismatch2 mismatchTotal0 mismatch1 mismatch2 mismatchTotal
    {{ lane.cycle_width }}{{ lane.flowcell_id }}{{ lane.lane_id }}{% if lane.end %}{{ lane.end }}{% endif %}{{ lane.clusters.0|intcomma }}{{ lane.successful_pm }}{{ lane.reads|intcomma }}{{ lane.no_match|intcomma }}{{ lane.no_match_percent|stringformat:".2f" }}{{ lane.qc_failed|intcomma }}{{ lane.qc_failed_percent|stringformat:".2f" }}{{ lane.match_codes.U0|intcomma }}{{ lane.match_codes.U1|intcomma }}{{ lane.match_codes.U2|intcomma }}{{ lane.unique_reads|intcomma }}{{ lane.match_codes.R0|intcomma }}{{ lane.match_codes.R1|intcomma }}{{ lane.match_codes.R2|intcomma }}{{ lane.repeat_reads|intcomma }}
    +
    +
    +

    Count of multi-reads

    +{% for lane in lane_summary_list %} + {% if lane.summarized_reads %} +

    + {{lane.cycle_width}} {{ lane.flowcell_id }} lane {{ lane.lane_id }} + {% if lane.end %} end {{ lane.end }}{% endif %} +

    +
      + {% for name, counts in lane.summarized_reads.items %} +
    • {{ name }}: {{ counts|intcomma }}
    • + {% endfor %} +
    + {% endif %} +{% endfor %} +{% endblock %} diff --git a/trunk/htsworkflow/frontend/templates/samples/library_index.html b/trunk/htsworkflow/frontend/templates/samples/library_index.html new file mode 100644 index 0000000..c0a37e6 --- /dev/null +++ b/trunk/htsworkflow/frontend/templates/samples/library_index.html @@ -0,0 +1,59 @@ +{% extends "base_site.html" %} +{% load adminmedia admin_list i18n %} + +{% block stylesheet %}{{ MEDIA_URL }}css/data-browse-index.css{% endblock %} + +{% block bodyclass %}change-list{% endblock %} +{% block coltype %}flex{% endblock %} + +{% block content %} +
    +
    + {% block search %}{% search_form cl %}{% endblock %} + + {% block filters %} + {% if cl.has_filters %} +
    +

    {% trans 'Filter' %}

    + {% for spec in cl.filter_specs %} + {% admin_list_filter cl spec %} + {% endfor %} +
    + {% endif %} + {% endblock %} + + + {% block pagination %}{% pagination cl %}{% endblock %} + +{% block summary_stats %} + + + + + + + + + + + + + {% for lib in library_list %} + + + + + + + {% if lib.is_archived %} + + {% else %} + + {% endif %} + + {% endfor %} + +
    ParentLibrary IDSpeciesLibrary NameTotal LanesHD
    {{ lib.amplified_from }}{{ lib.library_id }}{{ lib.species_name }}{{ lib.library_name }}{{ lib.lanes_run }}Archived
    +
    +{% endblock %} +{% endblock %} diff --git a/trunk/htsworkflow/frontend/templates/search_form.html b/trunk/htsworkflow/frontend/templates/search_form.html new file mode 100644 index 0000000..97ef8f1 --- /dev/null +++ b/trunk/htsworkflow/frontend/templates/search_form.html @@ -0,0 +1,18 @@ +{% load adminmedia %} +{% load i18n %} +{% if cl.search_fields %} +
+
+{% endif %}
diff --git a/trunk/htsworkflow/frontend/urls.py b/trunk/htsworkflow/frontend/urls.py
new file mode 100644
index 0000000..8a558d1
--- /dev/null
+++ b/trunk/htsworkflow/frontend/urls.py
@@ -0,0 +1,50 @@
+from django.conf.urls.defaults import *
+from django.contrib import admin
+admin.autodiscover()
+
+# Databrowser:
+#from django.contrib import databrowse
+#from htsworkflow.frontend.samples.models import Library
+#databrowse.site.register(Library)
+#databrowse.site.register(FlowCell)
+
+from htsworkflow.frontend import settings
+
+urlpatterns = patterns('',
+    # Base:
+    (r'^eland_config/', include('htsworkflow.frontend.eland_config.urls')),
+    # Admin:
+    (r'^admin/(.*)', admin.site.root),
+    # Experiments:
+    (r'^experiments/', include('htsworkflow.frontend.experiments.urls')),
+    # AnalysTrack:
+    #(r'^analysis/', include('htsworkflow.frontend.analysis.urls')),
+    # Report Views:
+    (r'^inventory/', include('htsworkflow.frontend.inventory.urls')),
+    (r'^reports/', include('htsworkflow.frontend.reports.urls')),
+    # Library browser
+    (r'^library/$', 'htsworkflow.frontend.samples.views.library'),
+    (r'^library/(?P<lib_id>\w+)/$',
+     'htsworkflow.frontend.samples.views.library_to_flowcells'),
+    # library id to admin url
+    (r'^library_id_to_admin_url/(?P<lib_id>\w+)/$',
+     'htsworkflow.frontend.samples.views.library_id_to_admin_url'),
+    # Raw result files
+    (r'^results/(?P<flowcell_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/summary/',
+     'htsworkflow.frontend.samples.views.summaryhtm_fc_cnm'),
+    (r'^results/(?P<flowcell_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/eland_result/(?P<lane>[1-8])',
+     'htsworkflow.frontend.samples.views.result_fc_cnm_eland_lane'),
+    (r'^results/(?P<flowcell_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/bedfile/(?P<lane>[1-8])/ucsc',
+     'htsworkflow.frontend.samples.views.bedfile_fc_cnm_eland_lane_ucsc'),
+    (r'^results/(?P<flowcell_id>\w+)/(?P<cnm>C[1-9]-[0-9]+)/bedfile/(?P<lane>[1-8])',
+     'htsworkflow.frontend.samples.views.bedfile_fc_cnm_eland_lane'),
+
+    # databrowser
+    #(r'^databrowse/(.*)', databrowse.site.root)
+)
+
+if settings.DEBUG:
+    urlpatterns += patterns('',
+        (r'^static/(?P<path>.*)$', 'django.views.static.serve',
+         {'document_root': settings.MEDIA_ROOT}),
+    )
diff --git a/trunk/htsworkflow/pipelines/__init__.py b/trunk/htsworkflow/pipelines/__init__.py
new file mode 100644
index 0000000..beabfd1
--- /dev/null
+++ b/trunk/htsworkflow/pipelines/__init__.py
@@ -0,0 +1,6 @@
+"""
+Provide code to interact with the vendor tools to produce usable "raw" data.
+
+The illumina sub-package contains components to interact with the
+Illumina-provided GAPipeline.
+"""
diff --git a/trunk/htsworkflow/pipelines/bustard.py b/trunk/htsworkflow/pipelines/bustard.py
new file mode 100644
index 0000000..f1d73eb
--- /dev/null
+++ b/trunk/htsworkflow/pipelines/bustard.py
@@ -0,0 +1,331 @@
+"""
+Extract configuration from an Illumina Bustard directory.
+
+This includes the version number, run date, bustard executable parameters, and
+phasing estimates.
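+
+A minimal, hypothetical usage sketch (the params file path is made up;
+the Phasing class it uses is defined below):
+
+  p = Phasing(fromfile='Bustard1.9.5_15-04-2008_diane/params1.xml')
+  print p.lane, p.phasing, p.prephasing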
+""" +from copy import copy +from datetime import date +from glob import glob +import logging +import os +import re +import sys +import time + +from htsworkflow.pipelines.runfolder import \ + ElementTree, \ + VERSION_RE, \ + EUROPEAN_STRPTIME + +# make epydoc happy +__docformat__ = "restructuredtext en" + +LANE_LIST = range(1,9) + +class Phasing(object): + PHASING = 'Phasing' + PREPHASING = 'Prephasing' + + def __init__(self, fromfile=None, xml=None): + self.lane = None + self.phasing = None + self.prephasing = None + + if fromfile is not None: + self._initialize_from_file(fromfile) + elif xml is not None: + self.set_elements(xml) + + def _initialize_from_file(self, pathname): + path, name = os.path.split(pathname) + basename, ext = os.path.splitext(name) + # the last character of the param base filename should be the + # lane number + tree = ElementTree.parse(pathname).getroot() + self.set_elements(tree) + self.lane = int(basename[-1]) + + def get_elements(self): + root = ElementTree.Element(Phasing.PHASING, {'lane': str(self.lane)}) + root.tail = os.linesep + phasing = ElementTree.SubElement(root, Phasing.PHASING) + phasing.text = str(self.phasing) + phasing.tail = os.linesep + prephasing = ElementTree.SubElement(root, Phasing.PREPHASING) + prephasing.text = str(self.prephasing) + prephasing.tail = os.linesep + return root + + def set_elements(self, tree): + if tree.tag not in ('Phasing', 'Parameters'): + raise ValueError('exptected Phasing or Parameters') + lane = tree.attrib.get('lane', None) + if lane is not None: + self.lane = int(lane) + for element in list(tree): + if element.tag == Phasing.PHASING: + self.phasing = float(element.text) + elif element.tag == Phasing.PREPHASING: + self.prephasing = float(element.text) + +class CrosstalkMatrix(object): + CROSSTALK = "MatrixElements" + BASE = 'Base' + ELEMENT = 'Element' + + def __init__(self, fromfile=None, xml=None): + self.base = {} + + if fromfile is not None: + self._initialize_from_file(fromfile) + elif xml is not None: + self.set_elements(xml) + + def _initialize_from_file(self, pathname): + data = open(pathname).readlines() + auto_header = '# Auto-generated frequency response matrix' + if data[0].strip() != auto_header or len(data) != 9: + raise RuntimeError("matrix file %s is unusual" % (pathname,)) + # skip over lines 1,2,3,4 which contain the 4 bases + self.base['A'] = [ float(v) for v in data[5].split() ] + self.base['C'] = [ float(v) for v in data[6].split() ] + self.base['G'] = [ float(v) for v in data[7].split() ] + self.base['T'] = [ float(v) for v in data[8].split() ] + + def get_elements(self): + root = ElementTree.Element(CrosstalkMatrix.CROSSTALK) + root.tail = os.linesep + base_order = ['A','C','G','T'] + for b in base_order: + base_element = ElementTree.SubElement(root, CrosstalkMatrix.BASE) + base_element.text = b + base_element.tail = os.linesep + for b in base_order: + for value in self.base[b]: + crosstalk_value = ElementTree.SubElement(root, CrosstalkMatrix.ELEMENT) + crosstalk_value.text = unicode(value) + crosstalk_value.tail = os.linesep + + return root + + def set_elements(self, tree): + if tree.tag != CrosstalkMatrix.CROSSTALK: + raise ValueError('Invalid run-xml exptected '+CrosstalkMatrix.CROSSTALK) + base_order = [] + current_base = None + current_index = 0 + for element in tree.getchildren(): + # read in the order of the bases + if element.tag == 'Base': + base_order.append(element.text) + elif element.tag == 'Element': + # we're done reading bases, now its just the 4x4 matrix + # written out as a 
list of elements + # if this is the first element, make a copy of the list + # to play with and initialize an empty list for the current base + if current_base is None: + current_base = copy(base_order) + self.base[current_base[0]] = [] + # we found (probably) 4 bases go to the next base + if current_index == len(base_order): + current_base.pop(0) + current_index = 0 + self.base[current_base[0]] = [] + value = float(element.text) + self.base[current_base[0]].append(value) + + current_index += 1 + else: + raise RuntimeError("Unrecognized tag in run xml: %s" %(element.tag,)) + +def crosstalk_matrix_from_bustard_config(bustard_path, bustard_config_tree): + """ + Analyze the bustard config file and try to find the crosstalk matrix. + """ + bustard_run = bustard_config_tree[0] + if bustard_run.tag != 'Run': + raise RuntimeError('Expected Run tag, got %s' % (bustard_run.tag,)) + + call_parameters = bustard_run.find('BaseCallParameters') + if call_parameters is None: + raise RuntimeError('Missing BaseCallParameters section') + + matrix = call_parameters.find('Matrix') + if matrix is None: + raise RuntimeError('Expected to find Matrix in Bustard BaseCallParameters') + + matrix_auto_flag = int(matrix.find('AutoFlag').text) + matrix_auto_lane = int(matrix.find('AutoLane').text) + + if matrix_auto_flag: + # we estimated the matrix from something in this run. + # though we don't really care which lane it was + matrix_path = os.path.join(bustard_path, 'Matrix', 's_02_matrix.txt') + matrix = CrosstalkMatrix(matrix_path) + else: + # the matrix was provided + matrix_elements = call_parameters.find('MatrixElements') + if matrix_elements is None: + raise RuntimeError('Expected to find MatrixElements in Bustard BaseCallParameters') + matrix = CrosstalkMatrix(xml=matrix_elements) + + return matrix + +class Bustard(object): + XML_VERSION = 2 + + # Xml Tags + BUSTARD = 'Bustard' + SOFTWARE_VERSION = 'version' + DATE = 'run_time' + USER = 'user' + PARAMETERS = 'Parameters' + BUSTARD_CONFIG = 'BaseCallAnalysis' + + def __init__(self, xml=None): + self.version = None + self.date = date.today() + self.user = None + self.phasing = {} + self.crosstalk = None + self.pathname = None + self.bustard_config = None + + if xml is not None: + self.set_elements(xml) + + def _get_time(self): + return time.mktime(self.date.timetuple()) + time = property(_get_time, doc='return run time as seconds since epoch') + + def dump(self): + #print ElementTree.tostring(self.get_elements()) + ElementTree.dump(self.get_elements()) + + def get_elements(self): + root = ElementTree.Element('Bustard', + {'version': str(Bustard.XML_VERSION)}) + version = ElementTree.SubElement(root, Bustard.SOFTWARE_VERSION) + version.text = self.version + run_date = ElementTree.SubElement(root, Bustard.DATE) + run_date.text = str(self.time) + user = ElementTree.SubElement(root, Bustard.USER) + user.text = self.user + params = ElementTree.SubElement(root, Bustard.PARAMETERS) + + # add phasing parameters + for lane in LANE_LIST: + params.append(self.phasing[lane].get_elements()) + + # add crosstalk matrix if it exists + if self.crosstalk is not None: + root.append(self.crosstalk.get_elements()) + + # add bustard config if it exists + if self.bustard_config is not None: + root.append(self.bustard_config) + return root + + def set_elements(self, tree): + if tree.tag != Bustard.BUSTARD: + raise ValueError('Expected "Bustard" SubElements') + xml_version = int(tree.attrib.get('version', 0)) + if xml_version > Bustard.XML_VERSION: + logging.warn('Bustard XML tree is a 
higher version than this class') + for element in list(tree): + if element.tag == Bustard.SOFTWARE_VERSION: + self.version = element.text + elif element.tag == Bustard.DATE: + self.date = date.fromtimestamp(float(element.text)) + elif element.tag == Bustard.USER: + self.user = element.text + elif element.tag == Bustard.PARAMETERS: + for param in element: + p = Phasing(xml=param) + self.phasing[p.lane] = p + elif element.tag == CrosstalkMatrix.CROSSTALK: + self.crosstalk = CrosstalkMatrix(xml=element) + elif element.tag == Bustard.BUSTARD_CONFIG: + self.bustard_config = element + else: + raise ValueError("Unrecognized tag: %s" % (element.tag,)) + +def bustard(pathname): + """ + Construct a Bustard object by analyzing an Illumina Bustard directory. + + :Parameters: + - `pathname`: A bustard directory + + :Return: + Fully initialized Bustard object. + """ + b = Bustard() + pathname = os.path.abspath(pathname) + path, name = os.path.split(pathname) + groups = name.split("_") + version = re.search(VERSION_RE, groups[0]) + b.version = version.group(1) + t = time.strptime(groups[1], EUROPEAN_STRPTIME) + b.date = date(*t[0:3]) + b.user = groups[2] + b.pathname = pathname + bustard_config_filename = os.path.join(pathname, 'config.xml') + paramfiles = glob(os.path.join(pathname, "params?.xml")) + for paramfile in paramfiles: + phasing = Phasing(paramfile) + assert (phasing.lane >= 1 and phasing.lane <= 8) + b.phasing[phasing.lane] = phasing + # I only found these in Bustard1.9.5/1.9.6 directories + if b.version in ('1.9.5', '1.9.6'): + # at least for our runfolders for 1.9.5 and 1.9.6 matrix[1-8].txt are always the same + crosstalk_file = os.path.join(pathname, "matrix1.txt") + b.crosstalk = CrosstalkMatrix(crosstalk_file) + # for version 1.3.2 of the pipeline the bustard version number went down + # to match the rest of the pipeline. However there's now a nifty + # new (useful) bustard config file. 
+    elif os.path.exists(bustard_config_filename):
+        bustard_config_root = ElementTree.parse(bustard_config_filename)
+        b.bustard_config = bustard_config_root.getroot()
+        b.crosstalk = crosstalk_matrix_from_bustard_config(b.pathname, b.bustard_config)
+
+    return b
+
+def fromxml(tree):
+    """
+    Reconstruct a htsworkflow.pipelines.Bustard object from an xml block
+    """
+    b = Bustard()
+    b.set_elements(tree)
+    return b
+
+def make_cmdline_parser():
+    from optparse import OptionParser
+    parser = OptionParser('%prog: bustard_directory')
+    return parser
+
+def main(cmdline):
+    parser = make_cmdline_parser()
+    opts, args = parser.parse_args(cmdline)
+
+    for bustard_dir in args:
+        print u'analyzing bustard directory: ' + unicode(bustard_dir)
+        bustard_object = bustard(bustard_dir)
+        bustard_object.dump()
+
+        bustard_object2 = Bustard(xml=bustard_object.get_elements())
+        print ('-------------------------------------')
+        bustard_object2.dump()
+        print ('=====================================')
+        b1_tree = bustard_object.get_elements()
+        b1 = ElementTree.tostring(b1_tree).split(os.linesep)
+        b2_tree = bustard_object2.get_elements()
+        b2 = ElementTree.tostring(b2_tree).split(os.linesep)
+        # compare the two serializations line by line
+        for line1, line2 in zip(b1, b2):
+            if line1 != line2:
+                print "b1: ", line1
+                print "b2: ", line2
+
+if __name__ == "__main__":
+    main(sys.argv[1:])
diff --git a/trunk/htsworkflow/pipelines/configure_run.py b/trunk/htsworkflow/pipelines/configure_run.py
new file mode 100644
index 0000000..4d5cf86
--- /dev/null
+++ b/trunk/htsworkflow/pipelines/configure_run.py
@@ -0,0 +1,608 @@
+#!/usr/bin/python
+__docformat__ = "restructuredtext en"
+
+import subprocess
+import logging
+import time
+import re
+import os
+
+from htsworkflow.pipelines.retrieve_config import \
+    CONFIG_SYSTEM, CONFIG_USER, \
+    FlowCellNotFound, getCombinedOptions, saveConfigFile, WebError404
+from htsworkflow.pipelines.genome_mapper import DuplicateGenome, getAvailableGenomes, constructMapperDict
+from htsworkflow.pipelines.run_status import GARunStatus
+
+from pyinotify import WatchManager, ThreadedNotifier
+from pyinotify import EventsCodes, ProcessEvent
+
+class ConfigInfo:
+
+    def __init__(self):
+        #run_path = firecrest analysis directory to run analysis from
+        self.run_path = None
+        self.bustard_path = None
+        self.config_filepath = None
+        self.status = None
+
+        #top level directory where all analyses are placed
+        self.base_analysis_dir = None
+        #analysis_dir, top level analysis dir...
+ # base_analysis_dir + '/070924_USI-EAS44_0022_FC12150' + self.analysis_dir = None + + + def createStatusObject(self): + """ + Creates a status object which can be queried for + status of running the pipeline + + returns True if object created + returns False if object cannot be created + """ + if self.config_filepath is None: + return False + + self.status = GARunStatus(self.config_filepath) + return True + + + +#################################### +# inotify event processor + +s_firecrest_finished = re.compile('Firecrest[0-9\._\-A-Za-z]+/finished.txt') +s_bustard_finished = re.compile('Bustard[0-9\._\-A-Za-z]+/finished.txt') +s_gerald_finished = re.compile('GERALD[0-9\._\-A-Za-z]+/finished.txt') + +s_gerald_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/Bustard[0-9\._\-A-Za-z]+/GERALD[0-9\._\-A-Za-z]+/') +s_bustard_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/Bustard[0-9\._\-A-Za-z]+/') +s_firecrest_all = re.compile('Firecrest[0-9\._\-A-Za-z]+/') + +class RunEvent(ProcessEvent): + + def __init__(self, conf_info): + + self.run_status_dict = {'firecrest': False, + 'bustard': False, + 'gerald': False} + + self._ci = conf_info + + ProcessEvent.__init__(self) + + + def process_IN_CREATE(self, event): + fullpath = os.path.join(event.path, event.name) + if s_finished.search(fullpath): + logging.info("File Found: %s" % (fullpath)) + + if s_firecrest_finished.search(fullpath): + self.run_status_dict['firecrest'] = True + self._ci.status.updateFirecrest(event.name) + elif s_bustard_finished.search(fullpath): + self.run_status_dict['bustard'] = True + self._ci.status.updateBustard(event.name) + elif s_gerald_finished.search(fullpath): + self.run_status_dict['gerald'] = True + self._ci.status.updateGerald(event.name) + + #WARNING: The following order is important!! + # Firecrest regex will catch all gerald, bustard, and firecrest + # Bustard regex will catch all gerald and bustard + # Gerald regex will catch all gerald + # So, order needs to be Gerald, Bustard, Firecrest, or this + # won't work properly. + elif s_gerald_all.search(fullpath): + self._ci.status.updateGerald(event.name) + elif s_bustard_all.search(fullpath): + self._ci.status.updateBustard(event.name) + elif s_firecrest_all.search(fullpath): + self._ci.status.updateFirecrest(event.name) + + #print "Create: %s" % (os.path.join(event.path, event.name)) + + def process_IN_DELETE(self, event): + #print "Remove %s" % (os.path.join(event.path, event.name)) + pass + + + + +#FLAGS +# Config Step Error +RUN_ABORT = 'abort' +# Run Step Error +RUN_FAILED = 'failed' + + +##################################### +# Configure Step (goat_pipeline.py) +#Info +s_start = re.compile('Starting Genome Analyzer Pipeline') +s_gerald = re.compile("[\S\s]+--GERALD[\S\s]+--make[\S\s]+") +s_generating = re.compile('^Generating journals, Makefiles') +s_seq_folder = re.compile('^Sequence folder: ') +s_seq_folder_sub = re.compile('want to make ') +s_stderr_taskcomplete = re.compile('^Task complete, exiting') + +#Errors +s_invalid_cmdline = re.compile('Usage:[\S\s]*goat_pipeline.py') +s_species_dir_err = re.compile('Error: Lane [1-8]:') +s_goat_traceb = re.compile("^Traceback \(most recent call last\):") +s_missing_cycles = re.compile('^Error: Tile s_[1-8]_[0-9]+: Different number of cycles: [0-9]+ instead of [0-9]+') + +SUPPRESS_MISSING_CYCLES = False + + +##Ignore - Example of out above each ignore regex. +#NOTE: Commenting out an ignore will cause it to be +# logged as DEBUG with the logging module. 
+#CF_STDERR_IGNORE_LIST = [] +s_skip = re.compile('s_[0-8]_[0-9]+') + + +########################################## +# Pipeline Run Step (make -j8 recursive) + +##Info +s_finished = re.compile('finished') + +##Errors +s_make_error = re.compile('^make[\S\s]+Error') +s_no_gnuplot = re.compile('gnuplot: command not found') +s_no_convert = re.compile('^Can\'t exec "convert"') +s_no_ghostscript = re.compile('gs: command not found') + +##Ignore - Example of out above each ignore regex. +#NOTE: Commenting out an ignore will cause it to be +# logged as DEBUG with the logging module. +# +PL_STDERR_IGNORE_LIST = [] +# Info: PF 11802 +PL_STDERR_IGNORE_LIST.append( re.compile('^Info: PF') ) +# About to analyse intensity file s_4_0101_sig2.txt +PL_STDERR_IGNORE_LIST.append( re.compile('^About to analyse intensity file') ) +# Will send output to standard output +PL_STDERR_IGNORE_LIST.append( re.compile('^Will send output to standard output') ) +# Found 31877 clusters +PL_STDERR_IGNORE_LIST.append( re.compile('^Found [0-9]+ clusters') ) +# Will use quality criterion ((CHASTITY>=0.6) +PL_STDERR_IGNORE_LIST.append( re.compile('^Will use quality criterion') ) +# Quality criterion translated to (($F[5]>=0.6)) +PL_STDERR_IGNORE_LIST.append( re.compile('^Quality criterion translated to') ) +# opened /woldlab/trog/data1/king/070924_USI-EAS44_0022_FC12150/Data/C1-36_Firecrest1.9.1_14-11-2007_king.4/Bustard1.9.1_14-11-2007_king/s_4_0101_qhg.txt +# AND +# opened s_4_0103_qhg.txt +PL_STDERR_IGNORE_LIST.append( re.compile('^opened[\S\s]+qhg.txt') ) +# 81129 sequences out of 157651 passed filter criteria +PL_STDERR_IGNORE_LIST.append( re.compile('^[0-9]+ sequences out of [0-9]+ passed filter criteria') ) + + +def pl_stderr_ignore(line): + """ + Searches lines for lines to ignore (i.e. not to log) + + returns True if line should be ignored + returns False if line should NOT be ignored + """ + for s in PL_STDERR_IGNORE_LIST: + if s.search(line): + return True + return False + + +def config_stdout_handler(line, conf_info): + """ + Processes each line of output from GOAT + and stores useful information using the logging module + + Loads useful information into conf_info as well, for future + use outside the function. + + returns True if found condition that signifies success. 
+ """ + + # Skip irrelevant line (without logging) + if s_skip.search(line): + pass + + # Detect invalid command-line arguments + elif s_invalid_cmdline.search(line): + logging.error("Invalid commandline options!") + + # Detect starting of configuration + elif s_start.search(line): + logging.info('START: Configuring pipeline') + + # Detect it made it past invalid arguments + elif s_gerald.search(line): + logging.info('Running make now') + + # Detect that make files have been generated (based on output) + elif s_generating.search(line): + logging.info('Make files generted') + return True + + # Capture run directory + elif s_seq_folder.search(line): + mo = s_seq_folder_sub.search(line) + #Output changed when using --tiles= + # at least in pipeline v0.3.0b2 + if mo: + firecrest_bustard_gerald_makefile = line[mo.end():] + firecrest_bustard_gerald, junk = \ + os.path.split(firecrest_bustard_gerald_makefile) + firecrest_bustard, junk = os.path.split(firecrest_bustard_gerald) + firecrest, junk = os.path.split(firecrest_bustard) + + conf_info.bustard_path = firecrest_bustard + conf_info.run_path = firecrest + + #Standard output handling + else: + print 'Sequence line:', line + mo = s_seq_folder.search(line) + conf_info.bustard_path = line[mo.end():] + conf_info.run_path, temp = os.path.split(conf_info.bustard_path) + + # Log all other output for debugging purposes + else: + logging.warning('CONF:?: %s' % (line)) + + return False + + + +def config_stderr_handler(line, conf_info): + """ + Processes each line of output from GOAT + and stores useful information using the logging module + + Loads useful information into conf_info as well, for future + use outside the function. + + returns RUN_ABORT upon detecting failure; + True on success message; + False if neutral message + (i.e. doesn't signify failure or success) + """ + global SUPPRESS_MISSING_CYCLES + + # Detect invalid species directory error + if s_species_dir_err.search(line): + logging.error(line) + return RUN_ABORT + # Detect goat_pipeline.py traceback + elif s_goat_traceb.search(line): + logging.error("Goat config script died, traceback in debug output") + return RUN_ABORT + # Detect indication of successful configuration (from stderr; odd, but ok) + elif s_stderr_taskcomplete.search(line): + logging.info('Configure step successful (from: stderr)') + return True + # Detect missing cycles + elif s_missing_cycles.search(line): + + # Only display error once + if not SUPPRESS_MISSING_CYCLES: + logging.error("Missing cycles detected; Not all cycles copied?") + logging.debug("CONF:STDERR:MISSING_CYCLES: %s" % (line)) + SUPPRESS_MISSING_CYCLES = True + return RUN_ABORT + + # Log all other output as debug output + else: + logging.debug('CONF:STDERR:?: %s' % (line)) + + # Neutral (not failure; nor success) + return False + + +#def pipeline_stdout_handler(line, conf_info): +# """ +# Processes each line of output from running the pipeline +# and stores useful information using the logging module +# +# Loads useful information into conf_info as well, for future +# use outside the function. +# +# returns True if found condition that signifies success. +# """ +# +# #f.write(line + '\n') +# +# return True + + + +def pipeline_stderr_handler(line, conf_info): + """ + Processes each line of stderr from pipelien run + and stores useful information using the logging module + + ##FIXME: Future feature (doesn't actually do this yet) + #Loads useful information into conf_info as well, for future + #use outside the function. 
+ + returns RUN_FAILED upon detecting failure; + #True on success message; (no clear success state) + False if neutral message + (i.e. doesn't signify failure or success) + """ + + if pl_stderr_ignore(line): + pass + elif s_make_error.search(line): + logging.error("make error detected; run failed") + return RUN_FAILED + elif s_no_gnuplot.search(line): + logging.error("gnuplot not found") + return RUN_FAILED + elif s_no_convert.search(line): + logging.error("imagemagick's convert command not found") + return RUN_FAILED + elif s_no_ghostscript.search(line): + logging.error("ghostscript not found") + return RUN_FAILED + else: + logging.debug('PIPE:STDERR:?: %s' % (line)) + + return False + + +def retrieve_config(conf_info, flowcell, cfg_filepath, genome_dir): + """ + Gets the config file from server... + requires config file in: + /etc/ga_frontend/ga_frontend.conf + or + ~/.ga_frontend.conf + + with: + [config_file_server] + base_host_url: http://host:port + + return True if successful, False is failure + """ + options = getCombinedOptions() + + if options.url is None: + logging.error("%s or %s missing base_host_url option" % \ + (CONFIG_USER, CONFIG_SYSTEM)) + return False + + try: + saveConfigFile(flowcell, options.url, cfg_filepath) + conf_info.config_filepath = cfg_filepath + except FlowCellNotFound, e: + logging.error(e) + return False + except WebError404, e: + logging.error(e) + return False + except IOError, e: + logging.error(e) + return False + except Exception, e: + logging.error(e) + return False + + f = open(cfg_filepath, 'r') + data = f.read() + f.close() + + genome_dict = getAvailableGenomes(genome_dir) + mapper_dict = constructMapperDict(genome_dict) + + logging.debug(data) + + f = open(cfg_filepath, 'w') + f.write(data % (mapper_dict)) + f.close() + + return True + + + +def configure(conf_info): + """ + Attempts to configure the GA pipeline using goat. + + Uses logging module to store information about status. + + returns True if configuration successful, otherwise False. + """ + #ERROR Test: + #pipe = subprocess.Popen(['goat_pipeline.py', + # '--GERALD=config32bk.txt', + # '--make .',], + # #'.'], + # stdout=subprocess.PIPE, + # stderr=subprocess.PIPE) + + #ERROR Test (2), causes goat_pipeline.py traceback + #pipe = subprocess.Popen(['goat_pipeline.py', + # '--GERALD=%s' % (conf_info.config_filepath), + # '--tiles=s_4_100,s_4_101,s_4_102,s_4_103,s_4_104', + # '--make', + # '.'], + # stdout=subprocess.PIPE, + # stderr=subprocess.PIPE) + + ########################## + # Run configuration step + # Not a test; actual configure attempt. 
+ #pipe = subprocess.Popen(['goat_pipeline.py', + # '--GERALD=%s' % (conf_info.config_filepath), + # '--make', + # '.'], + # stdout=subprocess.PIPE, + # stderr=subprocess.PIPE) + + + stdout_filepath = os.path.join(conf_info.analysis_dir, + "pipeline_configure_stdout.txt") + stderr_filepath = os.path.join(conf_info.analysis_dir, + "pipeline_configure_stderr.txt") + + fout = open(stdout_filepath, 'w') + ferr = open(stderr_filepath, 'w') + + pipe = subprocess.Popen(['goat_pipeline.py', + '--GERALD=%s' % (conf_info.config_filepath), + '--make', + conf_info.analysis_dir], + stdout=fout, + stderr=ferr) + + print "Configuring pipeline: %s" % (time.ctime()) + error_code = pipe.wait() + + # Clean up + fout.close() + ferr.close() + + + ################## + # Process stdout + fout = open(stdout_filepath, 'r') + + stdout_line = fout.readline() + + complete = False + while stdout_line != '': + # Handle stdout + if config_stdout_handler(stdout_line, conf_info): + complete = True + stdout_line = fout.readline() + + fout.close() + + + #error_code = pipe.wait() + if error_code: + logging.error('Recieved error_code: %s' % (error_code)) + else: + logging.info('We are go for launch!') + + #Process stderr + ferr = open(stderr_filepath, 'r') + stderr_line = ferr.readline() + + abort = 'NO!' + stderr_success = False + while stderr_line != '': + stderr_status = config_stderr_handler(stderr_line, conf_info) + if stderr_status == RUN_ABORT: + abort = RUN_ABORT + elif stderr_status is True: + stderr_success = True + stderr_line = ferr.readline() + + ferr.close() + + + #Success requirements: + # 1) The stdout completed without error + # 2) The program exited with status 0 + # 3) No errors found in stdout + print '#Expect: True, False, True, True' + print complete, bool(error_code), abort != RUN_ABORT, stderr_success is True + status = complete is True and \ + bool(error_code) is False and \ + abort != RUN_ABORT and \ + stderr_success is True + + # If everything was successful, but for some reason + # we didn't retrieve the path info, log it. + if status is True: + if conf_info.bustard_path is None or conf_info.run_path is None: + logging.error("Failed to retrieve run_path") + return False + + return status + + +def run_pipeline(conf_info): + """ + Run the pipeline and monitor status. + """ + # Fail if the run_path doesn't actually exist + if not os.path.exists(conf_info.run_path): + logging.error('Run path does not exist: %s' \ + % (conf_info.run_path)) + return False + + # Change cwd to run_path + stdout_filepath = os.path.join(conf_info.analysis_dir, 'pipeline_run_stdout.txt') + stderr_filepath = os.path.join(conf_info.analysis_dir, 'pipeline_run_stderr.txt') + + # Create status object + conf_info.createStatusObject() + + # Monitor file creation + wm = WatchManager() + mask = EventsCodes.IN_DELETE | EventsCodes.IN_CREATE + event = RunEvent(conf_info) + notifier = ThreadedNotifier(wm, event) + notifier.start() + wdd = wm.add_watch(conf_info.run_path, mask, rec=True) + + # Log pipeline starting + logging.info('STARTING PIPELINE @ %s' % (time.ctime())) + + # Start the pipeline (and hide!) 
+ #pipe = subprocess.Popen(['make', + # '-j8', + # 'recursive'], + # stdout=subprocess.PIPE, + # stderr=subprocess.PIPE) + + fout = open(stdout_filepath, 'w') + ferr = open(stderr_filepath, 'w') + + pipe = subprocess.Popen(['make', + '--directory=%s' % (conf_info.run_path), + '-j8', + 'recursive'], + stdout=fout, + stderr=ferr) + #shell=True) + # Wait for run to finish + retcode = pipe.wait() + + + # Clean up + notifier.stop() + fout.close() + ferr.close() + + # Process stderr + ferr = open(stderr_filepath, 'r') + + run_failed_stderr = False + for line in ferr: + err_status = pipeline_stderr_handler(line, conf_info) + if err_status == RUN_FAILED: + run_failed_stderr = True + + ferr.close() + + # Finished file check! + print 'RUN SUCCESS CHECK:' + for key, value in event.run_status_dict.items(): + print ' %s: %s' % (key, value) + + dstatus = event.run_status_dict + + # Success or failure check + status = (retcode == 0) and \ + run_failed_stderr is False and \ + dstatus['firecrest'] is True and \ + dstatus['bustard'] is True and \ + dstatus['gerald'] is True + + return status + + diff --git a/trunk/htsworkflow/pipelines/eland.py b/trunk/htsworkflow/pipelines/eland.py new file mode 100644 index 0000000..559a2a2 --- /dev/null +++ b/trunk/htsworkflow/pipelines/eland.py @@ -0,0 +1,605 @@ +""" +Analyze ELAND files +""" + +from glob import glob +import logging +import os +import re +import stat + +from htsworkflow.pipelines.runfolder import ElementTree +from htsworkflow.util.ethelp import indent, flatten +from htsworkflow.util.opener import autoopen + +SAMPLE_NAME = 'SampleName' +LANE_ID = 'LaneID' +END = 'End' +READS = 'Reads' + +GENOME_MAP = 'GenomeMap' +GENOME_ITEM = 'GenomeItem' +MAPPED_READS = 'MappedReads' +MAPPED_ITEM = 'MappedItem' +MATCH_CODES = 'MatchCodes' +MATCH_ITEM = 'Code' +READS = 'Reads' + +ELAND_SINGLE = 0 +ELAND_MULTI = 1 +ELAND_EXTENDED = 2 +ELAND_EXPORT = 3 + + +class ResultLane(object): + """ + Base class for result lanes + """ + XML_VERSION = 2 + LANE = 'ResultLane' + + def __init__(self, pathname=None, lane_id=None, end=None, xml=None): + self.pathname = pathname + self._sample_name = None + self.lane_id = lane_id + self.end = end + self._reads = None + + if xml is not None: + self.set_elements(xml) + + def _update(self): + """ + Actually read the file and actually count the reads + """ + raise NotImplementedError("Can't count abstract classes") + + def _update_name(self): + # extract the sample name + if self.pathname is None: + return + + path, name = os.path.split(self.pathname) + split_name = name.split('_') + self._sample_name = split_name[0] + + def _get_sample_name(self): + if self._sample_name is None: + self._update_name() + return self._sample_name + sample_name = property(_get_sample_name) + + def _get_reads(self): + if self._reads is None: + self._update() + return self._reads + reads = property(_get_reads) + + +class ElandLane(ResultLane): + """ + Process an eland result file + """ + XML_VERSION = 2 + LANE = "ElandLane" + + def __init__(self, pathname=None, lane_id=None, end=None, genome_map=None, eland_type=None, xml=None): + super(ElandLane, self).__init__(pathname, lane_id, end) + + self._mapped_reads = None + self._match_codes = None + if genome_map is None: + genome_map = {} + self.genome_map = genome_map + self.eland_type = None + + if xml is not None: + self.set_elements(xml) + + def _guess_eland_type(self, pathname): + if self.eland_type is None: + # attempt autodetect eland file type + pathn, name = os.path.split(pathname) + if re.search('result', 
name): + self.eland_type = ELAND_SINGLE + elif re.search('multi', name): + self.eland_type = ELAND_MULTI + elif re.search('extended', name): + self.eland_type = ELAND_EXTENDED + elif re.search('export', name): + self.eland_type = ELAND_EXPORT + else: + self.eland_type = ELAND_SINGLE + + def _update(self): + """ + Actually read the file and actually count the reads + """ + # can't do anything if we don't have a file to process + if self.pathname is None: + return + self._guess_eland_type(self.pathname) + + if os.stat(self.pathname)[stat.ST_SIZE] == 0: + raise RuntimeError("Eland isn't done, try again later.") + + logging.info("summarizing results for %s" % (self.pathname)) + + if self.eland_type == ELAND_SINGLE: + result = self._update_eland_result(self.pathname) + elif self.eland_type == ELAND_MULTI or \ + self.eland_type == ELAND_EXTENDED: + result = self._update_eland_multi(self.pathname) + else: + raise NotImplementedError("Only support single/multi/extended eland files") + self._match_codes, self._mapped_reads, self._reads = result + + def _update_eland_result(self, pathname): + reads = 0 + mapped_reads = {} + + match_codes = {'NM':0, 'QC':0, 'RM':0, + 'U0':0, 'U1':0, 'U2':0, + 'R0':0, 'R1':0, 'R2':0, + } + for line in autoopen(pathname,'r'): + reads += 1 + fields = line.split() + # code = fields[2] + # match_codes[code] = match_codes.setdefault(code, 0) + 1 + # the QC/NM etc codes are in the 3rd field and always present + match_codes[fields[2]] += 1 + # ignore lines that don't have a fasta filename + if len(fields) < 7: + continue + fasta = self.genome_map.get(fields[6], fields[6]) + mapped_reads[fasta] = mapped_reads.setdefault(fasta, 0) + 1 + return match_codes, mapped_reads, reads + + def _update_eland_multi(self, pathname): + reads = 0 + mapped_reads = {} + + match_codes = {'NM':0, 'QC':0, 'RM':0, + 'U0':0, 'U1':0, 'U2':0, + 'R0':0, 'R1':0, 'R2':0, + } + match_counts_re = re.compile("([\d]+):([\d]+):([\d]+)") + for line in autoopen(pathname,'r'): + reads += 1 + fields = line.split() + # fields[2] = QC/NM/or number of matches + groups = match_counts_re.match(fields[2]) + if groups is None: + match_codes[fields[2]] += 1 + else: + # when there are too many hit, eland writes a - where + # it would have put the list of hits. + # or in a different version of eland, it just leaves + # that column blank, and only outputs 3 fields. 
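+                # Illustrative eland_multi records (hypothetical values),
+                # one per read:
+                #   >HWI-EAS229:1:2:715:1017  ACANN...  1:0:0  hg18/chr2.fa:1234F0
+                #   >HWI-EAS229:1:2:715:1018  ACGTA...  255:255:255  -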
+ if len(fields) < 4 or fields[3] == '-': + continue + zero_mismatches = int(groups.group(1)) + if zero_mismatches == 1: + match_codes['U0'] += 1 + elif zero_mismatches < 255: + match_codes['R0'] += zero_mismatches + + one_mismatches = int(groups.group(2)) + if one_mismatches == 1: + match_codes['U1'] += 1 + elif one_mismatches < 255: + match_codes['R1'] += one_mismatches + + two_mismatches = int(groups.group(3)) + if two_mismatches == 1: + match_codes['U2'] += 1 + elif two_mismatches < 255: + match_codes['R2'] += two_mismatches + + chromo = None + for match in fields[3].split(','): + match_fragment = match.split(':') + if len(match_fragment) == 2: + chromo = match_fragment[0] + pos = match_fragment[1] + + fasta = self.genome_map.get(chromo, chromo) + assert fasta is not None + mapped_reads[fasta] = mapped_reads.setdefault(fasta, 0) + 1 + return match_codes, mapped_reads, reads + + def _get_mapped_reads(self): + if self._mapped_reads is None: + self._update() + return self._mapped_reads + mapped_reads = property(_get_mapped_reads) + + def _get_match_codes(self): + if self._match_codes is None: + self._update() + return self._match_codes + match_codes = property(_get_match_codes) + + def _get_no_match(self): + if self._mapped_reads is None: + self._update() + return self._match_codes['NM'] + no_match = property(_get_no_match, + doc="total reads that didn't match the target genome.") + + def _get_no_match_percent(self): + return float(self.no_match)/self.reads * 100 + no_match_percent = property(_get_no_match_percent, + doc="no match reads as percent of total") + + def _get_qc_failed(self): + if self._mapped_reads is None: + self._update() + return self._match_codes['QC'] + qc_failed = property(_get_qc_failed, + doc="total reads that didn't match the target genome.") + + def _get_qc_failed_percent(self): + return float(self.qc_failed)/self.reads * 100 + qc_failed_percent = property(_get_qc_failed_percent, + doc="QC failed reads as percent of total") + + def _get_unique_reads(self): + if self._mapped_reads is None: + self._update() + sum = 0 + for code in ['U0','U1','U2']: + sum += self._match_codes[code] + return sum + unique_reads = property(_get_unique_reads, + doc="total unique reads") + + def _get_repeat_reads(self): + if self._mapped_reads is None: + self._update() + sum = 0 + for code in ['R0','R1','R2']: + sum += self._match_codes[code] + return sum + repeat_reads = property(_get_repeat_reads, + doc="total repeat reads") + + def get_elements(self): + lane = ElementTree.Element(ElandLane.LANE, + {'version': + unicode(ElandLane.XML_VERSION)}) + sample_tag = ElementTree.SubElement(lane, SAMPLE_NAME) + sample_tag.text = self.sample_name + lane_tag = ElementTree.SubElement(lane, LANE_ID) + lane_tag.text = str(self.lane_id) + if self.end is not None: + end_tag = ElementTree.SubElement(lane, END) + end_tag.text = str(self.end) + genome_map = ElementTree.SubElement(lane, GENOME_MAP) + for k, v in self.genome_map.items(): + item = ElementTree.SubElement( + genome_map, GENOME_ITEM, + {'name':k, 'value':unicode(v)}) + mapped_reads = ElementTree.SubElement(lane, MAPPED_READS) + for k, v in self.mapped_reads.items(): + item = ElementTree.SubElement( + mapped_reads, MAPPED_ITEM, + {'name':k, 'value':unicode(v)}) + match_codes = ElementTree.SubElement(lane, MATCH_CODES) + for k, v in self.match_codes.items(): + item = ElementTree.SubElement( + match_codes, MATCH_ITEM, + {'name':k, 'value':unicode(v)}) + reads = ElementTree.SubElement(lane, READS) + reads.text = unicode(self.reads) + + return lane + 
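+    # get_elements() above serializes to roughly the following shape
+    # (sketch; elements in the order emitted, values illustrative):
+    #   <ElandLane version="2">
+    #     <SampleName>s</SampleName>
+    #     <LaneID>1</LaneID>
+    #     <GenomeMap>...</GenomeMap>
+    #     <MappedReads>...</MappedReads>
+    #     <MatchCodes>...</MatchCodes>
+    #     <Reads>1234</Reads>
+    #   </ElandLane>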
+ def set_elements(self, tree): + if tree.tag != ElandLane.LANE: + raise ValueError('Exptecting %s' % (ElandLane.LANE,)) + + # reset dictionaries + self._mapped_reads = {} + self._match_codes = {} + + for element in tree: + tag = element.tag.lower() + if tag == SAMPLE_NAME.lower(): + self._sample_name = element.text + elif tag == LANE_ID.lower(): + self.lane_id = int(element.text) + elif tag == END.lower(): + self.end = int(element.text) + elif tag == GENOME_MAP.lower(): + for child in element: + name = child.attrib['name'] + value = child.attrib['value'] + self.genome_map[name] = value + elif tag == MAPPED_READS.lower(): + for child in element: + name = child.attrib['name'] + value = child.attrib['value'] + self._mapped_reads[name] = int(value) + elif tag == MATCH_CODES.lower(): + for child in element: + name = child.attrib['name'] + value = int(child.attrib['value']) + self._match_codes[name] = value + elif tag == READS.lower(): + self._reads = int(element.text) + else: + logging.warn("ElandLane unrecognized tag %s" % (element.tag,)) + +class SequenceLane(ResultLane): + XML_VERSION=1 + LANE = 'SequenceLane' + SEQUENCE_TYPE = 'SequenceType' + + NONE_TYPE = None + SCARF_TYPE = 1 + FASTQ_TYPE = 2 + SEQUENCE_DESCRIPTION = { NONE_TYPE: 'None', SCARF_TYPE: 'SCARF', FASTQ_TYPE: 'FASTQ' } + + def __init__(self, pathname=None, lane_id=None, end=None, xml=None): + self.sequence_type = None + super(SequenceLane, self).__init__(pathname, lane_id, end, xml) + + def _guess_sequence_type(self, pathname): + """ + Determine if we have a scarf or fastq sequence file + """ + f = open(pathname,'r') + l = f.readline() + f.close() + + if l[0] == '@': + # fastq starts with a @ + self.sequence_type = SequenceLane.FASTQ_TYPE + else: + self.sequence_type = SequenceLane.SCARF_TYPE + return self.sequence_type + + def _update(self): + """ + Actually read the file and actually count the reads + """ + # can't do anything if we don't have a file to process + if self.pathname is None: + return + + if os.stat(self.pathname)[stat.ST_SIZE] == 0: + raise RuntimeError("Sequencing isn't done, try again later.") + + self._guess_sequence_type(self.pathname) + + logging.info("summarizing results for %s" % (self.pathname)) + lines = 0 + f = open(self.pathname) + for l in f.xreadlines(): + lines += 1 + f.close() + + if self.sequence_type == SequenceLane.SCARF_TYPE: + self._reads = lines + elif self.sequence_type == SequenceLane.FASTQ_TYPE: + self._reads = lines / 4 + else: + raise NotImplementedError("This only supports scarf or fastq squence files") + + def get_elements(self): + lane = ElementTree.Element(SequenceLane.LANE, + {'version': + unicode(SequenceLane.XML_VERSION)}) + sample_tag = ElementTree.SubElement(lane, SAMPLE_NAME) + sample_tag.text = self.sample_name + lane_tag = ElementTree.SubElement(lane, LANE_ID) + lane_tag.text = str(self.lane_id) + if self.end is not None: + end_tag = ElementTree.SubElement(lane, END) + end_tag.text = str(self.end) + reads = ElementTree.SubElement(lane, READS) + reads.text = unicode(self.reads) + sequence_type = ElementTree.SubElement(lane, SequenceLane.SEQUENCE_TYPE) + sequence_type.text = unicode(SequenceLane.SEQUENCE_DESCRIPTION[self.sequence_type]) + + return lane + + def set_elements(self, tree): + if tree.tag != SequenceLane.LANE: + raise ValueError('Exptecting %s' % (SequenceLane.LANE,)) + lookup_sequence_type = dict([ (v,k) for k,v in SequenceLane.SEQUENCE_DESCRIPTION.items()]) + + for element in tree: + tag = element.tag.lower() + if tag == SAMPLE_NAME.lower(): + self._sample_name 
= element.text + elif tag == LANE_ID.lower(): + self.lane_id = int(element.text) + elif tag == END.lower(): + self.end = int(element.text) + elif tag == READS.lower(): + self._reads = int(element.text) + elif tag == SequenceLane.SEQUENCE_TYPE.lower(): + self.sequence_type = lookup_sequence_type.get(element.text, None) + print self.sequence_type + else: + logging.warn("SequenceLane unrecognized tag %s" % (element.tag,)) + +class ELAND(object): + """ + Summarize information from eland files + """ + XML_VERSION = 3 + + ELAND = 'ElandCollection' + LANE = 'Lane' + LANE_ID = 'id' + END = 'end' + + def __init__(self, xml=None): + # we need information from the gerald config.xml + self.results = [{},{}] + + if xml is not None: + self.set_elements(xml) + + def get_elements(self): + root = ElementTree.Element(ELAND.ELAND, + {'version': unicode(ELAND.XML_VERSION)}) + for end in range(len(self.results)): + end_results = self.results[end] + for lane_id, lane in end_results.items(): + eland_lane = lane.get_elements() + eland_lane.attrib[ELAND.END] = unicode (end) + eland_lane.attrib[ELAND.LANE_ID] = unicode(lane_id) + root.append(eland_lane) + return root + + def set_elements(self, tree): + if tree.tag.lower() != ELAND.ELAND.lower(): + raise ValueError('Expecting %s', ELAND.ELAND) + for element in list(tree): + lane_id = int(element.attrib[ELAND.LANE_ID]) + end = int(element.attrib.get(ELAND.END, 0)) + if element.tag.lower() == ElandLane.LANE.lower(): + lane = ElandLane(xml=element) + elif element.tag.lower() == SequenceLane.LANE.lower(): + lane = SequenceLane(xml=element) + + self.results[end][lane_id] = lane + +def check_for_eland_file(basedir, pattern, lane_id, end): + if end is None: + full_lane_id = lane_id + else: + full_lane_id = "%d_%d" % ( lane_id, end ) + + basename = pattern % (full_lane_id,) + pathname = os.path.join(basedir, basename) + if os.path.exists(pathname): + logging.info('found eland file in %s' % (pathname,)) + return pathname + else: + return None + +def update_result_with_eland(gerald, results, lane_id, end, pathname, genome_maps): + # yes the lane_id is also being computed in ElandLane._update + # I didn't want to clutter up my constructor + # but I needed to persist the sample_name/lane_id for + # runfolder summary_report + path, name = os.path.split(pathname) + logging.info("Adding eland file %s" %(name,)) + # split_name = name.split('_') + # lane_id = int(split_name[1]) + + if genome_maps is not None: + genome_map = genome_maps[lane_id] + elif gerald is not None: + genome_dir = gerald.lanes[lane_id].eland_genome + genome_map = build_genome_fasta_map(genome_dir) + else: + genome_map = {} + + lane = ElandLane(pathname, lane_id, end, genome_map) + + if end is None: + effective_end = 0 + else: + effective_end = end - 1 + + results[effective_end][lane_id] = lane + +def update_result_with_sequence(gerald, results, lane_id, end, pathname): + result = SequenceLane(pathname, lane_id, end) + + if end is None: + effective_end = 0 + else: + effective_end = end - 1 + + results[effective_end][lane_id] = result + + +def eland(gerald_dir, gerald=None, genome_maps=None): + e = ELAND() + + lane_ids = range(1,9) + ends = [None, 1, 2] + + basedirs = [gerald_dir] + + # if there is a basedir/Temp change basedir to point to the temp + # directory, as 1.1rc1 moves most of the files we've historically + # cared about to that subdirectory. + # we should look into what the official 'result' files are. 
+ # and 1.3 moves them back + basedir_temp = os.path.join(gerald_dir, 'Temp') + if os.path.isdir(basedir_temp): + basedirs.append(basedir_temp) + + + # the order in patterns determines the preference for what + # will be found. + MAPPED_ELAND = 0 + SEQUENCE = 1 + patterns = [('s_%s_eland_result.txt', MAPPED_ELAND), + ('s_%s_eland_result.txt.bz2', MAPPED_ELAND), + ('s_%s_eland_result.txt.gz', MAPPED_ELAND), + ('s_%s_eland_extended.txt', MAPPED_ELAND), + ('s_%s_eland_extended.txt.bz2', MAPPED_ELAND), + ('s_%s_eland_extended.txt.gz', MAPPED_ELAND), + ('s_%s_eland_multi.txt', MAPPED_ELAND), + ('s_%s_eland_multi.txt.bz2', MAPPED_ELAND), + ('s_%s_eland_multi.txt.gz', MAPPED_ELAND), + ('s_%s_sequence.txt', SEQUENCE),] + + for basedir in basedirs: + for end in ends: + for lane_id in lane_ids: + for p in patterns: + pathname = check_for_eland_file(basedir, p[0], lane_id, end) + if pathname is not None: + if p[1] == MAPPED_ELAND: + update_result_with_eland(gerald, e.results, lane_id, end, pathname, genome_maps) + elif p[1] == SEQUENCE: + update_result_with_sequence(gerald, e.results, lane_id, end, pathname) + break + else: + logging.debug("No eland file found in %s for lane %s and end %s" %(basedir, lane_id, end)) + continue + + return e + +def build_genome_fasta_map(genome_dir): + # build fasta to fasta file map + logging.info("Building genome map") + genome = genome_dir.split(os.path.sep)[-1] + fasta_map = {} + for vld_file in glob(os.path.join(genome_dir, '*.vld')): + is_link = False + if os.path.islink(vld_file): + is_link = True + vld_file = os.path.realpath(vld_file) + path, vld_name = os.path.split(vld_file) + name, ext = os.path.splitext(vld_name) + if is_link: + fasta_map[name] = name + else: + fasta_map[name] = os.path.join(genome, name) + return fasta_map + + +def extract_eland_sequence(instream, outstream, start, end): + """ + Extract a chunk of sequence out of an eland file + """ + for line in instream: + record = line.split() + if len(record) > 1: + result = [record[0], record[1][start:end]] + else: + result = [record[0][start:end]] + outstream.write("\t".join(result)) + outstream.write(os.linesep) diff --git a/trunk/htsworkflow/pipelines/firecrest.py b/trunk/htsworkflow/pipelines/firecrest.py new file mode 100644 index 0000000..fe5d01a --- /dev/null +++ b/trunk/htsworkflow/pipelines/firecrest.py @@ -0,0 +1,144 @@ +""" +Extract information about the Firecrest run + +Firecrest + class holding the properties we found +firecrest + Firecrest factory function initalized from a directory name +fromxml + Firecrest factory function initalized from an xml dump from + the Firecrest object. 
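+
+A usage sketch (the directory name is illustrative; Firecrest
+directories look like C1-36_Firecrest1.9.6_16-06-2009_king):
+
+  f = firecrest('Data/C1-36_Firecrest1.9.6_16-06-2009_king')
+  print f.start, f.stop, f.version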
+""" + +from datetime import date +from glob import glob +import os +import re +import time + +from htsworkflow.pipelines.runfolder import \ + ElementTree, \ + VERSION_RE, \ + EUROPEAN_STRPTIME + +__docformat__ = "restructuredtext en" + +class Firecrest(object): + XML_VERSION=1 + + # xml tag names + FIRECREST = 'Firecrest' + SOFTWARE_VERSION = 'version' + START = 'FirstCycle' + STOP = 'LastCycle' + DATE = 'run_time' + USER = 'user' + MATRIX = 'matrix' + + def __init__(self, xml=None): + self.start = None + self.stop = None + self.version = None + self.date = date.today() + self.user = None + self.matrix = None + + if xml is not None: + self.set_elements(xml) + + def _get_time(self): + return time.mktime(self.date.timetuple()) + time = property(_get_time, doc='return run time as seconds since epoch') + + def dump(self): + print "Starting cycle:", self.start + print "Ending cycle:", self.stop + print "Firecrest version:", self.version + print "Run date:", self.date + print "user:", self.user + + def get_elements(self): + attribs = {'version': str(Firecrest.XML_VERSION) } + root = ElementTree.Element(Firecrest.FIRECREST, attrib=attribs) + version = ElementTree.SubElement(root, Firecrest.SOFTWARE_VERSION) + version.text = self.version + start_cycle = ElementTree.SubElement(root, Firecrest.START) + start_cycle.text = str(self.start) + stop_cycle = ElementTree.SubElement(root, Firecrest.STOP) + stop_cycle.text = str(self.stop) + run_date = ElementTree.SubElement(root, Firecrest.DATE) + run_date.text = str(self.time) + user = ElementTree.SubElement(root, Firecrest.USER) + user.text = self.user + if self.matrix is not None: + matrix = ElementTree.SubElement(root, Firecrest.MATRIX) + matrix.text = self.matrix + return root + + def set_elements(self, tree): + if tree.tag != Firecrest.FIRECREST: + raise ValueError('Expected "Firecrest" SubElements') + xml_version = int(tree.attrib.get('version', 0)) + if xml_version > Firecrest.XML_VERSION: + logging.warn('Firecrest XML tree is a higher version than this class') + for element in list(tree): + if element.tag == Firecrest.SOFTWARE_VERSION: + self.version = element.text + elif element.tag == Firecrest.START: + self.start = int(element.text) + elif element.tag == Firecrest.STOP: + self.stop = int(element.text) + elif element.tag == Firecrest.DATE: + self.date = date.fromtimestamp(float(element.text)) + elif element.tag == Firecrest.USER: + self.user = element.text + elif element.tag == Firecrest.MATRIX: + self.matrix = element.text + else: + raise ValueError("Unrecognized tag: %s" % (element.tag,)) + +def firecrest(pathname): + """ + Examine the directory at pathname and initalize a Firecrest object + """ + f = Firecrest() + f.pathname = pathname + + # parse firecrest directory name + path, name = os.path.split(pathname) + groups = name.split('_') + # grab the start/stop cycle information + cycle = re.match("C([0-9]+)-([0-9]+)", groups[0]) + f.start = int(cycle.group(1)) + f.stop = int(cycle.group(2)) + # firecrest version + version = re.search(VERSION_RE, groups[1]) + f.version = (version.group(1)) + # datetime + t = time.strptime(groups[2], EUROPEAN_STRPTIME) + f.date = date(*t[0:3]) + # username + f.user = groups[3] + + bustard_pattern = os.path.join(pathname, 'Bustard*') + # should I parse this deeper than just stashing the + # contents of the matrix file? 
+    matrix_pathname = os.path.join(pathname, 'Matrix', 's_matrix.txt')
+    if os.path.exists(matrix_pathname):
+        # this is for firecrest < 1.3.2
+        f.matrix = open(matrix_pathname, 'r').read()
+    elif len(glob(bustard_pattern)) > 0:
+        f.matrix = None
+        # there are runs here. Bustard should save the matrix.
+    else:
+        return None
+
+    return f
+
+def fromxml(tree):
+    """
+    Initialize a Firecrest object from an element tree node
+    """
+    f = Firecrest()
+    f.set_elements(tree)
+    return f
diff --git a/trunk/htsworkflow/pipelines/genome_mapper.py b/trunk/htsworkflow/pipelines/genome_mapper.py
new file mode 100644
index 0000000..d29e446
--- /dev/null
+++ b/trunk/htsworkflow/pipelines/genome_mapper.py
@@ -0,0 +1,141 @@
+#!/usr/bin/python
+import glob
+import sys
+import os
+import re
+
+import logging
+
+from htsworkflow.util.alphanum import alphanum
+
+class DuplicateGenome(Exception): pass
+
+
+def _has_metainfo(genome_dir):
+    metapath = os.path.join(genome_dir, '_metainfo_')
+    if os.path.isfile(metapath):
+        return True
+    else:
+        return False
+
+def getAvailableGenomes(genome_base_dir):
+    """
+    raises IOError (on genome_base_dir not found)
+    raises DuplicateGenome on duplicate genomes found.
+
+    returns a double dictionary (i.e. d[species][build] = path)
+    """
+
+    # Need valid directory
+    if not os.path.exists(genome_base_dir):
+        msg = "Directory does not exist: %s" % (genome_base_dir)
+        raise IOError, msg
+
+    # Find all subdirectories
+    filepath_list = glob.glob(os.path.join(genome_base_dir, '*'))
+    potential_genome_dirs = \
+        [ filepath for filepath in filepath_list if os.path.isdir(filepath)]
+
+    # Keep only the directories that have a metadata file
+    genome_dir_list = \
+        [ dirpath \
+          for dirpath in potential_genome_dirs \
+          if _has_metainfo(dirpath) ]
+
+    # Genome double dictionary
+    d = {}
+
+    for genome_dir in genome_dir_list:
+        metafile = os.path.join(genome_dir, '_metainfo_')
+        line = open(metafile, 'r').readline().strip()
+
+        # Get species, build... log and skip on failure
+        try:
+            species, build = line.split('|')
+        except ValueError:
+            logging.warning('Skipping: Invalid metafile (%s) line: %s' \
+                            % (metafile, line))
+            continue
+
+        build_dict = d.setdefault(species, {})
+        if build in build_dict:
+            msg = "Duplicate genome for %s|%s" % (species, build)
+            raise DuplicateGenome, msg
+
+        build_dict[build] = genome_dir
+
+    return d
+
+
+class constructMapperDict(object):
+    """
+    Emulate a dictionary to map genome|build names to paths.
+
+    It uses the dictionary generated by getAvailableGenomes.
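+
+    A usage sketch (paths and names are illustrative):
+
+      genomes = getAvailableGenomes('/data/genomes')
+      mapper = constructMapperDict(genomes)
+      # species alone picks the 'latest' build; species|build is exact
+      print mapper['Homo sapiens']
+      print mapper['Homo sapiens|hg18']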
+ """ + def __init__(self, genome_dict): + self.genome_dict = genome_dict + + def __getitem__(self, key): + """ + Return the best match for key + """ + elements = re.split("\|", key) + + try: + if len(elements) == 1: + # we just the species name + # get the set of builds + builds = self.genome_dict[elements[0]] + + # sort build names the way humans would + keys = builds.keys() + keys.sort(cmp=alphanum) + + # return the path from the 'last' build name + return builds[keys[-1]] + + elif len(elements) == 2: + # we have species, and build name + return self.genome_dict[elements[0]][elements[1]] + else: + raise KeyError("Unrecognized key") + except KeyError, e: + logging.error('Unrecognized genome identifier: %s' % str((elements),)) + return "NoGenomeAvailable" + + def keys(self): + keys = [] + for species in self.genome_dict.keys(): + for build in self.genome_dict[species]: + keys.append([species+'|'+build]) + return keys + + def values(self): + values = [] + for species in self.genome_dict.keys(): + for build in self.genome_dict[species]: + values.append(self.genome_dict[species][build]) + return values + + def items(self): + items = [] + for species in self.genome_dict.keys(): + for build in self.genome_dict[species]: + key = [species+'|'+build] + value = self.genome_dict[species][build] + items.append((key, value)) + return items + +if __name__ == '__main__': + + if len(sys.argv) != 2: + print 'useage: %s ' % (sys.argv[0]) + sys.exit(1) + + d = getAvailableGenomes(sys.argv[1]) + d2 = constructMapperDict(d) + + for k,v in d2.items(): + print '%s: %s' % (k,v) + + diff --git a/trunk/htsworkflow/pipelines/gerald.py b/trunk/htsworkflow/pipelines/gerald.py new file mode 100644 index 0000000..cbc5fcb --- /dev/null +++ b/trunk/htsworkflow/pipelines/gerald.py @@ -0,0 +1,208 @@ +""" +Provide access to information stored in the GERALD directory. 
+""" +from datetime import datetime, date +import logging +import os +import time + +from htsworkflow.pipelines.summary import Summary +from htsworkflow.pipelines.eland import eland, ELAND + +from htsworkflow.pipelines.runfolder import \ + ElementTree, \ + EUROPEAN_STRPTIME, \ + LANES_PER_FLOWCELL, \ + VERSION_RE +from htsworkflow.util.ethelp import indent, flatten + +class Gerald(object): + """ + Capture meaning out of the GERALD directory + """ + XML_VERSION = 1 + GERALD='Gerald' + RUN_PARAMETERS='RunParameters' + SUMMARY='Summary' + + class LaneParameters(object): + """ + Make it easy to access elements of LaneSpecificRunParameters from python + """ + def __init__(self, gerald, lane_id): + self._gerald = gerald + self._lane_id = lane_id + + def __get_attribute(self, xml_tag): + subtree = self._gerald.tree.find('LaneSpecificRunParameters') + container = subtree.find(xml_tag) + if container is None: + return None + if len(container.getchildren()) > LANES_PER_FLOWCELL: + raise RuntimeError('GERALD config.xml file changed') + lanes = [x.tag.split('_')[1] for x in container.getchildren()] + try: + index = lanes.index(self._lane_id) + except ValueError, e: + return None + element = container[index] + return element.text + def _get_analysis(self): + return self.__get_attribute('ANALYSIS') + analysis = property(_get_analysis) + + def _get_eland_genome(self): + genome = self.__get_attribute('ELAND_GENOME') + # default to the chipwide parameters if there isn't an + # entry in the lane specific paramaters + if genome is None: + subtree = self._gerald.tree.find('ChipWideRunParameters') + container = subtree.find('ELAND_GENOME') + genome = container.text + return genome + eland_genome = property(_get_eland_genome) + + def _get_read_length(self): + return self.__get_attribute('READ_LENGTH') + read_length = property(_get_read_length) + + def _get_use_bases(self): + return self.__get_attribute('USE_BASES') + use_bases = property(_get_use_bases) + + class LaneSpecificRunParameters(object): + """ + Provide access to LaneSpecificRunParameters + """ + def __init__(self, gerald): + self._gerald = gerald + self._lane = None + + def _initalize_lanes(self): + """ + build dictionary of LaneParameters + """ + self._lanes = {} + tree = self._gerald.tree + analysis = tree.find('LaneSpecificRunParameters/ANALYSIS') + # according to the pipeline specs I think their fields + # are sampleName_laneID, with sampleName defaulting to s + # since laneIDs are constant lets just try using + # those consistently. 
+ for element in analysis: + sample, lane_id = element.tag.split('_') + self._lanes[int(lane_id)] = Gerald.LaneParameters( + self._gerald, lane_id) + + def __getitem__(self, key): + if self._lane is None: + self._initalize_lanes() + return self._lanes[key] + def keys(self): + if self._lane is None: + self._initalize_lanes() + return self._lanes.keys() + def values(self): + if self._lane is None: + self._initalize_lanes() + return self._lanes.values() + def items(self): + if self._lane is None: + self._initalize_lanes() + return self._lanes.items() + def __len__(self): + if self._lane is None: + self._initalize_lanes() + return len(self._lanes) + + def __init__(self, xml=None): + self.pathname = None + self.tree = None + + # parse lane parameters out of the config.xml file + self.lanes = Gerald.LaneSpecificRunParameters(self) + + self.summary = None + self.eland_results = None + + if xml is not None: + self.set_elements(xml) + + def _get_date(self): + if self.tree is None: + return datetime.today() + timestamp = self.tree.findtext('ChipWideRunParameters/TIME_STAMP') + epochstamp = time.mktime(time.strptime(timestamp, '%c')) + return datetime.fromtimestamp(epochstamp) + date = property(_get_date) + + def _get_time(self): + return time.mktime(self.date.timetuple()) + time = property(_get_time, doc='return run time as seconds since epoch') + + def _get_version(self): + if self.tree is None: + return None + return self.tree.findtext('ChipWideRunParameters/SOFTWARE_VERSION') + version = property(_get_version) + + def dump(self): + """ + Debugging function, report current object + """ + print 'Gerald version:', self.version + print 'Gerald run date:', self.date + print 'Gerald config.xml:', self.tree + self.summary.dump() + + def get_elements(self): + if self.tree is None or self.summary is None: + return None + + gerald = ElementTree.Element(Gerald.GERALD, + {'version': unicode(Gerald.XML_VERSION)}) + gerald.append(self.tree) + gerald.append(self.summary.get_elements()) + if self.eland_results: + gerald.append(self.eland_results.get_elements()) + return gerald + + def set_elements(self, tree): + if tree.tag != Gerald.GERALD: + raise ValueError('exptected GERALD') + xml_version = int(tree.attrib.get('version', 0)) + if xml_version > Gerald.XML_VERSION: + logging.warn('XML tree is a higher version than this class') + for element in list(tree): + tag = element.tag.lower() + if tag == Gerald.RUN_PARAMETERS.lower(): + self.tree = element + elif tag == Gerald.SUMMARY.lower(): + self.summary = Summary(xml=element) + elif tag == ELAND.ELAND.lower(): + self.eland_results = ELAND(xml=element) + else: + logging.warn("Unrecognized tag %s" % (element.tag,)) + + +def gerald(pathname): + g = Gerald() + g.pathname = pathname + path, name = os.path.split(pathname) + logging.info("Parsing gerald config.xml") + config_pathname = os.path.join(pathname, 'config.xml') + g.tree = ElementTree.parse(config_pathname).getroot() + + # parse Summary.htm file + logging.info("Parsing Summary.htm") + summary_pathname = os.path.join(pathname, 'Summary.htm') + g.summary = Summary(summary_pathname) + # parse eland files + g.eland_results = eland(g.pathname, g) + return g + +if __name__ == "__main__": + # quick test code + import sys + g = gerald(sys.argv[1]) + #ElementTree.dump(g.get_elements()) diff --git a/trunk/htsworkflow/pipelines/ipar.py b/trunk/htsworkflow/pipelines/ipar.py new file mode 100644 index 0000000..239239e --- /dev/null +++ b/trunk/htsworkflow/pipelines/ipar.py @@ -0,0 +1,239 @@ +""" +Extract information about 
the IPAR run + +IPAR + class holding the properties we found +ipar + IPAR factory function initalized from a directory name +fromxml + IPAR factory function initalized from an xml dump from + the IPAR object. +""" +__docformat__ = "restructuredtext en" + +import datetime +from glob import glob +import logging +import os +import re +import stat +import time + +from htsworkflow.pipelines.runfolder import \ + ElementTree, \ + VERSION_RE, \ + EUROPEAN_STRPTIME + +class Tiles(object): + def __init__(self, tree): + self.tree = tree.find("TileSelection") + + def keys(self): + key_list = [] + for c in self.tree.getchildren(): + k = c.attrib.get('Index', None) + if k is not None: + key_list.append(k) + return key_list + + def values(self): + value_list = [] + for lane in self.tree.getchildren(): + attributes = {} + for child in lane.getchildren(): + if child.tag == "Sample": + attributes['Sample'] = child.text + elif child.tag == 'TileRange': + attributes['TileRange'] = (int(child.attrib['Min']),int(child.attrib['Max'])) + value_list.append(attributes) + return value_list + + def items(self): + return zip(self.keys(), self.values()) + + def __getitem__(self, key): + # FIXME: this is inefficient. building the dictionary be rescanning the xml. + v = dict(self.items()) + return v[key] + +class IPAR(object): + XML_VERSION=1 + + # xml tag names + IPAR = 'IPAR' + TIMESTAMP = 'timestamp' + MATRIX = 'matrix' + RUN = 'Run' + + def __init__(self, xml=None): + self.tree = None + self.date = datetime.datetime.today() + self._tiles = None + if xml is not None: + self.set_elements(xml) + + def _get_time(self): + return time.mktime(self.date.timetuple()) + def _set_time(self, value): + mtime_tuple = time.localtime(value) + self.date = datetime.datetime(*(mtime_tuple[0:7])) + time = property(_get_time, _set_time, + doc='run time as seconds since epoch') + + def _get_cycles(self): + if self.tree is None: + return None + cycles = self.tree.find("Cycles") + if cycles is None: + return None + return cycles.attrib + + def _get_start(self): + """ + return cycle start + """ + cycles = self._get_cycles() + if cycles is not None: + return int(cycles['First']) + else: + return None + start = property(_get_start, doc="get cycle start") + + def _get_stop(self): + """ + return cycle stop + """ + cycles = self._get_cycles() + if cycles is not None: + return int(cycles['Last']) + else: + return None + stop = property(_get_stop, doc="get cycle stop") + + def _get_tiles(self): + if self._tiles is None: + self._tiles = Tiles(self.tree) + return self._tiles + tiles = property(_get_tiles) + + def _get_version(self): + software = self.tree.find('Software') + if software is not None: + return software.attrib['Version'] + version = property(_get_version, "IPAR software version") + + + def file_list(self): + """ + Generate list of all files that should be generated by the IPAR unit + """ + suffix_node = self.tree.find('RunParameters/CompressionSuffix') + if suffix_node is None: + print "find compression suffix failed" + return None + suffix = suffix_node.text + files = [] + format = "%s_%s_%04d_%s.txt%s" + for lane, attrib in self.tiles.items(): + for file_type in ["int","nse"]: + start, stop = attrib['TileRange'] + for tile in range(start, stop+1): + files.append(format % (attrib['Sample'], lane, tile, file_type, suffix)) + return files + + def dump(self): + print "Matrix:", self.matrix + print "Tree:", self.tree + + def get_elements(self): + attribs = {'version': str(IPAR.XML_VERSION) } + root = ElementTree.Element(IPAR.IPAR, 
attrib=attribs) + timestamp = ElementTree.SubElement(root, IPAR.TIMESTAMP) + timestamp.text = str(int(self.time)) + root.append(self.tree) + matrix = ElementTree.SubElement(root, IPAR.MATRIX) + matrix.text = self.matrix + return root + + def set_elements(self, tree): + if tree.tag != IPAR.IPAR: + raise ValueError('Expected "IPAR" SubElements') + xml_version = int(tree.attrib.get('version', 0)) + if xml_version > IPAR.XML_VERSION: + logging.warn('IPAR XML tree is a higher version than this class') + for element in list(tree): + if element.tag == IPAR.RUN: + self.tree = element + elif element.tag == IPAR.TIMESTAMP: + self.time = int(element.text) + elif element.tag == IPAR.MATRIX: + self.matrix = element.text + else: + raise ValueError("Unrecognized tag: %s" % (element.tag,)) + +def load_ipar_param_tree(paramfile): + """ + look for a .param file and load it if it is an IPAR tree + """ + + tree = ElementTree.parse(paramfile).getroot() + run = tree.find('Run') + if run.attrib.has_key('Name') and run.attrib['Name'].startswith("IPAR"): + return run + + return None + +def ipar(pathname): + """ + Examine the directory at pathname and initalize a IPAR object + """ + logging.info("Searching IPAR directory") + i = IPAR() + i.pathname = pathname + + # parse firecrest directory name + path, name = os.path.split(pathname) + groups = name.split('_') + if groups[0] != 'IPAR': + raise ValueError('ipar can only process IPAR directories') + + bustard_pattern = os.path.join(pathname, 'Bustard*') + # contents of the matrix file? + matrix_pathname = os.path.join(pathname, 'Matrix', 's_matrix.txt') + if os.path.exists(matrix_pathname): + # this is IPAR_1.01 + i.matrix = open(matrix_pathname, 'r').read() + elif glob(bustard_pattern) > 0: + i.matrix = None + # its still live. + else: + return None + + # look for parameter xml file + paramfile = os.path.join(path, '.params') + if os.path.exists(paramfile): + i.tree = load_ipar_param_tree(paramfile) + mtime_local = os.stat(paramfile)[stat.ST_MTIME] + i.time = mtime_local + return i + +def fromxml(tree): + """ + Initialize a IPAR object from an element tree node + """ + f = IPAR() + f.set_elements(tree) + return f + +if __name__ == "__main__": + i = ipar(os.path.expanduser('~/gec/081021_HWI-EAS229_0063_30HKUAAXX/Data/IPAR_1.01')) + x = i.get_elements() + j = fromxml(x) + #ElementTree.dump(x) + print j.date + print j.start + print j.stop + print i.tiles.keys() + print j.tiles.keys() + print j.tiles.items() + print j.file_list() diff --git a/trunk/htsworkflow/pipelines/recipe_parser.py b/trunk/htsworkflow/pipelines/recipe_parser.py new file mode 100644 index 0000000..7f5ced6 --- /dev/null +++ b/trunk/htsworkflow/pipelines/recipe_parser.py @@ -0,0 +1,48 @@ +from xml import sax + + +def get_cycles(recipe_xml_filepath): + """ + returns the number of cycles found in Recipe*.xml + """ + handler = CycleXmlHandler() + sax.parse(recipe_xml_filepath, handler) + return handler.cycle_count + + + +class CycleXmlHandler(sax.ContentHandler): + + def __init__(self): + self.cycle_count = 0 + self.in_protocol = False + sax.ContentHandler.__init__(self) + + + def startDocument(self): + self.cycle_count = 0 + self.in_protocol = False + + + def startElement(self, name, attrs): + + #Only count Incorporations as cycles if within + # the protocol section of the xml document. + if name == "Incorporation" and self.in_protocol: + #print 'Found a cycle!' 
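+            # one cycle per Incorporation element seen inside Protocol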
+ self.cycle_count += 1 + return + + elif name == 'Protocol': + #print 'In protocol' + self.in_protocol = True + return + + #print 'Skipping: %s' % (name) + + + def endElement(self, name): + + if name == 'Protocol': + #print 'End protocol' + self.in_protocol = False diff --git a/trunk/htsworkflow/pipelines/retrieve_config.py b/trunk/htsworkflow/pipelines/retrieve_config.py new file mode 100644 index 0000000..13805c9 --- /dev/null +++ b/trunk/htsworkflow/pipelines/retrieve_config.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python + +from optparse import OptionParser, IndentedHelpFormatter +from ConfigParser import SafeConfigParser + +import logging +import os +import sys +import urllib2 + +__docformat__ = "restructredtext en" + +CONFIG_SYSTEM = '/etc/hts_frontend/hts_frontend.conf' +CONFIG_USER = os.path.expanduser('~/.hts_frontend.conf') + +#Disable or enable commandline arg parsing; disabled by default. +DISABLE_CMDLINE = True + +class FlowCellNotFound(Exception): pass +class WebError404(Exception): pass + +class DummyOptions: + """ + Used when command line parsing is disabled; default + """ + def __init__(self): + self.url = None + self.output_filepath = None + self.flowcell = None + self.genome_dir = None + +class PreformattedDescriptionFormatter(IndentedHelpFormatter): + + #def format_description(self, description): + # + # if description: + # return description + "\n" + # else: + # return "" + + def format_epilog(self, epilog): + """ + It was removing my preformated epilog, so this should override + that behavior! Muhahaha! + """ + if epilog: + return "\n" + epilog + "\n" + else: + return "" + + +def constructOptionParser(): + """ + returns a pre-setup optparser + """ + global DISABLE_CMDLINE + + if DISABLE_CMDLINE: + return None + + parser = OptionParser(formatter=PreformattedDescriptionFormatter()) + + parser.set_description('Retrieves eland config file from hts_frontend web frontend.') + + parser.epilog = """ +Config File: + * %s (System wide) + * %s (User specific; overrides system) + * command line overrides all config file options + + Example Config File: + + [config_file_server] + base_host_url=http://somewhere.domain:port +""" % (CONFIG_SYSTEM, CONFIG_USER) + + #Special formatter for allowing preformatted description. + ##parser.format_epilog(PreformattedDescriptionFormatter()) + + parser.add_option("-u", "--url", + action="store", type="string", dest="url") + + parser.add_option("-o", "--output", + action="store", type="string", dest="output_filepath") + + parser.add_option("-f", "--flowcell", + action="store", type="string", dest="flowcell") + + parser.add_option("-g", "--genome_dir", + action="store", type="string", dest="genome_dir") + + #parser.set_default("url", "default") + + return parser + +def constructConfigParser(): + """ + returns a pre-setup config parser + """ + parser = SafeConfigParser() + parser.read([CONFIG_SYSTEM, CONFIG_USER]) + if not parser.has_section('config_file_server'): + parser.add_section('config_file_server') + if not parser.has_section('local_setup'): + parser.add_section('local_setup') + + return parser + + +def getCombinedOptions(): + """ + Returns optparse options after it has be updated with ConfigParser + config files and merged with parsed commandline options. 
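+
+    Precedence, highest first: command line arguments, the user
+    config file, then the system-wide config file.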
+ """ + cl_parser = constructOptionParser() + conf_parser = constructConfigParser() + + if cl_parser is None: + options = DummyOptions() + else: + options, args = cl_parser.parse_args() + + if options.url is None: + if conf_parser.has_option('config_file_server', 'base_host_url'): + options.url = conf_parser.get('config_file_server', 'base_host_url') + + if options.genome_dir is None: + if conf_parser.has_option('local_setup', 'genome_dir'): + options.genome_dir = conf_parser.get('local_setup', 'genome_dir') + + print 'USING OPTIONS:' + print ' URL:', options.url + print ' OUT:', options.output_filepath + print ' FC:', options.flowcell + print 'GDIR:', options.genome_dir + print '' + + return options + + +def saveConfigFile(flowcell, base_host_url, output_filepath): + """ + retrieves the flowcell eland config file, give the base_host_url + (i.e. http://sub.domain.edu:port) + """ + url = base_host_url + '/eland_config/%s/' % (flowcell) + + f = open(output_filepath, 'w') + #try: + try: + web = urllib2.urlopen(url) + except urllib2.URLError, e: + errmsg = 'URLError: %s' % (e.reason,) + logging.error(errmsg) + logging.error('opened %s' % (url,)) + raise IOError(errmsg) + + #except IOError, msg: + # if str(msg).find("Connection refused") >= 0: + # print 'Error: Connection refused for: %s' % (url) + # f.close() + # sys.exit(1) + # elif str(msg).find("Name or service not known") >= 0: + # print 'Error: Invalid domain or ip address for: %s' % (url) + # f.close() + # sys.exit(2) + # else: + # raise IOError, msg + + data = web.read() + + if data.find('Hmm, config file for') >= 0: + msg = "Flowcell (%s) not found in DB; full url(%s)" % (flowcell, url) + raise FlowCellNotFound, msg + + if data.find('404 - Not Found') >= 0: + msg = "404 - Not Found: Flowcell (%s); base_host_url (%s);\n full url(%s)\n " \ + "Did you get right port #?" 
% (flowcell, base_host_url, url) + raise FlowCellNotFound, msg + + f.write(data) + web.close() + f.close() + logging.info('Wrote config file to %s' % (output_filepath,)) + + diff --git a/trunk/htsworkflow/pipelines/run_status.py b/trunk/htsworkflow/pipelines/run_status.py new file mode 100644 index 0000000..e6a3ed8 --- /dev/null +++ b/trunk/htsworkflow/pipelines/run_status.py @@ -0,0 +1,454 @@ +__docformat__ = "restructuredtext en" + +import glob +import re +import os +import sys +import time +import threading + +s_comment = re.compile('^#') +s_general_read_len = re.compile('^READ_LENGTH ') +s_read_len = re.compile('^[1-8]+:READ_LENGTH ') + +s_firecrest = None + +# FIRECREST PATTERNS +# _p2f(, lane, tile, cycle) +PATTERN_FIRECREST_QCM = 's_%s_%s_%s_qcm.xml' + +# _p2f(, lane, tile) +PATTERN_FIRECREST_INT = 's_%s_%s_02_int.txt' +PATTERN_FIRECREST_NSE = 's_%s_%s_nse.txt.gz' +PATTERN_FIRECREST_POS = 's_%s_%s_pos.txt' +PATTERN_FIRECREST_IDX = 's_%s_%s_idx.txt' +PATTERN_FIRECREST_CLU1 = 's_%s_%s_01_1_clu.txt' +PATTERN_FIRECREST_CLU2 = 's_%s_%s_01_2_clu.txt' +PATTERN_FIRECREST_CLU3 = 's_%s_%s_01_3_clu.txt' +PATTERN_FIRECREST_CLU4 = 's_%s_%s_01_4_clu.txt' + + +# BUSTARD PATTERNS +# _p2f(, lane, tile) +PATTERN_BUSTARD_SIG2 = 's_%s_%s_sig2.txt' +PATTERN_BUSTARD_PRB = 's_%s_%s_prb.txt' + + + +# GERALD PATTERNS +# _p2f(, lane, tile) +PATTERN_GERALD_ALLTMP = 's_%s_%s_all.txt.tmp' +PATTERN_GERALD_QRAWTMP = 's_%s_%s_qraw.txt.tmp' +PATTERN_GERALD_ALLPNGTMP = 's_%s_%s_all.tmp.png' +PATTERN_GERALD_ALIGNTMP = 's_%s_%s_align.txt.tmp' +PATTERN_GERALD_QVALTMP = 's_%s_%s_qval.txt.tmp' +PATTERN_GERALD_SCORETMP = 's_%s_%s_score.txt.tmp' +PATTERN_GERALD_PREALIGNTMP = 's_%s_%s_prealign.txt.tmp' +PATTERN_GERALD_REALIGNTMP = 's_%s_%s_realign.txt.tmp' +PATTERN_GERALD_RESCORETMP = 's_%s_%s_rescore.txt.tmp' +PATTERN_GERALD_RESCOREPNG = 's_%s_%s_rescore.png' +PATTERN_GERALD_ERRORSTMPPNG = 's_%s_%s_errors.tmp.png' +PATTERN_GERALD_QCALTMP = 's_%s_%s_qcal.txt.tmp' +PATTERN_GERALD_QVAL = 's_%s_%s_qval.txt' + +# _p2f(, lane) +PATTERN_GERALD_SEQPRETMP = 's_%s_seqpre.txt.tmp' +PATTERN_GERALD_RESULTTMP = 's_%s_eland_result.txt.tmp' +PATTERN_GERALD_SIGMEANSTMP = 's_%s_Signal_Means.txt.tmp' +PATTERN_GERALD_CALLPNG = 's_%s_call.png' +PATTERN_GERALD_ALLPNG = 's_%s_all.png' +PATTERN_GERALD_PERCENTALLPNG = 's_%s_percent_all.png' +PATTERN_GERALD_PERCENTCALLPNG = 's_%s_percent_call.png' +PATTERN_GERALD_PERCENTBASEPNG = 's_%s_percent_base.png' +PATTERN_GERALD_FILTTMP = 's_%s_filt.txt.tmp' +PATTERN_GERALD_FRAGTMP = 's_%s_frag.txt.tmp' +PATTERN_GERALD_QREPORTTMP = 's_%s_qreport.txt.tmp' +PATTERN_GERALD_QTABLETMP = 's_%s_qtable.txt.tmp' +PATTERN_GERALD_QCALREPORTTMP = 's_%s_qcalreport.txt.tmp' +PATTERN_GERALD_SEQUENCETMP = 's_%s_sequence.txt.tmp' +PATTERN_GERALD_LANEFINISHED = 's_%s_finished.txt' + + + +def _p2f(pattern, lane, tile=None, cycle=None): + """ + Converts a pattern plus info into file names + """ + + # lane, and cycle provided (INVALID) + if tile is None and cycle is not None: + msg = "Handling of cycle without tile is not currently implemented." 
+ raise ValueError, msg + + # lane, tile, cycle provided + elif cycle: + return pattern % (lane, + "%04d" % (tile,), + "%02d" % (cycle,)) + + # lane, tile provided + elif tile: + return pattern % (lane, "%04d" % (tile,)) + + # lane provided + else: + return pattern % (lane) + + +class GARunStatus(object): + + def __init__(self, conf_filepath): + """ + Given an eland config file in the top level directory + of a run, predicts the files that will be generated + during a run and provides methods for retrieving + (completed, total) for each step or entire run. + """ + #print 'self._conf_filepath = %s' % (conf_filepath) + self._conf_filepath = conf_filepath + self._base_dir, junk = os.path.split(conf_filepath) + self._image_dir = os.path.join(self._base_dir, 'Images') + + self.lanes = [] + self.lane_read_length = {} + self.tiles = None + self.cycles = None + + self.status = {} + self.status['firecrest'] = {} + self.status['bustard'] = {} + self.status['gerald'] = {} + + self._process_config() + self._count_tiles() + self._count_cycles() + self._generate_expected() + + + def _process_config(self): + """ + Grabs info from self._conf_filepath + """ + f = open(self._conf_filepath, 'r') + + for line in f: + + #Skip comment lines for now. + if s_comment.search(line): + continue + + mo = s_general_read_len.search(line) + if mo: + read_length = int(line[mo.end():]) + #Handle general READ_LENGTH + for i in range(1,9): + self.lane_read_length[i] = read_length + + mo = s_read_len.search(line) + if mo: + read_length = int(line[mo.end():]) + lanes, junk = line.split(':') + + #Convert lanes from string of lanes to list of lane #s. + lanes = [ int(i) for i in lanes ] + + + for lane in lanes: + + #Keep track of which lanes are being run. + if lane not in self.lanes: + self.lanes.append(lane) + + #Update with lane specific read lengths + self.lane_read_length[lane] = read_length + + self.lanes.sort() + + + def _count_tiles(self): + """ + Count the number of tiles being used + """ + self.tiles = len(glob.glob(os.path.join(self._image_dir, + 'L001', + 'C1.1', + 's_1_*_a.tif'))) + + def _count_cycles(self): + """ + Figures out the number of cycles that are available + """ + #print 'self._image_dir = %s' % (self._image_dir) + cycle_dirs = glob.glob(os.path.join(self._image_dir, 'L001', 'C*.1')) + #print 'cycle_dirs = %s' % (cycle_dirs) + cycle_list = [] + for cycle_dir in cycle_dirs: + junk, c = os.path.split(cycle_dir) + cycle_list.append(int(c[1:c.find('.')])) + + self.cycles = max(cycle_list) + + + + + def _generate_expected(self): + """ + generates a list of files we expect to find. 
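+
+        Each expected filename becomes a key in self.status['firecrest'],
+        self.status['bustard'] or self.status['gerald'], mapped to False
+        until the matching update* method marks it True.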
+ """ + + firecrest = self.status['firecrest'] + bustard = self.status['bustard'] + gerald = self.status['gerald'] + + + for lane in self.lanes: + for tile in range(1,self.tiles+1): + for cycle in range(1, self.cycles+1): + + ########################## + # LANE, TILE, CYCLE LAYER + + # FIRECREST + firecrest[_p2f(PATTERN_FIRECREST_QCM, lane, tile, cycle)] = False + + + ################### + # LANE, TILE LAYER + + # FIRECREST + firecrest[_p2f(PATTERN_FIRECREST_INT, lane, tile)] = False + firecrest[_p2f(PATTERN_FIRECREST_NSE, lane, tile)] = False + firecrest[_p2f(PATTERN_FIRECREST_POS, lane, tile)] = False + firecrest[_p2f(PATTERN_FIRECREST_IDX, lane, tile)] = False + firecrest[_p2f(PATTERN_FIRECREST_CLU1, lane, tile)] = False + firecrest[_p2f(PATTERN_FIRECREST_CLU2, lane, tile)] = False + firecrest[_p2f(PATTERN_FIRECREST_CLU3, lane, tile)] = False + firecrest[_p2f(PATTERN_FIRECREST_CLU4, lane, tile)] = False + + + # BUSTARD + bustard[_p2f(PATTERN_BUSTARD_SIG2, lane, tile)] = False + bustard[_p2f(PATTERN_BUSTARD_PRB, lane, tile)] = False + + + # GERALD + #gerald[_p2f(PATTERN_GERALD_ALLTMP, lane, tile)] = False + #gerald[_p2f(PATTERN_GERALD_QRAWTMP, lane, tile)] = False + #gerald[_p2f(PATTERN_GERALD_ALLPNGTMP, lane, tile)] = False + #gerald[_p2f(PATTERN_GERALD_ALIGNTMP, lane, tile)] = False + #gerald[_p2f(PATTERN_GERALD_QVALTMP, lane, tile)] = False + #gerald[_p2f(PATTERN_GERALD_SCORETMP, lane, tile)] = False + #gerald[_p2f(PATTERN_GERALD_PREALIGNTMP, lane, tile)] = False + #gerald[_p2f(PATTERN_GERALD_REALIGNTMP, lane, tile)] = False + #gerald[_p2f(PATTERN_GERALD_RESCORETMP, lane, tile)] = False + gerald[_p2f(PATTERN_GERALD_RESCOREPNG, lane, tile)] = False + #gerald[_p2f(PATTERN_GERALD_ERRORSTMPPNG, lane, tile)] = False + #gerald[_p2f(PATTERN_GERALD_QCALTMP, lane, tile)] = False + #gerald[_p2f(PATTERN_GERALD_QVAL, lane, tile)] = False + + ################### + # LANE LAYER + + # GERALD + #gerald[_p2f(PATTERN_GERALD_SEQPRETMP, lane)] = False + #gerald[_p2f(PATTERN_GERALD_RESULTTMP, lane)] = False + #gerald[_p2f(PATTERN_GERALD_SIGMEANSTMP, lane)] = False + gerald[_p2f(PATTERN_GERALD_CALLPNG, lane)] = False + gerald[_p2f(PATTERN_GERALD_ALLPNG, lane)] = False + gerald[_p2f(PATTERN_GERALD_PERCENTALLPNG, lane)] = False + gerald[_p2f(PATTERN_GERALD_PERCENTCALLPNG, lane)] = False + gerald[_p2f(PATTERN_GERALD_PERCENTBASEPNG, lane)] = False + #gerald[_p2f(PATTERN_GERALD_FILTTMP, lane)] = False + #gerald[_p2f(PATTERN_GERALD_FRAGTMP, lane)] = False + #gerald[_p2f(PATTERN_GERALD_QREPORTTMP, lane)] = False + #gerald[_p2f(PATTERN_GERALD_QTABLETMP, lane)] = False + #gerald[_p2f(PATTERN_GERALD_QCALREPORTTMP, lane)] = False + #gerald[_p2f(PATTERN_GERALD_SEQUENCETMP, lane)] = False + gerald[_p2f(PATTERN_GERALD_LANEFINISHED, lane)] = False + + + + ################# + # LOOPS FINISHED + + # FIRECREST + firecrest['offsets_finished.txt'] = False + firecrest['finished.txt'] = False + + # BUSTARD + bustard['finished.txt'] = False + + # GERALD + gerald['tiles.txt'] = False + gerald['FullAll.htm'] = False + #gerald['All.htm.tmp'] = False + #gerald['Signal_Means.txt.tmp'] = False + #gerald['plotIntensity_for_IVC'] = False + #gerald['IVC.htm.tmp'] = False + gerald['FullError.htm'] = False + gerald['FullPerfect.htm'] = False + #gerald['Error.htm.tmp'] = False + #gerald['Perfect.htm.tmp'] = False + #gerald['Summary.htm.tmp'] = False + #gerald['Tile.htm.tmp'] = False + gerald['finished.txt'] = False + + def statusFirecrest(self): + """ + returns (, ) + """ + firecrest = self.status['firecrest'] + total = len(firecrest) + 
completed = firecrest.values().count(True) + + return (completed, total) + + + def statusBustard(self): + """ + returns (, ) + """ + bustard = self.status['bustard'] + total = len(bustard) + completed = bustard.values().count(True) + + return (completed, total) + + + def statusGerald(self): + """ + returns (, ) + """ + gerald = self.status['gerald'] + total = len(gerald) + completed = gerald.values().count(True) + + return (completed, total) + + + def statusTotal(self): + """ + returns (, ) + """ + #f = firecrest c = completed + #b = bustard t = total + #g = gerald + fc, ft = self.statusFirecrest() + bc, bt = self.statusBustard() + gc, gt = self.statusGerald() + + return (fc+bc+gc, ft+bt+gt) + + + def statusReport(self): + """ + Generate the basic percent complete report + """ + def _percentCompleted(completed, total): + """ + Returns precent completed as float + """ + return (completed / float(total)) * 100 + + fc, ft = self.statusFirecrest() + bc, bt = self.statusBustard() + gc, gt = self.statusGerald() + tc, tt = self.statusTotal() + + fp = _percentCompleted(fc, ft) + bp = _percentCompleted(bc, bt) + gp = _percentCompleted(gc, gt) + tp = _percentCompleted(tc, tt) + + report = ['Firecrest: %s%% (%s/%s)' % (fp, fc, ft), + ' Bustard: %s%% (%s/%s)' % (bp, bc, bt), + ' Gerald: %s%% (%s/%s)' % (gp, gc, gt), + '-----------------------', + ' Total: %s%% (%s/%s)' % (tp, tc, tt), + ] + return report + + def updateFirecrest(self, filename): + """ + Marks firecrest filename as being completed. + """ + self.status['firecrest'][filename] = True + + + def updateBustard(self, filename): + """ + Marks bustard filename as being completed. + """ + self.status['bustard'][filename] = True + + + def updateGerald(self, filename): + """ + Marks gerald filename as being completed. + """ + self.status['gerald'][filename] = True + + + +################################################## +# Functions to be called by Thread(target=) +def _cmdLineStatusMonitorFunc(conf_info): + """ + Given a ConfigInfo object, provides status to stdout. + + You should probably use startCmdLineStatusMonitor() + instead of ths function. + + .. python: + def example_launch(): + t = threading.Thread(target=_cmdLineStatusMonitorFunc, + args=[conf_info]) + t.setDaemon(True) + t.start() + """ + SLEEP_AMOUNT = 30 + + while 1: + if conf_info.status is None: + print "No status object yet." + time.sleep(SLEEP_AMOUNT) + continue + + report = conf_info.status.statusReport() + print os.linesep.join(report) + print + + time.sleep(SLEEP_AMOUNT) + + +############################################# +# Start monitor thread convenience functions +def startCmdLineStatusMonitor(conf_info): + """ + Starts a command line status monitor given a conf_info object. 
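+
+    Returns immediately; the daemon thread prints a status report
+    every 30 seconds (SLEEP_AMOUNT in _cmdLineStatusMonitorFunc).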
+ """ + t = threading.Thread(target=_cmdLineStatusMonitorFunc, args=[conf_info]) + t.setDaemon(True) + t.start() + +from optparse import OptionParser +def make_parser(): + usage = "%prog: config file" + + parser = OptionParser() + return parser + +def main(cmdline=None): + parser = make_parser() + opt, args = parser.parse_args(cmdline) + + if len(args) != 1: + parser.error("need name of configuration file") + + status = GARunStatus(args[0]) + print os.linesep.join(status.statusReport()) + return 0 + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) + diff --git a/trunk/htsworkflow/pipelines/runfolder.py b/trunk/htsworkflow/pipelines/runfolder.py new file mode 100644 index 0000000..11795c2 --- /dev/null +++ b/trunk/htsworkflow/pipelines/runfolder.py @@ -0,0 +1,499 @@ +""" +Core information needed to inspect a runfolder. +""" +from glob import glob +import logging +import os +import re +import shutil +import stat +import subprocess +import sys +import time + +try: + from xml.etree import ElementTree +except ImportError, e: + from elementtree import ElementTree + +EUROPEAN_STRPTIME = "%d-%m-%Y" +EUROPEAN_DATE_RE = "([0-9]{1,2}-[0-9]{1,2}-[0-9]{4,4})" +VERSION_RE = "([0-9\.]+)" +USER_RE = "([a-zA-Z0-9]+)" +LANES_PER_FLOWCELL = 8 + +from htsworkflow.util.alphanum import alphanum +from htsworkflow.util.ethelp import indent, flatten + +class PipelineRun(object): + """ + Capture "interesting" information about a pipeline run + """ + XML_VERSION = 1 + PIPELINE_RUN = 'PipelineRun' + FLOWCELL_ID = 'FlowcellID' + + def __init__(self, pathname=None, xml=None): + if pathname is not None: + self.pathname = os.path.normpath(pathname) + else: + self.pathname = None + self._name = None + self._flowcell_id = None + self.image_analysis = None + self.bustard = None + self.gerald = None + + if xml is not None: + self.set_elements(xml) + + def _get_flowcell_id(self): + # extract flowcell ID + if self._flowcell_id is None: + config_dir = os.path.join(self.pathname, 'Config') + flowcell_id_path = os.path.join(config_dir, 'FlowcellId.xml') + if os.path.exists(flowcell_id_path): + flowcell_id_tree = ElementTree.parse(flowcell_id_path) + self._flowcell_id = flowcell_id_tree.findtext('Text') + else: + path_fields = self.pathname.split('_') + if len(path_fields) > 0: + # guessing last element of filename + flowcell_id = path_fields[-1] + else: + flowcell_id = 'unknown' + + logging.warning( + "Flowcell id was not found, guessing %s" % ( + flowcell_id)) + self._flowcell_id = flowcell_id + return self._flowcell_id + flowcell_id = property(_get_flowcell_id) + + def get_elements(self): + """ + make one master xml file from all of our sub-components. 
+ """ + root = ElementTree.Element(PipelineRun.PIPELINE_RUN) + flowcell = ElementTree.SubElement(root, PipelineRun.FLOWCELL_ID) + flowcell.text = self.flowcell_id + root.append(self.image_analysis.get_elements()) + root.append(self.bustard.get_elements()) + root.append(self.gerald.get_elements()) + return root + + def set_elements(self, tree): + # this file gets imported by all the others, + # so we need to hide the imports to avoid a cyclic imports + from htsworkflow.pipelines import firecrest + from htsworkflow.pipelines import ipar + from htsworkflow.pipelines import bustard + from htsworkflow.pipelines import gerald + + tag = tree.tag.lower() + if tag != PipelineRun.PIPELINE_RUN.lower(): + raise ValueError('Pipeline Run Expecting %s got %s' % ( + PipelineRun.PIPELINE_RUN, tag)) + for element in tree: + tag = element.tag.lower() + if tag == PipelineRun.FLOWCELL_ID.lower(): + self._flowcell_id = element.text + #ok the xword.Xword.XWORD pattern for module.class.constant is lame + # you should only have Firecrest or IPAR, never both of them. + elif tag == firecrest.Firecrest.FIRECREST.lower(): + self.image_analysis = firecrest.Firecrest(xml=element) + elif tag == ipar.IPAR.IPAR.lower(): + self.image_analysis = ipar.IPAR(xml=element) + elif tag == bustard.Bustard.BUSTARD.lower(): + self.bustard = bustard.Bustard(xml=element) + elif tag == gerald.Gerald.GERALD.lower(): + self.gerald = gerald.Gerald(xml=element) + else: + logging.warn('PipelineRun unrecognized tag %s' % (tag,)) + + def _get_run_name(self): + """ + Given a run tuple, find the latest date and use that as our name + """ + if self._name is None: + tmax = max(self.image_analysis.time, self.bustard.time, self.gerald.time) + timestamp = time.strftime('%Y-%m-%d', time.localtime(tmax)) + self._name = 'run_'+self.flowcell_id+"_"+timestamp+'.xml' + return self._name + name = property(_get_run_name) + + def save(self, destdir=None): + if destdir is None: + destdir = '' + logging.info("Saving run report "+ self.name) + xml = self.get_elements() + indent(xml) + dest_pathname = os.path.join(destdir, self.name) + ElementTree.ElementTree(xml).write(dest_pathname) + + def load(self, filename): + logging.info("Loading run report from " + filename) + tree = ElementTree.parse(filename).getroot() + self.set_elements(tree) + +def load_pipeline_run_xml(pathname): + """ + Load and instantiate a Pipeline run from a run xml file + + :Parameters: + - `pathname` : location of an run xml file + + :Returns: initialized PipelineRun object + """ + tree = ElementTree.parse(pathname).getroot() + run = PipelineRun(xml=tree) + return run + +def get_runs(runfolder): + """ + Search through a run folder for all the various sub component runs + and then return a PipelineRun for each different combination. + + For example if there are two different GERALD runs, this will + generate two different PipelineRun objects, that differ + in there gerald component. 
+ """ + from htsworkflow.pipelines import firecrest + from htsworkflow.pipelines import ipar + from htsworkflow.pipelines import bustard + from htsworkflow.pipelines import gerald + + def scan_post_image_analysis(runs, runfolder, image_analysis, pathname): + logging.info("Looking for bustard directories in %s" % (pathname,)) + bustard_glob = os.path.join(pathname, "Bustard*") + for bustard_pathname in glob(bustard_glob): + logging.info("Found bustard directory %s" % (bustard_pathname,)) + b = bustard.bustard(bustard_pathname) + gerald_glob = os.path.join(bustard_pathname, 'GERALD*') + logging.info("Looking for gerald directories in %s" % (pathname,)) + for gerald_pathname in glob(gerald_glob): + logging.info("Found gerald directory %s" % (gerald_pathname,)) + try: + g = gerald.gerald(gerald_pathname) + p = PipelineRun(runfolder) + p.image_analysis = image_analysis + p.bustard = b + p.gerald = g + runs.append(p) + except IOError, e: + logging.error("Ignoring " + str(e)) + + datadir = os.path.join(runfolder, 'Data') + + logging.info('Searching for runs in ' + datadir) + runs = [] + # scan for firecrest directories + for firecrest_pathname in glob(os.path.join(datadir,"*Firecrest*")): + logging.info('Found firecrest in ' + datadir) + image_analysis = firecrest.firecrest(firecrest_pathname) + if image_analysis is None: + logging.warn( + "%s is an empty or invalid firecrest directory" % (firecrest_pathname,) + ) + else: + scan_post_image_analysis( + runs, runfolder, image_analysis, firecrest_pathname + ) + # scan for IPAR directories + for ipar_pathname in glob(os.path.join(datadir,"IPAR_*")): + logging.info('Found ipar directories in ' + datadir) + image_analysis = ipar.ipar(ipar_pathname) + if image_analysis is None: + logging.warn( + "%s is an empty or invalid IPAR directory" %(ipar_pathname,) + ) + else: + scan_post_image_analysis( + runs, runfolder, image_analysis, ipar_pathname + ) + + return runs + +def get_specific_run(gerald_dir): + """ + Given a gerald directory, construct a PipelineRun out of its parents + + Basically this allows specifying a particular run instead of the previous + get_runs which scans a runfolder for various combinations of + firecrest/ipar/bustard/gerald runs. 
+ """ + from htsworkflow.pipelines import firecrest + from htsworkflow.pipelines import ipar + from htsworkflow.pipelines import bustard + from htsworkflow.pipelines import gerald + + bustard_dir = os.path.abspath(os.path.join(gerald_dir, '..')) + image_dir = os.path.abspath(os.path.join(gerald_dir, '..', '..')) + + runfolder_dir = os.path.abspath(os.path.join(image_dir, '..','..')) + + logging.info('--- use-run detected options ---') + logging.info('runfolder: %s' % (runfolder_dir,)) + logging.info('image_dir: %s' % (image_dir,)) + logging.info('bustard_dir: %s' % (bustard_dir,)) + logging.info('gerald_dir: %s' % (gerald_dir,)) + + # find our processed image dir + image_run = None + # split into parent, and leaf directory + # leaf directory should be an IPAR or firecrest directory + data_dir, short_image_dir = os.path.split(image_dir) + logging.info('data_dir: %s' % (data_dir,)) + logging.info('short_iamge_dir: %s' %(short_image_dir,)) + + # guess which type of image processing directory we have by looking + # in the leaf directory name + if re.search('Firecrest', short_image_dir, re.IGNORECASE) is not None: + image_run = firecrest.firecrest(image_dir) + elif re.search('IPAR', short_image_dir, re.IGNORECASE) is not None: + image_run = ipar.ipar(image_dir) + # if we din't find a run, report the error and return + if image_run is None: + msg = '%s does not contain an image processing step' % (image_dir,) + logging.error(msg) + return None + + # find our base calling + base_calling_run = bustard.bustard(bustard_dir) + if base_calling_run is None: + logging.error('%s does not contain a bustard run' % (bustard_dir,)) + return None + + # find alignments + gerald_run = gerald.gerald(gerald_dir) + if gerald_run is None: + logging.error('%s does not contain a gerald run' % (gerald_dir,)) + return None + + p = PipelineRun(runfolder_dir) + p.image_analysis = image_run + p.bustard = base_calling_run + p.gerald = gerald_run + + logging.info('Constructed PipelineRun from %s' % (gerald_dir,)) + return p + +def extract_run_parameters(runs): + """ + Search through runfolder_path for various runs and grab their parameters + """ + for run in runs: + run.save() + +def summarize_mapped_reads(genome_map, mapped_reads): + """ + Summarize per chromosome reads into a genome count + But handle spike-in/contamination symlinks seperately. 
+ """ + summarized_reads = {} + genome_reads = 0 + genome = 'unknown' + for k, v in mapped_reads.items(): + path, k = os.path.split(k) + if len(path) > 0 and not genome_map.has_key(path): + genome = path + genome_reads += v + else: + summarized_reads[k] = summarized_reads.setdefault(k, 0) + v + summarized_reads[genome] = genome_reads + return summarized_reads + +def summarize_lane(gerald, lane_id): + report = [] + summary_results = gerald.summary.lane_results + for end in range(len(summary_results)): + eland_result = gerald.eland_results.results[end][lane_id] + report.append("Sample name %s" % (eland_result.sample_name)) + report.append("Lane id %s end %s" % (eland_result.lane_id, end)) + cluster = summary_results[end][eland_result.lane_id].cluster + report.append("Clusters %d +/- %d" % (cluster[0], cluster[1])) + report.append("Total Reads: %d" % (eland_result.reads)) + + if hasattr(eland_result, 'match_codes'): + mc = eland_result.match_codes + nm = mc['NM'] + nm_percent = float(nm)/eland_result.reads * 100 + qc = mc['QC'] + qc_percent = float(qc)/eland_result.reads * 100 + + report.append("No Match: %d (%2.2g %%)" % (nm, nm_percent)) + report.append("QC Failed: %d (%2.2g %%)" % (qc, qc_percent)) + report.append('Unique (0,1,2 mismatches) %d %d %d' % \ + (mc['U0'], mc['U1'], mc['U2'])) + report.append('Repeat (0,1,2 mismatches) %d %d %d' % \ + (mc['R0'], mc['R1'], mc['R2'])) + + if hasattr(eland_result, 'genome_map'): + report.append("Mapped Reads") + mapped_reads = summarize_mapped_reads(eland_result.genome_map, eland_result.mapped_reads) + for name, counts in mapped_reads.items(): + report.append(" %s: %d" % (name, counts)) + + report.append('') + return report + +def summary_report(runs): + """ + Summarize cluster numbers and mapped read counts for a runfolder + """ + report = [] + for run in runs: + # print a run name? 
+ report.append('Summary for %s' % (run.name,)) + # sort the report + eland_keys = run.gerald.eland_results.results[0].keys() + eland_keys.sort(alphanum) + + for lane_id in eland_keys: + report.extend(summarize_lane(run.gerald, lane_id)) + report.append('---') + report.append('') + return os.linesep.join(report) + +def is_compressed(filename): + if os.path.splitext(filename)[1] == ".gz": + return True + elif os.path.splitext(filename)[1] == '.bz2': + return True + else: + return False + +def extract_results(runs, output_base_dir=None): + if output_base_dir is None: + output_base_dir = os.getcwd() + + for r in runs: + result_dir = os.path.join(output_base_dir, r.flowcell_id) + logging.info("Using %s as result directory" % (result_dir,)) + if not os.path.exists(result_dir): + os.mkdir(result_dir) + + # create cycle_dir + cycle = "C%d-%d" % (r.image_analysis.start, r.image_analysis.stop) + logging.info("Filling in %s" % (cycle,)) + cycle_dir = os.path.join(result_dir, cycle) + if os.path.exists(cycle_dir): + logging.error("%s already exists, not overwriting" % (cycle_dir,)) + continue + else: + os.mkdir(cycle_dir) + + # copy stuff out of the main run + g = r.gerald + + # save run file + r.save(cycle_dir) + + # Copy Summary.htm + summary_path = os.path.join(r.gerald.pathname, 'Summary.htm') + if os.path.exists(summary_path): + logging.info('Copying %s to %s' % (summary_path, cycle_dir)) + shutil.copy(summary_path, cycle_dir) + else: + logging.info('Summary file %s was not found' % (summary_path,)) + + # tar score files + score_files = [] + + # check for g.pathname/Temp a new feature of 1.1rc1 + scores_path = g.pathname + scores_path_temp = os.path.join(scores_path, 'Temp') + if os.path.isdir(scores_path_temp): + scores_path = scores_path_temp + + # hopefully we have a directory that contains s_*_score files + for f in os.listdir(scores_path): + if re.match('.*_score.txt', f): + score_files.append(f) + + tar_cmd = ['/bin/tar', 'c'] + score_files + bzip_cmd = [ 'bzip2', '-9', '-c' ] + tar_dest_name =os.path.join(cycle_dir, 'scores.tar.bz2') + tar_dest = open(tar_dest_name, 'w') + logging.info("Compressing score files from %s" % (scores_path,)) + logging.info("Running tar: " + " ".join(tar_cmd[:10])) + logging.info("Running bzip2: " + " ".join(bzip_cmd)) + logging.info("Writing to %s" %(tar_dest_name)) + + env = {'BZIP': '-9'} + tar = subprocess.Popen(tar_cmd, stdout=subprocess.PIPE, shell=False, env=env, + cwd=scores_path) + bzip = subprocess.Popen(bzip_cmd, stdin=tar.stdout, stdout=tar_dest) + tar.wait() + + # copy & bzip eland files + for lanes_dictionary in g.eland_results.results: + for eland_lane in lanes_dictionary.values(): + source_name = eland_lane.pathname + path, name = os.path.split(eland_lane.pathname) + dest_name = os.path.join(cycle_dir, name) + logging.info("Saving eland file %s to %s" % \ + (source_name, dest_name)) + + if is_compressed(name): + logging.info('Already compressed, Saving to %s' % (dest_name, )) + shutil.copy(source_name, dest_name) + else: + # not compressed + dest_name += '.bz2' + args = ['bzip2', '-9', '-c', source_name] + logging.info('Running: %s' % ( " ".join(args) )) + bzip_dest = open(dest_name, 'w') + bzip = subprocess.Popen(args, stdout=bzip_dest) + logging.info('Saving to %s' % (dest_name, )) + bzip.wait() + +def rm_list(files, dry_run=True): + for f in files: + if os.path.exists(f): + logging.info('deleting %s' % (f,)) + if not dry_run: + if os.path.isdir(f): + shutil.rmtree(f) + else: + os.unlink(f) + else: + logging.warn("%s doesn't exist."% (f,)) + 
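+
+# Typical use of this module (a sketch; the runfolder path is hypothetical):
+#   runs = get_runs('/data/080102_HWI-EAS229_0010_207BTAAXX')
+#   print summary_report(runs)
+#   extract_results(runs, '/tmp/results')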
+def clean_runs(runs, dry_run=True): + """ + Clean up run folders to optimize for compression. + """ + if dry_run: + logging.info('In dry-run mode') + + for run in runs: + logging.info('Cleaninging %s' % (run.pathname,)) + # rm RunLog*.xml + runlogs = glob(os.path.join(run.pathname, 'RunLog*xml')) + rm_list(runlogs, dry_run) + # rm pipeline_*.txt + pipeline_logs = glob(os.path.join(run.pathname, 'pipeline*.txt')) + rm_list(pipeline_logs, dry_run) + # rm gclog.txt? + # rm NetCopy.log? Isn't this robocopy? + logs = glob(os.path.join(run.pathname, '*.log')) + rm_list(logs, dry_run) + # rm nfn.log? + # Calibration + calibration_dir = glob(os.path.join(run.pathname, 'Calibration_*')) + rm_list(calibration_dir, dry_run) + # rm Images/L* + logging.info("Cleaning images") + image_dirs = glob(os.path.join(run.pathname, 'Images', 'L*')) + rm_list(image_dirs, dry_run) + # cd Data/C1-*_Firecrest* + logging.info("Cleaning intermediate files") + # make clean_intermediate + if os.path.exists(os.path.join(run.image_analysis.pathname, 'Makefile')): + clean_process = subprocess.Popen(['make', 'clean_intermediate'], + cwd=run.image_analysis.pathname,) + clean_process.wait() + + + diff --git a/trunk/htsworkflow/pipelines/summary.py b/trunk/htsworkflow/pipelines/summary.py new file mode 100644 index 0000000..dc323ff --- /dev/null +++ b/trunk/htsworkflow/pipelines/summary.py @@ -0,0 +1,302 @@ +""" +Analyze the Summary.htm file produced by GERALD +""" +import types +from pprint import pprint + +from htsworkflow.pipelines.runfolder import ElementTree +from htsworkflow.util.ethelp import indent, flatten + +nan = float('nan') + +class Summary(object): + """ + Extract some useful information from the Summary.htm file + """ + XML_VERSION = 3 + SUMMARY = 'Summary' + + class LaneResultSummary(object): + """ + Parse the LaneResultSummary table out of Summary.htm + Mostly for the cluster number + """ + LANE_RESULT_SUMMARY = 'LaneResultSummary' + TAGS = { + 'LaneYield': 'lane_yield', + 'Cluster': 'cluster', # Raw + 'ClusterPF': 'cluster_pass_filter', + 'AverageFirstCycleIntensity': 'average_first_cycle_intensity', + 'PercentIntensityAfter20Cycles': 'percent_intensity_after_20_cycles', + 'PercentPassFilterClusters': 'percent_pass_filter_clusters', + 'PercentPassFilterAlign': 'percent_pass_filter_align', + 'AverageAlignmentScore': 'average_alignment_score', + 'PercentErrorRate': 'percent_error_rate' + } + + def __init__(self, html=None, xml=None): + self.lane = None + self.end = 0 + self.lane_yield = None + self.cluster = None + self.cluster_pass_filter = None + self.average_first_cycle_intensity = None + self.percent_intensity_after_20_cycles = None + self.percent_pass_filter_clusters = None + self.percent_pass_filter_align = None + self.average_alignment_score = None + self.percent_error_rate = None + + if html is not None: + self.set_elements_from_html(html) + if xml is not None: + self.set_elements(xml) + + def set_elements_from_html(self, data): + if not len(data) in (8,10): + raise RuntimeError("Summary.htm file format changed, len(data)=%d" % (len(data),)) + + # same in pre-0.3.0 Summary file and 0.3 summary file + self.lane = int(data[0]) + + if len(data) == 8: + parsed_data = [ parse_mean_range(x) for x in data[1:] ] + # this is the < 0.3 Pipeline version + self.cluster = parsed_data[0] + self.average_first_cycle_intensity = parsed_data[1] + self.percent_intensity_after_20_cycles = parsed_data[2] + self.percent_pass_filter_clusters = parsed_data[3] + self.percent_pass_filter_align = parsed_data[4] + 
self.average_alignment_score = parsed_data[5] + self.percent_error_rate = parsed_data[6] + elif len(data) == 10: + parsed_data = [ parse_mean_range(x) for x in data[2:] ] + # this is the >= 0.3 summary file + self.lane_yield = data[1] + self.cluster = parsed_data[0] + self.cluster_pass_filter = parsed_data[1] + self.average_first_cycle_intensity = parsed_data[2] + self.percent_intensity_after_20_cycles = parsed_data[3] + self.percent_pass_filter_clusters = parsed_data[4] + self.percent_pass_filter_align = parsed_data[5] + self.average_alignment_score = parsed_data[6] + self.percent_error_rate = parsed_data[7] + + def get_elements(self): + lane_result = ElementTree.Element( + Summary.LaneResultSummary.LANE_RESULT_SUMMARY, + {'lane': str(self.lane), 'end': str(self.end)}) + for tag, variable_name in Summary.LaneResultSummary.TAGS.items(): + value = getattr(self, variable_name) + if value is None: + continue + # it looks like a sequence + elif type(value) in (types.TupleType, types.ListType): + element = make_mean_range_element( + lane_result, + tag, + *value + ) + else: + element = ElementTree.SubElement(lane_result, tag) + element.text = value + return lane_result + + def set_elements(self, tree): + if tree.tag != Summary.LaneResultSummary.LANE_RESULT_SUMMARY: + raise ValueError('Expected %s' % ( + Summary.LaneResultSummary.LANE_RESULT_SUMMARY)) + self.lane = int(tree.attrib['lane']) + # default to the first end, for the older summary files + # that are single ended + self.end = int(tree.attrib.get('end', 0)) + tags = Summary.LaneResultSummary.TAGS + for element in list(tree): + try: + variable_name = tags[element.tag] + setattr(self, variable_name, + parse_summary_element(element)) + except KeyError, e: + logging.warn('Unrecognized tag %s' % (element.tag,)) + + def __init__(self, filename=None, xml=None): + # lane results is a list of 1 or 2 ends containing + # a dictionary of all the lanes reported in this + # summary file + self.lane_results = [{}] + + if filename is not None: + self._extract_lane_results(filename) + if xml is not None: + self.set_elements(xml) + + def __getitem__(self, key): + return self.lane_results[key] + + def __len__(self): + return len(self.lane_results) + + def _flattened_row(self, row): + """ + flatten the children of a ... + """ + return [flatten(x) for x in row.getchildren() ] + + def _parse_table(self, table): + """ + assumes the first line is the header of a table, + and that the remaining rows are data + """ + rows = table.getchildren() + data = [] + for r in rows: + data.append(self._flattened_row(r)) + return data + + def _extract_named_tables(self, pathname): + """ + extract all the 'named' tables from a Summary.htm file + and return as a dictionary + + Named tables are

        <h2>...</h2><table>...</table>
    pairs + The contents of the h2 tag is considered to the name + of the table. + """ + # tree = ElementTree.parse(pathname).getroot() + # hack for 1.1rc1, this should be removed when possible. + file_body = open(pathname).read() + file_body = file_body.replace('CHASTITY<=', 'CHASTITY<=') + tree = ElementTree.fromstring(file_body) + body = tree.find('body') + tables = {} + for i in range(len(body)): + if body[i].tag == 'h2' and body[i+1].tag == 'table': + # we have an interesting table + name = flatten(body[i]) + table = body[i+1] + data = self._parse_table(table) + tables[name] = data + return tables + + def _extract_lane_results(self, pathname): + tables = self._extract_named_tables(pathname) + table_names = [ ('Lane Results Summary', 0), + ('Lane Results Summary : Read 1', 0), + ('Lane Results Summary : Read 2', 1),] + for name, end in table_names: + if tables.has_key(name): + self._extract_lane_results_for_end(tables, name, end) + + def _extract_lane_results_for_end(self, tables, table_name, end): + """ + extract the Lane Results Summary table + """ + # parse lane result summary + lane_summary = tables[table_name] + # this is version 1 of the summary file + if len(lane_summary[-1]) == 8: + # strip header + headers = lane_summary[0] + # grab the lane by lane data + lane_summary = lane_summary[1:] + + # len(lane_summary[-1] = 10 is version 2 of the summary file + # = 9 is version 3 of the Summary.htm file + elif len(lane_summary[-1]) in (9, 10): + # lane_summary[0] is a different less specific header row + headers = lane_summary[1] + lane_summary = lane_summary[2:10] + # after the last lane, there's a set of chip wide averages + + # append an extra dictionary if needed + if len(self.lane_results) < (end + 1): + self.lane_results.append({}) + + for r in lane_summary: + lrs = Summary.LaneResultSummary(html=r) + lrs.end = end + self.lane_results[lrs.end][lrs.lane] = lrs + + def get_elements(self): + summary = ElementTree.Element(Summary.SUMMARY, + {'version': unicode(Summary.XML_VERSION)}) + for end in self.lane_results: + for lane in end.values(): + summary.append(lane.get_elements()) + return summary + + def set_elements(self, tree): + if tree.tag != Summary.SUMMARY: + return ValueError("Expected %s" % (Summary.SUMMARY,)) + xml_version = int(tree.attrib.get('version', 0)) + if xml_version > Summary.XML_VERSION: + logging.warn('Summary XML tree is a higher version than this class') + for element in list(tree): + lrs = Summary.LaneResultSummary() + lrs.set_elements(element) + if len(self.lane_results) < (lrs.end + 1): + self.lane_results.append({}) + self.lane_results[lrs.end][lrs.lane] = lrs + + def is_paired_end(self): + return len(self.lane_results) == 2 + + def dump(self): + """ + Debugging function, report current object + """ + pass + +def tonumber(v): + """ + Convert a value to int if its an int otherwise a float. 
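+
+    e.g. tonumber('12') -> 12 (int), tonumber('3.27') -> 3.27 (float);
+    a string that parses as neither raises ValueError.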
+ """ + try: + v = int(v) + except ValueError, e: + v = float(v) + return v + +def parse_mean_range(value): + """ + Parse values like 123 +/- 4.5 + """ + if value.strip() == 'unknown': + return nan, nan + + values = value.split() + if len(values) == 1: + if values[0] == '+/-': + return nan,nan + else: + return tonumber(values[0]) + + average, pm, deviation = values + if pm != '+/-': + raise RuntimeError("Summary.htm file format changed") + return tonumber(average), tonumber(deviation) + +def make_mean_range_element(parent, name, mean, deviation): + """ + Make an ElementTree subelement + """ + element = ElementTree.SubElement(parent, name, + { 'mean': unicode(mean), + 'deviation': unicode(deviation)}) + return element + +def parse_mean_range_element(element): + """ + Grab mean/deviation out of element + """ + return (tonumber(element.attrib['mean']), + tonumber(element.attrib['deviation'])) + +def parse_summary_element(element): + """ + Determine if we have a simple element or a mean/deviation element + """ + if len(element.attrib) > 0: + return parse_mean_range_element(element) + else: + return element.text diff --git a/trunk/htsworkflow/pipelines/test/__init__.py b/trunk/htsworkflow/pipelines/test/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/trunk/htsworkflow/pipelines/test/simulate_runfolder.py b/trunk/htsworkflow/pipelines/test/simulate_runfolder.py new file mode 100644 index 0000000..2e340d2 --- /dev/null +++ b/trunk/htsworkflow/pipelines/test/simulate_runfolder.py @@ -0,0 +1,186 @@ +""" +Create simulated solexa/illumina runfolders for testing +""" + +import os +import shutil + +TEST_CODE_DIR = os.path.split(__file__)[0] +TESTDATA_DIR = os.path.join(TEST_CODE_DIR, 'testdata') +LANE_LIST = range(1,9) + +def make_firecrest_dir(data_dir, version="1.9.2", start=1, stop=37): + firecrest_dir = os.path.join(data_dir, + 'C%d-%d_Firecrest%s_12-04-2008_diane' % (start, stop, version) + ) + os.mkdir(firecrest_dir) + return firecrest_dir + +def make_ipar_dir(data_dir, version='1.01'): + """ + Construct an artificial ipar parameter file and directory + """ + ipar1_01_file = os.path.join(TESTDATA_DIR, 'IPAR1.01.params') + shutil.copy(ipar1_01_file, os.path.join(data_dir, '.params')) + + ipar_dir = os.path.join(data_dir, 'IPAR_%s' % (version,)) + if not os.path.exists(ipar_dir): + os.mkdir(ipar_dir) + return ipar_dir + +def make_flowcell_id(runfolder_dir, flowcell_id=None): + if flowcell_id is None: + flowcell_id = '207BTAAXY' + + config = """ + + %s +""" % (flowcell_id,) + config_dir = os.path.join(runfolder_dir, 'Config') + + if not os.path.exists(config_dir): + os.mkdir(config_dir) + pathname = os.path.join(config_dir, 'FlowcellId.xml') + f = open(pathname,'w') + f.write(config) + f.close() + +def make_bustard_config132(gerald_dir): + source = os.path.join(TESTDATA_DIR, 'bustard-config132.xml') + destination = os.path.join(gerald_dir, 'config.xml') + shutil.copy(source, destination) + +def make_matrix(matrix_filename): + contents = """# Auto-generated frequency response matrix +> A +> C +> G +> T +0.77 0.15 -0.04 -0.04 +0.76 1.02 -0.05 -0.06 +-0.10 -0.10 1.17 -0.03 +-0.13 -0.12 0.80 1.27 +""" + f = open(matrix_filename, 'w') + f.write(contents) + f.close() + +def make_phasing_params(bustard_dir): + for lane in range(1,9): + pathname = os.path.join(bustard_dir, 'params%d.xml' % (lane)) + f = open(pathname, 'w') + f.write(""" + 0.009900 + 0.003500 + +""") + f.close() + +def make_gerald_config_026(gerald_dir): + source = os.path.join(TESTDATA_DIR, 'gerald_config_0.2.6.xml') + 
destination = os.path.join(gerald_dir, 'config.xml') + shutil.copy(source, destination) + +def make_gerald_config_100(gerald_dir): + source = os.path.join(TESTDATA_DIR, 'gerald_config_1.0.xml') + destination = os.path.join(gerald_dir, 'config.xml') + shutil.copy(source, destination) + +def make_summary_htm_100(gerald_dir): + source = os.path.join(TESTDATA_DIR, 'Summary-pipeline100.htm') + destination = os.path.join(gerald_dir, 'Summary.htm') + shutil.copy(source, destination) + +def make_summary_htm_110(gerald_dir): + source = os.path.join(TESTDATA_DIR, 'Summary-pipeline110.htm') + destination = os.path.join(gerald_dir, 'Summary.htm') + shutil.copy(source, destination) + +def make_summary_paired_htm(gerald_dir): + source = os.path.join(TESTDATA_DIR, 'Summary-paired-pipeline110.htm') + destination = os.path.join(gerald_dir, 'Summary.htm') + shutil.copy(source, destination) + +def make_summary_ipar130_htm(gerald_dir): + source = os.path.join(TESTDATA_DIR, 'Summary-ipar130.htm') + destination = os.path.join(gerald_dir, 'Summary.htm') + shutil.copy(source, destination) + +def make_eland_results(gerald_dir): + eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D. +>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T +>HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0 +>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T +""" + for i in range(1,9): + pathname = os.path.join(gerald_dir, + 's_%d_eland_result.txt' % (i,)) + f = open(pathname, 'w') + f.write(eland_result) + f.close() + +def make_eland_multi(gerald_dir, paired=False, lane_list=LANE_LIST): + eland_multi = [""">HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM +>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0 +>HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0 +>HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1,chr7.fa:22516603F1,chr9.fa:134886204R +>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample1:55269838R0 +>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample2:55269838R0 +""", """>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 AAGATATCTACGACGTGGTATGGCGGTGTCTGGTCGT NM +>HWI-EAS229_60_30DP9AAXX:1:1:1221:788 NNNNNNNNNNNNNNGTGGTATGGCGGTGTCTGGTCGT QC +>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:2 chr5.fa:55269838R0 +>HWI-EAS229_60_30DP9AAXX:1:1:1121:379 AGAAGAGACATTAAGAGTTCCTGAAATTTATATCTGG 2:1:0 chr16.fa:46189180R1,chr7.fa:122968519R0,chr8.fa:48197174F0,chr7.fa:22516603F1,chr9.fa:134886204R +>HWI-EAS229_60_30DP9AAXX:1:1:892:1155 ACATTCTCCTTTCCTTCTGAAGTTTTTACGATTCTTT 0:9:10 
chr10.fa:114298201F1,chr12.fa:8125072F1,19500297F2,42341293R2,chr13.fa:27688155R2,95069772R1,chr15.fa:51016475F2,chr16.fa:27052155F2,chr1.fa:192426217R2,chr21.fa:23685310R2,chr2.fa:106680068F1,chr3.fa:185226695F2,chr4.fa:106626808R2,chr5.fa:14704894F1,43530779F1,126543189F2,chr6.fa:74284101F1 +>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample1:55269838R0 +>HWI-EAS229_60_30DP9AAXX:1:1:931:747 AAAAAAGCAAATTTCATTCACATGTTCTGTGTTCATA 1:0:0 spike.fa/sample2:55269838R0 +"""] + if paired: + for e in [1,2]: + for i in lane_list: + pathname = os.path.join(gerald_dir, + 's_%d_%d_eland_multi.txt' % (i,e)) + f = open(pathname, 'w') + f.write(eland_multi[e-1]) + f.close() + else: + for i in lane_list: + pathname = os.path.join(gerald_dir, + 's_%d_eland_multi.txt' % (i,)) + f = open(pathname, 'w') + f.write(eland_multi[0]) + f.close() + +def make_scarf(gerald_dir, lane_list=LANE_LIST): + seq = """HWI-EAS229_92_30VNBAAXX:1:1:0:161:NCAATTACACGACGCTAGCCCTAAAGCTATTTCGAGG:E[aaaabb^a\a_^^a[S`ba_WZUXaaaaaaUKPER +HWI-EAS229_92_30VNBAAXX:1:1:0:447:NAGATGCGCATTTGAAGTAGGAGCAAAAGATCAAGGT:EUabaab^baabaaaaaaaa^^Uaaaaa\aaaa__`a +HWI-EAS229_92_30VNBAAXX:1:1:0:1210:NATAGCCTCTATAGAAGCCACTATTATTTTTTTCTTA:EUa`]`baaaaa^XQU^a`S``S_`J_aaaaaabb^V +HWI-EAS229_92_30VNBAAXX:1:1:0:1867:NTGGAGCAGATATAAAAACAGATGGTGACGTTGAAGT:E[^UaaaUaba^aaa^aa^XV\baaLaLaaaaQVXV^ +HWI-EAS229_92_30VNBAAXX:1:1:0:1898:NAGCTCGTGTCGTGAGATGTTAGGTTAAGTCCTGCAA:EK_aaaaaaaaaaaUZaaZaXM[aaaXSM\aaZ]URE +""" + for l in lane_list: + pathname = os.path.join(gerald_dir, 's_%d_sequence.txt' %(l,)) + f = open(pathname,'w') + f.write(seq) + f.close() + +def make_fastq(gerald_dir, lane_list=LANE_LIST): + seq = """@HWI-EAS229:1:2:182:712#0/1 +AAAAAAAAAAAAAAAAAAAAANAAAAAAAAAAAAAAA ++HWI-EAS229:1:2:182:712#0/1 +\bab_bbaabbababbaaa]]D]bb_baabbab\baa +@HWI-EAS229:1:2:198:621#0/1 +CCCCCCCCCCCCCCCCCCCCCNCCCCCCCCCCCCCCC ++HWI-EAS229:1:2:198:621#0/1 +[aaaaaaa`_`aaaaaaa[`ZDZaaaaaaaaaaaaaa +@HWI-EAS229:1:2:209:1321#0/1 +AAAAAAAAAAAAAAAAAAAAANAAAAAAAAAAAAAAA ++HWI-EAS229:1:2:209:1321#0/1 +_bbbbbaaababaabbbbab]D]aaaaaaaaaaaaaa +""" + for l in lane_list: + pathname = os.path.join(gerald_dir, 's_%d_sequence.txt' %(l,)) + f = open(pathname,'w') + f.write(seq) + f.close() + + diff --git a/trunk/htsworkflow/pipelines/test/test_genome_mapper.py b/trunk/htsworkflow/pipelines/test/test_genome_mapper.py new file mode 100644 index 0000000..8ba1ba5 --- /dev/null +++ b/trunk/htsworkflow/pipelines/test/test_genome_mapper.py @@ -0,0 +1,33 @@ +import unittest + +from StringIO import StringIO +from htsworkflow.pipelines import genome_mapper + +class testGenomeMapper(unittest.TestCase): + def test_construct_mapper(self): + genomes = { + 'Arabidopsis thaliana': {'v01212004': '/arabidopsis'}, + 'Homo sapiens': {'hg18': '/hg18'}, + 'Mus musculus': {'mm8': '/mm8', + 'mm9': '/mm9', + 'mm10': '/mm10'}, + 'Phage': {'174': '/phi'}, + } + genome_map = genome_mapper.constructMapperDict(genomes) + + self.failUnlessEqual("%(Mus musculus|mm8)s" % (genome_map), "/mm8") + self.failUnlessEqual("%(Phage|174)s" % (genome_map), "/phi") + self.failUnlessEqual("%(Mus musculus)s" % (genome_map), "/mm10") + self.failUnlessEqual("%(Mus musculus|mm8)s" % (genome_map), "/mm8") + self.failUnlessEqual("%(Mus musculus|mm10)s" % (genome_map), "/mm10") + + self.failUnlessEqual(len(genome_map.keys()), 6) + self.failUnlessEqual(len(genome_map.values()), 6) + self.failUnlessEqual(len(genome_map.items()), 6) + + +def suite(): + return unittest.makeSuite(testGenomeMapper,'test') + +if 
__name__ == "__main__": + unittest.main(defaultTest="suite") diff --git a/trunk/htsworkflow/pipelines/test/test_runfolder026.py b/trunk/htsworkflow/pipelines/test/test_runfolder026.py new file mode 100644 index 0000000..f8160ed --- /dev/null +++ b/trunk/htsworkflow/pipelines/test/test_runfolder026.py @@ -0,0 +1,474 @@ +#!/usr/bin/env python + +from datetime import datetime, date +import os +import tempfile +import shutil +import unittest + +from htsworkflow.pipelines import firecrest +from htsworkflow.pipelines import bustard +from htsworkflow.pipelines import gerald +from htsworkflow.pipelines import runfolder +from htsworkflow.pipelines.runfolder import ElementTree + +from htsworkflow.pipelines.test.simulate_runfolder import * + + +def make_summary_htm(gerald_dir): + summary_htm = """ + + + + +

+    080416_HWI-EAS229_0024_207BTAAXX Summary
+
+    Summary Information For Experiment 080416_HWI-EAS229_0024_207BTAAXX on Machine HWI-EAS229
+
+    Chip Summary
+    Machine    | HWI-EAS229
+    Run Folder | 080416_HWI-EAS229_0024_207BTAAXX
+    Chip ID    | unknown
+
+    Lane Parameter Summary
+    Lane | Sample ID | Sample Target | Sample Type | Length | Filter              | Tiles
+    1    | unknown   | dm3           | ELAND       | 32     | '((CHASTITY>=0.6))' | Lane 1
+    2    | unknown   | equcab1       | ELAND       | 32     | '((CHASTITY>=0.6))' | Lane 2
+    3    | unknown   | equcab1       | ELAND       | 32     | '((CHASTITY>=0.6))' | Lane 3
+    4    | unknown   | canfam2       | ELAND       | 32     | '((CHASTITY>=0.6))' | Lane 4
+    5    | unknown   | hg18          | ELAND       | 32     | '((CHASTITY>=0.6))' | Lane 5
+    6    | unknown   | hg18          | ELAND       | 32     | '((CHASTITY>=0.6))' | Lane 6
+    7    | unknown   | hg18          | ELAND       | 32     | '((CHASTITY>=0.6))' | Lane 7
+    8    | unknown   | hg18          | ELAND       | 32     | '((CHASTITY>=0.6))' | Lane 8
+
+    Lane Results Summary
+    Lane | Clusters       | Av 1st Cycle Int | % intensity after 20 cycles | % PF Clusters   | % Align (PF)    | Av Alignment Score (PF) | % Error Rate (PF)
+    1    | 17421 +/- 2139 | 7230 +/- 801     | 23.73 +/- 10.79             | 13.00 +/- 22.91 | 32.03 +/- 18.45 | 6703.57 +/- 3753.85     | 4.55 +/- 4.81
+    2    | 20311 +/- 2402 | 7660 +/- 678     | 17.03 +/- 4.40              | 40.74 +/- 30.33 | 29.54 +/- 9.03  | 5184.02 +/- 1631.54     | 3.27 +/- 3.94
+    3    | 20193 +/- 2399 | 7700 +/- 797     | 15.75 +/- 3.30              | 56.56 +/- 17.16 | 27.33 +/- 7.48  | 4803.49 +/- 1313.31     | 3.07 +/- 2.86
+    4    | 15537 +/- 2531 | 7620 +/- 1392    | 15.37 +/- 3.79              | 63.05 +/- 18.30 | 15.88 +/- 4.99  | 3162.13 +/- 962.59      | 3.11 +/- 2.22
+    5    | 32047 +/- 3356 | 8093 +/- 831     | 23.79 +/- 6.18              | 53.36 +/- 18.06 | 48.04 +/- 13.77 | 9866.23 +/- 2877.30     | 2.26 +/- 1.16
+    6    | 32946 +/- 4753 | 8227 +/- 736     | 24.07 +/- 4.69              | 54.65 +/- 12.57 | 50.98 +/- 10.54 | 10468.86 +/- 2228.53    | 2.21 +/- 2.33
+    7    | 39504 +/- 4171 | 8401 +/- 785     | 22.55 +/- 4.56              | 45.22 +/- 10.34 | 48.41 +/- 9.67  | 9829.40 +/- 1993.20     | 2.26 +/- 1.11
+    8    | 37998 +/- 3792 | 8443 +/- 1211    | 39.03 +/- 7.52              | 42.16 +/- 12.35 | 40.98 +/- 14.89 | 8128.87 +/- 3055.34     | 3.57 +/- 2.77
    + + +""" + pathname = os.path.join(gerald_dir, 'Summary.htm') + f = open(pathname, 'w') + f.write(summary_htm) + f.close() + +def make_eland_results(gerald_dir): + eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D. +>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T +>HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0 +>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T +""" + for i in range(1,9): + pathname = os.path.join(gerald_dir, + 's_%d_eland_result.txt' % (i,)) + f = open(pathname, 'w') + f.write(eland_result) + f.close() + +class RunfolderTests(unittest.TestCase): + """ + Test components of the runfolder processing code + which includes firecrest, bustard, and gerald + """ + def setUp(self): + # make a fake runfolder directory + self.temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_') + + self.runfolder_dir = os.path.join(self.temp_dir, + '080102_HWI-EAS229_0010_207BTAAXX') + os.mkdir(self.runfolder_dir) + + self.data_dir = os.path.join(self.runfolder_dir, 'Data') + os.mkdir(self.data_dir) + + self.firecrest_dir = os.path.join(self.data_dir, + 'C1-33_Firecrest1.8.28_12-04-2008_diane' + ) + os.mkdir(self.firecrest_dir) + self.matrix_dir = os.path.join(self.firecrest_dir, 'Matrix') + os.mkdir(self.matrix_dir) + matrix_filename = os.path.join(self.matrix_dir, 's_matrix') + make_matrix(matrix_filename) + + self.bustard_dir = os.path.join(self.firecrest_dir, + 'Bustard1.8.28_12-04-2008_diane') + os.mkdir(self.bustard_dir) + make_phasing_params(self.bustard_dir) + + self.gerald_dir = os.path.join(self.bustard_dir, + 'GERALD_12-04-2008_diane') + os.mkdir(self.gerald_dir) + make_gerald_config_026(self.gerald_dir) + make_summary_htm(self.gerald_dir) + make_eland_results(self.gerald_dir) + + def tearDown(self): + shutil.rmtree(self.temp_dir) + + def test_firecrest(self): + """ + Construct a firecrest object + """ + f = firecrest.firecrest(self.firecrest_dir) + self.failUnlessEqual(f.version, '1.8.28') + self.failUnlessEqual(f.start, 1) + self.failUnlessEqual(f.stop, 33) + self.failUnlessEqual(f.user, 'diane') + self.failUnlessEqual(f.date, date(2008,4,12)) + + xml = f.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + + f2 = firecrest.Firecrest(xml=xml) + self.failUnlessEqual(f.version, f2.version) + self.failUnlessEqual(f.start, f2.start) + self.failUnlessEqual(f.stop, f2.stop) + self.failUnlessEqual(f.user, f2.user) + self.failUnlessEqual(f.date, f2.date) + + def test_bustard(self): + """ + construct a bustard object + """ + b = bustard.bustard(self.bustard_dir) + self.failUnlessEqual(b.version, '1.8.28') + self.failUnlessEqual(b.date, date(2008,4,12)) + self.failUnlessEqual(b.user, 'diane') + self.failUnlessEqual(len(b.phasing), 8) + self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099) + + xml = b.get_elements() + b2 = bustard.Bustard(xml=xml) + self.failUnlessEqual(b.version, b2.version) + self.failUnlessEqual(b.date, b2.date ) + self.failUnlessEqual(b.user, b2.user) + self.failUnlessEqual(len(b.phasing), len(b2.phasing)) + for key in b.phasing.keys(): + self.failUnlessEqual(b.phasing[key].lane, + b2.phasing[key].lane) + self.failUnlessEqual(b.phasing[key].phasing, + b2.phasing[key].phasing) + self.failUnlessEqual(b.phasing[key].prephasing, + b2.phasing[key].prephasing) + + def test_gerald(self): + # need to 
update gerald and make tests for it + g = gerald.gerald(self.gerald_dir) + + self.failUnlessEqual(g.version, + '@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp') + self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30)) + self.failUnlessEqual(len(g.lanes), len(g.lanes.keys())) + self.failUnlessEqual(len(g.lanes), len(g.lanes.items())) + + + # list of genomes, matches what was defined up in + # make_gerald_config. + # the first None is to offset the genomes list to be 1..9 + # instead of pythons default 0..8 + genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2', + '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ] + + # test lane specific parameters from gerald config file + for i in range(1,9): + cur_lane = g.lanes[i] + self.failUnlessEqual(cur_lane.analysis, 'eland') + self.failUnlessEqual(cur_lane.eland_genome, genomes[i]) + self.failUnlessEqual(cur_lane.read_length, '32') + self.failUnlessEqual(cur_lane.use_bases, 'Y'*32) + + # test data extracted from summary file + clusters = [None, + (17421, 2139), (20311, 2402), (20193, 2399), (15537, 2531), + (32047, 3356), (32946, 4753), (39504, 4171), (37998, 3792)] + + self.failUnlessEqual(len(g.summary), 1) + for i in range(1,9): + summary_lane = g.summary[0][i] + self.failUnlessEqual(summary_lane.cluster, clusters[i]) + self.failUnlessEqual(summary_lane.lane, i) + + xml = g.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + g2 = gerald.Gerald(xml=xml) + + # do it all again after extracting from the xml file + self.failUnlessEqual(g.version, g2.version) + self.failUnlessEqual(g.date, g2.date) + self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys())) + self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items())) + + # test lane specific parameters from gerald config file + for i in range(1,9): + g_lane = g.lanes[i] + g2_lane = g2.lanes[i] + self.failUnlessEqual(g_lane.analysis, g2_lane.analysis) + self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome) + self.failUnlessEqual(g_lane.read_length, g2_lane.read_length) + self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases) + + self.failUnlessEqual(len(g.summary), 1) + # test (some) summary elements + for i in range(1,9): + g_summary = g.summary[0][i] + g2_summary = g2.summary[0][i] + self.failUnlessEqual(g_summary.cluster, g2_summary.cluster) + self.failUnlessEqual(g_summary.lane, g2_summary.lane) + + g_eland = g.eland_results + g2_eland = g2.eland_results + for lane in g_eland.results[0].keys(): + g_results = g_eland.results[0][lane] + g2_results = g2_eland.results[0][lane] + self.failUnlessEqual(g_results.reads, + g2_results.reads) + self.failUnlessEqual(len(g_results.mapped_reads), + len(g2_results.mapped_reads)) + for k in g_results.mapped_reads.keys(): + self.failUnlessEqual(g_results.mapped_reads[k], + g2_results.mapped_reads[k]) + + self.failUnlessEqual(len(g_results.match_codes), + len(g2_results.match_codes)) + for k in g_results.match_codes.keys(): + self.failUnlessEqual(g_results.match_codes[k], + g2_results.match_codes[k]) + + + def test_eland(self): + dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa', + 'chr2L.fa': 'dm3/chr2L.fa', + 'Lambda.fa': 'Lambda.fa'} + genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map, + 5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map } + eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) + + for i in range(1,9): + lane = eland.results[0][i] + self.failUnlessEqual(lane.reads, 4) + self.failUnlessEqual(lane.sample_name, "s") + 
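+            # the four simulated reads per lane resolve to three mapped
+            # targets (dm3/chrUextra.fa, dm3/chr2L.fa, Lambda.fa), a count
+            # taken from the eland_result fixture above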
self.failUnlessEqual(lane.lane_id, i) + self.failUnlessEqual(len(lane.mapped_reads), 3) + self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1) + self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1) + self.failUnlessEqual(lane.match_codes['U1'], 2) + self.failUnlessEqual(lane.match_codes['NM'], 1) + + xml = eland.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + e2 = gerald.ELAND(xml=xml) + + for i in range(1,9): + l1 = eland.results[0][i] + l2 = e2.results[0][i] + self.failUnlessEqual(l1.reads, l2.reads) + self.failUnlessEqual(l1.sample_name, l2.sample_name) + self.failUnlessEqual(l1.lane_id, l2.lane_id) + self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads)) + self.failUnlessEqual(len(l1.mapped_reads), 3) + for k in l1.mapped_reads.keys(): + self.failUnlessEqual(l1.mapped_reads[k], + l2.mapped_reads[k]) + + self.failUnlessEqual(len(l1.match_codes), 9) + self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes)) + for k in l1.match_codes.keys(): + self.failUnlessEqual(l1.match_codes[k], + l2.match_codes[k]) + + def test_runfolder(self): + runs = runfolder.get_runs(self.runfolder_dir) + + # do we get the flowcell id from the filename? + self.failUnlessEqual(len(runs), 1) + self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml') + + # do we get the flowcell id from the FlowcellId.xml file + make_flowcell_id(self.runfolder_dir, '207BTAAXY') + runs = runfolder.get_runs(self.runfolder_dir) + self.failUnlessEqual(len(runs), 1) + self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml') + + r1 = runs[0] + xml = r1.get_elements() + xml_str = ElementTree.tostring(xml) + + r2 = runfolder.PipelineRun(xml=xml) + self.failUnlessEqual(r1.name, r2.name) + self.failIfEqual(r2.image_analysis, None) + self.failIfEqual(r2.bustard, None) + self.failIfEqual(r2.gerald, None) + + +def suite(): + return unittest.makeSuite(RunfolderTests,'test') + +if __name__ == "__main__": + unittest.main(defaultTest="suite") + diff --git a/trunk/htsworkflow/pipelines/test/test_runfolder030.py b/trunk/htsworkflow/pipelines/test/test_runfolder030.py new file mode 100644 index 0000000..0691308 --- /dev/null +++ b/trunk/htsworkflow/pipelines/test/test_runfolder030.py @@ -0,0 +1,898 @@ +#!/usr/bin/env python + +from datetime import datetime, date +import os +import tempfile +import shutil +import unittest + +from htsworkflow.pipelines import firecrest +from htsworkflow.pipelines import bustard +from htsworkflow.pipelines import gerald +from htsworkflow.pipelines import runfolder +from htsworkflow.pipelines.runfolder import ElementTree + +from htsworkflow.pipelines.test.simulate_runfolder import * + + +def make_summary_htm(gerald_dir): + summary_htm=""" + + + + +

+    080627_HWI-EAS229_0036_3055HAXX Summary
+
+    Summary Information For Experiment 080627_HWI-EAS229_0036_3055HAXX on Machine HWI-EAS229
+
+    Chip Summary
+    Machine    | HWI-EAS229
+    Run Folder | 080627_HWI-EAS229_0036_3055HAXX
+    Chip ID    | unknown
+
+    Chip Results Summary
+    Clusters | Clusters (PF) | Yield (kbases)
+    80933224 | 43577803      | 1133022
+
+    Lane Parameter Summary
+    Lane | Sample ID | Sample Target | Sample Type | Length | Filter              | Num Tiles | Tiles
+    1    | unknown   | mm9           | ELAND       | 26     | '((CHASTITY>=0.6))' | 100       | Lane 1
+    2    | unknown   | mm9           | ELAND       | 26     | '((CHASTITY>=0.6))' | 100       | Lane 2
+    3    | unknown   | mm9           | ELAND       | 26     | '((CHASTITY>=0.6))' | 100       | Lane 3
+    4    | unknown   | elegans170    | ELAND       | 26     | '((CHASTITY>=0.6))' | 100       | Lane 4
+    5    | unknown   | elegans170    | ELAND       | 26     | '((CHASTITY>=0.6))' | 100       | Lane 5
+    6    | unknown   | elegans170    | ELAND       | 26     | '((CHASTITY>=0.6))' | 100       | Lane 6
+    7    | unknown   | elegans170    | ELAND       | 26     | '((CHASTITY>=0.6))' | 100       | Lane 7
+    8    | unknown   | elegans170    | ELAND       | 26     | '((CHASTITY>=0.6))' | 100       | Lane 8
+
+    Lane Results Summary (tile mean +/- SD for lane)
+    Lane | Lane Yield (kbases) | Clusters (raw)  | Clusters (PF)  | 1st Cycle Int (PF) | % intensity after 20 cycles (PF) | % PF Clusters  | % Align (PF)   | Alignment Score (PF) | % Error Rate (PF)
+    1    | 158046 | 96483 +/- 9074   | 60787 +/- 4240 | 329 +/- 35 | 101.88 +/- 6.03 | 63.21 +/- 3.29 | 70.33 +/- 0.24 | 9054.08 +/- 59.16   | 0.46 +/- 0.18
+    2    | 156564 | 133738 +/- 7938  | 60217 +/- 1926 | 444 +/- 39 | 92.62 +/- 7.58  | 45.20 +/- 3.31 | 51.98 +/- 0.74 | 6692.04 +/- 92.49   | 0.46 +/- 0.09
+    3    | 185818 | 152142 +/- 10002 | 71468 +/- 2827 | 366 +/- 36 | 91.53 +/- 8.66  | 47.19 +/- 3.80 | 82.24 +/- 0.44 | 10598.68 +/- 64.13  | 0.41 +/- 0.04
+    4    | 34953  | 15784 +/- 2162   | 13443 +/- 1728 | 328 +/- 40 | 97.53 +/- 9.87  | 85.29 +/- 1.91 | 80.02 +/- 0.53 | 10368.82 +/- 71.08  | 0.15 +/- 0.05
+    5    | 167936 | 119735 +/- 8465  | 64590 +/- 2529 | 417 +/- 37 | 88.69 +/- 14.79 | 54.10 +/- 2.59 | 76.95 +/- 0.32 | 9936.47 +/- 65.75   | 0.28 +/- 0.02
+    6    | 173463 | 152177 +/- 8146  | 66716 +/- 2493 | 372 +/- 39 | 87.06 +/- 9.86  | 43.98 +/- 3.12 | 78.80 +/- 0.43 | 10162.28 +/- 49.65  | 0.38 +/- 0.03
+    7    | 149287 | 84649 +/- 7325   | 57418 +/- 3617 | 295 +/- 28 | 89.40 +/- 8.23  | 67.97 +/- 1.82 | 33.38 +/- 0.25 | 4247.92 +/- 32.37   | 1.00 +/- 0.03
+    8    | 106953 | 54622 +/- 4812   | 41136 +/- 3309 | 284 +/- 37 | 90.21 +/- 9.10  | 75.39 +/- 2.27 | 48.33 +/- 0.29 | 6169.21 +/- 169.50  | 0.86 +/- 1.22
+    Tile mean across chip
+    Av.  |        | 101166           | 54472          | 354        | 92.36           | 60.29          | 65.25          | 8403.69             | 0.50
+
+    Expanded Lane Summary
+    Lane | Clusters (tile mean) (raw) | % Phasing | % Prephasing | % Error Rate (raw) | Equiv Perfect Clusters (raw) | % retained | Cycle 2-4 Av Int (PF) | Cycle 2-10 Av % Loss (PF) | Cycle 10-20 Av % Loss (PF) | % Align (PF) | % Error Rate (PF) | Equiv Perfect Clusters (PF)
+    1    | 96483  | 0.7700 | 0.3100 | 1.00 | 49676 | 63.21 | 317 +/- 32 | 0.13 +/- 0.44 | -1.14 +/- 0.34 | 70.33 | 0.46 | 41758
+    2    | 133738 | 0.7700 | 0.3100 | 1.22 | 40467 | 45.20 | 415 +/- 33 | 0.29 +/- 0.40 | -0.79 +/- 0.35 | 51.98 | 0.46 | 30615
+    3    | 152142 | 0.7700 | 0.3100 | 1.30 | 78588 | 47.19 | 344 +/- 26 | 0.68 +/- 0.51 | -0.77 +/- 0.42 | 82.24 | 0.41 | 57552
+    4    | 15784  | 0.7700 | 0.3100 | 0.29 | 11095 | 85.29 | 306 +/- 34 | 0.20 +/- 0.69 | -1.28 +/- 0.66 | 80.02 | 0.15 | 10671
+    5    | 119735 | 0.7700 | 0.3100 | 0.85 | 60335 | 54.10 | 380 +/- 32 | 0.34 +/- 0.49 | -1.55 +/- 4.69 | 76.95 | 0.28 | 49015
+    6    | 152177 | 0.7700 | 0.3100 | 1.21 | 70905 | 43.98 | 333 +/- 27 | 0.57 +/- 0.50 | -0.91 +/- 0.39 | 78.80 | 0.38 | 51663
+    7    | 84649  | 0.7700 | 0.3100 | 1.38 | 21069 | 67.97 | 272 +/- 20 | 1.15 +/- 0.52 | -0.84 +/- 0.58 | 33.38 | 1.00 | 18265
+    8    | 54622  | 0.7700 | 0.3100 | 1.17 | 21335 | 75.39 | 262 +/- 31 | 1.10 +/- 0.59 | -1.01 +/- 0.47 | 48.33 | 0.86 | 19104
+
+    IVC Plots: IVC.htm
+    All Intensity Plots: All.htm
+    Error graphs: Error.htm
+    Back to top
+
+    Per-lane tile results
+    Lane | Tile | Clusters (raw) | Av 1st Cycle Int (PF) | Av % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Av Alignment Score (PF) | % Error Rate (PF)
+    Lane 1
+    1    | 0001 | 114972 | 326.48 | 94.39 | 57.44 | 70.2 | 9038.6  | 0.44
+    Lane 2
+    2    | 0001 | 147793 | 448.12 | 83.68 | 38.57 | 53.7 | 6905.4  | 0.54
+    Lane 3
+    3    | 0001 | 167904 | 374.05 | 86.91 | 40.36 | 81.3 | 10465.0 | 0.47
+    Lane 4
+    4    | 0001 | 20308  | 276.85 | 92.87 | 84.26 | 80.4 | 10413.8 | 0.16
+    Lane 5
+    (no tile rows)
+    Lane 6
+    6    | 0001 | 166844 | 348.12 | 77.59 | 38.13 | 79.7 | 10264.4 | 0.44
+    Lane 7
+    7    | 0001 | 98913  | 269.90 | 86.66 | 64.55 | 33.2 | 4217.5  | 1.02
+    Lane 8
+    8    | 0001 | 64972  | 243.60 | 89.40 | 73.17 | 48.3 | 6182.8  | 0.71
    +Back to top + + +""" + pathname = os.path.join(gerald_dir, 'Summary.htm') + f = open(pathname, 'w') + f.write(summary_htm) + f.close() + +def make_eland_results(gerald_dir): + eland_result = """>HWI-EAS229_24_207BTAAXX:1:7:599:759 ACATAGNCACAGACATAAACATAGACATAGAC U0 1 1 3 chrUextra.fa 28189829 R D. +>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 chr2L.fa 8796855 R DD 24T +>HWI-EAS229_24_207BTAAXX:1:7:776:582 AGCTCANCCGATCGAAAACCTCNCCAAGCAAT NM 0 0 0 +>HWI-EAS229_24_207BTAAXX:1:7:205:842 AAACAANNCTCCCAAACACGTAAACTGGAAAA U1 0 1 0 Lambda.fa 8796855 R DD 24T +""" + for i in range(1,9): + pathname = os.path.join(gerald_dir, + 's_%d_eland_result.txt' % (i,)) + f = open(pathname, 'w') + f.write(eland_result) + f.close() + +def make_runfolder(obj=None): + """ + Make a fake runfolder, attach all the directories to obj if defined + """ + # make a fake runfolder directory + temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_') + + runfolder_dir = os.path.join(temp_dir, + '080102_HWI-EAS229_0010_207BTAAXX') + os.mkdir(runfolder_dir) + + data_dir = os.path.join(runfolder_dir, 'Data') + os.mkdir(data_dir) + + firecrest_dir = os.path.join(data_dir, + 'C1-33_Firecrest1.8.28_12-04-2008_diane' + ) + os.mkdir(firecrest_dir) + matrix_dir = os.path.join(firecrest_dir, 'Matrix') + os.mkdir(matrix_dir) + matrix_filename = os.path.join(matrix_dir, 's_matrix.txt') + make_matrix(matrix_filename) + + bustard_dir = os.path.join(firecrest_dir, + 'Bustard1.8.28_12-04-2008_diane') + os.mkdir(bustard_dir) + make_phasing_params(bustard_dir) + + gerald_dir = os.path.join(bustard_dir, + 'GERALD_12-04-2008_diane') + os.mkdir(gerald_dir) + make_gerald_config_026(gerald_dir) + make_summary_htm(gerald_dir) + make_eland_results(gerald_dir) + + if obj is not None: + obj.temp_dir = temp_dir + obj.runfolder_dir = runfolder_dir + obj.data_dir = data_dir + obj.firecrest_dir = firecrest_dir + obj.matrix_dir = matrix_dir + obj.bustard_dir = bustard_dir + obj.gerald_dir = gerald_dir + + +class RunfolderTests(unittest.TestCase): + """ + Test components of the runfolder processing code + which includes firecrest, bustard, and gerald + """ + def setUp(self): + # attaches all the directories to the object passed in + make_runfolder(self) + + def tearDown(self): + shutil.rmtree(self.temp_dir) + + def test_firecrest(self): + """ + Construct a firecrest object + """ + f = firecrest.firecrest(self.firecrest_dir) + self.failUnlessEqual(f.version, '1.8.28') + self.failUnlessEqual(f.start, 1) + self.failUnlessEqual(f.stop, 33) + self.failUnlessEqual(f.user, 'diane') + self.failUnlessEqual(f.date, date(2008,4,12)) + + xml = f.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + + f2 = firecrest.Firecrest(xml=xml) + self.failUnlessEqual(f.version, f2.version) + self.failUnlessEqual(f.start, f2.start) + self.failUnlessEqual(f.stop, f2.stop) + self.failUnlessEqual(f.user, f2.user) + self.failUnlessEqual(f.date, f2.date) + + def test_bustard(self): + """ + construct a bustard object + """ + b = bustard.bustard(self.bustard_dir) + self.failUnlessEqual(b.version, '1.8.28') + self.failUnlessEqual(b.date, date(2008,4,12)) + self.failUnlessEqual(b.user, 'diane') + self.failUnlessEqual(len(b.phasing), 8) + self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099) + + xml = b.get_elements() + b2 = bustard.Bustard(xml=xml) + self.failUnlessEqual(b.version, b2.version) + self.failUnlessEqual(b.date, b2.date ) + self.failUnlessEqual(b.user, b2.user) + 
self.failUnlessEqual(len(b.phasing), len(b2.phasing)) + for key in b.phasing.keys(): + self.failUnlessEqual(b.phasing[key].lane, + b2.phasing[key].lane) + self.failUnlessEqual(b.phasing[key].phasing, + b2.phasing[key].phasing) + self.failUnlessEqual(b.phasing[key].prephasing, + b2.phasing[key].prephasing) + + def test_gerald(self): + # need to update gerald and make tests for it + g = gerald.gerald(self.gerald_dir) + + self.failUnlessEqual(g.version, + '@(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp') + self.failUnlessEqual(g.date, datetime(2008,4,19,19,8,30)) + self.failUnlessEqual(len(g.lanes), len(g.lanes.keys())) + self.failUnlessEqual(len(g.lanes), len(g.lanes.items())) + + + # list of genomes, matches what was defined up in + # make_gerald_config. + # the first None is to offset the genomes list to be 1..9 + # instead of pythons default 0..8 + genomes = [None, '/g/dm3', '/g/equcab1', '/g/equcab1', '/g/canfam2', + '/g/hg18', '/g/hg18', '/g/hg18', '/g/hg18', ] + + # test lane specific parameters from gerald config file + for i in range(1,9): + cur_lane = g.lanes[i] + self.failUnlessEqual(cur_lane.analysis, 'eland') + self.failUnlessEqual(cur_lane.eland_genome, genomes[i]) + self.failUnlessEqual(cur_lane.read_length, '32') + self.failUnlessEqual(cur_lane.use_bases, 'Y'*32) + + # test data extracted from summary file + clusters = [None, + (96483, 9074), (133738, 7938), + (152142, 10002), (15784, 2162), + (119735, 8465), (152177, 8146), + (84649, 7325), (54622, 4812),] + + self.failUnlessEqual(len(g.summary), 1) + for i in range(1,9): + summary_lane = g.summary[0][i] + self.failUnlessEqual(summary_lane.cluster, clusters[i]) + self.failUnlessEqual(summary_lane.lane, i) + + xml = g.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + g2 = gerald.Gerald(xml=xml) + + # do it all again after extracting from the xml file + self.failUnlessEqual(g.version, g2.version) + self.failUnlessEqual(g.date, g2.date) + self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys())) + self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items())) + + # test lane specific parameters from gerald config file + for i in range(1,9): + g_lane = g.lanes[i] + g2_lane = g2.lanes[i] + self.failUnlessEqual(g_lane.analysis, g2_lane.analysis) + self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome) + self.failUnlessEqual(g_lane.read_length, g2_lane.read_length) + self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases) + + # test (some) summary elements + self.failUnlessEqual(len(g.summary), 1) + for i in range(1,9): + g_summary = g.summary[0][i] + g2_summary = g2.summary[0][i] + self.failUnlessEqual(g_summary.cluster, g2_summary.cluster) + self.failUnlessEqual(g_summary.lane, g2_summary.lane) + + g_eland = g.eland_results + g2_eland = g2.eland_results + for lane in g_eland.results[0].keys(): + g_results = g_eland.results[0][lane] + g2_results = g2_eland.results[0][lane] + self.failUnlessEqual(g_results.reads, + g2_results.reads) + self.failUnlessEqual(len(g_results.mapped_reads), + len(g2_results.mapped_reads)) + for k in g_results.mapped_reads.keys(): + self.failUnlessEqual(g_results.mapped_reads[k], + g2_results.mapped_reads[k]) + + self.failUnlessEqual(len(g_results.match_codes), + len(g2_results.match_codes)) + for k in g_results.match_codes.keys(): + self.failUnlessEqual(g_results.match_codes[k], + g2_results.match_codes[k]) + + + def test_eland(self): + dm3_map = { 'chrUextra.fa' : 'dm3/chrUextra.fa', + 'chr2L.fa': 
'dm3/chr2L.fa', + 'Lambda.fa': 'Lambda.fa'} + genome_maps = { 1:dm3_map, 2:dm3_map, 3:dm3_map, 4:dm3_map, + 5:dm3_map, 6:dm3_map, 7:dm3_map, 8:dm3_map } + eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) + + for i in range(1,9): + lane = eland.results[0][i] + self.failUnlessEqual(lane.reads, 4) + self.failUnlessEqual(lane.sample_name, "s") + self.failUnlessEqual(lane.lane_id, i) + self.failUnlessEqual(len(lane.mapped_reads), 3) + self.failUnlessEqual(lane.mapped_reads['Lambda.fa'], 1) + self.failUnlessEqual(lane.mapped_reads['dm3/chr2L.fa'], 1) + self.failUnlessEqual(lane.match_codes['U1'], 2) + self.failUnlessEqual(lane.match_codes['NM'], 1) + + xml = eland.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + e2 = gerald.ELAND(xml=xml) + + for i in range(1,9): + l1 = eland.results[0][i] + l2 = e2.results[0][i] + self.failUnlessEqual(l1.reads, l2.reads) + self.failUnlessEqual(l1.sample_name, l2.sample_name) + self.failUnlessEqual(l1.lane_id, l2.lane_id) + self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads)) + self.failUnlessEqual(len(l1.mapped_reads), 3) + for k in l1.mapped_reads.keys(): + self.failUnlessEqual(l1.mapped_reads[k], + l2.mapped_reads[k]) + + self.failUnlessEqual(len(l1.match_codes), 9) + self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes)) + for k in l1.match_codes.keys(): + self.failUnlessEqual(l1.match_codes[k], + l2.match_codes[k]) + + def test_runfolder(self): + runs = runfolder.get_runs(self.runfolder_dir) + + # do we get the flowcell id from the filename? + self.failUnlessEqual(len(runs), 1) + self.failUnlessEqual(runs[0].name, 'run_207BTAAXX_2008-04-19.xml') + + # do we get the flowcell id from the FlowcellId.xml file + make_flowcell_id(self.runfolder_dir, '207BTAAXY') + runs = runfolder.get_runs(self.runfolder_dir) + self.failUnlessEqual(len(runs), 1) + self.failUnlessEqual(runs[0].name, 'run_207BTAAXY_2008-04-19.xml') + + r1 = runs[0] + xml = r1.get_elements() + xml_str = ElementTree.tostring(xml) + + r2 = runfolder.PipelineRun(xml=xml) + self.failUnlessEqual(r1.name, r2.name) + self.failIfEqual(r2.image_analysis, None) + self.failIfEqual(r2.bustard, None) + self.failIfEqual(r2.gerald, None) + + +def suite(): + return unittest.makeSuite(RunfolderTests,'test') + +if __name__ == "__main__": + unittest.main(defaultTest="suite") + diff --git a/trunk/htsworkflow/pipelines/test/test_runfolder110.py b/trunk/htsworkflow/pipelines/test/test_runfolder110.py new file mode 100644 index 0000000..fc91ce4 --- /dev/null +++ b/trunk/htsworkflow/pipelines/test/test_runfolder110.py @@ -0,0 +1,302 @@ +#!/usr/bin/env python + +from datetime import datetime, date +import os +import tempfile +import shutil +import unittest + +from htsworkflow.pipelines import firecrest +from htsworkflow.pipelines import bustard +from htsworkflow.pipelines import gerald +from htsworkflow.pipelines import runfolder +from htsworkflow.pipelines.runfolder import ElementTree + +from htsworkflow.pipelines.test.simulate_runfolder import * + + +def make_runfolder(obj=None): + """ + Make a fake runfolder, attach all the directories to obj if defined + """ + # make a fake runfolder directory + temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_') + + runfolder_dir = os.path.join(temp_dir, + '081017_HWI-EAS229_0062_30J55AAXX') + os.mkdir(runfolder_dir) + + data_dir = os.path.join(runfolder_dir, 'Data') + os.mkdir(data_dir) + + firecrest_dir = os.path.join(data_dir, + 'C1-37_Firecrest1.9.6_20-10-2008_diane') + 
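+    # Layout sketch of the simulated runfolder (names inferred from the
+    # fixture strings used in this module):
+    #   <runfolder>/Data/C1-<first>-<last>_Firecrest<ver>_<DD-MM-YYYY>_<user>/
+    #     Bustard<ver>_<DD-MM-YYYY>_<user>/GERALD_<DD-MM-YYYY>_<user>/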
os.mkdir(firecrest_dir) + + bustard_dir = os.path.join(firecrest_dir, + 'Bustard1.9.6_20-10-2008_diane') + os.mkdir(bustard_dir) + make_phasing_params(bustard_dir) + + matrix_name = os.path.join(bustard_dir, 'matrix1.txt') + make_matrix(matrix_name) + + + gerald_dir = os.path.join(bustard_dir, + 'GERALD_20-10-2008_diane') + os.mkdir(gerald_dir) + make_gerald_config_100(gerald_dir) + make_summary_htm_110(gerald_dir) + make_eland_multi(gerald_dir) + + if obj is not None: + obj.temp_dir = temp_dir + obj.runfolder_dir = runfolder_dir + obj.data_dir = data_dir + obj.image_analysis_dir = firecrest_dir + obj.bustard_dir = bustard_dir + obj.gerald_dir = gerald_dir + + +class RunfolderTests(unittest.TestCase): + """ + Test components of the runfolder processing code + which includes firecrest, bustard, and gerald + """ + def setUp(self): + # attaches all the directories to the object passed in + make_runfolder(self) + + def tearDown(self): + shutil.rmtree(self.temp_dir) + + def test_firecrest(self): + """ + Construct a firecrest object + """ + f = firecrest.firecrest(self.image_analysis_dir) + self.failUnlessEqual(f.version, '1.9.6') + self.failUnlessEqual(f.start, 1) + self.failUnlessEqual(f.stop, 37) + self.failUnlessEqual(f.user, 'diane') + self.failUnlessEqual(f.date, date(2008,10,20)) + + xml = f.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + + f2 = firecrest.Firecrest(xml=xml) + self.failUnlessEqual(f.version, f2.version) + self.failUnlessEqual(f.start, f2.start) + self.failUnlessEqual(f.stop, f2.stop) + self.failUnlessEqual(f.user, f2.user) + + def test_bustard(self): + """ + construct a bustard object + """ + b = bustard.bustard(self.bustard_dir) + self.failUnlessEqual(b.version, '1.9.6') + self.failUnlessEqual(b.date, date(2008,10,20)) + self.failUnlessEqual(b.user, 'diane') + self.failUnlessEqual(len(b.phasing), 8) + self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099) + + xml = b.get_elements() + b2 = bustard.Bustard(xml=xml) + self.failUnlessEqual(b.version, b2.version) + self.failUnlessEqual(b.date, b2.date ) + self.failUnlessEqual(b.user, b2.user) + self.failUnlessEqual(len(b.phasing), len(b2.phasing)) + for key in b.phasing.keys(): + self.failUnlessEqual(b.phasing[key].lane, + b2.phasing[key].lane) + self.failUnlessEqual(b.phasing[key].phasing, + b2.phasing[key].phasing) + self.failUnlessEqual(b.phasing[key].prephasing, + b2.phasing[key].prephasing) + + def test_gerald(self): + # need to update gerald and make tests for it + g = gerald.gerald(self.gerald_dir) + + self.failUnlessEqual(g.version, + '@(#) Id: GERALD.pl,v 1.171 2008/05/19 17:36:14 mzerara Exp') + self.failUnlessEqual(g.date, datetime(2009,2,22,21,15,59)) + self.failUnlessEqual(len(g.lanes), len(g.lanes.keys())) + self.failUnlessEqual(len(g.lanes), len(g.lanes.items())) + + + # list of genomes, matches what was defined up in + # make_gerald_config. 
+ # the first None is to offset the genomes list to be 1..9 + # instead of pythons default 0..8 + genomes = [None, + '/g/mm9', + '/g/mm9', + '/g/elegans190', + '/g/arabidopsis01222004', + '/g/mm9', + '/g/mm9', + '/g/mm9', + '/g/mm9', ] + + # test lane specific parameters from gerald config file + for i in range(1,9): + cur_lane = g.lanes[i] + self.failUnlessEqual(cur_lane.analysis, 'eland_extended') + self.failUnlessEqual(cur_lane.eland_genome, genomes[i]) + self.failUnlessEqual(cur_lane.read_length, '37') + self.failUnlessEqual(cur_lane.use_bases, 'Y'*37) + + # I want to be able to use a simple iterator + for l in g.lanes.values(): + self.failUnlessEqual(l.analysis, 'eland_extended') + self.failUnlessEqual(l.read_length, '37') + self.failUnlessEqual(l.use_bases, 'Y'*37) + + # raw cluster numbers extracted from summary file + # its the first +/- value in the lane results summary + # section + clusters = [None, + (190220, 15118), (190560, 14399), + (187597, 12369), (204142, 16877), + (247308, 11600), (204298, 15640), + (202707, 15404), (198075, 14702),] + + self.failUnlessEqual(len(g.summary), 1) + for i in range(1,9): + summary_lane = g.summary[0][i] + self.failUnlessEqual(summary_lane.cluster, clusters[i]) + self.failUnlessEqual(summary_lane.lane, i) + + xml = g.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + g2 = gerald.Gerald(xml=xml) + + # do it all again after extracting from the xml file + self.failUnlessEqual(g.version, g2.version) + self.failUnlessEqual(g.date, g2.date) + self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys())) + self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items())) + + # test lane specific parameters from gerald config file + for i in range(1,9): + g_lane = g.lanes[i] + g2_lane = g2.lanes[i] + self.failUnlessEqual(g_lane.analysis, g2_lane.analysis) + self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome) + self.failUnlessEqual(g_lane.read_length, g2_lane.read_length) + self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases) + + self.failUnlessEqual(len(g.summary), 1) + # test (some) summary elements + for i in range(1,9): + g_summary = g.summary[0][i] + g2_summary = g2.summary[0][i] + self.failUnlessEqual(g_summary.cluster, g2_summary.cluster) + self.failUnlessEqual(g_summary.lane, g2_summary.lane) + + g_eland = g.eland_results + g2_eland = g2.eland_results + for lane in g_eland.results[0].keys(): + g_results = g_eland.results[0][lane] + g2_results = g2_eland.results[0][lane] + self.failUnlessEqual(g_results.reads, + g2_results.reads) + self.failUnlessEqual(len(g_results.mapped_reads), + len(g2_results.mapped_reads)) + for k in g_results.mapped_reads.keys(): + self.failUnlessEqual(g_results.mapped_reads[k], + g2_results.mapped_reads[k]) + + self.failUnlessEqual(len(g_results.match_codes), + len(g2_results.match_codes)) + for k in g_results.match_codes.keys(): + self.failUnlessEqual(g_results.match_codes[k], + g2_results.match_codes[k]) + + + def test_eland(self): + hg_map = {'Lambda.fa': 'Lambda.fa'} + for i in range(1,22): + short_name = 'chr%d.fa' % (i,) + long_name = 'hg18/chr%d.fa' % (i,) + hg_map[short_name] = long_name + + genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map, + 5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map } + eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) + + for i in range(1,9): + lane = eland.results[0][i] + self.failUnlessEqual(lane.reads, 6) + self.failUnlessEqual(lane.sample_name, "s") + self.failUnlessEqual(lane.lane_id, i) + 
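+            # 17 distinct mapped targets in the simulated eland_multi
+            # data: 15 chrN.fa hits (remapped through hg_map above) plus
+            # the spike.fa/sample1 and spike.fa/sample2 entries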
self.failUnlessEqual(len(lane.mapped_reads), 17) + self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4) + self.failUnlessEqual(lane.match_codes['U0'], 3) + self.failUnlessEqual(lane.match_codes['R0'], 2) + self.failUnlessEqual(lane.match_codes['U1'], 1) + self.failUnlessEqual(lane.match_codes['R1'], 9) + self.failUnlessEqual(lane.match_codes['U2'], 0) + self.failUnlessEqual(lane.match_codes['R2'], 12) + self.failUnlessEqual(lane.match_codes['NM'], 1) + self.failUnlessEqual(lane.match_codes['QC'], 0) + + xml = eland.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + e2 = gerald.ELAND(xml=xml) + + for i in range(1,9): + l1 = eland.results[0][i] + l2 = e2.results[0][i] + self.failUnlessEqual(l1.reads, l2.reads) + self.failUnlessEqual(l1.sample_name, l2.sample_name) + self.failUnlessEqual(l1.lane_id, l2.lane_id) + self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads)) + self.failUnlessEqual(len(l1.mapped_reads), 17) + for k in l1.mapped_reads.keys(): + self.failUnlessEqual(l1.mapped_reads[k], + l2.mapped_reads[k]) + + self.failUnlessEqual(len(l1.match_codes), 9) + self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes)) + for k in l1.match_codes.keys(): + self.failUnlessEqual(l1.match_codes[k], + l2.match_codes[k]) + + def test_runfolder(self): + runs = runfolder.get_runs(self.runfolder_dir) + + # do we get the flowcell id from the filename? + self.failUnlessEqual(len(runs), 1) + name = 'run_30J55AAXX_2009-02-22.xml' + self.failUnlessEqual(runs[0].name, name) + + # do we get the flowcell id from the FlowcellId.xml file + make_flowcell_id(self.runfolder_dir, '30J55AAXX') + runs = runfolder.get_runs(self.runfolder_dir) + self.failUnlessEqual(len(runs), 1) + name = 'run_30J55AAXX_2009-02-22.xml' + self.failUnlessEqual(runs[0].name, name) + + r1 = runs[0] + xml = r1.get_elements() + xml_str = ElementTree.tostring(xml) + + r2 = runfolder.PipelineRun(xml=xml) + self.failUnlessEqual(r1.name, r2.name) + self.failIfEqual(r2.image_analysis, None) + self.failIfEqual(r2.bustard, None) + self.failIfEqual(r2.gerald, None) + + +def suite(): + return unittest.makeSuite(RunfolderTests,'test') + +if __name__ == "__main__": + unittest.main(defaultTest="suite") + diff --git a/trunk/htsworkflow/pipelines/test/test_runfolder_ipar100.py b/trunk/htsworkflow/pipelines/test/test_runfolder_ipar100.py new file mode 100644 index 0000000..d58a70d --- /dev/null +++ b/trunk/htsworkflow/pipelines/test/test_runfolder_ipar100.py @@ -0,0 +1,301 @@ +#!/usr/bin/env python + +from datetime import datetime, date +import os +import tempfile +import shutil +import unittest + +from htsworkflow.pipelines import ipar +from htsworkflow.pipelines import bustard +from htsworkflow.pipelines import gerald +from htsworkflow.pipelines import runfolder +from htsworkflow.pipelines.runfolder import ElementTree + +from htsworkflow.pipelines.test.simulate_runfolder import * + + +def make_runfolder(obj=None): + """ + Make a fake runfolder, attach all the directories to obj if defined + """ + # make a fake runfolder directory + temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_') + + runfolder_dir = os.path.join(temp_dir, + '080102_HWI-EAS229_0010_207BTAAXX') + os.mkdir(runfolder_dir) + + data_dir = os.path.join(runfolder_dir, 'Data') + os.mkdir(data_dir) + + ipar_dir = make_ipar_dir(data_dir) + + matrix_dir = os.path.join(ipar_dir, 'Matrix') + os.mkdir(matrix_dir) + matrix_name = os.path.join(matrix_dir, 's_matrix.txt') + make_matrix(matrix_name) + + bustard_dir = 
os.path.join(ipar_dir, + 'Bustard1.8.28_12-04-2008_diane') + os.mkdir(bustard_dir) + make_phasing_params(bustard_dir) + + gerald_dir = os.path.join(bustard_dir, + 'GERALD_12-04-2008_diane') + os.mkdir(gerald_dir) + make_gerald_config_100(gerald_dir) + make_summary_htm_100(gerald_dir) + make_eland_multi(gerald_dir) + + if obj is not None: + obj.temp_dir = temp_dir + obj.runfolder_dir = runfolder_dir + obj.data_dir = data_dir + obj.image_analysis_dir = ipar_dir + obj.matrix_dir = matrix_dir + obj.bustard_dir = bustard_dir + obj.gerald_dir = gerald_dir + + +class RunfolderTests(unittest.TestCase): + """ + Test components of the runfolder processing code + which includes firecrest, bustard, and gerald + """ + def setUp(self): + # attaches all the directories to the object passed in + make_runfolder(self) + + def tearDown(self): + shutil.rmtree(self.temp_dir) + + def test_ipar(self): + """ + Construct a firecrest object + """ + i = ipar.ipar(self.image_analysis_dir) + self.failUnlessEqual(i.version, '2.01.192.0') + self.failUnlessEqual(i.start, 1) + self.failUnlessEqual(i.stop, 37) + + xml = i.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + + i2 = ipar.IPAR(xml=xml) + self.failUnlessEqual(i.version, i2.version) + self.failUnlessEqual(i.start, i2.start) + self.failUnlessEqual(i.stop, i2.stop) + self.failUnlessEqual(i.date, i2.date) + self.failUnlessEqual(i.file_list(), i2.file_list()) + + def test_bustard(self): + """ + construct a bustard object + """ + b = bustard.bustard(self.bustard_dir) + self.failUnlessEqual(b.version, '1.8.28') + self.failUnlessEqual(b.date, date(2008,4,12)) + self.failUnlessEqual(b.user, 'diane') + self.failUnlessEqual(len(b.phasing), 8) + self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099) + + xml = b.get_elements() + b2 = bustard.Bustard(xml=xml) + self.failUnlessEqual(b.version, b2.version) + self.failUnlessEqual(b.date, b2.date ) + self.failUnlessEqual(b.user, b2.user) + self.failUnlessEqual(len(b.phasing), len(b2.phasing)) + for key in b.phasing.keys(): + self.failUnlessEqual(b.phasing[key].lane, + b2.phasing[key].lane) + self.failUnlessEqual(b.phasing[key].phasing, + b2.phasing[key].phasing) + self.failUnlessEqual(b.phasing[key].prephasing, + b2.phasing[key].prephasing) + + def test_gerald(self): + # need to update gerald and make tests for it + g = gerald.gerald(self.gerald_dir) + + self.failUnlessEqual(g.version, + '@(#) Id: GERALD.pl,v 1.171 2008/05/19 17:36:14 mzerara Exp') + self.failUnlessEqual(g.date, datetime(2009,2,22,21,15,59)) + self.failUnlessEqual(len(g.lanes), len(g.lanes.keys())) + self.failUnlessEqual(len(g.lanes), len(g.lanes.items())) + + + # list of genomes, matches what was defined up in + # make_gerald_config. 
+ # the first None is to offset the genomes list to be 1..9 + # instead of pythons default 0..8 + genomes = [None, + '/g/mm9', + '/g/mm9', + '/g/elegans190', + '/g/arabidopsis01222004', + '/g/mm9', + '/g/mm9', + '/g/mm9', + '/g/mm9', ] + + # test lane specific parameters from gerald config file + for i in range(1,9): + cur_lane = g.lanes[i] + self.failUnlessEqual(cur_lane.analysis, 'eland_extended') + self.failUnlessEqual(cur_lane.eland_genome, genomes[i]) + self.failUnlessEqual(cur_lane.read_length, '37') + self.failUnlessEqual(cur_lane.use_bases, 'Y'*37) + + # I want to be able to use a simple iterator + for l in g.lanes.values(): + self.failUnlessEqual(l.analysis, 'eland_extended') + self.failUnlessEqual(l.read_length, '37') + self.failUnlessEqual(l.use_bases, 'Y'*37) + + # test data extracted from summary file + clusters = [None, + (96483, 9074), (133738, 7938), + (152142, 10002), (15784, 2162), + (119735, 8465), (152177, 8146), + (84649, 7325), (54622, 4812),] + + self.failUnlessEqual(len(g.summary), 1) + for i in range(1,9): + summary_lane = g.summary[0][i] + self.failUnlessEqual(summary_lane.cluster, clusters[i]) + self.failUnlessEqual(summary_lane.lane, i) + + xml = g.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + g2 = gerald.Gerald(xml=xml) + + # do it all again after extracting from the xml file + self.failUnlessEqual(g.version, g2.version) + self.failUnlessEqual(g.date, g2.date) + self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys())) + self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items())) + + # test lane specific parameters from gerald config file + for i in range(1,9): + g_lane = g.lanes[i] + g2_lane = g2.lanes[i] + self.failUnlessEqual(g_lane.analysis, g2_lane.analysis) + self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome) + self.failUnlessEqual(g_lane.read_length, g2_lane.read_length) + self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases) + + # test (some) summary elements + self.failUnlessEqual(len(g.summary), 1) + for i in range(1,9): + g_summary = g.summary[0][i] + g2_summary = g2.summary[0][i] + self.failUnlessEqual(g_summary.cluster, g2_summary.cluster) + self.failUnlessEqual(g_summary.lane, g2_summary.lane) + + g_eland = g.eland_results + g2_eland = g2.eland_results + for lane in g_eland.results[0].keys(): + g_results = g_eland.results[0][lane] + g2_results = g2_eland.results[0][lane] + self.failUnlessEqual(g_results.reads, + g2_results.reads) + self.failUnlessEqual(len(g_results.mapped_reads), + len(g2_results.mapped_reads)) + for k in g_results.mapped_reads.keys(): + self.failUnlessEqual(g_results.mapped_reads[k], + g2_results.mapped_reads[k]) + + self.failUnlessEqual(len(g_results.match_codes), + len(g2_results.match_codes)) + for k in g_results.match_codes.keys(): + self.failUnlessEqual(g_results.match_codes[k], + g2_results.match_codes[k]) + + + def test_eland(self): + hg_map = {'Lambda.fa': 'Lambda.fa'} + for i in range(1,22): + short_name = 'chr%d.fa' % (i,) + long_name = 'hg18/chr%d.fa' % (i,) + hg_map[short_name] = long_name + + genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map, + 5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map } + eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps) + + for i in range(1,9): + lane = eland.results[0][i] + self.failUnlessEqual(lane.reads, 6) + self.failUnlessEqual(lane.sample_name, "s") + self.failUnlessEqual(lane.lane_id, i) + self.failUnlessEqual(len(lane.mapped_reads), 17) + 
self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4) + self.failUnlessEqual(lane.mapped_reads['spike.fa/sample1'], 1) + self.failUnlessEqual(lane.mapped_reads['spike.fa/sample2'], 1) + self.failUnlessEqual(lane.match_codes['U0'], 3) + self.failUnlessEqual(lane.match_codes['R0'], 2) + self.failUnlessEqual(lane.match_codes['U1'], 1) + self.failUnlessEqual(lane.match_codes['R1'], 9) + self.failUnlessEqual(lane.match_codes['U2'], 0) + self.failUnlessEqual(lane.match_codes['R2'], 12) + self.failUnlessEqual(lane.match_codes['NM'], 1) + self.failUnlessEqual(lane.match_codes['QC'], 0) + + xml = eland.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + e2 = gerald.ELAND(xml=xml) + + for i in range(1,9): + l1 = eland.results[0][i] + l2 = e2.results[0][i] + self.failUnlessEqual(l1.reads, l2.reads) + self.failUnlessEqual(l1.sample_name, l2.sample_name) + self.failUnlessEqual(l1.lane_id, l2.lane_id) + self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads)) + self.failUnlessEqual(len(l1.mapped_reads), 17) + for k in l1.mapped_reads.keys(): + self.failUnlessEqual(l1.mapped_reads[k], + l2.mapped_reads[k]) + + self.failUnlessEqual(len(l1.match_codes), 9) + self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes)) + for k in l1.match_codes.keys(): + self.failUnlessEqual(l1.match_codes[k], + l2.match_codes[k]) + + def test_runfolder(self): + runs = runfolder.get_runs(self.runfolder_dir) + + # do we get the flowcell id from the filename? + self.failUnlessEqual(len(runs), 1) + name = 'run_207BTAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) + self.failUnlessEqual(runs[0].name, name) + + # do we get the flowcell id from the FlowcellId.xml file + make_flowcell_id(self.runfolder_dir, '207BTAAXY') + runs = runfolder.get_runs(self.runfolder_dir) + self.failUnlessEqual(len(runs), 1) + name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),) + self.failUnlessEqual(runs[0].name, name) + + r1 = runs[0] + xml = r1.get_elements() + xml_str = ElementTree.tostring(xml) + + r2 = runfolder.PipelineRun(xml=xml) + self.failUnlessEqual(r1.name, r2.name) + self.failIfEqual(r2.image_analysis, None) + self.failIfEqual(r2.bustard, None) + self.failIfEqual(r2.gerald, None) + + +def suite(): + return unittest.makeSuite(RunfolderTests,'test') + +if __name__ == "__main__": + unittest.main(defaultTest="suite") + diff --git a/trunk/htsworkflow/pipelines/test/test_runfolder_ipar130.py b/trunk/htsworkflow/pipelines/test/test_runfolder_ipar130.py new file mode 100644 index 0000000..9e6ac22 --- /dev/null +++ b/trunk/htsworkflow/pipelines/test/test_runfolder_ipar130.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python + +from datetime import datetime, date +import os +import tempfile +import shutil +import unittest + +from htsworkflow.pipelines import eland +from htsworkflow.pipelines import ipar +from htsworkflow.pipelines import bustard +from htsworkflow.pipelines import gerald +from htsworkflow.pipelines import runfolder +from htsworkflow.pipelines.runfolder import ElementTree + +from htsworkflow.pipelines.test.simulate_runfolder import * + + +def make_runfolder(obj=None): + """ + Make a fake runfolder, attach all the directories to obj if defined + """ + # make a fake runfolder directory + temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_') + + runfolder_dir = os.path.join(temp_dir, + '090313_HWI-EAS229_0101_3021JAAXX') + os.mkdir(runfolder_dir) + + data_dir = os.path.join(runfolder_dir, 'Data') + os.mkdir(data_dir) + + ipar_dir = 
make_ipar_dir(data_dir, '1.30') + + bustard_dir = os.path.join(ipar_dir, + 'Bustard1.3.2_15-03-2008_diane') + os.mkdir(bustard_dir) + make_phasing_params(bustard_dir) + make_bustard_config132(bustard_dir) + + gerald_dir = os.path.join(bustard_dir, + 'GERALD_15-03-2008_diane') + os.mkdir(gerald_dir) + make_gerald_config_100(gerald_dir) + make_summary_ipar130_htm(gerald_dir) + make_eland_multi(gerald_dir, lane_list=[1,2,3,4,5,6,]) + make_scarf(gerald_dir, lane_list=[7,]) + make_fastq(gerald_dir, lane_list=[8,]) + + if obj is not None: + obj.temp_dir = temp_dir + obj.runfolder_dir = runfolder_dir + obj.data_dir = data_dir + obj.image_analysis_dir = ipar_dir + obj.bustard_dir = bustard_dir + obj.gerald_dir = gerald_dir + + +class RunfolderTests(unittest.TestCase): + """ + Test components of the runfolder processing code + which includes firecrest, bustard, and gerald + """ + def setUp(self): + # attaches all the directories to the object passed in + make_runfolder(self) + + def tearDown(self): + shutil.rmtree(self.temp_dir) + + def test_ipar(self): + """ + Construct a firecrest object + """ + i = ipar.ipar(self.image_analysis_dir) + self.failUnlessEqual(i.version, '2.01.192.0') + self.failUnlessEqual(i.start, 1) + self.failUnlessEqual(i.stop, 37) + + xml = i.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + + i2 = ipar.IPAR(xml=xml) + self.failUnlessEqual(i.version, i2.version) + self.failUnlessEqual(i.start, i2.start) + self.failUnlessEqual(i.stop, i2.stop) + self.failUnlessEqual(i.date, i2.date) + self.failUnlessEqual(i.file_list(), i2.file_list()) + + def test_bustard(self): + """ + construct a bustard object + """ + def check_crosstalk(crosstalk): + self.failUnlessAlmostEqual(crosstalk.base['A'][0], 1.27) + self.failUnlessAlmostEqual(crosstalk.base['A'][1], 0.20999999999999) + self.failUnlessAlmostEqual(crosstalk.base['A'][2], -0.02) + self.failUnlessAlmostEqual(crosstalk.base['A'][3], -0.03) + + self.failUnlessAlmostEqual(crosstalk.base['C'][0], 0.57) + self.failUnlessAlmostEqual(crosstalk.base['C'][1], 0.58) + self.failUnlessAlmostEqual(crosstalk.base['C'][2], -0.01) + self.failUnlessAlmostEqual(crosstalk.base['C'][3], -0.01) + + self.failUnlessAlmostEqual(crosstalk.base['T'][0], -0.02) + self.failUnlessAlmostEqual(crosstalk.base['T'][1], -0.02) + self.failUnlessAlmostEqual(crosstalk.base['T'][2], 0.80) + self.failUnlessAlmostEqual(crosstalk.base['T'][3], 1.07) + + self.failUnlessAlmostEqual(crosstalk.base['G'][0], -0.03) + self.failUnlessAlmostEqual(crosstalk.base['G'][1], -0.04) + self.failUnlessAlmostEqual(crosstalk.base['G'][2], 1.51) + self.failUnlessAlmostEqual(crosstalk.base['G'][3], -0.02) + + b = bustard.bustard(self.bustard_dir) + self.failUnlessEqual(b.version, '1.3.2') + self.failUnlessEqual(b.date, date(2008,3,15)) + self.failUnlessEqual(b.user, 'diane') + self.failUnlessEqual(len(b.phasing), 8) + self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099) + self.failUnlessEqual(b.crosstalk.base.keys(), ['A','C','T','G']) + check_crosstalk(b.crosstalk) + + xml = b.get_elements() + print ElementTree.dump(xml) + b2 = bustard.Bustard(xml=xml) + self.failUnlessEqual(b.version, b2.version) + self.failUnlessEqual(b.date, b2.date ) + self.failUnlessEqual(b.user, b2.user) + self.failUnlessEqual(len(b.phasing), len(b2.phasing)) + for key in b.phasing.keys(): + self.failUnlessEqual(b.phasing[key].lane, + b2.phasing[key].lane) + self.failUnlessEqual(b.phasing[key].phasing, + b2.phasing[key].phasing) + 
self.failUnlessEqual(b.phasing[key].prephasing, + b2.phasing[key].prephasing) + check_crosstalk(b2.crosstalk) + + def test_gerald(self): + # need to update gerald and make tests for it + g = gerald.gerald(self.gerald_dir) + + self.failUnlessEqual(g.version, + '@(#) Id: GERALD.pl,v 1.171 2008/05/19 17:36:14 mzerara Exp') + self.failUnlessEqual(g.date, datetime(2009,2,22,21,15,59)) + self.failUnlessEqual(len(g.lanes), len(g.lanes.keys())) + self.failUnlessEqual(len(g.lanes), len(g.lanes.items())) + + + # list of genomes, matches what was defined up in + # make_gerald_config. + # the first None is to offset the genomes list to be 1..9 + # instead of pythons default 0..8 + genomes = [None, + '/g/mm9', + '/g/mm9', + '/g/elegans190', + '/g/arabidopsis01222004', + '/g/mm9', + '/g/mm9', + '/g/mm9', + '/g/mm9', ] + + # test lane specific parameters from gerald config file + for i in range(1,9): + cur_lane = g.lanes[i] + self.failUnlessEqual(cur_lane.analysis, 'eland_extended') + self.failUnlessEqual(cur_lane.eland_genome, genomes[i]) + self.failUnlessEqual(cur_lane.read_length, '37') + self.failUnlessEqual(cur_lane.use_bases, 'Y'*37) + + # I want to be able to use a simple iterator + for l in g.lanes.values(): + self.failUnlessEqual(l.analysis, 'eland_extended') + self.failUnlessEqual(l.read_length, '37') + self.failUnlessEqual(l.use_bases, 'Y'*37) + + # test data extracted from summary file + clusters = [None, + (126910, 4300), (165739, 6792), + (196565, 8216), (153897, 8501), + (135536, 3908), (154083, 9315), + (159991, 9292), (198479, 17671),] + + self.failUnlessEqual(len(g.summary), 1) + for i in range(1,9): + summary_lane = g.summary[0][i] + self.failUnlessEqual(summary_lane.cluster, clusters[i]) + self.failUnlessEqual(summary_lane.lane, i) + + xml = g.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + g2 = gerald.Gerald(xml=xml) + + # do it all again after extracting from the xml file + self.failUnlessEqual(g.version, g2.version) + self.failUnlessEqual(g.date, g2.date) + self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys())) + self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items())) + + # test lane specific parameters from gerald config file + for i in range(1,9): + g_lane = g.lanes[i] + g2_lane = g2.lanes[i] + self.failUnlessEqual(g_lane.analysis, g2_lane.analysis) + self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome) + self.failUnlessEqual(g_lane.read_length, g2_lane.read_length) + self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases) + + # test (some) summary elements + self.failUnlessEqual(len(g.summary), 1) + for i in range(1,9): + g_summary = g.summary[0][i] + g2_summary = g2.summary[0][i] + self.failUnlessEqual(g_summary.cluster, g2_summary.cluster) + self.failUnlessEqual(g_summary.lane, g2_summary.lane) + + g_eland = g.eland_results + g2_eland = g2.eland_results + for lane in g_eland.results[0].keys(): + g_results = g_eland.results[0][lane] + g2_results = g2_eland.results[0][lane] + self.failUnlessEqual(g_results.reads, + g2_results.reads) + if isinstance(g_results, eland.ElandLane): + self.failUnlessEqual(len(g_results.mapped_reads), + len(g2_results.mapped_reads)) + for k in g_results.mapped_reads.keys(): + self.failUnlessEqual(g_results.mapped_reads[k], + g2_results.mapped_reads[k]) + + self.failUnlessEqual(len(g_results.match_codes), + len(g2_results.match_codes)) + for k in g_results.match_codes.keys(): + self.failUnlessEqual(g_results.match_codes[k], + g2_results.match_codes[k]) + + 
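+    # A quick key to the ELAND match codes asserted in the eland tests
+    # below, as the standard ELAND categories are usually read:
+    #   NM = no match, QC = failed quality filtering,
+    #   U0/U1/U2 = unique alignment with 0/1/2 mismatches,
+    #   R0/R1/R2 = repeated (multiple) alignments with 0/1/2 mismatches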
+ def test_eland(self): + hg_map = {'Lambda.fa': 'Lambda.fa'} + for i in range(1,22): + short_name = 'chr%d.fa' % (i,) + long_name = 'hg18/chr%d.fa' % (i,) + hg_map[short_name] = long_name + + genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map, + 5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map } + eland_container = gerald.eland(self.gerald_dir, genome_maps=genome_maps) + + # I added sequence lanes to the last 2 lanes of this test case + for i in range(1,7): + lane = eland_container.results[0][i] + self.failUnlessEqual(lane.reads, 6) + self.failUnlessEqual(lane.sample_name, "s") + self.failUnlessEqual(lane.lane_id, i) + self.failUnlessEqual(len(lane.mapped_reads), 17) + self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4) + self.failUnlessEqual(lane.match_codes['U0'], 3) + self.failUnlessEqual(lane.match_codes['R0'], 2) + self.failUnlessEqual(lane.match_codes['U1'], 1) + self.failUnlessEqual(lane.match_codes['R1'], 9) + self.failUnlessEqual(lane.match_codes['U2'], 0) + self.failUnlessEqual(lane.match_codes['R2'], 12) + self.failUnlessEqual(lane.match_codes['NM'], 1) + self.failUnlessEqual(lane.match_codes['QC'], 0) + + # test scarf + lane = eland_container.results[0][7] + self.failUnlessEqual(lane.reads, 5) + self.failUnlessEqual(lane.sample_name, 's') + self.failUnlessEqual(lane.lane_id, 7) + self.failUnlessEqual(lane.sequence_type, eland.SequenceLane.SCARF_TYPE) + + # test fastq + lane = eland_container.results[0][8] + self.failUnlessEqual(lane.reads, 3) + self.failUnlessEqual(lane.sample_name, 's') + self.failUnlessEqual(lane.lane_id, 8) + self.failUnlessEqual(lane.sequence_type, eland.SequenceLane.FASTQ_TYPE) + + xml = eland_container.get_elements() + # just make sure that element tree can serialize the tree + xml_str = ElementTree.tostring(xml) + e2 = gerald.ELAND(xml=xml) + + for i in range(1,9): + l1 = eland_container.results[0][i] + l2 = e2.results[0][i] + self.failUnlessEqual(l1.reads, l2.reads) + self.failUnlessEqual(l1.sample_name, l2.sample_name) + self.failUnlessEqual(l1.lane_id, l2.lane_id) + if isinstance(l1, eland.ElandLane): + self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads)) + self.failUnlessEqual(len(l1.mapped_reads), 17) + for k in l1.mapped_reads.keys(): + self.failUnlessEqual(l1.mapped_reads[k], + l2.mapped_reads[k]) + + self.failUnlessEqual(len(l1.match_codes), 9) + self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes)) + for k in l1.match_codes.keys(): + self.failUnlessEqual(l1.match_codes[k], + l2.match_codes[k]) + elif isinstance(l1, eland.SequenceLane): + print 'l1', l1.__dict__ + print 'l2', l2.__dict__ + self.failUnlessEqual(l1.sequence_type, l2.sequence_type) + + def test_runfolder(self): + runs = runfolder.get_runs(self.runfolder_dir) + + # do we get the flowcell id from the filename? 
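+        # run xml files appear to be named run_<flowcell>_<YYYY-MM-DD>.xml;
+        # until make_flowcell_id() writes a FlowcellId.xml the flowcell id
+        # is taken from the runfolder directory name (see assertions below)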
+        self.failUnlessEqual(len(runs), 1)
+        name = 'run_3021JAAXX_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
+        self.failUnlessEqual(runs[0].name, name)
+
+        # do we get the flowcell id from the FlowcellId.xml file?
+        make_flowcell_id(self.runfolder_dir, '207BTAAXY')
+        runs = runfolder.get_runs(self.runfolder_dir)
+        self.failUnlessEqual(len(runs), 1)
+        name = 'run_207BTAAXY_%s.xml' % ( date.today().strftime('%Y-%m-%d'),)
+        self.failUnlessEqual(runs[0].name, name)
+
+        r1 = runs[0]
+        xml = r1.get_elements()
+        xml_str = ElementTree.tostring(xml)
+
+        r2 = runfolder.PipelineRun(xml=xml)
+        self.failUnlessEqual(r1.name, r2.name)
+        self.failIfEqual(r2.image_analysis, None)
+        self.failIfEqual(r2.bustard, None)
+        self.failIfEqual(r2.gerald, None)
+
+
+def suite():
+    return unittest.makeSuite(RunfolderTests,'test')
+
+if __name__ == "__main__":
+    unittest.main(defaultTest="suite")
+
diff --git a/trunk/htsworkflow/pipelines/test/test_runfolder_pair.py b/trunk/htsworkflow/pipelines/test/test_runfolder_pair.py
new file mode 100644
index 0000000..c0fb684
--- /dev/null
+++ b/trunk/htsworkflow/pipelines/test/test_runfolder_pair.py
@@ -0,0 +1,327 @@
+#!/usr/bin/env python
+
+from datetime import datetime, date
+import os
+import tempfile
+import shutil
+import unittest
+
+from htsworkflow.pipelines import firecrest
+from htsworkflow.pipelines import bustard
+from htsworkflow.pipelines import gerald
+from htsworkflow.pipelines import runfolder
+from htsworkflow.pipelines.runfolder import ElementTree
+
+from htsworkflow.pipelines.test.simulate_runfolder import *
+
+
+def make_runfolder(obj=None):
+    """
+    Make a fake runfolder, attach all the directories to obj if defined
+    """
+    # make a fake runfolder directory
+    temp_dir = tempfile.mkdtemp(prefix='tmp_runfolder_')
+
+    runfolder_dir = os.path.join(temp_dir,
+                                 '080102_HWI-EAS229_0010_207BTAAXX')
+    os.mkdir(runfolder_dir)
+
+    data_dir = os.path.join(runfolder_dir, 'Data')
+    os.mkdir(data_dir)
+
+    ipar_dir = make_firecrest_dir(data_dir, "1.9.6", 1, 152)
+
+    matrix_dir = os.path.join(ipar_dir, 'Matrix')
+    os.mkdir(matrix_dir)
+    matrix_name = os.path.join(matrix_dir, 's_matrix.txt')
+    make_matrix(matrix_name)
+
+    bustard_dir = os.path.join(ipar_dir,
+                               'Bustard1.8.28_12-04-2008_diane')
+    os.mkdir(bustard_dir)
+    make_phasing_params(bustard_dir)
+
+    gerald_dir = os.path.join(bustard_dir,
+                              'GERALD_12-04-2008_diane')
+    os.mkdir(gerald_dir)
+    make_gerald_config_100(gerald_dir)
+    make_summary_paired_htm(gerald_dir)
+    make_eland_multi(gerald_dir, paired=True)
+
+    if obj is not None:
+        obj.temp_dir = temp_dir
+        obj.runfolder_dir = runfolder_dir
+        obj.data_dir = data_dir
+        obj.image_analysis_dir = ipar_dir
+        obj.matrix_dir = matrix_dir
+        obj.bustard_dir = bustard_dir
+        obj.gerald_dir = gerald_dir
+
+
+class RunfolderTests(unittest.TestCase):
+    """
+    Test components of the runfolder processing code,
+    which includes firecrest, bustard, and gerald
+    """
+    def setUp(self):
+        # attaches all the directories to the object passed in
+        make_runfolder(self)
+
+    def tearDown(self):
+        shutil.rmtree(self.temp_dir)
+
+    def test_firecrest(self):
+        """
+        Construct a firecrest object
+        """
+        f = firecrest.firecrest(self.image_analysis_dir)
+        self.failUnlessEqual(f.version, '1.9.6')
+        self.failUnlessEqual(f.start, 1)
+        self.failUnlessEqual(f.stop, 152)
+        self.failUnlessEqual(f.user, 'diane')
+        # As of 2008-12-8, the date was being set in
+        # simulate_runfolder.make_firecrest_dir
+        self.failUnlessEqual(f.date, date(2008,4,12))
+
+        xml = f.get_elements()
+        # just make sure that element tree can serialize the tree
+        xml_str = ElementTree.tostring(xml)
+
+        f2 = firecrest.Firecrest(xml=xml)
+        self.failUnlessEqual(f.version, f2.version)
+        self.failUnlessEqual(f.start, f2.start)
+        self.failUnlessEqual(f.stop, f2.stop)
+        self.failUnlessEqual(f.user, f2.user)
+
+    def test_bustard(self):
+        """
+        Construct a bustard object
+        """
+        b = bustard.bustard(self.bustard_dir)
+        self.failUnlessEqual(b.version, '1.8.28')
+        self.failUnlessEqual(b.date, date(2008,4,12))
+        self.failUnlessEqual(b.user, 'diane')
+        self.failUnlessEqual(len(b.phasing), 8)
+        self.failUnlessAlmostEqual(b.phasing[8].phasing, 0.0099)
+
+        xml = b.get_elements()
+        b2 = bustard.Bustard(xml=xml)
+        self.failUnlessEqual(b.version, b2.version)
+        self.failUnlessEqual(b.date, b2.date)
+        self.failUnlessEqual(b.user, b2.user)
+        self.failUnlessEqual(len(b.phasing), len(b2.phasing))
+        for key in b.phasing.keys():
+            self.failUnlessEqual(b.phasing[key].lane,
+                                 b2.phasing[key].lane)
+            self.failUnlessEqual(b.phasing[key].phasing,
+                                 b2.phasing[key].phasing)
+            self.failUnlessEqual(b.phasing[key].prephasing,
+                                 b2.phasing[key].prephasing)
+
+    def test_gerald(self):
+        # need to update gerald and make tests for it
+        g = gerald.gerald(self.gerald_dir)
+
+        self.failUnlessEqual(g.version,
+            '@(#) Id: GERALD.pl,v 1.171 2008/05/19 17:36:14 mzerara Exp')
+        self.failUnlessEqual(g.date, datetime(2009,2,22,21,15,59))
+        self.failUnlessEqual(len(g.lanes), len(g.lanes.keys()))
+        self.failUnlessEqual(len(g.lanes), len(g.lanes.items()))
+
+
+        # list of genomes, matches what was defined up in
+        # make_gerald_config.
+        # the first None is to offset the genomes list to be 1..9
+        # instead of Python's default 0..8
+        genomes = [None,
+                   '/g/mm9',
+                   '/g/mm9',
+                   '/g/elegans190',
+                   '/g/arabidopsis01222004',
+                   '/g/mm9',
+                   '/g/mm9',
+                   '/g/mm9',
+                   '/g/mm9', ]
+
+        # test lane specific parameters from gerald config file
+        for i in range(1,9):
+            cur_lane = g.lanes[i]
+            self.failUnlessEqual(cur_lane.analysis, 'eland_extended')
+            self.failUnlessEqual(cur_lane.eland_genome, genomes[i])
+            self.failUnlessEqual(cur_lane.read_length, '37')
+            self.failUnlessEqual(cur_lane.use_bases, 'Y'*37)
+
+        # I want to be able to use a simple iterator
+        for l in g.lanes.values():
+            self.failUnlessEqual(l.analysis, 'eland_extended')
+            self.failUnlessEqual(l.read_length, '37')
+            self.failUnlessEqual(l.use_bases, 'Y'*37)
+
+        # test data extracted from summary file
+        clusters = [[None,
+                     (103646, 4515), (106678, 4652),
+                     (84583, 5963), (68813, 4782),
+                     (104854, 4664), (43555, 1632),
+                     (54265, 1588), (64363, 2697),],
+                    [None,
+                     (103647, 4516), (106679, 4653),
+                     (84584, 5964), (68814, 4783),
+                     (104855, 4665), (43556, 1633),
+                     (54266, 1589), (64364, 2698),],]
+
+        for end in [0,1]:
+            for lane in range(1,9):
+                summary_lane = g.summary[end][lane]
+                self.failUnlessEqual(summary_lane.cluster, clusters[end][lane])
+                self.failUnlessEqual(summary_lane.lane, lane)
+
+        xml = g.get_elements()
+        # just make sure that element tree can serialize the tree
+        xml_str = ElementTree.tostring(xml)
+        g2 = gerald.Gerald(xml=xml)
+
+        # do it all again after extracting from the xml file
+        self.failUnlessEqual(g.version, g2.version)
+        self.failUnlessEqual(g.date, g2.date)
+        self.failUnlessEqual(len(g.lanes.keys()), len(g2.lanes.keys()))
+        self.failUnlessEqual(len(g.lanes.items()), len(g2.lanes.items()))
+
+        # test lane specific parameters from gerald config file
+        for i in range(1,9):
+            g_lane = g.lanes[i]
+            g2_lane = g2.lanes[i]
+            self.failUnlessEqual(g_lane.analysis, g2_lane.analysis)
+            self.failUnlessEqual(g_lane.eland_genome, g2_lane.eland_genome)
+            self.failUnlessEqual(g_lane.read_length, g2_lane.read_length)
+            self.failUnlessEqual(g_lane.use_bases, g2_lane.use_bases)
+
+        # test (some) summary elements
+        for end in [0,1]:
+            for i in range(1,9):
+                g_summary = g.summary[end][i]
+                g2_summary = g2.summary[end][i]
+                self.failUnlessEqual(g_summary.cluster, g2_summary.cluster)
+                self.failUnlessEqual(g_summary.lane, g2_summary.lane)
+
+            g_eland = g.eland_results
+            g2_eland = g2.eland_results
+            for lane in g_eland.results[end].keys():
+                g_results = g_eland.results[end][lane]
+                # compare against the reparsed copy, not the original again
+                g2_results = g2_eland.results[end][lane]
+                self.failUnlessEqual(g_results.reads,
+                                     g2_results.reads)
+                self.failUnlessEqual(len(g_results.mapped_reads),
+                                     len(g2_results.mapped_reads))
+                for k in g_results.mapped_reads.keys():
+                    self.failUnlessEqual(g_results.mapped_reads[k],
+                                         g2_results.mapped_reads[k])
+
+                self.failUnlessEqual(len(g_results.match_codes),
+                                     len(g2_results.match_codes))
+                for k in g_results.match_codes.keys():
+                    self.failUnlessEqual(g_results.match_codes[k],
+                                         g2_results.match_codes[k])
+
+
+    def test_eland(self):
+        hg_map = {'Lambda.fa': 'Lambda.fa'}
+        for i in range(1,22):
+            short_name = 'chr%d.fa' % (i,)
+            long_name = 'hg18/chr%d.fa' % (i,)
+            hg_map[short_name] = long_name
+
+        genome_maps = { 1:hg_map, 2:hg_map, 3:hg_map, 4:hg_map,
+                        5:hg_map, 6:hg_map, 7:hg_map, 8:hg_map }
+        eland = gerald.eland(self.gerald_dir, genome_maps=genome_maps)
+
+        # check first end
+        for i in range(1,9):
+            lane = eland.results[0][i]
+            self.failUnlessEqual(lane.reads, 6)
+            self.failUnlessEqual(lane.sample_name, "s")
+            self.failUnlessEqual(lane.lane_id, i)
+            self.failUnlessEqual(len(lane.mapped_reads), 17)
+            self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4)
+            self.failUnlessEqual(lane.match_codes['U0'], 3)
+            self.failUnlessEqual(lane.match_codes['R0'], 2)
+            self.failUnlessEqual(lane.match_codes['U1'], 1)
+            self.failUnlessEqual(lane.match_codes['R1'], 9)
+            self.failUnlessEqual(lane.match_codes['U2'], 0)
+            self.failUnlessEqual(lane.match_codes['R2'], 12)
+            self.failUnlessEqual(lane.match_codes['NM'], 1)
+            self.failUnlessEqual(lane.match_codes['QC'], 0)
+
+        # check second end
+        for i in range(1,9):
+            lane = eland.results[1][i]
+            self.failUnlessEqual(lane.reads, 7)
+            self.failUnlessEqual(lane.sample_name, "s")
+            self.failUnlessEqual(lane.lane_id, i)
+            self.failUnlessEqual(len(lane.mapped_reads), 17)
+            self.failUnlessEqual(lane.mapped_reads['hg18/chr5.fa'], 4)
+            self.failUnlessEqual(lane.match_codes['U0'], 3)
+            self.failUnlessEqual(lane.match_codes['R0'], 2)
+            self.failUnlessEqual(lane.match_codes['U1'], 1)
+            self.failUnlessEqual(lane.match_codes['R1'], 9)
+            self.failUnlessEqual(lane.match_codes['U2'], 0)
+            self.failUnlessEqual(lane.match_codes['R2'], 12)
+            self.failUnlessEqual(lane.match_codes['NM'], 1)
+            self.failUnlessEqual(lane.match_codes['QC'], 1)
+
+        xml = eland.get_elements()
+        # just make sure that element tree can serialize the tree
+        xml_str = ElementTree.tostring(xml)
+        e2 = gerald.ELAND(xml=xml)
+
+        for end in [0, 1]:
+            for i in range(1,9):
+                l1 = eland.results[end][i]
+                l2 = e2.results[end][i]
+                self.failUnlessEqual(l1.reads, l2.reads)
+                self.failUnlessEqual(l1.sample_name, l2.sample_name)
+                self.failUnlessEqual(l1.lane_id, l2.lane_id)
+                self.failUnlessEqual(len(l1.mapped_reads), len(l2.mapped_reads))
+                self.failUnlessEqual(len(l1.mapped_reads), 17)
+                for k in l1.mapped_reads.keys():
+                    self.failUnlessEqual(l1.mapped_reads[k],
+                                         l2.mapped_reads[k])
+
+                self.failUnlessEqual(len(l1.match_codes), 9)
+                self.failUnlessEqual(len(l1.match_codes), len(l2.match_codes))
+                for k in l1.match_codes.keys():
+                    self.failUnlessEqual(l1.match_codes[k],
+                                         l2.match_codes[k])
+
+    def test_runfolder(self):
+        runs = runfolder.get_runs(self.runfolder_dir)
+
+        # do we get the flowcell id from the filename?
+        self.failUnlessEqual(len(runs), 1)
+        # firecrest's date depends on the filename, not the create time.
+        name = 'run_207BTAAXX_2009-02-22.xml'
+        self.failUnlessEqual(runs[0].name, name)
+
+        # do we get the flowcell id from the FlowcellId.xml file?
+        make_flowcell_id(self.runfolder_dir, '207BTAAXY')
+        runs = runfolder.get_runs(self.runfolder_dir)
+        self.failUnlessEqual(len(runs), 1)
+        name = 'run_207BTAAXY_2009-02-22.xml'
+        self.failUnlessEqual(runs[0].name, name)
+
+        r1 = runs[0]
+        xml = r1.get_elements()
+        xml_str = ElementTree.tostring(xml)
+
+        r2 = runfolder.PipelineRun(xml=xml)
+        self.failUnlessEqual(r1.name, r2.name)
+        self.failIfEqual(r2.image_analysis, None)
+        self.failIfEqual(r2.bustard, None)
+        self.failIfEqual(r2.gerald, None)
+
+
+def suite():
+    return unittest.makeSuite(RunfolderTests,'test')
+
+if __name__ == "__main__":
+    unittest.main(defaultTest="suite")
+
diff --git a/trunk/htsworkflow/pipelines/test/testdata/IPAR1.01.params b/trunk/htsworkflow/pipelines/test/testdata/IPAR1.01.params
new file mode 100644
index 0000000..11bc608
--- /dev/null
+++ b/trunk/htsworkflow/pipelines/test/testdata/IPAR1.01.params
@@ -0,0 +1,63 @@
(The XML markup of this 63-line IPAR parameter file was lost when the patch
was converted to text, so the element names are not recoverable. The
surviving values record two read blocks spanning cycles 1 to 37 of run
090220_HWI-EAS229_0093_30VR0AAXX on instrument HWI-EAS229, gzip compression
with a .p.gz suffix, the numeric parameters 1, 2.7, 1.5, and 4, and eight
per-lane entries whose sample is 's'. The file ends without a trailing
newline.)
diff --git a/trunk/htsworkflow/pipelines/test/testdata/Summary-ipar130.htm b/trunk/htsworkflow/pipelines/test/testdata/Summary-ipar130.htm
new file mode 100644
index 0000000..c9eaca9
--- /dev/null
+++ b/trunk/htsworkflow/pipelines/test/testdata/Summary-ipar130.htm
@@ -0,0 +1,9325 @@
(The HTML markup of this Summary file was stripped in extraction; the
recoverable report content follows.)

Summary Information For Experiment 090313_HWI-EAS229_0101_3021JAAXX

Chip Summary

  Machine      HWI-EAS229
  Run Folder   090313_HWI-EAS229_0101_3021JAAXX
  Chip ID      unknown

Chip Results Summary

  Clusters     Clusters (PF)   Yield (kbases)
  128921986    21940781        811809
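(As a consistency check on the reconstructed totals, assuming yield is simply
pass-filter clusters times read length: 21,940,781 clusters x 37 bases =
811,808,897 bases, i.e. about 811,809 kbases, matching the Yield column.)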

Lane Parameter Summary

  Lane  Sample ID  Sample Target  Sample Type     Length  Filter                           Chast. Thresh.  Num Tiles  Tiles
  1     unknown    mm9            ELAND_EXTENDED  37      '((FAILED_CHASTITY<=1.000000))'  0.600000        100        Lane 1
  2     unknown    elegans190     ELAND_EXTENDED  37      '((FAILED_CHASTITY<=1.000000))'  0.600000        100        Lane 2
  3     unknown    elegans190     ELAND_EXTENDED  37      '((FAILED_CHASTITY<=1.000000))'  0.600000        100        Lane 3
  4     unknown    elegans190     ELAND_EXTENDED  37      '((FAILED_CHASTITY<=1.000000))'  0.600000        100        Lane 4
  5     unknown    elegans190     ELAND_EXTENDED  37      '((FAILED_CHASTITY<=1.000000))'  0.600000        100        Lane 5
  6     unknown    elegans190     ELAND_EXTENDED  37      '((FAILED_CHASTITY<=1.000000))'  0.600000        100        Lane 6
  7     unknown    elegans190     ELAND_EXTENDED  37      '((FAILED_CHASTITY<=1.000000))'  0.600000        100        Lane 7
  8     unknown    mm9            ELAND_EXTENDED  37      '((FAILED_CHASTITY<=1.000000))'  0.600000        100        Lane 8
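(The Filter column expresses Illumina's chastity filter: chastity is the
brightest base's intensity divided by the sum of the two brightest
intensities, chastity = I_max / (I_max + I_2nd), and clusters whose chastity
clears the 0.6 value in the Chast. Thresh. column are counted as pass-filter,
the "PF" figures reported throughout this summary.)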

Lane Results Summary

  (columns after Lane Yield are tile mean +/- SD for the lane)

  Lane  Lane Yield  Clusters          Clusters         1st Cycle   % intensity after  % PF            % Align         Alignment       % Error
        (kbases)    (raw)             (PF)             Int (PF)    20 cycles (PF)     Clusters        (PF)            Score (PF)      Rate (PF)
  1     57684       126910 +/- 4300   15590 +/- 4874   54 +/- 14   124.36 +/- 27.05   12.29 +/- 3.82  7.36 +/- 1.84   5.92 +/- 1.64   4.52 +/- 0.58
  2     78644       165739 +/- 6792   21255 +/- 5034   76 +/- 16   88.44 +/- 20.59    12.94 +/- 3.52  13.55 +/- 0.83  12.33 +/- 0.84  5.05 +/- 0.24
  3     68671       196565 +/- 8216   18559 +/- 5413   106 +/- 19  78.51 +/- 19.65    9.41 +/- 2.64   1.07 +/- 0.10   0.81 +/- 0.08   6.27 +/- 0.46
  4     126273      153897 +/- 8501   34128 +/- 7984   75 +/- 12   110.44 +/- 26.03   22.13 +/- 4.82  4.53 +/- 0.36   4.51 +/- 0.38   3.58 +/- 0.22
  5     116257      135536 +/- 3908   31420 +/- 5039   70 +/- 10   116.68 +/- 24.46   23.21 +/- 3.84  4.25 +/- 0.39   4.19 +/- 0.41   3.62 +/- 0.26
  6     159230      154083 +/- 9315   43035 +/- 10193  79 +/- 14   123.00 +/- 28.80   27.76 +/- 5.59  3.64 +/- 0.30   3.53 +/- 0.31   3.48 +/- 0.30
  7     180779      159991 +/- 9292   48859 +/- 8420   82 +/- 14   121.76 +/- 24.75   30.47 +/- 4.53  0.86 +/- 0.10   0.58 +/- 0.07   3.11 +/- 0.36
  8     24267       198479 +/- 17671  6625 +/- 1773    122 +/- 14  73.80 +/- 14.41    3.30 +/- 0.68   48.65 +/- 5.61  43.07 +/- 6.50  3.28 +/- 0.29

  Tile mean across chip
  Avg   -           161400            27434            83          104.62             17.69           10.49           9.37            4.11
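(The raw cluster figures here are per-tile means, so the chip total works out
as the eight lane means summed and scaled by the 100 tiles per lane:
(126910 + 165739 + 196565 + 153897 + 135536 + 154083 + 159991 + 198479) x 100
= 129,120,000, in line with the 128,921,986 total in the Chip Results Summary
once rounding of the means is allowed for.)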

Expanded Lane Summary

  Lane  Clusters     %        %           % Error     Equiv Perfect   %         Cycle 2-4 Av      Cycle 2-10 Av   Cycle 10-20 Av  % Align  % Error    Equiv Perfect
        (tile mean)  Phasing  Prephasing  Rate (raw)  Clusters (raw)  retained  Int (PF)          % Loss (PF)     % Loss (PF)     (PF)     Rate (PF)  Clusters (PF)
        (raw)
  1     126911       0.4800   0.3000      5.57        3076            12.29     65.81 +/- 15.68   -0.47 +/- 1.84  2.12 +/- 1.13   7.36     4.52       1075
  2     165740       0.4800   0.3000      5.96        6076            12.94     76.73 +/- 12.15   1.47 +/- 1.92   1.21 +/- 1.90   13.55    5.05       2541
  3     196566       0.4800   0.3000      7.47        488             9.41      103.17 +/- 13.33  2.71 +/- 1.99   1.26 +/- 1.88   1.07     6.27       173
  4     153898       0.4800   0.3000      4.79        3322            22.13     95.07 +/- 11.42   1.28 +/- 2.12   2.69 +/- 1.55   4.53     3.58       1402
  5     135536       0.4800   0.3000      4.75        2582            23.21     87.51 +/- 10.87   1.01 +/- 2.07   2.00 +/- 1.53   4.25     3.62       1221
  6     154084       0.4800   0.3000      4.76        3270            27.76     109.23 +/- 14.97  1.17 +/- 2.12   3.01 +/- 1.73   3.64     3.48       1422
  7     159991       0.4800   0.3000      4.36        1001            30.47     109.94 +/- 12.23  1.48 +/- 2.32   2.30 +/- 2.47   0.86     3.11       394
  8     198479       0.4800   0.3000      5.16        10876           3.30      115.81 +/- 11.51  2.09 +/- 1.67   1.19 +/- 2.91   48.65    3.28       2958
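(The "mean +/- SD" cells above are what the runfolder tests assert as tuples,
e.g. summary_lane.cluster == (126910, 4300) in test_gerald. A minimal sketch
of the parsing involved, assuming the cell text survives as shown;
parse_mean_sd is a hypothetical name, not the htsworkflow parser:

    import re

    def parse_mean_sd(cell):
        # Split a Summary.htm cell like '126910 +/- 4300' into an
        # (int mean, int deviation) tuple; illustration only, and limited
        # to the non-negative cluster-count style of cell.
        match = re.match(r'\s*([0-9.]+)\s*\+/-\s*([0-9.]+)', cell)
        if match is None:
            raise ValueError('not a mean +/- SD cell: %r' % (cell,))
        return (int(float(match.group(1))), int(float(match.group(2))))

    # parse_mean_sd('126910 +/- 4300') -> (126910, 4300))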

Lane 1 - Lane 6: Per-Tile Results

(Each of these six lanes is followed by a 100-row table, one row per tile
x0001-x0100, with the columns: Lane Tile, Clusters (raw), Av 1st Cycle Int
(PF), Av % intensity after 20 cycles (PF), % PF Clusters, % Align (PF),
Av Alignment Score (PF), % Error Rate (PF). The digit boundaries between
cells were lost when the HTML tables were flattened, so the 600 numeric rows
are not reproduced here.)

    Lane 7

    Lane Tile | Clusters (raw) | Av 1st Cycle Int (PF) | Av % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Av Alignment Score (PF) | % Error Rate (PF)
    7000116739769.27175.6033.310.900.612.48
    7000216045574.47139.7829.550.880.592.91
    7000316141282.17112.0828.640.850.553.00
    70004163624101.3584.0726.100.870.533.76
    7000516402192.78113.1826.890.960.653.07
    7000615680675.62117.1627.590.860.602.80
    7000716203873.40114.4427.610.860.602.45
    70008165511107.40105.2635.620.860.562.85
    7000916328287.62129.2430.110.810.573.20
    7001015699268.35119.0922.630.810.532.96
    7001115055272.62118.7325.240.870.602.60
    7001216014068.35105.5622.160.720.473.23
    7001314930285.85100.9321.860.790.503.30
    7001414374084.3087.3723.580.820.572.92
    7001513942758.95157.8024.120.920.633.22
    7001613617772.90146.6026.280.930.632.91
    7001713223769.00151.4525.131.100.792.68
    7001814286966.30171.0023.940.990.752.93
    7001914245669.05165.8631.970.970.632.91
    7002015502960.87100.8228.640.750.523.65
    7002115478362.23154.2824.080.730.543.34
    7002214006054.48172.1020.641.050.712.94
    7002314834771.9595.5529.020.830.582.65
    7002414892960.38114.8726.970.800.592.99
    7002515095067.45106.3025.610.900.633.29
    7002615044867.12151.3226.180.830.603.44
    7002715734380.08155.8926.530.820.583.90
    7002815491976.90169.7027.220.900.623.35
    7002915591677.33153.2829.530.860.603.52
    7003015332774.3398.7929.260.830.623.44
    70031167377101.38100.9626.850.890.593.32
    7003215860080.6586.7333.060.930.623.56
    7003315607071.68169.0633.610.940.652.99
    7003414799159.85160.9034.161.110.723.15
    7003514545476.00130.0031.631.020.682.89
    7003616460069.8889.9134.740.700.443.41
    7003715089273.33136.9931.620.790.543.98
    7003815874071.50126.5031.620.740.583.22
    7003915957976.23117.7434.900.820.563.78
    7004015300973.10124.4226.481.010.703.34
    7004116312978.7269.5835.200.660.433.48
    7004215665287.90121.4229.830.790.533.56
    7004315675783.33126.7936.070.940.633.16
    7004415982281.50114.7235.180.880.592.99
    7004515776196.48106.9234.860.980.623.11
    7004616355082.83155.0636.161.030.642.59
    70047161738108.3583.7135.480.970.603.02
    7004815751789.12125.8338.040.970.623.51
    7004915626884.37120.3935.930.930.633.00
    7005016198592.23115.9438.210.940.613.05
    7005116423692.08118.2737.060.960.592.93
    7005216203287.82125.7038.330.920.573.20
    7005316198592.42106.9838.000.910.573.14
    7005415650778.40109.3833.590.910.632.56
    7005516099199.3590.0931.610.750.543.26
    7005616282594.60105.8435.810.910.603.07
    7005716313595.25103.1536.030.880.552.93
    7005815969385.17115.1228.950.790.573.45
    7005915405277.88137.1737.450.880.583.13
    7006016119889.47108.3032.870.790.553.51
    7006115678276.92114.4032.300.810.573.58
    7006215997080.27126.2829.280.870.603.59
    7006316069686.85105.0128.670.740.543.37
    7006416108289.83128.1931.120.780.552.98
    70065173381111.25122.3625.050.840.573.98
    7006616008593.4086.2427.820.820.602.96
    7006716030180.0898.6326.390.830.613.07
    70068152691101.3091.1929.110.880.593.66
    7006914832570.78150.8331.020.880.633.20
    7007016212585.8592.0530.620.680.483.06
    7007116596268.72103.5632.700.660.463.51
    7007216653593.93124.5734.010.800.523.13
    70073173264106.55126.5631.720.790.503.45
    70074168644101.98125.5529.960.760.543.06
    7007515619971.35151.2331.660.890.593.52
    7007616559470.38157.1931.080.930.633.48
    7007717607293.1273.8034.360.800.503.20
    70078174206115.9296.7434.010.800.503.53
    7007916306592.30111.2435.110.860.612.78
    7008017236759.48116.8634.711.000.643.07
    70081160853106.10116.7133.530.930.602.77
    7008217080967.78153.1930.680.800.542.83
    7008317125069.68153.5028.970.820.552.95
    7008417252390.85116.6229.600.890.572.94
    7008517208397.45107.5231.450.800.522.91
    7008615887990.08108.6025.960.950.633.05
    70087169708104.50114.4530.170.770.503.53
    7008814945168.62147.8328.240.780.493.16
    7008917337086.58119.5526.950.910.632.82
    7009016805985.48160.1634.640.900.573.04
    7009117001889.40120.4732.400.810.522.71
    7009216793377.98104.1730.920.870.612.71
    7009316930567.32132.2730.500.900.632.64
    7009416420975.47143.1929.490.890.612.61
    7009517623976.33160.8338.910.830.562.70
    70096170534109.0082.6132.381.000.632.80
    70097174053113.3590.0513.000.490.352.45
    70098175159110.0898.0929.740.980.652.79
    70099179218100.78130.6927.530.940.642.59
    7010015550088.42121.5431.600.950.632.33
    +

    Lane 8

    Lane Tile | Clusters (raw) | Av 1st Cycle Int (PF) | Av % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Av Alignment Score (PF) | % Error Rate (PF)
    80001181113113.7271.640.8142.0737.022.90
    80002167526104.8359.462.2136.2531.473.80
    8000316213195.0871.291.9938.3532.693.67
    80004174931122.2056.302.1839.6429.883.55
    80005188914111.8873.432.7140.5535.063.68
    8000617657495.4375.373.0043.1737.843.46
    80007177213109.3259.392.4338.5633.273.59
    8000817914899.5293.773.1648.0244.062.89
    80009184260102.1294.663.0246.6142.163.10
    8001016172188.9565.912.6142.6438.683.11
    80011181269102.8558.072.5735.9730.414.29
    80012182179105.4764.922.9644.4340.583.19
    8001316705898.9772.422.9543.8238.673.18
    80014181070103.4569.432.9944.7239.393.40
    80015192314128.2252.412.7840.5033.403.57
    80016198144120.3056.902.9244.3937.083.62
    80017202972130.8559.513.1247.2940.243.47
    80018201819134.9052.712.8043.9137.453.57
    80019209515140.4747.412.8446.1639.353.52
    80020176152111.7260.062.6040.8733.933.67
    80021176568110.1568.322.5944.6239.063.50
    80022169602108.7575.792.8748.0843.732.98
    80023189324102.4883.462.8847.0641.363.52
    80024171625116.5860.632.9847.6142.473.45
    8002516209297.65114.413.2348.3239.953.10
    80026166734106.85113.293.3550.8146.103.18
    80027188771135.7089.903.3651.3645.203.37
    80028208995143.7874.373.4550.6644.313.37
    80029189249102.97101.463.5050.7046.133.16
    80030168319113.25101.503.3952.5247.693.17
    80031207260148.2865.943.5553.1647.513.22
    80032214286117.7799.773.0350.9346.693.12
    80033204280138.0564.513.3351.1345.233.30
    80034218862143.2749.693.5551.4645.873.09
    80035212945126.7869.434.0453.3547.423.14
    80036209646122.4079.744.1855.9450.402.95
    80037200910112.3088.693.8454.0749.682.95
    80038208228123.5381.383.9255.2550.762.99
    80039218373131.2081.633.9756.8052.262.90
    80040210877115.6085.623.9654.5449.832.95
    80041205484116.0087.263.7852.5844.034.66
    80042201755105.4094.123.8055.6650.723.11
    80043199044120.7580.353.6852.9947.783.14
    80044194716120.9374.053.6853.3548.073.23
    80045213702129.8066.413.8553.6047.913.10
    80046212117108.7594.673.8558.0253.313.05
    80047222320108.6296.183.6458.3154.052.86
    80048215015129.8570.454.0255.0548.913.28
    80049212941128.0567.363.9553.5847.393.30
    80050207426119.8870.704.2754.2249.053.14
    80051221088120.4880.274.9857.3452.493.09
    80052206850113.8577.054.2854.5050.163.13
    80053218985128.3571.394.4155.4650.363.15
    80054215826125.5080.324.5155.5751.033.05
    80055217025123.7581.944.3855.0650.343.06
    80056221504133.7265.404.1753.4948.733.11
    80057217253125.7859.094.0153.6048.373.17
    80058217739133.2059.313.9053.2346.933.35
    80059206901119.10103.573.5951.5945.563.32
    80060209276133.2575.553.9153.2348.593.14
    80061206912116.4280.823.8753.3748.653.08
    80062208293130.9376.973.9953.8349.453.02
    80063216117140.8568.024.0453.9648.523.11
    80064221626138.3868.514.0753.9948.413.20
    80065226495119.1790.854.2656.1352.262.98
    80066211217122.7577.134.0753.1348.263.22
    80067207572117.9785.403.9053.4848.973.05
    80068212274130.4777.973.7850.9245.663.15
    80069225642151.3267.643.4347.9941.143.46
    80070206267130.8544.733.0445.2039.723.49
    80071197697116.0055.882.8744.8640.683.36
    80072201670131.1554.333.1049.6345.223.23
    80073199697141.8552.343.2150.8147.202.95
    80074220224146.2266.883.1050.2445.853.01
    80075198282128.4072.593.1750.4146.143.12
    80076187773131.5566.253.3050.9645.633.20
    80077186366113.85100.553.1350.3246.193.15
    80078189058133.5791.372.9047.0142.993.19
    80079213947164.3057.353.1548.4944.573.09
    80080221739121.9574.932.7144.1535.063.23
    80081205044132.7549.512.7942.3936.713.48
    80082210695129.4380.633.5649.5044.263.29
    80083217007139.4572.433.4748.6340.363.69
    80084210797144.3570.663.4248.6443.353.29
    80085211477142.2273.723.3847.6436.573.26
    80086200320117.9563.593.0345.1540.323.25
    80087204295136.7557.183.3548.2444.393.04
    80088203778141.1865.793.2146.7942.243.13
    80089209681124.6582.433.2247.8944.072.95
    80090199690129.0569.822.9646.2041.843.11
    80091202793119.6079.183.1347.4142.533.12
    80092188850109.6074.643.1744.8140.443.26
    80093188951119.9577.432.8944.1439.453.29
    80094177924125.0571.752.1937.0529.903.82
    80095175164103.0088.762.7940.6729.893.75
    80096170691109.4074.771.2737.5431.244.00
    800971777150.00N/A0.000.000.000.00
    80098168260121.3864.302.1436.2020.993.23
    80099202629149.7272.232.5141.1334.743.56
    80100162617125.7266.752.5341.0731.793.48
    +
  • IVC Plots
  • All Intensity Plots
  • Error Graphs
  • Error Curves

diff --git a/trunk/htsworkflow/pipelines/test/testdata/Summary-paired-pipeline110.htm b/trunk/htsworkflow/pipelines/test/testdata/Summary-paired-pipeline110.htm
new file mode 100644
index 0000000..5b41c1e
--- /dev/null
+++ b/trunk/htsworkflow/pipelines/test/testdata/Summary-paired-pipeline110.htm
@@ -0,0 +1,662 @@

    080920_HWI-EAS229_0057_30GBJAAXX Summary

    +

    Summary Information For Experiment 080920_HWI-EAS229_0057_30GBJAAXX on Machine unknown

    +



    Chip Summary

    Machine | UNKNOWN
    Run Folder | 080920_HWI-EAS229_0057_30GBJAAXX
    Chip ID | unknown
    +



    Chip Results Summary

    Clusters | Clusters (PF) | Yield (kbases)
    126151880 | 95923456 | 3549167
    +
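    A quick consistency check on the totals above: for this 37, 37 paired run
    the reported yield equals the PF cluster count times one 37 bp read length.
    The snippet below is illustrative Python only (the variable names are made
    up, and it assumes yield is reported per read):

        clusters_pf = 95923456
        read_length = 37                          # one read of the 37, 37 pair
        print clusters_pf * read_length // 1000   # -> 3549167 kbases, as reported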



    Lane Parameter Summary

    Lane | Sample ID | Sample Target | Sample Type | Length | Filter | Num Tiles | Tiles
    1 | unknown | mm9 | ELAND_PAIR | 37, 37 | '((CHASTITY>=0.6))' | 100 | Lane 1
    2 | unknown | hg18 | ELAND_PAIR | 37, 37 | '((CHASTITY>=0.6))' | 100 | Lane 2
    3 | unknown | mm9 | ELAND_PAIR | 37, 37 | '((CHASTITY>=0.6))' | 100 | Lane 3
    4 | unknown | mm9 | ELAND_PAIR | 37, 37 | '((CHASTITY>=0.6))' | 100 | Lane 4
    5 | unknown | mm9 | ELAND_PAIR | 37, 37 | '((CHASTITY>=0.6))' | 100 | Lane 5
    6 | unknown | hg18 | ELAND_PAIR | 37, 37 | '((CHASTITY>=0.6))' | 100 | Lane 6
    7 | unknown | mm9 | ELAND_PAIR | 37, 37 | '((CHASTITY>=0.6))' | 100 | Lane 7
    8 | unknown | mm9 | ELAND_PAIR | 37, 37 | '((CHASTITY>=0.6))' | 100 | Lane 8
    +
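    For reference, the '((CHASTITY>=0.6))' filter above is the usual Illumina
    pass-filter rule: over the early cycles, the brightest base signal of a
    cluster must stay "chaste" relative to the runner-up. A minimal sketch of
    the idea (assumed formula and illustrative intensities, not code from this
    patch):

        def chastity(intensities):
            # brightest intensity / (sum of the two brightest intensities)
            brightest, runner_up = sorted(intensities, reverse=True)[:2]
            return brightest / float(brightest + runner_up)

        print chastity([1200.0, 300.0, 80.0, 40.0])  # 0.8, passes a 0.6 cutoff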



    Lane Results Summary : Read 1

    Lane Info | Tile Mean +/- SD for Lane
    Lane | Lane Yield (kbases) | Clusters (raw) | Clusters (PF) | 1st Cycle Int (PF) | % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Alignment Score (PF) | % Error Rate (PF)
    1 | 277083 | 103646 +/- 4515 | 74887 +/- 6080 | 290 +/- 17 | 99.34 +/- 3.52 | 72.22 +/- 4.63 | 89.19 +/- 0.59 | 14.16 +/- 0.63 | 0.94 +/- 0.17
    2 | 289563 | 106678 +/- 4652 | 78260 +/- 2539 | 294 +/- 16 | 98.23 +/- 2.66 | 73.43 +/- 2.52 | 87.05 +/- 0.64 | 16.81 +/- 0.55 | 0.92 +/- 0.17
    3 | 259242 | 84583 +/- 5963 | 70065 +/- 4194 | 284 +/- 18 | 99.82 +/- 3.05 | 82.90 +/- 1.32 | 89.49 +/- 0.20 | 18.13 +/- 0.66 | 0.81 +/- 0.13
    4 | 210549 | 68813 +/- 4782 | 56905 +/- 4145 | 300 +/- 29 | 102.00 +/- 14.74 | 82.91 +/- 5.89 | 56.93 +/- 0.82 | 25.85 +/- 2.30 | 0.95 +/- 0.30
    5 | 295555 | 104854 +/- 4664 | 79879 +/- 6270 | 281 +/- 19 | 98.26 +/- 5.85 | 76.34 +/- 6.67 | 57.71 +/- 0.30 | 26.16 +/- 1.68 | 0.97 +/- 0.19
    6 | 140401 | 43555 +/- 1632 | 37946 +/- 2140 | 233 +/- 16 | 105.74 +/- 8.40 | 87.14 +/- 3.87 | 89.08 +/- 1.00 | 33.53 +/- 2.18 | 1.05 +/- 0.21
    7 | 154217 | 54265 +/- 1588 | 41680 +/- 5319 | 224 +/- 18 | 111.33 +/- 8.90 | 76.94 +/- 10.52 | 84.50 +/- 1.41 | 27.44 +/- 2.33 | 1.32 +/- 0.25
    8 | 147969 | 64363 +/- 2697 | 39991 +/- 6785 | 248 +/- 43 | 109.93 +/- 7.80 | 62.45 +/- 12.05 | 82.20 +/- 2.08 | 24.63 +/- 2.53 | 1.57 +/- 0.22
    Tile mean across chip
    Av. | 78844 | 59952 | 269 | 103.08 | 76.79 | 79.52 | 23.34 | 1.06
    +



    Lane Results Summary : Read 2

    Lane Info | Tile Mean +/- SD for Lane
    Lane | Lane Yield (kbases) | Clusters (raw) | Clusters (PF) | 1st Cycle Int (PF) | % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Alignment Score (PF) | % Error Rate (PF)
    1 | 277083 | 103647 +/- 4516 | 74887 +/- 6080 | 277 +/- 17 | 94.42 +/- 5.68 | 72.22 +/- 4.63 | 81.54 +/- 2.13 | 42.70 +/- 5.49 | 0.89 +/- 0.27
    2 | 289563 | 106679 +/- 4653 | 78260 +/- 2539 | 259 +/- 13 | 93.57 +/- 2.55 | 73.43 +/- 2.52 | 82.05 +/- 0.37 | 43.98 +/- 3.02 | 0.76 +/- 0.15
    3 | 259242 | 84584 +/- 5964 | 70065 +/- 4194 | 252 +/- 12 | 94.23 +/- 2.19 | 82.90 +/- 1.32 | 84.94 +/- 0.28 | 51.76 +/- 2.29 | 0.59 +/- 0.07
    4 | 210549 | 68814 +/- 4783 | 56905 +/- 4145 | 226 +/- 16 | 96.82 +/- 7.12 | 82.91 +/- 5.89 | 56.01 +/- 0.99 | 27.86 +/- 3.48 | 0.95 +/- 0.33
    5 | 295555 | 104855 +/- 4665 | 79879 +/- 6270 | 200 +/- 24 | 103.56 +/- 15.45 | 76.34 +/- 6.67 | 56.76 +/- 0.41 | 25.68 +/- 2.06 | 0.98 +/- 0.17
    6 | 140401 | 43556 +/- 1633 | 37946 +/- 2140 | 179 +/- 10 | 100.82 +/- 5.47 | 87.14 +/- 3.87 | 88.64 +/- 1.42 | 34.05 +/- 2.60 | 0.98 +/- 0.22
    7 | 154217 | 54266 +/- 1589 | 41680 +/- 5319 | 184 +/- 5 | 103.42 +/- 3.47 | 76.94 +/- 10.52 | 83.90 +/- 1.32 | 27.60 +/- 2.07 | 1.26 +/- 0.16
    8 | 147969 | 64364 +/- 2698 | 39991 +/- 6785 | 206 +/- 31 | 99.48 +/- 3.23 | 62.45 +/- 12.05 | 79.81 +/- 3.35 | 23.06 +/- 2.50 | 1.56 +/- 0.23
    Tile mean across chip
    Av. | 78844 | 59952 | 223 | 98.29 | 76.79 | 76.70 | 34.59 | 1.00
    +



    Expanded Lane Summary : Read 1

    Lane Info | Phasing Info | Raw Data (tile mean) | Filtered Data (tile mean)
    Lane | Clusters (tile mean) (raw) | % Phasing | % Prephasing | % Error Rate (raw) | Equiv Perfect Clusters (raw) | % retained | Cycle 2-4 Av Int (PF) | Cycle 2-10 Av % Loss (PF) | Cycle 10-20 Av % Loss (PF) | % Align (PF) | % Error Rate (PF) | Equiv Perfect Clusters (PF)
    1 | 103646 | 0.8600 | 0.4900 | 1.37 | 74813 | 72.22 | 266 +/- 17 | -0.53 +/- 0.37 | -0.42 +/- 0.21 | 89.19 | 0.94 | 64718
    2 | 106678 | 0.8600 | 0.4900 | 1.34 | 74842 | 73.43 | 284 +/- 16 | 0.08 +/- 0.43 | -0.17 +/- 0.34 | 87.05 | 0.92 | 65850
    3 | 84583 | 0.8600 | 0.4900 | 1.09 | 65493 | 82.90 | 286 +/- 14 | 0.29 +/- 0.48 | -0.02 +/- 0.17 | 89.49 | 0.81 | 60899
    4 | 68813 | 0.8600 | 0.4900 | 1.19 | 33697 | 82.91 | 286 +/- 23 | -0.01 +/- 0.62 | -0.37 +/- 0.30 | 56.93 | 0.95 | 31080
    5 | 104854 | 0.8600 | 0.4900 | 1.32 | 50075 | 76.34 | 258 +/- 25 | -0.03 +/- 0.46 | -0.49 +/- 0.27 | 57.71 | 0.97 | 44149
    6 | 43555 | 0.8600 | 0.4900 | 1.24 | 34399 | 87.14 | 231 +/- 14 | -0.19 +/- 0.46 | -0.34 +/- 0.40 | 89.08 | 1.05 | 32302
    7 | 54265 | 0.8600 | 0.4900 | 1.67 | 38188 | 76.94 | 224 +/- 14 | -0.41 +/- 0.49 | -0.55 +/- 0.23 | 84.50 | 1.32 | 33435
    8 | 64363 | 0.8600 | 0.4900 | 2.15 | 38077 | 62.45 | 247 +/- 42 | -0.52 +/- 0.36 | -0.29 +/- 0.19 | 82.20 | 1.57 | 31036
    +



    Expanded Lane Summary : Read 2

    Lane Info | Phasing Info | Raw Data (tile mean) | Filtered Data (tile mean)
    Lane | Clusters (tile mean) (raw) | % Phasing | % Prephasing | % Error Rate (raw) | Equiv Perfect Clusters (raw) | % retained | Cycle 2-4 Av Int (PF) | Cycle 2-10 Av % Loss (PF) | Cycle 10-20 Av % Loss (PF) | % Align (PF) | % Error Rate (PF) | Equiv Perfect Clusters (PF)
    1 | 103646 | 0.7900 | 0.4600 | 1.24 | 68870 | 72.22 | 254 +/- 15 | -0.53 +/- 0.37 | -0.42 +/- 0.21 | 81.54 | 0.89 | 59272
    2 | 106678 | 0.7900 | 0.4600 | 1.11 | 71980 | 73.43 | 247 +/- 12 | 0.08 +/- 0.43 | -0.17 +/- 0.34 | 82.05 | 0.76 | 62240
    3 | 84583 | 0.7900 | 0.4600 | 0.80 | 63500 | 82.90 | 243 +/- 8 | 0.29 +/- 0.48 | -0.02 +/- 0.17 | 84.94 | 0.59 | 58029
    4 | 68813 | 0.7900 | 0.4600 | 1.12 | 33534 | 82.91 | 210 +/- 19 | -0.01 +/- 0.62 | -0.37 +/- 0.30 | 56.01 | 0.95 | 30548
    5 | 104854 | 0.7900 | 0.4600 | 1.24 | 49951 | 76.34 | 193 +/- 12 | -0.03 +/- 0.46 | -0.49 +/- 0.27 | 56.76 | 0.98 | 43366
    6 | 43555 | 0.7900 | 0.4600 | 1.12 | 34751 | 87.14 | 174 +/- 7 | -0.19 +/- 0.46 | -0.34 +/- 0.40 | 88.64 | 0.98 | 32208
    7 | 54265 | 0.7900 | 0.4600 | 1.55 | 38418 | 76.94 | 178 +/- 4 | -0.41 +/- 0.49 | -0.55 +/- 0.23 | 83.90 | 1.26 | 33240
    8 | 64363 | 0.7900 | 0.4600 | 2.07 | 36968 | 62.45 | 198 +/- 32 | -0.52 +/- 0.36 | -0.29 +/- 0.19 | 79.81 | 1.56 | 30181
\ No newline at end of file
diff --git a/trunk/htsworkflow/pipelines/test/testdata/Summary-pipeline100.htm b/trunk/htsworkflow/pipelines/test/testdata/Summary-pipeline100.htm
new file mode 100644
index 0000000..1b82467
--- /dev/null
+++ b/trunk/htsworkflow/pipelines/test/testdata/Summary-pipeline100.htm
@@ -0,0 +1,598 @@

    080627_HWI-EAS229_0036_3055HAXX Summary

    +

    Summary Information For Experiment 080627_HWI-EAS229_0036_3055HAXX on Machine HWI-EAS229

    +



    Chip Summary

    Machine | HWI-EAS229
    Run Folder | 080627_HWI-EAS229_0036_3055HAXX
    Chip ID | unknown
    +



    Chip Results Summary

    Clusters | Clusters (PF) | Yield (kbases)
    80933224 | 43577803 | 1133022
    +



    Lane Parameter Summary

    Lane | Sample ID | Sample Target | Sample Type | Length | Filter | Num Tiles | Tiles
    1 | unknown | mm9 | ELAND | 26 | '((CHASTITY>=0.6))' | 100 | Lane 1
    2 | unknown | mm9 | ELAND | 26 | '((CHASTITY>=0.6))' | 100 | Lane 2
    3 | unknown | mm9 | ELAND | 26 | '((CHASTITY>=0.6))' | 100 | Lane 3
    4 | unknown | elegans170 | ELAND | 26 | '((CHASTITY>=0.6))' | 100 | Lane 4
    5 | unknown | elegans170 | ELAND | 26 | '((CHASTITY>=0.6))' | 100 | Lane 5
    6 | unknown | elegans170 | ELAND | 26 | '((CHASTITY>=0.6))' | 100 | Lane 6
    7 | unknown | elegans170 | ELAND | 26 | '((CHASTITY>=0.6))' | 100 | Lane 7
    8 | unknown | elegans170 | ELAND | 26 | '((CHASTITY>=0.6))' | 100 | Lane 8
    +



    Lane Results Summary

    Lane Info | Tile Mean +/- SD for Lane
    Lane | Lane Yield (kbases) | Clusters (raw) | Clusters (PF) | 1st Cycle Int (PF) | % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Alignment Score (PF) | % Error Rate (PF)
    1 | 158046 | 96483 +/- 9074 | 60787 +/- 4240 | 329 +/- 35 | 101.88 +/- 6.03 | 63.21 +/- 3.29 | 70.33 +/- 0.24 | 9054.08 +/- 59.16 | 0.46 +/- 0.18
    2 | 156564 | 133738 +/- 7938 | 60217 +/- 1926 | 444 +/- 39 | 92.62 +/- 7.58 | 45.20 +/- 3.31 | 51.98 +/- 0.74 | 6692.04 +/- 92.49 | 0.46 +/- 0.09
    3 | 185818 | 152142 +/- 10002 | 71468 +/- 2827 | 366 +/- 36 | 91.53 +/- 8.66 | 47.19 +/- 3.80 | 82.24 +/- 0.44 | 10598.68 +/- 64.13 | 0.41 +/- 0.04
    4 | 34953 | 15784 +/- 2162 | 13443 +/- 1728 | 328 +/- 40 | 97.53 +/- 9.87 | 85.29 +/- 1.91 | 80.02 +/- 0.53 | 10368.82 +/- 71.08 | 0.15 +/- 0.05
    5 | 167936 | 119735 +/- 8465 | 64590 +/- 2529 | 417 +/- 37 | 88.69 +/- 14.79 | 54.10 +/- 2.59 | 76.95 +/- 0.32 | 9936.47 +/- 65.75 | 0.28 +/- 0.02
    6 | 173463 | 152177 +/- 8146 | 66716 +/- 2493 | 372 +/- 39 | 87.06 +/- 9.86 | 43.98 +/- 3.12 | 78.80 +/- 0.43 | 10162.28 +/- 49.65 | 0.38 +/- 0.03
    7 | 149287 | 84649 +/- 7325 | 57418 +/- 3617 | 295 +/- 28 | 89.40 +/- 8.23 | 67.97 +/- 1.82 | 33.38 +/- 0.25 | 4247.92 +/- 32.37 | 1.00 +/- 0.03
    8 | 106953 | 54622 +/- 4812 | 41136 +/- 3309 | 284 +/- 37 | 90.21 +/- 9.10 | 75.39 +/- 2.27 | 48.33 +/- 0.29 | 6169.21 +/- 169.50 | 0.86 +/- 1.22
    Tile mean across chip
    Av. | 101166 | 54472 | 354 | 92.36 | 60.29 | 65.25 | 8403.69 | 0.50
    +
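    The lane yields above follow directly from the tile means: yield (kbases)
    is the mean PF clusters per tile, times the 100 tiles, times the 26 bp read
    length. A one-line check for lane 1 (illustrative Python, values taken from
    the table above):

        print 60787 * 100 * 26 // 1000   # -> 158046 kbases, matching lane 1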



    Expanded Lane Summary

    Lane Info | Phasing Info | Raw Data (tile mean) | Filtered Data (tile mean)
    Lane | Clusters (tile mean) (raw) | % Phasing | % Prephasing | % Error Rate (raw) | Equiv Perfect Clusters (raw) | % retained | Cycle 2-4 Av Int (PF) | Cycle 2-10 Av % Loss (PF) | Cycle 10-20 Av % Loss (PF) | % Align (PF) | % Error Rate (PF) | Equiv Perfect Clusters (PF)
    1 | 96483 | 0.7700 | 0.3100 | 1.00 | 49676 | 63.21 | 317 +/- 32 | 0.13 +/- 0.44 | -1.14 +/- 0.34 | 70.33 | 0.46 | 41758
    2 | 133738 | 0.7700 | 0.3100 | 1.22 | 40467 | 45.20 | 415 +/- 33 | 0.29 +/- 0.40 | -0.79 +/- 0.35 | 51.98 | 0.46 | 30615
    3 | 152142 | 0.7700 | 0.3100 | 1.30 | 78588 | 47.19 | 344 +/- 26 | 0.68 +/- 0.51 | -0.77 +/- 0.42 | 82.24 | 0.41 | 57552
    4 | 15784 | 0.7700 | 0.3100 | 0.29 | 11095 | 85.29 | 306 +/- 34 | 0.20 +/- 0.69 | -1.28 +/- 0.66 | 80.02 | 0.15 | 10671
    5 | 119735 | 0.7700 | 0.3100 | 0.85 | 60335 | 54.10 | 380 +/- 32 | 0.34 +/- 0.49 | -1.55 +/- 4.69 | 76.95 | 0.28 | 49015
    6 | 152177 | 0.7700 | 0.3100 | 1.21 | 70905 | 43.98 | 333 +/- 27 | 0.57 +/- 0.50 | -0.91 +/- 0.39 | 78.80 | 0.38 | 51663
    7 | 84649 | 0.7700 | 0.3100 | 1.38 | 21069 | 67.97 | 272 +/- 20 | 1.15 +/- 0.52 | -0.84 +/- 0.58 | 33.38 | 1.00 | 18265
    8 | 54622 | 0.7700 | 0.3100 | 1.17 | 21335 | 75.39 | 262 +/- 31 | 1.10 +/- 0.59 | -1.01 +/- 0.47 | 48.33 | 0.86 | 19104
    +

    IVC Plots
    IVC.htm

    All Intensity Plots
    All.htm

    Error graphs:
    Error.htm



    Lane 1

    Lane | Tile | Clusters (raw) | Av 1st Cycle Int (PF) | Av % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Av Alignment Score (PF) | % Error Rate (PF)
    1 | 0001 | 114972 | 326.48 | 94.39 | 57.44 | 70.2 | 9038.6 | 0.44



    Lane 2

    Lane | Tile | Clusters (raw) | Av 1st Cycle Int (PF) | Av % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Av Alignment Score (PF) | % Error Rate (PF)
    2 | 0001 | 147793 | 448.12 | 83.68 | 38.57 | 53.7 | 6905.4 | 0.54



    Lane 3

    Lane | Tile | Clusters (raw) | Av 1st Cycle Int (PF) | Av % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Av Alignment Score (PF) | % Error Rate (PF)
    3 | 0001 | 167904 | 374.05 | 86.91 | 40.36 | 81.3 | 10465.0 | 0.47



    Lane 4

    Lane | Tile | Clusters (raw) | Av 1st Cycle Int (PF) | Av % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Av Alignment Score (PF) | % Error Rate (PF)
    4 | 0001 | 20308 | 276.85 | 92.87 | 84.26 | 80.4 | 10413.8 | 0.16



    Lane 5

    Lane | Tile | Clusters (raw) | Av 1st Cycle Int (PF) | Av % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Av Alignment Score (PF) | % Error Rate (PF)



    Lane 6

    Lane | Tile | Clusters (raw) | Av 1st Cycle Int (PF) | Av % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Av Alignment Score (PF) | % Error Rate (PF)
    6 | 0001 | 166844 | 348.12 | 77.59 | 38.13 | 79.7 | 10264.4 | 0.44



    Lane 7

    Lane | Tile | Clusters (raw) | Av 1st Cycle Int (PF) | Av % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Av Alignment Score (PF) | % Error Rate (PF)
    7 | 0001 | 98913 | 269.90 | 86.66 | 64.55 | 33.2 | 4217.5 | 1.02



    Lane 8

    Lane | Tile | Clusters (raw) | Av 1st Cycle Int (PF) | Av % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Av Alignment Score (PF) | % Error Rate (PF)
    8 | 0001 | 64972 | 243.60 | 89.40 | 73.17 | 48.3 | 6182.8 | 0.71

diff --git a/trunk/htsworkflow/pipelines/test/testdata/Summary-pipeline110.htm b/trunk/htsworkflow/pipelines/test/testdata/Summary-pipeline110.htm
new file mode 100644
index 0000000..854bda6
--- /dev/null
+++ b/trunk/htsworkflow/pipelines/test/testdata/Summary-pipeline110.htm
@@ -0,0 +1,400 @@

    081017_HWI-EAS229_0062_30J55AAXX Summary

    +

    Summary Information For Experiment 081017_HWI-EAS229_0062_30J55AAXX on Machine HWI-EAS229

    +



    Chip Summary

    Machine | HWI-EAS229
    Run Folder | 081017_HWI-EAS229_0062_30J55AAXX
    Chip ID | unknown
    +



    Chip Results Summary

    Clusters | Clusters (PF) | Yield (kbases)
    162491175 | 99622159 | 3686019
    +



    Lane Parameter Summary

    Lane | Sample ID | Sample Target | Sample Type | Length | Filter | Chast. Thresh. | Num Tiles | Tiles
    1 | unknown | mm9 | ELAND | 37 | '((FAILED_CHASTITY<=1))' | 0.6 | 100 | Lane 1
    2 | unknown | mm9 | ELAND | 37 | '((FAILED_CHASTITY<=1))' | 0.6 | 100 | Lane 2
    3 | unknown | mm9 | ELAND | 37 | '((FAILED_CHASTITY<=1))' | 0.6 | 100 | Lane 3
    4 | unknown | hg18 | ELAND | 37 | '((FAILED_CHASTITY<=1))' | 0.6 | 100 | Lane 4
    5 | unknown | hg18 | ELAND | 37 | '((FAILED_CHASTITY<=1))' | 0.6 | 100 | Lane 5
    6 | unknown | mm9 | ELAND | 37 | '((FAILED_CHASTITY<=1))' | 0.6 | 100 | Lane 6
    7 | unknown | mm9 | ELAND | 37 | '((FAILED_CHASTITY<=1))' | 0.6 | 100 | Lane 7
    8 | unknown | mm9 | ELAND | 37 | '((FAILED_CHASTITY<=1))' | 0.6 | 100 | Lane 8
    +
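    These Summary.htm files are checked into htsworkflow/pipelines/test/testdata
    as fixtures for the pipeline summary parser. A hypothetical sketch of how a
    test might pull the chip totals back out of the raw HTML (this is not the
    project's actual parser; the regex assumes the three totals sit in the first
    numeric table cells after the heading):

        import re

        def chip_totals(html):
            # illustrative: first three numeric cells after the heading
            tail = html.split('Chip Results Summary', 1)[1]
            return [int(n) for n in re.findall(r'>(\d+)<', tail)[:3]]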



    Lane Results Summary

    Lane Info | Tile Mean +/- SD for Lane
    Lane | Lane Yield (kbases) | Clusters (raw) | Clusters (PF) | 1st Cycle Int (PF) | % intensity after 20 cycles (PF) | % PF Clusters | % Align (PF) | Alignment Score (PF) | % Error Rate (PF)
    1 | 435340 | 190220 +/- 15118 | 117659 +/- 8144 | 273 +/- 16 | 80.02 +/- 2.52 | 62.15 +/- 5.54 | 77.18 +/- 0.22 | 13447.28 +/- 43.35 | 2.78 +/- 0.13
    2 | 462364 | 190560 +/- 14399 | 124963 +/- 5687 | 271 +/- 16 | 75.73 +/- 2.46 | 65.83 +/- 4.12 | 70.06 +/- 0.39 | 12082.95 +/- 64.81 | 3.22 +/- 0.09
    3 | 468929 | 187597 +/- 12369 | 126737 +/- 5549 | 274 +/- 16 | 72.61 +/- 2.67 | 67.69 +/- 2.72 | 74.03 +/- 0.22 | 12470.18 +/- 50.02 | 4.27 +/- 0.08
    4 | 491642 | 204142 +/- 16877 | 132876 +/- 4023 | 253 +/- 16 | 80.43 +/- 3.10 | 65.39 +/- 3.84 | 72.95 +/- 0.15 | 13273.80 +/- 39.75 | 0.78 +/- 0.10
    5 | 433033 | 247308 +/- 11600 | 117036 +/- 4489 | 273 +/- 11 | 68.60 +/- 2.40 | 47.48 +/- 3.63 | 66.91 +/- 0.54 | 11700.08 +/- 66.33 | 2.62 +/- 0.13
    6 | 483012 | 204298 +/- 15640 | 130543 +/- 6972 | 254 +/- 11 | 81.35 +/- 1.96 | 64.14 +/- 4.40 | 77.28 +/- 0.11 | 14084.01 +/- 23.09 | 0.71 +/- 0.03
    7 | 474325 | 202707 +/- 15404 | 128196 +/- 9745 | 255 +/- 13 | 79.95 +/- 2.08 | 63.48 +/- 5.63 | 75.78 +/- 0.18 | 13758.74 +/- 60.86 | 0.88 +/- 0.12
    8 | 437372 | 198075 +/- 14702 | 118208 +/- 14798 | 259 +/- 14 | 81.80 +/- 2.53 | 59.85 +/- 7.67 | 74.55 +/- 0.36 | 13586.07 +/- 103.97 | 0.71 +/- 0.15
    Tile mean across chip
    Av. | 203113 | 124527 | 264 | 77.56 | 62.00 | 73.59 | 13050.39 | 2.00
    +



    Expanded Lane Summary

    Lane Info | Phasing Info | Raw Data (tile mean) | Filtered Data (tile mean)
    Lane | Clusters (tile mean) (raw) | % Phasing | % Prephasing | % Error Rate (raw) | Equiv Perfect Clusters (raw) | % retained | Cycle 2-4 Av Int (PF) | Cycle 2-10 Av % Loss (PF) | Cycle 10-20 Av % Loss (PF) | % Align (PF) | % Error Rate (PF) | Equiv Perfect Clusters (PF)
    1 | 190220 | 0.6800 | 0.2800 | 3.17 | 107262 | 62.15 | 241 +/- 13 | 0.56 +/- 0.22 | 0.29 +/- 0.14 | 77.18 | 2.78 | 86184
    2 | 190560 | 0.6800 | 0.2800 | 3.53 | 98678 | 65.83 | 238 +/- 14 | 0.78 +/- 0.15 | 0.53 +/- 0.15 | 70.06 | 3.22 | 83090
    3 | 187597 | 0.6800 | 0.2800 | 4.44 | 104008 | 67.69 | 233 +/- 14 | 0.56 +/- 0.17 | 0.59 +/- 0.26 | 74.03 | 4.27 | 89278
    4 | 204142 | 0.6800 | 0.2800 | 1.38 | 115765 | 65.39 | 239 +/- 14 | 1.28 +/- 0.21 | 0.77 +/- 0.21 | 72.95 | 0.78 | 93475
    5 | 247308 | 0.6800 | 0.2800 | 3.40 | 103006 | 47.48 | 242 +/- 10 | 1.61 +/- 0.39 | 1.21 +/- 0.21 | 66.91 | 2.62 | 73768
    6 | 204298 | 0.6800 | 0.2800 | 1.33 | 122233 | 64.14 | 242 +/- 12 | 1.30 +/- 0.11 | 0.73 +/- 0.22 | 77.28 | 0.71 | 97646
    7 | 202707 | 0.6800 | 0.2800 | 1.51 | 117513 | 63.48 | 238 +/- 13 | 1.27 +/- 0.38 | 0.66 +/- 0.22 | 75.78 | 0.88 | 93659
    8 | 198075 | 0.6800 | 0.2800 | 1.41 | 111115 | 59.85 | 244 +/- 12 | 1.19 +/- 0.16 | 0.65 +/- 0.29 | 74.55 | 0.71 | 85327
    + + \ No newline at end of file diff --git a/trunk/htsworkflow/pipelines/test/testdata/bustard-config132.xml b/trunk/htsworkflow/pipelines/test/testdata/bustard-config132.xml new file mode 100644 index 0000000..2820451 --- /dev/null +++ b/trunk/htsworkflow/pipelines/test/testdata/bustard-config132.xml @@ -0,0 +1,129 @@ + + + + + 0.600000 + + 0 + 0 + 2 + 1 + 1 + 37 + 1 + + + A + C + G + T + 1.270000 + 0.210000 + -0.020000 + -0.030000 + 0.570000 + 0.580000 + -0.010000 + -0.010000 + -0.030000 + -0.040000 + 1.510000 + -0.020000 + -0.020000 + -0.020000 + 0.800000 + 1.070000 + + + 1 + 3 + 1 + 0 + 1 + 37 + 1 + + + none + 25 + none + none + gzip + failed-chastity + le + 1.000000 + + + + + 1 + 1 + gzip + .gz + 0 + 0 + 0 + + 1 + 37 + /zfs1/wold-lab/diane/sequencer/090307_HWI-EAS229_0097_30U0BAAXX + + HWI-EAS229 + 0 + 1 + -1 + -1 + + + 1 + 37 + /zfs1/wold-lab/diane/sequencer/090307_HWI-EAS229_0097_30U0BAAXX + + /zfs1/wold-lab/diane/sequencer/090307_HWI-EAS229_0097_30U0BAAXX + 090307 + 97 + + + + + + + + s + + + + s + + + + s + + + + s + + + + s + + + + s + + + + s + + + + s + + + + + + + diff --git a/trunk/htsworkflow/pipelines/test/testdata/gerald_config_0.2.6.xml b/trunk/htsworkflow/pipelines/test/testdata/gerald_config_0.2.6.xml new file mode 100644 index 0000000..c0753ad --- /dev/null +++ b/trunk/htsworkflow/pipelines/test/testdata/gerald_config_0.2.6.xml @@ -0,0 +1,82 @@ + + + default + + + + + Need_to_specify_ELAND_genome_directory + 8 + + domain.com + diane + localhost:25 + /home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane + /home/diane/gec + 1 + /home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald/../../Genomes + Need_to_specify_genome_file_name + genome + /home/diane/gec/080416_HWI-EAS229_0024_207BTAAXX/Data/C1-33_Firecrest1.8.28_19-04-2008_diane/Bustard1.8.28_19-04-2008_diane/GERALD_19-04-2008_diane + + _prb.txt + 12 + '((CHASTITY>=0.6))' + _qhg.txt + --symbolic + 32 + --scarf + _seq.txt + _sig2.txt + _sig.txt + @(#) Id: GERALD.pl,v 1.68.2.2 2007/06/13 11:08:49 km Exp + s_[1-8]_[0-9][0-9][0-9][0-9] + s + Sat Apr 19 19:08:30 2008 + /home/diane/proj/SolexaPipeline-0.2.2.6/Goat/../Gerald + all + http://host.domain.com/yourshare/ + + + + eland + eland + eland + eland + eland + eland + eland + eland + + + /g/dm3 + /g/equcab1 + /g/equcab1 + /g/canfam2 + /g/hg18 + /g/hg18 + /g/hg18 + /g/hg18 + + + 32 + 32 + 32 + 32 + 32 + 32 + 32 + 32 + + + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + + + diff --git a/trunk/htsworkflow/pipelines/test/testdata/gerald_config_1.0.xml b/trunk/htsworkflow/pipelines/test/testdata/gerald_config_1.0.xml new file mode 100644 index 0000000..7ced7c6 --- /dev/null +++ b/trunk/htsworkflow/pipelines/test/testdata/gerald_config_1.0.xml @@ -0,0 +1,156 @@ + + + none + + + + + /home/diane/proj/GAPipeline-1.0/Goat/../Gerald/../bin + + + + Need_to_specify_ELAND_genome_directory + + 8 + + + domain.com + diane + localhost:25 + /home/diane/gec/090220_HWI-EAS229_0093_30VR0AAXX/Data/C1-37_Firecrest1.9.5_22-02-2009_diane/Bustard1.9.5_22-02-2009_diane + /home/diane/gec + 1 + /home/diane/proj/GAPipeline-1.0/Goat/../Gerald/../../Genomes + Need_to_specify_genome_file_name + + genome + genome + genome + + 2 + 37 + 
/home/diane/gec/090220_HWI-EAS229_0093_30VR0AAXX/Data/C1-37_Firecrest1.9.5_22-02-2009_diane/Bustard1.9.5_22-02-2009_diane/GERALD_22-02-2009_diane + + + _prb.txt + 12 + + + + '((CHASTITY>=0.6))' + _qhg.txt + + + + 0 + 0 + 0 + --symbolic + 37 + 37 + 0 + --scarf + _seq.txt + _sig2.txt + _sig.txt + chastity + ge + 0.6 + @(#) Id: GERALD.pl,v 1.171 2008/05/19 17:36:14 mzerara Exp + + + 0.6 + _traces.srf + -P + + -R + s_[1-8]_[0-9][0-9][0-9][0-9] + s + Sun Feb 22 21:15:59 2009 + /home/diane/proj/GAPipeline-1.0/Goat/../Gerald + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + http://host.domain.com/yourshare/ + + + + eland_extended + eland_extended + eland_extended + eland_extended + eland_extended + eland_extended + eland_extended + eland_extended + + + /g/mm9 + /g/mm9 + /g/elegans190 + /g/arabidopsis01222004 + /g/mm9 + /g/mm9 + /g/mm9 + /g/mm9 + + + s_1$(QTABLE_SUFFIX) + s_2$(QTABLE_SUFFIX) + s_3$(QTABLE_SUFFIX) + s_4$(QTABLE_SUFFIX) + s_5$(QTABLE_SUFFIX) + s_6$(QTABLE_SUFFIX) + s_7$(QTABLE_SUFFIX) + s_8$(QTABLE_SUFFIX) + + + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + + + 37 + 37 + 37 + 37 + 37 + 37 + 37 + 37 + + + + + + + + + + + + + + + + + + + + + + + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY + + + diff --git a/trunk/htsworkflow/util/__init__.py b/trunk/htsworkflow/util/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/trunk/htsworkflow/util/alphanum.py b/trunk/htsworkflow/util/alphanum.py new file mode 100644 index 0000000..5e25606 --- /dev/null +++ b/trunk/htsworkflow/util/alphanum.py @@ -0,0 +1,69 @@ +# +# The Alphanum Algorithm is an improved sorting algorithm for strings +# containing numbers. Instead of sorting numbers in ASCII order like +# a standard sort, this algorithm sorts numbers in numeric order. +# +# The Alphanum Algorithm is discussed at http://www.DaveKoelle.com +# +#* Python implementation provided by Chris Hulan (chris.hulan@gmail.com) +#* Distributed under same license as original +# +# This library is free software; you can redistribute it and/or +# modify it under the terms of the GNU Lesser General Public +# License as published by the Free Software Foundation; either +# version 2.1 of the License, or any later version. +# +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# Lesser General Public License for more details. 
+# +# You should have received a copy of the GNU Lesser General Public +# License along with this library; if not, write to the Free Software +# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA +# + +import re +import types + +# +# TODO: Make decimal points be considered in the same class as digits +# + +def chunkify(str): + """ + return a list of numbers and non-numeric substrings of +str+ + the numeric substrings are converted to integer, non-numeric are left as is + """ + if type(str) in types.StringTypes: + chunks = re.findall("(\d+|\D+)",str) + #convert numeric strings to numbers + chunks = [re.match('\d',x) and int(x) or x for x in chunks] + return chunks + elif type(str) in [types.IntType, types.LongType, types.FloatType]: + return [str] + else: + raise ValueError("Unsupported type %s for input %s" % (type(str), str)) + +def alphanum(a,b): + """ + breaks +a+ and +b+ into pieces and returns left-to-right comparison of the pieces + + +a+ and +b+ are expected to be strings (for example file names) with numbers and non-numeric characters + Split the values into list of numbers and non numeric sub-strings and so comparison of numbers gives + Numeric sorting, comparison of non-numeric gives Lexicographic order + """ + # split strings into chunks + aChunks = chunkify(a) + bChunks = chunkify(b) + + return cmp(aChunks,bChunks) #built in comparison works once data is prepared + + + +if __name__ == "__main__": + unsorted = ["1000X Radonius Maximus","10X Radonius","200X Radonius","20X Radonius","20X Radonius Prime","30X Radonius","40X Radonius","Allegia 50 Clasteron","Allegia 500 Clasteron","Allegia 51 Clasteron","Allegia 51B Clasteron","Allegia 52 Clasteron","Allegia 60 Clasteron","Alpha 100","Alpha 2","Alpha 200","Alpha 2A","Alpha 2A-8000","Alpha 2A-900","Callisto Morphamax","Callisto Morphamax 500","Callisto Morphamax 5000","Callisto Morphamax 600","Callisto Morphamax 700","Callisto Morphamax 7000","Callisto Morphamax 7000 SE","Callisto Morphamax 7000 SE2","QRS-60 Intrinsia Machine","QRS-60F Intrinsia Machine","QRS-62 Intrinsia Machine","QRS-62F Intrinsia Machine","Xiph Xlater 10000","Xiph Xlater 2000","Xiph Xlater 300","Xiph Xlater 40","Xiph Xlater 5","Xiph Xlater 50","Xiph Xlater 500","Xiph Xlater 5000","Xiph Xlater 58"] + sorted = unsorted[:] + sorted.sort(alphanum) + print '+++++Sorted...++++' + print '\n'.join(sorted) diff --git a/trunk/htsworkflow/util/ethelp.py b/trunk/htsworkflow/util/ethelp.py new file mode 100644 index 0000000..19f6c9f --- /dev/null +++ b/trunk/htsworkflow/util/ethelp.py @@ -0,0 +1,32 @@ +""" +ElementTree helper functions +""" +def indent(elem, level=0): + """ + reformat an element tree to be 'pretty' (indented) + """ + i = "\n" + level*" " + if len(elem): + if not elem.text or not elem.text.strip(): + elem.text = i + " " + for child in elem: + indent(child, level+1) + # we don't want the closing tag indented too far + child.tail = i + if not elem.tail or not elem.tail.strip(): + elem.tail = i + else: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = i + +def flatten(elem, include_tail=0): + """ + Extract the text from an element tree + (AKA extract the text that not part of XML tags) + """ + text = elem.text or "" + for e in elem: + text += flatten(e, 1) + if include_tail and elem.tail: text += elem.tail + return text + diff --git a/trunk/htsworkflow/util/fctracker.py b/trunk/htsworkflow/util/fctracker.py new file mode 100644 index 0000000..5ba3389 --- /dev/null +++ b/trunk/htsworkflow/util/fctracker.py @@ -0,0 
+1,201 @@ +""" +Provide some quick and dirty access and reporting for the fctracker database. + +The advantage to this code is that it doesn't depend on django being +installed, so it can run on machines other than the webserver. +""" +import datetime +import os +import re +import sys +import time + +if sys.version_info[0] + sys.version_info[1] * 0.1 >= 2.5: + # we're python 2.5 + import sqlite3 +else: + import pysqlite2.dbapi2 as sqlite3 + + +class fctracker: + """ + provide a simple way to interact with the flowcell data in fctracker.db + """ + def __init__(self, database): + # default to the current directory + if database is None: + self.database = self._guess_fctracker_path() + else: + self.database = database + self.conn = sqlite3.connect(self.database) + self._get_library() + self._get_species() + + def _guess_fctracker_path(self): + """ + Guess a few obvious places for the database + """ + fctracker = 'fctracker.db' + name = fctracker + # is it in the current dir? + if os.path.exists(name): + return name + name = os.path.expanduser(os.path.join('~', fctracker)) + if os.path.exists(name): + return name + raise RuntimeError("Can't find fctracker") + + def _make_dict_from_table(self, table_name, pkey_name): + """ + Convert a django table into a dictionary indexed by the primary key. + Yes, it really does just load everything into memory, hopefully + we stay under a few tens of thousands of runs for a while. + """ + table = {} + c = self.conn.cursor() + c.execute('select * from %s;' % (table_name)) + # extract just the field name + description = [ f[0] for f in c.description] + for row in c: + row_dict = dict(zip(description, row)) + table[row_dict[pkey_name]] = row_dict + c.close() + return table + + def _add_lanes_to_libraries(self): + """ + add flowcell/lane ids to new attribute 'lanes' in the library dictionary + """ + library_id_re = re.compile('lane_\d_library_id') + + for fc_id, fc in self.flowcells.items(): + lane_library = [ (x[0][5], x[1]) for x in fc.items() + if library_id_re.match(x[0]) ] + for lane, library_id in lane_library: + if not self.library[library_id].has_key('lanes'): + self.library[library_id]['lanes'] = [] + self.library[library_id]['lanes'].append((fc_id, lane)) + + def _get_library(self): + """ + attach the library dictionary to the instance + """ + self.library = self._make_dict_from_table( + 'samples_library', + 'id') + + + def _get_species(self): + """ + attach the species dictionary to the instance + """ + self.species = self._make_dict_from_table( + 'samples_species', + 'id' + ) + + def _get_flowcells(self, where=None): + """ + attach the flowcell dictionary to the instance + + where is a sql where clause. 
(eg "where run_date > '2008-1-1'") + that can be used to limit what flowcells we select + FIXME: please add sanitization code + """ + if where is None: + where = "" + self.flowcells = {} + c = self.conn.cursor() + c.execute('select * from experiments_flowcell %s;' % (where)) + # extract just the field name + description = [ f[0] for f in c.description ] + for row in c: + row_dict = dict(zip(description, row)) + fcid, status = self._parse_flowcell_id(row_dict) + row_dict['flowcell_id'] = fcid + row_dict['flowcell_status'] = status + + for lane in [ 'lane_%d_library' % (i) for i in range(1,9) ]: + lane_library = self.library[row_dict[lane+"_id"]] + species_id = lane_library['library_species_id'] + lane_library['library_species'] = self.species[species_id] + row_dict[lane] = lane_library + # some useful parsing + run_date = time.strptime(row_dict['run_date'], '%Y-%m-%d %H:%M:%S') + run_date = datetime.datetime(*run_date[:6]) + row_dict['run_date'] = run_date + self.flowcells[row_dict['flowcell_id']] = row_dict + + self._add_lanes_to_libraries() + return self.flowcells + + def _parse_flowcell_id(self, flowcell_row): + """ + Return flowcell id and status + + We stored the status information in the flowcell id name. + this was dumb, but database schemas are hard to update. + """ + fields = flowcell_row['flowcell_id'].split() + fcid = None + status = None + if len(fields) > 0: + fcid = fields[0] + if len(fields) > 1: + status = fields[1] + return fcid, status + + +def flowcell_gone(cell): + """ + Use a variety of heuristics to determine if the flowcell drive + has been deleted. + """ + status = cell['flowcell_status'] + if status is None: + return False + failures = ['failed', 'deleted', 'not run'] + for f in failures: + if re.search(f, status): + return True + else: + return False + +def recoverable_drive_report(flowcells): + """ + Attempt to report what flowcells are still on a hard drive + """ + def format_status(status): + if status is None: + return "" + else: + return status+" " + + # sort flowcells by run date + flowcell_list = [] + for key, cell in flowcells.items(): + flowcell_list.append( (cell['run_date'], key) ) + flowcell_list.sort() + + report = [] + line = "%(date)s %(id)s %(status)s%(lane)s %(library_name)s (%(library_id)s) " + line += "%(species)s" + for run_date, flowcell_id in flowcell_list: + cell = flowcells[flowcell_id] + if flowcell_gone(cell): + continue + for l in range(1,9): + lane = 'lane_%d' % (l) + cell_library = cell['%s_library'%(lane)] + fields = { + 'date': cell['run_date'].strftime('%y-%b-%d'), + 'id': cell['flowcell_id'], + 'lane': l, + 'library_name': cell_library['library_name'], + 'library_id': cell['%s_library_id'%(lane)], + 'species': cell_library['library_species']['scientific_name'], + 'status': format_status(cell['flowcell_status']), + } + report.append(line % (fields)) + return os.linesep.join(report) + diff --git a/trunk/htsworkflow/util/hdquery.py b/trunk/htsworkflow/util/hdquery.py new file mode 100644 index 0000000..ff16387 --- /dev/null +++ b/trunk/htsworkflow/util/hdquery.py @@ -0,0 +1,25 @@ +import os + +#try: +import py_sg +#except: +# print 'ERROR: Please install py_sg (easy_install py_sg)' + + +def get_hd_serial_num(device): + """ + device = '/dev/sdX' + + returns hard drive serial number for a device; requires read permissions. 
+ """ + fd = os.open(device, os.O_RDONLY) + + # fd: device object + # \x12: INQUIRY CMD; \x01: EVPD bit set to 1; \x80: Unit Serial Number page + # See http://en.wikipedia.org/wiki/SCSI_Inquiry_Command for helpful chart + # ##: # byte buffer for returned data + data = py_sg.read(fd, "\x12\x01\x80", 32) + + # Remove extra \x00's, and split remaining data into two chunks, + # the 2nd of which is the serial number + return data.strip('\x00').split()[1] diff --git a/trunk/htsworkflow/util/makebed.py b/trunk/htsworkflow/util/makebed.py new file mode 100755 index 0000000..e82968a --- /dev/null +++ b/trunk/htsworkflow/util/makebed.py @@ -0,0 +1,170 @@ +""" +Utility functions to make bedfiles. +""" +import os +import re + +__docformat__ = "restructredtext en" + +# map eland_result.txt sense +sense_map = { 'F': '+', 'R': '-'} +sense_color = { 'F': '0,0,255', 'R': '255,255,0' } + +def create_bed_header(name, description): + """ + Produce the headerline for a bedfile + """ + # provide default track names + if name is None: name = "track" + if description is None: description = "eland result file" + bed_header = 'track name="%s" description="%s" visibility=4 itemRgb="ON"' % (name, description) + bed_header += os.linesep + return bed_header + +def make_bed_from_eland_stream(instream, outstream, name, description, chromosome_prefix='chr'): + """ + read an eland result file from instream and write a bedfile to outstream + + :Parameters: + - `instream`: stream containing the output from eland + - `outstream`: stream to write the bed file too + - `name`: name of bed-file (must be unique) + - `description`: longer description of the bed file + - `chromosome_prefix`: restrict output lines to fasta records that start with this pattern + """ + for line in make_bed_from_eland_generator(instream, name, description, chromosome_prefix): + outstream.write(line) + +def make_bed_from_eland_generator(instream, name, description, chromosome_prefix='chr'): + """ + read an eland result file from instream and write a bedfile to outstream + + :Parameters: + - `instream`: stream containing the output from eland + - `name`: name of bed-file (must be unique) + - `description`: longer description of the bed file + - `chromosome_prefix`: restrict output lines to fasta records that start with this pattern + + :Return: generator which yields lines of bedfile + """ + # indexes into fields in eland_result.txt file + SEQ = 1 + CHR = 6 + START = 7 + SENSE = 8 + + yield create_bed_header(name, description) + prefix_len = len(chromosome_prefix) + + for line in instream: + fields = line.split() + # we need more than the CHR field, and it needs to match a chromosome + if len(fields) <= CHR or fields[CHR][:prefix_len] != chromosome_prefix: + continue + start = fields[START] + stop = int(start) + len(fields[SEQ]) + # strip off filename extension + chromosome = fields[CHR].split('.')[0] + + yield '%s %s %d read 0 %s - - %s%s' % ( + chromosome, + start, + stop, + sense_map[fields[SENSE]], + sense_color[fields[SENSE]], + os.linesep + ) + +def make_bed_from_multi_eland_stream( + instream, + outstream, + name, + description, + chr_prefix='chr', + max_reads=255 + ): + """ + read a multi eland result file from instream and write the bedfile to outstream + + :Parameters: + - `instream`: stream containing the output from eland + - `outstream`: stream to write the bed file too + - `name`: name of bed-file (must be unique) + - `description`: longer description of the bed file + - `chromosome_prefix`: restrict output lines to fasta records that start 
with this pattern + - `max_reads`: maximum number of reads to write to bed stream + """ + for lane in make_bed_from_multi_eland_generator(instream, name, description, chr_prefix, max_reads): + outstream.write(lane) + +def make_bed_from_multi_eland_generator(instream, name, description, chr_prefix, max_reads=255): + loc_pattern = '(?P(?P[0-9]+)(?P[FR])(?P[0-9]+))' + other_pattern = '(?P[^:,]+)' + split_re = re.compile('(%s|%s)' % (loc_pattern, other_pattern)) + + yield create_bed_header(name, description) + for line in instream: + rec = line.split() + if len(rec) > 3: + # colony_id = rec[0] + seq = rec[1] + # number of matches for 0, 1, and 2 mismatches + # m0, m1, m2 = [int(x) for x in rec[2].split(':')] + compressed_reads = rec[3] + cur_chr = "" + reads = {0: [], 1: [], 2:[]} + + for token in split_re.finditer(compressed_reads): + if token.group('chr') is not None: + cur_chr = token.group('chr') + # strip off extension if present + cur_chr = os.path.splitext(cur_chr)[0] + elif token.group('fullloc') is not None: + matches = int(token.group('count')) + # only emit a bed line if + # our current chromosome starts with chromosome pattern + if chr_prefix is None or cur_chr.startswith(chr_prefix): + start = int(token.group('start')) + stop = start + len(seq) + orientation = token.group('dir') + strand = sense_map[orientation] + color = sense_color[orientation] + # build up list of reads for this record + reads[matches].append((cur_chr, start, stop, strand, color)) + + # report up to our max_read threshold reporting the fewer-mismatch + # matches first + reported_reads = 0 + keys = [0,1,2] + for mismatch, read_list in ((k, reads[k]) for k in keys): + reported_reads += len(read_list) + if reported_reads <= max_reads: + for cur_chr, start, stop, strand, color in read_list: + reported_reads += 1 + yield '%s %d %d read 0 %s - - %s%s' % ( + cur_chr, + start, + stop, + sense_map[orientation], + sense_color[orientation], + os.linesep + ) + +def make_description(flowcell_id, lane): + """ + compute a bedfile name and description from the django database + """ + from htsworkflow.frontend.experiments import models as experiments + + lane = int(lane) + if lane < 1 or lane > 8: + raise RuntimeError("flowcells only have lanes 1-8") + + cell = experiments.FlowCell.objects.get(flowcell_id=flowcell_id) + + name = "%s-%s" % (flowcell_id, lane) + + cell_library = getattr(cell, 'lane_%d_library' %(lane,)) + cell_library_id = cell_library.library_id + description = "%s-%s" % (cell_library.library_name, cell_library_id) + return name, description diff --git a/trunk/htsworkflow/util/mount.py b/trunk/htsworkflow/util/mount.py new file mode 100644 index 0000000..bc0c26f --- /dev/null +++ b/trunk/htsworkflow/util/mount.py @@ -0,0 +1,64 @@ +""" +Utilities for working with unix-style mounts. 
+""" +import os +import subprocess + +def list_mount_points(): + """ + Return list of current mount points + + Note: unix-like OS specific + """ + mount_points = [] + likely_locations = ['/sbin/mount', '/bin/mount'] + for mount in likely_locations: + if os.path.exists(mount): + p = subprocess.Popen(mount, stdout=subprocess.PIPE) + p.wait() + for l in p.stdout.readlines(): + rec = l.split() + device = rec[0] + mount_point = rec[2] + assert rec[1] == 'on' + # looking at the output of mount on linux, osx, and + # sunos, the first 3 elements are always the same + # devicename on path + # everything after that displays the attributes + # of the mount points in wildly differing formats + mount_points.append(mount_point) + return mount_points + else: + raise RuntimeError("Couldn't find a mount executable") + +def is_mounted(point_to_check): + """ + Return true if argument exactly matches a current mount point. + """ + for mount_point in list_mount_points(): + if point_to_check == mount_point: + return True + else: + return False + +def find_mount_point_for(pathname): + """ + Find the deepest mount point pathname is located on + """ + realpath = os.path.realpath(pathname) + mount_points = list_mount_points() + + prefixes = set() + for current_mount in mount_points: + cp = os.path.commonprefix([current_mount, realpath]) + prefixes.add((len(cp), cp)) + + prefixes = list(prefixes) + prefixes.sort() + if len(prefixes) == 0: + return None + else: + # return longest common prefix + return prefixes[-1][1] + + diff --git a/trunk/htsworkflow/util/opener.py b/trunk/htsworkflow/util/opener.py new file mode 100644 index 0000000..035bb24 --- /dev/null +++ b/trunk/htsworkflow/util/opener.py @@ -0,0 +1,57 @@ +""" +Helpful utilities for turning random names/objects into streams. +""" +import os +import gzip +import bz2 +import types +import urllib2 + +def isfilelike(file_ref, mode): + """Does file_ref have the core file operations? + """ + # if mode is w/a check to make sure we writeable ops + # but always check to see if we can read + read_operations = ['read', 'readline', 'readlines'] + write_operations = [ 'write', 'writelines' ] + #random_operations = [ 'seek', 'tell' ] + if mode[0] in ('w', 'a'): + for o in write_operations: + if not hasattr(file_ref, o): + return False + for o in read_operations: + if not hasattr(file_ref, o): + return False + + return True + +def isurllike(file_ref, mode): + """ + does file_ref look like a url? + (AKA does it start with protocol:// ?) + """ + #what if mode is 'w'? + parsed = urllib2.urlparse.urlparse(file_ref) + schema, netloc, path, params, query, fragment = parsed + + return len(schema) > 0 + +def autoopen(file_ref, mode='r'): + """ + Attempt to intelligently turn file_ref into a readable stream + """ + # catch being passed a file + if type(file_ref) is types.FileType: + return file_ref + # does it look like a file? 
+    elif isfilelike(file_ref, mode):
+        return file_ref
+    elif isurllike(file_ref, mode):
+        return urllib2.urlopen(file_ref)
+    elif os.path.splitext(file_ref)[1] == ".gz":
+        return gzip.open(file_ref, mode)
+    elif os.path.splitext(file_ref)[1] == '.bz2':
+        return bz2.BZ2File(file_ref, mode)
+    else:
+        return open(file_ref, mode)
+
diff --git a/trunk/htsworkflow/util/queuecommands.py b/trunk/htsworkflow/util/queuecommands.py
new file mode 100644
index 0000000..23fff16
--- /dev/null
+++ b/trunk/htsworkflow/util/queuecommands.py
@@ -0,0 +1,99 @@
+"""
+Run up to N simultaneous jobs from a provided list of commands
+"""
+
+import logging
+import os
+from subprocess import PIPE
+import subprocess
+import select
+import sys
+import time
+
+class QueueCommands(object):
+    """
+    Queue up N commands from cmd_list, launching more jobs as the first
+    ones finish.
+    """
+
+    def __init__(self, cmd_list, N=0, cwd=None, env=None):
+        """
+        cmd_list is a list of elements suitable for subprocess
+        N is the number of simultaneous processes to run.
+        0 is all of them.
+
+        WARNING: this will not work on windows
+        (It depends on being able to pass local file descriptors to the
+        select call, which isn't supported by the Win32 API)
+        """
+        self.to_run = cmd_list[:]
+        self.running = {}
+        self.N = N
+        self.cwd = cwd
+        self.env = env
+
+    def under_process_limit(self):
+        """
+        are we still under the total number of allowable jobs?
+        """
+        if self.N == 0:
+            return True
+
+        if len(self.running) < self.N:
+            return True
+
+        return False
+
+    def start_jobs(self):
+        """
+        Launch jobs until we have the maximum allowable running
+        (or have run out of jobs)
+        """
+        queue_log = logging.getLogger('queue')
+
+        while (len(self.to_run) > 0) and self.under_process_limit():
+            queue_log.info('%d left to run', len(self.to_run))
+            cmd = self.to_run.pop(0)
+            p = subprocess.Popen(cmd,
+                                 stdout=PIPE,
+                                 shell=True,
+                                 cwd=self.cwd,
+                                 env=self.env)
+            # index running jobs by their stdout stream so the results
+            # of select() can be mapped back to the Popen object
+            self.running[p.stdout] = p
+            queue_log.info("Created process %d from %s" % (p.pid, str(cmd)))
+
+    def run(self):
+        """
+        run up to N jobs until we run out of jobs
+        """
+        queue_log = logging.getLogger('queue')
+        queue_log.debug('using %s as cwd' % (self.cwd,))
+
+        # to_run slowly gets consumed by start_jobs
+        while len(self.to_run) > 0 or len(self.running) > 0:
+            # fill any empty spots in our job queue
+            self.start_jobs()
+
+            # build a list of file descriptors
+            # fds = file descriptors
+            fds = [ x.stdout for x in self.running.values()]
+
+            # wait for something to finish
+            # wl = write list, xl = exception list (not used, so they
+            # get uninformative names)
+            read_list, wl, xl = select.select(fds, [], fds, 1 )
+
+            # for everything that might have finished...
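+            # (select() only reports which stdout descriptors are
+            # readable; poll() below distinguishes "process exited"
+            # from "still running but produced output")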
+            for pending_fd in read_list:
+                pending = self.running[pending_fd]
+                # if it really did finish, remove it from running jobs
+                if pending.poll() is not None:
+                    queue_log.info("Process %d finished [%d]",
+                                   pending.pid, pending.returncode)
+                    del self.running[pending_fd]
+                else:
+                    # It's still running, but there's some output
+                    buffer = pending_fd.readline()
+                    buffer = buffer.strip()
+                    msg = "%d:(%d) %s" % (pending.pid, len(buffer), buffer)
+                    queue_log.debug(msg)
+            time.sleep(1)
diff --git a/trunk/htsworkflow/util/test/test_alphanum.py b/trunk/htsworkflow/util/test/test_alphanum.py
new file mode 100644
index 0000000..bfb2eda
--- /dev/null
+++ b/trunk/htsworkflow/util/test/test_alphanum.py
@@ -0,0 +1,39 @@
+import copy
+import os
+import unittest
+
+from htsworkflow.util.alphanum import alphanum
+
+class testAlphanum(unittest.TestCase):
+    def test_string(self):
+        unsorted = ['z5', 'b3', 'b10', 'a001', 'a2']
+        sorted = [ 'a001', 'a2', 'b3', 'b10', 'z5']
+        scratch = copy.copy(unsorted)
+        scratch.sort(alphanum)
+
+        for i in xrange(len(scratch)):
+            self.failIfEqual(scratch[i], unsorted[i])
+        for i in xrange(len(scratch)):
+            self.failUnlessEqual(scratch[i], sorted[i])
+
+    def test_numbers(self):
+        unsorted = [5,7,10,18,-1,3]
+        sorted = [-1,3,5,7,10,18]
+        scratch = copy.copy(unsorted)
+        scratch.sort(alphanum)
+
+        for i in xrange(len(scratch)):
+            self.failIfEqual(scratch[i], unsorted[i])
+        for i in xrange(len(scratch)):
+            self.failUnlessEqual(scratch[i], sorted[i])
+
+
+def suite():
+    return unittest.makeSuite(testAlphanum, 'test')
+
+if __name__ == "__main__":
+    unittest.main(defaultTest='suite')
diff --git a/trunk/htsworkflow/util/test/test_ethelp.py b/trunk/htsworkflow/util/test/test_ethelp.py
new file mode 100644
index 0000000..63f0ac3
--- /dev/null
+++ b/trunk/htsworkflow/util/test/test_ethelp.py
@@ -0,0 +1,35 @@
+import os
+import unittest
+
+try:
+    from xml.etree import ElementTree
+except ImportError, e:
+    from elementtree import ElementTree
+
+from htsworkflow.util.ethelp import indent, flatten
+
+class testETHelper(unittest.TestCase):
+    def setUp(self):
+        # a tiny document: one element with text plus two empty siblings
+        self.foo = '<a><b>asdf</b><b/><b/></a>'
+        self.foo_tree = ElementTree.fromstring(self.foo)
+
+    def test_indent(self):
+        flat_foo = ElementTree.tostring(self.foo_tree)
+        self.failUnlessEqual(len(flat_foo.split('\n')), 1)
+
+        indent(self.foo_tree)
+        pretty_foo = ElementTree.tostring(self.foo_tree)
+        self.failUnlessEqual(len(pretty_foo.split('\n')), 5)
+
+    def test_flatten(self):
+        self.failUnlessEqual(flatten(self.foo_tree), 'asdf')
+
+def suite():
+    return unittest.makeSuite(testETHelper, 'test')
+
+if __name__ == "__main__":
+    unittest.main(defaultTest='suite')
diff --git a/trunk/htsworkflow/util/test/test_makebed.py b/trunk/htsworkflow/util/test/test_makebed.py
new file mode 100644
index 0000000..b5d3026
--- /dev/null
+++ b/trunk/htsworkflow/util/test/test_makebed.py
@@ -0,0 +1,55 @@
+import os
+from StringIO import StringIO
+import unittest
+
+from htsworkflow.util import makebed
+
+class testMakeBed(unittest.TestCase):
+    def test_multi_1_0_0_limit_1(self):
+        instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:112:383 TCAAATCTTATGCTANGAATCNCAAATTTTCT 1:0:0 mm9_chr13_random.fa:1240R0')
+
+        out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'description', 'mm9_chr', 1))
+        self.failUnlessEqual(out[1], 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
+
+    def test_multi_1_0_0_limit_255(self):
+        instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:112:383 TCAAATCTTATGCTANGAATCNCAAATTTTCT 1:0:0 mm9_chr13_random.fa:1240R0')
+
+        out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'desc', 'mm9_chr', 255))
+        self.failUnlessEqual(out[1], 'mm9_chr13_random 1240 1272 read 0 - - - 255,255,0\n')
+
+
+    def test_multi_2_0_0_limit_1(self):
+        instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:104:586 GTTCTCGCATAAACTNACTCTNAATAGATTCA 2:0:0 mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0')
+
+        out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'desc', 'mm9_chr', 1))
+        self.failUnlessEqual(len(out), 1)
+
+    def test_multi_2_0_0_limit_255(self):
+        instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:104:586 GTTCTCGCATAAACTNACTCTNAATAGATTCA 2:0:0 mm9_chr4.fa:42995432F0,mm9_chrX.fa:101541458F0')
+
+        out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'desc', 'mm9_chr', 255))
+        self.failUnlessEqual(len(out), 3)
+        self.failUnlessEqual(out[1],
+                             'mm9_chr4 42995432 42995464 read 0 + - - 0,0,255\n')
+        self.failUnlessEqual(out[2],
+                             'mm9_chrX 101541458 101541490 read 0 + - - 0,0,255\n')
+
+    def test_multi_0_2_0_limit_1(self):
+        instream = StringIO('>HWI-EAS229_26_209LVAAXX:7:3:115:495 TCTCCCTGAAAAATANAAGTGNTGTTGGTGAG 0:2:1 mm9_chr14.fa:104434729F2,mm9_chr16.fa:63263818R1,mm9_chr2.fa:52265438R1')
+
+        out = list(makebed.make_bed_from_multi_eland_generator(instream, 'name', 'desc', 'mm9_chr', 1))
+        self.failUnlessEqual(len(out), 1)
+
+def suite():
+    return unittest.makeSuite(testMakeBed, 'test')
+
+if __name__ == "__main__":
+    unittest.main(defaultTest='suite')
diff --git a/trunk/htsworkflow/util/test/test_queuecommands.py b/trunk/htsworkflow/util/test/test_queuecommands.py
new file mode 100644
index 0000000..f4807d7
--- /dev/null
+++ b/trunk/htsworkflow/util/test/test_queuecommands.py
@@ -0,0 +1,58 @@
+import os
+import logging
+import time
+import unittest
+
+
+from htsworkflow.util.queuecommands import QueueCommands
+
+class testQueueCommands(unittest.TestCase):
+    def setUp(self):
+        logging.basicConfig(level=logging.DEBUG,
+                            format='%(asctime)s %(name)-8s %(message)s')
+
+
+
+    def test_unlimited_run_slow(self):
+        """
+        Run everything at once
+        """
+        cmds = ['/bin/sleep 0',
+                '/bin/sleep 1',
+                '/bin/sleep 2',]
+
+        q = QueueCommands(cmds)
+        start = time.time()
+        q.run()
+        end = time.time()-start
+        # we should only take the length of the longest sleep
+        # pity I had to add a 1 second sleep
+        self.failUnless( end > 2.9 and end < 3.1,
+                         "took %s seconds, expected ~3" % (end,))
+
+    def test_limited_run_slow(self):
+        """
+        Run a limited number of jobs
+        """
+        cmds = ['/bin/sleep 1',
+                '/bin/sleep 2',
+                '/bin/sleep 3',]
+
+        q = QueueCommands(cmds, 2)
+
+        start = time.time()
+        q.run()
+        end = time.time()-start
+        # pity I had to add a 1 second sleep
+        self.failUnless( end > 5.9 and end < 6.1,
+                         "took %s seconds, expected ~6" % (end,))
+
+def suite():
+    return unittest.makeSuite(testQueueCommands, 'test')
+
+if __name__ == "__main__":
+    unittest.main(defaultTest='suite')
diff --git a/trunk/scripts/configure_pipeline b/trunk/scripts/configure_pipeline
new file mode 100644
index 0000000..0251337
--- /dev/null
+++ b/trunk/scripts/configure_pipeline
@@ -0,0 +1,88 @@
+#!/usr/bin/env python
+import logging
+import os
+import sys
+import re
+from htsworkflow.pipelines.configure_run import *
+from htsworkflow.pipelines import retrieve_config as _rc
+from htsworkflow.pipelines.run_status import startCmdLineStatusMonitor
+
+s_fc = re.compile('FC[0-9]+')
+
+#Turn on built-in command-line parsing.
+_rc.DISABLE_CMDLINE = False
+
+GENOME_DIR = '/data-store01/compbio/genomes/'
+
+
+
+def main(args=None):
+    ci = ConfigInfo()
+    ci.analysis_dir = os.getcwd()
+    ci.base_analysis_dir, junk = os.path.split(ci.analysis_dir)
+
+    #FIXME: make a better command line tool
+    skip_retrieve_config = False
+    if len(args) == 1:
+        arg = args[0]
+
+        #If FC##### found
+        if s_fc.search(arg):
+            cfg_filepath = os.path.abspath('config32auto.txt')
+            flowcell = arg
+        #else, config file provided
+        else:
+            cfg_filepath = os.path.abspath(args[0])
+            skip_retrieve_config = True
+    else:
+        print "usage:\n" \
+              "  configure_pipeline FC#####\n" \
+              " or:\n" \
+              "  configure_pipeline <config filepath>\n"
+        return 3
+
+    genome_dir = GENOME_DIR
+
+    if not skip_retrieve_config:
+        status_retrieve_cfg = retrieve_config(ci, flowcell, cfg_filepath, genome_dir)
+        if status_retrieve_cfg:
+            print "Retrieve config file successful"
+        else:
+            print "Failed to retrieve config file"
+    else:
+        print "Config file %s provided from command-line" % (cfg_filepath)
+        ci.config_filepath = cfg_filepath
+        status_retrieve_cfg = True
+
+    if status_retrieve_cfg:
+        status = configure(ci)
+        if status:
+            print "Configure success"
+        else:
+            print "Configure failed"
+
+        print 'Run Dir:', ci.run_path
+        print 'Bustard Dir:', ci.bustard_path
+
+        if status:
+            # Setup status cmdline status monitor
+            startCmdLineStatusMonitor(ci)
+
+            print 'Running pipeline now!'
+            run_status = run_pipeline(ci)
+            if run_status is True:
+                print 'Pipeline ran successfully.'
+                return 0
+            else:
+                print 'Pipeline run failed.'
+ return 1 + + return 2 + +if __name__ == "__main__": + logging.basicConfig(level=logging.DEBUG, + format='%(asctime)s %(levelname)-8s %(message)s', + datefmt='%a, %d %b %Y %H:%M:%S', + #filename='pipeline_main.log', + filemode='w') + + sys.exit(main(sys.argv[1:])) diff --git a/trunk/scripts/copier b/trunk/scripts/copier new file mode 100644 index 0000000..9338b07 --- /dev/null +++ b/trunk/scripts/copier @@ -0,0 +1,6 @@ +#!/usr/bin/env python +import sys +from htsworkflow.automation.copier import main + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/trunk/scripts/elandseq b/trunk/scripts/elandseq new file mode 100755 index 0000000..6a5178c --- /dev/null +++ b/trunk/scripts/elandseq @@ -0,0 +1,51 @@ +#!/usr/bin/env python +import optparse +import os +import sys + +from htsworkflow.pipelines.eland import extract_eland_sequence + +def make_parser(): + usage = "usage: %prog [options] infile [outfile]" + + parser = optparse.OptionParser(usage) + parser.add_option("-e", "--extract", dest="slice", + default=":", + help="provide a python slice operator to select a portion of an eland file") + return parser + +def main(argv): + parser = make_parser() + + (opt, args) = parser.parse_args(argv) + + if len(args) not in (0, 1, 2): + parser.error('incorrect number of arguments') + + # get our slice coordinates + start, end = opt.slice.split(':') + if len(start) > 0: + start = int(start) + else: + start = None + if len(end) > 0: + end = int(end) + else: + end = None + + # open infile + if len(args) > 0: + instream = open(args[0],'r') + else: + instream = sys.stdin + + if len(args) > 1: + outstream = open(args[1],'w') + else: + outstream = sys.stdout + + extract_eland_sequence(instream, outstream, start, end) + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) + diff --git a/trunk/scripts/gerald2bed.py b/trunk/scripts/gerald2bed.py new file mode 100644 index 0000000..7a726e7 --- /dev/null +++ b/trunk/scripts/gerald2bed.py @@ -0,0 +1,96 @@ +#!/usr/bin/python +""" +Convert a group of eland_result files from a sequencer run to bed files. 
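+
+Typical invocation (a sketch; the GERALD directory path and flowcell
+id are illustrative, not real values):
+
+  gerald2bed.py --flowcell FC12150 /path/to/runfolder/Data/C1-36/GERALD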
+""" +from glob import glob +import logging +import optparse +import sys +import os + +from htsworkflow.util.makebed import make_bed_from_eland_stream, make_description + +def make_bed_for_gerald(eland_dir, output_dir, prefix, database, flowcell): + """ + convert s_[1-8]_eland_result.txt to corresponding bed files + """ + eland_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.txt')) + out_files = glob(os.path.join(eland_dir, 's_[1-8]_eland_result.bed')) + if len(out_files) > 0: + raise RuntimeError("please move old bedfiles") + + logging.info('Processing %s using flowcell id %s' % (eland_dir, flowcell)) + for pathname in eland_files: + path, name = os.path.split(pathname) + lane = int(name[2]) + outname = 's_%d_eland_result.bed' %(lane,) + logging.info('Converting lane %d to %s' % (lane, outname)) + + outpathname = os.path.join(eland_dir, outname) + # look up descriptions + bed_name, description = make_description(database, flowcell, lane) + + # open files + instream = open(pathname,'r') + outstream = open(outpathname,'w') + + make_bed_from_eland_stream( + instream, outstream, name, description, prefix + ) + +def make_parser(): + usage = """%prog: --flowcell directory_name + +directory should contain a set of 8 eland result files named like +s_[12345678]_eland_result.txt""" + + + parser = optparse.OptionParser(usage) + + parser.add_option('-o', '--output', dest='output', + help="destination directory for our bed files" \ + "defaults to eland directory", + default=None) + parser.add_option('--chromosome', dest='prefix', + help='Set the chromosome prefix name. defaults to "chr"', + default='chr') + parser.add_option("--database", dest='database', + help="specify location of fctracker database", + default=None) + parser.add_option("--flowcell", dest='flowcell', + help="specify the flowcell id for this run", + default=None) + parser.add_option('-v', '--verbose', dest='verbose', action='store_true', + help='increase verbosity', + default=False) + return parser + +def main(command_line=None): + logging.basicConfig(level=logging.WARNING) + if command_line is None: + command_line = sys.argv[1:] + + parser = make_parser() + (opts, args) = parser.parse_args(command_line) + + if len(args) != 1: + parser.error('Directory name required') + + eland_dir = args[0] + if not os.path.isdir(eland_dir): + parser.error('%s must be a directory' % (eland_dir,)) + + if opts.flowcell is None: + parser.error('Flowcell ID required') + + if opts.verbose: + logger = logging.getLogger() + logger.setLevel(logging.INFO) + + make_bed_for_gerald(eland_dir, opts.output, opts.prefix, opts.database, opts.flowcell) + + return 0 + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) + diff --git a/trunk/scripts/library.py b/trunk/scripts/library.py new file mode 100644 index 0000000..35532f4 --- /dev/null +++ b/trunk/scripts/library.py @@ -0,0 +1,39 @@ +""" +Provide some quick and dirty access and reporting for the fctracker database. + +The advantage to this code is that it doesn't depend on django being +installed, so it can run on machines other than the webserver. 
+""" +from optparse import OptionParser +import sys + +from htsworkflow.util import fctracker + +def make_parser(): + """ + Make parser + """ + parser = OptionParser() + parser.add_option("-d", "--database", dest="database", + help="path to the fctracker.db", + default=None) + parser.add_option("-w", "--where", dest="where", + help="add a where clause", + default=None) + return parser + +def main(argv=None): + if argv is None: + argv = [] + parser = make_parser() + + opt, args = parser.parse_args(argv) + + fc = fctracker.fctracker(opt.database) + cells = fc._get_flowcells(opt.where) + + print fctracker.recoverable_drive_report(cells) + return 0 + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/trunk/scripts/make-library-tree b/trunk/scripts/make-library-tree new file mode 100644 index 0000000..50ce7a9 --- /dev/null +++ b/trunk/scripts/make-library-tree @@ -0,0 +1,241 @@ +#!/usr/bin/python +""" +Make a tree of symlinks organized by library id. +""" +from ConfigParser import SafeConfigParser +from glob import glob +import logging +from optparse import OptionParser +import logging +import os +import stat +import sys + +from htsworkflow.util import fctracker + +def find_lanes(flowcell_dir, flowcell_id, lane): + lane_name = "s_%s_eland_*" %(lane) + pattern = os.path.join(flowcell_dir, flowcell_id, "*", lane_name) + lanes = glob(pattern) + return lanes + +def make_long_lane_name(flowcell_dir, lane_pathname): + """ + make a name from the eland result file name + """ + if flowcell_dir == lane_pathname[0:len(flowcell_dir)]: + subpath = lane_pathname[len(flowcell_dir):] + long_name = subpath.replace(os.path.sep, "_") + return long_name + else: + return None + +def parse_srf_directory(srf_dir): + """ + search srf_dir for *.srf files + + builds a dictionary indexed by flowcell name. + """ + flowcells = {} + srfs = glob(os.path.join(srf_dir,'*.srf')) + for pathname in srfs: + path, filename = os.path.split(pathname) + basename, ext = os.path.splitext(filename) + record = basename.split('_') + if len(record) != 6: + logging.error("Unrecognized srf file: %s expected 6 fields got %d" % (pathname,len(record))) + continue + + site = record[0] + date = record[1] + machine = record[2] + runid = record[3] + flowcellid = record[4] + laneid = record[5] + + desc = "_".join([site,date,machine,runid,flowcellid]) + flowcells[flowcellid] = desc + return flowcells + + +def carefully_make_hardlink(source, destination, dry_run=False): + """ + Make a hard link, failing if a different link already exists + + Checking to see if the link already exists and is + the same as the link we want to make. + If the link already exists and is different, throw an error. 
+    """
+    logging.debug("CHECKING: %s -> %s", source, destination)
+
+    if not os.path.exists(source):
+        logging.warning("%s doesn't exist", source)
+        return
+
+    if os.path.exists(destination):
+        if os.path.samefile(source, destination):
+            logging.debug('SAME: %s -> %s' % (source, destination))
+            return
+        else:
+            raise IOError('%s and %s are different files' % \
+                          (source, destination))
+    logging.info('Linking: %s -> %s' % (source, destination))
+
+    if dry_run: return
+
+    os.link(source, destination)
+    os.chmod(destination,
+             stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH )
+
+def link_all_eland_lanes(library_path, flowcell_dir, flowcell_id, lane, dry_run):
+    """
+    find eland files at different alignment lengths
+    and put each of those in the library directory
+    """
+    lanes = find_lanes(flowcell_dir, flowcell_id, lane)
+    for lane_pathname in lanes:
+        long_name = make_long_lane_name(flowcell_dir,
+                                        lane_pathname)
+        long_pathname = os.path.join(library_path, long_name)
+        carefully_make_hardlink(lane_pathname,
+                                long_pathname,
+                                dry_run)
+
+def link_srf_lanes(srf_names, library_path, srf_dir, flowcell_id, lane, dry_run):
+    """
+    Link srf files into our library directories.
+
+    the srf files must be named:
+    <site>_<date>_<machine>_<runid>_<flowcellid>_<lane>.srf
+    """
+    srf_basename = srf_names.get(flowcell_id, None)
+    if srf_basename is None:
+        logging.info("srf file for %s was not found", flowcell_id)
+    else:
+        srf_filename = "%s_%s.srf" % (srf_basename, lane)
+        source = os.path.join(srf_dir, srf_filename)
+        destination = os.path.join(library_path, srf_filename)
+        carefully_make_hardlink(source, destination, dry_run)
+
+
+def make_library_tree(fcdb, library_dir, flowcell_dir, srfs_dir,
+                      dry_run=False):
+    """
+    Iterate over the libraries in fcdb, linking each library's eland
+    and srf files into a per-library directory
+    """
+    library_dir = os.path.normpath(library_dir) + os.path.sep
+    flowcell_dir = os.path.normpath(flowcell_dir) + os.path.sep
+    srfs_dir = os.path.normpath(srfs_dir) + os.path.sep
+
+    srf_names = parse_srf_directory(srfs_dir)
+
+    for lib_id, lib in fcdb.library.items():
+        library_path = os.path.join(library_dir, str(lib_id))
+        if not os.path.exists(library_path):
+            os.mkdir(library_path)
+
+        for flowcell_id, lane in lib.get('lanes', []):
+            link_all_eland_lanes(library_path,
+                                 flowcell_dir,
+                                 flowcell_id,
+                                 lane,
+                                 dry_run)
+
+            link_srf_lanes(srf_names,
+                           library_path,
+                           srfs_dir,
+                           flowcell_id,
+                           lane,
+                           dry_run)
+
+def make_parser():
+    """
+    Make parser
+    """
+    parser = OptionParser()
+    parser.add_option('-c', '--config', default=None,
+                      help='path to a configuration file containing a '
+                           'sequence archive section')
+
+    parser.add_option("--database", dest="database",
+                      help="path to the fctracker.db",
+                      default=None)
+    parser.add_option('-a', '--sequence-archive', default=None,
+                      help='path to where the sequence archive lives')
+    parser.add_option("-w", "--where", dest="where",
+                      help="add a where clause",
+                      default=None)
+
+    parser.add_option('-v', '--verbose', action='store_true', default=False,
+                      help='be more verbose')
+    parser.add_option('-d', '--debug', action='store_true', default=False,
+                      help='report everything')
+
+    parser.add_option("--dry-run", dest="dry_run", action="store_true",
+                      default=False,
+                      help="Don't modify the filesystem")
+    return parser
+
+def main(argv=None):
+    FRONTEND_NAME = 'frontend'
+    SECTION_NAME = 'sequence_archive'
+    DATABASE_OPT = 'database_name'
+    ARCHIVE_OPT = 'archive_path'
+
+    if argv is None:
+        argv = []
+    parser = make_parser()
+
+    # parse command line arguments
+    opt, args = parser.parse_args(argv)
+
+    # setup logging
+    level = logging.WARN
+    if opt.verbose:
+        level = 
logging.INFO + if opt.debug: + level = logging.DEBUG + logging.basicConfig(level=level) + + # figure out what config file to read + config_path = [os.path.expanduser('~/.htsworkflow.ini'), + '/etc/htsworkflow.ini'] + if opt.config is not None: + config_path = [opt.config] + + # parse options from config file + config_file = SafeConfigParser() + config_file.read(config_path) + + # load defaults from config file if not overriden by the command line + print opt.database + if opt.database is None and \ + config_file.has_option(FRONTEND_NAME, DATABASE_OPT): + opt.database = config_file.get(FRONTEND_NAME, DATABASE_OPT) + + if opt.sequence_archive is None and \ + config_file.has_option(SECTION_NAME, ARCHIVE_OPT): + opt.sequence_archive = config_file.get(SECTION_NAME, ARCHIVE_OPT) + + # complain if critical things are missing + if opt.database is None: + parser.error('Need location of htsworkflow frontend database') + + if opt.sequence_archive is None: + parser.error('Need the root path for the sequence archive') + + fcdb = fctracker.fctracker(opt.database) + cells = fcdb._get_flowcells(opt.where) + + library_dir = os.path.join(opt.sequence_archive, 'libraries') + flowcell_dir = os.path.join(opt.sequence_archive, 'flowcells') + srfs_dir = os.path.join(opt.sequence_archive, 'srfs') + make_library_tree(fcdb, + library_dir, flowcell_dir, srfs_dir, + opt.dry_run) + + return 0 + +if __name__ == "__main__": + rv = main(sys.argv[1:]) + # sys.exit(rv) diff --git a/trunk/scripts/makebed b/trunk/scripts/makebed new file mode 100755 index 0000000..577b868 --- /dev/null +++ b/trunk/scripts/makebed @@ -0,0 +1,113 @@ +#!/usr/bin/python +import optparse +import sys +import os + +from htsworkflow.util.opener import autoopen +from htsworkflow.util.makebed import make_bed_from_eland_stream, make_bed_from_multi_eland_stream, make_description + +def make_parser(): + parser = optparse.OptionParser() + parser.add_option('-e', '--eland', dest='inname', + help='specify input eland filename') + parser.add_option('-b', '--bed', dest='outname', + help='specify output befilename') + parser.add_option('-n', '--name', dest='name', + help='specify the track (short) name.', + default=None) + parser.add_option('-d', '--description', dest='description', + help='specify the track description', + default=None) + parser.add_option('--chromosome', dest='prefix', + help='Set the chromosome prefix name. 
defaults to "chr"', + default='chr') + parser.add_option("--database", dest='database', + help="specify location of fctracker database", + default=None) + parser.add_option("--flowcell", dest='flowcell', + help="compute name and description from database using flowcell id", + default=None) + parser.add_option("--lane", dest='lane', + help='specify which lane to use when retrieving description from database', + default=None) + + multi = optparse.OptionGroup(parser, 'Multi-read ELAND support') + + multi.add_option('-m', '--multi', action='store_true', + help='Enable parsing multi-read eland files', + default=False) + multi.add_option('--reads', type='int', + help='limit reporting multi reads to this many reads' + '(most usefully --reads=1 will turn a multi-read ' + 'file into a single read file)', + default=255) + parser.add_option_group(multi) + + return parser + +def main(command_line=None): + instream = None + outstream = None + + if command_line is None: + command_line = sys.argv[1:] + + parser = make_parser() + (options, args) = parser.parse_args(command_line) + + if options.inname is None: + parser.error("Need eland input file name") + return 1 + + if options.inname == '-': + instream = sys.stdin + elif os.path.exists(options.inname): + instream = autoopen(options.inname, 'r') + else: + parser.error('%s was not found' % (options.inname)) + return 1 + + # figure out name for output file + if options.outname is None: + # if outname wasn't defined, and we're reading from stdout + if instream is sys.stdin: + # write to stdout + outstream = sys.stdout + else: + # if there's a name write to name.bed + options.outname = os.path.splitext(options.inname)[0]+'.bed' + print >>sys.stderr, "defaulting to outputname", options.outname + elif options.outname == '-': + outstream = sys.stdout + + if outstream is None: + if os.path.exists(options.outname): + parser.error("not overwriting %s" % (options.outname)) + return 1 + else: + outstream = open(options.outname, 'w') + + if options.flowcell is not None and options.lane is not None: + # get our name/description out of the database + name, description = make_description( + options.database, options.flowcell, options.lane + ) + else: + name = options.name + description = options.description + + if options.multi: + make_bed_from_multi_eland_stream(instream, outstream, + name, description, + options.prefix, + options.reads) + + else: + make_bed_from_eland_stream(instream, outstream, + name, description, + options.prefix) + return 0 + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) + diff --git a/trunk/scripts/mark_archived_data b/trunk/scripts/mark_archived_data new file mode 100755 index 0000000..eadafb6 --- /dev/null +++ b/trunk/scripts/mark_archived_data @@ -0,0 +1,102 @@ +#!/usr/bin/env python + +#import os +#os.environ['DJANGO_SETTINGS_MODULE'] = 'htsworkflow.frontend.settings' + +from htsworkflow.util.hdquery import get_hd_serial_num +from htsworkflow.frontend import settings + +#from django.conf import settings +from optparse import OptionParser + +import sys +import urllib2 + + + +def construct_parser(): + """ + """ + parser = OptionParser("usage: %prog -f -d OR\n\t %prog -f -s ") + parser.add_option("-f", "--flowcell", action="store", type="string", dest="flowcell", + help="flowcell being archived") + parser.add_option("-d", "--device", action="store", type="string", dest="device", + help="device flowcell is being archived to") + parser.add_option("-s", "--serial", action="store", type="string", dest="serial", + help="serial num. 
of archive device") + parser.add_option("-v", "--verbose", action="store_true", dest="verbose", default=False) + + return parser + + +def update_db(flowcell, serial, debug=False): + """ + Creates link between flowcell and storage device over http + """ + url = settings.LINK_FLOWCELL_STORAGE_DEVICE_URL+'%s/%s/' % (flowcell, serial) + + req = urllib2.Request(url) + try: + response = urllib2.urlopen(req) + except urllib2.URLError, e: + print 'ERROR - HTTP OUTPUT (Return Code: %s); use -v/--verbose for more details.' % (e.code) + if debug: + print e.read() + sys.exit(e.code) + + print "DB Update of %s & %s succeeded" % (flowcell, serial) + print response.read() + + +def process_args(parser): + """ + returns flowcell and serial# + """ + options, args = parser.parse_args() + + msg = [] + + # Only provide device or serial + if options.device is not None and options.serial is not None: + print "ERROR: Please only provide --device or --serial.\n" \ + " The serial number is extracted automatically if the device is provided." + sys.exit(2) + + print 'Flowcell:', options.flowcell + print ' Device:', options.device + print ' Serial:', options.serial + + if options.flowcell is None: + msg.append(" --flowcell required") + + # if device and serial missing: + if options.device is None and options.serial is None: + msg.append(" --device OR --serial required") + + if len(msg) > 0: + print '\n'.join(msg) + sys.exit(3) + + # Update db records + if options.device is not None: + serial = get_hd_serial_num(options.device) + update_db(flowcell=options.flowcell, serial=serial, debug=options.verbose) + elif options.serial is not None: + update_db(flowcell=options.flowcell, serial=options.serial, debug=options.verbose) + else: + msg ="FATAL should not happen error occured; i.e. the best kind!" + raise ValueError, msg + + + +def main(): + """ + """ + parser = construct_parser() + process_args(parser) + + #print "Database Updated." 
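+    # both process_args() and update_db() sys.exit() with a non-zero
+    # status on failure, so reaching this point means the flowcell /
+    # storage-device link was recorded successfully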
+ sys.exit(0) + +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/trunk/scripts/rerun_eland.py b/trunk/scripts/rerun_eland.py new file mode 100644 index 0000000..af06cdd --- /dev/null +++ b/trunk/scripts/rerun_eland.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python + +import logging +from optparse import OptionParser +import os +import subprocess +import sys + +from htsworkflow.pipelines import gerald +from htsworkflow.pipelines.eland import extract_eland_sequence +from htsworkflow.pipelines import runfolder + +def make_query_filename(eland_obj, output_dir): + query_name = '%s_%s_eland_query.txt' + query_name %= (eland_obj.sample_name, eland_obj.lane_id) + + query_pathname = os.path.join(output_dir, query_name) + + if os.path.exists(query_pathname): + logging.warn("overwriting %s" % (query_pathname,)) + + return query_pathname + +def make_result_filename(eland_obj, output_dir): + result_name = '%s_%s_eland_result.txt' + result_name %= (eland_obj.sample_name, eland_obj.lane_id) + + result_pathname = os.path.join(output_dir, result_name) + + if os.path.exists(result_pathname): + logging.warn("overwriting %s" % (result_pathname,)) + + return result_pathname + +def extract_sequence(inpathname, query_pathname, length, dry_run=False): + logging.info('extracting %d bases' %(length,)) + logging.info('extracting from %s' %(inpathname,)) + logging.info('extracting to %s' %(query_pathname,)) + + if not dry_run: + try: + instream = open(inpathname, 'r') + outstream = open(query_pathname, 'w') + extract_eland_sequence(instream, outstream, 0, length) + finally: + outstream.close() + instream.close() + +def run_eland(length, query_name, genome, result_name, multi=False, dry_run=False): + cmdline = ['eland_%d' % (length,), query_name, genome, result_name] + if multi: + cmdline += ['--multi'] + + logging.info('running eland: ' + " ".join(cmdline)) + if not dry_run: + return subprocess.Popen(cmdline) + else: + return None + + +def rerun(gerald_dir, output_dir, length=25, dry_run=False): + """ + look for eland files in gerald_dir and write a subset to output_dir + """ + logging.info("Extracting %d bp from files in %s" % (length, gerald_dir)) + g = gerald.gerald(gerald_dir) + + # this will only work if we're only missing the last dir in output_dir + if not os.path.exists(output_dir): + logging.info("Making %s" %(output_dir,)) + if not dry_run: os.mkdir(output_dir) + + processes = [] + for lane_id, lane_param in g.lanes.items(): + eland = g.eland_results[lane_id] + + inpathname = eland.pathname + query_pathname = make_query_filename(eland, output_dir) + result_pathname = make_result_filename(eland, output_dir) + + extract_sequence(inpathname, query_pathname, length, dry_run=dry_run) + + p = run_eland(length, + query_pathname, + lane_param.eland_genome, + result_pathname, + dry_run=dry_run) + if p is not None: + processes.append(p) + + for p in processes: + p.wait() + +def make_parser(): + usage = '%prog: [options] runfolder' + + parser = OptionParser(usage) + + parser.add_option('--gerald', + help='specify location of GERALD directory', + default=None) + parser.add_option('-o', '--output', + help='specify output location of files', + default=None) + parser.add_option('-l', '--read-length', type='int', + help='specify new eland length', + dest='length', + default=25) + parser.add_option('--dry-run', action='store_true', + help='only pretend to run', + default=False) + parser.add_option('-v', '--verbose', action='store_true', + help='increase verbosity', + default=False) + + return parser + 
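+# Example (a sketch; the runfolder path is illustrative):
+#
+#   rerun_eland.py --read-length 25 /data/081231_HWI-EAS229_0042_FC12345
+#
+# which extracts the first 25 bases of each GERALD eland query file
+# and reruns eland_25 against the genome recorded for that lane.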
+ +def main(cmdline=None): + logging.basicConfig(level=logging.WARNING) + + parser = make_parser() + opts, args = parser.parse_args(cmdline) + + if opts.length < 16 or opts.length > 32: + parser.error("eland can only process reads in the range 16-32") + + if len(args) > 1: + parser.error("Can only process one runfolder directory") + elif len(args) == 1: + runs = runfolder.get_runs(args[0]) + if len(runs) != 1: + parser.error("Not a runfolder") + opts.gerald = runs[0].gerald.pathname + if opts.output is None: + opts.output = os.path.join( + runs[0].pathname, + 'Data', + # pythons 0..n ==> elands 1..n+1 + 'C1-%d' % (opts.length+1,) + ) + + elif opts.gerald is None: + parser.error("need gerald directory") + + if opts.output is None: + parser.error("specify location for the new eland files") + + if opts.verbose: + root_logger = logging.getLogger() + root_logger.setLevel(logging.INFO) + + rerun(opts.gerald, opts.output, opts.length, dry_run=opts.dry_run) + + return 0 + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/trunk/scripts/retrieve_config b/trunk/scripts/retrieve_config new file mode 100644 index 0000000..3fab6ad --- /dev/null +++ b/trunk/scripts/retrieve_config @@ -0,0 +1,52 @@ +#!/usr/bin/env python +import sys +from htsworkflow.pipelines.retrieve_config import * +from htsworkflow.pipelines import retrieve_config +from htsworkflow.pipelines.genome_mapper import getAvailableGenomes +from htsworkflow.pipelines.genome_mapper import constructMapperDict + +#Turn on built-in command-line parsing. +retrieve_config.DISABLE_CMDLINE = False + +def main(args=None): + #Display help if no args are presented + if len(sys.argv) == 1: + sys.argv.append('-h') + + options = getCombinedOptions() + msg_list = ['ERROR MESSAGES:'] + if options.output_filepath is None: + msg_list.append(" Output filepath argument required. -o or --output=") + + if options.flowcell is None: + msg_list.append(" Flow cell argument required. -f or --flowcell=") + + if options.url is None: + msg_list.append(" URL argument required (-u or --url=), or entry\n" \ + " in /etc/ga_frontend/ga_frontend.conf or ~/.ga_frontend.conf") + if options.genome_dir is None: + msg_list.append(" genome_dir argument required (-g or \n" \ + " --genome_dir=, or entry in \n" \ + " /etc/ga_frontend/ga_frontend.conf or ~/.ga_frontend.conf") + + if len(msg_list) > 1: + print '\n'.join(msg_list) + return 1 + + saveConfigFile(options.flowcell, options.url, options.output_filepath) + + f = open(options.output_filepath, 'r') + data = f.read() + f.close() + + genome_dict = getAvailableGenomes(options.genome_dir) + mapper_dict = constructMapperDict(genome_dict) + + f = open(options.output_filepath, 'w') + f.write(data % (mapper_dict)) + f.close() + + return 0 + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/trunk/scripts/runfolder b/trunk/scripts/runfolder new file mode 100644 index 0000000..1380fbf --- /dev/null +++ b/trunk/scripts/runfolder @@ -0,0 +1,138 @@ +#!/usr/bin/env python +""" +Runfolder.py can generate a xml file capturing all the 'interesting' parameters from a finished pipeline run. (using the -a option). The information currently being captured includes: + + * Flowcell ID + * run dates + * start/stop cycle numbers + * Firecrest, bustard, gerald version numbers + * Eland analysis types, and everything in the eland configuration file. + * cluster numbers and other values from the Summary.htm + LaneSpecificParameters table. 
+ * How many reads mapped to a genome from an eland file + +The ELAND "mapped reads" counter will also check for eland squashed file +that were symlinked from another directory. This is so I can track how +many reads landed on the genome of interest and on the spike ins. + +Basically my subdirectories something like: + +genomes/hg18 +genomes/hg18/chr*.2bpb <- files for hg18 genome +genomes/hg18/chr*.vld +genomes/hg18/VATG.fa.2bp <- symlink to genomes/spikeins +genomes/spikein + +runfolder.py can also spit out a simple summary report (-s option) +that contains the per lane post filter cluster numbers and the mapped +read counts. (The report isn't currently very pretty) +""" +from glob import glob +import logging +import optparse +import os +import sys + +from htsworkflow.pipelines import runfolder +from htsworkflow.pipelines.runfolder import ElementTree + +def make_parser(): + usage = 'usage: %prog [options] runfolder_root_dir' + parser = optparse.OptionParser(usage) + + parser.add_option('-v', '--verbose', dest='verbose', action='store_true', + default=False, + help='turn on verbose mode') + parser.add_option('--dry-run', action='store_true', default=False, + help="Don't delete anything (in clean mode)") + + commands = optparse.OptionGroup(parser, 'Commands') + + commands.add_option('-s', '--summary', dest='summary', action='store_true', + default=False, + help='produce summary report') + commands.add_option('-a', '--archive', dest='archive', action='store_true', + default=False, + help='generate run configuration archive') + commands.add_option('--extract-results', action='store_true', + default=False, + help='create run-xml summary, compress the eland result files, and ' + 'copy them and the Summary.htm file into archival directory.') + commands.add_option('-c', '--clean', action='store_true', default=False, + help='Clean runfolder, preparing it for long-term storage') + parser.add_option_group(commands) + + parser.add_option('-o', '--output-dir', default=None, + help="specify the default output directory for extract results") + + parser.add_option('-u', '--use-run', dest='use_run', default=None, + help='Specify which run to use instead of autoscanning ' + 'the runfolder. 
You do this by providing the final ' + ' GERALD directory, and it assumes the parent ' + 'directories are the bustard and image processing ' + 'directories.') + + parser.add_option('--run-xml', dest='run_xml', + default=None, + help='specify a run_.xml file for summary reports') + + + return parser + +def main(cmdlist=None): + parser = make_parser() + opt, args = parser.parse_args(cmdlist) + + logging.basicConfig() + if opt.verbose: + root_log = logging.getLogger() + root_log.setLevel(logging.INFO) + + logging.info('Starting htsworkflow illumina runfolder processing tool.') + runs = [] + if opt.run_xml: + # handle ~ shortcut + opt.run_xml = os.path.expanduser(opt.run_xml) + tree = ElementTree.parse(opt.run_xml).getroot() + runs.append(runfolder.PipelineRun(xml=tree)) + + # look for manually specified run + if opt.use_run is not None: + specific_run = runfolder.get_specific_run(opt.use_run) + if specific_run is not None: + runs.append(specific_run) + else: + logging.warn("Couldn't find a run in %s" % (opt.use_run,)) + + # scan runfolders for runs + for run_pattern in args: + # expand args on our own if needed + for run_dir in glob(run_pattern): + runs.extend(runfolder.get_runs(run_dir)) + + if len(runs) > 0: + command_run = False + if opt.summary: + print runfolder.summary_report(runs) + command_run = True + if opt.archive: + runfolder.extract_run_parameters(runs) + command_run = True + if opt.extract_results: + runfolder.extract_results(runs, opt.output_dir) + command_run = True + if opt.clean: + runfolder.clean_runs(runs, opt.dry_run) + command_run = True + + if command_run == False: + print "You need to specify a command."+os.linesep + parser.print_help() + else: + print "You need to specify some run folders to process..."+os.linesep + parser.print_help() + + return 0 + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/trunk/scripts/runner b/trunk/scripts/runner new file mode 100644 index 0000000..560299f --- /dev/null +++ b/trunk/scripts/runner @@ -0,0 +1,6 @@ +#!/usr/bin/env python +import sys +from htsworkflow.automation.runner import main + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/trunk/scripts/spoolwatcher b/trunk/scripts/spoolwatcher new file mode 100644 index 0000000..b2f833e --- /dev/null +++ b/trunk/scripts/spoolwatcher @@ -0,0 +1,6 @@ +#!/usr/bin/env python +import sys +from htsworkflow.automation.spoolwatcher import main + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/trunk/scripts/srf b/trunk/scripts/srf new file mode 100644 index 0000000..e7478a9 --- /dev/null +++ b/trunk/scripts/srf @@ -0,0 +1,183 @@ +#!/usr/bin/python + +from glob import glob +import logging +import optparse +import os +import subprocess +import sys + +from htsworkflow.util import queuecommands +from htsworkflow.pipelines import runfolder + +SOLEXA2SRF = 0 +ILLUMINA2SRF10 = 1 +ILLUMINA2SRF11 = 2 + +def make_commands(run_name, lanes, site_name, destdir, cmdlevel=ILLUMINA2SRF11): + """ + make a subprocess-friendly list of command line arguments to run solexa2srf + generates files like: + woldlab:080514_HWI-EAS229_0029_20768AAXX:8.srf + site run name lane + + run_name - most of the file name (run folder name is a good choice) + lanes - list of integers corresponding to which lanes to process + site_name - name of your "sequencing site" or "Individual" + destdir - where to write all the srf files + """ + cmd_list = [] + for lane in lanes: + name_prefix = '%s_%%l_%%t_' % (run_name,) + destname = '%s_%s_%d.srf' % (site_name, run_name, 
lane)
+        destdir = os.path.normpath(destdir)
+        dest_path = os.path.join(destdir, destname)
+        seq_pattern = 's_%d_*_seq.txt' % (lane,)
+
+        if cmdlevel == SOLEXA2SRF:
+            cmd = ['solexa2srf',
+                   '-N', name_prefix,
+                   '-n', '%3x:%3y',
+                   '-o', dest_path,
+                   seq_pattern]
+        elif cmdlevel == ILLUMINA2SRF10:
+            cmd = ['illumina2srf',
+                   '-v1.0',
+                   '-o', dest_path,
+                   seq_pattern]
+        elif cmdlevel == ILLUMINA2SRF11:
+            seq_pattern = 's_%d_*_qseq.txt' % (lane,)
+            cmd = ['illumina2srf',
+                   '-o', dest_path,
+                   seq_pattern]
+        else:
+            raise ValueError("Unrecognized run level %d" % (cmdlevel,))
+
+        cmd_list.append(" ".join(cmd))
+    return cmd_list
+
+def pathname_to_run_name(base):
+    """
+    Convert a pathname to a base runfolder name
+    handle the case with a trailing /
+    """
+    name = ""
+    while len(name) == 0:
+        base, name = os.path.split(base)
+        if len(base) == 0:
+            return None
+    return name
+
+def make_parser():
+    usage = '%prog: [options] runfolder -l 1,2,3 [runfolder -l 5,6 ...]'
+
+    parser = optparse.OptionParser(usage)
+    parser.add_option('--dry-run', action='store_true',
+                      help='print what would be done',
+                      default=False)
+
+    parser.add_option('-d', '--dest-dir', dest='dest_dir',
+                      help='location to write srf files to',
+                      default='.')
+    parser.add_option('-s', '--site',
+                      help='site name',
+                      default='Individual')
+    parser.add_option('-l', '--lanes', dest='lanes', action="append",
+                      default=[],
+                      help='comma separated list of lanes to add to srf'
+                      )
+    parser.add_option('-j', '--jobs', default=1, type='int',
+                      help='how many jobs to run simultaneously')
+    parser.add_option('-r', '--runfolder-version', default=ILLUMINA2SRF11, type='int',
+                      help='Which class of srf file should we attempt to create\n'
+                           '0 = Solexa pipeline 0.2.6 - 0.3\n'
+                           '1 = illumina pipeline 1.0\n'
+                           '2 = illumina pipeline 1.1rc1 and later \n')
+
+    parser.add_option('-v', '--verbose', dest='verbose',
+                      default=False, action='store_true',
+                      help='report more about internals (INFO)')
+    parser.add_option('--debug', dest='debug',
+                      default=False, action='store_true',
+                      help='report even more about internals (DEBUG)')
+
+    return parser
+
+def parse_lane_arg(parser, lane_arg):
+    """
+    Convert a comma separated list of lane ids to a list of integers
+
+    parser is passed in so bad selections can be reported as usage
+    errors instead of raising NameError on an undefined global.
+    """
+    lanes = []
+    for lane in lane_arg.split(','):
+        try:
+            lane = int(lane)
+            if lane < 1 or lane > 8:
+                parser.error('Lanes must be in range [1..8]')
+            lanes.append(lane)
+        except ValueError:
+            parser.error('Lane selections must be integers')
+    return lanes
+
+def main(cmdline=None):
+    parser = make_parser()
+    opts, args = parser.parse_args(cmdline)
+
+    if opts.debug:
+        logging.basicConfig(level=logging.DEBUG)
+    elif opts.verbose:
+        logging.basicConfig(level=logging.INFO)
+    else:
+        logging.basicConfig(level=logging.WARNING)
+
+    if len(args) == 0:
+        parser.error('need runfolder arguments')
+
+    # parse lane arguments
+    lanes_list = []
+    if len(opts.lanes) == 0:
+        lanes_list = [[1,2,3,4,5,6,7,8]] * len(args)
+    elif len(opts.lanes) == len(args):
+        for lane_arg in opts.lanes:
+            lanes_list.append(parse_lane_arg(parser, lane_arg))
+    else:
+        parser.error(
+            "Number of lane arguments must match number of runfolders"
+        )
+
+    # build list of commands
+    cmds = {}
+    for runfolder_path, lanes in zip(args, lanes_list):
+        # normalize paths, either relative to home dirs or current dir
+        runfolder_path = os.path.abspath(runfolder_path)
+        # the last part of the path should be a runfolder name
+        name = pathname_to_run_name(runfolder_path)
+        # are there any bustard directories?
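+        # (get_runs scans the runfolder for image analysis / bustard /
+        # gerald directory triples; exactly one run is expected below)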
+ runs = runfolder.get_runs(runfolder_path) + # give up if there are anything other than 1 run + if len(runs) > 1: + print 'ERROR: Too many run directories in %s' %(runfolder_path,) + return 1 + elif len(runs) == 1: + bustard_dir = runs[0].bustard.pathname + cmds[bustard_dir] = make_commands(name, lanes, opts.site, opts.dest_dir, opts.runfolder_version) + else: + print "ERROR: Couldn't find a bustard directory in", runfolder_path + return 1 + + if not opts.dry_run: + for cwd, cmd_list in cmds.items(): + curdir = os.getcwd() + os.chdir(cwd) + q = queuecommands.QueueCommands(cmd_list, opts.jobs) + q.run() + os.chdir(curdir) + else: + for cwd, cmd_list in cmds.items(): + print cwd + print cmd_list + print 'jobs: ', opts.jobs + + return 0 + +if __name__ == "__main__": + sys.exit(main(sys.argv[1:])) diff --git a/trunk/setup.py b/trunk/setup.py new file mode 100644 index 0000000..6d3c606 --- /dev/null +++ b/trunk/setup.py @@ -0,0 +1,34 @@ +from setuptools import setup + +setup( + name="htsworkflow", + description="some bots and other utilities to help deal with data from an illumina sequencer", + author="Diane Trout & Brandon King", + author_email="diane@caltech.edu", + packages=["htsworkflow", + "htsworkflow.pipelines", + "htsworkflow.frontend", + "htsworkflow.frontend.analysis", + "htsworkflow.frontend.eland_config", + "htsworkflow.frontend.experiments", + "htsworkflow.frontend.inventory", + "htsworkflow.frontend.reports", + "htsworkflow.frontend.samples", + "htsworkflow.automation", + "htsworkflow.util" + ], + scripts=[ + 'scripts/configure_pipeline', + 'scripts/copier', + 'scripts/gerald2bed.py', + 'scripts/library.py', + 'scripts/makebed', + 'scripts/rerun_eland.py', + 'scripts/retrieve_config', + 'scripts/runfolder', + 'scripts/runner', + 'scripts/spoolwatcher', + 'scripts/srf', + 'scripts/mark_archived_data' + ], +) diff --git a/trunk/templates/config_form.html b/trunk/templates/config_form.html new file mode 100644 index 0000000..3fbbbee --- /dev/null +++ b/trunk/templates/config_form.html @@ -0,0 +1,17 @@ + + + Test Config Form + + + +
+  <form method="post" action="">
+    {{ form.as_custom }}
+    <input type="submit" value="Submit" />
+  </form>
    + + + \ No newline at end of file diff --git a/trunk/test/test_copier.py b/trunk/test/test_copier.py new file mode 100644 index 0000000..f34be14 --- /dev/null +++ b/trunk/test/test_copier.py @@ -0,0 +1,69 @@ +import unittest + +from StringIO import StringIO +from htsworkflow.automation import copier + +class testCopier(unittest.TestCase): + def test_runfolder_validate(self): + self.failUnlessEqual(copier.runfolder_validate(""), False) + self.failUnlessEqual(copier.runfolder_validate("1345_23"), False) + self.failUnlessEqual(copier.runfolder_validate("123456_asdf-$23'"), False) + self.failUnlessEqual(copier.runfolder_validate("123456_USI-EAS44"), True) + self.failUnlessEqual(copier.runfolder_validate("123456_USI-EAS44 "), False) + + def test_empty_config(self): + cfg = StringIO("""[fake] +something: unrelated +""") + bot = copier.CopierBot('fake', configfile=cfg) + self.failUnlessRaises(KeyError, bot.read_config) + + def test_full_config(self): + cfg = StringIO("""[copier] +jid: copier@example.fake +password: badpassword +authorized_users: user1@example.fake user2@example.fake +rsync_password_file: ~/.sequencer +rsync_sources: /tmp/sequencer_source +rsync_destination: /tmp/sequencer_destination +notify_users: user3@example.fake +# who to run to +#runner: +""") + c = copier.CopierBot("copier", configfile=cfg) + c.read_config() + c._init_rsync() + self.failUnlessEqual(c.jid, 'copier@example.fake') + self.failUnlessEqual(c.cfg['password'], 'badpassword') + self.failUnlessEqual(len(c.authorized_users), 2) + self.failUnlessEqual(c.authorized_users[0], 'user1@example.fake') + self.failUnlessEqual(c.authorized_users[1], 'user2@example.fake') + self.failUnlessEqual(c.rsync.source_base_list[0], + '/tmp/sequencer_source/') + self.failUnlessEqual(c.rsync.dest_base, '/tmp/sequencer_destination') + self.failUnlessEqual(len(c.notify_users), 1) + self.failUnlessEqual(c.notify_users[0], 'user3@example.fake') + + def test_dirlist_filter(self): + """ + test our dir listing parser + """ + # everyone should have a root dir, and since we're not + # currently writing files... it should all be good + r = copier.rsync('/', '/', '/') + + listing = [ + 'drwxrwxr-x 0 2007/12/29 12:34:56 071229_USI-EAS229_001_FC1234\n', + '-rwxrw-r-- 123268 2007/12/29 17:39:31 2038EAAXX.rtf\n', + '-rwxrw-r-- 6 2007/12/29 15:10:29 New Text Document.txt\n', + ] + + result = r.list_filter(listing) + self.failUnlessEqual(len(result), 1) + self.failUnlessEqual(result[0][-1], '4') + +def suite(): + return unittest.makeSuite(testCopier,'test') + +if __name__ == "__main__": + unittest.main(defaultTest="suite") diff --git a/trunk/test/tree.py b/trunk/test/tree.py new file mode 100644 index 0000000..4f666cc --- /dev/null +++ b/trunk/test/tree.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python + +""" +Build a fake directory tree for testing rsync management code. 
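+
+Example (a sketch; the scratch directory is illustrative):
+
+  paths = make_tree('/tmp/faketree', depth=2, directories=3, files=5)
+  assert compare_tree('/tmp/faketree', paths)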
+""" + +import os +import random + +def make_random_string(length=8): + """Make a random string, length characters long + """ + symbols = "abcdefhijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789" + name = [] + for i in xrange(length): + name.append(random.choice(symbols)) + return "".join(name) + +def make_file(pathname): + """Make a file with some random stuff in it + """ + stream = open(pathname,'w') + stream.write(make_random_string(16)) + stream.close() + +def make_tree(root, depth=3, directories=5, files=10): + """ + Make a tree of random directories and files + + depth is how many levels of subdirectories + directories is how many directories each subdirectory should have + files is how many files to create in each directory + """ + if not os.path.exists(root): + os.mkdir(root) + + paths = [] + # make files + for i in range(files): + name = make_random_string() + paths.append(name) + pathname = os.path.join(root, name) + make_file(pathname) + + # make subdirectories if we still have some depth to go + if depth > 0: + for i in range(directories): + name = make_random_string() + # paths.append(name) + pathname = os.path.join(root, name) + subpaths = make_tree(pathname, depth-1, directories, files) + paths.extend([ os.path.join(name, x) for x in subpaths ]) + + return paths + +def generate_paths(root): + """Make a list of relative paths like generated by make_tree + """ + paths = [] + for curdir, subdirs, files in os.walk(root): + paths.extend([ os.path.join(curdir, f) for f in files ]) + + # an inefficient way of getting the correct common prefix + # (e.g. root might not have a trailing /) + common_root = os.path.commonprefix(paths) + common_len = len(common_root) + return [ p[common_len:] for p in paths ] + +def compare_tree(root, paths, verbose=False): + """Make sure the tree matches our relative list of paths + """ + # what we find when we look + experimental_set = set(generate_paths(root)) + # what we expect + theoretical_set = set(paths) + # true if the difference of the two sets is the empty set + difference = experimental_set - theoretical_set + issame = (len(difference) == 0) + if verbose and not issame: + print difference + return issame -- 2.30.2