From: Diane Trout Date: Wed, 11 Jan 2017 00:56:52 +0000 (-0800) Subject: Save some example sql queries mirrored copy of encode database X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=htsworkflow.git;a=commitdiff_plain;h=7fd611925cbf73184883fbaa152592cd81faeae3 Save some example sql queries mirrored copy of encode database --- diff --git a/encode_submission/annotations.sql b/encode_submission/annotations.sql new file mode 100644 index 0000000..11cfea7 --- /dev/null +++ b/encode_submission/annotations.sql @@ -0,0 +1,25 @@ +with + annotation as ( + select + uri as annotation, + payload->>'description' as description, + jsonb_array_elements_text(payload->'files') as file + from item + where object_type = 'Annotation' + ), + file as ( + select + uri as file, + payload->>'status' as status, + payload->>'file_type' as file_type, + payload->>'file_format' as file_format, + payload->>'output_type' as output_type, + payload->>'output_category' as category + from item + where object_type = 'File' + ) +select annotation, description, file.file, status, file_type, file_format, category +from annotation + join file on annotation.file = file.file +where category != 'raw data' +; diff --git a/encode_submission/antibody.sql b/encode_submission/antibody.sql new file mode 100644 index 0000000..cb4b548 --- /dev/null +++ b/encode_submission/antibody.sql @@ -0,0 +1,66 @@ +-- \pset format unaligned +-- \pset fieldsep '\t' +with + experiment as ( + select uri as Experiment, + payload->>'accession' as Experiment_Accession, + payload->>'description' as Experiment_Description, + payload->>'status' as Experiment_Status, + payload->>'date_released' as Experiment_Released, + payload->>'assay_title' as Experiment_Type, + jsonb_array_elements_text(payload->'replicates') as Replicate + from item + where object_type = 'Experiment' and + payload->'lab' @> '"/labs/michael-snyder/"'::jsonb + ), + replicate as ( + select uri as Replicate, + payload->>'library' as Library, + payload->>'antibody' as Antibody + from item + where object_type = 'Replicate' + ), + antibody_lot as ( + select uri as AntibodyLot, + payload->>'antigen_description' as Antigen_Description, + payload->>'clonality' as clonality, + jsonb_array_elements_text(payload->'targets') as Target, + jsonb_array_elements_text(payload->'characterizations') as Characterization + from item + where object_type = 'AntibodyLot' + ), + antibody_characterization as ( + select uri as AntibodyCharacterization + from item + where object_type = 'AntibodyCharcterization' + ), + library as ( + select uri as Library, + payload->>'accession' as Library_Accession, + payload->>'date_created' as Library_Created, + payload->>'biosample' as Biosample + from item + where object_type = 'Library' + ), + biosample as ( + select uri as Biosample, + payload->>'donor' as Donor + from item + where object_type = 'Biosample' + ), + donor as ( + select uri as Donor, + payload->>'organism' as Organism + from item + ) +select + Experiment, + Experiment_Status, + Replicate.Replicate, + Replicate.Antibody, + AntibodyLot +from experiment + JOIN replicate ON experiment.Replicate = replicate.Replicate + JOIN antibody_lot on replicate.Antibody = antibody_lot.AntibodyLot +-- limit 10 +; diff --git a/encode_submission/biosample.sql b/encode_submission/biosample.sql new file mode 100644 index 0000000..3fdd167 --- /dev/null +++ b/encode_submission/biosample.sql @@ -0,0 +1,20 @@ +with recursive + biosample(uri, payload, parent_of, part_of, d) as ( + select uri, payload, + jsonb_array_elements_text(payload->'parent_of') as parent_of, + payload->>'part_of' as part_of, + 1 + from item + where object_type = 'Biosample' + union all + select child.uri, child.payload, + jsonb_array_elements_text(child.payload->'parent_of') as parent_of, + child.payload->>'part_of' as part_of, + d + 1 + from biosample, item child + where biosample.uri = child.payload->>'part_of' + ) +select part_of, uri, parent_of, d +from biosample +where d > 1 +order by uri diff --git a/encode_submission/entex-donor.sql b/encode_submission/entex-donor.sql new file mode 100644 index 0000000..2c691f4 --- /dev/null +++ b/encode_submission/entex-donor.sql @@ -0,0 +1,12 @@ +-- \pset format unaligned +-- \pset fieldsep '\t' +with gtex as ( + select distinct uri as Donor, + substring(jsonb_array_elements_text(payload->'aliases'), '([a-zA-Z0-9]+):') as AliasPrefix + from item + where object_type = 'HumanDonor' + ) +select Donor, AliasPrefix +from gtex +where AliasPrefix = 'gtex' +; diff --git a/encode_submission/entex-lab.sql b/encode_submission/entex-lab.sql new file mode 100644 index 0000000..af55bb3 --- /dev/null +++ b/encode_submission/entex-lab.sql @@ -0,0 +1,54 @@ +\pset format unaligned +\pset fieldsep '\t' +with donor as ( + select uri as Donor, + payload->>'accession' + from item + ), + experiment as ( + select uri as Experiment, + payload->>'accession' as Experiment_Accession, + payload->>'description' as Experiment_Description, + payload->>'status' as Experiment_Status, + payload->>'date_released' as Experiment_Released, + payload->>'lab' as Experiment_Lab, + jsonb_array_elements_text(payload->'replicates') as Replicate + from item + where object_type = 'Experiment' + ), + replicate as ( + select uri as Replicate, + payload->>'library' as Library + from item + where object_type = 'Replicate' + ), + library as ( + select uri as Library, + payload->>'accession' as Library_Accession, + payload->>'date_created' as Library_Created, + payload->>'biosample' as Biosample, + jsonb_array_elements_text(payload->'aliases') as Library_Alias + from item + where object_type = 'Library' + ), + biosample as ( + select uri as Biosample, + payload->>'accession' as Biosample_Accession, + payload->>'date_created' as Biosample_Created, + payload->>'donor' as Donor + from item + where object_type = 'Biosample' + ) +select Experiment_Lab, Experiment_Status, count(Experiment_Accession) +from experiment + LEFT JOIN replicate ON experiment.Replicate = replicate.Replicate + LEFT JOIN library on replicate.Library = library.Library + LEFT JOIN biosample on library.Biosample = biosample.Biosample +where biosample.Donor in ('/human-donors/ENCDO845WKR/','/human-donors/ENCDO451RUA/','/human-donors/ENCDO793LXB/','/human-donors/ENCDO271OUW/') +group by Experiment_Lab, Experiment_Status +-- order by experiment_released, experiment_status, experiment_accession +-- limit 10 + + + +-- ENTEX donors diff --git a/encode_submission/entex-recursive.sql b/encode_submission/entex-recursive.sql new file mode 100644 index 0000000..f392605 --- /dev/null +++ b/encode_submission/entex-recursive.sql @@ -0,0 +1,40 @@ +-- \pset format unaligned +-- \pset fieldsep '\t' +with recursive + gtex as ( + select distinct uri as Donor, + substring(jsonb_array_elements_text(payload->'aliases'), '([a-zA-Z0-9]+):') as AliasPrefix + from item + where object_type = 'HumanDonor' + ), + biosample(uri, payload, parent_of, part_of, d) as ( + select uri, payload, + jsonb_array_elements_text(payload->'parent_of') as parent_of, + payload->>'part_of' as part_of, + 1 + from item + where object_type = 'Biosample' + union all + select uri, payload, + jsonb_array_elements_text(payload->'parent_of') as parent_of, + payload->>'part_of' as part_of, + d + 1 + from biosample + where parent_of = uri + ), + experiment as ( + select uri as Experiment, + payload->>'accession' as Experiment_Accession, + payload->>'description' as Experiment_Description, + payload->>'status' as Experiment_Status, + payload->>'date_released' as Experiment_Released, + payload->>'lab' as Experiment_Lab, + jsonb_array_elements_text(payload->'replicates') as Replicate + from item + where object_type = 'Experiment' + ) +select gtex.Donor, biosample.uri, biosample.parent_of +from gtex + JOIN biosample ON gtex.Donor = biosample.payload->>'donor' +where gtex.AliasPrefix = 'gtex' +; diff --git a/encode_submission/entex.sql b/encode_submission/entex.sql new file mode 100644 index 0000000..a992d0b --- /dev/null +++ b/encode_submission/entex.sql @@ -0,0 +1,37 @@ +\pset format unaligned +\pset fieldsep '\t' +with + gtex as ( + select distinct uri as Donor, + substring(jsonb_array_elements_text(payload->'aliases'), '([a-zA-Z0-9]+):') as AliasPrefix + from item + where object_type = 'HumanDonor' + ), + biosample as ( + select uri as Biosample, + payload->>'accession' as Biosample_Accession, + payload->>'date_created' as Biosample_Created, + payload->>'lab' as Biosample_Lab, + payload->>'biosample_term_name' as Biosample_Term, + payload->>'summary' as Description, + jsonb_array_elements_text(payload->'parent_of') as child_biosample, + payload->>'donor' as Donor + from item + where object_type = 'Biosample' + ), + experiment as ( + select uri as Experiment, + payload->>'accession' as Experiment_Accession, + payload->>'description' as Experiment_Description, + payload->>'status' as Experiment_Status, + payload->>'date_released' as Experiment_Released, + payload->>'lab' as Experiment_Lab, + jsonb_array_elements_text(payload->'replicates') as Replicate + from item + where object_type = 'Experiment' + ) +select gtex.Donor, Biosample, Biosample_Lab, Biosample_Term, Description, child_biosample +from gtex + LEFT JOIN biosample ON gtex.Donor = biosample.Donor +where gtex.AliasPrefix = 'gtex' +; diff --git a/encode_submission/myers-quarter-report-detail.sql b/encode_submission/myers-quarter-report-detail.sql new file mode 100644 index 0000000..8c1f83b --- /dev/null +++ b/encode_submission/myers-quarter-report-detail.sql @@ -0,0 +1,51 @@ +\pset format unaligned +\pset fieldsep '\t' +with + experiment as ( + select uri as Experiment, + payload->>'accession' as Experiment_Accession, + payload->>'description' as Experiment_Description, + payload->>'status' as Experiment_Status, + payload->>'date_released' as Experiment_Released, + payload->>'assay_title' as Experiment_Type, + payload->>'lab' as Experiment_Lab, + jsonb_array_elements_text(payload->'replicates') as Replicate + from item + where object_type = 'Experiment' and + payload->'lab' @> '"/labs/richard-myers/"'::jsonb + ), + replicate as ( + select uri as Replicate, + payload->>'library' as Library + from item + where object_type = 'Replicate' + ), + library as ( + select uri as Library, + payload->>'accession' as Library_Accession, + payload->>'date_created' as Library_Created, + payload->>'biosample' as Biosample, + jsonb_array_elements_text(payload->'aliases') as LibraryAlias + from item + where object_type = 'Library' + ) +select distinct Experiment_Accession, + Experiment_Type, + Experiment_Description, + Experiment_Status, + Experiment_Released, + Experiment_Lab, +-- experiment.Replicate, + experiment.Replicate, + Library_Accession, + to_char(library.Library_Created::date, 'YYYY-MM-DD') as Library_Created, + LibraryAlias +-- library.Biosample, +from experiment + LEFT JOIN replicate ON experiment.Replicate = replicate.Replicate + LEFT JOIN library on replicate.Library = library.Library +-- where +-- Experiment_Status = 'started' +order by experiment_released, experiment_status, experiment_accession +-- limit 10 +; diff --git a/encode_submission/quarter-report.sql b/encode_submission/quarter-report.sql new file mode 100755 index 0000000..dfd7228 --- /dev/null +++ b/encode_submission/quarter-report.sql @@ -0,0 +1,50 @@ +\pset format unaligned +\pset fieldsep '\t' +with + experiment as ( + select uri as Experiment, + payload->>'accession' as Experiment_Accession, + payload->>'description' as Experiment_Description, + payload->>'status' as Experiment_Status, + payload->>'date_released' as Experiment_Released, + payload->>'assay_title' as Experiment_Type, + jsonb_array_elements_text(payload->'replicates') as Replicate + from item + where object_type = 'Experiment' and + payload->'lab' @> '"/labs/barbara-wold/"'::jsonb + -- payload->'lab' @> '"/labs/ali-mortazavi/"'::jsonb + ), + replicate as ( + select uri as Replicate, + payload->>'library' as Library + from item + where object_type = 'Replicate' + ), + library as ( + select uri as Library, + payload->>'accession' as Library_Accession, + payload->>'date_created' as Library_Created, + payload->>'biosample' as Biosample, + substring(jsonb_array_elements_text(payload->'aliases') from 'barbara-wold:([0-9]+)') + as Jumpgate_Library_ID + from item + where object_type = 'Library' + ) +select distinct Experiment_Accession, + Experiment_Type, + Experiment_Description, + Experiment_Status, + Experiment_Released, +-- experiment.Replicate, + Library_Accession, + to_char(library.Library_Created::date, 'YYYY-MM-DD') as Library_Created, + Library_Alias +-- library.Biosample, +from experiment + LEFT JOIN replicate ON experiment.Replicate = replicate.Replicate + LEFT JOIN library on replicate.Library = library.Library +-- where +-- Experiment_Status = 'started' +order by experiment_released, experiment_status, experiment_accession +-- limit 10 +; diff --git a/encode_submission/snyder-chip-report.sql b/encode_submission/snyder-chip-report.sql new file mode 100644 index 0000000..8fc782f --- /dev/null +++ b/encode_submission/snyder-chip-report.sql @@ -0,0 +1,95 @@ +\pset format unaligned +\pset fieldsep '\t' +with + experiment as ( + select uri as Experiment, + payload->>'accession' as Experiment_Accession, + payload->>'description' as Experiment_Description, + payload->>'status' as Experiment_Status, + payload->>'date_released' as Experiment_Released, + payload->>'assay_title' as Experiment_Type, + jsonb_array_elements_text(payload->'replicates') as Replicate + from item + where object_type = 'Experiment' and + payload->'lab' @> '"/labs/michael-snyder/"'::jsonb + ), + replicate as ( + select uri as Replicate, + payload->>'library' as Library, + payload->>'antibody' as Antibody + from item + where object_type = 'Replicate' + ), + antibody_lot as ( + select uri as AntibodyLot, + payload->>'antigen_description' as Antigen_Description, + payload->>'clonality' as clonality, + jsonb_array_elements_text(payload->'targets') as Target, + jsonb_array_elements_text(payload->'characterizations') as Characterization + from item + where object_type = 'AntibodyLot' + ), + antibody_characterization as ( + select uri as AntibodyCharacterization, + payload->>'status' as Status, + payload->>'characterization_method' as Characterization_Method + from item + where object_type = 'AntibodyCharacterization' + ), + library as ( + select uri as Library, + payload->>'accession' as Library_Accession, + payload->>'date_created' as Library_Created, + payload->>'biosample' as Biosample + from item + where object_type = 'Library' + ), + biosample as ( + select uri as Biosample, + payload->>'summary' as Summary, + payload->>'donor' as Donor + from item + where object_type = 'Biosample' + ), + target as ( + select uri as Target, + payload->>'title' as title, + payload->>'status' as status, + jsonb_array_elements_text(payload->'investigated_as') as investigated_as + from item + where object_type = 'Target' + ), + donor as ( + select uri as Donor, + payload->>'organism' as Organism + from item + ) +select distinct + Experiment_Accession, + Experiment_Type, + Experiment_Description, + Experiment_Status, + Experiment_Released, + Library_Accession, + AntibodyLot, + Antigen_Description, + Clonality, + antibody_characterization.Status as Characterization_Status, + antibody_characterization.characterization_method as Characterization_Method, + target.Target, + target.title as Target_Title, + target.status as Target_Status, + biosample.Biosample, + biosample.summary as Biosample_Description +from experiment + JOIN replicate ON experiment.Replicate = replicate.Replicate + JOIN antibody_lot on replicate.Antibody = antibody_lot.AntibodyLot + JOIN target ON antibody_lot.Target = target.Target + JOIN library on replicate.Library = library.Library + JOIN biosample on library.Biosample = biosample.Biosample + JOIN antibody_characterization on antibody_lot.Characterization = antibody_characterization.AntibodyCharacterization +-- where +-- Experiment_Status = 'started' +order by experiment_accession, AntibodyLot +-- limit 10 +; diff --git a/encode_submission/target.sql b/encode_submission/target.sql new file mode 100644 index 0000000..ee88e47 --- /dev/null +++ b/encode_submission/target.sql @@ -0,0 +1,91 @@ +-- \pset format unaligned +-- \pset fieldsep '\t' +with + experiment as ( + select uri as Experiment, + payload->>'accession' as Experiment_Accession, + payload->>'description' as Experiment_Description, + payload->>'status' as Experiment_Status, + payload->>'date_released' as Experiment_Released, + payload->>'assay_title' as Experiment_Type, + jsonb_array_elements_text(payload->'replicates') as Replicate + from item + where object_type = 'Experiment' and + payload->'lab' @> '"/labs/michael-snyder/"'::jsonb + ), + replicate as ( + select uri as Replicate, + payload->>'library' as Library, + payload->>'antibody' as Antibody + from item + where object_type = 'Replicate' + ), + antibody_lot as ( + select uri as AntibodyLot, + payload->>'antigen_description' as Antigen_Description, + payload->>'clonality' as clonality, + jsonb_array_elements_text(payload->'targets') as Target, + jsonb_array_elements_text(payload->'characterizations') as Characterization + from item + where object_type = 'AntibodyLot' + ), + antibody_characterization as ( + select uri as Characterization, + payload->>'caption' as Caption, + payload->>'characterization_method' as Characterization_Method, + payload->>'status' as Characterization_Status + from item + where object_type = 'AntibodyCharcterization' + ), + library as ( + select uri as Library, + payload->>'accession' as Library_Accession, + payload->>'date_created' as Library_Created, + payload->>'biosample' as Biosample + from item + where object_type = 'Library' + ), + biosample as ( + select uri as Biosample, + payload->>'donor' as Donor + from item + where object_type = 'Biosample' + ), + target as ( + select uri as Target, + payload->>'title' as title, + payload->>'status' as status, + jsonb_array_elements_text(payload->'investigated_as') as investigated_as + from item + where object_type = 'Target' + ), + donor as ( + select uri as Donor, + payload->>'organism' as Organism + from item + ) +select + Replicate, + Library, + AntibodyLot, + Antigen_Description, + Clonality, + antibody_lot.Target, + antibody_lot.Characterization, + Characterization_Method, + Characterization_Status, + target.title, + target.status, + target.investigated_as +-- experiment.Replicate, +-- Library_Accession, +-- library.Biosample, +-- donor.Organism, +-- to_char(library.Library_Created::date, 'YYYY-MM-DD') as Library_Created +from replicate + left join antibody_lot on replicate.Antibody = antibody_lot.AntibodyLot + left join antibody_characterization on + antibody_lot.Characterization = antibody_characterization.Characterization + left join target on antibody_lot.Target = target.Target +limit 10 +; diff --git a/encode_submission/wold-quarter-report-detail.sql b/encode_submission/wold-quarter-report-detail.sql new file mode 100644 index 0000000..70d5fec --- /dev/null +++ b/encode_submission/wold-quarter-report-detail.sql @@ -0,0 +1,66 @@ +\pset format unaligned +\pset fieldsep '\t' +with + experiment as ( + select uri as Experiment, + payload->>'accession' as Experiment_Accession, + payload->>'description' as Experiment_Description, + payload->>'status' as Experiment_Status, + payload->>'date_released' as Experiment_Released, + payload->>'assay_title' as Experiment_Type, + jsonb_array_elements_text(payload->'replicates') as Replicate + from item + where object_type = 'Experiment' and + payload->'lab' @> '"/labs/barbara-wold/"'::jsonb + -- payload->'lab' @> '"/labs/ali-mortazavi/"'::jsonb + ), + replicate as ( + select uri as Replicate, + payload->>'library' as Library + from item + where object_type = 'Replicate' + ), + library as ( + select uri as Library, + payload->>'accession' as Library_Accession, + payload->>'date_created' as Library_Created, + payload->>'biosample' as Biosample, + substring(jsonb_array_elements_text(payload->'aliases') from 'barbara-wold:([0-9]+)') + as Jumpgate_Library_ID + from item + where object_type = 'Library' + ), + biosample as ( + select uri as Biosample, + payload->>'donor' as Donor + from item + where object_type = 'Biosample' + ), + donor as ( + select uri as Donor, + payload->>'organism' as Organism + from item + ) +select distinct Experiment_Accession, + Experiment_Type, + Experiment_Description, + Experiment_Status, + Experiment_Released, +-- experiment.Replicate, + Library_Accession, + library.Biosample, + donor.Organism, + to_char(library.Library_Created::date, 'YYYY-MM-DD') as Library_Created, + Jumpgate_Library_ID +-- samples_library.library_name +from experiment + LEFT JOIN replicate ON experiment.Replicate = replicate.Replicate + LEFT JOIN library on replicate.Library = library.Library +-- LEFT JOIN samples_library on Jumpgate_Library_ID = samples_library.id + LEFT JOIN biosample on library.Biosample = biosample.Biosample + LEFT JOIN donor on biosample.Donor = donor.Donor +-- where +-- Experiment_Status = 'started' +order by experiment_released, experiment_status, experiment_accession +-- limit 10 +;