Save some example sql queries mirrored copy of encode database
authorDiane Trout <diane@ghic.org>
Wed, 11 Jan 2017 00:56:52 +0000 (16:56 -0800)
committerDiane Trout <diane@ghic.org>
Wed, 11 Jan 2017 00:56:52 +0000 (16:56 -0800)
12 files changed:
encode_submission/annotations.sql [new file with mode: 0644]
encode_submission/antibody.sql [new file with mode: 0644]
encode_submission/biosample.sql [new file with mode: 0644]
encode_submission/entex-donor.sql [new file with mode: 0644]
encode_submission/entex-lab.sql [new file with mode: 0644]
encode_submission/entex-recursive.sql [new file with mode: 0644]
encode_submission/entex.sql [new file with mode: 0644]
encode_submission/myers-quarter-report-detail.sql [new file with mode: 0644]
encode_submission/quarter-report.sql [new file with mode: 0755]
encode_submission/snyder-chip-report.sql [new file with mode: 0644]
encode_submission/target.sql [new file with mode: 0644]
encode_submission/wold-quarter-report-detail.sql [new file with mode: 0644]

diff --git a/encode_submission/annotations.sql b/encode_submission/annotations.sql
new file mode 100644 (file)
index 0000000..11cfea7
--- /dev/null
@@ -0,0 +1,25 @@
+with
+  annotation as (
+  select
+    uri as annotation,
+    payload->>'description' as description,
+    jsonb_array_elements_text(payload->'files') as file
+  from item
+  where object_type = 'Annotation'
+  ),
+  file as (
+  select
+    uri as file,
+    payload->>'status' as status,
+    payload->>'file_type' as file_type,
+    payload->>'file_format' as file_format,
+    payload->>'output_type' as output_type,
+    payload->>'output_category' as category
+  from item
+  where object_type = 'File'
+  )
+select annotation, description, file.file, status, file_type, file_format, category
+from annotation
+     join file on annotation.file = file.file
+where category != 'raw data'
+;
diff --git a/encode_submission/antibody.sql b/encode_submission/antibody.sql
new file mode 100644 (file)
index 0000000..cb4b548
--- /dev/null
@@ -0,0 +1,66 @@
+-- \pset format unaligned
+-- \pset fieldsep '\t'
+with
+  experiment as (
+  select uri as Experiment,
+         payload->>'accession' as Experiment_Accession,
+         payload->>'description' as Experiment_Description,
+         payload->>'status' as Experiment_Status,
+         payload->>'date_released' as Experiment_Released,
+         payload->>'assay_title' as Experiment_Type,
+         jsonb_array_elements_text(payload->'replicates') as Replicate
+  from item
+  where object_type = 'Experiment' and
+        payload->'lab' @> '"/labs/michael-snyder/"'::jsonb
+  ),
+  replicate as (
+    select uri as Replicate,
+           payload->>'library' as Library,
+           payload->>'antibody' as Antibody
+    from item
+    where object_type = 'Replicate'
+  ),
+  antibody_lot as (
+    select uri as AntibodyLot,
+           payload->>'antigen_description' as Antigen_Description,
+           payload->>'clonality' as clonality,
+           jsonb_array_elements_text(payload->'targets') as Target,
+           jsonb_array_elements_text(payload->'characterizations') as Characterization
+    from item
+    where object_type = 'AntibodyLot'
+  ),
+  antibody_characterization as (
+    select uri as AntibodyCharacterization
+    from item
+    where object_type = 'AntibodyCharcterization'
+  ),
+  library as (
+    select uri as Library,
+           payload->>'accession' as Library_Accession,
+           payload->>'date_created' as Library_Created,
+           payload->>'biosample' as Biosample
+    from item
+    where object_type = 'Library'
+  ),
+  biosample as (
+     select uri as Biosample,
+            payload->>'donor' as Donor
+        from item
+        where object_type = 'Biosample'
+  ),
+  donor as (
+     select uri as Donor,
+            payload->>'organism' as Organism
+      from item
+  )
+select
+  Experiment,
+  Experiment_Status,
+  Replicate.Replicate,
+  Replicate.Antibody,
+  AntibodyLot
+from experiment
+     JOIN replicate ON experiment.Replicate = replicate.Replicate
+     JOIN antibody_lot on replicate.Antibody = antibody_lot.AntibodyLot
+-- limit 10
+;
diff --git a/encode_submission/biosample.sql b/encode_submission/biosample.sql
new file mode 100644 (file)
index 0000000..3fdd167
--- /dev/null
@@ -0,0 +1,20 @@
+with recursive
+   biosample(uri, payload, parent_of, part_of, d) as (
+       select uri, payload,
+              jsonb_array_elements_text(payload->'parent_of') as parent_of,
+              payload->>'part_of' as part_of,
+              1
+       from item
+       where object_type = 'Biosample'
+   union all
+       select child.uri, child.payload,
+              jsonb_array_elements_text(child.payload->'parent_of') as parent_of,
+              child.payload->>'part_of' as part_of,
+              d + 1
+       from biosample, item child
+       where biosample.uri = child.payload->>'part_of'
+   )
+select part_of, uri, parent_of, d
+from biosample
+where d > 1
+order by uri
diff --git a/encode_submission/entex-donor.sql b/encode_submission/entex-donor.sql
new file mode 100644 (file)
index 0000000..2c691f4
--- /dev/null
@@ -0,0 +1,12 @@
+-- \pset format unaligned
+-- \pset fieldsep '\t'
+with gtex as (
+  select distinct uri as Donor,
+         substring(jsonb_array_elements_text(payload->'aliases'), '([a-zA-Z0-9]+):') as AliasPrefix
+  from item
+  where object_type = 'HumanDonor'
+  )
+select Donor, AliasPrefix
+from gtex
+where AliasPrefix = 'gtex'
+;
diff --git a/encode_submission/entex-lab.sql b/encode_submission/entex-lab.sql
new file mode 100644 (file)
index 0000000..af55bb3
--- /dev/null
@@ -0,0 +1,54 @@
+\pset format unaligned
+\pset fieldsep '\t'
+with donor as (
+  select uri as Donor,
+         payload->>'accession'
+  from item
+  ),
+  experiment as (
+  select uri as Experiment,
+         payload->>'accession' as Experiment_Accession,
+         payload->>'description' as Experiment_Description,
+         payload->>'status' as Experiment_Status,
+         payload->>'date_released' as Experiment_Released,
+         payload->>'lab' as Experiment_Lab,
+         jsonb_array_elements_text(payload->'replicates') as Replicate
+  from item
+  where object_type = 'Experiment'
+  ),
+  replicate as (
+    select uri as Replicate,
+           payload->>'library' as Library
+    from item
+    where object_type = 'Replicate'
+  ),
+  library as (
+    select uri as Library,
+           payload->>'accession' as Library_Accession,
+           payload->>'date_created' as Library_Created,
+           payload->>'biosample' as Biosample,
+           jsonb_array_elements_text(payload->'aliases') as Library_Alias
+    from item
+    where object_type = 'Library'
+  ),
+  biosample as (
+     select uri as Biosample,
+            payload->>'accession' as Biosample_Accession,
+            payload->>'date_created' as Biosample_Created,
+            payload->>'donor' as Donor
+     from item
+     where object_type = 'Biosample'
+  )
+select Experiment_Lab, Experiment_Status, count(Experiment_Accession)
+from experiment
+     LEFT JOIN replicate ON experiment.Replicate = replicate.Replicate
+     LEFT JOIN library on replicate.Library = library.Library
+     LEFT JOIN biosample on library.Biosample = biosample.Biosample
+where biosample.Donor in ('/human-donors/ENCDO845WKR/','/human-donors/ENCDO451RUA/','/human-donors/ENCDO793LXB/','/human-donors/ENCDO271OUW/')
+group by Experiment_Lab, Experiment_Status
+-- order by experiment_released, experiment_status, experiment_accession
+-- limit 10
+
+
+
+-- ENTEX donors 
diff --git a/encode_submission/entex-recursive.sql b/encode_submission/entex-recursive.sql
new file mode 100644 (file)
index 0000000..f392605
--- /dev/null
@@ -0,0 +1,40 @@
+-- \pset format unaligned
+-- \pset fieldsep '\t'
+with recursive
+  gtex as (
+  select distinct uri as Donor,
+         substring(jsonb_array_elements_text(payload->'aliases'), '([a-zA-Z0-9]+):') as AliasPrefix
+  from item
+  where object_type = 'HumanDonor'
+  ),
+  biosample(uri, payload, parent_of, part_of, d) as (
+       select uri, payload,
+              jsonb_array_elements_text(payload->'parent_of') as parent_of,
+              payload->>'part_of' as part_of,
+              1
+       from item
+       where object_type = 'Biosample'
+   union all
+       select uri, payload,
+              jsonb_array_elements_text(payload->'parent_of') as parent_of,
+              payload->>'part_of' as part_of,
+              d + 1
+       from biosample
+       where parent_of = uri
+  ),
+  experiment as (
+  select uri as Experiment,
+         payload->>'accession' as Experiment_Accession,
+         payload->>'description' as Experiment_Description,
+         payload->>'status' as Experiment_Status,
+         payload->>'date_released' as Experiment_Released,
+         payload->>'lab' as Experiment_Lab,
+         jsonb_array_elements_text(payload->'replicates') as Replicate
+  from item
+  where object_type = 'Experiment'
+  )
+select gtex.Donor, biosample.uri, biosample.parent_of
+from gtex
+     JOIN biosample ON gtex.Donor = biosample.payload->>'donor'
+where gtex.AliasPrefix = 'gtex'
+;
diff --git a/encode_submission/entex.sql b/encode_submission/entex.sql
new file mode 100644 (file)
index 0000000..a992d0b
--- /dev/null
@@ -0,0 +1,37 @@
+\pset format unaligned
+\pset fieldsep '\t'
+with
+  gtex as (
+  select distinct uri as Donor,
+         substring(jsonb_array_elements_text(payload->'aliases'), '([a-zA-Z0-9]+):') as AliasPrefix
+  from item
+  where object_type = 'HumanDonor'
+  ),
+  biosample as (
+     select uri as Biosample,
+            payload->>'accession' as Biosample_Accession,
+            payload->>'date_created' as Biosample_Created,
+            payload->>'lab' as Biosample_Lab,
+            payload->>'biosample_term_name' as Biosample_Term,
+            payload->>'summary' as Description,
+            jsonb_array_elements_text(payload->'parent_of') as child_biosample,
+            payload->>'donor' as Donor
+        from item
+        where object_type = 'Biosample'
+  ),
+  experiment as (
+  select uri as Experiment,
+         payload->>'accession' as Experiment_Accession,
+         payload->>'description' as Experiment_Description,
+         payload->>'status' as Experiment_Status,
+         payload->>'date_released' as Experiment_Released,
+         payload->>'lab' as Experiment_Lab,
+         jsonb_array_elements_text(payload->'replicates') as Replicate
+  from item
+  where object_type = 'Experiment'
+  )
+select gtex.Donor, Biosample, Biosample_Lab, Biosample_Term, Description, child_biosample
+from gtex
+     LEFT JOIN biosample ON gtex.Donor = biosample.Donor
+where gtex.AliasPrefix = 'gtex'
+;
diff --git a/encode_submission/myers-quarter-report-detail.sql b/encode_submission/myers-quarter-report-detail.sql
new file mode 100644 (file)
index 0000000..8c1f83b
--- /dev/null
@@ -0,0 +1,51 @@
+\pset format unaligned
+\pset fieldsep '\t'
+with
+  experiment as (
+  select uri as Experiment,
+         payload->>'accession' as Experiment_Accession,
+         payload->>'description' as Experiment_Description,
+         payload->>'status' as Experiment_Status,
+         payload->>'date_released' as Experiment_Released,
+         payload->>'assay_title' as Experiment_Type,
+         payload->>'lab' as Experiment_Lab,
+         jsonb_array_elements_text(payload->'replicates') as Replicate
+  from item
+  where object_type = 'Experiment' and
+        payload->'lab' @> '"/labs/richard-myers/"'::jsonb
+  ),
+  replicate as (
+    select uri as Replicate,
+           payload->>'library' as Library
+    from item
+    where object_type = 'Replicate'
+  ),
+  library as (
+    select uri as Library,
+           payload->>'accession' as Library_Accession,
+           payload->>'date_created' as Library_Created,
+           payload->>'biosample' as Biosample,
+           jsonb_array_elements_text(payload->'aliases') as LibraryAlias
+    from item
+    where object_type = 'Library'
+  )
+select distinct Experiment_Accession,
+                Experiment_Type,
+                Experiment_Description,
+                Experiment_Status,
+                Experiment_Released,
+                Experiment_Lab,
+--                experiment.Replicate,
+                experiment.Replicate,
+                Library_Accession,
+                to_char(library.Library_Created::date, 'YYYY-MM-DD') as Library_Created,
+                LibraryAlias
+--                library.Biosample,
+from experiment
+     LEFT JOIN replicate ON experiment.Replicate = replicate.Replicate
+     LEFT JOIN library on replicate.Library = library.Library
+-- where 
+--      Experiment_Status = 'started'
+order by experiment_released, experiment_status, experiment_accession
+-- limit 10
+;
diff --git a/encode_submission/quarter-report.sql b/encode_submission/quarter-report.sql
new file mode 100755 (executable)
index 0000000..dfd7228
--- /dev/null
@@ -0,0 +1,50 @@
+\pset format unaligned
+\pset fieldsep '\t'
+with
+  experiment as (
+  select uri as Experiment,
+         payload->>'accession' as Experiment_Accession,
+         payload->>'description' as Experiment_Description,
+         payload->>'status' as Experiment_Status,
+         payload->>'date_released' as Experiment_Released,
+         payload->>'assay_title' as Experiment_Type,
+         jsonb_array_elements_text(payload->'replicates') as Replicate
+  from item
+  where object_type = 'Experiment' and
+        payload->'lab' @> '"/labs/barbara-wold/"'::jsonb
+  --      payload->'lab' @> '"/labs/ali-mortazavi/"'::jsonb
+  ),
+  replicate as (
+    select uri as Replicate,
+           payload->>'library' as Library
+    from item
+    where object_type = 'Replicate'
+  ),
+  library as (
+    select uri as Library,
+           payload->>'accession' as Library_Accession,
+           payload->>'date_created' as Library_Created,
+           payload->>'biosample' as Biosample,
+           substring(jsonb_array_elements_text(payload->'aliases') from 'barbara-wold:([0-9]+)')
+               as Jumpgate_Library_ID
+    from item
+    where object_type = 'Library'
+  )
+select distinct Experiment_Accession,
+                Experiment_Type,
+                Experiment_Description,
+                Experiment_Status,
+                Experiment_Released,
+--                experiment.Replicate,
+                Library_Accession,
+                to_char(library.Library_Created::date, 'YYYY-MM-DD') as Library_Created,
+                Library_Alias
+--                library.Biosample,
+from experiment
+     LEFT JOIN replicate ON experiment.Replicate = replicate.Replicate
+     LEFT JOIN library on replicate.Library = library.Library
+-- where 
+--      Experiment_Status = 'started'
+order by experiment_released, experiment_status, experiment_accession
+-- limit 10
+;
diff --git a/encode_submission/snyder-chip-report.sql b/encode_submission/snyder-chip-report.sql
new file mode 100644 (file)
index 0000000..8fc782f
--- /dev/null
@@ -0,0 +1,95 @@
+\pset format unaligned
+\pset fieldsep '\t'
+with
+  experiment as (
+  select uri as Experiment,
+         payload->>'accession' as Experiment_Accession,
+         payload->>'description' as Experiment_Description,
+         payload->>'status' as Experiment_Status,
+         payload->>'date_released' as Experiment_Released,
+         payload->>'assay_title' as Experiment_Type,
+         jsonb_array_elements_text(payload->'replicates') as Replicate
+  from item
+  where object_type = 'Experiment' and
+        payload->'lab' @> '"/labs/michael-snyder/"'::jsonb
+  ),
+  replicate as (
+    select uri as Replicate,
+           payload->>'library' as Library,
+           payload->>'antibody' as Antibody
+    from item
+    where object_type = 'Replicate'
+  ),
+  antibody_lot as (
+    select uri as AntibodyLot,
+           payload->>'antigen_description' as Antigen_Description,
+           payload->>'clonality' as clonality,
+           jsonb_array_elements_text(payload->'targets') as Target,
+           jsonb_array_elements_text(payload->'characterizations') as Characterization
+    from item
+    where object_type = 'AntibodyLot'
+  ),
+  antibody_characterization as (
+    select uri as AntibodyCharacterization,
+           payload->>'status' as Status,
+           payload->>'characterization_method' as Characterization_Method
+    from item
+    where object_type = 'AntibodyCharacterization'
+  ),
+  library as (
+    select uri as Library,
+           payload->>'accession' as Library_Accession,
+           payload->>'date_created' as Library_Created,
+           payload->>'biosample' as Biosample
+    from item
+    where object_type = 'Library'
+  ),
+  biosample as (
+    select uri as Biosample,
+           payload->>'summary' as Summary,
+           payload->>'donor' as Donor
+    from item
+    where object_type = 'Biosample'
+  ),
+  target as (
+    select uri as Target,
+           payload->>'title' as title,
+           payload->>'status' as status,
+           jsonb_array_elements_text(payload->'investigated_as') as investigated_as
+    from item
+    where object_type = 'Target'
+  ),  
+  donor as (
+     select uri as Donor,
+            payload->>'organism' as Organism
+      from item
+  )
+select distinct 
+                Experiment_Accession,
+                Experiment_Type,
+                Experiment_Description,
+                Experiment_Status,
+                Experiment_Released,
+                Library_Accession,
+                AntibodyLot,
+                Antigen_Description,
+                Clonality,
+                antibody_characterization.Status as Characterization_Status,
+                antibody_characterization.characterization_method as Characterization_Method,
+                target.Target,
+                target.title as Target_Title,
+                target.status as Target_Status,
+                biosample.Biosample,
+                biosample.summary as Biosample_Description
+from experiment
+     JOIN replicate ON experiment.Replicate = replicate.Replicate
+     JOIN antibody_lot on replicate.Antibody = antibody_lot.AntibodyLot
+     JOIN target ON antibody_lot.Target = target.Target
+     JOIN library on replicate.Library = library.Library
+     JOIN biosample on library.Biosample = biosample.Biosample
+     JOIN antibody_characterization on antibody_lot.Characterization = antibody_characterization.AntibodyCharacterization
+-- where 
+--      Experiment_Status = 'started'
+order by experiment_accession, AntibodyLot
+-- limit 10
+;
diff --git a/encode_submission/target.sql b/encode_submission/target.sql
new file mode 100644 (file)
index 0000000..ee88e47
--- /dev/null
@@ -0,0 +1,91 @@
+-- \pset format unaligned
+-- \pset fieldsep '\t'
+with
+  experiment as (
+  select uri as Experiment,
+         payload->>'accession' as Experiment_Accession,
+         payload->>'description' as Experiment_Description,
+         payload->>'status' as Experiment_Status,
+         payload->>'date_released' as Experiment_Released,
+         payload->>'assay_title' as Experiment_Type,
+         jsonb_array_elements_text(payload->'replicates') as Replicate
+  from item
+  where object_type = 'Experiment' and
+        payload->'lab' @> '"/labs/michael-snyder/"'::jsonb
+  ),
+  replicate as (
+    select uri as Replicate,
+           payload->>'library' as Library,
+           payload->>'antibody' as Antibody
+    from item
+    where object_type = 'Replicate'
+  ),
+  antibody_lot as (
+    select uri as AntibodyLot,
+           payload->>'antigen_description' as Antigen_Description,
+           payload->>'clonality' as clonality,
+           jsonb_array_elements_text(payload->'targets') as Target,
+           jsonb_array_elements_text(payload->'characterizations') as Characterization
+    from item
+    where object_type = 'AntibodyLot'
+  ),
+  antibody_characterization as (
+    select uri as Characterization,
+           payload->>'caption' as Caption,
+           payload->>'characterization_method' as Characterization_Method,
+           payload->>'status' as Characterization_Status
+    from item
+    where object_type = 'AntibodyCharcterization'
+  ),
+  library as (
+    select uri as Library,
+           payload->>'accession' as Library_Accession,
+           payload->>'date_created' as Library_Created,
+           payload->>'biosample' as Biosample
+    from item
+    where object_type = 'Library'
+  ),
+  biosample as (
+     select uri as Biosample,
+            payload->>'donor' as Donor
+        from item
+        where object_type = 'Biosample'
+  ),
+  target as (
+    select uri as Target,
+           payload->>'title' as title,
+           payload->>'status' as status,
+           jsonb_array_elements_text(payload->'investigated_as') as investigated_as
+    from item
+    where object_type = 'Target'
+  ),  
+  donor as (
+     select uri as Donor,
+            payload->>'organism' as Organism
+      from item
+  )
+select
+  Replicate,
+  Library,
+  AntibodyLot,
+  Antigen_Description,
+  Clonality,
+  antibody_lot.Target,
+  antibody_lot.Characterization,
+  Characterization_Method,
+  Characterization_Status,
+  target.title,
+  target.status,
+  target.investigated_as
+--                experiment.Replicate,
+--                Library_Accession,
+--                library.Biosample,
+--                donor.Organism,
+--                to_char(library.Library_Created::date, 'YYYY-MM-DD') as Library_Created
+from replicate
+     left join antibody_lot on replicate.Antibody = antibody_lot.AntibodyLot
+     left join antibody_characterization on
+               antibody_lot.Characterization = antibody_characterization.Characterization
+     left join target on antibody_lot.Target = target.Target
+limit 10
+;
diff --git a/encode_submission/wold-quarter-report-detail.sql b/encode_submission/wold-quarter-report-detail.sql
new file mode 100644 (file)
index 0000000..70d5fec
--- /dev/null
@@ -0,0 +1,66 @@
+\pset format unaligned
+\pset fieldsep '\t'
+with
+  experiment as (
+  select uri as Experiment,
+         payload->>'accession' as Experiment_Accession,
+         payload->>'description' as Experiment_Description,
+         payload->>'status' as Experiment_Status,
+         payload->>'date_released' as Experiment_Released,
+         payload->>'assay_title' as Experiment_Type,
+         jsonb_array_elements_text(payload->'replicates') as Replicate
+  from item
+  where object_type = 'Experiment' and
+        payload->'lab' @> '"/labs/barbara-wold/"'::jsonb
+  --      payload->'lab' @> '"/labs/ali-mortazavi/"'::jsonb
+  ),
+  replicate as (
+    select uri as Replicate,
+           payload->>'library' as Library
+    from item
+    where object_type = 'Replicate'
+  ),
+  library as (
+    select uri as Library,
+           payload->>'accession' as Library_Accession,
+           payload->>'date_created' as Library_Created,
+           payload->>'biosample' as Biosample,
+           substring(jsonb_array_elements_text(payload->'aliases') from 'barbara-wold:([0-9]+)')
+               as Jumpgate_Library_ID
+    from item
+    where object_type = 'Library'
+  ),
+  biosample as (
+     select uri as Biosample,
+            payload->>'donor' as Donor
+        from item
+        where object_type = 'Biosample'
+  ),
+  donor as (
+     select uri as Donor,
+            payload->>'organism' as Organism
+      from item
+  )
+select distinct Experiment_Accession,
+                Experiment_Type,
+                Experiment_Description,
+                Experiment_Status,
+                Experiment_Released,
+--                experiment.Replicate,
+                Library_Accession,
+                library.Biosample,
+                donor.Organism,
+                to_char(library.Library_Created::date, 'YYYY-MM-DD') as Library_Created,
+                Jumpgate_Library_ID
+--                samples_library.library_name 
+from experiment
+     LEFT JOIN replicate ON experiment.Replicate = replicate.Replicate
+     LEFT JOIN library on replicate.Library = library.Library
+--     LEFT JOIN samples_library on Jumpgate_Library_ID = samples_library.id
+     LEFT JOIN biosample on library.Biosample = biosample.Biosample
+     LEFT JOIN donor on biosample.Donor = donor.Donor
+-- where 
+--      Experiment_Status = 'started'
+order by experiment_released, experiment_status, experiment_accession
+-- limit 10
+;