15e6d84cd0a8cf8282db3e27c65c8da7e40b482c
[htsworkflow.git] / docs / conv_caltech_v0.1_to_htsw.py
1 import shutil
2 import sqlite3
3 import sys
4
5 def main(cmdline=None):
6     if len(cmdline) == 1:
7        dest='/tmp/fctracker.db'
8     else:
9       dest = cmdline[1]
10     shutil.copy(cmdline[0], dest)
11     conn = sqlite3.connect(dest)
12     c = conn.cursor()
13     c.execute('drop table fctracker_elandresult');
14     c.execute('''CREATE TABLE "experiments_clusterstation" (
15       "id" integer NOT NULL PRIMARY KEY,
16       "name" varchar(50) NOT NULL UNIQUE);''')
17     c.execute('''INSERT INTO experiments_clusterstation (name) values ("station");''')
18     c.execute('''CREATE TABLE "experiments_sequencer" (
19       "id" integer NOT NULL PRIMARY KEY,
20       "name" varchar(50) NOT NULL UNIQUE);''')
21     c.execute('''INSERT INTO experiments_sequencer (name) values ("HWI-EAS229");''')
22
23     c.execute('''CREATE TABLE "experiments_flowcell" (
24     "id" integer NOT NULL PRIMARY KEY,
25     "flowcell_id" varchar(20) NOT NULL UNIQUE,
26     "run_date" datetime NOT NULL,
27     "advanced_run" bool NOT NULL,
28     "paired_end" bool NOT NULL,
29     "read_length" integer NOT NULL,
30     "lane_1_library_id" integer NOT NULL REFERENCES "samples_library" ("id"),
31     "lane_2_library_id" integer NOT NULL REFERENCES "samples_library" ("id"),
32     "lane_3_library_id" integer NOT NULL REFERENCES "samples_library" ("id"),
33     "lane_4_library_id" integer NOT NULL REFERENCES "samples_library" ("id"),
34     "lane_5_library_id" integer NOT NULL REFERENCES "samples_library" ("id"),
35     "lane_6_library_id" integer NOT NULL REFERENCES "samples_library" ("id"),
36     "lane_7_library_id" integer NOT NULL REFERENCES "samples_library" ("id"),
37     "lane_8_library_id" integer NOT NULL REFERENCES "samples_library" ("id"),
38     "lane_1_pM" decimal NOT NULL,
39     "lane_2_pM" decimal NOT NULL,
40     "lane_3_pM" decimal NOT NULL,
41     "lane_4_pM" decimal NOT NULL,
42     "lane_5_pM" decimal NOT NULL,
43     "lane_6_pM" decimal NOT NULL,
44     "lane_7_pM" decimal NOT NULL,
45     "lane_8_pM" decimal NOT NULL,
46     "lane_1_cluster_estimate" integer NULL,
47     "lane_2_cluster_estimate" integer NULL,
48     "lane_3_cluster_estimate" integer NULL,
49     "lane_4_cluster_estimate" integer NULL,
50     "lane_5_cluster_estimate" integer NULL,
51     "lane_6_cluster_estimate" integer NULL,
52     "lane_7_cluster_estimate" integer NULL,
53     "lane_8_cluster_estimate" integer NULL,
54     "cluster_station_id" integer NOT NULL REFERENCES "experiments_clusterstation" ("id"),
55     "sequencer_id" integer NOT NULL REFERENCES "experiments_sequencer" ("id"),
56     "notes" text NOT NULL
57 );''')
58     c.execute('''insert into experiments_flowcell 
59         (id, flowcell_id, run_date, advanced_run, paired_end, read_length,
60          lane_1_library_id, lane_2_library_id, lane_3_library_id,
61          lane_4_library_id, lane_5_library_id, lane_6_library_id,
62          lane_7_library_id, lane_8_library_id, lane_1_pm,
63          lane_2_pM, lane_3_pM, lane_4_pM, lane_5_pM, lane_6_pM,
64          lane_7_pM, lane_8_pM, lane_1_cluster_estimate,
65          lane_2_cluster_estimate, lane_3_cluster_estimate, 
66          lane_4_cluster_estimate, lane_5_cluster_estimate,
67          lane_6_cluster_estimate, lane_7_cluster_estimate, 
68          lane_8_cluster_estimate, cluster_station_id, sequencer_id,
69          notes) 
70       select
71          id, flowcell_id, run_date, advanced_run, paired_end, read_length,
72          lane_1_library_id, lane_2_library_id, lane_3_library_id,
73          lane_4_library_id, lane_5_library_id, lane_6_library_id,
74          lane_7_library_id, lane_8_library_id, lane_1_pm,
75          lane_2_pM, lane_3_pM, lane_4_pM, lane_5_pM, lane_6_pM,
76          lane_7_pM, lane_8_pM, lane_1_cluster_estimate,
77          lane_2_cluster_estimate, lane_3_cluster_estimate, 
78          lane_4_cluster_estimate, lane_5_cluster_estimate,
79          lane_6_cluster_estimate, lane_7_cluster_estimate, 
80          lane_8_cluster_estimate, 1, 1,
81          notes from fctracker_flowcell;''')
82     c.execute('''drop table fctracker_flowcell;''')
83
84     # create samples.cellline
85     c.execute('''CREATE TABLE "samples_cellline" (
86     "id" integer NOT NULL PRIMARY KEY,
87     "cellline_name" varchar(100) NOT NULL UNIQUE,
88     "nickname" varchar(20) NULL,
89     "notes" text NOT NULL);''')
90     c.execute('''insert into samples_cellline (cellline_name,notes) values("Unknown","Unknown");''')
91
92     # Create samples.condition
93     c.execute('''CREATE TABLE "samples_condition" (
94     "id" integer NOT NULL PRIMARY KEY,
95     "condition_name" varchar(2000) NOT NULL UNIQUE,
96     "nickname" varchar(20) NULL,
97     "notes" text NOT NULL);''')
98     c.execute('''insert into samples_condition (condition_name,notes) values("Unknown","Unknown");''')
99
100     # create samples.library
101     c.execute('''CREATE TABLE "samples_library" (
102     "id" integer NOT NULL PRIMARY KEY,
103     "library_id" varchar(30) NOT NULL,
104     "library_name" varchar(100) NOT NULL UNIQUE,
105     "library_species_id" integer NOT NULL REFERENCES "samples_species" ("id"),
106     "cell_line_id" integer NOT NULL REFERENCES "samples_cellline" ("id"),
107     "condition_id" integer NOT NULL REFERENCES "samples_condition" ("id"),
108     "antibody_id" integer NULL REFERENCES "samples_antibody" ("id"),
109     "replicate" smallint unsigned NOT NULL,
110     "experiment_type" varchar(50) NOT NULL,
111     "creation_date" date NULL,
112     "made_for" varchar(50) NOT NULL,
113     "made_by" varchar(50) NOT NULL,
114     "stopping_point" varchar(25) NOT NULL,
115     "amplified_from_sample_id" integer NULL,
116     "undiluted_concentration" decimal NULL,
117     "successful_pM" decimal NULL,
118     "ten_nM_dilution" bool NOT NULL,
119     "avg_lib_size" integer NULL,
120     "notes" text NOT NULL);''')
121     c.execute('''INSERT INTO samples_library 
122       (id,library_id,library_name,library_species_id, experiment_type,
123        cell_line_id,condition_id,replicate,made_by,creation_date,
124        made_for,stopping_point,amplified_from_sample_id,
125        undiluted_concentration,ten_nM_dilution,successful_pM,
126        avg_lib_size,notes) 
127 select library_id,library_id,library_name,library_species_id,"unknown",
128        1,           1,           1,        made_by,creation_date,
129        made_for,stopping_point,amplified_from_sample_id,
130        undiluted_concentration,ten_nM_dilution,successful_pM,
131        0,notes from fctracker_library;''');
132     c.execute('''update samples_library set experiment_type="RNA-seq" where library_id in (select library_id from fctracker_library where RNASeq = 1);''')
133     #c.execute('''drop table fctracker_library;''') 
134     # add many to many tables
135     c.execute('''CREATE TABLE "samples_library_affiliations" (
136     "id" integer NOT NULL PRIMARY KEY,
137     "library_id" integer NOT NULL REFERENCES "samples_library" ("id"),
138     "affiliation_id" integer NOT NULL REFERENCES "samples_affiliation" ("id"),
139     UNIQUE ("library_id", "affiliation_id"));''')
140
141     c.execute('''CREATE TABLE "samples_library_tags" (
142     "id" integer NOT NULL PRIMARY KEY,
143     "library_id" integer NOT NULL REFERENCES "samples_library" ("id"),
144     "tag_id" integer NOT NULL REFERENCES "samples_tag" ("id"),
145     UNIQUE ("library_id", "tag_id"));''')
146
147
148
149     #
150     c.execute('''CREATE TABLE "samples_species" (
151     "id" integer NOT NULL PRIMARY KEY,
152     "scientific_name" varchar(256) NOT NULL,
153     "common_name" varchar(256) NOT NULL);''')
154     c.execute('''insert into samples_species 
155         (id, scientific_name, common_name)
156     select
157          id, scientific_name, common_name
158     from fctracker_species;''')
159     c.execute('''drop table fctracker_species''')
160     conn.commit()
161
162 if __name__ == "__main__":
163     main(sys.argv[1:])