125e7b49ff66ffd41db4a56d0520b0cdde38f822
[htsworkflow.git] / docs / conv_library_id_to_pk_v0.3.1.py
1 import sqlite3
2
3 def map_library_ids(c):
4     lib_ids = {}
5     c.execute("""select id, library_id from samples_library""")
6     for row in c:
7         surrogate_id = unicode(row[0]) # auto key
8         artificial_id = unicode(row[1]) # the id printed on the library tubes
9         lib_ids[surrogate_id] = artificial_id
10     return lib_ids
11
12 def convert_experiments_lane(c, lib_ids):
13     """
14     Change Library ID in experiments_lane table
15     """
16     c.execute('alter table experiments_lane rename to old_experiments_lane')
17     c.execute("""CREATE TABLE "experiments_lane" (
18     "id" integer NOT NULL PRIMARY KEY,
19     "flowcell_id" integer NOT NULL REFERENCES "experiments_flowcell" ("id"),
20     "lane_number" integer NOT NULL,
21     "library_id" varchar(10) NOT NULL REFERENCES "samples_library" ("library_id"),
22     "pM" decimal NOT NULL,
23     "cluster_estimate" integer,
24     "comment" text);""")
25
26     c.execute("""select id, flowcell_id, lane_number, library_id, pM, cluster_estimate, comment
27                  from old_experiments_lane;""")
28
29     new_rows = []
30     for row in c:
31         new_rows.append({'id':row[0], 'flowcell_id':row[1], 'lane_number':row[2],
32                          'library_id':lib_ids[unicode(row[3])], 'pM':row[4],
33                          'cluster_estimate':row[5],
34                          'comment':row[6]})
35
36     sql = '''insert into experiments_lane
37         (id, flowcell_id, lane_number, library_id, pM, cluster_estimate, comment)
38         values
39         (:id, :flowcell_id, :lane_number, :library_id, :pM, :cluster_estimate, :comment)'''
40     c.executemany(sql, new_rows)
41
42     c.execute('drop table old_experiments_lane')
43
44 def convert_samples_library_affiliations(c, lib_ids):
45     """
46     Change Library ID in experiments_lane table
47     """
48     c.execute('alter table samples_library_affiliations rename to old_samples_library_affiliations')
49     c.execute('''CREATE TABLE "samples_library_affiliations" (
50     "id" integer NOT NULL PRIMARY KEY,
51     "library_id" varchar(10) NOT NULL REFERENCES "samples_library" ("id"),
52     "affiliation_id" integer NOT NULL REFERENCES "samples_affiliation" ("id"),
53     UNIQUE ("library_id", "affiliation_id")
54 );''')
55
56     c.execute("""select id, library_id, affiliation_id
57                  from old_samples_library_affiliations;""")
58
59     new_rows = []
60     for row in c:
61         new_rows.append({'id':row[0], 'library_id': lib_ids[unicode(row[1])], 'affiliation_id':row[2],})
62
63     sql = '''insert into samples_library_affiliations
64         (id, library_id, affiliation_id)
65         values
66         (:id, :library_id, :affiliation_id)'''
67     c.executemany(sql, new_rows)
68
69     c.execute('drop table old_samples_library_affiliations;')
70
71 def convert_samples_library_tags(c, lib_ids):
72     """
73     Change Library ID in samples_library_tags table
74     """
75     c.execute('alter table samples_library_tags rename to old_samples_library_tags')
76     c.execute('''CREATE TABLE "samples_library_tags" (
77     "id" integer NOT NULL PRIMARY KEY,
78     "library_id" varchar(10) NOT NULL REFERENCES "samples_library" ("id"),
79     "tag_id" integer NOT NULL REFERENCES "samples_tag" ("id"),
80     UNIQUE ("library_id", "tag_id")
81 );''')
82
83     c.execute("""select id, library_id, tag_id
84                  from old_samples_library_tags;""")
85
86     new_rows = []
87     for row in c:
88         new_rows.append({'id':row[0], 'library_id': lib_ids[unicode(row[1])], 'tag_id':row[2]})
89
90     sql = '''insert into samples_library_tags
91         (id, library_id, tag_id)
92         values
93         (:id, :library_id, :tag_id)'''
94     c.executemany(sql, new_rows)
95
96     c.execute('drop table old_samples_library_tags;')    
97
98
99 def convert_samples_library(c, lib_ids):
100     """
101     Change Library ID in samples_library_tags table
102     """
103     c.execute('alter table samples_library rename to old_samples_library')
104     c.execute('''CREATE TABLE "samples_library" (
105     "id" varchar(10) NOT NULL PRIMARY KEY,
106     "library_name" varchar(100) NOT NULL UNIQUE,
107     "library_species_id" integer NOT NULL REFERENCES "samples_species" ("id"),
108     "hidden" bool NOT NULL,
109     "account_number" varchar(100),
110     "cell_line_id" integer REFERENCES "samples_cellline" ("id"),
111     "condition_id" integer REFERENCES "samples_condition" ("id"),
112     "antibody_id" integer REFERENCES "samples_antibody" ("id"),
113     "replicate" smallint unsigned NOT NULL,
114     "experiment_type_id" integer NOT NULL REFERENCES "samples_experimenttype" ("id"),
115     "library_type_id" integer REFERENCES "samples_librarytype" ("id"),
116     "creation_date" date,
117     "made_for" varchar(50) NOT NULL,
118     "made_by" varchar(50) NOT NULL,
119     "stopping_point" varchar(25) NOT NULL,
120     "amplified_from_sample_id" varchar(10),
121     "undiluted_concentration" decimal,
122     "successful_pM" decimal,
123     "ten_nM_dilution" bool NOT NULL,
124     "avg_lib_size" integer,
125     "notes" text NOT NULL
126 );''')
127
128     c.execute("""
129     select library_id, library_name, library_species_id, hidden, account_number, cell_line_id,
130     condition_id, antibody_id, replicate, experiment_type_id, library_type_id,
131     creation_date, made_for, made_by, stopping_point, amplified_from_sample_id,
132     undiluted_concentration, successful_pM, ten_nM_dilution, avg_lib_size, notes
133     from old_samples_library;""")
134
135     new_rows = []
136     for row in c:
137         new_rows.append({
138         'id': row[0],
139         'library_name': row[1],
140         'library_species_id': row[2],
141         'hidden': row[3],
142         'account_number': row[4],
143         'cell_line_id': row[5],
144         'condition_id': row[6],
145         'antibody_id': row[7],
146         'replicate': row[8],
147         'experiment_type_id': row[9],
148         'library_type_id': row[10],
149         'creation_date': row[11],
150         'made_for': row[12],
151         'made_by': row[13],
152         'stopping_point': row[14],
153         'amplified_from_sample_id': row[15],
154         'undiluted_concentration': row[16],
155         'successful_pM': row[17],
156         'ten_nM_dilution': row[18],
157         'avg_lib_size': row[19],
158         'notes': row[20],
159             })
160
161     sql = '''insert into samples_library
162         (id, library_name, library_species_id, hidden, account_number, cell_line_id,
163          condition_id, antibody_id, replicate, experiment_type_id, library_type_id,
164          creation_date, made_for, made_by, stopping_point, amplified_from_sample_id,
165          undiluted_concentration, successful_pM, ten_nM_dilution, avg_lib_size, notes)
166         values
167         (:id, :library_name, :library_species_id, :hidden, :account_number, :cell_line_id,
168          :condition_id, :antibody_id, :replicate, :experiment_type_id, :library_type_id,
169          :creation_date, :made_for, :made_by, :stopping_point, :amplified_from_sample_id,
170          :undiluted_concentration, :successful_pM, :ten_nM_dilution, :avg_lib_size, :notes);
171          '''
172     c.executemany(sql, new_rows)
173
174     c.execute('drop table old_samples_library;')
175
176 def convert_library_id(db_path):
177     conn = sqlite3.connect(db_path)
178     c = conn.cursor()
179
180     lib_ids = map_library_ids(c)
181
182     convert_experiments_lane(c, lib_ids)
183     convert_samples_library_affiliations(c, lib_ids)
184     convert_samples_library_tags(c, lib_ids)
185     convert_samples_library(c, lib_ids)
186     
187     conn.commit()
188
189 def main():
190     db_path = '/Users/diane/proj/solexa/gaworkflow/svn/woldlab.db'
191     convert_library_id(db_path)
192
193 if __name__ == "__main__":
194     main()