Attempt to be robust to not having an alignment in our RunXml file
[htsworkflow.git] / docs / conv_library_id_to_pk_v0.3.1.py
1 import optparse
2 import sqlite3
3
4 def map_library_ids(c):
5     lib_ids = {}
6     c.execute("""select id, library_id from samples_library""")
7     for row in c:
8         surrogate_id = unicode(row[0]) # auto key
9         artificial_id = unicode(row[1]) # the id printed on the library tubes
10         lib_ids[surrogate_id] = artificial_id
11     return lib_ids
12
13 def convert_experiments_lane(c, lib_ids):
14     """
15     Change Library ID in experiments_lane table
16     """
17     c.execute('alter table experiments_lane rename to old_experiments_lane')
18     c.execute("""CREATE TABLE "experiments_lane" (
19     "id" integer NOT NULL PRIMARY KEY,
20     "flowcell_id" integer NOT NULL REFERENCES "experiments_flowcell" ("id"),
21     "lane_number" integer NOT NULL,
22     "library_id" varchar(10) NOT NULL REFERENCES "samples_library" ("library_id"),
23     "pM" decimal NOT NULL,
24     "cluster_estimate" integer,
25     "comment" text);""")
26
27     c.execute("""select id, flowcell_id, lane_number, library_id, pM, cluster_estimate, comment
28                  from old_experiments_lane;""")
29
30     new_rows = []
31     for row in c:
32         new_rows.append({'id':row[0], 'flowcell_id':row[1], 'lane_number':row[2],
33                          'library_id':lib_ids[unicode(row[3])], 'pM':row[4],
34                          'cluster_estimate':row[5],
35                          'comment':row[6]})
36
37     sql = '''insert into experiments_lane
38         (id, flowcell_id, lane_number, library_id, pM, cluster_estimate, comment)
39         values
40         (:id, :flowcell_id, :lane_number, :library_id, :pM, :cluster_estimate, :comment)'''
41     c.executemany(sql, new_rows)
42
43     c.execute('drop table old_experiments_lane')
44
45 def convert_samples_library_affiliations(c, lib_ids):
46     """
47     Change Library ID in experiments_lane table
48     """
49     c.execute('alter table samples_library_affiliations rename to old_samples_library_affiliations')
50     c.execute('''CREATE TABLE "samples_library_affiliations" (
51     "id" integer NOT NULL PRIMARY KEY,
52     "library_id" varchar(10) NOT NULL REFERENCES "samples_library" ("id"),
53     "affiliation_id" integer NOT NULL REFERENCES "samples_affiliation" ("id"),
54     UNIQUE ("library_id", "affiliation_id")
55 );''')
56
57     c.execute("""select id, library_id, affiliation_id
58                  from old_samples_library_affiliations;""")
59
60     new_rows = []
61     for row in c:
62         new_rows.append({'id':row[0], 'library_id': lib_ids[unicode(row[1])], 'affiliation_id':row[2],})
63
64     sql = '''insert into samples_library_affiliations
65         (id, library_id, affiliation_id)
66         values
67         (:id, :library_id, :affiliation_id)'''
68     c.executemany(sql, new_rows)
69
70     c.execute('drop table old_samples_library_affiliations;')
71
72 def convert_samples_library_tags(c, lib_ids):
73     """
74     Change Library ID in samples_library_tags table
75     """
76     c.execute('alter table samples_library_tags rename to old_samples_library_tags')
77     c.execute('''CREATE TABLE "samples_library_tags" (
78     "id" integer NOT NULL PRIMARY KEY,
79     "library_id" varchar(10) NOT NULL REFERENCES "samples_library" ("id"),
80     "tag_id" integer NOT NULL REFERENCES "samples_tag" ("id"),
81     UNIQUE ("library_id", "tag_id")
82 );''')
83
84     c.execute("""select id, library_id, tag_id
85                  from old_samples_library_tags;""")
86
87     new_rows = []
88     for row in c:
89         new_rows.append({'id':row[0], 'library_id': lib_ids[unicode(row[1])], 'tag_id':row[2]})
90
91     sql = '''insert into samples_library_tags
92         (id, library_id, tag_id)
93         values
94         (:id, :library_id, :tag_id)'''
95     c.executemany(sql, new_rows)
96
97     c.execute('drop table old_samples_library_tags;')    
98
99
100 def convert_samples_library(c, lib_ids):
101     """
102     Change Library ID in samples_library_tags table
103     """
104     c.execute('alter table samples_library rename to old_samples_library')
105     c.execute('''CREATE TABLE "samples_library" (
106     "id" varchar(10) NOT NULL PRIMARY KEY,
107     "library_name" varchar(100) NOT NULL UNIQUE,
108     "library_species_id" integer NOT NULL REFERENCES "samples_species" ("id"),
109     "hidden" bool NOT NULL,
110     "account_number" varchar(100),
111     "cell_line_id" integer REFERENCES "samples_cellline" ("id"),
112     "condition_id" integer REFERENCES "samples_condition" ("id"),
113     "antibody_id" integer REFERENCES "samples_antibody" ("id"),
114     "replicate" smallint unsigned NOT NULL,
115     "experiment_type_id" integer NOT NULL REFERENCES "samples_experimenttype" ("id"),
116     "library_type_id" integer REFERENCES "samples_librarytype" ("id"),
117     "creation_date" date,
118     "made_for" varchar(50) NOT NULL,
119     "made_by" varchar(50) NOT NULL,
120     "stopping_point" varchar(25) NOT NULL,
121     "amplified_from_sample_id" varchar(10),
122     "undiluted_concentration" decimal,
123     "successful_pM" decimal,
124     "ten_nM_dilution" bool NOT NULL,
125     "avg_lib_size" integer,
126     "notes" text NOT NULL
127 );''')
128
129     c.execute("""
130     select library_id, library_name, library_species_id, hidden, account_number, cell_line_id,
131     condition_id, antibody_id, replicate, experiment_type_id, library_type_id,
132     creation_date, made_for, made_by, stopping_point, amplified_from_sample_id,
133     undiluted_concentration, successful_pM, ten_nM_dilution, avg_lib_size, notes
134     from old_samples_library;""")
135
136     new_rows = []
137     for row in c:
138         new_rows.append({
139         'id': row[0],
140         'library_name': row[1],
141         'library_species_id': row[2],
142         'hidden': row[3],
143         'account_number': row[4],
144         'cell_line_id': row[5],
145         'condition_id': row[6],
146         'antibody_id': row[7],
147         'replicate': row[8],
148         'experiment_type_id': row[9],
149         'library_type_id': row[10],
150         'creation_date': row[11],
151         'made_for': row[12],
152         'made_by': row[13],
153         'stopping_point': row[14],
154         'amplified_from_sample_id': row[15],
155         'undiluted_concentration': row[16],
156         'successful_pM': row[17],
157         'ten_nM_dilution': row[18],
158         'avg_lib_size': row[19],
159         'notes': row[20],
160             })
161
162     sql = '''insert into samples_library
163         (id, library_name, library_species_id, hidden, account_number, cell_line_id,
164          condition_id, antibody_id, replicate, experiment_type_id, library_type_id,
165          creation_date, made_for, made_by, stopping_point, amplified_from_sample_id,
166          undiluted_concentration, successful_pM, ten_nM_dilution, avg_lib_size, notes)
167         values
168         (:id, :library_name, :library_species_id, :hidden, :account_number, :cell_line_id,
169          :condition_id, :antibody_id, :replicate, :experiment_type_id, :library_type_id,
170          :creation_date, :made_for, :made_by, :stopping_point, :amplified_from_sample_id,
171          :undiluted_concentration, :successful_pM, :ten_nM_dilution, :avg_lib_size, :notes);
172          '''
173     c.executemany(sql, new_rows)
174
175     c.execute('drop table old_samples_library;')
176
177 def convert_library_id(db_path):
178     conn = sqlite3.connect(db_path)
179     c = conn.cursor()
180
181     lib_ids = map_library_ids(c)
182
183     convert_experiments_lane(c, lib_ids)
184     convert_samples_library_affiliations(c, lib_ids)
185     convert_samples_library_tags(c, lib_ids)
186     convert_samples_library(c, lib_ids)
187     
188     conn.commit()
189
190 def make_parser():
191     usage = '%prog: database_filename'
192     parser = optparse.OptionParser(usage)
193     return parser
194
195 def main(cmdline=None):
196     parser = make_parser()
197     opts, args = parser.parse_args(cmdline)
198     if len(args) != 1:
199        parser.error('requires path to sqlite database file')
200
201     db_path = args[0]
202
203     convert_library_id(db_path)
204
205 if __name__ == "__main__":
206     main()