2 Provide some quick and dirty access and reporting for the fctracker database.
4 The advantage to this code is that it doesn't depend on django being
5 installed, so it can run on machines other than the webserver.
13 if sys.version_info[0] + sys.version_info[1] * 0.1 >= 2.5:
17 import pysqlite2.dbapi2 as sqlite3
22 provide a simple way to interact with the flowcell data in fctracker.db
24 def __init__(self, database):
25 # default to the current directory
27 self.database = self._guess_fctracker_path()
29 self.database = database
30 self.conn = sqlite3.connect(self.database)
34 def _guess_fctracker_path(self):
36 Guess a few obvious places for the database
38 fctracker = 'fctracker.db'
40 # is it in the current dir?
41 if os.path.exists(name):
43 name = os.path.expanduser(os.path.join('~', fctracker))
44 if os.path.exists(name):
46 raise RuntimeError("Can't find fctracker")
48 def _make_dict_from_table(self, table_name, pkey_name):
50 Convert a django table into a dictionary indexed by the primary key.
51 Yes, it really does just load everything into memory, hopefully
52 we stay under a few tens of thousands of runs for a while.
55 c = self.conn.cursor()
56 c.execute('select * from %s;' % (table_name))
57 # extract just the field name
58 description = [ f[0] for f in c.description]
60 row_dict = dict(zip(description, row))
61 table[row_dict[pkey_name]] = row_dict
65 def _add_lanes_to_libraries(self):
67 add flowcell/lane ids to new attribute 'lanes' in the library dictionary
69 library_id_re = re.compile('lane_\d_library_id')
71 for fc_id, fc in self.flowcells.items():
72 lane_library = [ (x[0][5], x[1]) for x in fc.items()
73 if library_id_re.match(x[0]) ]
74 for lane, library_id in lane_library:
75 if not self.library[library_id].has_key('lanes'):
76 self.library[library_id]['lanes'] = []
77 self.library[library_id]['lanes'].append((fc_id, lane))
79 def _get_library(self):
81 attach the library dictionary to the instance
83 self.library = self._make_dict_from_table(
88 def _get_species(self):
90 attach the species dictionary to the instance
92 self.species = self._make_dict_from_table(
97 def _get_flowcells(self, where=None):
99 attach the flowcell dictionary to the instance
101 where is a sql where clause. (eg "where run_date > '2008-1-1'")
102 that can be used to limit what flowcells we select
103 FIXME: please add sanitization code
108 c = self.conn.cursor()
109 c.execute('select * from experiments_flowcell %s;' % (where))
110 # extract just the field name
111 description = [ f[0] for f in c.description ]
113 row_dict = dict(zip(description, row))
114 fcid, status = self._parse_flowcell_id(row_dict)
115 row_dict['flowcell_id'] = fcid
116 row_dict['flowcell_status'] = status
118 for lane in [ 'lane_%d_library' % (i) for i in range(1,9) ]:
119 lane_library = self.library[row_dict[lane+"_id"]]
120 species_id = lane_library['library_species_id']
121 lane_library['library_species'] = self.species[species_id]
122 row_dict[lane] = lane_library
123 # some useful parsing
124 run_date = time.strptime(row_dict['run_date'], '%Y-%m-%d %H:%M:%S')
125 run_date = datetime.datetime(*run_date[:6])
126 row_dict['run_date'] = run_date
127 self.flowcells[row_dict['flowcell_id']] = row_dict
129 self._add_lanes_to_libraries()
130 return self.flowcells
132 def _parse_flowcell_id(self, flowcell_row):
134 Return flowcell id and status
136 We stored the status information in the flowcell id name.
137 this was dumb, but database schemas are hard to update.
139 fields = flowcell_row['flowcell_id'].split()
149 def flowcell_gone(cell):
151 Use a variety of heuristics to determine if the flowcell drive
154 status = cell['flowcell_status']
157 failures = ['failed', 'deleted', 'not run']
159 if re.search(f, status):
164 def recoverable_drive_report(flowcells):
166 Attempt to report what flowcells are still on a hard drive
168 def format_status(status):
174 # sort flowcells by run date
176 for key, cell in flowcells.items():
177 flowcell_list.append( (cell['run_date'], key) )
181 line = "%(date)s %(id)s %(status)s%(lane)s %(library_name)s (%(library_id)s) "
182 line += "%(species)s"
183 for run_date, flowcell_id in flowcell_list:
184 cell = flowcells[flowcell_id]
185 if flowcell_gone(cell):
188 lane = 'lane_%d' % (l)
189 cell_library = cell['%s_library'%(lane)]
191 'date': cell['run_date'].strftime('%y-%b-%d'),
192 'id': cell['flowcell_id'],
194 'library_name': cell_library['library_name'],
195 'library_id': cell['%s_library_id'%(lane)],
196 'species': cell_library['library_species']['scientific_name'],
197 'status': format_status(cell['flowcell_status']),
199 report.append(line % (fields))
200 return os.linesep.join(report)