2 Provide some quick and dirty access and reporting for the fctracker database.
4 The advantage to this code is that it doesn't depend on django being
5 installed, so it can run on machines other than the webserver.
12 if sys.version_info[0] + sys.version_info[1] * 0.1 >= 2.5:
16 import pysqlite2.dbapi2 as sqlite3
21 provide a simple way to interact with the flowcell data in fctracker.db
23 def __init__(self, database):
24 # default to the current directory
26 self.database = self._guess_fctracker_path()
28 self.database = database
29 self.conn = sqlite3.connect(self.database)
33 def _guess_fctracker_path(self):
35 Guess a few obvious places for the database
37 fctracker = 'fctracker.db'
39 # is it in the current dir?
40 if os.path.exists(name):
42 name = os.path.expanduser(os.path.join('~', fctracker))
43 if os.path.exists(name):
45 raise RuntimeError("Can't find fctracker")
47 def _make_dict_from_table(self, table_name, pkey_name):
49 Convert a django table into a dictionary indexed by the primary key.
50 Yes, it really does just load everything into memory, hopefully
51 we stay under a few tens of thousands of runs for a while.
54 c = self.conn.cursor()
55 c.execute('select * from %s;' % (table_name))
56 # extract just the field name
57 description = [ f[0] for f in c.description]
59 row_dict = dict(zip(description, row))
60 table[row_dict[pkey_name]] = row_dict
64 def _get_library(self):
66 attach the library dictionary to the instance
68 self.library = self._make_dict_from_table(
73 def _get_species(self):
75 attach the species dictionary to the instance
77 self.species = self._make_dict_from_table(
82 def _get_flowcells(self, where=None):
84 attach the flowcell dictionary to the instance
86 where is a sql where clause. (eg "where run_date > '2008-1-1'")
87 that can be used to limit what flowcells we select
88 FIXME: please add sanitization code
93 c = self.conn.cursor()
94 c.execute('select * from fctracker_flowcell %s;' % (where))
95 # extract just the field name
96 description = [ f[0] for f in c.description ]
98 row_dict = dict(zip(description, row))
99 for lane in [ 'lane_%d_library' % (i) for i in range(1,9) ]:
100 lane_library = self.library[row_dict[lane+"_id"]]
101 species_id = lane_library['library_species_id']
102 lane_library['library_species'] = self.species[species_id]
103 row_dict[lane] = lane_library
104 # some useful parsing
105 run_date = time.strptime(row_dict['run_date'], '%Y-%m-%d %H:%M:%S')
106 run_date = datetime.datetime(*run_date[:6])
107 row_dict['run_date'] = run_date
108 self.flowcells[row_dict['flowcell_id']] = row_dict
109 return self.flowcells
111 def recoverable_drive_report(flowcells):
113 Attempt to report what flowcells are still on a hard drive
115 def flowcell_gone(cell):
117 Use a variety of heuristics to determine if the flowcell drive
120 name = cell['flowcell_id']
123 if 'deleted' in name:
125 if 'not run' in name:
129 # sort flowcells by run date
131 for key, cell in flowcells.items():
132 flowcell_list.append( (cell['run_date'], key) )
136 line = "%(date)s %(id)s %(lane)s %(library_name)s (%(library_id)s) "
137 line += "%(species)s"
138 for run_date, flowcell_id in flowcell_list:
139 cell = flowcells[flowcell_id]
140 if flowcell_gone(cell):
143 lane = 'lane_%d' % (l)
144 cell_library = cell['%s_library'%(lane)]
146 'date': cell['run_date'].strftime('%y-%b-%d'),
147 'id': cell['flowcell_id'],
149 'library_name': cell_library['library_name'],
150 'library_id': cell['%s_library_id'%(lane)],
151 'species': cell_library['library_species']['scientific_name'],
153 report.append(line % (fields))
154 return os.linesep.join(report)