9 from django.conf import settings
10 from django.core.exceptions import ObjectDoesNotExist
11 from django.core import urlresolvers
12 from django.db import models
13 from django.db.models.signals import post_init, pre_save
15 from htsworkflow.frontend.samples.models import Library
16 from htsworkflow.util.conversion import parse_flowcell_id
17 from htsworkflow.pipelines import runfolder
19 LOGGER = logging.getLogger(__name__)
22 default_pM = int(settings.DEFAULT_PM)
24 LOGGER.error("invalid value for frontend.default_pm")
26 # how many days to wait before trying to re-import a runfolder
29 RESCAN_DELAY = int(settings.RESCAN_DELAY)
30 except (ValueError, AttributeError):
31 LOGGER.error("Missing or invalid settings.RESCAN_DELAY, "\
32 "defaulting to %s" % (RESCAN_DELAY,))
34 RUN_STATUS_CHOICES = (
35 (0, 'Sequencer running'), # Solexa Data Pipeline Not Yet Started'),
36 (1, 'Data Pipeline Started'),
37 (2, 'Data Pipeline Interrupted'),
38 (3, 'Data Pipeline Finished'),
39 (4, 'Collect Results Started'),
40 (5, 'Collect Results Finished'),
45 RUN_STATUS_REVERSE_MAP = dict(((v, k) for k, v in RUN_STATUS_CHOICES))
48 class ClusterStation(models.Model):
49 """List of cluster stations"""
50 name = models.CharField(max_length=50, unique=True)
51 isdefault = models.BooleanField(default=False, null=False)
54 ordering = ["-isdefault", "name"]
def __unicode__(self):
    """Return this cluster station's name as a unicode string"""
    station_name = self.name
    return unicode(station_name)
61 d = cls.objects.filter(isdefault=True).all()
64 d = cls.objects.order_by('-id').all()
70 def update_isdefault(sender, instance, **kwargs):
71 """Clear default if needed
73 if instance.isdefault:
74 for c in ClusterStation.objects.all():
75 if c.id != instance.id:
79 pre_save.connect(ClusterStation.update_isdefault, sender=ClusterStation)
81 class Sequencer(models.Model):
82 """Sequencers we've owned
84 name = models.CharField(max_length=50, db_index=True)
85 instrument_name = models.CharField(max_length=50, db_index=True)
86 serial_number = models.CharField(max_length=50, db_index=True)
87 model = models.CharField(max_length=255)
88 active = models.BooleanField(default=True, null=False)
89 isdefault = models.BooleanField(default=False, null=False)
90 comment = models.CharField(max_length=255)
93 ordering = ["-isdefault", "-active", "name"]
95 def __unicode__(self):
96 name = [unicode(self.name)]
97 if self.instrument_name is not None:
98 name.append("(%s)" % (unicode(self.instrument_name),))
102 def get_absolute_url(self):
103 return ('htsworkflow.frontend.experiments.views.sequencer',
108 d = cls.objects.filter(isdefault=True).all()
111 d = cls.objects.order_by('active', '-id').all()
117 def update_isdefault(sender, instance, **kwargs):
118 """Clear default if needed
120 if instance.isdefault:
121 for s in Sequencer.objects.all():
122 if s.id != instance.id:
126 pre_save.connect(Sequencer.update_isdefault, sender=Sequencer)
129 class FlowCell(models.Model):
130 flowcell_id = models.CharField(max_length=20, unique=True, db_index=True)
131 run_date = models.DateTimeField()
132 advanced_run = models.BooleanField(default=False)
133 paired_end = models.BooleanField(default=False)
134 read_length = models.IntegerField(default=32) # Stanford is currently 25
135 control_lane = models.IntegerField(choices=[(1, 1),
147 cluster_station = models.ForeignKey(ClusterStation, default=ClusterStation.default)
148 sequencer = models.ForeignKey(Sequencer, default=Sequencer.default)
150 notes = models.TextField(blank=True)
def __unicode__(self):
    """Return this flowcell's flowcell_id as a unicode string"""
    label = unicode(self.flowcell_id)
    return label
157 for lane in self.lane_set.order_by('lane_number'):
158 cluster_estimate = lane.cluster_estimate
159 if cluster_estimate is not None:
160 cluster_estimate = "%s k" % ((int(cluster_estimate) / 1000), )
162 cluster_estimate = 'None'
163 library_id = lane.library_id
164 library = lane.library
165 element = '<tr><td>%d</td>'\
166 '<td><a href="%s">%s</a></td><td>%s</td></tr>'
167 html.append(element % (lane.lane_number,
168 library.get_admin_url(),
171 html.append('</table>')
172 return "\n".join(html)
173 Lanes.allow_tags = True
176 ordering = ["-run_date"]
178 def get_admin_url(self):
179 # that's the django way... except it didn't work
180 return urlresolvers.reverse('admin:experiments_flowcell_change',
183 def flowcell_type(self):
184 """Convert our boolean 'is paired' flag to a name
192 def get_absolute_url(self):
193 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
194 return ('htsworkflow.frontend.experiments.views.flowcell_detail',
197 def get_raw_data_directory(self):
198 """Return location of where the raw data is stored"""
199 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
201 return os.path.join(settings.RESULT_HOME_DIR, flowcell_id)
203 def update_data_runs(self):
204 result_root = self.get_raw_data_directory()
205 LOGGER.debug("Update data runs flowcell root: %s" % (result_root,))
206 if result_root is None:
209 result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
210 run_xml_re = re.compile(glob.fnmatch.translate('run*.xml'))
212 dataruns = dict([(x.result_dir, x) for x in self.datarun_set.all()])
215 for dirpath, dirnames, filenames in os.walk(result_root):
216 for filename in filenames:
217 if run_xml_re.match(filename):
218 # we have a run directory
219 relative_pathname = get_relative_pathname(dirpath)
220 cached_run = dataruns.get(relative_pathname, None)
221 now = datetime.datetime.now()
222 if (cached_run is None):
223 self.import_data_run(relative_pathname, filename)
224 elif (now - cached_run.last_update_time).days > \
226 self.import_data_run(relative_pathname,
227 filename, cached_run)
229 def import_data_run(self, relative_pathname, run_xml_name, run=None):
230 """Given a result directory import files"""
231 run_dir = get_absolute_pathname(relative_pathname)
232 run_xml_path = os.path.join(run_dir, run_xml_name)
233 run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
234 LOGGER.debug("Importing run from %s" % (relative_pathname,))
239 run.status = RUN_STATUS_REVERSE_MAP['DONE']
240 run.result_dir = relative_pathname
241 run.runfolder_name = run_xml_data.runfolder_name
242 run.cycle_start = run_xml_data.image_analysis.start
243 run.cycle_stop = run_xml_data.image_analysis.stop
244 run.run_start_time = run_xml_data.image_analysis.date
245 run.image_software = run_xml_data.image_analysis.software
246 run.image_version = run_xml_data.image_analysis.version
247 run.basecall_software = run_xml_data.bustard.software
248 run.basecall_version = run_xml_data.bustard.version
249 run.alignment_software = run_xml_data.gerald.software
250 run.alignment_version = run_xml_data.gerald.version
252 run.last_update_time = datetime.datetime.now()
255 run.update_result_files()
258 # FIXME: should we automatically update dataruns?
259 # Or should we expect someone to call update_data_runs?
260 #def update_flowcell_dataruns(sender, instance, *args, **kwargs):
261 # """Update our dataruns
263 # if not os.path.exists(settings.RESULT_HOME_DIR):
266 # instance.update_data_runs()
267 #post_init.connect(update_flowcell_dataruns, sender=FlowCell)
270 LANE_STATUS_CODES = [(0, 'Failed'),
273 LANE_STATUS_MAP = dict((int(k), v) for k, v in LANE_STATUS_CODES)
274 LANE_STATUS_MAP[None] = "Unknown"
277 def is_valid_lane(value):
278 if value >= 1 and value <= 8:
284 class Lane(models.Model):
285 flowcell = models.ForeignKey(FlowCell)
286 lane_number = models.IntegerField()
287 library = models.ForeignKey(Library)
288 pM = models.DecimalField(max_digits=5,
293 cluster_estimate = models.IntegerField(blank=True, null=True)
294 status = models.IntegerField(choices=LANE_STATUS_CODES,
297 comment = models.TextField(null=True, blank=True)
300 def get_absolute_url(self):
301 return ('htsworkflow.frontend.experiments.views.flowcell_lane_detail',
def __unicode__(self):
    """Return '<flowcell_id>:<lane_number>' for this lane"""
    flowcell_label = self.flowcell.flowcell_id
    lane_label = unicode(self.lane_number)
    return flowcell_label + ':' + lane_label
308 class DataRun(models.Model):
309 flowcell = models.ForeignKey(FlowCell, verbose_name="Flowcell Id")
310 runfolder_name = models.CharField(max_length=50)
311 result_dir = models.CharField(max_length=255)
312 last_update_time = models.DateTimeField()
313 run_start_time = models.DateTimeField()
314 cycle_start = models.IntegerField(null=True, blank=True)
315 cycle_stop = models.IntegerField(null=True, blank=True)
316 run_status = models.IntegerField(choices=RUN_STATUS_CHOICES,
317 null=True, blank=True)
318 image_software = models.CharField(max_length=50)
319 image_version = models.CharField(max_length=50)
320 basecall_software = models.CharField(max_length=50)
321 basecall_version = models.CharField(max_length=50)
322 alignment_software = models.CharField(max_length=50)
323 alignment_version = models.CharField(max_length=50)
324 comment = models.TextField(blank=True)
326 def update_result_files(self):
327 abs_result_dir = get_absolute_pathname(self.result_dir)
329 for dirname, dirnames, filenames in os.walk(abs_result_dir):
330 for filename in filenames:
331 pathname = os.path.join(dirname, filename)
332 relative_pathname = get_relative_pathname(pathname)
333 datafiles = self.datafile_set.filter(
335 relative_pathname=relative_pathname)
336 if len(datafiles) > 0:
339 metadata = find_file_type_metadata_from_filename(filename)
340 if metadata is not None:
341 metadata['filename'] = filename
343 newfile.data_run = self
344 newfile.file_type = metadata['file_type']
345 newfile.relative_pathname = relative_pathname
347 lane_number = metadata.get('lane', None)
348 if lane_number is not None:
349 lane = self.flowcell.lane_set.get(
350 lane_number=lane_number)
351 newfile.library = lane.library
353 self.datafile_set.add(newfile)
355 self.last_update_time = datetime.datetime.now()
357 def lane_files(self):
360 for datafile in self.datafile_set.all():
361 metadata = datafile.attributes
362 if metadata is not None:
363 lane = metadata.get('lane', None)
365 lane_file_set = lanes.setdefault(lane, {})
366 normalized_name = datafile.file_type.normalized_name
367 lane_file_set[normalized_name] = datafile
370 def ivc_plots(self, lane):
371 ivc_name = ['IVC All', 'IVC Call',
372 'IVC Percent Base', 'IVC Percent All', 'IVC Percent Call']
375 for rel_filename, metadata in self.get_result_files():
376 if metadata.file_type.name in ivc_name:
377 plots[metadata.file_type.name] = (rel_filename, metadata)
380 class FileType(models.Model):
381 """Represent potential file types
383 regex is a pattern used to detect if a filename matches this type
384 data run currently assumes that there may be a (?P<lane>) and
385 (?P<end>) pattern in the regular expression.
387 name = models.CharField(max_length=50)
388 mimetype = models.CharField(max_length=50, null=True, blank=True)
389 # regular expression from glob.fnmatch.translate
390 regex = models.CharField(max_length=50, null=True, blank=True)
392 def parse_filename(self, pathname):
393 """Does filename match our pattern?
395 Returns None if not, or dictionary of match variables if we do.
397 path, filename = os.path.split(pathname)
398 if len(self.regex) > 0:
399 match = re.match(self.regex, filename)
400 if match is not None:
401 # These are (?P<>) names we know about from our
403 results = match.groupdict()
405 # convert int parameters
406 for attribute_name in ['lane', 'end']:
407 value = results.get(attribute_name, None)
408 if value is not None:
409 results[attribute_name] = int(value)
413 def _get_normalized_name(self):
414 """Crush data file name into identifier friendly name"""
415 return self.name.replace(' ', '_').lower()
416 normalized_name = property(_get_normalized_name)
418 def __unicode__(self):
419 #return u"<FileType: %s>" % (self.name,)
424 """Helper function to set default UUID in DataFile"""
425 return str(uuid.uuid1())
428 class DataFile(models.Model):
429 """Store map from random ID to filename"""
430 random_key = models.CharField(max_length=64,
433 data_run = models.ForeignKey(DataRun, db_index=True)
434 library = models.ForeignKey(Library, db_index=True, null=True, blank=True)
435 file_type = models.ForeignKey(FileType)
436 relative_pathname = models.CharField(max_length=255, db_index=True)
def _get_attributes(self):
    """Return what file_type.parse_filename() yields for relative_pathname"""
    file_type = self.file_type
    return file_type.parse_filename(self.relative_pathname)
attributes = property(_get_attributes)
def _get_pathname(self):
    """Return relative_pathname resolved through get_absolute_pathname()"""
    relative = self.relative_pathname
    return get_absolute_pathname(relative)
pathname = property(_get_pathname)
def get_absolute_url(self):
    """Return the (view name, args, kwargs) triple for read_result_file

    The keyword arguments carry this file's random_key under 'key'.
    """
    view_name = 'htsworkflow.frontend.experiments.views.read_result_file'
    url_kwargs = {'key': self.random_key}
    return (view_name, (), url_kwargs)
452 def find_file_type_metadata_from_filename(pathname):
453 path, filename = os.path.split(pathname)
455 for file_type in FileType.objects.all():
456 result = file_type.parse_filename(filename)
457 if result is not None:
458 result['file_type'] = file_type
def get_relative_pathname(abspath):
    """Strip off the result home directory from a path

    Only a leading settings.RESULT_HOME_DIR (with its trailing path
    separator) is removed.  A path that does not start with it is
    returned unchanged.
    """
    # joining with '' guarantees a trailing separator, so a sibling
    # directory that merely shares the same name prefix never matches
    result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
    if abspath.startswith(result_home_dir):
        # slice rather than str.replace: replace would also delete any
        # later repeat of the home directory inside the path
        return abspath[len(result_home_dir):]
    return abspath
def get_absolute_pathname(relative_pathname):
    """Return relative_pathname joined onto settings.RESULT_HOME_DIR"""
    result_home_dir = settings.RESULT_HOME_DIR
    absolute_pathname = os.path.join(result_home_dir, relative_pathname)
    return absolute_pathname