9 from django.conf import settings
10 from django.core.exceptions import ObjectDoesNotExist
11 from django.core import urlresolvers
12 from django.utils import timezone
13 from django.db import models
14 from django.db.models.signals import post_init, pre_save
16 from htsworkflow.frontend.samples.models import Library
17 from htsworkflow.util.conversion import parse_flowcell_id
18 from htsworkflow.pipelines import runfolder
22 LOGGER = logging.getLogger(__name__)
25 default_pM = int(settings.DEFAULT_PM)
26 except AttributeError, e:
27 LOGGER.error("invalid value for frontend.default_pm")
29 # how many days to wait before trying to re-import a runfolder
32 RESCAN_DELAY = int(settings.RESCAN_DELAY)
33 except (ValueError, AttributeError):
34 LOGGER.error("Missing or invalid settings.RESCAN_DELAY, "\
35 "defaulting to %s" % (RESCAN_DELAY,))
37 RUN_STATUS_CHOICES = (
38 (0, 'Sequencer running'), # Solexa Data Pipeline Not Yet Started'),
39 (1, 'Data Pipeline Started'),
40 (2, 'Data Pipeline Interrupted'),
41 (3, 'Data Pipeline Finished'),
42 (4, 'Collect Results Started'),
43 (5, 'Collect Results Finished'),
48 RUN_STATUS_REVERSE_MAP = dict(((v, k) for k, v in RUN_STATUS_CHOICES))
51 class ClusterStation(models.Model):
52 """List of cluster stations"""
53 name = models.CharField(max_length=50, unique=True)
54 isdefault = models.BooleanField(default=False, null=False)
57 ordering = ["-isdefault", "name"]
def __unicode__(self):
    """Return this cluster station's name as unicode text."""
    station_name = self.name
    return unicode(station_name)
64 d = cls.objects.filter(isdefault=True).all()
67 d = cls.objects.order_by('-id').all()
73 def update_isdefault(sender, instance, **kwargs):
74 """Clear default if needed
76 if instance.isdefault:
77 for c in ClusterStation.objects.filter(isdefault=True).all():
78 if c.id != instance.id:
82 pre_save.connect(ClusterStation.update_isdefault, sender=ClusterStation)
84 class Sequencer(models.Model):
85 """Sequencers we've owned
87 name = models.CharField(max_length=50, db_index=True)
88 instrument_name = models.CharField(max_length=50, db_index=True)
89 serial_number = models.CharField(max_length=50, db_index=True)
90 model = models.CharField(max_length=255)
91 active = models.BooleanField(default=True, null=False)
92 isdefault = models.BooleanField(default=False, null=False)
93 comment = models.CharField(max_length=255)
96 ordering = ["-isdefault", "-active", "name"]
def __unicode__(self):
    """Return the sequencer name, followed by its instrument name.

    The result looks like ``name (instrument_name)``; the parenthesised
    part is left out only when ``instrument_name`` is None.
    """
    label = unicode(self.name)
    if self.instrument_name is None:
        return label
    instrument = "(%s)" % (unicode(self.instrument_name),)
    return " ".join([label, instrument])
105 def get_absolute_url(self):
106 return ('htsworkflow.frontend.experiments.views.sequencer',
111 d = cls.objects.filter(isdefault=True).all()
114 d = cls.objects.order_by('active', '-id').all()
120 def update_isdefault(sender, instance, **kwargs):
121 """Clear default if needed
123 if instance.isdefault:
124 for s in Sequencer.objects.filter(isdefault=True).all():
125 if s.id != instance.id:
129 pre_save.connect(Sequencer.update_isdefault, sender=Sequencer)
131 class FlowCellModel(models.Model):
132 name = models.TextField()
133 fixed_time = models.IntegerField(default=0, help_text='(seconds)')
134 per_cycle_time = models.IntegerField(default=0, help_text='(seconds)')
135 isdefault = models.BooleanField(default=False)
139 d = cls.objects.filter(isdefault=True).all()
145 def update_isdefault(sender, instance, **kwargs):
146 """Clear default if needed
148 if instance.isdefault:
149 for s in FlowCellType.objects.filter(isdefault=True).all():
150 if s.id != instance.id:
154 pre_save.connect(FlowCellModel.update_isdefault, sender=FlowCellModel)
156 class FlowCell(models.Model):
157 flowcell_id = models.CharField(max_length=20, unique=True, db_index=True)
158 run_date = models.DateTimeField()
159 advanced_run = models.BooleanField(default=False)
160 paired_end = models.BooleanField(default=False)
161 read_length = models.IntegerField(default=32) # Stanford is currenlty 25
162 control_lane = models.IntegerField(choices=[(1, 1),
174 cluster_station = models.ForeignKey(ClusterStation, default=ClusterStation.default)
175 sequencer = models.ForeignKey(Sequencer, default=Sequencer.default)
176 flowcell_model = models.ForeignKey(FlowCellModel, default=FlowCellModel.default)
178 notes = models.TextField(blank=True)
def __unicode__(self):
    """Return the flowcell id as unicode text."""
    identifier = self.flowcell_id
    return unicode(identifier)
185 for lane in self.lane_set.order_by('lane_number'):
186 cluster_estimate = lane.cluster_estimate
187 if cluster_estimate is not None:
188 cluster_estimate = "%s k" % ((int(cluster_estimate) / 1000), )
190 cluster_estimate = 'None'
191 library_id = lane.library_id
192 library = lane.library
193 element = '<tr><td>%d</td>'\
194 '<td><a href="%s">%s</a></td><td>%s</td></tr>'
195 html.append(element % (lane.lane_number,
196 library.get_admin_url(),
199 html.append('</table>')
200 return "\n".join(html)
201 Lanes.allow_tags = True
204 ordering = ["-run_date"]
206 def get_admin_url(self):
207 # that's the django way... except it didn't work
208 return urlresolvers.reverse('admin:experiments_flowcell_change',
211 def flowcell_type(self):
212 """Convert our boolean 'is paired' flag to a name
220 def get_absolute_url(self):
221 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
222 return ('htsworkflow.frontend.experiments.views.flowcell_detail',
225 def get_raw_data_directory(self):
226 """Return location of where the raw data is stored"""
227 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
229 return os.path.join(settings.RESULT_HOME_DIR, flowcell_id)
231 def update_data_runs(self):
232 result_root = self.get_raw_data_directory()
233 LOGGER.debug("Update data runs flowcell root: %s" % (result_root,))
234 if result_root is None:
237 result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
238 run_xml_re = re.compile(glob.fnmatch.translate('run*.xml'))
241 for dirpath, dirnames, filenames in os.walk(result_root):
242 for filename in filenames:
243 if run_xml_re.match(filename):
244 # we have a run directory
245 relative_pathname = get_relative_pathname(dirpath)
246 self.import_data_run(relative_pathname, filename)
248 def import_data_run(self, relative_pathname, run_xml_name, force=False):
249 """Given a result directory import files"""
251 run_dir = get_absolute_pathname(relative_pathname)
252 run_xml_path = os.path.join(run_dir, run_xml_name)
254 runs = DataRun.objects.filter(result_dir = relative_pathname)
259 raise RuntimeError("Too many data runs for %s" % (
265 if created or force or (now-run.last_update_time).days > RESCAN_DELAY:
266 LOGGER.debug("Importing run from %s" % (relative_pathname,))
267 run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
269 run.status = RUN_STATUS_REVERSE_MAP['DONE']
270 run.result_dir = relative_pathname
271 run.runfolder_name = run_xml_data.runfolder_name
272 run.cycle_start = run_xml_data.image_analysis.start
273 run.cycle_stop = run_xml_data.image_analysis.stop
274 naive_run_start_time = datetime.datetime.fromordinal(run_xml_data.image_analysis.date.toordinal())
275 run.run_start_time = pytz.timezone(settings.TIME_ZONE).localize(naive_run_start_time)
276 run.image_software = run_xml_data.image_analysis.software
277 run.image_version = run_xml_data.image_analysis.version
278 run.basecall_software = run_xml_data.bustard.software
279 run.basecall_version = run_xml_data.bustard.version
280 # we're frequently not running alignments
281 if run_xml_data.gerald:
282 run.alignment_software = run_xml_data.gerald.software
283 run.alignment_version = run_xml_data.gerald.version
285 run.last_update_time = timezone.now()
288 run.update_result_files()
291 # FIXME: should we automatically update dataruns?
292 # Or should we expect someone to call update_data_runs?
293 #def update_flowcell_dataruns(sender, instance, *args, **kwargs):
294 # """Update our dataruns
296 # if not os.path.exists(settings.RESULT_HOME_DIR):
299 # instance.update_data_runs()
300 #post_init.connect(update_flowcell_dataruns, sender=FlowCell)
303 LANE_STATUS_CODES = [(0, 'Failed'),
306 LANE_STATUS_MAP = dict((int(k), v) for k, v in LANE_STATUS_CODES)
307 LANE_STATUS_MAP[None] = "Unknown"
310 def is_valid_lane(value):
311 if value >= 1 and value <= 8:
317 class Lane(models.Model):
318 flowcell = models.ForeignKey(FlowCell)
319 lane_number = models.IntegerField()
320 library = models.ForeignKey(Library)
321 pM = models.DecimalField(max_digits=5,
326 cluster_estimate = models.IntegerField(blank=True, null=True)
327 status = models.IntegerField(choices=LANE_STATUS_CODES,
330 comment = models.TextField(null=True, blank=True)
333 def get_absolute_url(self):
334 return ('htsworkflow.frontend.experiments.views.flowcell_lane_detail',
def __unicode__(self):
    """Return ``<flowcell id>:<lane number>`` for this lane."""
    flowcell_label = self.flowcell.flowcell_id
    lane_label = unicode(self.lane_number)
    return ':'.join((flowcell_label, lane_label))
341 class DataRun(models.Model):
342 flowcell = models.ForeignKey(FlowCell, verbose_name="Flowcell Id")
343 runfolder_name = models.CharField(max_length=50)
344 result_dir = models.CharField(max_length=255)
345 last_update_time = models.DateTimeField()
346 run_start_time = models.DateTimeField()
347 cycle_start = models.IntegerField(null=True, blank=True)
348 cycle_stop = models.IntegerField(null=True, blank=True)
349 run_status = models.IntegerField(choices=RUN_STATUS_CHOICES,
350 null=True, blank=True)
351 image_software = models.CharField(max_length=50)
352 image_version = models.CharField(max_length=50)
353 basecall_software = models.CharField(max_length=50)
354 basecall_version = models.CharField(max_length=50)
355 alignment_software = models.CharField(max_length=50)
356 alignment_version = models.CharField(max_length=50)
357 comment = models.TextField(blank=True)
359 def update_result_files(self):
360 abs_result_dir = get_absolute_pathname(self.result_dir)
362 for dirname, dirnames, filenames in os.walk(abs_result_dir):
363 for filename in filenames:
364 pathname = os.path.join(dirname, filename)
365 relative_pathname = get_relative_pathname(pathname)
366 datafiles = self.datafile_set.filter(
368 relative_pathname=relative_pathname)
369 if len(datafiles) > 0:
372 metadata = find_file_type_metadata_from_filename(filename)
373 if metadata is not None:
374 metadata['filename'] = filename
376 newfile.data_run = self
377 newfile.file_type = metadata['file_type']
378 newfile.relative_pathname = relative_pathname
380 lane_number = metadata.get('lane', None)
381 if lane_number is not None:
382 lane = self.flowcell.lane_set.get(
383 lane_number=lane_number)
384 newfile.library = lane.library
386 self.datafile_set.add(newfile)
388 self.last_update_time = timezone.now()
390 def lane_files(self):
393 for datafile in self.datafile_set.all():
394 metadata = datafile.attributes
395 if metadata is not None:
396 lane = metadata.get('lane', None)
398 lane_file_set = lanes.setdefault(lane, {})
399 normalized_name = datafile.file_type.normalized_name
400 lane_file_set[normalized_name] = datafile
403 def ivc_plots(self, lane):
404 ivc_name = ['IVC All', 'IVC Call',
405 'IVC Percent Base', 'IVC Percent All', 'IVC Percent Call']
408 for rel_filename, metadata in self.get_result_files():
409 if metadata.file_type.name in ivc_name:
410 plots[metadata.file_type.name] = (rel_filename, metadata)
413 class FileType(models.Model):
414 """Represent potential file types
416 regex is a pattern used to detect if a filename matches this type
417 data run currently assumes that there may be a (?P<lane>) and
418 (?P<end>) pattern in the regular expression.
420 name = models.CharField(max_length=50)
421 mimetype = models.CharField(max_length=50, null=True, blank=True)
422 # regular expression from glob.fnmatch.translate
423 regex = models.TextField(null=True, blank=True)
425 def parse_filename(self, pathname):
426 """Does filename match our pattern?
428 Returns None if not, or dictionary of match variables if we do.
430 path, filename = os.path.split(pathname)
431 if len(self.regex) > 0:
432 match = re.match(self.regex, filename)
433 if match is not None:
434 # These are (?P<>) names we know about from our
436 results = match.groupdict()
438 # convert int parameters
439 for attribute_name in ['lane', 'end']:
440 value = results.get(attribute_name, None)
441 if value is not None:
442 results[attribute_name] = int(value)
def _get_normalized_name(self):
    """Return the file type name as an identifier-friendly string.

    Every space in ``self.name`` becomes an underscore and the result
    is lower-cased.
    """
    words = self.name.split(' ')
    return '_'.join(words).lower()
normalized_name = property(_get_normalized_name)
451 def __unicode__(self):
452 #return u"<FileType: %s>" % (self.name,)
457 """Helper function to set default UUID in DataFile"""
458 return str(uuid.uuid1())
461 class DataFile(models.Model):
462 """Store map from random ID to filename"""
463 random_key = models.CharField(max_length=64,
466 data_run = models.ForeignKey(DataRun, db_index=True)
467 library = models.ForeignKey(Library, db_index=True, null=True, blank=True)
468 file_type = models.ForeignKey(FileType)
469 relative_pathname = models.CharField(max_length=255, db_index=True)
def _get_attributes(self):
    """Return whatever this file's type parses out of its pathname.

    The relative pathname is handed to ``file_type.parse_filename`` and
    that call's result is returned unchanged.
    """
    parse = self.file_type.parse_filename
    return parse(self.relative_pathname)
attributes = property(_get_attributes)
def _get_pathname(self):
    """Return the absolute location of this file.

    The stored relative pathname is resolved with
    ``get_absolute_pathname``.
    """
    absolute = get_absolute_pathname(self.relative_pathname)
    return absolute
pathname = property(_get_pathname)
def get_absolute_url(self):
    """Return the (view name, args, kwargs) triple for this file.

    The view is looked up by its dotted name and receives this file's
    ``random_key`` as the ``key`` keyword argument.
    NOTE(review): presumably a URL-reversing decorator outside this
    block consumes the triple -- confirm.
    """
    view_name = 'htsworkflow.frontend.experiments.views.read_result_file'
    view_kwargs = {'key': self.random_key}
    return (view_name, (), view_kwargs)
485 def find_file_type_metadata_from_filename(pathname):
486 path, filename = os.path.split(pathname)
488 for file_type in FileType.objects.all():
489 result = file_type.parse_filename(filename)
490 if result is not None:
491 result['file_type'] = file_type
def get_relative_pathname(abspath, result_home_dir=None):
    """Strip off the result home directory from a path

    Only a leading result home directory is removed.  A path that does
    not start with it is returned unchanged, and a later occurrence of
    the same directory text inside the path is left alone.

    abspath -- pathname expected to live below the result home directory
    result_home_dir -- directory to strip; defaults to
                       settings.RESULT_HOME_DIR when None
    """
    if result_home_dir is None:
        result_home_dir = settings.RESULT_HOME_DIR
    # joining with '' makes sure the prefix ends with a path separator,
    # so the returned path does not start with one
    prefix = os.path.join(result_home_dir, '')
    if abspath.startswith(prefix):
        return abspath[len(prefix):]
    return abspath
def get_absolute_pathname(relative_pathname):
    """Join a relative path onto the results home directory

    The base directory is read from settings.RESULT_HOME_DIR.
    """
    results_root = settings.RESULT_HOME_DIR
    return os.path.join(results_root, relative_pathname)