9 from django.conf import settings
10 from django.core.exceptions import ObjectDoesNotExist
11 from django.core import urlresolvers
12 from django.utils import timezone
13 from django.db import models
14 from django.db.models.signals import post_init, pre_save
16 from htsworkflow.frontend.samples.models import Library
17 from htsworkflow.util.conversion import parse_flowcell_id
18 from htsworkflow.pipelines import runfolder
# Module-level logger, named after this module via __name__.
LOGGER = logging.getLogger(__name__)
25 default_pM = int(settings.DEFAULT_PM)
26 except AttributeError, e:
27 LOGGER.error("invalid value for frontend.default_pm")
29 # how many days to wait before trying to re-import a runfolder
32 RESCAN_DELAY = int(settings.RESCAN_DELAY)
33 except (ValueError, AttributeError):
34 LOGGER.error("Missing or invalid settings.RESCAN_DELAY, "\
35 "defaulting to %s" % (RESCAN_DELAY,))
37 RUN_STATUS_CHOICES = (
38 (0, 'Sequencer running'), # Solexa Data Pipeline Not Yet Started'),
39 (1, 'Data Pipeline Started'),
40 (2, 'Data Pipeline Interrupted'),
41 (3, 'Data Pipeline Finished'),
42 (4, 'Collect Results Started'),
43 (5, 'Collect Results Finished'),
# Reverse lookup built from RUN_STATUS_CHOICES: status label -> numeric code.
RUN_STATUS_REVERSE_MAP = dict(
    (label, code) for code, label in RUN_STATUS_CHOICES)
51 class ClusterStation(models.Model):
52 """List of cluster stations"""
53 name = models.CharField(max_length=50, unique=True)
54 isdefault = models.BooleanField(default=False, null=False)
57 ordering = ["-isdefault", "name"]
def __unicode__(self):
    """Return this cluster station's name as a unicode string."""
    station_name = self.name
    return unicode(station_name)
64 d = cls.objects.filter(isdefault=True).all()
67 d = cls.objects.order_by('-id').all()
73 def update_isdefault(sender, instance, **kwargs):
74 """Clear default if needed
76 if instance.isdefault:
77 for c in ClusterStation.objects.filter(isdefault=True).all():
78 if c.id != instance.id:
# Run ClusterStation.update_isdefault ("Clear default if needed") on the
# pre_save signal whenever the sender is ClusterStation.
pre_save.connect(ClusterStation.update_isdefault, sender=ClusterStation)
84 class Sequencer(models.Model):
85 """Sequencers we've owned
87 name = models.CharField(max_length=50, db_index=True)
88 instrument_name = models.CharField(max_length=50, db_index=True)
89 serial_number = models.CharField(max_length=50, db_index=True)
90 model = models.CharField(max_length=255)
91 active = models.BooleanField(default=True, null=False)
92 isdefault = models.BooleanField(default=False, null=False)
93 comment = models.CharField(max_length=255)
96 ordering = ["-isdefault", "-active", "name"]
def __unicode__(self):
    """Return the sequencer name, with the instrument name appended.

    The result is "<name> (<instrument_name>)"; when instrument_name is
    None only "<name>" is returned.
    """
    label = unicode(self.name)
    if self.instrument_name is None:
        return " ".join([label])
    suffix = "(%s)" % (unicode(self.instrument_name),)
    return " ".join([label, suffix])
105 def get_absolute_url(self):
106 return ('htsworkflow.frontend.experiments.views.sequencer',
111 d = cls.objects.filter(isdefault=True).all()
114 d = cls.objects.order_by('active', '-id').all()
120 def update_isdefault(sender, instance, **kwargs):
121 """Clear default if needed
123 if instance.isdefault:
124 for s in Sequencer.objects.filter(isdefault=True).all():
125 if s.id != instance.id:
# Run Sequencer.update_isdefault ("Clear default if needed") on the
# pre_save signal whenever the sender is Sequencer.
pre_save.connect(Sequencer.update_isdefault, sender=Sequencer)
131 class FlowCellModel(models.Model):
132 name = models.TextField()
133 fixed_time = models.IntegerField(default=0, help_text='(seconds)')
134 per_cycle_time = models.IntegerField(default=0, help_text='(seconds)')
135 isdefault = models.BooleanField(default=False)
def __unicode__(self):
    """Return this flowcell model's name as a unicode string."""
    model_name = self.name
    return unicode(model_name)
142 d = cls.objects.filter(isdefault=True).all()
148 def update_isdefault(sender, instance, **kwargs):
149 """Clear default if needed
151 if instance.isdefault:
152 for s in FlowCellType.objects.filter(isdefault=True).all():
153 if s.id != instance.id:
# Run FlowCellModel.update_isdefault ("Clear default if needed") on the
# pre_save signal whenever the sender is FlowCellModel.
# NOTE(review): the visible body of update_isdefault iterates
# `FlowCellType.objects`, and no `FlowCellType` is defined in the visible
# part of this file -- confirm whether it should be `FlowCellModel`.
pre_save.connect(FlowCellModel.update_isdefault, sender=FlowCellModel)
159 class FlowCell(models.Model):
160 flowcell_id = models.CharField(max_length=20, unique=True, db_index=True)
161 run_date = models.DateTimeField()
162 advanced_run = models.BooleanField(default=False)
163 paired_end = models.BooleanField(default=False)
164 read_length = models.IntegerField(default=32) # Stanford is currenlty 25
165 control_lane = models.IntegerField(choices=[(1, 1),
177 cluster_station = models.ForeignKey(ClusterStation, default=ClusterStation.default)
178 sequencer = models.ForeignKey(Sequencer, default=Sequencer.default)
179 flowcell_model = models.ForeignKey(FlowCellModel, default=FlowCellModel.default)
181 notes = models.TextField(blank=True)
def __unicode__(self):
    """Return the flowcell_id as a unicode string."""
    identifier = self.flowcell_id
    return unicode(identifier)
188 for lane in self.lane_set.order_by('lane_number'):
189 cluster_estimate = lane.cluster_estimate
190 if cluster_estimate is not None:
191 cluster_estimate = "%s k" % ((int(cluster_estimate) / 1000), )
193 cluster_estimate = 'None'
194 library_id = lane.library_id
195 library = lane.library
196 element = '<tr><td>%d</td>'\
197 '<td><a href="%s">%s</a></td><td>%s</td></tr>'
198 html.append(element % (lane.lane_number,
199 library.get_admin_url(),
202 html.append('</table>')
203 return "\n".join(html)
204 Lanes.allow_tags = True
207 ordering = ["-run_date"]
209 def get_admin_url(self):
210 # that's the django way... except it didn't work
211 return urlresolvers.reverse('admin:experiments_flowcell_change',
214 def flowcell_type(self):
215 """Convert our boolean 'is paired' flag to a name
223 def get_absolute_url(self):
224 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
225 return ('htsworkflow.frontend.experiments.views.flowcell_detail',
228 def get_raw_data_directory(self):
229 """Return location of where the raw data is stored"""
230 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
232 return os.path.join(settings.RESULT_HOME_DIR, flowcell_id)
234 def update_data_runs(self):
235 result_root = self.get_raw_data_directory()
236 LOGGER.debug("Update data runs flowcell root: %s" % (result_root,))
237 if result_root is None:
240 result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
241 run_xml_re = re.compile(glob.fnmatch.translate('run*.xml'))
244 for dirpath, dirnames, filenames in os.walk(result_root):
245 for filename in filenames:
246 if run_xml_re.match(filename):
247 # we have a run directory
248 relative_pathname = get_relative_pathname(dirpath)
249 self.import_data_run(relative_pathname, filename)
251 def import_data_run(self, relative_pathname, run_xml_name, force=False):
252 """Given a result directory import files"""
254 run_dir = get_absolute_pathname(relative_pathname)
255 run_xml_path = os.path.join(run_dir, run_xml_name)
257 runs = DataRun.objects.filter(result_dir = relative_pathname)
262 raise RuntimeError("Too many data runs for %s" % (
268 if created or force or (now-run.last_update_time).days > RESCAN_DELAY:
269 LOGGER.debug("Importing run from %s" % (relative_pathname,))
270 run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
272 run.status = RUN_STATUS_REVERSE_MAP['DONE']
273 run.result_dir = relative_pathname
274 run.runfolder_name = run_xml_data.runfolder_name
275 run.cycle_start = run_xml_data.image_analysis.start
276 run.cycle_stop = run_xml_data.image_analysis.stop
277 naive_run_start_time = datetime.datetime.fromordinal(run_xml_data.image_analysis.date.toordinal())
278 run.run_start_time = pytz.timezone(settings.TIME_ZONE).localize(naive_run_start_time)
279 run.image_software = run_xml_data.image_analysis.software
280 run.image_version = run_xml_data.image_analysis.version
281 run.basecall_software = run_xml_data.bustard.software
282 run.basecall_version = run_xml_data.bustard.version
283 # we're frequently not running alignments
284 if run_xml_data.gerald:
285 run.alignment_software = run_xml_data.gerald.software
286 run.alignment_version = run_xml_data.gerald.version
288 run.last_update_time = timezone.now()
291 run.update_result_files()
294 # FIXME: should we automatically update dataruns?
295 # Or should we expect someone to call update_data_runs?
296 #def update_flowcell_dataruns(sender, instance, *args, **kwargs):
297 # """Update our dataruns
299 # if not os.path.exists(settings.RESULT_HOME_DIR):
302 # instance.update_data_runs()
303 #post_init.connect(update_flowcell_dataruns, sender=FlowCell)
306 LANE_STATUS_CODES = [(0, 'Failed'),
# Lookup from integer lane status code to its label, built from
# LANE_STATUS_CODES; a status of None is reported as "Unknown".
LANE_STATUS_MAP = dict(
    (int(code), label) for code, label in LANE_STATUS_CODES)
LANE_STATUS_MAP.update({None: "Unknown"})
313 def is_valid_lane(value):
314 if value >= 1 and value <= 8:
320 class Lane(models.Model):
321 flowcell = models.ForeignKey(FlowCell)
322 lane_number = models.IntegerField()
323 library = models.ForeignKey(Library)
324 pM = models.DecimalField(max_digits=5,
329 cluster_estimate = models.IntegerField(blank=True, null=True)
330 status = models.IntegerField(choices=LANE_STATUS_CODES,
333 comment = models.TextField(null=True, blank=True)
336 def get_absolute_url(self):
337 return ('htsworkflow.frontend.experiments.views.flowcell_lane_detail',
def __unicode__(self):
    """Return "<flowcell_id>:<lane_number>" for this lane."""
    flowcell_label = self.flowcell.flowcell_id
    lane_label = unicode(self.lane_number)
    return flowcell_label + ':' + lane_label
344 class DataRun(models.Model):
345 flowcell = models.ForeignKey(FlowCell, verbose_name="Flowcell Id")
346 runfolder_name = models.CharField(max_length=50)
347 result_dir = models.CharField(max_length=255)
348 last_update_time = models.DateTimeField()
349 run_start_time = models.DateTimeField()
350 cycle_start = models.IntegerField(null=True, blank=True)
351 cycle_stop = models.IntegerField(null=True, blank=True)
352 run_status = models.IntegerField(choices=RUN_STATUS_CHOICES,
353 null=True, blank=True)
354 image_software = models.CharField(max_length=50)
355 image_version = models.CharField(max_length=50)
356 basecall_software = models.CharField(max_length=50)
357 basecall_version = models.CharField(max_length=50)
358 alignment_software = models.CharField(max_length=50)
359 alignment_version = models.CharField(max_length=50)
360 comment = models.TextField(blank=True)
362 def update_result_files(self):
363 abs_result_dir = get_absolute_pathname(self.result_dir)
365 for dirname, dirnames, filenames in os.walk(abs_result_dir):
366 for filename in filenames:
367 pathname = os.path.join(dirname, filename)
368 relative_pathname = get_relative_pathname(pathname)
369 datafiles = self.datafile_set.filter(
371 relative_pathname=relative_pathname)
372 if len(datafiles) > 0:
375 metadata = find_file_type_metadata_from_filename(filename)
376 if metadata is not None:
377 metadata['filename'] = filename
379 newfile.data_run = self
380 newfile.file_type = metadata['file_type']
381 newfile.relative_pathname = relative_pathname
383 lane_number = metadata.get('lane', None)
384 if lane_number is not None:
385 lane = self.flowcell.lane_set.get(
386 lane_number=lane_number)
387 newfile.library = lane.library
389 self.datafile_set.add(newfile)
391 self.last_update_time = timezone.now()
393 def lane_files(self):
396 for datafile in self.datafile_set.all():
397 metadata = datafile.attributes
398 if metadata is not None:
399 lane = metadata.get('lane', None)
401 lane_file_set = lanes.setdefault(lane, {})
402 normalized_name = datafile.file_type.normalized_name
403 lane_file_set[normalized_name] = datafile
406 def ivc_plots(self, lane):
407 ivc_name = ['IVC All', 'IVC Call',
408 'IVC Percent Base', 'IVC Percent All', 'IVC Percent Call']
411 for rel_filename, metadata in self.get_result_files():
412 if metadata.file_type.name in ivc_name:
413 plots[metadata.file_type.name] = (rel_filename, metadata)
416 class FileType(models.Model):
417 """Represent potential file types
419 regex is a pattern used to detect if a filename matches this type
420 data run currently assumes that there may be a (?P<lane>) and
421 (?P<end>) pattern in the regular expression.
423 name = models.CharField(max_length=50)
424 mimetype = models.CharField(max_length=50, null=True, blank=True)
425 # regular expression from glob.fnmatch.translate
426 regex = models.TextField(null=True, blank=True)
428 def parse_filename(self, pathname):
429 """Does filename match our pattern?
431 Returns None if not, or dictionary of match variables if we do.
433 path, filename = os.path.split(pathname)
434 if len(self.regex) > 0:
435 match = re.match(self.regex, filename)
436 if match is not None:
437 # These are (?P<>) names we know about from our
439 results = match.groupdict()
441 # convert int parameters
442 for attribute_name in ['lane', 'end']:
443 value = results.get(attribute_name, None)
444 if value is not None:
445 results[attribute_name] = int(value)
def _get_normalized_name(self):
    """Return self.name lower-cased, with each space turned into '_'."""
    underscored = '_'.join(self.name.split(' '))
    return underscored.lower()
normalized_name = property(_get_normalized_name)
454 def __unicode__(self):
455 #return u"<FileType: %s>" % (self.name,)
460 """Helper function to set default UUID in DataFile"""
461 return str(uuid.uuid1())
464 class DataFile(models.Model):
465 """Store map from random ID to filename"""
466 random_key = models.CharField(max_length=64,
469 data_run = models.ForeignKey(DataRun, db_index=True)
470 library = models.ForeignKey(Library, db_index=True, null=True, blank=True)
471 file_type = models.ForeignKey(FileType)
472 relative_pathname = models.CharField(max_length=255, db_index=True)
def _get_attributes(self):
    """Return whatever file_type.parse_filename yields for our path.

    The file type's parser is handed this file's relative_pathname and
    its result is returned unchanged.
    """
    parse = self.file_type.parse_filename
    return parse(self.relative_pathname)
attributes = property(_get_attributes)
def _get_pathname(self):
    """Return this file's absolute path via get_absolute_pathname()."""
    stored_path = self.relative_pathname
    return get_absolute_pathname(stored_path)
pathname = property(_get_pathname)
def get_absolute_url(self):
    """Return a (view name, positional args, keyword args) tuple.

    The view is read_result_file and the only keyword argument is 'key',
    set to this file's random_key.
    """
    view_name = 'htsworkflow.frontend.experiments.views.read_result_file'
    view_kwargs = {'key': self.random_key}
    return (view_name, (), view_kwargs)
488 def find_file_type_metadata_from_filename(pathname):
489 path, filename = os.path.split(pathname)
491 for file_type in FileType.objects.all():
492 result = file_type.parse_filename(filename)
493 if result is not None:
494 result['file_type'] = file_type
def get_relative_pathname(abspath):
    """Strip off the result home directory from a path

    Only a leading settings.RESULT_HOME_DIR (plus its trailing path
    separator) is removed.  A path that does not start with the result
    home directory is returned unchanged.
    """
    # os.path.join(..., '') guarantees the prefix ends with a separator,
    # so a sibling such as "<home>_old/..." is never mistaken for a match.
    result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
    # Slice the prefix off instead of using str.replace(), which would
    # also delete later occurrences of the same text inside the path.
    if abspath.startswith(result_home_dir):
        return abspath[len(result_home_dir):]
    return abspath
def get_absolute_pathname(relative_pathname):
    """Join a relative path onto settings.RESULT_HOME_DIR.

    NOTE: os.path.join discards earlier components when a later one is
    absolute, so an absolute relative_pathname is returned as-is.
    """
    results_root = settings.RESULT_HOME_DIR
    return os.path.join(results_root, relative_pathname)