10 from django.conf import settings
11 from django.core.exceptions import ObjectDoesNotExist
12 from django.core import urlresolvers
13 from django.utils import timezone
14 from django.db import models
15 from django.db.models.signals import post_init, pre_save
17 from htsworkflow.frontend.samples.models import Library
18 from htsworkflow.util.conversion import parse_flowcell_id
19 from htsworkflow.pipelines import runfolder
23 LOGGER = logging.getLogger(__name__)
try:
    default_pM = int(settings.DEFAULT_PM)
except (ValueError, AttributeError):
    # AttributeError: the setting is missing.  ValueError: it is present
    # but int() cannot parse it.  In both cases default_pM is left
    # untouched and the problem is only logged, the same way the
    # RESCAN_DELAY setting is handled below.
    LOGGER.error("invalid value for frontend.default_pm")
30 # how many days to wait before trying to re-import a runfolder
try:
    RESCAN_DELAY = int(settings.RESCAN_DELAY)
except (ValueError, AttributeError):
    # The setting is absent or not an integer: report it together with
    # the value RESCAN_DELAY currently holds, which stays in effect.
    LOGGER.error(
        "Missing or invalid settings.RESCAN_DELAY, "
        "defaulting to %s" % (RESCAN_DELAY,)
    )
38 RUN_STATUS_CHOICES = (
39 (0, 'Sequencer running'), # Solexa Data Pipeline Not Yet Started'),
40 (1, 'Data Pipeline Started'),
41 (2, 'Data Pipeline Interrupted'),
42 (3, 'Data Pipeline Finished'),
43 (4, 'Collect Results Started'),
44 (5, 'Collect Results Finished'),
# Reverse lookup for RUN_STATUS_CHOICES: status label -> numeric code.
RUN_STATUS_REVERSE_MAP = dict(
    (label, code) for code, label in RUN_STATUS_CHOICES
)
52 class ClusterStation(models.Model):
53 """List of cluster stations"""
54 name = models.CharField(max_length=50, unique=True)
55 isdefault = models.BooleanField(default=False, null=False)
58 ordering = ["-isdefault", "name"]
def __unicode__(self):
    """Return the cluster station's name as unicode text."""
    station_name = self.name
    return unicode(station_name)
65 d = cls.objects.filter(isdefault=True).all()
68 d = cls.objects.order_by('-id').all()
def update_isdefault(sender, instance, **kwargs):
    """Clear default if needed

    Connected to pre_save for ClusterStation.  When the instance being
    saved is flagged as the default, every other station that is still
    flagged has its flag cleared and is saved.
    """
    if not instance.isdefault:
        return
    flagged = ClusterStation.objects.filter(isdefault=True).all()
    for station in flagged:
        if station.id == instance.id:
            continue
        station.isdefault = False
        station.save()
83 pre_save.connect(ClusterStation.update_isdefault, sender=ClusterStation)
85 class Sequencer(models.Model):
86 """Sequencers we've owned
88 name = models.CharField(max_length=50, db_index=True)
89 instrument_name = models.CharField(max_length=50, db_index=True)
90 serial_number = models.CharField(max_length=50, db_index=True)
91 model = models.CharField(max_length=255)
92 active = models.BooleanField(default=True, null=False)
93 isdefault = models.BooleanField(default=False, null=False)
94 comment = models.CharField(max_length=255)
97 ordering = ["-isdefault", "-active", "name"]
def __unicode__(self):
    """Return the sequencer name, followed by '(instrument_name)'.

    The parenthesised part is left out only when instrument_name is None.
    """
    label = unicode(self.name)
    if self.instrument_name is None:
        return label
    instrument = "(%s)" % (unicode(self.instrument_name),)
    return label + " " + instrument
106 def get_absolute_url(self):
107 return ('htsworkflow.frontend.experiments.views.sequencer',
112 d = cls.objects.filter(isdefault=True).all()
115 d = cls.objects.order_by('active', '-id').all()
def update_isdefault(sender, instance, **kwargs):
    """Clear default if needed

    Connected to pre_save for Sequencer.  If the sequencer being saved
    is the default one, the flag is removed from (and saved on) every
    other sequencer that currently carries it.
    """
    if not instance.isdefault:
        return
    for other in Sequencer.objects.filter(isdefault=True).all():
        if other.id == instance.id:
            continue
        other.isdefault = False
        other.save()
130 pre_save.connect(Sequencer.update_isdefault, sender=Sequencer)
132 class FlowCellModel(models.Model):
133 name = models.TextField()
134 fixed_time = models.IntegerField(default=0, help_text='(seconds)')
135 per_cycle_time = models.IntegerField(default=0, help_text='(seconds)')
136 isdefault = models.BooleanField(default=False)
def __unicode__(self):
    """Return the flowcell model's name as unicode text."""
    model_name = self.name
    return unicode(model_name)
143 d = cls.objects.filter(isdefault=True).all()
def update_isdefault(sender, instance, **kwargs):
    """Clear default if needed

    Connected to pre_save for FlowCellModel.  When the instance being
    saved is flagged as the default, the flag is cleared (and saved) on
    every other FlowCellModel that still has it.
    """
    if instance.isdefault:
        # This used to query FlowCellType, a name that is not defined in
        # this module, so saving a default FlowCellModel raised NameError.
        for s in FlowCellModel.objects.filter(isdefault=True).all():
            if s.id != instance.id:
                s.isdefault = False
                s.save()
158 pre_save.connect(FlowCellModel.update_isdefault, sender=FlowCellModel)
160 class FlowCell(models.Model):
161 flowcell_id = models.CharField(max_length=20, unique=True, db_index=True)
162 run_date = models.DateTimeField()
163 advanced_run = models.BooleanField(default=False)
164 paired_end = models.BooleanField(default=False)
165 read_length = models.IntegerField(default=32) # Stanford is currenlty 25
166 control_lane = models.IntegerField(choices=[(1, 1),
178 cluster_station = models.ForeignKey(ClusterStation, default=ClusterStation.default)
179 sequencer = models.ForeignKey(Sequencer, default=Sequencer.default)
180 flowcell_model = models.ForeignKey(FlowCellModel, default=FlowCellModel.default)
182 notes = models.TextField(blank=True)
def __unicode__(self):
    """Return the flowcell id as unicode text."""
    identifier = self.flowcell_id
    return unicode(identifier)
189 for lane in self.lane_set.order_by('lane_number'):
190 cluster_estimate = lane.cluster_estimate
191 if cluster_estimate is not None:
192 cluster_estimate = "%s k" % ((int(cluster_estimate) / 1000), )
194 cluster_estimate = 'None'
195 library_id = lane.library_id
196 library = lane.library
197 element = '<tr><td>%d</td>'\
198 '<td><a href="%s">%s</a></td><td>%s</td></tr>'
199 html.append(element % (lane.lane_number,
200 library.get_admin_url(),
203 html.append('</table>')
204 return "\n".join(html)
205 Lanes.allow_tags = True
208 ordering = ["-run_date"]
210 def get_admin_url(self):
211 # that's the django way... except it didn't work
212 return urlresolvers.reverse('admin:experiments_flowcell_change',
215 def flowcell_type(self):
216 """Convert our boolean 'is paired' flag to a name
224 def get_absolute_url(self):
225 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
226 return ('htsworkflow.frontend.experiments.views.flowcell_detail',
def get_raw_data_directory(self):
    """Return location of where the raw data is stored

    This is settings.RESULT_HOME_DIR joined with the id part that
    parse_flowcell_id() extracts from self.flowcell_id.
    """
    # the status half of the parsed id plays no part in the directory name
    parsed_id, _status = parse_flowcell_id(self.flowcell_id)
    result_home = settings.RESULT_HOME_DIR
    return os.path.join(result_home, parsed_id)
def update_data_runs(self):
    """Import every run directory found below this flowcell's raw data.

    Walks the directory returned by get_raw_data_directory() and calls
    import_data_run() once for each file named like 'run*.xml', passing
    the containing directory (relative to the results home) and the
    file name.  Does nothing when there is no raw data directory.
    """
    import fnmatch

    result_root = self.get_raw_data_directory()
    LOGGER.debug("Update data runs flowcell root: %s", result_root)
    if result_root is None:
        return

    for dirpath, dirnames, filenames in os.walk(result_root):
        for filename in filenames:
            # fnmatchcase() is the case-sensitive glob test that
            # re.match(fnmatch.translate(...)) spelled out by hand.
            if fnmatch.fnmatchcase(filename, 'run*.xml'):
                # we have a run directory
                relative_pathname = get_relative_pathname(dirpath)
                self.import_data_run(relative_pathname, filename)
def import_data_run(self, relative_pathname, run_xml_name, force=False):
    """Given a result directory import files

    relative_pathname is the run directory relative to the results home
    and run_xml_name is the run xml file inside it.  The DataRun for
    that directory is (re)loaded from the xml when it is new, when
    force is true, or when its last update is more than RESCAN_DELAY
    days old; otherwise nothing happens.

    Raises RuntimeError if more than one DataRun uses the directory.
    """
    now = timezone.now()
    run_dir = get_absolute_pathname(relative_pathname)
    run_xml_path = os.path.join(run_dir, run_xml_name)

    runs = DataRun.objects.filter(result_dir=relative_pathname)
    if len(runs) == 0:
        run = DataRun()
        created = True
    elif len(runs) > 1:
        raise RuntimeError("Too many data runs for %s" % (
            relative_pathname,))
    else:
        run = runs[0]
        created = False

    if created or force or (now - run.last_update_time).days > RESCAN_DELAY:
        LOGGER.debug("Importing run from %s" % (relative_pathname,))
        run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
        run.flowcell = self
        # The model field is run_status.  Assigning run.status only set
        # a plain attribute that save() never wrote to the database.
        run.run_status = RUN_STATUS_REVERSE_MAP['DONE']
        run.result_dir = relative_pathname
        run.runfolder_name = run_xml_data.runfolder_name
        run.cycle_start = run_xml_data.image_analysis.start
        run.cycle_stop = run_xml_data.image_analysis.stop
        # the xml only supplies a date: use midnight of that day in the
        # site's configured time zone
        naive_run_start_time = datetime.datetime.fromordinal(
            run_xml_data.image_analysis.date.toordinal())
        run.run_start_time = pytz.timezone(settings.TIME_ZONE).localize(
            naive_run_start_time)
        run.image_software = run_xml_data.image_analysis.software
        run.image_version = run_xml_data.image_analysis.version
        run.basecall_software = run_xml_data.bustard.software
        run.basecall_version = run_xml_data.bustard.version
        # we're frequently not running alignments
        if run_xml_data.gerald:
            run.alignment_software = run_xml_data.gerald.software
            run.alignment_version = run_xml_data.gerald.version

        run.last_update_time = timezone.now()
        run.save()

        run.update_result_files()
295 # FIXME: should we automatically update dataruns?
296 # Or should we expect someone to call update_data_runs?
297 #def update_flowcell_dataruns(sender, instance, *args, **kwargs):
298 # """Update our dataruns
300 # if not os.path.exists(settings.RESULT_HOME_DIR):
303 # instance.update_data_runs()
304 #post_init.connect(update_flowcell_dataruns, sender=FlowCell)
307 LANE_STATUS_CODES = [(0, 'Failed'),
310 LANE_STATUS_MAP = dict((int(k), v) for k, v in LANE_STATUS_CODES)
311 LANE_STATUS_MAP[None] = "Unknown"
def is_valid_lane(value):
    """Return True when value is a lane number from 1 through 8."""
    in_range = value >= 1 and value <= 8
    return True if in_range else False
321 class Lane(models.Model):
322 flowcell = models.ForeignKey(FlowCell)
323 lane_number = models.IntegerField()
324 library = models.ForeignKey(Library)
325 pM = models.DecimalField(max_digits=5,
330 cluster_estimate = models.IntegerField(blank=True, null=True)
331 status = models.IntegerField(choices=LANE_STATUS_CODES,
334 comment = models.TextField(null=True, blank=True)
337 def get_absolute_url(self):
338 return ('htsworkflow.frontend.experiments.views.flowcell_lane_detail',
def __unicode__(self):
    """Return '<flowcell id>:<lane number>' for this lane."""
    flowcell_id = self.flowcell.flowcell_id
    lane_label = unicode(self.lane_number)
    return flowcell_id + ':' + lane_label
345 class DataRun(models.Model):
346 flowcell = models.ForeignKey(FlowCell, verbose_name="Flowcell Id")
347 runfolder_name = models.CharField(max_length=50)
348 result_dir = models.CharField(max_length=255)
349 last_update_time = models.DateTimeField()
350 run_start_time = models.DateTimeField()
351 cycle_start = models.IntegerField(null=True, blank=True)
352 cycle_stop = models.IntegerField(null=True, blank=True)
353 run_status = models.IntegerField(choices=RUN_STATUS_CHOICES,
354 null=True, blank=True)
355 image_software = models.CharField(max_length=50)
356 image_version = models.CharField(max_length=50)
357 basecall_software = models.CharField(max_length=50)
358 basecall_version = models.CharField(max_length=50)
359 alignment_software = models.CharField(max_length=50)
360 alignment_version = models.CharField(max_length=50)
361 comment = models.TextField(blank=True)
363 def update_result_files(self):
364 abs_result_dir = get_absolute_pathname(self.result_dir)
366 for dirname, dirnames, filenames in os.walk(abs_result_dir):
367 for filename in filenames:
368 pathname = os.path.join(dirname, filename)
369 relative_pathname = get_relative_pathname(pathname)
370 datafiles = self.datafile_set.filter(
372 relative_pathname=relative_pathname)
373 if len(datafiles) > 0:
376 metadata = find_file_type_metadata_from_filename(filename)
377 if metadata is not None:
378 metadata['filename'] = filename
380 newfile.data_run = self
381 newfile.file_type = metadata['file_type']
382 newfile.relative_pathname = relative_pathname
384 lane_number = metadata.get('lane', None)
385 if lane_number is not None:
386 lane = self.flowcell.lane_set.get(
387 lane_number=lane_number)
388 newfile.library = lane.library
390 self.datafile_set.add(newfile)
392 self.last_update_time = timezone.now()
def lane_files(self):
    """Group this run's data files by lane.

    Returns a dictionary keyed by lane; each value maps a file type's
    normalized_name to the DataFile.  Files whose attributes are None
    or carry no 'lane' entry are left out.
    """
    files_by_lane = {}
    for datafile in self.datafile_set.all():
        attributes = datafile.attributes
        if attributes is None:
            continue
        lane = attributes.get('lane', None)
        if lane is None:
            continue
        per_lane = files_by_lane.setdefault(lane, {})
        per_lane[datafile.file_type.normalized_name] = datafile
    return files_by_lane
407 def ivc_plots(self, lane):
408 ivc_name = ['IVC All', 'IVC Call',
409 'IVC Percent Base', 'IVC Percent All', 'IVC Percent Call']
412 for rel_filename, metadata in self.get_result_files():
413 if metadata.file_type.name in ivc_name:
414 plots[metadata.file_type.name] = (rel_filename, metadata)
417 class FileType(models.Model):
418 """Represent potential file types
420 regex is a pattern used to detect if a filename matches this type
421 data run currently assumes that there may be a (?P<lane>) and
422 (?P<end>) pattern in the regular expression.
424 name = models.CharField(max_length=50)
425 mimetype = models.CharField(max_length=50, null=True, blank=True)
426 # regular expression from glob.fnmatch.translate
427 regex = models.TextField(null=True, blank=True)
def parse_filename(self, pathname):
    """Does filename match our pattern?

    Only the last path component of pathname is tested, using
    re.match (so the pattern is anchored at the start of the name).

    Returns None if not, or dictionary of match variables if we do.
    A missing (None) or empty regex never matches.
    """
    if not self.regex:
        # regex is a nullable field; len(None) used to raise TypeError
        return None

    filename = os.path.basename(pathname)
    match = re.match(self.regex, filename)
    if match is None:
        return None

    results = match.groupdict()

    # 'lane' and 'end' are the (?P<>) groups handed back as integers
    for attribute_name in ('lane', 'end'):
        value = results.get(attribute_name, None)
        if value is not None:
            results[attribute_name] = int(value)

    return results
def _get_normalized_name(self):
    """Crush data file name into identifier friendly name

    Every space becomes an underscore and the result is lower-cased.
    """
    words = self.name.split(' ')
    return '_'.join(words).lower()
normalized_name = property(_get_normalized_name)
455 def __unicode__(self):
456 #return u"<FileType: %s>" % (self.name,)
def regex_is_valid(self):
    """Return True when self.regex compiles as a regular expression.

    Returns False for a malformed pattern, and also when regex is not
    a string at all (for example None, which the field allows).
    """
    try:
        re.compile(self.regex)
    except (re.error, TypeError):
        # re.error is the public name for sre_constants.error;
        # TypeError is what re.compile raises for a None pattern.
        return False
    return True
467 """Helper function to set default UUID in DataFile"""
468 return str(uuid.uuid1())
471 class DataFile(models.Model):
472 """Store map from random ID to filename"""
473 random_key = models.CharField(max_length=64,
476 data_run = models.ForeignKey(DataRun, db_index=True)
477 library = models.ForeignKey(Library, db_index=True, null=True, blank=True)
478 file_type = models.ForeignKey(FileType)
479 relative_pathname = models.CharField(max_length=255, db_index=True)
def _get_attributes(self):
    """Return what this file's FileType parses out of relative_pathname."""
    file_type = self.file_type
    return file_type.parse_filename(self.relative_pathname)
attributes = property(_get_attributes)
def _get_pathname(self):
    """Return relative_pathname resolved with get_absolute_pathname()."""
    relative = self.relative_pathname
    return get_absolute_pathname(relative)
pathname = property(_get_pathname)
def get_absolute_url(self):
    """Return the (view name, args, kwargs) triple for read_result_file.

    The only argument is the keyword 'key', set to this file's random_key.
    """
    view_name = 'htsworkflow.frontend.experiments.views.read_result_file'
    url_kwargs = {'key': self.random_key}
    return (view_name, (), url_kwargs)
def find_file_type_metadata_from_filename(pathname):
    """Return the parsed attributes of the first FileType matching pathname.

    Each FileType is asked to parse the file name part of pathname.  The
    first non-None result is returned with an added 'file_type' entry
    holding that FileType.  Returns None when no FileType matches.
    """
    filename = os.path.split(pathname)[1]
    for candidate in FileType.objects.all():
        metadata = candidate.parse_filename(filename)
        if metadata is None:
            continue
        metadata['file_type'] = candidate
        return metadata
    return None
def get_relative_pathname(abspath):
    """Strip off the result home directory from a path

    Only a leading settings.RESULT_HOME_DIR is removed.  A path that
    does not start with it is returned unchanged.
    """
    # joining with '' leaves result_home_dir ending in a path separator,
    # so only whole leading directory components can match
    result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
    if abspath.startswith(result_home_dir):
        # str.replace() used to drop the home directory wherever it
        # appeared in the path, not just at the front
        return abspath[len(result_home_dir):]
    return abspath
def get_absolute_pathname(relative_pathname):
    """Attach relative path to results home directory"""
    result_home = settings.RESULT_HOME_DIR
    return os.path.join(result_home, relative_pathname)