9 from django.conf import settings
10 from django.core.exceptions import ObjectDoesNotExist
11 from django.core import urlresolvers
12 from django.utils import timezone
13 from django.db import models
14 from django.db.models.signals import post_init, pre_save
16 from htsworkflow.frontend.samples.models import Library
17 from htsworkflow.util.conversion import parse_flowcell_id
18 from htsworkflow.pipelines import runfolder
20 LOGGER = logging.getLogger(__name__)
23 default_pM = int(settings.DEFAULT_PM)
24 except AttributeError, e:
25 LOGGER.error("invalid value for frontend.default_pm")
27 # how many days to wait before trying to re-import a runfolder
30 RESCAN_DELAY = int(settings.RESCAN_DELAY)
31 except (ValueError, AttributeError):
32 LOGGER.error("Missing or invalid settings.RESCAN_DELAY, "\
33 "defaulting to %s" % (RESCAN_DELAY,))
35 RUN_STATUS_CHOICES = (
36 (0, 'Sequencer running'), # Solexa Data Pipeline Not Yet Started'),
37 (1, 'Data Pipeline Started'),
38 (2, 'Data Pipeline Interrupted'),
39 (3, 'Data Pipeline Finished'),
40 (4, 'Collect Results Started'),
41 (5, 'Collect Results Finished'),
# Reverse lookup: status label -> numeric status code.
RUN_STATUS_REVERSE_MAP = dict(
    (label, code) for code, label in RUN_STATUS_CHOICES
)
49 class ClusterStation(models.Model):
50 """List of cluster stations"""
51 name = models.CharField(max_length=50, unique=True)
52 isdefault = models.BooleanField(default=False, null=False)
55 ordering = ["-isdefault", "name"]
def __unicode__(self):
    """Return this cluster station's name as a unicode string."""
    station_name = self.name
    return unicode(station_name)
62 d = cls.objects.filter(isdefault=True).all()
65 d = cls.objects.order_by('-id').all()
71 def update_isdefault(sender, instance, **kwargs):
72 """Clear default if needed
74 if instance.isdefault:
75 for c in ClusterStation.objects.filter(isdefault=True).all():
76 if c.id != instance.id:
80 pre_save.connect(ClusterStation.update_isdefault, sender=ClusterStation)
82 class Sequencer(models.Model):
83 """Sequencers we've owned
85 name = models.CharField(max_length=50, db_index=True)
86 instrument_name = models.CharField(max_length=50, db_index=True)
87 serial_number = models.CharField(max_length=50, db_index=True)
88 model = models.CharField(max_length=255)
89 active = models.BooleanField(default=True, null=False)
90 isdefault = models.BooleanField(default=False, null=False)
91 comment = models.CharField(max_length=255)
94 ordering = ["-isdefault", "-active", "name"]
def __unicode__(self):
    """Return the display label for this sequencer.

    The label is the sequencer name, followed by the instrument name in
    parentheses when an instrument name has been recorded.
    """
    label = [unicode(self.name)]
    # instrument_name is a non-null CharField, so "no value" is stored as
    # an empty string rather than None; test truthiness so an unset
    # instrument name does not produce a dangling "()".
    if self.instrument_name:
        label.append("(%s)" % (unicode(self.instrument_name),))
    return " ".join(label)
103 def get_absolute_url(self):
104 return ('htsworkflow.frontend.experiments.views.sequencer',
109 d = cls.objects.filter(isdefault=True).all()
112 d = cls.objects.order_by('active', '-id').all()
118 def update_isdefault(sender, instance, **kwargs):
119 """Clear default if needed
121 if instance.isdefault:
122 for s in Sequencer.objects.filter(isdefault=True).all():
123 if s.id != instance.id:
127 pre_save.connect(Sequencer.update_isdefault, sender=Sequencer)
130 class FlowCell(models.Model):
131 flowcell_id = models.CharField(max_length=20, unique=True, db_index=True)
132 run_date = models.DateTimeField()
133 advanced_run = models.BooleanField(default=False)
134 paired_end = models.BooleanField(default=False)
135 read_length = models.IntegerField(default=32) # Stanford is currently 25
136 control_lane = models.IntegerField(choices=[(1, 1),
148 cluster_station = models.ForeignKey(ClusterStation, default=ClusterStation.default)
149 sequencer = models.ForeignKey(Sequencer, default=Sequencer.default)
151 notes = models.TextField(blank=True)
def __unicode__(self):
    """Return the flowcell id as a unicode string."""
    identifier = self.flowcell_id
    return unicode(identifier)
158 for lane in self.lane_set.order_by('lane_number'):
159 cluster_estimate = lane.cluster_estimate
160 if cluster_estimate is not None:
161 cluster_estimate = "%s k" % ((int(cluster_estimate) / 1000), )
163 cluster_estimate = 'None'
164 library_id = lane.library_id
165 library = lane.library
166 element = '<tr><td>%d</td>'\
167 '<td><a href="%s">%s</a></td><td>%s</td></tr>'
168 html.append(element % (lane.lane_number,
169 library.get_admin_url(),
172 html.append('</table>')
173 return "\n".join(html)
174 Lanes.allow_tags = True
177 ordering = ["-run_date"]
179 def get_admin_url(self):
180 # that's the django way... except it didn't work
181 return urlresolvers.reverse('admin:experiments_flowcell_change',
184 def flowcell_type(self):
185 """Convert our boolean 'is paired' flag to a name
193 def get_absolute_url(self):
194 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
195 return ('htsworkflow.frontend.experiments.views.flowcell_detail',
198 def get_raw_data_directory(self):
199 """Return location of where the raw data is stored"""
200 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
202 return os.path.join(settings.RESULT_HOME_DIR, flowcell_id)
204 def update_data_runs(self):
205 result_root = self.get_raw_data_directory()
206 LOGGER.debug("Update data runs flowcell root: %s" % (result_root,))
207 if result_root is None:
210 result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
211 run_xml_re = re.compile(glob.fnmatch.translate('run*.xml'))
214 for dirpath, dirnames, filenames in os.walk(result_root):
215 for filename in filenames:
216 if run_xml_re.match(filename):
217 # we have a run directory
218 relative_pathname = get_relative_pathname(dirpath)
219 self.import_data_run(relative_pathname, filename)
221 def import_data_run(self, relative_pathname, run_xml_name, force=False):
222 """Given a result directory import files"""
224 run_dir = get_absolute_pathname(relative_pathname)
225 run_xml_path = os.path.join(run_dir, run_xml_name)
227 runs = DataRun.objects.filter(result_dir = relative_pathname)
232 raise RuntimeError("Too many data runs for %s" % (
238 if created or force or (now-run.last_update_time).days > RESCAN_DELAY:
239 LOGGER.debug("Importing run from %s" % (relative_pathname,))
240 run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
242 run.status = RUN_STATUS_REVERSE_MAP['DONE']
243 run.result_dir = relative_pathname
244 run.runfolder_name = run_xml_data.runfolder_name
245 run.cycle_start = run_xml_data.image_analysis.start
246 run.cycle_stop = run_xml_data.image_analysis.stop
247 run.run_start_time = run_xml_data.image_analysis.date
248 run.image_software = run_xml_data.image_analysis.software
249 run.image_version = run_xml_data.image_analysis.version
250 run.basecall_software = run_xml_data.bustard.software
251 run.basecall_version = run_xml_data.bustard.version
252 # we're frequently not running alignments
253 if run_xml_data.gerald:
254 run.alignment_software = run_xml_data.gerald.software
255 run.alignment_version = run_xml_data.gerald.version
257 run.last_update_time = timezone.now()
260 run.update_result_files()
263 # FIXME: should we automatically update dataruns?
264 # Or should we expect someone to call update_data_runs?
265 #def update_flowcell_dataruns(sender, instance, *args, **kwargs):
266 # """Update our dataruns
268 # if not os.path.exists(settings.RESULT_HOME_DIR):
271 # instance.update_data_runs()
272 #post_init.connect(update_flowcell_dataruns, sender=FlowCell)
275 LANE_STATUS_CODES = [(0, 'Failed'),
# Lookup from integer lane status code to its label; a status of None
# maps to "Unknown".
LANE_STATUS_MAP = dict(
    (int(code), label) for code, label in LANE_STATUS_CODES
)
LANE_STATUS_MAP.update({None: "Unknown"})
282 def is_valid_lane(value):
283 if value >= 1 and value <= 8:
289 class Lane(models.Model):
290 flowcell = models.ForeignKey(FlowCell)
291 lane_number = models.IntegerField()
292 library = models.ForeignKey(Library)
293 pM = models.DecimalField(max_digits=5,
298 cluster_estimate = models.IntegerField(blank=True, null=True)
299 status = models.IntegerField(choices=LANE_STATUS_CODES,
302 comment = models.TextField(null=True, blank=True)
305 def get_absolute_url(self):
306 return ('htsworkflow.frontend.experiments.views.flowcell_lane_detail',
def __unicode__(self):
    """Return '<flowcell id>:<lane number>' for this lane."""
    flowcell_label = self.flowcell.flowcell_id
    lane_label = unicode(self.lane_number)
    return flowcell_label + ':' + lane_label
313 class DataRun(models.Model):
314 flowcell = models.ForeignKey(FlowCell, verbose_name="Flowcell Id")
315 runfolder_name = models.CharField(max_length=50)
316 result_dir = models.CharField(max_length=255)
317 last_update_time = models.DateTimeField()
318 run_start_time = models.DateTimeField()
319 cycle_start = models.IntegerField(null=True, blank=True)
320 cycle_stop = models.IntegerField(null=True, blank=True)
321 run_status = models.IntegerField(choices=RUN_STATUS_CHOICES,
322 null=True, blank=True)
323 image_software = models.CharField(max_length=50)
324 image_version = models.CharField(max_length=50)
325 basecall_software = models.CharField(max_length=50)
326 basecall_version = models.CharField(max_length=50)
327 alignment_software = models.CharField(max_length=50)
328 alignment_version = models.CharField(max_length=50)
329 comment = models.TextField(blank=True)
331 def update_result_files(self):
332 abs_result_dir = get_absolute_pathname(self.result_dir)
334 for dirname, dirnames, filenames in os.walk(abs_result_dir):
335 for filename in filenames:
336 pathname = os.path.join(dirname, filename)
337 relative_pathname = get_relative_pathname(pathname)
338 datafiles = self.datafile_set.filter(
340 relative_pathname=relative_pathname)
341 if len(datafiles) > 0:
344 metadata = find_file_type_metadata_from_filename(filename)
345 if metadata is not None:
346 metadata['filename'] = filename
348 newfile.data_run = self
349 newfile.file_type = metadata['file_type']
350 newfile.relative_pathname = relative_pathname
352 lane_number = metadata.get('lane', None)
353 if lane_number is not None:
354 lane = self.flowcell.lane_set.get(
355 lane_number=lane_number)
356 newfile.library = lane.library
358 self.datafile_set.add(newfile)
360 self.last_update_time = timezone.now()
362 def lane_files(self):
365 for datafile in self.datafile_set.all():
366 metadata = datafile.attributes
367 if metadata is not None:
368 lane = metadata.get('lane', None)
370 lane_file_set = lanes.setdefault(lane, {})
371 normalized_name = datafile.file_type.normalized_name
372 lane_file_set[normalized_name] = datafile
375 def ivc_plots(self, lane):
376 ivc_name = ['IVC All', 'IVC Call',
377 'IVC Percent Base', 'IVC Percent All', 'IVC Percent Call']
380 for rel_filename, metadata in self.get_result_files():
381 if metadata.file_type.name in ivc_name:
382 plots[metadata.file_type.name] = (rel_filename, metadata)
385 class FileType(models.Model):
386 """Represent potential file types
388 regex is a pattern used to detect if a filename matches this type
389 data run currently assumes that there may be a (?P<lane>) and
390 (?P<end>) pattern in the regular expression.
392 name = models.CharField(max_length=50)
393 mimetype = models.CharField(max_length=50, null=True, blank=True)
394 # regular expression from glob.fnmatch.translate
395 regex = models.CharField(max_length=50, null=True, blank=True)
397 def parse_filename(self, pathname):
398 """Does filename match our pattern?
400 Returns None if not, or dictionary of match variables if we do.
402 path, filename = os.path.split(pathname)
403 if len(self.regex) > 0:
404 match = re.match(self.regex, filename)
405 if match is not None:
406 # These are (?P<>) names we know about from our
408 results = match.groupdict()
410 # convert int parameters
411 for attribute_name in ['lane', 'end']:
412 value = results.get(attribute_name, None)
413 if value is not None:
414 results[attribute_name] = int(value)
def _get_normalized_name(self):
    """Return the file type name as an identifier friendly string.

    Every space becomes an underscore and the result is lower-cased.
    """
    words = self.name.split(' ')
    underscored = '_'.join(words)
    return underscored.lower()
normalized_name = property(_get_normalized_name)
423 def __unicode__(self):
424 #return u"<FileType: %s>" % (self.name,)
429 """Helper function to set default UUID in DataFile"""
430 return str(uuid.uuid1())
433 class DataFile(models.Model):
434 """Store map from random ID to filename"""
435 random_key = models.CharField(max_length=64,
438 data_run = models.ForeignKey(DataRun, db_index=True)
439 library = models.ForeignKey(Library, db_index=True, null=True, blank=True)
440 file_type = models.ForeignKey(FileType)
441 relative_pathname = models.CharField(max_length=255, db_index=True)
def _get_attributes(self):
    """Return whatever our file type's parse_filename yields for our path."""
    parse = self.file_type.parse_filename
    return parse(self.relative_pathname)
attributes = property(_get_attributes)
def _get_pathname(self):
    """Return this file's absolute path via get_absolute_pathname."""
    relative = self.relative_pathname
    return get_absolute_pathname(relative)
pathname = property(_get_pathname)
def get_absolute_url(self):
    """Return the (view name, args, kwargs) triple for read_result_file.

    The view is addressed by this file's random_key.
    """
    view_name = 'htsworkflow.frontend.experiments.views.read_result_file'
    view_kwargs = {'key': self.random_key}
    return (view_name, (), view_kwargs)
457 def find_file_type_metadata_from_filename(pathname):
458 path, filename = os.path.split(pathname)
460 for file_type in FileType.objects.all():
461 result = file_type.parse_filename(filename)
462 if result is not None:
463 result['file_type'] = file_type
def get_relative_pathname(abspath):
    """Strip off the result home directory from a path

    Only a leading settings.RESULT_HOME_DIR is removed.  A path that does
    not start with the result home directory is returned unchanged.
    """
    # Joining with '' makes sure the prefix ends with a path separator, so
    # a home of "/results" does not match "/results_old/...".
    result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
    # str.replace would also delete any later occurrence of the prefix
    # inside the path, so slice the leading prefix off explicitly.
    if abspath.startswith(result_home_dir):
        return abspath[len(result_home_dir):]
    return abspath
def get_absolute_pathname(relative_pathname):
    """Return relative_pathname joined onto settings.RESULT_HOME_DIR."""
    result_home_dir = settings.RESULT_HOME_DIR
    return os.path.join(result_home_dir, relative_pathname)