9 from django.conf import settings
10 from django.core.exceptions import ObjectDoesNotExist
11 from django.core import urlresolvers
12 from django.utils import timezone
13 from django.db import models
14 from django.db.models.signals import post_init, pre_save
16 from htsworkflow.frontend.samples.models import Library
17 from htsworkflow.util.conversion import parse_flowcell_id
18 from htsworkflow.pipelines import runfolder
22 LOGGER = logging.getLogger(__name__)
25 default_pM = int(settings.DEFAULT_PM)
26 except AttributeError, e:
27 LOGGER.error("invalid value for frontend.default_pm")
29 # how many days to wait before trying to re-import a runfolder
32 RESCAN_DELAY = int(settings.RESCAN_DELAY)
33 except (ValueError, AttributeError):
34 LOGGER.error("Missing or invalid settings.RESCAN_DELAY, "\
35 "defaulting to %s" % (RESCAN_DELAY,))
37 RUN_STATUS_CHOICES = (
38 (0, 'Sequencer running'), # Solexa Data Pipeline Not Yet Started'),
39 (1, 'Data Pipeline Started'),
40 (2, 'Data Pipeline Interrupted'),
41 (3, 'Data Pipeline Finished'),
42 (4, 'Collect Results Started'),
43 (5, 'Collect Results Finished'),
# Reverse lookup: status label -> numeric status code.
RUN_STATUS_REVERSE_MAP = {label: code for code, label in RUN_STATUS_CHOICES}
51 class ClusterStation(models.Model):
52 """List of cluster stations"""
53 name = models.CharField(max_length=50, unique=True)
54 isdefault = models.BooleanField(default=False, null=False)
57 ordering = ["-isdefault", "name"]
def __unicode__(self):
    """Return this cluster station's ``name`` converted with ``unicode()``."""
    station_name = self.name
    return unicode(station_name)
64 d = cls.objects.filter(isdefault=True).all()
67 d = cls.objects.order_by('-id').all()
73 def update_isdefault(sender, instance, **kwargs):
74 """Clear default if needed
76 if instance.isdefault:
77 for c in ClusterStation.objects.filter(isdefault=True).all():
78 if c.id != instance.id:
82 pre_save.connect(ClusterStation.update_isdefault, sender=ClusterStation)
84 class Sequencer(models.Model):
85 """Sequencers we've owned
87 name = models.CharField(max_length=50, db_index=True)
88 instrument_name = models.CharField(max_length=50, db_index=True)
89 serial_number = models.CharField(max_length=50, db_index=True)
90 model = models.CharField(max_length=255)
91 active = models.BooleanField(default=True, null=False)
92 isdefault = models.BooleanField(default=False, null=False)
93 comment = models.CharField(max_length=255)
96 ordering = ["-isdefault", "-active", "name"]
def __unicode__(self):
    """Return the display label for this sequencer.

    The label is the sequencer name; when ``instrument_name`` is not
    None it is followed by a space and the instrument name in parentheses.
    """
    label = unicode(self.name)
    if self.instrument_name is None:
        return label
    instrument = "(%s)" % (unicode(self.instrument_name),)
    return " ".join((label, instrument))
105 def get_absolute_url(self):
106 return ('htsworkflow.frontend.experiments.views.sequencer',
111 d = cls.objects.filter(isdefault=True).all()
114 d = cls.objects.order_by('active', '-id').all()
120 def update_isdefault(sender, instance, **kwargs):
121 """Clear default if needed
123 if instance.isdefault:
124 for s in Sequencer.objects.filter(isdefault=True).all():
125 if s.id != instance.id:
129 pre_save.connect(Sequencer.update_isdefault, sender=Sequencer)
132 class FlowCell(models.Model):
133 flowcell_id = models.CharField(max_length=20, unique=True, db_index=True)
134 run_date = models.DateTimeField()
135 advanced_run = models.BooleanField(default=False)
136 paired_end = models.BooleanField(default=False)
137 read_length = models.IntegerField(default=32) # Stanford is currenlty 25
138 control_lane = models.IntegerField(choices=[(1, 1),
150 cluster_station = models.ForeignKey(ClusterStation, default=ClusterStation.default)
151 sequencer = models.ForeignKey(Sequencer, default=Sequencer.default)
153 notes = models.TextField(blank=True)
def __unicode__(self):
    """Return ``flowcell_id`` converted with ``unicode()``."""
    identifier = self.flowcell_id
    return unicode(identifier)
160 for lane in self.lane_set.order_by('lane_number'):
161 cluster_estimate = lane.cluster_estimate
162 if cluster_estimate is not None:
163 cluster_estimate = "%s k" % ((int(cluster_estimate) / 1000), )
165 cluster_estimate = 'None'
166 library_id = lane.library_id
167 library = lane.library
168 element = '<tr><td>%d</td>'\
169 '<td><a href="%s">%s</a></td><td>%s</td></tr>'
170 html.append(element % (lane.lane_number,
171 library.get_admin_url(),
174 html.append('</table>')
175 return "\n".join(html)
176 Lanes.allow_tags = True
179 ordering = ["-run_date"]
181 def get_admin_url(self):
182 # that's the django way... except it didn't work
183 return urlresolvers.reverse('admin:experiments_flowcell_change',
186 def flowcell_type(self):
187 """Convert our boolean 'is paired' flag to a name
195 def get_absolute_url(self):
196 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
197 return ('htsworkflow.frontend.experiments.views.flowcell_detail',
def get_raw_data_directory(self):
    """Return the path where this flowcell's raw data is stored.

    The path is ``settings.RESULT_HOME_DIR`` joined with the first value
    that ``parse_flowcell_id`` returns for ``self.flowcell_id``; the
    second returned value is not used here.
    """
    parsed_id, _status = parse_flowcell_id(self.flowcell_id)
    results_root = settings.RESULT_HOME_DIR
    return os.path.join(results_root, parsed_id)
206 def update_data_runs(self):
207 result_root = self.get_raw_data_directory()
208 LOGGER.debug("Update data runs flowcell root: %s" % (result_root,))
209 if result_root is None:
212 result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
213 run_xml_re = re.compile(glob.fnmatch.translate('run*.xml'))
216 for dirpath, dirnames, filenames in os.walk(result_root):
217 for filename in filenames:
218 if run_xml_re.match(filename):
219 # we have a run directory
220 relative_pathname = get_relative_pathname(dirpath)
221 self.import_data_run(relative_pathname, filename)
223 def import_data_run(self, relative_pathname, run_xml_name, force=False):
224 """Given a result directory import files"""
226 run_dir = get_absolute_pathname(relative_pathname)
227 run_xml_path = os.path.join(run_dir, run_xml_name)
229 runs = DataRun.objects.filter(result_dir = relative_pathname)
234 raise RuntimeError("Too many data runs for %s" % (
240 if created or force or (now-run.last_update_time).days > RESCAN_DELAY:
241 LOGGER.debug("Importing run from %s" % (relative_pathname,))
242 run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
244 run.status = RUN_STATUS_REVERSE_MAP['DONE']
245 run.result_dir = relative_pathname
246 run.runfolder_name = run_xml_data.runfolder_name
247 run.cycle_start = run_xml_data.image_analysis.start
248 run.cycle_stop = run_xml_data.image_analysis.stop
249 naive_run_start_time = datetime.datetime.fromordinal(run_xml_data.image_analysis.date.toordinal())
250 run.run_start_time = pytz.timezone(settings.TIME_ZONE).localize(naive_run_start_time)
251 run.image_software = run_xml_data.image_analysis.software
252 run.image_version = run_xml_data.image_analysis.version
253 run.basecall_software = run_xml_data.bustard.software
254 run.basecall_version = run_xml_data.bustard.version
255 # we're frequently not running alignments
256 if run_xml_data.gerald:
257 run.alignment_software = run_xml_data.gerald.software
258 run.alignment_version = run_xml_data.gerald.version
260 run.last_update_time = timezone.now()
263 run.update_result_files()
266 # FIXME: should we automatically update dataruns?
267 # Or should we expect someone to call update_data_runs?
268 #def update_flowcell_dataruns(sender, instance, *args, **kwargs):
269 # """Update our dataruns
271 # if not os.path.exists(settings.RESULT_HOME_DIR):
274 # instance.update_data_runs()
275 #post_init.connect(update_flowcell_dataruns, sender=FlowCell)
278 LANE_STATUS_CODES = [(0, 'Failed'),
# Lookup from integer lane status code to its label; a status of None
# is reported as "Unknown".
LANE_STATUS_MAP = {int(code): label for code, label in LANE_STATUS_CODES}
LANE_STATUS_MAP[None] = "Unknown"
285 def is_valid_lane(value):
286 if value >= 1 and value <= 8:
292 class Lane(models.Model):
293 flowcell = models.ForeignKey(FlowCell)
294 lane_number = models.IntegerField()
295 library = models.ForeignKey(Library)
296 pM = models.DecimalField(max_digits=5,
301 cluster_estimate = models.IntegerField(blank=True, null=True)
302 status = models.IntegerField(choices=LANE_STATUS_CODES,
305 comment = models.TextField(null=True, blank=True)
308 def get_absolute_url(self):
309 return ('htsworkflow.frontend.experiments.views.flowcell_lane_detail',
def __unicode__(self):
    """Return ``<flowcell id>:<lane number>`` as the label for this lane."""
    pieces = (self.flowcell.flowcell_id, unicode(self.lane_number))
    return ':'.join(pieces)
316 class DataRun(models.Model):
317 flowcell = models.ForeignKey(FlowCell, verbose_name="Flowcell Id")
318 runfolder_name = models.CharField(max_length=50)
319 result_dir = models.CharField(max_length=255)
320 last_update_time = models.DateTimeField()
321 run_start_time = models.DateTimeField()
322 cycle_start = models.IntegerField(null=True, blank=True)
323 cycle_stop = models.IntegerField(null=True, blank=True)
324 run_status = models.IntegerField(choices=RUN_STATUS_CHOICES,
325 null=True, blank=True)
326 image_software = models.CharField(max_length=50)
327 image_version = models.CharField(max_length=50)
328 basecall_software = models.CharField(max_length=50)
329 basecall_version = models.CharField(max_length=50)
330 alignment_software = models.CharField(max_length=50)
331 alignment_version = models.CharField(max_length=50)
332 comment = models.TextField(blank=True)
334 def update_result_files(self):
335 abs_result_dir = get_absolute_pathname(self.result_dir)
337 for dirname, dirnames, filenames in os.walk(abs_result_dir):
338 for filename in filenames:
339 pathname = os.path.join(dirname, filename)
340 relative_pathname = get_relative_pathname(pathname)
341 datafiles = self.datafile_set.filter(
343 relative_pathname=relative_pathname)
344 if len(datafiles) > 0:
347 metadata = find_file_type_metadata_from_filename(filename)
348 if metadata is not None:
349 metadata['filename'] = filename
351 newfile.data_run = self
352 newfile.file_type = metadata['file_type']
353 newfile.relative_pathname = relative_pathname
355 lane_number = metadata.get('lane', None)
356 if lane_number is not None:
357 lane = self.flowcell.lane_set.get(
358 lane_number=lane_number)
359 newfile.library = lane.library
361 self.datafile_set.add(newfile)
363 self.last_update_time = timezone.now()
365 def lane_files(self):
368 for datafile in self.datafile_set.all():
369 metadata = datafile.attributes
370 if metadata is not None:
371 lane = metadata.get('lane', None)
373 lane_file_set = lanes.setdefault(lane, {})
374 normalized_name = datafile.file_type.normalized_name
375 lane_file_set[normalized_name] = datafile
378 def ivc_plots(self, lane):
379 ivc_name = ['IVC All', 'IVC Call',
380 'IVC Percent Base', 'IVC Percent All', 'IVC Percent Call']
383 for rel_filename, metadata in self.get_result_files():
384 if metadata.file_type.name in ivc_name:
385 plots[metadata.file_type.name] = (rel_filename, metadata)
388 class FileType(models.Model):
389 """Represent potential file types
391 regex is a pattern used to detect if a filename matches this type
392 data run currently assumes that there may be a (?P<lane>) and
393 (?P<end>) pattern in the regular expression.
395 name = models.CharField(max_length=50)
396 mimetype = models.CharField(max_length=50, null=True, blank=True)
397 # regular expression from glob.fnmatch.translate
398 regex = models.CharField(max_length=50, null=True, blank=True)
400 def parse_filename(self, pathname):
401 """Does filename match our pattern?
403 Returns None if not, or dictionary of match variables if we do.
405 path, filename = os.path.split(pathname)
406 if len(self.regex) > 0:
407 match = re.match(self.regex, filename)
408 if match is not None:
409 # These are (?P<>) names we know about from our
411 results = match.groupdict()
413 # convert int parameters
414 for attribute_name in ['lane', 'end']:
415 value = results.get(attribute_name, None)
416 if value is not None:
417 results[attribute_name] = int(value)
def _get_normalized_name(self):
    """Return ``name`` with spaces replaced by underscores, lowercased."""
    underscored = self.name.replace(' ', '_')
    return underscored.lower()
normalized_name = property(fget=_get_normalized_name)
426 def __unicode__(self):
427 #return u"<FileType: %s>" % (self.name,)
432 """Helper function to set default UUID in DataFile"""
433 return str(uuid.uuid1())
436 class DataFile(models.Model):
437 """Store map from random ID to filename"""
438 random_key = models.CharField(max_length=64,
441 data_run = models.ForeignKey(DataRun, db_index=True)
442 library = models.ForeignKey(Library, db_index=True, null=True, blank=True)
443 file_type = models.ForeignKey(FileType)
444 relative_pathname = models.CharField(max_length=255, db_index=True)
def _get_attributes(self):
    """Return the result of this file's ``file_type.parse_filename``.

    The file type is asked to parse ``self.relative_pathname``; whatever
    it returns is passed straight back to the caller.
    """
    parse = self.file_type.parse_filename
    return parse(self.relative_pathname)
attributes = property(fget=_get_attributes)
def _get_pathname(self):
    """Return ``relative_pathname`` resolved via ``get_absolute_pathname``."""
    relative = self.relative_pathname
    return get_absolute_pathname(relative)
pathname = property(fget=_get_pathname)
def get_absolute_url(self):
    """Return a (view name, positional args, keyword args) triple.

    The keyword args carry this file's ``random_key`` under ``'key'``.
    NOTE(review): presumably consumed by a URL-reversing decorator that
    is not visible in this extract -- confirm.
    """
    view_name = 'htsworkflow.frontend.experiments.views.read_result_file'
    view_kwargs = {'key': self.random_key}
    return (view_name, (), view_kwargs)
460 def find_file_type_metadata_from_filename(pathname):
461 path, filename = os.path.split(pathname)
463 for file_type in FileType.objects.all():
464 result = file_type.parse_filename(filename)
465 if result is not None:
466 result['file_type'] = file_type
def get_relative_pathname(abspath):
    """Strip off the result home directory from a path

    Only a leading ``settings.RESULT_HOME_DIR`` (with its trailing path
    separator) is removed. A path that does not start with that prefix
    is returned unchanged.
    """
    # Joining with '' guarantees the prefix ends in a path separator.
    result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
    # Slice off the prefix instead of using str.replace, which would
    # also delete any later occurrence of the same text inside the path.
    if abspath.startswith(result_home_dir):
        return abspath[len(result_home_dir):]
    return abspath
def get_absolute_pathname(relative_pathname):
    """Return ``relative_pathname`` joined onto ``settings.RESULT_HOME_DIR``."""
    results_root = settings.RESULT_HOME_DIR
    return os.path.join(results_root, relative_pathname)