9 from django.conf import settings
10 from django.core.exceptions import ObjectDoesNotExist
11 from django.core import urlresolvers
12 from django.db import models
13 from django.db.models.signals import post_init, pre_save
15 from htsworkflow.frontend.samples.models import Library
16 from htsworkflow.util.conversion import parse_flowcell_id
17 from htsworkflow.pipelines import runfolder
19 LOGGER = logging.getLogger(__name__)
22 default_pM = int(settings.DEFAULT_PM)
24 LOGGER.error("invalid value for frontend.default_pm")
26 # how many days to wait before trying to re-import a runfolder
29 RESCAN_DELAY = int(settings.RESCAN_DELAY)
30 except (ValueError, AttributeError):
31 LOGGER.error("Missing or invalid settings.RESCAN_DELAY, "\
32 "defaulting to %s" % (RESCAN_DELAY,))
34 RUN_STATUS_CHOICES = (
35 (0, 'Sequencer running'), # Solexa Data Pipeline Not Yet Started'),
36 (1, 'Data Pipeline Started'),
37 (2, 'Data Pipeline Interrupted'),
38 (3, 'Data Pipeline Finished'),
39 (4, 'Collect Results Started'),
40 (5, 'Collect Results Finished'),
45 RUN_STATUS_REVERSE_MAP = dict(((v, k) for k, v in RUN_STATUS_CHOICES))
48 class ClusterStation(models.Model):
49 """List of cluster stations"""
50 name = models.CharField(max_length=50, unique=True)
51 isdefault = models.BooleanField(default=False, null=False)
54 ordering = ["-isdefault", "name"]
def __unicode__(self):
    """Return this cluster station's name as a unicode string."""
    station_name = self.name
    return unicode(station_name)
61 d = cls.objects.filter(isdefault=True).all()
64 d = cls.objects.order_by('-id').all()
70 def update_isdefault(sender, instance, **kwargs):
71 """Clear default if needed
73 if instance.isdefault:
74 for c in ClusterStation.objects.filter(isdefault=True).all():
75 if c.id != instance.id:
79 pre_save.connect(ClusterStation.update_isdefault, sender=ClusterStation)
81 class Sequencer(models.Model):
82 """Sequencers we've owned
84 name = models.CharField(max_length=50, db_index=True)
85 instrument_name = models.CharField(max_length=50, db_index=True)
86 serial_number = models.CharField(max_length=50, db_index=True)
87 model = models.CharField(max_length=255)
88 active = models.BooleanField(default=True, null=False)
89 isdefault = models.BooleanField(default=False, null=False)
90 comment = models.CharField(max_length=255)
93 ordering = ["-isdefault", "-active", "name"]
95 def __unicode__(self):
96 name = [unicode(self.name)]
97 if self.instrument_name is not None:
98 name.append("(%s)" % (unicode(self.instrument_name),))
102 def get_absolute_url(self):
103 return ('htsworkflow.frontend.experiments.views.sequencer',
108 d = cls.objects.filter(isdefault=True).all()
111 d = cls.objects.order_by('active', '-id').all()
117 def update_isdefault(sender, instance, **kwargs):
118 """Clear default if needed
120 if instance.isdefault:
121 for s in Sequencer.objects.filter(isdefault=True).all():
122 if s.id != instance.id:
126 pre_save.connect(Sequencer.update_isdefault, sender=Sequencer)
129 class FlowCell(models.Model):
130 flowcell_id = models.CharField(max_length=20, unique=True, db_index=True)
131 run_date = models.DateTimeField()
132 advanced_run = models.BooleanField(default=False)
133 paired_end = models.BooleanField(default=False)
134 read_length = models.IntegerField(default=32) # Stanford is currently 25
135 control_lane = models.IntegerField(choices=[(1, 1),
147 cluster_station = models.ForeignKey(ClusterStation, default=ClusterStation.default)
148 sequencer = models.ForeignKey(Sequencer, default=Sequencer.default)
150 notes = models.TextField(blank=True)
def __unicode__(self):
    """Return this flowcell's flowcell_id as a unicode string."""
    identifier = self.flowcell_id
    return unicode(identifier)
157 for lane in self.lane_set.order_by('lane_number'):
158 cluster_estimate = lane.cluster_estimate
159 if cluster_estimate is not None:
160 cluster_estimate = "%s k" % ((int(cluster_estimate) / 1000), )
162 cluster_estimate = 'None'
163 library_id = lane.library_id
164 library = lane.library
165 element = '<tr><td>%d</td>'\
166 '<td><a href="%s">%s</a></td><td>%s</td></tr>'
167 html.append(element % (lane.lane_number,
168 library.get_admin_url(),
171 html.append('</table>')
172 return "\n".join(html)
173 Lanes.allow_tags = True
176 ordering = ["-run_date"]
178 def get_admin_url(self):
179 # that's the django way... except it didn't work
180 return urlresolvers.reverse('admin:experiments_flowcell_change',
183 def flowcell_type(self):
184 """Convert our boolean 'is paired' flag to a name
192 def get_absolute_url(self):
193 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
194 return ('htsworkflow.frontend.experiments.views.flowcell_detail',
def get_raw_data_directory(self):
    """Return location of where the raw data is stored.

    The path is settings.RESULT_HOME_DIR joined with the id component that
    parse_flowcell_id() returns for self.flowcell_id; the status component
    of that pair is not used.
    """
    parsed_id, _status = parse_flowcell_id(self.flowcell_id)
    result_home = settings.RESULT_HOME_DIR
    return os.path.join(result_home, parsed_id)
203 def update_data_runs(self):
204 result_root = self.get_raw_data_directory()
205 LOGGER.debug("Update data runs flowcell root: %s" % (result_root,))
206 if result_root is None:
209 result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
210 run_xml_re = re.compile(glob.fnmatch.translate('run*.xml'))
213 for dirpath, dirnames, filenames in os.walk(result_root):
214 for filename in filenames:
215 if run_xml_re.match(filename):
216 # we have a run directory
217 relative_pathname = get_relative_pathname(dirpath)
218 self.import_data_run(relative_pathname, filename)
def import_data_run(self, relative_pathname, run_xml_name, force=False):
    """Given a result directory import files

    relative_pathname -- result directory, relative to the results home
                         directory (see get_absolute_pathname)
    run_xml_name -- name of the run xml file inside that directory
    force -- re-import even when the run was updated less than
             RESCAN_DELAY days ago

    Raises RuntimeError when more than one DataRun already refers to
    relative_pathname.
    """
    now = datetime.datetime.now()
    run_dir = get_absolute_pathname(relative_pathname)
    run_xml_path = os.path.join(run_dir, run_xml_name)

    # NOTE(review): the create-vs-reuse branches here were reconstructed
    # from the visible uses of `run` and `created`; confirm against the
    # complete file.
    runs = DataRun.objects.filter(result_dir=relative_pathname)
    if len(runs) == 0:
        run = DataRun()
        created = True
    elif len(runs) > 1:
        raise RuntimeError("Too many data runs for %s" % (
            relative_pathname,))
    else:
        run = runs[0]
        created = False

    # `created` short-circuits first: a brand new run has no
    # last_update_time to subtract from.
    if created or force or (now - run.last_update_time).days > RESCAN_DELAY:
        LOGGER.debug("Importing run from %s" % (relative_pathname,))
        run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
        # NOTE(review): DataRun.flowcell is a required foreign key, so a
        # newly created run presumably needs it set here -- confirm.
        run.flowcell = self
        # The model field is DataRun.run_status. Assigning to `run.status`
        # only created a plain instance attribute that was never saved.
        run.run_status = RUN_STATUS_REVERSE_MAP['DONE']
        run.result_dir = relative_pathname
        run.runfolder_name = run_xml_data.runfolder_name
        run.cycle_start = run_xml_data.image_analysis.start
        run.cycle_stop = run_xml_data.image_analysis.stop
        run.run_start_time = run_xml_data.image_analysis.date
        run.image_software = run_xml_data.image_analysis.software
        run.image_version = run_xml_data.image_analysis.version
        run.basecall_software = run_xml_data.bustard.software
        run.basecall_version = run_xml_data.bustard.version
        # we're frequently not running alignments
        if run_xml_data.gerald:
            run.alignment_software = run_xml_data.gerald.software
            run.alignment_version = run_xml_data.gerald.version

        run.last_update_time = datetime.datetime.now()
        # NOTE(review): the run is saved before its files are attached;
        # update_result_files() adds rows to run.datafile_set.
        run.save()

        run.update_result_files()
262 # FIXME: should we automatically update dataruns?
263 # Or should we expect someone to call update_data_runs?
264 #def update_flowcell_dataruns(sender, instance, *args, **kwargs):
265 # """Update our dataruns
267 # if not os.path.exists(settings.RESULT_HOME_DIR):
270 # instance.update_data_runs()
271 #post_init.connect(update_flowcell_dataruns, sender=FlowCell)
274 LANE_STATUS_CODES = [(0, 'Failed'),
277 LANE_STATUS_MAP = dict((int(k), v) for k, v in LANE_STATUS_CODES)
278 LANE_STATUS_MAP[None] = "Unknown"
281 def is_valid_lane(value):
282 if value >= 1 and value <= 8:
288 class Lane(models.Model):
289 flowcell = models.ForeignKey(FlowCell)
290 lane_number = models.IntegerField()
291 library = models.ForeignKey(Library)
292 pM = models.DecimalField(max_digits=5,
297 cluster_estimate = models.IntegerField(blank=True, null=True)
298 status = models.IntegerField(choices=LANE_STATUS_CODES,
301 comment = models.TextField(null=True, blank=True)
304 def get_absolute_url(self):
305 return ('htsworkflow.frontend.experiments.views.flowcell_lane_detail',
def __unicode__(self):
    """Return the flowcell id and the lane number joined by a colon."""
    flowcell_label = self.flowcell.flowcell_id
    lane_label = unicode(self.lane_number)
    return flowcell_label + ':' + lane_label
312 class DataRun(models.Model):
313 flowcell = models.ForeignKey(FlowCell, verbose_name="Flowcell Id")
314 runfolder_name = models.CharField(max_length=50)
315 result_dir = models.CharField(max_length=255)
316 last_update_time = models.DateTimeField()
317 run_start_time = models.DateTimeField()
318 cycle_start = models.IntegerField(null=True, blank=True)
319 cycle_stop = models.IntegerField(null=True, blank=True)
320 run_status = models.IntegerField(choices=RUN_STATUS_CHOICES,
321 null=True, blank=True)
322 image_software = models.CharField(max_length=50)
323 image_version = models.CharField(max_length=50)
324 basecall_software = models.CharField(max_length=50)
325 basecall_version = models.CharField(max_length=50)
326 alignment_software = models.CharField(max_length=50)
327 alignment_version = models.CharField(max_length=50)
328 comment = models.TextField(blank=True)
330 def update_result_files(self):
331 abs_result_dir = get_absolute_pathname(self.result_dir)
333 for dirname, dirnames, filenames in os.walk(abs_result_dir):
334 for filename in filenames:
335 pathname = os.path.join(dirname, filename)
336 relative_pathname = get_relative_pathname(pathname)
337 datafiles = self.datafile_set.filter(
339 relative_pathname=relative_pathname)
340 if len(datafiles) > 0:
343 metadata = find_file_type_metadata_from_filename(filename)
344 if metadata is not None:
345 metadata['filename'] = filename
347 newfile.data_run = self
348 newfile.file_type = metadata['file_type']
349 newfile.relative_pathname = relative_pathname
351 lane_number = metadata.get('lane', None)
352 if lane_number is not None:
353 lane = self.flowcell.lane_set.get(
354 lane_number=lane_number)
355 newfile.library = lane.library
357 self.datafile_set.add(newfile)
359 self.last_update_time = datetime.datetime.now()
361 def lane_files(self):
364 for datafile in self.datafile_set.all():
365 metadata = datafile.attributes
366 if metadata is not None:
367 lane = metadata.get('lane', None)
369 lane_file_set = lanes.setdefault(lane, {})
370 normalized_name = datafile.file_type.normalized_name
371 lane_file_set[normalized_name] = datafile
374 def ivc_plots(self, lane):
375 ivc_name = ['IVC All', 'IVC Call',
376 'IVC Percent Base', 'IVC Percent All', 'IVC Percent Call']
379 for rel_filename, metadata in self.get_result_files():
380 if metadata.file_type.name in ivc_name:
381 plots[metadata.file_type.name] = (rel_filename, metadata)
384 class FileType(models.Model):
385 """Represent potential file types
387 regex is a pattern used to detect if a filename matches this type
388 data run currently assumes that there may be a (?P<lane>) and
389 (?P<end>) pattern in the regular expression.
391 name = models.CharField(max_length=50)
392 mimetype = models.CharField(max_length=50, null=True, blank=True)
393 # regular expression from glob.fnmatch.translate
394 regex = models.CharField(max_length=50, null=True, blank=True)
def parse_filename(self, pathname):
    """Does filename match our pattern?

    Only the last path component of pathname is tested, and the pattern
    is applied with re.match, so it is anchored at the start of the name.

    Returns None if not, or dictionary of match variables if we do.
    The 'lane' and 'end' variables are converted to int when they matched.
    """
    filename = os.path.basename(pathname)

    # regex is a nullable, blank-able column: treat None like '' instead
    # of failing on len(None).
    if not self.regex:
        return None

    match = re.match(self.regex, filename)
    if match is None:
        return None

    # These are (?P<>) names we know about from our regular expressions.
    results = match.groupdict()

    # convert int parameters
    for attribute_name in ('lane', 'end'):
        value = results.get(attribute_name, None)
        if value is not None:
            results[attribute_name] = int(value)

    return results
def _get_normalized_name(self):
    """Return self.name with spaces replaced by '_' and lower-cased."""
    underscored = self.name.replace(' ', '_')
    return underscored.lower()
normalized_name = property(_get_normalized_name)
422 def __unicode__(self):
423 #return u"<FileType: %s>" % (self.name,)
428 """Helper function to set default UUID in DataFile"""
429 return str(uuid.uuid1())
432 class DataFile(models.Model):
433 """Store map from random ID to filename"""
434 random_key = models.CharField(max_length=64,
437 data_run = models.ForeignKey(DataRun, db_index=True)
438 library = models.ForeignKey(Library, db_index=True, null=True, blank=True)
439 file_type = models.ForeignKey(FileType)
440 relative_pathname = models.CharField(max_length=255, db_index=True)
def _get_attributes(self):
    """Return what our file type's parse_filename() gives for relative_pathname."""
    file_type = self.file_type
    return file_type.parse_filename(self.relative_pathname)
attributes = property(_get_attributes)
def _get_pathname(self):
    """Return relative_pathname resolved through get_absolute_pathname()."""
    return get_absolute_pathname(relative_pathname=self.relative_pathname)
pathname = property(_get_pathname)
def get_absolute_url(self):
    """Return the (view name, args, kwargs) triple for the read_result_file view.

    The only keyword argument is 'key', taken from self.random_key.
    """
    view_name = 'htsworkflow.frontend.experiments.views.read_result_file'
    view_kwargs = {'key': self.random_key}
    return (view_name, (), view_kwargs)
def find_file_type_metadata_from_filename(pathname):
    """Look for a FileType whose pattern accepts the file name of pathname.

    Each FileType is asked to parse the last path component; when one
    returns a result, that FileType is stored in it under 'file_type'.

    NOTE(review): the return statements are not visible in the listing
    this was reviewed from. Callers test the result against None and read
    result['file_type'], so this returns the first match, else None.
    """
    basename = os.path.split(pathname)[1]

    for candidate in FileType.objects.all():
        metadata = candidate.parse_filename(basename)
        if metadata is None:
            continue
        metadata['file_type'] = candidate
        return metadata

    return None
def get_relative_pathname(abspath):
    """Strip off the result home directory from a path

    Only a leading settings.RESULT_HOME_DIR (with its trailing separator)
    is removed. A path that does not start with it is returned unchanged,
    and later repeats of the same directory name inside the path are kept.
    """
    # join with '' guarantees exactly one trailing separator on the prefix
    result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
    if abspath.startswith(result_home_dir):
        return abspath[len(result_home_dir):]
    return abspath
def get_absolute_pathname(relative_pathname):
    """Attach relative path to results home directory"""
    result_home = settings.RESULT_HOME_DIR
    return os.path.join(result_home, relative_pathname)