9 from django.conf import settings
10 from django.core.exceptions import ObjectDoesNotExist
11 from django.core import urlresolvers
12 from django.db import models
13 from django.db.models.signals import post_init
15 from htsworkflow.frontend.samples.models import Library
16 from htsworkflow.util.conversion import parse_flowcell_id
17 from htsworkflow.pipelines import runfolder
logger = logging.getLogger(__name__)

# Default picomolar concentration applied to new Lane rows.
# NOTE(review): the `try:` line and the fallback assignment were lost in
# extraction; reconstructed -- confirm the fallback value (5) against the
# deployed settings.
default_pM = 5
try:
    default_pM = int(settings.DEFAULT_PM)
except (ValueError, AttributeError):
    logger.error("invalid value for frontend.default_pm")

# how many days to wait before trying to re-import a runfolder
# NOTE(review): the initial assignment and `try:` line were lost in
# extraction; reconstructed so the except clause has something to guard.
RESCAN_DELAY = 1
try:
    RESCAN_DELAY = int(settings.RESCAN_DELAY)
except (ValueError, AttributeError):
    logger.error("Missing or invalid settings.RESCAN_DELAY, "
                 "defaulting to %s" % (RESCAN_DELAY,))
# Pipeline progress states for a DataRun (stored as the run_status field).
RUN_STATUS_CHOICES = (
    (0, 'Sequencer running'), ##Solexa Data Pipeline Not Yet Started'),
    (1, 'Data Pipeline Started'),
    (2, 'Data Pipeline Interrupted'),
    (3, 'Data Pipeline Finished'),
    (4, 'Collect Results Started'),
    (5, 'Collect Results Finished'),
    # NOTE(review): the remaining entries and the closing paren were lost
    # in extraction; reconstructed. 'DONE' must exist because
    # import_data_run() looks the status up by that name -- confirm codes.
    (6, 'QC Started'),
    (7, 'QC Finished'),
    (8, 'DONE'),
)

# label -> numeric code, e.g. RUN_STATUS_REVERSE_MAP['DONE'] -> 8
RUN_STATUS_REVERSE_MAP = dict(((v,k) for k,v in RUN_STATUS_CHOICES))
class ClusterStation(models.Model):
    """Lookup table naming the cluster station instruments."""
    name = models.CharField(max_length=50, unique=True)

    def __unicode__(self):
        return unicode(self.name)
class Sequencer(models.Model):
    """Lookup table describing the sequencing instruments."""
    name = models.CharField(max_length=50, db_index=True)
    instrument_name = models.CharField(max_length=50, db_index=True)
    serial_number = models.CharField(max_length=50, db_index=True)
    model = models.CharField(max_length=255)
    comment = models.CharField(max_length=255)

    def __unicode__(self):
        """Display as "<name> (<instrument_name>)" when the latter is set."""
        name = [unicode(self.name)]
        if self.instrument_name is not None:
            name.append("(%s)" % (unicode(self.instrument_name),))
        # NOTE(review): the original return line was lost in extraction;
        # a space-joined render reconstructed -- confirm the separator.
        return " ".join(name)

    def get_absolute_url(self):
        """URL routing tuple for this sequencer's detail view.

        NOTE(review): the args element was lost in extraction and
        [self.id] reconstructed; the full source is probably decorated
        with @models.permalink -- confirm.
        """
        return ('htsworkflow.frontend.experiments.views.sequencer',
                [self.id])
class FlowCell(models.Model):
    """A single flowcell: its identity, run parameters, and instruments."""
    flowcell_id = models.CharField(max_length=20, unique=True, db_index=True)
    run_date = models.DateTimeField()
    advanced_run = models.BooleanField(default=False)
    paired_end = models.BooleanField(default=False)
    read_length = models.IntegerField(default=32) # Stanford is currently 25
    # 0 is a sentinel meaning "all lanes"; NULL/blank means no control lane
    control_lane = models.IntegerField(choices=[(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(0,'All Lanes')], null=True, blank=True)

    # NOTE(review): hard-coded primary-key defaults assume fixture rows
    # with id=3 (ClusterStation) and id=1 (Sequencer) exist -- confirm
    cluster_station = models.ForeignKey(ClusterStation, default=3)
    sequencer = models.ForeignKey(Sequencer, default=1)

    notes = models.TextField(blank=True)

    def __unicode__(self):
        return unicode(self.flowcell_id)
90 for lane in self.lane_set.order_by('lane_number'):
91 cluster_estimate = lane.cluster_estimate
92 if cluster_estimate is not None:
93 cluster_estimate = "%s k" % ((int(cluster_estimate)/1000), )
95 cluster_estimate = 'None'
96 library_id = lane.library_id
97 library = lane.library
98 element = '<tr><td>%d</td><td><a href="%s">%s</a></td><td>%s</td></tr>'
99 html.append(element % (lane.lane_number,
100 library.get_admin_url(),
103 html.append('</table>')
104 return "\n".join(html)
105 Lanes.allow_tags = True
108 ordering = ["-run_date"]
110 def get_admin_url(self):
111 # that's the django way... except it didn't work
112 return urlresolvers.reverse('admin:experiments_flowcell_change',
115 def flowcell_type(self):
117 Convert our boolean 'is paired' flag to a name
125 def get_absolute_url(self):
126 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
127 return ('htsworkflow.frontend.experiments.views.flowcell_detail',
    def get_raw_data_directory(self):
        """Return location of where the raw data is stored"""
        # status (the trailing suffix of the flowcell id) is ignored here;
        # only the bare flowcell id names the on-disk directory
        flowcell_id, status = parse_flowcell_id(self.flowcell_id)
        return os.path.join(settings.RESULT_HOME_DIR, flowcell_id)
136 def update_data_runs(self):
137 result_root = self.get_raw_data_directory()
138 logger.debug("Update data runs flowcell root: %s" % (result_root,))
139 if result_root is None:
142 result_home_dir = os.path.join(settings.RESULT_HOME_DIR,'')
143 run_xml_re = re.compile(glob.fnmatch.translate('run*.xml'))
145 dataruns = dict([ (x.result_dir, x) for x in self.datarun_set.all() ])
148 for dirpath, dirnames, filenames in os.walk(result_root):
149 for filename in filenames:
150 if run_xml_re.match(filename):
151 # we have a run directory
152 relative_pathname = get_relative_pathname(dirpath)
153 cached_run = dataruns.get(relative_pathname, None)
154 now = datetime.datetime.now()
155 if (cached_run is None):
156 self.import_data_run(relative_pathname, filename)
157 elif (now - cached_run.last_update_time).days > RESCAN_DELAY:
158 self.import_data_run(relative_pathname,
159 filename, cached_run)
161 def import_data_run(self, relative_pathname, run_xml_name, run=None):
162 """Given a result directory import files"""
163 run_dir = get_absolute_pathname(relative_pathname)
164 run_xml_path = os.path.join(run_dir, run_xml_name)
165 run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
166 logger.debug("Importing run from %s" % (relative_pathname,))
171 run.status = RUN_STATUS_REVERSE_MAP['DONE']
172 run.result_dir = relative_pathname
173 run.runfolder_name = run_xml_data.runfolder_name
174 run.cycle_start = run_xml_data.image_analysis.start
175 run.cycle_stop = run_xml_data.image_analysis.stop
176 run.run_start_time = run_xml_data.image_analysis.date
177 run.image_software = run_xml_data.image_analysis.software
178 run.image_version = run_xml_data.image_analysis.version
179 run.basecall_software = run_xml_data.bustard.software
180 run.basecall_version = run_xml_data.bustard.version
181 run.alignment_software = run_xml_data.gerald.software
182 run.alignment_version = run_xml_data.gerald.version
184 run.last_update_time = datetime.datetime.now()
187 run.update_result_files()
190 # FIXME: should we automatically update dataruns?
191 # Or should we expect someone to call update_data_runs?
192 #def update_flowcell_dataruns(sender, instance, *args, **kwargs):
193 # """Update our dataruns
195 # if not os.path.exists(settings.RESULT_HOME_DIR):
198 # instance.update_data_runs()
199 #post_init.connect(update_flowcell_dataruns, sender=FlowCell)
# Per-lane quality verdicts.
# NOTE(review): the middle entries and closing bracket were lost in
# extraction; (1, 'Marginal') / (2, 'Good') reconstructed -- confirm
# against existing database rows or migrations.
LANE_STATUS_CODES = [(0, 'Failed'),
                     (1, 'Marginal'),
                     (2, 'Good'),
                     ]

# numeric code -> label; None maps to "Unknown" for lanes with no status
LANE_STATUS_MAP = dict((int(k),v) for k,v in LANE_STATUS_CODES )
LANE_STATUS_MAP[None] = "Unknown"
def is_valid_lane(value):
    """Return True when value is a usable flowcell lane number (1-8).

    NOTE(review): the return statements were lost in extraction; the only
    sane reading of the surviving condition is a boolean 1..8 check, here
    written as a chained comparison.
    """
    return 1 <= value <= 8
class Lane(models.Model):
    """One lane of a flowcell: which library ran in it and at what loading."""
    flowcell = models.ForeignKey(FlowCell)
    lane_number = models.IntegerField()
    library = models.ForeignKey(Library)
    # picomolar loading concentration for the lane
    pM = models.DecimalField(max_digits=5, decimal_places=2,blank=False, null=False,default=default_pM)
    cluster_estimate = models.IntegerField(blank=True, null=True)
    status = models.IntegerField(choices=LANE_STATUS_CODES, null=True, blank=True)
    comment = models.TextField(null=True, blank=True)

    def get_absolute_url(self):
        # NOTE(review): the argument element of this tuple (and any
        # @models.permalink decorator above) was lost in extraction;
        # reconstruct before use.
        return ('htsworkflow.frontend.experiments.views.flowcell_lane_detail',

    def __unicode__(self):
        return self.flowcell.flowcell_id + ':' + unicode(self.lane_number)
### -----------------------
class DataRun(models.Model):
    """One pipeline analysis run of a flowcell and where its results live."""
    flowcell = models.ForeignKey(FlowCell,verbose_name="Flowcell Id")
    runfolder_name = models.CharField(max_length=50)
    # path relative to settings.RESULT_HOME_DIR (see get_relative_pathname)
    result_dir = models.CharField(max_length=255)
    last_update_time = models.DateTimeField()
    run_start_time = models.DateTimeField()
    cycle_start = models.IntegerField(null=True, blank=True)
    cycle_stop = models.IntegerField(null=True, blank=True)
    run_status = models.IntegerField(choices=RUN_STATUS_CHOICES,
                                     null=True, blank=True)
    image_software = models.CharField(max_length=50)
    image_version = models.CharField(max_length=50)
    basecall_software = models.CharField(max_length=50)
    basecall_version = models.CharField(max_length=50)
    alignment_software = models.CharField(max_length=50)
    alignment_version = models.CharField(max_length=50)
    comment = models.TextField(blank=True)
251 def update_result_files(self):
252 abs_result_dir = get_absolute_pathname(self.result_dir)
254 for dirname, dirnames, filenames in os.walk(abs_result_dir):
255 for filename in filenames:
256 pathname = os.path.join(dirname, filename)
257 relative_pathname = get_relative_pathname(pathname)
258 datafiles = self.datafile_set.filter(
260 relative_pathname=relative_pathname)
261 if len(datafiles) > 0:
264 metadata = find_file_type_metadata_from_filename(filename)
265 if metadata is not None:
266 metadata['filename'] = filename
268 newfile.data_run = self
269 newfile.file_type = metadata['file_type']
270 newfile.relative_pathname = relative_pathname
272 lane_number = metadata.get('lane', None)
273 if lane_number is not None:
274 lane = self.flowcell.lane_set.get(lane_number = lane_number)
275 newfile.library = lane.library
277 self.datafile_set.add(newfile)
279 self.last_update_time = datetime.datetime.now()
281 def lane_files(self):
284 for datafile in self.datafile_set.all():
285 metadata = datafile.attributes
286 if metadata is not None:
287 lane = metadata.get('lane', None)
289 lane_file_set = lanes.setdefault(lane, {})
290 lane_file_set[datafile.file_type.normalized_name] = datafile
    def ivc_plots(self, lane):
        """Collect the intensity/IVC plot files for a lane.

        NOTE(review): several lines were lost in extraction -- the
        initialisation of `plots`, the return, and whatever filtering
        used the `lane` argument (unused in the visible code).
        Reconstruct before relying on this method.
        """
        ivc_name = ['IVC All', 'IVC Call',
                    'IVC Percent Base', 'IVC Percent All', 'IVC Percent Call']
        for rel_filename, metadata in self.get_result_files():
            if metadata.file_type.name in ivc_name:
                plots[metadata.file_type.name] = (rel_filename, metadata)
class FileType(models.Model):
    """Represent potential file types

    regex is a pattern used to detect if a filename matches this type
    data run currently assumes that there may be a (?P<lane>) and
    (?P<end>) pattern in the regular expression.
    """
    name = models.CharField(max_length=50)
    mimetype = models.CharField(max_length=50, null=True, blank=True)
    # regular expression from glob.fnmatch.translate
    regex = models.CharField(max_length=50, null=True, blank=True)
314 def parse_filename(self, pathname):
315 """Does filename match our pattern?
317 Returns None if not, or dictionary of match variables if we do.
319 path, filename = os.path.split(pathname)
320 if len(self.regex) > 0:
321 match = re.match(self.regex, filename)
322 if match is not None:
323 # These are (?P<>) names we know about from our default regexes.
324 results = match.groupdict()
326 # convert int parameters
327 for attribute_name in ['lane', 'end']:
328 value = results.get(attribute_name, None)
329 if value is not None:
330 results[attribute_name] = int(value)
334 def _get_normalized_name(self):
335 """Crush data file name into identifier friendly name"""
336 return self.name.replace(' ', '_').lower()
337 normalized_name = property(_get_normalized_name)
    def __unicode__(self):
        # NOTE(review): the actual return statement (presumably
        # unicode(self.name)) was lost in extraction; only this
        # commented-out alternative survives.
        #return u"<FileType: %s>" % (self.name,)
344 """Helper function to set default UUID in DataFile"""
345 return str(uuid.uuid1())
class DataFile(models.Model):
    """Store map from random ID to filename"""
    # NOTE(review): the rest of this CharField's argument list (likely
    # db_index=True and a default pointing at the uuid helper above) was
    # lost in extraction.
    random_key = models.CharField(max_length=64,
    data_run = models.ForeignKey(DataRun, db_index=True)
    library = models.ForeignKey(Library, db_index=True, null=True, blank=True)
    file_type = models.ForeignKey(FileType)
    # path relative to settings.RESULT_HOME_DIR
    relative_pathname = models.CharField(max_length=255, db_index=True)
    def _get_attributes(self):
        """Metadata parsed from our filename by our FileType's regex."""
        return self.file_type.parse_filename(self.relative_pathname)
    # dict of regex match groups (lane/end coerced to int), or None
    attributes = property(_get_attributes)
    def _get_pathname(self):
        """Absolute location of this file under RESULT_HOME_DIR."""
        return get_absolute_pathname(self.relative_pathname)
    pathname = property(_get_pathname)
366 def get_absolute_url(self):
367 return ('htsworkflow.frontend.experiments.views.read_result_file',
368 (), {'key': self.random_key })
def find_file_type_metadata_from_filename(pathname):
    """Return parse metadata for the first FileType whose regex matches.

    Tries every FileType row; on a match the parsed dict is returned with
    the matching FileType added under 'file_type'. Returns None when
    nothing matches.

    NOTE(review): the return statements were lost in extraction and have
    been reconstructed (without them the loop's work was discarded).
    """
    path, filename = os.path.split(pathname)

    for file_type in FileType.objects.all():
        result = file_type.parse_filename(filename)
        if result is not None:
            result['file_type'] = file_type
            return result
    return None
def get_relative_pathname(abspath):
    """Strip off the result home directory from a path."""
    result_home_dir = os.path.join(settings.RESULT_HOME_DIR,'')
    # BUG FIX: str.replace() removed *every* occurrence of the home dir,
    # not just a leading one; only strip it when it is actually the prefix.
    if abspath.startswith(result_home_dir):
        return abspath[len(result_home_dir):]
    return abspath
def get_absolute_pathname(relative_pathname):
    """Attach relative path to results home directory"""
    home = settings.RESULT_HOME_DIR
    return os.path.join(home, relative_pathname)