9 from django.conf import settings
10 from django.core.exceptions import ObjectDoesNotExist
11 from django.core import urlresolvers
12 from django.db import models
13 from django.db.models.signals import post_init
15 from htsworkflow.frontend.samples.models import Library
16 from htsworkflow.util.conversion import parse_flowcell_id
17 from htsworkflow.pipelines import runfolder
19 logger = logging.getLogger(__name__)
22 default_pM = int(settings.DEFAULT_PM)
24 logger.error("invalid value for frontend.default_pm")
26 # how many days to wait before trying to re-import a runfolder
29 RESCAN_DELAY = int(settings.RESCAN_DELAY)
30 except (ValueError, AttributeError):
31 logger.error("Missing or invalid settings.RESCAN_DELAY, "\
32 "defaulting to %s" % (RESCAN_DELAY,))
34 RUN_STATUS_CHOICES = (
35 (0, 'Sequencer running'), ##Solexa Data Pipeline Not Yet Started'),
36 (1, 'Data Pipeline Started'),
37 (2, 'Data Pipeline Interrupted'),
38 (3, 'Data Pipeline Finished'),
39 (4, 'Collect Results Started'),
40 (5, 'Collect Results Finished'),
45 RUN_STATUS_REVERSE_MAP = dict(((v,k) for k,v in RUN_STATUS_CHOICES))
class ClusterStation(models.Model):
    """A named cluster station.

    FlowCell.cluster_station is a foreign key onto this model.
    """
    # each station name may only be used once
    name = models.CharField(unique=True, max_length=50)

    def __unicode__(self):
        """Display a cluster station by its name"""
        label = self.name
        return unicode(label)
class Sequencer(models.Model):
    """A named sequencer.

    FlowCell.sequencer is a foreign key onto this model.
    """
    # each sequencer name may only be used once
    name = models.CharField(unique=True, max_length=50)

    def __unicode__(self):
        """Display a sequencer by its name"""
        label = self.name
        return unicode(label)
59 class FlowCell(models.Model):
60 flowcell_id = models.CharField(max_length=20, unique=True, db_index=True)
61 run_date = models.DateTimeField()
62 advanced_run = models.BooleanField(default=False)
63 paired_end = models.BooleanField(default=False)
64 read_length = models.IntegerField(default=32) #Stanford is currenlty 25
65 control_lane = models.IntegerField(choices=[(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(0,'All Lanes')], null=True, blank=True)
67 cluster_station = models.ForeignKey(ClusterStation, default=3)
68 sequencer = models.ForeignKey(Sequencer, default=1)
70 notes = models.TextField(blank=True)
def __unicode__(self):
    """Display a flowcell as its flowcell_id"""
    identifier = self.flowcell_id
    return unicode(identifier)
77 for lane in self.lane_set.order_by('lane_number'):
78 cluster_estimate = lane.cluster_estimate
79 if cluster_estimate is not None:
80 cluster_estimate = "%s k" % ((int(cluster_estimate)/1000), )
82 cluster_estimate = 'None'
83 library_id = lane.library_id
84 library = lane.library
85 element = '<tr><td>%d</td><td><a href="%s">%s</a></td><td>%s</td></tr>'
86 html.append(element % (lane.lane_number,
87 library.get_admin_url(),
90 html.append('</table>')
91 return "\n".join(html)
92 Lanes.allow_tags = True
95 ordering = ["-run_date"]
97 def get_admin_url(self):
98 # that's the django way... except it didn't work
99 return urlresolvers.reverse('admin:experiments_flowcell_change',
102 def flowcell_type(self):
104 Convert our boolean 'is paired' flag to a name
112 def get_absolute_url(self):
113 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
114 return ('htsworkflow.frontend.experiments.views.flowcell_detail',
def get_raw_data_directory(self):
    """Return the directory for this flowcell under settings.RESULT_HOME_DIR

    The directory name is the flowcell id part that parse_flowcell_id
    extracts from self.flowcell_id; the status part is not used.
    """
    flowcell_id, _status = parse_flowcell_id(self.flowcell_id)
    result_home = settings.RESULT_HOME_DIR
    return os.path.join(result_home, flowcell_id)
123 def update_data_runs(self):
124 result_root = self.get_raw_data_directory()
125 logger.debug("Update data runs flowcell root: %s" % (result_root,))
126 if result_root is None:
129 result_home_dir = os.path.join(settings.RESULT_HOME_DIR,'')
130 run_xml_re = re.compile(glob.fnmatch.translate('run*.xml'))
132 dataruns = dict([ (x.result_dir, x) for x in self.datarun_set.all() ])
135 for dirpath, dirnames, filenames in os.walk(result_root):
136 for filename in filenames:
137 if run_xml_re.match(filename):
138 # we have a run directory
139 relative_pathname = get_relative_pathname(dirpath)
140 cached_run = dataruns.get(relative_pathname, None)
141 now = datetime.datetime.now()
142 if (cached_run is None):
143 self.import_data_run(relative_pathname, filename)
144 elif (now - cached_run.last_update_time).days > RESCAN_DELAY:
145 self.import_data_run(relative_pathname,
146 filename, cached_run)
148 def import_data_run(self, relative_pathname, run_xml_name, run=None):
149 """Given a result directory import files"""
150 run_dir = get_absolute_pathname(relative_pathname)
151 run_xml_path = os.path.join(run_dir, run_xml_name)
152 run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
153 logger.debug("Importing run from %s" % (relative_pathname,))
158 run.status = RUN_STATUS_REVERSE_MAP['DONE']
159 run.result_dir = relative_pathname
160 run.runfolder_name = run_xml_data.runfolder_name
161 run.cycle_start = run_xml_data.image_analysis.start
162 run.cycle_stop = run_xml_data.image_analysis.stop
163 run.run_start_time = run_xml_data.image_analysis.date
165 run.last_update_time = datetime.datetime.now()
168 run.update_result_files()
171 # FIXME: should we automatically update dataruns?
172 # Or should we expect someone to call update_data_runs?
173 #def update_flowcell_dataruns(sender, instance, *args, **kwargs):
174 # """Update our dataruns
176 # if not os.path.exists(settings.RESULT_HOME_DIR):
179 # instance.update_data_runs()
180 #post_init.connect(update_flowcell_dataruns, sender=FlowCell)
184 LANE_STATUS_CODES = [(0, 'Failed'),
187 LANE_STATUS_MAP = dict((int(k),v) for k,v in LANE_STATUS_CODES )
188 LANE_STATUS_MAP[None] = "Unknown"
190 def is_valid_lane(value):
191 if value >= 1 and value <= 8:
196 class Lane(models.Model):
197 flowcell = models.ForeignKey(FlowCell)
198 lane_number = models.IntegerField()
199 library = models.ForeignKey(Library)
200 pM = models.DecimalField(max_digits=5, decimal_places=2,blank=False, null=False,default=default_pM)
201 cluster_estimate = models.IntegerField(blank=True, null=True)
202 status = models.IntegerField(choices=LANE_STATUS_CODES, null=True, blank=True)
203 comment = models.TextField(null=True, blank=True)
206 def get_absolute_url(self):
207 return ('htsworkflow.frontend.experiments.views.flowcell_lane_detail',
def __unicode__(self):
    """Label a lane as '<flowcell_id>:<lane_number>'"""
    pieces = [self.flowcell.flowcell_id, unicode(self.lane_number)]
    return ':'.join(pieces)
213 ### -----------------------
214 class DataRun(models.Model):
215 flowcell = models.ForeignKey(FlowCell,verbose_name="Flowcell Id")
216 runfolder_name = models.CharField(max_length=50)
217 result_dir = models.CharField(max_length=255)
218 last_update_time = models.DateTimeField()
219 run_start_time = models.DateTimeField()
220 cycle_start = models.IntegerField(null=True, blank=True)
221 cycle_stop = models.IntegerField(null=True, blank=True)
222 run_status = models.IntegerField(choices=RUN_STATUS_CHOICES,
223 null=True, blank=True)
224 comment = models.TextField(blank=True)
226 def update_result_files(self):
227 abs_result_dir = get_absolute_pathname(self.result_dir)
229 for dirname, dirnames, filenames in os.walk(abs_result_dir):
230 for filename in filenames:
231 pathname = os.path.join(dirname, filename)
232 relative_pathname = get_relative_pathname(pathname)
233 datafiles = self.datafile_set.filter(
235 relative_pathname=relative_pathname)
236 if len(datafiles) > 0:
239 metadata = find_file_type_metadata_from_filename(filename)
240 if metadata is not None:
241 metadata['filename'] = filename
243 newfile.data_run = self
244 newfile.file_type = metadata['file_type']
245 newfile.relative_pathname = relative_pathname
247 lane_number = metadata.get('lane', None)
248 if lane_number is not None:
249 lane = self.flowcell.lane_set.get(lane_number = lane_number)
250 newfile.library = lane.library
252 self.datafile_set.add(newfile)
254 self.last_update_time = datetime.datetime.now()
256 def lane_files(self):
259 for datafile in self.datafile_set.all():
260 metadata = datafile.attributes
261 if metadata is not None:
262 lane = metadata.get('lane', None)
264 lane_file_set = lanes.setdefault(lane, {})
265 lane_file_set[datafile.file_type.normalized_name] = datafile
268 def ivc_plots(self, lane):
269 ivc_name = ['IVC All', 'IVC Call',
270 'IVC Percent Base', 'IVC Percent All', 'IVC Percent Call']
273 for rel_filename, metadata in self.get_result_files():
274 if metadata.file_type.name in ivc_name:
275 plots[metadata.file_type.name] = (rel_filename, metadata)
277 class FileType(models.Model):
278 """Represent potential file types
280 regex is a pattern used to detect if a filename matches this type
281 data run currently assumes that there may be a (?P<lane>) and
282 (?P<end>) pattern in the regular expression.
284 name = models.CharField(max_length=50)
285 mimetype = models.CharField(max_length=50, null=True, blank=True)
286 # regular expression from glob.fnmatch.translate
287 regex = models.CharField(max_length=50, null=True, blank=True)
289 def parse_filename(self, pathname):
290 """Does filename match our pattern?
292 Returns None if not, or dictionary of match variables if we do.
294 path, filename = os.path.split(pathname)
295 if len(self.regex) > 0:
296 match = re.match(self.regex, filename)
297 if match is not None:
298 # These are (?P<>) names we know about from our default regexes.
299 results = match.groupdict()
301 # convert int parameters
302 for attribute_name in ['lane', 'end']:
303 value = results.get(attribute_name, None)
304 if value is not None:
305 results[attribute_name] = int(value)
def _get_normalized_name(self):
    """Return self.name lower-cased with each space turned into '_'"""
    words = self.name.lower().split(' ')
    return '_'.join(words)
# expose the crushed name as a read-only attribute
normalized_name = property(_get_normalized_name)
314 def __unicode__(self):
315 #return u"<FileType: %s>" % (self.name,)
319 """Helper function to set default UUID in DataFile"""
320 return str(uuid.uuid1())
322 class DataFile(models.Model):
323 """Store map from random ID to filename"""
324 random_key = models.CharField(max_length=64,
327 data_run = models.ForeignKey(DataRun, db_index=True)
328 library = models.ForeignKey(Library, db_index=True, null=True, blank=True)
329 file_type = models.ForeignKey(FileType)
330 relative_pathname = models.CharField(max_length=255, db_index=True)
def _get_attributes(self):
    """Return what our file type's parse_filename reports for relative_pathname"""
    file_type = self.file_type
    return file_type.parse_filename(self.relative_pathname)
# read-only access to the values parsed out of the file name
attributes = property(_get_attributes)
def _get_pathname(self):
    """Return relative_pathname resolved through get_absolute_pathname"""
    relative = self.relative_pathname
    return get_absolute_pathname(relative)
# read-only access to the resolved path of the file
pathname = property(_get_pathname)
341 def get_absolute_url(self):
342 return ('htsworkflow.frontend.experiments.views.read_result_file',
343 (), {'key': self.random_key })
345 def find_file_type_metadata_from_filename(pathname):
346 path, filename = os.path.split(pathname)
348 for file_type in FileType.objects.all():
349 result = file_type.parse_filename(filename)
350 if result is not None:
351 result['file_type'] = file_type
def get_relative_pathname(abspath):
    """Strip off the result home directory from a path

    Only a leading settings.RESULT_HOME_DIR prefix is removed. Later
    occurrences of the same text inside the path are kept, and a path
    that does not start with the result home directory is returned
    unchanged.

    :param abspath: path, normally located below settings.RESULT_HOME_DIR
    :return: abspath with the leading result home directory removed
    """
    # joining with '' appends a trailing separator when one is missing, so
    # only a whole directory prefix matches
    result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
    if abspath.startswith(result_home_dir):
        return abspath[len(result_home_dir):]
    return abspath
def get_absolute_pathname(relative_pathname):
    """Return relative_pathname joined onto settings.RESULT_HOME_DIR"""
    result_home_dir = settings.RESULT_HOME_DIR
    return os.path.join(result_home_dir, relative_pathname)