9 from django.conf import settings
10 from django.core.exceptions import ObjectDoesNotExist
11 from django.core import urlresolvers
12 from django.db import models
13 from django.db.models.signals import post_init
15 from htsworkflow.frontend.samples.models import Library
16 from htsworkflow.util.conversion import parse_flowcell_id
17 from htsworkflow.pipelines import runfolder
19 logger = logging.getLogger(__name__)
22 default_pM = int(settings.DEFAULT_PM)
24 logger.error("invalid value for frontend.default_pm")
26 # how many days to wait before trying to re-import a runfolder
29 RESCAN_DELAY = int(settings.RESCAN_DELAY)
30 except (ValueError, AttributeError):
31 logger.error("Missing or invalid settings.RESCAN_DELAY")
33 RUN_STATUS_CHOICES = (
34 (0, 'Sequencer running'), ##Solexa Data Pipeline Not Yet Started'),
35 (1, 'Data Pipeline Started'),
36 (2, 'Data Pipeline Interrupted'),
37 (3, 'Data Pipeline Finished'),
38 (4, 'Collect Results Started'),
39 (5, 'Collect Results Finished'),
44 RUN_STATUS_REVERSE_MAP = dict(((v,k) for k,v in RUN_STATUS_CHOICES))
class ClusterStation(models.Model):
    """Django model for a cluster station, identified by a unique name."""

    # Station name; the field itself enforces uniqueness.
    name = models.CharField(unique=True, max_length=50)

    def __unicode__(self):
        """Return the station name converted to unicode."""
        label = self.name
        return unicode(label)
class Sequencer(models.Model):
    """Django model for a sequencer, identified by a unique name."""

    # Sequencer name; limited to 50 characters and required to be unique.
    name = models.CharField(
        max_length=50,
        unique=True,
    )

    def __unicode__(self):
        """Return the sequencer name converted to unicode."""
        return unicode(getattr(self, 'name'))
58 class FlowCell(models.Model):
59 flowcell_id = models.CharField(max_length=20, unique=True, db_index=True)
60 run_date = models.DateTimeField()
61 advanced_run = models.BooleanField(default=False)
62 paired_end = models.BooleanField(default=False)
63 read_length = models.IntegerField(default=32) #Stanford is currently 25
64 control_lane = models.IntegerField(choices=[(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(0,'All Lanes')], null=True, blank=True)
66 cluster_station = models.ForeignKey(ClusterStation, default=3)
67 sequencer = models.ForeignKey(Sequencer, default=1)
69 notes = models.TextField(blank=True)
def __unicode__(self):
    """Return this flowcell's flowcell_id converted to unicode."""
    identifier = self.flowcell_id
    return unicode(identifier)
76 for lane in self.lane_set.order_by('lane_number'):
77 cluster_estimate = lane.cluster_estimate
78 if cluster_estimate is not None:
79 cluster_estimate = "%s k" % ((int(cluster_estimate)/1000), )
81 cluster_estimate = 'None'
82 library_id = lane.library_id
83 library = lane.library
84 element = '<tr><td>%d</td><td><a href="%s">%s</a></td><td>%s</td></tr>'
85 html.append(element % (lane.lane_number,
86 library.get_admin_url(),
89 html.append('</table>')
90 return "\n".join(html)
91 Lanes.allow_tags = True
94 ordering = ["-run_date"]
96 def get_admin_url(self):
97 # that's the django way... except it didn't work
98 return urlresolvers.reverse('admin:experiments_flowcell_change',
101 def flowcell_type(self):
103 Convert our boolean 'is paired' flag to a name
111 def get_absolute_url(self):
112 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
113 return ('htsworkflow.frontend.experiments.views.flowcell_detail',
def get_raw_data_directory(self):
    """Return the path where this flowcell's raw data is stored.

    The stored flowcell_id is passed through parse_flowcell_id; the first
    element of the resulting pair is joined onto settings.RESULT_HOME_DIR.
    """
    parsed = parse_flowcell_id(self.flowcell_id)
    # Only the id part names the directory; the second element is unused.
    bare_flowcell_id, _status = parsed
    return os.path.join(settings.RESULT_HOME_DIR, bare_flowcell_id)
122 def update_data_runs(self):
123 result_root = self.get_raw_data_directory()
124 logger.debug("Update data runs flowcell root: %s" % (result_root,))
125 if result_root is None:
128 result_home_dir = os.path.join(settings.RESULT_HOME_DIR,'')
129 run_xml_re = re.compile(glob.fnmatch.translate('run*.xml'))
131 dataruns = dict([ (x.result_dir, x) for x in self.datarun_set.all() ])
134 for dirpath, dirnames, filenames in os.walk(result_root):
135 for filename in filenames:
136 if run_xml_re.match(filename):
137 # we have a run directory
138 relative_pathname = get_relative_pathname(dirpath)
139 cached_run = dataruns.get(relative_pathname, None)
140 now = datetime.datetime.now()
141 if (cached_run is None):
142 self.import_data_run(relative_pathname, filename)
143 elif (now - cached_run.last_update_time).days > RESCAN_DELAY:
144 self.import_data_run(relative_pathname,
145 filename, cached_run)
147 def import_data_run(self, relative_pathname, run_xml_name, run=None):
148 """Given a result directory import files"""
149 run_dir = get_absolute_pathname(relative_pathname)
150 run_xml_path = os.path.join(run_dir, run_xml_name)
151 run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
152 logger.debug("Importing run from %s" % (relative_pathname,))
157 run.status = RUN_STATUS_REVERSE_MAP['DONE']
158 run.result_dir = relative_pathname
159 run.runfolder_name = run_xml_data.runfolder_name
160 run.cycle_start = run_xml_data.image_analysis.start
161 run.cycle_stop = run_xml_data.image_analysis.stop
162 run.run_start_time = run_xml_data.image_analysis.date
164 run.last_update_time = datetime.datetime.now()
167 run.update_result_files()
170 # FIXME: should we automatically update dataruns?
171 # Or should we expect someone to call update_data_runs?
172 #def update_flowcell_dataruns(sender, instance, *args, **kwargs):
173 # """Update our dataruns
175 # if not os.path.exists(settings.RESULT_HOME_DIR):
178 # instance.update_data_runs()
179 #post_init.connect(update_flowcell_dataruns, sender=FlowCell)
183 LANE_STATUS_CODES = [(0, 'Failed'),
186 LANE_STATUS_MAP = dict((int(k),v) for k,v in LANE_STATUS_CODES )
187 LANE_STATUS_MAP[None] = "Unknown"
189 def is_valid_lane(value):
190 if value >= 1 and value <= 8:
195 class Lane(models.Model):
196 flowcell = models.ForeignKey(FlowCell)
197 lane_number = models.IntegerField()
198 library = models.ForeignKey(Library)
199 pM = models.DecimalField(max_digits=5, decimal_places=2,blank=False, null=False,default=default_pM)
200 cluster_estimate = models.IntegerField(blank=True, null=True)
201 status = models.IntegerField(choices=LANE_STATUS_CODES, null=True, blank=True)
202 comment = models.TextField(null=True, blank=True)
205 def get_absolute_url(self):
206 return ('htsworkflow.frontend.experiments.views.flowcell_lane_detail',
def __unicode__(self):
    """Return '<flowcell id>:<lane number>' for this lane."""
    flowcell_label = self.flowcell.flowcell_id
    lane_label = unicode(self.lane_number)
    return flowcell_label + ':' + lane_label
212 ### -----------------------
213 class DataRun(models.Model):
214 flowcell = models.ForeignKey(FlowCell,verbose_name="Flowcell Id")
215 runfolder_name = models.CharField(max_length=50)
216 result_dir = models.CharField(max_length=255)
217 last_update_time = models.DateTimeField()
218 run_start_time = models.DateTimeField()
219 cycle_start = models.IntegerField(null=True, blank=True)
220 cycle_stop = models.IntegerField(null=True, blank=True)
221 run_status = models.IntegerField(choices=RUN_STATUS_CHOICES,
222 null=True, blank=True)
223 comment = models.TextField(blank=True)
225 def update_result_files(self):
226 abs_result_dir = get_absolute_pathname(self.result_dir)
228 for dirname, dirnames, filenames in os.walk(abs_result_dir):
229 for filename in filenames:
230 pathname = os.path.join(dirname, filename)
231 relative_pathname = get_relative_pathname(pathname)
232 datafiles = self.datafile_set.filter(
234 relative_pathname=relative_pathname)
235 if len(datafiles) > 0:
238 metadata = find_file_type_metadata_from_filename(filename)
239 if metadata is not None:
240 metadata['filename'] = filename
242 newfile.data_run = self
243 newfile.file_type = metadata['file_type']
244 newfile.relative_pathname = relative_pathname
246 lane_number = metadata.get('lane', None)
247 if lane_number is not None:
248 lane = self.flowcell.lane_set.get(lane_number = lane_number)
249 newfile.library = lane.library
251 self.datafile_set.add(newfile)
253 self.last_update_time = datetime.datetime.now()
255 def lane_files(self):
258 for datafile in self.datafile_set.all():
259 metadata = datafile.attributes
260 if metadata is not None:
261 lane = metadata.get('lane', None)
263 lane_file_set = lanes.setdefault(lane, {})
264 lane_file_set[datafile.file_type.normalized_name] = datafile
267 def ivc_plots(self, lane):
268 ivc_name = ['IVC All', 'IVC Call',
269 'IVC Percent Base', 'IVC Percent All', 'IVC Percent Call']
272 for rel_filename, metadata in self.get_result_files():
273 if metadata.file_type.name in ivc_name:
274 plots[metadata.file_type.name] = (rel_filename, metadata)
276 class FileType(models.Model):
277 """Represent potential file types
279 regex is a pattern used to detect if a filename matches this type
280 data run currently assumes that there may be a (?P<lane>) and
281 (?P<end>) pattern in the regular expression.
283 name = models.CharField(max_length=50)
284 mimetype = models.CharField(max_length=50, null=True, blank=True)
285 # regular expression from glob.fnmatch.translate
286 regex = models.CharField(max_length=50, null=True, blank=True)
288 def parse_filename(self, pathname):
289 """Does filename match our pattern?
291 Returns None if not, or dictionary of match variables if we do.
293 path, filename = os.path.split(pathname)
294 if len(self.regex) > 0:
295 match = re.match(self.regex, filename)
296 if match is not None:
297 # These are (?P<>) names we know about from our default regexes.
298 results = match.groupdict()
300 # convert int parameters
301 for attribute_name in ['lane', 'end']:
302 value = results.get(attribute_name, None)
303 if value is not None:
304 results[attribute_name] = int(value)
def _get_normalized_name(self):
    """Return the name lower-cased with spaces replaced by underscores."""
    underscored = self.name.replace(' ', '_')
    return underscored.lower()


# Read-only attribute exposing the identifier-friendly form of the name.
normalized_name = property(fget=_get_normalized_name)
313 def __unicode__(self):
314 #return u"<FileType: %s>" % (self.name,)
318 """Helper function to set default UUID in DataFile"""
319 return str(uuid.uuid1())
321 class DataFile(models.Model):
322 """Store map from random ID to filename"""
323 random_key = models.CharField(max_length=64,
326 data_run = models.ForeignKey(DataRun, db_index=True)
327 library = models.ForeignKey(Library, db_index=True, null=True, blank=True)
328 file_type = models.ForeignKey(FileType)
329 relative_pathname = models.CharField(max_length=255, db_index=True)
def _get_attributes(self):
    """Parse this file's relative pathname using its file type.

    Returns whatever file_type.parse_filename returns for
    relative_pathname.
    """
    file_type = self.file_type
    return file_type.parse_filename(self.relative_pathname)


# Read-only attribute exposing the parsed filename metadata.
attributes = property(fget=_get_attributes)
def _get_pathname(self):
    """Return the absolute path for this file's relative_pathname."""
    relative = self.relative_pathname
    return get_absolute_pathname(relative)


# Read-only attribute exposing the absolute location of the file.
pathname = property(fget=_get_pathname)
def get_absolute_url(self):
    """Return the (view name, args, kwargs) triple addressing this file.

    The positional args are empty; the kwargs carry this file's random_key
    under 'key'.
    """
    view_name = 'htsworkflow.frontend.experiments.views.read_result_file'
    url_kwargs = {'key': self.random_key}
    return (view_name, (), url_kwargs)
344 def find_file_type_metadata_from_filename(pathname):
345 path, filename = os.path.split(pathname)
347 for file_type in FileType.objects.all():
348 result = file_type.parse_filename(filename)
349 if result is not None:
350 result['file_type'] = file_type
def get_relative_pathname(abspath):
    """Strip off the result home directory from a path.

    Only a leading settings.RESULT_HOME_DIR prefix (with its trailing
    separator) is removed.  A path that does not start with that prefix is
    returned unchanged.

    The previous implementation used str.replace, which also deleted any
    later occurrence of the directory inside the path and so could corrupt
    nested paths.
    """
    # os.path.join(..., '') guarantees exactly one trailing separator, so
    # '/data' cannot accidentally match '/database/...'.
    result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
    if abspath.startswith(result_home_dir):
        return abspath[len(result_home_dir):]
    return abspath
def get_absolute_pathname(relative_pathname):
    """Join a relative path onto settings.RESULT_HOME_DIR and return it."""
    result_home = settings.RESULT_HOME_DIR
    return os.path.join(result_home, relative_pathname)