9 from django.conf import settings
10 from django.core.exceptions import ObjectDoesNotExist
11 from django.core import urlresolvers
12 from django.db import models
13 from django.db.models.signals import post_init
15 from htsworkflow.frontend.samples.models import Library
16 from htsworkflow.util.conversion import parse_flowcell_id
17 from htsworkflow.pipelines import runfolder
19 logger = logging.getLogger(__name__)
22 default_pM = int(settings.DEFAULT_PM)
24 logger.error("invalid value for frontend.default_pm")
26 # how many days to wait before trying to re-import a runfolder
29 RESCAN_DELAY = int(settings.RESCAN_DELAY)
30 except (ValueError, AttributeError):
31 logger.error("Missing or invalid settings.RESCAN_DELAY, "\
32 "defaulting to %s" % (RESCAN_DELAY,))
34 RUN_STATUS_CHOICES = (
35 (0, 'Sequencer running'), ##Solexa Data Pipeline Not Yet Started'),
36 (1, 'Data Pipeline Started'),
37 (2, 'Data Pipeline Interrupted'),
38 (3, 'Data Pipeline Finished'),
39 (4, 'Collect Results Started'),
40 (5, 'Collect Results Finished'),
# Inverse of RUN_STATUS_CHOICES: look up the numeric status code from its
# human-readable label.
RUN_STATUS_REVERSE_MAP = dict(
    (label, code) for code, label in RUN_STATUS_CHOICES
)
class ClusterStation(models.Model):
    """A cluster station, identified by its unique name."""

    # Display name of the station; the field itself enforces uniqueness.
    name = models.CharField(max_length=50, unique=True)

    def __unicode__(self):
        """Return the station name as a unicode string."""
        label = self.name
        return unicode(label)
53 class Sequencer(models.Model):
54 name = models.CharField(max_length=50, db_index=True)
55 instrument_name = models.CharField(max_length=50, db_index=True)
56 serial_number = models.CharField(max_length=50, db_index=True)
57 model = models.CharField(max_length=255)
58 active = models.BooleanField(default=True, null=False)
59 comment = models.CharField(max_length=255)
62 ordering = ["-active", "name"]
64 def __unicode__(self):
65 name = [unicode(self.name)]
66 if self.instrument_name is not None:
67 name.append("(%s)" % (unicode(self.instrument_name),))
72 def get_absolute_url(self):
73 return ('htsworkflow.frontend.experiments.views.sequencer',
77 class FlowCell(models.Model):
78 flowcell_id = models.CharField(max_length=20, unique=True, db_index=True)
79 run_date = models.DateTimeField()
80 advanced_run = models.BooleanField(default=False)
81 paired_end = models.BooleanField(default=False)
82 read_length = models.IntegerField(default=32) #Stanford is currenlty 25
83 control_lane = models.IntegerField(choices=[(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(0,'All Lanes')], null=True, blank=True)
85 cluster_station = models.ForeignKey(ClusterStation, default=3)
86 sequencer = models.ForeignKey(Sequencer, default=1)
88 notes = models.TextField(blank=True)
90 def __unicode__(self):
91 return unicode(self.flowcell_id)
95 for lane in self.lane_set.order_by('lane_number'):
96 cluster_estimate = lane.cluster_estimate
97 if cluster_estimate is not None:
98 cluster_estimate = "%s k" % ((int(cluster_estimate)/1000), )
100 cluster_estimate = 'None'
101 library_id = lane.library_id
102 library = lane.library
103 element = '<tr><td>%d</td><td><a href="%s">%s</a></td><td>%s</td></tr>'
104 html.append(element % (lane.lane_number,
105 library.get_admin_url(),
108 html.append('</table>')
109 return "\n".join(html)
110 Lanes.allow_tags = True
113 ordering = ["-run_date"]
115 def get_admin_url(self):
116 # that's the django way... except it didn't work
117 return urlresolvers.reverse('admin:experiments_flowcell_change',
120 def flowcell_type(self):
122 Convert our boolean 'is paired' flag to a name
130 def get_absolute_url(self):
131 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
132 return ('htsworkflow.frontend.experiments.views.flowcell_detail',
135 def get_raw_data_directory(self):
136 """Return location of where the raw data is stored"""
137 flowcell_id, status = parse_flowcell_id(self.flowcell_id)
139 return os.path.join(settings.RESULT_HOME_DIR, flowcell_id)
141 def update_data_runs(self):
142 result_root = self.get_raw_data_directory()
143 logger.debug("Update data runs flowcell root: %s" % (result_root,))
144 if result_root is None:
147 result_home_dir = os.path.join(settings.RESULT_HOME_DIR,'')
148 run_xml_re = re.compile(glob.fnmatch.translate('run*.xml'))
150 dataruns = dict([ (x.result_dir, x) for x in self.datarun_set.all() ])
153 for dirpath, dirnames, filenames in os.walk(result_root):
154 for filename in filenames:
155 if run_xml_re.match(filename):
156 # we have a run directory
157 relative_pathname = get_relative_pathname(dirpath)
158 cached_run = dataruns.get(relative_pathname, None)
159 now = datetime.datetime.now()
160 if (cached_run is None):
161 self.import_data_run(relative_pathname, filename)
162 elif (now - cached_run.last_update_time).days > RESCAN_DELAY:
163 self.import_data_run(relative_pathname,
164 filename, cached_run)
166 def import_data_run(self, relative_pathname, run_xml_name, run=None):
167 """Given a result directory import files"""
168 run_dir = get_absolute_pathname(relative_pathname)
169 run_xml_path = os.path.join(run_dir, run_xml_name)
170 run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
171 logger.debug("Importing run from %s" % (relative_pathname,))
176 run.status = RUN_STATUS_REVERSE_MAP['DONE']
177 run.result_dir = relative_pathname
178 run.runfolder_name = run_xml_data.runfolder_name
179 run.cycle_start = run_xml_data.image_analysis.start
180 run.cycle_stop = run_xml_data.image_analysis.stop
181 run.run_start_time = run_xml_data.image_analysis.date
182 run.image_software = run_xml_data.image_analysis.software
183 run.image_version = run_xml_data.image_analysis.version
184 run.basecall_software = run_xml_data.bustard.software
185 run.basecall_version = run_xml_data.bustard.version
186 run.alignment_software = run_xml_data.gerald.software
187 run.alignment_version = run_xml_data.gerald.version
189 run.last_update_time = datetime.datetime.now()
192 run.update_result_files()
195 # FIXME: should we automatically update dataruns?
196 # Or should we expect someone to call update_data_runs?
197 #def update_flowcell_dataruns(sender, instance, *args, **kwargs):
198 # """Update our dataruns
200 # if not os.path.exists(settings.RESULT_HOME_DIR):
203 # instance.update_data_runs()
204 #post_init.connect(update_flowcell_dataruns, sender=FlowCell)
208 LANE_STATUS_CODES = [(0, 'Failed'),
# Lookup table from integer lane status code to its display label.
LANE_STATUS_MAP = dict(
    (int(code), label) for code, label in LANE_STATUS_CODES
)
# A lane whose status was never set (None) is reported as "Unknown".
LANE_STATUS_MAP[None] = "Unknown"
def is_valid_lane(value):
    """Report whether value is an acceptable lane number.

    Lane numbers run from 1 through 8 inclusive.

    Returns True when value falls inside that range and False otherwise.
    """
    # A chained comparison yields the boolean directly, so no if/else
    # branches are needed.
    return 1 <= value <= 8
220 class Lane(models.Model):
221 flowcell = models.ForeignKey(FlowCell)
222 lane_number = models.IntegerField()
223 library = models.ForeignKey(Library)
224 pM = models.DecimalField(max_digits=5, decimal_places=2,blank=False, null=False,default=default_pM)
225 cluster_estimate = models.IntegerField(blank=True, null=True)
226 status = models.IntegerField(choices=LANE_STATUS_CODES, null=True, blank=True)
227 comment = models.TextField(null=True, blank=True)
230 def get_absolute_url(self):
231 return ('htsworkflow.frontend.experiments.views.flowcell_lane_detail',
234 def __unicode__(self):
235 return self.flowcell.flowcell_id + ':' + unicode(self.lane_number)
237 ### -----------------------
238 class DataRun(models.Model):
239 flowcell = models.ForeignKey(FlowCell,verbose_name="Flowcell Id")
240 runfolder_name = models.CharField(max_length=50)
241 result_dir = models.CharField(max_length=255)
242 last_update_time = models.DateTimeField()
243 run_start_time = models.DateTimeField()
244 cycle_start = models.IntegerField(null=True, blank=True)
245 cycle_stop = models.IntegerField(null=True, blank=True)
246 run_status = models.IntegerField(choices=RUN_STATUS_CHOICES,
247 null=True, blank=True)
248 image_software = models.CharField(max_length=50)
249 image_version = models.CharField(max_length=50)
250 basecall_software = models.CharField(max_length=50)
251 basecall_version = models.CharField(max_length=50)
252 alignment_software = models.CharField(max_length=50)
253 alignment_version = models.CharField(max_length=50)
254 comment = models.TextField(blank=True)
256 def update_result_files(self):
257 abs_result_dir = get_absolute_pathname(self.result_dir)
259 for dirname, dirnames, filenames in os.walk(abs_result_dir):
260 for filename in filenames:
261 pathname = os.path.join(dirname, filename)
262 relative_pathname = get_relative_pathname(pathname)
263 datafiles = self.datafile_set.filter(
265 relative_pathname=relative_pathname)
266 if len(datafiles) > 0:
269 metadata = find_file_type_metadata_from_filename(filename)
270 if metadata is not None:
271 metadata['filename'] = filename
273 newfile.data_run = self
274 newfile.file_type = metadata['file_type']
275 newfile.relative_pathname = relative_pathname
277 lane_number = metadata.get('lane', None)
278 if lane_number is not None:
279 lane = self.flowcell.lane_set.get(lane_number = lane_number)
280 newfile.library = lane.library
282 self.datafile_set.add(newfile)
284 self.last_update_time = datetime.datetime.now()
286 def lane_files(self):
289 for datafile in self.datafile_set.all():
290 metadata = datafile.attributes
291 if metadata is not None:
292 lane = metadata.get('lane', None)
294 lane_file_set = lanes.setdefault(lane, {})
295 lane_file_set[datafile.file_type.normalized_name] = datafile
298 def ivc_plots(self, lane):
299 ivc_name = ['IVC All', 'IVC Call',
300 'IVC Percent Base', 'IVC Percent All', 'IVC Percent Call']
303 for rel_filename, metadata in self.get_result_files():
304 if metadata.file_type.name in ivc_name:
305 plots[metadata.file_type.name] = (rel_filename, metadata)
307 class FileType(models.Model):
308 """Represent potential file types
310 regex is a pattern used to detect if a filename matches this type
311 data run currently assumes that there may be a (?P<lane>) and
312 (?P<end>) pattern in the regular expression.
314 name = models.CharField(max_length=50)
315 mimetype = models.CharField(max_length=50, null=True, blank=True)
316 # regular expression from glob.fnmatch.translate
317 regex = models.CharField(max_length=50, null=True, blank=True)
319 def parse_filename(self, pathname):
320 """Does filename match our pattern?
322 Returns None if not, or dictionary of match variables if we do.
324 path, filename = os.path.split(pathname)
325 if len(self.regex) > 0:
326 match = re.match(self.regex, filename)
327 if match is not None:
328 # These are (?P<>) names we know about from our default regexes.
329 results = match.groupdict()
331 # convert int parameters
332 for attribute_name in ['lane', 'end']:
333 value = results.get(attribute_name, None)
334 if value is not None:
335 results[attribute_name] = int(value)
339 def _get_normalized_name(self):
340 """Crush data file name into identifier friendly name"""
341 return self.name.replace(' ', '_').lower()
342 normalized_name = property(_get_normalized_name)
344 def __unicode__(self):
345 #return u"<FileType: %s>" % (self.name,)
349 """Helper function to set default UUID in DataFile"""
350 return str(uuid.uuid1())
352 class DataFile(models.Model):
353 """Store map from random ID to filename"""
354 random_key = models.CharField(max_length=64,
357 data_run = models.ForeignKey(DataRun, db_index=True)
358 library = models.ForeignKey(Library, db_index=True, null=True, blank=True)
359 file_type = models.ForeignKey(FileType)
360 relative_pathname = models.CharField(max_length=255, db_index=True)
362 def _get_attributes(self):
363 return self.file_type.parse_filename(self.relative_pathname)
364 attributes = property(_get_attributes)
366 def _get_pathname(self):
367 return get_absolute_pathname(self.relative_pathname)
368 pathname = property(_get_pathname)
371 def get_absolute_url(self):
372 return ('htsworkflow.frontend.experiments.views.read_result_file',
373 (), {'key': self.random_key })
def find_file_type_metadata_from_filename(pathname):
    """Find the first FileType whose pattern matches a file name.

    Only the final path component of pathname is matched. Every FileType
    row is tried in the order the query returns them.

    Returns the dictionary produced by FileType.parse_filename for the
    first matching type, with that FileType stored under the 'file_type'
    key, or None when no file type matches.
    """
    # Only the file name is needed; the directory part is irrelevant here.
    filename = os.path.basename(pathname)

    for file_type in FileType.objects.all():
        result = file_type.parse_filename(filename)
        if result is not None:
            # Record which type matched so callers can attach it to the file.
            result['file_type'] = file_type
            return result

    return None
def get_relative_pathname(abspath):
    """Strip off the result home directory from a path.

    Only a leading settings.RESULT_HOME_DIR (including its trailing path
    separator) is removed. A path that does not start with the result
    home directory is returned unchanged.
    """
    # Joining with '' appends a trailing separator when one is missing, so
    # a home of '/results' cannot match a sibling like '/results-old/...'.
    result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
    if abspath.startswith(result_home_dir):
        # Slice instead of str.replace so that a later repeat of the same
        # text inside the path is left intact.
        return abspath[len(result_home_dir):]
    return abspath
def get_absolute_pathname(relative_pathname):
    """Return relative_pathname joined onto the results home directory."""
    result_home = settings.RESULT_HOME_DIR
    return os.path.join(result_home, relative_pathname)