logger = logging.getLogger(__name__)
default_pM = 5
try:
- default_pM = int(settings.DEFAULT_PM)
+ default_pM = int(settings.DEFAULT_PM)
except ValueError:
- logger.error("invalid value for frontend.default_pm")
+ logger.error("invalid value for frontend.default_pm")
+
+# how many days to wait before trying to re-import a runfolder
+RESCAN_DELAY = 1
+try:
+ RESCAN_DELAY = int(settings.RESCAN_DELAY)
+except (ValueError, AttributeError):
+ logger.error("Missing or invalid settings.RESCAN_DELAY")
RUN_STATUS_CHOICES = (
(0, 'Sequencer running'),  ## was 'Solexa Data Pipeline Not Yet Started'
cluster_station = models.ForeignKey(ClusterStation, default=3)
sequencer = models.ForeignKey(Sequencer, default=1)
-
+
notes = models.TextField(blank=True)
def __unicode__(self):
- return unicode(self.flowcell_id)
+ return unicode(self.flowcell_id)
def Lanes(self):
html = ['<table>']
flowcell_id, status = parse_flowcell_id(self.flowcell_id)
return ('htsworkflow.frontend.experiments.views.flowcell_detail',
[str(flowcell_id)])
-
+
def get_raw_data_directory(self):
"""Return location of where the raw data is stored"""
flowcell_id, status = parse_flowcell_id(self.flowcell_id)
def update_data_runs(self):
result_root = self.get_raw_data_directory()
+ logger.debug("Update data runs flowcell root: %s" % (result_root,))
if result_root is None:
return
result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
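+ # fnmatch.translate turns the 'run*.xml' glob into an equivalent regex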
run_xml_re = re.compile(glob.fnmatch.translate('run*.xml'))
-
- dataruns = self.datarun_set.all()
- datarun_result_dirs = [ x.result_dir for x in dataruns ]
+
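+ # index the existing DataRuns by result_dir so rescans can reuse them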
+ dataruns = dict((x.result_dir, x) for x in self.datarun_set.all())
result_dirs = []
for dirpath, dirnames, filenames in os.walk(result_root):
for filename in filenames:
if run_xml_re.match(filename):
# we have a run directory
relative_pathname = get_relative_pathname(dirpath)
- if relative_pathname not in datarun_result_dirs:
+ cached_run = dataruns.get(relative_pathname, None)
+ now = datetime.datetime.now()
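+ # unseen directories are imported; known ones are re-imported once they are older than RESCAN_DELAY days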
+ if cached_run is None:
self.import_data_run(relative_pathname, filename)
-
- def import_data_run(self, relative_pathname, run_xml_name):
+ elif (now - cached_run.last_update_time).days > RESCAN_DELAY:
+ self.import_data_run(relative_pathname,
+ filename, cached_run)
+
+ def import_data_run(self, relative_pathname, run_xml_name, run=None):
"""Given a result directory import files"""
run_dir = get_absolute_pathname(relative_pathname)
run_xml_path = os.path.join(run_dir, run_xml_name)
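+ # parse the pipeline-generated run*.xml summary for this runfolder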
run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
-
- run = DataRun()
- run.flowcell = self
- run.status = RUN_STATUS_REVERSE_MAP['DONE']
- run.result_dir = relative_pathname
- run.runfolder_name = run_xml_data.runfolder_name
- run.cycle_start = run_xml_data.image_analysis.start
- run.cycle_stop = run_xml_data.image_analysis.stop
- run.run_start_time = run_xml_data.image_analysis.date
+ logger.debug("Importing run from %s" % (relative_pathname,))
+
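+ # reuse the cached DataRun on re-import; only a brand new directory gets a fresh record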
+ if run is None:
+ run = DataRun()
+ run.flowcell = self
+ run.status = RUN_STATUS_REVERSE_MAP['DONE']
+ run.result_dir = relative_pathname
+ run.runfolder_name = run_xml_data.runfolder_name
+ run.cycle_start = run_xml_data.image_analysis.start
+ run.cycle_stop = run_xml_data.image_analysis.stop
+ run.run_start_time = run_xml_data.image_analysis.date
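+ # always refresh the timestamp so the RESCAN_DELAY check above can tell fresh runs from stale ones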
run.last_update_time = datetime.datetime.now()
run.save()
run.update_result_files()
-
+
# FIXME: should we automatically update dataruns?
# Or should we expect someone to call update_data_runs?
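+# (note: a post_init hook would walk the result directory every time a FlowCell is loaded)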
#def update_flowcell_dataruns(sender, instance, *args, **kwargs):
# if not os.path.exists(settings.RESULT_HOME_DIR):
# return
#
-# instance.update_data_runs()
+# instance.update_data_runs()
#post_init.connect(update_flowcell_dataruns, sender=FlowCell)
class Lane(models.Model):
flowcell = models.ForeignKey(FlowCell)
- lane_number = models.IntegerField()
+ lane_number = models.IntegerField()
library = models.ForeignKey(Library)
pM = models.DecimalField(max_digits=5, decimal_places=2, blank=False, null=False, default=default_pM)
- cluster_estimate = models.IntegerField(blank=True, null=True)
- status = models.IntegerField(choices=LANE_STATUS_CODES, null=True, blank=True)
+ cluster_estimate = models.IntegerField(blank=True, null=True)
+ status = models.IntegerField(choices=LANE_STATUS_CODES, null=True, blank=True)
comment = models.TextField(null=True, blank=True)
@models.permalink
run_start_time = models.DateTimeField()
cycle_start = models.IntegerField(null=True, blank=True)
cycle_stop = models.IntegerField(null=True, blank=True)
- run_status = models.IntegerField(choices=RUN_STATUS_CHOICES,
+ run_status = models.IntegerField(choices=RUN_STATUS_CHOICES,
null=True, blank=True)
comment = models.TextField(blank=True)
relative_pathname=relative_pathname)
if len(datafiles) > 0:
continue
-
+
metadata = find_file_type_metadata_from_filename(filename)
if metadata is not None:
metadata['filename'] = filename
if lane_number is not None:
lane = self.flowcell.lane_set.get(lane_number=lane_number)
newfile.library = lane.library
-
+
self.datafile_set.add(newfile)
-
+
self.last_update_time = datetime.datetime.now()
def lane_files(self):
lanes = {}
-
+
for datafile in self.datafile_set.all():
metadata = datafile.attributes
if metadata is not None:
for rel_filename, metadata in self.get_result_files():
if metadata.file_type.name in ivc_name:
plots[metadata.file_type.name] = (rel_filename, metadata)
-
+
class FileType(models.Model):
"""Represent potential file types
value = results.get(attribute_name, None)
if value is not None:
results[attribute_name] = int(value)
-
+
return results
def _get_normalized_name(self):
"""Crush data file name into identifier friendly name"""
return self.name.replace(' ', '_').lower()
normalized_name = property(_get_normalized_name)
-
+
def __unicode__(self):
#return u"<FileType: %s>" % (self.name,)
return self.name
return result
return None
-
+
def get_relative_pathname(abspath):
"""Strip off the result home directory from a path
"""