X-Git-Url: http://woldlab.caltech.edu/gitweb/?a=blobdiff_plain;f=htsworkflow%2Ffrontend%2Fexperiments%2Fmodels.py;h=600b9174258098488ecff81b2f75a4f61391a065;hb=c1c4d68823ef12ca7088931a402894d3e856bff6;hp=1b3674c2a3930fc5d382b5ed204ba9f2e1e7bd80;hpb=eff287e0df7a104a140e3e04cb636219ad01f50b;p=htsworkflow.git

diff --git a/htsworkflow/frontend/experiments/models.py b/htsworkflow/frontend/experiments/models.py
index 1b3674c..600b917 100644
--- a/htsworkflow/frontend/experiments/models.py
+++ b/htsworkflow/frontend/experiments/models.py
@@ -9,6 +9,7 @@ import uuid
 from django.conf import settings
 from django.core.exceptions import ObjectDoesNotExist
 from django.core import urlresolvers
+from django.utils import timezone
 from django.db import models
 from django.db.models.signals import post_init, pre_save
 
@@ -20,7 +21,7 @@ LOGGER = logging.getLogger(__name__)
 default_pM = 5
 try:
     default_pM = int(settings.DEFAULT_PM)
-except ValueError, e:
+except AttributeError, e:
     LOGGER.error("invalid value for frontend.default_pm")
 
 # how many days to wait before trying to re-import a runfolder
@@ -209,32 +210,34 @@ class FlowCell(models.Model):
         result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
         run_xml_re = re.compile(glob.fnmatch.translate('run*.xml'))
 
-        dataruns = dict([(x.result_dir, x) for x in self.datarun_set.all()])
-
         result_dirs = []
         for dirpath, dirnames, filenames in os.walk(result_root):
             for filename in filenames:
                 if run_xml_re.match(filename):
                     # we have a run directory
                     relative_pathname = get_relative_pathname(dirpath)
-                    cached_run = dataruns.get(relative_pathname, None)
-                    now = datetime.datetime.now()
-                    if (cached_run is None):
-                        self.import_data_run(relative_pathname, filename)
-                    elif (now - cached_run.last_update_time).days > \
-                             RESCAN_DELAY:
-                        self.import_data_run(relative_pathname,
-                                             filename, cached_run)
-
-    def import_data_run(self, relative_pathname, run_xml_name, run=None):
+                    self.import_data_run(relative_pathname, filename)
+
+    def import_data_run(self, relative_pathname, run_xml_name, force=False):
         """Given a result directory import files"""
+        now = timezone.now()
         run_dir = get_absolute_pathname(relative_pathname)
         run_xml_path = os.path.join(run_dir, run_xml_name)
 
-        run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
-        LOGGER.debug("Importing run from %s" % (relative_pathname,))
-        if run is None:
+        runs = DataRun.objects.filter(result_dir = relative_pathname)
+        if len(runs) == 0:
             run = DataRun()
+            created = True
+        elif len(runs) > 1:
+            raise RuntimeError("Too many data runs for %s" % (
+                relative_pathname,))
+        else:
+            run = runs[0]
+            created = False
+
+        if created or force or (now-run.last_update_time).days > RESCAN_DELAY:
+            LOGGER.debug("Importing run from %s" % (relative_pathname,))
+            run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
             run.flowcell = self
             run.status = RUN_STATUS_REVERSE_MAP['DONE']
             run.result_dir = relative_pathname
@@ -246,13 +249,15 @@ class FlowCell(models.Model):
             run.image_version = run_xml_data.image_analysis.version
             run.basecall_software = run_xml_data.bustard.software
             run.basecall_version = run_xml_data.bustard.version
-            run.alignment_software = run_xml_data.gerald.software
-            run.alignment_version = run_xml_data.gerald.version
+            # we're frequently not running alignments
+            if run_xml_data.gerald:
+                run.alignment_software = run_xml_data.gerald.software
+                run.alignment_version = run_xml_data.gerald.version
 
-        run.last_update_time = datetime.datetime.now()
-        run.save()
+            run.last_update_time = timezone.now()
+            run.save()
 
-        run.update_result_files()
+            run.update_result_files()
 
     # FIXME: should we automatically update dataruns?
 
@@ -352,7 +357,7 @@ class DataRun(models.Model):
 
             self.datafile_set.add(newfile)
 
-        self.last_update_time = datetime.datetime.now()
+        self.last_update_time = timezone.now()
 
     def lane_files(self):
         lanes = {}
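
A note on the refactor above (commentary, not part of the commit): the new
import_data_run() hand-rolls a lookup-or-create against DataRun keyed on
result_dir, then skips the expensive run.xml parse unless the record is new,
stale (last_update_time more than RESCAN_DELAY days old), or force=True. The
parallel switch from datetime.datetime.now() to timezone.now() matters because
once Django stores timezone-aware timestamps (USE_TZ = True), subtracting a
naive datetime from an aware one raises TypeError. Below is a minimal sketch
of the same pattern using Django's built-in QuerySet.get_or_create(); DataRun,
RESCAN_DELAY, and last_update_time are taken from the diff, everything else is
illustrative and assumes a Django version that ships django.utils.timezone
(1.4 or later), not the committed code:

    from django.utils import timezone

    # Sketch of a FlowCell method mirroring import_data_run() above.
    def import_data_run(self, relative_pathname, run_xml_name, force=False):
        """Re-import a run directory only when new, stale, or forced."""
        now = timezone.now()  # aware; a naive datetime.datetime.now()
                              # would break the subtraction below
        # get_or_create() returns (instance, created) and raises
        # MultipleObjectsReturned on duplicate result_dir rows, standing
        # in for the explicit len(runs) > 1 RuntimeError in the diff.
        # defaults must cover any NOT NULL fields so the initial save
        # succeeds; the two shown here are assumptions.
        run, created = DataRun.objects.get_or_create(
            result_dir=relative_pathname,
            defaults={'flowcell': self, 'last_update_time': now})
        stale = (now - run.last_update_time).days > RESCAN_DELAY
        if created or force or stale:
            # ... parse run.xml and fill in the run fields as above ...
            run.last_update_time = timezone.now()
            run.save()
            run.update_result_files()

One trade-off worth noting: get_or_create() writes a row before run.xml is
parsed, so a failed parse would leave a mostly-empty DataRun behind, whereas
the hand-rolled version in the diff only saves after a successful import.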