X-Git-Url: http://woldlab.caltech.edu/gitweb/?a=blobdiff_plain;f=htsworkflow%2Ffrontend%2Fexperiments%2Fmodels.py;h=5152c406e2fc55c87bd997ed7d4ee14197339859;hb=4a2529df5d2bdaa83d1661730c3791560d58df37;hp=c66a639cae0ab669ce52b3fb3b17e7b765930bbe;hpb=8749d93aec498725bc4a3bc18fae0388e64844fc;p=htsworkflow.git

diff --git a/htsworkflow/frontend/experiments/models.py b/htsworkflow/frontend/experiments/models.py
index c66a639..5152c40 100644
--- a/htsworkflow/frontend/experiments/models.py
+++ b/htsworkflow/frontend/experiments/models.py
@@ -10,28 +10,29 @@ from django.conf import settings
 from django.core.exceptions import ObjectDoesNotExist
 from django.core import urlresolvers
 from django.db import models
-from django.db.models.signals import post_init
+from django.db.models.signals import post_init, pre_save
 
 from htsworkflow.frontend.samples.models import Library
 from htsworkflow.util.conversion import parse_flowcell_id
 from htsworkflow.pipelines import runfolder
 
-logger = logging.getLogger(__name__)
+LOGGER = logging.getLogger(__name__)
 default_pM = 5
 try:
     default_pM = int(settings.DEFAULT_PM)
-except ValueError,e:
-    logger.error("invalid value for frontend.default_pm")
+except ValueError, e:
+    LOGGER.error("invalid value for frontend.default_pm")
 
 # how many days to wait before trying to re-import a runfolder
 RESCAN_DELAY = 1
 try:
     RESCAN_DELAY = int(settings.RESCAN_DELAY)
 except (ValueError, AttributeError):
-    logger.error("Missing or invalid settings.RESCAN_DELAY")
+    LOGGER.error("Missing or invalid settings.RESCAN_DELAY, "\
+                 "defaulting to %s" % (RESCAN_DELAY,))
 
 RUN_STATUS_CHOICES = (
-    (0, 'Sequencer running'), ##Solexa Data Pipeline Not Yet Started'),
+    (0, 'Sequencer running'),  # Solexa Data Pipeline Not Yet Started'),
     (1, 'Data Pipeline Started'),
     (2, 'Data Pipeline Interrupted'),
     (3, 'Data Pipeline Finished'),
@@ -41,130 +42,221 @@ RUN_STATUS_CHOICES = (
     (7, 'QC Finished'),
     (255, 'DONE'),
   )
-RUN_STATUS_REVERSE_MAP = dict(((v,k) for k,v in RUN_STATUS_CHOICES))
+RUN_STATUS_REVERSE_MAP = dict(((v, k) for k, v in RUN_STATUS_CHOICES))
+
 
 class ClusterStation(models.Model):
-  name = models.CharField(max_length=50, unique=True)
+    """List of cluster stations"""
+    name = models.CharField(max_length=50, unique=True)
+    isdefault = models.BooleanField(default=False, null=False)
+
+    class Meta:
+        ordering = ["-isdefault", "name"]
+
+    def __unicode__(self):
+        return unicode(self.name)
+
+    @classmethod
+    def default(cls):
+        d = cls.objects.filter(isdefault=True).all()
+        if len(d) > 0:
+            return d[0]
+        d = cls.objects.order_by('-id').all()
+        if len(d) > 0:
+            return d[0]
+        return None
+
+    @staticmethod
+    def update_isdefault(sender, instance, **kwargs):
+        """Clear default if needed
+        """
+        if instance.isdefault:
+            for c in ClusterStation.objects.filter(isdefault=True).all():
+                if c.id != instance.id:
+                    c.isdefault = False
+                    c.save()
 
-  def __unicode__(self):
-    return unicode(self.name)
+pre_save.connect(ClusterStation.update_isdefault, sender=ClusterStation)
 
 class Sequencer(models.Model):
-  name = models.CharField(max_length=50, unique=True)
+    """Sequencers we've owned
+    """
+    name = models.CharField(max_length=50, db_index=True)
+    instrument_name = models.CharField(max_length=50, db_index=True)
+    serial_number = models.CharField(max_length=50, db_index=True)
+    model = models.CharField(max_length=255)
+    active = models.BooleanField(default=True, null=False)
+    isdefault = models.BooleanField(default=False, null=False)
+    comment = models.CharField(max_length=255)
+
+    class Meta:
+        ordering = ["-isdefault", "-active", "name"]
+
+    def __unicode__(self):
+        name = [unicode(self.name)]
+        if self.instrument_name is not None:
+            name.append("(%s)" % (unicode(self.instrument_name),))
+        return " ".join(name)
+
+    @models.permalink
+    def get_absolute_url(self):
+        return ('htsworkflow.frontend.experiments.views.sequencer',
+                [self.id])
+
+    @classmethod
+    def default(cls):
+        d = cls.objects.filter(isdefault=True).all()
+        if len(d) > 0:
+            return d[0]
+        d = cls.objects.order_by('active', '-id').all()
+        if len(d) > 0:
+            return d[0]
+        return None
+
+    @staticmethod
+    def update_isdefault(sender, instance, **kwargs):
+        """Clear default if needed
+        """
+        if instance.isdefault:
+            for s in Sequencer.objects.filter(isdefault=True).all():
+                if s.id != instance.id:
+                    s.isdefault = False
+                    s.save()
+
+pre_save.connect(Sequencer.update_isdefault, sender=Sequencer)
 
-  def __unicode__(self):
-    return unicode(self.name)
 
 class FlowCell(models.Model):
-  flowcell_id = models.CharField(max_length=20, unique=True, db_index=True)
-  run_date = models.DateTimeField()
-  advanced_run = models.BooleanField(default=False)
-  paired_end = models.BooleanField(default=False)
-  read_length = models.IntegerField(default=32) #Stanford is currenlty 25
-  control_lane = models.IntegerField(choices=[(1,1),(2,2),(3,3),(4,4),(5,5),(6,6),(7,7),(8,8),(0,'All Lanes')], null=True, blank=True)
-
-  cluster_station = models.ForeignKey(ClusterStation, default=3)
-  sequencer = models.ForeignKey(Sequencer, default=1)
-
-  notes = models.TextField(blank=True)
-
-  def __unicode__(self):
-      return unicode(self.flowcell_id)
-
-  def Lanes(self):
-    html = ['<table>']
-    for lane in self.lane_set.order_by('lane_number'):
-        cluster_estimate = lane.cluster_estimate
-        if cluster_estimate is not None:
-            cluster_estimate = "%s k" % ((int(cluster_estimate)/1000), )
+    flowcell_id = models.CharField(max_length=20, unique=True, db_index=True)
+    run_date = models.DateTimeField()
+    advanced_run = models.BooleanField(default=False)
+    paired_end = models.BooleanField(default=False)
+    read_length = models.IntegerField(default=32)  # Stanford is currenlty 25
+    control_lane = models.IntegerField(choices=[(1, 1),
+                                                (2, 2),
+                                                (3, 3),
+                                                (4, 4),
+                                                (5, 5),
+                                                (6, 6),
+                                                (7, 7),
+                                                (8, 8),
+                                                (0, 'All Lanes')],
+                                       null=True,
+                                       blank=True)
+
+    cluster_station = models.ForeignKey(ClusterStation, default=ClusterStation.default)
+    sequencer = models.ForeignKey(Sequencer, default=Sequencer.default)
+
+    notes = models.TextField(blank=True)
+
+    def __unicode__(self):
+        return unicode(self.flowcell_id)
+
+    def Lanes(self):
+        html = ['<table>']
+        for lane in self.lane_set.order_by('lane_number'):
+            cluster_estimate = lane.cluster_estimate
+            if cluster_estimate is not None:
+                cluster_estimate = "%s k" % ((int(cluster_estimate) / 1000), )
+            else:
+                cluster_estimate = 'None'
+            library_id = lane.library_id
+            library = lane.library
+            element = '<tr><td>%d</td>'\
+                      '<td><a href="%s">%s</a></td><td>%s</td></tr>'
+            html.append(element % (lane.lane_number,
+                                   library.get_admin_url(),
+                                   library,
+                                   cluster_estimate))
+        html.append('</table>')
+        return "\n".join(html)
+    Lanes.allow_tags = True
+
+    class Meta:
+        ordering = ["-run_date"]
+
+    def get_admin_url(self):
+        # that's the django way... except it didn't work
+        return urlresolvers.reverse('admin:experiments_flowcell_change',
+                                    args=(self.id,))
+
+    def flowcell_type(self):
+        """Convert our boolean 'is paired' flag to a name
+        """
+        if self.paired_end:
+            return u"Paired"
         else:
-            cluster_estimate = 'None'
-        library_id = lane.library_id
-        library = lane.library
-        element = '<tr><td>%d</td><td><a href="%s">%s</a></td><td>%s</td></tr>'
-        html.append(element % (lane.lane_number,
-                               library.get_admin_url(),
-                               library,
-                               cluster_estimate))
-    html.append('</table>')
-    return "\n".join(html)
-  Lanes.allow_tags = True
-
-  class Meta:
-    ordering = ["-run_date"]
-
-  def get_admin_url(self):
-    # that's the django way... except it didn't work
-    return urlresolvers.reverse('admin:experiments_flowcell_change',
-                                args=(self.id,))
-
-  def flowcell_type(self):
-    """
-    Convert our boolean 'is paired' flag to a name
-    """
-    if self.paired_end:
-      return u"Paired"
-    else:
-      return u"Single"
-
-  @models.permalink
-  def get_absolute_url(self):
-      flowcell_id, status = parse_flowcell_id(self.flowcell_id)
-      return ('htsworkflow.frontend.experiments.views.flowcell_detail',
-              [str(flowcell_id)])
-
-  def get_raw_data_directory(self):
-      """Return location of where the raw data is stored"""
-      flowcell_id, status = parse_flowcell_id(self.flowcell_id)
-
-      return os.path.join(settings.RESULT_HOME_DIR, flowcell_id)
-
-  def update_data_runs(self):
-      result_root = self.get_raw_data_directory()
-      logger.debug("Update data runs flowcell root: %s" % (result_root,))
-      if result_root is None:
-          return
-
-      result_home_dir = os.path.join(settings.RESULT_HOME_DIR,'')
-      run_xml_re = re.compile(glob.fnmatch.translate('run*.xml'))
-
-      dataruns = dict([ (x.result_dir, x) for x in self.datarun_set.all() ])
-
-      result_dirs = []
-      for dirpath, dirnames, filenames in os.walk(result_root):
-          for filename in filenames:
-              if run_xml_re.match(filename):
-                  # we have a run directory
-                  relative_pathname = get_relative_pathname(dirpath)
-                  cached_run = dataruns.get(relative_pathname, None)
-                  now = datetime.datetime.now()
-                  if (cached_run is None):
-                      self.import_data_run(relative_pathname, filename)
-                  elif (now - cached_run.last_update_time).days > RESCAN_DELAY:
-                      self.import_data_run(relative_pathname,
-                                           filename, cached_run)
-
-  def import_data_run(self, relative_pathname, run_xml_name, run=None):
-      """Given a result directory import files"""
-      run_dir = get_absolute_pathname(relative_pathname)
-      run_xml_path = os.path.join(run_dir, run_xml_name)
-      run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
-      logger.debug("Importing run from %s" % (relative_pathname,))
-
-      if run is None:
-          run = DataRun()
-          run.flowcell = self
-          run.status = RUN_STATUS_REVERSE_MAP['DONE']
-          run.result_dir = relative_pathname
-          run.runfolder_name = run_xml_data.runfolder_name
-          run.cycle_start = run_xml_data.image_analysis.start
-          run.cycle_stop = run_xml_data.image_analysis.stop
-          run.run_start_time = run_xml_data.image_analysis.date
-
-      run.last_update_time = datetime.datetime.now()
-      run.save()
-
-      run.update_result_files()
+            return u"Single"
+
+    @models.permalink
+    def get_absolute_url(self):
+        flowcell_id, status = parse_flowcell_id(self.flowcell_id)
+        return ('htsworkflow.frontend.experiments.views.flowcell_detail',
+                [str(flowcell_id)])
+
+    def get_raw_data_directory(self):
+        """Return location of where the raw data is stored"""
+        flowcell_id, status = parse_flowcell_id(self.flowcell_id)
+
+        return os.path.join(settings.RESULT_HOME_DIR, flowcell_id)
+
+    def update_data_runs(self):
+        result_root = self.get_raw_data_directory()
+        LOGGER.debug("Update data runs flowcell root: %s" % (result_root,))
+        if result_root is None:
+            return
+
+        result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
+        run_xml_re = re.compile(glob.fnmatch.translate('run*.xml'))
+
+        result_dirs = []
+        for dirpath, dirnames, filenames in os.walk(result_root):
+            for filename in filenames:
+                if run_xml_re.match(filename):
+                    # we have a run directory
+                    relative_pathname = get_relative_pathname(dirpath)
+                    self.import_data_run(relative_pathname, filename)
+
+    def import_data_run(self, relative_pathname, run_xml_name, force=False):
+        """Given a result directory import files"""
+        now = datetime.datetime.now()
+        run_dir = get_absolute_pathname(relative_pathname)
+        run_xml_path = os.path.join(run_dir, run_xml_name)
+
+        runs = DataRun.objects.filter(result_dir = relative_pathname)
+        if len(runs) == 0:
+            run = DataRun()
+            created = True
+        elif len(runs) > 1:
+            raise RuntimeError("Too many data runs for %s" % (
+                relative_pathname,))
+        else:
+            run = runs[0]
+            created = False
+
+        if created or force or (now-run.last_update_time).days > RESCAN_DELAY:
+            LOGGER.debug("Importing run from %s" % (relative_pathname,))
+            run_xml_data = runfolder.load_pipeline_run_xml(run_xml_path)
+            run.flowcell = self
+            run.status = RUN_STATUS_REVERSE_MAP['DONE']
+            run.result_dir = relative_pathname
+            run.runfolder_name = run_xml_data.runfolder_name
+            run.cycle_start = run_xml_data.image_analysis.start
+            run.cycle_stop = run_xml_data.image_analysis.stop
+            run.run_start_time = run_xml_data.image_analysis.date
+            run.image_software = run_xml_data.image_analysis.software
+            run.image_version = run_xml_data.image_analysis.version
+            run.basecall_software = run_xml_data.bustard.software
+            run.basecall_version = run_xml_data.bustard.version
+            # we're frequently not running alignments
+            if run_xml_data.gerald:
+                run.alignment_software = run_xml_data.gerald.software
+                run.alignment_version = run_xml_data.gerald.version
+
+            run.last_update_time = datetime.datetime.now()
+            run.save()
+
+            run.update_result_files()
 
 
 # FIXME: should we automatically update dataruns?
@@ -179,39 +271,46 @@ class FlowCell(models.Model):
 #post_init.connect(update_flowcell_dataruns, sender=FlowCell)
 
 
-
 LANE_STATUS_CODES = [(0, 'Failed'),
-                    (1, 'Marginal'),
-                    (2, 'Good'),]
-LANE_STATUS_MAP = dict((int(k),v) for k,v in LANE_STATUS_CODES )
+                     (1, 'Marginal'),
+                     (2, 'Good'), ]
+LANE_STATUS_MAP = dict((int(k), v) for k, v in LANE_STATUS_CODES)
 LANE_STATUS_MAP[None] = "Unknown"
 
+
 def is_valid_lane(value):
     if value >= 1 and value <= 8:
         return True
     else:
-          return False
+        return False
+
 
 class Lane(models.Model):
-  flowcell = models.ForeignKey(FlowCell)
-  lane_number = models.IntegerField()
-  library = models.ForeignKey(Library)
-  pM = models.DecimalField(max_digits=5, decimal_places=2,blank=False, null=False,default=default_pM)
-  cluster_estimate = models.IntegerField(blank=True, null=True)
-  status = models.IntegerField(choices=LANE_STATUS_CODES, null=True, blank=True)
-  comment = models.TextField(null=True, blank=True)
-
-  @models.permalink
-  def get_absolute_url(self):
-       return ('htsworkflow.frontend.experiments.views.flowcell_lane_detail',
-               [str(self.id)])
-
-  def __unicode__(self):
-      return self.flowcell.flowcell_id + ':' + unicode(self.lane_number)
-
-### -----------------------
+    flowcell = models.ForeignKey(FlowCell)
+    lane_number = models.IntegerField()
+    library = models.ForeignKey(Library)
+    pM = models.DecimalField(max_digits=5,
+                             decimal_places=2,
+                             blank=False,
+                             null=False,
+                             default=default_pM)
+    cluster_estimate = models.IntegerField(blank=True, null=True)
+    status = models.IntegerField(choices=LANE_STATUS_CODES,
+                                 null=True,
+                                 blank=True)
+    comment = models.TextField(null=True, blank=True)
+
+    @models.permalink
+    def get_absolute_url(self):
+        return ('htsworkflow.frontend.experiments.views.flowcell_lane_detail',
+                [str(self.id)])
+
+    def __unicode__(self):
+        return self.flowcell.flowcell_id + ':' + unicode(self.lane_number)
+
+
 class DataRun(models.Model):
-    flowcell = models.ForeignKey(FlowCell,verbose_name="Flowcell Id")
+    flowcell = models.ForeignKey(FlowCell, verbose_name="Flowcell Id")
     runfolder_name = models.CharField(max_length=50)
     result_dir = models.CharField(max_length=255)
     last_update_time = models.DateTimeField()
@@ -220,6 +319,12 @@ class DataRun(models.Model):
     cycle_stop = models.IntegerField(null=True, blank=True)
     run_status = models.IntegerField(choices=RUN_STATUS_CHOICES,
                                      null=True, blank=True)
+    image_software = models.CharField(max_length=50)
+    image_version = models.CharField(max_length=50)
+    basecall_software = models.CharField(max_length=50)
+    basecall_version = models.CharField(max_length=50)
+    alignment_software = models.CharField(max_length=50)
+    alignment_version = models.CharField(max_length=50)
     comment = models.TextField(blank=True)
 
     def update_result_files(self):
@@ -230,8 +335,8 @@ class DataRun(models.Model):
                 pathname = os.path.join(dirname, filename)
                 relative_pathname = get_relative_pathname(pathname)
                 datafiles = self.datafile_set.filter(
-                  data_run = self,
-                  relative_pathname=relative_pathname)
+                    data_run=self,
+                    relative_pathname=relative_pathname)
                 if len(datafiles) > 0:
                     continue
 
@@ -245,7 +350,8 @@ class DataRun(models.Model):
 
                     lane_number = metadata.get('lane', None)
                     if lane_number is not None:
-                        lane = self.flowcell.lane_set.get(lane_number = lane_number)
+                        lane = self.flowcell.lane_set.get(
+                            lane_number=lane_number)
                         newfile.library = lane.library
 
                     self.datafile_set.add(newfile)
@@ -261,7 +367,8 @@ class DataRun(models.Model):
                 lane = metadata.get('lane', None)
                 if lane is not None:
                     lane_file_set = lanes.setdefault(lane, {})
-                    lane_file_set[datafile.file_type.normalized_name] = datafile
+                    normalized_name = datafile.file_type.normalized_name
+                    lane_file_set[normalized_name] = datafile
         return lanes
 
     def ivc_plots(self, lane):
@@ -273,6 +380,7 @@ class DataRun(models.Model):
             if metadata.file_type.name in ivc_name:
                 plots[metadata.file_type.name] = (rel_filename, metadata)
 
+
 class FileType(models.Model):
     """Represent potential file types
 
@@ -294,7 +402,8 @@ class FileType(models.Model):
         if len(self.regex) > 0:
             match = re.match(self.regex, filename)
             if match is not None:
-                # These are (?P<>) names we know about from our default regexes.
+                # These are (?P<>) names we know about from our
+                # default regexes.
                 results = match.groupdict()
 
                 # convert int parameters
@@ -314,10 +423,12 @@ class FileType(models.Model):
         #return u"<FileType: %s>" % (self.name,)
         return self.name
 
+
 def str_uuid():
     """Helper function to set default UUID in DataFile"""
     return str(uuid.uuid1())
 
+
 class DataFile(models.Model):
     """Store map from random ID to filename"""
     random_key = models.CharField(max_length=64,
@@ -339,7 +450,8 @@ class DataFile(models.Model):
     @models.permalink
     def get_absolute_url(self):
         return ('htsworkflow.frontend.experiments.views.read_result_file',
-                (), {'key': self.random_key })
+                (), {'key': self.random_key})
+
 
 def find_file_type_metadata_from_filename(pathname):
     path, filename = os.path.split(pathname)
@@ -352,14 +464,15 @@ def find_file_type_metadata_from_filename(pathname):
 
     return None
 
+
 def get_relative_pathname(abspath):
     """Strip off the result home directory from a path
     """
-    result_home_dir = os.path.join(settings.RESULT_HOME_DIR,'')
-    relative_pathname = abspath.replace(result_home_dir,'')
+    result_home_dir = os.path.join(settings.RESULT_HOME_DIR, '')
+    relative_pathname = abspath.replace(result_home_dir, '')
     return relative_pathname
 
+
 def get_absolute_pathname(relative_pathname):
     """Attach relative path to  results home directory"""
     return os.path.join(settings.RESULT_HOME_DIR, relative_pathname)
-