Extend the adapter id field to support the dual indexed libraries
authorDiane Trout <diane@caltech.edu>
Thu, 12 Apr 2012 21:43:10 +0000 (14:43 -0700)
committerDiane Trout <diane@caltech.edu>
Thu, 12 Apr 2012 21:43:10 +0000 (14:43 -0700)
This is implemented by using a '-' to indicate that two indexes are linked
together.

htsworkflow/frontend/experiments/fixtures/test_flowcells.json
htsworkflow/frontend/samples/fixtures/initial_data.json
htsworkflow/frontend/samples/models.py
htsworkflow/pipelines/retrieve_config.py
htsworkflow/pipelines/test/test_retrive_config.py

index e6ad6e314f28d3bd8e31ba8e7c2954e3f8b9eeaa..63cf30bcab89bd77b788fd296140ba95e7f712f7 100644 (file)
             "antibody": null
         }
     },
+    {
+        "pk": "13044",
+        "model": "samples.library",
+        "fields": {
+            "ten_nM_dilution": false,
+            "gel_cut_size": 225,
+            "library_name": "Dual Indexed Test",
+            "creation_date": "2009-08-26",
+            "cell_line": null,
+            "library_species": 9,
+            "library_type": 9,
+            "multiplex_id": "N701-N501",
+            "made_by": "Lorian",
+            "affiliations": [
+                2
+            ],
+            "replicate": 1,
+            "condition": null,
+            "hidden": false,
+            "stopping_point": "2A",
+            "tags": [],
+            "made_for": "",
+            "amplified_from_sample": 11043,
+            "notes": "8/21/2009 11:57:54\tColor: Orange",
+            "undiluted_concentration": "22.4",
+            "successful_pM": null,
+            "experiment_type": 2,
+            "antibody": null
+        }
+    },
     {
         "pk": "11045",
         "model": "samples.library",
        "pM": "7"
        }
    },
+  {"pk": 1379,
+   "model": "experiments.lane",
+   "fields": {
+       "comment": "",
+       "library": "13044",
+       "cluster_estimate": 196000,
+       "flowcell": 151,
+       "lane_number": 4,
+       "pM": "7"
+       }
+   },
+
     {
         "pk": "11044",
         "model": "samples.library",
index 2a5af14cfe328879b8d5edd0988c4cdd394f1488..76a221e029f920d1181b969017b4a273c61ca2ab 100644 (file)
         "is_paired_end": true
      }
   },
+  {
+     "model": "samples.LibraryType",
+     "pk": 9,
+     "fields": {
+        "name": "Dual Index Illumina",
+        "can_multiplex": true,
+        "is_paired_end": true
+     }
+  },
   {
      "model": "samples.ExperimentType",
      "pk": 1,
       "adapter_type": 5,
       "sequence": "CTTGTA"
     }
-  }
+  },
+  {"fields": {"adapter_type": 9,
+             "multiplex_id": "N501",
+             "sequence": "TAGATCGC"},
+  "model": "samples.multiplexindex",
+  "pk": 74
+  },
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N502",
+             "sequence": "CTCTCTAT"},
+  "model": "samples.multiplexindex",
+  "pk": 75},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N503",
+             "sequence": "TATCCTCT"},
+  "model": "samples.multiplexindex",
+  "pk": 76},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N504",
+             "sequence": "AGAGTAGA"},
+  "model": "samples.multiplexindex",
+  "pk": 77},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N505",
+             "sequence": "GTAAGGAG"},
+  "model": "samples.multiplexindex",
+  "pk": 78},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N506",
+             "sequence": "ACTGCATA"},
+  "model": "samples.multiplexindex",
+  "pk": 79},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N507",
+             "sequence": "AAGGAGTA"},
+  "model": "samples.multiplexindex",
+  "pk": 80},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N508",
+             "sequence": "CTAAGCCT"},
+  "model": "samples.multiplexindex",
+  "pk": 81},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N701",
+             "sequence": "TAAGGCGA"},
+  "model": "samples.multiplexindex",
+  "pk": 82},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N702",
+             "sequence": "CGTACTAG"},
+  "model": "samples.multiplexindex",
+  "pk": 83},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N703",
+             "sequence": "AGGCAGAA"},
+  "model": "samples.multiplexindex",
+  "pk": 84},
+ {"fields": {"adapter_type": 9, "multiplex_id": "N704", "sequence": "TCCTGAGC"},
+  "model": "samples.multiplexindex",
+  "pk": 85},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N705",
+             "sequence": "GGACTCCT"},
+  "model": "samples.multiplexindex",
+  "pk": 86},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N706",
+             "sequence": "TAGGCATG"},
+  "model": "samples.multiplexindex",
+  "pk": 87},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N707",
+             "sequence": "CTCTCTAC"},
+  "model": "samples.multiplexindex",
+  "pk": 88},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N708",
+             "sequence": "CAGAGAGG"},
+  "model": "samples.multiplexindex",
+  "pk": 89},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N709",
+             "sequence": "GCTACGCT"},
+  "model": "samples.multiplexindex",
+  "pk": 90},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N710",
+             "sequence": "CGAGGCTG"},
+  "model": "samples.multiplexindex",
+  "pk": 91},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N711",
+             "sequence": "AAGAGGCA"},
+  "model": "samples.multiplexindex",
+  "pk": 92},
+ {"fields": {"adapter_type": 9,
+             "multiplex_id": "N712",
+             "sequence": "GTAGAGGA"},
+  "model": "samples.multiplexindex",
+  "pk": 93}
 ]
index 2eeaaecbe3b3aa9d3fc4c651e9774c70b6e4e383..de6fc11dc3d1bb407e43011a59128f8b3f733a4b 100644 (file)
@@ -144,7 +144,7 @@ class LibraryType(models.Model):
 class MultiplexIndex(models.Model):
     """Map adapter types to the multiplex sequence"""
     adapter_type = models.ForeignKey(LibraryType)
-    multiplex_id = models.CharField(max_length=3, null=False)
+    multiplex_id = models.CharField(max_length=6, null=False)
     sequence = models.CharField(max_length=12, blank=True, null=True)
 
     class Meta:
@@ -244,23 +244,41 @@ class Library(models.Model):
       if self.multiplex_id is None or len(self.multiplex_id) == 0:
           return 'Err: id empty'
       sequences = {}
-      multiplex_ids = self.multiplex_id.split(',')
-      for multiplex_id in multiplex_ids:
-          try:
-              multiplex = MultiplexIndex.objects.get(
-                  adapter_type = self.library_type.id,
-                  multiplex_id = multiplex_id)
-              sequences[multiplex_id] = multiplex.sequence
-          except MultiplexIndex.DoesNotExist, e:
-              sequences[multiplex_id] = 'Err: index not found'
+      multiplex_expressions = self.multiplex_id.split(',')
+      for multiplex_term in multiplex_expressions:
+          pairs = multiplex_term.split('-')
+          if len(pairs) == 1:
+              key = pairs[0]
+              seq = self._lookup_index(pairs[0])
+          elif len(pairs) == 2:
+              key = pairs[0] + '-' + pairs[1]
+              seq0 = self._lookup_index(pairs[0])
+              seq1 = self._lookup_index(pairs[1])
+              if seq0 is None or seq1 is None:
+                  seq = None
+              else:
+                  seq = seq0 + '-' + seq1
+          else:
+              raise RuntimeError("Too many - seperated sequences")
+          if seq is None:
+              seq = 'Err: index not found'
+          sequences[key] = seq
       return sequences
 
+  def _lookup_index(self, multiplex_id):
+      try:
+          multiplex = MultiplexIndex.objects.get(
+              adapter_type = self.library_type.id,
+              multiplex_id = multiplex_id)
+          return multiplex.sequence
+      except MultiplexIndex.DoesNotExist, e:
+          return None
+
   def index_sequence_text(self, seperator=' '):
       """Return formatted multiplex index sequences"""
       sequences = self.index_sequences()
       if sequences is None:
           return ""
-
       multiplex_ids = sequences.keys()
       multiplex_ids.sort()
       return seperator.join(( "%s:%s" %(i,sequences[i]) for i in multiplex_ids))
index 94d8f5036089a47daeaad4a4b3f44a58540d0555..bd220a0ee854472e643df0b4e15ea3d4e3ac1ea1 100644 (file)
@@ -17,6 +17,7 @@ except ImportError, e:
 
 from htsworkflow.frontend.auth import apidata
 from htsworkflow.util import api
+from htsworkflow.util import alphanum
 from htsworkflow.util.url import normalize_url
 from htsworkflow.pipelines.genome_mapper import \
      getAvailableGenomes, \
@@ -410,7 +411,7 @@ def format_pooled_libraries(shared, library):
     elif (type(sequences) == types.DictType):
         pooled = []
         multiplex_ids = sequences.keys()
-        multiplex_ids.sort(key=int)
+        multiplex_ids.sort(cmp=alphanum.alphanum)
         for multiplex_id in multiplex_ids:
             sample = {}
             sample.update(shared)
index cb5650191e79f886da243c9837bcfa61291fbb1f..1d0404aa42ec16baff63780f9afa7298e7d04f9b 100644 (file)
@@ -54,19 +54,33 @@ class RetrieveTestCases(TestCase):
 
         output = StringIO()
         save_sample_sheet(output, options, flowcell_info)
+        print output.buf
+
         output.seek(0)
         sheet = list(csv.DictReader(output))
-        expected = [{'SampleProject': '12044_index1', 'Index': 'ATCACG'},
-                    {'SampleProject': '12044_index2', 'Index': 'CGATGT'},
-                    {'SampleProject': '12044_index3', 'Index': 'TTAGGC'},
-                    {'SampleProject': '11045_index1', 'Index': 'ATCACG'},
+        expected = [{'SampleProject': '12044_index1',
+                     'Index': 'ATCACG',
+                     'Lane': '3',
+                     },
+                    {'SampleProject': '12044_index2',
+                     'Index': 'CGATGT',
+                     'Lane': '3',
+                     },
+                    {'SampleProject': '12044_index3',
+                     'Index': 'TTAGGC',
+                     'Lane': '3',
+                     },
+                    {'SampleProject': '11045_index1',
+                     'Index': 'ATCACG',
+                     'Lane': '3',
+                     },
+                    {'SampleProject': '13044_indexN701-N501',
+                     'Index': 'TAAGGCGA-TAGATCGC',
+                     'Lane': '4',
+                     }
                     ]
-        for i in range(4):
-            self.assertEqual(sheet[i]['SampleProject'],
-                             expected[i]['SampleProject'])
-            self.assertEqual(sheet[i]['Index'],
-                             expected[i]['Index'])
-            self.assertEqual(sheet[i]['FCID'], fcid)
-            self.assertEqual(sheet[i]['Lane'], '3')
-
-
+        self.failUnlessEqual(len(sheet), len(expected))
+        for s, e in zip(sheet, expected):
+            for key in e.keys():
+                self.failUnlessEqual(s[key], e[key],
+                  "%s != %s for key %s" % (s[key],e[key], key))