Create a lane-to-file-name Turtle data file
author Diane Trout <diane@caltech.edu>
Tue, 7 Aug 2012 02:13:49 +0000 (19:13 -0700)
committer Diane Trout <diane@caltech.edu>
Tue, 7 Aug 2012 02:13:49 +0000 (19:13 -0700)
This required passing the lane ID information back through
the JSON API.

htsworkflow/frontend/samples/tests.py
htsworkflow/frontend/samples/views.py
htsworkflow/submission/condorfastq.py
htsworkflow/submission/test/test_condorfastq.py
htsworkflow/templates/lane_to_fastq.turtle [new file with mode: 0644]

index 476ede36ebd9c697172650404dd496f34f66e5cb..19f2f6bebd8f98a74ccabc4c581afcf9b5486c49 100644 (file)
@@ -95,12 +95,24 @@ class SampleWebTestCase(TestCase):
                 # some specific tests
                 if lib.id == '10981':
                     # test a case where there is no known status
-                    lane_set = {u'status': u'Unknown', u'paired_end': True, u'read_length': 75, u'lane_number': 1, u'flowcell': u'303TUAAXX', u'status_code': None}
+                    lane_set = {u'status': u'Unknown',
+                                u'paired_end': True,
+                                u'read_length': 75,
+                                u'lane_number': 1,
+                                u'lane_id': 1193,
+                                u'flowcell': u'303TUAAXX',
+                                u'status_code': None}
                     self.failUnlessEqual(len(d['lane_set']), 1)
                     self.failUnlessEqual(d['lane_set'][0], lane_set)
                 elif lib.id == '11016':
                     # test a case where there is a status
-                    lane_set = {u'status': 'Good', u'paired_end': True, u'read_length': 75, u'lane_number': 5, u'flowcell': u'303TUAAXX', u'status_code': 2}
+                    lane_set = {u'status': 'Good',
+                                u'paired_end': True,
+                                u'read_length': 75,
+                                u'lane_number': 5,
+                                u'lane_id': 1197,
+                                u'flowcell': u'303TUAAXX',
+                                u'status_code': 2}
                     self.failUnlessEqual(len(d['lane_set']), 1)
                     self.failUnlessEqual(d['lane_set'][0], lane_set)
 
@@ -201,9 +213,10 @@ class TestRDFaLibrary(TestCase):
                               [u'http://localhost/lane/1193'],
                               p=libNS['has_lane'])
 
+        fc_uri = RDF.Uri('http://localhost/flowcell/303TUAAXX/')
         self.check_literal_object(model,
                                   [u"303TUAAXX"],
-                                  s=RDF.Uri('http://localhost/flowcell/303TUAAXX/'))
+                                  s=fc_uri, p=libNS['flowcell_id'])
 
     def check_literal_object(self, model, values, s=None, p=None, o=None):
         statements = list(model.find_statements(
index 28a3e1ea73e7794e1b5988bc189484b17537d9b8..a53552ce7135637718e6fd1377836250b486bf61 100644 (file)
@@ -92,6 +92,7 @@ def create_library_context(cl):
     cl.result_count = unicode(cl.paginator._count)
     return {'library_list': records }
 
+
 def library(request, todo_only=False):
     queryset = Library.objects.filter(hidden__exact=0)
     if todo_only:
@@ -110,9 +111,11 @@ def library(request, todo_only=False):
     c = RequestContext(request, context)
     return HttpResponse( t.render(c) )
 
+
 def library_not_run(request):
     return library(request, todo_only=True)
 
+
 def library_to_flowcells(request, lib_id):
     """
     Display information about all the flowcells a library has been run on.
@@ -459,6 +462,7 @@ def library_dict(library_id):
     for lane in lib.lane_set.all():
         lane_info.append( {'flowcell':lane.flowcell.flowcell_id,
                            'lane_number': lane.lane_number,
+                           'lane_id': lane.id,
                            'paired_end': lane.flowcell.paired_end,
                            'read_length': lane.flowcell.read_length,
                            'status_code': lane.status,
index 8513f7b2da2b266803f12ec32acf650242fb165e..1d425ded8458450967d1a2f22e0f63fc95337a95 100644 (file)
@@ -7,6 +7,7 @@ import sys
 import types
 
 from htsworkflow.pipelines.sequences import scan_for_sequences
+from htsworkflow.pipelines.samplekey import SampleKey
 from htsworkflow.pipelines import qseq2fastq
 from htsworkflow.pipelines import srf2fastq
 from htsworkflow.pipelines import desplit_fastq
@@ -18,6 +19,7 @@ from django.template import Context, loader
 
 LOGGER = logging.getLogger(__name__)
 
+
 class CondorFastqExtract(object):
     def __init__(self, host, apidata, sequences_path,
                  log_path='log',
@@ -45,7 +47,9 @@ class CondorFastqExtract(object):
         """
         template_map = {'srf': 'srf.condor',
                         'qseq': 'qseq.condor',
-                        'split_fastq': 'split_fastq.condor'}
+                        'split_fastq': 'split_fastq.condor',
+                        'by_sample': 'lane_to_fastq.turtle',
+                        }
 
         condor_entries = self.build_condor_arguments(result_map)
         for script_type in template_map.keys():
@@ -54,8 +58,8 @@ class CondorFastqExtract(object):
                          'logdir': self.log_path,
                          'env': os.environ.get('PYTHONPATH', None),
                          'args': condor_entries[script_type],
+                         'root_url': self.api.root_url,
                          }
-
             context = Context(variables)
 
             with open(script_type + '.condor','w+') as outstream:
@@ -65,11 +69,12 @@ class CondorFastqExtract(object):
         condor_entries = {'srf': [],
                           'qseq': [],
                           'split_fastq': []}
+
         conversion_funcs = {'srf': self.condor_srf_to_fastq,
                             'qseq': self.condor_qseq_to_fastq,
                             'split_fastq': self.condor_desplit_fastq
                             }
-
+        by_sample = {}
         lib_db = self.find_archive_sequence_files(result_map)
         needed_targets = self.find_missing_targets(result_map, lib_db)
 
@@ -88,9 +93,13 @@ class CondorFastqExtract(object):
                 if sources is not None:
                     condor_entries.setdefault(condor_type, []).append(
                         conversion(sources, target_pathname))
+                    for s in sources:
+                        by_sample.setdefault(s.lane_id,[]).append(
+                            target_pathname)
             else:
                 print " need file", target_pathname
 
+        condor_entries['by_sample'] = by_sample
         return condor_entries
 
     def find_archive_sequence_files(self,  result_map):
@@ -109,7 +118,7 @@ class CondorFastqExtract(object):
 
             for lane in lib_info['lane_set']:
                 lane_key = (lane['flowcell'], lane['lane_number'])
-                candidate_lanes[lane_key] = lib_id
+                candidate_lanes[lane_key] = (lib_id, lane['lane_id'])
                 seq_dirs.add(os.path.join(self.sequences_path,
                                              'flowcells',
                                              lane['flowcell']))
@@ -122,8 +131,10 @@ class CondorFastqExtract(object):
 
         for seq in candidate_seq_list:
             lane_key = (seq.flowcell, seq.lane)
-            lib_id = candidate_lanes.get(lane_key, None)
-            if lib_id is not None:
+            candidate_key = candidate_lanes.get(lane_key, None)
+            if candidate_key is not None:
+                lib_id, lane_id = candidate_key
+                seq.lane_id = lane_id
                 lib_info = lib_db[lib_id]
                 lib_info['lanes'].setdefault(lane_key, set()).add(seq)
 
@@ -225,6 +236,9 @@ class CondorFastqExtract(object):
             'ispaired': sources[0].paired,
         }
 
+    def lane_rdf(self, sources, target_pathname):
+        pass
+
 def make_lane_dict(lib_db, lib_id):
     """
     Convert the lane_set in a lib_db to a dictionary
index 899a4472993e792655bcde40090bd067738a6a20..bb2b3c9995ff9a2fcf8510e9b6cf2860c71d029e 100644 (file)
@@ -89,30 +89,35 @@ LIBDATA = {
              u'insert_size': 200,
              u'lane_set': [{u'flowcell': u'30221AAXX',
                             u'lane_number': 4,
+                            u'lane_id': 3400,
                             u'paired_end': False,
                             u'read_length': 33,
                             u'status': u'Unknown',
                             u'status_code': None},
                            {u'flowcell': u'42JUYAAXX',
                             u'lane_number': 5,
+                            u'lane_id': 4200,
                             u'paired_end': True,
                             u'read_length': 76,
                             u'status': u'Unknown',
                             u'status_code': None},
                            {u'flowcell': u'61MJTAAXX',
                             u'lane_number': 6,
+                            u'lane_id': 6600,
                             u'paired_end': False,
                             u'read_length': 76,
                             u'status': u'Unknown',
                             u'status_code': None},
                            {u'flowcell': u'30DY0AAXX',
                             u'lane_number': 8,
+                            u'lane_id': 3800,
                             u'paired_end': True,
                             u'read_length': 76,
                             u'status': u'Unknown',
                             u'status_code': None},
                            {u'flowcell': u'C02F9ACXX',
                             u'lane_number': 3,
+                            u'lane_id': 12300,
                             u'paired_end': True,
                             u'read_length': 101,
                             u'status': u'Unknown',
@@ -136,12 +141,14 @@ FAKE_APIDATA = {'apiid':0, 'apikey': 'foo'}
 
 class FakeApi(object):
     def __init__(self, *args, **kwargs):
-        pass
+        self.root_url = 'http://localhost'
 
     def get_library(self, libid):
         lib_data = LIBDATA[libid]
         return copy.deepcopy(lib_data)
 
+
+
 class TestCondorFastq(unittest.TestCase):
     def setUp(self):
         self.cwd = os.getcwd()
@@ -378,7 +385,6 @@ class TestCondorFastq(unittest.TestCase):
             self.failUnless('11154_NoIndex_L003_R2_002.fastq.gz' in \
                             arguments[1])
 
-
 def suite():
     suite = unittest.makeSuite(TestCondorFastq, 'test')
     return suite
diff --git a/htsworkflow/templates/lane_to_fastq.turtle b/htsworkflow/templates/lane_to_fastq.turtle
new file mode 100644 (file)
index 0000000..7b4d01e
--- /dev/null
@@ -0,0 +1,3 @@
+{% for key, files in args.items %}{% for f in files %}
+<file://{{ f }}> libraryOntology:has_lane <{{host}}/lane/{{key}}> .
+{% endfor %}{% endfor %}
\ No newline at end of file