Use a logger initialized to the module name much more consistently.
[htsworkflow.git] / htsworkflow / submission / condorfastq.py
index 462c1772ff2e472d1889b8f263da8f1677f47b2a..20afdfd0ca16f5353f3ee6ffeacd90158931b7e9 100644 (file)
@@ -11,7 +11,7 @@ from htsworkflow.pipelines import srf2fastq
 from htsworkflow.util.api import HtswApi
 from htsworkflow.util.conversion import parse_flowcell_id
 
-logger = logging.getLogger(__name__)
+LOGGER = logging.getLogger(__name__)
 
 class CondorFastqExtract(object):
     def __init__(self, host, apidata, sequences_path,
@@ -34,7 +34,7 @@ class CondorFastqExtract(object):
     def build_fastqs(self, library_result_map ):
         """
         Generate condor scripts to build any needed fastq files
-        
+
         Args:
           library_result_map (list):  [(library_id, destination directory), ...]
         """
@@ -43,24 +43,24 @@ class CondorFastqExtract(object):
         srf_condor_header = self.get_srf_condor_header()
         srf_condor_entries = []
         lib_db = self.find_archive_sequence_files(library_result_map)
-    
+
         needed_targets = self.find_missing_targets(library_result_map, lib_db)
-    
+
         for target_pathname, available_sources in needed_targets.items():
-            logger.debug(' target : %s' % (target_pathname,))
-            logger.debug(' candidate sources: %s' % (available_sources,))
+            LOGGER.debug(' target : %s' % (target_pathname,))
+            LOGGER.debug(' candidate sources: %s' % (available_sources,))
             if available_sources.has_key('qseq'):
                 source = available_sources['qseq']
                 qseq_condor_entries.append(
-                    self.condor_qseq_to_fastq(source.path, 
-                                              target_pathname, 
+                    self.condor_qseq_to_fastq(source.path,
+                                              target_pathname,
                                               source.flowcell)
                 )
             elif available_sources.has_key('srf'):
                 source = available_sources['srf']
                 mid = getattr(source, 'mid_point', None)
                 srf_condor_entries.append(
-                    self.condor_srf_to_fastq(source.path, 
+                    self.condor_srf_to_fastq(source.path,
                                              target_pathname,
                                              source.paired,
                                              source.flowcell,
@@ -68,17 +68,17 @@ class CondorFastqExtract(object):
                 )
             else:
                 print " need file", target_pathname
-    
+
         if len(srf_condor_entries) > 0:
-            make_submit_script('srf.fastq.condor', 
+            make_submit_script('srf.fastq.condor',
                                srf_condor_header,
                                srf_condor_entries)
-    
+
         if len(qseq_condor_entries) > 0:
-            make_submit_script('qseq.fastq.condor', 
+            make_submit_script('qseq.fastq.condor',
                                qseq_condor_header,
                                qseq_condor_entries)
-    
+
 
     def get_qseq_condor_header(self):
         return """Universe=vanilla
@@ -97,18 +97,18 @@ output=%(log)s/srf_pair_fastq.$(process).out
 error=%(log)s/srf_pair_fastq.$(process).out
 log=%(log)s/srf_pair_fastq.log
 environment="PYTHONPATH=%(env)s"
-    
+
 """ % {'exe': sys.executable,
            'log': self.log_path,
            'env': os.environ.get('PYTHONPATH', '')
       }
-            
+
     def find_archive_sequence_files(self,  library_result_map):
         """
         Find archived sequence files associated with our results.
         """
-        logger.debug("Searching for sequence files in: %s" %(self.sequences_path,))
-    
+        LOGGER.debug("Searching for sequence files in: %s" %(self.sequences_path,))
+
         lib_db = {}
         seq_dirs = set()
         candidate_lanes = {}
@@ -116,35 +116,35 @@ environment="PYTHONPATH=%(env)s"
             lib_info = self.api.get_library(lib_id)
             lib_info['lanes'] = {}
             lib_db[lib_id] = lib_info
-    
+
             for lane in lib_info['lane_set']:
                 lane_key = (lane['flowcell'], lane['lane_number'])
                 candidate_lanes[lane_key] = lib_id
-                seq_dirs.add(os.path.join(self.sequences_path, 
-                                             'flowcells', 
+                seq_dirs.add(os.path.join(self.sequences_path,
+                                             'flowcells',
                                              lane['flowcell']))
-        logger.debug("Seq_dirs = %s" %(unicode(seq_dirs)))
+        LOGGER.debug("Seq_dirs = %s" %(unicode(seq_dirs)))
         candidate_seq_list = scan_for_sequences(seq_dirs)
-    
+
         # at this point we have too many sequences as scan_for_sequences
         # returns all the sequences in a flowcell directory
         # so lets filter out the extras
-        
+
         for seq in candidate_seq_list:
             lane_key = (seq.flowcell, seq.lane)
             lib_id = candidate_lanes.get(lane_key, None)
             if lib_id is not None:
                 lib_info = lib_db[lib_id]
                 lib_info['lanes'].setdefault(lane_key, set()).add(seq)
-        
+
         return lib_db
-    
+
     def find_missing_targets(self, library_result_map, lib_db):
         """
         Check if the sequence file exists.
         This requires computing what the sequence name is and checking
         to see if it can be found in the sequence location.
-    
+
         Adds seq.paired flag to sequences listed in lib_db[*]['lanes']
         """
         fastq_paired_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s_r%(read)s.fastq'
@@ -154,15 +154,15 @@ environment="PYTHONPATH=%(env)s"
         for lib_id, result_dir in library_result_map:
             lib = lib_db[lib_id]
             lane_dict = make_lane_dict(lib_db, lib_id)
-            
+
             for lane_key, sequences in lib['lanes'].items():
                 for seq in sequences:
                     seq.paired = lane_dict[seq.flowcell]['paired_end']
                     lane_status = lane_dict[seq.flowcell]['status']
-    
+
                     if seq.paired and seq.read is None:
                         seq.read = 1
-                    filename_attributes = { 
+                    filename_attributes = {
                         'flowcell': seq.flowcell,
                         'lib_id': lib_id,
                         'lane': seq.lane,
@@ -176,21 +176,21 @@ environment="PYTHONPATH=%(env)s"
                         # 30DY0 only ran for 151 bases instead of 152
                         # it is actually 76 1st read, 75 2nd read
                         seq.mid_point = 76
-    
+
                     # end filters
                     if seq.paired:
                         target_name = fastq_paired_template % filename_attributes
                     else:
                         target_name = fastq_single_template % filename_attributes
-    
+
                     target_pathname = os.path.join(result_dir, target_name)
                     if self.force or not os.path.exists(target_pathname):
                         t = needed_targets.setdefault(target_pathname, {})
                         t[seq.filetype] = seq
-    
+
         return needed_targets
 
-    
+
     def condor_srf_to_fastq(self,
                             srf_file,
                             target_pathname,
@@ -204,31 +204,31 @@ environment="PYTHONPATH=%(env)s"
             # this is ugly. I did it because I was pregenerating the target
             # names before I tried to figure out what sources could generate
             # those targets, and everything up to this point had been
-            # one-to-one. So I couldn't figure out how to pair the 
-            # target names. 
+            # one-to-one. So I couldn't figure out how to pair the
+            # target names.
             # With this at least the command will run correctly.
             # however if we rename the default targets, this'll break
             # also I think it'll generate it twice.
-            args.extend(['--right', 
+            args.extend(['--right',
                          target_pathname.replace('_r1.fastq', '_r2.fastq')])
         else:
             args.extend(['--single', target_pathname ])
         if flowcell is not None:
             args.extend(['--flowcell', flowcell])
-    
+
         if mid is not None:
             args.extend(['-m', str(mid)])
-    
+
         if self.force:
             args.extend(['--force'])
-    
+
         script = """arguments="%s"
 queue
 """ % (" ".join(args),)
-        
-        return  script 
-    
-    
+
+        return  script
+
+
     def condor_qseq_to_fastq(self, qseq_file, target_pathname, flowcell=None):
         py = qseq2fastq.__file__
         args = [py, '-i', qseq_file, '-o', target_pathname ]
@@ -237,9 +237,9 @@ queue
         script = """arguments="%s"
 queue
 """ % (" ".join(args))
-    
-        return script 
-    
+
+        return script
+
 def make_submit_script(target, header, body_list):
     """
     write out a text file
@@ -247,7 +247,7 @@ def make_submit_script(target, header, body_list):
     this was intended for condor submit scripts
 
     Args:
-      target (str or stream): 
+      target (str or stream):
         if target is a string, we will open and close the file
         if target is a stream, the caller is responsible.