2 from ConfigParser import SafeConfigParser
8 from optparse import OptionParser
10 from pprint import pprint, pformat
12 from StringIO import StringIO
14 from subprocess import Popen, PIPE
22 from htsworkflow.util import api
23 from htsworkflow.pipelines.sequences import \
24 create_sequence_table, \
26 from htsworkflow.pipelines import qseq2fastq
27 from htsworkflow.pipelines import srf2fastq
# Entry point: parse command-line options, configure logging verbosity,
# require the host/apiid/apikey triple, and run whichever submission
# steps were requested (tree building, daf linking, fastq condor script
# generation, submission .ini files, ddf files).
29 def main(cmdline=None):
30 parser = make_parser()
31 opts, args = parser.parse_args(cmdline)
# Logging escalates with --debug / --verbose; default is WARNING.
# NOTE(review): these basicConfig calls presumably sit in an if/elif
# chain in the full source (only the first call takes effect) — confirm.
34 logging.basicConfig(level = logging.DEBUG )
36 logging.basicConfig(level = logging.INFO )
38 logging.basicConfig(level = logging.WARNING )
# Credentials posted to the htsworkflow API with every request.
40 apidata = {'apiid': opts.apiid, 'apikey': opts.apikey }
42 if opts.host is None or opts.apiid is None or opts.apikey is None:
43 parser.error("Please specify host url, apiid, apikey")
45 if opts.makeddf and opts.daf is None:
46 parser.error("Please specify your daf when making ddf files")
49 parser.error("I need at least one library submission-dir input file")
# Each positional argument is a file mapping library id -> result dir;
# all of them are concatenated into one list of (lib_id, dir) tuples.
51 library_result_map = []
53 library_result_map.extend(read_library_result_map(a))
55 if opts.make_tree_from is not None:
56 make_tree_from(opts.make_tree_from, library_result_map)
58 if opts.daf is not None:
59 link_daf(opts.daf, library_result_map)
# Remaining steps are presumably gated on --fastq / --ini / --makeddf
# flags (gating lines not visible here) — confirm against full source.
62 build_fastqs(opts.host,
69 make_submission_ini(opts.host, apidata, library_result_map)
72 make_all_ddfs(library_result_map, opts.daf, force=opts.force)
# Body of make_parser(): read option defaults from the user or system
# htsworkflow ini files, then build the OptionParser describing the
# command-line interface used by main().
76 # Load defaults from the config files
77 config = SafeConfigParser()
78 config.read([os.path.expanduser('~/.htsworkflow.ini'), '/etc/htsworkflow.ini'])
80 sequence_archive = None
84 SECTION = 'sequence_archive'
85 if config.has_section(SECTION):
# NOTE(review): ConfigParser.get()'s third positional argument is 'raw',
# not a default value — passing the fallback variable here does not
# supply a default and raises NoOptionError when the option is absent.
# Should be guarded with has_option() or try/except. Applies to the
# four get() calls below.
86 sequence_archive = config.get(SECTION, 'sequence_archive',sequence_archive)
87 sequence_archive = os.path.expanduser(sequence_archive)
88 apiid = config.get(SECTION, 'apiid', apiid)
89 apikey = config.get(SECTION, 'apikey', apikey)
90 apihost = config.get(SECTION, 'host', apihost)
92 parser = OptionParser()
# Commands: each option below selects a pipeline stage to run.
95 parser.add_option('--make-tree-from',
96 help="create directories & link data files",
98 parser.add_option('--fastq', help="generate scripts for making fastq files",
99 default=False, action="store_true")
101 parser.add_option('--ini', help="generate submission ini file", default=False,
104 parser.add_option('--makeddf', help='make the ddfs', default=False,
107 parser.add_option('--daf', default=None, help='specify daf name')
108 parser.add_option('--force', default=False, action="store_true",
109 help="Force regenerating fastqs")
111 # configuration options
112 parser.add_option('--apiid', default=apiid, help="Specify API ID")
113 parser.add_option('--apikey', default=apikey, help="Specify API KEY")
114 parser.add_option('--host', default=apihost,
115 help="specify HTSWorkflow host",)
116 parser.add_option('--sequence', default=sequence_archive,
117 help="sequence repository")
# Debugging options control the logging level chosen in main().
120 parser.add_option('--verbose', default=False, action="store_true",
121 help='verbose logging')
122 parser.add_option('--debug', default=False, action="store_true",
123 help='debug logging')
# Mirror a source tree into the local result directories: for each
# (lib_id, lib_path) pair, create lib_path if needed, then symlink every
# file found in source_path/lib_path into it.
128 def make_tree_from(source_path, library_result_map):
129 """Create a tree using data files from source path.
131 for lib_id, lib_path in library_result_map:
132 if not os.path.exists(lib_path):
133 logging.info("Making dir {0}".format(lib_path))
135 source_lib_dir = os.path.join(source_path, lib_path)
136 if os.path.exists(source_lib_dir):
138 for filename in os.listdir(source_lib_dir):
139 source_pathname = os.path.join(source_lib_dir, filename)
140 target_pathname = os.path.join(lib_path, filename)
# Defensive check: listdir just returned this name, so a miss here
# indicates the source tree changed underneath us.
141 if not os.path.exists(source_pathname):
142 raise IOError("{0} does not exist".format(source_pathname))
# Existing targets are left untouched; only missing links are made.
143 if not os.path.exists(target_pathname):
144 os.symlink(source_pathname, target_pathname)
# Continuation of a logging call (opening line not visible here).
146 'LINK {0} to {1}'.format(source_pathname, target_pathname))
# Generate condor submit scripts that convert archived qseq/srf files
# into the fastq files the submission directories still need.
148 def build_fastqs(host, apidata, sequences_path, library_result_map,
151 Generate condor scripts to build any needed fastq files
154 host (str): root of the htsworkflow api server
155 apidata (dict): id & key to post to the server
156 sequences_path (str): root of the directory tree to scan for files
157 library_result_map (list): [(library_id, destination directory), ...]
# Condor header for qseq->fastq jobs; %(exe)s is this interpreter.
159 qseq_condor_header = """
162 error=log/qseq2fastq.err.$(process).log
163 output=log/qseq2fastq.out.$(process).log
164 log=log/qseq2fastq.log
166 """ % {'exe': sys.executable }
167 qseq_condor_entries = []
# Condor header for srf->fastq jobs; note the hard-coded user paths in
# the environment line (site-specific, fragile outside that host).
168 srf_condor_header = """
171 output=log/srf_pair_fastq.out.$(process).log
172 error=log/srf_pair_fastq.err.$(process).log
173 log=log/srf_pair_fastq.log
174 environment="PYTHONPATH=/home/diane/lib/python2.6/site-packages:/home/diane/proj/solexa/gaworkflow PATH=/woldlab/rattus/lvol0/mus/home/diane/bin:/usr/bin:/bin"
176 """ % {'exe': sys.executable }
177 srf_condor_entries = []
178 lib_db = find_archive_sequence_files(host,
# Map of fastq target path -> {filetype: source sequence} still missing.
183 needed_targets = find_missing_targets(library_result_map, lib_db, force)
185 for target_pathname, available_sources in needed_targets.items():
186 logging.debug(' target : %s' % (target_pathname,))
187 logging.debug(' candidate sources: %s' % (available_sources,))
# Prefer qseq sources over srf when both exist (dict.has_key: Python 2).
188 if available_sources.has_key('qseq'):
189 source = available_sources['qseq']
190 qseq_condor_entries.append(
191 condor_qseq_to_fastq(source.path,
196 elif available_sources.has_key('srf'):
197 source = available_sources['srf']
# mid_point is optional srf metadata marking the read-1/read-2 split.
198 mid = getattr(source, 'mid_point', None)
199 srf_condor_entries.append(
200 condor_srf_to_fastq(source.path,
# No usable source at all: report it (Python 2 print statement).
208 print " need file", target_pathname
# Only write the submit scripts when there is at least one job.
210 if len(srf_condor_entries) > 0:
211 make_submit_script('srf.fastq.condor',
215 if len(qseq_condor_entries) > 0:
216 make_submit_script('qseq.fastq.condor',
# Work out which fastq target files do not yet exist (or are forced to
# be regenerated) and map each target path to its candidate sources.
221 def find_missing_targets(library_result_map, lib_db, force=False):
223 Check if the sequence file exists.
224 This requires computing what the sequence name is and checking
225 to see if it can be found in the sequence location.
227 Adds seq.paired flag to sequences listed in lib_db[*]['lanes']
# Filename templates; the paired form carries an _r%(read)s suffix.
229 fastq_paired_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s_r%(read)s.fastq'
230 fastq_single_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s.fastq'
231 # find what targets we're missing
233 for lib_id, result_dir in library_result_map:
235 lane_dict = make_lane_dict(lib_db, lib_id)
237 for lane_key, sequences in lib['lanes'].items():
238 for seq in sequences:
# Side effect promised in the docstring: annotate each sequence with
# its flowcell's paired_end flag.
239 seq.paired = lane_dict[seq.flowcell]['paired_end']
240 lane_status = lane_dict[seq.flowcell]['status']
# A paired flowcell sequence without a read number is presumably
# skipped or defaulted on the missing lines — confirm in full source.
242 if seq.paired and seq.read is None:
244 filename_attributes = {
245 'flowcell': seq.flowcell,
252 if lane_status == 'Failed':
254 if seq.flowcell == '30DY0AAXX':
255 # 30DY0 only ran for 151 bases instead of 152
256 # it is actually 76 1st read, 75 2nd read
261 target_name = fastq_paired_template % filename_attributes
263 target_name = fastq_single_template % filename_attributes
265 target_pathname = os.path.join(result_dir, target_name)
# force=True regenerates even existing targets.
266 if force or not os.path.exists(target_pathname):
267 t = needed_targets.setdefault(target_pathname, {})
# Keyed by filetype ('qseq'/'srf') so build_fastqs can pick a source.
268 t[seq.filetype] = seq
270 return needed_targets
def link_daf(daf_path, library_result_map):
    """Hard-link the daf file into every library result directory.

    Arguments:
        daf_path: path to the daf file that should appear in each dir
        library_result_map: iterable of (library_id, result_dir) pairs

    Raises RuntimeError if the daf or any target directory is missing.
    Existing links are left alone.
    """
    if not os.path.exists(daf_path):
        raise RuntimeError("%s does not exist, how can I link to it?" % (daf_path,))

    base_daf = os.path.basename(daf_path)

    for _lib_id, target_dir in library_result_map:
        if not os.path.exists(target_dir):
            raise RuntimeError("Couldn't find target directory %s" %(target_dir,))
        destination = os.path.join(target_dir, base_daf)
        # Already linked: nothing to do for this directory.
        if os.path.exists(destination):
            continue
        # Re-check the source right before linking.
        if not os.path.exists(daf_path):
            raise RuntimeError("Couldn't find daf: %s" %(daf_path,))
        os.link(daf_path, destination)
# Write a <result_dir>.ini file in each result directory describing
# every submission file (view, replicate, md5sum, ...) one section per
# file, with all fastqs of a given extension grouped onto one line.
289 def make_submission_ini(host, apidata, library_result_map, paired=True):
290 #attributes = get_filename_attribute_map(paired)
291 view_map = NameToViewMap(host, apidata)
293 candidate_fastq_src = {}
295 for lib_id, result_dir in library_result_map:
# The config section lists the attribute ordering used by read_ddf_ini.
296 order_by = ['order_by=files', 'view', 'replicate', 'cell',
297 'readType', 'mapAlgorithm', 'insertLength', 'md5sum' ]
298 inifile = ['[config]']
299 inifile += [" ".join(order_by)]
302 result_ini = os.path.join(result_dir, result_dir+'.ini')
# Every file present in the result directory is classified.
305 submission_files = os.listdir(result_dir)
307 fastq_attributes = {}
308 for f in submission_files:
309 attributes = view_map.find_attributes(f, lib_id)
310 if attributes is None:
311 raise ValueError("Unrecognized file: %s" % (f,))
312 attributes['md5sum'] = "None"
314 ext = attributes["extension"]
# view None means "recognized but not submitted" — skipped (else
# branch lines not visible here).
315 if attributes['view'] is None:
317 elif attributes.get("type", None) == 'fastq':
# Fastqs are batched per extension and emitted after this loop.
318 fastqs.setdefault(ext, set()).add(f)
319 fastq_attributes[ext] = attributes
321 md5sum = make_md5sum(os.path.join(result_dir,f))
322 if md5sum is not None:
323 attributes['md5sum']=md5sum
325 make_submission_section(line_counter,
332 # add in fastqs on a single line.
334 for extension, fastq_files in fastqs.items():
336 make_submission_section(line_counter,
338 fastq_attributes[extension])
343 f = open(result_ini,'w')
# NOTE(review): no explicit close/flush visible for this handle —
# presumably on the missing following line; confirm in full source.
344 f.write(os.linesep.join(inifile))
# Index a library's lane_set by flowcell id. Builds (flowcell, lane)
# pairs; presumably converted with dict() on the missing return line,
# so later lanes on the same flowcell would overwrite earlier ones.
347 def make_lane_dict(lib_db, lib_id):
349 Convert the lane_set in a lib_db to a dictionary
350 indexed by flowcell ID
353 for lane in lib_db[lib_id]['lane_set']:
354 result.append((lane['flowcell'], lane))
# Generate a ddf (and condor scripts) for every result directory that
# has a submission ini, then write a DAGMan file chaining the condor
# archive/upload jobs together.
358 def make_all_ddfs(library_result_map, daf_name, make_condor=True, force=False):
360 for lib_id, result_dir in library_result_map:
361 ininame = result_dir+'.ini'
362 inipathname = os.path.join(result_dir, ininame)
# Directories without an ini (make_submission_ini not run) are skipped.
363 if os.path.exists(inipathname):
365 make_ddf(ininame, daf_name, True, make_condor, result_dir)
368 if make_condor and len(dag_fragment) > 0:
369 dag_filename = 'submission.dagman'
# Refuse to clobber an existing dag file unless --force was given.
370 if not force and os.path.exists(dag_filename):
371 logging.warn("%s exists, please delete" % (dag_filename,))
373 f = open(dag_filename,'w')
374 f.write( os.linesep.join(dag_fragment))
375 f.write( os.linesep )
# Convert one submission ini into a tab-delimited ddf file, and emit
# the condor archive/upload scripts plus the DAG fragment tying them
# together. Returns dag_fragments (return line not visible here).
379 def make_ddf(ininame, daf_name, guess_ddf=False, make_condor=False, outdir=None):
381 Make ddf files, and bonus condor file
# When outdir is given, work is presumably done with outdir as the
# current directory (chdir lines not visible) — confirm in full source.
385 if outdir is not None:
390 ddf_name = make_ddf_name(ininame)
392 output = open(ddf_name,'w')
# read_ddf_ini writes the tab-delimited rows into `output` and returns
# the list of data files it referenced.
394 file_list = read_ddf_ini(ininame, output)
396 "Read config {0}, found files: {1}".format(
397 ininame, ", ".join(file_list)))
# The daf and the ddf itself also go into the archive tarball.
399 file_list.append(daf_name)
400 if ddf_name is not None:
401 file_list.append(ddf_name)
404 archive_condor = make_condor_archive_script(ininame, file_list)
405 upload_condor = make_condor_upload_script(ininame)
407 dag_fragments.extend(
408 make_dag_fragment(ininame, archive_condor, upload_condor)
# Read a submission ini and stream it out as a tab-delimited table:
# one header row from the config section's order_by, then one row per
# [lineN] section. Returns the accumulated file list (return line not
# visible here).
416 def read_ddf_ini(filename, output=sys.stdout):
418 Read a ini file and dump out a tab delmited text file
421 config = SafeConfigParser()
422 config.read(filename)
# order_by names the columns and their order for every data row.
424 order_by = shlex.split(config.get("config", "order_by"))
426 output.write("\t".join(order_by))
427 output.write(os.linesep)
428 sections = config.sections()
430 for section in sections:
431 if section == "config":
432 # skip the config block
436 v = config.get(section, key)
# The 'files' value is comma-separated; its entries are collected so
# make_ddf can archive them.
439 file_list.extend(parse_filelist(v))
441 output.write("\t".join(values))
442 output.write(os.linesep)
# Parse a whitespace-delimited "library_id result_dir" file into a list
# of (library_id, result_dir) tuples; '#' lines are comments.
446 def read_library_result_map(filename):
448 Read a file that maps library id to result directory.
449 Does not support spaces in filenames.
454 stream = open(filename,'r')
# Per-line loop (loop header not visible here); blank and comment
# lines are skipped.
459 if not line.startswith('#') and len(line) > 0 :
460 library_id, result_dir = line.split()
461 results.append((library_id, result_dir))
# Write a condor submit script that tars up the submission files
# (tar czvhf, following symlinks, gzip level 3) into ../<archivename>.
# Returns the script path (return line not visible here).
465 def make_condor_archive_script(ininame, files):
466 script = """Universe = vanilla
468 Executable = /bin/tar
469 arguments = czvhf ../%(archivename)s %(filelist)s
471 Error = compress.err.$(Process).log
472 Output = compress.out.$(Process).log
473 Log = /tmp/submission-compress-%(user)s.log
474 initialdir = %(initialdir)s
475 environment="GZIP=-3"
# Fail fast if any file in the archive list is missing (inside a loop
# over `files`; the loop header is not visible here).
481 if not os.path.exists(f):
482 raise RuntimeError("Missing %s" % (f,))
484 context = {'archivename': make_submission_name(ininame),
485 'filelist': " ".join(files),
486 'initialdir': os.getcwd(),
487 'user': os.getlogin()}
489 condor_script = make_condor_name(ininame, 'archive')
490 condor_stream = open(condor_script,'w')
491 condor_stream.write(script % context)
492 condor_stream.close()
# Write a condor submit script that uploads the archive tarball to the
# ENCODE FTP site via lftp, pulling credentials from a .netrc file.
# Returns the script path (return line not visible here).
496 def make_condor_upload_script(ininame):
497 script = """Universe = vanilla
499 Executable = /usr/bin/lftp
# SECURITY(review): the ftp password is interpolated into the condor
# job's argument line in plain text, visible in the script file and
# process listings.
500 arguments = -c put ../%(archivename)s -o ftp://%(ftpuser)s:%(ftppassword)s@%(ftphost)s/%(archivename)s
502 Error = upload.err.$(Process).log
503 Output = upload.out.$(Process).log
504 Log = /tmp/submission-upload-%(user)s.log
505 initialdir = %(initialdir)s
# Credentials come from a hard-coded user's .netrc — site-specific.
509 auth = netrc.netrc(os.path.expanduser("~diane/.netrc"))
511 encodeftp = 'encodeftp.cse.ucsc.edu'
# netrc entries are (login, account, password) tuples.
512 ftpuser = auth.hosts[encodeftp][0]
513 ftppassword = auth.hosts[encodeftp][2]
514 context = {'archivename': make_submission_name(ininame),
515 'initialdir': os.getcwd(),
516 'user': os.getlogin(),
518 'ftppassword': ftppassword,
519 'ftphost': encodeftp}
521 condor_script = make_condor_name(ininame, 'upload')
522 condor_stream = open(condor_script,'w')
523 condor_stream.write(script % context)
524 condor_stream.close()
# Restrict the script (it contains the password) to owner read/write.
525 os.chmod(condor_script, stat.S_IREAD|stat.S_IWRITE)
# Produce DAGMan lines declaring the archive and upload jobs for one
# submission and ordering archive before upload. Returns the fragment
# list (return line not visible here).
530 def make_dag_fragment(ininame, archive_condor, upload_condor):
532 Make the couple of fragments compress and then upload the data.
# DAGMan needs absolute paths to the submit files.
534 cur_dir = os.getcwd()
535 archive_condor = os.path.join(cur_dir, archive_condor)
536 upload_condor = os.path.join(cur_dir, upload_condor)
537 job_basename = make_base_name(ininame)
540 fragments.append('JOB %s_archive %s' % (job_basename, archive_condor))
541 fragments.append('JOB %s_upload %s' % (job_basename, upload_condor))
542 fragments.append('PARENT %s_archive CHILD %s_upload' % (job_basename, job_basename))
# Fetch one library's metadata from the htsworkflow API; presumably
# returns `contents` (return line not visible here).
547 def get_library_info(host, apidata, library_id):
548 url = api.library_url(host, library_id)
549 contents = api.retrieve_info(url, apidata)
# Build the condor queue entry (argument string) for one srf->fastq
# conversion via the srf2fastq script. Paired runs emit --left/--right
# targets; single runs emit --single.
553 def condor_srf_to_fastq(srf_file, target_pathname, paired, flowcell=None,
554 mid=None, force=False):
555 py = srf2fastq.__file__
556 args = [ py, srf_file, ]
558 args.extend(['--left', target_pathname])
559 # this is ugly. I did it because I was pregenerating the target
560 # names before I tried to figure out what sources could generate
561 # those targets, and everything up to this point had been
562 # one-to-one. So I couldn't figure out how to pair the
564 # With this at least the command will run correctly.
565 # however if we rename the default targets, this'll break
566 # also I think it'll generate it twice.
# The right-read target is derived by string substitution on the
# left-read name — fragile, as the author's comment above admits.
567 args.extend(['--right',
568 target_pathname.replace('_r1.fastq', '_r2.fastq')])
570 args.extend(['--single', target_pathname ])
571 if flowcell is not None:
572 args.extend(['--flowcell', flowcell])
# mid: optional read-boundary position within the srf record.
575 args.extend(['-m', str(mid)])
578 args.extend(['--force'])
# Tail of the condor entry template (opening lines not visible here).
583 """ % (" ".join(args),)
# Build the condor queue entry for one qseq->fastq conversion via the
# qseq2fastq script; returns the formatted entry (template opening
# lines not visible here).
588 def condor_qseq_to_fastq(qseq_file, target_pathname, flowcell=None, force=False):
589 py = qseq2fastq.__file__
590 args = [py, '-i', qseq_file, '-o', target_pathname ]
591 if flowcell is not None:
592 args.extend(['-f', flowcell])
596 """ % (" ".join(args))
# Scan the sequence archive for files belonging to the lanes of the
# libraries we are submitting, grouping matches into lib_db[lib_id]
# ['lanes'][(flowcell, lane)] sets. Returns lib_db (return line not
# visible here).
600 def find_archive_sequence_files(host, apidata, sequences_path,
603 Find all the archive sequence files possibly associated with our results.
606 logging.debug("Searching for sequence files in: %s" %(sequences_path,))
610 #seq_dirs = set(os.path.join(sequences_path, 'srfs'))
612 for lib_id, result_dir in library_result_map:
613 lib_info = get_library_info(host, apidata, lib_id)
614 lib_info['lanes'] = {}
615 lib_db[lib_id] = lib_info
# Remember which (flowcell, lane) pairs belong to which library so the
# scan results can be filtered back down below.
617 for lane in lib_info['lane_set']:
618 lane_key = (lane['flowcell'], lane['lane_number'])
619 candidate_lanes[lane_key] = lib_id
620 seq_dirs.add(os.path.join(sequences_path,
623 logging.debug("Seq_dirs = %s" %(unicode(seq_dirs)))
624 candidate_seq_list = scan_for_sequences(seq_dirs)
626 # at this point we have too many sequences as scan_for_sequences
627 # returns all the sequences in a flowcell directory
628 # so lets filter out the extras
630 for seq in candidate_seq_list:
631 lane_key = (seq.flowcell, seq.lane)
632 lib_id = candidate_lanes.get(lane_key, None)
# Sequences from lanes we don't care about fall through silently.
633 if lib_id is not None:
634 lib_info = lib_db[lib_id]
635 lib_info['lanes'].setdefault(lane_key, set()).add(seq)
# Classify submission filenames into ENCODE "view" names plus the
# attribute dict (cell, replicate, MapAlgorithm, insertLength, ...)
# that make_submission_ini records for each file.
640 class NameToViewMap(object):
641 """Determine view attributes for a given submission file name
643 def __init__(self, root_url, apidata):
644 self.root_url = root_url
645 self.apidata = apidata
649 # ma is "map algorithm"
# Ordered (glob pattern, view) pairs; first match wins, so specific
# patterns must precede general ones (e.g. *.splices.bam before *.bam).
# A view of None means "recognized, but not submitted"; a callable is
# invoked with is_paired to pick the view dynamically.
654 ('*.splices.bam', 'Splices'),
655 ('*.bam', self._guess_bam_view),
656 ('junctions.bed', 'Junctions'),
657 ('*.jnct', 'Junctions'),
658 ('*unique.bigwig', None),
659 ('*plus.bigwig', 'PlusSignal'),
660 ('*minus.bigwig', 'MinusSignal'),
661 ('*.bigwig', 'Signal'),
667 ('*ufflinks?0.9.3.genes.gtf', 'GeneDeNovo'),
668 ('*ufflinks?0.9.3.transcripts.gtf', 'TranscriptDeNovo'),
669 ('*GENCODE-v3c.exonFPKM.gtf', 'ExonsGencV3c'),
670 ('*GENCODE-v3c.genes.gtf', 'GeneGencV3c'),
671 ('*GENCODE-v3c.transcripts.gtf', 'TranscriptGencV3c'),
672 ('*GENCODE-v3c.TSS.gtf', 'TSS'),
673 ('*.junctions.bed6+3', 'Junctions'),
675 ('*.?ufflinks-0.9.0?genes.expr', 'GeneDeNovo'),
676 ('*.?ufflinks-0.9.0?transcripts.expr', 'TranscriptDeNovo'),
677 ('*.?ufflinks-0.9.0?transcripts.gtf', 'GeneModel'),
679 ('*.GENCODE-v3c?genes.expr', 'GeneGCV3c'),
680 ('*.GENCODE-v3c?transcript*.expr', 'TranscriptGCV3c'),
681 ('*.GENCODE-v3c?transcript*.gtf', 'TranscriptGencV3c'),
682 ('*.GENCODE-v4?genes.expr', None), #'GeneGCV4'),
683 ('*.GENCODE-v4?transcript*.expr', None), #'TranscriptGCV4'),
684 ('*.GENCODE-v4?transcript*.gtf', None), #'TranscriptGencV4'),
685 ('*_1.75mers.fastq', 'FastqRd1'),
686 ('*_2.75mers.fastq', 'FastqRd2'),
687 ('*_r1.fastq', 'FastqRd1'),
688 ('*_r2.fastq', 'FastqRd2'),
689 ('*.fastq', 'Fastq'),
690 ('*.gtf', 'GeneModel'),
694 ('paired-end-distribution*', 'InsLength'),
695 ('*.stats.txt', 'InsLength'),
699 ('transfer_log', None),
# Per-view attribute dicts; fastq views additionally carry a 'type'
# marker that make_submission_ini uses to batch them.
703 None: {"MapAlgorithm": "NA"},
704 "Paired": {"MapAlgorithm": ma},
705 "Aligns": {"MapAlgorithm": ma},
706 "Single": {"MapAlgorithm": ma},
707 "Splices": {"MapAlgorithm": ma},
708 "Junctions": {"MapAlgorithm": ma},
709 "PlusSignal": {"MapAlgorithm": ma},
710 "MinusSignal": {"MapAlgorithm": ma},
711 "Signal": {"MapAlgorithm": ma},
712 "GeneModel": {"MapAlgorithm": ma},
713 "GeneDeNovo": {"MapAlgorithm": ma},
714 "TranscriptDeNovo": {"MapAlgorithm": ma},
715 "ExonsGencV3c": {"MapAlgorithm": ma},
716 "GeneGencV3c": {"MapAlgorithm": ma},
717 "TSS": {"MapAlgorithm": ma},
718 "GeneGCV3c": {"MapAlgorithm": ma},
719 "TranscriptGCV3c": {"MapAlgorithm": ma},
720 "TranscriptGencV3c": {"MapAlgorithm": ma},
721 "GeneGCV4": {"MapAlgorithm": ma},
722 "TranscriptGCV4": {"MapAlgorithm": ma},
723 "FastqRd1": {"MapAlgorithm": "NA", "type": "fastq"},
724 "FastqRd2": {"MapAlgorithm": "NA", "type": "fastq"},
725 "Fastq": {"MapAlgorithm": "NA", "type": "fastq" },
726 "InsLength": {"MapAlgorithm": ma},
728 # view name is one of the attributes
729 for v in self.views.keys():
730 self.views[v]['view'] = v
# Classify one filename for one library; returns the attribute dict,
# or None for files no pattern matches (return lines not visible here).
732 def find_attributes(self, pathname, lib_id):
733 """Looking for the best extension
734 The 'best' is the longest match
737 filename (str): the filename whose extention we are about to examine
# NOTE(review): os.path.splitext returns (root, ext), so these names
# are misleading; neither appears to be used in the visible lines.
739 path, filename = os.path.splitext(pathname)
# Cache library metadata so each library hits the API only once
# (dict.has_key: Python 2 idiom).
740 if not self.lib_cache.has_key(lib_id):
741 self.lib_cache[lib_id] = get_library_info(self.root_url,
742 self.apidata, lib_id)
744 lib_info = self.lib_cache[lib_id]
745 if lib_info['cell_line'].lower() == 'unknown':
746 logging.warn("Library %s missing cell_line" % (lib_id,))
748 'cell': lib_info['cell_line'],
749 'replicate': lib_info['replicate'],
751 is_paired = self._is_paired(lib_id, lib_info)
754 attributes.update(self.get_paired_attributes(lib_info))
756 attributes.update(self.get_single_attributes(lib_info))
# First glob match wins; callables are resolved to a view name.
758 for pattern, view in self.patterns:
759 if fnmatch.fnmatch(pathname, pattern):
761 view = view(is_paired=is_paired)
763 attributes.update(self.views[view])
764 attributes["extension"] = pattern
# Choose a bam view from paired-end status (body not visible here).
768 def _guess_bam_view(self, is_paired=True):
769 """Guess a view name based on library attributes
# Decide paired-end status by majority vote over the library's
# non-failed lanes; result is cached per library.
777 def _is_paired(self, lib_id, lib_info):
778 """Determine if a library is paired end"""
779 if len(lib_info["lane_set"]) == 0:
782 if not self.lib_paired.has_key(lib_id):
786 # check to see if all the flowcells are the same.
787 # otherwise we might need to do something complicated
788 for flowcell in lib_info["lane_set"]:
789 # yes there's also a status code, but this comparison
791 if flowcell["status"].lower() == "failed":
792 # ignore failed flowcell
795 elif flowcell["paired_end"]:
800 logging.debug("Library %s: %d paired, %d single, %d failed" % \
801 (lib_info["library_id"], is_paired, isnot_paired, failed))
803 if is_paired > isnot_paired:
804 self.lib_paired[lib_id] = True
805 elif is_paired < isnot_paired:
806 self.lib_paired[lib_id] = False
# A tie is unresolvable — refuse to guess.
808 raise RuntimeError("Equal number of paired & unpaired lanes."\
809 "Can't guess library paired status")
811 return self.lib_paired[lib_id]
# insertLength for paired libraries; missing insert_size falls back to
# a default (assignment lines not fully visible here).
813 def get_paired_attributes(self, lib_info):
814 if lib_info['insert_size'] is None:
815 errmsg = "Library %s is missing insert_size, assuming 200"
816 logging.warn(errmsg % (lib_info["library_id"],))
819 insert_size = lib_info['insert_size']
820 return {'insertLength': insert_size,
# Single-ended libraries report the 'ilNA' placeholder.
823 def get_single_attributes(self, lib_info):
824 return {'insertLength':'ilNA',
# Render one [lineN] ini section listing a group of files plus their
# attribute key=value pairs; presumably returns the line list (return
# line not visible here).
828 def make_submission_section(line_counter, files, attributes):
830 Create a section in the submission ini file
832 inifile = [ "[line%s]" % (line_counter,) ]
833 inifile += ["files=%s" % (",".join(files))]
835 for k,v in attributes.items():
836 inifile += ["%s=%s" % (k,v)]
# Strip directory and extension from a pathname; presumably returns
# `name` (return line not visible here).
840 def make_base_name(pathname):
841 base = os.path.basename(pathname)
842 name, ext = os.path.splitext(base)
# Derive the archive tarball name from the ini name (return line with
# the appended extension not visible here).
846 def make_submission_name(ininame):
847 name = make_base_name(ininame)
# Derive the ddf filename from a pathname's base name (return line with
# the appended extension not visible here).
851 def make_ddf_name(pathname):
852 name = make_base_name(pathname)
# Build a condor submit-script name: <base>[.<run_type>].condor
# (the `elements = [name]` initialization is on a line not visible here).
856 def make_condor_name(pathname, run_type=None):
857 name = make_base_name(pathname)
859 if run_type is not None:
860 elements.append(run_type)
861 elements.append("condor")
862 return ".".join(elements)
# Write a header followed by a list of text blocks to a file or stream;
# a string target is opened and closed here, a stream target is the
# caller's responsibility.
865 def make_submit_script(target, header, body_list):
867 write out a text file
869 this was intended for condor submit scripts
872 target (str or stream):
873 if target is a string, we will open and close the file
874 if target is a stream, the caller is responsible.
877 header to write at the beginning of the file
878 body_list (list of strs):
879 a list of blocks to add to the file.
# types.StringTypes covers both str and unicode (Python 2 only).
881 if type(target) in types.StringTypes:
886 for entry in body_list:
# Mirror of the open above: only close what we opened ourselves.
888 if type(target) in types.StringTypes:
def parse_filelist(file_string):
    """Break a comma-delimited file list string into a list of names.

    Empty fields are kept exactly as str.split produces them.
    """
    separator = ","
    return file_string.split(separator)
# Raise if any path in the list does not exist (the `for f in files:`
# loop header is on a line not visible here).
895 def validate_filelist(files):
897 Die if a file doesn't exist in a file list
900 if not os.path.exists(f):
901 raise RuntimeError("%s does not exist" % (f,))
# Get a file's md5sum, preferring a cached <filename>.md5 sidecar file
# over recomputing; falls back to make_md5sum_unix and presumably
# returns md5sum (return line not visible here).
903 def make_md5sum(filename):
904 """Quickly find the md5sum of a file
# NOTE(review): os.path.join with a single argument is a no-op here;
# plain string concatenation already formed the cache path.
906 md5_cache = os.path.join(filename+".md5")
908 if os.path.exists(md5_cache):
909 logging.debug("Found md5sum in {0}".format(md5_cache))
910 stream = open(md5_cache,'r')
911 lines = stream.readlines()
912 md5sum = parse_md5sum_line(lines, filename)
914 md5sum = make_md5sum_unix(filename, md5_cache)
# Run the system md5sum binary on filename, parse its output, and cache
# the result in md5_cache for next time.
917 def make_md5sum_unix(filename, md5_cache):
918 cmd = ["md5sum", filename]
919 logging.debug("Running {0}".format(" ".join(cmd)))
920 p = Popen(cmd, stdout=PIPE)
# NOTE(review): communicate() returns (stdout, stderr) — these names
# are swapped/misleading; 'stdin' actually holds md5sum's stdout, which
# is what gets parsed below.
921 stdin, stdout = p.communicate()
923 logging.debug("Finished {0} retcode {1}".format(" ".join(cmd), retcode))
925 logging.error("Trouble with md5sum for {0}".format(filename))
927 lines = stdin.split(os.linesep)
928 md5sum = parse_md5sum_line(lines, filename)
# Only a successfully parsed sum is written back to the cache file.
929 if md5sum is not None:
930 logging.debug("Caching sum in {0}".format(md5_cache))
931 stream = open(md5_cache, "w")
# Parse the first line of md5sum output ("<hash>  <filename>") and
# sanity-check that it names the expected file; presumably returns the
# hash or None (return lines not visible here).
936 def parse_md5sum_line(lines, filename):
937 md5sum, md5sum_filename = lines[0].split()
938 if md5sum_filename != filename:
# NOTE(review): "disagre" typo in this runtime error message — left
# unchanged here since it is program output, not a comment.
939 errmsg = "MD5sum and I disagre about filename. {0} != {1}"
940 logging.error(errmsg.format(filename, md5sum_filename))
944 if __name__ == "__main__":