2 from ConfigParser import SafeConfigParser
8 from optparse import OptionParser
10 from pprint import pprint, pformat
12 from StringIO import StringIO
14 from subprocess import Popen, PIPE
22 from htsworkflow.util import api
23 from htsworkflow.pipelines.sequences import \
24 create_sequence_table, \
26 from htsworkflow.pipelines import qseq2fastq
27 from htsworkflow.pipelines import srf2fastq
29 def main(cmdline=None):
30 parser = make_parser()
31 opts, args = parser.parse_args(cmdline)
34 logging.basicConfig(level = logging.DEBUG )
36 logging.basicConfig(level = logging.INFO )
38 logging.basicConfig(level = logging.WARNING )
40 apidata = {'apiid': opts.apiid, 'apikey': opts.apikey }
42 if opts.host is None or opts.apiid is None or opts.apikey is None:
43 parser.error("Please specify host url, apiid, apikey")
45 if opts.makeddf and opts.daf is None:
46 parser.error("Please specify your daf when making ddf files")
49 parser.error("I need at least one library submission-dir input file")
51 library_result_map = []
53 library_result_map.extend(read_library_result_map(a))
55 if opts.make_tree_from is not None:
56 make_tree_from(opts.make_tree_from, library_result_map)
58 if opts.daf is not None:
59 link_daf(opts.daf, library_result_map)
62 build_fastqs(opts.host,
69 make_submission_ini(opts.host, apidata, library_result_map)
72 make_all_ddfs(library_result_map, opts.daf, force=opts.force)
76 # Load defaults from the config files
77 config = SafeConfigParser()
78 config.read([os.path.expanduser('~/.htsworkflow.ini'), '/etc/htsworkflow.ini'])
80 sequence_archive = None
84 SECTION = 'sequence_archive'
85 if config.has_section(SECTION):
86 sequence_archive = config.get(SECTION, 'sequence_archive',sequence_archive)
87 sequence_archive = os.path.expanduser(sequence_archive)
88 apiid = config.get(SECTION, 'apiid', apiid)
89 apikey = config.get(SECTION, 'apikey', apikey)
90 apihost = config.get(SECTION, 'host', apihost)
92 parser = OptionParser()
95 parser.add_option('--make-tree-from',
96 help="create directories & link data files",
98 parser.add_option('--fastq', help="generate scripts for making fastq files",
99 default=False, action="store_true")
101 parser.add_option('--ini', help="generate submission ini file", default=False,
104 parser.add_option('--makeddf', help='make the ddfs', default=False,
107 parser.add_option('--daf', default=None, help='specify daf name')
108 parser.add_option('--force', default=False, action="store_true",
109 help="Force regenerating fastqs")
111 # configuration options
112 parser.add_option('--apiid', default=apiid, help="Specify API ID")
113 parser.add_option('--apikey', default=apikey, help="Specify API KEY")
114 parser.add_option('--host', default=apihost,
115 help="specify HTSWorkflow host",)
116 parser.add_option('--sequence', default=sequence_archive,
117 help="sequence repository")
120 parser.add_option('--verbose', default=False, action="store_true",
121 help='verbose logging')
122 parser.add_option('--debug', default=False, action="store_true",
123 help='debug logging')
128 def make_tree_from(source_path, library_result_map):
129 """Create a tree using data files from source path.
131 for lib_id, lib_path in library_result_map:
132 if not os.path.exists(lib_path):
133 logging.info("Making dir {0}".format(lib_path))
135 source_lib_dir = os.path.join(source_path, lib_path)
136 if os.path.exists(source_lib_dir):
138 for filename in os.listdir(source_lib_dir):
139 source_pathname = os.path.join(source_lib_dir, filename)
140 target_pathname = os.path.join(lib_path, filename)
141 if not os.path.exists(source_pathname):
142 raise IOError("{0} does not exist".format(source_pathname))
143 if not os.path.exists(target_pathname):
144 os.symlink(source_pathname, target_pathname)
146 'LINK {0} to {1}'.format(source_pathname, target_pathname))
148 def build_fastqs(host, apidata, sequences_path, library_result_map,
151 Generate condor scripts to build any needed fastq files
154 host (str): root of the htsworkflow api server
155 apidata (dict): id & key to post to the server
156 sequences_path (str): root of the directory tree to scan for files
157 library_result_map (list): [(library_id, destination directory), ...]
159 qseq_condor_header = """
162 error=log/qseq2fastq.err.$(process).log
163 output=log/qseq2fastq.out.$(process).log
164 log=log/qseq2fastq.log
166 """ % {'exe': sys.executable }
167 qseq_condor_entries = []
168 srf_condor_header = """
171 output=log/srf_pair_fastq.out.$(process).log
172 error=log/srf_pair_fastq.err.$(process).log
173 log=log/srf_pair_fastq.log
174 environment="PYTHONPATH=/home/diane/lib/python2.6/site-packages:/home/diane/proj/solexa/gaworkflow PATH=/woldlab/rattus/lvol0/mus/home/diane/bin:/usr/bin:/bin"
176 """ % {'exe': sys.executable }
177 srf_condor_entries = []
178 lib_db = find_archive_sequence_files(host,
183 needed_targets = find_missing_targets(library_result_map, lib_db, force)
185 for target_pathname, available_sources in needed_targets.items():
186 logging.debug(' target : %s' % (target_pathname,))
187 logging.debug(' candidate sources: %s' % (available_sources,))
188 if available_sources.has_key('qseq'):
189 source = available_sources['qseq']
190 qseq_condor_entries.append(
191 condor_qseq_to_fastq(source.path,
196 elif available_sources.has_key('srf'):
197 source = available_sources['srf']
198 mid = getattr(source, 'mid_point', None)
199 srf_condor_entries.append(
200 condor_srf_to_fastq(source.path,
208 print " need file", target_pathname
210 if len(srf_condor_entries) > 0:
211 make_submit_script('srf.fastq.condor',
215 if len(qseq_condor_entries) > 0:
216 make_submit_script('qseq.fastq.condor',
221 def find_missing_targets(library_result_map, lib_db, force=False):
223 Check if the sequence file exists.
224 This requires computing what the sequence name is and checking
225 to see if it can be found in the sequence location.
227 Adds seq.paired flag to sequences listed in lib_db[*]['lanes']
229 fastq_paired_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s_r%(read)s.fastq'
230 fastq_single_template = '%(lib_id)s_%(flowcell)s_c%(cycle)s_l%(lane)s.fastq'
231 # find what targets we're missing
233 for lib_id, result_dir in library_result_map:
235 lane_dict = make_lane_dict(lib_db, lib_id)
237 for lane_key, sequences in lib['lanes'].items():
238 for seq in sequences:
239 seq.paired = lane_dict[seq.flowcell]['paired_end']
240 lane_status = lane_dict[seq.flowcell]['status']
242 if seq.paired and seq.read is None:
244 filename_attributes = {
245 'flowcell': seq.flowcell,
252 if lane_status == 'Failed':
254 if seq.flowcell == '30DY0AAXX':
255 # 30DY0 only ran for 151 bases instead of 152
256 # it is actually 76 1st read, 75 2nd read
261 target_name = fastq_paired_template % filename_attributes
263 target_name = fastq_single_template % filename_attributes
265 target_pathname = os.path.join(result_dir, target_name)
266 if force or not os.path.exists(target_pathname):
267 t = needed_targets.setdefault(target_pathname, {})
268 t[seq.filetype] = seq
270 return needed_targets
def link_daf(daf_path, library_result_map):
    """Hard-link the daf file into each library result directory.

    Args:
      daf_path (str): path to the source daf file
      library_result_map (list): [(library_id, result_directory), ...]

    Raises:
      RuntimeError: if daf_path or any result directory does not exist.
    """
    if not os.path.exists(daf_path):
        raise RuntimeError("%s does not exist, how can I link to it?" % (daf_path,))

    base_daf = os.path.basename(daf_path)

    for lib_id, result_dir in library_result_map:
        if not os.path.exists(result_dir):
            raise RuntimeError("Couldn't find target directory %s" %(result_dir,))
        submission_daf = os.path.join(result_dir, base_daf)
        # Only create the link if it isn't already there; re-running the
        # script over an existing tree is a no-op for linked dirs.
        # (A second existence check of daf_path used to sit here; it merely
        # duplicated the guard at the top of the function.)
        if not os.path.exists(submission_daf):
            os.link(daf_path, submission_daf)
289 def make_submission_ini(host, apidata, library_result_map, paired=True):
290 #attributes = get_filename_attribute_map(paired)
291 view_map = NameToViewMap(host, apidata)
293 candidate_fastq_src = {}
295 for lib_id, result_dir in library_result_map:
296 order_by = ['order_by=files', 'view', 'replicate', 'cell',
297 'readType', 'mapAlgorithm', 'insertLength', 'md5sum' ]
298 inifile = ['[config]']
299 inifile += [" ".join(order_by)]
302 result_ini = os.path.join(result_dir, result_dir+'.ini')
305 submission_files = os.listdir(result_dir)
307 fastq_attributes = {}
308 for f in submission_files:
309 attributes = view_map.find_attributes(f, lib_id)
310 attributes['md5sum'] = "None"
311 if attributes is None:
312 raise ValueError("Unrecognized file: %s" % (f,))
314 ext = attributes["extension"]
315 if attributes['view'] is None:
317 elif attributes.get("type", None) == 'fastq':
318 fastqs.setdefault(ext, set()).add(f)
319 fastq_attributes[ext] = attributes
321 md5sum = make_md5sum(os.path.join(result_dir,f))
322 if md5sum is not None:
323 attributes['md5sum']=md5sum
325 make_submission_section(line_counter,
332 # add in fastqs on a single line.
334 for extension, fastq_files in fastqs.items():
336 make_submission_section(line_counter,
338 fastq_attributes[extension])
343 f = open(result_ini,'w')
344 f.write(os.linesep.join(inifile))
347 def make_lane_dict(lib_db, lib_id):
349 Convert the lane_set in a lib_db to a dictionary
350 indexed by flowcell ID
353 for lane in lib_db[lib_id]['lane_set']:
354 result.append((lane['flowcell'], lane))
358 def make_all_ddfs(library_result_map, daf_name, make_condor=True, force=False):
360 for lib_id, result_dir in library_result_map:
361 ininame = result_dir+'.ini'
362 inipathname = os.path.join(result_dir, ininame)
363 if os.path.exists(inipathname):
365 make_ddf(ininame, daf_name, True, make_condor, result_dir)
368 if make_condor and len(dag_fragment) > 0:
369 dag_filename = 'submission.dagman'
370 if not force and os.path.exists(dag_filename):
371 logging.warn("%s exists, please delete" % (dag_filename,))
373 f = open(dag_filename,'w')
374 f.write( os.linesep.join(dag_fragment))
375 f.write( os.linesep )
379 def make_ddf(ininame, daf_name, guess_ddf=False, make_condor=False, outdir=None):
381 Make ddf files, and bonus condor file
385 if outdir is not None:
390 ddf_name = make_ddf_name(ininame)
392 output = open(ddf_name,'w')
394 file_list = read_ddf_ini(ininame, output)
396 "Read config {0}, found files: {1}".format(
397 ininame, ", ".join(file_list)))
399 file_list.append(daf_name)
400 if ddf_name is not None:
401 file_list.append(ddf_name)
404 archive_condor = make_condor_archive_script(ininame, file_list)
405 upload_condor = make_condor_upload_script(ininame)
407 dag_fragments.extend(
408 make_dag_fragment(ininame, archive_condor, upload_condor)
416 def read_ddf_ini(filename, output=sys.stdout):
418 Read a ini file and dump out a tab delmited text file
421 config = SafeConfigParser()
422 config.read(filename)
424 order_by = shlex.split(config.get("config", "order_by"))
426 output.write("\t".join(order_by))
427 output.write(os.linesep)
428 sections = config.sections()
430 for section in sections:
431 if section == "config":
432 # skip the config block
436 v = config.get(section, key)
439 file_list.extend(parse_filelist(v))
441 output.write("\t".join(values))
442 output.write(os.linesep)
446 def read_library_result_map(filename):
448 Read a file that maps library id to result directory.
449 Does not support spaces in filenames.
454 stream = open(filename,'r')
459 if not line.startswith('#') and len(line) > 0 :
460 library_id, result_dir = line.split()
461 results.append((library_id, result_dir))
465 def make_condor_archive_script(ininame, files):
466 script = """Universe = vanilla
468 Executable = /bin/tar
469 arguments = czvhf ../%(archivename)s %(filelist)s
471 Error = compress.err.$(Process).log
472 Output = compress.out.$(Process).log
473 Log = /tmp/submission-compress-%(user)s.log
474 initialdir = %(initialdir)s
475 environment="GZIP=-3"
481 if not os.path.exists(f):
482 raise RuntimeError("Missing %s" % (f,))
484 context = {'archivename': make_submission_name(ininame),
485 'filelist': " ".join(files),
486 'initialdir': os.getcwd(),
487 'user': os.getlogin()}
489 condor_script = make_condor_name(ininame, 'archive')
490 condor_stream = open(condor_script,'w')
491 condor_stream.write(script % context)
492 condor_stream.close()
496 def make_condor_upload_script(ininame):
497 script = """Universe = vanilla
499 Executable = /usr/bin/lftp
500 arguments = -c put ../%(archivename)s -o ftp://%(ftpuser)s:%(ftppassword)s@%(ftphost)s/%(archivename)s
502 Error = upload.err.$(Process).log
503 Output = upload.out.$(Process).log
504 Log = /tmp/submission-upload-%(user)s.log
505 initialdir = %(initialdir)s
509 auth = netrc.netrc(os.path.expanduser("~diane/.netrc"))
511 encodeftp = 'encodeftp.cse.ucsc.edu'
512 ftpuser = auth.hosts[encodeftp][0]
513 ftppassword = auth.hosts[encodeftp][2]
514 context = {'archivename': make_submission_name(ininame),
515 'initialdir': os.getcwd(),
516 'user': os.getlogin(),
518 'ftppassword': ftppassword,
519 'ftphost': encodeftp}
521 condor_script = make_condor_name(ininame, 'upload')
522 condor_stream = open(condor_script,'w')
523 condor_stream.write(script % context)
524 condor_stream.close()
525 os.chmod(condor_script, stat.S_IREAD|stat.S_IWRITE)
530 def make_dag_fragment(ininame, archive_condor, upload_condor):
532 Make the couple of fragments compress and then upload the data.
534 cur_dir = os.getcwd()
535 archive_condor = os.path.join(cur_dir, archive_condor)
536 upload_condor = os.path.join(cur_dir, upload_condor)
537 job_basename = make_base_name(ininame)
540 fragments.append('JOB %s_archive %s' % (job_basename, archive_condor))
541 fragments.append('JOB %s_upload %s' % (job_basename, upload_condor))
542 fragments.append('PARENT %s_archive CHILD %s_upload' % (job_basename, job_basename))
547 def get_library_info(host, apidata, library_id):
548 url = api.library_url(host, library_id)
549 contents = api.retrieve_info(url, apidata)
553 def condor_srf_to_fastq(srf_file, target_pathname, paired, flowcell=None,
554 mid=None, force=False):
555 py = srf2fastq.__file__
556 args = [ py, srf_file, ]
558 args.extend(['--left', target_pathname])
559 # this is ugly. I did it because I was pregenerating the target
560 # names before I tried to figure out what sources could generate
561 # those targets, and everything up to this point had been
562 # one-to-one. So I couldn't figure out how to pair the
564 # With this at least the command will run correctly.
565 # however if we rename the default targets, this'll break
566 # also I think it'll generate it twice.
567 args.extend(['--right',
568 target_pathname.replace('_r1.fastq', '_r2.fastq')])
570 args.extend(['--single', target_pathname ])
571 if flowcell is not None:
572 args.extend(['--flowcell', flowcell])
575 args.extend(['-m', str(mid)])
578 args.extend(['--force'])
583 """ % (" ".join(args),)
588 def condor_qseq_to_fastq(qseq_file, target_pathname, flowcell=None, force=False):
589 py = qseq2fastq.__file__
590 args = [py, '-i', qseq_file, '-o', target_pathname ]
591 if flowcell is not None:
592 args.extend(['-f', flowcell])
596 """ % (" ".join(args))
600 def find_archive_sequence_files(host, apidata, sequences_path,
603 Find all the archive sequence files possibly associated with our results.
606 logging.debug("Searching for sequence files in: %s" %(sequences_path,))
610 #seq_dirs = set(os.path.join(sequences_path, 'srfs'))
612 for lib_id, result_dir in library_result_map:
613 lib_info = get_library_info(host, apidata, lib_id)
614 lib_info['lanes'] = {}
615 lib_db[lib_id] = lib_info
617 for lane in lib_info['lane_set']:
618 lane_key = (lane['flowcell'], lane['lane_number'])
619 candidate_lanes[lane_key] = lib_id
620 seq_dirs.add(os.path.join(sequences_path,
623 logging.debug("Seq_dirs = %s" %(unicode(seq_dirs)))
624 candidate_seq_list = scan_for_sequences(seq_dirs)
626 # at this point we have too many sequences as scan_for_sequences
627 # returns all the sequences in a flowcell directory
628 # so lets filter out the extras
630 for seq in candidate_seq_list:
631 lane_key = (seq.flowcell, seq.lane)
632 lib_id = candidate_lanes.get(lane_key, None)
633 if lib_id is not None:
634 lib_info = lib_db[lib_id]
635 lib_info['lanes'].setdefault(lane_key, set()).add(seq)
640 class NameToViewMap(object):
641 """Determine view attributes for a given submission file name
643 def __init__(self, root_url, apidata):
644 self.root_url = root_url
645 self.apidata = apidata
649 # ma is "map algorithm"
654 ('*.splices.bam', 'Splices'),
655 ('*.bam', self._guess_bam_view),
656 ('junctions.bed', 'Junctions'),
657 ('*.jnct', 'Junctions'),
658 ('*.unique.bigwig', None),
659 ('*.plus.bigwig', 'PlusSignal'),
660 ('*.minus.bigwig', 'MinusSignal'),
661 ('*.bigwig', 'Signal'),
666 ('*.?ufflinks-0.9.0?genes.expr', 'GeneDeNovo'),
667 ('*.?ufflinks-0.9.0?transcripts.expr', 'TranscriptDeNovo'),
668 ('*.?ufflinks-0.9.0?transcripts.gtf', 'GeneModel'),
669 ('*.GENCODE-v3c?genes.expr', 'GeneGCV3c'),
670 ('*.GENCODE-v3c?transcript*.expr', 'TranscriptGCV3c'),
671 ('*.GENCODE-v3c?transcript*.gtf', 'TranscriptGencV3c'),
672 ('*.GENCODE-v4?genes.expr', None), #'GeneGCV4'),
673 ('*.GENCODE-v4?transcript*.expr', None), #'TranscriptGCV4'),
674 ('*.GENCODE-v4?transcript*.gtf', None), #'TranscriptGencV4'),
675 ('*_1.75mers.fastq', 'FastqRd1'),
676 ('*_2.75mers.fastq', 'FastqRd2'),
677 ('*_r1.fastq', 'FastqRd1'),
678 ('*_r2.fastq', 'FastqRd2'),
679 ('*.fastq', 'Fastq'),
680 ('*.gtf', 'GeneModel'),
684 ('paired-end-distribution*', 'InsLength'),
685 ('*.stats.txt', 'InsLength'),
692 None: {"MapAlgorithm": "NA"},
693 "Paired": {"MapAlgorithm": ma},
694 "Aligns": {"MapAlgorithm": ma},
695 "Single": {"MapAlgorithm": ma},
696 "Splices": {"MapAlgorithm": ma},
697 "Junctions": {"MapAlgorithm": ma},
698 "PlusSignal": {"MapAlgorithm": ma},
699 "MinusSignal": {"MapAlgorithm": ma},
700 "Signal": {"MapAlgorithm": ma},
701 "GeneModel": {"MapAlgorithm": ma},
702 "GeneDeNovo": {"MapAlgorithm": ma},
703 "TranscriptDeNovo": {"MapAlgorithm": ma},
704 "GeneGCV3c": {"MapAlgorithm": ma},
705 "TranscriptGCV3c": {"MapAlgorithm": ma},
706 "TranscriptGencV3c": {"MapAlgorithm": ma},
707 "GeneGCV4": {"MapAlgorithm": ma},
708 "TranscriptGCV4": {"MapAlgorithm": ma},
709 "FastqRd1": {"MapAlgorithm": "NA", "type": "fastq"},
710 "FastqRd2": {"MapAlgorithm": "NA", "type": "fastq"},
711 "Fastq": {"MapAlgorithm": "NA", "type": "fastq" },
712 "InsLength": {"MapAlgorithm": ma},
714 # view name is one of the attributes
715 for v in self.views.keys():
716 self.views[v]['view'] = v
718 def find_attributes(self, pathname, lib_id):
719 """Looking for the best extension
720 The 'best' is the longest match
723 filename (str): the filename whose extention we are about to examine
725 path, filename = os.path.splitext(pathname)
726 if not self.lib_cache.has_key(lib_id):
727 self.lib_cache[lib_id] = get_library_info(self.root_url,
728 self.apidata, lib_id)
730 lib_info = self.lib_cache[lib_id]
731 if lib_info['cell_line'].lower() == 'unknown':
732 logging.warn("Library %s missing cell_line" % (lib_id,))
734 'cell': lib_info['cell_line'],
735 'replicate': lib_info['replicate'],
737 is_paired = self._is_paired(lib_id, lib_info)
740 attributes.update(self.get_paired_attributes(lib_info))
742 attributes.update(self.get_single_attributes(lib_info))
744 for pattern, view in self.patterns:
745 if fnmatch.fnmatch(pathname, pattern):
747 view = view(is_paired=is_paired)
749 attributes.update(self.views[view])
750 attributes["extension"] = pattern
754 def _guess_bam_view(self, is_paired=True):
755 """Guess a view name based on library attributes
763 def _is_paired(self, lib_id, lib_info):
764 """Determine if a library is paired end"""
765 if len(lib_info["lane_set"]) == 0:
768 if not self.lib_paired.has_key(lib_id):
772 # check to see if all the flowcells are the same.
773 # otherwise we might need to do something complicated
774 for flowcell in lib_info["lane_set"]:
775 # yes there's also a status code, but this comparison
777 if flowcell["status"].lower() == "failed":
778 # ignore failed flowcell
781 elif flowcell["paired_end"]:
786 logging.debug("Library %s: %d paired, %d single, %d failed" % \
787 (lib_info["library_id"], is_paired, isnot_paired, failed))
789 if is_paired > isnot_paired:
790 self.lib_paired[lib_id] = True
791 elif is_paired < isnot_paired:
792 self.lib_paired[lib_id] = False
794 raise RuntimeError("Equal number of paired & unpaired lanes."\
795 "Can't guess library paired status")
797 return self.lib_paired[lib_id]
799 def get_paired_attributes(self, lib_info):
800 if lib_info['insert_size'] is None:
801 errmsg = "Library %s is missing insert_size, assuming 200"
802 logging.warn(errmsg % (lib_info["library_id"],))
805 insert_size = lib_info['insert_size']
806 return {'insertLength': insert_size,
809 def get_single_attributes(self, lib_info):
810 return {'insertLength':'ilNA',
814 def make_submission_section(line_counter, files, attributes):
816 Create a section in the submission ini file
818 inifile = [ "[line%s]" % (line_counter,) ]
819 inifile += ["files=%s" % (",".join(files))]
821 for k,v in attributes.items():
822 inifile += ["%s=%s" % (k,v)]
826 def make_base_name(pathname):
827 base = os.path.basename(pathname)
828 name, ext = os.path.splitext(base)
832 def make_submission_name(ininame):
833 name = make_base_name(ininame)
837 def make_ddf_name(pathname):
838 name = make_base_name(pathname)
842 def make_condor_name(pathname, run_type=None):
843 name = make_base_name(pathname)
845 if run_type is not None:
846 elements.append(run_type)
847 elements.append("condor")
848 return ".".join(elements)
851 def make_submit_script(target, header, body_list):
853 write out a text file
855 this was intended for condor submit scripts
858 target (str or stream):
859 if target is a string, we will open and close the file
860 if target is a stream, the caller is responsible.
863 header to write at the beginning of the file
864 body_list (list of strs):
865 a list of blocks to add to the file.
867 if type(target) in types.StringTypes:
872 for entry in body_list:
874 if type(target) in types.StringTypes:
def parse_filelist(file_string):
    """Split a comma-delimited string of filenames into a list."""
    filenames = file_string.split(",")
    return filenames
881 def validate_filelist(files):
883 Die if a file doesn't exist in a file list
886 if not os.path.exists(f):
887 raise RuntimeError("%s does not exist" % (f,))
889 def make_md5sum(filename):
890 """Quickly find the md5sum of a file
892 md5_cache = os.path.join(filename+".md5")
894 if os.path.exists(md5_cache):
895 logging.debug("Found md5sum in {0}".format(md5_cache))
896 stream = open(md5_cache,'r')
897 lines = stream.readlines()
898 md5sum = parse_md5sum_line(lines, filename)
900 md5sum = make_md5sum_unix(filename, md5_cache)
903 def make_md5sum_unix(filename, md5_cache):
904 cmd = ["md5sum", filename]
905 logging.debug("Running {0}".format(" ".join(cmd)))
906 p = Popen(cmd, stdout=PIPE)
907 stdin, stdout = p.communicate()
909 logging.debug("Finished {0} retcode {1}".format(" ".join(cmd), retcode))
911 logging.error("Trouble with md5sum for {0}".format(filename))
913 lines = stdin.split(os.linesep)
914 md5sum = parse_md5sum_line(lines, filename)
915 if md5sum is not None:
916 logging.debug("Caching sum in {0}".format(md5_cache))
917 stream = open(md5_cache, "w")
922 def parse_md5sum_line(lines, filename):
923 md5sum, md5sum_filename = lines[0].split()
924 if md5sum_filename != filename:
925 errmsg = "MD5sum and I disagre about filename. {0} != {1}"
926 logging.error(errmsg.format(filename, md5sum_filename))
930 if __name__ == "__main__":