From: Diane Trout Date: Fri, 10 Apr 2015 22:26:16 +0000 (-0700) Subject: Script to look through MEME output to make new motif files. X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=helpful_scripts.git;a=commitdiff_plain;h=8f2477f146e84ab4803bb3a391e31895eef464b9 Script to look through MEME output to make new motif files. Written to help Katherine with her MEME project. --- diff --git a/find_meme_letter_probabilities.py b/find_meme_letter_probabilities.py new file mode 100644 index 0000000..fa34bab --- /dev/null +++ b/find_meme_letter_probabilities.py @@ -0,0 +1,111 @@ +#!/usr/bin/python 3 +import argparse +import re +import os + +def main(cmdline=None): + #filename = '/woldlab/castor/home/kfisher/ChIA/AliNodes/DNase4/Wellington/PP_DE_analysis/Consolidated_Wellington_promoters_3xUp_10FPKM.cyteConnected_myocyteFootprints.20rad.MEME2_maxw25/meme.txt' + #output_dir = '.' + + parser = make_parser() + args = parser.parse_args(cmdline) + + if not os.path.isdir(args.output): + os.mkdir(args.output) + + for filename in args.filenames: + with open(filename, 'rt') as instream: + version = find_meme_version(instream) + + motif_counter = 0 + for motif in find_meme_letter_probabilities(instream): + motif_counter += 1 + output_name = 'motif_%d.mot' % (motif_counter,) + output_filename = os.path.join(args.output, output_name) + with open(output_filename, 'wt') as outstream: + save_meme_letter_probabilities( + outstream, version, motif_counter, motif) + + +def make_parser(): + parser = argparse.ArgumentParser('Search through a MEME report for letter-probability matrices') + parser.add_argument('-o', '--output', default='.', + help="output directory") + parser.add_argument('filenames', nargs='*', + help="name of MEME output files to search through") + return parser + +def find_meme_version(instream, limit=20): + """Read through the first few meme header lines looking for the MEME version + """ + version_pattern = re.compile("MEME version (?P[\d.]+)") + + for i, line in enumerate(instream): + if i > limit: + return None + else: + match = version_pattern.match(line) + if match: + return match.group('ver') + + +def find_meme_letter_probabilities(instream): + """Look through a meme output report and find the letter-probability matrix + + Arguments: + instream (input stream): file-like object to read from + + Yields: + list of strings for a MEME letter-probabilities matrix + """ + header = "letter-probability matrix: alength=(?P[ \d]+) w=(?P[ \d]+)" + letter_pattern = re.compile(header) + + motif = [] + rows = 0 + for line in instream: + line = line.rstrip() + match = letter_pattern.match(line) + if match: + if motif: + yield motif + motif = [] + rows = int(match.group('bases')) + motif.append(line) + elif rows: + motif.append(line) + rows -= 1 + yield motif + + +def save_meme_letter_probabilities(outstream, version, motif_index, motif): + """Write MEME mot formatted contents to outstream + + Arguments: + outstream (output stream): file-like object to write to + version (str): motif version number + motif_index (int): what motif number we're on + motif (list of strings): motif letter-probabilities from meme + """ + + report = """MEME version {version} + +ALPHABET= ACGT + +strands: + - + +Background etter frequencies (from uniform background): +A 0.25000 C 0.25000 G 0.25000 T 0.25000 + +MOTIF {count} + +{matrix} +""".format(version=version, + count=motif_index, + matrix=os.linesep.join(motif)) + outstream.write(report) + + +if __name__ == "__main__": + main() +