Script to look through MEME output to make new motif files.
authorDiane Trout <diane@ghic.org>
Fri, 10 Apr 2015 22:26:16 +0000 (15:26 -0700)
committerDiane Trout <diane@ghic.org>
Fri, 10 Apr 2015 22:26:16 +0000 (15:26 -0700)
Written to help Katherine with her MEME project.

find_meme_letter_probabilities.py [new file with mode: 0644]

diff --git a/find_meme_letter_probabilities.py b/find_meme_letter_probabilities.py
new file mode 100644 (file)
index 0000000..fa34bab
--- /dev/null
@@ -0,0 +1,111 @@
+#!/usr/bin/python 3
+import argparse
+import re
+import os
+
+def main(cmdline=None):
+    #filename = '/woldlab/castor/home/kfisher/ChIA/AliNodes/DNase4/Wellington/PP_DE_analysis/Consolidated_Wellington_promoters_3xUp_10FPKM.cyteConnected_myocyteFootprints.20rad.MEME2_maxw25/meme.txt'
+    #output_dir = '.'
+
+    parser = make_parser()
+    args = parser.parse_args(cmdline)
+
+    if not os.path.isdir(args.output):
+        os.mkdir(args.output)
+
+    for filename in args.filenames:
+        with open(filename, 'rt') as instream:
+            version = find_meme_version(instream)
+
+            motif_counter = 0
+            for motif in find_meme_letter_probabilities(instream):
+                motif_counter += 1
+                output_name = 'motif_%d.mot' % (motif_counter,)
+                output_filename = os.path.join(args.output, output_name)
+                with open(output_filename, 'wt') as outstream:
+                    save_meme_letter_probabilities(
+                        outstream, version, motif_counter, motif)
+
+
+def make_parser():
+    parser = argparse.ArgumentParser('Search through a MEME report for letter-probability matrices')
+    parser.add_argument('-o', '--output', default='.',
+                        help="output directory")
+    parser.add_argument('filenames', nargs='*',
+                        help="name of MEME output files to search through")
+    return parser
+
+def find_meme_version(instream, limit=20):
+    """Read through the first few meme header lines looking for the MEME version
+    """
+    version_pattern = re.compile("MEME version (?P<ver>[\d.]+)")
+                                 
+    for i, line in enumerate(instream):
+        if i > limit:
+            return None
+        else:
+            match = version_pattern.match(line)
+            if match:
+                return match.group('ver')
+            
+        
+def find_meme_letter_probabilities(instream):
+    """Look through a meme output report and find the letter-probability matrix
+
+    Arguments:
+      instream (input stream): file-like object to read from
+
+    Yields:
+      list of strings for a MEME letter-probabilities matrix
+    """
+    header = "letter-probability matrix: alength=(?P<letters>[ \d]+) w=(?P<bases>[ \d]+)"
+    letter_pattern = re.compile(header)
+
+    motif = []
+    rows = 0
+    for line in instream:
+        line = line.rstrip()
+        match = letter_pattern.match(line)
+        if match:
+            if motif:
+                yield motif
+                motif = []
+            rows = int(match.group('bases'))
+            motif.append(line)
+        elif rows:
+            motif.append(line)
+            rows -= 1
+    yield motif
+    
+
+def save_meme_letter_probabilities(outstream, version, motif_index, motif):
+    """Write MEME mot formatted contents to outstream
+
+    Arguments:
+      outstream (output stream): file-like object to write to
+      version (str): motif version number
+      motif_index (int): what motif number we're on
+      motif (list of strings): motif letter-probabilities from meme
+    """
+
+    report = """MEME version {version}
+
+ALPHABET= ACGT
+
+strands: + -
+
+Background etter frequencies (from uniform background):
+A 0.25000 C 0.25000 G 0.25000 T 0.25000
+
+MOTIF {count}
+
+{matrix}
+""".format(version=version,
+           count=motif_index,
+           matrix=os.linesep.join(motif))
+    outstream.write(report)
+
+
+if __name__ == "__main__":
+    main()
+