Add script to extract some subset of sequence from an eland result file.
author: Diane Trout <diane@caltech.edu>
Thu, 6 Mar 2008 22:57:39 +0000 (22:57 +0000)
committer: Diane Trout <diane@caltech.edu>
Thu, 6 Mar 2008 22:57:39 +0000 (22:57 +0000)
scripts/elandseq [new file with mode: 0755]

diff --git a/scripts/elandseq b/scripts/elandseq
new file mode 100755 (executable)
index 0000000..1dedc38
--- /dev/null
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+import optparse
+import os
+import sys
+
+def extract_sequence(instream, outstream, start, end):
+  """Copy records from instream to outstream, slicing the sequence field.
+
+  Each input line is split on whitespace.  When a line has more than one
+  field, the first field is copied unchanged and the second field is
+  sliced with [start:end]; any further fields are dropped.  When a line
+  has a single field, that field itself is sliced.  Output fields are
+  joined with a tab and each record ends with a newline.
+
+  Blank (or whitespace-only) lines are skipped.
+
+  instream  -- iterable yielding text lines
+  outstream -- object with a write() method accepting text
+  start/end -- slice bounds; None leaves that end of the slice open
+  """
+  for line in instream:
+    record = line.split()
+    if not record:
+      # nothing to slice on a blank line; indexing record[0] would fail
+      continue
+    if len(record) > 1:
+      result = [record[0], record[1][start:end]]
+    else:
+      result = [record[0][start:end]]
+    outstream.write("\t".join(result))
+    # Text-mode streams translate "\n" to the platform line ending
+    # themselves; writing os.linesep would yield "\r\r\n" on Windows.
+    outstream.write("\n")
+      
+
+def make_parser():
+  """Build the optparse command line parser for this script.
+
+  The parser knows a single option, -e/--extract, whose string value is
+  stored as ``slice`` and defaults to ":" (the whole sequence).
+  """
+  cmdline = optparse.OptionParser("usage: %prog [options] infile [outfile]")
+  cmdline.add_option(
+    "-e", "--extract",
+    dest="slice",
+    default=":",
+    help="provide a python slice operator to select a portion of an eland file",
+  )
+  return cmdline
+
+def _parse_slice(text):
+  """Turn a 'start:end' string into a (start, end) pair.
+
+  An empty bound becomes None.  Raises ValueError when text does not
+  contain exactly one ':' or when a bound is not an integer.
+  """
+  fields = text.split(':')
+  if len(fields) != 2:
+    raise ValueError("expected start:end, got %r" % (text,))
+  bounds = []
+  for field in fields:
+    if len(field) > 0:
+      bounds.append(int(field))
+    else:
+      bounds.append(None)
+  return bounds[0], bounds[1]
+
+def main(argv):
+  """Run the extraction described by the command line arguments.
+
+  argv holds the arguments without the program name: an optional
+  -e/--extract slice, an optional input file (default stdin) and an
+  optional output file (default stdout).
+
+  Usage problems (too many arguments, malformed slice) are reported
+  through parser.error(), which prints the usage message and exits.
+  """
+  parser = make_parser()
+
+  (opt, args) = parser.parse_args(argv)
+
+  if len(args) > 2:
+    parser.error('incorrect number of arguments')
+
+  # get our slice coordinates
+  try:
+    start, end = _parse_slice(opt.slice)
+  except ValueError:
+    # report a usage error instead of a bare traceback
+    parser.error('invalid slice %r; expected start:end with integer or '
+                 'empty bounds' % (opt.slice,))
+
+  instream = sys.stdin
+  outstream = sys.stdout
+  try:
+    # open infile
+    if len(args) > 0:
+      instream = open(args[0],'r')
+    if len(args) > 1:
+      outstream = open(args[1],'w')
+
+    extract_sequence(instream, outstream, start, end)
+  finally:
+    # close only the files opened here; leave the standard streams alone
+    if instream is not sys.stdin:
+      instream.close()
+    if outstream is not sys.stdout:
+      outstream.close()
+
+if __name__ == "__main__":
+    # Run as a script: hand main() the arguments without the program
+    # name and use whatever it returns as the process exit status.
+    exit_status = main(sys.argv[1:])
+    sys.exit(exit_status)
+