erange version 4.0a dev release
[erange.git] / combineRPKMs.py
index 8fd8f9f56b80d6af257cccdfdbad8f2e8fef1c9b..ead4e1b81d0b37335fbf00908f1aea615cec33dd 100755 (executable)
@@ -3,14 +3,17 @@
 #  ENRAGE
 #
 
-print 'version 1.0'
+print "combineRPKMs: version 1.1"
 try:
     import psyco
     psyco.full()
 except:
     pass
 
-import sys, optparse
+import sys
+import optparse
+import string
+from commoncode import getConfigParser, getConfigBoolOption
 
 
 def main(argv=None):
@@ -18,9 +21,7 @@ def main(argv=None):
         argv = sys.argv
 
     usage = "usage: python %prog firstRPKM expandedRPKM finalRPKM combinedOutfile [--withmultifraction]"
-    parser = optparse.OptionParser(usage=usage)
-    parser.add_option("--withmultifraction", action="store_true", dest="doFraction")
-    parser.set_defaults(doFraction=False)
+    parser = makeParser(usage)
     (options, args) = parser.parse_args(argv[1:])
 
     if len(args) < 3:
@@ -35,22 +36,32 @@ def main(argv=None):
     combineRPKMs(firstfile, expandedfile, finalfile, outfile, options.doFraction)
 
 
+def makeParser(usage=""):
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--withmultifraction", action="store_true", dest="doFraction")
+
+    configParser = getConfigParser()
+    section = "combineRPKMs"
+    doFraction = getConfigBoolOption(configParser, section, "doFraction", False)
+
+    parser.set_defaults(doFraction=doFraction)
+
+    return parser
+
+
 def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, doFraction=False):
-    firstfile = open(firstfileName)
-    expandedfile = open(expandedfileName)
-    finalfile = open(finalfileName)
-    outfile = open(outfileName, "w")
 
     firstDict = {}
-    gidDict = {}
-    expandedDict = {}
-
+    firstfile = open(firstfileName)
     for line in firstfile:
         fields = line.strip().split()
         firstDict[fields[1]] = fields[-1]
 
     firstfile.close()
 
+    expandedDict = {}
+    gidDict = {}
+    expandedfile = open(expandedfileName)
     for line in expandedfile:
         fields = line.strip().split()
         expandedDict[fields[1]] = fields[-1]
@@ -63,21 +74,24 @@ def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, do
     else:
         header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\n"
 
+    outfile = open(outfileName, "w")
     outfile.write(header)
 
+    finalfile = open(finalfileName)
     for line in finalfile:
         fields = line.strip().split()
         gene = fields[0]
         rnakb = fields[1]
         finalRPKM = fields[2]
         firstRPKM = firstDict.get(gene, "")
-        outline = "%s\t%s\t%s\t%s\t%s\t%s" % (gidDict[gene], rnakb, gene, firstRPKM, expandedDict[gene], finalRPKM)
+        outputFields = [gidDict[gene], rnakb, gene, firstRPKM, expandedDict[gene], finalRPKM]
 
         if doFraction:
             fraction = fields[3]
-            outline += "\t%s" % fraction
-    
-        outfile.write(outline + '\n')
+            outputFields.append(fraction)
+
+        outline = "%s\n" % string.join(outputFields, "\t")
+        outfile.write(outline)
 
     finalfile.close()
     outfile.close()