Release version for Erange 4.0a
[erange.git] / combineRPKMs.py
index ead4e1b81d0b37335fbf00908f1aea615cec33dd..77f30d6fc2acb2a9424634a138a03ee30e314f91 100755 (executable)
@@ -51,23 +51,8 @@ def makeParser(usage=""):
 
 def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, doFraction=False):
 
-    firstDict = {}
-    firstfile = open(firstfileName)
-    for line in firstfile:
-        fields = line.strip().split()
-        firstDict[fields[1]] = fields[-1]
-
-    firstfile.close()
-
-    expandedDict = {}
-    gidDict = {}
-    expandedfile = open(expandedfileName)
-    for line in expandedfile:
-        fields = line.strip().split()
-        expandedDict[fields[1]] = fields[-1]
-        gidDict[fields[1]] = fields[0]
-
-    expandedfile.close()
+    firstDict = getRPKMDict(firstfileName)
+    gidDict, expandedDict = getRPKMDict(expandedfileName, getGIDDict=True)
 
     if doFraction:
         header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\tfractionMulti\n"
@@ -97,5 +82,23 @@ def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, do
     outfile.close()
 
 
+def getRPKMDict(rpkmFileName, getGIDDict=False):
+    """Map each row's 2nd field to its last field; with getGIDDict return (gidDict, rpkmDict)."""
+    gidDict, rpkmDict = {}, {}  # gidDict maps 2nd field -> 1st field; filled only on request
+    rpkmFile = open(rpkmFileName)
+    try:  # try/finally closes the handle even when a malformed row raises
+        for line in rpkmFile:
+            fields = line.split()
+            if not fields:
+                continue  # blank line (e.g. a trailing newline) has no columns to index
+            rpkmDict[fields[1]] = fields[-1]
+            if getGIDDict:
+                gidDict[fields[1]] = fields[0]
+    finally:
+        rpkmFile.close()
+
+    return (gidDict, rpkmDict) if getGIDDict else rpkmDict
+
+
 if __name__ == "__main__":
-    main(sys.argv)
\ No newline at end of file
+    main(sys.argv)