X-Git-Url: http://woldlab.caltech.edu/gitweb/?p=erange.git;a=blobdiff_plain;f=combineRPKMs.py;fp=combineRPKMs.py;h=77f30d6fc2acb2a9424634a138a03ee30e314f91;hp=ead4e1b81d0b37335fbf00908f1aea615cec33dd;hb=77dccd7c98d8cdb60caaf178b1123df71ea662c9;hpb=bc30aca13e5ec397c92e67002fbf7a103130b828 diff --git a/combineRPKMs.py b/combineRPKMs.py index ead4e1b..77f30d6 100755 --- a/combineRPKMs.py +++ b/combineRPKMs.py @@ -51,23 +51,8 @@ def makeParser(usage=""): def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, doFraction=False): - firstDict = {} - firstfile = open(firstfileName) - for line in firstfile: - fields = line.strip().split() - firstDict[fields[1]] = fields[-1] - - firstfile.close() - - expandedDict = {} - gidDict = {} - expandedfile = open(expandedfileName) - for line in expandedfile: - fields = line.strip().split() - expandedDict[fields[1]] = fields[-1] - gidDict[fields[1]] = fields[0] - - expandedfile.close() + firstDict = getRPKMDict(firstfileName) + gidDict, expandedDict = getRPKMDict(expandedfileName, getGIDDict=True) if doFraction: header = "gid\tRNAkb\tgene\tfirstRPKM\texpandedRPKM\tfinalRPKM\tfractionMulti\n" @@ -97,5 +82,23 @@ def combineRPKMs(firstfileName, expandedfileName, finalfileName, outfileName, do outfile.close() +def getRPKMDict(rpkmFileName, getGIDDict=False): + gidDict = {} + rpkmDict = {} + rpkmFile = open(rpkmFileName) + for line in rpkmFile: + fields = line.strip().split() + rpkmDict[fields[1]] = fields[-1] + if getGIDDict: + gidDict[fields[1]] = fields[0] + + rpkmFile.close() + + if getGIDDict: + return gidDict, rpkmDict + else: + return rpkmDict + + if __name__ == "__main__": - main(sys.argv) \ No newline at end of file + main(sys.argv)