5 # Created by Ali Mortazavi on 3/6/09.
10 from commoncode import writeLog, getConfigParser, getConfigOption, getConfigBoolOption
# Version banner; buildMatrix() passes it to writeLog with every log entry.
12 versionString = "buildMatrix: version 1.5"
# NOTE(review): the statements below read argv, so they presumably belong to
# an entry function whose def line is not part of this listing - confirm.
# Command-line synopsis handed to makeParser (optparse expands %prog to the
# script name).
20 usage = "usage: python %prog matrix.step.N-1 data.part matrix.step.N [--rescale] [--truncate maxRPKM] [--log altlogfile]"
# Build the option parser and parse everything after the program name.
22 parser = makeParser(usage)
23 (options, args) = parser.parse_args(argv[1:])
# When a maxRPKM value is available (from --truncate or from the default that
# makeParser installs), it becomes the ceiling passed to buildMatrix.
33 if options.maxRPKM is not None:
35 maxRPKM = options.maxRPKM
# Hand the parsed settings to buildMatrix.
40 buildMatrix(infile, colfilename, outfilename, truncateRPKM, maxRPKM,
41 options.rescale, options.logfilename)
# Build the optparse parser for this script.
#
# Options registered:
#   --rescale   boolean flag stored in options.rescale
#   --truncate  integer stored in options.maxRPKM
#   --log       value stored in options.logfilename
#
# usage: usage string passed straight through to optparse.OptionParser.
#
# NOTE(review): no return statement is visible in this listing, yet the caller
# uses the result of makeParser(usage) as a parser - confirm that the function
# ends with "return parser".
44 def makeParser(usage=""):
45 parser = optparse.OptionParser(usage=usage)
46 parser.add_option("--rescale", action="store_true", dest="rescale")
47 parser.add_option("--truncate", type="int", dest="maxRPKM")
48 parser.add_option("--log", dest="logfilename")
# Defaults are looked up in the "buildMatrix" section of the configuration
# returned by getConfigParser(). The last argument of each getConfig*Option
# call is presumably the fallback used when the key is absent (the helpers
# come from commoncode) - confirm.
50 configParser = getConfigParser()
51 section = "buildMatrix"
52 rescale = getConfigBoolOption(configParser, section, "rescale", False)
53 maxRPKM = getConfigOption(configParser, section, "maxRPKM", None)
54 logfilename = getConfigOption(configParser, section, "logfilename", "buildMatrix.log")
# Install the configured values as option defaults, so a flag given on the
# command line takes precedence over the configuration.
56 parser.set_defaults(rescale=rescale, maxRPKM=maxRPKM, logfilename=logfilename)
# Append one column of values to a tab-delimited matrix file.
#
# As far as this listing shows: the header read from inFileName is extended
# with a column ID and written to outfilename; a float is parsed from the last
# whitespace-separated field of each processed line (presumably lines of
# colfilename - confirm); each final value is then appended, with three
# decimal places, to the next line read from inFileName.
#
# inFileName:   existing matrix file, opened for reading.
# colfilename:  file supplying the new column; its base name (text after the
#               last "/") is split on "." and used in the log message.
# outfilename:  destination file, opened for writing.
# truncateRPKM: when true, values above maxRPKM receive the truncation
#               handling at line 93.
# maxRPKM:      ceiling compared against each value when truncateRPKM is true.
# rescale:      not referenced in the visible lines; presumably selects the
#               min-max scaling at line 108 - confirm.
# logfilename:  log file handed to writeLog.
61 def buildMatrix(inFileName, colfilename, outfilename, truncateRPKM,
62 maxRPKM=100000000, rescale=False, logfilename="buildMatrix.log"):
# Record the version banner and the command-line arguments in the log.
# string.join is the Python 2 string-module function (joins with spaces).
64 writeLog(logfilename, versionString, string.join(sys.argv[1:]))
# Drop any directory part of colfilename, then split the base name on ".".
66 if "/" in colfilename:
67 colname = colfilename.split("/")[-1]
71 fileParts = colname.split(".")
# NOTE(review): the three files are opened without a with-block and no close()
# call is visible in this listing - confirm they are closed.
74 infile = open(inFileName)
75 colfile = open(colfilename)
76 outfile = open(outfilename, "w")
# Header line minus its final character (normally the newline); a blank
# header is special-cased in a branch whose body is not shown here.
77 header = infile.readline()[:-1]
78 if header.strip() == "":
# Emit the header extended with the new column's ID.
81 outfile.write("%s\t%s\n" % (header, colID))
# Lines that doNotProcessLine rejects (first character "#") are filtered here;
# presumably they are skipped - the branch body is not shown.
88 if doNotProcessLine(line):
# The value of interest is the last whitespace-separated field.
91 fields = line.strip().split()
92 val = float(fields[-1])
# With truncation enabled, values above maxRPKM are handled here;
# untruncatedMax appears to track the largest such value seen.
93 if truncateRPKM and val > maxRPKM:
94 if val > untruncatedMax:
# Min-max scaling: shift by min and divide by range. min and range (and max,
# used below) are local names that shadow the builtins.
# NOTE(review): a zero range would raise ZeroDivisionError unless it is
# guarded in lines not shown - confirm.
108 finalValues = [(val - min)/range for val in values]
# Append each final value, formatted to three decimals, to the next line read
# from infile.
112 for val in finalValues:
113 line = infile.readline().strip()
114 line += "\t%1.3f\n" % val
# A positive untruncatedMax signals that truncation took place; the log
# message reports the maximum and, in that case, the ceiling applied.
119 if untruncatedMax > 0:
122 message = "max value in %s was %.2f" % (colname, max)
123 if untruncatedMax > 0:
124 message += " but was truncated to %d" % maxRPKM
127 writeLog(logfilename, versionString, message)
def doNotProcessLine(line):
    """Return True when *line* is a comment line that must be skipped.

    A line counts as a comment when its very first character is "#";
    leading whitespace is not ignored.  An empty string is not a comment,
    so the function returns False for it instead of raising IndexError,
    which indexing ``line[0]`` would do.
    """
    return line.startswith("#")
134 if __name__ == "__main__":