5 # Created by Ali Mortazavi on 6/21/08.
16 from commoncode import writeLog, getConfigParser, getConfigIntOption, getConfigBoolOption
# Version banner for this tool; makerdsfrombed passes it to writeLog on every log entry.
19 verstring = "makerdsfrombed: version 2.2"
# Command-line entry logic: parse the options, collect "name::value" metadata
# pairs from the positional arguments, and call makerdsfrombed.
# NOTE(review): the enclosing function header, the argument-count check and
# the assignments of label/filename/outdbname/dataType/propertyList are not
# visible in this excerpt.
27 usage = "usage: python %prog label bedfile outrdsfile [--append] [--index] [propertyName::propertyValue] [--cache numPages]"
# makeParser supplies the option definitions and their defaults.
29 parser = makeParser(usage)
# argv[0] (the script name) is dropped before parsing.
30 (options, args) = parser.parse_args(argv[1:])
34 print "\ntreats all imported reads as uniquely mapped\n"
# NOTE(review): the statement guarded by this test is not visible; presumably
# it selects the dataType later passed to makerdsfrombed -- confirm.
41 if options.rnaDataType:
# Split a "propertyName::propertyValue" argument into its two halves.
# NOTE(review): the two-name unpack raises ValueError if an argument contains
# "::" more than once.
49 (pname, pvalue) = arg.strip().split("::")
50 propertyList.append((pname, pvalue))
# Hand everything to the importer; argument order matches makerdsfrombed's signature.
52 makerdsfrombed(label, filename, outdbname, options.init, dataType, options.doIndex, options.cachePages, propertyList)
def makeParser(usage=""):
    """Build the command-line option parser for makerdsfrombed.

    Registers the --append, --index, --cache and --RNA options and installs
    their defaults from the "makerdsfrombed" section of the configuration
    object returned by getConfigParser().

    usage -- usage string handed to optparse.OptionParser.

    Returns the configured optparse.OptionParser instance.
    """
    parser = optparse.OptionParser(usage=usage)
    # --append stores False into "init"; every other flag stores True or an int.
    parser.add_option("--append", action="store_false", dest="init")
    parser.add_option("--index", action="store_true", dest="doIndex")
    parser.add_option("--cache", type="int", dest="cachePages")
    parser.add_option("--RNA", action="store_true", dest="rnaDataType")

    # The last argument of each getConfig*Option call is presumably the
    # fallback used when the option is absent from the configuration --
    # confirm against commoncode.
    configParser = getConfigParser()
    section = "makerdsfrombed"
    init = getConfigBoolOption(configParser, section, "init", True)
    rnaDataType = getConfigBoolOption(configParser, section, "RNA", False)
    doIndex = getConfigBoolOption(configParser, section, "doIndex", False)
    cachePages = getConfigIntOption(configParser, section, "cachePages", 100000)

    parser.set_defaults(init=init, rnaDataType=rnaDataType, doIndex=doIndex, cachePages=cachePages)

    # The caller invokes parse_args() on the result, so the parser must be returned.
    return parser
# Import the reads of a BED-style text file into a ReadDataset database.
#
#   label        -- prefix used to build each read ID ("<label>-<index>")
#   filename     -- path of the input file, opened for reading
#   outdbname    -- dataset name handed to ReadDataset; also the stem of the ".log" file
#   init         -- passed through to the ReadDataset constructor
#   dataType     -- passed through to the ReadDataset constructor
#   doIndex      -- NOTE(review): not referenced in the visible lines; presumably
#                   gates the "building index" section at the end -- confirm
#   cachePages   -- requested cache size, compared against the dataset default
#   propertyList -- (name, value) pairs stored as metadata when non-empty
#
# NOTE(review): this excerpt omits many lines of the function, including the
# loop header that produces `line`, the definitions of index, insertSize,
# chrom, sense and insertList, and the if/else structure around the cache and
# index calls. Comments below describe only what the visible lines do.
# NOTE(review): propertyList defaults to a shared mutable list; the visible
# code only reads it.
74 def makerdsfrombed(label, filename, outdbname, init=True, dataType="DNA", doIndex=False, cachePages=100000, propertyList=[]):
# Record the version string and the command-line arguments in the log file.
80 writeLog(outdbname + ".log", verstring, string.join(sys.argv[1:]))
# NOTE(review): no matching close() for infile is visible in this excerpt.
82 infile = open(filename,"r")
84 rds = ReadDataset.ReadDataset(outdbname, init, dataType, verbose=True)
88 #check that our cacheSize is better than the dataset's default cache size
89 defaultCacheSize = rds.getDefaultCacheSize()
90 if cachePages > defaultCacheSize:
# Two setDBcache variants follow, one with default=True and one without.
# NOTE(review): the condition choosing between them is not visible.
92 rds.setDBcache(cachePages, default=True)
94 rds.setDBcache(cachePages)
# Store the caller-supplied metadata pairs, if any.
96 if len(propertyList) > 0:
97 rds.insertMetadata(propertyList)
# Split the current input line on whitespace; fields[1] and fields[2] are
# parsed as integers below.
104 fields = line.split()
# Absolute difference of the two integer columns; recorded as "readsize" plus one.
106 readsize = abs(int(fields[1]) - int(fields[2]))
108 rds.insertMetadata([("readsize", readsize+1)])
109 rds.insertMetadata([("imported_from_bed", "True")])
112 start = int(fields[1])
113 stop = int(fields[2])
# Read IDs are the label followed by the running index.
115 readID = "%s-%s" % (label, str(index))
# Queue the read for a batched insert; the sixth element is always 1.0 and the
# last two are empty strings.
116 insertList.append((readID, chrom, start, stop, sense, 1.0, "", ""))
# Flush the queued reads whenever index is a multiple of insertSize.
117 if index % insertSize == 0:
118 rds.insertUniqs(insertList)
# Insert whatever is still queued.
125 if len(insertList) > 0:
126 rds.insertUniqs(insertList)
# Report the final value of index as the read count and log it.
128 countString = "%d unique reads" % index
131 writeLog(outdbname + ".log", verstring, countString)
134 print "building index...."
# Build the index with the requested cache size when it exceeds the dataset
# default. NOTE(review): the last call is presumably the else branch using the
# default size -- the else line is not visible.
135 if cachePages > defaultCacheSize:
136 rds.setDBcache(cachePages)
137 rds.buildIndex(cachePages)
139 rds.buildIndex(defaultCacheSize)
142 if __name__ == "__main__":