5 # Created by Ali Mortazavi on 6/21/08.
13 import sys, string, optparse
14 from commoncode import readDataset, writeLog
# Version banner written to the log file by makerdsfrombed().
# "%prog" is an optparse placeholder, not a printf-style conversion: using it
# with the % operator raises ValueError (unsupported format character 'p').
# "%s" substitutes the script name from sys.argv[0] as intended.
verstring = "%s: version 2.1" % sys.argv[0]
# NOTE(review): this is a sparse, line-numbered excerpt. The enclosing
# function header and several intermediate statements are not visible, so the
# comments below describe only what the shown lines do.

# Usage text handed to optparse. It lists --append, --index and --cache but
# not the --RNA flag that is registered below.
24 usage = "usage: python %prog label bedfile outrdsfile [--append] [--index] [propertyName::propertyValue] [--cache numPages]"
26 parser = optparse.OptionParser(usage=usage)
# --append stores False into options.init (whose default is True, see
# set_defaults), i.e. passing it turns initialisation off.
27 parser.add_option("--append", action="store_false", dest="init")
28 parser.add_option("--index", action="store_true", dest="doIndex")
# --cache takes an integer argument stored in options.cachePages.
29 parser.add_option("--cache", type="int", dest="cachePages")
30 parser.add_option("--RNA", action="store_true", dest="rnaDataType")
31 parser.set_defaults(init=True, rnaDataType=False, doIndex=False, cachePages=100000)
# The first element of argv is skipped. `argv` is not defined in the visible
# lines -- presumably a parameter of the enclosing function; TODO confirm.
32 (options, args) = parser.parse_args(argv[1:])
36 print "\ntreats all imported reads as uniquely mapped\n"
# The body of this branch is not part of the excerpt. `dataType`, passed to
# makerdsfrombed() below, is presumably assigned here -- TODO confirm.
43 if options.rnaDataType:
# Split a "name::value" argument into a (name, value) pair and collect it.
# `arg` and `propertyList` are bound in lines not shown here.
# NOTE(review): the two-name unpacking raises ValueError when the argument
# does not split into exactly two parts on "::".
51 (pname, pvalue) = arg.strip().split("::")
52 propertyList.append((pname, pvalue))
# Hand the parsed options over to the importer. label, filename, outdbname and
# dataType are assigned in lines outside this excerpt.
54 makerdsfrombed(label, filename, outdbname, options.init, dataType, options.doIndex, options.cachePages, propertyList)
# makerdsfrombed: open `filename` for reading, open the dataset `outdbname`
# through readDataset(), and insert reads into it in batches via
# rds.insertUniqs().
# NOTE(review): this is a sparse, line-numbered excerpt. The read loop, the
# if/else headers and the initialisation of locals such as index, insertSize,
# insertList, fields, chrom, stop and sense are not visible here.
# NOTE(review): propertyList=[] is a mutable default argument. In the visible
# lines it is only read (len() and passed to rds.insertMetadata), never
# mutated in place.
57 def makerdsfrombed(label, filename, outdbname, init=True, dataType="DNA", doIndex=False, cachePages=100000, propertyList=[]):
# Record the version string and the command-line arguments in
# "<outdbname>.log". The arguments come from sys.argv, not from this
# function's parameters.
63 writeLog(outdbname + ".log", verstring, string.join(sys.argv[1:]))
# NOTE(review): no matching close() for infile appears in the visible lines.
65 infile = open(filename,"r")
67 rds = readDataset(outdbname, init, dataType, verbose=True)
71 #check that our cacheSize is better than the dataset's default cache size
72 defaultCacheSize = rds.getDefaultCacheSize()
73 if cachePages > defaultCacheSize:
# Two alternative calls follow. The condition selecting between them is not
# visible; the first passes default=True.
75 rds.setDBcache(cachePages, default=True)
77 rds.setDBcache(cachePages)
# Store caller-supplied (name, value) metadata pairs only when there are any.
79 if len(propertyList) > 0:
80 rds.insertMetadata(propertyList)
# Read size is the absolute difference between fields[1] and fields[2].
# It is stored in the metadata as readsize + 1.
89 readsize = abs(int(fields[1]) - int(fields[2]))
91 rds.insertMetadata([("readsize", readsize+1)])
92 rds.insertMetadata([("imported_from_bed", "True")])
95 start = int(fields[1])
# Read IDs have the form "<label>-<index>".
98 readID = "%s-%s" % (label, str(index))
# Each queued record ends with the constant 1.0 and two empty strings. Their
# meaning is defined by rds.insertUniqs, which is not visible here.
99 insertList.append((readID, chrom, start, stop, sense, 1.0, "", ""))
# Flush the queued records whenever index is a multiple of insertSize.
100 if index % insertSize == 0:
101 rds.insertUniqs(insertList)
# Flush whatever is still queued.
108 if len(insertList) > 0:
109 rds.insertUniqs(insertList)
# Log the value of index as the number of unique reads.
111 countString = "%d unique reads" % index
114 writeLog(outdbname + ".log", verstring, countString)
# NOTE(review): presumably guarded by doIndex -- the guard is not visible.
117 print "building index...."
# Build the index with the requested cache size when it exceeds the dataset
# default. The last call uses the default size instead; its `else:` header is
# presumably in a line not shown.
118 if cachePages > defaultCacheSize:
119 rds.setDBcache(cachePages)
120 rds.buildIndex(cachePages)
122 rds.buildIndex(defaultCacheSize)
125 if __name__ == "__main__":