10 print 'psyco not running'
13 print '%s: version 2.0' % sys.argv[0]
16 print 'usage: python %s regionfile rdsfile outfilename [-bins numbins] [-field fieldNum] [-raw] [-padregion bp] [-mergeregion bp] [-cache]' % sys.argv[0]
19 from commoncode import *
21 regionfilename = sys.argv[1]
23 outfilename = sys.argv[3]
25 if '-raw' in sys.argv:
33 if '-cache' in sys.argv:
37 if '-field' in sys.argv:
38 fieldIndex = sys.argv.index('-field') + 1
39 cField = int(sys.argv[fieldIndex])
42 if '-padregion' in sys.argv:
43 padField = sys.argv.index('-padregion') + 1
44 padregion = int(sys.argv[padField])
45 print 'padding %d bp on each side of a region' % padregion
48 if '-mergeregion' in sys.argv:
49 mergeField = sys.argv.index('-mergeregion') + 1
50 mergeregion = int(sys.argv[mergeField])
51 print 'merging regions closer than %d bp' % mergeregion
54 if '-bins' in sys.argv:
55 binfield = sys.argv.index('-bins') + 1
56 bins = int(sys.argv[binfield])
58 hitRDS = readDataset(hitfile, verbose = True, cache=doCache)
59 readlen = hitRDS.getReadSize()
60 normalizationFactor = 1.0
62 totalCount = len(hitRDS)
63 normalizationFactor = totalCount / 1000000.
65 chromList = hitRDS.getChromosomes(fullChrom=False)
68 regionDict = getMergedRegions(regionfilename, maxDist = mergeregion, keepLabel = True, verbose = True, chromField = cField, pad=padregion)
70 hitDict = hitRDS.getReadsDict(doMulti=True, findallOptimize=True)
72 (regionsBins, regionsLen) = computeRegionBins(regionsByChromDict, hitDict, bins, readlen, normalizationFactor)
74 outfile = open(outfilename, 'w')
75 for regionID in regionsBins:
77 for binAmount in regionsBins[regionID]:
79 outfile.write('%s\t%s\t%.1f\t%d' % (regionID, regionID, tagCount, Len[gid]))
80 for binAmount in gidBins[gid]:
84 outfile.write('\t%.1f' % (100. * binAmount / tagCount))
86 outfile.write('\t%.1f' % binAmount)