snapshot of 4.0a development. initial git repo commit
[erange.git] / chiapet / segregateLinkers.py
1 import sys
2
3
def main(argv=None):
    """Command-line entry point.

    argv is expected to look like sys.argv:
    [program, infile1, infile2, outprefix].  When argv is None or empty,
    sys.argv is used instead.

    Prints a usage line and exits with status 1 when fewer than three
    arguments follow the program name; otherwise hands the two input file
    names and the output prefix to segregateLinkers().
    """
    if not argv:
        argv = sys.argv

    if len(argv) < 4:
        # Guard argv[0] as well: sys.argv can be empty in embedded interpreters.
        progname = argv[0] if argv else "segregateLinkers.py"
        print("usage: python %s infile1 infile2 outprefix" % progname)
        sys.exit(1)

    infile1, infile2, outprefix = argv[1:4]

    segregateLinkers(infile1, infile2, outprefix)
13
14
def segregateLinkers(infile1name, infile2name, outprefix):
    """Sort linker-tagged read pairs from two parallel files into four outputs.

    The two input files are read in lockstep, one line from each per step.
    A line of the first file containing ">" is treated as a header of the
    form "<linker>_<readid>" (exactly one underscore); the line at the same
    position in the second file is parsed the same way, and the two read ids
    must agree up to the first "/".  Every other line is treated as sequence
    data and is written, preceded by its most recent header, to one of:

        <outprefix>.NA.fa     either linker contains "NA"
        <outprefix>.same1.fa  linkers are equal and contain "L1"
        <outprefix>.same2.fa  linkers are equal otherwise
        <outprefix>.mixed.fa  linkers differ

    The line from the first file is written first, then the line from the
    second file.  After processing, the same1, same2, mixed and NA counts
    are printed, one per line, in that order.

    On a header that cannot be parsed, a read-id mismatch, or sequence data
    that appears before any header, the offending lines are printed and the
    process exits with status 1.  All files are closed on every exit path.
    """
    handles = []

    def openTracked(name, mode="r"):
        # Remember every handle so the finally clause can close all of them,
        # including when a later open() or the processing loop fails.
        handle = open(name, mode)
        handles.append(handle)
        return handle

    same1 = 0
    same2 = 0
    mixed = 0
    hasNA = 0

    try:
        infile1 = openTracked(infile1name)
        infile2 = openTracked(infile2name)
        outsame1 = openTracked("%s.same1.fa" % outprefix, "w")
        outsame2 = openTracked("%s.same2.fa" % outprefix, "w")
        outNA = openTracked("%s.NA.fa" % outprefix, "w")
        outmixed = openTracked("%s.mixed.fa" % outprefix, "w")

        linker1 = None
        linker2 = None
        readid1 = None
        readid2 = None

        # Stream the first file instead of loading it whole; the second file
        # is advanced by exactly one line per iteration to stay in step.
        for line1 in infile1:
            line2 = infile2.readline()

            if ">" in line1:
                # Only the unpacking can fail here (wrong number of "_"
                # separated fields), so catch ValueError alone.  A bare
                # except would also swallow the SystemExit raised below.
                try:
                    (linker1, readid1) = line1.split("_")
                    (linker2, readid2) = line2.split("_")
                except ValueError:
                    print(line1.strip())
                    print(line2.strip())
                    sys.exit(1)

                shortid1 = readid1.split("/")[0]
                shortid2 = readid2.split("/")[0]
                if shortid1 != shortid2:
                    print("%s %s" % (shortid1.strip(), shortid2.strip()))
                    sys.exit(1)

                continue

            if linker1 is None:
                # Sequence data with no preceding header: nothing to classify.
                print(line1.strip())
                print(line2.strip())
                sys.exit(1)

            if "NA" in linker1 or "NA" in linker2:
                hasNA += 1
                outfile = outNA
            elif linker1 == linker2:
                if "L1" in linker1:
                    same1 += 1
                    outfile = outsame1
                else:
                    same2 += 1
                    outfile = outsame2
            else:
                mixed += 1
                outfile = outmixed

            # readid still carries the header's trailing newline, so this
            # emits the original header line followed by the sequence line.
            outfile.write("%s_%s%s" % (linker1, readid1, line1))
            outfile.write("%s_%s%s" % (linker2, readid2, line2))
    finally:
        for handle in handles:
            handle.close()

    print(same1)
    print(same2)
    print(mixed)
    print(hasNA)
85
86
# Script entry point: hand the process arguments to main() only when this
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    main(argv=sys.argv)