"""Sort paired reads from two parallel files into per-linker-class files.

usage: python <script> infile1 infile2 outprefix

Both input files are read in lockstep.  A line containing ">" is treated as
a header of the form ``<linker>_<readid>``; every other line is written out
together with the most recent header pair.
"""
import sys


def main(argv=None):
    """Command-line entry point.

    argv -- argument list laid out like ``sys.argv`` (program name first,
            then infile1, infile2, outprefix).  Falls back to ``sys.argv``
            when omitted or empty.
    """
    if not argv:
        argv = sys.argv

    # Fail with a readable message instead of a bare IndexError traceback.
    if len(argv) < 4:
        print("usage: python %s infile1 infile2 outprefix" % argv[0])
        sys.exit(1)

    segregateLinkers(argv[1], argv[2], argv[3])


def _linkerCategory(linker1, linker2):
    """Return the output class ("NA", "same1", "same2" or "mixed") of a pair."""
    if "NA" in linker1 or "NA" in linker2:
        return "NA"

    if linker1 != linker2:
        return "mixed"

    if "L1" in linker1:
        return "same1"

    return "same2"


def segregateLinkers(infile1name, infile2name, outprefix):
    """Split two parallel read files by the linker tags found in their headers.

    infile1name, infile2name -- paths of the two input files; line N of one
                                is paired with line N of the other.
    outprefix                -- prefix of the four output files:
                                <outprefix>.same1.fa, .same2.fa, .NA.fa and
                                .mixed.fa

    For every non-header line pair the header and the line of each mate are
    written to the file selected by the linkers of the preceding headers:
    "NA" in either linker -> NA; equal linkers containing "L1" -> same1;
    other equal linkers -> same2; different linkers -> mixed.

    The four counters (same1, same2, mixed, NA) are printed, one per line,
    once the inputs are exhausted.

    Exits with status 1 (SystemExit) if a header cannot be split into exactly
    two "_"-separated fields, if the read ids of a header pair (the text
    before "/") differ, or if a non-header line precedes the first header.
    """
    counts = {"same1": 0, "same2": 0, "mixed": 0, "NA": 0}

    # Inputs are opened before the outputs so that a missing input file does
    # not leave empty output files behind.  The with-statement closes all six
    # handles on every path, including the sys.exit() ones.
    with open(infile1name) as infile1, \
            open(infile2name) as infile2, \
            open("%s.same1.fa" % outprefix, "w") as outsame1, \
            open("%s.same2.fa" % outprefix, "w") as outsame2, \
            open("%s.NA.fa" % outprefix, "w") as outNA, \
            open("%s.mixed.fa" % outprefix, "w") as outmixed:
        outfiles = {
            "same1": outsame1,
            "same2": outsame2,
            "mixed": outmixed,
            "NA": outNA,
        }
        linker1 = linker2 = readid1 = readid2 = None

        # Stream file 1 instead of loading it whole; file 2 is advanced one
        # line per iteration to stay in lockstep.
        for line1 in infile1:
            line2 = infile2.readline()

            if ">" in line1:
                try:
                    (linker1, readid1) = line1.split("_")
                    (linker2, readid2) = line2.split("_")
                except ValueError:
                    # Malformed header (also hit when file 2 ran out of
                    # lines): show the offending headers plus the line pair
                    # that follows them, then stop.
                    print(line1.strip())
                    print(line2.strip())
                    next1 = next(infile1, "")
                    next2 = infile2.readline()
                    if next1:
                        print(next1.strip())
                        print(next2.strip())

                    sys.exit(1)

                shortid1 = readid1.split("/")[0]
                shortid2 = readid2.split("/")[0]
                if shortid1 != shortid2:
                    # Kept outside the try block so the exit is immediate and
                    # cannot be intercepted by the header-parsing handler.
                    print("%s %s" % (shortid1, shortid2))
                    sys.exit(1)

                continue

            if linker1 is None:
                # Data before any header: there is nothing to attach it to.
                print(line1.strip())
                print(line2.strip())
                sys.exit(1)

            category = _linkerCategory(linker1, linker2)
            counts[category] += 1
            outfile = outfiles[category]
            # readid still carries the header's newline, so each write emits
            # "header\nline".
            outfile.write("%s_%s%s" % (linker1, readid1, line1))
            outfile.write("%s_%s%s" % (linker2, readid2, line2))

    print(counts["same1"])
    print(counts["same2"])
    print(counts["mixed"])
    print(counts["NA"])


if __name__ == "__main__":
    main(sys.argv)