14 usage = "usage: python %prog infile1 infile2 outfile [options]"
16 parser = optparse.OptionParser(usage=usage)
17 parser.add_option("-d", dest="delimiter")
18 parser.add_option("--file3", dest="infile3")
19 parser.add_option("-1", type="int", dest="matchfield1")
20 parser.add_option("-2", type="int", dest="matchfield2")
21 parser.add_option("-3", type="int", dest="matchfield3")
22 parser.add_option("-reject1", dest="reject1file")
23 parser.add_option("-trackGID", action="store_true", dest="trackGID")
24 parser.set_defaults(delimiter="\t", infile3=None, matchField1=0, matchField2=0,
25 matchField3=0, rejectFileName="", trackGID=False)
26 (options, args) = parser.parse_args(argv[1:])
36 intersects(infile1, infile2, outfile, options.delimiter, options.infile3,
37 options.matchField1, options.matchField2, options.matchField3,
38 options.rejectFileName, options.trackGID)
# Intersect the match-field values of two (optionally three) delimited files.
#
# Candidates are loaded from infile1Name / infile2Name (and infile3Name) using
# matchField1 / matchField2 / matchField3 as the column passed to the loader
# helpers.  File-1 candidates that qualify are collected in matchedList and
# written to outfileName, one per line.
#
# NOTE(review): indentation and several statements (variable initialisation,
# most guarding if/else lines) are not visible in this excerpt; every remark
# below that depends on them is marked as an assumption to confirm.
41 def intersects(infile1Name, infile2Name, outfileName, delimiter="\t", infile3Name=None,
42 matchField1=0, matchField2=0, matchField3=0, rejectFileName="", trackGID=False):
# NOTE(review): opened with the default mode (read-only), yet .write() is
# called on this handle further down -- mode "w" looks intended.
# Presumably only executed when rejectFileName is non-empty -- TODO confirm.
46 reject1file = open(rejectFileName)
# The body of this test is not shown; presumably it sets doFile3, which the
# matching loop below reads -- TODO confirm.
50 if infile3Name is not None:
# --- Load path that also collects GIDs (selecting condition not shown;
# presumably trackGID -- TODO confirm). ---
# The keys gathered so far are handed to the helper as its gidList argument.
# The print statements below show this is Python 2, where dict.keys()
# returns a list.
62 gidKeys = gidDict.keys()
63 list1, fileGIDDict = getCandidatesAndGIDFromFile(infile1Name, delimiter, matchField1, gidKeys)
# Merge this file's candidate -> GID pairs into gidDict; existing keys are
# overwritten.
64 for entry in fileGIDDict.keys():
65 gidDict[entry] = fileGIDDict[entry]
# Same load-and-merge step for file 2.
67 gidKeys = gidDict.keys()
68 list2, fileGIDDict = getCandidatesAndGIDFromFile(infile2Name, delimiter, matchField2, gidKeys)
69 for entry in fileGIDDict.keys():
70 gidDict[entry] = fileGIDDict[entry]
# Same load-and-merge step for file 3 (presumably guarded by doFile3 --
# TODO confirm).
73 gidKeys = gidDict.keys()
74 list3, fileGIDDict = getCandidatesAndGIDFromFile(infile3Name, delimiter, matchField3, gidKeys)
75 for entry in fileGIDDict.keys():
76 gidDict[entry] = fileGIDDict[entry]
# --- Plain load path: only the candidate lists are fetched. ---
78 list1 = getCandidateListFromFile(infile1Name, delimiter, matchField1)
79 list2 = getCandidateListFromFile(infile2Name, delimiter, matchField2)
# Presumably guarded by doFile3 -- TODO confirm.
81 list3 = getCandidateListFromFile(infile3Name, delimiter, matchField3)
# Classify every file-1 candidate.
# NOTE(review): the lists come from a loader that builds them with .append(),
# so each `candidate in listN` is presumably a linear scan and this loop is
# quadratic in the input sizes; sets would avoid that.
83 for candidate in list1:
# Three-file mode, present in all three files: a full match.
84 if doFile3 and candidate in list2 and candidate in list3:
85 matchedList.append(candidate)
# Three-file mode, present in files 1 and 3 but not 2.
86 elif doFile3 and candidate in list3:
87 matchedList13.append(candidate)
# Three-file mode, present in files 1 and 2 but not 3.
88 elif doFile3 and candidate in list2:
89 matchedList12.append(candidate)
# Two-file mode, present in both files: a full match.
90 elif not doFile3 and candidate in list2:
91 matchedList.append(candidate)
# Two output formats for the reject file: candidate plus its GID, or the
# candidate alone.  The enclosing conditions are not visible; presumably a
# final branch for otherwise-unmatched candidates when a reject file was
# requested, with trackGID choosing the format -- TODO confirm.
94 reject1file.write("%s%s%s\n" % (candidate, delimiter, gidDict[candidate]))
96 reject1file.write("%s\n" % candidate)
# Candidates shared by files 2 and 3 but absent from file 1 (presumably only
# run in three-file mode -- TODO confirm).
99 for candidate in list2:
100 if candidate not in list1 and candidate in list3:
101 matchedList23.append(candidate)
# Report the list sizes, the pairwise-only match counts and the final match
# count on stdout (Python 2 print statements).
103 print len(list1), len(list2), len(list3)
105 print len(matchedList12), len(matchedList13), len(matchedList23)
106 print len(matchedList)
# Write the final matches, one per line.  The first form appends the GID
# after the delimiter; which form runs is decided on lines not shown
# (presumably trackGID -- TODO confirm).  No close() of outfile is visible in
# this excerpt.
108 outfile = open(outfileName, "w")
109 for match in matchedList:
111 outfile.write("%s%s%s\n" % (match, delimiter, gidDict[match]))
113 outfile.write("%s\n" % match)
# Read a delimited text file and collect the distinct values found in column
# matchField, optionally recording the value of the following column as a GID.
#
# Parameters:
#   filename   -- path of the file to read.
#   delimiter  -- string each stripped line is split on.
#   matchField -- index of the column holding the candidate value.
#   trackGID   -- when true, fields[matchField + 1] is stored in gidDict for
#                 candidates that are not in gidList.
#   gidList    -- candidates for which no GID should be recorded.
# Returns the pair (candidateList, gidDict).
#
# NOTE(review): gidList=[] is a mutable default; the visible code only reads
# it (`not in`), so it is not mutated here, but None would be the safer default.
# NOTE(review): the initialisation of candidateList / gidDict, the loop that
# binds `line`, and any close() of infile are on lines not visible in this
# excerpt.
118 def getCandidatesFromFile(filename, delimiter, matchField, trackGID=False, gidList=[]):
119 infile = open(filename)
# Split the stripped line into columns and pick the match column.
127 fields = line.strip().split(delimiter)
128 candidate = fields[matchField]
# Keep each candidate once.  NOTE(review): this membership test scans
# candidateList (built with .append) on every line.
129 if candidate not in candidateList:
130 candidateList.append(candidate)
# Record the column right after the match column as this candidate's GID.
# NOTE(review): raises IndexError when the match column is the last one on
# the line.
132 if trackGID and candidate not in gidList:
133 gidDict[candidate] = fields[matchField + 1]
136 return candidateList, gidDict
def getCandidatesAndGIDFromFile(filename, delimiter, matchField, gidList=None):
    """Load candidates from a file together with their GIDs.

    Thin wrapper around getCandidatesFromFile() with trackGID switched on.

    Args:
        filename: path of the delimited file to read.
        delimiter: string each line is split on.
        matchField: index of the column holding the candidate value.
        gidList: candidates for which no GID should be recorded.
            Defaults to no exclusions.

    Returns:
        The (candidateList, gidDict) pair produced by getCandidatesFromFile().
    """
    # None instead of a shared mutable [] default; normalise before forwarding
    # because the callee performs `candidate not in gidList`.
    if gidList is None:
        gidList = []

    # Forward the caller's gidList; a literal [] here would silently discard
    # the argument and make every call behave as if nothing were excluded.
    return getCandidatesFromFile(filename, delimiter, matchField, trackGID=True, gidList=gidList)
# Convenience wrapper: load candidates via getCandidatesFromFile() with its
# default trackGID / gidList arguments.
# NOTE(review): the return statement is not visible in this excerpt; callers
# assign the result to a single list variable, so presumably only
# candidateList is returned and gidDict is discarded -- TODO confirm.
143 def getCandidateListFromFile(filename, delimiter, matchField):
144 candidateList, gidDict = getCandidatesFromFile(filename, delimiter, matchField)
148 if __name__ == "__main__":