"""Label reads according to the linker sequence found in them.

Usage: <script> LINKER_FILE READ_FILE OUTPUT_FILE
"""
import sys

# A linker only counts as found when it starts at this 0-based offset or later.
_MIN_LINKER_OFFSET = 19
# Number of leading characters of each read copied to the output.
_READ_PREFIX_LENGTH = 20
# Number of leading characters of each linker sequence used for matching.
_LINKER_PREFIX_LENGTH = 10


def main(argv=None):
    """Run markLinkers on the three paths given on the command line.

    argv -- argument list laid out like sys.argv (program name first, then
            linker file, read file and output file); sys.argv is used when
            argv is None or empty.
    """
    if not argv:
        argv = sys.argv

    linkerfile = argv[1]
    infile = argv[2]
    outfile = argv[3]
    markLinkers(linkerfile, infile, outfile)


def markLinkers(linkerFileName, inFileName, outFileName):
    """Write one labelled record per read/linker match to outFileName.

    Lines of inFileName containing "@" set the current read ID (the stripped
    line with every "@" removed).  Every other line of at least two
    characters is treated as a read: for each linker whose stored prefix
    occurs at offset 19 or later, a ">L<last char of linker ID>_<read ID>"
    header and the first 20 characters of the read are written.  Reads that
    match no linker are written once under a ">NA_<read ID>" header.

    linkerFileName -- path of the linker file (see getLinkerInformation)
    inFileName     -- path of the read file
    outFileName    -- path of the output file; it is overwritten
    """
    linkerDict, linkerList = getLinkerInformationFromFile(linkerFileName)

    # Empty until the first "@" line, so a read that appears before any
    # header is still written instead of raising UnboundLocalError.
    readID = ""

    # Context managers guarantee both files are closed (and the output
    # flushed) even if processing fails part-way through.
    with open(inFileName) as infile:
        with open(outFileName, "w") as outfile:
            for line in infile:
                if len(line) < 2:
                    continue

                if "@" in line:
                    readID = line.strip().replace("@", "")
                    continue

                # Drop the line ending before slicing so that reads shorter
                # than 20 characters do not carry a newline into the output
                # and produce a stray blank line.
                prefix = line.rstrip("\r\n")[:_READ_PREFIX_LENGTH]

                found = False
                for linkerID in linkerList:
                    linkerSequence = linkerDict.get(linkerID)
                    if not linkerSequence:
                        # A linker without a sequence can never match.
                        continue
                    if line.find(linkerSequence) >= _MIN_LINKER_OFFSET:
                        found = True
                        outfile.write(">L%s_%s\n" % (linkerID[-1:], readID))
                        outfile.write("%s\n" % prefix)

                if not found:
                    outfile.write(">NA_%s\n" % readID)
                    outfile.write("%s\n" % prefix)


def getLinkerInformationFromFile(linkerFileName):
    """Read linker definitions from the file at linkerFileName.

    Returns the (linkerDict, linkerList) pair produced by
    getLinkerInformation.  If the file cannot be opened or read (IOError),
    an empty dict and an empty list are returned instead of raising.
    """
    try:
        with open(linkerFileName) as linkerfile:
            return getLinkerInformation(linkerfile)
    except IOError:
        return {}, []


def getLinkerInformation(linkerInformationList):
    """Parse linker definitions from an iterable of text lines.

    A line containing ">" starts a new linker; its ID is the stripped line
    without its first character.  Any other non-blank line is taken as the
    sequence of the most recent linker, of which only the first 10
    characters are kept (a later sequence line replaces an earlier one).

    Returns (linkerDict, linkerList): linkerDict maps linker ID to its
    sequence prefix, linkerList holds the IDs in order of appearance.
    """
    linkerDict = {}
    linkerList = []
    linkerID = None

    for entry in linkerInformationList:
        text = entry.strip()
        if not text:
            # Blank lines (e.g. a trailing newline at end of file) must not
            # overwrite the previous linker's sequence with "".
            continue

        if ">" in entry:
            linkerID = text[1:]
            linkerList.append(linkerID)
        elif linkerID is not None:
            # Sequence lines before the first header have no linker to
            # belong to and are ignored.
            linkerDict[linkerID] = text[:_LINKER_PREFIX_LENGTH]

    return linkerDict, linkerList


if __name__ == "__main__":
    main(sys.argv)