- (start1, flag1, pair1) = readList[0]
- (start2, flag2, pair2) = readList[1]
-
- if flag1 != flag2:
- dist = abs(start1 - start2)
- if flag1 != "NM" and flag2 != "NM" and dist < maxDist:
- geneID = ""
- saw1 = False
- saw2 = False
- if flag1 in goodDict:
- geneID = flag2
- farFlag = flag1
- saw1 = True
-
- if flag2 in goodDict:
- geneID = flag1
- farFlag = flag2
- saw2 = True
-
- if saw1 or saw2:
- total += 1
-
- if saw1 and saw2:
- if flag1 < flag2:
- geneID = flag1
- farFlag = flag2
- else:
- geneID = flag2
- farFlag = flag1
-
- if geneID in farConnected:
- farConnected[geneID].append(farFlag)
- else:
- farConnected[geneID] = [farFlag]
- elif geneID != "":
- try:
- if genome == "dmelanogaster":
- symbol = geneinfoDict["Dmel_" + geneID][0][0]
- else:
- symbol = geneinfoDict[geneID][0][0]
- except:
- try:
- symbol = geneannotDict[(genome, geneID)][0]
- except:
- symbol = "LOC" + geneID
-
- symbol = symbol.strip()
- symbol = symbol.replace(" ","|")
- symbol = symbol.replace("\t","|")
- if farFlag not in assigned:
- assigned[farFlag] = (symbol, geneID)
- print "%s %s %s" % (symbol, geneID, goodDict[farFlag].strip())
- outfile.write("%s %s %s" % (symbol, geneID, goodDict[farFlag]))
- distinct += 1
-
- farIndex = 0
+ if processReads(readList[:2], maxDist):
+ flags = (readList[0]["flag"], readList[1]["flag"])
+ processed, distinctPairs = writeFarPairsToFile(flags, goodDict, genome, geneinfoDict, geneannotDict, outfile, assigned, farConnected)
+ total += processed
+ distinct += distinctPairs
+
+ entriesWritten = writeUnassignedEntriesToFile(farConnected, assigned, goodDict, outfile)
+ distinct += entriesWritten
+ outfile.write("#distinct: %d\ttotal: %d\n" % (distinct, total))
+ outfile.close()
+ print "distinct: %d\ttotal: %d" % (distinct, total)
+ print time.ctime()
+
+
+def doNotProcessChromosome(chromosome):
+ return chromosome == "chrM"
+
+
+def processReads(reads, maxDist):
+ process = False
+ start1 = reads[0]["start"]
+ start2 = reads[1]["start"]
+ dist = abs(start1 - start2)
+ flag1 = reads[0]["flag"]
+ flag2 = reads[1]["flag"]
+
+ if flag1 != flag2 and flag1 != "NM" and flag2 != "NM" and dist < maxDist:
+ process = True
+
+ return process
+
+
+def writeFarPairsToFile(flags, goodDict, genome, geneInfoDict, geneAnnotDict, outfile, assigned, farConnected):
+ flag1, flag2 = flags
+ total = 0
+ distinct = 0
+ read1IsGood = flag1 in goodDict
+ read2IsGood = flag2 in goodDict
+
+ if read1IsGood and read2IsGood:
+ if flag1 < flag2:
+ geneID = flag1
+ farFlag = flag2
+ else:
+ geneID = flag2
+ farFlag = flag1
+
+ try:
+ farConnected[geneID].append(farFlag)
+ except KeyError:
+ farConnected[geneID] = [farFlag]
+ elif read1IsGood or read2IsGood:
+ total += 1
+ if read2IsGood:
+ farFlag = flag2
+ geneID = flag1
+ else:
+ farFlag = flag1
+ geneID = flag2
+
+ try:
+ if genome == "dmelanogaster":
+ symbol = geneInfoDict["Dmel_%s" % geneID][0][0]
+ else:
+ symbol = geneInfoDict[geneID][0][0]
+ except (KeyError, IndexError):
+ try:
+ symbol = geneAnnotDict[(genome, geneID)][0]
+ except (KeyError, IndexError):
+ symbol = "LOC%s" % geneID
+
+ symbol = symbol.strip()
+ symbol = symbol.replace(" ","|")
+ symbol = symbol.replace("\t","|")
+
+ if farFlag not in assigned:
+ assigned[farFlag] = (symbol, geneID)
+ print "%s %s %s" % (symbol, geneID, goodDict[farFlag].strip())
+ outfile.write("%s %s %s" % (symbol, geneID, goodDict[farFlag]))
+ distinct += 1
+
+ return total, distinct
+
+
+def writeUnassignedEntriesToFile(farConnected, assigned, goodDict, outfile):
+ total, written = writeUnassignedPairsToFile(farConnected, assigned, goodDict, outfile)
+ writeUnassignedGoodReadsToFile(total, goodDict, assigned, outfile)
+
+ return written
+
+
+def writeUnassignedPairsToFile(farConnected, assigned, goodDict, outfile):
+ total = 0
+ written = 0