cistematic/core/__init__.py

   1 ###########################################################################
   2 #                                                                         #
   3 # C O P Y R I G H T   N O T I C E                                         #
   4 #  Copyright (c) 2003-10 by:                                              #
   5 #    * California Institute of Technology                                 #
   6 #                                                                         #
   7 #    All Rights Reserved.                                                 #
   8 #                                                                         #
   9 # Permission is hereby granted, free of charge, to any person             #
  10 # obtaining a copy of this software and associated documentation files    #
  11 # (the "Software"), to deal in the Software without restriction,          #
  12 # including without limitation the rights to use, copy, modify, merge,    #
  13 # publish, distribute, sublicense, and/or sell copies of the Software,    #
  14 # and to permit persons to whom the Software is furnished to do so,       #
  15 # subject to the following conditions:                                    #
  16 #                                                                         #
  17 # The above copyright notice and this permission notice shall be          #
  18 # included in all copies or substantial portions of the Software.         #
  19 #                                                                         #
  20 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,         #
  21 # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF      #
  22 # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND                   #
  23 # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS     #
  24 # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN      #
  25 # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN       #
  26 # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE        #
  27 # SOFTWARE.                                                               #
  28 ###########################################################################
  29 #
  30 __all__ = ["motif", "homology", "geneinfo", "protein"]
  31
  32 import cistematic
  33 from cistematic.genomes import Genome, geneDB
  34 import shutil, tempfile, os
  35 from os import environ
  36
  37 if environ.get("CISTEMATIC_TEMP"):
  38     cisTemp = environ.get("CISTEMATIC_TEMP")
  39 else:
  40     cisTemp = "/tmp"
  41
  42 tempfile.tempdir = cisTemp
  43
  44 goDict = {}
  45 annotDict = {}
  46 global cache
  47 cache = {}
  48
  49
  50 def cacheGeneDB(genome):
  51     """ save a copy of a genome's gene database to a local cache.
  52     """
  53     if genome not in cache:
  54         try:
  55             tempgen = "%s.db" % tempfile.mktemp()
  56             shutil.copyfile(geneDB[genome], tempgen)
  57             cache[genome] = tempgen
  58         except:
  59             print "could not cache genome %s" % genome
  60     else:
  61         tempgen = cache[genome]
  62
  63     return tempgen
  64
  65
  66 def uncacheGeneDB(genome=""):
  67     """ remove the local copy of a genome's gene database.
  68     """
  69     global cache
  70     if genome in cache:
  71         try:
  72             os.remove(cache[genome])
  73         except:
  74             print "could not delete %s" % cache[genome]
  75
  76         del cache[genome]
  77     else:
  78         for gen in cache:
  79             try:
  80                 os.remove(cache[gen])
  81             except:
  82                 print "could not delete %s" % cache[gen]
  83
  84         cache = {}
  85
  86
  87 def cachedGenomes():
  88     """ return lists of genomes with a gene database in the local cache.
  89     """
  90     return cache.keys()
  91
  92
  93 def chooseDB(genome, dbfile=""):
  94     """ helper function to use genome's gene database from the local cache if present.
  95     """
  96     global cache
  97     if dbfile == "" and genome in cache:
  98         dbfile = cache[genome]
  99
 100     return dbfile
 101
 102
 103 def readChromosome(genome, chrom, db=""):
 104     """ return sequence for entire chromosome
 105     """
 106     aGenome = Genome(genome, chrom, dbFile=chooseDB(genome, db))
 107     return aGenome.getChromosomeSequence()
 108
 109
 110 def getGenomeEntries(genome, db=""):
 111     """ return the entries for a given genome.
 112     """
 113     global cache
 114     if db == "" and genome in cache:
 115         db = cache[genome]
 116
 117     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 118     return (genome, aGenome.allGIDs())
 119
 120
 121 def getGenomeGeneIDs(genome, db=""):
 122     """ return the entries for a given genome.
 123     """
 124     global cache
 125     if db == "" and genome in cache:
 126         db = cache[genome]
 127
 128     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 129     return aGenome.allGIDs()
 130
 131
 132 def getChromoGeneEntries(chromosome, lowerbound=-1, upperbound=-1, db=""):
 133     """ return the entries for a given chromosome.
 134     """
 135     (genome, chromID) = chromosome
 136     aGenome = Genome(genome, chromID, dbFile=chooseDB(genome, db))
 137     return aGenome.chromGeneEntries(chromID, lowerbound, upperbound)
 138
 139
 140 def getChromosomeNames(genome, db="", partition=1, slice=0):
 141     """ return the chromosomes for a given genome.
 142     """
 143     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 144     return aGenome.allChromNames(partition, slice)
 145
 146
 147 def geneEntry(geneID, db="", version="1"):
 148     """ returns (chrom, start, stop, length, sense) for a given geneID
 149     """
 150     genome = geneID[0]
 151     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 152     return aGenome.geneInfo(geneID, version)
 153
 154
 155 def compNT(nt):
 156     """ returns the complementary basepair to base nt
 157     """
 158     compDict = {"A": "T", "T": "A",
 159                 "G": "C", "C": "G",
 160                 "S": "S",
 161                 "W": "W",
 162                 "R": "Y", "Y": "R",
 163                 "M": "K", "K": "M",
 164                 "H": "D", "D": "H",
 165                 "B": "V", "V": "B",
 166                 "N": "N",
 167                 "a": "t", "t": "a",
 168                 "g": "c", "c": "g",
 169                 "n": "n",
 170                 "z": "z"
 171     }
 172
 173     return compDict.get(nt, "N")
 174
 175
 176 def complement(sequence, length=-1):
 177     """ returns the complement of the sequence.
 178     """
 179     newSeq = ""
 180     seqLength = len(sequence)
 181     if length == seqLength or length < 0:
 182         seqList = list(sequence)
 183         seqList.reverse()
 184         return "".join(map(compNT, seqList))
 185
 186     for index in range(seqLength - 1,seqLength - length - 1, -1):
 187         try:
 188             newSeq += compNT(sequence[index])
 189         except:
 190             newSeq += "N"
 191
 192     return newSeq
 193
 194
 195 def upstreamToNextGene(geneID, radius, version="1", db=""):
 196     """ return distance to gene immediately upstream.
 197     """
 198     upstream = radius
 199     genome = geneID[0]
 200     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 201     try:
 202         if aGenome.checkGene(geneID):
 203             (chrom, start, stop, length, sense) = aGenome.geneInfo(geneID, version)
 204             if sense == "F":
 205                 upstream = aGenome.leftGeneDistance(geneID, upstream, version)
 206             else:
 207                 upstream = aGenome.rightGeneDistance(geneID, upstream, version)
 208     except:
 209         pass
 210
 211     return upstream
 212
 213
 214 def downstreamToNextGene(geneID, radius, version="1", db=""):
 215     """ return distance to gene immediately downstream.
 216     """
 217     downstream = radius
 218     genome = geneID[0]
 219     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 220
 221     try:
 222         if aGenome.checkGene(geneID):
 223             (chrom, start, stop, length, sense) = aGenome.geneInfo(geneID, version)
 224             if sense == "F":
 225                 downstream = aGenome.rightGeneDistance(geneID, downstream, version)
 226             else:
 227                 downstream = aGenome.leftGeneDistance(geneID, downstream, version)
 228     except:
 229         pass
 230
 231     return downstream
 232
 233
 234 def retrieveFeatures(match, radius, featureType="", db=""):
 235     """ return the features around a given match.
 236     """
 237     (chromosome, hit) = match
 238     (genome, chromID) = chromosome
 239     lowerboundHit = int(hit[0]) - int(radius)
 240     if lowerboundHit < 0:
 241         lowerboundHit = 0
 242
 243     aGenome = Genome(genome, chromID, dbFile=chooseDB(genome, db))
 244     results = aGenome.getFeaturesIntersecting(chromID, lowerboundHit, 2 * int(radius), featureType)
 245
 246     return results
 247
 248
 249 def retrieveSeqFeatures(geneID, upstream, cds, downstream, boundToNextGene = False, geneDB=""):
 250     """ retrieve CDS features upstream, all or none of the cds, and downstream of a geneID.
 251         Feature positions are normalized and truncated to local sequence coordinates.
 252     """
 253     results = []
 254     (genome, gID) = geneID
 255     aGenome = Genome(genome, dbFile=chooseDB(genome, geneDB))
 256     if True:
 257         seqstart = 0
 258         seqlen = 0
 259         if aGenome.checkGene(geneID):
 260             (chrom, start, stop, length, sense) = aGenome.geneInfo(geneID)
 261             if stop < start:
 262                 pos = stop
 263                 stop = start
 264                 start = pos
 265
 266             if sense == "F":
 267                 # figure out normalized seqstart and seqstop
 268                 if upstream > 0:
 269                     if boundToNextGene:
 270                         upstream = aGenome.leftGeneDistance(geneID, upstream)
 271
 272                     seqstart = start - upstream
 273                     if seqstart < 0:
 274                         seqstart = 0
 275                         upstream = start
 276
 277                     seqlen = upstream
 278
 279                 if cds > 0:
 280                     if seqlen == 0:
 281                         seqstart = start
 282
 283                     seqlen += length
 284
 285                 if downstream > 0:
 286                     if boundToNextGene:
 287                         downstream = aGenome.rightGeneDistance(geneID, downstream)
 288
 289                     if seqlen == 0:
 290                         seqstart = stop
 291
 292                     seqlen += downstream
 293
 294                 # process features
 295                 allresults = aGenome.getFeaturesIntersecting(chrom, seqstart, seqlen, "CDS")
 296                 for entry in allresults:
 297                     (fname, fversion, fchromosome, fstart, fstop, forientation, ftype) = entry
 298                     if fstop < fstart:
 299                         fstop = fstart
 300                         fstart = fstop
 301
 302                     forstart = fstart - seqstart       # normalize
 303                     if forstart < 0:                # truncate
 304                         forstart = 0
 305
 306                     forstop = fstop - seqstart # normalize
 307                     if forstop > seqlen:  # truncate
 308                         forstop = seqlen
 309
 310                     if (ftype, forstart, forstop, forientation) not in results:
 311                         results.append((ftype, forstart, forstop, forientation))
 312             else:
 313                 # figure out normalized seqstart and seqstop
 314                 if upstream > 0:
 315                     if boundToNextGene:
 316                         upstream = aGenome.rightGeneDistance(geneID, upstream)
 317
 318                     seqstart = stop + upstream
 319                     seqlen = upstream
 320
 321                 if cds > 0:
 322                     if seqlen == 0:
 323                         seqstart = stop
 324
 325                     seqlen += length
 326
 327                 if downstream > 0:
 328                     if boundToNextGene:
 329                         downstream = aGenome.leftGeneDistance(geneID, downstream)
 330
 331                     if seqlen == 0:
 332                         seqstart = start
 333
 334                     seqlen += downstream
 335
 336                 # process features
 337                 allresults = aGenome.getFeaturesIntersecting(chrom, seqstart - seqlen, seqlen, "CDS")
 338                 for entry in allresults:
 339                     (fname, fversion, fchromosome, fstart, fstop, forientation, ftype) = entry
 340                     if fstop < fstart:
 341                         fstop = fstart
 342                         fstart = fstop
 343
 344                     revstart = seqstart - fstop
 345                     if revstart < 0:
 346                         revstart = 0
 347
 348                     revstop = seqstart - fstart
 349                     if revstop > seqlen:
 350                         fstop = seqlen
 351
 352                     if (ftype, revstart, revstop, forientation) not in results:
 353                         results.append((ftype, revstart, revstop, forientation))
 354     else:
 355         pass
 356
 357     return results
 358
 359
 360 def getFeaturesIntersecting(genome, chrom, start, length, db="", ftype="CDS"):
 361     """ return features of type ftype that fall within the given region.
 362     """
 363     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 364     return aGenome.getFeaturesIntersecting(chrom, start, length, ftype)
 365
 366
 367 def retrieveSequence(genome, chrom, start, stop, sense="F", db=""):
 368     """ retrieve a sequence given a genome, chromosome, start, stop, and sense.
 369     """
 370     entrySeq = ""
 371     length = abs(stop - start) + 1
 372     try:
 373         aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 374         if sense == "F":
 375             if start  < 1:
 376                 seqStart  = 0
 377             else:
 378                 seqStart  = start - 1
 379
 380             sequence = aGenome.sequence(chrom, seqStart, length)
 381             entrySeq = sequence
 382         else:
 383             seqStart  = stop - 1
 384             entrySeq= aGenome.sequence(chrom, seqStart, length)
 385
 386     except IOError:
 387         print "Couldn't retrieve sequence %s %s %s %s %s" % (genome, chrom, start, stop, sense)
 388
 389     return entrySeq
 390
 391
 392 def retrieveCDS(geneID, maskCDS=False, maskLower=False, db="", version="1"):
 393     """ retrieveCDS() - retrieve a sequence given a gene identifier
 394     """
 395     entrySeq = ""
 396     genome = geneID[0]
 397     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 398     try:
 399         if aGenome.checkGene(geneID):
 400             entrySeq = aGenome.geneSeq(geneID, maskCDS, maskLower, version)
 401     except IOError:
 402         print "Could not find %s " % str(geneID)
 403
 404     return entrySeq
 405
 406
 407 def retrieveUpstream(geneID, upstream, maskCDS=False, maskLower=False, boundToNextGene=False, db="", version="1"):
 408     """ retrieve sequence 5' of cds of length upstream for a given a gene identifier
 409     """
 410     entrySeq = ""
 411     genome = geneID[0]
 412     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 413     try:
 414         if aGenome.checkGene(geneID):
 415             (chrom, start, stop, length, sense) = aGenome.geneInfo(geneID, version)
 416             if sense == "F":
 417                 if boundToNextGene:
 418                     upstream = aGenome.leftGeneDistance(geneID, upstream, version)
 419
 420                 if (start - upstream) > 1:
 421                     seqStart  = start - upstream - 1
 422                     seqLength = upstream
 423                 else:
 424                     seqStart  = 0
 425                     seqLength = upstream
 426             else:
 427                 if boundToNextGene:
 428                     upstream = aGenome.rightGeneDistance(geneID, upstream, version)
 429
 430                 seqStart  = stop
 431                 seqLength = upstream
 432
 433             sequence = aGenome.sequence(chrom, seqStart, seqLength, maskCDS, maskLower)
 434             # do CDS masking here....
 435             if sense == "F":
 436                 entrySeq = sequence
 437             else:
 438                 entrySeq = complement(sequence, upstream)
 439
 440     except IOError:
 441         print "Couldn't find ", geneID
 442
 443     return entrySeq
 444
 445
 446 def retrieveDownstream(geneID, downstream, maskCDS=False, maskLower=False, boundToNextGene=False, db="", version="1"):
 447     """ retrieve sequence 3' of CDS of length downstream for a given a gene identifier
 448     """
 449     entrySeq = ""
 450     genome = geneID[0]
 451     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 452     if True:
 453         if aGenome.checkGene(geneID):
 454             (chrom, start, stop, length, sense) = aGenome.geneInfo(geneID, version)
 455             if sense == "F":
 456                 if boundToNextGene:
 457                     downstream = aGenome.rightGeneDistance(geneID, downstream, version)
 458
 459                 seqStart  = stop - 1
 460                 seqLength = downstream + 1
 461             else:
 462                 if boundToNextGene:
 463                     downstream = aGenome.leftGeneDistance(geneID, downstream, version)
 464
 465                 if (start - downstream) > 1:
 466                     seqStart  = start - downstream
 467                     seqLength = downstream
 468                 else:
 469                     seqStart  = 0
 470                     seqLength = stop
 471
 472             sequence = aGenome.sequence(chrom, seqStart, seqLength, maskCDS, maskLower)
 473             # do CDS masking here
 474             if sense == "F":
 475                 entrySeq = sequence
 476             else:
 477                 entrySeq = complement(sequence, downstream)
 478
 479     return entrySeq
 480
 481
 482 def retrieveSeq(geneID, upstream, cds, downstream, geneDB="", maskLower = False, boundToNextGene = False, version="1"):
 483     """ retrieve upstream, all or none of the cds, and downstream of a geneID
 484     """
 485     geneSeq = ""
 486     if int(cds) == 2:
 487         maskCDS = True
 488     else:
 489         maskCDS = False
 490
 491     if upstream > 0:
 492         geneSeq += retrieveUpstream(geneID, upstream, maskCDS, maskLower, boundToNextGene, geneDB, version)
 493
 494     if cds > 0:
 495         geneSeq += retrieveCDS(geneID, maskCDS, maskLower, geneDB, version)
 496
 497     if downstream > 0:
 498         geneSeq += retrieveDownstream(geneID, downstream, maskCDS, maskLower, boundToNextGene, geneDB, version)
 499
 500     if len(geneSeq) == 0:
 501         print "retrieveSeq Warning: retrieved null sequence for %s: %s (splice form %s) from geneDB %s" % (geneID[0], geneID[1], version, geneDB)
 502
 503     return geneSeq
 504
 505
 506 def retrieveAll(genome, genes, upstream, downstream, outputFilePath):
 507     """ retrieve set of upstream and downstrean sequences for a list of genes in a genome and save them to a file.
 508     """
 509     outFile = open(outputFilePath, "w")
 510     for gene in genes:
 511         print "Processing " , gene
 512         outFile.write("> %s \n" % (gene))
 513         geneID = (genome, gene)
 514         outFile.write("%s\n" % retrieveSeq(geneID, upstream, 0, downstream))
 515
 516     outFile.close()
 517
 518
 519 def fasta(geneID, seq):
 520     """ write a fasta formated seq with geneID in the header.
 521     """
 522     fastaString = "> %s-%s\n%s\n" % (geneID[0],geneID[1], seq)
 523
 524     return fastaString
 525
 526
 527 def loadGOInfo(genome, db=""):
 528     """ load GO for a given genome
 529     """
 530     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 531     if genome not in goDict.keys():
 532         goDict[genome] = aGenome.allGOInfo()
 533
 534
 535 def getGOInfo(geneID, db=""):
 536     """ retrieve GO info for geneID
 537     """
 538     (genome, locus) = geneID
 539     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 540     try:
 541         return aGenome.goInfo(geneID)
 542     except:
 543         return []
 544
 545
 546 def getGOIDCount(genome, GOID, db=""):
 547     """ retrieve count of genes with a particular GOID.
 548     """
 549     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 550     try:
 551         return aGenome.getGOIDCount(GOID)
 552     except:
 553         return []
 554
 555
 556 def allGOTerms(genome, db=""):
 557     """ return all GO Terms.
 558     """
 559     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 560     try:
 561         return aGenome.allGOterms()
 562     except:
 563         return []
 564
 565
 566 def getAllGOInfo(genome, db=""):
 567     """ return all GO Info.
 568     """
 569     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 570     try:
 571         return aGenome.allGoInfo()
 572     except:
 573         return []
 574
 575
 576 def loadAnnotInfo(genome, db=""):
 577     """ load Annotations for a given genome
 578     """
 579     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 580     if genome not in annotDict.keys():
 581         annotDict[genome] = aGenome.allAnnotInfo()
 582
 583
 584 def getAnnotInfo(geneID, db=""):
 585     """ retrieve Annotations for a given geneID
 586     """
 587     (genome, locus) = geneID
 588     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 589     try:
 590         return aGenome.annotInfo(geneID)
 591     except:
 592         return []
 593
 594
 595 def getAllAnnotInfo(genome, db=""):
 596     """ return all Annotation Info.
 597     """
 598     aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 599     try:
 600         return aGenome.allAnnotInfo()
 601     except:
 602         return []
 603
 604
 605 def sanitize(inSeq, windowSize=16):
 606     """ make sure that very simple repeats are out of the sequence.
 607         Soft-mask any window that has windowSize - 2 of mononucleotides
 608         and (windowSize / 2) - 1 non-GC dinucleotides.
 609     """
 610     seqlen = len(inSeq)
 611     outSeq = list(inSeq.upper())
 612     winmin2 = windowSize - 2
 613     winhalf = windowSize/2 - 1
 614     for pos in range(seqlen - windowSize):
 615         window = inSeq[pos:pos + windowSize].upper()
 616         if window.count("A") > winmin2 or window.count("C") > winmin2 or window.count("G") > winmin2 or window.count("T") > winmin2:
 617             for index in range(windowSize):
 618                 outSeq[pos + index] = outSeq[pos + index].lower()
 619
 620         if window.count("AC") >= winhalf or window.count("AG") >= winhalf or window.count("AT") >= winhalf or window.count("CT") >= winhalf or window.count("GT") >= winhalf or window.count("TA") >= winhalf or window.count("TC") >= winhalf or window.count("TG") >= winhalf or window.count("GA") > winhalf or window.count("CA") > winhalf:
 621             for index in range(windowSize):
 622                 outSeq[pos + index] = outSeq[pos + index].lower()
 623
 624     return "".join(outSeq)
 625
 626
 627 def featuresIntersecting(genome, posList, radius, ftype, name="", chrom="", version="", db="", extendGen="", replaceMod=False):
 628     """ returns a dictionary of matching features to positions of the double form (chromosome, position).
 629         Only positions with features within radius are returned.
 630     """
 631     resultDict = {}
 632     if extendGen != "":
 633         aGenome = Genome(genome, dbFile=chooseDB(genome, db), inRAM=True)
 634         aGenome.extendFeatures(extendGen, replace = replaceMod)
 635     else:
 636         aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 637
 638     features = aGenome.getFeatures(ftype, name, chrom, version)
 639     if len(posList) < 1 or len(features) < 1:
 640         return resultDict
 641
 642     chromList = features.keys()
 643     for (chrom, pos) in posList:
 644         tempList = []
 645         if chrom not in chromList:
 646             continue
 647
 648         for (name, version, chromosome, start, stop, orientation, atype) in features[chrom]:
 649             if (pos + radius) < start or (pos - radius) > stop:
 650                 continue
 651
 652             tempList.append((name, version, chromosome, start, stop, orientation, atype))
 653
 654         if len(tempList) > 0:
 655             resultDict[(chrom, pos)] = tempList
 656
 657     return resultDict
 658
 659
 660 def genesIntersecting(genome, posList, name="", chrom="", version="", db="", flank=0, extendGen="", replaceMod=False):
 661     """ returns a dictionary of matching genes to positions of the double form (chromosome, position).
 662         Only positions with features within radius are returned.
 663     """
 664     resultDict = {}
 665     if extendGen != "":
 666         aGenome = Genome(genome, dbFile=chooseDB(genome, db), inRAM=True)
 667         aGenome.extendFeatures(extendGen, replace = replaceMod)
 668     else:
 669         aGenome = Genome(genome, dbFile=chooseDB(genome, db))
 670
 671     genes = aGenome.getallGeneInfo(name, chrom, version)
 672     if len(posList) < 1 or len(genes) < 1:
 673         return resultDict
 674
 675     chromList = genes.keys()
 676     for (chrom, pos) in posList:
 677         tempList = []
 678         if chrom not in chromList:
 679             continue
 680
 681         for (name, chromosome, start, stop, orientation) in genes[chrom]:
 682             if start-flank <= pos <= stop+flank:
 683                 tempList.append((name, "noversion", chromosome, start, stop, orientation))
 684
 685         if len(tempList) > 0:
 686             resultDict[(chrom, pos)] = tempList
 687
 688     return resultDict