-
-
def buildHsapiensDB(db=geneDB, downloadDir="%s/download" % cisRoot):
    """Build the H. sapiens database `db` from the files in `downloadDir`.

    The build runs these steps in order, printing a progress line before
    each one: create the database file, load gene entries, gene features,
    gene annotations and gene ontology, load every chromosome sequence,
    and finally create the database indices.

    Args:
        db: database handle/name passed unchanged to every helper
            (defaults to the module-level ``geneDB``).
        downloadDir: directory expected to hold ``seq_gene.md``,
            ``GO.terms_and_ids``, ``gene2go`` and one ``chr<ID>.fa`` file
            for each of chromosomes 1-22, X and Y.
    """
    # Source locations of the input files (paths below carry no .gz suffix,
    # so the archives are presumably unpacked beforehand - TODO confirm).
    genePath = "%s/seq_gene.md" % downloadDir  # ftp://ftp.ncbi.nih.gov/genomes/H_sapiens/mapview/seq_gene.md.gz
    goDefPath = "%s/GO.terms_and_ids" % downloadDir  # ftp://ftp.geneontology.org/go/doc/GO.terms_and_ids
    goPath = "%s/gene2go" % downloadDir  # ftp://ftp.ncbi.nih.gov/gene/gene2go.gz

    # chromosomes are from UCSC - will ignore all the alternative haplotypes,
    # chrUn, and random chromosomes
    chromIDs = [str(number) for number in range(1, 23)] + ["X", "Y"]
    chromDict = dict(
        (chromID, "%s/chr%s.fa" % (downloadDir, chromID)) for chromID in chromIDs
    )

    # Single-argument print(...) emits the same text under Python 2 and 3.
    print("Creating database %s" % db)
    createDBFile(db)

    print("Adding gene entries")
    loadGeneEntries(db, genePath, chromDict)

    print("Adding gene features")
    loadGeneFeatures(db, genePath, chromDict)

    print("Adding gene annotations")
    loadGeneAnnotations(db)

    print("Adding gene ontology")
    loadGeneOntology(db, goPath, goDefPath)

    # Walk the explicit ID list rather than the dict so chromosomes are
    # always loaded in the same order (1-22, X, Y); plain dict order is
    # arbitrary on Python 2.  Assumes the individual loads do not depend
    # on one another - TODO confirm.
    for chromID in chromIDs:
        print("Loading chromosome %s" % chromID)
        loadChromosome(db, chromID, chromDict[chromID], "/H_sapiens/chromo%s.bin" % chromID)

    print("Creating Indices")
    createDBindices(db)

    print("Finished creating database %s" % db)