first pass cleanup of cistematic/genomes; change bamPreprocessing
[erange.git] / checkrmask.py
index 9f58983195902265c5981d720487a4283d1ca870..8ae30ee958dd10fc0d7e598b7bb25fa4b4ed17c2 100755 (executable)
@@ -4,12 +4,14 @@ try:
 except:
     pass
 
-import sqlite3 as sqlite
-import sys, string, optparse
+import sys
+import string
+import optparse
 import os.path
-from commoncode import writeLog
+import sqlite3 as sqlite
+from commoncode import writeLog, getConfigParser, getConfigOption, getConfigIntOption
 
-versionString = "%prog: version 3.5"
+versionString = "checkrmask: version 3.6"
 print versionString
 
 
@@ -19,11 +21,7 @@ def main(argv=None):
 
     usage = "usage: python %prog dbfile infile outfile goodfile [--startField field] [--cache numPages] [--log logfile]"
 
-    parser = optparse.OptionParser(usage=usage)
-    parser.add_option("--cache", type="int", dest="cachePages")
-    parser.add_option("--startField", type="int", dest="startField")
-    parser.add_option("--log", dest="logfilename")
-    parser.set_defaults(cachePages=500000, startField=0, logfilename=None)
+    parser = makeParser(usage)
     (options, args) = parser.parse_args(argv[1:])
 
     if len(args) < 4:
@@ -38,30 +36,34 @@ def main(argv=None):
     checkrmask(dbfile, filename, outfile, goodfile, options.startField, options.cachePages, options.logfilename)
 
 
-def checkrmask(dbfile, filename, outFileName, goodFileName, startField=0, cachePages=500000, logfilename=None):
+def makeParser(usage=""):
+    parser = optparse.OptionParser(usage=usage)
+    parser.add_option("--cache", type="int", dest="cachePages")
+    parser.add_option("--startField", type="int", dest="startField")
+    parser.add_option("--log", dest="logfilename")
 
-    outfile = open(outFileName, "w")
-    goodfile = open(goodFileName, "w")
-    if startField < 0:
-        startField = 0
+    configParser = getConfigParser()
+    section = "checkrmask"
+    cachePages = getConfigIntOption(configParser, section, "cachePages", 500000)
+    startField = getConfigIntOption(configParser, section, "startField", 0)
+    logfilename = getConfigOption(configParser, section, "logfilename", None)
 
-    if cachePages < 250000:
-        cachePages = 250000
+    parser.set_defaults(cachePages=cachePages, startField=startField, logfilename=logfilename)
 
-    doLog = False
-    if logfilename is not None:
-        writeLog(logfilename, versionString, string.join(sys.argv[1:]))
-        doLog = True
+    return parser
+
+
+def checkrmask(dbfile, filename, outFileName, goodFileName, startField=0, cachePages=500000, logfilename=None):
 
-    infile = open(filename)
     if os.path.isfile(dbfile):
-        db = sqlite.connect(dbfile)
-        sql = db.cursor()
-        sql.execute("PRAGMA CACHE_SIZE = %d" % cachePages)
-        sql.execute("PRAGMA temp_store = MEMORY")
+        checkrmaskdb(dbfile, filename, outFileName, goodFileName, startField, cachePages, logfilename)
     else:
+        outfile = open(outFileName, "w")
+        goodfile = open(goodFileName, "w")
+        infile = open(filename)
         print "No database - passing through"
-        if doLog:
+        if logfilename is not None:
+            writeLog(logfilename, versionString, string.join(sys.argv[1:]))
             writeLog(logfilename, versionString, "No database - passing through")
 
         for line in infile:
@@ -70,7 +72,26 @@ def checkrmask(dbfile, filename, outFileName, goodFileName, startField=0, cacheP
 
         outfile.close()
         goodfile.close()
-        sys.exit(0)
+
+
+def checkrmaskdb(dbfile, filename, outFileName, goodFileName, startField=0, cachePages=500000, logfilename=None):
+
+    outfile = open(outFileName, "w")
+    goodfile = open(goodFileName, "w")
+    if startField < 0:
+        startField = 0
+
+    if cachePages < 250000:
+        cachePages = 250000
+
+    if logfilename is not None:
+        writeLog(logfilename, versionString, string.join(sys.argv[1:]))
+
+    infile = open(filename)
+    db = sqlite.connect(dbfile)
+    sql = db.cursor()
+    sql.execute("PRAGMA CACHE_SIZE = %d" % cachePages)
+    sql.execute("PRAGMA temp_store = MEMORY")
 
     featureList = []
     featureDict = {}
@@ -177,7 +198,7 @@ def checkrmask(dbfile, filename, outFileName, goodFileName, startField=0, cacheP
             outfile.write(outline + "\n")
 
         if len(finalresults) == 0:
-            outline = "%s\tNR\tNR\t%0.00" % line
+            outline = "%s\tNR\tNR\t0.00" % line
             print outline
             outfile.write(outline + "\n")
 
@@ -186,4 +207,4 @@ def checkrmask(dbfile, filename, outFileName, goodFileName, startField=0, cacheP
 
 
 if __name__ == "__main__":
-    main(sys.argv)
\ No newline at end of file
+    main(sys.argv)