Implement a wrapper around bigWigInfo to report bigWig summary info.
author Diane Trout <diane@ghic.org>
Thu, 17 Oct 2013 21:12:05 +0000 (14:12 -0700)
committer Diane Trout <diane@ghic.org>
Thu, 17 Oct 2013 21:12:05 +0000 (14:12 -0700)
(shocking name there I know)

htsworkflow/util/test/__init__.py
htsworkflow/util/test/test_ucsc.py [new file with mode: 0644]
htsworkflow/util/test/testdata/foo.bigWig [new file with mode: 0644]
htsworkflow/util/ucsc.py [new file with mode: 0644]

index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..fdc2a2a6e263151ba6f068479fed8d073f170d12 100644 (file)
@@ -0,0 +1,4 @@
+import os
+
+# Directory holding this package's test modules.
+TEST_CODE_DIR = os.path.dirname(__file__)
+# Fixture files live in the 'testdata' directory next to the test modules.
+TEST_DATA_DIR = os.path.join(TEST_CODE_DIR, 'testdata')
diff --git a/htsworkflow/util/test/test_ucsc.py b/htsworkflow/util/test/test_ucsc.py
new file mode 100644 (file)
index 0000000..05a64ba
--- /dev/null
@@ -0,0 +1,29 @@
+"""Test wrappers around ucsc file formats
+"""
+import os
+from unittest2 import TestCase
+from htsworkflow.util.test import TEST_DATA_DIR
+from htsworkflow.util.ucsc import bigWigInfo
+
+from distutils.spawn import find_executable
+
+class TestUCSC(TestCase):
+    """Checks for the wrappers around the UCSC command line tools."""
+
+    def test_bigwig_info(self):
+        # The wrapper shells out to bigWigInfo, so there is nothing to
+        # check when the executable is not installed.
+        if not find_executable('bigWigInfo'):
+            self.skipTest('Need bigWigInfo on path to test')
+
+        bigwig_path = os.path.join(TEST_DATA_DIR, 'foo.bigWig')
+        summary = bigWigInfo(bigwig_path)
+
+        # Fields that must match exactly.
+        # what should i do for byteswapped arch? (applies to isSwapped)
+        exact_fields = (
+            ('version', 4),
+            ('isCompressed', True),
+            ('isSwapped', True),
+            ('primaryDataSize', 48),
+            ('primaryIndexSize', 6204),
+            ('zoomLevels', 2),
+            ('basesCovered', 30),
+        )
+        for attribute, expected in exact_fields:
+            self.assertEqual(getattr(summary, attribute), expected)
+
+        # Floating point statistics are compared approximately.
+        approximate_fields = (
+            ('mean', 0.0),
+            ('min', -5.5),
+            ('max', 5.5),
+            ('std', 4.567501),
+        )
+        for attribute, expected in approximate_fields:
+            self.assertAlmostEqual(getattr(summary, attribute), expected)
+        
diff --git a/htsworkflow/util/test/testdata/foo.bigWig b/htsworkflow/util/test/testdata/foo.bigWig
new file mode 100644 (file)
index 0000000..98090a8
Binary files /dev/null and b/htsworkflow/util/test/testdata/foo.bigWig differ
diff --git a/htsworkflow/util/ucsc.py b/htsworkflow/util/ucsc.py
new file mode 100644 (file)
index 0000000..e9ff77e
--- /dev/null
@@ -0,0 +1,67 @@
+"""Wrap ucsc command line utilities
+"""
+
+import logging
+import os
+from subprocess import Popen, PIPE
+
+LOGGER = logging.getLogger(__name__)
+
+def parseNumber(number):
+    """Convert a numeric string, possibly with thousands commas, to a number.
+
+    :param number: text such as ``'6,204'``, ``'-5.5'`` or ``'1e-05'``
+    :returns: an int when the text is a plain integer, otherwise a float
+    :raises ValueError: if the text is neither an integer nor a float
+    """
+    # Commas are only thousands separators; drop them before converting.
+    digits = number.replace(',', '')
+    # Try int first so whole numbers stay ints; anything int() rejects
+    # (decimal point, exponent, nan/inf) is handed to float().
+    try:
+        return int(digits)
+    except ValueError:
+        return float(digits)
+
+def parseBoolean(value):
+    """Interpret a yes/no style string as a boolean.
+
+    Matching ignores case and surrounding whitespace.
+
+    :param value: text such as ``'yes'``, ``'0'`` or ``'True'``
+    :returns: True for yes/1/true, False for no/0/false, and None for
+              anything else
+    """
+    # Strip first so a stray space or carriage return does not turn a
+    # valid answer into None.
+    token = value.strip().lower()
+    if token in ('yes', '1', 'true'):
+        return True
+    if token in ('no', '0', 'false'):
+        return False
+    # Unrecognised text: report "unknown" rather than guessing.
+    return None
+        
+class bigWigInfo(object):
+    """Summary fields of a bigWig file as printed by the bigWigInfo tool.
+
+    Each attribute starts out as None and is filled in by scan_file()
+    from the ``term: value`` lines the tool writes to stdout.
+    """
+
+    def __init__(self, filename=None):
+        """Create an empty record, scanning filename at once if given.
+
+        :param filename: optional path of a bigWig file to scan
+        """
+        self.version = None
+        self.isCompressed = None
+        self.isSwapped = None
+        self.primaryDataSize = None
+        self.primaryIndexSize = None
+        self.zoomLevels = None
+        self.chromCount = None
+        self.basesCovered = None
+        self.mean = None
+        self.min = None
+        self.max = None
+        self.std = None
+        self.filename = None
+        if filename:
+            self.scan_file(filename)
+            self.filename = filename
+
+    def scan_file(self, filename):
+        """Run bigWigInfo on filename and store every field it reports.
+
+        :param filename: path of the bigWig file to inspect
+        :raises OSError: if the bigWigInfo executable cannot be started
+        :raises ValueError: if a reported value is not a valid number
+        """
+        cmd = ['bigWigInfo', filename]
+        # universal_newlines gives text with '\n' line endings from
+        # communicate() on both Python 2 and Python 3.
+        p = Popen(cmd, stdout=PIPE, universal_newlines=True)
+        stdout, _ = p.communicate()
+        if p.returncode != 0:
+            # Keep going with whatever was printed, but leave a trace so a
+            # record full of None values can be explained.
+            LOGGER.warning('bigWigInfo exited with status %s for %s',
+                           p.returncode, filename)
+        self._parse_output(stdout)
+
+    def _parse_output(self, text):
+        """Set one attribute per ``term: value`` line found in text."""
+        for line in text.splitlines():
+            term, separator, value = line.partition(': ')
+            if not separator:
+                # Blank lines and anything that is not a 'term: value' pair.
+                continue
+            term = term.strip()
+            value = value.strip()
+            if term in ('isCompressed', 'isSwapped'):
+                value = parseBoolean(value)
+            else:
+                value = parseNumber(value)
+            LOGGER.debug('%s: %s', term, str(value))
+            setattr(self, term, value)
+                
+                
+