From 9c5e5fb4495f21ae6a1fafd98d746aea3b2ad72b Mon Sep 17 00:00:00 2001 From: Diane Trout Date: Thu, 17 Oct 2013 14:12:05 -0700 Subject: [PATCH] Implement a wrapper around bigWigInfo to report bigWig summary info. (shocking name there I know) --- htsworkflow/util/test/__init__.py | 4 ++ htsworkflow/util/test/test_ucsc.py | 29 ++++++++++ htsworkflow/util/test/testdata/foo.bigWig | Bin 0 -> 18938 bytes htsworkflow/util/ucsc.py | 67 ++++++++++++++++++++++ 4 files changed, 100 insertions(+) create mode 100644 htsworkflow/util/test/test_ucsc.py create mode 100644 htsworkflow/util/test/testdata/foo.bigWig create mode 100644 htsworkflow/util/ucsc.py diff --git a/htsworkflow/util/test/__init__.py b/htsworkflow/util/test/__init__.py index e69de29..fdc2a2a 100644 --- a/htsworkflow/util/test/__init__.py +++ b/htsworkflow/util/test/__init__.py @@ -0,0 +1,4 @@ +import os + +TEST_CODE_DIR = os.path.split(__file__)[0] +TEST_DATA_DIR = os.path.join(TEST_CODE_DIR, 'testdata') diff --git a/htsworkflow/util/test/test_ucsc.py b/htsworkflow/util/test/test_ucsc.py new file mode 100644 index 0000000..05a64ba --- /dev/null +++ b/htsworkflow/util/test/test_ucsc.py @@ -0,0 +1,29 @@ +"""Test wrappers around ucsc file formats +""" +import os +from unittest2 import TestCase +from htsworkflow.util.test import TEST_DATA_DIR +from htsworkflow.util.ucsc import bigWigInfo + +from distutils.spawn import find_executable + +class TestUCSC(TestCase): + def test_bigwig_info(self): + if not find_executable('bigWigInfo'): + self.skipTest('Need bigWigInfo on path to test') + + filename = os.path.join(TEST_DATA_DIR, 'foo.bigWig') + info = bigWigInfo(filename) + self.assertEqual(info.version, 4) + self.assertEqual(info.isCompressed, True) + # what should i do for byteswapped arch? 
"""Wrap ucsc command line utilities
"""

import logging
import os
from subprocess import Popen, PIPE

LOGGER = logging.getLogger(__name__)


def parseNumber(number):
    """Convert the text of a number into an int or a float.

    Commas (thousands separators) and surrounding whitespace are removed
    before conversion.  Text that is a valid integer is returned as an
    int; anything else is handed to float(), so values with a decimal
    point or an exponent come back as floats.

    :param number: text such as '6,204', '-5.5' or '1e-05'
    :returns: int or float
    :raises ValueError: if the text is not a number
    """
    text = number.strip().replace(',', '')
    try:
        return int(text)
    except ValueError:
        # Not a plain integer; float() raises ValueError itself when
        # the text is not numeric at all.
        return float(text)


def parseBoolean(value):
    """Convert a yes/no style string into a bool.

    The comparison ignores case and surrounding whitespace.

    :param value: text such as 'yes', 'no', '1', '0', 'true' or 'false'
    :returns: True or False, or None when the text is not recognized
    """
    text = value.strip().lower()
    if text in ('yes', '1', 'true'):
        return True
    if text in ('no', '0', 'false'):
        return False
    return None


class bigWigInfo(object):
    """Summary information about a bigWig file.

    The attributes are filled in from the 'term: value' lines printed by
    the bigWigInfo command line program.  Every attribute is None until
    a file has been scanned.
    """
    # Terms whose values are yes/no flags rather than numbers.
    _BOOLEAN_TERMS = ('isCompressed', 'isSwapped')

    def __init__(self, filename=None):
        """Create the summary, scanning filename when one is given.

        :param filename: optional path of a bigWig file to scan
        """
        self.version = None
        self.isCompressed = None
        self.isSwapped = None
        self.primaryDataSize = None
        self.primaryIndexSize = None
        self.zoomLevels = None
        self.chromCount = None
        self.basesCovered = None
        self.mean = None
        self.min = None
        self.max = None
        self.std = None
        self.filename = None
        if filename:
            self.scan_file(filename)
            self.filename = filename

    def scan_file(self, filename):
        """Run bigWigInfo on filename and store the values it reports.

        :param filename: path of the bigWig file to inspect
        :raises OSError: if the bigWigInfo program cannot be started
        """
        cmd = ['bigWigInfo',
               filename]
        # universal_newlines makes communicate() return text with
        # normalized line endings instead of raw bytes.
        p = Popen(cmd, stdout=PIPE, universal_newlines=True)
        stdout, _ = p.communicate()
        if p.returncode != 0:
            # Report the failure but keep whatever output was produced,
            # so a failed run still leaves the attributes at None.
            LOGGER.warning('bigWigInfo exited with status %s for %s',
                           p.returncode, filename)
        self._parse_output(stdout)

    def _parse_output(self, output):
        """Store each 'term: value' line of output as an attribute.

        Lines without a colon, and blank lines, are skipped.  Values of
        isCompressed and isSwapped are converted with parseBoolean; all
        other values are converted with parseNumber, and are kept as the
        original text when they are not numeric.

        :param output: text printed by bigWigInfo
        """
        for line in output.splitlines():
            term, separator, value = line.partition(':')
            term = term.strip()
            if not separator or not term:
                continue
            value = value.strip()
            if term in self._BOOLEAN_TERMS:
                value = parseBoolean(value)
            else:
                try:
                    value = parseNumber(value)
                except ValueError:
                    # Keep unrecognized values as text rather than
                    # aborting the whole scan.
                    LOGGER.debug('%s has non-numeric value %s',
                                 term, value)
            LOGGER.debug('%s: %s', term, str(value))
            setattr(self, term, value)