nilmdb/test/speed-readascii.py

#!/usr/bin/python

from printf import printf
import time
import re
import numpy as np
import itertools

class Timer():
    """Context manager that reports the throughput of the enclosed block.

    On leaving the ``with`` body it prints "<arg>: <rate> lines/sec" via
    printf, where the rate is ``lines`` divided by the wall-clock seconds
    spent inside the body.  Exceptions raised inside the body are not
    suppressed.

    arg   -- label printed in front of the measured rate
    lines -- number of lines the timed block is taken to process;
             defaults to 1e6, the constant this class used before the
             parameter existed
    """
    def __init__(self, arg, lines=1e6):
        self.arg = arg
        self.lines = lines
        self.start = None

    def __enter__(self):
        self.start = time.time()
        # Hand back the timer so "with Timer(...) as t:" binds it.
        return self

    def __exit__(self, *args):
        elapsed = time.time() - self.start
        # A coarse clock can report zero elapsed time for a very short
        # block; print an infinite rate rather than dividing by zero.
        rate = self.lines / elapsed if elapsed > 0 else float('inf')
        printf("%s: %f lines/sec\n", self.arg, rate)

def test_split():
    """Parse each line of '1m.raw' with str.split() and int(i).

    Anything from the first '#' onward is dropped before parsing.  The
    parsed integers are written over the front of a list of six zeros,
    so lines with fewer than six values are zero-filled.  Line 0 and
    every 100000th line after it are printed.
    """
    # "with" closes the file as soon as the loop finishes (or raises)
    # instead of leaving the handle to the garbage collector.
    with open('1m.raw', 'r') as f:
        for n, line in enumerate(f):
            out = [0]*6
            tmp = [ int(i) for i in line.partition('#')[0].split() ]
            out[0:len(tmp)] = tmp
            if (n % 100000 == 0):
                printf("line %d = %s\n", n, str(out))

def test_split2():
    """Parse each line of '1m.raw' with str.split() and int(i, 10).

    Same procedure as the plain split variant except that the base is
    passed to int() explicitly.  Anything from the first '#' onward is
    dropped, the parsed integers are written over the front of a list of
    six zeros, and line 0 plus every 100000th line after it are printed.
    """
    # "with" closes the file as soon as the loop finishes (or raises)
    # instead of leaving the handle to the garbage collector.
    with open('1m.raw', 'r') as f:
        for n, line in enumerate(f):
            out = [0]*6
            tmp = [ int(i,10) for i in line.partition('#')[0].split() ]
            out[0:len(tmp)] = tmp
            if (n % 100000 == 0):
                printf("line %d = %s\n", n, str(out))

def test_regex():
    """Parse each line of '1m.raw' with re.findall().

    Anything from the first '#' onward is dropped, then every run of
    digits that is followed by whitespace is captured and converted with
    int().  The values are written over the front of a list of six
    zeros.  Line 0 and every 100000th line after it are printed.
    """
    # "with" closes the file as soon as the loop finishes (or raises)
    # instead of leaving the handle to the garbage collector.
    with open('1m.raw', 'r') as f:
        for n, line in enumerate(f):
            out = [0]*6
            # Raw string: same pattern as before, without relying on
            # unrecognised backslash escapes in a normal string literal.
            tmp = [ int(x) for x in re.findall(r'(\d+)\s+',line.partition('#')[0]) ]
            out[0:len(tmp)] = tmp
            if (n % 100000 == 0):
                printf("line %d = %s\n", n, str(out))

def test_bigregex():
    """Parse each line of '1m.raw' with one precompiled six-group regex.

    The pattern skips leading whitespace and then has six optional
    "digits followed by whitespace" groups; a group that did not match
    becomes 0.  Unlike the split/findall variants, text after '#' is not
    stripped before matching.  Line 0 and every 100000th line after it
    are printed.
    """
    # Raw strings: same pattern as before, without relying on
    # unrecognised backslash escapes in normal string literals.
    regex = re.compile(r'^(?:\s*)' + r'(?:(\d+)\s+)?' * 6)
    # "with" closes the file as soon as the loop finishes (or raises)
    # instead of leaving the handle to the garbage collector.
    with open('1m.raw', 'r') as f:
        for n, line in enumerate(f):
            # Every part of the pattern is optional, so match() always
            # returns a match object here.
            out = [ int(x or 0) for x in re.match(regex, line).groups() ]
            if (n % 100000 == 0):
                printf("line %d = %s\n", n, str(out))

def test_numpy():
    """Load all of '1m.raw' in a single numpy.genfromtxt() call.

    Rows are parsed into a structured dtype with six 'i2' (16-bit signed
    integer) fields.  The resulting array is discarded; nothing is
    returned or printed.
    """
    # "with" closes the file once genfromtxt has consumed it; the result
    # is intentionally not bound to a name because it is never used.
    with open('1m.raw', 'r') as f:
        np.genfromtxt(f, dtype = np.dtype('i2,i2,i2,i2,i2,i2'))
    
# Run each parser under a Timer, in a fixed order.  The figure beside
# each entry is the rate noted for that parser when this script was
# last updated.
for _label, _bench in (
        ("numpy", test_numpy),        # 106k/sec
        ("regex", test_regex),        # 121k/sec
        ("split", test_split),        # 219k/sec
        ("split2", test_split2),      # 328k/sec
        ("bigregex", test_bigregex),  # 130k/sec
):
    with Timer(_label):
        _bench()

# The "int" operation takes quite a while -- int(x,10) is twice as fast
# Perl does about 500k/sec
add read/writebinary tests git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@9734 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2011-06-27 13:10:49 -04:00			`#!/usr/bin/python`

add some speed tests for reading ASCII files git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@9680 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2011-06-01 16:04:13 -04:00			`from printf import printf`
			`import time`
			`import re`
			`import numpy as np`
			`import itertools`

			`class Timer():`
			`def __init__(self, arg):`
			`self.arg = arg`
			`def __enter__(self): self.start = time.time()`
			`def __exit__(self, *args): printf("%s: %f lines/sec\n", self.arg, 1e6 / (time.time() - self.start))`

			`def test_split():`
			`for n, line in enumerate(open('1m.raw', 'r')):`
			`out = [0]*6`
			`tmp = [ int(i) for i in line.partition('#')[0].split() ]`
			`out[0:len(tmp)] = tmp`
			`if (n % 100000 == 0):`
			`printf("line %d = %s\n", n, str(out))`

misc updates git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@9733 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2011-06-27 12:35:18 -04:00			`def test_split2():`
			`for n, line in enumerate(open('1m.raw', 'r')):`
			`out = [0]*6`
			`tmp = [ int(i,10) for i in line.partition('#')[0].split() ]`
			`out[0:len(tmp)] = tmp`
			`if (n % 100000 == 0):`
			`printf("line %d = %s\n", n, str(out))`

add some speed tests for reading ASCII files git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@9680 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2011-06-01 16:04:13 -04:00			`def test_regex():`
			`for n, line in enumerate(open('1m.raw', 'r')):`
			`out = [0]*6`
			`tmp = [ int(x) for x in re.findall('(\d+)\s+',line.partition('#')[0]) ]`
			`out[0:len(tmp)] = tmp`
			`if (n % 100000 == 0):`
			`printf("line %d = %s\n", n, str(out))`

			`def test_bigregex():`
			`regex = re.compile('^(?:\s*)' + '(?:(\d+)\s+)?' * 6)`
			`for n, line in enumerate(open('1m.raw', 'r')):`
			`out = [ int(x or 0) for x in re.match(regex, line).groups() ]`
			`if (n % 100000 == 0):`
			`printf("line %d = %s\n", n, str(out))`

update test times git-svn-id: https://bucket.mit.edu/svn/nilm/tags/nilmdb-orig-tests@11322 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2012-10-22 17:50:11 -04:00			`def test_numpy():`
			`out = np.genfromtxt(open('1m.raw', 'r'),`
			`dtype = np.dtype('i2,i2,i2,i2,i2,i2'))`

			`with Timer("numpy"):`
			`test_numpy() # 106k/sec`

add some speed tests for reading ASCII files git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@9680 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2011-06-01 16:04:13 -04:00			`with Timer("regex"):`
update test times git-svn-id: https://bucket.mit.edu/svn/nilm/tags/nilmdb-orig-tests@11322 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2012-10-22 17:50:11 -04:00			`test_regex() # 121k/sec`
add some speed tests for reading ASCII files git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@9680 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2011-06-01 16:04:13 -04:00
			`with Timer("split"):`
update test times git-svn-id: https://bucket.mit.edu/svn/nilm/tags/nilmdb-orig-tests@11322 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2012-10-22 17:50:11 -04:00			`test_split() # 219k/sec`
misc updates git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@9733 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2011-06-27 12:35:18 -04:00
			`with Timer("split2"):`
update test times git-svn-id: https://bucket.mit.edu/svn/nilm/tags/nilmdb-orig-tests@11322 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2012-10-22 17:50:11 -04:00			`test_split2() # 328k/sec`
add some speed tests for reading ASCII files git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@9680 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2011-06-01 16:04:13 -04:00
			`with Timer("bigregex"):`
update test times git-svn-id: https://bucket.mit.edu/svn/nilm/tags/nilmdb-orig-tests@11322 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2012-10-22 17:50:11 -04:00			`test_bigregex() # 130k/sec`
add some speed tests for reading ASCII files git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@9680 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2011-06-01 16:04:13 -04:00
misc updates git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@9733 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2011-06-27 12:35:18 -04:00			`# The "int" operation takes quite a while -- int(x,10) is twice as fast`
add some speed tests for reading ASCII files git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@9680 ddd99763-3ecb-0310-9145-efcb8ce7c51f 2011-06-01 16:04:13 -04:00			`# Perl does about 500k/sec`