nilmdb/test/speed-readascii.py

68 lines
1.9 KiB
Python
Raw Permalink Normal View History

#!/usr/bin/python
from printf import printf
import time
import re
import numpy as np
import itertools
class Timer:
    """Context manager that prints a lines-per-second rate for its body.

    The rate is computed on the assumption that the timed body processed
    exactly one million lines.
    """

    def __init__(self, arg):
        """Store *arg*, the label printed in front of the measured rate."""
        self.arg = arg

    def __enter__(self):
        """Record the wall-clock start time and return this timer."""
        self.start = time.time()
        # Return self so that ``with Timer(...) as t`` binds the timer
        # instead of None.
        return self

    def __exit__(self, *args):
        """Print the label and the rate; exceptions are not suppressed."""
        elapsed = time.time() - self.start
        printf("%s: %f lines/sec\n", self.arg, 1e6 / elapsed)
def test_split():
    """Parse every line of '1m.raw' using str.split() and int(x).

    Text from the first '#' on a line onwards is ignored.  The parsed
    integers overwrite the front of a list pre-filled with six zeros, and
    that list is printed for every 100000th line.
    """
    # The context manager closes the file even if a line fails to parse.
    with open('1m.raw', 'r') as f:
        for n, line in enumerate(f):
            out = [0]*6
            tmp = [int(i) for i in line.partition('#')[0].split()]
            out[0:len(tmp)] = tmp
            if n % 100000 == 0:
                printf("line %d = %s\n", n, str(out))
def test_split2():
    """Parse every line of '1m.raw' using str.split() and int(x, 10).

    Identical to the plain split/int(x) approach except that the base is
    passed to int() explicitly.  Text from the first '#' on a line onwards
    is ignored.  The parsed integers overwrite the front of a list
    pre-filled with six zeros, and that list is printed for every 100000th
    line.
    """
    # The context manager closes the file even if a line fails to parse.
    with open('1m.raw', 'r') as f:
        for n, line in enumerate(f):
            out = [0]*6
            tmp = [int(i, 10) for i in line.partition('#')[0].split()]
            out[0:len(tmp)] = tmp
            if n % 100000 == 0:
                printf("line %d = %s\n", n, str(out))
def test_regex():
    """Parse every line of '1m.raw' using re.findall() and int(x).

    Text from the first '#' on a line onwards is ignored.  The parsed
    integers overwrite the front of a list pre-filled with six zeros, and
    that list is printed for every 100000th line.
    """
    # The context manager closes the file even if a line fails to parse.
    with open('1m.raw', 'r') as f:
        for n, line in enumerate(f):
            out = [0]*6
            # Raw string: '\d' and '\s' are not valid str escapes.
            # NOTE: the pattern only captures digit runs that are followed
            # by whitespace, so a number directly before '#' is skipped.
            tmp = [int(x)
                   for x in re.findall(r'(\d+)\s+', line.partition('#')[0])]
            out[0:len(tmp)] = tmp
            if n % 100000 == 0:
                printf("line %d = %s\n", n, str(out))
def test_bigregex():
    """Parse every line of '1m.raw' with one precompiled six-group regex.

    The pattern skips leading whitespace and then has six optional
    "digits followed by whitespace" groups.  Groups that do not
    participate in the match are reported as 0.  The resulting list is
    printed for every 100000th line.
    """
    # Raw strings: '\s' and '\d' are not valid str escapes.
    regex = re.compile(r'^(?:\s*)' + r'(?:(\d+)\s+)?' * 6)
    # The context manager closes the file even if a line fails to parse.
    with open('1m.raw', 'r') as f:
        for n, line in enumerate(f):
            # Every part of the pattern is optional, so match() always
            # succeeds; unmatched groups are None and become 0.
            out = [int(x or 0) for x in regex.match(line).groups()]
            if n % 100000 == 0:
                printf("line %d = %s\n", n, str(out))
def test_numpy():
    """Load all of '1m.raw' in a single numpy.genfromtxt() call.

    Each row is read into a structured dtype with six 'i2' (16-bit
    integer) fields.  The resulting array is discarded.
    """
    # The context manager closes the file once parsing is finished.
    with open('1m.raw', 'r') as f:
        np.genfromtxt(f, dtype=np.dtype('i2,i2,i2,i2,i2,i2'))
# Run each parser once under a Timer.  The trailing comments record the
# rates noted in the original measurements.
with Timer("numpy"): test_numpy()        # 106k/sec
with Timer("regex"): test_regex()        # 121k/sec
with Timer("split"): test_split()        # 219k/sec
with Timer("split2"): test_split2()      # 328k/sec
with Timer("bigregex"): test_bigregex()  # 130k/sec
# The int() conversion takes quite a while -- int(x, 10), with an explicit
# base, is about twice as fast as plain int(x).
# For comparison, Perl does about 500k lines/sec.