#!/usr/bin/python
"""Micro-benchmark comparing ways to read fixed-size binary records.

Each test reads '1m.bin' as a sequence of records laid out according to
the struct format '!dHHHHHH' (network byte order: one double followed by
six unsigned shorts) and prints every 100000th record.
"""

from printf import printf
import time
import re
import numpy as np
import itertools
import struct
import array
import os
import mmap


class Timer():
    """Context manager that reports the throughput of the enclosed block.

    The reported figure is ``1e3 / elapsed_seconds``, labelled as
    klines/sec; that label is only accurate when the timed block handles
    one million lines (presumably the size of '1m.bin' -- confirm).
    """

    def __init__(self, arg):
        # arg: label printed in front of the measured rate.
        self.arg = arg

    def __enter__(self):
        self.start = time.time()
        # Return self so ``with Timer(...) as t`` binds the timer.
        return self

    def __exit__(self, *args):
        # Runs whether or not the block raised; returning None means any
        # exception from the block still propagates.
        printf("%s: %f klines/sec\n",
               self.arg, 1e3 / (time.time() - self.start))


def test_struct1():
    """read with struct.unpack"""
    # The with-statement closes the file even if unpacking fails.
    with open('1m.bin', 'rb') as f:
        # Determine the file size by seeking to the end, then rewind.
        f.seek(0, os.SEEK_END)
        filesize = f.tell()
        f.seek(0, os.SEEK_SET)
        packer = struct.Struct('!dHHHHHH')
        # Floor division: only whole records are read; a trailing partial
        # record is ignored.
        items = filesize // packer.size
        for n in xrange(items):
            s = f.read(packer.size)
            out = packer.unpack(s)
            if (n % 100000 == 0):
                printf("line %d = %s\n", n, str(out))


def test_struct2():
    """read with struct.unpack, convert to string"""
    # The with-statement closes the file even if unpacking fails.
    with open('1m.bin', 'rb') as f:
        # Determine the file size by seeking to the end, then rewind.
        f.seek(0, os.SEEK_END)
        filesize = f.tell()
        f.seek(0, os.SEEK_SET)
        packer = struct.Struct('!dHHHHHH')
        # Floor division: only whole records are read; a trailing partial
        # record is ignored.
        items = filesize // packer.size
        for n in xrange(items):
            s = f.read(packer.size)
            out = packer.unpack(s)
            # The result is deliberately unused: the per-record string
            # conversion is the cost this benchmark measures.
            x = str(out)
            if (n % 100000 == 0):
                printf("line %d = %s\n", n, str(out))


def test_mmap():
    """struct.unpack with mmap"""
    with open('1m.bin', 'rb') as f:
        # Determine the file size by seeking to the end, then rewind.
        f.seek(0, os.SEEK_END)
        filesize = f.tell()
        f.seek(0, os.SEEK_SET)
        m = mmap.mmap(f.fileno(), filesize, access=mmap.ACCESS_READ)
        # try/finally releases the mapping deterministically; the mmap
        # object is not used as a context manager so this also runs on
        # Python 2.
        try:
            packer = struct.Struct('!dHHHHHH')
            # Floor division: only whole records are read; a trailing
            # partial record is ignored.
            items = filesize // packer.size
            for n in xrange(items):
                out = packer.unpack(m[packer.size*n : packer.size*(n+1)])
                if (n % 100000 == 0):
                    printf("line %d = %s\n", n, str(out))
        finally:
            m.close()


with Timer("mmap"):
    test_mmap()  # 1600k

with Timer("struct1"):
    test_struct1()  # 1460k

with Timer("struct2"):
    test_struct2()  # 210k

# Reading from the file is again much quicker than converting to string
# Use mmap, it's good