Compare commits

..

6 Commits

Author SHA1 Message Date
7429c1788d Update nilmdb.utils.time 2013-03-15 22:49:59 -04:00
0ef71c193b Remove layout.pyx, since rocket replaced it 2013-03-15 22:32:40 -04:00
4a50dd015e Merge branch 'python-intervals' 2013-03-15 21:39:11 -04:00
22274550ab Test python version of Interval too 2013-03-15 21:37:03 -04:00
4f06d6ae68 Move Interval set_difference inside nilmdb.utils for clients
Clients might need to to Interval math too, so move a simple Interval
class and start putting helpers in there.
2013-03-15 21:37:03 -04:00
c54d8041c3 Update design docs 2013-03-15 21:07:01 -04:00
11 changed files with 156 additions and 555 deletions

View File

@@ -186,6 +186,19 @@ IntervalSet speed
- rbtree and interval converted to cython: - rbtree and interval converted to cython:
8.4 μS, total 12 s, 134 MB RAM 8.4 μS, total 12 s, 134 MB RAM
- Would like to move Interval itself back to Python so other
non-cythonized code like client code can use it more easily.
Testing speed with just `test_interval` being tested, with
`range(5,22)`, using `/usr/bin/time -v python tests/runtests.py`,
times recorded for 2097152:
- 52ae397 (Interval in cython):
12.6133 μs each, ratio 0.866533, total 47 sec, 399 MB RAM
- 9759dcf (Interval in python):
21.2937 μs each, ratio 1.462870, total 83 sec, 1107 MB RAM
That's a huge difference! Instead, will keep Interval and DBInterval
cythonized inside nilmdb, and just have an additional copy in
nilmdb.utils for clients to use.
Layouts Layouts
------- -------
Current/old design has specific layouts: RawData, PrepData, RawNotchedData. Current/old design has specific layouts: RawData, PrepData, RawNotchedData.

View File

@@ -1,5 +1,9 @@
"""Interval, IntervalSet """Interval, IntervalSet
The Interval implemented here is just like
nilmdb.utils.interval.Interval, except implemented in Cython for
speed.
Represents an interval of time, and a set of such intervals. Represents an interval of time, and a set of such intervals.
Intervals are half-open, ie. they include data points with timestamps Intervals are half-open, ie. they include data points with timestamps
@@ -23,6 +27,7 @@ from ..utils.time import min_timestamp as nilmdb_min_timestamp
from ..utils.time import max_timestamp as nilmdb_max_timestamp from ..utils.time import max_timestamp as nilmdb_max_timestamp
from ..utils.time import timestamp_to_string from ..utils.time import timestamp_to_string
from ..utils.iterator import imerge from ..utils.iterator import imerge
from ..utils.interval import IntervalError
import itertools import itertools
cimport rbtree cimport rbtree
@@ -30,10 +35,6 @@ from libc.stdint cimport uint64_t, int64_t
ctypedef int64_t timestamp_t ctypedef int64_t timestamp_t
class IntervalError(Exception):
"""Error due to interval overlap, etc"""
pass
cdef class Interval: cdef class Interval:
"""Represents an interval of time.""" """Represents an interval of time."""
@@ -59,17 +60,7 @@ cdef class Interval:
def __cmp__(self, Interval other): def __cmp__(self, Interval other):
"""Compare two intervals. If non-equal, order by start then end""" """Compare two intervals. If non-equal, order by start then end"""
if not isinstance(other, Interval): return cmp(self.start, other.start) or cmp(self.end, other.end)
raise TypeError("bad type")
if self.start == other.start:
if self.end < other.end:
return -1
if self.end > other.end:
return 1
return 0
if self.start < other.start:
return -1
return 1
cpdef intersects(self, Interval other): cpdef intersects(self, Interval other):
"""Return True if two Interval objects intersect""" """Return True if two Interval objects intersect"""
@@ -313,63 +304,6 @@ cdef class IntervalSet:
else: else:
yield subset yield subset
def set_difference(self, IntervalSet other not None,
Interval bounds = None):
"""
Compute the difference (self \\ other) between this
IntervalSet and the given IntervalSet; i.e., the ranges
that are present in 'self' but not 'other'.
If 'bounds' is not None, results are limited to the range
specified by the interval 'bounds'.
Returns a generator that yields each interval in turn.
Output intervals are built as subsets of the intervals in the
first argument (self).
"""
# Iterate through all starts and ends in sorted order. Add a
# tag to the iterator so that we can figure out which one they
# were, after sorting.
def decorate(it, key_start, key_end):
for i in it:
yield i.start, key_start, i
yield i.end, key_end, i
if bounds is None:
bounds = Interval(nilmdb_min_timestamp,
nilmdb_max_timestamp)
self_iter = decorate(self.intersection(bounds), 0, 2)
other_iter = decorate(other.intersection(bounds), 1, 3)
# Now iterate over the timestamps of each start and end.
# At each point, evaluate which type of end it is, to determine
# how to build up the output intervals.
self_interval = None
other_interval = None
out_start = None
for (ts, k, i) in imerge(self_iter, other_iter):
if k == 0:
# start self interval
self_interval = i
if other_interval is None:
out_start = ts
elif k == 1:
# start other interval
other_interval = i
if out_start is not None and out_start != ts:
yield self_interval.subset(out_start, ts)
out_start = None
elif k == 2:
# end self interval
if out_start is not None and out_start != ts:
yield self_interval.subset(out_start, ts)
out_start = None
self_interval = None
elif k == 3:
# end other interval
other_interval = None
if self_interval:
out_start = ts
cpdef intersects(self, Interval other): cpdef intersects(self, Interval other):
"""Return True if this IntervalSet intersects another interval""" """Return True if this IntervalSet intersects another interval"""
for n in self.tree.intersect(other.start, other.end): for n in self.tree.intersect(other.start, other.end):

View File

@@ -1,204 +0,0 @@
# cython: profile=False
import time
import sys
import inspect
import cStringIO
from ..utils.time import min_timestamp as nilmdb_min_timestamp
cdef enum:
max_value_count = 64
cimport cython
cimport libc.stdlib
cimport libc.stdio
cimport libc.string
class ParserError(Exception):
def __init__(self, line, message):
self.message = "line " + str(line) + ": " + message
Exception.__init__(self, self.message)
class FormatterError(Exception):
pass
class Layout:
"""Represents a NILM database layout"""
def __init__(self, typestring):
"""Initialize this Layout object to handle the specified
type string"""
try:
[ datatype, count ] = typestring.split("_")
except:
raise KeyError("invalid layout string")
try:
self.count = int(count)
except ValueError:
raise KeyError("invalid count")
if self.count < 1 or self.count > max_value_count:
raise KeyError("invalid count")
if datatype == 'uint16':
self.parse = self.parse_uint16
self.format_str = "%.6f" + " %d" * self.count
self.format = self.format_generic
elif datatype == 'float32':
self.parse = self.parse_float64
self.format_str = "%.6f" + " %.6e" * self.count
self.format = self.format_generic
elif datatype == 'float64':
self.parse = self.parse_float64
self.format_str = "%.6f" + " %.16e" * self.count
self.format = self.format_generic
else:
raise KeyError("invalid type")
self.datatype = datatype
# Parsers
def parse_float64(self, char *text):
cdef int n
cdef double ts
# Return doubles even in float32 case, since they're going into
# a Python array which would upconvert to double anyway.
result = [0] * (self.count + 1)
cdef char *end
ts = libc.stdlib.strtod(text, &end)
if end == text:
raise ValueError("bad timestamp")
result[0] = ts
for n in range(self.count):
text = end
result[n+1] = libc.stdlib.strtod(text, &end)
if end == text:
raise ValueError("wrong number of values")
n = 0
while end[n] == ' ':
n += 1
if end[n] != '\n' and end[n] != '#' and end[n] != '\0':
raise ValueError("extra data on line")
return (ts, result)
def parse_uint16(self, char *text):
cdef int n
cdef double ts
cdef int v
cdef char *end
result = [0] * (self.count + 1)
ts = libc.stdlib.strtod(text, &end)
if end == text:
raise ValueError("bad timestamp")
result[0] = ts
for n in range(self.count):
text = end
v = libc.stdlib.strtol(text, &end, 10)
if v < 0 or v > 65535:
raise ValueError("value out of range")
result[n+1] = v
if end == text:
raise ValueError("wrong number of values")
n = 0
while end[n] == ' ':
n += 1
if end[n] != '\n' and end[n] != '#' and end[n] != '\0':
raise ValueError("extra data on line")
return (ts, result)
# Formatters
def format_generic(self, d):
n = len(d) - 1
if n != self.count:
raise ValueError("wrong number of values for layout type: "
"got %d, wanted %d" % (n, self.count))
return (self.format_str % tuple(d)) + "\n"
# Get a layout by name
def get_named(typestring):
try:
return Layout(typestring)
except KeyError:
compat = { "PrepData": "float32_8",
"RawData": "uint16_6",
"RawNotchedData": "uint16_9" }
return Layout(compat[typestring])
class Parser(object):
"""Object that parses and stores ASCII data for inclusion into the
database"""
def __init__(self, layout):
if issubclass(layout.__class__, Layout):
self.layout = layout
else:
try:
self.layout = get_named(layout)
except KeyError:
raise TypeError("unknown layout")
self.data = []
self.min_timestamp = None
self.max_timestamp = None
def parse(self, textdata):
"""
Parse the data, provided as lines of text, using the current
layout, into an internal data structure suitable for a
pytables 'table.append(parser.data)'.
"""
cdef double last_ts = nilmdb_min_timestamp
cdef double ts
cdef int n = 0, i
cdef char *line
indata = cStringIO.StringIO(textdata)
# Assume any parsing error is a real error.
# In the future we might want to skip completely empty lines,
# or partial lines right before EOF?
try:
self.data = []
for pyline in indata:
line = pyline
n += 1
if line[0] == '\#':
continue
(ts, row) = self.layout.parse(line)
if ts <= last_ts:
raise ValueError("timestamp is not "
"monotonically increasing")
last_ts = ts
self.data.append(row)
except (ValueError, IndexError, TypeError) as e:
raise ParserError(n, "error: " + e.message)
# Mark timestamp ranges
if len(self.data):
self.min_timestamp = self.data[0][0]
self.max_timestamp = self.data[-1][0]
class Formatter(object):
"""Object that formats database data into ASCII"""
def __init__(self, layout):
if issubclass(layout.__class__, Layout):
self.layout = layout
else:
try:
self.layout = get_named(layout)
except KeyError:
raise TypeError("unknown layout")
def format(self, data):
"""
Format raw data from the database, using the current layout,
as lines of ACSII text.
"""
text = cStringIO.StringIO()
try:
for row in data:
text.write(self.layout.format(row))
except (ValueError, IndexError, TypeError) as e:
raise FormatterError("formatting error: " + e.message)
return text.getvalue()

View File

@@ -12,8 +12,10 @@ Manages both the SQL database and the table storage backend.
from __future__ import absolute_import from __future__ import absolute_import
import nilmdb.utils import nilmdb.utils
from nilmdb.utils.printf import * from nilmdb.utils.printf import *
from nilmdb.server.interval import (Interval, DBInterval,
IntervalSet, IntervalError) from nilmdb.utils.interval import IntervalError
from nilmdb.server.interval import Interval, DBInterval, IntervalSet
from nilmdb.server import bulkdata from nilmdb.server import bulkdata
from nilmdb.server.errors import NilmDBError, StreamError, OverlapError from nilmdb.server.errors import NilmDBError, StreamError, OverlapError
@@ -328,7 +330,7 @@ class NilmDB(object):
List all intervals in 'path' between 'start' and 'end'. If List all intervals in 'path' between 'start' and 'end'. If
'diffpath' is not none, list instead the set-difference 'diffpath' is not none, list instead the set-difference
between the intervals in the two streams; i.e. all interval between the intervals in the two streams; i.e. all interval
ranges that are present in 'path' but not 'path2'. ranges that are present in 'path' but not 'diffpath'.
Returns (intervals, restart) tuple. Returns (intervals, restart) tuple.
@@ -350,7 +352,9 @@ class NilmDB(object):
requested = Interval(start, end) requested = Interval(start, end)
result = [] result = []
if diffpath: if diffpath:
getter = intervals.set_difference(diffintervals, requested) getter = nilmdb.utils.interval.set_difference(
intervals.intersection(requested),
diffintervals.intersection(requested))
else: else:
getter = intervals.intersection(requested) getter = intervals.intersection(requested)
for n, i in enumerate(getter): for n, i in enumerate(getter):

View File

@@ -11,3 +11,4 @@ import nilmdb.utils.threadsafety
import nilmdb.utils.fallocate import nilmdb.utils.fallocate
import nilmdb.utils.time import nilmdb.utils.time
import nilmdb.utils.iterator import nilmdb.utils.iterator
import nilmdb.utils.interval

106
nilmdb/utils/interval.py Normal file
View File

@@ -0,0 +1,106 @@
"""Interval. Like nilmdb.server.interval, but re-implemented here
in plain Python so clients have easier access to it.
Intervals are half-open, ie. they include data points with timestamps
[start, end)
"""
import nilmdb.utils.time
import nilmdb.utils.iterator
class IntervalError(Exception):
"""Error due to interval overlap, etc"""
pass
# Interval
class Interval:
"""Represents an interval of time."""
def __init__(self, start, end):
"""
'start' and 'end' are arbitrary numbers that represent time
"""
if start >= end:
# Explicitly disallow zero-width intervals (since they're half-open)
raise IntervalError("start %s must precede end %s" % (start, end))
self.start = start
self.end = end
def __repr__(self):
s = repr(self.start) + ", " + repr(self.end)
return self.__class__.__name__ + "(" + s + ")"
def __str__(self):
return ("[" + nilmdb.utils.time.timestamp_to_string(self.start) +
" -> " + nilmdb.utils.time.timestamp_to_string(self.end) + ")")
def __cmp__(self, other):
"""Compare two intervals. If non-equal, order by start then end"""
return cmp(self.start, other.start) or cmp(self.end, other.end)
def intersects(self, other):
"""Return True if two Interval objects intersect"""
if not isinstance(other, Interval):
raise TypeError("need an Interval")
if self.end <= other.start or self.start >= other.end:
return False
return True
def subset(self, start, end):
"""Return a new Interval that is a subset of this one"""
# A subclass that tracks additional data might override this.
if start < self.start or end > self.end:
raise IntervalError("not a subset")
return Interval(start, end)
def set_difference(a, b):
"""
Compute the difference (a \\ b) between the intervals in 'a' and
the intervals in 'b'; i.e., the ranges that are present in 'self'
but not 'other'.
'a' and 'b' must both be iterables.
Returns a generator that yields each interval in turn.
Output intervals are built as subsets of the intervals in the
first argument (a).
"""
# Iterate through all starts and ends in sorted order. Add a
# tag to the iterator so that we can figure out which one they
# were, after sorting.
def decorate(it, key_start, key_end):
for i in it:
yield i.start, key_start, i
yield i.end, key_end, i
a_iter = decorate(iter(a), 0, 2)
b_iter = decorate(iter(b), 1, 3)
# Now iterate over the timestamps of each start and end.
# At each point, evaluate which type of end it is, to determine
# how to build up the output intervals.
a_interval = None
b_interval = None
out_start = None
for (ts, k, i) in nilmdb.utils.iterator.imerge(a_iter, b_iter):
if k == 0:
# start a interval
a_interval = i
if b_interval is None:
out_start = ts
elif k == 1:
# start b interval
b_interval = i
if out_start is not None and out_start != ts:
yield a_interval.subset(out_start, ts)
out_start = None
elif k == 2:
# end a interval
if out_start is not None and out_start != ts:
yield a_interval.subset(out_start, ts)
out_start = None
a_interval = None
elif k == 3:
# end b interval
b_interval = None
if a_interval:
out_start = ts

View File

@@ -1,5 +1,6 @@
from nilmdb.utils import datetime_tz from nilmdb.utils import datetime_tz
import re import re
import time
# Range # Range
min_timestamp = (-2**63) min_timestamp = (-2**63)
@@ -36,6 +37,7 @@ def unix_to_timestamp(unix):
"""Convert a Unix timestamp (floating point seconds since epoch) """Convert a Unix timestamp (floating point seconds since epoch)
into a NILM timestamp (integer microseconds since epoch)""" into a NILM timestamp (integer microseconds since epoch)"""
return int(round(unix * 1e6)) return int(round(unix * 1e6))
seconds_to_timestamp = unix_to_timestamp
def timestamp_to_unix(timestamp): def timestamp_to_unix(timestamp):
"""Convert a NILM timestamp (integer microseconds since epoch) """Convert a NILM timestamp (integer microseconds since epoch)
@@ -118,4 +120,4 @@ def parse_time(toparse):
def now(): def now():
"""Return current timestamp""" """Return current timestamp"""
return unix_to_timestamp(datetime_tz.datetime_tz.utcnow().totimestamp()) return unix_to_timestamp(time.time())

View File

@@ -43,7 +43,6 @@ except: pass
# Use Cython if it's new enough, otherwise use preexisting C files. # Use Cython if it's new enough, otherwise use preexisting C files.
cython_modules = [ 'nilmdb.server.interval', cython_modules = [ 'nilmdb.server.interval',
'nilmdb.server.layout',
'nilmdb.server.rbtree' ] 'nilmdb.server.rbtree' ]
try: try:
import Cython import Cython

View File

@@ -7,7 +7,6 @@ test_serializer.py
test_iteratorizer.py test_iteratorizer.py
test_timestamper.py test_timestamper.py
test_layout.py
test_rbtree.py test_rbtree.py
test_interval.py test_interval.py

View File

@@ -8,8 +8,11 @@ from nose.tools import *
from nose.tools import assert_raises from nose.tools import assert_raises
import itertools import itertools
from nilmdb.server.interval import (Interval, DBInterval, from nilmdb.utils.interval import IntervalError
IntervalSet, IntervalError) from nilmdb.server.interval import Interval, DBInterval, IntervalSet
# so we can test them separately
from nilmdb.utils.interval import Interval as UtilsInterval
from testutil.helpers import * from testutil.helpers import *
import unittest import unittest
@@ -47,6 +50,15 @@ def makeset(string):
return iset return iset
class TestInterval: class TestInterval:
def test_client_interval(self):
# Run interval tests against the Python version of Interval.
global Interval
NilmdbInterval = Interval
Interval = UtilsInterval
self.test_interval()
self.test_interval_intersect()
Interval = NilmdbInterval
def test_interval(self): def test_interval(self):
# Test Interval class # Test Interval class
os.environ['TZ'] = "America/New_York" os.environ['TZ'] = "America/New_York"
@@ -222,7 +234,7 @@ class TestInterval:
eq_(ab,c) eq_(ab,c)
# a \ b == d # a \ b == d
eq_(IntervalSet(a.set_difference(b)), d) eq_(IntervalSet(nilmdb.utils.interval.set_difference(a,b)), d)
# Intersection with intervals # Intersection with intervals
do_test(makeset("[---|---)[)"), do_test(makeset("[---|---)[)"),
@@ -287,10 +299,11 @@ class TestInterval:
b = makeset("[-) [--) [)") b = makeset("[-) [--) [)")
c = makeset("[----) ") c = makeset("[----) ")
d = makeset(" [-) ") d = makeset(" [-) ")
eq_(a.set_difference(b, list(c)[0]), d) eq_(nilmdb.utils.interval.set_difference(
a.intersection(list(c)[0]), b.intersection(list(c)[0])), d)
# Empty second set # Empty second set
eq_(a.set_difference(IntervalSet()), a) eq_(nilmdb.utils.interval.set_difference(a, IntervalSet()), a)
class TestIntervalDB: class TestIntervalDB:
def test_dbinterval(self): def test_dbinterval(self):
@@ -379,7 +392,7 @@ class TestIntervalSpeed:
print print
yappi.start() yappi.start()
speeds = {} speeds = {}
limit = 10 # was 20 limit = 22 # was 20
for j in [ 2**x for x in range(5,limit) ]: for j in [ 2**x for x in range(5,limit) ]:
start = time.time() start = time.time()
iset = IntervalSet() iset = IntervalSet()

View File

@@ -1,266 +0,0 @@
# -*- coding: utf-8 -*-
import nilmdb
from nilmdb.utils.printf import *
from nose.tools import *
from nose.tools import assert_raises
import distutils.version
import itertools
import os
import sys
import random
import unittest
from testutil.helpers import *
from nilmdb.server.layout import *
class TestLayouts(object):
# Some nilmdb.layout tests. Not complete, just fills in missing
# coverage.
def test_layouts(self):
x = nilmdb.server.layout.get_named("float32_8")
y = nilmdb.server.layout.get_named("float32_8")
eq_(x.count, y.count)
eq_(x.datatype, y.datatype)
y = nilmdb.server.layout.get_named("float32_7")
ne_(x.count, y.count)
eq_(x.datatype, y.datatype)
def test_parsing(self):
self.real_t_parsing("float32_8", "uint16_6", "uint16_9")
self.real_t_parsing("float32_8", "uint16_6", "uint16_9")
def real_t_parsing(self, name_prep, name_raw, name_rawnotch):
# invalid layouts
with assert_raises(TypeError) as e:
parser = Parser("NoSuchLayout")
with assert_raises(TypeError) as e:
parser = Parser("float32")
# too little data
parser = Parser(name_prep)
data = ( "1234567890.000000 1.1 2.2 3.3 4.4 5.5\n" +
"1234567890.100000 1.1 2.2 3.3 4.4 5.5\n")
with assert_raises(ParserError) as e:
parser.parse(data)
in_("error", str(e.exception))
# too much data
parser = Parser(name_prep)
data = ( "1234567890.000000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8 9.9\n" +
"1234567890.100000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8 9.9\n")
with assert_raises(ParserError) as e:
parser.parse(data)
in_("error", str(e.exception))
# just right
parser = Parser(name_prep)
data = ( "1234567890.000000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8\n" +
"1234567890.100000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8\n")
parser.parse(data)
eq_(parser.min_timestamp, 1234567890.0)
eq_(parser.max_timestamp, 1234567890.1)
eq_(parser.data, [[1234567890.0,1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8],
[1234567890.1,1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8]])
# try uint16_6 too, with clamping
parser = Parser(name_raw)
data = ( "1234567890.000000 1 2 3 4 5 6\n" +
"1234567890.100000 1 2 3 4 5 6\n" )
parser.parse(data)
eq_(parser.data, [[1234567890.0,1,2,3,4,5,6],
[1234567890.1,1,2,3,4,5,6]])
# pass an instantiated class
parser = Parser(get_named(name_rawnotch))
data = ( "1234567890.000000 1 2 3 4 5 6 7 8 9\n" +
"1234567890.100000 1 2 3 4 5 6 7 8 9\n" )
parser.parse(data)
# non-monotonic
parser = Parser(name_raw)
data = ( "1234567890.100000 1 2 3 4 5 6\n" +
"1234567890.099999 1 2 3 4 5 6\n" )
with assert_raises(ParserError) as e:
parser.parse(data)
in_("not monotonically increasing", str(e.exception))
parser = Parser(name_raw)
data = ( "1234567890.100000 1 2 3 4 5 6\n" +
"1234567890.100000 1 2 3 4 5 6\n" )
with assert_raises(ParserError) as e:
parser.parse(data)
in_("not monotonically increasing", str(e.exception))
parser = Parser(name_raw)
data = ( "1234567890.100000 1 2 3 4 5 6\n" +
"1234567890.100001 1 2 3 4 5 6\n" )
parser.parse(data)
# uint16_6 with values out of bounds
parser = Parser(name_raw)
data = ( "1234567890.000000 1 2 3 4 500000 6\n" +
"1234567890.100000 1 2 3 4 5 6\n" )
with assert_raises(ParserError) as e:
parser.parse(data)
in_("value out of range", str(e.exception))
# Empty data should work but is useless
parser = Parser(name_raw)
data = ""
parser.parse(data)
assert(parser.min_timestamp is None)
assert(parser.max_timestamp is None)
def test_formatting(self):
self.real_t_formatting("float32_8", "uint16_6", "uint16_9")
self.real_t_formatting("float32_8", "uint16_6", "uint16_9")
def real_t_formatting(self, name_prep, name_raw, name_rawnotch):
# invalid layout
with assert_raises(TypeError) as e:
formatter = Formatter("NoSuchLayout")
# too little data
formatter = Formatter(name_prep)
data = [ [ 1234567890.000000, 1.1, 2.2, 3.3, 4.4, 5.5 ],
[ 1234567890.100000, 1.1, 2.2, 3.3, 4.4, 5.5 ] ]
with assert_raises(FormatterError) as e:
formatter.format(data)
in_("error", str(e.exception))
# too much data
formatter = Formatter(name_prep)
data = [ [ 1234567890.000000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ],
[ 1234567890.100000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ] ]
with assert_raises(FormatterError) as e:
formatter.format(data)
in_("error", str(e.exception))
# just right
formatter = Formatter(name_prep)
data = [ [ 1234567890.000000, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8 ],
[ 1234567890.100000, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8 ] ]
text = formatter.format(data)
eq_(text,
"1234567890.000000 1.100000e+00 2.200000e+00 3.300000e+00 "
"4.400000e+00 5.500000e+00 6.600000e+00 7.700000e+00 "
"8.800000e+00\n" +
"1234567890.100000 1.100000e+00 2.200000e+00 3.300000e+00 "
"4.400000e+00 5.500000e+00 6.600000e+00 7.700000e+00 "
"8.800000e+00\n")
# try uint16_6 too
formatter = Formatter(name_raw)
data = [ [ 1234567890.000000, 1, 2, 3, 4, 5, 6 ],
[ 1234567890.100000, 1, 2, 3, 4, 5, 6 ] ]
text = formatter.format(data)
eq_(text,
"1234567890.000000 1 2 3 4 5 6\n" +
"1234567890.100000 1 2 3 4 5 6\n")
# pass an instantiated class
formatter = Formatter(get_named(name_rawnotch))
data = [ [ 1234567890.000000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ],
[ 1234567890.100000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ] ]
text = formatter.format(data)
eq_(text,
"1234567890.000000 1 2 3 4 5 6 7 8 9\n" +
"1234567890.100000 1 2 3 4 5 6 7 8 9\n")
# Empty data should work but is useless
formatter = Formatter(name_raw)
data = []
text = formatter.format(data)
eq_(text, "")
def test_roundtrip(self):
self.real_t_roundtrip("float32_8", "uint16_6", "uint16_9")
self.real_t_roundtrip("float32_8", "uint16_6", "uint16_9")
def real_t_roundtrip(self, name_prep, name_raw, name_rawnotch):
# Verify that textual data passed into the Parser, and then
# back through the Formatter, then back into the Parser,
# gives identical parsed representations
random.seed(12345)
def do_roundtrip(layout, datagen):
for i in range(100):
rows = random.randint(1,100)
data = ""
ts = 1234567890
for r in range(rows):
ts += random.uniform(0,1)
row = sprintf("%f", ts) + " "
row += " ".join(datagen())
row += "\n"
data += row
parser1 = Parser(layout)
formatter = Formatter(layout)
parser2 = Parser(layout)
parser1.parse(data)
parser2.parse(formatter.format(parser1.data))
eq_(parser1.data, parser2.data)
def datagen():
return [ sprintf("%.6e", random.uniform(-1000,1000))
for x in range(8) ]
do_roundtrip(name_prep, datagen)
def datagen():
return [ sprintf("%d", random.randint(0,65535))
for x in range(6) ]
do_roundtrip(name_raw, datagen)
def datagen():
return [ sprintf("%d", random.randint(0,65535))
for x in range(9) ]
do_roundtrip(name_rawnotch, datagen)
class TestLayoutSpeed:
@unittest.skip("this is slow")
def test_layout_speed(self):
import time
random.seed(54321)
def do_speedtest(layout, datagen, rows = 5000, times = 100):
# Build data once
data = ""
ts = 1234567890
for r in range(rows):
ts += random.uniform(0,1)
row = sprintf("%f", ts) + " "
row += " ".join(datagen())
row += "\n"
data += row
# Do lots of roundtrips
start = time.time()
for i in range(times):
parser = Parser(layout)
formatter = Formatter(layout)
parser.parse(data)
formatter.format(parser.data)
elapsed = time.time() - start
printf("roundtrip %s: %d ms, %.1f μs/row, %d rows/sec\n",
layout,
elapsed * 1e3,
(elapsed * 1e6) / (rows * times),
(rows * times) / elapsed)
print ""
def datagen():
return [ sprintf("%.6e", random.uniform(-1000,1000))
for x in range(10) ]
do_speedtest("float32_10", datagen)
def datagen():
return [ sprintf("%d", random.randint(0,65535))
for x in range(10) ]
do_speedtest("uint16_10", datagen)
def datagen():
return [ sprintf("%d", random.randint(0,65535))
for x in range(6) ]
do_speedtest("uint16_6", datagen)