Update nilmdb.utils.time

Remove layout.pyx, since rocket replaced it
Merge branch 'python-intervals'
2013-03-15 22:49:59 -04:00 · 2013-03-15 22:32:40 -04:00 · 2013-03-15 21:39:11 -04:00 · 2013-03-15 21:37:03 -04:00 · 2013-03-15 21:37:03 -04:00 · 2013-03-15 21:07:01 -04:00
11 changed files with 156 additions and 555 deletions
--- a/docs/design.md
+++ b/docs/design.md
@@ -186,6 +186,19 @@ IntervalSet speed
    - rbtree and interval converted to cython:
      8.4 μS, total 12 s, 134 MB RAM

+- Would like to move Interval itself back to Python so other
+  non-cythonized code like client code can use it more easily.
+  Testing speed with just `test_interval` being tested, with
+  `range(5,22)`, using `/usr/bin/time -v python tests/runtests.py`,
+  times recorded for 2097152:
+    - 52ae397 (Interval in cython):
+      12.6133 μs each, ratio 0.866533, total 47 sec, 399 MB RAM
+    - 9759dcf (Interval in python):
+      21.2937 μs each, ratio 1.462870, total 83 sec, 1107 MB RAM
+  That's a huge difference!  Instead, will keep Interval and DBInterval
+  cythonized inside nilmdb, and just have an additional copy in
+  nilmdb.utils for clients to use.
+
 Layouts
 -------
 Current/old design has specific layouts: RawData, PrepData, RawNotchedData.
--- a/nilmdb/server/interval.pyx
+++ b/nilmdb/server/interval.pyx
@@ -1,5 +1,9 @@
 """Interval, IntervalSet

+The Interval implemented here is just like
+nilmdb.utils.interval.Interval, except implemented in Cython for
+speed.
+
 Represents an interval of time, and a set of such intervals.

 Intervals are half-open, ie. they include data points with timestamps
@@ -23,6 +27,7 @@ from ..utils.time import min_timestamp as nilmdb_min_timestamp
 from ..utils.time import max_timestamp as nilmdb_max_timestamp
 from ..utils.time import timestamp_to_string
 from ..utils.iterator import imerge
+from ..utils.interval import IntervalError
 import itertools

 cimport rbtree
@@ -30,10 +35,6 @@ from libc.stdint cimport uint64_t, int64_t

 ctypedef int64_t timestamp_t

-class IntervalError(Exception):
-    """Error due to interval overlap, etc"""
-    pass
-
 cdef class Interval:
    """Represents an interval of time."""

@@ -59,17 +60,7 @@ cdef class Interval:

    def __cmp__(self, Interval other):
        """Compare two intervals.  If non-equal, order by start then end"""
-        if not isinstance(other, Interval):
-            raise TypeError("bad type")
-        if self.start == other.start:
-            if self.end < other.end:
-                return -1
-            if self.end > other.end:
-                return 1
-            return 0
-        if self.start < other.start:
-            return -1
-        return 1
+        return cmp(self.start, other.start) or cmp(self.end, other.end)

    cpdef intersects(self, Interval other):
        """Return True if two Interval objects intersect"""
@@ -313,63 +304,6 @@ cdef class IntervalSet:
                    else:
                        yield subset

-    def set_difference(self, IntervalSet other not None,
-                       Interval bounds = None):
-        """
-        Compute the difference (self \\ other) between this
-        IntervalSet and the given IntervalSet; i.e., the ranges
-        that are present in 'self' but not 'other'.
-
-        If 'bounds' is not None, results are limited to the range
-        specified by the interval 'bounds'.
-
-        Returns a generator that yields each interval in turn.
-        Output intervals are built as subsets of the intervals in the
-        first argument (self).
-        """
-        # Iterate through all starts and ends in sorted order.  Add a
-        # tag to the iterator so that we can figure out which one they
-        # were, after sorting.
-        def decorate(it, key_start, key_end):
-            for i in it:
-                yield i.start, key_start, i
-                yield i.end, key_end, i
-        if bounds is None:
-            bounds = Interval(nilmdb_min_timestamp,
-                              nilmdb_max_timestamp)
-        self_iter = decorate(self.intersection(bounds), 0, 2)
-        other_iter = decorate(other.intersection(bounds), 1, 3)
-
-        # Now iterate over the timestamps of each start and end.
-        # At each point, evaluate which type of end it is, to determine
-        # how to build up the output intervals.
-        self_interval = None
-        other_interval = None
-        out_start = None
-        for (ts, k, i) in imerge(self_iter, other_iter):
-            if k == 0:
-                # start self interval
-                self_interval = i
-                if other_interval is None:
-                    out_start = ts
-            elif k == 1:
-                # start other interval
-                other_interval = i
-                if out_start is not None and out_start != ts:
-                    yield self_interval.subset(out_start, ts)
-                out_start = None
-            elif k == 2:
-                # end self interval
-                if out_start is not None and out_start != ts:
-                    yield self_interval.subset(out_start, ts)
-                out_start = None
-                self_interval = None
-            elif k == 3:
-                # end other interval
-                other_interval = None
-                if self_interval:
-                    out_start = ts
-
    cpdef intersects(self, Interval other):
        """Return True if this IntervalSet intersects another interval"""
        for n in self.tree.intersect(other.start, other.end):
--- a/nilmdb/server/layout.pyx
+++ b/nilmdb/server/layout.pyx
@@ -1,204 +0,0 @@
-# cython: profile=False
-
-import time
-import sys
-import inspect
-import cStringIO
-
-from ..utils.time import min_timestamp as nilmdb_min_timestamp
-
-cdef enum:
-    max_value_count = 64
-
-cimport cython
-cimport libc.stdlib
-cimport libc.stdio
-cimport libc.string
-
-class ParserError(Exception):
-    def __init__(self, line, message):
-        self.message = "line " + str(line) + ": " + message
-        Exception.__init__(self, self.message)
-
-class FormatterError(Exception):
-    pass
-
-class Layout:
-    """Represents a NILM database layout"""
-
-    def __init__(self, typestring):
-        """Initialize this Layout object to handle the specified
-        type string"""
-        try:
-            [ datatype, count ] = typestring.split("_")
-        except:
-            raise KeyError("invalid layout string")
-
-        try:
-            self.count = int(count)
-        except ValueError:
-            raise KeyError("invalid count")
-        if self.count < 1 or self.count > max_value_count:
-            raise KeyError("invalid count")
-
-        if datatype == 'uint16':
-            self.parse = self.parse_uint16
-            self.format_str = "%.6f" + " %d" * self.count
-            self.format = self.format_generic
-        elif datatype == 'float32':
-            self.parse = self.parse_float64
-            self.format_str = "%.6f" + " %.6e" * self.count
-            self.format = self.format_generic
-        elif datatype == 'float64':
-            self.parse = self.parse_float64
-            self.format_str = "%.6f" + " %.16e" * self.count
-            self.format = self.format_generic
-        else:
-            raise KeyError("invalid type")
-
-        self.datatype = datatype
-
-    # Parsers
-    def parse_float64(self, char *text):
-        cdef int n
-        cdef double ts
-        # Return doubles even in float32 case, since they're going into
-        # a Python array which would upconvert to double anyway.
-        result = [0] * (self.count + 1)
-        cdef char *end
-        ts = libc.stdlib.strtod(text, &end)
-        if end == text:
-            raise ValueError("bad timestamp")
-        result[0] = ts
-        for n in range(self.count):
-            text = end
-            result[n+1] = libc.stdlib.strtod(text, &end)
-            if end == text:
-                raise ValueError("wrong number of values")
-        n = 0
-        while end[n] == ' ':
-            n += 1
-        if end[n] != '\n' and end[n] != '#' and end[n] != '\0':
-            raise ValueError("extra data on line")
-        return (ts, result)
-
-    def parse_uint16(self, char *text):
-        cdef int n
-        cdef double ts
-        cdef int v
-        cdef char *end
-        result = [0] * (self.count + 1)
-        ts = libc.stdlib.strtod(text, &end)
-        if end == text:
-            raise ValueError("bad timestamp")
-        result[0] = ts
-        for n in range(self.count):
-            text = end
-            v = libc.stdlib.strtol(text, &end, 10)
-            if v < 0 or v > 65535:
-                raise ValueError("value out of range")
-            result[n+1] = v
-            if end == text:
-                raise ValueError("wrong number of values")
-        n = 0
-        while end[n] == ' ':
-            n += 1
-        if end[n] != '\n' and end[n] != '#' and end[n] != '\0':
-            raise ValueError("extra data on line")
-        return (ts, result)
-
-    # Formatters
-    def format_generic(self, d):
-        n = len(d) - 1
-        if n != self.count:
-            raise ValueError("wrong number of values for layout type: "
-                             "got %d, wanted %d" % (n, self.count))
-        return (self.format_str % tuple(d)) + "\n"
-
-# Get a layout by name
-def get_named(typestring):
-    try:
-        return Layout(typestring)
-    except KeyError:
-        compat = { "PrepData": "float32_8",
-                   "RawData": "uint16_6",
-                   "RawNotchedData": "uint16_9" }
-        return Layout(compat[typestring])
-
-class Parser(object):
-    """Object that parses and stores ASCII data for inclusion into the
-    database"""
-
-    def __init__(self, layout):
-        if issubclass(layout.__class__, Layout):
-            self.layout = layout
-        else:
-            try:
-                self.layout = get_named(layout)
-            except KeyError:
-                raise TypeError("unknown layout")
-
-        self.data = []
-        self.min_timestamp = None
-        self.max_timestamp = None
-
-    def parse(self, textdata):
-        """
-        Parse the data, provided as lines of text, using the current
-        layout, into an internal data structure suitable for a
-        pytables 'table.append(parser.data)'.
-        """
-        cdef double last_ts = nilmdb_min_timestamp
-        cdef double ts
-        cdef int n = 0, i
-        cdef char *line
-
-        indata = cStringIO.StringIO(textdata)
-        # Assume any parsing error is a real error.
-        # In the future we might want to skip completely empty lines,
-        # or partial lines right before EOF?
-        try:
-            self.data = []
-            for pyline in indata:
-                line = pyline
-                n += 1
-                if line[0] == '\#':
-                    continue
-                (ts, row) = self.layout.parse(line)
-                if ts <= last_ts:
-                    raise ValueError("timestamp is not "
-                                     "monotonically increasing")
-                last_ts = ts
-                self.data.append(row)
-        except (ValueError, IndexError, TypeError) as e:
-            raise ParserError(n, "error: " + e.message)
-
-        # Mark timestamp ranges
-        if len(self.data):
-            self.min_timestamp = self.data[0][0]
-            self.max_timestamp = self.data[-1][0]
-
-class Formatter(object):
-    """Object that formats database data into ASCII"""
-
-    def __init__(self, layout):
-        if issubclass(layout.__class__, Layout):
-            self.layout = layout
-        else:
-            try:
-                self.layout = get_named(layout)
-            except KeyError:
-                raise TypeError("unknown layout")
-
-    def format(self, data):
-        """
-        Format raw data from the database, using the current layout,
-        as lines of ACSII text.
-        """
-        text = cStringIO.StringIO()
-        try:
-            for row in data:
-                text.write(self.layout.format(row))
-        except (ValueError, IndexError, TypeError) as e:
-            raise FormatterError("formatting error: " + e.message)
-        return text.getvalue()
--- a/nilmdb/server/nilmdb.py
+++ b/nilmdb/server/nilmdb.py
@@ -12,8 +12,10 @@ Manages both the SQL database and the table storage backend.
 from __future__ import absolute_import
 import nilmdb.utils
 from nilmdb.utils.printf import *
-from nilmdb.server.interval import (Interval, DBInterval,
-                                    IntervalSet, IntervalError)
+
+from nilmdb.utils.interval import IntervalError
+from nilmdb.server.interval import Interval, DBInterval, IntervalSet
+
 from nilmdb.server import bulkdata
 from nilmdb.server.errors import NilmDBError, StreamError, OverlapError

@@ -328,7 +330,7 @@ class NilmDB(object):
        List all intervals in 'path' between 'start' and 'end'.  If
        'diffpath' is not none, list instead the set-difference
        between the intervals in the two streams; i.e. all interval
-        ranges that are present in 'path' but not 'path2'.
+        ranges that are present in 'path' but not 'diffpath'.

        Returns (intervals, restart) tuple.

@@ -350,7 +352,9 @@ class NilmDB(object):
        requested = Interval(start, end)
        result = []
        if diffpath:
-            getter = intervals.set_difference(diffintervals, requested)
+            getter = nilmdb.utils.interval.set_difference(
+                intervals.intersection(requested),
+                diffintervals.intersection(requested))
        else:
            getter = intervals.intersection(requested)
        for n, i in enumerate(getter):
--- a/nilmdb/utils/init.py
+++ b/nilmdb/utils/init.py
@@ -11,3 +11,4 @@ import nilmdb.utils.threadsafety
 import nilmdb.utils.fallocate
 import nilmdb.utils.time
 import nilmdb.utils.iterator
+import nilmdb.utils.interval
--- a/nilmdb/utils/interval.py
+++ b/nilmdb/utils/interval.py
@@ -0,0 +1,106 @@
+"""Interval.  Like nilmdb.server.interval, but re-implemented here
+in plain Python so clients have easier access to it.
+
+Intervals are half-open, ie. they include data points with timestamps
+[start, end)
+"""
+
+import nilmdb.utils.time
+import nilmdb.utils.iterator
+
+class IntervalError(Exception):
+    """Error due to interval overlap, etc"""
+    pass
+
+# Interval
+class Interval:
+    """Represents an interval of time."""
+
+    def __init__(self, start, end):
+        """
+        'start' and 'end' are arbitrary numbers that represent time
+        """
+        if start >= end:
+            # Explicitly disallow zero-width intervals (since they're half-open)
+            raise IntervalError("start %s must precede end %s" % (start, end))
+        self.start = start
+        self.end = end
+
+    def __repr__(self):
+        s = repr(self.start) + ", " + repr(self.end)
+        return self.__class__.__name__ + "(" + s + ")"
+
+    def __str__(self):
+        return ("[" + nilmdb.utils.time.timestamp_to_string(self.start) +
+                " -> " + nilmdb.utils.time.timestamp_to_string(self.end) + ")")
+
+    def __cmp__(self, other):
+        """Compare two intervals.  If non-equal, order by start then end"""
+        return cmp(self.start, other.start) or cmp(self.end, other.end)
+
+    def intersects(self, other):
+        """Return True if two Interval objects intersect"""
+        if not isinstance(other, Interval):
+            raise TypeError("need an Interval")
+        if self.end <= other.start or self.start >= other.end:
+            return False
+        return True
+
+    def subset(self, start, end):
+        """Return a new Interval that is a subset of this one"""
+        # A subclass that tracks additional data might override this.
+        if start < self.start or end > self.end:
+            raise IntervalError("not a subset")
+        return Interval(start, end)
+
+def set_difference(a, b):
+    """
+    Compute the difference (a \\ b) between the intervals in 'a' and
+    the intervals in 'b'; i.e., the ranges that are present in 'a'
+    but not 'b'.
+
+    'a' and 'b' must both be iterables.
+
+    Returns a generator that yields each interval in turn.
+    Output intervals are built as subsets of the intervals in the
+    first argument (a).
+    """
+    # Iterate through all starts and ends in sorted order.  Add a
+    # tag to the iterator so that we can figure out which one they
+    # were, after sorting.
+    def decorate(it, key_start, key_end):
+        for i in it:
+            yield i.start, key_start, i
+            yield i.end, key_end, i
+    a_iter = decorate(iter(a), 0, 2)
+    b_iter = decorate(iter(b), 1, 3)
+
+    # Now iterate over the timestamps of each start and end.
+    # At each point, evaluate which type of end it is, to determine
+    # how to build up the output intervals.
+    a_interval = None
+    b_interval = None
+    out_start = None
+    for (ts, k, i) in nilmdb.utils.iterator.imerge(a_iter, b_iter):
+        if k == 0:
+            # start a interval
+            a_interval = i
+            if b_interval is None:
+                out_start = ts
+        elif k == 1:
+            # start b interval
+            b_interval = i
+            if out_start is not None and out_start != ts:
+                yield a_interval.subset(out_start, ts)
+            out_start = None
+        elif k == 2:
+            # end a interval
+            if out_start is not None and out_start != ts:
+                yield a_interval.subset(out_start, ts)
+            out_start = None
+            a_interval = None
+        elif k == 3:
+            # end b interval
+            b_interval = None
+            if a_interval:
+                out_start = ts
--- a/nilmdb/utils/time.py
+++ b/nilmdb/utils/time.py
@@ -1,5 +1,6 @@
 from nilmdb.utils import datetime_tz
 import re
+import time

 # Range
 min_timestamp = (-2**63)
@@ -36,6 +37,7 @@ def unix_to_timestamp(unix):
    """Convert a Unix timestamp (floating point seconds since epoch)
    into a NILM timestamp (integer microseconds since epoch)"""
    return int(round(unix * 1e6))
+seconds_to_timestamp = unix_to_timestamp

 def timestamp_to_unix(timestamp):
    """Convert a NILM timestamp (integer microseconds since epoch)
@@ -118,4 +120,4 @@ def parse_time(toparse):

 def now():
    """Return current timestamp"""
-    return unix_to_timestamp(datetime_tz.datetime_tz.utcnow().totimestamp())
+    return unix_to_timestamp(time.time())
--- a/setup.py
+++ b/setup.py
@@ -43,7 +43,6 @@ except: pass

 # Use Cython if it's new enough, otherwise use preexisting C files.
 cython_modules = [ 'nilmdb.server.interval',
-                   'nilmdb.server.layout',
                   'nilmdb.server.rbtree' ]
 try:
    import Cython
--- a/tests/test.order
+++ b/tests/test.order
@@ -7,7 +7,6 @@ test_serializer.py
 test_iteratorizer.py

 test_timestamper.py
-test_layout.py
 test_rbtree.py
 test_interval.py

--- a/tests/test_interval.py
+++ b/tests/test_interval.py
@@ -8,8 +8,11 @@ from nose.tools import *
 from nose.tools import assert_raises
 import itertools

-from nilmdb.server.interval import (Interval, DBInterval,
-                                    IntervalSet, IntervalError)
+from nilmdb.utils.interval import IntervalError
+from nilmdb.server.interval import Interval, DBInterval, IntervalSet
+
+# so we can test them separately
+from nilmdb.utils.interval import Interval as UtilsInterval

 from testutil.helpers import *
 import unittest
@@ -47,6 +50,15 @@ def makeset(string):
    return iset

 class TestInterval:
+    def test_client_interval(self):
+        # Run interval tests against the Python version of Interval.
+        global Interval
+        NilmdbInterval = Interval
+        Interval = UtilsInterval
+        self.test_interval()
+        self.test_interval_intersect()
+        Interval = NilmdbInterval
+
    def test_interval(self):
        # Test Interval class
        os.environ['TZ'] = "America/New_York"
@@ -222,7 +234,7 @@ class TestInterval:
            eq_(ab,c)

            # a \ b == d
-            eq_(IntervalSet(a.set_difference(b)), d)
+            eq_(IntervalSet(nilmdb.utils.interval.set_difference(a,b)), d)

        # Intersection with intervals
        do_test(makeset("[---|---)[)"),
@@ -287,10 +299,11 @@ class TestInterval:
        b = makeset("[-) [--) [)")
        c = makeset("[----)     ")
        d = makeset("  [-)      ")
-        eq_(a.set_difference(b, list(c)[0]), d)
+        eq_(nilmdb.utils.interval.set_difference(
+            a.intersection(list(c)[0]), b.intersection(list(c)[0])), d)

        # Empty second set
-        eq_(a.set_difference(IntervalSet()), a)
+        eq_(nilmdb.utils.interval.set_difference(a, IntervalSet()), a)

 class TestIntervalDB:
    def test_dbinterval(self):
@@ -379,7 +392,7 @@ class TestIntervalSpeed:
        print
        yappi.start()
        speeds = {}
-        limit = 10 # was 20
+        limit = 22 # was 20
        for j in [ 2**x for x in range(5,limit) ]:
            start = time.time()
            iset = IntervalSet()
--- a/tests/test_layout.py
+++ b/tests/test_layout.py
@@ -1,266 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import nilmdb
-
-from nilmdb.utils.printf import *
-
-from nose.tools import *
-from nose.tools import assert_raises
-import distutils.version
-import itertools
-import os
-import sys
-import random
-import unittest
-
-from testutil.helpers import *
-
-from nilmdb.server.layout import *
-
-class TestLayouts(object):
-    # Some nilmdb.layout tests.  Not complete, just fills in missing
-    # coverage.
-    def test_layouts(self):
-        x = nilmdb.server.layout.get_named("float32_8")
-        y = nilmdb.server.layout.get_named("float32_8")
-        eq_(x.count, y.count)
-        eq_(x.datatype, y.datatype)
-        y = nilmdb.server.layout.get_named("float32_7")
-        ne_(x.count, y.count)
-        eq_(x.datatype, y.datatype)
-
-    def test_parsing(self):
-        self.real_t_parsing("float32_8", "uint16_6", "uint16_9")
-        self.real_t_parsing("float32_8", "uint16_6", "uint16_9")
-    def real_t_parsing(self, name_prep, name_raw, name_rawnotch):
-        # invalid layouts
-        with assert_raises(TypeError) as e:
-            parser = Parser("NoSuchLayout")
-        with assert_raises(TypeError) as e:
-            parser = Parser("float32")
-
-        # too little data
-        parser = Parser(name_prep)
-        data = ( "1234567890.000000 1.1 2.2 3.3 4.4 5.5\n" +
-                 "1234567890.100000 1.1 2.2 3.3 4.4 5.5\n")
-        with assert_raises(ParserError) as e:
-            parser.parse(data)
-        in_("error", str(e.exception))
-
-        # too much data
-        parser = Parser(name_prep)
-        data = ( "1234567890.000000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8 9.9\n" +
-                 "1234567890.100000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8 9.9\n")
-        with assert_raises(ParserError) as e:
-            parser.parse(data)
-        in_("error", str(e.exception))
-
-        # just right
-        parser = Parser(name_prep)
-        data = ( "1234567890.000000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8\n" +
-                 "1234567890.100000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8\n")
-        parser.parse(data)
-        eq_(parser.min_timestamp, 1234567890.0)
-        eq_(parser.max_timestamp, 1234567890.1)
-        eq_(parser.data, [[1234567890.0,1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8],
-                          [1234567890.1,1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8]])
-
-        # try uint16_6 too, with clamping
-        parser = Parser(name_raw)
-        data = ( "1234567890.000000 1 2 3 4 5 6\n" +
-                 "1234567890.100000 1 2 3 4 5 6\n" )
-        parser.parse(data)
-        eq_(parser.data, [[1234567890.0,1,2,3,4,5,6],
-                          [1234567890.1,1,2,3,4,5,6]])
-
-        # pass an instantiated class
-        parser = Parser(get_named(name_rawnotch))
-        data = ( "1234567890.000000 1 2 3 4 5 6 7 8 9\n" +
-                 "1234567890.100000 1 2 3 4 5 6 7 8 9\n" )
-        parser.parse(data)
-
-        # non-monotonic
-        parser = Parser(name_raw)
-        data = ( "1234567890.100000 1 2 3 4 5 6\n" +
-                 "1234567890.099999 1 2 3 4 5 6\n" )
-        with assert_raises(ParserError) as e:
-            parser.parse(data)
-        in_("not monotonically increasing", str(e.exception))
-
-        parser = Parser(name_raw)
-        data = ( "1234567890.100000 1 2 3 4 5 6\n" +
-                 "1234567890.100000 1 2 3 4 5 6\n" )
-        with assert_raises(ParserError) as e:
-            parser.parse(data)
-        in_("not monotonically increasing", str(e.exception))
-
-        parser = Parser(name_raw)
-        data = ( "1234567890.100000 1 2 3 4 5 6\n" +
-                 "1234567890.100001 1 2 3 4 5 6\n" )
-        parser.parse(data)
-
-        # uint16_6 with values out of bounds
-        parser = Parser(name_raw)
-        data = ( "1234567890.000000 1 2 3 4 500000 6\n" +
-                 "1234567890.100000 1 2 3 4 5 6\n" )
-        with assert_raises(ParserError) as e:
-            parser.parse(data)
-        in_("value out of range", str(e.exception))
-
-        # Empty data should work but is useless
-        parser = Parser(name_raw)
-        data = ""
-        parser.parse(data)
-        assert(parser.min_timestamp is None)
-        assert(parser.max_timestamp is None)
-
-    def test_formatting(self):
-        self.real_t_formatting("float32_8", "uint16_6", "uint16_9")
-        self.real_t_formatting("float32_8", "uint16_6", "uint16_9")
-    def real_t_formatting(self, name_prep, name_raw, name_rawnotch):
-        # invalid layout
-        with assert_raises(TypeError) as e:
-            formatter = Formatter("NoSuchLayout")
-
-        # too little data
-        formatter = Formatter(name_prep)
-        data = [ [ 1234567890.000000, 1.1, 2.2, 3.3, 4.4, 5.5 ],
-                 [ 1234567890.100000, 1.1, 2.2, 3.3, 4.4, 5.5 ] ]
-        with assert_raises(FormatterError) as e:
-            formatter.format(data)
-        in_("error", str(e.exception))
-
-        # too much data
-        formatter = Formatter(name_prep)
-        data = [ [ 1234567890.000000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ],
-                 [ 1234567890.100000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ] ]
-        with assert_raises(FormatterError) as e:
-            formatter.format(data)
-        in_("error", str(e.exception))
-
-        # just right
-        formatter = Formatter(name_prep)
-        data = [ [ 1234567890.000000, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8 ],
-                 [ 1234567890.100000, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8 ] ]
-        text = formatter.format(data)
-        eq_(text,
-            "1234567890.000000 1.100000e+00 2.200000e+00 3.300000e+00 "
-            "4.400000e+00 5.500000e+00 6.600000e+00 7.700000e+00 "
-            "8.800000e+00\n" +
-            "1234567890.100000 1.100000e+00 2.200000e+00 3.300000e+00 "
-            "4.400000e+00 5.500000e+00 6.600000e+00 7.700000e+00 "
-            "8.800000e+00\n")
-
-        # try uint16_6 too
-        formatter = Formatter(name_raw)
-        data = [ [ 1234567890.000000, 1, 2, 3, 4, 5, 6 ],
-                 [ 1234567890.100000, 1, 2, 3, 4, 5, 6 ] ]
-        text = formatter.format(data)
-        eq_(text,
-            "1234567890.000000 1 2 3 4 5 6\n" +
-            "1234567890.100000 1 2 3 4 5 6\n")
-
-        # pass an instantiated class
-        formatter = Formatter(get_named(name_rawnotch))
-        data = [ [ 1234567890.000000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ],
-                 [ 1234567890.100000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ] ]
-        text = formatter.format(data)
-        eq_(text,
-            "1234567890.000000 1 2 3 4 5 6 7 8 9\n" +
-            "1234567890.100000 1 2 3 4 5 6 7 8 9\n")
-
-        # Empty data should work but is useless
-        formatter = Formatter(name_raw)
-        data = []
-        text = formatter.format(data)
-        eq_(text, "")
-
-    def test_roundtrip(self):
-        self.real_t_roundtrip("float32_8", "uint16_6", "uint16_9")
-        self.real_t_roundtrip("float32_8", "uint16_6", "uint16_9")
-    def real_t_roundtrip(self, name_prep, name_raw, name_rawnotch):
-        # Verify that textual data passed into the Parser, and then
-        # back through the Formatter, then back into the Parser,
-        # gives identical parsed representations
-        random.seed(12345)
-
-        def do_roundtrip(layout, datagen):
-            for i in range(100):
-                rows = random.randint(1,100)
-                data = ""
-                ts = 1234567890
-                for r in range(rows):
-                    ts += random.uniform(0,1)
-                    row = sprintf("%f", ts) + " "
-                    row += " ".join(datagen())
-                    row += "\n"
-                    data += row
-                parser1 = Parser(layout)
-                formatter = Formatter(layout)
-                parser2 = Parser(layout)
-                parser1.parse(data)
-                parser2.parse(formatter.format(parser1.data))
-                eq_(parser1.data, parser2.data)
-
-        def datagen():
-            return [ sprintf("%.6e", random.uniform(-1000,1000))
-                     for x in range(8) ]
-        do_roundtrip(name_prep, datagen)
-
-        def datagen():
-            return [ sprintf("%d", random.randint(0,65535))
-                     for x in range(6) ]
-        do_roundtrip(name_raw, datagen)
-
-        def datagen():
-            return [ sprintf("%d", random.randint(0,65535))
-                     for x in range(9) ]
-        do_roundtrip(name_rawnotch, datagen)
-
-class TestLayoutSpeed:
-    @unittest.skip("this is slow")
-    def test_layout_speed(self):
-        import time
-
-        random.seed(54321)
-
-        def do_speedtest(layout, datagen, rows = 5000, times = 100):
-            # Build data once
-            data = ""
-            ts = 1234567890
-            for r in range(rows):
-                ts += random.uniform(0,1)
-                row = sprintf("%f", ts) + " "
-                row += " ".join(datagen())
-                row += "\n"
-                data += row
-
-            # Do lots of roundtrips
-            start = time.time()
-            for i in range(times):
-                parser = Parser(layout)
-                formatter = Formatter(layout)
-                parser.parse(data)
-                formatter.format(parser.data)
-            elapsed = time.time() - start
-            printf("roundtrip %s: %d ms, %.1f μs/row, %d rows/sec\n",
-                   layout,
-                   elapsed * 1e3,
-                   (elapsed * 1e6) / (rows * times),
-                   (rows * times) / elapsed)
-
-        print ""
-        def datagen():
-            return [ sprintf("%.6e", random.uniform(-1000,1000))
-                     for x in range(10) ]
-        do_speedtest("float32_10", datagen)
-
-        def datagen():
-            return [ sprintf("%d", random.randint(0,65535))
-                     for x in range(10) ]
-        do_speedtest("uint16_10", datagen)
-
-        def datagen():
-            return [ sprintf("%d", random.randint(0,65535))
-                     for x in range(6) ]
-        do_speedtest("uint16_6", datagen)
Author	SHA1	Message	Date
Jim Paris	7429c1788d	Update nilmdb.utils.time	2013-03-15 22:49:59 -04:00
Jim Paris	0ef71c193b	Remove layout.pyx, since rocket replaced it	2013-03-15 22:32:40 -04:00
Jim Paris	4a50dd015e	Merge branch 'python-intervals'	2013-03-15 21:39:11 -04:00
Jim Paris	22274550ab	Test python version of Interval too	2013-03-15 21:37:03 -04:00
Jim Paris	4f06d6ae68	Move Interval set_difference inside nilmdb.utils for clients Clients might need to do Interval math too, so move a simple Interval class and start putting helpers in there.	2013-03-15 21:37:03 -04:00
Jim Paris	c54d8041c3	Update design docs	2013-03-15 21:07:01 -04:00