Destroy now requires that all data has been previously removed.

Added new flag "-R" to command line to perform an automatic removal. This should be the last of the ways in which a single command could block the nilmdb thread for a long time.
Limit the number of rows removed per call to nilmdb.stream_remove
2013-03-18 19:39:03 -04:00 · 2013-03-18 18:22:45 -04:00 · 2013-03-18 18:20:25 -04:00 · 2013-03-18 18:19:24 -04:00 · 2013-03-18 18:16:35 -04:00 · 2013-03-18 16:27:27 -04:00
17 changed files with 299 additions and 624 deletions
--- a/docs/design.md
+++ b/docs/design.md
@@ -186,6 +186,19 @@ IntervalSet speed
    - rbtree and interval converted to cython:
      8.4 μs, total 12 s, 134 MB RAM
 - Would like to move Interval itself back to Python so other
  non-cythonized code like client code can use it more easily.
  Testing speed with just `test_interval` being tested, with
  `range(5,22)`, using `/usr/bin/time -v python tests/runtests.py`,
  times recorded for 2097152:
    - 52ae397 (Interval in cython):
 	  12.6133 μs each, ratio 0.866533, total 47 sec, 399 MB RAM
 	- 9759dcf (Interval in python):
 	  21.2937 μs each, ratio 1.462870, total 83 sec, 1107 MB RAM
  That's a huge difference!  Instead, will keep Interval and DBInterval
  cythonized inside nilmdb, and just have an additional copy in
  nilmdb.utils for clients to use.
 Layouts
 -------
 Current/old design has specific layouts: RawData, PrepData, RawNotchedData.
--- a/extras/nilmtool-bash-completion.sh
+++ b/extras/nilmtool-bash-completion.sh
@@ -17,4 +17,4 @@ _nilmtool_argcomplete() {
        unset COMPREPLY
    fi
 }
-complete -o nospace -o default -F _nilmtool_argcomplete nilmtool
+complete -o nospace -F _nilmtool_argcomplete nilmtool
--- a/nilmdb/client/client.py
+++ b/nilmdb/client/client.py
@@ -97,7 +97,7 @@ class Client(object):
        return self.http.post("stream/create", params)
    def stream_destroy(self, path):
-        """Delete stream and its contents"""
+        """Delete stream.  Fails if any data is still present."""
        params = { "path": path }
        return self.http.post("stream/destroy", params)
--- a/nilmdb/cmdline/destroy.py
+++ b/nilmdb/cmdline/destroy.py
@@ -7,11 +7,14 @@ def setup(self, sub):
    cmd = sub.add_parser("destroy", help="Delete a stream and all data",
                         formatter_class = def_form,
                         description="""
-                         Destroy the stream at the specified path.  All
+                         Destroy the stream at the specified path.
-                         data and metadata related to the stream is
+                         The stream must be empty.  All metadata
-                         permanently deleted.
+                         related to the stream is permanently deleted.
                         """)
    cmd.set_defaults(handler = cmd_destroy)
    group = cmd.add_argument_group("Options")
    group.add_argument("-R", "--remove", action="store_true",
                       help="Remove all data before destroying stream")
    group = cmd.add_argument_group("Required arguments")
    group.add_argument("path",
                       help="Path of the stream to delete, e.g. /foo/bar",
@@ -20,6 +23,11 @@ def setup(self, sub):
 def cmd_destroy(self):
    """Destroy stream"""
    if self.args.remove:
        try:
            count = self.client.stream_remove(self.args.path)
        except nilmdb.client.ClientError as e:
            self.die("error removing data: %s", str(e))
    try:
        self.client.stream_destroy(self.args.path)
    except nilmdb.client.ClientError as e:
--- a/nilmdb/server/interval.pyx
+++ b/nilmdb/server/interval.pyx
@@ -1,5 +1,9 @@
 """Interval, IntervalSet
 The Interval implemented here is just like
 nilmdb.utils.interval.Interval, except implemented in Cython for
 speed.
 Represents an interval of time, and a set of such intervals.
 Intervals are half-open, ie. they include data points with timestamps
@@ -23,6 +27,7 @@ from ..utils.time import min_timestamp as nilmdb_min_timestamp
 from ..utils.time import max_timestamp as nilmdb_max_timestamp
 from ..utils.time import timestamp_to_string
 from ..utils.iterator import imerge
 from ..utils.interval import IntervalError
 import itertools
 cimport rbtree
@@ -30,10 +35,6 @@ from libc.stdint cimport uint64_t, int64_t
 ctypedef int64_t timestamp_t
 class IntervalError(Exception):
    """Error due to interval overlap, etc"""
    pass
 cdef class Interval:
    """Represents an interval of time."""
@@ -59,17 +60,7 @@ cdef class Interval:
    def __cmp__(self, Interval other):
        """Compare two intervals.  If non-equal, order by start then end"""
-        if not isinstance(other, Interval):
+        return cmp(self.start, other.start) or cmp(self.end, other.end)
            raise TypeError("bad type")
        if self.start == other.start:
            if self.end < other.end:
                return -1
            if self.end > other.end:
                return 1
            return 0
        if self.start < other.start:
            return -1
        return 1
    cpdef intersects(self, Interval other):
        """Return True if two Interval objects intersect"""
@@ -295,81 +286,19 @@ cdef class IntervalSet:
        (potentially) subsetted to make the one that is being
        returned.
        """
-        if not isinstance(interval, Interval):
+        if orig:
            raise TypeError("bad type")
            for n in self.tree.intersect(interval.start, interval.end):
                i = n.obj
            if i:
                if i.start >= interval.start and i.end <= interval.end:
                    if orig:
                        yield (i, i)
                    else:
                        yield i
                else:
                subset = i.subset(max(i.start, interval.start),
                                  min(i.end, interval.end))
                    if orig:
                yield (subset, i)
        else:
            for n in self.tree.intersect(interval.start, interval.end):
                i = n.obj
                subset = i.subset(max(i.start, interval.start),
                                  min(i.end, interval.end))
                yield subset
    def set_difference(self, IntervalSet other not None,
                       Interval bounds = None):
        """
        Compute the difference (self \\ other) between this
        IntervalSet and the given IntervalSet; i.e., the ranges
        that are present in 'self' but not 'other'.
        If 'bounds' is not None, results are limited to the range
        specified by the interval 'bounds'.
        Returns a generator that yields each interval in turn.
        Output intervals are built as subsets of the intervals in the
        first argument (self).
        """
        # Iterate through all starts and ends in sorted order.  Add a
        # tag to the iterator so that we can figure out which one they
        # were, after sorting.
        def decorate(it, key_start, key_end):
            for i in it:
                yield i.start, key_start, i
                yield i.end, key_end, i
        if bounds is None:
            bounds = Interval(nilmdb_min_timestamp,
                              nilmdb_max_timestamp)
        self_iter = decorate(self.intersection(bounds), 0, 2)
        other_iter = decorate(other.intersection(bounds), 1, 3)
        # Now iterate over the timestamps of each start and end.
        # At each point, evaluate which type of end it is, to determine
        # how to build up the output intervals.
        self_interval = None
        other_interval = None
        out_start = None
        for (ts, k, i) in imerge(self_iter, other_iter):
            if k == 0:
                # start self interval
                self_interval = i
                if other_interval is None:
                    out_start = ts
            elif k == 1:
                # start other interval
                other_interval = i
                if out_start is not None and out_start != ts:
                    yield self_interval.subset(out_start, ts)
                out_start = None
            elif k == 2:
                # end self interval
                if out_start is not None and out_start != ts:
                    yield self_interval.subset(out_start, ts)
                out_start = None
                self_interval = None
            elif k == 3:
                # end other interval
                other_interval = None
                if self_interval:
                    out_start = ts
    cpdef intersects(self, Interval other):
        """Return True if this IntervalSet intersects another interval"""
        for n in self.tree.intersect(other.start, other.end):
--- a/nilmdb/server/layout.pyx
+++ b/nilmdb/server/layout.pyx
@@ -1,204 +0,0 @@
 # cython: profile=False
 import time
 import sys
 import inspect
 import cStringIO
 from ..utils.time import min_timestamp as nilmdb_min_timestamp
 cdef enum:
    max_value_count = 64
 cimport cython
 cimport libc.stdlib
 cimport libc.stdio
 cimport libc.string
 class ParserError(Exception):
    def __init__(self, line, message):
        self.message = "line " + str(line) + ": " + message
        Exception.__init__(self, self.message)
 class FormatterError(Exception):
    pass
 class Layout:
    """Represents a NILM database layout"""
    def __init__(self, typestring):
        """Initialize this Layout object to handle the specified
        type string"""
        try:
            [ datatype, count ] = typestring.split("_")
        except:
            raise KeyError("invalid layout string")
        try:
            self.count = int(count)
        except ValueError:
            raise KeyError("invalid count")
        if self.count < 1 or self.count > max_value_count:
            raise KeyError("invalid count")
        if datatype == 'uint16':
            self.parse = self.parse_uint16
            self.format_str = "%.6f" + " %d" * self.count
            self.format = self.format_generic
        elif datatype == 'float32':
            self.parse = self.parse_float64
            self.format_str = "%.6f" + " %.6e" * self.count
            self.format = self.format_generic
        elif datatype == 'float64':
            self.parse = self.parse_float64
            self.format_str = "%.6f" + " %.16e" * self.count
            self.format = self.format_generic
        else:
            raise KeyError("invalid type")
        self.datatype = datatype
    # Parsers
    def parse_float64(self, char *text):
        cdef int n
        cdef double ts
        # Return doubles even in float32 case, since they're going into
        # a Python array which would upconvert to double anyway.
        result = [0] * (self.count + 1)
        cdef char *end
        ts = libc.stdlib.strtod(text, &end)
        if end == text:
            raise ValueError("bad timestamp")
        result[0] = ts
        for n in range(self.count):
            text = end
            result[n+1] = libc.stdlib.strtod(text, &end)
            if end == text:
                raise ValueError("wrong number of values")
        n = 0
        while end[n] == ' ':
            n += 1
        if end[n] != '\n' and end[n] != '#' and end[n] != '\0':
            raise ValueError("extra data on line")
        return (ts, result)
    def parse_uint16(self, char *text):
        cdef int n
        cdef double ts
        cdef int v
        cdef char *end
        result = [0] * (self.count + 1)
        ts = libc.stdlib.strtod(text, &end)
        if end == text:
            raise ValueError("bad timestamp")
        result[0] = ts
        for n in range(self.count):
            text = end
            v = libc.stdlib.strtol(text, &end, 10)
            if v < 0 or v > 65535:
                raise ValueError("value out of range")
            result[n+1] = v
            if end == text:
                raise ValueError("wrong number of values")
        n = 0
        while end[n] == ' ':
            n += 1
        if end[n] != '\n' and end[n] != '#' and end[n] != '\0':
            raise ValueError("extra data on line")
        return (ts, result)
    # Formatters
    def format_generic(self, d):
        n = len(d) - 1
        if n != self.count:
            raise ValueError("wrong number of values for layout type: "
                             "got %d, wanted %d" % (n, self.count))
        return (self.format_str % tuple(d)) + "\n"
 # Get a layout by name
 def get_named(typestring):
    try:
        return Layout(typestring)
    except KeyError:
        compat = { "PrepData": "float32_8",
                   "RawData": "uint16_6",
                   "RawNotchedData": "uint16_9" }
        return Layout(compat[typestring])
 class Parser(object):
    """Object that parses and stores ASCII data for inclusion into the
    database"""
    def __init__(self, layout):
        if issubclass(layout.__class__, Layout):
            self.layout = layout
        else:
            try:
                self.layout = get_named(layout)
            except KeyError:
                raise TypeError("unknown layout")
        self.data = []
        self.min_timestamp = None
        self.max_timestamp = None
    def parse(self, textdata):
        """
        Parse the data, provided as lines of text, using the current
        layout, into an internal data structure suitable for a
        pytables 'table.append(parser.data)'.
        """
        cdef double last_ts = nilmdb_min_timestamp
        cdef double ts
        cdef int n = 0, i
        cdef char *line
        indata = cStringIO.StringIO(textdata)
        # Assume any parsing error is a real error.
        # In the future we might want to skip completely empty lines,
        # or partial lines right before EOF?
        try:
            self.data = []
            for pyline in indata:
                line = pyline
                n += 1
                if line[0] == '\#':
                    continue
                (ts, row) = self.layout.parse(line)
                if ts <= last_ts:
                    raise ValueError("timestamp is not "
                                     "monotonically increasing")
                last_ts = ts
                self.data.append(row)
        except (ValueError, IndexError, TypeError) as e:
            raise ParserError(n, "error: " + e.message)
        # Mark timestamp ranges
        if len(self.data):
            self.min_timestamp = self.data[0][0]
            self.max_timestamp = self.data[-1][0]
 class Formatter(object):
    """Object that formats database data into ASCII"""
    def __init__(self, layout):
        if issubclass(layout.__class__, Layout):
            self.layout = layout
        else:
            try:
                self.layout = get_named(layout)
            except KeyError:
                raise TypeError("unknown layout")
    def format(self, data):
        """
        Format raw data from the database, using the current layout,
        as lines of ACSII text.
        """
        text = cStringIO.StringIO()
        try:
            for row in data:
                text.write(self.layout.format(row))
        except (ValueError, IndexError, TypeError) as e:
            raise FormatterError("formatting error: " + e.message)
        return text.getvalue()
--- a/nilmdb/server/nilmdb.py
+++ b/nilmdb/server/nilmdb.py
@@ -12,8 +12,10 @@ Manages both the SQL database and the table storage backend.
 from __future__ import absolute_import
 import nilmdb.utils
 from nilmdb.utils.printf import *
-from nilmdb.server.interval import (Interval, DBInterval,
+
-                                    IntervalSet, IntervalError)
+from nilmdb.utils.interval import IntervalError
 from nilmdb.server.interval import Interval, DBInterval, IntervalSet
 from nilmdb.server import bulkdata
 from nilmdb.server.errors import NilmDBError, StreamError, OverlapError
@@ -81,7 +83,18 @@ class NilmDB(object):
    verbose = 0
    def __init__(self, basepath, max_results=None,
-                 bulkdata_args=None):
+                 max_removals=None, bulkdata_args=None):
        """Initialize NilmDB at the given basepath.
        Other arguments are for debugging / testing:
        'max_results' is the max rows to send in a single
        stream_intervals or stream_extract response.
        'max_removals' is the max rows to delete at once
        in stream_remove.
        'bulkdata_args' is kwargs for the bulkdata module.
        """
        if bulkdata_args is None:
            bulkdata_args = {}
@@ -111,11 +124,11 @@ class NilmDB(object):
        self.con.execute("PRAGMA journal_mode=WAL")
        # Approximate largest number of elements that we want to send
-        # in a single reply (for stream_intervals, stream_extract)
+        # in a single reply (for stream_intervals, stream_extract).
-        if max_results:
+        self.max_results = max_results or 16384
-            self.max_results = max_results
+
-        else:
+        # Remove up to this many rows per call to stream_remove.
-            self.max_results = 16384
+        self.max_removals = max_removals or 1048576
    def get_basepath(self):
        return self.basepath
@@ -328,18 +341,18 @@ class NilmDB(object):
        List all intervals in 'path' between 'start' and 'end'.  If
        'diffpath' is not none, list instead the set-difference
        between the intervals in the two streams; i.e. all interval
-        ranges that are present in 'path' but not 'path2'.
+        ranges that are present in 'path' but not 'diffpath'.
        Returns (intervals, restart) tuple.
-        intervals is a list of [start,end] timestamps of all intervals
+        'intervals' is a list of [start,end] timestamps of all intervals
        that exist for path, between start and end.
-        restart, if nonzero, means that there were too many results to
+        'restart', if not None, means that there were too many results
-        return in a single request.  The data is complete from the
+        to return in a single request.  The data is complete from the
-        starting timestamp to the point at which it was truncated,
+        starting timestamp to the point at which it was truncated, and
-        and a new request with a start time of 'restart' will fetch
+        a new request with a start time of 'restart' will fetch the
-        the next block of data.
+        next block of data.
        """
        stream_id = self._stream_id(path)
        intervals = self._get_intervals(stream_id)
@@ -350,7 +363,9 @@ class NilmDB(object):
        requested = Interval(start, end)
        result = []
        if diffpath:
-            getter = intervals.set_difference(diffintervals, requested)
+            getter = nilmdb.utils.interval.set_difference(
                intervals.intersection(requested),
                diffintervals.intersection(requested))
        else:
            getter = intervals.intersection(requested)
        for n, i in enumerate(getter):
@@ -359,7 +374,7 @@ class NilmDB(object):
                break
            result.append([i.start, i.end])
        else:
-            restart = 0
+            restart = None
        return (result, restart)
    def stream_create(self, path, layout_name):
@@ -435,17 +450,22 @@ class NilmDB(object):
                        (newpath, stream_id))
    def stream_destroy(self, path):
-        """Fully remove a table and all of its data from the database.
+        """Fully remove a table from the database.  Fails if there are
-        No way to undo it!  Metadata is removed."""
+        any interval data present; remove it first.  Metadata is
        also removed."""
        stream_id = self._stream_id(path)
-        # Delete the cached interval data (if it was cached)
+        # Verify that no intervals are present, and clear the cache
        iset = self._get_intervals(stream_id)
        if len(iset):
            raise NilmDBError("all intervals must be removed before "
                              "destroying a stream")
        self._get_intervals.cache_remove(self, stream_id)
-        # Delete the data
+        # Delete the bulkdata storage
        self.data.destroy(path)
-        # Delete metadata, stream, intervals
+        # Delete metadata, stream, intervals (should be none)
        with self.con as con:
            con.execute("DELETE FROM metadata WHERE stream_id=?", (stream_id,))
            con.execute("DELETE FROM ranges WHERE stream_id=?", (stream_id,))
@@ -517,10 +537,10 @@ class NilmDB(object):
        """
        Returns (data, restart) tuple.
-        data is ASCII-formatted data from the database, formatted
+        'data' is ASCII-formatted data from the database, formatted
        according to the layout of the stream.
-        restart, if nonzero, means that there were too many results to
+        'restart', if not None, means that there were too many results to
        return in a single request.  The data is complete from the
        starting timestamp to the point at which it was truncated,
        and a new request with a start time of 'restart' will fetch
@@ -539,7 +559,7 @@ class NilmDB(object):
        result = []
        matched = 0
        remaining = self.max_results
-        restart = 0
+        restart = None
        for interval in intervals.intersection(requested):
            # Reading single rows from the table is too slow, so
            # we use two bisections to find both the starting and
@@ -564,7 +584,7 @@ class NilmDB(object):
            # Count them
            remaining -= row_end - row_start
-            if restart:
+            if restart is not None:
                break
        if count:
@@ -574,9 +594,17 @@ class NilmDB(object):
    def stream_remove(self, path, start = None, end = None):
        """
        Remove data from the specified time interval within a stream.
-        Removes all data in the interval [start, end), and intervals
+
-        are truncated or split appropriately.  Returns the number of
+        Removes data in the interval [start, end), and intervals are
-        data points removed.
+        truncated or split appropriately.
        Returns a (removed, restart) tuple.
        'removed' is the number of data points that were removed.
        'restart', if not None, means there were too many rows to
        remove in a single request.  This function should be called
        again with a start time of 'restart' to complete the removal.
        """
        stream_id = self._stream_id(path)
        table = self.data.getnode(path)
@@ -584,6 +612,8 @@ class NilmDB(object):
        (start, end) = self._check_user_times(start, end)
        to_remove = Interval(start, end)
        removed = 0
        remaining = self.max_removals
        restart = None
        # Can't remove intervals from within the iterator, so we need to
        # remember what's currently in the intersection now.
@@ -594,6 +624,13 @@ class NilmDB(object):
            row_start = self._find_start(table, dbint)
            row_end = self._find_end(table, dbint)
            # Shorten it if we'll hit the maximum number of removals
            row_max = row_start + remaining
            if row_max < row_end:
                row_end = row_max
                dbint.end = table[row_max]
                restart = dbint.end
            # Adjust the DBInterval to match the newly found ends
            dbint.db_start = dbint.start
            dbint.db_end = dbint.end
@@ -609,4 +646,7 @@ class NilmDB(object):
            # Count how many were removed
            removed += row_end - row_start
-        return removed
+            if restart is not None:
                break
        return (removed, restart)
--- a/nilmdb/server/server.py
+++ b/nilmdb/server/server.py
@@ -210,7 +210,7 @@ class Stream(NilmApp):
    @exception_to_httperror(NilmDBError)
    @cherrypy.tools.CORS_allow(methods = ["POST"])
    def destroy(self, path):
-        """Delete a stream and its associated data."""
+        """Delete a stream.  Fails if any data is still present."""
        return self.db.stream_destroy(path)
    # /stream/rename?oldpath=/newton/prep&newpath=/newton/prep/1
@@ -339,7 +339,14 @@ class Stream(NilmApp):
            if start >= end:
                raise cherrypy.HTTPError("400 Bad Request",
                                         "start must precede end")
-        return self.db.stream_remove(path, start, end)
+        total_removed = 0
        while True:
            (removed, restart) = self.db.stream_remove(path, start, end)
            total_removed += removed
            if restart is None:
                break
            start = restart
        return total_removed
    # /stream/intervals?path=/newton/prep
    # /stream/intervals?path=/newton/prep&start=1234567890.0&end=1234567899.0
@@ -386,7 +393,7 @@ class Stream(NilmApp):
                                                           diffpath)
                response = ''.join([ json.dumps(i) + "\r\n" for i in ints ])
                yield response
-                if restart == 0:
+                if restart is None:
                    break
                start = restart
        return content(start, end)
@@ -432,7 +439,7 @@ class Stream(NilmApp):
                (data, restart) = self.db.stream_extract(path, start, end)
                yield data
-                if restart == 0:
+                if restart is None:
                    return
                start = restart
        return content(start, end, count)
--- a/nilmdb/utils/init.py
+++ b/nilmdb/utils/init.py
@@ -11,3 +11,4 @@ import nilmdb.utils.threadsafety
 import nilmdb.utils.fallocate
 import nilmdb.utils.time
 import nilmdb.utils.iterator
 import nilmdb.utils.interval
--- a/nilmdb/utils/interval.py
+++ b/nilmdb/utils/interval.py
@@ -0,0 +1,106 @@
 """Interval.  Like nilmdb.server.interval, but re-implemented here
 in plain Python so clients have easier access to it.
 Intervals are half-open, ie. they include data points with timestamps
 [start, end)
 """
 import nilmdb.utils.time
 import nilmdb.utils.iterator
 class IntervalError(Exception):
    """Raised when an interval is invalid (bad bounds, overlap, etc)"""
 # Interval
 class Interval:
    """A half-open span of time: covers timestamps in [start, end)."""
    def __init__(self, start, end):
        """
        Store 'start' and 'end', arbitrary numbers that represent time.
        Raises IntervalError unless 'start' is strictly before 'end'.
        """
        if start >= end:
            # A half-open interval of zero width would contain nothing,
            # so it is rejected along with reversed bounds.
            raise IntervalError("start %s must precede end %s" % (start, end))
        self.start, self.end = start, end
    def __repr__(self):
        return "%s(%r, %r)" % (self.__class__.__name__, self.start, self.end)
    def __str__(self):
        to_string = nilmdb.utils.time.timestamp_to_string
        return "[%s -> %s)" % (to_string(self.start), to_string(self.end))
    def __cmp__(self, other):
        """Order intervals by start; ties are broken by end"""
        by_start = cmp(self.start, other.start)
        if by_start:
            return by_start
        return cmp(self.end, other.end)
    def intersects(self, other):
        """Return True if this Interval and 'other' share any time.
        Raises TypeError if 'other' is not an Interval."""
        if not isinstance(other, Interval):
            raise TypeError("need an Interval")
        # Half-open: intervals that merely touch at an endpoint do not
        # intersect.
        disjoint = self.end <= other.start or self.start >= other.end
        return not disjoint
    def subset(self, start, end):
        """Return a new Interval covering [start, end), which must lie
        inside this one; raises IntervalError otherwise."""
        # A subclass that tracks additional data might override this.
        outside = start < self.start or end > self.end
        if outside:
            raise IntervalError("not a subset")
        return Interval(start, end)
 def set_difference(a, b):
    """
    Compute the difference (a \\ b) between the intervals in 'a' and
    the intervals in 'b'; i.e., the ranges that are present in 'a'
    but not 'b'.
    'a' and 'b' must both be iterables of interval objects: each item
    needs 'start' and 'end' attributes, and items of 'a' also need a
    'subset(start, end)' method.
    NOTE(review): the merge below presumably relies on each iterable
    yielding its intervals in increasing, non-overlapping order --
    confirm against nilmdb.utils.iterator.imerge.
    Returns a generator that yields each interval in turn.
    Output intervals are built as subsets of the intervals in the
    first argument (a).
    """
    # Iterate through all starts and ends in sorted order.  Add a
    # tag to the iterator so that we can figure out which one they
    # were, after sorting.
    def decorate(it, key_start, key_end):
        for i in it:
            yield i.start, key_start, i
            yield i.end, key_end, i
    # Tags: 0 = start of 'a', 1 = start of 'b', 2 = end of 'a',
    # 3 = end of 'b'.
    a_iter = decorate(iter(a), 0, 2)
    b_iter = decorate(iter(b), 1, 3)
    # Now iterate over the timestamps of each start and end.
    # At each point, evaluate which type of end it is, to determine
    # how to build up the output intervals.
    a_interval = None   # interval of 'a' currently open, if any
    b_interval = None   # interval of 'b' currently open, if any
    out_start = None    # start of the pending output range, if any
    for (ts, k, i) in nilmdb.utils.iterator.imerge(a_iter, b_iter):
        if k == 0:
            # start a interval
            a_interval = i
            if b_interval is None:
                out_start = ts
        elif k == 1:
            # start b interval
            b_interval = i
            # Emit whatever part of 'a' was uncovered up to here,
            # skipping zero-width results.
            if out_start is not None and out_start != ts:
                yield a_interval.subset(out_start, ts)
            out_start = None
        elif k == 2:
            # end a interval
            if out_start is not None and out_start != ts:
                yield a_interval.subset(out_start, ts)
            out_start = None
            a_interval = None
        elif k == 3:
            # end b interval
            b_interval = None
            # Explicit None test, matching the checks above, so the
            # result does not depend on the truth value of the
            # interval object itself.
            if a_interval is not None:
                out_start = ts
--- a/nilmdb/utils/time.py
+++ b/nilmdb/utils/time.py
@@ -1,5 +1,8 @@
 from __future__ import absolute_import
 from nilmdb.utils import datetime_tz
 import re
 import time
 # Range
 min_timestamp = (-2**63)
@@ -36,6 +39,7 @@ def unix_to_timestamp(unix):
    """Convert a Unix timestamp (floating point seconds since epoch)
    into a NILM timestamp (integer microseconds since epoch)"""
    return int(round(unix * 1e6))
 seconds_to_timestamp = unix_to_timestamp
 def timestamp_to_unix(timestamp):
    """Convert a NILM timestamp (integer microseconds since epoch)
@@ -56,6 +60,11 @@ def parse_time(toparse):
    timestamp, the current local timezone is assumed (e.g. from the TZ
    env var).
    """
    if toparse == "min":
        return min_timestamp
    if toparse == "max":
        return max_timestamp
    # If string isn't "now" and doesn't contain at least 4 digits,
    # consider it invalid.  smartparse might otherwise accept
    # empty strings and strings with just separators.
@@ -118,4 +127,4 @@ def parse_time(toparse):
 def now():
    """Return current timestamp"""
-    return unix_to_timestamp(datetime_tz.datetime_tz.utcnow().totimestamp())
+    return unix_to_timestamp(time.time())
--- a/setup.py
+++ b/setup.py
@@ -43,7 +43,6 @@ except: pass
 # Use Cython if it's new enough, otherwise use preexisting C files.
 cython_modules = [ 'nilmdb.server.interval',
                   'nilmdb.server.layout',
                   'nilmdb.server.rbtree' ]
 try:
    import Cython
--- a/tests/test.order
+++ b/tests/test.order
@@ -7,7 +7,6 @@ test_serializer.py
 test_iteratorizer.py
 test_timestamper.py
 test_layout.py
 test_rbtree.py
 test_interval.py
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -375,6 +375,7 @@ class TestClient(object):
            # Delete streams that exist
            for stream in client.stream_list():
                client.stream_remove(stream[0])
                client.stream_destroy(stream[0])
            # Database is empty
@@ -506,6 +507,10 @@ class TestClient(object):
              [ 109, 118 ],
              [ 200, 300 ] ])
        # destroy stream (try without removing data first)
        with assert_raises(ClientError):
            client.stream_destroy("/context/test")
        client.stream_remove("/context/test")
        client.stream_destroy("/context/test")
        client.close()
@@ -600,6 +605,7 @@ class TestClient(object):
                     ])
        # Clean up
        client.stream_remove("/empty/test")
        client.stream_destroy("/empty/test")
        client.close()
@@ -635,8 +641,9 @@ class TestClient(object):
            eq_(connections(), (1, 5))
            # Clean up
            c.stream_remove("/persist/test")
            c.stream_destroy("/persist/test")
-            eq_(connections(), (1, 6))
+            eq_(connections(), (1, 7))
    def test_client_13_timestamp_rounding(self):
        # Test potentially bad timestamps (due to floating point
@@ -661,5 +668,6 @@ class TestClient(object):
                # Server will round this and give an error on finalize()
                ctx.insert("299999999.99 1\n")
        client.stream_remove("/rounding/test")
        client.stream_destroy("/rounding/test")
        client.close()
--- a/tests/test_cmdline.py
+++ b/tests/test_cmdline.py
@@ -21,12 +21,13 @@ from testutil.helpers import *
 testdb = "tests/cmdline-testdb"
-def server_start(max_results = None, bulkdata_args = {}):
+def server_start(max_results = None, max_removals = None, bulkdata_args = {}):
    global test_server, test_db
    # Start web app on a custom port
    test_db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(
        testdb,
        max_results = max_results,
        max_removals = max_removals,
        bulkdata_args = bulkdata_args)
    test_server = nilmdb.server.Server(test_db, host = "127.0.0.1",
                                       port = 32180, stoppable = False,
@@ -233,6 +234,8 @@ class TestCmdline(object):
        eq_(parse_time("1333648800.0"), test)
        eq_(parse_time("1333648800000000"), test)
        eq_(parse_time("@1333648800000000"), test)
        eq_(parse_time("min"), nilmdb.utils.time.min_timestamp)
        eq_(parse_time("max"), nilmdb.utils.time.max_timestamp)
        with assert_raises(ValueError):
            parse_time("@hashtag12345")
@@ -699,11 +702,9 @@ class TestCmdline(object):
        # Reinsert some data, to verify that no overlaps with deleted
        # data are reported
-        os.environ['TZ'] = "UTC"
+        for minute in ["0", "2"]:
            self.ok("insert --timestamp -f --rate 120 /newton/prep"
-                "tests/data/prep-20120323T1000")
+                    " tests/data/prep-20120323T100" + minute)
        self.ok("insert -t --filename --rate 120 /newton/prep "
                "tests/data/prep-20120323T1002")
    def test_11_destroy(self):
        # Delete records
@@ -715,6 +716,9 @@ class TestCmdline(object):
        self.fail("destroy /no/such/stream")
        self.contain("No stream at path")
        self.fail("destroy -R /no/such/stream")
        self.contain("No stream at path")
        self.fail("destroy asdfasdf")
        self.contain("No stream at path")
@@ -728,8 +732,14 @@ class TestCmdline(object):
        self.ok("list --detail")
        lines_(self.captured, 7)
-        # Delete some
+        # Fail to destroy because intervals still present
-        self.ok("destroy /newton/prep")
+        self.fail("destroy /newton/prep")
        self.contain("all intervals must be removed")
        self.ok("list --detail")
        lines_(self.captured, 7)
        # Destroy for real
        self.ok("destroy -R /newton/prep")
        self.ok("list")
        self.match("/newton/raw uint16_6\n"
                   "/newton/zzz/rawnotch uint16_9\n")
@@ -740,7 +750,8 @@ class TestCmdline(object):
        self.ok("destroy /newton/raw")
        self.ok("create /newton/raw uint16_6")
-        self.ok("destroy /newton/raw")
+        # Specify --remove with no data
        self.ok("destroy --remove /newton/raw")
        self.ok("list")
        self.match("")
@@ -815,7 +826,7 @@ class TestCmdline(object):
        # Now recreate the data one more time and make sure there are
        # fewer files.
-        self.ok("destroy /newton/prep")
+        self.ok("destroy --remove /newton/prep")
        self.fail("destroy /newton/prep") # already destroyed
        self.ok("create /newton/prep float32_8")
        os.environ['TZ'] = "UTC"
@@ -826,14 +837,16 @@ class TestCmdline(object):
        for (dirpath, dirnames, filenames) in os.walk(testdb):
            nfiles += len(filenames)
        lt_(nfiles, 50)
-        self.ok("destroy /newton/prep") # destroy again
+        self.ok("destroy -R /newton/prep") # destroy again
    def test_14_remove_files(self):
        # Test BulkData's ability to remove when data is split into
        # multiple files.  Should be a fairly comprehensive test of
        # remove functionality.
        # Also limit max_removals, to cover more functionality.
        server_stop()
-        server_start(bulkdata_args = { "file_size" : 920, # 23 rows per file
+        server_start(max_removals = 4321,
                     bulkdata_args = { "file_size" : 920, # 23 rows per file
                                       "files_per_dir" : 3 })
        # Insert data.  Just for fun, insert out of order
@@ -974,8 +987,8 @@ class TestCmdline(object):
        self.match("[ Thu, 01 Jan 2004 00:00:00.000000 +0000 -"
                   "> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n")
-        self.ok("destroy /diff/1")
+        self.ok("destroy -R /diff/1")
-        self.ok("destroy /diff/2")
+        self.ok("destroy -R /diff/2")
    def test_16_rename(self):
        # Test renaming.  Force file size smaller so we get more files
@@ -1039,7 +1052,7 @@ class TestCmdline(object):
        self.fail("rename /foo/bar /xxx/yyy/zzz/www")
        self.contain("path is subdir of existing node")
        self.ok("rename /foo/bar /xxx/yyy/mmm")
-        self.ok("destroy /xxx/yyy/zzz")
+        self.ok("destroy -R /xxx/yyy/zzz")
        check_path("xxx", "yyy", "mmm")
        # Extract it at the final path
@@ -1047,7 +1060,7 @@ class TestCmdline(object):
                "--end '2012-03-23 10:04:01'")
        eq_(self.captured, extract_before)
-        self.ok("destroy /xxx/yyy/mmm")
+        self.ok("destroy -R /xxx/yyy/mmm")
        # Make sure temporary rename dirs weren't left around
        for (dirpath, dirnames, filenames) in os.walk(testdb):
--- a/tests/test_interval.py
+++ b/tests/test_interval.py
@@ -8,8 +8,11 @@ from nose.tools import *
 from nose.tools import assert_raises
 import itertools
-from nilmdb.server.interval import (Interval, DBInterval,
+from nilmdb.utils.interval import IntervalError
-                                    IntervalSet, IntervalError)
+from nilmdb.server.interval import Interval, DBInterval, IntervalSet
 # so we can test them separately
 from nilmdb.utils.interval import Interval as UtilsInterval
 from testutil.helpers import *
 import unittest
@@ -47,6 +50,15 @@ def makeset(string):
    return iset
 class TestInterval:
    def test_client_interval(self):
        # Run interval tests against the Python version of Interval.
        global Interval
        NilmdbInterval = Interval
        Interval = UtilsInterval
        self.test_interval()
        self.test_interval_intersect()
        Interval = NilmdbInterval
    def test_interval(self):
        # Test Interval class
        os.environ['TZ'] = "America/New_York"
@@ -222,7 +234,7 @@ class TestInterval:
            eq_(ab,c)
            # a \ b == d
-            eq_(IntervalSet(a.set_difference(b)), d)
+            eq_(IntervalSet(nilmdb.utils.interval.set_difference(a,b)), d)
        # Intersection with intervals
        do_test(makeset("[---|---)[)"),
@@ -287,10 +299,11 @@ class TestInterval:
        b = makeset("[-) [--) [)")
        c = makeset("[----)     ")
        d = makeset("  [-)      ")
-        eq_(a.set_difference(b, list(c)[0]), d)
+        eq_(nilmdb.utils.interval.set_difference(
            a.intersection(list(c)[0]), b.intersection(list(c)[0])), d)
        # Empty second set
-        eq_(a.set_difference(IntervalSet()), a)
+        eq_(nilmdb.utils.interval.set_difference(a, IntervalSet()), a)
 class TestIntervalDB:
    def test_dbinterval(self):
@@ -379,7 +392,7 @@ class TestIntervalSpeed:
        print
        yappi.start()
        speeds = {}
-        limit = 10 # was 20
+        limit = 22 # was 20
        for j in [ 2**x for x in range(5,limit) ]:
            start = time.time()
            iset = IntervalSet()
--- a/tests/test_layout.py
+++ b/tests/test_layout.py
@@ -1,266 +0,0 @@
 # -*- coding: utf-8 -*-
 import nilmdb
 from nilmdb.utils.printf import *
 from nose.tools import *
 from nose.tools import assert_raises
 import distutils.version
 import itertools
 import os
 import sys
 import random
 import unittest
 from testutil.helpers import *
 from nilmdb.server.layout import *
 class TestLayouts(object):
    # Some nilmdb.layout tests.  Not complete, just fills in missing
    # coverage.
    def test_layouts(self):
        x = nilmdb.server.layout.get_named("float32_8")
        y = nilmdb.server.layout.get_named("float32_8")
        eq_(x.count, y.count)
        eq_(x.datatype, y.datatype)
        y = nilmdb.server.layout.get_named("float32_7")
        ne_(x.count, y.count)
        eq_(x.datatype, y.datatype)
    def test_parsing(self):
        self.real_t_parsing("float32_8", "uint16_6", "uint16_9")
        self.real_t_parsing("float32_8", "uint16_6", "uint16_9")
    def real_t_parsing(self, name_prep, name_raw, name_rawnotch):
        # invalid layouts
        with assert_raises(TypeError) as e:
            parser = Parser("NoSuchLayout")
        with assert_raises(TypeError) as e:
            parser = Parser("float32")
        # too little data
        parser = Parser(name_prep)
        data = ( "1234567890.000000 1.1 2.2 3.3 4.4 5.5\n" +
                 "1234567890.100000 1.1 2.2 3.3 4.4 5.5\n")
        with assert_raises(ParserError) as e:
            parser.parse(data)
        in_("error", str(e.exception))
        # too much data
        parser = Parser(name_prep)
        data = ( "1234567890.000000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8 9.9\n" +
                 "1234567890.100000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8 9.9\n")
        with assert_raises(ParserError) as e:
            parser.parse(data)
        in_("error", str(e.exception))
        # just right
        parser = Parser(name_prep)
        data = ( "1234567890.000000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8\n" +
                 "1234567890.100000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8\n")
        parser.parse(data)
        eq_(parser.min_timestamp, 1234567890.0)
        eq_(parser.max_timestamp, 1234567890.1)
        eq_(parser.data, [[1234567890.0,1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8],
                          [1234567890.1,1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8]])
        # try uint16_6 too, with clamping
        parser = Parser(name_raw)
        data = ( "1234567890.000000 1 2 3 4 5 6\n" +
                 "1234567890.100000 1 2 3 4 5 6\n" )
        parser.parse(data)
        eq_(parser.data, [[1234567890.0,1,2,3,4,5,6],
                          [1234567890.1,1,2,3,4,5,6]])
        # pass an instantiated class
        parser = Parser(get_named(name_rawnotch))
        data = ( "1234567890.000000 1 2 3 4 5 6 7 8 9\n" +
                 "1234567890.100000 1 2 3 4 5 6 7 8 9\n" )
        parser.parse(data)
        # non-monotonic
        parser = Parser(name_raw)
        data = ( "1234567890.100000 1 2 3 4 5 6\n" +
                 "1234567890.099999 1 2 3 4 5 6\n" )
        with assert_raises(ParserError) as e:
            parser.parse(data)
        in_("not monotonically increasing", str(e.exception))
        parser = Parser(name_raw)
        data = ( "1234567890.100000 1 2 3 4 5 6\n" +
                 "1234567890.100000 1 2 3 4 5 6\n" )
        with assert_raises(ParserError) as e:
            parser.parse(data)
        in_("not monotonically increasing", str(e.exception))
        parser = Parser(name_raw)
        data = ( "1234567890.100000 1 2 3 4 5 6\n" +
                 "1234567890.100001 1 2 3 4 5 6\n" )
        parser.parse(data)
        # uint16_6 with values out of bounds
        parser = Parser(name_raw)
        data = ( "1234567890.000000 1 2 3 4 500000 6\n" +
                 "1234567890.100000 1 2 3 4 5 6\n" )
        with assert_raises(ParserError) as e:
            parser.parse(data)
        in_("value out of range", str(e.exception))
        # Empty data should work but is useless
        parser = Parser(name_raw)
        data = ""
        parser.parse(data)
        assert(parser.min_timestamp is None)
        assert(parser.max_timestamp is None)
    def test_formatting(self):
        self.real_t_formatting("float32_8", "uint16_6", "uint16_9")
        self.real_t_formatting("float32_8", "uint16_6", "uint16_9")
    def real_t_formatting(self, name_prep, name_raw, name_rawnotch):
        # invalid layout
        with assert_raises(TypeError) as e:
            formatter = Formatter("NoSuchLayout")
        # too little data
        formatter = Formatter(name_prep)
        data = [ [ 1234567890.000000, 1.1, 2.2, 3.3, 4.4, 5.5 ],
                 [ 1234567890.100000, 1.1, 2.2, 3.3, 4.4, 5.5 ] ]
        with assert_raises(FormatterError) as e:
            formatter.format(data)
        in_("error", str(e.exception))
        # too much data
        formatter = Formatter(name_prep)
        data = [ [ 1234567890.000000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ],
                 [ 1234567890.100000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ] ]
        with assert_raises(FormatterError) as e:
            formatter.format(data)
        in_("error", str(e.exception))
        # just right
        formatter = Formatter(name_prep)
        data = [ [ 1234567890.000000, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8 ],
                 [ 1234567890.100000, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8 ] ]
        text = formatter.format(data)
        eq_(text,
            "1234567890.000000 1.100000e+00 2.200000e+00 3.300000e+00 "
            "4.400000e+00 5.500000e+00 6.600000e+00 7.700000e+00 "
            "8.800000e+00\n" +
            "1234567890.100000 1.100000e+00 2.200000e+00 3.300000e+00 "
            "4.400000e+00 5.500000e+00 6.600000e+00 7.700000e+00 "
            "8.800000e+00\n")
        # try uint16_6 too
        formatter = Formatter(name_raw)
        data = [ [ 1234567890.000000, 1, 2, 3, 4, 5, 6 ],
                 [ 1234567890.100000, 1, 2, 3, 4, 5, 6 ] ]
        text = formatter.format(data)
        eq_(text,
            "1234567890.000000 1 2 3 4 5 6\n" +
            "1234567890.100000 1 2 3 4 5 6\n")
        # pass an instantiated class
        formatter = Formatter(get_named(name_rawnotch))
        data = [ [ 1234567890.000000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ],
                 [ 1234567890.100000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ] ]
        text = formatter.format(data)
        eq_(text,
            "1234567890.000000 1 2 3 4 5 6 7 8 9\n" +
            "1234567890.100000 1 2 3 4 5 6 7 8 9\n")
        # Empty data should work but is useless
        formatter = Formatter(name_raw)
        data = []
        text = formatter.format(data)
        eq_(text, "")
    def test_roundtrip(self):
        self.real_t_roundtrip("float32_8", "uint16_6", "uint16_9")
        self.real_t_roundtrip("float32_8", "uint16_6", "uint16_9")
    def real_t_roundtrip(self, name_prep, name_raw, name_rawnotch):
        # Verify that textual data passed into the Parser, and then
        # back through the Formatter, then back into the Parser,
        # gives identical parsed representations
        random.seed(12345)
        def do_roundtrip(layout, datagen):
            for i in range(100):
                rows = random.randint(1,100)
                data = ""
                ts = 1234567890
                for r in range(rows):
                    ts += random.uniform(0,1)
                    row = sprintf("%f", ts) + " "
                    row += " ".join(datagen())
                    row += "\n"
                    data += row
                parser1 = Parser(layout)
                formatter = Formatter(layout)
                parser2 = Parser(layout)
                parser1.parse(data)
                parser2.parse(formatter.format(parser1.data))
                eq_(parser1.data, parser2.data)
        def datagen():
            return [ sprintf("%.6e", random.uniform(-1000,1000))
                     for x in range(8) ]
        do_roundtrip(name_prep, datagen)
        def datagen():
            return [ sprintf("%d", random.randint(0,65535))
                     for x in range(6) ]
        do_roundtrip(name_raw, datagen)
        def datagen():
            return [ sprintf("%d", random.randint(0,65535))
                     for x in range(9) ]
        do_roundtrip(name_rawnotch, datagen)
 class TestLayoutSpeed:
    @unittest.skip("this is slow")
    def test_layout_speed(self):
        import time
        random.seed(54321)
        def do_speedtest(layout, datagen, rows = 5000, times = 100):
            # Build data once
            data = ""
            ts = 1234567890
            for r in range(rows):
                ts += random.uniform(0,1)
                row = sprintf("%f", ts) + " "
                row += " ".join(datagen())
                row += "\n"
                data += row
            # Do lots of roundtrips
            start = time.time()
            for i in range(times):
                parser = Parser(layout)
                formatter = Formatter(layout)
                parser.parse(data)
                formatter.format(parser.data)
            elapsed = time.time() - start
            printf("roundtrip %s: %d ms, %.1f μs/row, %d rows/sec\n",
                   layout,
                   elapsed * 1e3,
                   (elapsed * 1e6) / (rows * times),
                   (rows * times) / elapsed)
        print ""
        def datagen():
            return [ sprintf("%.6e", random.uniform(-1000,1000))
                     for x in range(10) ]
        do_speedtest("float32_10", datagen)
        def datagen():
            return [ sprintf("%d", random.randint(0,65535))
                     for x in range(10) ]
        do_speedtest("uint16_10", datagen)
        def datagen():
            return [ sprintf("%d", random.randint(0,65535))
                     for x in range(6) ]
        do_speedtest("uint16_6", datagen)
Author	SHA1	Message	Date
Jim Paris	4cdef3285d	Destroy now requires that all data has been previously removed. Added new flag "-R" to command line to perform an automatic removal. This should be the last of the ways in which a single command could block the nilmdb thread for a long time.	2013-03-18 19:39:03 -04:00
Jim Paris	bcd82c4d59	Limit the number of rows removed per call to nilmdb.stream_remove. Server class will retry as needed, as with stream_extract and stream_intervals.	2013-03-18 18:22:45 -04:00
Jim Paris	caf63ab01f	Fix stream_extract/stream_intervals restart around timestamp == 0.	2013-03-18 18:20:25 -04:00
Jim Paris	2d72891162	Accept "min" and "max" as timestamps on command line	2013-03-18 18:19:24 -04:00
Jim Paris	cda2ac3e77	Don't return a mutable interval from IntervalSet.intersection(). Instead, always take the subset, which creates a new interval. Also adds a small optimization by moving the 'if orig' check outside the loop.	2013-03-18 18:16:35 -04:00
Jim Paris	57d3d60f6a	Fix relative import problems	2013-03-18 16:27:27 -04:00
Jim Paris	d6b5befe76	Don't use filenames as default arg completion	2013-03-16 17:27:58 -04:00
Jim Paris	7429c1788d	Update nilmdb.utils.time	2013-03-15 22:49:59 -04:00
Jim Paris	0ef71c193b	Remove layout.pyx, since rocket replaced it	2013-03-15 22:32:40 -04:00
Jim Paris	4a50dd015e	Merge branch 'python-intervals'	2013-03-15 21:39:11 -04:00
Jim Paris	22274550ab	Test python version of Interval too	2013-03-15 21:37:03 -04:00
Jim Paris	4f06d6ae68	Move Interval set_difference inside nilmdb.utils for clients. Clients might need to do Interval math too, so move a simple Interval class and start putting helpers in there.	2013-03-15 21:37:03 -04:00
Jim Paris	c54d8041c3	Update design docs	2013-03-15 21:07:01 -04:00