Merging adjacent intervals is working now!

Adjust test expectations accordingly, since the number of intervals they print out will now be smaller.
11 years ago · 9082cc9f44
--- a/nilmdb/interval.pyx
+++ b/nilmdb/interval.pyx
@@ -236,7 +236,7 @@ cdef class IntervalSet:
                self.__iadd__(x)
        return self

    def iadd(self, Interval other not None):
    def iadd_nocheck(self, Interval other not None):
        """Inplace add -- modifies self.
        'Optimized' version that doesn't check for intersection and
        only inserts the new interval into the tree."""
@@ -306,3 +306,13 @@ cdef class IntervalSet:
            if n.obj.intersects(other):
                return True
        return False

    def find_end(self, double t):
        """
        Look up the Interval in this set whose end time is exactly t.

        Returns that Interval, or None when no interval in the tree
        ends at t.
        """
        # The tree supplies a candidate node for time t; only an
        # exact match on the end time counts as a hit.
        # NOTE(review): find_left_end() presumably returns the closest
        # node ending at or before t -- confirm in the tree code.
        node = self.tree.find_left_end(t)
        if not node:
            return None
        candidate = node.obj
        return candidate if candidate.end == t else None
--- a/nilmdb/nilmdb.py
+++ b/nilmdb/nilmdb.py
@@ -204,13 +204,38 @@ class NilmDB(object):
        if iset.intersects(interval): # pragma: no cover (gets caught earlier)
            raise NilmDBError("new interval overlaps existing data")

        # Check for adjacency
        # XX TODO
        # Check for adjacency.  If there's an interval in the database
        # that ends exactly when this one starts, and the database
        # rows match up, we can make one interval that covers the
        # time range [adjacent.start -> interval.end)
        # and database rows [ adjacent.db_startpos -> end_pos ].
        # Only do this if the resulting interval isn't too large.
        max_merged_rows = 30000000 # a bit more than 1 hour at 8 KHz
        adjacent = iset.find_end(interval.start)
        if (adjacent is not None and
            start_pos == adjacent.db_endpos and
            (end_pos - adjacent.db_startpos) < max_merged_rows):
            # First delete the old one, both from our cache and the
            # database
            iset -= adjacent
            self.con.execute("DELETE FROM ranges WHERE "
                             "stream_id=? AND start_time=? AND "
                             "end_time=? AND start_pos=? AND "
                             "end_pos=?", (stream_id,
                                           adjacent.db_start,
                                           adjacent.db_end,
                                           adjacent.db_startpos,
                                           adjacent.db_endpos))

            # Now update our interval so the fallthrough add is
            # correct.
            interval.start = adjacent.start
            start_pos = adjacent.db_startpos

        # Add the new interval to the cache
        iset.iadd(DBInterval(interval.start, interval.end,
                             interval.start, interval.end,
                             start_pos, end_pos))
        iset.iadd_nocheck(DBInterval(interval.start, interval.end,
                                     interval.start, interval.end,
                                     start_pos, end_pos))

        # Insert into the database
        self.con.execute("INSERT INTO ranges "
@@ -218,6 +243,7 @@ class NilmDB(object):
                         "VALUES (?,?,?,?,?)",
                         (stream_id, interval.start, interval.end,
                          int(start_pos), int(end_pos)))

        self.con.commit()

    def stream_list(self, path = None, layout = None):
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -267,7 +267,7 @@ class TestClient(object):
        # still disable chunked responses for debugging.
        x = client.http.get("stream/intervals", { "path": "/newton/prep" },
                            retjson=False)
        eq_(x.count('\n'), 2)
        lines_(x, 1)
        if "transfer-encoding: chunked" not in client.http._headers.lower():
            warnings.warn("Non-chunked HTTP response for /stream/intervals")

--- a/tests/test_cmdline.py
+++ b/tests/test_cmdline.py
@@ -368,16 +368,16 @@ class TestCmdline(object):
    def test_cmdline_07_detail(self):
        # Just count the number of lines, it's probably fine
        self.ok("list --detail")
        lines_(self.captured, 11)
        lines_(self.captured, 8)

        self.ok("list --detail --path *prep")
        lines_(self.captured, 7)
        lines_(self.captured, 4)

        self.ok("list --detail --path *prep --start='23 Mar 2012 10:02'")
        lines_(self.captured, 5)
        lines_(self.captured, 3)

        self.ok("list --detail --path *prep --start='23 Mar 2012 10:05'")
        lines_(self.captured, 3)
        lines_(self.captured, 2)

        self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15'")
        lines_(self.captured, 2)
@@ -397,7 +397,7 @@ class TestCmdline(object):
        self.contain("10:05:15.500")

        self.ok("list --detail")
        lines_(self.captured, 11)
        lines_(self.captured, 8)

    def test_cmdline_08_extract(self):
        # nonexistent stream
@@ -459,7 +459,7 @@ class TestCmdline(object):
        server_stop()
        server_start(max_results = 2)
        self.ok("list --detail")
        lines_(self.captured, 11)
        lines_(self.captured, 8)
        server_stop()
        server_start()

@@ -484,7 +484,7 @@ class TestCmdline(object):

        # Notice how they're not empty
        self.ok("list --detail")
        lines_(self.captured, 11)
        lines_(self.captured, 8)

        # Delete some
        self.ok("destroy /newton/prep")
--- a/tests/test_helpers.py
+++ b/tests/test_helpers.py
@@ -21,8 +21,10 @@ def ne_(a, b):
        raise AssertionError("unexpected %s == %s" % (myrepr(a), myrepr(b)))

 def lines_(a, n):
    """Assert that string `a` contains exactly `n` newline characters.

    Raises AssertionError with the wanted count, the actual count and
    the full text of `a` when the counts differ; returns None otherwise.
    """
    count = a.count('\n')
    if count != n:
        # Format the message eagerly: passing n and a as extra
        # exception arguments would leave the %-placeholders
        # uninterpolated in the reported error.
        raise AssertionError("wanted %d lines, got %d in output: '%s'"
                             % (n, count, a))

 def recursive_unlink(path):
    try:
--- a/tests/test_interval.py
+++ b/tests/test_interval.py
@@ -173,7 +173,7 @@ class TestInterval:
        eq_(iset, IntervalSet([a, b]))

        iset = IntervalSet(a)
        iset.iadd(b)
        iset.iadd_nocheck(b)
        eq_(iset, IntervalSet([a, b]))

        iset = IntervalSet(a) + IntervalSet(b)