Can query intervals now.

On the big database, the server takes a few seconds to figure out the interval intersections. Need to think about how to improve that -- the real key might be to start reducing the number of intervals we're storing by combining them, potentially as they're inserted.

git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@10838 ddd99763-3ecb-0310-9145-efcb8ce7c51f
2012-05-07 22:32:02 +00:00 · 2012-05-07 22:32:02 +00:00 · 4501da6edc
commit 4501da6edc
parent 36045fe53b
9 changed files with 90 additions and 25 deletions
--- a/nilmdb/client.py
+++ b/nilmdb/client.py
@ -223,4 +223,7 @@ class Client(object):
            params["start"] = start
        if end is not None:
            params["end"] = end
-        return self.curl.getjson("stream/intervals", params)
+        (intervals, truncated) = self.curl.getjson("stream/intervals", params)
+        # If truncated is true, there are more intervals after the last
+        # one listed.
+        return (intervals, truncated)
--- a/nilmdb/cmdline.py
+++ b/nilmdb/cmdline.py
@ -220,20 +220,19 @@ class Cmdline(object):
                continue

            printf("%s %s\n", path, layout)
-            if self.args.detail:
-                intervals = self.client.stream_intervals(
-                    path, self.args.start, self.args.end)
-                for (start, end) in intervals:
-                    printf("  [ %s -> %s ]\n",
-                           self.time_string(start), self.time_string(end))
+            if not self.args.detail:
+                continue

-    def cmd_detail(self):
-        """Detail the ranges available in a particular stream"""
-        streams = self.client.stream_list()
-        for (path, layout) in streams:
-            if (fnmatch.fnmatch(path, self.args.path) and
-                fnmatch.fnmatch(layout, self.args.layout)):
-                printf("%s %s\n", path, layout)
+            (intervals, truncated) = self.client.stream_intervals(
+                path, self.args.start, self.args.end)
+            if not intervals:
+                printf("  (no intervals)\n")
+                continue
+            for (start, end) in intervals:
+                printf("  [ %s -> %s ]\n",
+                       self.time_string(start), self.time_string(end))
+            if truncated: # pragma: no cover (hard to test)
+                printf("  (... truncated, more intervals follow)\n")

    def cmd_create(self):
        """Create new stream"""
@ -343,7 +342,7 @@ class Cmdline(object):
    def arg_time(self, toparse):
        """Parse a time string argument"""
        try:
-            return self.parse_time(toparse)
+            return self.parse_time(toparse).totimestamp()
        except ValueError as e:
            raise argparse.ArgumentTypeError(sprintf("%s \"%s\"",
                                                     str(e), toparse))
--- a/nilmdb/interval.py
+++ b/nilmdb/interval.py
@ -26,7 +26,7 @@ class Interval(bxintersect.Interval):
        'start' and 'end' are arbitrary floats that represent time
        """
        if start > end:
-            raise IntervalError("start must precede end")
+            raise IntervalError("start %s must precede end %s" % (start, end))
        bxintersect.Interval.__init__(self, start, end)

    def __repr__(self):
--- a/nilmdb/nilmdb.py
+++ b/nilmdb/nilmdb.py
@ -22,7 +22,7 @@ import os
 import errno

 # Note about performance and transactions:
-# 
+#
 # Committing a transaction in the default sync mode (PRAGMA synchronous=FULL)
 # takes about 125msec.  sqlite3 will commit transactions at 3 times:
 # 1: explicit con.commit()
@ -215,6 +215,35 @@ class NilmDB(object):

        return sorted(list(x) for x in result)

+    def stream_intervals(self, path, start = None, end = None):
+        """
+        Returns (intervals, truncated) tuple.
+
+        intervals is a list of [start,end] timestamps of all intervals
+        that exist for path, between start and end.
+
+        truncated, if True, means that there were too many results to
+        return in a single request.  The data is complete from the
+        starting timestamp to the point at which it was truncated.
+        """
+
+        # About 35 bytes per interval in the JSON output.
+        # Let's limit to 10k intervals per response = 350k
+        MAX_RESULTS = 10000
+
+        stream_id = self._stream_id(path)
+        intervals = self._get_intervals(stream_id)
+        requested = Interval(start or 0, end or 1e12)
+        result = []
+        for n, i in enumerate(intervals & requested):
+            if n >= MAX_RESULTS: # pragma: no cover (hard to test)
+                truncated = True
+                break
+            result.append([i.start, i.end])
+        else:
+            truncated = False
+        return (result, truncated)
+
    def stream_create(self, path, layout_name):
        """Create a new table in the database.

--- a/nilmdb/server.py
+++ b/nilmdb/server.py
@ -180,7 +180,14 @@ class Stream(NilmApp):
    @cherrypy.expose
    @cherrypy.tools.json_out()
    def intervals(self, path, start = None, end = None):
-        return [[123, 456],[123, 456]]
+        # If truncated is true, there are more intervals after the last
+        # one listed.
+        if start is not None:
+            start = float(start)
+        if end is not None:
+            end = float(end)
+        (intervals, truncated ) = self.db.stream_intervals(path, start, end)
+        return (intervals, truncated)

 class Exiter(object):
    """App that exits the server, for testing"""
--- a/runserver.py
+++ b/runserver.py
@ -9,7 +9,7 @@ server = nilmdb.Server(db, host = "127.0.0.1",
                       port = 12380,
                       embedded = False)

-if 0:
+if 1:
    server.start(blocking = True)
 else:
    try:
--- a/setup.cfg
+++ b/setup.cfg
@ -10,7 +10,7 @@ cover-erase=
 ##cover-branches=     # need nose 1.1.3 for this
 stop=
 verbosity=2
-tests=tests/test_cmdline.py
+#tests=tests/test_cmdline.py
 #tests=tests/test_layout.py
 #tests=tests/test_interval.py
 #tests=tests/test_client.py
--- a/tests/test_cmdline.py
+++ b/tests/test_cmdline.py
@ -298,7 +298,7 @@ class TestCmdline(object):
        server_stop()
        server_start()

-        # not an overlap if we specify a different start
+        # still an overlap if we specify a different start
        os.environ['TZ'] = "America/New_York"
        self.fail("insert --start '03/23/2012 06:05:00' /newton/prep "
                  "tests/data/prep-20120323T1004")
@ -318,8 +318,33 @@ class TestCmdline(object):
        self.fail("insert --start 'whatever' /newton/prep /dev/null")

    def test_cmdline_7_detail(self):
-        self.ok("list")
-        self.dump()
-
+        # Just count the number of lines, it's probably fine
        self.ok("list --detail")
-        self.dump()
+        eq_(self.captured.count('\n'), 11)
+
+        self.ok("list --detail --path *prep")
+        eq_(self.captured.count('\n'), 7)
+
+        self.ok("list --detail --path *prep --start='23 Mar 2012 10:02'")
+        eq_(self.captured.count('\n'), 5)
+
+        self.ok("list --detail --path *prep --start='23 Mar 2012 10:05'")
+        eq_(self.captured.count('\n'), 3)
+
+        self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15'")
+        eq_(self.captured.count('\n'), 2)
+        self.contain("10:05:15.000")
+
+        self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15.50'")
+        eq_(self.captured.count('\n'), 2)
+        self.contain("10:05:15.500")
+
+        self.ok("list --detail --path *prep --start='23 Mar 2012 19:05:15.50'")
+        eq_(self.captured.count('\n'), 2)
+        self.contain("no intervals")
+
+        self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15.50'"
+                + " --end='23 Mar 2012 10:05:15.50'")
+        eq_(self.captured.count('\n'), 2)
+        self.contain("10:05:15.500")
+
--- a/tests/test_interval.py
+++ b/tests/test_interval.py
@ -38,6 +38,8 @@ def makeset(string):
 class TestInterval:
    def test_interval(self):
        # Test Interval class
+        os.environ['TZ'] = "America/New_York"
+        datetime_tz._localtz = None
        (d1, d2, d3) = [ datetime_tz.datetime_tz.smartparse(x).totimestamp()
                         for x in [ "03/24/2012", "03/25/2012", "03/26/2012" ] ]