Can query intervals now.

On the big database, the server takes a few seconds to figure out the
interval intersections.  Need to think about how to improve that --
the real key might be to start reducing the number of intervals we're
storing by combining them, potentially as they're inserted.


git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@10838 ddd99763-3ecb-0310-9145-efcb8ce7c51f
This commit is contained in:
Jim Paris 2012-05-07 22:32:02 +00:00
parent 36045fe53b
commit 4501da6edc
9 changed files with 90 additions and 25 deletions

View File

@ -223,4 +223,7 @@ class Client(object):
params["start"] = start
if end is not None:
params["end"] = end
return self.curl.getjson("stream/intervals", params)
(intervals, truncated) = self.curl.getjson("stream/intervals", params)
# If truncated is true, there are more intervals after the last
# one listed.
return (intervals, truncated)

View File

@ -220,20 +220,19 @@ class Cmdline(object):
continue
printf("%s %s\n", path, layout)
if self.args.detail:
intervals = self.client.stream_intervals(
path, self.args.start, self.args.end)
for (start, end) in intervals:
printf(" [ %s -> %s ]\n",
self.time_string(start), self.time_string(end))
if not self.args.detail:
continue
def cmd_detail(self):
"""Detail the ranges available in a particular stream"""
streams = self.client.stream_list()
for (path, layout) in streams:
if (fnmatch.fnmatch(path, self.args.path) and
fnmatch.fnmatch(layout, self.args.layout)):
printf("%s %s\n", path, layout)
(intervals, truncated) = self.client.stream_intervals(
path, self.args.start, self.args.end)
if not intervals:
printf(" (no intervals)\n")
continue
for (start, end) in intervals:
printf(" [ %s -> %s ]\n",
self.time_string(start), self.time_string(end))
if truncated: # pragma: no cover (hard to test)
printf(" (... truncated, more intervals follow)\n")
def cmd_create(self):
"""Create new stream"""
@ -343,7 +342,7 @@ class Cmdline(object):
def arg_time(self, toparse):
"""Parse a time string argument"""
try:
return self.parse_time(toparse)
return self.parse_time(toparse).totimestamp()
except ValueError as e:
raise argparse.ArgumentTypeError(sprintf("%s \"%s\"",
str(e), toparse))

View File

@ -26,7 +26,7 @@ class Interval(bxintersect.Interval):
'start' and 'end' are arbitrary floats that represent time
"""
if start > end:
raise IntervalError("start must precede end")
raise IntervalError("start %s must precede end %s" % (start, end))
bxintersect.Interval.__init__(self, start, end)
def __repr__(self):

View File

@ -22,7 +22,7 @@ import os
import errno
# Note about performance and transactions:
#
#
# Committing a transaction in the default sync mode (PRAGMA synchronous=FULL)
# takes about 125msec. sqlite3 will commit transactions at 3 times:
# 1: explicit con.commit()
@ -215,6 +215,35 @@ class NilmDB(object):
return sorted(list(x) for x in result)
def stream_intervals(self, path, start = None, end = None):
    """
    Return an (intervals, truncated) tuple for the stream at 'path'.

    path: stream path; resolved to an id through self._stream_id().
    start, end: optional timestamps bounding the query.  A value of
    None leaves that side open (start falls back to 0, end to 1e12).
    An explicit 0 is honored as a real bound rather than being
    treated as "not given".

    intervals is a list of [start, end] timestamps of all intervals
    that exist for path, between start and end.

    truncated, if True, means that there were too many results to
    return in a single request.  The data is complete from the
    starting timestamp to the point at which it was truncated.

    NOTE(review): Interval() is presumed to reject start > end with
    an IntervalError, which would propagate to the caller -- confirm.
    """
    # About 35 bytes per interval in the JSON output.
    # Let's limit to 10k intervals per response = 350k
    MAX_RESULTS = 10000

    # Test against None explicitly: "end or 1e12" would silently turn
    # a caller-supplied bound of 0 into an unbounded query.
    if start is None:
        start = 0
    if end is None:
        end = 1e12

    stream_id = self._stream_id(path)
    intervals = self._get_intervals(stream_id)
    requested = Interval(start, end)
    result = []
    for n, i in enumerate(intervals & requested):
        if n >= MAX_RESULTS: # pragma: no cover (hard to test)
            truncated = True
            break
        result.append([i.start, i.end])
    else:
        # Loop finished without hitting the cap: the list is complete.
        truncated = False
    return (result, truncated)
def stream_create(self, path, layout_name):
"""Create a new table in the database.

View File

@ -180,7 +180,14 @@ class Stream(NilmApp):
@cherrypy.expose
@cherrypy.tools.json_out()
def intervals(self, path, start = None, end = None):
return [[123, 456],[123, 456]]
# If truncated is true, there are more intervals after the last
# one listed.
if start is not None:
start = float(start)
if end is not None:
end = float(end)
(intervals, truncated ) = self.db.stream_intervals(path, start, end)
return (intervals, truncated)
class Exiter(object):
"""App that exits the server, for testing"""

View File

@ -9,7 +9,7 @@ server = nilmdb.Server(db, host = "127.0.0.1",
port = 12380,
embedded = False)
if 0:
if 1:
server.start(blocking = True)
else:
try:

View File

@ -10,7 +10,7 @@ cover-erase=
##cover-branches= # need nose 1.1.3 for this
stop=
verbosity=2
tests=tests/test_cmdline.py
#tests=tests/test_cmdline.py
#tests=tests/test_layout.py
#tests=tests/test_interval.py
#tests=tests/test_client.py

View File

@ -298,7 +298,7 @@ class TestCmdline(object):
server_stop()
server_start()
# not an overlap if we specify a different start
# still an overlap if we specify a different start
os.environ['TZ'] = "America/New_York"
self.fail("insert --start '03/23/2012 06:05:00' /newton/prep "
"tests/data/prep-20120323T1004")
@ -318,8 +318,33 @@ class TestCmdline(object):
self.fail("insert --start 'whatever' /newton/prep /dev/null")
def test_cmdline_7_detail(self):
self.ok("list")
self.dump()
# Just count the number of lines, it's probably fine
self.ok("list --detail")
self.dump()
eq_(self.captured.count('\n'), 11)
self.ok("list --detail --path *prep")
eq_(self.captured.count('\n'), 7)
self.ok("list --detail --path *prep --start='23 Mar 2012 10:02'")
eq_(self.captured.count('\n'), 5)
self.ok("list --detail --path *prep --start='23 Mar 2012 10:05'")
eq_(self.captured.count('\n'), 3)
self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15'")
eq_(self.captured.count('\n'), 2)
self.contain("10:05:15.000")
self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15.50'")
eq_(self.captured.count('\n'), 2)
self.contain("10:05:15.500")
self.ok("list --detail --path *prep --start='23 Mar 2012 19:05:15.50'")
eq_(self.captured.count('\n'), 2)
self.contain("no intervals")
self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15.50'"
+ " --end='23 Mar 2012 10:05:15.50'")
eq_(self.captured.count('\n'), 2)
self.contain("10:05:15.500")

View File

@ -38,6 +38,8 @@ def makeset(string):
class TestInterval:
def test_interval(self):
# Test Interval class
os.environ['TZ'] = "America/New_York"
datetime_tz._localtz = None
(d1, d2, d3) = [ datetime_tz.datetime_tz.smartparse(x).totimestamp()
for x in [ "03/24/2012", "03/25/2012", "03/26/2012" ] ]