Browse Source

Can query intervals now.

On the big database, the server takes a few seconds to compute the
interval intersections.  We need to think about how to improve that --
the real key might be to reduce the number of intervals we're
storing by combining adjacent ones, potentially as they're inserted.


git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@10838 ddd99763-3ecb-0310-9145-efcb8ce7c51f
tags/bxinterval-last
Jim Paris 12 years ago
parent
commit
4501da6edc
9 changed files with 91 additions and 26 deletions
  1. +4
    -1
      nilmdb/client.py
  2. +14
    -15
      nilmdb/cmdline.py
  3. +1
    -1
      nilmdb/interval.py
  4. +30
    -1
      nilmdb/nilmdb.py
  5. +8
    -1
      nilmdb/server.py
  6. +1
    -1
      runserver.py
  7. +1
    -1
      setup.cfg
  8. +30
    -5
      tests/test_cmdline.py
  9. +2
    -0
      tests/test_interval.py

+ 4
- 1
nilmdb/client.py View File

@@ -223,4 +223,7 @@ class Client(object):
params["start"] = start
if end is not None:
params["end"] = end
return self.curl.getjson("stream/intervals", params)
(intervals, truncated) = self.curl.getjson("stream/intervals", params)
# If truncated is true, there are more intervals after the last
# one listed.
return (intervals, truncated)

+ 14
- 15
nilmdb/cmdline.py View File

@@ -220,20 +220,19 @@ class Cmdline(object):
continue

printf("%s %s\n", path, layout)
if self.args.detail:
intervals = self.client.stream_intervals(
path, self.args.start, self.args.end)
for (start, end) in intervals:
printf(" [ %s -> %s ]\n",
self.time_string(start), self.time_string(end))

def cmd_detail(self):
"""Detail the ranges available in a particular stream"""
streams = self.client.stream_list()
for (path, layout) in streams:
if (fnmatch.fnmatch(path, self.args.path) and
fnmatch.fnmatch(layout, self.args.layout)):
printf("%s %s\n", path, layout)
if not self.args.detail:
continue

(intervals, truncated) = self.client.stream_intervals(
path, self.args.start, self.args.end)
if not intervals:
printf(" (no intervals)\n")
continue
for (start, end) in intervals:
printf(" [ %s -> %s ]\n",
self.time_string(start), self.time_string(end))
if truncated: # pragma: no cover (hard to test)
printf(" (... truncated, more intervals follow)\n")

def cmd_create(self):
"""Create new stream"""
@@ -343,7 +342,7 @@ class Cmdline(object):
def arg_time(self, toparse):
"""Parse a time string argument"""
try:
return self.parse_time(toparse)
return self.parse_time(toparse).totimestamp()
except ValueError as e:
raise argparse.ArgumentTypeError(sprintf("%s \"%s\"",
str(e), toparse))


+ 1
- 1
nilmdb/interval.py View File

@@ -26,7 +26,7 @@ class Interval(bxintersect.Interval):
'start' and 'end' are arbitrary floats that represent time
"""
if start > end:
raise IntervalError("start must precede end")
raise IntervalError("start %s must precede end %s" % (start, end))
bxintersect.Interval.__init__(self, start, end)

def __repr__(self):


+ 30
- 1
nilmdb/nilmdb.py View File

@@ -22,7 +22,7 @@ import os
import errno

# Note about performance and transactions:
#
#
# Committing a transaction in the default sync mode (PRAGMA synchronous=FULL)
# takes about 125msec. sqlite3 will commit transactions at 3 times:
# 1: explicit con.commit()
@@ -215,6 +215,35 @@ class NilmDB(object):

return sorted(list(x) for x in result)

def stream_intervals(self, path, start = None, end = None):
"""
Returns (intervals, truncated) tuple.

intervals is a list of [start,end] timestamps of all intervals
that exist for path, between start and end.

truncated, if True, means that there were too many results to
return in a single request. The data is complete from the
starting timestamp to the point at which it was truncated.
"""

# About 35 bytes per interval in the JSON output.
# Let's limit to 10k intervals per response = 350k
MAX_RESULTS = 10000

stream_id = self._stream_id(path)
intervals = self._get_intervals(stream_id)
requested = Interval(start or 0, end or 1e12)
result = []
for n, i in enumerate(intervals & requested):
if n >= MAX_RESULTS: # pragma: no cover (hard to test)
truncated = True
break
result.append([i.start, i.end])
else:
truncated = False
return (result, truncated)

def stream_create(self, path, layout_name):
"""Create a new table in the database.



+ 8
- 1
nilmdb/server.py View File

@@ -180,7 +180,14 @@ class Stream(NilmApp):
@cherrypy.expose
@cherrypy.tools.json_out()
def intervals(self, path, start = None, end = None):
return [[123, 456],[123, 456]]
# If truncated is true, there are more intervals after the last
# one listed.
if start is not None:
start = float(start)
if end is not None:
end = float(end)
(intervals, truncated ) = self.db.stream_intervals(path, start, end)
return (intervals, truncated)

class Exiter(object):
"""App that exits the server, for testing"""


+ 1
- 1
runserver.py View File

@@ -9,7 +9,7 @@ server = nilmdb.Server(db, host = "127.0.0.1",
port = 12380,
embedded = False)

if 0:
if 1:
server.start(blocking = True)
else:
try:


+ 1
- 1
setup.cfg View File

@@ -10,7 +10,7 @@ cover-erase=
##cover-branches= # need nose 1.1.3 for this
stop=
verbosity=2
tests=tests/test_cmdline.py
#tests=tests/test_cmdline.py
#tests=tests/test_layout.py
#tests=tests/test_interval.py
#tests=tests/test_client.py


+ 30
- 5
tests/test_cmdline.py View File

@@ -298,7 +298,7 @@ class TestCmdline(object):
server_stop()
server_start()

# not an overlap if we specify a different start
# still an overlap if we specify a different start
os.environ['TZ'] = "America/New_York"
self.fail("insert --start '03/23/2012 06:05:00' /newton/prep "
"tests/data/prep-20120323T1004")
@@ -318,8 +318,33 @@ class TestCmdline(object):
self.fail("insert --start 'whatever' /newton/prep /dev/null")

def test_cmdline_7_detail(self):
self.ok("list")
self.dump()

# Just count the number of lines, it's probably fine
self.ok("list --detail")
self.dump()
eq_(self.captured.count('\n'), 11)

self.ok("list --detail --path *prep")
eq_(self.captured.count('\n'), 7)

self.ok("list --detail --path *prep --start='23 Mar 2012 10:02'")
eq_(self.captured.count('\n'), 5)

self.ok("list --detail --path *prep --start='23 Mar 2012 10:05'")
eq_(self.captured.count('\n'), 3)

self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15'")
eq_(self.captured.count('\n'), 2)
self.contain("10:05:15.000")

self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15.50'")
eq_(self.captured.count('\n'), 2)
self.contain("10:05:15.500")

self.ok("list --detail --path *prep --start='23 Mar 2012 19:05:15.50'")
eq_(self.captured.count('\n'), 2)
self.contain("no intervals")

self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15.50'"
+ " --end='23 Mar 2012 10:05:15.50'")
eq_(self.captured.count('\n'), 2)
self.contain("10:05:15.500")


+ 2
- 0
tests/test_interval.py View File

@@ -38,6 +38,8 @@ def makeset(string):
class TestInterval:
def test_interval(self):
# Test Interval class
os.environ['TZ'] = "America/New_York"
datetime_tz._localtz = None
(d1, d2, d3) = [ datetime_tz.datetime_tz.smartparse(x).totimestamp()
for x in [ "03/24/2012", "03/25/2012", "03/26/2012" ] ]



Loading…
Cancel
Save