row individually, when extracting data. Switch to using bisect module when doing the bisection, to lessen the chance of errors. Added syslog ability for timer module, for timing stuff deep inside the server. Make the chunked/non-chunked test just give a warning, rather than failing the tests, for debugging purposes. An alternate approach would be to disable "die on error" for the tests. git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@10896 ddd99763-3ecb-0310-9145-efcb8ce7c51f tags/bxinterval-last
@@ -19,6 +19,7 @@ import time | |||
import sys | |||
import os | |||
import errno | |||
import bisect | |||
import pyximport | |||
pyximport.install() | |||
@@ -86,6 +87,13 @@ class StreamError(NilmDBError): | |||
class OverlapError(NilmDBError): | |||
pass | |||
# Helper that lets us pass a Pytables table into bisect | |||
class BisectableTable(object): | |||
def __init__(self, table): | |||
self.table = table | |||
def __getitem__(self, index): | |||
return self.table[index][0] | |||
class NilmDB(object): | |||
verbose = 0 | |||
@@ -379,24 +387,36 @@ class NilmDB(object): | |||
def _find_start(self, table, interval): | |||
""" | |||
Given a DBInterval, find the row in the database that | |||
corresponds to the start time. Here, we perform a binary | |||
search between 'db_startpos' and 'db_endpos' and return the | |||
first database position with a timestamp (first element) | |||
greater than or equal to 'start'. | |||
corresponds to the start time. Return the first database | |||
position with a timestamp (first element) greater than or | |||
equal to 'start'. | |||
""" | |||
# Optimization for the common case where an interval wasn't truncated | |||
if interval.start == interval.db_start: | |||
return interval.db_startpos | |||
lo = interval.db_startpos | |||
hi = interval.db_endpos - 1 | |||
x = interval.start | |||
while lo < hi: | |||
mid = (lo + hi) // 2 | |||
if table[mid][0] < x: | |||
lo = mid + 1 | |||
else: | |||
hi = mid | |||
return lo | |||
return bisect.bisect_left(BisectableTable(table), | |||
interval.start, | |||
interval.db_startpos, | |||
interval.db_endpos) | |||
def _find_end(self, table, interval): | |||
""" | |||
Given a DBInterval, find the row in the database that follows | |||
the end time. Return the first database position after the | |||
row with timestamp (first element) greater than or equal | |||
to 'end'. | |||
""" | |||
# Optimization for the common case where an interval wasn't truncated | |||
if interval.end == interval.db_end: | |||
return interval.db_endpos | |||
# Note that we still use bisect_left here, because we don't | |||
# want to include the given timestamp in the results. This is | |||
# so queries like 1:00 -> 2:00 and 2:00 -> 3:00 return | |||
# non-overlapping data. | |||
return bisect.bisect_left(BisectableTable(table), | |||
interval.end, | |||
interval.db_startpos, | |||
interval.db_endpos) | |||
def stream_extract(self, path, start = None, end = None): | |||
""" | |||
@@ -417,25 +437,28 @@ class NilmDB(object): | |||
intervals = self._get_intervals(stream_id) | |||
requested = Interval(start or 0, end or 1e12) | |||
result = [] | |||
n = 0 | |||
remaining = self.max_results | |||
restart = 0 | |||
for interval in intervals.intersection(requested): | |||
# Find row corresponding to interval start | |||
row = self._find_start(table, interval) | |||
# Gather results until we hit the row limit or the | |||
# endpoint. | |||
while table[row][0] < end: | |||
result.append(table[row]) | |||
row += 1 | |||
if row >= interval.db_endpos: | |||
break | |||
n += 1 | |||
if n >= self.max_results: | |||
restart = table[row][0] | |||
break | |||
# If restart is set, stop now | |||
# Reading single rows from the table is too slow, so | |||
# we use two bisections to find both the starting and | |||
# ending row for this particular interval, then | |||
# read the entire range as one slice. | |||
row_start = self._find_start(table, interval) | |||
row_end = self._find_end(table, interval) | |||
# Shorten it if we'll hit the maximum number of results | |||
row_max = row_start + remaining | |||
if row_max < row_end: | |||
row_end = row_max | |||
restart = table[row_max][0] | |||
# Gather these results up | |||
result.extend(table[row_start:row_end]) | |||
# Count them | |||
remaining -= row_end - row_start | |||
if restart: | |||
break | |||
@@ -9,8 +9,13 @@ import contextlib | |||
import time | |||
@contextlib.contextmanager | |||
def Timer(name = None): | |||
def Timer(name = None, tosyslog = False): | |||
start = time.time() | |||
yield | |||
elapsed = int((time.time() - start) * 1000) | |||
print (name or 'elapsed') + ": " + str(elapsed) + " ms" | |||
msg = (name or 'elapsed') + ": " + str(elapsed) + " ms" | |||
if tosyslog: # pragma: no cover | |||
import syslog | |||
syslog.syslog(msg) | |||
else: | |||
print msg |
@@ -14,6 +14,7 @@ import threading | |||
import cStringIO | |||
import simplejson as json | |||
import unittest | |||
import warnings | |||
from test_helpers import * | |||
@@ -170,7 +171,14 @@ class TestClient(object): | |||
in_("400 Bad Request", str(e.exception)) | |||
in_("OverlapError", str(e.exception)) | |||
def test_client_4_generators(self): | |||
def test_client_4_extract(self): | |||
# Misc tests for extract. Most of them are in test_cmdline. | |||
client = nilmdb.Client(url = "http://localhost:12380/") | |||
for x in client.stream_extract("/newton/prep", 123, 123): | |||
raise Exception("shouldn't be any data for this request") | |||
def test_client_5_generators(self): | |||
# A lot of the client functionality is already tested by test_cmdline, | |||
# but this gets a bit more coverage that cmdline misses. | |||
client = nilmdb.Client(url = "http://localhost:12380/") | |||
@@ -218,8 +226,7 @@ class TestClient(object): | |||
in_("404 Not Found", str(e.exception)) | |||
in_("No such stream", str(e.exception)) | |||
#@unittest.skip("while debugging") | |||
def test_client_5_chunked(self): | |||
def test_client_6_chunked(self): | |||
# Make sure that /stream/intervals and /stream/extract | |||
# properly return streaming, chunked response. Pokes around | |||
# in client.http internals a bit to look at the response | |||
@@ -227,13 +234,17 @@ class TestClient(object): | |||
client = nilmdb.Client(url = "http://localhost:12380/") | |||
# Use a warning rather than returning a test failure, so that we can | |||
# still disable chunked responses for debugging. | |||
x = client.http.get("stream/intervals", { "path": "/newton/prep" }, | |||
retjson=False) | |||
eq_(x.count('\n'), 2) | |||
in_("transfer-encoding: chunked", client.http._headers.lower()) | |||
if "transfer-encoding: chunked" not in client.http._headers.lower(): | |||
warnings.warn("Non-chunked HTTP response for /stream/intervals") | |||
x = client.http.get("stream/extract", | |||
{ "path": "/newton/prep", | |||
"start": "123", | |||
"end": "123" }, retjson=False) | |||
in_("transfer-encoding: chunked", client.http._headers.lower()) | |||
if "transfer-encoding: chunked" not in client.http._headers.lower(): | |||
warnings.warn("Non-chunked HTTP response for /stream/extract") |
@@ -378,11 +378,15 @@ class TestCmdline(object): | |||
self.fail("extract /no/such/foo --start 2000-01-01 --end 2020-01-01") | |||
self.contain("Error getting stream info") | |||
# empty range | |||
# empty ranges | |||
self.fail("extract -a /newton/prep " + | |||
"--start '23 Mar 2012 10:00:30' " + | |||
"--end '23 Mar 2012 10:00:30'", exitcode = 2) | |||
self.contain("no data") | |||
self.fail("extract -a /newton/prep " + | |||
"--start '23 Mar 2012 10:00:30.000001' " + | |||
"--end '23 Mar 2012 10:00:30.000001'", exitcode = 2) | |||
self.contain("no data") | |||
# Check various dumps against stored copies of how they should appear | |||
def test(file, start, end, extra=""): | |||
@@ -406,8 +410,9 @@ class TestCmdline(object): | |||
"--end '23 Mar 2112 10:00:30'", exitcode = 2) | |||
self.contain("no data") | |||
# all data put in by tests | |||
self.ok("extract -a /newton/prep --start 2000-01-01 --end 2020-01-01") | |||
print self.captured.count('\n') | |||
eq_(self.captured.count('\n'), 43204) | |||
def test_cmdline_9_truncated(self): | |||
# Test truncated responses by overriding the nilmdb max_results | |||