@@ -178,7 +178,7 @@ class Client(object): | |||||
return self.http.get_gen("stream/intervals", params) | return self.http.get_gen("stream/intervals", params) | ||||
def stream_extract(self, path, start = None, end = None, | def stream_extract(self, path, start = None, end = None, | ||||
count = False, markup = False): | |||||
count = False, markup = False, binary = False): | |||||
""" | """ | ||||
Extract data from a stream. Returns a generator that yields | Extract data from a stream. Returns a generator that yields | ||||
lines of ASCII-formatted data that matches the database | lines of ASCII-formatted data that matches the database | ||||
@@ -189,6 +189,11 @@ class Client(object): | |||||
Specify markup = True to include comments in the returned data | Specify markup = True to include comments in the returned data | ||||
that indicate interval starts and ends. | that indicate interval starts and ends. | ||||
Specify binary = True to return chunks of raw binary data, | |||||
rather than lines of ASCII-formatted data. Raw binary data | |||||
is always little-endian and matches the database types | |||||
(including a uint64 timestamp). | |||||
""" | """ | ||||
params = { | params = { | ||||
"path": path, | "path": path, | ||||
@@ -201,7 +206,9 @@ class Client(object): | |||||
params["count"] = 1 | params["count"] = 1 | ||||
if markup: | if markup: | ||||
params["markup"] = 1 | params["markup"] = 1 | ||||
return self.http.get_gen("stream/extract", params) | |||||
if binary: | |||||
params["binary"] = 1 | |||||
return self.http.get_gen("stream/extract", params, binary = binary) | |||||
def stream_count(self, path, start = None, end = None): | def stream_count(self, path, start = None, end = None): | ||||
""" | """ | ||||
@@ -110,7 +110,8 @@ class HTTPClient(object): | |||||
return self._req("PUT", url, params, data) | return self._req("PUT", url, params, data) | ||||
# Generator versions that return data one line at a time. | # Generator versions that return data one line at a time. | ||||
def _req_gen(self, method, url, query = None, body = None, headers = None): | |||||
def _req_gen(self, method, url, query = None, body = None, | |||||
headers = None, binary = False): | |||||
""" | """ | ||||
Make a request and return a generator that gives back strings | Make a request and return a generator that gives back strings | ||||
or JSON decoded lines of the body data, or raise an error if | or JSON decoded lines of the body data, or raise an error if | ||||
@@ -118,16 +119,19 @@ class HTTPClient(object): | |||||
""" | """ | ||||
(response, isjson) = self._do_req(method, url, query, body, | (response, isjson) = self._do_req(method, url, query, body, | ||||
stream = True, headers = headers) | stream = True, headers = headers) | ||||
if isjson: | |||||
if binary: | |||||
for chunk in response.iter_content(chunk_size = 65536): | |||||
yield chunk | |||||
elif isjson: | |||||
for line in response.iter_lines(): | for line in response.iter_lines(): | ||||
yield json.loads(line) | yield json.loads(line) | ||||
else: | else: | ||||
for line in response.iter_lines(): | for line in response.iter_lines(): | ||||
yield line | yield line | ||||
def get_gen(self, url, params = None): | |||||
def get_gen(self, url, params = None, binary = False): | |||||
"""Simple GET (parameters in URL) returning a generator""" | """Simple GET (parameters in URL) returning a generator""" | ||||
return self._req_gen("GET", url, params) | |||||
return self._req_gen("GET", url, params, binary = binary) | |||||
# Not much use for a POST or PUT generator, since they don't | # Not much use for a POST or PUT generator, since they don't | ||||
# return much data. | # return much data. |
@@ -0,0 +1,77 @@ | |||||
# -*- coding: utf-8 -*- | |||||
"""Provide a NumpyClient class that is based on normal Client, but has | |||||
additional methods for extracting and inserting data via Numpy arrays.""" | |||||
import nilmdb.utils | |||||
import nilmdb.client.client | |||||
import nilmdb.client.httpclient | |||||
from nilmdb.client.errors import ClientError | |||||
import contextlib | |||||
from nilmdb.utils.time import timestamp_to_string, string_to_timestamp | |||||
import numpy | |||||
import cStringIO | |||||
def layout_to_dtype(layout): | |||||
ltype = layout.split('_')[0] | |||||
lcount = int(layout.split('_')[1]) | |||||
if ltype.startswith('int'): | |||||
atype = '<i' + str(int(ltype[3:]) / 8) | |||||
elif ltype.startswith('uint'): | |||||
atype = '<u' + str(int(ltype[4:]) / 8) | |||||
elif ltype.startswith('float'): | |||||
atype = '<f' + str(int(ltype[5:]) / 8) | |||||
else: | |||||
raise ValueError("bad layout") | |||||
return numpy.dtype([('timestamp', '<i8'), ('data', atype, lcount)]) | |||||
class NumpyClient(nilmdb.client.client.Client): | |||||
"""Subclass of nilmdb.client.Client that adds additional methods for | |||||
extracting and inserting data via Numpy arrays.""" | |||||
def stream_extract_numpy(self, path, start = None, end = None, | |||||
layout = None, maxrows = 100000, | |||||
structured = False): | |||||
""" | |||||
Extract data from a stream. Returns a generator that yields | |||||
Numpy arrays of up to 'maxrows' of data each. | |||||
If 'layout' is None, it is read using stream_info. | |||||
If 'structured' is False, all data is converted to float64 | |||||
and returned in a flat 2D array. Otherwise, data is returned | |||||
as a structured dtype in a 1D array. | |||||
""" | |||||
if layout is None: | |||||
streams = self.stream_list(path) | |||||
if len(streams) != 1: | |||||
raise ClientError("can't get layout for path: " + path) | |||||
layout = streams[0][1] | |||||
dtype = layout_to_dtype(layout) | |||||
def to_numpy(data): | |||||
a = numpy.fromstring(data, dtype) | |||||
if structured: | |||||
return a | |||||
return numpy.c_[a['timestamp'], a['data']] | |||||
chunks = [] | |||||
total_len = 0 | |||||
maxsize = dtype.itemsize * maxrows | |||||
for data in self.stream_extract(path, start, end, binary = True): | |||||
# Add this block of binary data | |||||
chunks.append(data) | |||||
total_len += len(data) | |||||
# See if we have enough to make the requested Numpy array | |||||
while total_len >= maxsize: | |||||
assembled = "".join(chunks) | |||||
total_len -= maxsize | |||||
chunks = [ assembled[maxsize:] ] | |||||
block = assembled[:maxsize] | |||||
yield to_numpy(block) | |||||
if total_len: | |||||
yield to_numpy("".join(chunks)) |
@@ -479,7 +479,7 @@ class Table(object): | |||||
# Success, so update self.nrows accordingly | # Success, so update self.nrows accordingly | ||||
self.nrows = tot_rows | self.nrows = tot_rows | ||||
def get_data(self, start, stop): | |||||
def get_data(self, start, stop, binary = False): | |||||
"""Extract data corresponding to Python range [n:m], | """Extract data corresponding to Python range [n:m], | ||||
and returns a formatted string""" | and returns a formatted string""" | ||||
if (start is None or | if (start is None or | ||||
@@ -497,10 +497,13 @@ class Table(object): | |||||
if count > remaining: | if count > remaining: | ||||
count = remaining | count = remaining | ||||
f = self.file_open(subdir, filename) | f = self.file_open(subdir, filename) | ||||
ret.append(f.extract_string(offset, count)) | |||||
if binary: | |||||
ret.append(f.extract_binary(offset, count)) | |||||
else: | |||||
ret.append(f.extract_string(offset, count)) | |||||
remaining -= count | remaining -= count | ||||
row += count | row += count | ||||
return "".join(ret) | |||||
return b"".join(ret) | |||||
def __getitem__(self, row): | def __getitem__(self, row): | ||||
"""Extract timestamps from a row, with table[n] notation.""" | """Extract timestamps from a row, with table[n] notation.""" | ||||
@@ -538,7 +538,7 @@ class NilmDB(object): | |||||
dbinterval.db_endpos) | dbinterval.db_endpos) | ||||
def stream_extract(self, path, start = None, end = None, | def stream_extract(self, path, start = None, end = None, | ||||
count = False, markup = False): | |||||
count = False, markup = False, binary = False): | |||||
""" | """ | ||||
Returns (data, restart) tuple. | Returns (data, restart) tuple. | ||||
@@ -559,6 +559,9 @@ class NilmDB(object): | |||||
'markup', if true, indicates that returned data should be | 'markup', if true, indicates that returned data should be | ||||
marked with a comment denoting when a particular interval | marked with a comment denoting when a particular interval | ||||
starts, and another comment when an interval ends. | starts, and another comment when an interval ends. | ||||
'binary', if true, means to return raw binary rather than | |||||
ASCII-formatted data. | |||||
""" | """ | ||||
stream_id = self._stream_id(path) | stream_id = self._stream_id(path) | ||||
table = self.data.getnode(path) | table = self.data.getnode(path) | ||||
@@ -569,6 +572,8 @@ class NilmDB(object): | |||||
matched = 0 | matched = 0 | ||||
remaining = self.max_results | remaining = self.max_results | ||||
restart = None | restart = None | ||||
if binary and (markup or count): | |||||
raise NilmDBError("binary mode can't be used with markup or count") | |||||
for interval in intervals.intersection(requested): | for interval in intervals.intersection(requested): | ||||
# Reading single rows from the table is too slow, so | # Reading single rows from the table is too slow, so | ||||
# we use two bisections to find both the starting and | # we use two bisections to find both the starting and | ||||
@@ -593,7 +598,7 @@ class NilmDB(object): | |||||
timestamp_to_string(interval.start) + "\n") | timestamp_to_string(interval.start) + "\n") | ||||
# Gather these results up | # Gather these results up | ||||
result.append(table.get_data(row_start, row_end)) | |||||
result.append(table.get_data(row_start, row_end, binary)) | |||||
# Count them | # Count them | ||||
remaining -= row_end - row_start | remaining -= row_end - row_start | ||||
@@ -527,6 +527,46 @@ err: | |||||
return NULL; | return NULL; | ||||
} | } | ||||
/**** | |||||
* Extract to binary string containing raw little-endian binary data | |||||
*/ | |||||
static PyObject *Rocket_extract_binary(Rocket *self, PyObject *args) | |||||
{ | |||||
long count; | |||||
long offset; | |||||
if (!PyArg_ParseTuple(args, "ll", &offset, &count)) | |||||
return NULL; | |||||
if (!self->file) { | |||||
PyErr_SetString(PyExc_Exception, "no file"); | |||||
return NULL; | |||||
} | |||||
/* Seek to target location */ | |||||
if (fseek(self->file, offset, SEEK_SET) < 0) { | |||||
PyErr_SetFromErrno(PyExc_OSError); | |||||
return NULL; | |||||
} | |||||
uint8_t *str; | |||||
int len = count * self->binary_size; | |||||
str = malloc(len); | |||||
if (str == NULL) { | |||||
PyErr_SetFromErrno(PyExc_OSError); | |||||
return NULL; | |||||
} | |||||
/* Data in the file is already in the desired little-endian | |||||
binary format, so just read it directly. */ | |||||
if (fread(str, self->binary_size, count, self->file) != count) { | |||||
free(str); | |||||
PyErr_SetFromErrno(PyExc_OSError); | |||||
return NULL; | |||||
} | |||||
PyObject *pystr = PyBytes_FromStringAndSize((char *)str, len); | |||||
free(str); | |||||
return pystr; | |||||
} | |||||
/**** | /**** | ||||
* Extract timestamp | * Extract timestamp | ||||
@@ -600,6 +640,12 @@ static PyMethodDef Rocket_methods[] = { | |||||
"Extract count rows of data from the file at offset offset.\n" | "Extract count rows of data from the file at offset offset.\n" | ||||
"Return an ascii formatted string according to the layout" }, | "Return an ascii formatted string according to the layout" }, | ||||
{ "extract_binary", | |||||
(PyCFunction)Rocket_extract_binary, METH_VARARGS, | |||||
"extract_binary(self, offset, count)\n\n" | |||||
"Extract count rows of data from the file at offset offset.\n" | |||||
"Return a raw binary string of data matching the data layout." }, | |||||
{ "extract_timestamp", | { "extract_timestamp", | ||||
(PyCFunction)Rocket_extract_timestamp, METH_VARARGS, | (PyCFunction)Rocket_extract_timestamp, METH_VARARGS, | ||||
"extract_timestamp(self, offset)\n\n" | "extract_timestamp(self, offset)\n\n" | ||||
@@ -400,7 +400,7 @@ class Stream(NilmApp): | |||||
@chunked_response | @chunked_response | ||||
@response_type("text/plain") | @response_type("text/plain") | ||||
def extract(self, path, start = None, end = None, | def extract(self, path, start = None, end = None, | ||||
count = False, markup = False): | |||||
count = False, markup = False, binary = False): | |||||
""" | """ | ||||
Extract data from backend database. Streams the resulting | Extract data from backend database. Streams the resulting | ||||
entries as ASCII text lines separated by newlines. This may | entries as ASCII text lines separated by newlines. This may | ||||
@@ -411,6 +411,11 @@ class Stream(NilmApp): | |||||
If 'markup' is True, adds comments to the stream denoting each | If 'markup' is True, adds comments to the stream denoting each | ||||
interval's start and end timestamp. | interval's start and end timestamp. | ||||
If 'binary' is True, return raw binary data, rather than lines | |||||
of ASCII-formatted data. Raw binary data is always | |||||
little-endian and matches the database types (including a | |||||
uint64 timestamp). | |||||
""" | """ | ||||
(start, end) = self._get_times(start, end) | (start, end) = self._get_times(start, end) | ||||
@@ -418,6 +423,13 @@ class Stream(NilmApp): | |||||
if len(self.db.stream_list(path = path)) != 1: | if len(self.db.stream_list(path = path)) != 1: | ||||
raise cherrypy.HTTPError("404", "No such stream: " + path) | raise cherrypy.HTTPError("404", "No such stream: " + path) | ||||
if binary: | |||||
cherrypy.response.headers['Content-Type'] = ( | |||||
"application/octet-stream") | |||||
if markup or count: | |||||
raise cherrypy.HTTPError("400", "can't mix binary and " | |||||
"markup or count modes") | |||||
@workaround_cp_bug_1200 | @workaround_cp_bug_1200 | ||||
def content(start, end): | def content(start, end): | ||||
# Note: disable chunked responses to see tracebacks from here. | # Note: disable chunked responses to see tracebacks from here. | ||||
@@ -429,7 +441,8 @@ class Stream(NilmApp): | |||||
while True: | while True: | ||||
(data, restart) = self.db.stream_extract( | (data, restart) = self.db.stream_extract( | ||||
path, start, end, count = False, markup = markup) | |||||
path, start, end, count = False, | |||||
markup = markup, binary = binary) | |||||
yield data | yield data | ||||
if restart is None: | if restart is None: | ||||
@@ -107,6 +107,7 @@ setup(name='nilmdb', | |||||
author_email = 'jim@jtan.com', | author_email = 'jim@jtan.com', | ||||
tests_require = [ 'nose', | tests_require = [ 'nose', | ||||
'coverage', | 'coverage', | ||||
'numpy', | |||||
], | ], | ||||
setup_requires = [ 'distribute', | setup_requires = [ 'distribute', | ||||
], | ], | ||||
@@ -12,6 +12,7 @@ test_interval.py | |||||
test_bulkdata.py | test_bulkdata.py | ||||
test_nilmdb.py | test_nilmdb.py | ||||
test_client.py | test_client.py | ||||
test_numpyclient.py | |||||
test_cmdline.py | test_cmdline.py | ||||
test_*.py | test_*.py |
@@ -23,6 +23,7 @@ import warnings | |||||
import resource | import resource | ||||
import time | import time | ||||
import re | import re | ||||
import struct | |||||
from testutil.helpers import * | from testutil.helpers import * | ||||
@@ -293,6 +294,23 @@ class TestClient(object): | |||||
# Test count | # Test count | ||||
eq_(client.stream_count("/newton/prep"), 14400) | eq_(client.stream_count("/newton/prep"), 14400) | ||||
# Test binary output | |||||
with assert_raises(ClientError) as e: | |||||
list(client.stream_extract("/newton/prep", | |||||
markup = True, binary = True)) | |||||
with assert_raises(ClientError) as e: | |||||
list(client.stream_extract("/newton/prep", | |||||
count = True, binary = True)) | |||||
data = "".join(client.stream_extract("/newton/prep", binary = True)) | |||||
# Quick check using struct | |||||
unpacker = struct.Struct("<qffffffff") | |||||
out = [] | |||||
for i in range(14400): | |||||
out.append(unpacker.unpack_from(data, i * unpacker.size)) | |||||
eq_(out[0], (1332511200000000, 266568.0, 224029.0, 5161.39990234375, | |||||
2525.169921875, 8350.83984375, 3724.699951171875, | |||||
1355.3399658203125, 2039.0)) | |||||
client.close() | client.close() | ||||
def test_client_06_generators(self): | def test_client_06_generators(self): | ||||
@@ -90,13 +90,16 @@ class Test00Nilmdb(object): # named 00 so it runs first | |||||
eq_(db.stream_get_metadata("/newton/prep"), meta1) | eq_(db.stream_get_metadata("/newton/prep"), meta1) | ||||
eq_(db.stream_get_metadata("/newton/raw"), meta1) | eq_(db.stream_get_metadata("/newton/raw"), meta1) | ||||
# fill in some test coverage for start >= end | |||||
# fill in some misc. test coverage | |||||
with assert_raises(nilmdb.server.NilmDBError): | with assert_raises(nilmdb.server.NilmDBError): | ||||
db.stream_remove("/newton/prep", 0, 0) | db.stream_remove("/newton/prep", 0, 0) | ||||
with assert_raises(nilmdb.server.NilmDBError): | with assert_raises(nilmdb.server.NilmDBError): | ||||
db.stream_remove("/newton/prep", 1, 0) | db.stream_remove("/newton/prep", 1, 0) | ||||
db.stream_remove("/newton/prep", 0, 1) | db.stream_remove("/newton/prep", 0, 1) | ||||
with assert_raises(nilmdb.server.NilmDBError): | |||||
db.stream_extract("/newton/prep", count = True, binary = True) | |||||
db.close() | db.close() | ||||
class TestBlockingServer(object): | class TestBlockingServer(object): | ||||
@@ -0,0 +1,108 @@ | |||||
# -*- coding: utf-8 -*- | |||||
import nilmdb.server | |||||
import nilmdb.client | |||||
import nilmdb.client.numpyclient | |||||
from nilmdb.utils.printf import * | |||||
from nilmdb.utils import timestamper | |||||
from nilmdb.client import ClientError, ServerError | |||||
from nilmdb.utils import datetime_tz | |||||
from nose.plugins.skip import SkipTest | |||||
from nose.tools import * | |||||
from nose.tools import assert_raises | |||||
import itertools | |||||
import distutils.version | |||||
from testutil.helpers import * | |||||
import numpy as np | |||||
testdb = "tests/numpyclient-testdb" | |||||
testurl = "http://localhost:32180/" | |||||
def setup_module(): | |||||
global test_server, test_db | |||||
# Clear out DB | |||||
recursive_unlink(testdb) | |||||
# Start web app on a custom port | |||||
test_db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(testdb) | |||||
test_server = nilmdb.server.Server(test_db, host = "127.0.0.1", | |||||
port = 32180, stoppable = False, | |||||
fast_shutdown = True, | |||||
force_traceback = True) | |||||
test_server.start(blocking = False) | |||||
def teardown_module(): | |||||
global test_server, test_db | |||||
# Close web app | |||||
test_server.stop() | |||||
test_db.close() | |||||
class TestNumpyClient(object): | |||||
def test_numpyclient_01_basic(self): | |||||
# Test basic connection | |||||
client = nilmdb.client.numpyclient.NumpyClient(url = testurl) | |||||
version = client.version() | |||||
eq_(distutils.version.LooseVersion(version), | |||||
distutils.version.LooseVersion(test_server.version)) | |||||
# Verify subclassing | |||||
assert(isinstance(client, nilmdb.client.Client)) | |||||
# Layouts | |||||
for layout in "int8_t", "something_8", "integer_1": | |||||
with assert_raises(ValueError): | |||||
for x in client.stream_extract_numpy("/foo", layout=layout): | |||||
pass | |||||
for layout in "int8_1", "uint8_30", "int16_20", "float64_100": | |||||
with assert_raises(ClientError) as e: | |||||
for x in client.stream_extract_numpy("/foo", layout=layout): | |||||
pass | |||||
in_("No such stream", str(e.exception)) | |||||
with assert_raises(ClientError) as e: | |||||
for x in client.stream_extract_numpy("/foo"): | |||||
pass | |||||
in_("can't get layout for path", str(e.exception)) | |||||
client.close() | |||||
def test_numpyclient_02_extract(self): | |||||
client = nilmdb.client.numpyclient.NumpyClient(url = testurl) | |||||
# Insert some data as text | |||||
client.stream_create("/newton/prep", "float32_8") | |||||
testfile = "tests/data/prep-20120323T1000" | |||||
start = nilmdb.utils.time.parse_time("20120323T1000") | |||||
rate = 120 | |||||
data = timestamper.TimestamperRate(testfile, start, rate) | |||||
result = client.stream_insert("/newton/prep", data, | |||||
start, start + 119999777) | |||||
# Extract Numpy arrays | |||||
array = None | |||||
pieces = 0 | |||||
for chunk in client.stream_extract_numpy("/newton/prep", maxrows=1000): | |||||
pieces += 1 | |||||
if array is not None: | |||||
array = np.vstack((array, chunk)) | |||||
else: | |||||
array = chunk | |||||
eq_(array.shape, (14400, 9)) | |||||
eq_(pieces, 15) | |||||
# Try structured | |||||
s = list(client.stream_extract_numpy("/newton/prep", structured = True)) | |||||
assert(np.array_equal(np.c_[s[0]['timestamp'], s[0]['data']], array)) | |||||
# Compare. Will be close but not exact because the conversion | |||||
# to and from ASCII was lossy. | |||||
data = timestamper.TimestamperRate(testfile, start, rate) | |||||
actual = np.fromstring(" ".join(data), sep=' ').reshape(14400, 9) | |||||
assert(np.allclose(array, actual)) | |||||
client.close() |