Browse Source

Add binary extract to client, server, nilmdb, bulkdata, and rocket.

tags/nilmdb-1.4.8^2
Jim Paris 11 years ago
parent
commit
5b7409f802
8 changed files with 113 additions and 14 deletions
  1. +9
    -2
      nilmdb/client/client.py
  2. +8
    -4
      nilmdb/client/httpclient.py
  3. +6
    -3
      nilmdb/server/bulkdata.py
  4. +7
    -2
      nilmdb/server/nilmdb.py
  5. +46
    -0
      nilmdb/server/rocket.c
  6. +15
    -2
      nilmdb/server/server.py
  7. +18
    -0
      tests/test_client.py
  8. +4
    -1
      tests/test_nilmdb.py

+ 9
- 2
nilmdb/client/client.py View File

@@ -178,7 +178,7 @@ class Client(object):
return self.http.get_gen("stream/intervals", params) return self.http.get_gen("stream/intervals", params)


def stream_extract(self, path, start = None, end = None, def stream_extract(self, path, start = None, end = None,
count = False, markup = False):
count = False, markup = False, binary = False):
""" """
Extract data from a stream. Returns a generator that yields Extract data from a stream. Returns a generator that yields
lines of ASCII-formatted data that matches the database lines of ASCII-formatted data that matches the database
@@ -189,6 +189,11 @@ class Client(object):


Specify markup = True to include comments in the returned data Specify markup = True to include comments in the returned data
that indicate interval starts and ends. that indicate interval starts and ends.

Specify binary = True to return chunks of raw binary data,
rather than lines of ASCII-formatted data. Raw binary data
is always little-endian and matches the database types
(including a uint64 timestamp).
""" """
params = { params = {
"path": path, "path": path,
@@ -201,7 +206,9 @@ class Client(object):
params["count"] = 1 params["count"] = 1
if markup: if markup:
params["markup"] = 1 params["markup"] = 1
return self.http.get_gen("stream/extract", params)
if binary:
params["binary"] = 1
return self.http.get_gen("stream/extract", params, binary = binary)


def stream_count(self, path, start = None, end = None): def stream_count(self, path, start = None, end = None):
""" """


+ 8
- 4
nilmdb/client/httpclient.py View File

@@ -110,7 +110,8 @@ class HTTPClient(object):
return self._req("PUT", url, params, data) return self._req("PUT", url, params, data)


# Generator versions that return data one line at a time. # Generator versions that return data one line at a time.
def _req_gen(self, method, url, query = None, body = None, headers = None):
def _req_gen(self, method, url, query = None, body = None,
headers = None, binary = False):
""" """
Make a request and return a generator that gives back strings Make a request and return a generator that gives back strings
or JSON decoded lines of the body data, or raise an error if or JSON decoded lines of the body data, or raise an error if
@@ -118,16 +119,19 @@ class HTTPClient(object):
""" """
(response, isjson) = self._do_req(method, url, query, body, (response, isjson) = self._do_req(method, url, query, body,
stream = True, headers = headers) stream = True, headers = headers)
if isjson:
if binary:
for chunk in response.iter_content(chunk_size = 65536):
yield chunk
elif isjson:
for line in response.iter_lines(): for line in response.iter_lines():
yield json.loads(line) yield json.loads(line)
else: else:
for line in response.iter_lines(): for line in response.iter_lines():
yield line yield line


def get_gen(self, url, params = None):
def get_gen(self, url, params = None, binary = False):
"""Simple GET (parameters in URL) returning a generator""" """Simple GET (parameters in URL) returning a generator"""
return self._req_gen("GET", url, params)
return self._req_gen("GET", url, params, binary = binary)


# Not much use for a POST or PUT generator, since they don't # Not much use for a POST or PUT generator, since they don't
# return much data. # return much data.

+ 6
- 3
nilmdb/server/bulkdata.py View File

@@ -479,7 +479,7 @@ class Table(object):
# Success, so update self.nrows accordingly # Success, so update self.nrows accordingly
self.nrows = tot_rows self.nrows = tot_rows


def get_data(self, start, stop):
def get_data(self, start, stop, binary = False):
"""Extract data corresponding to Python range [n:m], """Extract data corresponding to Python range [n:m],
and returns a formatted string""" and returns a formatted string"""
if (start is None or if (start is None or
@@ -497,10 +497,13 @@ class Table(object):
if count > remaining: if count > remaining:
count = remaining count = remaining
f = self.file_open(subdir, filename) f = self.file_open(subdir, filename)
ret.append(f.extract_string(offset, count))
if binary:
ret.append(f.extract_binary(offset, count))
else:
ret.append(f.extract_string(offset, count))
remaining -= count remaining -= count
row += count row += count
return "".join(ret)
return b"".join(ret)


def __getitem__(self, row): def __getitem__(self, row):
"""Extract timestamps from a row, with table[n] notation.""" """Extract timestamps from a row, with table[n] notation."""


+ 7
- 2
nilmdb/server/nilmdb.py View File

@@ -538,7 +538,7 @@ class NilmDB(object):
dbinterval.db_endpos) dbinterval.db_endpos)


def stream_extract(self, path, start = None, end = None, def stream_extract(self, path, start = None, end = None,
count = False, markup = False):
count = False, markup = False, binary = False):
""" """
Returns (data, restart) tuple. Returns (data, restart) tuple.


@@ -559,6 +559,9 @@ class NilmDB(object):
'markup', if true, indicates that returned data should be 'markup', if true, indicates that returned data should be
marked with a comment denoting when a particular interval marked with a comment denoting when a particular interval
starts, and another comment when an interval ends. starts, and another comment when an interval ends.

'binary', if true, means to return raw binary rather than
ASCII-formatted data.
""" """
stream_id = self._stream_id(path) stream_id = self._stream_id(path)
table = self.data.getnode(path) table = self.data.getnode(path)
@@ -569,6 +572,8 @@ class NilmDB(object):
matched = 0 matched = 0
remaining = self.max_results remaining = self.max_results
restart = None restart = None
if binary and (markup or count):
raise NilmDBError("binary mode can't be used with markup or count")
for interval in intervals.intersection(requested): for interval in intervals.intersection(requested):
# Reading single rows from the table is too slow, so # Reading single rows from the table is too slow, so
# we use two bisections to find both the starting and # we use two bisections to find both the starting and
@@ -593,7 +598,7 @@ class NilmDB(object):
timestamp_to_string(interval.start) + "\n") timestamp_to_string(interval.start) + "\n")


# Gather these results up # Gather these results up
result.append(table.get_data(row_start, row_end))
result.append(table.get_data(row_start, row_end, binary))


# Count them # Count them
remaining -= row_end - row_start remaining -= row_end - row_start


+ 46
- 0
nilmdb/server/rocket.c View File

@@ -527,6 +527,46 @@ err:
return NULL; return NULL;
} }


/****
* Extract to binary string containing raw little-endian binary data
*/
static PyObject *Rocket_extract_binary(Rocket *self, PyObject *args)
{
long count;
long offset;

if (!PyArg_ParseTuple(args, "ll", &offset, &count))
return NULL;
if (!self->file) {
PyErr_SetString(PyExc_Exception, "no file");
return NULL;
}
/* Seek to target location */
if (fseek(self->file, offset, SEEK_SET) < 0) {
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}

uint8_t *str;
int len = count * self->binary_size;
str = malloc(len);
if (str == NULL) {
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}

/* Data in the file is already in the desired little-endian
binary format, so just read it directly. */
if (fread(str, self->binary_size, count, self->file) != count) {
free(str);
PyErr_SetFromErrno(PyExc_OSError);
return NULL;
}

PyObject *pystr = PyBytes_FromStringAndSize((char *)str, len);
free(str);
return pystr;
}


/**** /****
* Extract timestamp * Extract timestamp
@@ -600,6 +640,12 @@ static PyMethodDef Rocket_methods[] = {
"Extract count rows of data from the file at offset offset.\n" "Extract count rows of data from the file at offset offset.\n"
"Return an ascii formatted string according to the layout" }, "Return an ascii formatted string according to the layout" },


{ "extract_binary",
(PyCFunction)Rocket_extract_binary, METH_VARARGS,
"extract_binary(self, offset, count)\n\n"
"Extract count rows of data from the file at offset offset.\n"
"Return a raw binary string of data matching the data layout." },

{ "extract_timestamp", { "extract_timestamp",
(PyCFunction)Rocket_extract_timestamp, METH_VARARGS, (PyCFunction)Rocket_extract_timestamp, METH_VARARGS,
"extract_timestamp(self, offset)\n\n" "extract_timestamp(self, offset)\n\n"


+ 15
- 2
nilmdb/server/server.py View File

@@ -400,7 +400,7 @@ class Stream(NilmApp):
@chunked_response @chunked_response
@response_type("text/plain") @response_type("text/plain")
def extract(self, path, start = None, end = None, def extract(self, path, start = None, end = None,
count = False, markup = False):
count = False, markup = False, binary = False):
""" """
Extract data from backend database. Streams the resulting Extract data from backend database. Streams the resulting
entries as ASCII text lines separated by newlines. This may entries as ASCII text lines separated by newlines. This may
@@ -411,6 +411,11 @@ class Stream(NilmApp):


If 'markup' is True, adds comments to the stream denoting each If 'markup' is True, adds comments to the stream denoting each
interval's start and end timestamp. interval's start and end timestamp.

If 'binary' is True, return raw binary data, rather than lines
of ASCII-formatted data. Raw binary data is always
little-endian and matches the database types (including a
uint64 timestamp).
""" """
(start, end) = self._get_times(start, end) (start, end) = self._get_times(start, end)


@@ -418,6 +423,13 @@ class Stream(NilmApp):
if len(self.db.stream_list(path = path)) != 1: if len(self.db.stream_list(path = path)) != 1:
raise cherrypy.HTTPError("404", "No such stream: " + path) raise cherrypy.HTTPError("404", "No such stream: " + path)


if binary:
cherrypy.response.headers['Content-Type'] = (
"application/octet-stream")
if markup or count:
raise cherrypy.HTTPError("400", "can't mix binary and "
"markup or count modes")

@workaround_cp_bug_1200 @workaround_cp_bug_1200
def content(start, end): def content(start, end):
# Note: disable chunked responses to see tracebacks from here. # Note: disable chunked responses to see tracebacks from here.
@@ -429,7 +441,8 @@ class Stream(NilmApp):


while True: while True:
(data, restart) = self.db.stream_extract( (data, restart) = self.db.stream_extract(
path, start, end, count = False, markup = markup)
path, start, end, count = False,
markup = markup, binary = binary)
yield data yield data


if restart is None: if restart is None:


+ 18
- 0
tests/test_client.py View File

@@ -23,6 +23,7 @@ import warnings
import resource import resource
import time import time
import re import re
import struct


from testutil.helpers import * from testutil.helpers import *


@@ -293,6 +294,23 @@ class TestClient(object):
# Test count # Test count
eq_(client.stream_count("/newton/prep"), 14400) eq_(client.stream_count("/newton/prep"), 14400)


# Test binary output
with assert_raises(ClientError) as e:
list(client.stream_extract("/newton/prep",
markup = True, binary = True))
with assert_raises(ClientError) as e:
list(client.stream_extract("/newton/prep",
count = True, binary = True))
data = "".join(client.stream_extract("/newton/prep", binary = True))
# Quick check using struct
unpacker = struct.Struct("<qffffffff")
out = []
for i in range(14400):
out.append(unpacker.unpack_from(data, i * unpacker.size))
eq_(out[0], (1332511200000000, 266568.0, 224029.0, 5161.39990234375,
2525.169921875, 8350.83984375, 3724.699951171875,
1355.3399658203125, 2039.0))

client.close() client.close()


def test_client_06_generators(self): def test_client_06_generators(self):


+ 4
- 1
tests/test_nilmdb.py View File

@@ -90,13 +90,16 @@ class Test00Nilmdb(object): # named 00 so it runs first
eq_(db.stream_get_metadata("/newton/prep"), meta1) eq_(db.stream_get_metadata("/newton/prep"), meta1)
eq_(db.stream_get_metadata("/newton/raw"), meta1) eq_(db.stream_get_metadata("/newton/raw"), meta1)


# fill in some test coverage for start >= end
# fill in some misc. test coverage
with assert_raises(nilmdb.server.NilmDBError): with assert_raises(nilmdb.server.NilmDBError):
db.stream_remove("/newton/prep", 0, 0) db.stream_remove("/newton/prep", 0, 0)
with assert_raises(nilmdb.server.NilmDBError): with assert_raises(nilmdb.server.NilmDBError):
db.stream_remove("/newton/prep", 1, 0) db.stream_remove("/newton/prep", 1, 0)
db.stream_remove("/newton/prep", 0, 1) db.stream_remove("/newton/prep", 0, 1)


with assert_raises(nilmdb.server.NilmDBError):
db.stream_extract("/newton/prep", count = True, binary = True)

db.close() db.close()


class TestBlockingServer(object): class TestBlockingServer(object):


Loading…
Cancel
Save