Browse Source

Merge branch 'binary' into HEAD

tags/nilmdb-1.4.8^0
Jim Paris 11 years ago
parent
commit
f0304b4c00
12 changed files with 300 additions and 14 deletions
  1. +9
    -2
      nilmdb/client/client.py
  2. +8
    -4
      nilmdb/client/httpclient.py
  3. +77
    -0
      nilmdb/client/numpyclient.py
  4. +6
    -3
      nilmdb/server/bulkdata.py
  5. +7
    -2
      nilmdb/server/nilmdb.py
  6. +46
    -0
      nilmdb/server/rocket.c
  7. +15
    -2
      nilmdb/server/server.py
  8. +1
    -0
      setup.py
  9. +1
    -0
      tests/test.order
  10. +18
    -0
      tests/test_client.py
  11. +4
    -1
      tests/test_nilmdb.py
  12. +108
    -0
      tests/test_numpyclient.py

+ 9
- 2
nilmdb/client/client.py View File

@@ -178,7 +178,7 @@ class Client(object):
return self.http.get_gen("stream/intervals", params)

def stream_extract(self, path, start = None, end = None,
count = False, markup = False):
count = False, markup = False, binary = False):
"""
Extract data from a stream. Returns a generator that yields
lines of ASCII-formatted data that matches the database
@@ -189,6 +189,11 @@ class Client(object):

Specify markup = True to include comments in the returned data
that indicate interval starts and ends.

Specify binary = True to return chunks of raw binary data,
rather than lines of ASCII-formatted data. Raw binary data
is always little-endian and matches the database types
(including an int64 timestamp).
"""
params = {
"path": path,
@@ -201,7 +206,9 @@ class Client(object):
params["count"] = 1
if markup:
params["markup"] = 1
return self.http.get_gen("stream/extract", params)
if binary:
params["binary"] = 1
return self.http.get_gen("stream/extract", params, binary = binary)

def stream_count(self, path, start = None, end = None):
"""


+ 8
- 4
nilmdb/client/httpclient.py View File

@@ -110,7 +110,8 @@ class HTTPClient(object):
return self._req("PUT", url, params, data)

# Generator versions that return data one line at a time.
def _req_gen(self, method, url, query = None, body = None, headers = None):
def _req_gen(self, method, url, query = None, body = None,
headers = None, binary = False):
"""
Make a request and return a generator that gives back strings
or JSON decoded lines of the body data, or raise an error if
@@ -118,16 +119,19 @@ class HTTPClient(object):
"""
(response, isjson) = self._do_req(method, url, query, body,
stream = True, headers = headers)
if isjson:
if binary:
for chunk in response.iter_content(chunk_size = 65536):
yield chunk
elif isjson:
for line in response.iter_lines():
yield json.loads(line)
else:
for line in response.iter_lines():
yield line

def get_gen(self, url, params = None):
def get_gen(self, url, params = None, binary = False):
"""Simple GET (parameters in URL) returning a generator"""
return self._req_gen("GET", url, params)
return self._req_gen("GET", url, params, binary = binary)

# Not much use for a POST or PUT generator, since they don't
# return much data.

+ 77
- 0
nilmdb/client/numpyclient.py View File

@@ -0,0 +1,77 @@
# -*- coding: utf-8 -*-

"""Provide a NumpyClient class that is based on normal Client, but has
additional methods for extracting and inserting data via Numpy arrays."""

import nilmdb.utils
import nilmdb.client.client
import nilmdb.client.httpclient
from nilmdb.client.errors import ClientError

import contextlib
from nilmdb.utils.time import timestamp_to_string, string_to_timestamp

import numpy
import cStringIO

def layout_to_dtype(layout):
    """Convert a layout string into a structured Numpy dtype.

    'layout' has the form "<type><bits>_<count>", for example
    "float32_8" or "uint16_6", where <type> is one of "int", "uint"
    or "float".

    Returns a little-endian structured dtype with two fields:
    'timestamp' (int64) and 'data' (<count> values of the given type).

    Raises ValueError if the layout string can not be parsed.
    """
    parts = layout.split('_')
    if len(parts) < 2:
        # No "_<count>" suffix at all; report it the same way as any
        # other malformed layout rather than leaking an IndexError.
        raise ValueError("bad layout")
    ltype = parts[0]
    lcount = int(parts[1])

    for prefix, code in (('int', 'i'), ('uint', 'u'), ('float', 'f')):
        if ltype.startswith(prefix):
            # Floor division keeps the byte count an integer on both
            # Python 2 and Python 3 ("<f4", never "<f4.0").
            nbytes = int(ltype[len(prefix):]) // 8
            atype = '<' + code + str(nbytes)
            break
    else:
        raise ValueError("bad layout")
    return numpy.dtype([('timestamp', '<i8'), ('data', atype, lcount)])

class NumpyClient(nilmdb.client.client.Client):
    """Subclass of nilmdb.client.Client that adds additional methods for
    extracting and inserting data via Numpy arrays."""

    def stream_extract_numpy(self, path, start = None, end = None,
                             layout = None, maxrows = 100000,
                             structured = False):
        """
        Extract data from a stream.  Returns a generator that yields
        Numpy arrays of up to 'maxrows' of data each.

        If 'layout' is None, it is read using stream_list.

        If 'structured' is False, the timestamp and data columns are
        combined into a flat 2D array (one row per timestamp).
        Otherwise, data is returned as a structured dtype in a 1D
        array.

        Raises ClientError if the layout can not be determined for
        'path', and ValueError if the layout is malformed or 'maxrows'
        is less than 1.
        """
        if layout is None:
            streams = self.stream_list(path)
            if len(streams) != 1:
                raise ClientError("can't get layout for path: " + path)
            layout = streams[0][1]
        dtype = layout_to_dtype(layout)

        if maxrows < 1:
            # A block size of zero would never consume any buffered
            # data, and the loop below would yield empty arrays forever.
            raise ValueError("maxrows must be at least 1")

        def to_numpy(data):
            # frombuffer replaces the deprecated binary mode of
            # numpy.fromstring.  It returns a read-only view of 'data',
            # so copy when handing the structured array to the caller.
            a = numpy.frombuffer(data, dtype)
            if structured:
                return a.copy()
            return numpy.c_[a['timestamp'], a['data']]

        chunks = []
        total_len = 0
        maxsize = dtype.itemsize * maxrows
        for data in self.stream_extract(path, start, end, binary = True):
            # Add this block of binary data
            chunks.append(data)
            total_len += len(data)
            if total_len < maxsize:
                continue

            # We have enough for at least one full array.  Join the
            # buffered data once, then slice off as many full blocks
            # as it holds.
            assembled = b"".join(chunks)
            pos = 0
            while total_len - pos >= maxsize:
                yield to_numpy(assembled[pos:pos + maxsize])
                pos += maxsize

            # Keep the leftover partial block for the next round
            chunks = [ assembled[pos:] ]
            total_len -= pos

        if total_len:
            yield to_numpy(b"".join(chunks))

+ 6
- 3
nilmdb/server/bulkdata.py View File

@@ -479,7 +479,7 @@ class Table(object):
# Success, so update self.nrows accordingly
self.nrows = tot_rows

def get_data(self, start, stop):
def get_data(self, start, stop, binary = False):
"""Extract data corresponding to Python range [n:m],
and returns a formatted string"""
if (start is None or
@@ -497,10 +497,13 @@ class Table(object):
if count > remaining:
count = remaining
f = self.file_open(subdir, filename)
ret.append(f.extract_string(offset, count))
if binary:
ret.append(f.extract_binary(offset, count))
else:
ret.append(f.extract_string(offset, count))
remaining -= count
row += count
return "".join(ret)
return b"".join(ret)

def __getitem__(self, row):
"""Extract timestamps from a row, with table[n] notation."""


+ 7
- 2
nilmdb/server/nilmdb.py View File

@@ -538,7 +538,7 @@ class NilmDB(object):
dbinterval.db_endpos)

def stream_extract(self, path, start = None, end = None,
count = False, markup = False):
count = False, markup = False, binary = False):
"""
Returns (data, restart) tuple.

@@ -559,6 +559,9 @@ class NilmDB(object):
'markup', if true, indicates that returned data should be
marked with a comment denoting when a particular interval
starts, and another comment when an interval ends.

'binary', if true, means to return raw binary rather than
ASCII-formatted data.
"""
stream_id = self._stream_id(path)
table = self.data.getnode(path)
@@ -569,6 +572,8 @@ class NilmDB(object):
matched = 0
remaining = self.max_results
restart = None
if binary and (markup or count):
raise NilmDBError("binary mode can't be used with markup or count")
for interval in intervals.intersection(requested):
# Reading single rows from the table is too slow, so
# we use two bisections to find both the starting and
@@ -593,7 +598,7 @@ class NilmDB(object):
timestamp_to_string(interval.start) + "\n")

# Gather these results up
result.append(table.get_data(row_start, row_end))
result.append(table.get_data(row_start, row_end, binary))

# Count them
remaining -= row_end - row_start


+ 46
- 0
nilmdb/server/rocket.c View File

@@ -527,6 +527,46 @@ err:
return NULL;
}

/****
 * Extract to binary string containing raw little-endian binary data
 *
 * Python arguments: (offset, count).  Seeks to byte position 'offset'
 * in the open file and reads 'count' rows of self->binary_size bytes
 * each, returning them unmodified as a bytes object.
 *
 * Errors: Exception if no file is open, ValueError for a negative
 * count, OverflowError if the requested size can't be represented,
 * MemoryError if the result can't be allocated, and OSError if the
 * seek or read fails (including a short read at end of file).
 */
static PyObject *Rocket_extract_binary(Rocket *self, PyObject *args)
{
    long count;
    long offset;
    size_t row_size;
    size_t nrows;
    PyObject *pystr;

    if (!PyArg_ParseTuple(args, "ll", &offset, &count))
        return NULL;
    if (!self->file) {
        PyErr_SetString(PyExc_Exception, "no file");
        return NULL;
    }
    if (count < 0) {
        PyErr_SetString(PyExc_ValueError, "count must not be negative");
        return NULL;
    }
    /* Seek to target location */
    if (fseek(self->file, offset, SEEK_SET) < 0) {
        PyErr_SetFromErrno(PyExc_OSError);
        return NULL;
    }

    /* Compute the total size without overflowing */
    row_size = (size_t)self->binary_size;
    nrows = (size_t)count;
    if (row_size != 0 && nrows > (size_t)PY_SSIZE_T_MAX / row_size) {
        PyErr_SetString(PyExc_OverflowError,
                        "requested extraction is too large");
        return NULL;
    }

    /* Allocate the result up front and read straight into it, so
       no temporary buffer or extra copy is needed.  This also
       handles count == 0 without relying on malloc(0). */
    pystr = PyBytes_FromStringAndSize(NULL,
                                      (Py_ssize_t)(nrows * row_size));
    if (pystr == NULL)
        return NULL;

    /* Data in the file is already in the desired little-endian
       binary format, so just read it directly. */
    if (fread(PyBytes_AS_STRING(pystr), row_size, nrows,
              self->file) != nrows) {
        /* errno is only meaningful for a real I/O error; hitting
           end of file early leaves it unset. */
        if (ferror(self->file))
            PyErr_SetFromErrno(PyExc_OSError);
        else
            PyErr_SetString(PyExc_OSError,
                            "short read: fewer rows available "
                            "than requested");
        Py_DECREF(pystr);
        return NULL;
    }

    return pystr;
}

/****
* Extract timestamp
@@ -600,6 +640,12 @@ static PyMethodDef Rocket_methods[] = {
"Extract count rows of data from the file at offset offset.\n"
"Return an ascii formatted string according to the layout" },

{ "extract_binary",
(PyCFunction)Rocket_extract_binary, METH_VARARGS,
"extract_binary(self, offset, count)\n\n"
"Extract count rows of data from the file at offset offset.\n"
"Return a raw binary string of data matching the data layout." },

{ "extract_timestamp",
(PyCFunction)Rocket_extract_timestamp, METH_VARARGS,
"extract_timestamp(self, offset)\n\n"


+ 15
- 2
nilmdb/server/server.py View File

@@ -400,7 +400,7 @@ class Stream(NilmApp):
@chunked_response
@response_type("text/plain")
def extract(self, path, start = None, end = None,
count = False, markup = False):
count = False, markup = False, binary = False):
"""
Extract data from backend database. Streams the resulting
entries as ASCII text lines separated by newlines. This may
@@ -411,6 +411,11 @@ class Stream(NilmApp):

If 'markup' is True, adds comments to the stream denoting each
interval's start and end timestamp.

If 'binary' is True, return raw binary data, rather than lines
of ASCII-formatted data. Raw binary data is always
little-endian and matches the database types (including an
int64 timestamp).
"""
(start, end) = self._get_times(start, end)

@@ -418,6 +423,13 @@ class Stream(NilmApp):
if len(self.db.stream_list(path = path)) != 1:
raise cherrypy.HTTPError("404", "No such stream: " + path)

if binary:
cherrypy.response.headers['Content-Type'] = (
"application/octet-stream")
if markup or count:
raise cherrypy.HTTPError("400", "can't mix binary and "
"markup or count modes")

@workaround_cp_bug_1200
def content(start, end):
# Note: disable chunked responses to see tracebacks from here.
@@ -429,7 +441,8 @@ class Stream(NilmApp):

while True:
(data, restart) = self.db.stream_extract(
path, start, end, count = False, markup = markup)
path, start, end, count = False,
markup = markup, binary = binary)
yield data

if restart is None:


+ 1
- 0
setup.py View File

@@ -107,6 +107,7 @@ setup(name='nilmdb',
author_email = 'jim@jtan.com',
tests_require = [ 'nose',
'coverage',
'numpy',
],
setup_requires = [ 'distribute',
],


+ 1
- 0
tests/test.order View File

@@ -12,6 +12,7 @@ test_interval.py
test_bulkdata.py
test_nilmdb.py
test_client.py
test_numpyclient.py
test_cmdline.py

test_*.py

+ 18
- 0
tests/test_client.py View File

@@ -23,6 +23,7 @@ import warnings
import resource
import time
import re
import struct

from testutil.helpers import *

@@ -293,6 +294,23 @@ class TestClient(object):
# Test count
eq_(client.stream_count("/newton/prep"), 14400)

# Test binary output
with assert_raises(ClientError) as e:
list(client.stream_extract("/newton/prep",
markup = True, binary = True))
with assert_raises(ClientError) as e:
list(client.stream_extract("/newton/prep",
count = True, binary = True))
data = "".join(client.stream_extract("/newton/prep", binary = True))
# Quick check using struct
unpacker = struct.Struct("<qffffffff")
out = []
for i in range(14400):
out.append(unpacker.unpack_from(data, i * unpacker.size))
eq_(out[0], (1332511200000000, 266568.0, 224029.0, 5161.39990234375,
2525.169921875, 8350.83984375, 3724.699951171875,
1355.3399658203125, 2039.0))

client.close()

def test_client_06_generators(self):


+ 4
- 1
tests/test_nilmdb.py View File

@@ -90,13 +90,16 @@ class Test00Nilmdb(object): # named 00 so it runs first
eq_(db.stream_get_metadata("/newton/prep"), meta1)
eq_(db.stream_get_metadata("/newton/raw"), meta1)

# fill in some test coverage for start >= end
# fill in some misc. test coverage
with assert_raises(nilmdb.server.NilmDBError):
db.stream_remove("/newton/prep", 0, 0)
with assert_raises(nilmdb.server.NilmDBError):
db.stream_remove("/newton/prep", 1, 0)
db.stream_remove("/newton/prep", 0, 1)

with assert_raises(nilmdb.server.NilmDBError):
db.stream_extract("/newton/prep", count = True, binary = True)

db.close()

class TestBlockingServer(object):


+ 108
- 0
tests/test_numpyclient.py View File

@@ -0,0 +1,108 @@
# -*- coding: utf-8 -*-

import nilmdb.server
import nilmdb.client
import nilmdb.client.numpyclient

from nilmdb.utils.printf import *
from nilmdb.utils import timestamper
from nilmdb.client import ClientError, ServerError
from nilmdb.utils import datetime_tz

from nose.plugins.skip import SkipTest
from nose.tools import *
from nose.tools import assert_raises
import itertools
import distutils.version

from testutil.helpers import *

import numpy as np

testdb = "tests/numpyclient-testdb"
testurl = "http://localhost:32180/"

def setup_module():
    """Create a fresh test database and serve it in the background."""
    global test_server, test_db

    # Start from an empty database directory
    recursive_unlink(testdb)

    # Wrap the database in a serializer proxy and serve it on a
    # custom port without blocking the test runner
    proxied_nilmdb = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)
    test_db = proxied_nilmdb(testdb)
    server_options = dict(host = "127.0.0.1",
                          port = 32180,
                          stoppable = False,
                          fast_shutdown = True,
                          force_traceback = True)
    test_server = nilmdb.server.Server(test_db, **server_options)
    test_server.start(blocking = False)

def teardown_module():
    """Stop the background web app, then close its database."""
    # Both names are only read here, so no 'global' declaration is
    # required to reach the module-level objects.
    test_server.stop()
    test_db.close()

class TestNumpyClient(object):
    """Tests for NumpyClient against the server started by setup_module."""

    def test_numpyclient_01_basic(self):
        """Check connection, subclassing, and layout error handling."""
        client = nilmdb.client.numpyclient.NumpyClient(url = testurl)

        # Test basic connection
        LooseVersion = distutils.version.LooseVersion
        eq_(LooseVersion(client.version()),
            LooseVersion(test_server.version))

        # Verify subclassing
        assert(isinstance(client, nilmdb.client.Client))

        # Malformed layouts are rejected before any request is made
        bad_layouts = ("int8_t", "something_8", "integer_1")
        for layout in bad_layouts:
            with assert_raises(ValueError):
                list(client.stream_extract_numpy("/foo", layout=layout))

        # Well-formed layouts get as far as the server, which doesn't
        # know about the stream
        good_layouts = ("int8_1", "uint8_30", "int16_20", "float64_100")
        for layout in good_layouts:
            with assert_raises(ClientError) as e:
                list(client.stream_extract_numpy("/foo", layout=layout))
            in_("No such stream", str(e.exception))

        # Without a layout, it has to be looked up, which fails too
        with assert_raises(ClientError) as e:
            list(client.stream_extract_numpy("/foo"))
        in_("can't get layout for path", str(e.exception))

        client.close()

    def test_numpyclient_02_extract(self):
        """Insert text data, then read it back as Numpy arrays."""
        client = nilmdb.client.numpyclient.NumpyClient(url = testurl)

        # Insert some data as text
        client.stream_create("/newton/prep", "float32_8")
        testfile = "tests/data/prep-20120323T1000"
        start = nilmdb.utils.time.parse_time("20120323T1000")
        rate = 120
        source = timestamper.TimestamperRate(testfile, start, rate)
        client.stream_insert("/newton/prep", source,
                             start, start + 119999777)

        # Extract Numpy arrays, 1000 rows at a time, and stack them
        blocks = list(client.stream_extract_numpy("/newton/prep",
                                                  maxrows=1000))
        array = np.vstack(blocks)
        eq_(array.shape, (14400, 9))
        eq_(len(blocks), 15)

        # Try structured
        structured = list(client.stream_extract_numpy("/newton/prep",
                                                      structured = True))
        first = structured[0]
        flattened = np.c_[first['timestamp'], first['data']]
        assert(np.array_equal(flattened, array))

        # Compare.  Will be close but not exact because the conversion
        # to and from ASCII was lossy.
        source = timestamper.TimestamperRate(testfile, start, rate)
        text = " ".join(source)
        actual = np.fromstring(text, sep=' ').reshape(14400, 9)
        assert(np.allclose(array, actual))

        client.close()

Loading…
Cancel
Save