nilmdb/nilmdb/client/client.py

388 lines
15 KiB
Python

# -*- coding: utf-8 -*-
"""Class for performing HTTP client requests via libcurl"""
import nilmdb
import nilmdb.utils
import nilmdb.client.httpclient
import time
import simplejson as json
import contextlib
def float_to_string(f):
"""Use repr to maintain full precision in the string output."""
return repr(float(f))
def extract_timestamp(line):
"""Extract just the timestamp from a line of data text"""
return float(line.split()[0])
class Client(object):
"""Main client interface to the Nilm database."""
def __init__(self, url, post_json = False):
"""Initialize client with given URL. If post_json is true,
POST requests are sent with Content-Type 'application/json'
instead of the default 'x-www-form-urlencoded'."""
self.http = nilmdb.client.httpclient.HTTPClient(url, post_json)
self.post_json = post_json
# __enter__/__exit__ allow this class to be a context manager
def __enter__(self):
return self
def __exit__(self, exc_type, exc_value, traceback):
self.close()
def _json_post_param(self, data):
"""Return compact json-encoded version of parameter"""
if self.post_json:
# If we're posting as JSON, we don't need to encode it further here
return data
return json.dumps(data, separators=(',',':'))
def close(self):
"""Close the connection; safe to call multiple times"""
self.http.close()
def geturl(self):
"""Return the URL we're using"""
return self.http.baseurl
def version(self):
"""Return server version"""
return self.http.get("version")
def dbinfo(self):
"""Return server database info (path, size, free space)
as a dictionary."""
return self.http.get("dbinfo")
def stream_list(self, path = None, layout = None, extent = False):
params = {}
if path is not None:
params["path"] = path
if layout is not None:
params["layout"] = layout
if extent:
params["extent"] = 1
return self.http.get("stream/list", params)
def stream_get_metadata(self, path, keys = None):
params = { "path": path }
if keys is not None:
params["key"] = keys
return self.http.get("stream/get_metadata", params)
def stream_set_metadata(self, path, data):
"""Set stream metadata from a dictionary, replacing all existing
metadata."""
params = {
"path": path,
"data": self._json_post_param(data)
}
return self.http.post("stream/set_metadata", params)
def stream_update_metadata(self, path, data):
"""Update stream metadata from a dictionary"""
params = {
"path": path,
"data": self._json_post_param(data)
}
return self.http.post("stream/update_metadata", params)
def stream_create(self, path, layout):
"""Create a new stream"""
params = { "path": path,
"layout" : layout }
return self.http.post("stream/create", params)
def stream_destroy(self, path):
"""Delete stream and its contents"""
params = { "path": path }
return self.http.post("stream/destroy", params)
def stream_remove(self, path, start = None, end = None):
"""Remove data from the specified time range"""
params = {
"path": path
}
if start is not None:
params["start"] = float_to_string(start)
if end is not None:
params["end"] = float_to_string(end)
return self.http.post("stream/remove", params)
@contextlib.contextmanager
def stream_insert_context(self, path, start = None, end = None):
"""Return a context manager that allows data to be efficiently
inserted into a stream in a piecewise manner. Data is be provided
as single lines, and is aggregated and sent to the server in larger
chunks as necessary. Data lines must match the database layout for
the given path, and end with a newline.
Example:
with client.stream_insert_context('/path', start, end) as ctx:
ctx.insert_line('1234567890.0 1 2 3 4\\n')
ctx.insert_line('1234567891.0 1 2 3 4\\n')
For more details, see help for nilmdb.client.client.StreamInserter
This may make multiple requests to the server, if the data is
large enough or enough time has passed between insertions.
"""
ctx = StreamInserter(self, path, start, end)
yield ctx
ctx.finalize()
def stream_insert(self, path, data, start = None, end = None):
"""Insert rows of data into a stream. data should be an
iterable object that provides ASCII data that matches the
database layout for path. See stream_insert_context for
details on the 'start' and 'end' parameters."""
with self.stream_insert_context(path, start, end) as ctx:
ctx.insert_iter(data)
return ctx.last_response
def stream_insert_block(self, path, block, start, end):
"""Insert an entire block of data into a stream. Like
stream_insert, except 'block' contains multiple lines of ASCII
text and is sent in one single chunk."""
params = { "path": path,
"start": float_to_string(start),
"end": float_to_string(end) }
return self.http.put("stream/insert", block, params)
def stream_intervals(self, path, start = None, end = None):
"""
Return a generator that yields each stream interval.
"""
params = {
"path": path
}
if start is not None:
params["start"] = float_to_string(start)
if end is not None:
params["end"] = float_to_string(end)
return self.http.get_gen("stream/intervals", params)
def stream_extract(self, path, start = None, end = None, count = False):
"""
Extract data from a stream. Returns a generator that yields
lines of ASCII-formatted data that matches the database
layout for the given path.
Specify count = True to return a count of matching data points
rather than the actual data. The output format is unchanged.
"""
params = {
"path": path,
}
if start is not None:
params["start"] = float_to_string(start)
if end is not None:
params["end"] = float_to_string(end)
if count:
params["count"] = 1
return self.http.get_gen("stream/extract", params)
def stream_count(self, path, start = None, end = None):
"""
Return the number of rows of data in the stream that satisfy
the given timestamps.
"""
counts = list(self.stream_extract(path, start, end, count = True))
return int(counts[0])
class StreamInserter(object):
"""Object returned by stream_insert_context() that manages
the insertion of rows of data into a particular path.
The basic data flow is that we are filling a contiguous interval
on the server, with no gaps, that extends from timestamp 'start'
to timestamp 'end'. Data timestamps satisfy 'start <= t < end'.
Data is provided by the user one line at a time with
.insert_line() or .insert_iter().
1. The first inserted line begins a new interval that starts at
'start'. If 'start' is not given, it is deduced from the first
line's timestamp.
2. Subsequent lines go into the same contiguous interval. As lines
are inserted, this routine may make multiple insertion requests to
the server, but will structure the timestamps to leave no gaps.
3. The current contiguous interval can be completed by manually
calling .finalize(), which the context manager will also do
automatically. This will send any remaining data to the server,
using the 'end' timestamp to end the interval.
After a .finalize(), inserting new data goes back to step 1.
.update_start() can be called before step 1 to change the start
time for the interval. .update_end() can be called before step 3
to change the end time for the interval.
"""
# See design.md for a discussion of how much data to send.
# These are soft limits -- actual data might be rounded up.
# We send when we have a certain amount of data queued, or
# when a certain amount of time has passed since the last send.
_max_data = 2 * 1024 * 1024
_max_time = 30
# Delta to add to the final timestamp, if "end" wasn't given
_end_epsilon = 1e-6
def __init__(self, client, path, start = None, end = None):
"""'http' is the httpclient object. 'path' is the database
path to insert to. 'start' and 'end' are used for the first
contiguous interval."""
self.last_response = None
self._client = client
self._path = path
# Start and end for the overall contiguous interval we're
# filling
self._interval_start = start
self._interval_end = end
# Data for the specific block we're building up to send
self._block_data = []
self._block_len = 0
self._block_start = None
# Time of last request
self._last_time = time.time()
# We keep a buffer of the two most recently inserted lines.
# Only the older one actually gets processed; the newer one
# is used to "look-ahead" to the next timestamp if we need
# to internally split an insertion into two requests.
self._line_old = None
self._line_new = None
def insert_iter(self, iter):
"""Insert all lines of ASCII formatted data from the given
iterable. Lines must be terminated with '\\n'."""
for line in iter:
self.insert_line(line)
def insert_line(self, line, allow_intermediate = True):
"""Insert a single line of ASCII formatted data. Line
must be terminated with '\\n'."""
if line and (len(line) < 1 or line[-1] != '\n'):
raise ValueError("lines must end in with a newline character")
# Store this new line, but process the previous (old) one.
# This lets us "look ahead" to the next line.
self._line_old = self._line_new
self._line_new = line
if self._line_old is None:
return
# If starting a new block, pull out the timestamp if needed.
if self._block_start is None:
if self._interval_start is not None:
# User provided a start timestamp. Use it once, then
# clear it for the next block.
self._block_start = self._interval_start
self._interval_start = None
else:
# Extract timestamp from the first row
self._block_start = extract_timestamp(self._line_old)
# Save the line
self._block_data.append(self._line_old)
self._block_len += len(self._line_old)
if allow_intermediate:
# Send an intermediate block to the server if needed.
elapsed = time.time() - self._last_time
if (self._block_len > self._max_data) or (elapsed > self._max_time):
self._send_block_intermediate()
def update_start(self, start):
"""Update the start time for the next contiguous interval.
Call this before starting to insert data for a new interval,
for example, after .finalize()"""
self._interval_start = start
def update_end(self, end):
"""Update the end time for the current contiguous interval.
Call this before .finalize()"""
self._interval_end = end
def finalize(self):
"""Stop filling the current contiguous interval.
All outstanding data will be sent, and the interval end
time of the interval will be taken from the 'end' argument
used when initializing this class, or the most recent
value passed to update_end(), or the last timestamp plus
a small epsilon value if no other endpoint was provided.
If more data is inserted after a finalize(), it will become
part of a new interval and there may be a gap left in-between."""
# Special marker tells insert_line that this is the end
self.insert_line(None, allow_intermediate = False)
if self._block_len > 0:
# We have data pending, so send the final block
self._send_block_final()
elif None not in (self._interval_start, self._interval_end):
# We have no data, but enough information to create an
# empty interval.
self._block_start = self._interval_start
self._interval_start = None
self._send_block_final()
else:
# No data, and no timestamps to use to create an empty
# interval.
pass
# Make sure both timestamps are emptied for future intervals.
self._interval_start = None
self._interval_end = None
def _send_block_intermediate(self):
"""Send data, when we still have more data to send.
Use the timestamp from the next line, so that the blocks
are contiguous."""
block_end = extract_timestamp(self._line_new)
if self._interval_end is not None and block_end > self._interval_end:
# Something's fishy -- the timestamp we found is after
# the user's specified end. Limit it here, and the
# server will return an error.
block_end = self._interval_end
self._send_block(block_end)
def _send_block_final(self):
"""Send data, when this is the last block for the interval.
There is no next line, so figure out the actual interval end
using interval_end or end_epsilon."""
if self._interval_end is not None:
# Use the user's specified end timestamp
block_end = self._interval_end
# Clear it in case we send more intervals in the future.
self._interval_end = None
else:
# Add an epsilon to the last timestamp we saw
block_end = extract_timestamp(self._line_old) + self._end_epsilon
self._send_block(block_end)
def _send_block(self, block_end):
"""Send current block to the server"""
self.last_response = self._client.stream_insert_block(
self._path, "".join(self._block_data),
self._block_start, block_end)
# Clear out the block
self._block_data = []
self._block_len = 0
self._block_start = None
# Note when we sent it
self._last_time = time.time()