|
- # -*- coding: utf-8 -*-
-
- """Class for performing HTTP client requests via libcurl"""
-
- import nilmdb
- import nilmdb.utils
- import nilmdb.client.httpclient
-
- import time
- import simplejson as json
- import contextlib
-
def float_to_string(f):
    """Render a number as text, using repr() so that the string
    output keeps the float's full precision.

    The argument is coerced to float first, so integers and numeric
    strings are accepted as well."""
    as_float = float(f)
    return repr(as_float)
-
def extract_timestamp(line):
    """Return the timestamp of a line of data text as a float.

    The timestamp is the first whitespace-separated field; all
    remaining fields on the line are ignored."""
    first_field = line.split()[0]
    return float(first_field)
-
class Client(object):
    """Main client interface to the Nilm database.

    Every request is delegated to the HTTP client object stored in
    self.http.  Instances may be used as context managers, in which
    case close() is called when the 'with' block exits."""

    def __init__(self, url, post_json = False):
        """Initialize client with given URL.  If post_json is true,
        POST requests are sent with Content-Type 'application/json'
        instead of the default 'x-www-form-urlencoded'."""
        self.http = nilmdb.client.httpclient.HTTPClient(url, post_json)

    # __enter__/__exit__ allow this class to be a context manager
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Returns None, so an exception raised inside the 'with' body
        # still propagates after the connection has been closed.
        self.close()

    def _json_param(self, data):
        """Return compact json-encoded version of parameter"""
        # Explicit separators drop the whitespace json.dumps would
        # otherwise put after ',' and ':'.
        return json.dumps(data, separators=(',',':'))

    def _interval_params(self, path, start, end):
        """Build the request parameters shared by the time-ranged
        requests: 'path', plus 'start' and/or 'end' encoded with
        float_to_string for whichever of them is not None."""
        params = { "path": path }
        if start is not None:
            params["start"] = float_to_string(start)
        if end is not None:
            params["end"] = float_to_string(end)
        return params

    def close(self):
        """Close the connection; safe to call multiple times"""
        self.http.close()

    def geturl(self):
        """Return the URL we're using"""
        return self.http.baseurl

    def version(self):
        """Return server version"""
        return self.http.get("version")

    def dbinfo(self):
        """Return server database info (path, size, free space)
        as a dictionary."""
        return self.http.get("dbinfo")

    def stream_list(self, path = None, layout = None, extent = False):
        """Request 'stream/list' from the server and return the
        response.  'path' and 'layout' are sent only when they are
        not None; a true 'extent' sends extent=1 (presumably asking
        the server to report each stream's time extent -- confirm
        against the server documentation)."""
        params = {}
        if path is not None:
            params["path"] = path
        if layout is not None:
            params["layout"] = layout
        if extent:
            params["extent"] = 1
        return self.http.get("stream/list", params)

    def stream_get_metadata(self, path, keys = None):
        """Request 'stream/get_metadata' for the stream at 'path' and
        return the response.  If 'keys' is not None it is sent as the
        'key' parameter (presumably limiting the result to those
        keys -- confirm against the server documentation)."""
        params = { "path": path }
        if keys is not None:
            params["key"] = keys
        return self.http.get("stream/get_metadata", params)

    def stream_set_metadata(self, path, data):
        """Set stream metadata from a dictionary, replacing all existing
        metadata."""
        params = {
            "path": path,
            "data": self._json_param(data)
            }
        return self.http.post("stream/set_metadata", params)

    def stream_update_metadata(self, path, data):
        """Update stream metadata from a dictionary"""
        params = {
            "path": path,
            "data": self._json_param(data)
            }
        return self.http.post("stream/update_metadata", params)

    def stream_create(self, path, layout):
        """Create a new stream"""
        params = { "path": path,
                   "layout" : layout }
        return self.http.post("stream/create", params)

    def stream_destroy(self, path):
        """Delete stream and its contents"""
        params = { "path": path }
        return self.http.post("stream/destroy", params)

    def stream_remove(self, path, start = None, end = None):
        """Remove data from the specified time range"""
        params = self._interval_params(path, start, end)
        return self.http.post("stream/remove", params)

    @contextlib.contextmanager
    def stream_insert_context(self, path, start = None, end = None):
        """Return a context manager that allows data to be efficiently
        inserted into a stream in a piecewise manner.  Data is provided
        as single lines, and is aggregated and sent to the server in larger
        chunks as necessary.  Data lines must match the database layout for
        the given path, and end with a newline.

        Example:
          with client.stream_insert_context('/path', start, end) as ctx:
            ctx.insert_line('1234567890.0 1 2 3 4\\n')
            ctx.insert_line('1234567891.0 1 2 3 4\\n')

        For more details, see help for nilmdb.client.client.StreamInserter

        This may make multiple requests to the server, if the data is
        large enough or enough time has passed between insertions.
        """
        ctx = StreamInserter(self, path, start, end)
        yield ctx
        # Only reached when the 'with' body finishes without raising;
        # an exception in the body is re-raised at the yield above, so
        # the pending data is not finalized in that case.
        ctx.finalize()

    def stream_insert(self, path, data, start = None, end = None):
        """Insert rows of data into a stream.  data should be an
        iterable object that provides ASCII data that matches the
        database layout for path.  See stream_insert_context for
        details on the 'start' and 'end' parameters.

        Returns the inserter's last_response, i.e. the result of the
        last block it sent (None if nothing was sent)."""
        with self.stream_insert_context(path, start, end) as ctx:
            ctx.insert_iter(data)
        return ctx.last_response

    def stream_insert_block(self, path, block, start, end):
        """Insert an entire block of data into a stream.  Like
        stream_insert, except 'block' contains multiple lines of ASCII
        text and is sent in one single chunk.

        Unlike the other time-ranged requests, 'start' and 'end' are
        both required here and are always encoded."""
        params = { "path": path,
                   "start": float_to_string(start),
                   "end": float_to_string(end) }
        return self.http.put("stream/insert", block, params)

    def stream_intervals(self, path, start = None, end = None):
        """
        Return a generator that yields each stream interval.
        """
        params = self._interval_params(path, start, end)
        return self.http.get_gen("stream/intervals", params)

    def stream_extract(self, path, start = None, end = None, count = False):
        """
        Extract data from a stream.  Returns a generator that yields
        lines of ASCII-formatted data that matches the database
        layout for the given path.

        Specify count = True to return a count of matching data points
        rather than the actual data.  The output format is unchanged.
        """
        params = self._interval_params(path, start, end)
        if count:
            params["count"] = 1
        return self.http.get_gen("stream/extract", params)

    def stream_count(self, path, start = None, end = None):
        """
        Return the number of rows of data in the stream that satisfy
        the given timestamps.
        """
        # With count = True the extract generator yields the count as
        # its first item; convert that item to an integer.
        counts = list(self.stream_extract(path, start, end, count = True))
        return int(counts[0])
-
class StreamInserter(object):
    """Object returned by stream_insert_context() that manages
    the insertion of rows of data into a particular path.

    The basic data flow is that we are filling a contiguous interval
    on the server, with no gaps, that extends from timestamp 'start'
    to timestamp 'end'.  Data timestamps satisfy 'start <= t < end'.
    Data is provided by the user one line at a time with
    .insert_line() or .insert_iter().

    1. The first inserted line begins a new interval that starts at
    'start'.  If 'start' is not given, it is deduced from the first
    line's timestamp.

    2. Subsequent lines go into the same contiguous interval.  As lines
    are inserted, this routine may make multiple insertion requests to
    the server, but will structure the timestamps to leave no gaps.

    3. The current contiguous interval can be completed by manually
    calling .finalize(), which the context manager will also do
    automatically.  This will send any remaining data to the server,
    using the 'end' timestamp to end the interval.

    After a .finalize(), inserting new data goes back to step 1.

    .update_start() can be called before step 1 to change the start
    time for the interval.  .update_end() can be called before step 3
    to change the end time for the interval.
    """

    # See design.md for a discussion of how much data to send.
    # These are soft limits -- actual data might be rounded up.
    # We send when we have a certain amount of data queued, or
    # when a certain amount of time has passed since the last send.
    # _max_data is compared against the summed len() of queued lines;
    # _max_time is compared against a time.time() difference (seconds).
    _max_data = 2 * 1024 * 1024
    _max_time = 30

    # Delta to add to the final timestamp, if "end" wasn't given
    _end_epsilon = 1e-6

    def __init__(self, client, path, start = None, end = None):
        """'client' is the Client object whose stream_insert_block()
        method is used to send each block.  'path' is the database
        path to insert to.  'start' and 'end' are used for the first
        contiguous interval."""
        # Result of the most recent stream_insert_block() call, or
        # None if nothing has been sent yet.
        self.last_response = None

        self._client = client
        self._path = path

        # Start and end for the overall contiguous interval we're
        # filling
        self._interval_start = start
        self._interval_end = end

        # Data for the specific block we're building up to send
        self._block_data = []
        self._block_len = 0
        self._block_start = None

        # Time of last request
        self._last_time = time.time()

        # We keep a buffer of the two most recently inserted lines.
        # Only the older one actually gets processed; the newer one
        # is used to "look-ahead" to the next timestamp if we need
        # to internally split an insertion into two requests.
        self._line_old = None
        self._line_new = None

    def insert_iter(self, iter):
        """Insert all lines of ASCII formatted data from the given
        iterable.  Lines must be terminated with '\\n'."""
        for line in iter:
            self.insert_line(line)

    def insert_line(self, line, allow_intermediate = True):
        """Insert a single line of ASCII formatted data.  Line
        must be terminated with '\\n'.

        Passing None as 'line' is the internal end-of-data marker used
        by finalize(); it flushes the look-ahead buffer without adding
        a new line.  'allow_intermediate' controls whether this call
        may send an intermediate block to the server.

        Raises ValueError if 'line' is an empty string or does not end
        with a newline."""
        # Test against None explicitly: a plain truthiness test would
        # also let the empty string skip validation, and it would then
        # fail later when its timestamp is extracted.
        if line is not None and (len(line) < 1 or line[-1] != '\n'):
            raise ValueError("lines must end in with a newline character")

        # Store this new line, but process the previous (old) one.
        # This lets us "look ahead" to the next line.
        self._line_old = self._line_new
        self._line_new = line
        if self._line_old is None:
            return

        # If starting a new block, pull out the timestamp if needed.
        if self._block_start is None:
            if self._interval_start is not None:
                # User provided a start timestamp.  Use it once, then
                # clear it for the next block.
                self._block_start = self._interval_start
                self._interval_start = None
            else:
                # Extract timestamp from the first row
                self._block_start = extract_timestamp(self._line_old)

        # Save the line
        self._block_data.append(self._line_old)
        self._block_len += len(self._line_old)

        if allow_intermediate:
            # Send an intermediate block to the server if needed.
            elapsed = time.time() - self._last_time
            if (self._block_len > self._max_data) or (elapsed > self._max_time):
                self._send_block_intermediate()

    def update_start(self, start):
        """Update the start time for the next contiguous interval.
        Call this before starting to insert data for a new interval,
        for example, after .finalize()"""
        self._interval_start = start

    def update_end(self, end):
        """Update the end time for the current contiguous interval.
        Call this before .finalize()"""
        self._interval_end = end

    def finalize(self):
        """Stop filling the current contiguous interval.
        All outstanding data will be sent, and the interval end
        time of the interval will be taken from the 'end' argument
        used when initializing this class, or the most recent
        value passed to update_end(), or the last timestamp plus
        a small epsilon value if no other endpoint was provided.

        If more data is inserted after a finalize(), it will become
        part of a new interval and there may be a gap left in-between."""
        # Special marker tells insert_line that this is the end.
        # It pushes the final buffered line into the block without
        # triggering an intermediate send.
        self.insert_line(None, allow_intermediate = False)

        if self._block_len > 0:
            # We have data pending, so send the final block
            self._send_block_final()
        elif None not in (self._interval_start, self._interval_end):
            # We have no data, but enough information to create an
            # empty interval.
            self._block_start = self._interval_start
            self._interval_start = None
            self._send_block_final()
        else:
            # No data, and no timestamps to use to create an empty
            # interval.
            pass

        # Make sure both timestamps are emptied for future intervals.
        self._interval_start = None
        self._interval_end = None

    def _send_block_intermediate(self):
        """Send data, when we still have more data to send.
        Use the timestamp from the next line, so that the blocks
        are contiguous."""
        block_end = extract_timestamp(self._line_new)
        if self._interval_end is not None and block_end > self._interval_end:
            # Something's fishy -- the timestamp we found is after
            # the user's specified end.  Limit it here, and the
            # server will return an error.
            block_end = self._interval_end
        self._send_block(block_end)

    def _send_block_final(self):
        """Send data, when this is the last block for the interval.
        There is no next line, so figure out the actual interval end
        using interval_end or end_epsilon."""
        if self._interval_end is not None:
            # Use the user's specified end timestamp
            block_end = self._interval_end
            # Clear it in case we send more intervals in the future.
            self._interval_end = None
        else:
            # Add an epsilon to the last timestamp we saw
            block_end = extract_timestamp(self._line_old) + self._end_epsilon
        self._send_block(block_end)

    def _send_block(self, block_end):
        """Send current block to the server, covering the range from
        the block's start timestamp to 'block_end', and remember the
        response in last_response."""
        self.last_response = self._client.stream_insert_block(
            self._path, "".join(self._block_data),
            self._block_start, block_end)

        # Clear out the block
        self._block_data = []
        self._block_len = 0
        self._block_start = None

        # Note when we sent it
        self._last_time = time.time()
|