|
- # -*- coding: utf-8 -*-
-
- """Class for performing HTTP client requests via libcurl"""
-
- import nilmdb.utils
- import nilmdb.client.httpclient
- from nilmdb.client.errors import ClientError
-
- import re
- import time
- import simplejson as json
- import contextlib
-
- from nilmdb.utils.time import timestamp_to_string, string_to_timestamp
-
def extract_timestamp(line):
    """Return the timestamp parsed from the first whitespace-separated
    field of a line of data text."""
    fields = line.split()
    return string_to_timestamp(fields[0])
-
class Client(object):
    """Main client interface to the Nilm database."""

    def __init__(self, url, post_json=False):
        """Create a client that talks to the server at 'url'.

        When post_json is true, POST requests are sent with
        Content-Type 'application/json' rather than the default
        'x-www-form-urlencoded'."""
        self.http = nilmdb.client.httpclient.HTTPClient(url, post_json)
        self.post_json = post_json

    # Context-manager protocol: leaving the "with" block closes the
    # client.
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def _json_post_param(self, data):
        """Return the parameter in the form it should be POSTed:
        untouched when the whole request is sent as JSON, otherwise
        as a compact JSON-encoded string."""
        if not self.post_json:
            return json.dumps(data, separators=(',', ':'))
        # The request body itself will be JSON, so no extra encoding
        # is needed here.
        return data

    def close(self):
        """Close the connection; safe to call multiple times"""
        self.http.close()

    def geturl(self):
        """Return the base URL this client is using"""
        return self.http.baseurl

    def version(self):
        """Return the server version"""
        return self.http.get("version")

    def dbinfo(self):
        """Return the server's database info (path, size, free space)
        as a dictionary."""
        return self.http.get("dbinfo")

    def stream_list(self, path=None, layout=None, extended=False):
        """Request the stream list from the server and return it in
        human-friendly order (/stream/2 before /stream/10), sorted
        on the first element of each entry.  'path' and 'layout' are
        sent as request parameters when given; 'extended' adds
        extended=1 to the request."""
        query = {}
        if path is not None:
            query["path"] = path
        if layout is not None:
            query["layout"] = layout
        if extended:
            query["extended"] = 1

        def natural_key(entry):
            # Split the name into digit and non-digit runs so that
            # numeric parts compare as integers.
            pieces = re.split('([0-9]+)', entry[0])
            return [int(piece) if piece.isdigit() else piece
                    for piece in pieces]

        streams = self.http.get("stream/list", query)
        return sorted(streams, key=natural_key)

    def stream_get_metadata(self, path, keys=None):
        """Request metadata for the stream at 'path'.  If 'keys' is
        given, it is passed along as the 'key' request parameter."""
        query = {"path": path}
        if keys is not None:
            query["key"] = keys
        return self.http.get("stream/get_metadata", query)

    def stream_set_metadata(self, path, data):
        """Set stream metadata from a dictionary, replacing all
        existing metadata."""
        encoded = self._json_post_param(data)
        return self.http.post("stream/set_metadata",
                              {"path": path, "data": encoded})

    def stream_update_metadata(self, path, data):
        """Update stream metadata from a dictionary"""
        encoded = self._json_post_param(data)
        return self.http.post("stream/update_metadata",
                              {"path": path, "data": encoded})

    def stream_create(self, path, layout):
        """Create a new stream"""
        return self.http.post("stream/create",
                              {"path": path, "layout": layout})

    def stream_destroy(self, path):
        """Delete stream.  Fails if any data is still present."""
        return self.http.post("stream/destroy", {"path": path})

    def stream_rename(self, oldpath, newpath):
        """Rename a stream."""
        return self.http.post("stream/rename",
                              {"oldpath": oldpath, "newpath": newpath})

    def stream_remove(self, path, start=None, end=None):
        """Remove data from the specified time range"""
        query = {"path": path}
        for name, stamp in (("start", start), ("end", end)):
            if stamp is not None:
                query[name] = timestamp_to_string(stamp)
        return self.http.post("stream/remove", query)

    @contextlib.contextmanager
    def stream_insert_context(self, path, start=None, end=None):
        """Return a context manager that allows data to be efficiently
        inserted into a stream in a piecewise manner.  Data is provided
        as single lines, and is aggregated and sent to the server in
        larger chunks as necessary.  Data lines must match the database
        layout for the given path, and end with a newline.

        Example:
          with client.stream_insert_context('/path', start, end) as ctx:
            ctx.insert('1234567890.0 1 2 3 4\\n')
            ctx.insert('1234567891.0 1 2 3 4\\n')

        For more details, see help for nilmdb.client.client.StreamInserter

        This may make multiple requests to the server, if the data is
        large enough or enough time has passed between insertions.
        """
        inserter = StreamInserter(self.http, path, start, end)
        yield inserter
        # Only reached when the "with" body finished without raising.
        inserter.finalize()

    def stream_insert(self, path, data, start=None, end=None):
        """Insert rows of data into a stream.  data should be a string
        or iterable that provides ASCII data that matches the database
        layout for path.  See stream_insert_context for details on the
        'start' and 'end' parameters."""
        with self.stream_insert_context(path, start, end) as ctx:
            # A lone string is inserted as one chunk; anything else is
            # treated as an iterable of chunks.
            chunks = [data] if isinstance(data, basestring) else data
            for chunk in chunks:
                ctx.insert(chunk)
        return ctx.last_response

    def stream_intervals(self, path, start=None, end=None, diffpath=None):
        """
        Return a generator that yields each stream interval.

        If diffpath is not None, yields only interval ranges that are
        present in 'path' but not in 'diffpath'.
        """
        query = {"path": path}
        if diffpath is not None:
            query["diffpath"] = diffpath
        for name, stamp in (("start", start), ("end", end)):
            if stamp is not None:
                query[name] = timestamp_to_string(stamp)
        return self.http.get_gen("stream/intervals", query)

    def stream_extract(self, path, start=None, end=None,
                       count=False, markup=False, binary=False):
        """
        Extract data from a stream.  Returns a generator that yields
        lines of ASCII-formatted data that matches the database
        layout for the given path.

        Specify count = True to return a count of matching data points
        rather than the actual data.  The output format is unchanged.

        Specify markup = True to include comments in the returned data
        that indicate interval starts and ends.

        Specify binary = True to return chunks of raw binary data,
        rather than lines of ASCII-formatted data.  Raw binary data
        is always little-endian and matches the database types
        (including a uint64 timestamp).
        """
        query = {"path": path}
        for name, stamp in (("start", start), ("end", end)):
            if stamp is not None:
                query[name] = timestamp_to_string(stamp)
        # Each enabled option is sent to the server as "<name>=1".
        for name, enabled in (("count", count),
                              ("markup", markup),
                              ("binary", binary)):
            if enabled:
                query[name] = 1
        return self.http.get_gen("stream/extract", query, binary=binary)

    def stream_count(self, path, start=None, end=None):
        """
        Return the number of rows of data in the stream that satisfy
        the given timestamps.
        """
        rows = list(self.stream_extract(path, start, end, count=True))
        return int(rows[0])
-
class StreamInserter(object):
    """Object returned by stream_insert_context() that manages
    the insertion of rows of data into a particular path.

    The basic data flow is that we are filling a contiguous interval
    on the server, with no gaps, that extends from timestamp 'start'
    to timestamp 'end'.  Data timestamps satisfy 'start <= t < end'.

    Data is provided to .insert() as ASCII formatted data separated by
    newlines.  The chunks of data passed to .insert() do not need to
    match up with the newlines; less or more than one line can be passed.

    1. The first inserted line begins a new interval that starts at
    'start'.  If 'start' is not given, it is deduced from the first
    line's timestamp.

    2. Subsequent lines go into the same contiguous interval.  As lines
    are inserted, this routine may make multiple insertion requests to
    the server, but will structure the timestamps to leave no gaps.

    3. The current contiguous interval can be completed by manually
    calling .finalize(), which the context manager will also do
    automatically.  This will send any remaining data to the server,
    using the 'end' timestamp to end the interval.  If no 'end'
    was provided, it is deduced from the last timestamp seen,
    plus a small delta.

    After a .finalize(), inserting new data goes back to step 1.

    .update_start() can be called before step 1 to change the start
    time for the interval.  .update_end() can be called before step 3
    to change the end time for the interval.
    """

    # See design.md for a discussion of how much data to send.  This
    # is a soft limit -- we might send up to twice as much or so
    _max_data = 2 * 1024 * 1024

    # If at least this much data is still buffered right after an
    # intermediate send, insert() raises ValueError.
    _max_data_after_send = 64 * 1024

    def __init__(self, http, path, start=None, end=None):
        """'http' is the httpclient object.  'path' is the database
        path to insert to.  'start' and 'end' are used for the first
        contiguous interval."""
        # Response from the most recent insert request, or None if no
        # request has been made yet.
        self.last_response = None

        self._http = http
        self._path = path

        # Start and end for the overall contiguous interval we're
        # filling
        self._interval_start = start
        self._interval_end = end

        # Current data we're building up to send.  Each string
        # goes into the array, and gets joined all at once.
        # _block_len is the total length of the strings in _block_data.
        self._block_data = []
        self._block_len = 0

    def insert(self, data):
        """Insert a chunk of ASCII formatted data in string form.  The
        overall data must consist of lines terminated by '\\n'.

        Raises ValueError if, after an intermediate send, at least
        _max_data_after_send characters are still buffered."""
        length = len(data)
        maxdata = self._max_data

        if length > maxdata:
            # This could make our buffer more than twice what we
            # wanted to send, so split it up.  This is a bit
            # inefficient, but the user really shouldn't be providing
            # this much data at once.
            for cut in range(0, length, maxdata):
                self.insert(data[cut:(cut + maxdata)])
            return

        # Append this string to our list
        self._block_data.append(data)
        self._block_len += length

        # Send the block once we have enough data
        if self._block_len >= maxdata:
            self._send_block(final=False)
            if self._block_len >= self._max_data_after_send: # pragma: no cover
                raise ValueError("too much data left over after trying"
                                 " to send intermediate block; is it"
                                 " missing newlines or malformed?")

    def update_start(self, start):
        """Update the start time for the next contiguous interval.
        Call this before starting to insert data for a new interval,
        for example, after .finalize()"""
        self._interval_start = start

    def update_end(self, end):
        """Update the end time for the current contiguous interval.
        Call this before .finalize()"""
        self._interval_end = end

    def finalize(self):
        """Stop filling the current contiguous interval.
        All outstanding data will be sent, and the interval end
        time of the interval will be taken from the 'end' argument
        used when initializing this class, or the most recent
        value passed to update_end(), or the last timestamp plus
        a small epsilon value if no other endpoint was provided.

        If more data is inserted after a finalize(), it will become
        part of a new interval and there may be a gap left in-between."""
        self._send_block(final=True)

    def send(self):
        """Send any data that we might have buffered up.  Does not affect
        any other treatment of timestamps or endpoints."""
        self._send_block(final=False)

    def _get_first_noncomment(self, block):
        """Return the (start, end) indices of the first full line in
        block that isn't a comment, or raise IndexError if
        there isn't one.  'end' is one past the line's terminating
        newline, so block[start:end] includes the newline."""
        start = 0
        while True:
            end = block.find('\n', start)
            if end < 0:
                # No further complete (newline-terminated) line
                raise IndexError
            if block[start] != '#':
                return (start, (end + 1))
            start = end + 1

    def _get_last_noncomment(self, block):
        """Return the (start, end) indices of the last full line in
        block that isn't a comment, or raise IndexError if
        there isn't one.  'end' is the index of the line's terminating
        newline, so block[start:end] does not include the newline."""
        end = block.rfind('\n')
        if end <= 0:
            raise IndexError
        while True:
            # 'start' is the newline that ends the previous line, or
            # -1 if the candidate line begins at the start of block.
            start = block.rfind('\n', 0, end)
            if block[start + 1] != '#':
                return ((start + 1), end)
            if start == -1:
                raise IndexError
            end = start

    def _send_block(self, final=False):
        """Send data currently in the block.  The data sent will
        consist of full lines only, so some might be left over.

        If 'final' is true, everything buffered is sent (or dropped,
        if no usable endpoints could be determined for an empty or
        comment-only block) and the interval endpoints are reset.
        Otherwise the last full non-comment line, and anything after
        it, stays buffered for the next block.

        Raises ClientError if there is a non-comment line to send but
        no start or end time could be determined."""
        # Build the full string to send
        block = "".join(self._block_data)

        start_ts = self._interval_start
        if start_ts is None:
            # Pull start from the first line
            try:
                (spos, epos) = self._get_first_noncomment(block)
                start_ts = extract_timestamp(block[spos:epos])
            except (ValueError, IndexError):
                pass # no timestamp is OK, if we have no data

        if final:
            # For a final block, it must end in a newline, and the
            # ending timestamp is either the user-provided end,
            # or the timestamp of the last line plus epsilon.
            end_ts = self._interval_end
            try:
                if block[-1] != '\n':
                    raise ValueError("final block didn't end with a newline")
                if end_ts is None:
                    (spos, epos) = self._get_last_noncomment(block)
                    end_ts = extract_timestamp(block[spos:epos])
                    end_ts += nilmdb.utils.time.epsilon
            except (ValueError, IndexError):
                pass # no timestamp is OK, if we have no data
            self._block_data = []
            self._block_len = 0

            # Next block is completely fresh
            self._interval_start = None
            self._interval_end = None
        else:
            # An intermediate block, e.g. "line1\nline2\nline3\nline4"
            # We need to save "line3\nline4" for the next block, and
            # use the timestamp from "line3" as the ending timestamp
            # for this one.
            try:
                (spos, epos) = self._get_last_noncomment(block)
                end_ts = extract_timestamp(block[spos:epos])
            except (ValueError, IndexError):
                # If we found no timestamp, give up; we could send this
                # block later when we have more data.
                return
            if spos == 0:
                # Not enough data to send an intermediate block
                return
            if self._interval_end is not None and end_ts > self._interval_end:
                # User gave us bad endpoints; send it anyway, and let
                # the server complain so that the error is the same
                # as if we hadn't done this chunking.
                end_ts = self._interval_end

            # Keep everything from the last full non-comment line
            # onward.  The buffered length must count all of it (the
            # newline and any trailing partial line or comments too),
            # not just that one line, or the size checks in insert()
            # work from a number that is too small.
            leftover = block[spos:]
            self._block_data = [leftover]
            self._block_len = len(leftover)
            block = block[:spos]

            # Next block continues where this one ended
            self._interval_start = end_ts

        # Double check endpoints
        if start_ts is None or end_ts is None:
            # If the block has no non-comment lines, it's OK
            try:
                self._get_first_noncomment(block)
            except IndexError:
                return
            raise ClientError("have data to send, but no start/end times")

        # Send it
        params = {"path": self._path,
                  "start": timestamp_to_string(start_ts),
                  "end": timestamp_to_string(end_ts)}
        self.last_response = self._http.put("stream/insert", block, params)
|