# -*- coding: utf-8 -*-

"""Class for performing HTTP client requests via libcurl"""

import nilmdb.utils
import nilmdb.client.httpclient
from nilmdb.client.errors import ClientError

import time
import simplejson as json
import contextlib

from nilmdb.utils.time import timestamp_to_string, string_to_timestamp

def extract_timestamp(line):
    """Extract just the timestamp from a line of data text"""
    return string_to_timestamp(line.split()[0])

class Client(object):
    """Main client interface to the Nilm database."""

    def __init__(self, url, post_json = False):
        """Initialize client with given URL. If post_json is true,
        POST requests are sent with Content-Type 'application/json'
        instead of the default 'x-www-form-urlencoded'."""
        self.http = nilmdb.client.httpclient.HTTPClient(url, post_json)
        self.post_json = post_json

    # __enter__/__exit__ allow this class to be a context manager
    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
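
    # Example of using Client as a context manager so that close() is called
    # automatically; the URL is an illustrative placeholder, not something
    # defined by this module:
    #
    #   with Client("http://localhost/nilmdb/") as client:
    #       server_version = client.version()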

    def _json_post_param(self, data):
        """Return compact json-encoded version of parameter"""
        if self.post_json:
            # If we're posting as JSON, we don't need to encode it further here
            return data
        return json.dumps(data, separators=(',',':'))

    def close(self):
        """Close the connection; safe to call multiple times"""
        self.http.close()

    def geturl(self):
        """Return the URL we're using"""
        return self.http.baseurl

    def version(self):
        """Return server version"""
        return self.http.get("version")

    def dbinfo(self):
        """Return server database info (path, size, free space)
        as a dictionary."""
        return self.http.get("dbinfo")

    def stream_list(self, path = None, layout = None, extended = False):
        """Return a list of streams on the server, optionally restricted
        to those matching the given path and/or layout. If 'extended'
        is true, additional stream info is included."""
        params = {}
        if path is not None:
            params["path"] = path
        if layout is not None:
            params["layout"] = layout
        if extended:
            params["extended"] = 1
        return self.http.get("stream/list", params)

    def stream_get_metadata(self, path, keys = None):
        """Return stream metadata as a dictionary. If 'keys' is given,
        only return metadata for those keys."""
        params = { "path": path }
        if keys is not None:
            params["key"] = keys
        return self.http.get("stream/get_metadata", params)

    def stream_set_metadata(self, path, data):
        """Set stream metadata from a dictionary, replacing all existing
        metadata."""
        params = {
            "path": path,
            "data": self._json_post_param(data)
            }
        return self.http.post("stream/set_metadata", params)

    def stream_update_metadata(self, path, data):
        """Update stream metadata from a dictionary"""
        params = {
            "path": path,
            "data": self._json_post_param(data)
            }
        return self.http.post("stream/update_metadata", params)
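
    # Example contrasting the two metadata calls above (the path, keys, and
    # values are illustrative placeholders):
    #
    #   client.stream_set_metadata("/test/raw", {"description": "raw data"})
    #   client.stream_update_metadata("/test/raw", {"scale_factor": "1.0"})
    #
    # set_metadata replaces all existing metadata; update_metadata merges the
    # given keys into whatever metadata already exists.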

    def stream_create(self, path, layout):
        """Create a new stream"""
        params = { "path": path,
                   "layout" : layout }
        return self.http.post("stream/create", params)

    def stream_destroy(self, path):
        """Delete stream. Fails if any data is still present."""
        params = { "path": path }
        return self.http.post("stream/destroy", params)

    def stream_rename(self, oldpath, newpath):
        """Rename a stream."""
        params = { "oldpath": oldpath,
                   "newpath": newpath }
        return self.http.post("stream/rename", params)

    def stream_remove(self, path, start = None, end = None):
        """Remove data from the specified time range"""
        params = {
            "path": path
            }
        if start is not None:
            params["start"] = timestamp_to_string(start)
        if end is not None:
            params["end"] = timestamp_to_string(end)
        return self.http.post("stream/remove", params)

    @contextlib.contextmanager
    def stream_insert_context(self, path, start = None, end = None):
        """Return a context manager that allows data to be efficiently
        inserted into a stream in a piecewise manner. Data is provided
        as single lines, and is aggregated and sent to the server in larger
        chunks as necessary. Data lines must match the database layout for
        the given path, and end with a newline.

        Example:
            with client.stream_insert_context('/path', start, end) as ctx:
                ctx.insert('1234567890.0 1 2 3 4\\n')
                ctx.insert('1234567891.0 1 2 3 4\\n')

        For more details, see help for nilmdb.client.client.StreamInserter.

        This may make multiple requests to the server, if the data is
        large enough or enough time has passed between insertions.
        """
        ctx = StreamInserter(self.http, path, start, end)
        yield ctx
        ctx.finalize()

    def stream_insert(self, path, data, start = None, end = None):
        """Insert rows of data into a stream. data should be a string
        or iterable that provides ASCII data that matches the database
        layout for path. See stream_insert_context for details on the
        'start' and 'end' parameters."""
        with self.stream_insert_context(path, start, end) as ctx:
            if isinstance(data, basestring):
                ctx.insert(data)
            else:
                for chunk in data:
                    ctx.insert(chunk)
        return ctx.last_response
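
    # Example calls for stream_insert; the path and data values are
    # illustrative, using the same line format as the
    # stream_insert_context() example above:
    #
    #   client.stream_insert('/path', '1234567890.0 1 2 3 4\n')
    #   client.stream_insert('/path', ['1234567890.0 1 2 3 4\n',
    #                                  '1234567891.0 1 2 3 4\n'])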

    def stream_intervals(self, path, start = None, end = None, diffpath = None):
        """
        Return a generator that yields each stream interval.

        If diffpath is not None, yields only interval ranges that are
        present in 'path' but not in 'diffpath'.
        """
        params = {
            "path": path
            }
        if diffpath is not None:
            params["diffpath"] = diffpath
        if start is not None:
            params["start"] = timestamp_to_string(start)
        if end is not None:
            params["end"] = timestamp_to_string(end)
        return self.http.get_gen("stream/intervals", params)
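
    # Example of consuming the stream_intervals generator; each yielded
    # interval is assumed here to be a (start, end) timestamp pair, and the
    # path is an illustrative placeholder:
    #
    #   for (ival_start, ival_end) in client.stream_intervals('/path'):
    #       ...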

    def stream_extract(self, path, start = None, end = None, count = False):
        """
        Extract data from a stream. Returns a generator that yields
        lines of ASCII-formatted data that matches the database
        layout for the given path.

        Specify count = True to return a count of matching data points
        rather than the actual data. The output format is unchanged.
        """
        params = {
            "path": path,
            }
        if start is not None:
            params["start"] = timestamp_to_string(start)
        if end is not None:
            params["end"] = timestamp_to_string(end)
        if count:
            params["count"] = 1
        return self.http.get_gen("stream/extract", params)

    def stream_count(self, path, start = None, end = None):
        """
        Return the number of rows of data in the stream that satisfy
        the given timestamps.
        """
        counts = list(self.stream_extract(path, start, end, count = True))
        return int(counts[0])
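
    # Example: extracting lines and counting rows over the same range (the
    # path, timestamps, and handle_line() are illustrative placeholders):
    #
    #   for line in client.stream_extract('/path', start, end):
    #       handle_line(line)
    #   rows = client.stream_count('/path', start, end)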

class StreamInserter(object):
    """Object returned by stream_insert_context() that manages
    the insertion of rows of data into a particular path.

    The basic data flow is that we are filling a contiguous interval
    on the server, with no gaps, that extends from timestamp 'start'
    to timestamp 'end'. Data timestamps satisfy 'start <= t < end'.

    Data is provided to .insert() as ASCII formatted data separated by
    newlines. The chunks of data passed to .insert() do not need to
    match up with the newlines; less or more than one line can be passed.

    1. The first inserted line begins a new interval that starts at
       'start'. If 'start' is not given, it is deduced from the first
       line's timestamp.

    2. Subsequent lines go into the same contiguous interval. As lines
       are inserted, this routine may make multiple insertion requests to
       the server, but will structure the timestamps to leave no gaps.

    3. The current contiguous interval can be completed by manually
       calling .finalize(), which the context manager will also do
       automatically. This will send any remaining data to the server,
       using the 'end' timestamp to end the interval. If no 'end'
       was provided, it is deduced from the last timestamp seen,
       plus a small delta.

    After a .finalize(), inserting new data goes back to step 1.

    .update_start() can be called before step 1 to change the start
    time for the interval. .update_end() can be called before step 3
    to change the end time for the interval.
    """

    # See design.md for a discussion of how much data to send. This
    # is a soft limit -- we might send up to twice as much or so
    _max_data = 2 * 1024 * 1024
    _max_data_after_send = 64 * 1024

    def __init__(self, http, path, start = None, end = None):
        """'http' is the httpclient object. 'path' is the database
        path to insert to. 'start' and 'end' are used for the first
        contiguous interval."""
        self.last_response = None

        self._http = http
        self._path = path

        # Start and end for the overall contiguous interval we're
        # filling
        self._interval_start = start
        self._interval_end = end

        # Current data we're building up to send. Each string
        # goes into the array, and gets joined all at once.
        self._block_data = []
        self._block_len = 0

    def insert(self, data):
        """Insert a chunk of ASCII formatted data in string form. The
        overall data must consist of lines terminated by '\\n'."""
        length = len(data)
        maxdata = self._max_data
        if length > maxdata:
            # This could make our buffer more than twice what we
            # wanted to send, so split it up. This is a bit
            # inefficient, but the user really shouldn't be providing
            # this much data at once.
            for cut in range(0, length, maxdata):
                self.insert(data[cut:(cut + maxdata)])
            return

        # Append this string to our list
        self._block_data.append(data)
        self._block_len += length

        # Send the block once we have enough data
        if self._block_len >= maxdata:
            self._send_block(final = False)
            if self._block_len >= self._max_data_after_send: # pragma: no cover
                raise ValueError("too much data left over after trying"
                                 " to send intermediate block; is it"
                                 " missing newlines or malformed?")

    def update_start(self, start):
        """Update the start time for the next contiguous interval.
        Call this before starting to insert data for a new interval,
        for example, after .finalize()"""
        self._interval_start = start

    def update_end(self, end):
        """Update the end time for the current contiguous interval.
        Call this before .finalize()"""
        self._interval_end = end

    def finalize(self):
        """Stop filling the current contiguous interval.
        All outstanding data will be sent, and the end time of the
        interval will be taken from the 'end' argument used when
        initializing this class, or the most recent value passed to
        update_end(), or the last timestamp seen plus a small epsilon
        value if no other endpoint was provided.

        If more data is inserted after a finalize(), it will become
        part of a new interval and there may be a gap left in-between."""
        self._send_block(final = True)

    def _get_first_noncomment(self, block):
        """Return the (start, end) indices of the first full line in
        block that isn't a comment, or raise IndexError if
        there isn't one."""
        start = 0
        while True:
            end = block.find('\n', start)
            if end < 0:
                raise IndexError
            if block[start] != '#':
                return (start, (end + 1))
            start = end + 1

    def _get_last_noncomment(self, block):
        """Return the (start, end) indices of the last full line in
        block that isn't a comment, or raise IndexError if
        there isn't one."""
        end = block.rfind('\n')
        if end <= 0:
            raise IndexError
        while True:
            start = block.rfind('\n', 0, end)
            if block[start + 1] != '#':
                return ((start + 1), end)
            if start == -1:
                raise IndexError
            end = start

    def _send_block(self, final = False):
        """Send data currently in the block. The data sent will
        consist of full lines only, so some might be left over."""
        # Build the full string to send
        block = "".join(self._block_data)

        start_ts = self._interval_start
        if start_ts is None:
            # Pull start from the first line
            try:
                (spos, epos) = self._get_first_noncomment(block)
                start_ts = extract_timestamp(block[spos:epos])
            except (ValueError, IndexError):
                pass # no timestamp is OK, if we have no data

        if final:
            # For a final block, it must end in a newline, and the
            # ending timestamp is either the user-provided end,
            # or the timestamp of the last line plus epsilon.
            end_ts = self._interval_end
            try:
                if block[-1] != '\n':
                    raise ValueError("final block didn't end with a newline")
                if end_ts is None:
                    (spos, epos) = self._get_last_noncomment(block)
                    end_ts = extract_timestamp(block[spos:epos])
                    end_ts += nilmdb.utils.time.epsilon
            except (ValueError, IndexError):
                pass # no timestamp is OK, if we have no data
            self._block_data = []
            self._block_len = 0

            # Next block is completely fresh
            self._interval_start = None
            self._interval_end = None
        else:
            # An intermediate block, e.g. "line1\nline2\nline3\nline4"
            # We need to save "line3\nline4" for the next block, and
            # use the timestamp from "line3" as the ending timestamp
            # for this one.
            try:
                (spos, epos) = self._get_last_noncomment(block)
                end_ts = extract_timestamp(block[spos:epos])
            except (ValueError, IndexError):
                # If we found no timestamp, give up; we could send this
                # block later when we have more data.
                return

            if spos == 0:
                # Not enough data to send an intermediate block
                return

            if self._interval_end is not None and end_ts > self._interval_end:
                # User gave us bad endpoints; send it anyway, and let
                # the server complain so that the error is the same
                # as if we hadn't done this chunking.
                end_ts = self._interval_end

            self._block_data = [ block[spos:] ]
            self._block_len = (epos - spos)
            block = block[:spos]

            # Next block continues where this one ended
            self._interval_start = end_ts

        # Double check endpoints
        if start_ts is None or end_ts is None:
            # If the block has no non-comment lines, it's OK
            try:
                self._get_first_noncomment(block)
            except IndexError:
                return
            raise ClientError("have data to send, but no start/end times")

        # Send it
        params = { "path": self._path,
                   "start": timestamp_to_string(start_ts),
                   "end": timestamp_to_string(end_ts) }
        self.last_response = self._http.put("stream/insert", block, params)

        return