You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

550 lines
21 KiB

  1. """CherryPy-based server for accessing NILM database via HTTP"""
  2. # Need absolute_import so that "import nilmdb" won't pull in
  3. # nilmdb.py, but will pull the nilmdb module instead.
  4. import nilmdb.server
  5. from nilmdb.utils.printf import *
  6. from nilmdb.server.errors import NilmDBError
  7. from nilmdb.utils.time import string_to_timestamp
  8. import cherrypy
  9. import sys
  10. import os
  11. import socket
  12. import json
  13. import decorator
  14. import psutil
  15. import traceback
  16. from nilmdb.server.serverutil import (
  17. chunked_response,
  18. response_type,
  19. exception_to_httperror,
  20. CORS_allow,
  21. json_to_request_params,
  22. json_error_page,
  23. cherrypy_start,
  24. cherrypy_stop,
  25. bool_param,
  26. )
# Register CORS_allow as a CherryPy tool attached to the 'on_start_resource'
# hook point, so handlers can use it as @cherrypy.tools.CORS_allow(...) and
# the application config can enable it via 'tools.CORS_allow.*' keys.
cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow)
  29. class NilmApp(object):
  30. def __init__(self, db):
  31. self.db = db
  32. # CherryPy apps
  33. class Root(NilmApp):
  34. """Root application for NILM database"""
  35. def __init__(self, db):
  36. super(Root, self).__init__(db)
  37. # /
  38. @cherrypy.expose
  39. def index(self):
  40. cherrypy.response.headers['Content-Type'] = 'text/plain'
  41. msg = sprintf("This is NilmDB version %s, running on host %s.\n",
  42. nilmdb.__version__, socket.getfqdn())
  43. return msg
  44. # /favicon.ico
  45. @cherrypy.expose
  46. def favicon_ico(self):
  47. raise cherrypy.NotFound()
  48. # /version
  49. @cherrypy.expose
  50. @cherrypy.tools.json_out()
  51. def version(self):
  52. return nilmdb.__version__
  53. # /dbinfo
  54. @cherrypy.expose
  55. @cherrypy.tools.json_out()
  56. def dbinfo(self):
  57. """Return a dictionary with the database path,
  58. size of the database in bytes, and free disk space in bytes"""
  59. path = self.db.get_basepath()
  60. usage = psutil.disk_usage(path)
  61. dbsize = nilmdb.utils.du(path)
  62. return { "path": path,
  63. "size": dbsize,
  64. "other": max(usage.used - dbsize, 0),
  65. "reserved": max(usage.total - usage.used - usage.free, 0),
  66. "free": usage.free }
  67. class Stream(NilmApp):
  68. """Stream-specific operations"""
  69. # Helpers
  70. def _get_times(self, start_param, end_param):
  71. (start, end) = (None, None)
  72. try:
  73. if start_param is not None:
  74. start = string_to_timestamp(start_param)
  75. except Exception:
  76. raise cherrypy.HTTPError("400 Bad Request", sprintf(
  77. "invalid start (%s): must be a numeric timestamp", start_param))
  78. try:
  79. if end_param is not None:
  80. end = string_to_timestamp(end_param)
  81. except Exception:
  82. raise cherrypy.HTTPError("400 Bad Request", sprintf(
  83. "invalid end (%s): must be a numeric timestamp", end_param))
  84. if start is not None and end is not None:
  85. if start >= end:
  86. raise cherrypy.HTTPError(
  87. "400 Bad Request",
  88. sprintf("start must precede end (%s >= %s)",
  89. start_param, end_param))
  90. return (start, end)
  91. # /stream/list
  92. # /stream/list?layout=float32_8
  93. # /stream/list?path=/newton/prep&extended=1
  94. @cherrypy.expose
  95. @cherrypy.tools.json_out()
  96. def list(self, path = None, layout = None, extended = None):
  97. """List all streams in the database. With optional path or
  98. layout parameter, just list streams that match the given path
  99. or layout.
  100. If extended is missing or zero, returns a list of lists
  101. containing the path and layout: [ path, layout ]
  102. If extended is true, returns a list of lists containing
  103. extended info: [ path, layout, extent_min, extent_max,
  104. total_rows, total_seconds ]. More data may be added.
  105. """
  106. return self.db.stream_list(path, layout, bool(extended))
  107. # /stream/create?path=/newton/prep&layout=float32_8
  108. @cherrypy.expose
  109. @cherrypy.tools.json_in()
  110. @cherrypy.tools.json_out()
  111. @exception_to_httperror(NilmDBError, ValueError)
  112. @cherrypy.tools.CORS_allow(methods = ["POST"])
  113. def create(self, path, layout):
  114. """Create a new stream in the database. Provide path
  115. and one of the nilmdb.layout.layouts keys.
  116. """
  117. return self.db.stream_create(path, layout)
  118. # /stream/destroy?path=/newton/prep
  119. @cherrypy.expose
  120. @cherrypy.tools.json_in()
  121. @cherrypy.tools.json_out()
  122. @exception_to_httperror(NilmDBError)
  123. @cherrypy.tools.CORS_allow(methods = ["POST"])
  124. def destroy(self, path):
  125. """Delete a stream. Fails if any data is still present."""
  126. return self.db.stream_destroy(path)
  127. # /stream/rename?oldpath=/newton/prep&newpath=/newton/prep/1
  128. @cherrypy.expose
  129. @cherrypy.tools.json_in()
  130. @cherrypy.tools.json_out()
  131. @exception_to_httperror(NilmDBError, ValueError)
  132. @cherrypy.tools.CORS_allow(methods = ["POST"])
  133. def rename(self, oldpath, newpath):
  134. """Rename a stream."""
  135. return self.db.stream_rename(oldpath, newpath)
  136. # /stream/get_metadata?path=/newton/prep
  137. # /stream/get_metadata?path=/newton/prep&key=foo&key=bar
  138. @cherrypy.expose
  139. @cherrypy.tools.json_out()
  140. def get_metadata(self, path, key=None):
  141. """Get metadata for the named stream. If optional
  142. key parameters are specified, only return metadata
  143. matching the given keys."""
  144. try:
  145. data = self.db.stream_get_metadata(path)
  146. except nilmdb.server.nilmdb.StreamError as e:
  147. raise cherrypy.HTTPError("404 Not Found", str(e))
  148. if key is None: # If no keys specified, return them all
  149. key = list(data.keys())
  150. elif not isinstance(key, list):
  151. key = [ key ]
  152. result = {}
  153. for k in key:
  154. if k in data:
  155. result[k] = data[k]
  156. else: # Return "None" for keys with no matching value
  157. result[k] = None
  158. return result
  159. # Helper for set_metadata and get_metadata
  160. def _metadata_helper(self, function, path, data):
  161. if not isinstance(data, dict):
  162. try:
  163. data = dict(json.loads(data))
  164. except TypeError as e:
  165. raise NilmDBError("can't parse 'data' parameter: " + str(e))
  166. for key in data:
  167. if not (isinstance(data[key], str) or
  168. isinstance(data[key], float) or
  169. isinstance(data[key], int)):
  170. raise NilmDBError("metadata values must be a string or number")
  171. function(path, data)
  172. # /stream/set_metadata?path=/newton/prep&data=<json>
  173. @cherrypy.expose
  174. @cherrypy.tools.json_in()
  175. @cherrypy.tools.json_out()
  176. @exception_to_httperror(NilmDBError, LookupError)
  177. @cherrypy.tools.CORS_allow(methods = ["POST"])
  178. def set_metadata(self, path, data):
  179. """Set metadata for the named stream, replacing any existing
  180. metadata. Data can be json-encoded or a plain dictionary."""
  181. self._metadata_helper(self.db.stream_set_metadata, path, data)
  182. # /stream/update_metadata?path=/newton/prep&data=<json>
  183. @cherrypy.expose
  184. @cherrypy.tools.json_in()
  185. @cherrypy.tools.json_out()
  186. @exception_to_httperror(NilmDBError, LookupError, ValueError)
  187. @cherrypy.tools.CORS_allow(methods = ["POST"])
  188. def update_metadata(self, path, data):
  189. """Set metadata for the named stream, replacing any existing
  190. metadata. Data can be json-encoded or a plain dictionary."""
  191. self._metadata_helper(self.db.stream_update_metadata, path, data)
  192. # /stream/insert?path=/newton/prep
  193. @cherrypy.expose
  194. @cherrypy.tools.json_out()
  195. @exception_to_httperror(NilmDBError, ValueError)
  196. @cherrypy.tools.CORS_allow(methods = ["PUT"])
  197. def insert(self, path, start, end, binary = False):
  198. """
  199. Insert new data into the database. Provide textual data
  200. (matching the path's layout) as a HTTP PUT.
  201. If 'binary' is True, expect raw binary data, rather than lines
  202. of ASCII-formatted data. Raw binary data is always
  203. little-endian and matches the database types (including an
  204. int64 timestamp).
  205. """
  206. binary = bool_param(binary)
  207. # Important that we always read the input before throwing any
  208. # errors, to keep lengths happy for persistent connections.
  209. # Note that CherryPy 3.2.2 has a bug where this fails for GET
  210. # requests, if we ever want to handle those (issue #1134)
  211. body = cherrypy.request.body.read()
  212. # Verify content type for binary data
  213. content_type = cherrypy.request.headers.get('content-type')
  214. if binary and content_type:
  215. if content_type != "application/octet-stream":
  216. raise cherrypy.HTTPError("400", "Content type must be "
  217. "application/octet-stream for "
  218. "binary data, not " + content_type)
  219. # Note that non-binary data is *not* decoded from bytes to string,
  220. # but rather passed directly to stream_insert.
  221. # Check path and get layout
  222. if len(self.db.stream_list(path = path)) != 1:
  223. raise cherrypy.HTTPError("404", "No such stream: " + path)
  224. # Check limits
  225. (start, end) = self._get_times(start, end)
  226. # Pass the data directly to nilmdb, which will parse it and
  227. # raise a ValueError if there are any problems.
  228. self.db.stream_insert(path, start, end, body, binary)
  229. # Done
  230. return
  231. # /stream/remove?path=/newton/prep
  232. # /stream/remove?path=/newton/prep&start=1234567890.0&end=1234567899.0
  233. @cherrypy.expose
  234. @cherrypy.tools.json_in()
  235. @cherrypy.tools.CORS_allow(methods = ["POST"])
  236. @chunked_response
  237. @response_type("application/x-json-stream")
  238. def remove(self, path, start = None, end = None):
  239. """
  240. Remove data from the backend database. Removes all data in
  241. the interval [start, end).
  242. Returns the number of data points removed. Since this is a potentially
  243. long-running operation, multiple numbers may be returned as the
  244. data gets removed from the backend database. The total number of
  245. points removed is the sum of all of these numbers.
  246. """
  247. (start, end) = self._get_times(start, end)
  248. if len(self.db.stream_list(path = path)) != 1:
  249. raise cherrypy.HTTPError("404", "No such stream: " + path)
  250. def content(start, end):
  251. # Note: disable chunked responses to see tracebacks from here.
  252. while True:
  253. (removed, restart) = self.db.stream_remove(path, start, end)
  254. response = json.dumps(removed) + "\r\n"
  255. yield response.encode('utf-8')
  256. if restart is None:
  257. break
  258. start = restart
  259. return content(start, end)
  260. # /stream/intervals?path=/newton/prep
  261. # /stream/intervals?path=/newton/prep&start=1234567890.0&end=1234567899.0
  262. # /stream/intervals?path=/newton/prep&diffpath=/newton/prep2
  263. @cherrypy.expose
  264. @chunked_response
  265. @response_type("application/x-json-stream")
  266. def intervals(self, path, start = None, end = None, diffpath = None):
  267. """
  268. Get intervals from backend database. Streams the resulting
  269. intervals as JSON strings separated by CR LF pairs. This may
  270. make multiple requests to the nilmdb backend to avoid causing
  271. it to block for too long.
  272. Returns intervals between 'start' and 'end' belonging to
  273. 'path'. If 'diff' is provided, the set-difference between
  274. intervals in 'path' and intervals in 'diffpath' are
  275. returned instead.
  276. Note that the response type is the non-standard
  277. 'application/x-json-stream' for lack of a better option.
  278. """
  279. (start, end) = self._get_times(start, end)
  280. if len(self.db.stream_list(path = path)) != 1:
  281. raise cherrypy.HTTPError("404", "No such stream: " + path)
  282. if diffpath and len(self.db.stream_list(path = diffpath)) != 1:
  283. raise cherrypy.HTTPError("404", "No such stream: " + diffpath)
  284. def content(start, end):
  285. # Note: disable chunked responses to see tracebacks from here.
  286. while True:
  287. (ints, restart) = self.db.stream_intervals(path, start, end,
  288. diffpath)
  289. response = ''.join([ json.dumps(i) + "\r\n" for i in ints ])
  290. yield response.encode('utf-8')
  291. if restart is None:
  292. break
  293. start = restart
  294. return content(start, end)
  295. # /stream/extract?path=/newton/prep&start=1234567890.0&end=1234567899.0
  296. @cherrypy.expose
  297. @chunked_response
  298. def extract(self, path, start = None, end = None,
  299. count = False, markup = False, binary = False):
  300. """
  301. Extract data from backend database. Streams the resulting
  302. entries as ASCII text lines separated by newlines. This may
  303. make multiple requests to the nilmdb backend to avoid causing
  304. it to block for too long.
  305. If 'count' is True, returns a count rather than actual data.
  306. If 'markup' is True, adds comments to the stream denoting each
  307. interval's start and end timestamp.
  308. If 'binary' is True, return raw binary data, rather than lines
  309. of ASCII-formatted data. Raw binary data is always
  310. little-endian and matches the database types (including an
  311. int64 timestamp).
  312. """
  313. binary = bool_param(binary)
  314. markup = bool_param(markup)
  315. count = bool_param(count)
  316. (start, end) = self._get_times(start, end)
  317. # Check path and get layout
  318. if len(self.db.stream_list(path = path)) != 1:
  319. raise cherrypy.HTTPError("404", "No such stream: " + path)
  320. if binary:
  321. content_type = "application/octet-stream"
  322. if markup or count:
  323. raise cherrypy.HTTPError("400", "can't mix binary and "
  324. "markup or count modes")
  325. else:
  326. content_type = "text/plain"
  327. cherrypy.response.headers['Content-Type'] = content_type
  328. def content(start, end):
  329. # Note: disable chunked responses to see tracebacks from here.
  330. if count:
  331. matched = self.db.stream_extract(path, start, end,
  332. count = True)
  333. yield sprintf(b"%d\n", matched)
  334. return
  335. while True:
  336. (data, restart) = self.db.stream_extract(
  337. path, start, end, count = False,
  338. markup = markup, binary = binary)
  339. yield data
  340. if restart is None:
  341. return
  342. start = restart
  343. return content(start, end)
  344. class Exiter(object):
  345. """App that exits the server, for testing"""
  346. @cherrypy.expose
  347. def index(self):
  348. cherrypy.response.headers['Content-Type'] = 'text/plain'
  349. def content():
  350. yield b'Exiting by request'
  351. raise SystemExit
  352. return content()
  353. index._cp_config = { 'response.stream': True }
  354. class Server(object):
  355. def __init__(self, db, host = '127.0.0.1', port = 8080,
  356. stoppable = False, # whether /exit URL exists
  357. embedded = True, # hide diagnostics and output, etc
  358. fast_shutdown = False, # don't wait for clients to disconn.
  359. force_traceback = False, # include traceback in all errors
  360. basepath = '', # base URL path for cherrypy.tree
  361. ):
  362. # Save server version, just for verification during tests
  363. self.version = nilmdb.__version__
  364. self.embedded = embedded
  365. self.db = db
  366. if not getattr(db, "_thread_safe", None):
  367. raise KeyError("Database object " + str(db) + " doesn't claim "
  368. "to be thread safe. You should pass "
  369. "nilmdb.utils.serializer_proxy(NilmDB)(args) "
  370. "rather than NilmDB(args).")
  371. # Build up global server configuration
  372. cherrypy.config.update({
  373. 'server.socket_host': host,
  374. 'server.socket_port': port,
  375. 'engine.autoreload.on': False,
  376. 'server.max_request_body_size': 8*1024*1024,
  377. })
  378. if self.embedded: # pragma: no branch (always taken in test suite)
  379. cherrypy.config.update({ 'environment': 'embedded' })
  380. # Build up application specific configuration
  381. app_config = {}
  382. app_config.update({
  383. 'error_page.default': self.json_error_page,
  384. })
  385. # Some default headers to just help identify that things are working
  386. app_config.update({ 'response.headers.X-Jim-Is-Awesome': 'yeah' })
  387. # Set up Cross-Origin Resource Sharing (CORS) handler so we
  388. # can correctly respond to browsers' CORS preflight requests.
  389. # This also limits verbs to GET and HEAD by default.
  390. app_config.update({ 'tools.CORS_allow.on': True,
  391. 'tools.CORS_allow.methods': ['GET', 'HEAD'] })
  392. # Configure the 'json_in' tool to also allow other content-types
  393. # (like x-www-form-urlencoded), and to treat JSON as a dict that
  394. # fills requests.param.
  395. app_config.update({ 'tools.json_in.force': False,
  396. 'tools.json_in.processor': json_to_request_params })
  397. # Send tracebacks in error responses. They're hidden by the
  398. # error_page function for client errors (code 400-499).
  399. app_config.update({ 'request.show_tracebacks' : True })
  400. self.force_traceback = force_traceback
  401. # Patch CherryPy error handler to never pad out error messages.
  402. # This isn't necessary, but then again, neither is padding the
  403. # error messages.
  404. cherrypy._cperror._ie_friendly_error_sizes = {}
  405. # Build up the application and mount it
  406. root = Root(self.db)
  407. root.stream = Stream(self.db)
  408. if stoppable:
  409. root.exit = Exiter()
  410. cherrypy.tree.apps = {}
  411. cherrypy.tree.mount(root, basepath, config = { "/" : app_config })
  412. # Shutdowns normally wait for clients to disconnect. To speed
  413. # up tests, set fast_shutdown = True
  414. if fast_shutdown:
  415. # Setting timeout to 0 triggers os._exit(70) at shutdown, grr...
  416. cherrypy.server.shutdown_timeout = 0.01
  417. else:
  418. cherrypy.server.shutdown_timeout = 5
  419. # Set up the WSGI application pointer for external programs
  420. self.wsgi_application = cherrypy.tree
  421. def json_error_page(self, status, message, traceback, version):
  422. """Return a custom error page in JSON so the client can parse it"""
  423. return json_error_page(status, message, traceback, version,
  424. self.force_traceback)
  425. def start(self, blocking = False, event = None):
  426. cherrypy_start(blocking, event, self.embedded)
  427. def stop(self):
  428. cherrypy_stop()
  429. # Use a single global nilmdb.server.NilmDB and nilmdb.server.Server
  430. # instance since the database can only be opened once. For this to
  431. # work, the web server must use only a single process and single
  432. # Python interpreter. Multiple threads are OK.
  433. _wsgi_server = None
  434. def wsgi_application(dbpath, basepath): # pragma: no cover
  435. """Return a WSGI application object with a database at the
  436. specified path.
  437. 'dbpath' is a filesystem location, e.g. /home/nilm/db
  438. 'basepath' is the URL path of the application base, which
  439. is the same as the first argument to Apache's WSGIScriptAlias
  440. directive.
  441. """
  442. def application(environ, start_response):
  443. global _wsgi_server
  444. if _wsgi_server is None:
  445. # Try to start the server
  446. try:
  447. db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(dbpath)
  448. _wsgi_server = nilmdb.server.Server(
  449. db, embedded = True,
  450. basepath = basepath.rstrip('/'))
  451. except Exception:
  452. # Build an error message on failure
  453. import pprint
  454. err = sprintf("Initializing database at path '%s' failed:\n\n",
  455. dbpath)
  456. err += traceback.format_exc()
  457. try:
  458. import pwd
  459. import grp
  460. err += sprintf("\nRunning as: uid=%d (%s), gid=%d (%s) "
  461. "on host %s, pid %d\n",
  462. os.getuid(), pwd.getpwuid(os.getuid())[0],
  463. os.getgid(), grp.getgrgid(os.getgid())[0],
  464. socket.gethostname(), os.getpid())
  465. except ImportError:
  466. pass
  467. err += sprintf("\nEnvironment:\n%s\n", pprint.pformat(environ))
  468. if _wsgi_server is None:
  469. # Serve up the error with our own mini WSGI app.
  470. headers = [ ('Content-type', 'text/plain'),
  471. ('Content-length', str(len(err))) ]
  472. start_response("500 Internal Server Error", headers)
  473. return [err]
  474. # Call the normal application
  475. return _wsgi_server.wsgi_application(environ, start_response)
  476. return application