Compare commits
	
		
			91 Commits
		
	
	
		
			nilmdb-1.4
			...
			nilmdb-1.9
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 6868f5f126 | |||
| ca0943ec19 | |||
| 68addb4e4a | |||
| 68c33b1f14 | |||
| 8dd8741100 | |||
| 8e6341ae5d | |||
| 422b1e2df2 | |||
| 0f745b3047 | |||
| 71cd7ed9b7 | |||
| a79d6104d5 | |||
| 8e8ec59e30 | |||
| b89b945a0f | |||
| bd7bdb2eb8 | |||
| 840cd2fd13 | |||
| bbd59c8b50 | |||
| 405c110fd7 | |||
| 274adcd856 | |||
| a1850c9c2c | |||
| 6cd28b67b1 | |||
| d6d215d53d | |||
| e02143ddb2 | |||
| e275384d03 | |||
| a6a67ec15c | |||
| fc43107307 | |||
| 90633413bb | |||
| c7c3aff0fb | |||
| e2347c954e | |||
| 222a5c6c53 | |||
| 1ca2c143e5 | |||
| b5df575c79 | |||
| 2768a5ad15 | |||
| a105543c38 | |||
| 309f38d0ed | |||
| 9a27b6ef6a | |||
| 99532cf9e0 | |||
| dfdd0e5c74 | |||
| 9a2699adfc | |||
| 9bbb95b18b | |||
| 6bbed322c5 | |||
| 2317894355 | |||
| 539c92226c | |||
| 77c766d85d | |||
| 49d04db1d6 | |||
| ea838d05ae | |||
| f2a48bdb2a | |||
| 6d14e0b8aa | |||
| b31b9327b9 | |||
| b98ff1331a | |||
| 00e6ba1124 | |||
| 01029230c9 | |||
| ecc4e5ef9d | |||
| 23f31c472b | |||
| a1e2746360 | |||
| 1c40d59a52 | |||
| bfb09a189f | |||
| 416a499866 | |||
| 637d193807 | |||
| b7fa5745ce | |||
| 0104c8edd9 | |||
| cf3b8e787d | |||
| 83d022016c | |||
| 43b740ecaa | |||
| 4ce059b920 | |||
| 99a4228285 | |||
| 230ec72609 | |||
| d36ece3767 | |||
| 231963538e | |||
| b4d6aad6de | |||
| e95142eabf | |||
| d21c3470bc | |||
| 7576883f49 | |||
| cc211542f8 | |||
| 8292dcf70b | |||
| b362fd37f6 | |||
| 41ec13ee17 | |||
| efa9aa9097 | |||
| d9afb48f45 | |||
| d1140e0f16 | |||
| 6091e44561 | |||
| e233ba790f | |||
| f0304b4c00 | |||
| 60594ca58e | |||
| c7f2df4abc | |||
| 5b7409f802 | |||
| 06038062a2 | |||
| ae9fe89759 | |||
| 04def60021 | |||
| 9ce0f69dff | |||
| 90c3be91c4 | |||
| ebccfb3531 | |||
| e006f1d02e | 
| @@ -7,4 +7,4 @@ | ||||
| exclude_lines = | ||||
| 	pragma: no cover | ||||
| 	if 0: | ||||
| omit = nilmdb/utils/datetime_tz*,nilmdb/scripts,nilmdb/_version.py | ||||
| omit = nilmdb/utils/datetime_tz*,nilmdb/scripts,nilmdb/_version.py,nilmdb/fsck | ||||
|   | ||||
							
								
								
									
										6
									
								
								Makefile
									
									
									
									
									
								
							
							
						
						
									
										6
									
								
								Makefile
									
									
									
									
									
								
							| @@ -1,5 +1,5 @@ | ||||
| # By default, run the tests. | ||||
| all: test | ||||
| all: fscktest | ||||
|  | ||||
| version: | ||||
| 	python setup.py version | ||||
| @@ -23,6 +23,10 @@ docs: | ||||
| lint: | ||||
| 	pylint --rcfile=.pylintrc nilmdb | ||||
|  | ||||
| fscktest: | ||||
| 	python -c "import nilmdb.fsck; nilmdb.fsck.Fsck('/home/jim/wsgi/db').check()" | ||||
| #	python -c "import nilmdb.fsck; nilmdb.fsck.Fsck('/home/jim/mnt/bucket/mnt/sharon/data/db', True).check()" | ||||
|  | ||||
| test: | ||||
| ifeq ($(INSIDE_EMACS), t) | ||||
| # Use the slightly more flexible script | ||||
|   | ||||
| @@ -8,7 +8,11 @@ Prerequisites: | ||||
|  | ||||
|   # Base NilmDB dependencies | ||||
|   sudo apt-get install python-cherrypy3 python-decorator python-simplejson | ||||
|   sudo apt-get install python-requests python-dateutil python-tz python-psutil | ||||
|   sudo apt-get install python-requests python-dateutil python-tz | ||||
|   sudo apt-get install python-progressbar python-psutil | ||||
|  | ||||
|   # Other dependencies (required by some modules) | ||||
|   sudo apt-get install python-numpy | ||||
|  | ||||
|   # Tools for running tests | ||||
|   sudo apt-get install python-nose python-coverage | ||||
| @@ -23,6 +27,7 @@ Install: | ||||
| Usage: | ||||
|  | ||||
|   nilmdb-server --help | ||||
|   nilmdb-fsck --help | ||||
|   nilmtool --help | ||||
|  | ||||
| See docs/wsgi.md for info on setting up a WSGI application in Apache. | ||||
|   | ||||
| @@ -389,3 +389,52 @@ Possible solutions: | ||||
|     are always printed as int64 values, and a new format | ||||
|     "@1234567890123456" is added to the parser for specifying them | ||||
|     exactly. | ||||
|  | ||||
| Binary interface | ||||
| ---------------- | ||||
|  | ||||
| The ASCII interface is too slow for high-bandwidth processing, like | ||||
| sinefits, prep, etc.  A binary interface was added so that you can | ||||
| extract the raw binary out of the bulkdata storage.  This binary is | ||||
| a little-endian format, e.g. in C a uint16_6 stream would be: | ||||
|  | ||||
|     #include <endian.h> | ||||
|     #include <stdint.h> | ||||
|     struct { | ||||
|         int64_t timestamp_le; | ||||
|         uint16_t data_le[6]; | ||||
|     } __attribute__((packed)); | ||||
|  | ||||
| Remember to byteswap (with e.g. `le64toh` and `le16toh` from `<endian.h>` in C)! | ||||
|  | ||||
| This interface is used by the new `nilmdb.client.numpyclient.NumpyClient` | ||||
| class, which is a subclass of the normal `nilmdb.client.client.Client` | ||||
| and has all of the same functions.  It adds three new functions: | ||||
|  | ||||
| - `stream_extract_numpy` to extract data as a Numpy array | ||||
|  | ||||
| - `stream_insert_numpy` to insert data as a Numpy array | ||||
|  | ||||
| - `stream_insert_numpy_context` is the context manager for | ||||
|   incrementally inserting data | ||||
|  | ||||
| It is significantly faster!  It is about 20 times faster to decimate a | ||||
| stream with `nilm-decimate` when the filter code is using the new | ||||
| binary/numpy interface. | ||||
|  | ||||
|  | ||||
| WSGI interface & chunked requests | ||||
| --------------------------------- | ||||
|  | ||||
| mod_wsgi requires "WSGIChunkedRequest On" to handle | ||||
| "Transfer-encoding: Chunked" requests.  However, `/stream/insert` | ||||
| doesn't handle this correctly right now, because: | ||||
|  | ||||
| - The `cherrypy.request.body.read()` call needs to be fixed for chunked requests | ||||
|  | ||||
| - We don't want to just buffer endlessly in the server, and it will | ||||
|   require some thought on how to handle data in chunks (what to do about | ||||
|   interval endpoints). | ||||
|  | ||||
| It is probably better to just keep the endpoint management on the client | ||||
| side, so leave "WSGIChunkedRequest off" for now. | ||||
|   | ||||
| @@ -19,12 +19,12 @@ Then, set up Apache with a configuration like: | ||||
|  | ||||
|     <VirtualHost> | ||||
|         WSGIScriptAlias /nilmdb /home/nilm/nilmdb.wsgi | ||||
|         WSGIApplicationGroup nilmdb-appgroup | ||||
|         WSGIProcessGroup nilmdb-procgroup | ||||
|         WSGIDaemonProcess nilmdb-procgroup threads=32 user=nilm group=nilm | ||||
|  | ||||
|         # Access control example: | ||||
|         <Location /nilmdb> | ||||
|             WSGIProcessGroup nilmdb-procgroup | ||||
|             WSGIApplicationGroup nilmdb-appgroup | ||||
|  | ||||
|             # Access control example: | ||||
|             Order deny,allow | ||||
|             Deny from all | ||||
|             Allow from 1.2.3.4 | ||||
|   | ||||
							
								
								
									
										50
									
								
								extras/fix-oversize-files.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										50
									
								
								extras/fix-oversize-files.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,50 @@ | ||||
| #!/usr/bin/python | ||||
|  | ||||
| import os | ||||
| import sys | ||||
| import cPickle as pickle | ||||
| import argparse | ||||
| import fcntl | ||||
| import re | ||||
| from nilmdb.client.numpyclient import layout_to_dtype | ||||
|  | ||||
| parser = argparse.ArgumentParser( | ||||
|     description = """ | ||||
| Fix database corruption where binary writes caused too much data to be | ||||
| written to the file.  Truncates files to the correct length.  This was | ||||
| fixed by b98ff1331a515ad47fd3203615e835b529b039f9. | ||||
| """) | ||||
| parser.add_argument("path", action="store", help='Database root path') | ||||
| parser.add_argument("-y", "--yes", action="store_true", help='Fix them') | ||||
| args = parser.parse_args() | ||||
|  | ||||
| lock = os.path.join(args.path, "data.lock") | ||||
| with open(lock, "w") as f: | ||||
|     fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) | ||||
|  | ||||
|     fix = {} | ||||
|  | ||||
|     for (path, dirs, files) in os.walk(args.path): | ||||
|         if "_format" in files: | ||||
|             with open(os.path.join(path, "_format")) as format: | ||||
|                 fmt = pickle.load(format) | ||||
|                 rowsize = layout_to_dtype(fmt["layout"]).itemsize | ||||
|                 maxsize = rowsize * fmt["rows_per_file"] | ||||
|                 fix[path] = maxsize | ||||
|                 if maxsize < 128000000: # sanity check | ||||
|                     raise Exception("bad maxsize " + str(maxsize)) | ||||
|  | ||||
|     for fixpath in fix: | ||||
|         for (path, dirs, files) in os.walk(fixpath): | ||||
|             for fn in files: | ||||
|                 if not re.match("^[0-9a-f]{4,}$", fn): | ||||
|                     continue | ||||
|                 fn = os.path.join(path, fn) | ||||
|                 size = os.path.getsize(fn) | ||||
|                 maxsize = fix[fixpath] | ||||
|                 if size > maxsize: | ||||
|                     diff = size - maxsize | ||||
|                     print diff, "too big:", fn | ||||
|                     if args.yes: | ||||
|                         with open(fn, "a+") as dbfile: | ||||
|                             dbfile.truncate(maxsize) | ||||
| @@ -58,6 +58,11 @@ class Client(object): | ||||
|         return self.http.get("dbinfo") | ||||
|  | ||||
|     def stream_list(self, path = None, layout = None, extended = False): | ||||
|         """Return a sorted list of [path, layout] lists.  If 'path' or | ||||
|         'layout' are specified, only return streams that match those | ||||
|         exact values.  If 'extended' is True, the returned lists have | ||||
|         extended info, e.g.: [path, layout, extent_min, extent_max, | ||||
|         total_rows, total_seconds].""" | ||||
|         params = {} | ||||
|         if path is not None: | ||||
|             params["path"] = path | ||||
| @@ -65,9 +70,11 @@ class Client(object): | ||||
|             params["layout"] = layout | ||||
|         if extended: | ||||
|             params["extended"] = 1 | ||||
|         return self.http.get("stream/list", params) | ||||
|         streams = self.http.get("stream/list", params) | ||||
|         return nilmdb.utils.sort.sort_human(streams, key = lambda s: s[0]) | ||||
|  | ||||
|     def stream_get_metadata(self, path, keys = None): | ||||
|         """Get stream metadata""" | ||||
|         params = { "path": path } | ||||
|         if keys is not None: | ||||
|             params["key"] = keys | ||||
| @@ -116,15 +123,19 @@ class Client(object): | ||||
|             params["start"] = timestamp_to_string(start) | ||||
|         if end is not None: | ||||
|             params["end"] = timestamp_to_string(end) | ||||
|         return self.http.post("stream/remove", params) | ||||
|         total = 0 | ||||
|         for count in self.http.post_gen("stream/remove", params): | ||||
|             total += int(count) | ||||
|         return total | ||||
|  | ||||
|     @contextlib.contextmanager | ||||
|     def stream_insert_context(self, path, start = None, end = None): | ||||
|         """Return a context manager that allows data to be efficiently | ||||
|         inserted into a stream in a piecewise manner.  Data is be provided | ||||
|         as single lines, and is aggregated and sent to the server in larger | ||||
|         chunks as necessary.  Data lines must match the database layout for | ||||
|         the given path, and end with a newline. | ||||
|         inserted into a stream in a piecewise manner.  Data is | ||||
|         provided as ASCII lines, and is aggregated and sent to the | ||||
|         server in larger or smaller chunks as necessary.  Data lines | ||||
|         must match the database layout for the given path, and end | ||||
|         with a newline. | ||||
|  | ||||
|         Example: | ||||
|           with client.stream_insert_context('/path', start, end) as ctx: | ||||
| @@ -136,15 +147,17 @@ class Client(object): | ||||
|         This may make multiple requests to the server, if the data is | ||||
|         large enough or enough time has passed between insertions. | ||||
|         """ | ||||
|         ctx = StreamInserter(self.http, path, start, end) | ||||
|         ctx = StreamInserter(self, path, start, end) | ||||
|         yield ctx | ||||
|         ctx.finalize() | ||||
|         ctx.destroy() | ||||
|  | ||||
|     def stream_insert(self, path, data, start = None, end = None): | ||||
|         """Insert rows of data into a stream.  data should be a string | ||||
|         or iterable that provides ASCII data that matches the database | ||||
|         layout for path.  See stream_insert_context for details on the | ||||
|         'start' and 'end' parameters.""" | ||||
|         layout for path.  Data is passed through stream_insert_context, | ||||
|         so it will be broken into reasonably-sized chunks and | ||||
|         start/end will be deduced if missing.""" | ||||
|         with self.stream_insert_context(path, start, end) as ctx: | ||||
|             if isinstance(data, basestring): | ||||
|                 ctx.insert(data) | ||||
| @@ -153,11 +166,28 @@ class Client(object): | ||||
|                     ctx.insert(chunk) | ||||
|         return ctx.last_response | ||||
|  | ||||
|     def stream_insert_block(self, path, data, start, end, binary = False): | ||||
|         """Insert a single fixed block of data into the stream.  It is | ||||
|         sent directly to the server in one block with no further | ||||
|         processing. | ||||
|  | ||||
|         If 'binary' is True, provide raw binary data in little-endian | ||||
|         format matching the path layout, including an int64 timestamp. | ||||
|         Otherwise, provide ASCII data matching the layout.""" | ||||
|         params = { | ||||
|             "path": path, | ||||
|             "start": timestamp_to_string(start), | ||||
|             "end": timestamp_to_string(end), | ||||
|         } | ||||
|         if binary: | ||||
|             params["binary"] = 1 | ||||
|         return self.http.put("stream/insert", data, params, binary = binary) | ||||
|  | ||||
|     def stream_intervals(self, path, start = None, end = None, diffpath = None): | ||||
|         """ | ||||
|         Return a generator that yields each stream interval. | ||||
|  | ||||
|         If diffpath is not None, yields only interval ranges that are | ||||
|         If 'diffpath' is not None, yields only interval ranges that are | ||||
|         present in 'path' but not in 'diffpath'. | ||||
|         """ | ||||
|         params = { | ||||
| @@ -172,17 +202,22 @@ class Client(object): | ||||
|         return self.http.get_gen("stream/intervals", params) | ||||
|  | ||||
|     def stream_extract(self, path, start = None, end = None, | ||||
|                        count = False, markup = False): | ||||
|                        count = False, markup = False, binary = False): | ||||
|         """ | ||||
|         Extract data from a stream.  Returns a generator that yields | ||||
|         lines of ASCII-formatted data that matches the database | ||||
|         layout for the given path. | ||||
|  | ||||
|         Specify count = True to return a count of matching data points | ||||
|         If 'count' is True, return a count of matching data points | ||||
|         rather than the actual data.  The output format is unchanged. | ||||
|  | ||||
|         Specify markup = True to include comments in the returned data | ||||
|         If 'markup' is True, include comments in the returned data | ||||
|         that indicate interval starts and ends. | ||||
|  | ||||
|         If 'binary' is True, return chunks of raw binary data, rather | ||||
|         than lines of ASCII-formatted data.  Raw binary data is | ||||
|         little-endian and matches the database types (including an | ||||
|         int64 timestamp). | ||||
|         """ | ||||
|         params = { | ||||
|             "path": path, | ||||
| @@ -195,7 +230,9 @@ class Client(object): | ||||
|             params["count"] = 1 | ||||
|         if markup: | ||||
|             params["markup"] = 1 | ||||
|         return self.http.get_gen("stream/extract", params) | ||||
|         if binary: | ||||
|             params["binary"] = 1 | ||||
|         return self.http.get_gen("stream/extract", params, binary = binary) | ||||
|  | ||||
|     def stream_count(self, path, start = None, end = None): | ||||
|         """ | ||||
| @@ -244,13 +281,13 @@ class StreamInserter(object): | ||||
|     _max_data = 2 * 1024 * 1024 | ||||
|     _max_data_after_send = 64 * 1024 | ||||
|  | ||||
|     def __init__(self, http, path, start = None, end = None): | ||||
|         """'http' is the httpclient object.  'path' is the database | ||||
|     def __init__(self, client, path, start, end): | ||||
|         """'client' is the client object.  'path' is the database | ||||
|         path to insert to.  'start' and 'end' are used for the first | ||||
|         contiguous interval.""" | ||||
|         contiguous interval and may be None.""" | ||||
|         self.last_response = None | ||||
|  | ||||
|         self._http = http | ||||
|         self._client = client | ||||
|         self._path = path | ||||
|  | ||||
|         # Start and end for the overall contiguous interval we're | ||||
| @@ -263,6 +300,15 @@ class StreamInserter(object): | ||||
|         self._block_data = [] | ||||
|         self._block_len = 0 | ||||
|  | ||||
|         self.destroyed = False | ||||
|  | ||||
|     def destroy(self): | ||||
|         """Ensure this object can't be used again without raising | ||||
|         an error""" | ||||
|         def error(*args, **kwargs): | ||||
|             raise Exception("don't reuse this context object") | ||||
|         self._send_block = self.insert = self.finalize = self.send = error | ||||
|  | ||||
|     def insert(self, data): | ||||
|         """Insert a chunk of ASCII formatted data in string form.  The | ||||
|         overall data must consist of lines terminated by '\\n'.""" | ||||
| @@ -409,7 +455,7 @@ class StreamInserter(object): | ||||
|             self._interval_start = end_ts | ||||
|  | ||||
|         # Double check endpoints | ||||
|         if start_ts is None or end_ts is None: | ||||
|         if (start_ts is None or end_ts is None) or (start_ts == end_ts): | ||||
|             # If the block has no non-comment lines, it's OK | ||||
|             try: | ||||
|                 self._get_first_noncomment(block) | ||||
| @@ -418,9 +464,7 @@ class StreamInserter(object): | ||||
|             raise ClientError("have data to send, but no start/end times") | ||||
|  | ||||
|         # Send it | ||||
|         params = { "path": self._path, | ||||
|                    "start": timestamp_to_string(start_ts), | ||||
|                    "end": timestamp_to_string(end_ts) } | ||||
|         self.last_response = self._http.put("stream/insert", block, params) | ||||
|         self.last_response = self._client.stream_insert_block( | ||||
|             self._path, block, start_ts, end_ts, binary = False) | ||||
|  | ||||
|         return | ||||
|   | ||||
| @@ -105,12 +105,17 @@ class HTTPClient(object): | ||||
|         else: | ||||
|             return self._req("POST", url, None, params) | ||||
|  | ||||
|     def put(self, url, data, params = None): | ||||
|     def put(self, url, data, params = None, binary = False): | ||||
|         """Simple PUT (parameters in URL, data in body)""" | ||||
|         return self._req("PUT", url, params, data) | ||||
|         if binary: | ||||
|             h = { 'Content-type': 'application/octet-stream' } | ||||
|         else: | ||||
|             h = { 'Content-type': 'text/plain; charset=utf-8' } | ||||
|         return self._req("PUT", url, query = params, body = data, headers = h) | ||||
|  | ||||
|     # Generator versions that return data one line at a time. | ||||
|     def _req_gen(self, method, url, query = None, body = None, headers = None): | ||||
|     def _req_gen(self, method, url, query = None, body = None, | ||||
|                  headers = None, binary = False): | ||||
|         """ | ||||
|         Make a request and return a generator that gives back strings | ||||
|         or JSON decoded lines of the body data, or raise an error if | ||||
| @@ -118,16 +123,50 @@ class HTTPClient(object): | ||||
|         """ | ||||
|         (response, isjson) = self._do_req(method, url, query, body, | ||||
|                                           stream = True, headers = headers) | ||||
|         if isjson: | ||||
|             for line in response.iter_lines(): | ||||
|  | ||||
|         # Like the iter_lines function in Requests, but only splits on | ||||
|         # the specified line ending. | ||||
|         def lines(source, ending): | ||||
|             pending = None | ||||
|             for chunk in source: | ||||
|                 if pending is not None: | ||||
|                     chunk = pending + chunk | ||||
|                 tmp = chunk.split(ending) | ||||
|                 lines = tmp[:-1] | ||||
|                 if chunk.endswith(ending): | ||||
|                     pending = None | ||||
|                 else: | ||||
|                     pending = tmp[-1] | ||||
|                 for line in lines: | ||||
|                     yield line | ||||
|             if pending is not None: # pragma: no cover (missing newline) | ||||
|                 yield pending | ||||
|  | ||||
|         # Yield the chunks or lines as requested | ||||
|         if binary: | ||||
|             for chunk in response.iter_content(chunk_size = 65536): | ||||
|                 yield chunk | ||||
|         elif isjson: | ||||
|             for line in lines(response.iter_content(chunk_size = 1), | ||||
|                               ending = '\r\n'): | ||||
|                 yield json.loads(line) | ||||
|         else: | ||||
|             for line in response.iter_lines(): | ||||
|             for line in lines(response.iter_content(chunk_size = 65536), | ||||
|                               ending = '\n'): | ||||
|                 yield line | ||||
|  | ||||
|     def get_gen(self, url, params = None): | ||||
|     def get_gen(self, url, params = None, binary = False): | ||||
|         """Simple GET (parameters in URL) returning a generator""" | ||||
|         return self._req_gen("GET", url, params) | ||||
|         return self._req_gen("GET", url, params, binary = binary) | ||||
|  | ||||
|     def post_gen(self, url, params = None): | ||||
|         """Simple POST (parameters in body) returning a generator""" | ||||
|         if self.post_json: | ||||
|             return self._req_gen("POST", url, None, | ||||
|                                  json.dumps(params), | ||||
|                                  { 'Content-type': 'application/json' }) | ||||
|         else: | ||||
|             return self._req_gen("POST", url, None, params) | ||||
|  | ||||
|     # Not much use for a POST or PUT generator, since they don't | ||||
|     # return much data. | ||||
|   | ||||
							
								
								
									
										258
									
								
								nilmdb/client/numpyclient.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										258
									
								
								nilmdb/client/numpyclient.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,258 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| """Provide a NumpyClient class that is based on normal Client, but has | ||||
| additional methods for extracting and inserting data via Numpy arrays.""" | ||||
|  | ||||
| import nilmdb.utils | ||||
| import nilmdb.client.client | ||||
| import nilmdb.client.httpclient | ||||
| from nilmdb.client.errors import ClientError | ||||
|  | ||||
| import contextlib | ||||
| from nilmdb.utils.time import timestamp_to_string, string_to_timestamp | ||||
|  | ||||
| import numpy | ||||
| import cStringIO | ||||
|  | ||||
| def layout_to_dtype(layout): | ||||
|     ltype = layout.split('_')[0] | ||||
|     lcount = int(layout.split('_')[1]) | ||||
|     if ltype.startswith('int'): | ||||
|         atype = '<i' + str(int(ltype[3:]) / 8) | ||||
|     elif ltype.startswith('uint'): | ||||
|         atype = '<u' + str(int(ltype[4:]) / 8) | ||||
|     elif ltype.startswith('float'): | ||||
|         atype = '<f' + str(int(ltype[5:]) / 8) | ||||
|     else: | ||||
|         raise ValueError("bad layout") | ||||
|     return numpy.dtype([('timestamp', '<i8'), ('data', atype, lcount)]) | ||||
|  | ||||
| class NumpyClient(nilmdb.client.client.Client): | ||||
|     """Subclass of nilmdb.client.Client that adds additional methods for | ||||
|     extracting and inserting data via Numpy arrays.""" | ||||
|  | ||||
|     def _get_dtype(self, path, layout): | ||||
|         if layout is None: | ||||
|             streams = self.stream_list(path) | ||||
|             if len(streams) != 1: | ||||
|                 raise ClientError("can't get layout for path: " + path) | ||||
|             layout = streams[0][1] | ||||
|         return layout_to_dtype(layout) | ||||
|  | ||||
|     def stream_extract_numpy(self, path, start = None, end = None, | ||||
|                              layout = None, maxrows = 100000, | ||||
|                              structured = False): | ||||
|         """ | ||||
|         Extract data from a stream.  Returns a generator that yields | ||||
|         Numpy arrays of up to 'maxrows' of data each. | ||||
|  | ||||
|         If 'layout' is None, it is looked up using stream_list. | ||||
|  | ||||
|         If 'structured' is False, all data is converted to float64 | ||||
|         and returned in a flat 2D array.  Otherwise, data is returned | ||||
|         as a structured dtype in a 1D array. | ||||
|         """ | ||||
|         dtype = self._get_dtype(path, layout) | ||||
|  | ||||
|         def to_numpy(data): | ||||
|             a = numpy.fromstring(data, dtype) | ||||
|             if structured: | ||||
|                 return a | ||||
|             return numpy.c_[a['timestamp'], a['data']] | ||||
|  | ||||
|         chunks = [] | ||||
|         total_len = 0 | ||||
|         maxsize = dtype.itemsize * maxrows | ||||
|         for data in self.stream_extract(path, start, end, binary = True): | ||||
|             # Add this block of binary data | ||||
|             chunks.append(data) | ||||
|             total_len += len(data) | ||||
|  | ||||
|             # See if we have enough to make the requested Numpy array | ||||
|             while total_len >= maxsize: | ||||
|                 assembled = "".join(chunks) | ||||
|                 total_len -= maxsize | ||||
|                 chunks = [ assembled[maxsize:] ] | ||||
|                 block = assembled[:maxsize] | ||||
|                 yield to_numpy(block) | ||||
|  | ||||
|         if total_len: | ||||
|             yield to_numpy("".join(chunks)) | ||||
|  | ||||
|     @contextlib.contextmanager | ||||
|     def stream_insert_numpy_context(self, path, start = None, end = None, | ||||
|                                     layout = None): | ||||
|         """Return a context manager that allows data to be efficiently | ||||
|         inserted into a stream in a piecewise manner.  Data is | ||||
|         provided as Numpy arrays, and is aggregated and sent to the | ||||
|         server in larger or smaller chunks as necessary.  Data format | ||||
|         must match the database layout for the given path. | ||||
|  | ||||
|         For more details, see help for | ||||
|         nilmdb.client.numpyclient.StreamInserterNumpy | ||||
|  | ||||
|         If 'layout' is not None, use it as the layout rather than | ||||
|         querying the database. | ||||
|         """ | ||||
|         dtype = self._get_dtype(path, layout) | ||||
|         ctx = StreamInserterNumpy(self, path, start, end, dtype) | ||||
|         yield ctx | ||||
|         ctx.finalize() | ||||
|         ctx.destroy() | ||||
|  | ||||
|     def stream_insert_numpy(self, path, data, start = None, end = None, | ||||
|                             layout = None): | ||||
|         """Insert data into a stream.  data should be a Numpy array | ||||
|         which will be passed through stream_insert_numpy_context to | ||||
|         break it into chunks etc.  See the help for that function | ||||
|         for details.""" | ||||
|         with self.stream_insert_numpy_context(path, start, end, layout) as ctx: | ||||
|             if isinstance(data, numpy.ndarray): | ||||
|                 ctx.insert(data) | ||||
|             else: | ||||
|                 for chunk in data: | ||||
|                     ctx.insert(chunk) | ||||
|         return ctx.last_response | ||||
|  | ||||
| class StreamInserterNumpy(nilmdb.client.client.StreamInserter): | ||||
|     """Object returned by stream_insert_numpy_context() that manages | ||||
|     the insertion of rows of data into a particular path. | ||||
|  | ||||
|     See help for nilmdb.client.client.StreamInserter for details. | ||||
|     The only difference is that, instead of ASCII formatted data, | ||||
|     this context manager can take Numpy arrays, which are either | ||||
|     structured (1D with complex dtype) or flat (2D with simple dtype). | ||||
|     """ | ||||
|  | ||||
|     # Soft limit of how many bytes to send per HTTP request. | ||||
|     _max_data = 2 * 1024 * 1024 | ||||
|  | ||||
|     def __init__(self, client, path, start, end, dtype): | ||||
|         """ | ||||
|         'client' is the client object.  'path' is the database path | ||||
|         to insert to.  'start' and 'end' are used for the first | ||||
|         contiguous interval and may be None.  'dtype' is the Numpy | ||||
|         dtype for this stream. | ||||
|         """ | ||||
|         super(StreamInserterNumpy, self).__init__(client, path, start, end) | ||||
|         self._dtype = dtype | ||||
|  | ||||
|         # Max rows to send at once | ||||
|         self._max_rows = self._max_data // self._dtype.itemsize | ||||
|  | ||||
|         # List of the current arrays we're building up to send | ||||
|         self._block_arrays = [] | ||||
|         self._block_rows = 0 | ||||
|  | ||||
|     def insert(self, array): | ||||
|         """Insert Numpy data, which must match the layout type.""" | ||||
|         if type(array) != numpy.ndarray: | ||||
|             array = numpy.array(array) | ||||
|         if array.ndim == 1: | ||||
|             # Already a structured array; just verify the type | ||||
|             if array.dtype != self._dtype: | ||||
|                 raise ValueError("wrong dtype for 1D (structured) array") | ||||
|         elif array.ndim == 2: | ||||
|             # Convert to structured array | ||||
|             sarray = numpy.zeros(array.shape[0], dtype=self._dtype) | ||||
|             try: | ||||
|                 sarray['timestamp'] = array[:,0] | ||||
|                 # Need the squeeze in case sarray['data'] is 1 dimensional | ||||
|                 sarray['data'] = numpy.squeeze(array[:,1:]) | ||||
|             except (IndexError, ValueError): | ||||
|                 raise ValueError("wrong number of fields for this data type") | ||||
|             array = sarray | ||||
|         else: | ||||
|             raise ValueError("wrong number of dimensions in array") | ||||
|  | ||||
|         length = len(array) | ||||
|         maxrows = self._max_rows | ||||
|  | ||||
|         if length == 0: | ||||
|             return | ||||
|         if length > maxrows: | ||||
|             # This is more than twice what we wanted to send, so split | ||||
|             # it up.  This is a bit inefficient, but the user really | ||||
|             # shouldn't be providing this much data at once. | ||||
|             for cut in range(0, length, maxrows): | ||||
|                 self.insert(array[cut:(cut + maxrows)]) | ||||
|             return | ||||
|  | ||||
|         # Add this array to our list | ||||
|         self._block_arrays.append(array) | ||||
|         self._block_rows += length | ||||
|  | ||||
|         # Send if it's too long | ||||
|         if self._block_rows >= maxrows: | ||||
|             self._send_block(final = False) | ||||
|  | ||||
|     def _send_block(self, final = False): | ||||
|         """Send the data current stored up.  One row might be left | ||||
|         over if we need its timestamp saved.""" | ||||
|  | ||||
|         # Build the full array to send | ||||
|         if self._block_rows == 0: | ||||
|             array = numpy.zeros(0, dtype = self._dtype) | ||||
|         else: | ||||
|             array = numpy.hstack(self._block_arrays) | ||||
|  | ||||
|         # Get starting timestamp | ||||
|         start_ts = self._interval_start | ||||
|         if start_ts is None: | ||||
|             # Pull start from the first row | ||||
|             try: | ||||
|                 start_ts = array['timestamp'][0] | ||||
|             except IndexError: | ||||
|                 pass # no timestamp is OK, if we have no data | ||||
|  | ||||
|         # Get ending timestamp | ||||
|         if final: | ||||
|             # For a final block, the timestamp is either the | ||||
|             # user-provided end, or the timestamp of the last line | ||||
|             # plus epsilon. | ||||
|             end_ts = self._interval_end | ||||
|             if end_ts is None: | ||||
|                 try: | ||||
|                     end_ts = array['timestamp'][-1] | ||||
|                     end_ts += nilmdb.utils.time.epsilon | ||||
|                 except IndexError: | ||||
|                     pass # no timestamp is OK, if we have no data | ||||
|             self._block_arrays = [] | ||||
|             self._block_rows = 0 | ||||
|  | ||||
|             # Next block is completely fresh | ||||
|             self._interval_start = None | ||||
|             self._interval_end = None | ||||
|         else: | ||||
|             # An intermediate block.  We need to save the last row | ||||
|             # for the next block, and use its timestamp as the ending | ||||
|             # timestamp for this one. | ||||
|             if len(array) < 2: | ||||
|                 # Not enough data to send an intermediate block | ||||
|                 return | ||||
|             end_ts = array['timestamp'][-1] | ||||
|             if self._interval_end is not None and end_ts > self._interval_end: | ||||
|                 # User gave us bad endpoints; send it anyway, and let | ||||
|                 # the server complain so that the error is the same | ||||
|                 # as if we hadn't done this chunking. | ||||
|                 end_ts = self._interval_end | ||||
|             self._block_arrays = [ array[-1:] ] | ||||
|             self._block_rows = 1 | ||||
|             array = array[:-1] | ||||
|  | ||||
|             # Next block continues where this one ended | ||||
|             self._interval_start = end_ts | ||||
|  | ||||
|         # If we have no endpoints, or equal endpoints, it's OK as long | ||||
|         # as there's no data to send | ||||
|         if (start_ts is None or end_ts is None) or (start_ts == end_ts): | ||||
|             if len(array) == 0: | ||||
|                 return | ||||
|             raise ClientError("have data to send, but invalid start/end times") | ||||
|  | ||||
|         # Send it | ||||
|         data = array.tostring() | ||||
|         self.last_response = self._client.stream_insert_block( | ||||
|             self._path, data, start_ts, end_ts, binary = True) | ||||
|  | ||||
|         return | ||||
| @@ -10,6 +10,7 @@ import sys | ||||
| import os | ||||
| import argparse | ||||
| from argparse import ArgumentDefaultsHelpFormatter as def_form | ||||
| import signal | ||||
|  | ||||
| try: # pragma: no cover | ||||
|     import argcomplete | ||||
| @@ -28,6 +29,14 @@ for cmd in subcommands: | ||||
|     subcmd_mods[cmd] = __import__("nilmdb.cmdline." + cmd, fromlist = [ cmd ]) | ||||
|  | ||||
class JimArgumentParser(argparse.ArgumentParser):
    """ArgumentParser with custom --version handling and error output."""

    def parse_args(self, args=None, namespace=None):
        """Parse 'args', or sys.argv[1:] when 'args' is None.

        If "--version" appears anywhere in the arguments, everything
        else is dropped and only "--version" is parsed.
        """
        # Look for --version anywhere and change it to just "nilmtool
        # --version".  This makes "nilmtool cmd --version" work, which
        # is needed by help2man.
        #
        # Only fall back to sys.argv when no list was given at all; an
        # explicitly empty list means "no arguments", and must not make
        # us go looking through the process's own command line.
        if args is None:
            args = sys.argv[1:]
        if "--version" in args:
            args = [ "--version" ]
        return argparse.ArgumentParser.parse_args(self, args, namespace)

    def error(self, message):
        """Print usage and 'message' to stderr, then exit with status 2."""
        self.print_usage(sys.stderr)
        self.exit(2, sprintf("error: %s\n", message))
| @@ -71,17 +80,29 @@ class Complete(object): # pragma: no cover | ||||
|         path = parsed_args.path | ||||
|         if not path: | ||||
|             return [] | ||||
|         return ( self.escape(k + '=' + v) | ||||
|                  for (k,v) in client.stream_get_metadata(path).iteritems() | ||||
|                  if k.startswith(prefix) ) | ||||
|  | ||||
|         results = [] | ||||
|         # prefix comes in as UTF-8, but results need to be Unicode, | ||||
|         # weird.  Still doesn't work in all cases, but that's bugs in | ||||
|         # argcomplete. | ||||
|         prefix = nilmdb.utils.unicode.decode(prefix) | ||||
|         for (k,v) in client.stream_get_metadata(path).iteritems(): | ||||
|             kv = self.escape(k + '=' + v) | ||||
|             if kv.startswith(prefix): | ||||
|                 results.append(kv) | ||||
|         return results | ||||
|  | ||||
| class Cmdline(object): | ||||
|  | ||||
|     def __init__(self, argv = None): | ||||
|         self.argv = argv or sys.argv[1:] | ||||
|         try: | ||||
|             # Assume command line arguments are encoded with stdin's encoding, | ||||
|             # and reverse it.  Won't be needed in Python 3, but for now.. | ||||
|             self.argv = [ x.decode(sys.stdin.encoding) for x in self.argv ] | ||||
|         except Exception: # pragma: no cover | ||||
|             pass | ||||
|         self.client = None | ||||
|         self.def_url = os.environ.get("NILMDB_URL", "http://localhost:12380") | ||||
|         self.def_url = os.environ.get("NILMDB_URL", "http://localhost/nilmdb/") | ||||
|         self.subcmd = {} | ||||
|         self.complete = Complete() | ||||
|  | ||||
| @@ -126,6 +147,13 @@ class Cmdline(object): | ||||
|         sys.exit(-1) | ||||
|  | ||||
|     def run(self): | ||||
|         # Set SIGPIPE to its default handler -- we don't need Python | ||||
|         # to catch it for us. | ||||
|         try: | ||||
|             signal.signal(signal.SIGPIPE, signal.SIG_DFL) | ||||
|         except ValueError: # pragma: no cover | ||||
|             pass | ||||
|  | ||||
|         # Clear cached timezone, so that we can pick up timezone changes | ||||
|         # while running this from the test suite. | ||||
|         datetime_tz._localtz = None | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| from nilmdb.utils.printf import * | ||||
| import nilmdb.client | ||||
| import fnmatch | ||||
|  | ||||
| from argparse import ArgumentDefaultsHelpFormatter as def_form | ||||
|  | ||||
| @@ -10,25 +11,39 @@ def setup(self, sub): | ||||
|                          Destroy the stream at the specified path. | ||||
|                          The stream must be empty.  All metadata | ||||
|                          related to the stream is permanently deleted. | ||||
|  | ||||
|                          Wildcards and multiple paths are supported. | ||||
|                          """) | ||||
|     cmd.set_defaults(handler = cmd_destroy) | ||||
|     group = cmd.add_argument_group("Options") | ||||
|     group.add_argument("-R", "--remove", action="store_true", | ||||
|                        help="Remove all data before destroying stream") | ||||
|     group.add_argument("-q", "--quiet", action="store_true", | ||||
|                        help="Don't display names when destroying " | ||||
|                        "multiple paths") | ||||
|     group = cmd.add_argument_group("Required arguments") | ||||
|     group.add_argument("path", | ||||
|                        help="Path of the stream to delete, e.g. /foo/bar", | ||||
|     group.add_argument("path", nargs='+', | ||||
|                        help="Path of the stream to delete, e.g. /foo/bar/*", | ||||
|                        ).completer = self.complete.path | ||||
|     return cmd | ||||
|  | ||||
| def cmd_destroy(self): | ||||
|     """Destroy stream""" | ||||
|     if self.args.remove: | ||||
|     streams = [ s[0] for s in self.client.stream_list() ] | ||||
|     paths = [] | ||||
|     for path in self.args.path: | ||||
|         new = fnmatch.filter(streams, path) | ||||
|         if not new: | ||||
|             self.die("error: no stream matched path: %s", path) | ||||
|         paths.extend(new) | ||||
|  | ||||
|     for path in paths: | ||||
|         if not self.args.quiet and len(paths) > 1: | ||||
|             printf("Destroying %s\n", path) | ||||
|  | ||||
|         try: | ||||
|             count = self.client.stream_remove(self.args.path) | ||||
|             if self.args.remove: | ||||
|                 count = self.client.stream_remove(path) | ||||
|             self.client.stream_destroy(path) | ||||
|         except nilmdb.client.ClientError as e: | ||||
|             self.die("error removing data: %s", str(e)) | ||||
|     try: | ||||
|         self.client.stream_destroy(self.args.path) | ||||
|     except nilmdb.client.ClientError as e: | ||||
|         self.die("error destroying stream: %s", str(e)) | ||||
|             self.die("error destroying stream: %s", str(e)) | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| from __future__ import print_function | ||||
| from nilmdb.utils.printf import * | ||||
| import nilmdb.client | ||||
| import sys | ||||
|  | ||||
| def setup(self, sub): | ||||
|     cmd = sub.add_parser("extract", help="Extract data", | ||||
| @@ -24,6 +25,8 @@ def setup(self, sub): | ||||
|                        ).completer = self.complete.time | ||||
|  | ||||
|     group = cmd.add_argument_group("Output format") | ||||
|     group.add_argument("-B", "--binary", action="store_true", | ||||
|                        help="Raw binary output") | ||||
|     group.add_argument("-b", "--bare", action="store_true", | ||||
|                        help="Exclude timestamps from output lines") | ||||
|     group.add_argument("-a", "--annotate", action="store_true", | ||||
| @@ -42,6 +45,11 @@ def cmd_extract_verify(self): | ||||
|         if self.args.start > self.args.end: | ||||
|             self.parser.error("start is after end") | ||||
|  | ||||
|     if self.args.binary: | ||||
|         if (self.args.bare or self.args.annotate or self.args.markup or | ||||
|             self.args.timestamp_raw or self.args.count): | ||||
|             self.parser.error("--binary cannot be combined with other options") | ||||
|  | ||||
| def cmd_extract(self): | ||||
|     streams = self.client.stream_list(self.args.path) | ||||
|     if len(streams) != 1: | ||||
| @@ -60,16 +68,23 @@ def cmd_extract(self): | ||||
|         printf("# end: %s\n", time_string(self.args.end)) | ||||
|  | ||||
|     printed = False | ||||
|     if self.args.binary: | ||||
|         printer = sys.stdout.write | ||||
|     else: | ||||
|         printer = print | ||||
|     bare = self.args.bare | ||||
|     count = self.args.count | ||||
|     for dataline in self.client.stream_extract(self.args.path, | ||||
|                                                self.args.start, | ||||
|                                                self.args.end, | ||||
|                                                self.args.count, | ||||
|                                                self.args.markup): | ||||
|         if self.args.bare and not self.args.count: | ||||
|                                                self.args.markup, | ||||
|                                                self.args.binary): | ||||
|         if bare and not count: | ||||
|             # Strip timestamp (first element).  Doesn't make sense | ||||
|             # if we are only returning a count. | ||||
|             dataline = ' '.join(dataline.split(' ')[1:]) | ||||
|         print(dataline) | ||||
|         printer(dataline) | ||||
|         printed = True | ||||
|     if not printed: | ||||
|         if self.args.annotate: | ||||
|   | ||||
| @@ -21,5 +21,8 @@ def cmd_info(self): | ||||
|     printf("Server URL: %s\n", self.client.geturl()) | ||||
|     dbinfo = self.client.dbinfo() | ||||
|     printf("Server database path: %s\n", dbinfo["path"]) | ||||
|     printf("Server database size: %s\n", human_size(dbinfo["size"])) | ||||
|     printf("Server database free space: %s\n", human_size(dbinfo["free"])) | ||||
|     for (desc, field) in [("used by NilmDB", "size"), | ||||
|                           ("used by other", "other"), | ||||
|                           ("reserved", "reserved"), | ||||
|                           ("free", "free")]: | ||||
|         printf("Server disk space %s: %s\n", desc, human_size(dbinfo[field])) | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| from nilmdb.utils.printf import * | ||||
| import nilmdb.utils.time | ||||
| from nilmdb.utils.interval import Interval | ||||
|  | ||||
| import fnmatch | ||||
| import argparse | ||||
| @@ -42,6 +43,8 @@ def setup(self, sub): | ||||
|     group = cmd.add_argument_group("Misc options") | ||||
|     group.add_argument("-T", "--timestamp-raw", action="store_true", | ||||
|                        help="Show raw timestamps when printing times") | ||||
|     group.add_argument("-o", "--optimize", action="store_true", | ||||
|                        help="Optimize (merge adjacent) intervals") | ||||
|  | ||||
|     return cmd | ||||
|  | ||||
| @@ -58,9 +61,16 @@ def cmd_intervals(self): | ||||
|         time_string = nilmdb.utils.time.timestamp_to_human | ||||
|  | ||||
|     try: | ||||
|            for (start, end) in self.client.stream_intervals( | ||||
|                self.args.path, self.args.start, self.args.end, self.args.diff): | ||||
|                printf("[ %s -> %s ]\n", time_string(start), time_string(end)) | ||||
|         intervals = ( Interval(start, end) for (start, end) in | ||||
|                       self.client.stream_intervals(self.args.path, | ||||
|                                                    self.args.start, | ||||
|                                                    self.args.end, | ||||
|                                                    self.args.diff) ) | ||||
|         if self.args.optimize: | ||||
|             intervals = nilmdb.utils.interval.optimize(intervals) | ||||
|         for i in intervals: | ||||
|             printf("[ %s -> %s ]\n", time_string(i.start), time_string(i.end)) | ||||
|  | ||||
|     except nilmdb.client.ClientError as e: | ||||
|         self.die("error listing intervals: %s", str(e)) | ||||
|  | ||||
|   | ||||
| @@ -10,22 +10,16 @@ def setup(self, sub): | ||||
|                          formatter_class = def_form, | ||||
|                          description=""" | ||||
|                          List streams available in the database, | ||||
|                          optionally filtering by layout or path.  Wildcards | ||||
|                          are accepted. | ||||
|                          optionally filtering by path.  Wildcards | ||||
|                          are accepted; non-matching paths or wildcards | ||||
|                          are ignored. | ||||
|                          """) | ||||
|     cmd.set_defaults(verify = cmd_list_verify, | ||||
|                      handler = cmd_list) | ||||
|  | ||||
|     group = cmd.add_argument_group("Stream filtering") | ||||
|     group.add_argument("-p", "--path", metavar="PATH", default="*", | ||||
|                        help="Match only this path (-p can be omitted)", | ||||
|     group.add_argument("path", metavar="PATH", default=["*"], nargs='*', | ||||
|                        ).completer = self.complete.path | ||||
|     group.add_argument("path_positional", default="*", | ||||
|                        nargs="?", help=argparse.SUPPRESS, | ||||
|                        ).completer = self.complete.path | ||||
|     group.add_argument("-l", "--layout", default="*", | ||||
|                        help="Match only this stream layout", | ||||
|                        ).completer = self.complete.layout | ||||
|  | ||||
|     group = cmd.add_argument_group("Interval info") | ||||
|     group.add_argument("-E", "--ext", action="store_true", | ||||
| @@ -49,20 +43,12 @@ def setup(self, sub): | ||||
|     group = cmd.add_argument_group("Misc options") | ||||
|     group.add_argument("-T", "--timestamp-raw", action="store_true", | ||||
|                        help="Show raw timestamps when printing times") | ||||
|     group.add_argument("-l", "--layout", action="store_true", | ||||
|                        help="Show layout type next to path name") | ||||
|  | ||||
|     return cmd | ||||
|  | ||||
| def cmd_list_verify(self): | ||||
|     # A hidden "path_positional" argument lets the user leave off the | ||||
|     # "-p" when specifying the path.  Handle it here. | ||||
|     got_opt = self.args.path != "*" | ||||
|     got_pos = self.args.path_positional != "*" | ||||
|     if got_pos: | ||||
|         if got_opt: | ||||
|             self.parser.error("too many paths specified") | ||||
|         else: | ||||
|             self.args.path = self.args.path_positional | ||||
|  | ||||
|     if self.args.start is not None and self.args.end is not None: | ||||
|         if self.args.start >= self.args.end: | ||||
|             self.parser.error("start must precede end") | ||||
| @@ -80,29 +66,33 @@ def cmd_list(self): | ||||
|     else: | ||||
|         time_string = nilmdb.utils.time.timestamp_to_human | ||||
|  | ||||
|     for stream in streams: | ||||
|         (path, layout, int_min, int_max, rows, time) = stream[:6] | ||||
|         if not (fnmatch.fnmatch(path, self.args.path) and | ||||
|                 fnmatch.fnmatch(layout, self.args.layout)): | ||||
|             continue | ||||
|     for argpath in self.args.path: | ||||
|         for stream in streams: | ||||
|             (path, layout, int_min, int_max, rows, time) = stream[:6] | ||||
|             if not fnmatch.fnmatch(path, argpath): | ||||
|                 continue | ||||
|  | ||||
|         printf("%s %s\n", path, layout) | ||||
|  | ||||
|         if self.args.ext: | ||||
|             if int_min is None or int_max is None: | ||||
|                 printf("  interval extents: (no data)\n") | ||||
|             if self.args.layout: | ||||
|                 printf("%s %s\n", path, layout) | ||||
|             else: | ||||
|                 printf("  interval extents: %s -> %s\n", | ||||
|                        time_string(int_min), time_string(int_max)) | ||||
|             printf("        total data: %d rows, %.6f seconds\n", | ||||
|                    rows or 0, | ||||
|                    nilmdb.utils.time.timestamp_to_seconds(time or 0)) | ||||
|                 printf("%s\n", path) | ||||
|  | ||||
|         if self.args.detail: | ||||
|             printed = False | ||||
|             for (start, end) in self.client.stream_intervals( | ||||
|                 path, self.args.start, self.args.end): | ||||
|                 printf("  [ %s -> %s ]\n", time_string(start), time_string(end)) | ||||
|                 printed = True | ||||
|             if not printed: | ||||
|                 printf("  (no intervals)\n") | ||||
|             if self.args.ext: | ||||
|                 if int_min is None or int_max is None: | ||||
|                     printf("  interval extents: (no data)\n") | ||||
|                 else: | ||||
|                     printf("  interval extents: %s -> %s\n", | ||||
|                            time_string(int_min), time_string(int_max)) | ||||
|                 printf("        total data: %d rows, %.6f seconds\n", | ||||
|                        rows or 0, | ||||
|                        nilmdb.utils.time.timestamp_to_seconds(time or 0)) | ||||
|  | ||||
|             if self.args.detail: | ||||
|                 printed = False | ||||
|                 for (start, end) in self.client.stream_intervals( | ||||
|                     path, self.args.start, self.args.end): | ||||
|                     printf("  [ %s -> %s ]\n", | ||||
|                            time_string(start), time_string(end)) | ||||
|                     printed = True | ||||
|                 if not printed: | ||||
|                     printf("  (no intervals)\n") | ||||
|   | ||||
| @@ -9,7 +9,8 @@ def setup(self, sub): | ||||
|                          a stream. | ||||
|                          """, | ||||
|                          usage="%(prog)s path [-g [key ...] | " | ||||
|                          "-s key=value [...] | -u key=value [...]]") | ||||
|                          "-s key=value [...] | -u key=value [...]] | " | ||||
|                          "-d [key ...]") | ||||
|     cmd.set_defaults(handler = cmd_metadata) | ||||
|  | ||||
|     group = cmd.add_argument_group("Required arguments") | ||||
| @@ -30,6 +31,9 @@ def setup(self, sub): | ||||
|                      help="Update metadata using provided " | ||||
|                      "key=value pairs", | ||||
|                      ).completer = self.complete.meta_keyval | ||||
|     exc.add_argument("-d", "--delete", nargs="*", metavar="key", | ||||
|                      help="Delete metadata for specified keys (default all)", | ||||
|                      ).completer = self.complete.meta_key | ||||
|     return cmd | ||||
|  | ||||
| def cmd_metadata(self): | ||||
| @@ -37,10 +41,10 @@ def cmd_metadata(self): | ||||
|     if self.args.set is not None or self.args.update is not None: | ||||
|         # Either set, or update | ||||
|         if self.args.set is not None: | ||||
|             keyvals = self.args.set | ||||
|             keyvals = map(nilmdb.utils.unicode.decode, self.args.set) | ||||
|             handler = self.client.stream_set_metadata | ||||
|         else: | ||||
|             keyvals = self.args.update | ||||
|             keyvals = map(nilmdb.utils.unicode.decode, self.args.update) | ||||
|             handler = self.client.stream_update_metadata | ||||
|  | ||||
|         # Extract key=value pairs | ||||
| @@ -56,15 +60,31 @@ def cmd_metadata(self): | ||||
|             handler(self.args.path, data) | ||||
|         except nilmdb.client.ClientError as e: | ||||
|             self.die("error setting/updating metadata: %s", str(e)) | ||||
|     elif self.args.delete is not None: | ||||
|         # Delete (by setting values to empty strings) | ||||
|         keys = None | ||||
|         if self.args.delete: | ||||
|             keys = map(nilmdb.utils.unicode.decode, self.args.delete) | ||||
|         try: | ||||
|             data = self.client.stream_get_metadata(self.args.path, keys) | ||||
|             for key in data: | ||||
|                 data[key] = "" | ||||
|             self.client.stream_update_metadata(self.args.path, data) | ||||
|         except nilmdb.client.ClientError as e: | ||||
|             self.die("error deleting metadata: %s", str(e)) | ||||
|     else: | ||||
|         # Get (or unspecified) | ||||
|         keys = self.args.get or None | ||||
|         keys = None | ||||
|         if self.args.get: | ||||
|             keys = map(nilmdb.utils.unicode.decode, self.args.get) | ||||
|         try: | ||||
|             data = self.client.stream_get_metadata(self.args.path, keys) | ||||
|         except nilmdb.client.ClientError as e: | ||||
|             self.die("error getting metadata: %s", str(e)) | ||||
|         for key, value in sorted(data.items()): | ||||
|             # Omit nonexistant keys | ||||
|             # Print nonexistant keys as having empty value | ||||
|             if value is None: | ||||
|                 value = "" | ||||
|             printf("%s=%s\n", key, value) | ||||
|             printf("%s=%s\n", | ||||
|                    nilmdb.utils.unicode.encode(key), | ||||
|                    nilmdb.utils.unicode.encode(value)) | ||||
|   | ||||
| @@ -1,17 +1,19 @@ | ||||
| from nilmdb.utils.printf import * | ||||
| import nilmdb.client | ||||
| import fnmatch | ||||
|  | ||||
| def setup(self, sub): | ||||
|     cmd = sub.add_parser("remove", help="Remove data", | ||||
|                          description=""" | ||||
|                          Remove all data from a specified time range within a | ||||
|                          stream. | ||||
|                          stream.  If multiple streams or wildcards are provided, | ||||
|                          the same time range is removed from all streams. | ||||
|                          """) | ||||
|     cmd.set_defaults(handler = cmd_remove) | ||||
|  | ||||
|     group = cmd.add_argument_group("Data selection") | ||||
|     group.add_argument("path", | ||||
|                        help="Path of stream, e.g. /foo/bar", | ||||
|     group.add_argument("path", nargs='+', | ||||
|                        help="Path of stream, e.g. /foo/bar/*", | ||||
|                        ).completer = self.complete.path | ||||
|     group.add_argument("-s", "--start", required=True, | ||||
|                        metavar="TIME", type=self.arg_time, | ||||
| @@ -23,18 +25,31 @@ def setup(self, sub): | ||||
|                        ).completer = self.complete.time | ||||
|  | ||||
|     group = cmd.add_argument_group("Output format") | ||||
|     group.add_argument("-q", "--quiet", action="store_true", | ||||
|                        help="Don't display names when removing " | ||||
|                        "from multiple paths") | ||||
|     group.add_argument("-c", "--count", action="store_true", | ||||
|                        help="Output number of data points removed") | ||||
|     return cmd | ||||
|  | ||||
| def cmd_remove(self): | ||||
|     streams = [ s[0] for s in self.client.stream_list() ] | ||||
|     paths = [] | ||||
|     for path in self.args.path: | ||||
|         new = fnmatch.filter(streams, path) | ||||
|         if not new: | ||||
|             self.die("error: no stream matched path: %s", path) | ||||
|         paths.extend(new) | ||||
|  | ||||
|     try: | ||||
|         count = self.client.stream_remove(self.args.path, | ||||
|                                           self.args.start, self.args.end) | ||||
|         for path in paths: | ||||
|             if not self.args.quiet and len(paths) > 1: | ||||
|                 printf("Removing from %s\n", path) | ||||
|             count = self.client.stream_remove(path, | ||||
|                                               self.args.start, self.args.end) | ||||
|             if self.args.count: | ||||
|                 printf("%d\n", count); | ||||
|     except nilmdb.client.ClientError as e: | ||||
|         self.die("error removing data: %s", str(e)) | ||||
|  | ||||
|     if self.args.count: | ||||
|         printf("%d\n", count) | ||||
|  | ||||
|     return 0 | ||||
|   | ||||
							
								
								
									
										5
									
								
								nilmdb/fsck/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										5
									
								
								nilmdb/fsck/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,5 @@ | ||||
| """nilmdb.fsck""" | ||||
|  | ||||
| from __future__ import absolute_import | ||||
|  | ||||
| from nilmdb.fsck.fsck import Fsck | ||||
							
								
								
									
										458
									
								
								nilmdb/fsck/fsck.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										458
									
								
								nilmdb/fsck/fsck.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,458 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| """Check database consistency, with some ability to fix problems. | ||||
| This should be able to fix cases where a database gets corrupted due | ||||
| to unexpected system shutdown, and detect other cases that may cause | ||||
| NilmDB to return errors when trying to manipulate the database.""" | ||||
|  | ||||
| import nilmdb.utils | ||||
| import nilmdb.server | ||||
| import nilmdb.client.numpyclient | ||||
| from nilmdb.utils.interval import IntervalError | ||||
| from nilmdb.server.interval import Interval, IntervalSet | ||||
| from nilmdb.utils.printf import * | ||||
| from nilmdb.utils.time import timestamp_to_string | ||||
|  | ||||
| from collections import defaultdict | ||||
| import sqlite3 | ||||
| import os | ||||
| import sys | ||||
| import progressbar | ||||
| import re | ||||
| import time | ||||
| import shutil | ||||
| import cPickle as pickle | ||||
| import numpy | ||||
|  | ||||
class FsckError(Exception):
    """Error reported while checking the database.

    'msg' is used as-is unless extra arguments are given, in which
    case 'msg' and the arguments are passed through sprintf() to build
    the message.
    """
    def __init__(self, msg = "", *args):
        text = sprintf(msg, *args) if args else msg
        Exception.__init__(self, text)
class FixableFsckError(FsckError):
    """FsckError whose message also points the user at "--fix".

    'msg' and any extra arguments are handled as in FsckError; a
    second line mentioning "--fix" is appended to the result.
    """
    def __init__(self, msg = "", *args):
        text = sprintf(msg, *args) if args else msg
        FsckError.__init__(self, "%s\nThis may be fixable with \"--fix\".", text)
class RetryFsck(FsckError):
    """Raised to make the check start over; Fsck.check() is retried
    whenever this propagates out of it."""
|  | ||||
def log(format, *args):
    """Emit a formatted message by handing 'format' and 'args' to printf()."""
    printf(format, *args)
|  | ||||
def err(format, *args):
    """Emit a formatted message on sys.stderr, using fprintf()."""
    fprintf(sys.stderr, format, *args)
|  | ||||
# Decorator that retries a function if it raises a specific exception
def retry_if_raised(exc, message = None, max_retries = 100):
    """Return a decorator that re-runs a function while it raises 'exc'.

    'exc' is the exception class (or tuple of classes) that triggers a
    retry; any other exception propagates immediately.  If 'message'
    is set, it is logged before each retry.  The function is called at
    most 'max_retries' times; if every call raised 'exc', a plain
    Exception is raised instead.

    The wrapper returns whatever the first successful call returns.
    """
    # Imported here so this block doesn't depend on the file's imports
    import functools

    def f1(func):
        # Keep the wrapped function's name and docstring
        @functools.wraps(func)
        def f2(*args, **kwargs):
            for n in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except exc:
                    if message:
                        log("%s\n\n", message)
            # The retry count has to be filled in here; nothing formats
            # this string for us later.
            raise Exception("Max number of retries (%d) exceeded; giving up"
                            % max_retries)
        return f2
    return f1
|  | ||||
class Progress(object):
    """Context manager wrapping a progressbar.ProgressBar.

    The bar shows a percentage, the bar itself and an ETA.  Entering
    the context starts the bar; leaving it finishes the bar on
    success, or just prints a newline if an exception is propagating.
    """
    def __init__(self, maxval):
        widgets = [ progressbar.Percentage(), ' ',
                    progressbar.Bar(), ' ',
                    progressbar.ETA() ]
        bar = progressbar.ProgressBar(maxval = maxval, widgets = widgets)
        # A reported width of zero gets replaced by a fixed 75 columns
        if bar.term_width == 0:
            bar.term_width = 75
        self.bar = bar

    def __enter__(self):
        self.bar.start()
        self.last_update = 0
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is not None:
            # Leaving because of an exception: don't finish the bar,
            # just end the current line
            printf("\n")
            return
        self.bar.finish()

    def update(self, val):
        """Move the bar to 'val'."""
        self.bar.update(val)
|  | ||||
| class Fsck(object): | ||||
|  | ||||
|     def __init__(self, path, fix = False): | ||||
|         self.basepath = path | ||||
|         self.sqlpath = os.path.join(path, "data.sql") | ||||
|         self.bulkpath = os.path.join(path, "data") | ||||
|         self.bulklock = os.path.join(path, "data.lock") | ||||
|         self.fix = fix | ||||
|  | ||||
|     ### Main checks | ||||
|  | ||||
|     @retry_if_raised(RetryFsck, "Something was fixed: restarting fsck") | ||||
|     def check(self, skip_data = False): | ||||
|         self.bulk = None | ||||
|         self.sql = None | ||||
|         try: | ||||
|             self.check_paths() | ||||
|             self.check_sql() | ||||
|             self.check_streams() | ||||
|             self.check_intervals() | ||||
|             if skip_data: | ||||
|                 log("skipped data check\n") | ||||
|             else: | ||||
|                 self.check_data() | ||||
|         finally: | ||||
|             if self.bulk: | ||||
|                 self.bulk.close() | ||||
|             if self.sql: | ||||
|                 self.sql.commit() | ||||
|                 self.sql.close() | ||||
|         log("ok\n") | ||||
|  | ||||
|     ### Check basic path structure | ||||
|  | ||||
|     def check_paths(self): | ||||
|         log("checking paths\n") | ||||
|         if self.bulk: | ||||
|             self.bulk.close() | ||||
|         if not os.path.isfile(self.sqlpath): | ||||
|             raise FsckError("SQL database missing (%s)", self.sqlpath) | ||||
|         if not os.path.isdir(self.bulkpath): | ||||
|             raise FsckError("Bulk data directory missing (%s)", self.bulkpath) | ||||
|         with open(self.bulklock, "w") as lockfile: | ||||
|             if not nilmdb.utils.lock.exclusive_lock(lockfile): | ||||
|                 raise FsckError('Database already locked by another process\n' | ||||
|                                 'Make sure all other processes that might be ' | ||||
|                                 'using the database are stopped.\n' | ||||
|                                 'Restarting apache will cause it to unlock ' | ||||
|                                 'the db until a request is received.') | ||||
|             # unlocked immediately | ||||
|         self.bulk = nilmdb.server.bulkdata.BulkData(self.basepath) | ||||
|  | ||||
|     ### Check SQL database health | ||||
|  | ||||
|     def check_sql(self): | ||||
|         log("checking sqlite database\n") | ||||
|  | ||||
|         self.sql = sqlite3.connect(self.sqlpath) | ||||
|         with self.sql: | ||||
|             cur = self.sql.cursor() | ||||
|             ver = cur.execute("PRAGMA user_version").fetchone()[0] | ||||
|             good = max(nilmdb.server.nilmdb._sql_schema_updates.keys()) | ||||
|             if ver != good: | ||||
|                 raise FsckError("database version %d too old, should be %d", | ||||
|                                 ver, good) | ||||
|             self.stream_path = {} | ||||
|             self.stream_layout = {} | ||||
|             log("  loading paths\n") | ||||
|             result = cur.execute("SELECT id, path, layout FROM streams") | ||||
|             for r in result: | ||||
|                 if r[0] in self.stream_path: | ||||
|                     raise FsckError("duplicated ID %d in stream IDs", r[0]) | ||||
|                 self.stream_path[r[0]] = r[1] | ||||
|                 self.stream_layout[r[0]] = r[2] | ||||
|  | ||||
|             log("  loading intervals\n") | ||||
|             self.stream_interval = defaultdict(list) | ||||
|             result = cur.execute("SELECT stream_id, start_time, end_time, " | ||||
|                                  "start_pos, end_pos FROM ranges " | ||||
|                                  "ORDER BY start_time") | ||||
|             for r in result: | ||||
|                 if r[0] not in self.stream_path: | ||||
|                     raise FsckError("interval ID %d not in streams", k) | ||||
|                 self.stream_interval[r[0]].append((r[1], r[2], r[3], r[4])) | ||||
|  | ||||
|             log("  loading metadata\n") | ||||
|             self.stream_meta = defaultdict(dict) | ||||
|             result = cur.execute("SELECT stream_id, key, value FROM metadata") | ||||
|             for r in result: | ||||
|                 if r[0] not in self.stream_path: | ||||
|                     raise FsckError("metadata ID %d not in streams", k) | ||||
|                 if r[1] in self.stream_meta[r[0]]: | ||||
|                     raise FsckError("duplicate metadata key '%s' for stream %d", | ||||
|                                     r[1], r[0]) | ||||
|                 self.stream_meta[r[0]][r[1]] = r[2] | ||||
|  | ||||
|     ### Check streams and basic interval overlap | ||||
|  | ||||
|     def check_streams(self): | ||||
|         ids = self.stream_path.keys() | ||||
|         log("checking %s streams\n", "{:,d}".format(len(ids))) | ||||
|         with Progress(len(ids)) as pbar: | ||||
|             for i, sid in enumerate(ids): | ||||
|                 pbar.update(i) | ||||
|                 path = self.stream_path[sid] | ||||
|  | ||||
|                 # unique path, valid layout | ||||
|                 if self.stream_path.values().count(path) != 1: | ||||
|                     raise FsckError("duplicated path %s", path) | ||||
|                 layout = self.stream_layout[sid].split('_')[0] | ||||
|                 if layout not in ('int8', 'int16', 'int32', 'int64', | ||||
|                                   'uint8', 'uint16', 'uint32', 'uint64', | ||||
|                                   'float32', 'float64'): | ||||
|                     raise FsckError("bad layout %s for %s", layout, path) | ||||
|                 count = int(self.stream_layout[sid].split('_')[1]) | ||||
|                 if count < 1 or count > 1024: | ||||
|                     raise FsckError("bad count %d for %s", count, path) | ||||
|  | ||||
|                 # must exist in bulkdata | ||||
|                 bulk = self.bulkpath + path | ||||
|                 if not os.path.isdir(bulk): | ||||
|                     raise FsckError("%s: missing bulkdata dir", path) | ||||
|                 if not nilmdb.server.bulkdata.Table.exists(bulk): | ||||
|                     raise FsckError("%s: bad bulkdata table", path) | ||||
|  | ||||
|                 # intervals don't overlap.  Abuse IntervalSet to check | ||||
|                 # for intervals in file positions, too. | ||||
|                 timeiset = IntervalSet() | ||||
|                 posiset = IntervalSet() | ||||
|                 for (stime, etime, spos, epos) in self.stream_interval[sid]: | ||||
|                     new = Interval(stime, etime) | ||||
|                     try: | ||||
|                         timeiset += new | ||||
|                     except IntervalError: | ||||
|                         raise FsckError("%s: overlap in intervals:\n" | ||||
|                                         "set: %s\nnew: %s", | ||||
|                                         path, str(timeiset), str(new)) | ||||
|                     if spos != epos: | ||||
|                         new = Interval(spos, epos) | ||||
|                         try: | ||||
|                             posiset += new | ||||
|                         except IntervalError: | ||||
|                             raise FsckError("%s: overlap in file offsets:\n" | ||||
|                                             "set: %s\nnew: %s", | ||||
|                                             path, str(posiset), str(new)) | ||||
|  | ||||
|                 # check bulkdata | ||||
|                 self.check_bulkdata(sid, path, bulk) | ||||
|  | ||||
|                 # Check that we can open bulkdata | ||||
|                 try: | ||||
|                     tab = None | ||||
|                     try: | ||||
|                         tab = nilmdb.server.bulkdata.Table(bulk) | ||||
|                     except Exception as e: | ||||
|                         raise FsckError("%s: can't open bulkdata: %s", | ||||
|                                         path, str(e)) | ||||
|                 finally: | ||||
|                     if tab: | ||||
|                         tab.close() | ||||
|  | ||||
|     ### Check that bulkdata is good enough to be opened | ||||
|  | ||||
|     @retry_if_raised(RetryFsck) | ||||
|     def check_bulkdata(self, sid, path, bulk): | ||||
|         with open(os.path.join(bulk, "_format"), "rb") as f: | ||||
|             fmt = pickle.load(f) | ||||
|         if fmt["version"] != 3: | ||||
|             raise FsckError("%s: bad or unsupported bulkdata version %d", | ||||
|                             path, fmt["version"]) | ||||
|         row_per_file = int(fmt["rows_per_file"]) | ||||
|         files_per_dir = int(fmt["files_per_dir"]) | ||||
|         layout = fmt["layout"] | ||||
|         if layout != self.stream_layout[sid]: | ||||
|             raise FsckError("%s: layout mismatch %s != %s", path, | ||||
|                             layout, self.stream_layout[sid]) | ||||
|  | ||||
|         # Every file should have a size that's the multiple of the row size | ||||
|         rkt = nilmdb.server.rocket.Rocket(layout, None) | ||||
|         row_size = rkt.binary_size | ||||
|         rkt.close() | ||||
|  | ||||
|         # Find all directories | ||||
|         regex = re.compile("^[0-9a-f]{4,}$") | ||||
|         subdirs = sorted(filter(regex.search, os.listdir(bulk)), | ||||
|                          key = lambda x: int(x, 16), reverse = True) | ||||
|         for subdir in subdirs: | ||||
|             # Find all files in that dir | ||||
|             subpath = os.path.join(bulk, subdir) | ||||
|             files = filter(regex.search, os.listdir(subpath)) | ||||
|             if not files: | ||||
|                 self.fix_empty_subdir(subpath) | ||||
|                 raise RetryFsck | ||||
|             # Verify that their size is a multiple of the row size | ||||
|             for filename in files: | ||||
|                 filepath = os.path.join(subpath, filename) | ||||
|                 offset = os.path.getsize(filepath) | ||||
|                 if offset % row_size: | ||||
|                     self.fix_bad_filesize(path, filepath, offset, row_size) | ||||
|  | ||||
|     def fix_empty_subdir(self, subpath): | ||||
|         msg = sprintf("bulkdata path %s is missing data files", subpath) | ||||
|         if not self.fix: | ||||
|             raise FixableFsckError(msg) | ||||
|         # Try to fix it by just deleting whatever is present, | ||||
|         # as long as it's only ".removed" files. | ||||
|         err("\n%s\n", msg) | ||||
|         for fn in os.listdir(subpath): | ||||
|             if not fn.endswith(".removed"): | ||||
|                 raise FsckError("can't fix automatically: please manually " | ||||
|                                 "remove the file %s and try again", | ||||
|                                 os.path.join(subpath, fn)) | ||||
|         # Remove the whole thing | ||||
|         err("Removing empty subpath\n") | ||||
|         shutil.rmtree(subpath) | ||||
|         raise RetryFsck | ||||
|  | ||||
|     def fix_bad_filesize(self, path, filepath, offset, row_size): | ||||
|         extra = offset % row_size | ||||
|         msg = sprintf("%s: size of file %s (%d) is not a multiple" + | ||||
|                       " of row size (%d): %d extra bytes present", | ||||
|                       path, filepath, offset, row_size, extra) | ||||
|         if not self.fix: | ||||
|             raise FixableFsckError(msg) | ||||
|         # Try to fix it by just truncating the file | ||||
|         err("\n%s\n", msg) | ||||
|         newsize = offset - extra | ||||
|         err("Truncating file to %d bytes and retrying\n", newsize) | ||||
|         with open(filepath, "r+b") as f: | ||||
|             f.truncate(newsize) | ||||
|             raise RetryFsck | ||||
|  | ||||
|     ### Check interval endpoints | ||||
|  | ||||
|     def check_intervals(self): | ||||
|         total_ints = sum(len(x) for x in self.stream_interval.values()) | ||||
|         log("checking %s intervals\n", "{:,d}".format(total_ints)) | ||||
|         done = 0 | ||||
|         with Progress(total_ints) as pbar: | ||||
|             for sid in self.stream_interval: | ||||
|                 try: | ||||
|                     bulk = self.bulkpath + self.stream_path[sid] | ||||
|                     tab = nilmdb.server.bulkdata.Table(bulk) | ||||
|                     def update(x): | ||||
|                         pbar.update(done + x) | ||||
|                     ints = self.stream_interval[sid] | ||||
|                     done += self.check_table_intervals(sid, ints, tab, update) | ||||
|                 finally: | ||||
|                     tab.close() | ||||
|  | ||||
|     def check_table_intervals(self, sid, ints, tab, update): | ||||
|         # look in the table to make sure we can pick out the interval's | ||||
|         # endpoints | ||||
|         path = self.stream_path[sid] | ||||
|         tab.file_open.cache_remove_all() | ||||
|         for (i, intv) in enumerate(ints): | ||||
|             update(i) | ||||
|             (stime, etime, spos, epos) = intv | ||||
|             if spos == epos and spos >= 0 and spos <= tab.nrows: | ||||
|                 continue | ||||
|             try: | ||||
|                 srow = tab[spos] | ||||
|                 erow = tab[epos-1] | ||||
|             except Exception as e: | ||||
|                 self.fix_bad_interval(sid, intv, tab, str(e)) | ||||
|                 raise RetryFsck | ||||
|         return len(ints) | ||||
|  | ||||
|     def fix_bad_interval(self, sid, intv, tab, msg): | ||||
|         path = self.stream_path[sid] | ||||
|         msg = sprintf("%s: interval %s error accessing rows: %s", | ||||
|                       path, str(intv), str(msg)) | ||||
|         if not self.fix: | ||||
|             raise FixableFsckError(msg) | ||||
|         err("\n%s\n", msg) | ||||
|  | ||||
|         (stime, etime, spos, epos) = intv | ||||
|         # If it's just that the end pos is more than the number of rows | ||||
|         # in the table, lower end pos and truncate interval time too. | ||||
|         if spos < tab.nrows and epos >= tab.nrows: | ||||
|             err("end position is past endrows, but it can be truncated\n") | ||||
|             err("old end: time %d, pos %d\n", etime, epos) | ||||
|             new_epos = tab.nrows | ||||
|             new_etime = tab[new_epos-1] + 1 | ||||
|             err("new end: time %d, pos %d\n", new_etime, new_epos) | ||||
|             if stime < new_etime: | ||||
|                 # Change it in SQL | ||||
|                 with self.sql: | ||||
|                     cur = self.sql.cursor() | ||||
|                     cur.execute("UPDATE ranges SET end_time=?, end_pos=? " | ||||
|                                 "WHERE stream_id=? AND start_time=? AND " | ||||
|                                 "end_time=? AND start_pos=? AND end_pos=?", | ||||
|                                 (new_etime, new_epos, sid, stime, etime, | ||||
|                                  spos, epos)) | ||||
|                     if cur.rowcount != 1: | ||||
|                         raise FsckError("failed to fix SQL database") | ||||
|                 raise RetryFsck | ||||
|             err("actually it can't be truncated; times are bad too") | ||||
|  | ||||
|         # Otherwise, the only hope is to delete the interval entirely. | ||||
|         err("*** Deleting the entire interval from SQL.\n") | ||||
|         err("This may leave stale data on disk.  To fix that, copy all\n") | ||||
|         err("data from this stream to a new stream, then remove all data\n") | ||||
|         err("from and destroy %s.\n") | ||||
|         with self.sql: | ||||
|             cur = self.sql.cursor() | ||||
|             cur.execute("DELETE FROM ranges WHERE " | ||||
|                         "stream_id=? AND start_time=? AND " | ||||
|                         "end_time=? AND start_pos=? AND end_pos=?", | ||||
|                         (sid, stime, etime, spos, epos)) | ||||
|             if cur.rowcount != 1: | ||||
|                 raise FsckError("failed to remove interval") | ||||
|         raise RetryFsck | ||||
|  | ||||
|     ### Check data in each interval | ||||
|  | ||||
|     def check_data(self): | ||||
|         total_rows = sum(sum((y[3] - y[2]) for y in x) | ||||
|                          for x in self.stream_interval.values()) | ||||
|         log("checking %s rows of data\n", "{:,d}".format(total_rows)) | ||||
|         done = 0 | ||||
|         with Progress(total_rows) as pbar: | ||||
|             for sid in self.stream_interval: | ||||
|                 try: | ||||
|                     bulk = self.bulkpath + self.stream_path[sid] | ||||
|                     tab = nilmdb.server.bulkdata.Table(bulk) | ||||
|                     def update(x): | ||||
|                         pbar.update(done + x) | ||||
|                     ints = self.stream_interval[sid] | ||||
|                     done += self.check_table_data(sid, ints, tab, update) | ||||
|                 finally: | ||||
|                     tab.close() | ||||
|  | ||||
|     def check_table_data(self, sid, ints, tab, update): | ||||
|         # Pull out all of the interval's data and verify that it's | ||||
|         # monotonic. | ||||
|         maxrows = 100000 | ||||
|         path = self.stream_path[sid] | ||||
|         layout = self.stream_layout[sid] | ||||
|         dtype = nilmdb.client.numpyclient.layout_to_dtype(layout) | ||||
|         tab.file_open.cache_remove_all() | ||||
|         done = 0 | ||||
|         for intv in ints: | ||||
|             last_ts = None | ||||
|             (stime, etime, spos, epos) = intv | ||||
|             if spos == epos: | ||||
|                 continue | ||||
|             for start in xrange(*slice(spos, epos, maxrows).indices(epos)): | ||||
|                 stop = min(start + maxrows, epos) | ||||
|                 count = stop - start | ||||
|                 # Get raw data, convert to NumPy arary | ||||
|                 try: | ||||
|                     raw = tab.get_data(start, stop, binary = True) | ||||
|                     data = numpy.fromstring(raw, dtype) | ||||
|                 except Exception as e: | ||||
|                     raise FsckError("%s: failed to grab rows %d through %d: %s", | ||||
|                                     path, start, stop, repr(e)) | ||||
|  | ||||
|                 # Verify that timestamps are monotonic | ||||
|                 if (numpy.diff(data['timestamp']) <= 0).any(): | ||||
|                     raise FsckError("%s: non-monotonic timestamp(s) in rows " | ||||
|                                     "%d through %d", path, start, stop) | ||||
|                 first_ts = data['timestamp'][0] | ||||
|                 if last_ts is not None and first_ts <= last_ts: | ||||
|                     raise FsckError("%s: first interval timestamp %d is not " | ||||
|                                     "greater than the previous last interval " | ||||
|                                     "timestamp %d, at row %d", | ||||
|                                     path, first_ts, last_ts, start) | ||||
|                 last_ts = data['timestamp'][-1] | ||||
|  | ||||
|                 # These are probably fixable, by removing the offending | ||||
|                 # intervals.  But I'm not going to bother implementing | ||||
|                 # that yet. | ||||
|  | ||||
|                 # Done | ||||
|                 done += count | ||||
|                 update(done) | ||||
|         return done | ||||
							
								
								
									
										27
									
								
								nilmdb/scripts/nilmdb_fsck.py
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										27
									
								
								nilmdb/scripts/nilmdb_fsck.py
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| #!/usr/bin/python | ||||
|  | ||||
| import nilmdb.fsck | ||||
| import argparse | ||||
| import os | ||||
| import sys | ||||
|  | ||||
def main():
    """Main entry point for the 'nilmdb-fsck' command line script.

    Parses the command line and runs the consistency check on the given
    database directory.
    """
    parser = argparse.ArgumentParser(
        formatter_class = argparse.ArgumentDefaultsHelpFormatter,
        description = 'Check database consistency')
    add = parser.add_argument
    add("-V", "--version", action="version", version = nilmdb.__version__)
    add("-f", "--fix", action="store_true", default=False,
        help = 'Fix errors when possible (which may involve removing data)')
    add("-n", "--no-data", action="store_true", default=False,
        help = 'Skip the slow full-data check')
    add('database', help = 'Database directory')
    opts = parser.parse_args()

    checker = nilmdb.fsck.Fsck(opts.database, opts.fix)
    checker.check(skip_data = opts.no_data)
|  | ||||
| if __name__ == "__main__": | ||||
|     main() | ||||
| @@ -19,8 +19,8 @@ from . import rocket | ||||
|  | ||||
| # Up to 256 open file descriptors at any given time. | ||||
| # These variables are global so they can be used in the decorator arguments. | ||||
| table_cache_size = 16 | ||||
| fd_cache_size = 16 | ||||
| table_cache_size = 32 | ||||
| fd_cache_size = 8 | ||||
|  | ||||
| @nilmdb.utils.must_close(wrap_verify = False) | ||||
| class BulkData(object): | ||||
| @@ -79,7 +79,12 @@ class BulkData(object): | ||||
|         if Table.exists(ospath): | ||||
|             raise ValueError("stream already exists at this path") | ||||
|         if os.path.isdir(ospath): | ||||
|             raise ValueError("subdirs of this path already exist") | ||||
|             # Look for any files in subdirectories.  Fully empty subdirectories | ||||
|             # are OK; they might be there during a rename | ||||
|             for (root, dirs, files) in os.walk(ospath): | ||||
|                 if len(files): | ||||
|                     raise ValueError( | ||||
|                         "non-empty subdirs of this path already exist") | ||||
|  | ||||
|     def _create_parents(self, unicodepath): | ||||
|         """Verify the path name, and create parent directories if they | ||||
| @@ -188,7 +193,6 @@ class BulkData(object): | ||||
|         # Basic checks | ||||
|         if oldospath == newospath: | ||||
|             raise ValueError("old and new paths are the same") | ||||
|         self._create_check_ospath(newospath) | ||||
|  | ||||
|         # Move the table to a temporary location | ||||
|         tmpdir = tempfile.mkdtemp(prefix = "rename-", dir = self.root) | ||||
| @@ -196,6 +200,9 @@ class BulkData(object): | ||||
|         os.rename(oldospath, tmppath) | ||||
|  | ||||
|         try: | ||||
|             # Check destination path | ||||
|             self._create_check_ospath(newospath) | ||||
|  | ||||
|             # Create parent dirs for new location | ||||
|             self._create_parents(newunicodepath) | ||||
|  | ||||
| @@ -323,7 +330,8 @@ class Table(object): | ||||
|  | ||||
|         # Find the last directory.  We sort and loop through all of them, | ||||
|         # starting with the numerically greatest, because the dirs could be | ||||
|         # empty if something was deleted. | ||||
|         # empty if something was deleted but the directory was unexpectedly | ||||
|         # not deleted. | ||||
|         subdirs = sorted(filter(regex.search, os.listdir(self.root)), | ||||
|                          key = lambda x: int(x, 16), reverse = True) | ||||
|  | ||||
| @@ -406,12 +414,16 @@ class Table(object): | ||||
|         return rocket.Rocket(self.layout, | ||||
|                              os.path.join(self.root, subdir, filename)) | ||||
|  | ||||
|     def append_string(self, data, start, end): | ||||
|     def append_data(self, data, start, end, binary = False): | ||||
|         """Parse the formatted string in 'data', according to the | ||||
|         current layout, and append it to the table.  If any timestamps | ||||
|         are non-monotonic, or don't fall between 'start' and 'end', | ||||
|         a ValueError is raised. | ||||
|  | ||||
|         If 'binary' is True, the data should be in raw binary format | ||||
|         instead: little-endian, matching the current table's layout, | ||||
|         including the int64 timestamp. | ||||
|  | ||||
|         If this function succeeds, it returns normally.  Otherwise, | ||||
|         the table is reverted back to its original state by truncating | ||||
|         or deleting files as necessary.""" | ||||
| @@ -430,17 +442,26 @@ class Table(object): | ||||
|                 # Ask the rocket object to parse and append up to "count" | ||||
|                 # rows of data, verifying things along the way. | ||||
|                 try: | ||||
|                     if binary: | ||||
|                         appender = f.append_binary | ||||
|                     else: | ||||
|                         appender = f.append_string | ||||
|                     (added_rows, data_offset, last_timestamp, linenum | ||||
|                      ) = f.append_string(count, data, data_offset, linenum, | ||||
|                                          start, end, last_timestamp) | ||||
|                      ) = appender(count, data, data_offset, linenum, | ||||
|                                   start, end, last_timestamp) | ||||
|                 except rocket.ParseError as e: | ||||
|                     (linenum, colnum, errtype, obj) = e.args | ||||
|                     where = "line %d, column %d: " % (linenum, colnum) | ||||
|                     if binary: | ||||
|                         where = "byte %d: " % (linenum) | ||||
|                     else: | ||||
|                         where = "line %d, column %d: " % (linenum, colnum) | ||||
|                     # Extract out the error line, add column marker | ||||
|                     try: | ||||
|                         if binary: | ||||
|                             raise IndexError | ||||
|                         bad = data.splitlines()[linenum-1] | ||||
|                         badptr = ' ' * (colnum - 1) + '^' | ||||
|                     except IndexError: # pragma: no cover | ||||
|                         bad += '\n' + ' ' * (colnum - 1) + '^' | ||||
|                     except IndexError: | ||||
|                         bad = "" | ||||
|                     if errtype == rocket.ERR_NON_MONOTONIC: | ||||
|                         err = "timestamp is not monotonically increasing" | ||||
| @@ -456,7 +477,7 @@ class Table(object): | ||||
|                     else: | ||||
|                         err = str(obj) | ||||
|                     raise ValueError("error parsing input data: " + | ||||
|                                      where + err + "\n" + bad + "\n" + badptr) | ||||
|                                      where + err + "\n" + bad) | ||||
|                 tot_rows += added_rows | ||||
|         except Exception: | ||||
|             # Some failure, so try to roll things back by truncating or | ||||
| @@ -472,7 +493,7 @@ class Table(object): | ||||
|             # Success, so update self.nrows accordingly | ||||
|             self.nrows = tot_rows | ||||
|  | ||||
|     def get_data(self, start, stop): | ||||
|     def get_data(self, start, stop, binary = False): | ||||
|         """Extract data corresponding to Python range [n:m], | ||||
|         and returns a formatted string""" | ||||
|         if (start is None or | ||||
| @@ -490,10 +511,13 @@ class Table(object): | ||||
|             if count > remaining: | ||||
|                 count = remaining | ||||
|             f = self.file_open(subdir, filename) | ||||
|             ret.append(f.extract_string(offset, count)) | ||||
|             if binary: | ||||
|                 ret.append(f.extract_binary(offset, count)) | ||||
|             else: | ||||
|                 ret.append(f.extract_string(offset, count)) | ||||
|             remaining -= count | ||||
|             row += count | ||||
|         return "".join(ret) | ||||
|         return b"".join(ret) | ||||
|  | ||||
|     def __getitem__(self, row): | ||||
|         """Extract timestamps from a row, with table[n] notation.""" | ||||
|   | ||||
| @@ -176,7 +176,7 @@ class NilmDB(object): | ||||
|             raise NilmDBError("start must precede end") | ||||
|         return (start, end) | ||||
|  | ||||
|     @nilmdb.utils.lru_cache(size = 16) | ||||
|     @nilmdb.utils.lru_cache(size = 64) | ||||
|     def _get_intervals(self, stream_id): | ||||
|         """ | ||||
|         Return a mutable IntervalSet corresponding to the given stream ID. | ||||
| @@ -475,12 +475,16 @@ class NilmDB(object): | ||||
|             con.execute("DELETE FROM ranges WHERE stream_id=?", (stream_id,)) | ||||
|             con.execute("DELETE FROM streams WHERE id=?", (stream_id,)) | ||||
|  | ||||
|     def stream_insert(self, path, start, end, data): | ||||
|     def stream_insert(self, path, start, end, data, binary = False): | ||||
|         """Insert new data into the database. | ||||
|            path: Path at which to add the data | ||||
|            start: Starting timestamp | ||||
|            end: Ending timestamp | ||||
|            data: Textual data, formatted according to the layout of path | ||||
|  | ||||
|            'binary', if True, means that 'data' is raw binary: | ||||
|            little-endian, matching the current table's layout, | ||||
|            including the int64 timestamp. | ||||
|            """ | ||||
|         # First check for basic overlap using timestamp info given. | ||||
|         stream_id = self._stream_id(path) | ||||
| @@ -494,7 +498,7 @@ class NilmDB(object): | ||||
|         # there are any parse errors. | ||||
|         table = self.data.getnode(path) | ||||
|         row_start = table.nrows | ||||
|         table.append_string(data, start, end) | ||||
|         table.append_data(data, start, end, binary) | ||||
|         row_end = table.nrows | ||||
|  | ||||
|         # Insert the record into the sql database. | ||||
| @@ -538,7 +542,7 @@ class NilmDB(object): | ||||
|                                   dbinterval.db_endpos) | ||||
|  | ||||
|     def stream_extract(self, path, start = None, end = None, | ||||
|                        count = False, markup = False): | ||||
|                        count = False, markup = False, binary = False): | ||||
|         """ | ||||
|         Returns (data, restart) tuple. | ||||
|  | ||||
| @@ -559,6 +563,9 @@ class NilmDB(object): | ||||
|         'markup', if true, indicates that returned data should be | ||||
|         marked with a comment denoting when a particular interval | ||||
|         starts, and another comment when an interval ends. | ||||
|  | ||||
|         'binary', if true, means to return raw binary rather than | ||||
|         ASCII-formatted data. | ||||
|         """ | ||||
|         stream_id = self._stream_id(path) | ||||
|         table = self.data.getnode(path) | ||||
| @@ -569,6 +576,8 @@ class NilmDB(object): | ||||
|         matched = 0 | ||||
|         remaining = self.max_results | ||||
|         restart = None | ||||
|         if binary and (markup or count): | ||||
|             raise NilmDBError("binary mode can't be used with markup or count") | ||||
|         for interval in intervals.intersection(requested): | ||||
|             # Reading single rows from the table is too slow, so | ||||
|             # we use two bisections to find both the starting and | ||||
| @@ -593,7 +602,7 @@ class NilmDB(object): | ||||
|                               timestamp_to_string(interval.start) + "\n") | ||||
|  | ||||
|             # Gather these results up | ||||
|             result.append(table.get_data(row_start, row_end)) | ||||
|             result.append(table.get_data(row_start, row_end, binary)) | ||||
|  | ||||
|             # Count them | ||||
|             remaining -= row_end - row_start | ||||
| @@ -666,6 +675,7 @@ class NilmDB(object): | ||||
|  | ||||
|             # Count how many were removed | ||||
|             removed += row_end - row_start | ||||
|             remaining -= row_end - row_start | ||||
|  | ||||
|             if restart is not None: | ||||
|                 break | ||||
|   | ||||
| @@ -5,6 +5,9 @@ | ||||
| #include <ctype.h> | ||||
| #include <stdint.h> | ||||
|  | ||||
| #define __STDC_FORMAT_MACROS | ||||
| #include <inttypes.h> | ||||
|  | ||||
| /* Values missing from stdint.h */ | ||||
| #define UINT8_MIN 0 | ||||
| #define UINT16_MIN 0 | ||||
| @@ -19,16 +22,9 @@ | ||||
|  | ||||
| typedef int64_t timestamp_t; | ||||
|  | ||||
| /* This code probably needs to be double-checked for the case where | ||||
|    sizeof(long) != 8, so enforce that here with something that will | ||||
|    fail at build time.  We assume that the python integer type can | ||||
|    hold an int64_t. */ | ||||
| const static char __long_ok[1 - 2*!(sizeof(int64_t) == | ||||
| 				    sizeof(long int))] = { 0 }; | ||||
|  | ||||
| /* Somewhat arbitrary, just so we can use fixed sizes for strings | ||||
|    etc. */ | ||||
| static const int MAX_LAYOUT_COUNT = 128; | ||||
| static const int MAX_LAYOUT_COUNT = 1024; | ||||
|  | ||||
| /* Error object and constants */ | ||||
| static PyObject *ParseError; | ||||
| @@ -58,7 +54,7 @@ static PyObject *raise_str(int line, int col, int code, const char *string) | ||||
| static PyObject *raise_int(int line, int col, int code, int64_t num) | ||||
| { | ||||
| 	PyObject *o; | ||||
| 	o = Py_BuildValue("(iiil)", line, col, code, num); | ||||
| 	o = Py_BuildValue("(iiiL)", line, col, code, (long long)num); | ||||
| 	if (o != NULL) { | ||||
| 		PyErr_SetObject(ParseError, o); | ||||
| 		Py_DECREF(o); | ||||
| @@ -249,11 +245,11 @@ static PyObject *Rocket_get_file_size(Rocket *self) | ||||
| /**** | ||||
|  * Append from string | ||||
|  */ | ||||
| static inline long int strtol10(const char *nptr, char **endptr) { | ||||
| 	return strtol(nptr, endptr, 10); | ||||
| /* Base-10 wrapper around strtoll(), passed as the parser argument to the | ||||
|    signed-integer CS() cases in Rocket_append_string.  Returns long long | ||||
|    (not long) so 64-bit values are not truncated where long is 32 bits. */ | ||||
| static inline long long strtoll10(const char *nptr, char **endptr) { | ||||
| 	return strtoll(nptr, endptr, 10); | ||||
| } | ||||
| static inline long int strtoul10(const char *nptr, char **endptr) { | ||||
| 	return strtoul(nptr, endptr, 10); | ||||
| /* Base-10 wrapper around strtoull(), passed as the parser argument to the | ||||
|    unsigned-integer CS() cases in Rocket_append_string.  Returns unsigned | ||||
|    long long so the full unsigned 64-bit range survives the return. */ | ||||
| static inline unsigned long long strtoull10(const char *nptr, char **endptr) { | ||||
| 	return strtoull(nptr, endptr, 10); | ||||
| } | ||||
|  | ||||
| /* .append_string(count, data, offset, linenum, start, end, last_timestamp) */ | ||||
| @@ -264,6 +260,7 @@ static PyObject *Rocket_append_string(Rocket *self, PyObject *args) | ||||
| 	int offset; | ||||
| 	const char *linestart; | ||||
| 	int linenum; | ||||
|         long long ll1, ll2, ll3; | ||||
| 	timestamp_t start; | ||||
| 	timestamp_t end; | ||||
| 	timestamp_t last_timestamp; | ||||
| @@ -280,10 +277,13 @@ static PyObject *Rocket_append_string(Rocket *self, PyObject *args) | ||||
| 	   but we need the null termination for strto*.  If we had | ||||
| 	   strnto* that took a length, we could use t# and not require | ||||
| 	   a copy. */ | ||||
| 	if (!PyArg_ParseTuple(args, "isiilll:append_string", &count, | ||||
| 	if (!PyArg_ParseTuple(args, "isiiLLL:append_string", &count, | ||||
| 			      &data, &offset, &linenum, | ||||
| 			      &start, &end, &last_timestamp)) | ||||
| 			      &ll1, &ll2, &ll3)) | ||||
| 		return NULL; | ||||
|         start = ll1; | ||||
|         end = ll2; | ||||
|         last_timestamp = ll3; | ||||
|  | ||||
| 	/* Skip spaces, but don't skip over a newline. */ | ||||
| #define SKIP_BLANK(buf) do {			\ | ||||
| @@ -372,14 +372,14 @@ static PyObject *Rocket_append_string(Rocket *self, PyObject *args) | ||||
| 				goto extra_data_on_line;		\ | ||||
| 			break | ||||
|  | ||||
| 			CS(INT8,   strtol10,  t64.i, t8.i,  t8.u,         , 1); | ||||
| 			CS(UINT8,  strtoul10, t64.u, t8.u,  t8.u,         , 1); | ||||
| 			CS(INT16,  strtol10,  t64.i, t16.i, t16.u, le16toh, 2); | ||||
| 			CS(UINT16, strtoul10, t64.u, t16.u, t16.u, le16toh, 2); | ||||
| 			CS(INT32,  strtol10,  t64.i, t32.i, t32.u, le32toh, 4); | ||||
| 			CS(UINT32, strtoul10, t64.u, t32.u, t32.u, le32toh, 4); | ||||
| 			CS(INT64,  strtol10,  t64.i, t64.i, t64.u, le64toh, 8); | ||||
| 			CS(UINT64, strtoul10, t64.u, t64.u, t64.u, le64toh, 8); | ||||
| 			CS(INT8,   strtoll10,  t64.i, t8.i,  t8.u,         , 1); | ||||
| 			CS(UINT8,  strtoull10, t64.u, t8.u,  t8.u,         , 1); | ||||
| 			CS(INT16,  strtoll10,  t64.i, t16.i, t16.u, le16toh, 2); | ||||
| 			CS(UINT16, strtoull10, t64.u, t16.u, t16.u, le16toh, 2); | ||||
| 			CS(INT32,  strtoll10,  t64.i, t32.i, t32.u, le32toh, 4); | ||||
| 			CS(UINT32, strtoull10, t64.u, t32.u, t32.u, le32toh, 4); | ||||
| 			CS(INT64,  strtoll10,  t64.i, t64.i, t64.u, le64toh, 8); | ||||
| 			CS(UINT64, strtoull10, t64.u, t64.u, t64.u, le64toh, 8); | ||||
| 			CS(FLOAT32, strtod,   t64.d, t32.f, t32.u, le32toh, 4); | ||||
| 			CS(FLOAT64, strtod,   t64.d, t64.d, t64.u, le64toh, 8); | ||||
| #undef CS | ||||
| @@ -397,7 +397,8 @@ static PyObject *Rocket_append_string(Rocket *self, PyObject *args) | ||||
| 	/* Build return value and return */ | ||||
| 	offset = buf - data; | ||||
| 	PyObject *o; | ||||
| 	o = Py_BuildValue("(iili)", written, offset, last_timestamp, linenum); | ||||
| 	o = Py_BuildValue("(iiLi)", written, offset, | ||||
|                           (long long)last_timestamp, linenum); | ||||
| 	return o; | ||||
| err: | ||||
| 	PyErr_SetFromErrno(PyExc_OSError); | ||||
| @@ -419,6 +420,72 @@ extra_data_on_line: | ||||
| 			 ERR_OTHER, "extra data on line"); | ||||
| } | ||||
|  | ||||
| /**** | ||||
|  * Append from binary data | ||||
|  */ | ||||
|  | ||||
| /* .append_binary(count, data, offset, linenum, start, end, last_timestamp) */ | ||||
| /* Append up to 'count' whole rows of raw binary data, starting 'offset' | ||||
|    bytes into 'data', to self->file.  Each row is self->binary_size bytes | ||||
|    and starts with a little-endian 64-bit timestamp.  All timestamps are | ||||
|    checked before anything is written, so a timestamp error leaves the | ||||
|    file untouched. | ||||
|  | ||||
|    Returns the tuple (rows written, new byte offset into data, | ||||
|    last timestamp, linenum); linenum is passed through unchanged. | ||||
|    Errors are reported through raise_str()/raise_int() for bad arguments | ||||
|    and bad timestamps, and as OSError for write failures. */ | ||||
| static PyObject *Rocket_append_binary(Rocket *self, PyObject *args) | ||||
| { | ||||
| 	int count; | ||||
| 	const uint8_t *data; | ||||
| 	int data_len; | ||||
| 	int linenum; | ||||
| 	int offset; | ||||
| 	long long ll1, ll2, ll3; | ||||
| 	timestamp_t start; | ||||
| 	timestamp_t end; | ||||
| 	timestamp_t last_timestamp; | ||||
|  | ||||
| 	/* Timestamps are parsed as long long ("L") and then copied, so the | ||||
| 	   code does not depend on sizeof(long). */ | ||||
| 	if (!PyArg_ParseTuple(args, "it#iiLLL:append_binary", | ||||
| 			      &count, &data, &data_len, &offset, | ||||
| 			      &linenum, &ll1, &ll2, &ll3)) | ||||
| 		return NULL; | ||||
| 	start = ll1; | ||||
| 	end = ll2; | ||||
| 	last_timestamp = ll3; | ||||
|  | ||||
| 	/* Same guard as Rocket_extract_binary: refuse to touch a closed file | ||||
| 	   instead of handing NULL to fwrite(). */ | ||||
| 	if (!self->file) { | ||||
| 		PyErr_SetString(PyExc_Exception, "no file"); | ||||
| 		return NULL; | ||||
| 	} | ||||
|  | ||||
| 	/* Reject arguments that would make us read outside the buffer: a | ||||
| 	   negative count would become a huge size_t in fwrite(), and a | ||||
| 	   negative offset would move 'data' before its start. */ | ||||
| 	if (count < 0) | ||||
| 		return raise_str(0, 0, ERR_OTHER, "bad count"); | ||||
| 	if (offset < 0 || offset > data_len) | ||||
| 		return raise_str(0, 0, ERR_OTHER, "bad offset"); | ||||
|  | ||||
| 	/* Advance to offset */ | ||||
| 	data += offset; | ||||
| 	data_len -= offset; | ||||
|  | ||||
| 	/* Figure out max number of rows to insert; a trailing partial row | ||||
| 	   is left in the buffer. */ | ||||
| 	int rows = data_len / self->binary_size; | ||||
| 	if (rows > count) | ||||
| 		rows = count; | ||||
|  | ||||
| 	/* Check timestamps */ | ||||
| 	timestamp_t ts; | ||||
| 	int i; | ||||
| 	for (i = 0; i < rows; i++) { | ||||
| 		/* Read raw timestamp, byteswap if needed.  memcpy avoids an | ||||
| 		   unaligned 64-bit load from the byte buffer. */ | ||||
| 		memcpy(&ts, &data[i * self->binary_size], 8); | ||||
| 		ts = le64toh(ts); | ||||
|  | ||||
| 		/* Check limits: strictly increasing, and within [start, end) */ | ||||
| 		if (ts <= last_timestamp) | ||||
| 			return raise_int(i, 0, ERR_NON_MONOTONIC, ts); | ||||
| 		last_timestamp = ts; | ||||
| 		if (ts < start || ts >= end) | ||||
| 			return raise_int(i, 0, ERR_OUT_OF_INTERVAL, ts); | ||||
| 	} | ||||
|  | ||||
| 	/* Write binary data; report flush failures too, so buffered data | ||||
| 	   that never reached the file is not silently counted as written. */ | ||||
| 	if (fwrite(data, self->binary_size, rows, self->file) != (size_t)rows || | ||||
| 	    fflush(self->file) != 0) { | ||||
| 		PyErr_SetFromErrno(PyExc_OSError); | ||||
| 		return NULL; | ||||
| 	} | ||||
|  | ||||
| 	/* Build return value and return */ | ||||
| 	PyObject *o; | ||||
| 	o = Py_BuildValue("(iiLi)", rows, offset + rows * self->binary_size, | ||||
| 			  (long long)last_timestamp, linenum); | ||||
| 	return o; | ||||
| } | ||||
|  | ||||
| /**** | ||||
|  * Extract to string | ||||
|  */ | ||||
| @@ -472,7 +539,7 @@ static PyObject *Rocket_extract_string(Rocket *self, PyObject *args) | ||||
| 		if (fread(&t64.u, 8, 1, self->file) != 1) | ||||
| 			goto err; | ||||
| 		t64.u = le64toh(t64.u); | ||||
| 		ret = sprintf(&str[len], "%ld", t64.i); | ||||
| 		ret = sprintf(&str[len], "%" PRId64, t64.i); | ||||
| 		if (ret <= 0) | ||||
| 			goto err; | ||||
| 		len += ret; | ||||
| @@ -484,7 +551,7 @@ static PyObject *Rocket_extract_string(Rocket *self, PyObject *args) | ||||
| 			/* read and format in a loop */			\ | ||||
| 			for (i = 0; i < self->layout_count; i++) {	\ | ||||
| 				if (fread(&disktype, bytes,		\ | ||||
| 					  1, self->file) < 0)		\ | ||||
| 					  1, self->file) != 1)		\ | ||||
| 					goto err;			\ | ||||
| 				disktype = letoh(disktype);		\ | ||||
| 				ret = sprintf(&str[len], " " fmt,	\ | ||||
| @@ -494,14 +561,14 @@ static PyObject *Rocket_extract_string(Rocket *self, PyObject *args) | ||||
| 				len += ret;				\ | ||||
| 			}						\ | ||||
| 			break | ||||
| 			CASE(INT8,   "%hhd",   t8.i,  t8.u,         , 1); | ||||
| 			CASE(UINT8,  "%hhu",   t8.u,  t8.u,         , 1); | ||||
| 			CASE(INT16,  "%hd",    t16.i, t16.u, le16toh, 2); | ||||
| 			CASE(UINT16, "%hu",    t16.u, t16.u, le16toh, 2); | ||||
| 			CASE(INT32,  "%d",     t32.i, t32.u, le32toh, 4); | ||||
| 			CASE(UINT32, "%u",     t32.u, t32.u, le32toh, 4); | ||||
| 			CASE(INT64,  "%ld",    t64.i, t64.u, le64toh, 8); | ||||
| 			CASE(UINT64, "%lu",    t64.u, t64.u, le64toh, 8); | ||||
| 			CASE(INT8,   "%" PRId8,  t8.i,  t8.u,         , 1); | ||||
| 			CASE(UINT8,  "%" PRIu8,  t8.u,  t8.u,         , 1); | ||||
| 			CASE(INT16,  "%" PRId16, t16.i, t16.u, le16toh, 2); | ||||
| 			CASE(UINT16, "%" PRIu16, t16.u, t16.u, le16toh, 2); | ||||
| 			CASE(INT32,  "%" PRId32, t32.i, t32.u, le32toh, 4); | ||||
| 			CASE(UINT32, "%" PRIu32, t32.u, t32.u, le32toh, 4); | ||||
| 			CASE(INT64,  "%" PRId64, t64.i, t64.u, le64toh, 8); | ||||
| 			CASE(UINT64, "%" PRIu64, t64.u, t64.u, le64toh, 8); | ||||
| 			/* These next two are a bit debatable.  floats | ||||
| 			   are 6-9 significant figures, so we print 7. | ||||
| 			   Doubles are 15-19, so we print 17.  This is | ||||
| @@ -527,6 +594,46 @@ err: | ||||
| 	return NULL; | ||||
| } | ||||
|  | ||||
| /**** | ||||
|  * Extract to binary string containing raw little-endian binary data | ||||
|  */ | ||||
| /* .extract_binary(offset, count) | ||||
|  | ||||
|    Seek to byte 'offset' in self->file, read 'count' rows of | ||||
|    self->binary_size bytes each, and return them unmodified as a bytes | ||||
|    object.  Raises Exception if no file is open, ValueError for a | ||||
|    negative count, OverflowError if the request cannot be sized, and | ||||
|    OSError for seek, allocation, or read failures (including a short | ||||
|    read). */ | ||||
| static PyObject *Rocket_extract_binary(Rocket *self, PyObject *args) | ||||
| { | ||||
| 	long count; | ||||
| 	long offset; | ||||
|  | ||||
| 	if (!PyArg_ParseTuple(args, "ll", &offset, &count)) | ||||
| 		return NULL; | ||||
| 	if (!self->file) { | ||||
| 		PyErr_SetString(PyExc_Exception, "no file"); | ||||
| 		return NULL; | ||||
| 	} | ||||
| 	/* A negative count would turn into an enormous size_t below. */ | ||||
| 	if (count < 0) { | ||||
| 		PyErr_SetString(PyExc_ValueError, "count must not be negative"); | ||||
| 		return NULL; | ||||
| 	} | ||||
| 	/* Compute the byte length in size_t and make sure it also fits the | ||||
| 	   Py_ssize_t taken by PyBytes_FromStringAndSize. */ | ||||
| 	size_t row_size = (size_t)self->binary_size; | ||||
| 	if (row_size != 0 && | ||||
| 	    (size_t)count > (size_t)PY_SSIZE_T_MAX / row_size) { | ||||
| 		PyErr_SetString(PyExc_OverflowError, | ||||
| 				"requested data is too large"); | ||||
| 		return NULL; | ||||
| 	} | ||||
| 	size_t len = (size_t)count * row_size; | ||||
|  | ||||
| 	/* Seek to target location (a negative offset fails here) */ | ||||
| 	if (fseek(self->file, offset, SEEK_SET) < 0) { | ||||
| 		PyErr_SetFromErrno(PyExc_OSError); | ||||
| 		return NULL; | ||||
| 	} | ||||
|  | ||||
| 	/* Nothing to read: avoid malloc(0), which may legitimately return | ||||
| 	   NULL and would be misreported as an error. */ | ||||
| 	if (count == 0) | ||||
| 		return PyBytes_FromStringAndSize("", 0); | ||||
|  | ||||
| 	uint8_t *str; | ||||
| 	str = malloc(len > 0 ? len : 1); | ||||
| 	if (str == NULL) { | ||||
| 		PyErr_SetFromErrno(PyExc_OSError); | ||||
| 		return NULL; | ||||
| 	} | ||||
|  | ||||
| 	/* Data in the file is already in the desired little-endian | ||||
| 	   binary format, so just read it directly. */ | ||||
| 	if (fread(str, row_size, (size_t)count, self->file) != (size_t)count) { | ||||
| 		/* Hitting EOF does not set errno, so only use it when the | ||||
| 		   stream reports a real I/O error. */ | ||||
| 		if (ferror(self->file)) | ||||
| 			PyErr_SetFromErrno(PyExc_OSError); | ||||
| 		else | ||||
| 			PyErr_SetString(PyExc_OSError, | ||||
| 					"short read: not enough data in file"); | ||||
| 		free(str); | ||||
| 		return NULL; | ||||
| 	} | ||||
|  | ||||
| 	PyObject *pystr = PyBytes_FromStringAndSize((char *)str, | ||||
| 						    (Py_ssize_t)len); | ||||
| 	free(str); | ||||
| 	return pystr; | ||||
| } | ||||
|  | ||||
| /**** | ||||
|  * Extract timestamp | ||||
| @@ -551,7 +658,7 @@ static PyObject *Rocket_extract_timestamp(Rocket *self, PyObject *args) | ||||
|  | ||||
| 	/* Convert and return */ | ||||
| 	t64.u = le64toh(t64.u); | ||||
| 	return Py_BuildValue("l", t64.i); | ||||
| 	return Py_BuildValue("L", (long long)t64.i); | ||||
| } | ||||
|  | ||||
| /**** | ||||
| @@ -571,11 +678,13 @@ static PyMemberDef Rocket_members[] = { | ||||
| }; | ||||
|  | ||||
| static PyMethodDef Rocket_methods[] = { | ||||
| 	{ "close", (PyCFunction)Rocket_close, METH_NOARGS, | ||||
| 	{ "close", | ||||
|           (PyCFunction)Rocket_close, METH_NOARGS, | ||||
| 	  "close(self)\n\n" | ||||
| 	  "Close file handle" }, | ||||
|  | ||||
| 	{ "append_string", (PyCFunction)Rocket_append_string, METH_VARARGS, | ||||
| 	{ "append_string", | ||||
|           (PyCFunction)Rocket_append_string, METH_VARARGS, | ||||
| 	  "append_string(self, count, data, offset, line, start, end, ts)\n\n" | ||||
|           "Parse string and append data.\n" | ||||
| 	  "\n" | ||||
| @@ -590,16 +699,46 @@ static PyMethodDef Rocket_methods[] = { | ||||
| 	  "Raises ParseError if timestamps are non-monotonic, outside\n" | ||||
| 	  "the start/end interval etc.\n" | ||||
| 	  "\n" | ||||
|           "On success, return a tuple with three values:\n" | ||||
|           "On success, return a tuple:\n" | ||||
|           "  added_rows: how many rows were added from the file\n" | ||||
|           "  data_offset: current offset into the data string\n" | ||||
|           "  last_timestamp: last timestamp we parsed" }, | ||||
|           "  last_timestamp: last timestamp we parsed\n" | ||||
|           "  linenum: current line number" }, | ||||
|  | ||||
| 	{ "extract_string", (PyCFunction)Rocket_extract_string, METH_VARARGS, | ||||
| 	{ "append_binary", | ||||
| 	  (PyCFunction)Rocket_append_binary, METH_VARARGS, | ||||
| 	  "append_binary(self, count, data, offset, line, start, end, ts)\n\n" | ||||
|           "Append binary data, which must match the data layout.\n" | ||||
| 	  "\n" | ||||
| 	  "  count: maximum number of rows to add\n" | ||||
|           "  data: binary data\n" | ||||
|           "  offset: byte offset into data to start adding\n" | ||||
|           "  line: current line number (unused)\n" | ||||
|           "  start: starting timestamp for interval\n" | ||||
|           "  end: end timestamp for interval\n" | ||||
|           "  ts: last timestamp that was previously parsed\n" | ||||
| 	  "\n" | ||||
| 	  "Raises ParseError if timestamps are non-monotonic, outside\n" | ||||
| 	  "the start/end interval etc.\n" | ||||
| 	  "\n" | ||||
|           "On success, return a tuple:\n" | ||||
|           "  added_rows: how many rows were added from the file\n" | ||||
|           "  data_offset: current offset into the data string\n" | ||||
|           "  last_timestamp: last timestamp we parsed\n" | ||||
|           "  linenum: current line number (copied from argument)" }, | ||||
|  | ||||
| 	{ "extract_string", | ||||
|           (PyCFunction)Rocket_extract_string, METH_VARARGS, | ||||
| 	  "extract_string(self, offset, count)\n\n" | ||||
| 	  "Extract count rows of data from the file at offset offset.\n" | ||||
| 	  "Return an ascii formatted string according to the layout" }, | ||||
|  | ||||
| 	{ "extract_binary", | ||||
| 	  (PyCFunction)Rocket_extract_binary, METH_VARARGS, | ||||
| 	  "extract_binary(self, offset, count)\n\n" | ||||
| 	  "Extract count rows of data from the file at offset offset.\n" | ||||
| 	  "Return a raw binary string of data matching the data layout." }, | ||||
|  | ||||
| 	{ "extract_timestamp", | ||||
| 	  (PyCFunction)Rocket_extract_timestamp, METH_VARARGS, | ||||
| 	  "extract_timestamp(self, offset)\n\n" | ||||
|   | ||||
| @@ -17,126 +17,26 @@ import decorator | ||||
| import psutil | ||||
| import traceback | ||||
|  | ||||
| from nilmdb.server.serverutil import ( | ||||
|     chunked_response, | ||||
|     response_type, | ||||
|     workaround_cp_bug_1200, | ||||
|     exception_to_httperror, | ||||
|     CORS_allow, | ||||
|     json_to_request_params, | ||||
|     json_error_page, | ||||
|     cherrypy_start, | ||||
|     cherrypy_stop, | ||||
|     bool_param, | ||||
|     ) | ||||
|  | ||||
| # Add CORS_allow tool | ||||
| cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow) | ||||
|  | ||||
| class NilmApp(object): | ||||
|     """Base class for the CherryPy apps below; it only stores the | ||||
|     database object so that handlers can reach it as self.db.""" | ||||
|     def __init__(self, db): | ||||
|         self.db = db | ||||
|  | ||||
| # Decorators | ||||
| def chunked_response(func): | ||||
|     """Decorator to enable chunked responses. | ||||
|  | ||||
|     Attaches a _cp_config attribute with 'response.stream' set to True | ||||
|     to func and returns the same function object; no wrapper is created. | ||||
|     """ | ||||
|     # Set this to False to get better tracebacks from some requests | ||||
|     # (/stream/extract, /stream/intervals). | ||||
|     func._cp_config = { 'response.stream': True } | ||||
|     return func | ||||
|  | ||||
| def response_type(content_type): | ||||
|     """Return a decorator-generating function that sets the | ||||
|     response type to the specified string. | ||||
|  | ||||
|     The Content-Type header is set on every call, just before the | ||||
|     decorated function itself runs. | ||||
|     """ | ||||
|     def wrapper(func, *args, **kwargs): | ||||
|         # Set the header first, then delegate to the wrapped handler. | ||||
|         cherrypy.response.headers['Content-Type'] = content_type | ||||
|         return func(*args, **kwargs) | ||||
|     # decorator.decorator builds the actual decorator from wrapper. | ||||
|     return decorator.decorator(wrapper) | ||||
|  | ||||
| @decorator.decorator | ||||
| def workaround_cp_bug_1200(func, *args, **kwargs): # pragma: no cover | ||||
|     """Decorator to work around CherryPy bug #1200 in a response | ||||
|     generator. | ||||
|  | ||||
|     Even if chunked responses are disabled, LookupError or | ||||
|     UnicodeError exceptions may still be swallowed by CherryPy due to | ||||
|     bug #1200.  This throws them as generic Exceptions instead so that | ||||
|     they make it through. | ||||
|  | ||||
|     The wrapped function must return an iterable; its values are | ||||
|     yielded unchanged.  The replacement Exception's message is the | ||||
|     original exception's class name followed by its string form. | ||||
|     """ | ||||
|     exc_info = None | ||||
|     try: | ||||
|         for val in func(*args, **kwargs): | ||||
|             yield val | ||||
|     except (LookupError, UnicodeError): | ||||
|         # Re-raise it, but maintain the original traceback | ||||
|         exc_info = sys.exc_info() | ||||
|         new_exc = Exception(exc_info[0].__name__ + ": " + str(exc_info[1])) | ||||
|         # Python 2 three-argument raise: new exception, old traceback. | ||||
|         raise new_exc, None, exc_info[2] | ||||
|     finally: | ||||
|         # Drop the local reference to the exception info (which includes | ||||
|         # the traceback object) once we are done with it. | ||||
|         del exc_info | ||||
|  | ||||
| def exception_to_httperror(*expected): | ||||
|     """Return a decorator-generating function that catches expected | ||||
|     errors and throws a HTTPError describing it instead. | ||||
|  | ||||
|         @exception_to_httperror(NilmDBError, ValueError) | ||||
|         def foo(): | ||||
|             pass | ||||
|  | ||||
|     The HTTPError is always "400 Bad Request", with the string form | ||||
|     of the caught exception as its message.  Exceptions that are not | ||||
|     listed in 'expected' propagate unchanged. | ||||
|     """ | ||||
|     def wrapper(func, *args, **kwargs): | ||||
|         exc_info = None | ||||
|         try: | ||||
|             return func(*args, **kwargs) | ||||
|         except expected: | ||||
|             # Re-raise it, but maintain the original traceback | ||||
|             exc_info = sys.exc_info() | ||||
|             new_exc = cherrypy.HTTPError("400 Bad Request", str(exc_info[1])) | ||||
|             # Python 2 three-argument raise: new exception, old traceback. | ||||
|             raise new_exc, None, exc_info[2] | ||||
|         finally: | ||||
|             # Drop the local reference to the exception info (which | ||||
|             # includes the traceback object) once we are done with it. | ||||
|             del exc_info | ||||
|     # We need to preserve the function's argspecs for CherryPy to | ||||
|     # handle argument errors correctly.  Decorator.decorator takes | ||||
|     # care of that. | ||||
|     return decorator.decorator(wrapper) | ||||
|  | ||||
| # Custom CherryPy tools | ||||
|  | ||||
| def CORS_allow(methods): | ||||
|     """This does several things: | ||||
|  | ||||
|     Handles CORS preflight requests. | ||||
|     Adds Allow: header to all requests. | ||||
|     Raise 405 if request.method not in methods. | ||||
|  | ||||
|     It is similar to cherrypy.tools.allow, with the CORS stuff added. | ||||
|  | ||||
|     'methods' may be a single method name or a tuple/list of names; | ||||
|     names are upper-cased and empty entries are dropped.  An empty | ||||
|     result defaults to GET and HEAD, and HEAD is added whenever GET | ||||
|     is allowed. | ||||
|     """ | ||||
|     # Note: these are the header maps, not the request/response objects. | ||||
|     request = cherrypy.request.headers | ||||
|     response = cherrypy.response.headers | ||||
|  | ||||
|     if not isinstance(methods, (tuple, list)): # pragma: no cover | ||||
|         methods = [ methods ] | ||||
|     methods = [ m.upper() for m in methods if m ] | ||||
|     if not methods: # pragma: no cover | ||||
|         methods = [ 'GET', 'HEAD' ] | ||||
|     elif 'GET' in methods and 'HEAD' not in methods: # pragma: no cover | ||||
|         methods.append('HEAD') | ||||
|     response['Allow'] = ', '.join(methods) | ||||
|  | ||||
|     # Allow all origins | ||||
|     # (the request's own Origin value is echoed back, not "*") | ||||
|     if 'Origin' in request: | ||||
|         response['Access-Control-Allow-Origin'] = request['Origin'] | ||||
|  | ||||
|     # If it's a CORS request, send response. | ||||
|     # A preflight is recognized as an OPTIONS request that carries both | ||||
|     # Access-Control-Request-Method and Access-Control-Request-Headers. | ||||
|     request_method = request.get("Access-Control-Request-Method", None) | ||||
|     request_headers = request.get("Access-Control-Request-Headers", None) | ||||
|     if (cherrypy.request.method == "OPTIONS" and | ||||
|         request_method and request_headers): | ||||
|         response['Access-Control-Allow-Headers'] = request_headers | ||||
|         response['Access-Control-Allow-Methods'] = ', '.join(methods) | ||||
|         # Try to stop further processing and return a 200 OK | ||||
|         cherrypy.response.status = "200 OK" | ||||
|         cherrypy.response.body = "" | ||||
|         cherrypy.request.handler = lambda: "" | ||||
|         return | ||||
|  | ||||
|     # Reject methods that were not explicitly allowed | ||||
|     if cherrypy.request.method not in methods: | ||||
|         raise cherrypy.HTTPError(405) | ||||
|  | ||||
| cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow) | ||||
|  | ||||
| # Helper for json_in tool to process JSON data into normal request | ||||
| # parameters. | ||||
| def json_to_request_params(body): | ||||
|     """Parse a JSON request body and merge it into the request parameters. | ||||
|  | ||||
|     Runs CherryPy's json_processor on 'body', then copies the keys of | ||||
|     the resulting cherrypy.request.json into cherrypy.request.params. | ||||
|     Raises HTTPError 415 if the decoded JSON is not a dictionary. | ||||
|     """ | ||||
|     cherrypy.lib.jsontools.json_processor(body) | ||||
|     # Only a JSON object can be mapped onto named parameters. | ||||
|     if not isinstance(cherrypy.request.json, dict): | ||||
|         raise cherrypy.HTTPError(415) | ||||
|     cherrypy.request.params.update(cherrypy.request.json) | ||||
|  | ||||
| # CherryPy apps | ||||
| class Root(NilmApp): | ||||
|     """Root application for NILM database""" | ||||
| @@ -147,7 +47,10 @@ class Root(NilmApp): | ||||
|     # / | ||||
|     @cherrypy.expose | ||||
|     def index(self): | ||||
|         raise cherrypy.NotFound() | ||||
|         cherrypy.response.headers['Content-Type'] = 'text/plain' | ||||
|         msg = sprintf("This is NilmDB version %s, running on host %s.\n", | ||||
|                       nilmdb.__version__, socket.getfqdn()) | ||||
|         return msg | ||||
|  | ||||
|     # /favicon.ico | ||||
|     @cherrypy.expose | ||||
| @@ -167,9 +70,13 @@ class Root(NilmApp): | ||||
|         """Return a dictionary with the database path, | ||||
|         size of the database in bytes, and free disk space in bytes""" | ||||
|         path = self.db.get_basepath() | ||||
|         usage = psutil.disk_usage(path) | ||||
|         dbsize = nilmdb.utils.du(path) | ||||
|         return { "path": path, | ||||
|                  "size": nilmdb.utils.du(path), | ||||
|                  "free": psutil.disk_usage(path).free } | ||||
|                  "size": dbsize, | ||||
|                  "other": usage.used - dbsize, | ||||
|                  "reserved": usage.total - usage.used - usage.free, | ||||
|                  "free": usage.free } | ||||
|  | ||||
| class Stream(NilmApp): | ||||
|     """Stream-specific operations""" | ||||
| @@ -177,10 +84,18 @@ class Stream(NilmApp): | ||||
|     # Helpers | ||||
|     def _get_times(self, start_param, end_param): | ||||
|         (start, end) = (None, None) | ||||
|         if start_param is not None: | ||||
|             start = string_to_timestamp(start_param) | ||||
|         if end_param is not None: | ||||
|             end = string_to_timestamp(end_param) | ||||
|         try: | ||||
|             if start_param is not None: | ||||
|                 start = string_to_timestamp(start_param) | ||||
|         except Exception: | ||||
|             raise cherrypy.HTTPError("400 Bad Request", sprintf( | ||||
|                 "invalid start (%s): must be a numeric timestamp", start_param)) | ||||
|         try: | ||||
|             if end_param is not None: | ||||
|                 end = string_to_timestamp(end_param) | ||||
|         except Exception: | ||||
|             raise cherrypy.HTTPError("400 Bad Request", sprintf( | ||||
|                 "invalid end (%s): must be a numeric timestamp", end_param)) | ||||
|         if start is not None and end is not None: | ||||
|             if start >= end: | ||||
|                 raise cherrypy.HTTPError( | ||||
| @@ -199,10 +114,10 @@ class Stream(NilmApp): | ||||
|         layout parameter, just list streams that match the given path | ||||
|         or layout. | ||||
|  | ||||
|         If extent is not given, returns a list of lists containing | ||||
|         the path and layout: [ path, layout ] | ||||
|         If extended is missing or zero, returns a list of lists | ||||
|         containing the path and layout: [ path, layout ] | ||||
|  | ||||
|         If extended is provided, returns a list of lists containing | ||||
|         If extended is true, returns a list of lists containing | ||||
|         extended info: [ path, layout, extent_min, extent_max, | ||||
|         total_rows, total_seconds ].  More data may be added. | ||||
|         """ | ||||
| @@ -305,28 +220,42 @@ class Stream(NilmApp): | ||||
|     @cherrypy.tools.json_out() | ||||
|     @exception_to_httperror(NilmDBError, ValueError) | ||||
|     @cherrypy.tools.CORS_allow(methods = ["PUT"]) | ||||
|     def insert(self, path, start, end): | ||||
|     def insert(self, path, start, end, binary = False): | ||||
|         """ | ||||
|         Insert new data into the database.  Provide textual data | ||||
|         (matching the path's layout) as a HTTP PUT. | ||||
|  | ||||
|         If 'binary' is True, expect raw binary data, rather than lines | ||||
|         of ASCII-formatted data.  Raw binary data is always | ||||
|         little-endian and matches the database types (including an | ||||
|         int64 timestamp). | ||||
|         """ | ||||
|         binary = bool_param(binary) | ||||
|  | ||||
|         # Important that we always read the input before throwing any | ||||
|         # errors, to keep lengths happy for persistent connections. | ||||
|         # Note that CherryPy 3.2.2 has a bug where this fails for GET | ||||
|         # requests, if we ever want to handle those (issue #1134) | ||||
|         body = cherrypy.request.body.read() | ||||
|  | ||||
|         # Verify content type for binary data | ||||
|         content_type = cherrypy.request.headers.get('content-type') | ||||
|         if binary and content_type: | ||||
|             if content_type != "application/octet-stream": | ||||
|                 raise cherrypy.HTTPError("400", "Content type must be " | ||||
|                                          "application/octet-stream for " | ||||
|                                          "binary data, not " + content_type) | ||||
|  | ||||
|         # Check path and get layout | ||||
|         streams = self.db.stream_list(path = path) | ||||
|         if len(streams) != 1: | ||||
|             raise cherrypy.HTTPError("404 Not Found", "No such stream") | ||||
|         if len(self.db.stream_list(path = path)) != 1: | ||||
|             raise cherrypy.HTTPError("404", "No such stream: " + path) | ||||
|  | ||||
|         # Check limits | ||||
|         (start, end) = self._get_times(start, end) | ||||
|  | ||||
|         # Pass the data directly to nilmdb, which will parse it and | ||||
|         # raise a ValueError if there are any problems. | ||||
|         self.db.stream_insert(path, start, end, body) | ||||
|         self.db.stream_insert(path, start, end, body, binary) | ||||
|  | ||||
|         # Done | ||||
|         return | ||||
| @@ -335,24 +264,34 @@ class Stream(NilmApp): | ||||
|     # /stream/remove?path=/newton/prep&start=1234567890.0&end=1234567899.0 | ||||
|     @cherrypy.expose | ||||
|     @cherrypy.tools.json_in() | ||||
|     @cherrypy.tools.json_out() | ||||
|     @exception_to_httperror(NilmDBError) | ||||
|     @cherrypy.tools.CORS_allow(methods = ["POST"]) | ||||
|     @chunked_response | ||||
|     @response_type("application/x-json-stream") | ||||
|     def remove(self, path, start = None, end = None): | ||||
|         """ | ||||
|         Remove data from the backend database.  Removes all data in | ||||
|         the interval [start, end).  Returns the number of data points | ||||
|         removed. | ||||
|         the interval [start, end). | ||||
|  | ||||
|         Returns the number of data points removed.  Since this is a potentially | ||||
|         long-running operation, multiple numbers may be returned as the | ||||
|         data gets removed from the backend database.  The total number of | ||||
|         points removed is the sum of all of these numbers. | ||||
|         """ | ||||
|         (start, end) = self._get_times(start, end) | ||||
|         total_removed = 0 | ||||
|         while True: | ||||
|             (removed, restart) = self.db.stream_remove(path, start, end) | ||||
|             total_removed += removed | ||||
|             if restart is None: | ||||
|                 break | ||||
|             start = restart | ||||
|         return total_removed | ||||
|  | ||||
|         if len(self.db.stream_list(path = path)) != 1: | ||||
|             raise cherrypy.HTTPError("404", "No such stream: " + path) | ||||
|  | ||||
|         @workaround_cp_bug_1200 | ||||
|         def content(start, end): | ||||
|             # Note: disable chunked responses to see tracebacks from here. | ||||
|             while True: | ||||
|                 (removed, restart) = self.db.stream_remove(path, start, end) | ||||
|                 yield json.dumps(removed) + "\r\n" | ||||
|                 if restart is None: | ||||
|                     break | ||||
|                 start = restart | ||||
|         return content(start, end) | ||||
|  | ||||
|     # /stream/intervals?path=/newton/prep | ||||
|     # /stream/intervals?path=/newton/prep&start=1234567890.0&end=1234567899.0 | ||||
| @@ -399,9 +338,8 @@ class Stream(NilmApp): | ||||
|     # /stream/extract?path=/newton/prep&start=1234567890.0&end=1234567899.0 | ||||
|     @cherrypy.expose | ||||
|     @chunked_response | ||||
|     @response_type("text/plain") | ||||
|     def extract(self, path, start = None, end = None, | ||||
|                 count = False, markup = False): | ||||
|                 count = False, markup = False, binary = False): | ||||
|         """ | ||||
|         Extract data from backend database.  Streams the resulting | ||||
|         entries as ASCII text lines separated by newlines.  This may | ||||
| @@ -412,13 +350,30 @@ class Stream(NilmApp): | ||||
|  | ||||
|         If 'markup' is True, adds comments to the stream denoting each | ||||
|         interval's start and end timestamp. | ||||
|  | ||||
|         If 'binary' is True, return raw binary data, rather than lines | ||||
|         of ASCII-formatted data.  Raw binary data is always | ||||
|         little-endian and matches the database types (including an | ||||
|         int64 timestamp). | ||||
|         """ | ||||
|         binary = bool_param(binary) | ||||
|         markup = bool_param(markup) | ||||
|         count = bool_param(count) | ||||
|  | ||||
|         (start, end) = self._get_times(start, end) | ||||
|  | ||||
|         # Check path and get layout | ||||
|         streams = self.db.stream_list(path = path) | ||||
|         if len(streams) != 1: | ||||
|             raise cherrypy.HTTPError("404 Not Found", "No such stream") | ||||
|         if len(self.db.stream_list(path = path)) != 1: | ||||
|             raise cherrypy.HTTPError("404", "No such stream: " + path) | ||||
|  | ||||
|         if binary: | ||||
|             content_type = "application/octet-stream" | ||||
|             if markup or count: | ||||
|                 raise cherrypy.HTTPError("400", "can't mix binary and " | ||||
|                                          "markup or count modes") | ||||
|         else: | ||||
|             content_type = "text/plain" | ||||
|         cherrypy.response.headers['Content-Type'] = content_type | ||||
|  | ||||
|         @workaround_cp_bug_1200 | ||||
|         def content(start, end): | ||||
| @@ -431,7 +386,8 @@ class Stream(NilmApp): | ||||
|  | ||||
|             while True: | ||||
|                 (data, restart) = self.db.stream_extract( | ||||
|                     path, start, end, count = False, markup = markup) | ||||
|                     path, start, end, count = False, | ||||
|                     markup = markup, binary = binary) | ||||
|                 yield data | ||||
|  | ||||
|                 if restart is None: | ||||
| @@ -531,70 +487,14 @@ class Server(object): | ||||
|  | ||||
|     def json_error_page(self, status, message, traceback, version): | ||||
|         """Return a custom error page in JSON so the client can parse it""" | ||||
|         errordata = { "status" : status, | ||||
|                       "message" : message, | ||||
|                       "traceback" : traceback } | ||||
|         # Don't send a traceback if the error was 400-499 (client's fault) | ||||
|         try: | ||||
|             code = int(status.split()[0]) | ||||
|             if not self.force_traceback: | ||||
|                 if code >= 400 and code <= 499: | ||||
|                     errordata["traceback"] = "" | ||||
|         except Exception: # pragma: no cover | ||||
|             pass | ||||
|         # Override the response type, which was previously set to text/html | ||||
|         cherrypy.serving.response.headers['Content-Type'] = ( | ||||
|             "application/json;charset=utf-8" ) | ||||
|         # Undo the HTML escaping that cherrypy's get_error_page function applies | ||||
|         # (cherrypy issue 1135) | ||||
|         for k, v in errordata.iteritems(): | ||||
|             v = v.replace("<","<") | ||||
|             v = v.replace(">",">") | ||||
|             v = v.replace("&","&") | ||||
|             errordata[k] = v | ||||
|         return json.dumps(errordata, separators=(',',':')) | ||||
|         return json_error_page(status, message, traceback, version, | ||||
|                                self.force_traceback) | ||||
|  | ||||
|     def start(self, blocking = False, event = None): | ||||
|  | ||||
|         if not self.embedded: # pragma: no cover | ||||
|             # Handle signals nicely | ||||
|             if hasattr(cherrypy.engine, "signal_handler"): | ||||
|                 cherrypy.engine.signal_handler.subscribe() | ||||
|             if hasattr(cherrypy.engine, "console_control_handler"): | ||||
|                 cherrypy.engine.console_control_handler.subscribe() | ||||
|  | ||||
|         # Cherrypy stupidly calls os._exit(70) when it can't bind the | ||||
|         # port.  At least try to print a reasonable error and continue | ||||
|         # in this case, rather than just dying silently (as we would | ||||
|         # otherwise do in embedded mode) | ||||
|         real_exit = os._exit | ||||
|         def fake_exit(code): # pragma: no cover | ||||
|             if code == os.EX_SOFTWARE: | ||||
|                 fprintf(sys.stderr, "error: CherryPy called os._exit!\n") | ||||
|             else: | ||||
|                 real_exit(code) | ||||
|         os._exit = fake_exit | ||||
|         cherrypy.engine.start() | ||||
|         os._exit = real_exit | ||||
|  | ||||
|         # Signal that the engine has started successfully | ||||
|         if event is not None: | ||||
|             event.set() | ||||
|  | ||||
|         if blocking: | ||||
|             try: | ||||
|                 cherrypy.engine.wait(cherrypy.engine.states.EXITING, | ||||
|                                      interval = 0.1, channel = 'main') | ||||
|             except (KeyboardInterrupt, IOError): # pragma: no cover | ||||
|                 cherrypy.engine.log('Keyboard Interrupt: shutting down bus') | ||||
|                 cherrypy.engine.exit() | ||||
|             except SystemExit: # pragma: no cover | ||||
|                 cherrypy.engine.log('SystemExit raised: shutting down bus') | ||||
|                 cherrypy.engine.exit() | ||||
|                 raise | ||||
|         cherrypy_start(blocking, event, self.embedded) | ||||
|  | ||||
|     def stop(self): | ||||
|         cherrypy.engine.exit() | ||||
|         cherrypy_stop() | ||||
|  | ||||
| # Use a single global nilmdb.server.NilmDB and nilmdb.server.Server | ||||
| # instance since the database can only be opened once.  For this to | ||||
|   | ||||
							
								
								
									
										214
									
								
								nilmdb/server/serverutil.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										214
									
								
								nilmdb/server/serverutil.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,214 @@ | ||||
| """Miscellaneous decorators and other helpers for running a CherryPy | ||||
| server""" | ||||
|  | ||||
| import cherrypy | ||||
| import sys | ||||
| import os | ||||
| import decorator | ||||
| import simplejson as json | ||||
|  | ||||
| # Helper to parse parameters into booleans | ||||
def bool_param(s):
    """Interpret request parameter 's' as a boolean.

    Strings are matched case-insensitively against a small set of
    false/true spellings.  Anything for which the string handling
    raises (e.g. a value without .lower(), such as an actual bool) is
    converted with bool().  A string matching neither set results in
    an HTTP 400 error.
    """
    false_words = ( "0", "false", "f", "no", "n" )
    true_words = ( "1", "true", "t", "yes", "y" )
    try:
        lowered = s.lower()
        if lowered in false_words:
            return False
        if lowered in true_words:
            return True
    except Exception:
        return bool(s)
    # Only reached for a string that matched neither set of words.
    raise cherrypy.HTTPError("400 Bad Request",
                             "can't parse parameter: " + lowered)
|  | ||||
| # Decorators | ||||
def chunked_response(func):
    """Decorator that turns on streamed (chunked) responses for 'func'.

    The function itself is returned, with a '_cp_config' attribute
    attached that enables 'response.stream'.
    """
    # Switching the value below to False gives better tracebacks from
    # some requests (/stream/extract, /stream/intervals).
    streaming = True
    setattr(func, '_cp_config', { 'response.stream': streaming })
    return func
|  | ||||
def response_type(content_type):
    """Build a decorator that sets the response Content-Type header
    to 'content_type' before invoking the decorated function."""
    @decorator.decorator
    def wrapper(func, *args, **kwargs):
        # Set the header first, then run the real handler.
        cherrypy.response.headers['Content-Type'] = content_type
        return func(*args, **kwargs)
    return wrapper
|  | ||||
@decorator.decorator
def workaround_cp_bug_1200(func, *args, **kwargs): # pragma: no cover
    """Decorator to work around CherryPy bug #1200 in a response
    generator.

    Even if chunked responses are disabled, LookupError or
    UnicodeError exceptions may still be swallowed by CherryPy due to
    bug #1200.  This throws them as generic Exceptions instead so that
    they make it through.

    Every value produced by func(*args, **kwargs) is yielded
    unchanged.  The replacement exception's message has the form
    "<original class name>: <original message>".
    """
    exc_info = None
    try:
        for val in func(*args, **kwargs):
            yield val
    except (LookupError, UnicodeError):
        # Re-raise it, but maintain the original traceback
        exc_info = sys.exc_info()
        new_exc = Exception(exc_info[0].__name__ + ": " + str(exc_info[1]))
        # Three-argument raise (Python 2 syntax): raise the new
        # exception object with the traceback of the original one.
        raise new_exc, None, exc_info[2]
    finally:
        # Drop the local reference to the exception info; presumably
        # this is to avoid keeping the traceback alive through this
        # frame.
        del exc_info
|  | ||||
def exception_to_httperror(*expected):
    """Return a decorator-generating function that catches expected
    errors and throws a HTTPError describing it instead.

        @exception_to_httperror(NilmDBError, ValueError)
        def foo():
            pass

    'expected' is the set of exception classes to translate.  Any of
    them raised by the decorated function becomes a "400 Bad Request"
    HTTPError whose message is str() of the original exception; other
    exceptions propagate untouched.
    """
    def wrapper(func, *args, **kwargs):
        exc_info = None
        try:
            return func(*args, **kwargs)
        except expected:
            # Re-raise it, but maintain the original traceback
            exc_info = sys.exc_info()
            new_exc = cherrypy.HTTPError("400 Bad Request", str(exc_info[1]))
            # Three-argument raise (Python 2 syntax): new exception,
            # original traceback.
            raise new_exc, None, exc_info[2]
        finally:
            # Drop the local reference to the exception info;
            # presumably to avoid keeping the traceback alive.
            del exc_info
    # We need to preserve the function's argspecs for CherryPy to
    # handle argument errors correctly.  Decorator.decorator takes
    # care of that.
    return decorator.decorator(wrapper)
|  | ||||
| # Custom CherryPy tools | ||||
|  | ||||
def CORS_allow(methods):
    """CherryPy tool combining method filtering with CORS support.

    For every request it:

    - sets the Allow: response header to the permitted methods,
    - echoes the request's Origin (if any) back in
      Access-Control-Allow-Origin,
    - answers CORS preflight requests (OPTIONS carrying both
      Access-Control-Request-Method and -Headers) with an empty
      200 OK response,
    - raises 405 if the request method is not one of 'methods'.

    'methods' may be a single method name or a tuple/list of names.
    An empty set of methods means GET and HEAD; HEAD is added
    whenever GET is allowed.

    It is similar to cherrypy.tools.allow, with the CORS stuff added.

    Add this to CherryPy with:
    cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow)
    """
    req_headers = cherrypy.request.headers
    resp_headers = cherrypy.response.headers

    # Normalize 'methods' into a list of upper-case method names
    if isinstance(methods, (tuple, list)):
        candidates = methods
    else: # pragma: no cover
        candidates = [ methods ]
    allowed = [ name.upper() for name in candidates if name ]
    if not allowed: # pragma: no cover
        allowed = [ 'GET', 'HEAD' ]
    elif 'HEAD' not in allowed and 'GET' in allowed: # pragma: no cover
        allowed.append('HEAD')
    allow_value = ', '.join(allowed)
    resp_headers['Allow'] = allow_value

    # Allow all origins
    if 'Origin' in req_headers:
        resp_headers['Access-Control-Allow-Origin'] = req_headers['Origin']

    # If it's a CORS request, send response.
    wanted_method = req_headers.get("Access-Control-Request-Method", None)
    wanted_headers = req_headers.get("Access-Control-Request-Headers", None)
    is_preflight = (cherrypy.request.method == "OPTIONS" and
                    wanted_method and wanted_headers)
    if is_preflight:
        resp_headers['Access-Control-Allow-Headers'] = wanted_headers
        resp_headers['Access-Control-Allow-Methods'] = allow_value
        # Try to stop further processing and return a 200 OK
        cherrypy.response.status = "200 OK"
        cherrypy.response.body = ""
        cherrypy.request.handler = lambda: ""
        return

    # Reject methods that were not explicitly allowed
    if cherrypy.request.method in allowed:
        return
    raise cherrypy.HTTPError(405)
|  | ||||
|  | ||||
| # Helper for json_in tool to process JSON data into normal request | ||||
| # parameters. | ||||
def json_to_request_params(body):
    """Run CherryPy's JSON processor on 'body' and merge the decoded
    object into the request parameters.

    Raises HTTPError 415 if the decoded JSON is not a dictionary.
    """
    cherrypy.lib.jsontools.json_processor(body)
    decoded = cherrypy.request.json
    if isinstance(decoded, dict):
        cherrypy.request.params.update(decoded)
        return
    raise cherrypy.HTTPError(415)
|  | ||||
| # Used as an "error_page.default" handler | ||||
def json_error_page(status, message, traceback, version,
                    force_traceback = False):
    """Return a custom error page in JSON so the client can parse it.

    status          -- HTTP status string; its first word is parsed as
                       the numeric code (e.g. "404 Not Found")
    message         -- error message text
    traceback       -- traceback text
    version         -- unused here; part of the error-page handler
                       signature
    force_traceback -- if True, keep the traceback even for 4xx errors

    Returns a compact JSON string with "status", "message" and
    "traceback" keys, and sets the response Content-Type to JSON.
    """
    errordata = { "status" : status,
                  "message" : message,
                  "traceback" : traceback }
    # Don't send a traceback if the error was 400-499 (client's fault)
    try:
        code = int(status.split()[0])
        if not force_traceback:
            if code >= 400 and code <= 499:
                errordata["traceback"] = ""
    except Exception: # pragma: no cover
        # Best effort only: an unparseable status keeps the traceback.
        pass
    # Override the response type, which was previously set to text/html
    cherrypy.serving.response.headers['Content-Type'] = (
        "application/json;charset=utf-8" )
    # Undo the HTML escaping that cherrypy's get_error_page function applies
    # (cherrypy issue 1135).  "&amp;" must be decoded last so that an
    # escaped ampersand followed by "lt;" is not decoded twice.
    # Iterate over a snapshot because the dict is updated in the loop.
    for k, v in list(errordata.items()):
        v = v.replace("&lt;", "<")
        v = v.replace("&gt;", ">")
        v = v.replace("&amp;", "&")
        errordata[k] = v
    return json.dumps(errordata, separators=(',',':'))
|  | ||||
| # Start/stop CherryPy standalone server | ||||
def cherrypy_start(blocking = False, event = False, embedded = False):
    """Start the CherryPy server, handling errors and signals
    somewhat gracefully.

    blocking -- if True, wait until the engine reaches the EXITING
                state before returning
    event    -- optional object with a set() method (e.g. a
                threading.Event) that is signalled once the engine
                has started; None or False means "no event"
    embedded -- if False, subscribe CherryPy's signal and console
                control handlers (when the engine provides them)
    """

    if not embedded: # pragma: no cover
        # Handle signals nicely
        if hasattr(cherrypy.engine, "signal_handler"):
            cherrypy.engine.signal_handler.subscribe()
        if hasattr(cherrypy.engine, "console_control_handler"):
            cherrypy.engine.console_control_handler.subscribe()

    # Cherrypy stupidly calls os._exit(70) when it can't bind the
    # port.  At least try to print a reasonable error and continue
    # in this case, rather than just dying silently (as we would
    # otherwise do in embedded mode)
    real_exit = os._exit
    def fake_exit(code): # pragma: no cover
        if code == os.EX_SOFTWARE:
            # Write directly to stderr; this module imports no
            # fprintf helper.
            sys.stderr.write("error: CherryPy called os._exit!\n")
        else:
            real_exit(code)
    os._exit = fake_exit
    try:
        cherrypy.engine.start()
    finally:
        # Always restore the real os._exit, even if start() raised.
        os._exit = real_exit

    # Signal that the engine has started successfully.  The default
    # for 'event' is False, so both None and False mean "no event".
    if event is not None and event is not False:
        event.set()

    if blocking:
        try:
            cherrypy.engine.wait(cherrypy.engine.states.EXITING,
                                 interval = 0.1, channel = 'main')
        except (KeyboardInterrupt, IOError): # pragma: no cover
            cherrypy.engine.log('Keyboard Interrupt: shutting down bus')
            cherrypy.engine.exit()
        except SystemExit: # pragma: no cover
            cherrypy.engine.log('SystemExit raised: shutting down bus')
            cherrypy.engine.exit()
            raise
|  | ||||
| # Stop CherryPy server | ||||
def cherrypy_stop():
    """Stop the CherryPy server by asking the engine to exit."""
    cherrypy.engine.exit()
| @@ -13,3 +13,5 @@ import nilmdb.utils.time | ||||
| import nilmdb.utils.iterator | ||||
| import nilmdb.utils.interval | ||||
| import nilmdb.utils.lock | ||||
| import nilmdb.utils.sort | ||||
| import nilmdb.utils.unicode | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| import os | ||||
| import errno | ||||
| from math import log | ||||
|  | ||||
| def human_size(num): | ||||
| @@ -16,10 +17,17 @@ def human_size(num): | ||||
|         return '1 byte' | ||||
|  | ||||
def du(path):
    """Like du -sb, returns total size of path in bytes.  Ignore
    errors that might occur if we encounter broken symlinks or
    files in the process of being removed.

    Directories are summed recursively.  A path that does not exist
    (ENOENT) counts as 0 bytes; any other OSError is re-raised.
    """
    try:
        size = os.path.getsize(path)
        if os.path.isdir(path):
            for thisfile in os.listdir(path):
                size += du(os.path.join(path, thisfile))
        return size
    except OSError as e: # pragma: no cover
        # Only "no such file" is expected; anything else is a real error.
        if e.errno != errno.ENOENT:
            raise
        return 0
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| """Interval.  Like nilmdb.server.interval, but re-implemented here | ||||
| in plain Python so clients have easier access to it. | ||||
| in plain Python so clients have easier access to it, and with a few | ||||
| helper functions. | ||||
|  | ||||
| Intervals are half-open, ie. they include data points with timestamps | ||||
| [start, end) | ||||
| @@ -34,6 +35,10 @@ class Interval: | ||||
|         return ("[" + nilmdb.utils.time.timestamp_to_string(self.start) + | ||||
|                 " -> " + nilmdb.utils.time.timestamp_to_string(self.end) + ")") | ||||
|  | ||||
|     def human_string(self): | ||||
|         return ("[ " + nilmdb.utils.time.timestamp_to_human(self.start) + | ||||
|                 " -> " + nilmdb.utils.time.timestamp_to_human(self.end) + " ]") | ||||
|  | ||||
|     def __cmp__(self, other): | ||||
|         """Compare two intervals.  If non-equal, order by start then end""" | ||||
|         return cmp(self.start, other.start) or cmp(self.end, other.end) | ||||
| @@ -53,18 +58,11 @@ class Interval: | ||||
|             raise IntervalError("not a subset") | ||||
|         return Interval(start, end) | ||||
|  | ||||
| def set_difference(a, b): | ||||
|     """ | ||||
|     Compute the difference (a \\ b) between the intervals in 'a' and | ||||
|     the intervals in 'b'; i.e., the ranges that are present in 'self' | ||||
|     but not 'other'. | ||||
|  | ||||
|     'a' and 'b' must both be iterables. | ||||
|  | ||||
|     Returns a generator that yields each interval in turn. | ||||
|     Output intervals are built as subsets of the intervals in the | ||||
|     first argument (a). | ||||
|     """ | ||||
| def _interval_math_helper(a, b, op, subset = True): | ||||
|     """Helper for set_difference, intersection functions, | ||||
|     to compute interval subsets based on a math operator on ranges | ||||
|     present in A and B.  Subsets are computed from A, or new intervals | ||||
|     are generated if subset = False.""" | ||||
|     # Iterate through all starts and ends in sorted order.  Add a | ||||
|     # tag to the iterator so that we can figure out which one they | ||||
|     # were, after sorting. | ||||
| @@ -79,28 +77,71 @@ def set_difference(a, b): | ||||
|     # At each point, evaluate which type of end it is, to determine | ||||
|     # how to build up the output intervals. | ||||
|     a_interval = None | ||||
|     b_interval = None | ||||
|     in_a = False | ||||
|     in_b = False | ||||
|     out_start = None | ||||
|     for (ts, k, i) in nilmdb.utils.iterator.imerge(a_iter, b_iter): | ||||
|         if k == 0: | ||||
|             # start a interval | ||||
|             a_interval = i | ||||
|             if b_interval is None: | ||||
|                 out_start = ts | ||||
|             in_a = True | ||||
|         elif k == 1: | ||||
|             # start b interval | ||||
|             b_interval = i | ||||
|             if out_start is not None and out_start != ts: | ||||
|                 yield a_interval.subset(out_start, ts) | ||||
|             out_start = None | ||||
|             in_b = True | ||||
|         elif k == 2: | ||||
|             # end a interval | ||||
|             if out_start is not None and out_start != ts: | ||||
|                 yield a_interval.subset(out_start, ts) | ||||
|             out_start = None | ||||
|             a_interval = None | ||||
|             in_a = False | ||||
|         elif k == 3: | ||||
|             # end b interval | ||||
|             b_interval = None | ||||
|             if a_interval: | ||||
|                 out_start = ts | ||||
|             in_b = False | ||||
|         include = op(in_a, in_b) | ||||
|         if include and out_start is None: | ||||
|             out_start = ts | ||||
|         elif not include: | ||||
|             if out_start is not None and out_start != ts: | ||||
|                 if subset: | ||||
|                     yield a_interval.subset(out_start, ts) | ||||
|                 else: | ||||
|                     yield Interval(out_start, ts) | ||||
|             out_start = None | ||||
|  | ||||
def set_difference(a, b):
    """
    Compute the difference (a \\ b) between the intervals in 'a' and
    the intervals in 'b'; i.e., the ranges that are present in 'a'
    but not 'b'.

    'a' and 'b' must both be iterables.

    Returns a generator that yields each interval in turn.
    Output intervals are built as subsets of the intervals in the
    first argument (a).
    """
    # Keep a point when it lies inside 'a' and outside 'b'.
    return _interval_math_helper(a, b, (lambda a, b: a and not b))
|  | ||||
def intersection(a, b):
    """
    Compute the intersection between the intervals in 'a' and the
    intervals in 'b'; i.e., the ranges that are present in both 'a'
    and 'b'.

    'a' and 'b' must both be iterables.

    Returns a generator that yields each interval in turn.
    Output intervals are built as subsets of the intervals in the
    first argument (a).
    """
    # Keep a point only when it lies inside both 'a' and 'b'.
    return _interval_math_helper(a, b, (lambda a, b: a and b))
|  | ||||
def optimize(it):
    """
    Merge adjacent intervals from the iterable 'it'.

    Whenever one interval ends exactly where the next one starts, the
    two are joined: the later interval's 'start' is moved back to the
    earlier one's start (the later interval object is modified in
    place) and the earlier one is dropped.  Returns a generator that
    yields the resulting intervals in order.
    """
    pending = None
    for current in it:
        if pending is None:
            pending = current
            continue
        if pending.end != current.start:
            # Not adjacent: the pending interval is complete.
            yield pending
        else:
            # Adjacent: absorb the pending interval into this one.
            current.start = pending.start
        pending = current
    if pending is not None:
        yield pending
|   | ||||
| @@ -91,6 +91,20 @@ def serializer_proxy(obj_or_type): | ||||
|             r = SerializerCallProxy(self.__call_queue, attr, self) | ||||
|             return r | ||||
|  | ||||
|         # For an iterable object, on __iter__(), save the object's | ||||
|         # iterator and return this proxy.  On next(), call the object's | ||||
|         # iterator through this proxy. | ||||
        def __iter__(self):
            """Start iterating over the wrapped object.

            Calls the wrapped object's __iter__ through a
            SerializerCallProxy, stores the resulting iterator, and
            returns this proxy so that next() is also routed through
            the proxy.
            """
            attr = getattr(self.__object, "__iter__")
            self.__iter = SerializerCallProxy(self.__call_queue, attr, self)()
            return self
        def next(self):
            """Return the next item by calling the saved iterator's
            next() through a SerializerCallProxy.  Requires that
            __iter__() has been called first to save the iterator."""
            return SerializerCallProxy(self.__call_queue,
                                       self.__iter.next, self)()
|  | ||||
        def __getitem__(self, key):
            """Support indexing by forwarding to the "__getitem__"
            attribute obtained via this proxy's __getattr__."""
            return self.__getattr__("__getitem__")(key)
|  | ||||
|         def __call__(self, *args, **kwargs): | ||||
|             """Call this to instantiate the type, if a type was passed | ||||
|             to serializer_proxy.  Otherwise, pass the call through.""" | ||||
|   | ||||
							
								
								
									
										18
									
								
								nilmdb/utils/sort.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										18
									
								
								nilmdb/utils/sort.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,18 @@ | ||||
| import re | ||||
|  | ||||
def sort_human(items, key = None):
    """Return 'items' sorted in human-friendly order, so that
    embedded numbers compare numerically (/stream/2 before
    /stream/10).

    If 'key' is given, it is applied to each item to obtain the text
    that is used for sorting.
    """
    def as_number_if_possible(chunk):
        # Numeric chunks become ints; everything else stays text.
        try:
            return int(chunk)
        except Exception:
            return chunk

    def human_key(item):
        text = key(item) if key else item
        # Split into alternating non-digit and digit runs; the capture
        # group keeps the digit runs in the result.
        pieces = re.split(r'([0-9]+)', text)
        return list(map(as_number_if_possible, pieces))

    return sorted(items, key = human_key)
| @@ -6,7 +6,7 @@ import time | ||||
|  | ||||
| # Range | ||||
| min_timestamp = (-2**63) | ||||
| max_timestamp = (2**62 - 1) | ||||
| max_timestamp = (2**63 - 1) | ||||
|  | ||||
| # Smallest representable step | ||||
| epsilon = 1 | ||||
| @@ -32,6 +32,10 @@ def timestamp_to_human(timestamp): | ||||
|     """Convert a timestamp (integer microseconds since epoch) to a | ||||
|     human-readable string, using the local timezone for display | ||||
|     (e.g. from the TZ env var).""" | ||||
|     if timestamp == min_timestamp: | ||||
|         return "(minimum)" | ||||
|     if timestamp == max_timestamp: | ||||
|         return "(maximum)" | ||||
|     dt = datetime_tz.datetime_tz.fromtimestamp(timestamp_to_unix(timestamp)) | ||||
|     return dt.strftime("%a, %d %b %Y %H:%M:%S.%f %z") | ||||
|  | ||||
| @@ -56,7 +60,7 @@ def rate_to_period(hz, cycles = 1): | ||||
| def parse_time(toparse): | ||||
|     """ | ||||
|     Parse a free-form time string and return a nilmdb timestamp | ||||
|     (integer seconds since epoch).  If the string doesn't contain a | ||||
|     (integer microseconds since epoch).  If the string doesn't contain a | ||||
|     timestamp, the current local timezone is assumed (e.g. from the TZ | ||||
|     env var). | ||||
|     """ | ||||
| @@ -65,6 +69,14 @@ def parse_time(toparse): | ||||
|     if toparse == "max": | ||||
|         return max_timestamp | ||||
|  | ||||
|     # If it starts with @, treat it as a NILM timestamp | ||||
|     # (integer microseconds since epoch) | ||||
|     try: | ||||
|         if toparse[0] == '@': | ||||
|             return int(toparse[1:]) | ||||
|     except (ValueError, KeyError, IndexError): | ||||
|         pass | ||||
|  | ||||
|     # If string isn't "now" and doesn't contain at least 4 digits, | ||||
|     # consider it invalid.  smartparse might otherwise accept | ||||
|     # empty strings and strings with just separators. | ||||
| @@ -78,14 +90,6 @@ def parse_time(toparse): | ||||
|     except (ValueError, OverflowError): | ||||
|         pass | ||||
|  | ||||
|     # If it starts with @, treat it as a NILM timestamp | ||||
|     # (integer microseconds since epoch) | ||||
|     try: | ||||
|         if toparse[0] == '@': | ||||
|             return int(toparse[1:]) | ||||
|     except (ValueError, KeyError): | ||||
|         pass | ||||
|  | ||||
|     # If it's parseable as a float, treat it as a Unix or NILM | ||||
|     # timestamp based on its range. | ||||
|     try: | ||||
|   | ||||
							
								
								
									
										29
									
								
								nilmdb/utils/unicode.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								nilmdb/utils/unicode.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,29 @@ | ||||
| import sys | ||||
|  | ||||
# Pick the interpreter's Unicode text type: 'str' on Python 3 and
# later, 'unicode' on Python 2.  encode() and decode() below use it
# to decide whether a value is already Unicode text.
if sys.version_info[0] >= 3: # pragma: no cover (future Python3 compat)
    text_type = str
else:
    text_type = unicode
|  | ||||
def encode(u):
    """Encode Unicode text 'u' to a byte string.

    The default encoding is tried first, with UTF-8 as the fallback
    if that fails.  Values that are not Unicode text are returned
    unchanged.
    """
    if isinstance(u, text_type):
        try:
            return u.encode()
        except UnicodeEncodeError:
            return u.encode("utf-8")
    return u
|  | ||||
def decode(s):
    """Decode byte string 's' to Unicode text.

    The default encoding is tried first, then UTF-8.  If both fail,
    's' is returned as-is.  Values that are already Unicode text are
    returned unchanged.
    """
    if isinstance(s, text_type):
        return s
    # First attempt: default encoding; second attempt: UTF-8.
    for codec_args in ((), ("utf-8",)):
        try:
            return s.decode(*codec_args)
        except UnicodeDecodeError:
            continue
    return s # best we can do
							
								
								
									
										5
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										5
									
								
								setup.py
									
									
									
									
									
								
							| @@ -107,17 +107,18 @@ setup(name='nilmdb', | ||||
|       author_email = 'jim@jtan.com', | ||||
|       tests_require = [ 'nose', | ||||
|                         'coverage', | ||||
|                         'numpy', | ||||
|                         ], | ||||
|       setup_requires = [ 'distribute', | ||||
|                          ], | ||||
|       install_requires = [ 'decorator', | ||||
|                            'cherrypy >= 3.2', | ||||
|                            'simplejson', | ||||
|                            'pycurl', | ||||
|                            'python-dateutil', | ||||
|                            'pytz', | ||||
|                            'psutil >= 0.3.0', | ||||
|                            'requests >= 1.1.0, < 2.0.0', | ||||
|                            'progressbar >= 2.2', | ||||
|                            ], | ||||
|       packages = [ 'nilmdb', | ||||
|                    'nilmdb.utils', | ||||
| @@ -126,11 +127,13 @@ setup(name='nilmdb', | ||||
|                    'nilmdb.client', | ||||
|                    'nilmdb.cmdline', | ||||
|                    'nilmdb.scripts', | ||||
|                    'nilmdb.fsck', | ||||
|                    ], | ||||
|       entry_points = { | ||||
|           'console_scripts': [ | ||||
|               'nilmtool = nilmdb.scripts.nilmtool:main', | ||||
|               'nilmdb-server = nilmdb.scripts.nilmdb_server:main', | ||||
|               'nilmdb-fsck = nilmdb.scripts.nilmdb_fsck:main', | ||||
|               ], | ||||
|           }, | ||||
|       ext_modules = ext_modules, | ||||
|   | ||||
							
								
								
									
										8
									
								
								tests/data/timestamped
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										8
									
								
								tests/data/timestamped
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,8 @@ | ||||
| -10000000000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03   | ||||
| -100000000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03   | ||||
| -100000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03   | ||||
| -1000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03   | ||||
| 1 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03   | ||||
| 1000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03   | ||||
| 1000000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03   | ||||
| 1000000000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03   | ||||
| @@ -12,6 +12,7 @@ test_interval.py | ||||
| test_bulkdata.py | ||||
| test_nilmdb.py | ||||
| test_client.py | ||||
| test_numpyclient.py | ||||
| test_cmdline.py | ||||
|  | ||||
| test_*.py | ||||
|   | ||||
| @@ -69,9 +69,9 @@ class TestBulkData(object): | ||||
|         raw = [] | ||||
|         for i in range(1000): | ||||
|             raw.append("%d 1 2 3 4 5 6 7 8\n" % (10000 + i)) | ||||
|         node.append_string("".join(raw[0:1]), 0, 50000) | ||||
|         node.append_string("".join(raw[1:100]), 0, 50000) | ||||
|         node.append_string("".join(raw[100:]), 0, 50000) | ||||
|         node.append_data("".join(raw[0:1]), 0, 50000) | ||||
|         node.append_data("".join(raw[1:100]), 0, 50000) | ||||
|         node.append_data("".join(raw[100:]), 0, 50000) | ||||
|  | ||||
|         misc_slices = [ 0, 100, slice(None), slice(0), slice(10), | ||||
|                         slice(5,10), slice(3,None), slice(3,-3), | ||||
| @@ -85,7 +85,7 @@ class TestBulkData(object): | ||||
|         # Extract misc slices while appending, to make sure the | ||||
|         # data isn't being added in the middle of the file | ||||
|         for s in [2, slice(1,5), 2, slice(1,5)]: | ||||
|             node.append_string("0 0 0 0 0 0 0 0 0\n", 0, 50000) | ||||
|             node.append_data("0 0 0 0 0 0 0 0 0\n", 0, 50000) | ||||
|             raw.append("0 0 0 0 0 0 0 0 0\n") | ||||
|             eq_(get_node_slice(s), raw[s]) | ||||
|  | ||||
|   | ||||
| @@ -23,6 +23,7 @@ import warnings | ||||
| import resource | ||||
| import time | ||||
| import re | ||||
| import struct | ||||
|  | ||||
| from testutil.helpers import * | ||||
|  | ||||
| @@ -104,16 +105,19 @@ class TestClient(object): | ||||
|                 client.http.post("/stream/list") | ||||
|         client = nilmdb.client.Client(url = testurl) | ||||
|  | ||||
|         # Create three streams | ||||
|         # Create four streams | ||||
|         client.stream_create("/newton/prep", "float32_8") | ||||
|         client.stream_create("/newton/raw", "uint16_6") | ||||
|         client.stream_create("/newton/zzz/rawnotch", "uint16_9") | ||||
|         client.stream_create("/newton/zzz/rawnotch2", "uint16_9") | ||||
|         client.stream_create("/newton/zzz/rawnotch11", "uint16_9") | ||||
|  | ||||
|         # Verify we got 3 streams | ||||
|         # Verify we got 4 streams in the right order | ||||
|         eq_(client.stream_list(), [ ["/newton/prep", "float32_8"], | ||||
|                                     ["/newton/raw", "uint16_6"], | ||||
|                                     ["/newton/zzz/rawnotch", "uint16_9"] | ||||
|                                     ["/newton/zzz/rawnotch2", "uint16_9"], | ||||
|                                     ["/newton/zzz/rawnotch11", "uint16_9"] | ||||
|                                     ]) | ||||
|  | ||||
|         # Match just one type or one path | ||||
|         eq_(client.stream_list(layout="uint16_6"), | ||||
|             [ ["/newton/raw", "uint16_6"] ]) | ||||
| @@ -238,6 +242,35 @@ class TestClient(object): | ||||
|         in_("400 Bad Request", str(e.exception)) | ||||
|         in_("start must precede end", str(e.exception)) | ||||
|  | ||||
|         # Invalid times in HTTP request | ||||
|         with assert_raises(ClientError) as e: | ||||
|             client.http.put("stream/insert", "", { "path": "/newton/prep", | ||||
|                                                    "start": "asdf", "end": 0 }) | ||||
|         in_("400 Bad Request", str(e.exception)) | ||||
|         in_("invalid start", str(e.exception)) | ||||
|  | ||||
|         with assert_raises(ClientError) as e: | ||||
|             client.http.put("stream/insert", "", { "path": "/newton/prep", | ||||
|                                                    "start": 0, "end": "asdf" }) | ||||
|         in_("400 Bad Request", str(e.exception)) | ||||
|         in_("invalid end", str(e.exception)) | ||||
|  | ||||
|         # Good content type | ||||
|         with assert_raises(ClientError) as e: | ||||
|             client.http.put("stream/insert", "", | ||||
|                             { "path": "xxxx", "start": 0, "end": 1, | ||||
|                               "binary": 1 }, | ||||
|                             binary = True) | ||||
|         in_("No such stream", str(e.exception)) | ||||
|  | ||||
|         # Bad content type | ||||
|         with assert_raises(ClientError) as e: | ||||
|             client.http.put("stream/insert", "", | ||||
|                             { "path": "xxxx", "start": 0, "end": 1, | ||||
|                               "binary": 1 }, | ||||
|                             binary = False) | ||||
|         in_("Content type must be application/octet-stream", str(e.exception)) | ||||
|  | ||||
|         # Specify start/end (starts too late) | ||||
|         data = timestamper.TimestamperRate(testfile, start, 120) | ||||
|         with assert_raises(ClientError) as e: | ||||
| @@ -293,6 +326,27 @@ class TestClient(object): | ||||
|         # Test count | ||||
|         eq_(client.stream_count("/newton/prep"), 14400) | ||||
|  | ||||
|         # Test binary output | ||||
|         with assert_raises(ClientError) as e: | ||||
|             list(client.stream_extract("/newton/prep", | ||||
|                                        markup = True, binary = True)) | ||||
|         with assert_raises(ClientError) as e: | ||||
|             list(client.stream_extract("/newton/prep", | ||||
|                                        count = True, binary = True)) | ||||
|         data = "".join(client.stream_extract("/newton/prep", binary = True)) | ||||
|         # Quick check using struct | ||||
|         unpacker = struct.Struct("<qffffffff") | ||||
|         out = [] | ||||
|         for i in range(14400): | ||||
|             out.append(unpacker.unpack_from(data, i * unpacker.size)) | ||||
|         eq_(out[0], (1332511200000000, 266568.0, 224029.0, 5161.39990234375, | ||||
|                      2525.169921875, 8350.83984375, 3724.699951171875, | ||||
|                      1355.3399658203125, 2039.0)) | ||||
|  | ||||
|         # Just get some coverage | ||||
|         with assert_raises(ClientError) as e: | ||||
|             client.http.post("/stream/remove", { "path": "/none" }) | ||||
|  | ||||
|         client.close() | ||||
|  | ||||
|     def test_client_06_generators(self): | ||||
| @@ -313,10 +367,6 @@ class TestClient(object): | ||||
|         with assert_raises(ServerError) as e: | ||||
|             client.http.get_gen("http://nosuchurl.example.com./").next() | ||||
|  | ||||
|         # Trigger a curl error in generator | ||||
|         with assert_raises(ServerError) as e: | ||||
|             client.http.get_gen("http://nosuchurl.example.com./").next() | ||||
|  | ||||
|         # Check 404 for missing streams | ||||
|         for function in [ client.stream_intervals, client.stream_extract ]: | ||||
|             with assert_raises(ClientError) as e: | ||||
| @@ -355,16 +405,38 @@ class TestClient(object): | ||||
|                                  headers()) | ||||
|  | ||||
|         # Extract | ||||
|         x = http.get("stream/extract", | ||||
|                             { "path": "/newton/prep", | ||||
|                               "start": "123", | ||||
|                               "end": "124" }) | ||||
|         x = http.get("stream/extract", { "path": "/newton/prep", | ||||
|                                          "start": "123", "end": "124" }) | ||||
|         if "transfer-encoding: chunked" not in headers(): | ||||
|             warnings.warn("Non-chunked HTTP response for /stream/extract") | ||||
|         if "content-type: text/plain;charset=utf-8" not in headers(): | ||||
|             raise AssertionError("/stream/extract is not text/plain:\n" + | ||||
|                                  headers()) | ||||
|  | ||||
|         x = http.get("stream/extract", { "path": "/newton/prep", | ||||
|                                          "start": "123", "end": "124", | ||||
|                                          "binary": "1" }) | ||||
|         if "transfer-encoding: chunked" not in headers(): | ||||
|             warnings.warn("Non-chunked HTTP response for /stream/extract") | ||||
|         if "content-type: application/octet-stream" not in headers(): | ||||
|             raise AssertionError("/stream/extract is not binary:\n" + | ||||
|                                  headers()) | ||||
|  | ||||
|         # Make sure a binary of "0" is really off | ||||
|         x = http.get("stream/extract", { "path": "/newton/prep", | ||||
|                                          "start": "123", "end": "124", | ||||
|                                          "binary": "0" }) | ||||
|         if "content-type: application/octet-stream" in headers(): | ||||
|                     raise AssertionError("/stream/extract is not text:\n" + | ||||
|                                          headers()) | ||||
|  | ||||
|         # Invalid parameters | ||||
|         with assert_raises(ClientError) as e: | ||||
|             x = http.get("stream/extract", { "path": "/newton/prep", | ||||
|                                              "start": "123", "end": "124", | ||||
|                                              "binary": "asdfasfd" }) | ||||
|         in_("can't parse parameter", str(e.exception)) | ||||
|  | ||||
|         client.close() | ||||
|  | ||||
|     def test_client_08_unicode(self): | ||||
| @@ -441,72 +513,75 @@ class TestClient(object): | ||||
|             # override _max_data to trigger frequent server updates | ||||
|             ctx._max_data = 15 | ||||
|  | ||||
|             ctx.insert("100 1\n") | ||||
|             ctx.insert("1000 1\n") | ||||
|  | ||||
|             ctx.insert("101 ") | ||||
|             ctx.insert("1\n102 1") | ||||
|             ctx.insert("1010 ") | ||||
|             ctx.insert("1\n1020 1") | ||||
|             ctx.insert("") | ||||
|             ctx.insert("\n103 1\n") | ||||
|             ctx.insert("\n1030 1\n") | ||||
|  | ||||
|             ctx.insert("104 1\n") | ||||
|             ctx.insert("1040 1\n") | ||||
|             ctx.insert("# hello\n") | ||||
|             ctx.insert("   # hello\n") | ||||
|             ctx.insert("  105 1\n") | ||||
|             ctx.insert("  1050 1\n") | ||||
|             ctx.finalize() | ||||
|  | ||||
|             ctx.insert("107 1\n") | ||||
|             ctx.update_end(108) | ||||
|             ctx.insert("1070 1\n") | ||||
|             ctx.update_end(1080) | ||||
|             ctx.finalize() | ||||
|             ctx.update_start(109) | ||||
|             ctx.insert("110 1\n") | ||||
|             ctx.insert("111 1\n") | ||||
|             ctx.update_start(1090) | ||||
|             ctx.insert("1100 1\n") | ||||
|             ctx.insert("1110 1\n") | ||||
|             ctx.send() | ||||
|             ctx.insert("112 1\n") | ||||
|             ctx.insert("113 1\n") | ||||
|             ctx.insert("114 1\n") | ||||
|             ctx.update_end(116) | ||||
|             ctx.insert("115 1\n") | ||||
|             ctx.update_end(117) | ||||
|             ctx.insert("116 1\n") | ||||
|             ctx.update_end(118) | ||||
|             ctx.insert("117 1" + | ||||
|             ctx.insert("1120 1\n") | ||||
|             ctx.insert("1130 1\n") | ||||
|             ctx.insert("1140 1\n") | ||||
|             ctx.update_end(1160) | ||||
|             ctx.insert("1150 1\n") | ||||
|             ctx.update_end(1170) | ||||
|             ctx.insert("1160 1\n") | ||||
|             ctx.update_end(1180) | ||||
|             ctx.insert("1170 1" + | ||||
|                        " # this is super long" * 100 + | ||||
|                        "\n") | ||||
|             ctx.finalize() | ||||
|             ctx.insert("# this is super long" * 100) | ||||
|  | ||||
|         with assert_raises(ClientError): | ||||
|             with client.stream_insert_context("/context/test", 100, 200) as ctx: | ||||
|                 ctx.insert("118 1\n") | ||||
|             with client.stream_insert_context("/context/test", | ||||
|                                               1000, 2000) as ctx: | ||||
|                 ctx.insert("1180 1\n") | ||||
|  | ||||
|         with assert_raises(ClientError): | ||||
|             with client.stream_insert_context("/context/test", 200, 300) as ctx: | ||||
|                 ctx.insert("118 1\n") | ||||
|             with client.stream_insert_context("/context/test", | ||||
|                                               2000, 3000) as ctx: | ||||
|                 ctx.insert("1180 1\n") | ||||
|  | ||||
|         with assert_raises(ClientError): | ||||
|             with client.stream_insert_context("/context/test") as ctx: | ||||
|                 ctx.insert("bogus data\n") | ||||
|  | ||||
|         with client.stream_insert_context("/context/test", 200, 300) as ctx: | ||||
|         with client.stream_insert_context("/context/test", 2000, 3000) as ctx: | ||||
|             # make sure our override wasn't permanent | ||||
|             ne_(ctx._max_data, 15) | ||||
|             ctx.insert("225 1\n") | ||||
|             ctx.insert("2250 1\n") | ||||
|             ctx.finalize() | ||||
|  | ||||
|         with assert_raises(ClientError): | ||||
|             with client.stream_insert_context("/context/test", 300, 400) as ctx: | ||||
|                 ctx.insert("301 1\n") | ||||
|                 ctx.insert("302 2\n") | ||||
|                 ctx.insert("303 3\n") | ||||
|                 ctx.insert("304 4\n") | ||||
|                 ctx.insert("304 4\n") # non-monotonic after a few lines | ||||
|             with client.stream_insert_context("/context/test", | ||||
|                                               3000, 4000) as ctx: | ||||
|                 ctx.insert("3010 1\n") | ||||
|                 ctx.insert("3020 2\n") | ||||
|                 ctx.insert("3030 3\n") | ||||
|                 ctx.insert("3040 4\n") | ||||
|                 ctx.insert("3040 4\n") # non-monotonic after a few lines | ||||
|                 ctx.finalize() | ||||
|  | ||||
|         eq_(list(client.stream_intervals("/context/test")), | ||||
|             [ [ 100, 106 ], | ||||
|               [ 107, 108 ], | ||||
|               [ 109, 118 ], | ||||
|               [ 200, 300 ] ]) | ||||
|             [ [ 1000, 1051 ], | ||||
|               [ 1070, 1080 ], | ||||
|               [ 1090, 1180 ], | ||||
|               [ 2000, 3000 ] ]) | ||||
|  | ||||
|         # destroy stream (try without removing data first) | ||||
|         with assert_raises(ClientError): | ||||
| @@ -565,8 +640,12 @@ class TestClient(object): | ||||
|         with client.stream_insert_context("/empty/test", end = 950): | ||||
|             pass | ||||
|  | ||||
|         # Equal start and end is OK as long as there's no data | ||||
|         with client.stream_insert_context("/empty/test", start=9, end=9): | ||||
|             pass | ||||
|  | ||||
|         # Try various things that might cause problems | ||||
|         with client.stream_insert_context("/empty/test", 1000, 1050): | ||||
|         with client.stream_insert_context("/empty/test", 1000, 1050) as ctx: | ||||
|             ctx.finalize() # inserts [1000, 1050] | ||||
|             ctx.finalize() # nothing | ||||
|             ctx.finalize() # nothing | ||||
|   | ||||
| @@ -59,8 +59,7 @@ class TestCmdline(object): | ||||
|  | ||||
|     def run(self, arg_string, infile=None, outfile=None): | ||||
|         """Run a cmdline client with the specified argument string, | ||||
|         passing the given input.  Returns a tuple with the output and | ||||
|         exit code""" | ||||
|         passing the given input.  Save the output and exit code.""" | ||||
|         # printf("TZ=UTC ./nilmtool.py %s\n", arg_string) | ||||
|         os.environ['NILMDB_URL'] = "http://localhost:32180/" | ||||
|         class stdio_wrapper: | ||||
| @@ -88,7 +87,7 @@ class TestCmdline(object): | ||||
|                 sys.exit(0) | ||||
|             except SystemExit as e: | ||||
|                 exitcode = e.code | ||||
|         captured = outfile.getvalue() | ||||
|         captured = nilmdb.utils.unicode.decode(outfile.getvalue()) | ||||
|         self.captured = captured | ||||
|         self.exitcode = exitcode | ||||
|  | ||||
| @@ -160,6 +159,12 @@ class TestCmdline(object): | ||||
|         self.ok("--help") | ||||
|         self.contain("usage:") | ||||
|  | ||||
|         # version | ||||
|         self.ok("--version") | ||||
|         ver = self.captured | ||||
|         self.ok("list --version") | ||||
|         eq_(self.captured, ver) | ||||
|  | ||||
|         # fail for no args | ||||
|         self.fail("") | ||||
|  | ||||
| @@ -245,8 +250,10 @@ class TestCmdline(object): | ||||
|         self.contain("Client version: " + nilmdb.__version__) | ||||
|         self.contain("Server version: " + test_server.version) | ||||
|         self.contain("Server database path") | ||||
|         self.contain("Server database size") | ||||
|         self.contain("Server database free space") | ||||
|         self.contain("Server disk space used by NilmDB") | ||||
|         self.contain("Server disk space used by other") | ||||
|         self.contain("Server disk space reserved") | ||||
|         self.contain("Server disk space free") | ||||
|  | ||||
|     def test_04_createlist(self): | ||||
|         # Basic stream tests, like those in test_client. | ||||
| @@ -300,38 +307,19 @@ class TestCmdline(object): | ||||
|  | ||||
|         # Verify we got those 3 streams and they're returned in | ||||
|         # alphabetical order. | ||||
|         self.ok("list") | ||||
|         self.ok("list -l") | ||||
|         self.match("/newton/prep float32_8\n" | ||||
|                    "/newton/raw uint16_6\n" | ||||
|                    "/newton/zzz/rawnotch uint16_9\n") | ||||
|  | ||||
|         # Match just one type or one path.  Also check | ||||
|         # that --path is optional | ||||
|         self.ok("list --path /newton/raw") | ||||
|         self.match("/newton/raw uint16_6\n") | ||||
|  | ||||
|         self.ok("list /newton/raw") | ||||
|         self.match("/newton/raw uint16_6\n") | ||||
|  | ||||
|         self.fail("list -p /newton/raw /newton/raw") | ||||
|         self.contain("too many paths") | ||||
|  | ||||
|         self.ok("list --layout uint16_6") | ||||
|         self.ok("list --layout /newton/raw") | ||||
|         self.match("/newton/raw uint16_6\n") | ||||
|  | ||||
|         # Wildcard matches | ||||
|         self.ok("list --layout uint16*") | ||||
|         self.match("/newton/raw uint16_6\n" | ||||
|                    "/newton/zzz/rawnotch uint16_9\n") | ||||
|  | ||||
|         self.ok("list --path *zzz* --layout uint16*") | ||||
|         self.match("/newton/zzz/rawnotch uint16_9\n") | ||||
|  | ||||
|         self.ok("list *zzz* --layout uint16*") | ||||
|         self.match("/newton/zzz/rawnotch uint16_9\n") | ||||
|  | ||||
|         self.ok("list --path *zzz* --layout float32*") | ||||
|         self.match("") | ||||
|         self.ok("list *zzz*") | ||||
|         self.match("/newton/zzz/rawnotch\n") | ||||
|  | ||||
|         # reversed range | ||||
|         self.fail("list /newton/prep --start 2020-01-01 --end 2000-01-01") | ||||
| @@ -369,6 +357,8 @@ class TestCmdline(object): | ||||
|         self.contain("No stream at path") | ||||
|         self.fail("metadata /newton/nosuchstream --set foo=bar") | ||||
|         self.contain("No stream at path") | ||||
|         self.fail("metadata /newton/nosuchstream --delete") | ||||
|         self.contain("No stream at path") | ||||
|  | ||||
|         self.ok("metadata /newton/prep") | ||||
|         self.match("description=The Data\nv_scale=1.234\n") | ||||
| @@ -394,6 +384,19 @@ class TestCmdline(object): | ||||
|         self.fail("metadata /newton/nosuchpath") | ||||
|         self.contain("No stream at path /newton/nosuchpath") | ||||
|  | ||||
|         self.ok("metadata /newton/prep --delete") | ||||
|         self.ok("metadata /newton/prep --get") | ||||
|         self.match("") | ||||
|         self.ok("metadata /newton/prep --set " | ||||
|                 "'description=The Data' " | ||||
|                 "v_scale=1.234") | ||||
|         self.ok("metadata /newton/prep --delete v_scale") | ||||
|         self.ok("metadata /newton/prep --get") | ||||
|         self.match("description=The Data\n") | ||||
|         self.ok("metadata /newton/prep --set description=") | ||||
|         self.ok("metadata /newton/prep --get") | ||||
|         self.match("") | ||||
|  | ||||
|     def test_06_insert(self): | ||||
|         self.ok("insert --help") | ||||
|  | ||||
| @@ -477,33 +480,40 @@ class TestCmdline(object): | ||||
|         # bad start time | ||||
|         self.fail("insert -t -r 120 --start 'whatever' /newton/prep /dev/null") | ||||
|  | ||||
|         # Test negative times | ||||
|         self.ok("insert --start @-10000000000 --end @1000000001 /newton/prep" | ||||
|                 " tests/data/timestamped") | ||||
|         self.ok("extract -c /newton/prep --start min --end @1000000001") | ||||
|         self.match("8\n") | ||||
|         self.ok("remove /newton/prep --start min --end @1000000001") | ||||
|  | ||||
|     def test_07_detail_extended(self): | ||||
|         # Just count the number of lines, it's probably fine | ||||
|         self.ok("list --detail") | ||||
|         lines_(self.captured, 8) | ||||
|  | ||||
|         self.ok("list --detail --path *prep") | ||||
|         self.ok("list --detail *prep") | ||||
|         lines_(self.captured, 4) | ||||
|  | ||||
|         self.ok("list --detail --path *prep --start='23 Mar 2012 10:02'") | ||||
|         self.ok("list --detail *prep --start='23 Mar 2012 10:02'") | ||||
|         lines_(self.captured, 3) | ||||
|  | ||||
|         self.ok("list --detail --path *prep --start='23 Mar 2012 10:05'") | ||||
|         self.ok("list --detail *prep --start='23 Mar 2012 10:05'") | ||||
|         lines_(self.captured, 2) | ||||
|  | ||||
|         self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15'") | ||||
|         self.ok("list --detail *prep --start='23 Mar 2012 10:05:15'") | ||||
|         lines_(self.captured, 2) | ||||
|         self.contain("10:05:15.000") | ||||
|  | ||||
|         self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15.50'") | ||||
|         self.ok("list --detail *prep --start='23 Mar 2012 10:05:15.50'") | ||||
|         lines_(self.captured, 2) | ||||
|         self.contain("10:05:15.500") | ||||
|  | ||||
|         self.ok("list --detail --path *prep --start='23 Mar 2012 19:05:15.50'") | ||||
|         self.ok("list --detail *prep --start='23 Mar 2012 19:05:15.50'") | ||||
|         lines_(self.captured, 2) | ||||
|         self.contain("no intervals") | ||||
|  | ||||
|         self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15.50'" | ||||
|         self.ok("list --detail *prep --start='23 Mar 2012 10:05:15.50'" | ||||
|                 + " --end='23 Mar 2012 10:05:15.51'") | ||||
|         lines_(self.captured, 2) | ||||
|         self.contain("10:05:15.500") | ||||
| @@ -512,15 +522,15 @@ class TestCmdline(object): | ||||
|         lines_(self.captured, 8) | ||||
|  | ||||
|         # Verify the "raw timestamp" output | ||||
|         self.ok("list --detail --path *prep --timestamp-raw " | ||||
|         self.ok("list --detail *prep --timestamp-raw " | ||||
|                 "--start='23 Mar 2012 10:05:15.50'") | ||||
|         lines_(self.captured, 2) | ||||
|         self.contain("[ 1332497115500000 -> 1332497160000000 ]") | ||||
|  | ||||
|         # bad time | ||||
|         self.fail("list --detail --path *prep -T --start='9332497115.612'") | ||||
|         self.fail("list --detail *prep -T --start='9332497115.612'") | ||||
|         # good time | ||||
|         self.ok("list --detail --path *prep -T --start='1332497115.612'") | ||||
|         self.ok("list --detail *prep -T --start='1332497115.612'") | ||||
|         lines_(self.captured, 2) | ||||
|         self.contain("[ 1332497115612000 -> 1332497160000000 ]") | ||||
|  | ||||
| @@ -600,11 +610,19 @@ class TestCmdline(object): | ||||
|         test(8, "10:01:59.9", "10:02:00.1", extra="-m") | ||||
|  | ||||
|         # all data put in by tests | ||||
|         self.ok("extract -a /newton/prep --start 2000-01-01 --end 2020-01-01") | ||||
|         self.ok("extract -a /newton/prep --start min --end max") | ||||
|         lines_(self.captured, 43204) | ||||
|         self.ok("extract -c /newton/prep --start 2000-01-01 --end 2020-01-01") | ||||
|         self.match("43200\n") | ||||
|  | ||||
|         # test binary mode | ||||
|         self.fail("extract -c -B /newton/prep -s min -e max") | ||||
|         self.contain("binary cannot be combined") | ||||
|         self.fail("extract -m -B /newton/prep -s min -e max") | ||||
|         self.contain("binary cannot be combined") | ||||
|         self.ok("extract -B /newton/prep -s min -e max") | ||||
|         eq_(len(self.captured), 43200 * (8 + 8*4)) | ||||
|  | ||||
|         # markup for 3 intervals, plus extra markup lines whenever we had | ||||
|         # a "restart" from the nilmdb.stream_extract function | ||||
|         self.ok("extract -m /newton/prep --start 2000-01-01 --end 2020-01-01") | ||||
| @@ -624,7 +642,7 @@ class TestCmdline(object): | ||||
|  | ||||
|         # Try nonexistent stream | ||||
|         self.fail("remove /no/such/foo --start 2000-01-01 --end 2020-01-01") | ||||
|         self.contain("No stream at path") | ||||
|         self.contain("no stream matched path") | ||||
|  | ||||
|         # empty or backward ranges return errors | ||||
|         self.fail("remove /newton/prep --start 2020-01-01 --end 2000-01-01") | ||||
| @@ -652,9 +670,14 @@ class TestCmdline(object): | ||||
|                 "--start '23 Mar 2022 20:00:30' " + | ||||
|                 "--end '23 Mar 2022 20:00:31'") | ||||
|         self.match("0\n") | ||||
|         self.ok("remove -c /newton/prep /newton/pre* " + | ||||
|                 "--start '23 Mar 2022 20:00:30' " + | ||||
|                 "--end '23 Mar 2022 20:00:31'") | ||||
|         self.match("Removing from /newton/prep\n0\n" + | ||||
|                    "Removing from /newton/prep\n0\n") | ||||
|  | ||||
|         # Make sure we have the data we expect | ||||
|         self.ok("list --detail /newton/prep") | ||||
|         self.ok("list -l --detail /newton/prep") | ||||
|         self.match("/newton/prep float32_8\n" + | ||||
|                    "  [ Fri, 23 Mar 2012 10:00:00.000000 +0000" | ||||
|                    " -> Fri, 23 Mar 2012 10:01:59.991668 +0000 ]\n" | ||||
| @@ -689,7 +712,7 @@ class TestCmdline(object): | ||||
|         self.match("24000\n") | ||||
|  | ||||
|         # See the missing chunks in list output | ||||
|         self.ok("list --detail /newton/prep") | ||||
|         self.ok("list --layout --detail /newton/prep") | ||||
|         self.match("/newton/prep float32_8\n" + | ||||
|                    "  [ Fri, 23 Mar 2012 10:00:00.000000 +0000" | ||||
|                    " -> Fri, 23 Mar 2012 10:00:05.000000 +0000 ]\n" | ||||
| @@ -703,7 +726,7 @@ class TestCmdline(object): | ||||
|         # Remove all data, verify it's missing | ||||
|         self.ok("remove /newton/prep --start 2000-01-01 --end 2020-01-01") | ||||
|         self.match("")  # no count requested this time | ||||
|         self.ok("list --detail /newton/prep") | ||||
|         self.ok("list -l --detail /newton/prep") | ||||
|         self.match("/newton/prep float32_8\n" + | ||||
|                    "  (no intervals)\n") | ||||
|  | ||||
| @@ -721,16 +744,16 @@ class TestCmdline(object): | ||||
|         self.contain("too few arguments") | ||||
|  | ||||
|         self.fail("destroy /no/such/stream") | ||||
|         self.contain("No stream at path") | ||||
|         self.contain("no stream matched path") | ||||
|  | ||||
|         self.fail("destroy -R /no/such/stream") | ||||
|         self.contain("No stream at path") | ||||
|         self.contain("no stream matched path") | ||||
|  | ||||
|         self.fail("destroy asdfasdf") | ||||
|         self.contain("No stream at path") | ||||
|         self.contain("no stream matched path") | ||||
|  | ||||
|         # From previous tests, we have: | ||||
|         self.ok("list") | ||||
|         self.ok("list -l") | ||||
|         self.match("/newton/prep float32_8\n" | ||||
|                    "/newton/raw uint16_6\n" | ||||
|                    "/newton/zzz/rawnotch uint16_9\n") | ||||
| @@ -746,13 +769,13 @@ class TestCmdline(object): | ||||
|         lines_(self.captured, 7) | ||||
|  | ||||
|         # Destroy for real | ||||
|         self.ok("destroy -R /newton/prep") | ||||
|         self.ok("list") | ||||
|         self.ok("destroy -R /n*/prep") | ||||
|         self.ok("list -l") | ||||
|         self.match("/newton/raw uint16_6\n" | ||||
|                    "/newton/zzz/rawnotch uint16_9\n") | ||||
|  | ||||
|         self.ok("destroy /newton/zzz/rawnotch") | ||||
|         self.ok("list") | ||||
|         self.ok("list -l") | ||||
|         self.match("/newton/raw uint16_6\n") | ||||
|  | ||||
|         self.ok("destroy /newton/raw") | ||||
| @@ -771,18 +794,17 @@ class TestCmdline(object): | ||||
|             self.ok("list") | ||||
|             self.contain(path) | ||||
|             # Make sure it was created empty | ||||
|             self.ok("list --detail --path " + path) | ||||
|             self.ok("list --detail " + path) | ||||
|             self.contain("(no intervals)") | ||||
|  | ||||
|     def test_12_unicode(self): | ||||
|         # Unicode paths. | ||||
|         self.ok("destroy /newton/asdf/qwer") | ||||
|         self.ok("destroy /newton/prep") | ||||
|         self.ok("destroy /newton/raw") | ||||
|         self.ok("destroy /newton/prep /newton/raw") | ||||
|         self.ok("destroy /newton/zzz") | ||||
|  | ||||
|         self.ok(u"create /düsseldorf/raw uint16_6") | ||||
|         self.ok("list --detail") | ||||
|         self.ok("list -l --detail") | ||||
|         self.contain(u"/düsseldorf/raw uint16_6") | ||||
|         self.contain("(no intervals)") | ||||
|  | ||||
| @@ -868,7 +890,7 @@ class TestCmdline(object): | ||||
|         du_before = nilmdb.utils.diskusage.du(testdb) | ||||
|  | ||||
|         # Make sure we have the data we expect | ||||
|         self.ok("list --detail") | ||||
|         self.ok("list -l --detail") | ||||
|         self.match("/newton/prep float32_8\n" + | ||||
|                    "  [ Fri, 23 Mar 2012 10:00:00.000000 +0000" | ||||
|                    " -> Fri, 23 Mar 2012 10:01:59.991668 +0000 ]\n" | ||||
| @@ -904,7 +926,7 @@ class TestCmdline(object): | ||||
|         self.match("3600\n") | ||||
|  | ||||
|         # See the missing chunks in list output | ||||
|         self.ok("list --detail") | ||||
|         self.ok("list -l --detail") | ||||
|         self.match("/newton/prep float32_8\n" + | ||||
|                    "  [ Fri, 23 Mar 2012 10:00:00.000000 +0000" | ||||
|                    " -> Fri, 23 Mar 2012 10:00:05.000000 +0000 ]\n" | ||||
| @@ -994,6 +1016,18 @@ class TestCmdline(object): | ||||
|         self.match("[ Thu, 01 Jan 2004 00:00:00.000000 +0000 -" | ||||
|                    "> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n") | ||||
|  | ||||
|         # optimize | ||||
|         self.ok("insert -s 01-01-2002 -e 01-01-2004 /diff/1 /dev/null") | ||||
|         self.ok("intervals /diff/1") | ||||
|         self.match("[ Sat, 01 Jan 2000 00:00:00.000000 +0000 -" | ||||
|                    "> Thu, 01 Jan 2004 00:00:00.000000 +0000 ]\n" | ||||
|                    "[ Thu, 01 Jan 2004 00:00:00.000000 +0000 -" | ||||
|                    "> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n") | ||||
|         self.ok("intervals /diff/1 --optimize") | ||||
|         self.ok("intervals /diff/1 -o") | ||||
|         self.match("[ Sat, 01 Jan 2000 00:00:00.000000 +0000 -" | ||||
|                    "> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n") | ||||
|  | ||||
|         self.ok("destroy -R /diff/1") | ||||
|         self.ok("destroy -R /diff/2") | ||||
|  | ||||
| @@ -1028,7 +1062,7 @@ class TestCmdline(object): | ||||
|             else: | ||||
|                 raise AssertionError("data not found at " + seek) | ||||
|             # Verify "list" output | ||||
|             self.ok("list") | ||||
|             self.ok("list -l") | ||||
|             self.match("/" + "/".join(components) + " float32_8\n") | ||||
|  | ||||
|         # Lots of renames | ||||
| @@ -1038,10 +1072,12 @@ class TestCmdline(object): | ||||
|         self.contain("old and new paths are the same") | ||||
|         check_path("newton", "prep") | ||||
|         self.fail("rename /newton/prep /newton") | ||||
|         self.contain("subdirs of this path already exist") | ||||
|         self.contain("path must contain at least one folder") | ||||
|         self.fail("rename /newton/prep /newton/prep/") | ||||
|         self.contain("invalid path") | ||||
|         self.ok("rename /newton/prep /newton/foo") | ||||
|         self.ok("rename /newton/prep /newton/foo/1") | ||||
|         check_path("newton", "foo", "1") | ||||
|         self.ok("rename /newton/foo/1 /newton/foo") | ||||
|         check_path("newton", "foo") | ||||
|         self.ok("rename /newton/foo /totally/different/thing") | ||||
|         check_path("totally", "different", "thing") | ||||
|   | ||||
| @@ -59,6 +59,14 @@ class TestInterval: | ||||
|         self.test_interval_intersect() | ||||
|         Interval = NilmdbInterval | ||||
|  | ||||
|         # Other helpers in nilmdb.utils.interval | ||||
|         i = [ UtilsInterval(1,2), UtilsInterval(2,3), UtilsInterval(4,5) ] | ||||
|         eq_(list(nilmdb.utils.interval.optimize(i)), | ||||
|             [ UtilsInterval(1,3), UtilsInterval(4,5) ]) | ||||
|         eq_(UtilsInterval(1234567890123456, 1234567890654321).human_string(), | ||||
|             "[ Fri, 13 Feb 2009 18:31:30.123456 -0500 -> " + | ||||
|             "Fri, 13 Feb 2009 18:31:30.654321 -0500 ]") | ||||
|  | ||||
|     def test_interval(self): | ||||
|         # Test Interval class | ||||
|         os.environ['TZ'] = "America/New_York" | ||||
| @@ -226,13 +234,16 @@ class TestInterval: | ||||
|             x = makeset("[--)") & 1234 | ||||
|  | ||||
|         def do_test(a, b, c, d): | ||||
|             # a & b == c | ||||
|             # a & b == c (using nilmdb.server.interval) | ||||
|             ab = IntervalSet() | ||||
|             for x in b: | ||||
|                 for i in (a & x): | ||||
|                     ab += i | ||||
|             eq_(ab,c) | ||||
|  | ||||
|             # a & b == c (using nilmdb.utils.interval) | ||||
|             eq_(IntervalSet(nilmdb.utils.interval.intersection(a,b)), c) | ||||
|  | ||||
|             # a \ b == d | ||||
|             eq_(IntervalSet(nilmdb.utils.interval.set_difference(a,b)), d) | ||||
|  | ||||
| @@ -302,6 +313,17 @@ class TestInterval: | ||||
|         eq_(nilmdb.utils.interval.set_difference( | ||||
|             a.intersection(list(c)[0]), b.intersection(list(c)[0])), d) | ||||
|  | ||||
|         # Fill out test coverage for non-subsets | ||||
|         def diff2(a,b, subset): | ||||
|             return nilmdb.utils.interval._interval_math_helper( | ||||
|                 a, b, (lambda a, b: b and not a), subset=subset) | ||||
|         with assert_raises(nilmdb.utils.interval.IntervalError): | ||||
|             list(diff2(a,b,True)) | ||||
|         list(diff2(a,b,False)) | ||||
|  | ||||
|         # Empty second set | ||||
|         eq_(nilmdb.utils.interval.set_difference(a, IntervalSet()), a) | ||||
|  | ||||
|         # Empty second set | ||||
|         eq_(nilmdb.utils.interval.set_difference(a, IntervalSet()), a) | ||||
|  | ||||
|   | ||||
| @@ -90,13 +90,16 @@ class Test00Nilmdb(object):  # named 00 so it runs first | ||||
|         eq_(db.stream_get_metadata("/newton/prep"), meta1) | ||||
|         eq_(db.stream_get_metadata("/newton/raw"), meta1) | ||||
|  | ||||
|         # fill in some test coverage for start >= end | ||||
|         # fill in some misc. test coverage | ||||
|         with assert_raises(nilmdb.server.NilmDBError): | ||||
|             db.stream_remove("/newton/prep", 0, 0) | ||||
|         with assert_raises(nilmdb.server.NilmDBError): | ||||
|             db.stream_remove("/newton/prep", 1, 0) | ||||
|         db.stream_remove("/newton/prep", 0, 1) | ||||
|  | ||||
|         with assert_raises(nilmdb.server.NilmDBError): | ||||
|             db.stream_extract("/newton/prep", count = True, binary = True) | ||||
|  | ||||
|         db.close() | ||||
|  | ||||
| class TestBlockingServer(object): | ||||
| @@ -154,11 +157,14 @@ class TestServer(object): | ||||
|  | ||||
|     def test_server(self): | ||||
|         # Make sure we can't force an exit, and test other 404 errors | ||||
|         for url in [ "/exit", "/", "/favicon.ico" ]: | ||||
|         for url in [ "/exit", "/favicon.ico" ]: | ||||
|             with assert_raises(HTTPError) as e: | ||||
|                 geturl(url) | ||||
|             eq_(e.exception.code, 404) | ||||
|  | ||||
|         # Root page | ||||
|         in_("This is NilmDB", geturl("/")) | ||||
|  | ||||
|         # Check version | ||||
|         eq_(distutils.version.LooseVersion(getjson("/version")), | ||||
|             distutils.version.LooseVersion(nilmdb.__version__)) | ||||
|   | ||||
							
								
								
									
										373
									
								
								tests/test_numpyclient.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										373
									
								
								tests/test_numpyclient.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,373 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| import nilmdb.server | ||||
| import nilmdb.client | ||||
| import nilmdb.client.numpyclient | ||||
|  | ||||
| from nilmdb.utils.printf import * | ||||
| from nilmdb.utils import timestamper | ||||
| from nilmdb.client import ClientError, ServerError | ||||
| from nilmdb.utils import datetime_tz | ||||
|  | ||||
| from nose.plugins.skip import SkipTest | ||||
| from nose.tools import * | ||||
| from nose.tools import assert_raises | ||||
| import itertools | ||||
| import distutils.version | ||||
|  | ||||
| from testutil.helpers import * | ||||
|  | ||||
| import numpy as np | ||||
|  | ||||
| testdb = "tests/numpyclient-testdb" | ||||
| testurl = "http://localhost:32180/" | ||||
|  | ||||
| def setup_module(): | ||||
|     global test_server, test_db | ||||
|     # Clear out DB | ||||
|     recursive_unlink(testdb) | ||||
|  | ||||
|     # Start web app on a custom port | ||||
|     test_db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)( | ||||
|         testdb, bulkdata_args = { "file_size" : 16384, | ||||
|                                   "files_per_dir" : 3 } ) | ||||
|  | ||||
|     test_server = nilmdb.server.Server(test_db, host = "127.0.0.1", | ||||
|                                        port = 32180, stoppable = False, | ||||
|                                        fast_shutdown = True, | ||||
|                                        force_traceback = True) | ||||
|     test_server.start(blocking = False) | ||||
|  | ||||
| def teardown_module(): | ||||
|     global test_server, test_db | ||||
|     # Close web app | ||||
|     test_server.stop() | ||||
|     test_db.close() | ||||
|  | ||||
| class TestNumpyClient(object): | ||||
|  | ||||
|     def test_numpyclient_01_basic(self): | ||||
|         # Test basic connection | ||||
|         client = nilmdb.client.numpyclient.NumpyClient(url = testurl) | ||||
|         version = client.version() | ||||
|         eq_(distutils.version.LooseVersion(version), | ||||
|             distutils.version.LooseVersion(test_server.version)) | ||||
|  | ||||
|         # Verify subclassing | ||||
|         assert(isinstance(client, nilmdb.client.Client)) | ||||
|  | ||||
|         # Layouts | ||||
|         for layout in "int8_t", "something_8", "integer_1": | ||||
|             with assert_raises(ValueError): | ||||
|                 for x in client.stream_extract_numpy("/foo", layout=layout): | ||||
|                     pass | ||||
|         for layout in "int8_1", "uint8_30", "int16_20", "float64_100": | ||||
|             with assert_raises(ClientError) as e: | ||||
|                 for x in client.stream_extract_numpy("/foo", layout=layout): | ||||
|                     pass | ||||
|             in_("No such stream", str(e.exception)) | ||||
|  | ||||
|         with assert_raises(ClientError) as e: | ||||
|             for x in client.stream_extract_numpy("/foo"): | ||||
|                 pass | ||||
|         in_("can't get layout for path", str(e.exception)) | ||||
|  | ||||
|         client.close() | ||||
|  | ||||
|     def test_numpyclient_02_extract(self): | ||||
|         client = nilmdb.client.numpyclient.NumpyClient(url = testurl) | ||||
|  | ||||
|         # Insert some data as text | ||||
|         client.stream_create("/newton/prep", "float32_8") | ||||
|         testfile = "tests/data/prep-20120323T1000" | ||||
|         start = nilmdb.utils.time.parse_time("20120323T1000") | ||||
|         rate = 120 | ||||
|         data = timestamper.TimestamperRate(testfile, start, rate) | ||||
|         result = client.stream_insert("/newton/prep", data, | ||||
|                                       start, start + 119999777) | ||||
|  | ||||
|         # Extract Numpy arrays | ||||
|         array = None | ||||
|         pieces = 0 | ||||
|         for chunk in client.stream_extract_numpy("/newton/prep", maxrows=1000): | ||||
|             pieces += 1 | ||||
|             if array is not None: | ||||
|                 array = np.vstack((array, chunk)) | ||||
|             else: | ||||
|                 array = chunk | ||||
|         eq_(array.shape, (14400, 9)) | ||||
|         eq_(pieces, 15) | ||||
|  | ||||
|         # Try structured | ||||
|         s = list(client.stream_extract_numpy("/newton/prep", structured = True)) | ||||
|         assert(np.array_equal(np.c_[s[0]['timestamp'], s[0]['data']], array)) | ||||
|  | ||||
|         # Compare.  Will be close but not exact because the conversion | ||||
|         # to and from ASCII was lossy. | ||||
|         data = timestamper.TimestamperRate(testfile, start, rate) | ||||
|         actual = np.fromstring(" ".join(data), sep=' ').reshape(14400, 9) | ||||
|         assert(np.allclose(array, actual)) | ||||
|  | ||||
|         client.close() | ||||
|  | ||||
|     def test_numpyclient_03_insert(self): | ||||
|         client = nilmdb.client.numpyclient.NumpyClient(url = testurl) | ||||
|  | ||||
|         # Limit _max_data just to get better coverage | ||||
|         old_max_data = nilmdb.client.numpyclient.StreamInserterNumpy._max_data | ||||
|         nilmdb.client.numpyclient.StreamInserterNumpy._max_data = 100000 | ||||
|  | ||||
|         client.stream_create("/test/1", "uint16_1") | ||||
|         client.stream_insert_numpy("/test/1", | ||||
|                                    np.array([[0, 1], | ||||
|                                              [1, 2], | ||||
|                                              [2, 3], | ||||
|                                              [3, 4]])) | ||||
|  | ||||
|         # Wrong number of dimensions | ||||
|         with assert_raises(ValueError) as e: | ||||
|             client.stream_insert_numpy("/test/1", | ||||
|                                        np.array([[[0, 1], | ||||
|                                                   [1, 2]], | ||||
|                                                  [[3, 4], | ||||
|                                                   [4, 5]]])) | ||||
|         in_("wrong number of dimensions", str(e.exception)) | ||||
|  | ||||
|         # Wrong number of fields | ||||
|         with assert_raises(ValueError) as e: | ||||
|             client.stream_insert_numpy("/test/1", | ||||
|                                        np.array([[0, 1, 2], | ||||
|                                                  [1, 2, 3], | ||||
|                                                  [3, 4, 5], | ||||
|                                                  [4, 5, 6]])) | ||||
|         in_("wrong number of fields", str(e.exception)) | ||||
|  | ||||
|         # Unstructured | ||||
|         client.stream_create("/test/2", "float32_8") | ||||
|         client.stream_insert_numpy( | ||||
|             "/test/2", | ||||
|             client.stream_extract_numpy( | ||||
|                 "/newton/prep", structured = False, maxrows = 1000)) | ||||
|  | ||||
|         # Structured, and specifying layout | ||||
|         client.stream_create("/test/3", "float32_8") | ||||
|         client.stream_insert_numpy( | ||||
|             path = "/test/3", layout = "float32_8", | ||||
|             data = client.stream_extract_numpy( | ||||
|                 "/newton/prep", structured = True, maxrows = 1000)) | ||||
|  | ||||
|         # Structured, specifying wrong layout | ||||
|         client.stream_create("/test/4", "float32_8") | ||||
|         with assert_raises(ValueError) as e: | ||||
|             client.stream_insert_numpy( | ||||
|                 "/test/4", layout = "uint16_1", | ||||
|                 data = client.stream_extract_numpy( | ||||
|                     "/newton/prep", structured = True, maxrows = 1000)) | ||||
|         in_("wrong dtype", str(e.exception)) | ||||
|  | ||||
|         # Unstructured, and specifying wrong layout | ||||
|         client.stream_create("/test/5", "float32_8") | ||||
|         with assert_raises(ClientError) as e: | ||||
|             client.stream_insert_numpy( | ||||
|                 "/test/5", layout = "uint16_8", | ||||
|                 data = client.stream_extract_numpy( | ||||
|                     "/newton/prep", structured = False, maxrows = 1000)) | ||||
|         # timestamps will be screwy here, because data will be parsed wrong | ||||
|         in_("error parsing input data", str(e.exception)) | ||||
|  | ||||
|         # Make sure the /newton/prep copies are identical | ||||
|         a = np.vstack(client.stream_extract_numpy("/newton/prep")) | ||||
|         b = np.vstack(client.stream_extract_numpy("/test/2")) | ||||
|         c = np.vstack(client.stream_extract_numpy("/test/3")) | ||||
|         assert(np.array_equal(a,b)) | ||||
|         assert(np.array_equal(a,c)) | ||||
|  | ||||
|         # Make sure none of the files are greater than 16384 bytes as | ||||
|         # we configured with the bulkdata_args above. | ||||
|         datapath = os.path.join(testdb, "data") | ||||
|         for (dirpath, dirnames, filenames) in os.walk(datapath): | ||||
|             for f in filenames: | ||||
|                 fn = os.path.join(dirpath, f) | ||||
|                 size = os.path.getsize(fn) | ||||
|                 if size > 16384: | ||||
|                     raise AssertionError(sprintf("%s is too big: %d > %d\n", | ||||
|                                                  fn, size, 16384)) | ||||
|  | ||||
|         nilmdb.client.numpyclient.StreamInserterNumpy._max_data = old_max_data | ||||
|         client.close() | ||||
|  | ||||
|     def test_numpyclient_04_context(self): | ||||
|         # Like test_client_context, but with Numpy data | ||||
|         client = nilmdb.client.numpyclient.NumpyClient(testurl) | ||||
|  | ||||
|         client.stream_create("/context/test", "uint16_1") | ||||
|         with client.stream_insert_numpy_context("/context/test") as ctx: | ||||
|             # override _max_rows to trigger frequent server updates | ||||
|             ctx._max_rows = 2 | ||||
|             ctx.insert([[1000, 1]]) | ||||
|             ctx.insert([[1010, 1], [1020, 1], [1030, 1]]) | ||||
|             ctx.insert([[1040, 1], [1050, 1]]) | ||||
|             ctx.finalize() | ||||
|             ctx.insert([[1070, 1]]) | ||||
|             ctx.update_end(1080) | ||||
|             ctx.finalize() | ||||
|             ctx.update_start(1090) | ||||
|             ctx.insert([[1100, 1]]) | ||||
|             ctx.insert([[1110, 1]]) | ||||
|             ctx.send() | ||||
|             ctx.insert([[1120, 1], [1130, 1], [1140, 1]]) | ||||
|             ctx.update_end(1160) | ||||
|             ctx.insert([[1150, 1]]) | ||||
|             ctx.update_end(1170) | ||||
|             ctx.insert([[1160, 1]]) | ||||
|             ctx.update_end(1180) | ||||
|             ctx.insert([[1170, 123456789.0]]) | ||||
|             ctx.finalize() | ||||
|             ctx.insert(np.zeros((0,2))) | ||||
|  | ||||
|         with assert_raises(ClientError): | ||||
|             with client.stream_insert_numpy_context("/context/test", | ||||
|                                                     1000, 2000) as ctx: | ||||
|                 ctx.insert([[1180, 1]]) | ||||
|  | ||||
|         with assert_raises(ClientError): | ||||
|             with client.stream_insert_numpy_context("/context/test", | ||||
|                                                     2000, 3000) as ctx: | ||||
|                 ctx._max_rows = 2 | ||||
|                 ctx.insert([[3180, 1]]) | ||||
|                 ctx.insert([[3181, 1]]) | ||||
|  | ||||
|         with client.stream_insert_numpy_context("/context/test", | ||||
|                                                 2000, 3000) as ctx: | ||||
|             # make sure our override wasn't permanent | ||||
|             ne_(ctx._max_rows, 2) | ||||
|             ctx.insert([[2250, 1]]) | ||||
|             ctx.finalize() | ||||
|  | ||||
|         with assert_raises(ClientError): | ||||
|             with client.stream_insert_numpy_context("/context/test", | ||||
|                                                     3000, 4000) as ctx: | ||||
|                 ctx.insert([[3010, 1]]) | ||||
|                 ctx.insert([[3020, 2]]) | ||||
|                 ctx.insert([[3030, 3]]) | ||||
|                 ctx.insert([[3040, 4]]) | ||||
|                 ctx.insert([[3040, 4]]) # non-monotonic after a few lines | ||||
|                 ctx.finalize() | ||||
|  | ||||
|         eq_(list(client.stream_intervals("/context/test")), | ||||
|             [ [ 1000, 1051 ], | ||||
|               [ 1070, 1080 ], | ||||
|               [ 1090, 1180 ], | ||||
|               [ 2000, 3000 ] ]) | ||||
|  | ||||
|         client.stream_remove("/context/test") | ||||
|         client.stream_destroy("/context/test") | ||||
|         client.close() | ||||
|  | ||||
|     def test_numpyclient_05_emptyintervals(self): | ||||
|         # Like test_client_emptyintervals, with insert_numpy_context | ||||
|         client = nilmdb.client.numpyclient.NumpyClient(testurl) | ||||
|         client.stream_create("/empty/test", "uint16_1") | ||||
|         def info(): | ||||
|             result = [] | ||||
|             for interval in list(client.stream_intervals("/empty/test")): | ||||
|                 result.append((client.stream_count("/empty/test", *interval), | ||||
|                                interval)) | ||||
|             return result | ||||
|         eq_(info(), []) | ||||
|  | ||||
|         # Insert a region with just a few points | ||||
|         with client.stream_insert_numpy_context("/empty/test") as ctx: | ||||
|             ctx.update_start(100) | ||||
|             ctx.insert([[140, 1]]) | ||||
|             ctx.insert([[150, 1]]) | ||||
|             ctx.insert([[160, 1]]) | ||||
|             ctx.update_end(200) | ||||
|             ctx.finalize() | ||||
|         eq_(info(), [(3, [100, 200])]) | ||||
|  | ||||
|         # Delete chunk, which will leave one data point and two intervals | ||||
|         client.stream_remove("/empty/test", 145, 175) | ||||
|         eq_(info(), [(1, [100, 145]), | ||||
|                      (0, [175, 200])]) | ||||
|  | ||||
|         # Try also creating a completely empty interval from scratch, | ||||
|         # in a few different ways. | ||||
|         client.stream_insert("/empty/test", "", 300, 350) | ||||
|         client.stream_insert("/empty/test", [], 400, 450) | ||||
|         with client.stream_insert_numpy_context("/empty/test", 500, 550): | ||||
|             pass | ||||
|  | ||||
|         # If enough timestamps aren't provided, empty streams won't be created. | ||||
|         client.stream_insert("/empty/test", []) | ||||
|         with client.stream_insert_numpy_context("/empty/test"): | ||||
|             pass | ||||
|         client.stream_insert("/empty/test", [], start = 600) | ||||
|         with client.stream_insert_numpy_context("/empty/test", start = 700): | ||||
|             pass | ||||
|         client.stream_insert("/empty/test", [], end = 850) | ||||
|         with client.stream_insert_numpy_context("/empty/test", end = 950): | ||||
|             pass | ||||
|  | ||||
|         # Equal start and end is OK as long as there's no data | ||||
|         with assert_raises(ClientError) as e: | ||||
|             with client.stream_insert_numpy_context("/empty/test", | ||||
|                                                     start=9, end=9) as ctx: | ||||
|                 ctx.insert([[9, 9]]) | ||||
|                 ctx.finalize() | ||||
|         in_("have data to send, but invalid start/end times", str(e.exception)) | ||||
|  | ||||
|         with client.stream_insert_numpy_context("/empty/test", | ||||
|                                                 start=9, end=9) as ctx: | ||||
|             pass | ||||
|  | ||||
|         # reusing a context object is bad | ||||
|         with assert_raises(Exception) as e: | ||||
|             ctx.insert([[9, 9]]) | ||||
|  | ||||
|         # Try various things that might cause problems | ||||
|         with client.stream_insert_numpy_context("/empty/test", | ||||
|                                                 1000, 1050) as ctx: | ||||
|             ctx.finalize() # inserts [1000, 1050] | ||||
|             ctx.finalize() # nothing | ||||
|             ctx.finalize() # nothing | ||||
|             ctx.insert([[1100, 1]]) | ||||
|             ctx.finalize() # inserts [1100, 1101] | ||||
|             ctx.update_start(1199) | ||||
|             ctx.insert([[1200, 1]]) | ||||
|             ctx.update_end(1250) | ||||
|             ctx.finalize() # inserts [1199, 1250] | ||||
|             ctx.update_start(1299) | ||||
|             ctx.finalize() # nothing | ||||
|             ctx.update_end(1350) | ||||
|             ctx.finalize() # nothing | ||||
|             ctx.update_start(1400) | ||||
|             ctx.insert(np.zeros((0,2))) | ||||
|             ctx.update_end(1450) | ||||
|             ctx.finalize() | ||||
|             ctx.update_start(1500) | ||||
|             ctx.insert(np.zeros((0,2))) | ||||
|             ctx.update_end(1550) | ||||
|             ctx.finalize() | ||||
|             ctx.insert(np.zeros((0,2))) | ||||
|             ctx.insert(np.zeros((0,2))) | ||||
|             ctx.insert(np.zeros((0,2))) | ||||
|             ctx.finalize() | ||||
|  | ||||
|         # Check everything | ||||
|         eq_(info(), [(1, [100, 145]), | ||||
|                      (0, [175, 200]), | ||||
|                      (0, [300, 350]), | ||||
|                      (0, [400, 450]), | ||||
|                      (0, [500, 550]), | ||||
|                      (0, [1000, 1050]), | ||||
|                      (1, [1100, 1101]), | ||||
|                      (1, [1199, 1250]), | ||||
|                      (0, [1400, 1450]), | ||||
|                      (0, [1500, 1550]), | ||||
|                      ]) | ||||
|  | ||||
|         # Clean up | ||||
|         client.stream_remove("/empty/test") | ||||
|         client.stream_destroy("/empty/test") | ||||
|         client.close() | ||||
| @@ -62,6 +62,28 @@ class Base(object): | ||||
|         eq_(self.foo.val, 20) | ||||
|         eq_(self.foo.init_thread, self.foo.test_thread) | ||||
|  | ||||
| class ListLike(object): | ||||
|     def __init__(self): | ||||
|         self.thread = threading.current_thread().name | ||||
|         self.foo = 0 | ||||
|  | ||||
|     def __iter__(self): | ||||
|         eq_(threading.current_thread().name, self.thread) | ||||
|         self.foo = 0 | ||||
|         return self | ||||
|  | ||||
|     def __getitem__(self, key): | ||||
|         eq_(threading.current_thread().name, self.thread) | ||||
|         return key | ||||
|  | ||||
|     def next(self): | ||||
|         eq_(threading.current_thread().name, self.thread) | ||||
|         if self.foo < 5: | ||||
|             self.foo += 1 | ||||
|             return self.foo | ||||
|         else: | ||||
|             raise StopIteration | ||||
|  | ||||
| class TestUnserialized(Base): | ||||
|     def setUp(self): | ||||
|         self.foo = Foo() | ||||
| @@ -84,3 +106,9 @@ class TestSerializer(Base): | ||||
|         sp(sp(Foo("x"))).t() | ||||
|         sp(sp(Foo)("x")).t() | ||||
|         sp(sp(Foo))("x").t() | ||||
|  | ||||
|     def test_iter(self): | ||||
|         sp = nilmdb.utils.serializer_proxy | ||||
|         i = sp(ListLike)() | ||||
|         eq_(list(i), [1,2,3,4,5]) | ||||
|         eq_(i[3], 3) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user