# -*- coding: utf-8 -*-

# Integration tests for the nilmdb command-line client ("nilmtool").
# Starts a real nilmdb server on a local port, then drives the cmdline
# interface against it, capturing stdout/stderr and exit codes.
# Written for Python 2 (print statements, StringIO module, izip_longest)
# and the nose test framework.

import nilmdb.server
from nilmdb.utils.printf import *
import nilmdb.cmdline
from nilmdb.utils import datetime_tz

import unittest
from nose.tools import *
from nose.tools import assert_raises
import itertools
import os
import re
import sys
import StringIO
import shlex
import warnings

from testutil.helpers import *

# Path of the on-disk test database; removed and recreated by setup_module.
testdb = "tests/cmdline-testdb"

# NOTE(review): mutable default argument `bulkdata_args = {}` is the classic
# Python pitfall; it appears to be only read (passed through to NilmDB), not
# mutated, so it is harmless here -- but confirm before reusing this pattern.
def server_start(max_results = None, max_removals = None, bulkdata_args = {}):
    """Start the nilmdb server on a custom port (32180), backed by the
    shared on-disk test database.  The optional limits are forwarded to
    NilmDB so individual tests can exercise truncation / removal-batching
    / file-splitting behavior."""
    global test_server, test_db
    # Start web app on a custom port
    test_db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(
        testdb,
        max_results = max_results,
        max_removals = max_removals,
        bulkdata_args = bulkdata_args)
    test_server = nilmdb.server.Server(test_db, host = "127.0.0.1",
                                       port = 32180, stoppable = False,
                                       fast_shutdown = True,
                                       force_traceback = False)
    test_server.start(blocking = False)

def server_stop():
    """Stop the web server and close the database opened by server_start."""
    global test_server, test_db
    # Close web app
    test_server.stop()
    test_db.close()

def setup_module():
    """Module-level setup: wipe any stale test database, then start the
    server.  Tests in this module share server state and run in order."""
    global test_server, test_db
    # Clear out DB
    recursive_unlink(testdb)
    server_start()

def teardown_module():
    """Module-level teardown: shut down the server started in setup."""
    server_stop()

# Add an encoding property to StringIO so Python will convert Unicode
# properly when writing or reading.
class UTF8StringIO(StringIO.StringIO):
    encoding = 'utf-8'

class TestCmdline(object):
    """Drives nilmdb.cmdline.Cmdline in-process, with stdin/stdout/stderr
    redirected, and records the captured output and exit code on self."""

    def run(self, arg_string, infile=None, outfile=None):
        """Run a cmdline client with the specified argument string,
        passing the given input.  Returns a tuple with the output and
        exit code"""
        # printf("TZ=UTC ./nilmtool.py %s\n", arg_string)
        os.environ['NILMDB_URL'] = "http://localhost:32180/"
        # Context manager that swaps the process-wide stdio streams for
        # the supplied ones, restoring the originals on exit.
        class stdio_wrapper:
            def __init__(self, stdin, stdout, stderr):
                self.io = (stdin, stdout, stderr)
            def __enter__(self):
                self.saved = ( sys.stdin, sys.stdout, sys.stderr )
                ( sys.stdin, sys.stdout, sys.stderr ) = self.io
            def __exit__(self, type, value, traceback):
                ( sys.stdin, sys.stdout, sys.stderr ) = self.saved
        # Empty input if none provided
        if infile is None:
            infile = UTF8StringIO("")
        # Capture stderr
        errfile = UTF8StringIO()
        if outfile is None:
            # If no output file, capture stdout with stderr
            outfile = errfile
        with stdio_wrapper(infile, outfile, errfile) as s:
            try:
                # shlex doesn't support Unicode very well.  Encode the
                # string as UTF-8 explicitly before splitting.
                args = shlex.split(arg_string.encode('utf-8'))
                nilmdb.cmdline.Cmdline(args).run()
                sys.exit(0)
            except SystemExit as e:
                # Cmdline always exits via sys.exit; capture its code.
                exitcode = e.code
        captured = outfile.getvalue()
        self.captured = captured
        self.exitcode = exitcode

    def ok(self, arg_string, infile = None):
        """Run the command and require a zero exit code."""
        self.run(arg_string, infile)
        if self.exitcode != 0:
            self.dump()
            eq_(self.exitcode, 0)

    def fail(self, arg_string, infile = None,
             exitcode = None, require_error = True):
        """Run the command and require a nonzero exit code (optionally a
        specific one).  Unless require_error is False, also require that
        some output line starts with 'error'."""
        self.run(arg_string, infile)
        if exitcode is not None and self.exitcode != exitcode:
            # Wrong exit code
            self.dump()
            eq_(self.exitcode, exitcode)
        if self.exitcode == 0:
            # Success, when we wanted failure
            self.dump()
            ne_(self.exitcode, 0)
        # Make sure the output contains the word "error" at the
        # beginning of a line, but only if an exitcode wasn't
        # specified.
        if require_error and not re.search("^error",
                                           self.captured, re.MULTILINE):
            raise AssertionError("command failed, but output doesn't "
                                 "contain the string 'error'")

    def contain(self, checkstring):
        """Require that the captured output contains checkstring."""
        in_(checkstring, self.captured)

    def match(self, checkstring):
        """Require that the captured output equals checkstring exactly."""
        eq_(checkstring, self.captured)

    def matchfile(self, file):
        # Captured data should match file contents exactly
        with open(file) as f:
            contents = f.read()
            if contents != self.captured:
                # On mismatch, print both sides and locate the first
                # differing byte before failing, to aid debugging.
                print "--- reference file (first 1000 bytes):\n"
                print contents[0:1000] + "\n"
                print "--- captured data (first 1000 bytes):\n"
                print self.captured[0:1000] + "\n"
                zipped = itertools.izip_longest(contents, self.captured)
                for (n, (a, b)) in enumerate(zipped):
                    if a != b:
                        print "--- first difference is at offset", n
                        print "--- reference:", repr(a)
                        print "--- captured:", repr(b)
                        break
                raise AssertionError("captured data doesn't match " + file)

    def matchfilecount(self, file):
        # Last line of captured data should match the number of
        # non-commented lines in file
        count = 0
        with open(file) as f:
            for line in f:
                if line[0] != '#':
                    count += 1
        eq_(self.captured.splitlines()[-1], sprintf("%d", count))

    def dump(self):
        """Print the captured output, bracketed for readability."""
        printf("-----dump start-----\n%s-----dump end-----\n", self.captured)

    def test_01_basic(self):
        # help
        self.ok("--help")
        self.contain("usage:")

        # fail for no args
        self.fail("")

        # fail for no such option
        self.fail("--nosuchoption")

        # fail for bad command
        self.fail("badcommand")

        # try some URL constructions
        self.fail("--url http://nosuchurl/ info")
        self.contain("error connecting to server")

        self.fail("--url nosuchurl info")
        self.contain("error connecting to server")

        self.fail("-u nosuchurl/foo info")
        self.contain("error connecting to server")

        self.fail("-u localhost:1 info")
        self.contain("error connecting to server")

        self.ok("-u localhost:32180 info")
        self.ok("info")

        # Duplicated arguments should fail, but this isn't implemented
        # due to it being kind of a pain with argparse.
        if 0:
            self.fail("-u url1 -u url2 info")
            self.contain("duplicated argument")

            self.fail("list --detail --detail")
            self.contain("duplicated argument")

            self.fail("list --detail --path path1 --path path2")
            self.contain("duplicated argument")

            self.fail("extract --start 2000-01-01 --start 2001-01-02")
            self.contain("duplicated argument")

        # Verify that "help command" and "command --help" are identical
        # for all commands.
        self.fail("")
        # The usage message lists subcommands inside {...}; parse them out.
        m = re.search(r"{(.*)}", self.captured)
        for command in [""] + m.group(1).split(','):
            self.ok(command + " --help")
            cap1 = self.captured
            self.ok("help " + command)
            cap2 = self.captured
            # Extra arguments after "help <command>" are ignored.
            self.ok("help " + command + " asdf --url --zxcv -")
            cap3 = self.captured
            eq_(cap1, cap2)
            eq_(cap2, cap3)

    def test_02_parsetime(self):
        os.environ['TZ'] = "America/New_York"
        test = datetime_tz.datetime_tz.now()
        u2ts = nilmdb.utils.time.unix_to_timestamp
        parse_time = nilmdb.utils.time.parse_time
        eq_(parse_time(str(test)), u2ts(test.totimestamp()))
        test = u2ts(datetime_tz.datetime_tz.smartparse("20120405 1400-0400").
                    totimestamp())
        # Timestamps embedded in surrounding junk text still parse.
        eq_(parse_time("hi there 20120405 1400-0400 testing! 123"), test)
        eq_(parse_time("20120405 1800 UTC"), test)
        # An explicit numeric offset takes precedence over the "UTC" suffix.
        eq_(parse_time("20120405 1400-0400 UTC"), test)
        for badtime in [ "20120405 1400-9999", "hello", "-", "", "4:00" ]:
            with assert_raises(ValueError):
                x = parse_time(badtime)
        x = parse_time("now")
        # Timestamps extracted from typical capture filenames.
        eq_(parse_time("snapshot-20120405-140000.raw.gz"), test)
        eq_(parse_time("prep-20120405T1400"), test)
        # Raw numeric forms: Unix seconds, microseconds, and @-prefixed.
        eq_(parse_time("1333648800.0"), test)
        eq_(parse_time("1333648800000000"), test)
        eq_(parse_time("@1333648800000000"), test)
        eq_(parse_time("min"), nilmdb.utils.time.min_timestamp)
        eq_(parse_time("max"), nilmdb.utils.time.max_timestamp)
        with assert_raises(ValueError):
            parse_time("@hashtag12345")

    def test_03_info(self):
        self.ok("info")
        self.contain("Server URL: http://localhost:32180/")
        self.contain("Client version: " + nilmdb.__version__)
        self.contain("Server version: " + test_server.version)
        self.contain("Server database path")
        self.contain("Server database size")
        self.contain("Server database free space")

    def test_04_createlist(self):
        # Basic stream tests, like those in test_client.

        # No streams
        self.ok("list")
        self.match("")

        # Bad paths
        self.fail("create foo/bar/baz float32_8")
        self.contain("paths must start with /")

        self.fail("create /foo float32_8")
        self.contain("invalid path")

        self.fail("create /newton/prep/ float32_8")
        self.contain("invalid path")

        self.fail("create /newton/_format/prep float32_8")
        self.contain("path name is invalid")

        self.fail("create /_format/newton/prep float32_8")
        self.contain("path name is invalid")

        self.fail("create /newton/prep/_format float32_8")
        self.contain("path name is invalid")

        # Bad layout type
        self.fail("create /newton/prep NoSuchLayout")
        self.contain("no such layout")

        self.fail("create /newton/prep float32_0")
        self.contain("no such layout")

        self.fail("create /newton/prep float33_1")
        self.contain("no such layout")

        # Create a few streams
        self.ok("create /newton/zzz/rawnotch uint16_9")
        self.ok("create /newton/prep float32_8")
        self.ok("create /newton/raw uint16_6")

        # Create a stream that already exists
        self.fail("create /newton/raw uint16_6")
        self.contain("stream already exists at this path")

        # Should not be able to create a stream with another stream as
        # its parent
        self.fail("create /newton/prep/blah float32_8")
        self.contain("path is subdir of existing node")

        # Should not be able to create a stream at a location that
        # has other nodes as children
        self.fail("create /newton/zzz float32_8")
        self.contain("subdirs of this path already exist")

        # Verify we got those 3 streams and they're returned in
        # alphabetical order.
        self.ok("list")
        self.match("/newton/prep float32_8\n"
                   "/newton/raw uint16_6\n"
                   "/newton/zzz/rawnotch uint16_9\n")

        # Match just one type or one path.  Also check
        # that --path is optional
        self.ok("list --path /newton/raw")
        self.match("/newton/raw uint16_6\n")
        self.ok("list /newton/raw")
        self.match("/newton/raw uint16_6\n")
        self.fail("list -p /newton/raw /newton/raw")
        self.contain("too many paths")
        self.ok("list --layout uint16_6")
        self.match("/newton/raw uint16_6\n")

        # Wildcard matches
        self.ok("list --layout uint16*")
        self.match("/newton/raw uint16_6\n"
                   "/newton/zzz/rawnotch uint16_9\n")
        self.ok("list --path *zzz* --layout uint16*")
        self.match("/newton/zzz/rawnotch uint16_9\n")
        self.ok("list *zzz* --layout uint16*")
        self.match("/newton/zzz/rawnotch uint16_9\n")
        self.ok("list --path *zzz* --layout float32*")
        self.match("")

        # reversed range
        self.fail("list /newton/prep --start 2020-01-01 --end 2000-01-01")
        self.contain("start must precede end")

    def test_05_metadata(self):
        # Set / get metadata
        self.fail("metadata")
        self.fail("metadata --get")

        self.ok("metadata /newton/prep")
        self.match("")
        self.ok("metadata /newton/raw --get")
        self.match("")

        self.ok("metadata /newton/prep --set "
                "'description=The Data' "
                "v_scale=1.234")
        self.ok("metadata /newton/raw --update "
                "'description=The Data'")
        self.ok("metadata /newton/raw --update "
                "v_scale=1.234")

        # various parsing tests
        self.ok("metadata /newton/raw --update foo=")
        self.fail("metadata /newton/raw --update =bar")
        self.fail("metadata /newton/raw --update foo==bar")
        self.fail("metadata /newton/raw --update foo;bar")

        # errors
        self.fail("metadata /newton/nosuchstream foo=bar")
        self.contain("unrecognized arguments")
        self.fail("metadata /newton/nosuchstream")
        self.contain("No stream at path")
        self.fail("metadata /newton/nosuchstream --set foo=bar")
        self.contain("No stream at path")
        self.fail("metadata /newton/nosuchstream --delete")
        self.contain("No stream at path")

        self.ok("metadata /newton/prep")
        self.match("description=The Data\nv_scale=1.234\n")

        self.ok("metadata /newton/prep --get")
        self.match("description=The Data\nv_scale=1.234\n")

        # Requesting a key that isn't set returns it with an empty value.
        self.ok("metadata /newton/prep --get descr")
        self.match("descr=\n")

        self.ok("metadata /newton/prep --get description")
        self.match("description=The Data\n")

        self.ok("metadata /newton/prep --get description v_scale")
        self.match("description=The Data\nv_scale=1.234\n")

        # --set replaces all metadata, so v_scale disappears here.
        self.ok("metadata /newton/prep --set "
                "'description=The Data'")

        self.ok("metadata /newton/prep --get")
        self.match("description=The Data\n")

        self.fail("metadata /newton/nosuchpath")
        self.contain("No stream at path /newton/nosuchpath")

        self.ok("metadata /newton/prep --delete")
        self.ok("metadata /newton/prep --get")
        self.match("")
        self.ok("metadata /newton/prep --set "
                "'description=The Data' "
                "v_scale=1.234")
        # --delete with arguments removes only the named keys.
        self.ok("metadata /newton/prep --delete v_scale")
        self.ok("metadata /newton/prep --get")
        self.match("description=The Data\n")
        self.ok("metadata /newton/prep --set description=")
        self.ok("metadata /newton/prep --get")
        self.match("")

    def test_06_insert(self):
        self.ok("insert --help")

        self.fail("insert -s 2000 -e 2001 /foo/bar baz")
        self.contain("error getting stream info")

        self.fail("insert -s 2000 -e 2001 /newton/prep baz")
        self.match("error opening input file baz\n")

        self.fail("insert /newton/prep --timestamp -f -r 120")
        self.contain("error extracting start time")

        self.fail("insert /newton/prep --timestamp -r 120")
        self.contain("need --start or --filename")

        self.fail("insert /newton/prep "
                  "tests/data/prep-20120323T1000")

        # insert pre-timestamped data, with bad times (non-monotonic)
        os.environ['TZ'] = "UTC"
        with open("tests/data/prep-20120323T1004-badtimes") as input:
            self.fail("insert -s 20120323T1004 -e 20120323T1006 /newton/prep",
                      input)
        self.contain("error parsing input data")
        self.contain("line 7")
        self.contain("timestamp is not monotonically increasing")

        # insert pre-timestamped data, from stdin
        os.environ['TZ'] = "UTC"
        with open("tests/data/prep-20120323T1004-timestamped") as input:
            self.ok("insert -s 20120323T1004 -e 20120323T1006 /newton/prep",
                    input)

        # insert data with normal timestamper from filename
        os.environ['TZ'] = "UTC"
        self.ok("insert --timestamp -f --rate 120 /newton/prep "
                "tests/data/prep-20120323T1000")
        self.fail("insert -t --filename /newton/prep "
                  "tests/data/prep-20120323T1002")
        self.contain("rate is needed")
        self.ok("insert -t --filename --rate 120 /newton/prep "
                "tests/data/prep-20120323T1002")

        # overlap
        os.environ['TZ'] = "UTC"
        self.fail("insert --timestamp -f --rate 120 /newton/prep "
                  "tests/data/prep-20120323T1004")
        self.contain("overlap")

        # Just to help test more situations -- stop and restart
        # the server now.  This tests nilmdb's interval caching,
        # at the very least.
        server_stop()
        server_start()

        # still an overlap if we specify a different start
        os.environ['TZ'] = "America/New_York"
        self.fail("insert -t -r 120 --start '03/23/2012 06:05:00' /newton/prep"
                  " tests/data/prep-20120323T1004")
        self.contain("overlap")

        # wrong format
        os.environ['TZ'] = "UTC"
        self.fail("insert -t -r 120 -f /newton/raw "
                  "tests/data/prep-20120323T1004")
        self.contain("error parsing input data")
        self.contain("can't parse value")

        # too few rows per line
        self.ok("create /insert/test float32_20")
        self.fail("insert -t -r 120 -f /insert/test "
                  "tests/data/prep-20120323T1004")
        self.contain("error parsing input data")
        self.contain("wrong number of values")
        self.ok("destroy /insert/test")

        # empty data does nothing
        self.ok("insert -t -r 120 --start '03/23/2012 06:05:00' /newton/prep "
                "/dev/null")

        # bad start time
        self.fail("insert -t -r 120 --start 'whatever' /newton/prep /dev/null")

    def test_07_detail_extended(self):
        # Just count the number of lines, it's probably fine
        self.ok("list --detail")
        lines_(self.captured, 8)

        self.ok("list --detail --path *prep")
        lines_(self.captured, 4)

        self.ok("list --detail --path *prep --start='23 Mar 2012 10:02'")
        lines_(self.captured, 3)

        self.ok("list --detail --path *prep --start='23 Mar 2012 10:05'")
        lines_(self.captured, 2)

        self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15'")
        lines_(self.captured, 2)
        self.contain("10:05:15.000")

        self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15.50'")
        lines_(self.captured, 2)
        self.contain("10:05:15.500")

        self.ok("list --detail --path *prep --start='23 Mar 2012 19:05:15.50'")
        lines_(self.captured, 2)
        self.contain("no intervals")

        self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15.50'"
                + " --end='23 Mar 2012 10:05:15.51'")
        lines_(self.captured, 2)
        self.contain("10:05:15.500")

        self.ok("list --detail")
        lines_(self.captured, 8)

        # Verify the "raw timestamp" output
        self.ok("list --detail --path *prep --timestamp-raw "
                "--start='23 Mar 2012 10:05:15.50'")
        lines_(self.captured, 2)
        self.contain("[ 1332497115500000 -> 1332497160000000 ]")

        # bad time
        self.fail("list --detail --path *prep -T --start='9332497115.612'")

        # good time
        self.ok("list --detail --path *prep -T --start='1332497115.612'")
        lines_(self.captured, 2)
        self.contain("[ 1332497115612000 -> 1332497160000000 ]")

        # Check --ext output
        self.ok("list --ext")
        lines_(self.captured, 9)

        self.ok("list -E -T")
        c = self.contain
        c("\n  interval extents: 1332496800000000 -> 1332497160000000\n")
        c("\n  total data: 43200 rows, 359.983336 seconds\n")
        c("\n  interval extents: (no data)\n")
        c("\n  total data: 0 rows, 0.000000 seconds\n")

        # Misc
        self.fail("list --ext --start='23 Mar 2012 10:05:15.50'")
        self.contain("--start and --end only make sense with --detail")

    def test_08_extract(self):
        # nonexistent stream
        self.fail("extract /no/such/foo --start 2000-01-01 --end 2020-01-01")
        self.contain("error getting stream info")

        # reversed range
        self.fail("extract -a /newton/prep --start 2020-01-01 --end 2000-01-01")
        self.contain("start is after end")

        # empty ranges return error 2
        self.fail("extract -a /newton/prep " +
                  "--start '23 Mar 2012 20:00:30' " +
                  "--end '23 Mar 2012 20:00:31'",
                  exitcode = 2, require_error = False)
        self.contain("no data")

        self.fail("extract -a /newton/prep " +
                  "--start '23 Mar 2012 20:00:30.000001' " +
                  "--end '23 Mar 2012 20:00:30.000002'",
                  exitcode = 2, require_error = False)
        self.contain("no data")

        self.fail("extract -a /newton/prep " +
                  "--start '23 Mar 2022 10:00:30' " +
                  "--end '23 Mar 2022 10:00:31'",
                  exitcode = 2, require_error = False)
        self.contain("no data")

        # but are ok if we're just counting results
        self.ok("extract --count /newton/prep " +
                "--start '23 Mar 2012 20:00:30' " +
                "--end '23 Mar 2012 20:00:31'")
        self.match("0\n")

        self.ok("extract -c /newton/prep " +
                "--start '23 Mar 2012 20:00:30.000001' " +
                "--end '23 Mar 2012 20:00:30.000002'")
        self.match("0\n")

        # Check various dumps against stored copies of how they should appear
        def test(file, start, end, extra=""):
            # Compare both the extracted data and its --count against the
            # reference file tests/data/extract-<file>.
            self.ok("extract " + extra + " /newton/prep " +
                    "--start '23 Mar 2012 " + start + "' " +
                    "--end '23 Mar 2012 " + end + "'")
            self.matchfile("tests/data/extract-" + str(file))
            self.ok("extract --count " + extra + " /newton/prep " +
                    "--start '23 Mar 2012 " + start + "' " +
                    "--end '23 Mar 2012 " + end + "'")
            self.matchfilecount("tests/data/extract-" + str(file))
        test(1, "10:00:30", "10:00:31", extra="-a")
        test(1, "10:00:30.000000", "10:00:31", extra="-a")
        test(2, "10:00:30.000001", "10:00:31")
        test(2, "10:00:30.008333", "10:00:31")
        test(3, "10:00:30.008333", "10:00:30.008334")
        test(3, "10:00:30.008333", "10:00:30.016667")
        test(4, "10:00:30.008333", "10:00:30.025")
        test(5, "10:00:30", "10:00:31", extra="--annotate --bare")
        test(6, "10:00:30", "10:00:31", extra="-b")
        test(7, "10:00:30", "10:00:30.999", extra="-a -T")
        test(7, "10:00:30", "10:00:30.999", extra="-a --timestamp-raw")
        test(8, "10:01:59.9", "10:02:00.1", extra="--markup")
        test(8, "10:01:59.9", "10:02:00.1", extra="-m")

        # all data put in by tests
        self.ok("extract -a /newton/prep --start 2000-01-01 --end 2020-01-01")
        lines_(self.captured, 43204)
        self.ok("extract -c /newton/prep --start 2000-01-01 --end 2020-01-01")
        self.match("43200\n")

        # markup for 3 intervals, plus extra markup lines whenever we had
        # a "restart" from the nilmdb.stream_extract function
        self.ok("extract -m /newton/prep --start 2000-01-01 --end 2020-01-01")
        lines_(self.captured, 43210)

    def test_09_truncated(self):
        # Test truncated responses by overriding the nilmdb max_results
        server_stop()
        server_start(max_results = 2)
        self.ok("list --detail")
        lines_(self.captured, 8)
        server_stop()
        server_start()

    def test_10_remove(self):
        # Removing data

        # Try nonexistent stream
        self.fail("remove /no/such/foo --start 2000-01-01 --end 2020-01-01")
        self.contain("No stream at path")

        # empty or backward ranges return errors
        self.fail("remove /newton/prep --start 2020-01-01 --end 2000-01-01")
        self.contain("start must precede end")

        self.fail("remove /newton/prep " +
                  "--start '23 Mar 2012 10:00:30' " +
                  "--end '23 Mar 2012 10:00:30'")
        self.contain("start must precede end")
        self.fail("remove /newton/prep " +
                  "--start '23 Mar 2012 10:00:30.000001' " +
                  "--end '23 Mar 2012 10:00:30.000001'")
        self.contain("start must precede end")
        self.fail("remove /newton/prep " +
                  "--start '23 Mar 2022 10:00:30' " +
                  "--end '23 Mar 2022 10:00:30'")
        self.contain("start must precede end")

        # Verbose
        self.ok("remove -c /newton/prep " +
                "--start '23 Mar 2022 20:00:30' " +
                "--end '23 Mar 2022 20:00:31'")
        self.match("0\n")
        self.ok("remove --count /newton/prep " +
                "--start '23 Mar 2022 20:00:30' " +
                "--end '23 Mar 2022 20:00:31'")
        self.match("0\n")

        # Make sure we have the data we expect
        self.ok("list --detail /newton/prep")
        self.match("/newton/prep float32_8\n" +
                   "  [ Fri, 23 Mar 2012 10:00:00.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:01:59.991668 +0000 ]\n"
                   "  [ Fri, 23 Mar 2012 10:02:00.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:03:59.991668 +0000 ]\n"
                   "  [ Fri, 23 Mar 2012 10:04:00.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:06:00.000000 +0000 ]\n")

        # Remove various chunks of prep data and make sure
        # they're gone.
        self.ok("remove -c /newton/prep " +
                "--start '23 Mar 2012 10:00:30' " +
                "--end '23 Mar 2012 10:00:40'")
        self.match("1200\n")

        self.ok("remove -c /newton/prep " +
                "--start '23 Mar 2012 10:00:10' " +
                "--end '23 Mar 2012 10:00:20'")
        self.match("1200\n")

        self.ok("remove -c /newton/prep " +
                "--start '23 Mar 2012 10:00:05' " +
                "--end '23 Mar 2012 10:00:25'")
        self.match("1200\n")

        self.ok("remove -c /newton/prep " +
                "--start '23 Mar 2012 10:03:50' " +
                "--end '23 Mar 2012 10:06:50'")
        self.match("15600\n")

        self.ok("extract -c /newton/prep --start 2000-01-01 --end 2020-01-01")
        self.match("24000\n")

        # See the missing chunks in list output
        self.ok("list --detail /newton/prep")
        self.match("/newton/prep float32_8\n" +
                   "  [ Fri, 23 Mar 2012 10:00:00.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:00:05.000000 +0000 ]\n"
                   "  [ Fri, 23 Mar 2012 10:00:25.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:00:30.000000 +0000 ]\n"
                   "  [ Fri, 23 Mar 2012 10:00:40.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:01:59.991668 +0000 ]\n"
                   "  [ Fri, 23 Mar 2012 10:02:00.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:03:50.000000 +0000 ]\n")

        # Remove all data, verify it's missing
        self.ok("remove /newton/prep --start 2000-01-01 --end 2020-01-01")
        self.match("")  # no count requested this time
        self.ok("list --detail /newton/prep")
        self.match("/newton/prep float32_8\n" +
                   "  (no intervals)\n")

        # Reinsert some data, to verify that no overlaps with deleted
        # data are reported
        for minute in ["0", "2"]:
            self.ok("insert --timestamp -f --rate 120 /newton/prep"
                    " tests/data/prep-20120323T100" + minute)

    def test_11_destroy(self):
        # Delete records
        self.ok("destroy --help")

        self.fail("destroy")
        self.contain("too few arguments")

        self.fail("destroy /no/such/stream")
        self.contain("No stream at path")

        self.fail("destroy -R /no/such/stream")
        self.contain("No stream at path")

        self.fail("destroy asdfasdf")
        self.contain("No stream at path")

        # From previous tests, we have:
        self.ok("list")
        self.match("/newton/prep float32_8\n"
                   "/newton/raw uint16_6\n"
                   "/newton/zzz/rawnotch uint16_9\n")

        # Notice how they're not empty
        self.ok("list --detail")
        lines_(self.captured, 7)

        # Fail to destroy because intervals still present
        self.fail("destroy /newton/prep")
        self.contain("all intervals must be removed")
        self.ok("list --detail")
        lines_(self.captured, 7)

        # Destroy for real
        self.ok("destroy -R /newton/prep")
        self.ok("list")
        self.match("/newton/raw uint16_6\n"
                   "/newton/zzz/rawnotch uint16_9\n")

        self.ok("destroy /newton/zzz/rawnotch")
        self.ok("list")
        self.match("/newton/raw uint16_6\n")

        self.ok("destroy /newton/raw")
        self.ok("create /newton/raw uint16_6")
        # Specify --remove with no data
        self.ok("destroy --remove /newton/raw")
        self.ok("list")
        self.match("")

        # Re-create a previously deleted location, and some new ones
        rebuild = [ "/newton/prep", "/newton/zzz",
                    "/newton/raw", "/newton/asdf/qwer" ]
        for path in rebuild:
            # Create the path
            self.ok("create " + path + " float32_8")
            self.ok("list")
            self.contain(path)
            # Make sure it was created empty
            self.ok("list --detail --path " + path)
            self.contain("(no intervals)")

    def test_12_unicode(self):
        # Unicode paths.
        self.ok("destroy /newton/asdf/qwer")
        self.ok("destroy /newton/prep")
        self.ok("destroy /newton/raw")
        self.ok("destroy /newton/zzz")

        self.ok(u"create /düsseldorf/raw uint16_6")
        self.ok("list --detail")
        self.contain(u"/düsseldorf/raw uint16_6")
        self.contain("(no intervals)")

        # Unicode metadata
        self.ok(u"metadata /düsseldorf/raw --set α=beta 'γ=δ'")
        self.ok(u"metadata /düsseldorf/raw --update 'α=β ε τ α'")
        self.ok(u"metadata /düsseldorf/raw")
        self.match(u"α=β ε τ α\nγ=δ\n")

        self.ok(u"destroy /düsseldorf/raw")

    def test_13_files(self):
        # Test BulkData's ability to split into multiple files,
        # by forcing the file size to be really small.
        server_stop()
        server_start(bulkdata_args = { "file_size" : 920, # 23 rows per file
                                       "files_per_dir" : 3 })

        # Fill data
        self.ok("create /newton/prep float32_8")
        os.environ['TZ'] = "UTC"
        with open("tests/data/prep-20120323T1004-timestamped") as input:
            self.ok("insert -s 20120323T1004 -e 20120323T1006 /newton/prep",
                    input)

        # Extract it
        self.ok("extract /newton/prep --start '2000-01-01' " +
                "--end '2012-03-23 10:04:01'")
        lines_(self.captured, 120)
        self.ok("extract /newton/prep --start '2000-01-01' " +
                "--end '2022-03-23 10:04:01'")
        lines_(self.captured, 14400)

        # Make sure there were lots of files generated in the database
        # dir
        nfiles = 0
        for (dirpath, dirnames, filenames) in os.walk(testdb):
            nfiles += len(filenames)
        assert(nfiles > 500)

        # Make sure we can restart the server with a different file
        # size and have it still work
        server_stop()
        server_start()
        self.ok("extract /newton/prep --start '2000-01-01' " +
                "--end '2022-03-23 10:04:01'")
        lines_(self.captured, 14400)

        # Now recreate the data one more time and make sure there are
        # fewer files.
        self.ok("destroy --remove /newton/prep")
        self.fail("destroy /newton/prep") # already destroyed
        self.ok("create /newton/prep float32_8")
        os.environ['TZ'] = "UTC"
        with open("tests/data/prep-20120323T1004-timestamped") as input:
            self.ok("insert -s 20120323T1004 -e 20120323T1006 /newton/prep",
                    input)
        nfiles = 0
        for (dirpath, dirnames, filenames) in os.walk(testdb):
            nfiles += len(filenames)
        lt_(nfiles, 50)
        self.ok("destroy -R /newton/prep") # destroy again

    def test_14_remove_files(self):
        # Test BulkData's ability to remove when data is split into
        # multiple files.  Should be a fairly comprehensive test of
        # remove functionality.

        # Also limit max_removals, to cover more functionality.
        server_stop()
        server_start(max_removals = 4321,
                     bulkdata_args = { "file_size" : 920, # 23 rows per file
                                       "files_per_dir" : 3 })

        # Insert data.  Just for fun, insert out of order
        self.ok("create /newton/prep float32_8")
        os.environ['TZ'] = "UTC"
        self.ok("insert -t --filename --rate 120 /newton/prep "
                "tests/data/prep-20120323T1002")
        self.ok("insert -t --filename --rate 120 /newton/prep "
                "tests/data/prep-20120323T1000")

        # Should take up about 2.8 MB here (including directory entries)
        du_before = nilmdb.utils.diskusage.du(testdb)

        # Make sure we have the data we expect
        self.ok("list --detail")
        self.match("/newton/prep float32_8\n" +
                   "  [ Fri, 23 Mar 2012 10:00:00.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:01:59.991668 +0000 ]\n"
                   "  [ Fri, 23 Mar 2012 10:02:00.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:03:59.991668 +0000 ]\n")

        # Remove various chunks of prep data and make sure
        # they're gone.
        self.ok("extract -c /newton/prep --start 2000-01-01 --end 2020-01-01")
        self.match("28800\n")

        self.ok("remove -c /newton/prep " +
                "--start '23 Mar 2012 10:00:30' " +
                "--end '23 Mar 2012 10:03:30'")
        self.match("21600\n")

        self.ok("remove -c /newton/prep " +
                "--start '23 Mar 2012 10:00:10' " +
                "--end '23 Mar 2012 10:00:20'")
        self.match("1200\n")

        self.ok("remove -c /newton/prep " +
                "--start '23 Mar 2012 10:00:05' " +
                "--end '23 Mar 2012 10:00:25'")
        self.match("1200\n")

        self.ok("remove -c /newton/prep " +
                "--start '23 Mar 2012 10:03:50' " +
                "--end '23 Mar 2012 10:06:50'")
        self.match("1200\n")

        self.ok("extract -c /newton/prep --start 2000-01-01 --end 2020-01-01")
        self.match("3600\n")

        # See the missing chunks in list output
        self.ok("list --detail")
        self.match("/newton/prep float32_8\n" +
                   "  [ Fri, 23 Mar 2012 10:00:00.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:00:05.000000 +0000 ]\n"
                   "  [ Fri, 23 Mar 2012 10:00:25.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:00:30.000000 +0000 ]\n"
                   "  [ Fri, 23 Mar 2012 10:03:30.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:03:50.000000 +0000 ]\n")

        # We have 1/8 of the data that we had before, so the file size
        # should have dropped below 1/4 of what it used to be
        du_after = nilmdb.utils.diskusage.du(testdb)
        lt_(du_after, (du_before / 4))

        # Remove anything that came from the 10:02 data file
        self.ok("remove /newton/prep " +
                "--start '23 Mar 2012 10:02:00' --end '2020-01-01'")

        # Re-insert 19 lines from that file, then remove them again.
        # With the specific file_size above, this will cause the last
        # file in the bulk data storage to be exactly file_size large,
        # so removing the data should also remove that last file.
        self.ok("insert --timestamp -f --rate 120 /newton/prep " +
                "tests/data/prep-20120323T1002-first19lines")
        self.ok("remove /newton/prep " +
                "--start '23 Mar 2012 10:02:00' --end '2020-01-01'")

        # Shut down and restart server, to force nrows to get refreshed.
        server_stop()
        server_start()

        # Re-add the full 10:02 data file.  This tests adding new data once
        # we removed data near the end.
        self.ok("insert -t -f -r 120 /newton/prep "
                "tests/data/prep-20120323T1002")

        # See if we can extract it all
        self.ok("extract /newton/prep --start 2000-01-01 --end 2020-01-01")
        lines_(self.captured, 15600)

    def test_15_intervals_diff(self):
        # Test "intervals" and "intervals --diff" command.
        os.environ['TZ'] = "UTC"

        self.ok("create /diff/1 uint8_1")
        self.match("")
        self.ok("intervals /diff/1")
        self.match("")
        self.ok("intervals /diff/1 --diff /diff/1")
        self.match("")
        self.ok("intervals --diff /diff/1 /diff/1")
        self.match("")
        self.fail("intervals /diff/2")
        self.fail("intervals /diff/1 -d /diff/2")
        self.ok("create /diff/2 uint8_1")
        self.ok("intervals -T /diff/1 -d /diff/2")
        self.match("")
        self.ok("insert -s 01-01-2000 -e 01-01-2001 /diff/1 /dev/null")
        self.ok("intervals /diff/1")
        self.match("[ Sat, 01 Jan 2000 00:00:00.000000 +0000 -"
                   "> Mon, 01 Jan 2001 00:00:00.000000 +0000 ]\n")
        self.ok("intervals /diff/1 -d /diff/2")
        self.match("[ Sat, 01 Jan 2000 00:00:00.000000 +0000 -"
                   "> Mon, 01 Jan 2001 00:00:00.000000 +0000 ]\n")
        self.ok("insert -s 01-01-2000 -e 01-01-2001 /diff/2 /dev/null")
        self.ok("intervals /diff/1 -d /diff/2")
        self.match("")
        self.ok("insert -s 01-01-2001 -e 01-01-2002 /diff/1 /dev/null")
        self.ok("insert -s 01-01-2002 -e 01-01-2003 /diff/2 /dev/null")
        self.ok("intervals /diff/1 -d /diff/2")
        self.match("[ Mon, 01 Jan 2001 00:00:00.000000 +0000 -"
                   "> Tue, 01 Jan 2002 00:00:00.000000 +0000 ]\n")
        self.ok("insert -s 01-01-2004 -e 01-01-2005 /diff/1 /dev/null")
        self.ok("intervals /diff/1 -d /diff/2")
        self.match("[ Mon, 01 Jan 2001 00:00:00.000000 +0000 -"
                   "> Tue, 01 Jan 2002 00:00:00.000000 +0000 ]\n"
                   "[ Thu, 01 Jan 2004 00:00:00.000000 +0000 -"
                   "> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n")

        self.fail("intervals -s 01-01-2003 -e 01-01-2000 /diff/1 -d /diff/2")
        self.ok("intervals -s 01-01-2003 -e 01-01-2008 /diff/1 -d /diff/2")
        self.match("[ Thu, 01 Jan 2004 00:00:00.000000 +0000 -"
                   "> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n")

        self.ok("destroy -R /diff/1")
        self.ok("destroy -R /diff/2")

    def test_16_rename(self):
        # Test renaming.  Force file size smaller so we get more files
        server_stop()
        recursive_unlink(testdb)
        server_start(bulkdata_args = { "file_size" : 920, # 23 rows per file
                                       "files_per_dir" : 3 })

        # Fill data
        self.ok("create /newton/prep float32_8")
        os.environ['TZ'] = "UTC"
        with open("tests/data/prep-20120323T1004-timestamped") as input:
            self.ok("insert -s 20120323T1004 -e 20120323T1006 /newton/prep",
                    input)

        # Extract it
        self.ok("extract /newton/prep --start '2000-01-01' " +
                "--end '2012-03-23 10:04:01'")
        extract_before = self.captured

        def check_path(*components):
            # Verify the paths look right on disk
            seek = os.path.join(testdb, "data", *components)
            for (dirpath, dirnames, filenames) in os.walk(testdb):
                if "_format" in filenames:
                    if dirpath == seek:
                        break
                    raise AssertionError("data also found at " + dirpath)
            else:
                # for/else: loop finished without finding any data dir
                raise AssertionError("data not found at " + seek)

            # Verify "list" output
            self.ok("list")
            self.match("/" + "/".join(components) + " float32_8\n")

        # Lots of renames
        check_path("newton", "prep")
        self.fail("rename /newton/prep /newton/prep")
        self.contain("old and new paths are the same")
        check_path("newton", "prep")
        self.fail("rename /newton/prep /newton")
        self.contain("path must contain at least one folder")
        self.fail("rename /newton/prep /newton/prep/")
        self.contain("invalid path")
        self.ok("rename /newton/prep /newton/foo/1")
        check_path("newton", "foo", "1")
        self.ok("rename /newton/foo/1 /newton/foo")
        check_path("newton", "foo")
        self.ok("rename /newton/foo /totally/different/thing")
        check_path("totally", "different", "thing")
        self.ok("rename /totally/different/thing /totally/something")
        check_path("totally", "something")
        self.ok("rename /totally/something /totally/something/cool")
        check_path("totally", "something", "cool")
        self.ok("rename /totally/something/cool /foo/bar")
        check_path("foo", "bar")
        self.ok("create /xxx/yyy/zzz float32_8")
        self.fail("rename /foo/bar /xxx/yyy")
        self.contain("subdirs of this path already exist")
        self.fail("rename /foo/bar /xxx/yyy/zzz")
        self.contain("stream already exists at this path")
        self.fail("rename /foo/bar /xxx/yyy/zzz/www")
        self.contain("path is subdir of existing node")
        self.ok("rename /foo/bar /xxx/yyy/mmm")
        self.ok("destroy -R /xxx/yyy/zzz")
        check_path("xxx", "yyy", "mmm")

        # Extract it at the final path
        self.ok("extract /xxx/yyy/mmm --start '2000-01-01' " +
                "--end '2012-03-23 10:04:01'")
        eq_(self.captured, extract_before)
        self.ok("destroy -R /xxx/yyy/mmm")

        # Make sure temporary rename dirs weren't left around
        for (dirpath, dirnames, filenames) in os.walk(testdb):
            if "rename-" in dirpath:
                raise AssertionError("temporary directories not cleaned up")
            if "totally" in dirpath or "newton" in dirpath:
                raise AssertionError("old directories not cleaned up")

        server_stop()
        server_start()