nilmdb/tests/test_cmdline.py
Jim Paris 022b50950f Support using a higher initial nrows in bulkdata, for tests
This gives an easy way to get a large values in the database start_pos
and end_pos fields, which is necessary for testing failure modes when
those get too large (e.g. on 32-bit systems).  Adjust tests to make
use of this knob.
2015-01-18 17:49:52 -05:00

1149 lines
44 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# -*- coding: utf-8 -*-
import nilmdb.server
from nilmdb.utils.printf import *
import nilmdb.cmdline
from nilmdb.utils import datetime_tz
import unittest
from nose.tools import *
from nose.tools import assert_raises
import itertools
import os
import re
import sys
import StringIO
import shlex
import warnings
from testutil.helpers import *
testdb = "tests/cmdline-testdb"
def server_start(max_results = None,
max_removals = None,
max_int_removals = None,
bulkdata_args = {}):
global test_server, test_db
# Start web app on a custom port
test_db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(
testdb,
max_results = max_results,
max_removals = max_removals,
max_int_removals = max_int_removals,
bulkdata_args = bulkdata_args)
test_server = nilmdb.server.Server(test_db, host = "127.0.0.1",
port = 32180, stoppable = False,
fast_shutdown = True,
force_traceback = False)
test_server.start(blocking = False)
def server_stop():
global test_server, test_db
# Close web app
test_server.stop()
test_db.close()
def setup_module():
global test_server, test_db
# Clear out DB
recursive_unlink(testdb)
server_start()
def teardown_module():
server_stop()
# Add an encoding property to StringIO so Python will convert Unicode
# properly when writing or reading.
class UTF8StringIO(StringIO.StringIO):
encoding = 'utf-8'
class TestCmdline(object):
def run(self, arg_string, infile=None, outfile=None):
"""Run a cmdline client with the specified argument string,
passing the given input. Save the output and exit code."""
# printf("TZ=UTC ./nilmtool.py %s\n", arg_string)
os.environ['NILMDB_URL'] = "http://localhost:32180/"
class stdio_wrapper:
def __init__(self, stdin, stdout, stderr):
self.io = (stdin, stdout, stderr)
def __enter__(self):
self.saved = ( sys.stdin, sys.stdout, sys.stderr )
( sys.stdin, sys.stdout, sys.stderr ) = self.io
def __exit__(self, type, value, traceback):
( sys.stdin, sys.stdout, sys.stderr ) = self.saved
# Empty input if none provided
if infile is None:
infile = UTF8StringIO("")
# Capture stderr
errfile = UTF8StringIO()
if outfile is None:
# If no output file, capture stdout with stderr
outfile = errfile
with stdio_wrapper(infile, outfile, errfile) as s:
try:
# shlex doesn't support Unicode very well. Encode the
# string as UTF-8 explicitly before splitting.
args = shlex.split(arg_string.encode('utf-8'))
nilmdb.cmdline.Cmdline(args).run()
sys.exit(0)
except SystemExit as e:
exitcode = e.code
captured = nilmdb.utils.unicode.decode(outfile.getvalue())
self.captured = captured
self.exitcode = exitcode
def ok(self, arg_string, infile = None):
self.run(arg_string, infile)
if self.exitcode != 0:
self.dump()
eq_(self.exitcode, 0)
def fail(self, arg_string, infile = None,
exitcode = None, require_error = True):
self.run(arg_string, infile)
if exitcode is not None and self.exitcode != exitcode:
# Wrong exit code
self.dump()
eq_(self.exitcode, exitcode)
if self.exitcode == 0:
# Success, when we wanted failure
self.dump()
ne_(self.exitcode, 0)
# Make sure the output contains the word "error" at the
# beginning of a line, but only if an exitcode wasn't
# specified.
if require_error and not re.search("^error",
self.captured, re.MULTILINE):
raise AssertionError("command failed, but output doesn't "
"contain the string 'error'")
def contain(self, checkstring):
in_(checkstring, self.captured)
def match(self, checkstring):
eq_(checkstring, self.captured)
def matchfile(self, file):
# Captured data should match file contents exactly
with open(file) as f:
contents = f.read()
if contents != self.captured:
print "--- reference file (first 1000 bytes):\n"
print contents[0:1000] + "\n"
print "--- captured data (first 1000 bytes):\n"
print self.captured[0:1000] + "\n"
zipped = itertools.izip_longest(contents, self.captured)
for (n, (a, b)) in enumerate(zipped):
if a != b:
print "--- first difference is at offset", n
print "--- reference:", repr(a)
print "--- captured:", repr(b)
break
raise AssertionError("captured data doesn't match " + file)
def matchfilecount(self, file):
# Last line of captured data should match the number of
# non-commented lines in file
count = 0
with open(file) as f:
for line in f:
if line[0] != '#':
count += 1
eq_(self.captured.splitlines()[-1], sprintf("%d", count))
def dump(self):
printf("-----dump start-----\n%s-----dump end-----\n", self.captured)
def test_01_basic(self):
# help
self.ok("--help")
self.contain("usage:")
# help
self.ok("--version")
ver = self.captured
self.ok("list --version")
eq_(self.captured, ver)
# fail for no args
self.fail("")
# fail for no such option
self.fail("--nosuchoption")
# fail for bad command
self.fail("badcommand")
# try some URL constructions
self.fail("--url http://nosuchurl/ info")
self.contain("error connecting to server")
self.fail("--url nosuchurl info")
self.contain("error connecting to server")
self.fail("-u nosuchurl/foo info")
self.contain("error connecting to server")
self.fail("-u localhost:1 info")
self.contain("error connecting to server")
self.ok("-u localhost:32180 info")
self.ok("info")
# Duplicated arguments should fail, but this isn't implemented
# due to it being kind of a pain with argparse.
if 0:
self.fail("-u url1 -u url2 info")
self.contain("duplicated argument")
self.fail("list --detail --detail")
self.contain("duplicated argument")
self.fail("list --detail --path path1 --path path2")
self.contain("duplicated argument")
self.fail("extract --start 2000-01-01 --start 2001-01-02")
self.contain("duplicated argument")
# Verify that "help command" and "command --help" are identical
# for all commands.
self.fail("")
m = re.search(r"{(.*)}", self.captured)
for command in [""] + m.group(1).split(','):
self.ok(command + " --help")
cap1 = self.captured
self.ok("help " + command)
cap2 = self.captured
self.ok("help " + command + " asdf --url --zxcv -")
cap3 = self.captured
eq_(cap1, cap2)
eq_(cap2, cap3)
def test_02_parsetime(self):
os.environ['TZ'] = "America/New_York"
test = datetime_tz.datetime_tz.now()
u2ts = nilmdb.utils.time.unix_to_timestamp
parse_time = nilmdb.utils.time.parse_time
eq_(parse_time(str(test)), u2ts(test.totimestamp()))
test = u2ts(datetime_tz.datetime_tz.smartparse("20120405 1400-0400").
totimestamp())
eq_(parse_time("hi there 20120405 1400-0400 testing! 123"), test)
eq_(parse_time("20120405 1800 UTC"), test)
eq_(parse_time("20120405 1400-0400 UTC"), test)
for badtime in [ "20120405 1400-9999", "hello", "-", "", "4:00" ]:
with assert_raises(ValueError):
x = parse_time(badtime)
x = parse_time("now")
eq_(parse_time("snapshot-20120405-140000.raw.gz"), test)
eq_(parse_time("prep-20120405T1400"), test)
eq_(parse_time("1333648800.0"), test)
eq_(parse_time("1333648800000000"), test)
eq_(parse_time("@1333648800000000"), test)
eq_(parse_time("min"), nilmdb.utils.time.min_timestamp)
eq_(parse_time("max"), nilmdb.utils.time.max_timestamp)
with assert_raises(ValueError):
parse_time("@hashtag12345")
def test_03_info(self):
self.ok("info")
self.contain("Server URL: http://localhost:32180/")
self.contain("Client version: " + nilmdb.__version__)
self.contain("Server version: " + test_server.version)
self.contain("Server database path")
self.contain("Server disk space used by NilmDB")
self.contain("Server disk space used by other")
self.contain("Server disk space reserved")
self.contain("Server disk space free")
def test_04_createlist(self):
# Basic stream tests, like those in test_client.
# No streams
self.ok("list")
self.match("")
# Bad paths
self.fail("create foo/bar/baz float32_8")
self.contain("paths must start with /")
self.fail("create /foo float32_8")
self.contain("invalid path")
self.fail("create /newton/prep/ float32_8")
self.contain("invalid path")
self.fail("create /newton/_format/prep float32_8")
self.contain("path name is invalid")
self.fail("create /_format/newton/prep float32_8")
self.contain("path name is invalid")
self.fail("create /newton/prep/_format float32_8")
self.contain("path name is invalid")
# Bad layout type
self.fail("create /newton/prep NoSuchLayout")
self.contain("no such layout")
self.fail("create /newton/prep float32_0")
self.contain("no such layout")
self.fail("create /newton/prep float33_1")
self.contain("no such layout")
# Create a few streams
self.ok("create /newton/zzz/rawnotch uint16_9")
self.ok("create /newton/prep float32_8")
self.ok("create /newton/raw uint16_6")
self.ok("create /newton/raw~decim-1234 uint16_6")
# Create a stream that already exists
self.fail("create /newton/raw uint16_6")
self.contain("stream already exists at this path")
# Should not be able to create a stream with another stream as
# its parent
self.fail("create /newton/prep/blah float32_8")
self.contain("path is subdir of existing node")
# Should not be able to create a stream at a location that
# has other nodes as children
self.fail("create /newton/zzz float32_8")
self.contain("subdirs of this path already exist")
# Verify we got those 4 streams and they're returned in
# alphabetical order.
self.ok("list -l")
self.match("/newton/prep float32_8\n"
"/newton/raw uint16_6\n"
"/newton/raw~decim-1234 uint16_6\n"
"/newton/zzz/rawnotch uint16_9\n")
# No decimated streams if -n specified
self.ok("list -n -l")
self.match("/newton/prep float32_8\n"
"/newton/raw uint16_6\n"
"/newton/zzz/rawnotch uint16_9\n")
# Delete that decimated stream
self.ok("destroy /newton/raw~decim-1234")
# Match just one type or one path. Also check
# that --path is optional
self.ok("list --layout /newton/raw")
self.match("/newton/raw uint16_6\n")
# Wildcard matches
self.ok("list *zzz*")
self.match("/newton/zzz/rawnotch\n")
# reversed range
self.fail("list /newton/prep --start 2020-01-01 --end 2000-01-01")
self.contain("start must precede end")
def test_05_metadata(self):
# Set / get metadata
self.fail("metadata")
self.fail("metadata --get")
self.ok("metadata /newton/prep")
self.match("")
self.ok("metadata /newton/raw --get")
self.match("")
self.ok("metadata /newton/prep --set "
"'description=The Data' "
"v_scale=1.234")
self.ok("metadata /newton/raw --update "
"'description=The Data'")
self.ok("metadata /newton/raw --update "
"v_scale=1.234")
# various parsing tests
self.ok("metadata /newton/raw --update foo=")
self.fail("metadata /newton/raw --update =bar")
self.fail("metadata /newton/raw --update foo==bar")
self.fail("metadata /newton/raw --update foo;bar")
# errors
self.fail("metadata /newton/nosuchstream foo=bar")
self.contain("unrecognized arguments")
self.fail("metadata /newton/nosuchstream")
self.contain("No stream at path")
self.fail("metadata /newton/nosuchstream --set foo=bar")
self.contain("No stream at path")
self.fail("metadata /newton/nosuchstream --delete")
self.contain("No stream at path")
self.ok("metadata /newton/prep")
self.match("description=The Data\nv_scale=1.234\n")
self.ok("metadata /newton/prep --get")
self.match("description=The Data\nv_scale=1.234\n")
self.ok("metadata /newton/prep --get descr")
self.match("descr=\n")
self.ok("metadata /newton/prep --get description")
self.match("description=The Data\n")
self.ok("metadata /newton/prep --get description v_scale")
self.match("description=The Data\nv_scale=1.234\n")
self.ok("metadata /newton/prep --set "
"'description=The Data'")
self.ok("metadata /newton/prep --get")
self.match("description=The Data\n")
self.fail("metadata /newton/nosuchpath")
self.contain("No stream at path /newton/nosuchpath")
self.ok("metadata /newton/prep --delete")
self.ok("metadata /newton/prep --get")
self.match("")
self.ok("metadata /newton/prep --set "
"'description=The Data' "
"v_scale=1.234")
self.ok("metadata /newton/prep --delete v_scale")
self.ok("metadata /newton/prep --get")
self.match("description=The Data\n")
self.ok("metadata /newton/prep --set description=")
self.ok("metadata /newton/prep --get")
self.match("")
def test_06_insert(self):
self.ok("insert --help")
self.fail("insert -s 2000 -e 2001 /foo/bar baz")
self.contain("error getting stream info")
self.fail("insert -s 2000 -e 2001 /newton/prep baz")
self.match("error opening input file baz\n")
self.fail("insert /newton/prep --timestamp -f -r 120")
self.contain("error extracting start time")
self.fail("insert /newton/prep --timestamp -r 120")
self.contain("need --start or --filename")
self.fail("insert /newton/prep "
"tests/data/prep-20120323T1000")
# insert pre-timestamped data, with bad times (non-monotonic)
os.environ['TZ'] = "UTC"
with open("tests/data/prep-20120323T1004-badtimes") as input:
self.fail("insert -s 20120323T1004 -e 20120323T1006 /newton/prep",
input)
self.contain("error parsing input data")
self.contain("line 7")
self.contain("timestamp is not monotonically increasing")
# insert pre-timestamped data, from stdin
os.environ['TZ'] = "UTC"
with open("tests/data/prep-20120323T1004-timestamped") as input:
self.ok("insert -s 20120323T1004 -e 20120323T1006 /newton/prep",
input)
# insert data with normal timestamper from filename
os.environ['TZ'] = "UTC"
self.ok("insert --timestamp -f --rate 120 /newton/prep "
"tests/data/prep-20120323T1000")
self.fail("insert -t --filename /newton/prep "
"tests/data/prep-20120323T1002")
self.contain("rate is needed")
self.ok("insert -t --filename --rate 120 /newton/prep "
"tests/data/prep-20120323T1002")
# overlap
os.environ['TZ'] = "UTC"
self.fail("insert --timestamp -f --rate 120 /newton/prep "
"tests/data/prep-20120323T1004")
self.contain("overlap")
# Just to help test more situations -- stop and restart
# the server now. This tests nilmdb's interval caching,
# at the very least.
server_stop()
server_start()
# still an overlap if we specify a different start
os.environ['TZ'] = "America/New_York"
self.fail("insert -t -r 120 --start '03/23/2012 06:05:00' /newton/prep"
" tests/data/prep-20120323T1004")
self.contain("overlap")
# wrong format
os.environ['TZ'] = "UTC"
self.fail("insert -t -r 120 -f /newton/raw "
"tests/data/prep-20120323T1004")
self.contain("error parsing input data")
self.contain("can't parse value")
# too few rows per line
self.ok("create /insert/test float32_20")
self.fail("insert -t -r 120 -f /insert/test "
"tests/data/prep-20120323T1004")
self.contain("error parsing input data")
self.contain("wrong number of values")
self.ok("destroy /insert/test")
# empty data does nothing
self.ok("insert -t -r 120 --start '03/23/2012 06:05:00' /newton/prep "
"/dev/null")
# bad start time
self.fail("insert -t -r 120 --start 'whatever' /newton/prep /dev/null")
# Test negative times
self.ok("insert --start @-10000000000 --end @1000000001 /newton/prep"
" tests/data/timestamped")
self.ok("extract -c /newton/prep --start min --end @1000000001")
self.match("8\n")
self.ok("remove /newton/prep --start min --end @1000000001")
def test_07_detail_extended(self):
# Just count the number of lines, it's probably fine
self.ok("list --detail")
lines_(self.captured, 8)
self.ok("list --detail *prep")
lines_(self.captured, 4)
self.ok("list --detail *prep --start='23 Mar 2012 10:02'")
lines_(self.captured, 3)
self.ok("list --detail *prep --start='23 Mar 2012 10:05'")
lines_(self.captured, 2)
self.ok("list --detail *prep --start='23 Mar 2012 10:05:15'")
lines_(self.captured, 2)
self.contain("10:05:15.000")
self.ok("list --detail *prep --start='23 Mar 2012 10:05:15.50'")
lines_(self.captured, 2)
self.contain("10:05:15.500")
self.ok("list --detail *prep --start='23 Mar 2012 19:05:15.50'")
lines_(self.captured, 2)
self.contain("no intervals")
self.ok("list --detail *prep --start='23 Mar 2012 10:05:15.50'"
+ " --end='23 Mar 2012 10:05:15.51'")
lines_(self.captured, 2)
self.contain("10:05:15.500")
self.ok("list --detail")
lines_(self.captured, 8)
# Verify the "raw timestamp" output
self.ok("list --detail *prep --timestamp-raw "
"--start='23 Mar 2012 10:05:15.50'")
lines_(self.captured, 2)
self.contain("[ 1332497115500000 -> 1332497160000000 ]")
# bad time
self.fail("list --detail *prep -T --start='9332497115.612'")
# good time
self.ok("list --detail *prep -T --start='1332497115.612'")
lines_(self.captured, 2)
self.contain("[ 1332497115612000 -> 1332497160000000 ]")
# Check --ext output
self.ok("list --ext")
lines_(self.captured, 9)
self.ok("list -E -T")
c = self.contain
c("\n interval extents: 1332496800000000 -> 1332497160000000\n")
c("\n total data: 43200 rows, 359.983336 seconds\n")
c("\n interval extents: (no data)\n")
c("\n total data: 0 rows, 0.000000 seconds\n")
# Misc
self.fail("list --ext --start='23 Mar 2012 10:05:15.50'")
self.contain("--start and --end only make sense with --detail")
def test_08_extract(self):
# nonexistent stream
self.fail("extract /no/such/foo --start 2000-01-01 --end 2020-01-01")
self.contain("error getting stream info")
# reversed range
self.fail("extract -a /newton/prep --start 2020-01-01 --end 2000-01-01")
self.contain("start is after end")
# empty ranges return error 2
self.fail("extract -a /newton/prep " +
"--start '23 Mar 2012 20:00:30' " +
"--end '23 Mar 2012 20:00:31'",
exitcode = 2, require_error = False)
self.contain("no data")
self.fail("extract -a /newton/prep " +
"--start '23 Mar 2012 20:00:30.000001' " +
"--end '23 Mar 2012 20:00:30.000002'",
exitcode = 2, require_error = False)
self.contain("no data")
self.fail("extract -a /newton/prep " +
"--start '23 Mar 2022 10:00:30' " +
"--end '23 Mar 2022 10:00:31'",
exitcode = 2, require_error = False)
self.contain("no data")
# but are ok if we're just counting results
self.ok("extract --count /newton/prep " +
"--start '23 Mar 2012 20:00:30' " +
"--end '23 Mar 2012 20:00:31'")
self.match("0\n")
self.ok("extract -c /newton/prep " +
"--start '23 Mar 2012 20:00:30.000001' " +
"--end '23 Mar 2012 20:00:30.000002'")
self.match("0\n")
# Check various dumps against stored copies of how they should appear
def test(file, start, end, extra=""):
self.ok("extract " + extra + " /newton/prep " +
"--start '23 Mar 2012 " + start + "' " +
"--end '23 Mar 2012 " + end + "'")
self.matchfile("tests/data/extract-" + str(file))
self.ok("extract --count " + extra + " /newton/prep " +
"--start '23 Mar 2012 " + start + "' " +
"--end '23 Mar 2012 " + end + "'")
self.matchfilecount("tests/data/extract-" + str(file))
test(1, "10:00:30", "10:00:31", extra="-a")
test(1, "10:00:30.000000", "10:00:31", extra="-a")
test(2, "10:00:30.000001", "10:00:31")
test(2, "10:00:30.008333", "10:00:31")
test(3, "10:00:30.008333", "10:00:30.008334")
test(3, "10:00:30.008333", "10:00:30.016667")
test(4, "10:00:30.008333", "10:00:30.025")
test(5, "10:00:30", "10:00:31", extra="--annotate --bare")
test(6, "10:00:30", "10:00:31", extra="-b")
test(7, "10:00:30", "10:00:30.999", extra="-a -T")
test(7, "10:00:30", "10:00:30.999", extra="-a --timestamp-raw")
test(8, "10:01:59.9", "10:02:00.1", extra="--markup")
test(8, "10:01:59.9", "10:02:00.1", extra="-m")
# all data put in by tests
self.ok("extract -a /newton/prep --start min --end max")
lines_(self.captured, 43204)
self.ok("extract -c /newton/prep --start 2000-01-01 --end 2020-01-01")
self.match("43200\n")
# test binary mode
self.fail("extract -c -B /newton/prep -s min -e max")
self.contain("binary cannot be combined")
self.fail("extract -m -B /newton/prep -s min -e max")
self.contain("binary cannot be combined")
self.ok("extract -B /newton/prep -s min -e max")
eq_(len(self.captured), 43200 * (8 + 8*4))
# markup for 3 intervals, plus extra markup lines whenever we had
# a "restart" from the nilmdb.stream_extract function
self.ok("extract -m /newton/prep --start 2000-01-01 --end 2020-01-01")
lines_(self.captured, 43210)
def test_09_truncated(self):
# Test truncated responses by overriding the nilmdb max_results
server_stop()
server_start(max_results = 2)
self.ok("list --detail")
lines_(self.captured, 8)
server_stop()
server_start()
def test_10_remove(self):
# Removing data
# Try nonexistent stream
self.fail("remove /no/such/foo --start 2000-01-01 --end 2020-01-01")
self.contain("no stream matched path")
# empty or backward ranges return errors
self.fail("remove /newton/prep --start 2020-01-01 --end 2000-01-01")
self.contain("start must precede end")
self.fail("remove /newton/prep " +
"--start '23 Mar 2012 10:00:30' " +
"--end '23 Mar 2012 10:00:30'")
self.contain("start must precede end")
self.fail("remove /newton/prep " +
"--start '23 Mar 2012 10:00:30.000001' " +
"--end '23 Mar 2012 10:00:30.000001'")
self.contain("start must precede end")
self.fail("remove /newton/prep " +
"--start '23 Mar 2022 10:00:30' " +
"--end '23 Mar 2022 10:00:30'")
self.contain("start must precede end")
# Verbose
self.ok("remove -c /newton/prep " +
"--start '23 Mar 2022 20:00:30' " +
"--end '23 Mar 2022 20:00:31'")
self.match("0\n")
self.ok("remove --count /newton/prep " +
"--start '23 Mar 2022 20:00:30' " +
"--end '23 Mar 2022 20:00:31'")
self.match("0\n")
self.ok("remove -c /newton/prep /newton/pre* " +
"--start '23 Mar 2022 20:00:30' " +
"--end '23 Mar 2022 20:00:31'")
self.match("Removing from /newton/prep\n0\n" +
"Removing from /newton/prep\n0\n")
# Make sure we have the data we expect
self.ok("list -l --detail /newton/prep")
self.match("/newton/prep float32_8\n" +
" [ Fri, 23 Mar 2012 10:00:00.000000 +0000"
" -> Fri, 23 Mar 2012 10:01:59.991668 +0000 ]\n"
" [ Fri, 23 Mar 2012 10:02:00.000000 +0000"
" -> Fri, 23 Mar 2012 10:03:59.991668 +0000 ]\n"
" [ Fri, 23 Mar 2012 10:04:00.000000 +0000"
" -> Fri, 23 Mar 2012 10:06:00.000000 +0000 ]\n")
# Remove various chunks of prep data and make sure
# they're gone.
self.ok("remove -c /newton/prep " +
"--start '23 Mar 2012 10:00:30' " +
"--end '23 Mar 2012 10:00:40'")
self.match("1200\n")
self.ok("remove -c /newton/prep " +
"--start '23 Mar 2012 10:00:10' " +
"--end '23 Mar 2012 10:00:20'")
self.match("1200\n")
self.ok("remove -c /newton/prep " +
"--start '23 Mar 2012 10:00:05' " +
"--end '23 Mar 2012 10:00:25'")
self.match("1200\n")
self.ok("remove -c /newton/prep " +
"--start '23 Mar 2012 10:03:50' " +
"--end '23 Mar 2012 10:06:50'")
self.match("15600\n")
self.ok("extract -c /newton/prep --start 2000-01-01 --end 2020-01-01")
self.match("24000\n")
# See the missing chunks in list output
self.ok("list --layout --detail /newton/prep")
self.match("/newton/prep float32_8\n" +
" [ Fri, 23 Mar 2012 10:00:00.000000 +0000"
" -> Fri, 23 Mar 2012 10:00:05.000000 +0000 ]\n"
" [ Fri, 23 Mar 2012 10:00:25.000000 +0000"
" -> Fri, 23 Mar 2012 10:00:30.000000 +0000 ]\n"
" [ Fri, 23 Mar 2012 10:00:40.000000 +0000"
" -> Fri, 23 Mar 2012 10:01:59.991668 +0000 ]\n"
" [ Fri, 23 Mar 2012 10:02:00.000000 +0000"
" -> Fri, 23 Mar 2012 10:03:50.000000 +0000 ]\n")
# Remove all data, verify it's missing
self.ok("remove /newton/prep --start 2000-01-01 --end 2020-01-01")
self.match("") # no count requested this time
self.ok("list -l --detail /newton/prep")
self.match("/newton/prep float32_8\n" +
" (no intervals)\n")
# Reinsert some data, to verify that no overlaps with deleted
# data are reported
for minute in ["0", "2"]:
self.ok("insert --timestamp -f --rate 120 /newton/prep"
" tests/data/prep-20120323T100" + minute)
def test_11_destroy(self):
# Delete records
self.ok("destroy --help")
self.fail("destroy")
self.contain("too few arguments")
self.fail("destroy /no/such/stream")
self.contain("no stream matched path")
self.fail("destroy -R /no/such/stream")
self.contain("no stream matched path")
self.fail("destroy asdfasdf")
self.contain("no stream matched path")
# From previous tests, we have:
self.ok("list -l")
self.match("/newton/prep float32_8\n"
"/newton/raw uint16_6\n"
"/newton/zzz/rawnotch uint16_9\n")
# Notice how they're not empty
self.ok("list --detail")
lines_(self.captured, 7)
# Fail to destroy because intervals still present
self.fail("destroy /newton/prep")
self.contain("all intervals must be removed")
self.ok("list --detail")
lines_(self.captured, 7)
# Destroy for real
self.ok("destroy -R /n*/prep")
self.ok("list -l")
self.match("/newton/raw uint16_6\n"
"/newton/zzz/rawnotch uint16_9\n")
self.ok("destroy /newton/zzz/rawnotch")
self.ok("list -l")
self.match("/newton/raw uint16_6\n")
self.ok("destroy /newton/raw")
self.ok("create /newton/raw uint16_6")
# Specify --remove with no data
self.ok("destroy --remove /newton/raw")
self.ok("list")
self.match("")
# Re-create a previously deleted location, and some new ones
rebuild = [ "/newton/prep", "/newton/zzz",
"/newton/raw", "/newton/asdf/qwer" ]
for path in rebuild:
# Create the path
self.ok("create " + path + " float32_8")
self.ok("list")
self.contain(path)
# Make sure it was created empty
self.ok("list --detail " + path)
self.contain("(no intervals)")
def test_12_unicode(self):
# Unicode paths.
self.ok("destroy /newton/asdf/qwer")
self.ok("destroy /newton/prep /newton/raw")
self.ok("destroy /newton/zzz")
self.ok(u"create /düsseldorf/raw uint16_6")
self.ok("list -l --detail")
self.contain(u"/düsseldorf/raw uint16_6")
self.contain("(no intervals)")
# Unicode metadata
self.ok(u"metadata /düsseldorf/raw --set α=beta 'γ'")
self.ok(u"metadata /düsseldorf/raw --update 'α=β ε τ α'")
self.ok(u"metadata /düsseldorf/raw")
self.match(u"α=β ε τ α\nγ\n")
self.ok(u"destroy /düsseldorf/raw")
def test_13_files(self):
# Test BulkData's ability to split into multiple files,
# by forcing the file size to be really small.
# Also increase the initial nrows, so that start/end positions
# in the database are very large (> 32 bit)
server_stop()
server_start(bulkdata_args = { "file_size" : 920, # 23 rows per file
"files_per_dir" : 3,
"initial_nrows" : 2**40 })
# Fill data
self.ok("create /newton/prep float32_8")
os.environ['TZ'] = "UTC"
with open("tests/data/prep-20120323T1004-timestamped") as input:
self.ok("insert -s 20120323T1004 -e 20120323T1006 /newton/prep",
input)
# Extract it
self.ok("extract /newton/prep --start '2000-01-01' " +
"--end '2012-03-23 10:04:01'")
lines_(self.captured, 120)
self.ok("extract /newton/prep --start '2000-01-01' " +
"--end '2022-03-23 10:04:01'")
lines_(self.captured, 14400)
# Make sure there were lots of files generated in the database
# dir
nfiles = 0
for (dirpath, dirnames, filenames) in os.walk(testdb):
nfiles += len(filenames)
assert(nfiles > 500)
# Make sure we can restart the server with a different file
# size and have it still work
server_stop()
server_start()
self.ok("extract /newton/prep --start '2000-01-01' " +
"--end '2022-03-23 10:04:01'")
lines_(self.captured, 14400)
# Now recreate the data one more time and make sure there are
# fewer files.
self.ok("destroy --remove /newton/prep")
self.fail("destroy /newton/prep") # already destroyed
self.ok("create /newton/prep float32_8")
os.environ['TZ'] = "UTC"
with open("tests/data/prep-20120323T1004-timestamped") as input:
self.ok("insert -s 20120323T1004 -e 20120323T1006 /newton/prep",
input)
nfiles = 0
for (dirpath, dirnames, filenames) in os.walk(testdb):
nfiles += len(filenames)
lt_(nfiles, 50)
self.ok("destroy -R /newton/prep") # destroy again
def test_14_remove_files(self):
# Limit max_removals, to cover more functionality.
server_stop()
server_start(max_removals = 4321,
bulkdata_args = { "file_size" : 920, # 23 rows per file
"files_per_dir" : 3,
"initial_nrows" : 2**40 })
self.do_remove_files()
self.ok("destroy -R /newton/prep") # destroy again
def test_14b_remove_files_maxint(self):
# Limit max_int_removals, to cover more functionality.
server_stop()
server_start(max_int_removals = 1,
bulkdata_args = { "file_size" : 920, # 23 rows per file
"files_per_dir" : 3,
"initial_nrows" : 2**40 })
self.do_remove_files()
def do_remove_files(self):
# Test BulkData's ability to remove when data is split into
# multiple files. Should be a fairly comprehensive test of
# remove functionality.
# Insert data. Just for fun, insert out of order
self.ok("create /newton/prep float32_8")
os.environ['TZ'] = "UTC"
self.ok("insert -t --filename --rate 120 /newton/prep "
"tests/data/prep-20120323T1002")
self.ok("insert -t --filename --rate 120 /newton/prep "
"tests/data/prep-20120323T1000")
# Should take up about 2.8 MB here (including directory entries)
du_before = nilmdb.utils.diskusage.du(testdb)
# Make sure we have the data we expect
self.ok("list -l --detail")
self.match("/newton/prep float32_8\n" +
" [ Fri, 23 Mar 2012 10:00:00.000000 +0000"
" -> Fri, 23 Mar 2012 10:01:59.991668 +0000 ]\n"
" [ Fri, 23 Mar 2012 10:02:00.000000 +0000"
" -> Fri, 23 Mar 2012 10:03:59.991668 +0000 ]\n")
# Remove various chunks of prep data and make sure
# they're gone.
self.ok("extract -c /newton/prep --start 2000-01-01 --end 2020-01-01")
self.match("28800\n")
self.ok("remove -c /newton/prep " +
"--start '23 Mar 2012 10:00:30' " +
"--end '23 Mar 2012 10:03:30'")
self.match("21600\n")
self.ok("remove -c /newton/prep " +
"--start '23 Mar 2012 10:00:10' " +
"--end '23 Mar 2012 10:00:20'")
self.match("1200\n")
self.ok("remove -c /newton/prep " +
"--start '23 Mar 2012 10:00:05' " +
"--end '23 Mar 2012 10:00:25'")
self.match("1200\n")
self.ok("remove -c /newton/prep " +
"--start '23 Mar 2012 10:03:50' " +
"--end '23 Mar 2012 10:06:50'")
self.match("1200\n")
self.ok("extract -c /newton/prep --start 2000-01-01 --end 2020-01-01")
self.match("3600\n")
# See the missing chunks in list output
self.ok("list -l --detail")
self.match("/newton/prep float32_8\n" +
" [ Fri, 23 Mar 2012 10:00:00.000000 +0000"
" -> Fri, 23 Mar 2012 10:00:05.000000 +0000 ]\n"
" [ Fri, 23 Mar 2012 10:00:25.000000 +0000"
" -> Fri, 23 Mar 2012 10:00:30.000000 +0000 ]\n"
" [ Fri, 23 Mar 2012 10:03:30.000000 +0000"
" -> Fri, 23 Mar 2012 10:03:50.000000 +0000 ]\n")
# We have 1/8 of the data that we had before, so the file size
# should have dropped below 1/4 of what it used to be
du_after = nilmdb.utils.diskusage.du(testdb)
lt_(du_after, (du_before / 4))
# Remove anything that came from the 10:02 data file
self.ok("remove /newton/prep " +
"--start '23 Mar 2012 10:02:00' --end '2020-01-01'")
# Re-insert 19 lines from that file, then remove them again.
# With the specific file_size above, this will cause the last
# file in the bulk data storage to be exactly file_size large,
# so removing the data should also remove that last file.
self.ok("insert --timestamp -f --rate 120 /newton/prep " +
"tests/data/prep-20120323T1002-first19lines")
self.ok("remove /newton/prep " +
"--start '23 Mar 2012 10:02:00' --end '2020-01-01'")
# Shut down and restart server, to force nrows to get refreshed.
server_stop()
server_start()
# Re-add the full 10:02 data file. This tests adding new data once
# we removed data near the end.
self.ok("insert -t -f -r 120 /newton/prep "
"tests/data/prep-20120323T1002")
# See if we can extract it all
self.ok("extract /newton/prep --start 2000-01-01 --end 2020-01-01")
lines_(self.captured, 15600)
def test_15_intervals_diff(self):
# Test "intervals" and "intervals --diff" command.
os.environ['TZ'] = "UTC"
self.ok("create /diff/1 uint8_1")
self.match("")
self.ok("intervals /diff/1")
self.match("")
self.ok("intervals /diff/1 --diff /diff/1")
self.match("")
self.ok("intervals --diff /diff/1 /diff/1")
self.match("")
self.fail("intervals /diff/2")
self.fail("intervals /diff/1 -d /diff/2")
self.ok("create /diff/2 uint8_1")
self.ok("intervals -T /diff/1 -d /diff/2")
self.match("")
self.ok("insert -s 01-01-2000 -e 01-01-2001 /diff/1 /dev/null")
self.ok("intervals /diff/1")
self.match("[ Sat, 01 Jan 2000 00:00:00.000000 +0000 -"
"> Mon, 01 Jan 2001 00:00:00.000000 +0000 ]\n")
self.ok("intervals /diff/1 -d /diff/2")
self.match("[ Sat, 01 Jan 2000 00:00:00.000000 +0000 -"
"> Mon, 01 Jan 2001 00:00:00.000000 +0000 ]\n")
self.ok("insert -s 01-01-2000 -e 01-01-2001 /diff/2 /dev/null")
self.ok("intervals /diff/1 -d /diff/2")
self.match("")
self.ok("insert -s 01-01-2001 -e 01-01-2002 /diff/1 /dev/null")
self.ok("insert -s 01-01-2002 -e 01-01-2003 /diff/2 /dev/null")
self.ok("intervals /diff/1 -d /diff/2")
self.match("[ Mon, 01 Jan 2001 00:00:00.000000 +0000 -"
"> Tue, 01 Jan 2002 00:00:00.000000 +0000 ]\n")
self.ok("insert -s 01-01-2004 -e 01-01-2005 /diff/1 /dev/null")
self.ok("intervals /diff/1 -d /diff/2")
self.match("[ Mon, 01 Jan 2001 00:00:00.000000 +0000 -"
"> Tue, 01 Jan 2002 00:00:00.000000 +0000 ]\n"
"[ Thu, 01 Jan 2004 00:00:00.000000 +0000 -"
"> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n")
self.fail("intervals -s 01-01-2003 -e 01-01-2000 /diff/1 -d /diff/2")
self.ok("intervals -s 01-01-2003 -e 01-01-2008 /diff/1 -d /diff/2")
self.match("[ Thu, 01 Jan 2004 00:00:00.000000 +0000 -"
"> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n")
# optimize
self.ok("insert -s 01-01-2002 -e 01-01-2004 /diff/1 /dev/null")
self.ok("intervals /diff/1")
self.match("[ Sat, 01 Jan 2000 00:00:00.000000 +0000 -"
"> Thu, 01 Jan 2004 00:00:00.000000 +0000 ]\n"
"[ Thu, 01 Jan 2004 00:00:00.000000 +0000 -"
"> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n")
self.ok("intervals /diff/1 --optimize")
self.ok("intervals /diff/1 -o")
self.match("[ Sat, 01 Jan 2000 00:00:00.000000 +0000 -"
"> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n")
self.ok("destroy -R /diff/1")
self.ok("destroy -R /diff/2")
def test_16_rename(self):
# Test renaming. Force file size smaller so we get more files
server_stop()
recursive_unlink(testdb)
server_start(bulkdata_args = { "file_size" : 920, # 23 rows per file
"files_per_dir" : 3 })
# Fill data
self.ok("create /newton/prep float32_8")
os.environ['TZ'] = "UTC"
with open("tests/data/prep-20120323T1004-timestamped") as input:
self.ok("insert -s 20120323T1004 -e 20120323T1006 /newton/prep",
input)
# Extract it
self.ok("extract /newton/prep --start '2000-01-01' " +
"--end '2012-03-23 10:04:01'")
extract_before = self.captured
def check_path(*components):
# Verify the paths look right on disk
seek = os.path.join(testdb, "data", *components)
for (dirpath, dirnames, filenames) in os.walk(testdb):
if "_format" in filenames:
if dirpath == seek:
break
raise AssertionError("data also found at " + dirpath)
else:
raise AssertionError("data not found at " + seek)
# Verify "list" output
self.ok("list -l")
self.match("/" + "/".join(components) + " float32_8\n")
# Lots of renames
check_path("newton", "prep")
self.fail("rename /newton/prep /newton/prep")
self.contain("old and new paths are the same")
check_path("newton", "prep")
self.fail("rename /newton/prep /newton")
self.contain("path must contain at least one folder")
self.fail("rename /newton/prep /newton/prep/")
self.contain("invalid path")
self.ok("rename /newton/prep /newton/foo/1")
check_path("newton", "foo", "1")
self.ok("rename /newton/foo/1 /newton/foo")
check_path("newton", "foo")
self.ok("rename /newton/foo /totally/different/thing")
check_path("totally", "different", "thing")
self.ok("rename /totally/different/thing /totally/something")
check_path("totally", "something")
self.ok("rename /totally/something /totally/something/cool")
check_path("totally", "something", "cool")
self.ok("rename /totally/something/cool /foo/bar")
check_path("foo", "bar")
self.ok("create /xxx/yyy/zzz float32_8")
self.fail("rename /foo/bar /xxx/yyy")
self.contain("subdirs of this path already exist")
self.fail("rename /foo/bar /xxx/yyy/zzz")
self.contain("stream already exists at this path")
self.fail("rename /foo/bar /xxx/yyy/zzz/www")
self.contain("path is subdir of existing node")
self.ok("rename /foo/bar /xxx/yyy/mmm")
self.ok("destroy -R /xxx/yyy/zzz")
check_path("xxx", "yyy", "mmm")
# Extract it at the final path
self.ok("extract /xxx/yyy/mmm --start '2000-01-01' " +
"--end '2012-03-23 10:04:01'")
eq_(self.captured, extract_before)
self.ok("destroy -R /xxx/yyy/mmm")
# Make sure temporary rename dirs weren't left around
for (dirpath, dirnames, filenames) in os.walk(testdb):
if "rename-" in dirpath:
raise AssertionError("temporary directories not cleaned up")
if "totally" in dirpath or "newton" in dirpath:
raise AssertionError("old directories not cleaned up")
server_stop()
server_start()