nilmdb/tests/test_cmdline.py

# -*- coding: utf-8 -*-

import nilmdb.server

from nilmdb.utils.printf import *
import nilmdb.cmdline
from nilmdb.utils import datetime_tz

import unittest
from nose.tools import *
from nose.tools import assert_raises
import itertools
import os
import re
import sys
import StringIO
import shlex
import warnings

from testutil.helpers import *

# Path of the scratch database used by this module.  setup_module()
# removes it before the first test and server_start() opens it.
testdb = "tests/cmdline-testdb"

def server_start(max_results = None,
                 max_removals = None,
                 max_int_removals = None,
                 bulkdata_args = None):
    """Open the test database and start serving it in the background.

    The database proxy and the server object are stored in the module
    globals test_db and test_server so that server_stop() can shut
    them down again.

    max_results, max_removals, max_int_removals -- passed through
        unchanged to nilmdb.server.NilmDB.
    bulkdata_args -- dict passed through to nilmdb.server.NilmDB.
        None (the default) is replaced by a new empty dict.
    """
    global test_server, test_db
    # Use a fresh dict for every call.  A literal {} as the default
    # value would be a single object shared by all invocations, so any
    # mutation of it would leak into later server restarts.
    if bulkdata_args is None:
        bulkdata_args = {}
    # Start web app on a custom port
    test_db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(
        testdb,
        max_results = max_results,
        max_removals = max_removals,
        max_int_removals = max_int_removals,
        bulkdata_args = bulkdata_args)
    test_server = nilmdb.server.Server(test_db, host = "127.0.0.1",
                                       port = 32180, stoppable = False,
                                       fast_shutdown = True,
                                       force_traceback = False)
    # Non-blocking start, so the tests can continue in this thread
    test_server.start(blocking = False)

def server_stop():
    """Stop the test web server, then close the database it was serving."""
    # test_server and test_db are only read here (they are assigned in
    # server_start), so no global declaration is required.
    test_server.stop()
    test_db.close()

def setup_module():
    """Module setup hook: wipe the test database and start the server."""
    # Clear out DB left over from any earlier run
    recursive_unlink(testdb)
    # server_start() is what assigns the test_server / test_db globals
    server_start()

def teardown_module():
    """Module teardown hook: shut down the server started for the tests."""
    server_stop()

# StringIO subclass that carries an 'encoding' attribute set to UTF-8.
# The attribute is added so Python will convert Unicode properly when
# writing or reading; run() below uses instances of this class as
# stand-ins for sys.stdin, sys.stdout and sys.stderr.
class UTF8StringIO(StringIO.StringIO):
    encoding = 'utf-8'

class TestCmdline(object):

    def run(self, arg_string, infile=None, outfile=None):
        """Execute the command-line client in-process.

        arg_string -- the command line, split with shlex before use.
        infile -- object installed as sys.stdin; an empty UTF8StringIO
            is used when omitted.
        outfile -- object installed as sys.stdout; when omitted, stdout
            is written to the same buffer that collects stderr.

        Stores the decoded contents of the stdout object in
        self.captured and the exit status in self.exitcode."""
        os.environ['NILMDB_URL'] = "http://localhost:32180/"

        # Pick the three replacement streams.  stderr is always captured.
        stdin_obj = UTF8StringIO("") if infile is None else infile
        stderr_obj = UTF8StringIO()
        stdout_obj = stderr_obj if outfile is None else outfile

        # Swap the streams in, and always swap them back afterwards.
        original_streams = (sys.stdin, sys.stdout, sys.stderr)
        (sys.stdin, sys.stdout, sys.stderr) = (stdin_obj, stdout_obj,
                                               stderr_obj)
        try:
            # shlex doesn't support Unicode very well.  Encode the
            # string as UTF-8 explicitly before splitting.
            argv = shlex.split(arg_string.encode('utf-8'))
            nilmdb.cmdline.Cmdline(argv).run()
            # Normal return: funnel it through the same SystemExit path
            # so the exit status is always taken from the exception.
            sys.exit(0)
        except SystemExit as e:
            status = e.code
        finally:
            (sys.stdin, sys.stdout, sys.stderr) = original_streams

        self.captured = nilmdb.utils.unicode.decode(stdout_obj.getvalue())
        self.exitcode = status

    def ok(self, arg_string, infile = None):
        self.run(arg_string, infile)
        if self.exitcode != 0:
            self.dump()
            eq_(self.exitcode, 0)

    def fail(self, arg_string, infile = None,
             exitcode = None, require_error = True):
        self.run(arg_string, infile)
        if exitcode is not None and self.exitcode != exitcode:
            # Wrong exit code
            self.dump()
            eq_(self.exitcode, exitcode)
        if self.exitcode == 0:
            # Success, when we wanted failure
            self.dump()
            ne_(self.exitcode, 0)
        # Make sure the output contains the word "error" at the
        # beginning of a line, but only if an exitcode wasn't
        # specified.
        if require_error and not re.search("^error",
                                           self.captured, re.MULTILINE):
            raise AssertionError("command failed, but output doesn't "
                                 "contain the string 'error'")

    def contain(self, checkstring):
        """Check checkstring against the output captured by the last
        run(), using the in_ helper (presumably a containment
        assertion; it is defined outside this file)."""
        in_(checkstring, self.captured)

    def match(self, checkstring):
        """Assert that the output captured by the last run() is exactly
        equal to checkstring."""
        eq_(checkstring, self.captured)

    def matchfile(self, file):
        # Captured data should match file contents exactly
        with open(file) as f:
            contents = f.read()
            if contents != self.captured:
                print "--- reference file (first 1000 bytes):\n"
                print contents[0:1000] + "\n"
                print "--- captured data (first 1000 bytes):\n"
                print self.captured[0:1000] + "\n"
                zipped = itertools.izip_longest(contents, self.captured)
                for (n, (a, b)) in enumerate(zipped):
                    if a != b:
                        print "--- first difference is at offset", n
                        print "--- reference:", repr(a)
                        print "---  captured:", repr(b)
                        break
                raise AssertionError("captured data doesn't match " + file)

    def matchfilecount(self, file):
        """Require the last captured line to equal a file's data-line count.

        file -- path of the reference file.  Every line whose first
            character is not '#' is counted."""
        with open(file) as f:
            expected = sum(1 for line in f if line[0] != '#')
        last_line = self.captured.splitlines()[-1]
        eq_(last_line, sprintf("%d", expected))

    def dump(self):
        """Print the captured output between "dump start" and
        "dump end" marker lines, for debugging a failed command."""
        printf("-----dump start-----\n%s-----dump end-----\n", self.captured)

    def test_01_basic(self):
        # Basic argument handling: help/version output, bad arguments,
        # server URL handling, and per-command help text.

        # help
        self.ok("--help")
        self.contain("usage:")

        # version: must be the same whether given before or after a command
        self.ok("--version")
        ver = self.captured
        self.ok("list --version")
        eq_(self.captured, ver)

        # fail for no args
        self.fail("")

        # fail for no such option
        self.fail("--nosuchoption")

        # fail for bad command
        self.fail("badcommand")

        # try some URL constructions
        self.fail("--url http://nosuchurl/ info")
        self.contain("error connecting to server")

        self.fail("--url nosuchurl info")
        self.contain("error connecting to server")

        self.fail("-u nosuchurl/foo info")
        self.contain("error connecting to server")

        self.fail("-u localhost:1 info")
        self.contain("error connecting to server")

        self.ok("-u localhost:32180 info")
        self.ok("info")

        # Duplicated arguments should fail, but this isn't implemented
        # due to it being kind of a pain with argparse.
        # (The block below is deliberately disabled with "if 0".)
        if 0:
            self.fail("-u url1 -u url2 info")
            self.contain("duplicated argument")

            self.fail("list --detail --detail")
            self.contain("duplicated argument")

            self.fail("list --detail --path path1 --path path2")
            self.contain("duplicated argument")

            self.fail("extract --start 2000-01-01 --start 2001-01-02")
            self.contain("duplicated argument")

        # Verify that "help command" and "command --help" are identical
        # for all commands.
        # The command names are taken from the first {a,b,c} group in
        # the output of a bare invocation (presumably the usage line;
        # confirm against the argparse output).  The empty string is
        # prepended so the top-level help is compared as well.
        self.fail("")
        m = re.search(r"{(.*)}", self.captured)
        for command in [""] + m.group(1).split(','):
            self.ok(command + " --help")
            cap1 = self.captured
            self.ok("help " + command)
            cap2 = self.captured
            # Trailing junk after "help <command>" must not change the output
            self.ok("help " + command + " asdf --url --zxcv -")
            cap3 = self.captured
            eq_(cap1, cap2)
            eq_(cap2, cap3)

    def test_02_parsetime(self):
        # Exercise nilmdb.utils.time.parse_time directly; no commands
        # are sent to the server here.
        os.environ['TZ'] = "America/New_York"
        now = datetime_tz.datetime_tz.now()
        to_timestamp = nilmdb.utils.time.unix_to_timestamp
        parse = nilmdb.utils.time.parse_time

        # The string form of the current time must parse back to itself
        eq_(parse(str(now)), to_timestamp(now.totimestamp()))

        # Reference instant that the inputs below must all produce
        reference = to_timestamp(
            datetime_tz.datetime_tz.smartparse("20120405 1400-0400")
            .totimestamp())
        for text in [ "hi there 20120405 1400-0400 testing! 123",
                      "20120405 1800 UTC",
                      "20120405 1400-0400 UTC" ]:
            eq_(parse(text), reference)

        # Inputs that must be rejected
        for badtime in [ "20120405 1400-9999", "hello", "-", "", "4:00" ]:
            with assert_raises(ValueError):
                parse(badtime)

        # "now" only has to parse without raising
        parse("now")

        # Filename-style and numeric inputs for the same instant
        for text in [ "snapshot-20120405-140000.raw.gz",
                      "prep-20120405T1400",
                      "1333648800.0",
                      "1333648800000000",
                      "@1333648800000000" ]:
            eq_(parse(text), reference)

        # Symbolic extremes
        eq_(parse("min"), nilmdb.utils.time.min_timestamp)
        eq_(parse("max"), nilmdb.utils.time.max_timestamp)

        # "@" followed by something that is not a number is an error
        with assert_raises(ValueError):
            parse("@hashtag12345")

    def test_03_info(self):
        # The "info" command must report URL, versions, database path
        # and the disk space figures.
        self.ok("info")
        expected_fragments = [
            "Server URL: http://localhost:32180/",
            "Client version: " + nilmdb.__version__,
            "Server version: " + test_server.version,
            "Server database path",
            "Server disk space used by NilmDB",
            "Server disk space used by other",
            "Server disk space reserved",
            "Server disk space free",
        ]
        for fragment in expected_fragments:
            self.contain(fragment)

    def test_04_createlist(self):
        # Basic stream tests, like those in test_client.
        # Covers "create", "list" and "destroy".  The streams left
        # behind here are used by the later numbered tests.

        # No streams
        self.ok("list")
        self.match("")

        # Bad paths
        self.fail("create foo/bar/baz float32_8")
        self.contain("paths must start with /")

        self.fail("create /foo float32_8")
        self.contain("invalid path")
        self.fail("create /newton/prep/ float32_8")
        self.contain("invalid path")

        # "_format" is rejected as a path component at any position
        self.fail("create /newton/_format/prep float32_8")
        self.contain("path name is invalid")
        self.fail("create /_format/newton/prep float32_8")
        self.contain("path name is invalid")
        self.fail("create /newton/prep/_format float32_8")
        self.contain("path name is invalid")

        # Bad layout type
        self.fail("create /newton/prep NoSuchLayout")
        self.contain("no such layout")
        self.fail("create /newton/prep float32_0")
        self.contain("no such layout")
        self.fail("create /newton/prep float33_1")
        self.contain("no such layout")

        # Create a few streams
        self.ok("create /newton/zzz/rawnotch uint16_9")
        self.ok("create /newton/prep float32_8")
        self.ok("create /newton/raw uint16_6")
        self.ok("create /newton/raw~decim-1234 uint16_6")

        # Create a stream that already exists
        self.fail("create /newton/raw uint16_6")
        self.contain("stream already exists at this path")

        # Should not be able to create a stream with another stream as
        # its parent
        self.fail("create /newton/prep/blah float32_8")
        self.contain("path is subdir of existing node")

        # Should not be able to create a stream at a location that
        # has other nodes as children
        self.fail("create /newton/zzz float32_8")
        self.contain("subdirs of this path already exist")

        # Verify we got those 4 streams and they're returned in
        # alphabetical order.
        self.ok("list -l")
        self.match("/newton/prep float32_8\n"
                   "/newton/raw uint16_6\n"
                   "/newton/raw~decim-1234 uint16_6\n"
                   "/newton/zzz/rawnotch uint16_9\n")

        # No decimated streams if -n specified
        self.ok("list -n -l")
        self.match("/newton/prep float32_8\n"
                   "/newton/raw uint16_6\n"
                   "/newton/zzz/rawnotch uint16_9\n")

        # Delete that decimated stream
        self.ok("destroy /newton/raw~decim-1234")

        # Match just one type or one path.  Also check
        # that --path is optional
        self.ok("list --layout /newton/raw")
        self.match("/newton/raw uint16_6\n")

        # Wildcard matches
        self.ok("list *zzz*")
        self.match("/newton/zzz/rawnotch\n")

        # reversed range
        self.fail("list /newton/prep --start 2020-01-01 --end 2000-01-01")
        self.contain("start must precede end")

    def test_05_metadata(self):
        # Set / get metadata
        # Uses the /newton/prep and /newton/raw streams created in
        # test_04_createlist.
        self.fail("metadata")
        self.fail("metadata --get")

        # Freshly created streams have no metadata
        self.ok("metadata /newton/prep")
        self.match("")

        self.ok("metadata /newton/raw --get")
        self.match("")

        self.ok("metadata /newton/prep --set "
                "'description=The Data' "
                "v_scale=1.234")
        self.ok("metadata /newton/raw --update "
                "'description=The Data'")
        self.ok("metadata /newton/raw --update "
                "v_scale=1.234")

        # various parsing tests
        self.ok("metadata /newton/raw --update foo=")
        self.fail("metadata /newton/raw --update =bar")
        self.fail("metadata /newton/raw --update foo==bar")
        self.fail("metadata /newton/raw --update foo;bar")

        # errors
        self.fail("metadata /newton/nosuchstream foo=bar")
        self.contain("unrecognized arguments")
        self.fail("metadata /newton/nosuchstream")
        self.contain("No stream at path")
        self.fail("metadata /newton/nosuchstream --set foo=bar")
        self.contain("No stream at path")
        self.fail("metadata /newton/nosuchstream --delete")
        self.contain("No stream at path")

        # With and without --get, all keys are printed
        self.ok("metadata /newton/prep")
        self.match("description=The Data\nv_scale=1.234\n")

        self.ok("metadata /newton/prep --get")
        self.match("description=The Data\nv_scale=1.234\n")

        # A key that was never set is printed with an empty value
        self.ok("metadata /newton/prep --get descr")
        self.match("descr=\n")

        self.ok("metadata /newton/prep --get description")
        self.match("description=The Data\n")

        self.ok("metadata /newton/prep --get description v_scale")
        self.match("description=The Data\nv_scale=1.234\n")

        # --set replaces the whole set of keys: v_scale is gone afterwards
        self.ok("metadata /newton/prep --set "
                "'description=The Data'")

        self.ok("metadata /newton/prep --get")
        self.match("description=The Data\n")

        self.fail("metadata /newton/nosuchpath")
        self.contain("No stream at path /newton/nosuchpath")

        # --delete with no keys removes everything
        self.ok("metadata /newton/prep --delete")
        self.ok("metadata /newton/prep --get")
        self.match("")
        self.ok("metadata /newton/prep --set "
                "'description=The Data' "
                "v_scale=1.234")
        # --delete with a key removes only that key
        self.ok("metadata /newton/prep --delete v_scale")
        self.ok("metadata /newton/prep --get")
        self.match("description=The Data\n")
        # After setting a key to an empty value, nothing is listed
        self.ok("metadata /newton/prep --set description=")
        self.ok("metadata /newton/prep --get")
        self.match("")

    def test_06_insert(self):
        # Exercise the "insert" command: argument errors, bad input
        # data, timestamping options and overlap detection.  The data
        # inserted into /newton/prep here is relied on by later tests.
        self.ok("insert --help")

        self.fail("insert -s 2000 -e 2001 /foo/bar baz")
        self.contain("error getting stream info")

        self.fail("insert -s 2000 -e 2001 /newton/prep baz")
        self.match("error opening input file baz\n")

        self.fail("insert /newton/prep --timestamp -f -r 120")
        self.contain("error extracting start time")

        self.fail("insert /newton/prep --timestamp -r 120")
        self.contain("need --start or --filename")

        self.fail("insert /newton/prep "
                  "tests/data/prep-20120323T1000")

        # insert pre-timestamped data, with bad times (non-monotonic)
        os.environ['TZ'] = "UTC"
        with open("tests/data/prep-20120323T1004-badtimes") as input:
            self.fail("insert -s 20120323T1004 -e 20120323T1006 /newton/prep",
                      input)
            self.contain("error parsing input data")
            self.contain("line 7")
            self.contain("timestamp is not monotonically increasing")

        # insert pre-timestamped data, from stdin
        os.environ['TZ'] = "UTC"
        with open("tests/data/prep-20120323T1004-timestamped") as input:
            self.ok("insert -s 20120323T1004 -e 20120323T1006 /newton/prep",
                    input)

        # insert data with normal timestamper from filename
        os.environ['TZ'] = "UTC"
        self.ok("insert --timestamp -f --rate 120 /newton/prep "
                "tests/data/prep-20120323T1000")
        self.fail("insert -t --filename /newton/prep "
                "tests/data/prep-20120323T1002")
        self.contain("rate is needed")
        self.ok("insert -t --filename --rate 120 /newton/prep "
                "tests/data/prep-20120323T1002")

        # overlap
        os.environ['TZ'] = "UTC"
        self.fail("insert --timestamp -f --rate 120 /newton/prep "
                  "tests/data/prep-20120323T1004")
        self.contain("overlap")

        # Just to help test more situations -- stop and restart
        # the server now.  This tests nilmdb's interval caching,
        # at the very least.
        server_stop()
        server_start()

        # still an overlap if we specify a different start
        os.environ['TZ'] = "America/New_York"
        self.fail("insert -t -r 120 --start '03/23/2012 06:05:00' /newton/prep"
                  " tests/data/prep-20120323T1004")
        self.contain("overlap")

        # wrong format
        os.environ['TZ'] = "UTC"
        self.fail("insert -t -r 120 -f /newton/raw "
                  "tests/data/prep-20120323T1004")
        self.contain("error parsing input data")
        self.contain("can't parse value")

        # too few rows per line
        self.ok("create /insert/test float32_20")
        self.fail("insert -t -r 120 -f /insert/test "
                  "tests/data/prep-20120323T1004")
        self.contain("error parsing input data")
        self.contain("wrong number of values")
        self.ok("destroy /insert/test")

        # empty data does nothing
        self.ok("insert -t -r 120 --start '03/23/2012 06:05:00' /newton/prep "
                "/dev/null")

        # bad start time
        self.fail("insert -t -r 120 --start 'whatever' /newton/prep /dev/null")

        # Test negative times
        # The rows inserted here are removed again at the end, so
        # they do not affect the counts checked by later tests.
        self.ok("insert --start @-10000000000 --end @1000000001 /newton/prep"
                " tests/data/timestamped")
        self.ok("extract -c /newton/prep --start min --end @1000000001")
        self.match("8\n")
        self.ok("remove /newton/prep --start min --end @1000000001")

    def test_07_detail_extended(self):
        # Exercise "list --detail" and "list --ext" against the data
        # inserted by test_06_insert.
        # Just count the number of lines, it's probably fine
        self.ok("list --detail")
        lines_(self.captured, 8)

        self.ok("list --detail *prep")
        lines_(self.captured, 4)

        # Later --start values leave fewer lines in the listing
        self.ok("list --detail *prep --start='23 Mar 2012 10:02'")
        lines_(self.captured, 3)

        self.ok("list --detail *prep --start='23 Mar 2012 10:05'")
        lines_(self.captured, 2)

        self.ok("list --detail *prep --start='23 Mar 2012 10:05:15'")
        lines_(self.captured, 2)
        self.contain("10:05:15.000")

        self.ok("list --detail *prep --start='23 Mar 2012 10:05:15.50'")
        lines_(self.captured, 2)
        self.contain("10:05:15.500")

        self.ok("list --detail *prep --start='23 Mar 2012 19:05:15.50'")
        lines_(self.captured, 2)
        self.contain("no intervals")

        self.ok("list --detail *prep --start='23 Mar 2012 10:05:15.50'"
                + " --end='23 Mar 2012 10:05:15.51'")
        lines_(self.captured, 2)
        self.contain("10:05:15.500")

        self.ok("list --detail")
        lines_(self.captured, 8)

        # Verify the "raw timestamp" output
        self.ok("list --detail *prep --timestamp-raw "
                "--start='23 Mar 2012 10:05:15.50'")
        lines_(self.captured, 2)
        self.contain("[ 1332497115500000 -> 1332497160000000 ]")

        # bad time
        self.fail("list --detail *prep -T --start='9332497115.612'")
        # good time
        self.ok("list --detail *prep -T --start='1332497115.612'")
        lines_(self.captured, 2)
        self.contain("[ 1332497115612000 -> 1332497160000000 ]")

        # Check --ext output
        self.ok("list --ext")
        lines_(self.captured, 9)

        self.ok("list -E -T")
        # Local shorthand for the repeated containment checks below
        c = self.contain
        c("\n  interval extents: 1332496800000000 -> 1332497160000000\n")
        c("\n        total data: 43200 rows, 359.983336 seconds\n")
        c("\n  interval extents: (no data)\n")
        c("\n        total data: 0 rows, 0.000000 seconds\n")

        # Misc
        self.fail("list --ext --start='23 Mar 2012 10:05:15.50'")
        self.contain("--start and --end only make sense with --detail")

    def test_08_extract(self):
        # Exercise the "extract" command against the data inserted by
        # test_06_insert, comparing output with stored reference files.

        # nonexistent stream
        self.fail("extract /no/such/foo --start 2000-01-01 --end 2020-01-01")
        self.contain("error getting stream info")

        # reversed range
        self.fail("extract -a /newton/prep --start 2020-01-01 --end 2000-01-01")
        self.contain("start is after end")

        # empty ranges return error 2
        # (require_error is off because the output says "no data"
        # instead of having to contain an "error" line)
        self.fail("extract -a /newton/prep " +
                  "--start '23 Mar 2012 20:00:30' " +
                  "--end '23 Mar 2012 20:00:31'",
                  exitcode = 2, require_error = False)
        self.contain("no data")
        self.fail("extract -a /newton/prep " +
                  "--start '23 Mar 2012 20:00:30.000001' " +
                  "--end '23 Mar 2012 20:00:30.000002'",
                  exitcode = 2, require_error = False)
        self.contain("no data")
        self.fail("extract -a /newton/prep " +
                  "--start '23 Mar 2022 10:00:30' " +
                  "--end '23 Mar 2022 10:00:31'",
                  exitcode = 2, require_error = False)
        self.contain("no data")

        # but are ok if we're just counting results
        self.ok("extract --count /newton/prep " +
                "--start '23 Mar 2012 20:00:30' " +
                "--end '23 Mar 2012 20:00:31'")
        self.match("0\n")
        self.ok("extract -c /newton/prep " +
                "--start '23 Mar 2012 20:00:30.000001' " +
                "--end '23 Mar 2012 20:00:30.000002'")
        self.match("0\n")

        # Check various dumps against stored copies of how they should appear
        # The helper runs each extract twice: once comparing the full
        # output with tests/data/extract-<file>, and once with --count,
        # comparing against that file's number of non-comment lines.
        def test(file, start, end, extra=""):
            self.ok("extract " + extra + " /newton/prep " +
                    "--start '23 Mar 2012 " + start + "' " +
                    "--end '23 Mar 2012 " + end + "'")
            self.matchfile("tests/data/extract-" + str(file))
            self.ok("extract --count " + extra + " /newton/prep " +
                    "--start '23 Mar 2012 " + start + "' " +
                    "--end '23 Mar 2012 " + end + "'")
            self.matchfilecount("tests/data/extract-" + str(file))
        test(1, "10:00:30", "10:00:31", extra="-a")
        test(1, "10:00:30.000000", "10:00:31", extra="-a")
        test(2, "10:00:30.000001", "10:00:31")
        test(2, "10:00:30.008333", "10:00:31")
        test(3, "10:00:30.008333", "10:00:30.008334")
        test(3, "10:00:30.008333", "10:00:30.016667")
        test(4, "10:00:30.008333", "10:00:30.025")
        test(5, "10:00:30", "10:00:31", extra="--annotate --bare")
        test(6, "10:00:30", "10:00:31", extra="-b")
        test(7, "10:00:30", "10:00:30.999", extra="-a -T")
        test(7, "10:00:30", "10:00:30.999", extra="-a --timestamp-raw")
        test(8, "10:01:59.9", "10:02:00.1", extra="--markup")
        test(8, "10:01:59.9", "10:02:00.1", extra="-m")

        # all data put in by tests
        self.ok("extract -a /newton/prep --start min --end max")
        lines_(self.captured, 43204)
        self.ok("extract -c /newton/prep --start 2000-01-01 --end 2020-01-01")
        self.match("43200\n")

        # test binary mode
        self.fail("extract -c -B /newton/prep -s min -e max")
        self.contain("binary cannot be combined")
        self.fail("extract -m -B /newton/prep -s min -e max")
        self.contain("binary cannot be combined")
        self.ok("extract -B /newton/prep -s min -e max")
        # Expected size: 43200 rows of (8 + 8*4) bytes each
        eq_(len(self.captured), 43200 * (8 + 8*4))

        # markup for 3 intervals, plus extra markup lines whenever we had
        # a "restart" from the nilmdb.stream_extract function
        self.ok("extract -m /newton/prep --start 2000-01-01 --end 2020-01-01")
        lines_(self.captured, 43210)

    def test_09_truncated(self):
        # Test truncated responses by overriding the nilmdb max_results
        server_stop()
        server_start(max_results = 2)
        # The listing must still be complete despite the small limit
        self.ok("list --detail")
        lines_(self.captured, 8)
        # Restart with default settings for the tests that follow
        server_stop()
        server_start()

    def test_10_remove(self):
        # Removing data
        # Operates on the /newton/prep data inserted by test_06_insert
        # and re-inserts part of it at the end for the next test.

        # Try nonexistent stream
        self.fail("remove /no/such/foo --start 2000-01-01 --end 2020-01-01")
        self.contain("no stream matched path")

        # empty or backward ranges return errors
        self.fail("remove /newton/prep --start 2020-01-01 --end 2000-01-01")
        self.contain("start must precede end")

        self.fail("remove /newton/prep " +
                  "--start '23 Mar 2012 10:00:30' " +
                  "--end '23 Mar 2012 10:00:30'")
        self.contain("start must precede end")
        self.fail("remove /newton/prep " +
                  "--start '23 Mar 2012 10:00:30.000001' " +
                  "--end '23 Mar 2012 10:00:30.000001'")
        self.contain("start must precede end")
        self.fail("remove /newton/prep " +
                  "--start '23 Mar 2022 10:00:30' " +
                  "--end '23 Mar 2022 10:00:30'")
        self.contain("start must precede end")

        # Verbose
        # With -c / --count the number of removed rows is printed; when
        # several paths match, each one is announced before its count.
        self.ok("remove -c /newton/prep " +
                "--start '23 Mar 2022 20:00:30' " +
                "--end '23 Mar 2022 20:00:31'")
        self.match("0\n")
        self.ok("remove --count /newton/prep " +
                "--start '23 Mar 2022 20:00:30' " +
                "--end '23 Mar 2022 20:00:31'")
        self.match("0\n")
        self.ok("remove -c /newton/prep /newton/pre* " +
                "--start '23 Mar 2022 20:00:30' " +
                "--end '23 Mar 2022 20:00:31'")
        self.match("Removing from /newton/prep\n0\n" +
                   "Removing from /newton/prep\n0\n")

        # Make sure we have the data we expect
        self.ok("list -l --detail /newton/prep")
        self.match("/newton/prep float32_8\n" +
                   "  [ Fri, 23 Mar 2012 10:00:00.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:01:59.991668 +0000 ]\n"
                   "  [ Fri, 23 Mar 2012 10:02:00.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:03:59.991668 +0000 ]\n"
                   "  [ Fri, 23 Mar 2012 10:04:00.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:06:00.000000 +0000 ]\n")

        # Remove various chunks of prep data and make sure
        # they're gone.
        self.ok("remove -c /newton/prep " +
                "--start '23 Mar 2012 10:00:30' " +
                "--end '23 Mar 2012 10:00:40'")
        self.match("1200\n")

        self.ok("remove -c /newton/prep " +
                "--start '23 Mar 2012 10:00:10' " +
                "--end '23 Mar 2012 10:00:20'")
        self.match("1200\n")

        # This range spans the chunk just removed, so only the rows
        # on either side of it are left to delete.
        self.ok("remove -c /newton/prep " +
                "--start '23 Mar 2012 10:00:05' " +
                "--end '23 Mar 2012 10:00:25'")
        self.match("1200\n")

        self.ok("remove -c /newton/prep " +
                "--start '23 Mar 2012 10:03:50' " +
                "--end '23 Mar 2012 10:06:50'")
        self.match("15600\n")

        self.ok("extract -c /newton/prep --start 2000-01-01 --end 2020-01-01")
        self.match("24000\n")

        # See the missing chunks in list output
        self.ok("list --layout --detail /newton/prep")
        self.match("/newton/prep float32_8\n" +
                   "  [ Fri, 23 Mar 2012 10:00:00.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:00:05.000000 +0000 ]\n"
                   "  [ Fri, 23 Mar 2012 10:00:25.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:00:30.000000 +0000 ]\n"
                   "  [ Fri, 23 Mar 2012 10:00:40.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:01:59.991668 +0000 ]\n"
                   "  [ Fri, 23 Mar 2012 10:02:00.000000 +0000"
                   " -> Fri, 23 Mar 2012 10:03:50.000000 +0000 ]\n")

        # Remove all data, verify it's missing
        self.ok("remove /newton/prep --start 2000-01-01 --end 2020-01-01")
        self.match("")  # no count requested this time
        self.ok("list -l --detail /newton/prep")
        self.match("/newton/prep float32_8\n" +
                   "  (no intervals)\n")

        # Reinsert some data, to verify that no overlaps with deleted
        # data are reported
        for minute in ["0", "2"]:
            self.ok("insert --timestamp -f --rate 120 /newton/prep"
                    " tests/data/prep-20120323T100" + minute)

    def test_11_destroy(self):
        # Delete records
        # Exercises the "destroy" command, then recreates a set of
        # empty streams that test_12_unicode removes again.
        self.ok("destroy --help")

        self.fail("destroy")
        self.contain("too few arguments")

        self.fail("destroy /no/such/stream")
        self.contain("no stream matched path")

        self.fail("destroy -R /no/such/stream")
        self.contain("no stream matched path")

        self.fail("destroy asdfasdf")
        self.contain("no stream matched path")

        # From previous tests, we have:
        self.ok("list -l")
        self.match("/newton/prep float32_8\n"
                   "/newton/raw uint16_6\n"
                   "/newton/zzz/rawnotch uint16_9\n")

        # Notice how they're not empty
        self.ok("list --detail")
        lines_(self.captured, 7)

        # Fail to destroy because intervals still present
        self.fail("destroy /newton/prep")
        self.contain("all intervals must be removed")
        self.ok("list --detail")
        lines_(self.captured, 7)

        # Destroy for real
        # (-R is used here on the stream that still holds data, and
        # the path is given as a wildcard pattern)
        self.ok("destroy -R /n*/prep")
        self.ok("list -l")
        self.match("/newton/raw uint16_6\n"
                   "/newton/zzz/rawnotch uint16_9\n")

        self.ok("destroy /newton/zzz/rawnotch")
        self.ok("list -l")
        self.match("/newton/raw uint16_6\n")

        self.ok("destroy /newton/raw")
        self.ok("create /newton/raw uint16_6")
        # Specify --remove with no data
        self.ok("destroy --remove /newton/raw")
        self.ok("list")
        self.match("")

        # Re-create a previously deleted location, and some new ones
        rebuild = [ "/newton/prep", "/newton/zzz",
                    "/newton/raw", "/newton/asdf/qwer" ]
        for path in rebuild:
            # Create the path
            self.ok("create " + path + " float32_8")
            self.ok("list")
            self.contain(path)
            # Make sure it was created empty
            self.ok("list --detail " + path)
            self.contain("(no intervals)")

    def test_12_unicode(self):
        # Unicode paths.
        # First remove the streams recreated at the end of
        # test_11_destroy, so the database starts out empty.
        self.ok("destroy /newton/asdf/qwer")
        self.ok("destroy /newton/prep /newton/raw")
        self.ok("destroy /newton/zzz")

        self.ok(u"create /düsseldorf/raw uint16_6")
        self.ok("list -l --detail")
        self.contain(u"/düsseldorf/raw uint16_6")
        self.contain("(no intervals)")

        # Unicode metadata
        # Both keys and values contain non-ASCII characters; --update
        # changes one key and leaves the other as set.
        self.ok(u"metadata /düsseldorf/raw --set α=beta 'γ=δ'")
        self.ok(u"metadata /düsseldorf/raw --update 'α=β ε τ α'")
        self.ok(u"metadata /düsseldorf/raw")
        self.match(u"α=β ε τ α\nγ=δ\n")

        self.ok(u"destroy /düsseldorf/raw")

    def test_13_files(self):
        # Test BulkData's ability to split into multiple files,
        # by forcing the file size to be really small.
        # Also increase the initial nrows, so that start/end positions
        # in the database are very large (> 32 bit)

        def fill_prep():
            # Create /newton/prep and insert the timestamped sample
            # file, handing the open file to self.ok as its second
            # argument.
            self.ok("create /newton/prep float32_8")
            os.environ['TZ'] = "UTC"
            with open("tests/data/prep-20120323T1004-timestamped") as infile:
                self.ok("insert -s 20120323T1004 -e 20120323T1006 "
                        "/newton/prep", infile)

        def count_db_files():
            # Total number of files in every directory below testdb
            return sum(len(filenames)
                       for (dirpath, dirnames, filenames) in os.walk(testdb))

        extract_all = ("extract /newton/prep --start '2000-01-01' "
                       "--end '2022-03-23 10:04:01'")

        server_stop()
        server_start(bulkdata_args = { "file_size" : 920, # 23 rows per file
                                       "files_per_dir" : 3,
                                       "initial_nrows" : 2**40 })

        # Fill data
        fill_prep()

        # Extract it: first a window that ends one second into the
        # data, then a window that covers all of it.
        self.ok("extract /newton/prep --start '2000-01-01' "
                "--end '2012-03-23 10:04:01'")
        lines_(self.captured, 120)
        self.ok(extract_all)
        lines_(self.captured, 14400)

        # Make sure there were lots of files generated in the database
        # dir.  This goes through lt_, like the second count below,
        # instead of a bare assert statement, which "python -O" would
        # silently drop.
        lt_(500, count_db_files())

        # Make sure we can restart the server with a different file
        # size and have it still work
        server_stop()
        server_start()
        self.ok(extract_all)
        lines_(self.captured, 14400)

        # Now recreate the data one more time and make sure there are
        # fewer files.
        self.ok("destroy --remove /newton/prep")
        self.fail("destroy /newton/prep") # already destroyed
        fill_prep()
        lt_(count_db_files(), 50)
        self.ok("destroy -R /newton/prep") # destroy again

    def test_14_remove_files(self):
        # Run the shared removal scenario (do_remove_files) with
        # max_removals limited, to cover more functionality, and with
        # the bulk data split into very small files.
        small_files = dict(file_size = 920,      # 23 rows per file
                           files_per_dir = 3,
                           initial_nrows = 2**40)
        server_stop()
        server_start(bulkdata_args = small_files, max_removals = 4321)

        self.do_remove_files()

        # The scenario leaves /newton/prep behind; destroy it again.
        self.ok("destroy -R /newton/prep")

    def test_14b_remove_files_maxint(self):
        # Run the shared removal scenario (do_remove_files) with
        # max_int_removals limited, to cover more functionality, and
        # with the bulk data split into very small files.
        small_files = dict(file_size = 920,      # 23 rows per file
                           files_per_dir = 3,
                           initial_nrows = 2**40)
        server_stop()
        server_start(bulkdata_args = small_files, max_int_removals = 1)

        self.do_remove_files()

    def do_remove_files(self):
        # Shared scenario for the remove tests: checks that BulkData
        # can remove data that is spread over multiple files.  Meant
        # to be a fairly comprehensive test of remove functionality.
        # The caller is expected to have (re)started the server first.

        def listing(*spans):
            # Expected "list -l --detail" output for /newton/prep,
            # given (start, end) times of day on 23 Mar 2012.
            rows = ["  [ Fri, 23 Mar 2012 %s +0000"
                    " -> Fri, 23 Mar 2012 %s +0000 ]\n" % span
                    for span in spans]
            return "/newton/prep float32_8\n" + "".join(rows)

        def remove_counted(start, end, expected):
            # Remove a range on 23 Mar 2012 with "-c" and check the
            # count that gets printed.
            self.ok("remove -c /newton/prep --start '23 Mar 2012 %s' "
                    "--end '23 Mar 2012 %s'" % (start, end))
            self.match(expected)

        count_everything = ("extract -c /newton/prep "
                            "--start 2000-01-01 --end 2020-01-01")
        remove_from_1002 = ("remove /newton/prep "
                            "--start '23 Mar 2012 10:02:00' --end '2020-01-01'")

        # Insert data.  Just for fun, insert out of order: the 10:02
        # file goes in before the 10:00 file.
        self.ok("create /newton/prep float32_8")
        os.environ['TZ'] = "UTC"
        for stamp in ("1002", "1000"):
            self.ok("insert -t --filename --rate 120 /newton/prep "
                    "tests/data/prep-20120323T" + stamp)

        # Should take up about 2.8 MB here (including directory entries)
        du_before = nilmdb.utils.diskusage.du(testdb)

        # Make sure we have the data we expect
        self.ok("list -l --detail")
        self.match(listing(("10:00:00.000000", "10:01:59.991668"),
                           ("10:02:00.000000", "10:03:59.991668")))

        # Remove various chunks of prep data and make sure they're
        # gone.  Each entry is (start, end, expected "-c" output).
        self.ok(count_everything)
        self.match("28800\n")

        for (start, end, expected) in [
                ("10:00:30", "10:03:30", "21600\n"),
                ("10:00:10", "10:00:20", "1200\n"),
                ("10:00:05", "10:00:25", "1200\n"),
                ("10:03:50", "10:06:50", "1200\n") ]:
            remove_counted(start, end, expected)

        self.ok(count_everything)
        self.match("3600\n")

        # See the missing chunks in list output
        self.ok("list -l --detail")
        self.match(listing(("10:00:00.000000", "10:00:05.000000"),
                           ("10:00:25.000000", "10:00:30.000000"),
                           ("10:03:30.000000", "10:03:50.000000")))

        # We have 1/8 of the data that we had before, so the file size
        # should have dropped below 1/4 of what it used to be
        du_after = nilmdb.utils.diskusage.du(testdb)
        lt_(du_after, (du_before / 4))

        # Remove anything that came from the 10:02 data file
        self.ok(remove_from_1002)

        # Re-insert 19 lines from that file, then remove them again.
        # With the specific file_size above, this will cause the last
        # file in the bulk data storage to be exactly file_size large,
        # so removing the data should also remove that last file.
        self.ok("insert --timestamp -f --rate 120 /newton/prep "
                "tests/data/prep-20120323T1002-first19lines")
        self.ok(remove_from_1002)

        # Shut down and restart server, to force nrows to get refreshed.
        server_stop()
        server_start()

        # Re-add the full 10:02 data file.  This tests adding new data once
        # we removed data near the end.
        self.ok("insert -t -f -r 120 /newton/prep "
                "tests/data/prep-20120323T1002")

        # See if we can extract it all
        self.ok("extract /newton/prep --start 2000-01-01 --end 2020-01-01")
        lines_(self.captured, 15600)

    def test_15_intervals_diff(self):
        # Test the "intervals" command, with and without "--diff".
        os.environ['TZ'] = "UTC"

        # How midnight on 1 January of each year shows up in the
        # expected output below.
        jan1 = {
            2000: "Sat, 01 Jan 2000 00:00:00.000000 +0000",
            2001: "Mon, 01 Jan 2001 00:00:00.000000 +0000",
            2002: "Tue, 01 Jan 2002 00:00:00.000000 +0000",
            2004: "Thu, 01 Jan 2004 00:00:00.000000 +0000",
            2005: "Sat, 01 Jan 2005 00:00:00.000000 +0000",
        }

        def span(first, last):
            # One expected output line, from 1 Jan <first> to 1 Jan <last>
            return "[ " + jan1[first] + " -> " + jan1[last] + " ]\n"

        def insert(stream, first, last):
            # Insert an interval with no rows (input is /dev/null)
            # covering 1 Jan <first> to 1 Jan <last>.
            self.ok("insert -s 01-01-%d -e 01-01-%d %s /dev/null"
                    % (first, last, stream))

        diff = "intervals /diff/1 -d /diff/2"

        # A freshly created stream has no intervals, and diffing it
        # against itself prints nothing either.
        self.ok("create /diff/1 uint8_1")
        self.match("")
        for command in ("intervals /diff/1",
                        "intervals /diff/1 --diff /diff/1",
                        "intervals --diff /diff/1 /diff/1"):
            self.ok(command)
            self.match("")

        # /diff/2 does not exist yet
        self.fail("intervals /diff/2")
        self.fail(diff)

        self.ok("create /diff/2 uint8_1")
        self.ok("intervals -T /diff/1 -d /diff/2")
        self.match("")
        insert("/diff/1", 2000, 2001)

        self.ok("intervals /diff/1")
        self.match(span(2000, 2001))

        # Present in /diff/1 only, so the diff reports it...
        self.ok(diff)
        self.match(span(2000, 2001))

        # ...until /diff/2 has the same interval.
        insert("/diff/2", 2000, 2001)
        self.ok(diff)
        self.match("")

        insert("/diff/1", 2001, 2002)
        insert("/diff/2", 2002, 2003)
        self.ok(diff)
        self.match(span(2001, 2002))

        insert("/diff/1", 2004, 2005)
        self.ok(diff)
        self.match(span(2001, 2002) + span(2004, 2005))

        # An end before the start is rejected; a valid window only
        # shows what falls inside it.
        self.fail("intervals -s 01-01-2003 -e 01-01-2000 /diff/1 -d /diff/2")
        self.ok("intervals -s 01-01-2003 -e 01-01-2008 /diff/1 -d /diff/2")
        self.match(span(2004, 2005))

        # optimize: after filling the 2002-2004 gap, the plain output
        # still shows two touching intervals, and -o shows one.
        insert("/diff/1", 2002, 2004)
        self.ok("intervals /diff/1")
        self.match(span(2000, 2004) + span(2004, 2005))
        self.ok("intervals /diff/1 --optimize")
        self.ok("intervals /diff/1 -o")
        self.match(span(2000, 2005))

        for stream in ("/diff/1", "/diff/2"):
            self.ok("destroy -R " + stream)

    def test_16_rename(self):
        # Test renaming.  Start over with an empty database, and force
        # the file size smaller so we get more files.
        server_stop()
        recursive_unlink(testdb)
        server_start(bulkdata_args = dict(file_size = 920, # 23 rows per file
                                          files_per_dir = 3))

        # Fill data
        self.ok("create /newton/prep float32_8")
        os.environ['TZ'] = "UTC"
        with open("tests/data/prep-20120323T1004-timestamped") as datafile:
            self.ok("insert -s 20120323T1004 -e 20120323T1006 /newton/prep",
                    datafile)

        # Extract it, and keep the output to compare against what the
        # stream returns once all the renaming is done.
        window = "--start '2000-01-01' --end '2012-03-23 10:04:01'"
        self.ok("extract /newton/prep " + window)
        extract_before = self.captured

        def check_path(*components):
            # Verify the paths look right on disk: the first directory
            # that os.walk reports as holding a "_format" file must be
            # the one the stream is expected to live in.
            seek = os.path.join(testdb, "data", *components)
            holder = next((entry[0] for entry in os.walk(testdb)
                           if "_format" in entry[2]), None)
            if holder is None:
                raise AssertionError("data not found at " + seek)
            if holder != seek:
                raise AssertionError("data also found at " + holder)
            # Verify "list" output
            self.ok("list -l")
            self.match("/" + "/".join(components) + " float32_8\n")

        def refused(command, reason):
            # The command has to fail, with the given text in its output
            self.fail(command)
            self.contain(reason)

        # Lots of renames
        check_path("newton", "prep")

        # Renames that must be rejected
        refused("rename /newton/prep /newton/prep",
                "old and new paths are the same")
        check_path("newton", "prep")
        refused("rename /newton/prep /newton",
                "path must contain at least one folder")
        refused("rename /newton/prep /newton/prep/",
                "invalid path")

        # Move the stream along this chain, one hop at a time,
        # checking the disk layout and the listing after every hop.
        hops = [ "/newton/prep",
                 "/newton/foo/1",
                 "/newton/foo",
                 "/totally/different/thing",
                 "/totally/something",
                 "/totally/something/cool",
                 "/foo/bar" ]
        for (old, new) in zip(hops, hops[1:]):
            self.ok("rename " + old + " " + new)
            check_path(*new.split("/")[1:])

        # Destinations that collide with a second stream
        self.ok("create /xxx/yyy/zzz float32_8")
        refused("rename /foo/bar /xxx/yyy",
                "subdirs of this path already exist")
        refused("rename /foo/bar /xxx/yyy/zzz",
                "stream already exists at this path")
        refused("rename /foo/bar /xxx/yyy/zzz/www",
                "path is subdir of existing node")
        self.ok("rename /foo/bar /xxx/yyy/mmm")
        self.ok("destroy -R /xxx/yyy/zzz")
        check_path("xxx", "yyy", "mmm")

        # Extract it at the final path
        self.ok("extract /xxx/yyy/mmm " + window)
        eq_(self.captured, extract_before)

        self.ok("destroy -R /xxx/yyy/mmm")

        # Make sure temporary rename dirs weren't left around, and
        # that none of the old locations survived either
        for entry in os.walk(testdb):
            where = entry[0]
            if "rename-" in where:
                raise AssertionError("temporary directories not cleaned up")
            if any(old in where for old in ("totally", "newton")):
                raise AssertionError("old directories not cleaned up")

        # Back to a server with default settings
        server_stop()
        server_start()