Update git URL

Update README for Python 3.8 and newer
rocket: suppress build warnings
2020-08-03 16:48:54 -04:00 · 2020-08-03 16:36:10 -04:00 · 2020-08-03 16:27:55 -04:00 · 2019-08-30 17:14:50 -04:00 · 2019-08-30 16:29:39 -04:00 · 2019-08-30 16:29:39 -04:00
83 changed files with 5295 additions and 3393 deletions
--- a/.coveragerc
+++ b/.coveragerc
@@ -1,10 +1,11 @@
 # -*- conf -*-

 [run]
-# branch = True
+branch = True

 [report]
 exclude_lines =
 	pragma: no cover
 	if 0:
-omit = nilmdb/utils/datetime_tz*,nilmdb/scripts,nilmdb/_version.py
+omit = nilmdb/scripts,nilmdb/_version.py,nilmdb/fsck
+show_missing = True
--- a/.gitignore
+++ b/.gitignore
@@ -4,6 +4,7 @@ tests/*testdb/
 db/

 # Compiled / cythonized files
+README.html
 docs/*.html
 build/
 *.pyc
@@ -15,10 +16,8 @@ nilmdb/server/rbtree.c
 # Setup junk
 dist/
 nilmdb.egg-info/
-
-# This gets generated as needed by setup.py
-MANIFEST.in
-MANIFEST
+venv/
+.eggs/

 # Misc
 timeit*out
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,250 +0,0 @@
-# -*- conf -*-
-[MASTER]
-
-# Specify a configuration file.
-#rcfile=
-
-# Python code to execute, usually for sys.path manipulation such as
-# pygtk.require().
-#init-hook=
-
-# Profiled execution.
-profile=no
-
-# Add files or directories to the blacklist. They should be base names, not
-# paths.
-ignore=datetime_tz
-
-# Pickle collected data for later comparisons.
-persistent=no
-
-# List of plugins (as comma separated values of python modules names) to load,
-# usually to register additional checkers.
-load-plugins=
-
-
-[MESSAGES CONTROL]
-
-# Enable the message, report, category or checker with the given id(s). You can
-# either give multiple identifier separated by comma (,) or put this option
-# multiple time.
-#enable=
-
-# Disable the message, report, category or checker with the given id(s). You
-# can either give multiple identifier separated by comma (,) or put this option
-# multiple time (only on the command line, not in the configuration file where
-# it should appear only once).
-disable=C0111,R0903,R0201,R0914,R0912,W0142,W0703,W0702
-
-
-[REPORTS]
-
-# Set the output format. Available formats are text, parseable, colorized, msvs
-# (visual studio) and html
-output-format=parseable
-
-# Include message's id in output
-include-ids=yes
-
-# Put messages in a separate file for each module / package specified on the
-# command line instead of printing them on stdout. Reports (if any) will be
-# written in a file name "pylint_global.[txt|html]".
-files-output=no
-
-# Tells whether to display a full report or only the messages
-reports=yes
-
-# Python expression which should return a note less than 10 (10 is the highest
-# note). You have access to the variables errors warning, statement which
-# respectively contain the number of errors / warnings messages and the total
-# number of statements analyzed. This is used by the global evaluation report
-# (RP0004).
-evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
-
-# Add a comment according to your evaluation note. This is used by the global
-# evaluation report (RP0004).
-comment=no
-
-
-[SIMILARITIES]
-
-# Minimum lines number of a similarity.
-min-similarity-lines=4
-
-# Ignore comments when computing similarities.
-ignore-comments=yes
-
-# Ignore docstrings when computing similarities.
-ignore-docstrings=yes
-
-
-[TYPECHECK]
-
-# Tells whether missing members accessed in mixin class should be ignored. A
-# mixin class is detected if its name ends with "mixin" (case insensitive).
-ignore-mixin-members=yes
-
-# List of classes names for which member attributes should not be checked
-# (useful for classes with attributes dynamically set).
-ignored-classes=SQLObject
-
-# When zope mode is activated, add a predefined set of Zope acquired attributes
-# to generated-members.
-zope=no
-
-# List of members which are set dynamically and missed by pylint inference
-# system, and so shouldn't trigger E0201 when accessed. Python regular
-# expressions are accepted.
-generated-members=REQUEST,acl_users,aq_parent
-
-
-[FORMAT]
-
-# Maximum number of characters on a single line.
-max-line-length=80
-
-# Maximum number of lines in a module
-max-module-lines=1000
-
-# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
-# tab).
-indent-string='    '
-
-
-[MISCELLANEOUS]
-
-# List of note tags to take in consideration, separated by a comma.
-notes=FIXME,XXX,TODO
-
-
-[VARIABLES]
-
-# Tells whether we should check for unused import in __init__ files.
-init-import=no
-
-# A regular expression matching the beginning of the name of dummy variables
-# (i.e. not used).
-dummy-variables-rgx=_|dummy
-
-# List of additional names supposed to be defined in builtins. Remember that
-# you should avoid to define new builtins when possible.
-additional-builtins=
-
-
-[BASIC]
-
-# Required attributes for module, separated by a comma
-required-attributes=
-
-# List of builtins function names that should not be used, separated by a comma
-bad-functions=apply,input
-
-# Regular expression which should only match correct module names
-module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
-
-# Regular expression which should only match correct module level names
-const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__)|version)$
-
-# Regular expression which should only match correct class names
-class-rgx=[A-Z_][a-zA-Z0-9]+$
-
-# Regular expression which should only match correct function names
-function-rgx=[a-z_][a-z0-9_]{0,30}$
-
-# Regular expression which should only match correct method names
-method-rgx=[a-z_][a-z0-9_]{0,30}$
-
-# Regular expression which should only match correct instance attribute names
-attr-rgx=[a-z_][a-z0-9_]{0,30}$
-
-# Regular expression which should only match correct argument names
-argument-rgx=[a-z_][a-z0-9_]{0,30}$
-
-# Regular expression which should only match correct variable names
-variable-rgx=[a-z_][a-z0-9_]{0,30}$
-
-# Regular expression which should only match correct list comprehension /
-# generator expression variable names
-inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
-
-# Good variable names which should always be accepted, separated by a comma
-good-names=i,j,k,ex,Run,_
-
-# Bad variable names which should always be refused, separated by a comma
-bad-names=foo,bar,baz,toto,tutu,tata
-
-# Regular expression which should only match functions or classes name which do
-# not require a docstring
-no-docstring-rgx=__.*__
-
-
-[CLASSES]
-
-# List of interface methods to ignore, separated by a comma. This is used for
-# instance to not check methods defines in Zope's Interface base class.
-ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
-
-# List of method names used to declare (i.e. assign) instance attributes.
-defining-attr-methods=__init__,__new__,setUp
-
-# List of valid names for the first argument in a class method.
-valid-classmethod-first-arg=cls
-
-
-[DESIGN]
-
-# Maximum number of arguments for function / method
-max-args=5
-
-# Argument names that match this expression will be ignored. Default to name
-# with leading underscore
-ignored-argument-names=_.*
-
-# Maximum number of locals for function / method body
-max-locals=15
-
-# Maximum number of return / yield for function / method body
-max-returns=6
-
-# Maximum number of branch for function / method body
-max-branchs=12
-
-# Maximum number of statements in function / method body
-max-statements=50
-
-# Maximum number of parents for a class (see R0901).
-max-parents=7
-
-# Maximum number of attributes for a class (see R0902).
-max-attributes=7
-
-# Minimum number of public methods for a class (see R0903).
-min-public-methods=2
-
-# Maximum number of public methods for a class (see R0904).
-max-public-methods=20
-
-
-[IMPORTS]
-
-# Deprecated modules which should not be used, separated by a comma
-deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
-
-# Create a graph of every (i.e. internal and external) dependencies in the
-# given file (report RP0402 must not be disabled)
-import-graph=
-
-# Create a graph of external dependencies in the given file (report RP0402 must
-# not be disabled)
-ext-import-graph=
-
-# Create a graph of internal dependencies in the given file (report RP0402 must
-# not be disabled)
-int-import-graph=
-
-
-[EXCEPTIONS]
-
-# Exceptions that will emit a warning when being caught. Defaults to
-# "Exception"
-overgeneral-exceptions=Exception
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -0,0 +1,29 @@
+# Root
+include README.txt
+include setup.cfg
+include setup.py
+include versioneer.py
+include Makefile
+include .coveragerc
+include .pylintrc
+include requirements.txt
+
+# Cython files -- include .pyx source, but not the generated .c files
+# (Downstream systems must have cython installed in order to build)
+recursive-include nilmdb/server *.pyx *.pyxdep *.pxd
+exclude nilmdb/server/interval.c
+exclude nilmdb/server/rbtree.c
+
+# Version
+include nilmdb/_version.py
+
+# Tests
+recursive-include tests *.py
+recursive-include tests/data *
+include tests/test.order
+
+# Docs
+recursive-include docs Makefile *.md
+
+# Extras
+recursive-include extras *
--- a/Makefile
+++ b/Makefile
@@ -2,45 +2,49 @@
 all: test

 version:
-	python setup.py version
+	python3 setup.py version

 build:
-	python setup.py build_ext --inplace
+	python3 setup.py build_ext --inplace

 dist: sdist
 sdist:
-	python setup.py sdist
+	python3 setup.py sdist

 install:
-	python setup.py install
+	python3 setup.py install

 develop:
-	python setup.py develop
+	python3 setup.py develop

 docs:
 	make -C docs

+ctrl: flake
+flake:
+	flake8 nilmdb
 lint:
-	pylint --rcfile=.pylintrc nilmdb
+	pylint3 --rcfile=setup.cfg nilmdb

 test:
-ifeq ($(INSIDE_EMACS), t)
+ifneq ($(INSIDE_EMACS),)
 # Use the slightly more flexible script
-	python setup.py build_ext --inplace
-	python tests/runtests.py
+	python3 setup.py build_ext --inplace
+	python3 tests/runtests.py
 else
 # Let setup.py check dependencies, build stuff, and run the test
-	python setup.py nosetests
+	python3 setup.py nosetests
 endif

 clean::
-	find . -name '*pyc' | xargs rm -f
+	find . -name '*.pyc' -o -name '__pycache__' -print0 | xargs -0 rm -rf
 	rm -f .coverage
 	rm -rf tests/*testdb*
-	rm -rf nilmdb.egg-info/ build/ nilmdb/server/*.so MANIFEST.in
+	rm -rf nilmdb.egg-info/ build/ nilmdb/server/*.so
 	make -C docs clean

 gitclean::
 	git clean -dXf

-.PHONY: all version build dist sdist install docs lint test clean gitclean
+.PHONY: all version build dist sdist install docs test
+.PHONY: ctrl lint flake clean gitclean
--- a/README.md
+++ b/README.md
@@ -0,0 +1,40 @@
+# nilmdb: Non-Intrusive Load Monitor Database
+by Jim Paris <jim@jtan.com>
+
+NilmDB requires Python 3.7 or newer.
+
+## Prerequisites:
+
+    # Runtime and build environments
+    sudo apt install python3 python3-dev python3-venv python3-pip
+
+    # Create a new Python virtual environment to isolate deps.
+    python3 -m venv ../venv
+    source ../venv/bin/activate   # run "deactivate" to leave
+
+    # Install all Python dependencies
+    pip3 install -r requirements.txt
+
+## Test:
+
+    python3 setup.py nosetests
+
+## Install:
+
+Install it into the virtual environment
+
+    python3 setup.py install
+
+If you want to instead install it system-wide, you will also need to
+install the requirements system-wide:
+
+    sudo pip3 install -r requirements.txt
+    sudo python3 setup.py install
+
+## Usage:
+
+    nilmdb-server --help
+    nilmdb-fsck --help
+    nilmtool --help
+
+See docs/wsgi.md for info on setting up a WSGI application in Apache.
--- a/README.txt
+++ b/README.txt
@@ -1,31 +0,0 @@
-nilmdb: Non-Intrusive Load Monitor Database
-by Jim Paris <jim@jtan.com>
-
-Prerequisites:
-
-  # Runtime and build environments
-  sudo apt-get install python2.7 python2.7-dev python-setuptools cython
-
-  # Base NilmDB dependencies
-  sudo apt-get install python-cherrypy3 python-decorator python-simplejson
-  sudo apt-get install python-requests python-dateutil python-tz python-psutil
-
-  # Other dependencies (required by some modules)
-  sudo apt-get install python-numpy
-
-  # Tools for running tests
-  sudo apt-get install python-nose python-coverage
-
-Test:
-  python setup.py nosetests
-
-Install:
-
-  python setup.py install
-
-Usage:
-
-  nilmdb-server --help
-  nilmtool --help
-
-See docs/wsgi.md for info on setting up a WSGI application in Apache.
--- a/docs/design.md
+++ b/docs/design.md
@@ -421,3 +421,49 @@ and has all of the same functions.  It adds three new functions:
 It is significantly faster!  It is about 20 times faster to decimate a
 stream with `nilm-decimate` when the filter code is using the new
 binary/numpy interface.
+
+
+WSGI interface & chunked requests
+---------------------------------
+
+mod_wsgi requires "WSGIChunkedRequest On" to handle
+"Transfer-Encoding: chunked" requests.  However, `/stream/insert`
+doesn't handle this correctly right now, because:
+
+- The `cherrypy.request.body.read()` call needs to be fixed for chunked requests
+
+- We don't want to just buffer endlessly in the server, and it will
+  require some thought on how to handle data in chunks (what to do about
+  interval endpoints).
+
+It is probably better to just keep the endpoint management on the client
+side, so leave "WSGIChunkedRequest off" for now.
+
+
+Unicode & character encoding
+----------------------------
+
+Stream data is passed back and forth as raw `bytes` objects in most
+places, including the `nilmdb.client` and command-line interfaces.
+This is done partially for performance reasons, and partially to
+support the binary insert/extract options, where character-set encoding
+would not apply.
+
+For the HTTP server, the raw bytes transferred over HTTP are interpreted
+as follows:
+- For `/stream/insert`, the client-provided `Content-Type` is ignored,
+  and the data is read as if it were `application/octet-stream`.
+- For `/stream/extract`, the returned data is `application/octet-stream`.
+- All other endpoints communicate via JSON, which is specified to always
+  be encoded as UTF-8.  This includes:
+    - `/version`
+    - `/dbinfo`
+    - `/stream/list`
+    - `/stream/create`
+    - `/stream/destroy`
+    - `/stream/rename`
+    - `/stream/get_metadata`
+    - `/stream/set_metadata`
+    - `/stream/update_metadata`
+    - `/stream/remove`
+    - `/stream/intervals`
--- a/docs/wsgi.md
+++ b/docs/wsgi.md
@@ -19,12 +19,12 @@ Then, set up Apache with a configuration like:

    <VirtualHost>
        WSGIScriptAlias /nilmdb /home/nilm/nilmdb.wsgi
-        WSGIApplicationGroup nilmdb-appgroup
-        WSGIProcessGroup nilmdb-procgroup
        WSGIDaemonProcess nilmdb-procgroup threads=32 user=nilm group=nilm
+        <Location /nilmdb>
+            WSGIProcessGroup nilmdb-procgroup
+            WSGIApplicationGroup nilmdb-appgroup

            # Access control example:
-        <Location /nilmdb>
            Order deny,allow
            Deny from all
            Allow from 1.2.3.4
--- a/extras/fix-oversize-files.py
+++ b/extras/fix-oversize-files.py
@@ -0,0 +1,50 @@
+#!/usr/bin/python
+
+import os
+import sys
+import pickle
+import argparse
+import fcntl
+import re
+from nilmdb.client.numpyclient import layout_to_dtype
+
+parser = argparse.ArgumentParser(
+    description = """
+Fix database corruption where binary writes caused too much data to be
+written to the file.  Truncates files to the correct length.  This was
+fixed by b98ff1331a515ad47fd3203615e835b529b039f9.
+""")
+parser.add_argument("path", action="store", help='Database root path')
+parser.add_argument("-y", "--yes", action="store_true", help='Fix them')
+args = parser.parse_args()
+
+lock = os.path.join(args.path, "data.lock")
+with open(lock, "w") as f:
+    fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+
+    fix = {}
+
+    for (path, dirs, files) in os.walk(args.path):
+        if "_format" in files:
+            with open(os.path.join(path, "_format")) as format:
+                fmt = pickle.load(format)
+                rowsize = layout_to_dtype(fmt["layout"]).itemsize
+                maxsize = rowsize * fmt["rows_per_file"]
+                fix[path] = maxsize
+                if maxsize < 128000000: # sanity check
+                    raise Exception("bad maxsize " + str(maxsize))
+
+    for fixpath in fix:
+        for (path, dirs, files) in os.walk(fixpath):
+            for fn in files:
+                if not re.match("^[0-9a-f]{4,}$", fn):
+                    continue
+                fn = os.path.join(path, fn)
+                size = os.path.getsize(fn)
+                maxsize = fix[fixpath]
+                if size > maxsize:
+                    diff = size - maxsize
+                    print(diff, "too big:", fn)
+                    if args.yes:
+                        with open(fn, "a+") as dbfile:
+                            dbfile.truncate(maxsize)
--- a/nilmdb/__init__.py
+++ b/nilmdb/__init__.py
@@ -1,10 +1,5 @@
 """Main NilmDB import"""

-# These aren't imported automatically, because loading the server
-# stuff isn't always necessary.
-#from nilmdb.server import NilmDB, Server
-#from nilmdb.client import Client
-
-from nilmdb._version import get_versions
+from ._version import get_versions
 __version__ = get_versions()['version']
 del get_versions
--- a/nilmdb/_version.py
+++ b/nilmdb/_version.py
@@ -1,197 +1,520 @@

-IN_LONG_VERSION_PY = True
 # This file helps to compute a version number in source trees obtained from
 # git-archive tarball (such as those provided by githubs download-from-tag
-# feature). Distribution tarballs (build by setup.py sdist) and build
+# feature). Distribution tarballs (built by setup.py sdist) and build
 # directories (produced by setup.py build) will contain a much shorter file
 # that just contains the computed version number.

 # This file is released into the public domain. Generated by
-# versioneer-0.7+ (https://github.com/warner/python-versioneer)
-
-# these strings will be replaced by git during git-archive
-git_refnames = "$Format:%d$"
-git_full = "$Format:%H$"
+# versioneer-0.18 (https://github.com/warner/python-versioneer)

+"""Git implementation of _version.py."""

+import errno
+import os
+import re
 import subprocess
 import sys

-def run_command(args, cwd=None, verbose=False):
+
+def get_keywords():
+    """Get the keywords needed to look up the version information."""
+    # these strings will be replaced by git during git-archive.
+    # setup.py/versioneer.py will grep for the variable names, so they must
+    # each be defined on a line of their own. _version.py will just call
+    # get_keywords().
+    git_refnames = "$Format:%d$"
+    git_full = "$Format:%H$"
+    git_date = "$Format:%ci$"
+    keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
+    return keywords
+
+
+class VersioneerConfig:
+    """Container for Versioneer configuration parameters."""
+
+
+def get_config():
+    """Create, populate and return the VersioneerConfig() object."""
+    # these strings are filled in when 'setup.py versioneer' creates
+    # _version.py
+    cfg = VersioneerConfig()
+    cfg.VCS = "git"
+    cfg.style = "pep440"
+    cfg.tag_prefix = "nilmdb-"
+    cfg.parentdir_prefix = "nilmdb-"
+    cfg.versionfile_source = "nilmdb/_version.py"
+    cfg.verbose = False
+    return cfg
+
+
+class NotThisMethod(Exception):
+    """Exception raised if a method is not valid for the current scenario."""
+
+
+LONG_VERSION_PY = {}
+HANDLERS = {}
+
+
+def register_vcs_handler(vcs, method):  # decorator
+    """Decorator to mark a method as the handler for a particular VCS."""
+    def decorate(f):
+        """Store f in HANDLERS[vcs][method]."""
+        if vcs not in HANDLERS:
+            HANDLERS[vcs] = {}
+        HANDLERS[vcs][method] = f
+        return f
+    return decorate
+
+
+def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
+                env=None):
+    """Call the given command(s)."""
+    assert isinstance(commands, list)
+    p = None
+    for c in commands:
        try:
+            dispcmd = str([c] + args)
            # remember shell=False, so use git.cmd on windows, not just git
-        p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd)
+            p = subprocess.Popen([c] + args, cwd=cwd, env=env,
+                                 stdout=subprocess.PIPE,
+                                 stderr=(subprocess.PIPE if hide_stderr
+                                         else None))
+            break
        except EnvironmentError:
            e = sys.exc_info()[1]
+            if e.errno == errno.ENOENT:
+                continue
            if verbose:
-            print("unable to run %s" % args[0])
+                print("unable to run %s" % dispcmd)
                print(e)
-        return None
+            return None, None
+    else:
+        if verbose:
+            print("unable to find command, tried %s" % (commands,))
+        return None, None
    stdout = p.communicate()[0].strip()
-    if sys.version >= '3':
+    if sys.version_info[0] >= 3:
        stdout = stdout.decode()
    if p.returncode != 0:
        if verbose:
-            print("unable to run %s (error)" % args[0])
-        return None
-    return stdout
+            print("unable to run %s (error)" % dispcmd)
+            print("stdout was %s" % stdout)
+        return None, p.returncode
+    return stdout, p.returncode


-import sys
-import re
-import os.path
+def versions_from_parentdir(parentdir_prefix, root, verbose):
+    """Try to determine the version from the parent directory name.

-def get_expanded_variables(versionfile_source):
+    Source tarballs conventionally unpack into a directory that includes both
+    the project name and a version string. We will also support searching up
+    two directory levels for an appropriately named parent directory
+    """
+    rootdirs = []
+
+    for i in range(3):
+        dirname = os.path.basename(root)
+        if dirname.startswith(parentdir_prefix):
+            return {"version": dirname[len(parentdir_prefix):],
+                    "full-revisionid": None,
+                    "dirty": False, "error": None, "date": None}
+        else:
+            rootdirs.append(root)
+            root = os.path.dirname(root)  # up a level
+
+    if verbose:
+        print("Tried directories %s but none started with prefix %s" %
+              (str(rootdirs), parentdir_prefix))
+    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
+
+
+@register_vcs_handler("git", "get_keywords")
+def git_get_keywords(versionfile_abs):
+    """Extract version information from the given file."""
    # the code embedded in _version.py can just fetch the value of these
-    # variables. When used from setup.py, we don't want to import
-    # _version.py, so we do it with a regexp instead. This function is not
-    # used from _version.py.
-    variables = {}
+    # keywords. When used from setup.py, we don't want to import _version.py,
+    # so we do it with a regexp instead. This function is not used from
+    # _version.py.
+    keywords = {}
    try:
-        for line in open(versionfile_source,"r").readlines():
+        f = open(versionfile_abs, "r")
+        for line in f.readlines():
            if line.strip().startswith("git_refnames ="):
                mo = re.search(r'=\s*"(.*)"', line)
                if mo:
-                    variables["refnames"] = mo.group(1)
+                    keywords["refnames"] = mo.group(1)
            if line.strip().startswith("git_full ="):
                mo = re.search(r'=\s*"(.*)"', line)
                if mo:
-                    variables["full"] = mo.group(1)
+                    keywords["full"] = mo.group(1)
+            if line.strip().startswith("git_date ="):
+                mo = re.search(r'=\s*"(.*)"', line)
+                if mo:
+                    keywords["date"] = mo.group(1)
+        f.close()
    except EnvironmentError:
        pass
-    return variables
+    return keywords

-def versions_from_expanded_variables(variables, tag_prefix, verbose=False):
-    refnames = variables["refnames"].strip()
+
+@register_vcs_handler("git", "keywords")
+def git_versions_from_keywords(keywords, tag_prefix, verbose):
+    """Get version information from git keywords."""
+    if not keywords:
+        raise NotThisMethod("no keywords at all, weird")
+    date = keywords.get("date")
+    if date is not None:
+        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
+        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
+        # -like" string, which we must then edit to make compliant), because
+        # it's been around since git-1.5.3, and it's too difficult to
+        # discover which version we're using, or to work around using an
+        # older one.
+        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
-            print("variables are unexpanded, not using")
-        return {} # unexpanded, so not in an unpacked git-archive tarball
+            print("keywords are unexpanded, not using")
+        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = set([r.strip() for r in refnames.strip("()").split(",")])
-    for ref in list(refs):
-        if not re.search(r'\d', ref):
+    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
+    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
+    TAG = "tag: "
+    tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
+    if not tags:
+        # Either we're using git < 1.8.3, or there really are no tags. We use
+        # a heuristic: assume all version tags have a digit. The old git %d
+        # expansion behaves like git log --decorate=short and strips out the
+        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
+        # between branches and tags. By ignoring refnames without digits, we
+        # filter out many common branch names like "release" and
+        # "stabilization", as well as "HEAD" and "master".
+        tags = set([r for r in refs if re.search(r'\d', r)])
        if verbose:
-                print("discarding '%s', no digits" % ref)
-            refs.discard(ref)
-            # Assume all version tags have a digit. git's %d expansion
-            # behaves like git log --decorate=short and strips out the
-            # refs/heads/ and refs/tags/ prefixes that would let us
-            # distinguish between branches and tags. By ignoring refnames
-            # without digits, we filter out many common branch names like
-            # "release" and "stabilization", as well as "HEAD" and "master".
+            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
-        print("remaining refs: %s" % ",".join(sorted(refs)))
-    for ref in sorted(refs):
+        print("likely tags: %s" % ",".join(sorted(tags)))
+    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
            if verbose:
                print("picking %s" % r)
-            return { "version": r,
-                     "full": variables["full"].strip() }
-    # no suitable tags, so we use the full revision id
+            return {"version": r,
+                    "full-revisionid": keywords["full"].strip(),
+                    "dirty": False, "error": None,
+                    "date": date}
+    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
-        print("no suitable tags, using full revision id")
-    return { "version": variables["full"].strip(),
-             "full": variables["full"].strip() }
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
+            "full-revisionid": keywords["full"].strip(),
+            "dirty": False, "error": "no suitable tags", "date": None}

-def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):
-    # this runs 'git' from the root of the source tree. That either means
-    # someone ran a setup.py command (and this code is in versioneer.py, so
-    # IN_LONG_VERSION_PY=False, thus the containing directory is the root of
-    # the source tree), or someone ran a project-specific entry point (and
-    # this code is in _version.py, so IN_LONG_VERSION_PY=True, thus the
-    # containing directory is somewhere deeper in the source tree). This only
-    # gets called if the git-archive 'subst' variables were *not* expanded,
-    # and _version.py hasn't already been rewritten with a short version
-    # string, meaning we're inside a checked out source tree.

-    try:
-        here = os.path.abspath(__file__)
-    except NameError:
-        # some py2exe/bbfreeze/non-CPython implementations don't do __file__
-        return {} # not always correct
+@register_vcs_handler("git", "pieces_from_vcs")
+def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
+    """Get version from 'git describe' in the root of the source tree.

-    # versionfile_source is the relative path from the top of the source tree
-    # (where the .git directory might live) to this file. Invert this to find
-    # the root from __file__.
-    root = here
-    if IN_LONG_VERSION_PY:
-        for i in range(len(versionfile_source.split("/"))):
-            root = os.path.dirname(root)
-    else:
-        root = os.path.dirname(here)
-    if not os.path.exists(os.path.join(root, ".git")):
-        if verbose:
-            print("no .git in %s" % root)
-        return {}
-
-    GIT = "git"
+    This only gets called if the git-archive 'subst' keywords were *not*
+    expanded, and _version.py hasn't already been rewritten with a short
+    version string, meaning we're inside a checked out source tree.
+    """
+    GITS = ["git"]
    if sys.platform == "win32":
-        GIT = "git.cmd"
-    stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"],
+        GITS = ["git.cmd", "git.exe"]
+
+    out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
+                          hide_stderr=True)
+    if rc != 0:
+        if verbose:
+            print("Directory %s not under git control" % root)
+        raise NotThisMethod("'git rev-parse --git-dir' returned error")
+
+    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
+    # if there isn't one, this yields HEX[-dirty] (no NUM)
+    describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
+                                          "--always", "--long",
+                                          "--match", "%s*" % tag_prefix],
                                   cwd=root)
-    if stdout is None:
-        return {}
-    if not stdout.startswith(tag_prefix):
+    # --long was added in git-1.5.5
+    if describe_out is None:
+        raise NotThisMethod("'git describe' failed")
+    describe_out = describe_out.strip()
+    full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
+    if full_out is None:
+        raise NotThisMethod("'git rev-parse' failed")
+    full_out = full_out.strip()
+
+    pieces = {}
+    pieces["long"] = full_out
+    pieces["short"] = full_out[:7]  # maybe improved later
+    pieces["error"] = None
+
+    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
+    # TAG might have hyphens.
+    git_describe = describe_out
+
+    # look for -dirty suffix
+    dirty = git_describe.endswith("-dirty")
+    pieces["dirty"] = dirty
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" in git_describe:
+        # TAG-NUM-gHEX
+        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+        if not mo:
+            # unparseable. Maybe git-describe is misbehaving?
+            pieces["error"] = ("unable to parse git-describe output: '%s'"
+                               % describe_out)
+            return pieces
+
+        # tag
+        full_tag = mo.group(1)
+        if not full_tag.startswith(tag_prefix):
            if verbose:
-            print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix))
-        return {}
-    tag = stdout[len(tag_prefix):]
-    stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=root)
-    if stdout is None:
-        return {}
-    full = stdout.strip()
-    if tag.endswith("-dirty"):
-        full += "-dirty"
-    return {"version": tag, "full": full}
+                fmt = "tag '%s' doesn't start with prefix '%s'"
+                print(fmt % (full_tag, tag_prefix))
+            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
+                               % (full_tag, tag_prefix))
+            return pieces
+        pieces["closest-tag"] = full_tag[len(tag_prefix):]

+        # distance: number of commits since tag
+        pieces["distance"] = int(mo.group(2))
+
+        # commit: short hex revision ID
+        pieces["short"] = mo.group(3)

-def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False):
-    if IN_LONG_VERSION_PY:
-        # We're running from _version.py. If it's from a source tree
-        # (execute-in-place), we can work upwards to find the root of the
-        # tree, and then check the parent directory for a version string. If
-        # it's in an installed application, there's no hope.
-        try:
-            here = os.path.abspath(__file__)
-        except NameError:
-            # py2exe/bbfreeze/non-CPython don't have __file__
-            return {} # without __file__, we have no hope
-        # versionfile_source is the relative path from the top of the source
-        # tree to _version.py. Invert this to find the root from __file__.
-        root = here
-        for i in range(len(versionfile_source.split("/"))):
-            root = os.path.dirname(root)
    else:
-        # we're running from versioneer.py, which means we're running from
-        # the setup.py in a source tree. sys.argv[0] is setup.py in the root.
-        here = os.path.abspath(sys.argv[0])
-        root = os.path.dirname(here)
+        # HEX: no tags
+        pieces["closest-tag"] = None
+        count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
+                                    cwd=root)
+        pieces["distance"] = int(count_out)  # total number of commits

-    # Source tarballs conventionally unpack into a directory that includes
-    # both the project name and a version string.
-    dirname = os.path.basename(root)
-    if not dirname.startswith(parentdir_prefix):
-        if verbose:
-            print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" %
-                  (root, dirname, parentdir_prefix))
-        return None
-    return {"version": dirname[len(parentdir_prefix):], "full": ""}
+    # commit date: see ISO-8601 comment in git_versions_from_keywords()
+    date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
+                       cwd=root)[0].strip()
+    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

-tag_prefix = "nilmdb-"
-parentdir_prefix = "nilmdb-"
-versionfile_source = "nilmdb/_version.py"
+    return pieces

-def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
-    variables = { "refnames": git_refnames, "full": git_full }
-    ver = versions_from_expanded_variables(variables, tag_prefix, verbose)
-    if not ver:
-        ver = versions_from_vcs(tag_prefix, versionfile_source, verbose)
-    if not ver:
-        ver = versions_from_parentdir(parentdir_prefix, versionfile_source,
+
+def plus_or_dot(pieces):
+    """Return a + if we don't already have one, else return a ."""
+    if "+" in pieces.get("closest-tag", ""):
+        return "."
+    return "+"
+
+
+def render_pep440(pieces):
+    """Build up version string, with post-release "local version identifier".
+
+    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
+    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
+
+    Exceptions:
+    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += plus_or_dot(pieces)
+            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
+                                          pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_pre(pieces):
+    """TAG[.post.devDISTANCE] -- No -dirty.
+
+    Exceptions:
+    1: no tags. 0.post.devDISTANCE
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += ".post.dev%d" % pieces["distance"]
+    else:
+        # exception #1
+        rendered = "0.post.dev%d" % pieces["distance"]
+    return rendered
+
+
+def render_pep440_post(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX] .
+
+    The ".dev0" means dirty. Note that .dev0 sorts backwards
+    (a dirty tree will appear "older" than the corresponding clean one),
+    but you shouldn't be releasing software with -dirty anyways.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+    return rendered
+
+
+def render_pep440_old(pieces):
+    """TAG[.postDISTANCE[.dev0]] .
+
+    The ".dev0" means dirty.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+    return rendered
+
+
+def render_git_describe(pieces):
+    """TAG[-DISTANCE-gHEX][-dirty].
+
+    Like 'git describe --tags --dirty --always'.
+
+    Exceptions:
+    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render_git_describe_long(pieces):
+    """TAG-DISTANCE-gHEX[-dirty].
+
+    Like 'git describe --tags --dirty --always --long'.
+    The distance/hash is unconditional.
+
+    Exceptions:
+    1: no tags. HEX[-dirty]  (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render(pieces, style):
+    """Render the given version pieces into the requested style."""
+    if pieces["error"]:
+        return {"version": "unknown",
+                "full-revisionid": pieces.get("long"),
+                "dirty": None,
+                "error": pieces["error"],
+                "date": None}
+
+    if not style or style == "default":
+        style = "pep440"  # the default
+
+    if style == "pep440":
+        rendered = render_pep440(pieces)
+    elif style == "pep440-pre":
+        rendered = render_pep440_pre(pieces)
+    elif style == "pep440-post":
+        rendered = render_pep440_post(pieces)
+    elif style == "pep440-old":
+        rendered = render_pep440_old(pieces)
+    elif style == "git-describe":
+        rendered = render_git_describe(pieces)
+    elif style == "git-describe-long":
+        rendered = render_git_describe_long(pieces)
+    else:
+        raise ValueError("unknown style '%s'" % style)
+
+    return {"version": rendered, "full-revisionid": pieces["long"],
+            "dirty": pieces["dirty"], "error": None,
+            "date": pieces.get("date")}
+
+
+def get_versions():
+    """Get version information or return default if unable to do so."""
+    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
+    # __file__, we can work backwards from there to the root. Some
+    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
+    # case we can only use expanded keywords.
+
+    cfg = get_config()
+    verbose = cfg.verbose
+
+    try:
+        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
-    if not ver:
-        ver = default
-    return ver
+    except NotThisMethod:
+        pass

+    try:
+        root = os.path.realpath(__file__)
+        # versionfile_source is the relative path from the top of the source
+        # tree (where the .git directory might live) to this file. Invert
+        # this to find the root from __file__.
+        for i in cfg.versionfile_source.split('/'):
+            root = os.path.dirname(root)
+    except NameError:
+        return {"version": "0+unknown", "full-revisionid": None,
+                "dirty": None,
+                "error": "unable to find root of source tree",
+                "date": None}
+
+    try:
+        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
+        return render(pieces, cfg.style)
+    except NotThisMethod:
+        pass
+
+    try:
+        if cfg.parentdir_prefix:
+            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+    except NotThisMethod:
+        pass
+
+    return {"version": "0+unknown", "full-revisionid": None,
+            "dirty": None,
+            "error": "unable to compute version", "date": None}
--- a/nilmdb/client/client.py
+++ b/nilmdb/client/client.py
@@ -2,25 +2,24 @@

 """Class for performing HTTP client requests via libcurl"""

+import json
+import contextlib
+
 import nilmdb.utils
 import nilmdb.client.httpclient
 from nilmdb.client.errors import ClientError
-
-import re
-import time
-import simplejson as json
-import contextlib
-
 from nilmdb.utils.time import timestamp_to_string, string_to_timestamp

+
 def extract_timestamp(line):
    """Extract just the timestamp from a line of data text"""
    return string_to_timestamp(line.split()[0])

-class Client(object):
+
+class Client():
    """Main client interface to the Nilm database."""

-    def __init__(self, url, post_json = False):
+    def __init__(self, url, post_json=False):
        """Initialize client with given URL.  If post_json is true,
        POST requests are sent with Content-Type 'application/json'
        instead of the default 'x-www-form-urlencoded'."""
@@ -39,7 +38,7 @@ class Client(object):
        if self.post_json:
            # If we're posting as JSON, we don't need to encode it further here
            return data
-        return json.dumps(data, separators=(',',':'))
+        return json.dumps(data, separators=(',', ':'))

    def close(self):
        """Close the connection; safe to call multiple times"""
@@ -58,7 +57,12 @@ class Client(object):
        as a dictionary."""
        return self.http.get("dbinfo")

-    def stream_list(self, path = None, layout = None, extended = False):
+    def stream_list(self, path=None, layout=None, extended=False):
+        """Return a sorted list of [path, layout] lists.  If 'path' or
+        'layout' are specified, only return streams that match those
+        exact values.  If 'extended' is True, the returned lists have
+        extended info, e.g.: [path, layout, extent_min, extent_max,
+        total_rows, total_seconds]."""
        params = {}
        if path is not None:
            params["path"] = path
@@ -66,15 +70,12 @@ class Client(object):
            params["layout"] = layout
        if extended:
            params["extended"] = 1
-        def sort_streams_nicely(x):
-            """Human-friendly sort (/stream/2 before /stream/10)"""
-            num = lambda t: int(t) if t.isdigit() else t
-            key = lambda k: [ num(c) for c in re.split('([0-9]+)', k[0]) ]
-            return sorted(x, key = key)
-        return sort_streams_nicely(self.http.get("stream/list", params))
+        streams = self.http.get("stream/list", params)
+        return nilmdb.utils.sort.sort_human(streams, key=lambda s: s[0])

-    def stream_get_metadata(self, path, keys = None):
-        params = { "path": path }
+    def stream_get_metadata(self, path, keys=None):
+        """Get stream metadata"""
+        params = {"path": path}
        if keys is not None:
            params["key"] = keys
        return self.http.get("stream/get_metadata", params)
@@ -98,22 +99,28 @@ class Client(object):

    def stream_create(self, path, layout):
        """Create a new stream"""
-        params = { "path": path,
-                   "layout" : layout }
+        params = {
+            "path": path,
+            "layout": layout
+        }
        return self.http.post("stream/create", params)

    def stream_destroy(self, path):
        """Delete stream.  Fails if any data is still present."""
-        params = { "path": path }
+        params = {
+            "path": path
+        }
        return self.http.post("stream/destroy", params)

    def stream_rename(self, oldpath, newpath):
        """Rename a stream."""
-        params = { "oldpath": oldpath,
-                   "newpath": newpath }
+        params = {
+            "oldpath": oldpath,
+            "newpath": newpath
+        }
        return self.http.post("stream/rename", params)

-    def stream_remove(self, path, start = None, end = None):
+    def stream_remove(self, path, start=None, end=None):
        """Remove data from the specified time range"""
        params = {
            "path": path
@@ -128,7 +135,7 @@ class Client(object):
        return total

    @contextlib.contextmanager
-    def stream_insert_context(self, path, start = None, end = None):
+    def stream_insert_context(self, path, start=None, end=None):
        """Return a context manager that allows data to be efficiently
        inserted into a stream in a piecewise manner.  Data is
        provided as ASCII lines, and is aggregated and sent to the
@@ -149,22 +156,23 @@ class Client(object):
        ctx = StreamInserter(self, path, start, end)
        yield ctx
        ctx.finalize()
+        ctx.destroy()

-    def stream_insert(self, path, data, start = None, end = None):
+    def stream_insert(self, path, data, start=None, end=None):
        """Insert rows of data into a stream.  data should be a string
        or iterable that provides ASCII data that matches the database
        layout for path.  Data is passed through stream_insert_context,
        so it will be broken into reasonably-sized chunks and
        start/end will be deduced if missing."""
        with self.stream_insert_context(path, start, end) as ctx:
-            if isinstance(data, basestring):
+            if isinstance(data, bytes):
                ctx.insert(data)
            else:
                for chunk in data:
                    ctx.insert(chunk)
        return ctx.last_response

-    def stream_insert_block(self, path, data, start, end, binary = False):
+    def stream_insert_block(self, path, data, start, end, binary=False):
        """Insert a single fixed block of data into the stream.  It is
        sent directly to the server in one block with no further
        processing.
@@ -179,9 +187,9 @@ class Client(object):
        }
        if binary:
            params["binary"] = 1
-        return self.http.put("stream/insert", data, params, binary = binary)
+        return self.http.put("stream/insert", data, params)

-    def stream_intervals(self, path, start = None, end = None, diffpath = None):
+    def stream_intervals(self, path, start=None, end=None, diffpath=None):
        """
        Return a generator that yields each stream interval.

@@ -199,8 +207,8 @@ class Client(object):
            params["end"] = timestamp_to_string(end)
        return self.http.get_gen("stream/intervals", params)

-    def stream_extract(self, path, start = None, end = None,
-                       count = False, markup = False, binary = False):
+    def stream_extract(self, path, start=None, end=None,
+                       count=False, markup=False, binary=False):
        """
        Extract data from a stream.  Returns a generator that yields
        lines of ASCII-formatted data that matches the database
@@ -230,17 +238,18 @@ class Client(object):
            params["markup"] = 1
        if binary:
            params["binary"] = 1
-        return self.http.get_gen("stream/extract", params, binary = binary)
+        return self.http.get_gen("stream/extract", params, binary=binary)

-    def stream_count(self, path, start = None, end = None):
+    def stream_count(self, path, start=None, end=None):
        """
        Return the number of rows of data in the stream that satisfy
        the given timestamps.
        """
-        counts = list(self.stream_extract(path, start, end, count = True))
+        counts = list(self.stream_extract(path, start, end, count=True))
        return int(counts[0])

-class StreamInserter(object):
+
+class StreamInserter():
    """Object returned by stream_insert_context() that manages
    the insertion of rows of data into a particular path.

@@ -298,6 +307,15 @@ class StreamInserter(object):
        self._block_data = []
        self._block_len = 0

+        self.destroyed = False
+
+    def destroy(self):
+        """Ensure this object can't be used again without raising
+        an error"""
+        def error(*args, **kwargs):
+            raise Exception("don't reuse this context object")
+        self._send_block = self.insert = self.finalize = self.send = error
+
    def insert(self, data):
        """Insert a chunk of ASCII formatted data in string form.  The
        overall data must consist of lines terminated by '\\n'."""
@@ -319,8 +337,8 @@ class StreamInserter(object):

        # Send the block once we have enough data
        if self._block_len >= maxdata:
-            self._send_block(final = False)
-            if self._block_len >= self._max_data_after_send: # pragma: no cover
+            self._send_block(final=False)
+            if self._block_len >= self._max_data_after_send:
                raise ValueError("too much data left over after trying"
                                 " to send intermediate block; is it"
                                 " missing newlines or malformed?")
@@ -346,12 +364,12 @@ class StreamInserter(object):

        If more data is inserted after a finalize(), it will become
        part of a new interval and there may be a gap left in-between."""
-        self._send_block(final = True)
+        self._send_block(final=True)

    def send(self):
        """Send any data that we might have buffered up.  Does not affect
        any other treatment of timestamps or endpoints."""
-        self._send_block(final = False)
+        self._send_block(final=False)

    def _get_first_noncomment(self, block):
        """Return the (start, end) indices of the first full line in
@@ -359,10 +377,10 @@ class StreamInserter(object):
        there isn't one."""
        start = 0
        while True:
-            end = block.find('\n', start)
+            end = block.find(b'\n', start)
            if end < 0:
                raise IndexError
-            if block[start] != '#':
+            if block[start] != b'#'[0]:
                return (start, (end + 1))
            start = end + 1

@@ -370,22 +388,22 @@ class StreamInserter(object):
        """Return the (start, end) indices of the last full line in
        block[:length] that isn't a comment, or raise IndexError if
        there isn't one."""
-        end = block.rfind('\n')
+        end = block.rfind(b'\n')
        if end <= 0:
            raise IndexError
        while True:
-            start = block.rfind('\n', 0, end)
-            if block[start + 1] != '#':
+            start = block.rfind(b'\n', 0, end)
+            if block[start + 1] != b'#'[0]:
                return ((start + 1), end)
            if start == -1:
                raise IndexError
            end = start

-    def _send_block(self, final = False):
+    def _send_block(self, final=False):
        """Send data currently in the block.  The data sent will
        consist of full lines only, so some might be left over."""
        # Build the full string to send
-        block = "".join(self._block_data)
+        block = b"".join(self._block_data)

        start_ts = self._interval_start
        if start_ts is None:
@@ -402,7 +420,7 @@ class StreamInserter(object):
            # or the timestamp of the last line plus epsilon.
            end_ts = self._interval_end
            try:
-                if block[-1] != '\n':
+                if block[-1] != b'\n'[0]:
                    raise ValueError("final block didn't end with a newline")
                if end_ts is None:
                    (spos, epos) = self._get_last_noncomment(block)
@@ -436,7 +454,7 @@ class StreamInserter(object):
                # the server complain so that the error is the same
                # as if we hadn't done this chunking.
                end_ts = self._interval_end
-            self._block_data = [ block[spos:] ]
+            self._block_data = [block[spos:]]
            self._block_len = (epos - spos)
            block = block[:spos]

@@ -444,7 +462,7 @@ class StreamInserter(object):
            self._interval_start = end_ts

        # Double check endpoints
-        if start_ts is None or end_ts is None:
+        if (start_ts is None or end_ts is None) or (start_ts == end_ts):
            # If the block has no non-comment lines, it's OK
            try:
                self._get_first_noncomment(block)
@@ -454,6 +472,6 @@ class StreamInserter(object):

        # Send it
        self.last_response = self._client.stream_insert_block(
-            self._path, block, start_ts, end_ts, binary = False)
+            self._path, block, start_ts, end_ts, binary=False)

        return
--- a/nilmdb/client/errors.py
+++ b/nilmdb/client/errors.py
@@ -1,33 +1,41 @@
 """HTTP client errors"""

-from nilmdb.utils.printf import *
+from nilmdb.utils.printf import sprintf
+

 class Error(Exception):
    """Base exception for both ClientError and ServerError responses"""
    def __init__(self,
-                 status = "Unspecified error",
-                 message = None,
-                 url = None,
-                 traceback = None):
-        Exception.__init__(self, status)
+                 status="Unspecified error",
+                 message=None,
+                 url=None,
+                 traceback=None):
+        super().__init__(status)
        self.status = status     # e.g. "400 Bad Request"
        self.message = message   # textual message from the server
        self.url = url           # URL we were requesting
        self.traceback = traceback  # server traceback, if available
+
    def _format_error(self, show_url):
        s = sprintf("[%s]", self.status)
        if self.message:
            s += sprintf(" %s", self.message)
-        if show_url and self.url: # pragma: no cover
+        if show_url and self.url:
            s += sprintf(" (%s)", self.url)
-        if self.traceback: # pragma: no cover
+        if self.traceback:
            s += sprintf("\nServer traceback:\n%s", self.traceback)
        return s
+
    def __str__(self):
-        return self._format_error(show_url = False)
-    def __repr__(self): # pragma: no cover
-        return self._format_error(show_url = True)
+        return self._format_error(show_url=False)
+
+    def __repr__(self):
+        return self._format_error(show_url=True)
+
+
 class ClientError(Error):
    pass
+
+
 class ServerError(Error):
    pass
--- a/nilmdb/client/httpclient.py
+++ b/nilmdb/client/httpclient.py
@@ -1,26 +1,25 @@
 """HTTP client library"""

-import nilmdb.utils
-from nilmdb.client.errors import ClientError, ServerError, Error
-
-import simplejson as json
-import urlparse
+import json
+import urllib.parse
 import requests

-class HTTPClient(object):
+from nilmdb.client.errors import ClientError, ServerError, Error
+
+
+class HTTPClient():
    """Class to manage and perform HTTP requests from the client"""
-    def __init__(self, baseurl = "", post_json = False):
+    def __init__(self, baseurl="", post_json=False, verify_ssl=True):
        """If baseurl is supplied, all other functions that take
        a URL can be given a relative URL instead."""
        # Verify / clean up URL
-        reparsed = urlparse.urlparse(baseurl).geturl()
+        reparsed = urllib.parse.urlparse(baseurl).geturl()
        if '://' not in reparsed:
-            reparsed = urlparse.urlparse("http://" + baseurl).geturl()
+            reparsed = urllib.parse.urlparse("http://" + baseurl).geturl()
        self.baseurl = reparsed.rstrip('/') + '/'

-        # Build Requests session object, enable SSL verification
-        self.session = requests.Session()
-        self.session.verify = True
+        # Note whether we want SSL verification
+        self.verify_ssl = verify_ssl

        # Saved response, so that tests can verify a few things.
        self._last_response = {}
@@ -33,44 +32,64 @@ class HTTPClient(object):
        # Default variables for exception.  We use the entire body as
        # the default message, in case we can't extract it from a JSON
        # response.
-        args = { "url" : url,
-                 "status" : str(code),
-                 "message" : body,
-                 "traceback" : None }
+        args = {
+            "url": url,
+            "status": str(code),
+            "message": body,
+            "traceback": None
+        }
        try:
            # Fill with server-provided data if we can
            jsonerror = json.loads(body)
            args["status"] = jsonerror["status"]
            args["message"] = jsonerror["message"]
            args["traceback"] = jsonerror["traceback"]
-        except Exception: # pragma: no cover
+        except Exception:
            pass
-        if code >= 400 and code <= 499:
+        if 400 <= code <= 499:
            raise ClientError(**args)
-        else: # pragma: no cover
-            if code >= 500 and code <= 599:
+        else:
+            if 500 <= code <= 599:
                if args["message"] is None:
-                    args["message"] = ("(no message; try disabling " +
-                                       "response.stream option in " +
+                    args["message"] = ("(no message; try disabling "
+                                       "response.stream option in "
                                       "nilmdb.server for better debugging)")
                raise ServerError(**args)
            else:
                raise Error(**args)

    def close(self):
-        self.session.close()
+        pass

    def _do_req(self, method, url, query_data, body_data, stream, headers):
-        url = urlparse.urljoin(self.baseurl, url)
+        url = urllib.parse.urljoin(self.baseurl, url)
        try:
-            response = self.session.request(method, url,
-                                            params = query_data,
-                                            data = body_data,
-                                            stream = stream,
-                                            headers = headers)
+            # Create a new session, ensure we send "Connection: close",
+            # and explicitly close connection after the transfer.
+            # This is to avoid HTTP/1.1 persistent connections
+            # (keepalive), because they have fundamental race
+            # conditions when there are delays between requests:
+            # a new request may be sent at the same instant that the
+            # server decides to time out the connection.
+            session = requests.Session()
+            if headers is None:
+                headers = {}
+            headers["Connection"] = "close"
+            response = session.request(method, url,
+                                       params=query_data,
+                                       data=body_data,
+                                       stream=stream,
+                                       headers=headers,
+                                       verify=self.verify_ssl)
+
+            # Close the connection.  If it's a generator (stream =
+            # True), the requests library shouldn't actually close the
+            # HTTP connection until all data has been read from the
+            # response.
+            session.close()
        except requests.RequestException as e:
-            raise ServerError(status = "502 Error", url = url,
-                              message = str(e.message))
+            raise ServerError(status="502 Error", url=url,
+                              message=str(e))
        if response.status_code != 200:
            self._handle_error(url, response.status_code, response.content)
        self._last_response = response
@@ -81,68 +100,88 @@ class HTTPClient(object):
            return (response, False)

    # Normal versions that return data directly
-    def _req(self, method, url, query = None, body = None, headers = None):
+    def _req(self, method, url, query=None, body=None, headers=None):
        """
        Make a request and return the body data as a string or parsed
        JSON object, or raise an error if it contained an error.
        """
        (response, isjson) = self._do_req(method, url, query, body,
-                                          stream = False, headers = headers)
+                                          stream=False, headers=headers)
        if isjson:
            return json.loads(response.content)
        return response.content

-    def get(self, url, params = None):
+    def get(self, url, params=None):
        """Simple GET (parameters in URL)"""
        return self._req("GET", url, params, None)

-    def post(self, url, params = None):
+    def post(self, url, params=None):
        """Simple POST (parameters in body)"""
        if self.post_json:
            return self._req("POST", url, None,
                             json.dumps(params),
-                             { 'Content-type': 'application/json' })
+                             {'Content-type': 'application/json'})
        else:
            return self._req("POST", url, None, params)

-    def put(self, url, data, params = None, binary = False):
+    def put(self, url, data, params=None,
+            content_type="application/octet-stream"):
        """Simple PUT (parameters in URL, data in body)"""
-        if binary:
-            h = { 'Content-type': 'application/octet-stream' }
-        else:
-            h = { 'Content-type': 'text/plain; charset=utf-8' }
-        return self._req("PUT", url, query = params, body = data, headers = h)
+        h = {'Content-type': content_type}
+        return self._req("PUT", url, query=params, body=data, headers=h)

    # Generator versions that return data one line at a time.
-    def _req_gen(self, method, url, query = None, body = None,
-                 headers = None, binary = False):
+    def _req_gen(self, method, url, query=None, body=None,
+                 headers=None, binary=False):
        """
        Make a request and return a generator that gives back strings
        or JSON decoded lines of the body data, or raise an error if
        it contained an eror.
        """
        (response, isjson) = self._do_req(method, url, query, body,
-                                          stream = True, headers = headers)
+                                          stream=True, headers=headers)
+
+        # Like the iter_lines function in Requests, but only splits on
+        # the specified line ending.
+        def lines(source, ending):
+            pending = None
+            for chunk in source:
+                if pending is not None:
+                    chunk = pending + chunk
+                tmp = chunk.split(ending)
+                lines = tmp[:-1]
+                if chunk.endswith(ending):
+                    pending = None
+                else:
+                    pending = tmp[-1]
+                for line in lines:
+                    yield line
+            if pending is not None:
+                yield pending
+
+        # Yield the chunks or lines as requested
        if binary:
-            for chunk in response.iter_content(chunk_size = 65536):
+            for chunk in response.iter_content(chunk_size=65536):
                yield chunk
        elif isjson:
-            for line in response.iter_lines():
+            for line in lines(response.iter_content(chunk_size=1),
+                              ending=b'\r\n'):
                yield json.loads(line)
        else:
-            for line in response.iter_lines():
+            for line in lines(response.iter_content(chunk_size=65536),
+                              ending=b'\n'):
                yield line

-    def get_gen(self, url, params = None, binary = False):
+    def get_gen(self, url, params=None, binary=False):
        """Simple GET (parameters in URL) returning a generator"""
-        return self._req_gen("GET", url, params, binary = binary)
+        return self._req_gen("GET", url, params, binary=binary)

-    def post_gen(self, url, params = None):
+    def post_gen(self, url, params=None):
        """Simple POST (parameters in body) returning a generator"""
        if self.post_json:
            return self._req_gen("POST", url, None,
                                 json.dumps(params),
-                                 { 'Content-type': 'application/json' })
+                                 {'Content-type': 'application/json'})
        else:
            return self._req_gen("POST", url, None, params)

--- a/nilmdb/client/numpyclient.py
+++ b/nilmdb/client/numpyclient.py
@@ -3,29 +3,33 @@
 """Provide a NumpyClient class that is based on normal Client, but has
 additional methods for extracting and inserting data via Numpy arrays."""

+import contextlib
+
+import numpy
+
 import nilmdb.utils
 import nilmdb.client.client
 import nilmdb.client.httpclient
 from nilmdb.client.errors import ClientError

-import contextlib
-from nilmdb.utils.time import timestamp_to_string, string_to_timestamp
-
-import numpy
-import cStringIO

 def layout_to_dtype(layout):
    ltype = layout.split('_')[0]
    lcount = int(layout.split('_')[1])
    if ltype.startswith('int'):
-        atype = '<i' + str(int(ltype[3:]) / 8)
+        atype = '<i' + str(int(ltype[3:]) // 8)
    elif ltype.startswith('uint'):
-        atype = '<u' + str(int(ltype[4:]) / 8)
+        atype = '<u' + str(int(ltype[4:]) // 8)
    elif ltype.startswith('float'):
-        atype = '<f' + str(int(ltype[5:]) / 8)
+        atype = '<f' + str(int(ltype[5:]) // 8)
    else:
        raise ValueError("bad layout")
-    return numpy.dtype([('timestamp', '<i8'), ('data', atype, lcount)])
+    if lcount == 1:
+        dtype = [('timestamp', '<i8'), ('data', atype)]
+    else:
+        dtype = [('timestamp', '<i8'), ('data', atype, lcount)]
+    return numpy.dtype(dtype)
+

 class NumpyClient(nilmdb.client.client.Client):
    """Subclass of nilmdb.client.Client that adds additional methods for
@@ -39,9 +43,9 @@ class NumpyClient(nilmdb.client.client.Client):
            layout = streams[0][1]
        return layout_to_dtype(layout)

-    def stream_extract_numpy(self, path, start = None, end = None,
-                             layout = None, maxrows = 100000,
-                             structured = False):
+    def stream_extract_numpy(self, path, start=None, end=None,
+                             layout=None, maxrows=100000,
+                             structured=False):
        """
        Extract data from a stream.  Returns a generator that yields
        Numpy arrays of up to 'maxrows' of data each.
@@ -63,25 +67,25 @@ class NumpyClient(nilmdb.client.client.Client):
        chunks = []
        total_len = 0
        maxsize = dtype.itemsize * maxrows
-        for data in self.stream_extract(path, start, end, binary = True):
+        for data in self.stream_extract(path, start, end, binary=True):
            # Add this block of binary data
            chunks.append(data)
            total_len += len(data)

            # See if we have enough to make the requested Numpy array
            while total_len >= maxsize:
-                assembled = "".join(chunks)
+                assembled = b"".join(chunks)
                total_len -= maxsize
-                chunks = [ assembled[maxsize:] ]
+                chunks = [assembled[maxsize:]]
                block = assembled[:maxsize]
                yield to_numpy(block)

        if total_len:
-            yield to_numpy("".join(chunks))
+            yield to_numpy(b"".join(chunks))

    @contextlib.contextmanager
-    def stream_insert_numpy_context(self, path, start = None, end = None,
-                                    layout = None):
+    def stream_insert_numpy_context(self, path, start=None, end=None,
+                                    layout=None):
        """Return a context manager that allows data to be efficiently
        inserted into a stream in a piecewise manner.  Data is
        provided as Numpy arrays, and is aggregated and sent to the
@@ -98,9 +102,10 @@ class NumpyClient(nilmdb.client.client.Client):
        ctx = StreamInserterNumpy(self, path, start, end, dtype)
        yield ctx
        ctx.finalize()
+        ctx.destroy()

-    def stream_insert_numpy(self, path, data, start = None, end = None,
-                            layout = None):
+    def stream_insert_numpy(self, path, data, start=None, end=None,
+                            layout=None):
        """Insert data into a stream.  data should be a Numpy array
        which will be passed through stream_insert_numpy_context to
        break it into chunks etc.  See the help for that function
@@ -113,6 +118,7 @@ class NumpyClient(nilmdb.client.client.Client):
                    ctx.insert(chunk)
        return ctx.last_response

+
 class StreamInserterNumpy(nilmdb.client.client.StreamInserter):
    """Object returned by stream_insert_numpy_context() that manages
    the insertion of rows of data into a particular path.
@@ -133,16 +139,8 @@ class StreamInserterNumpy(nilmdb.client.client.StreamInserter):
        contiguous interval and may be None.  'dtype' is the Numpy
        dtype for this stream.
        """
-        self.last_response = None
-
+        super(StreamInserterNumpy, self).__init__(client, path, start, end)
        self._dtype = dtype
-        self._client = client
-        self._path = path
-
-        # Start and end for the overall contiguous interval we're
-        # filling
-        self._interval_start = start
-        self._interval_end = end

        # Max rows to send at once
        self._max_rows = self._max_data // self._dtype.itemsize
@@ -153,7 +151,7 @@ class StreamInserterNumpy(nilmdb.client.client.StreamInserter):

    def insert(self, array):
        """Insert Numpy data, which must match the layout type."""
-        if type(array) != numpy.ndarray:
+        if not isinstance(array, numpy.ndarray):
            array = numpy.array(array)
        if array.ndim == 1:
            # Already a structured array; just verify the type
@@ -163,9 +161,9 @@ class StreamInserterNumpy(nilmdb.client.client.StreamInserter):
            # Convert to structured array
            sarray = numpy.zeros(array.shape[0], dtype=self._dtype)
            try:
-                sarray['timestamp'] = array[:,0]
+                sarray['timestamp'] = array[:, 0]
                # Need the squeeze in case sarray['data'] is 1 dimensional
-                sarray['data'] = numpy.squeeze(array[:,1:])
+                sarray['data'] = numpy.squeeze(array[:, 1:])
            except (IndexError, ValueError):
                raise ValueError("wrong number of fields for this data type")
            array = sarray
@@ -191,15 +189,15 @@ class StreamInserterNumpy(nilmdb.client.client.StreamInserter):

        # Send if it's too long
        if self._block_rows >= maxrows:
-            self._send_block(final = False)
+            self._send_block(final=False)

-    def _send_block(self, final = False):
+    def _send_block(self, final=False):
        """Send the data current stored up.  One row might be left
        over if we need its timestamp saved."""

        # Build the full array to send
        if self._block_rows == 0:
-            array = numpy.zeros(0, dtype = self._dtype)
+            array = numpy.zeros(0, dtype=self._dtype)
        else:
            array = numpy.hstack(self._block_arrays)

@@ -243,20 +241,23 @@ class StreamInserterNumpy(nilmdb.client.client.StreamInserter):
                # the server complain so that the error is the same
                # as if we hadn't done this chunking.
                end_ts = self._interval_end
-            self._block_arrays = [ array[-1:] ]
+            self._block_arrays = [array[-1:]]
            self._block_rows = 1
            array = array[:-1]

            # Next block continues where this one ended
            self._interval_start = end_ts

-        # If we have no endpoints, it's because we had no data to send.
-        if start_ts is None or end_ts is None:
+        # If we have no endpoints, or equal endpoints, it's OK as long
+        # as there's no data to send
+        if (start_ts is None or end_ts is None) or (start_ts == end_ts):
+            if not array:
                return
+            raise ClientError("have data to send, but invalid start/end times")

        # Send it
        data = array.tostring()
        self.last_response = self._client.stream_insert_block(
-            self._path, data, start_ts, end_ts, binary = True)
+            self._path, data, start_ts, end_ts, binary=True)

        return
--- a/nilmdb/cmdline/cmdline.py
+++ b/nilmdb/cmdline/cmdline.py
@@ -1,43 +1,48 @@
 """Command line client functionality"""

-import nilmdb.client
-
-from nilmdb.utils.printf import *
-from nilmdb.utils import datetime_tz
-import nilmdb.utils.time
-
-import sys
 import os
+import sys
+import signal
 import argparse
 from argparse import ArgumentDefaultsHelpFormatter as def_form
-import signal

-try: # pragma: no cover
-    import argcomplete
-except ImportError: # pragma: no cover
-    argcomplete = None
+import nilmdb.client
+from nilmdb.utils.printf import fprintf, sprintf
+import nilmdb.utils.time
+
+import argcomplete
+import datetime_tz

 # Valid subcommands.  Defined in separate files just to break
 # things up -- they're still called with Cmdline as self.
-subcommands = [ "help", "info", "create", "list", "metadata",
-                "insert", "extract", "remove", "destroy",
-                "intervals", "rename" ]
+subcommands = ["help", "info", "create", "rename", "list", "intervals",
+               "metadata", "insert", "extract", "remove", "destroy"]

 # Import the subcommand modules
 subcmd_mods = {}
 for cmd in subcommands:
-    subcmd_mods[cmd] = __import__("nilmdb.cmdline." + cmd, fromlist = [ cmd ])
+    subcmd_mods[cmd] = __import__("nilmdb.cmdline." + cmd, fromlist=[cmd])
+

 class JimArgumentParser(argparse.ArgumentParser):
+    def parse_args(self, args=None, namespace=None):
+        # Look for --version anywhere and change it to just "nilmtool
+        # --version".  This makes "nilmtool cmd --version" work, which
+        # is needed by help2man.
+        if "--version" in (args or sys.argv[1:]):
+            args = ["--version"]
+        return argparse.ArgumentParser.parse_args(self, args, namespace)
+
    def error(self, message):
        self.print_usage(sys.stderr)
        self.exit(2, sprintf("error: %s\n", message))

-class Complete(object): # pragma: no cover
+
+class Complete():
    # Completion helpers, for using argcomplete (see
    # extras/nilmtool-bash-completion.sh)
    def escape(self, s):
-        quote_chars = [ "\\", "\"", "'", " " ]
+        quote_chars = ["\\", "\"", "'", " "]
        for char in quote_chars:
            s = s.replace(char, "\\" + char)
        return s
@@ -50,18 +55,18 @@ class Complete(object): # pragma: no cover

    def path(self, prefix, parsed_args, **kwargs):
        client = nilmdb.client.Client(parsed_args.url)
-        return ( self.escape(s[0])
+        return (self.escape(s[0])
                for s in client.stream_list()
-                 if s[0].startswith(prefix) )
+                if s[0].startswith(prefix))

    def layout(self, prefix, parsed_args, **kwargs):
-        types = [ "int8", "int16", "int32", "int64",
+        types = ["int8", "int16", "int32", "int64",
                 "uint8", "uint16", "uint32", "uint64",
-                  "float32", "float64" ]
+                 "float32", "float64"]
        layouts = []
-        for i in range(1,10):
+        for i in range(1, 10):
            layouts.extend([(t + "_" + str(i)) for t in types])
-        return ( l for l in layouts if l.startswith(prefix) )
+        return (l for l in layouts if l.startswith(prefix))

    def meta_key(self, prefix, parsed_args, **kwargs):
        return (kv.split('=')[0] for kv
@@ -72,25 +77,23 @@ class Complete(object): # pragma: no cover
        path = parsed_args.path
        if not path:
            return []
-        return ( self.escape(k + '=' + v)
-                 for (k,v) in client.stream_get_metadata(path).iteritems()
-                 if k.startswith(prefix) )
+        results = []
+        for (k, v) in client.stream_get_metadata(path).items():
+            kv = self.escape(k + '=' + v)
+            if kv.startswith(prefix):
+                results.append(kv)
+        return results


-class Cmdline(object):
+class Cmdline():

-    def __init__(self, argv = None):
+    def __init__(self, argv=None):
        self.argv = argv or sys.argv[1:]
-        try:
-            # Assume command line arguments are encoded with stdin's encoding,
-            # and reverse it.  Won't be needed in Python 3, but for now..
-            self.argv = [ x.decode(sys.stdin.encoding) for x in self.argv ]
-        except Exception: # pragma: no cover
-            pass
        self.client = None
        self.def_url = os.environ.get("NILMDB_URL", "http://localhost/nilmdb/")
        self.subcmd = {}
        self.complete = Complete()
+        self.complete_output_stream = None  # overridden by test suite

    def arg_time(self, toparse):
        """Parse a time string argument"""
@@ -102,14 +105,14 @@ class Cmdline(object):

    # Set up the parser
    def parser_setup(self):
-        self.parser = JimArgumentParser(add_help = False,
-                                        formatter_class = def_form)
+        self.parser = JimArgumentParser(add_help=False,
+                                        formatter_class=def_form)

        group = self.parser.add_argument_group("General options")
        group.add_argument("-h", "--help", action='help',
                           help='show this help message and exit')
-        group.add_argument("-V", "--version", action="version",
-                           version = nilmdb.__version__)
+        group.add_argument("-v", "--version", action="version",
+                           version=nilmdb.__version__)

        group = self.parser.add_argument_group("Server")
        group.add_argument("-u", "--url", action="store",
@@ -118,7 +121,7 @@ class Cmdline(object):
                           ).completer = self.complete.url

        sub = self.parser.add_subparsers(
-            title="Commands", dest="command",
+            title="Commands", dest="command", required=True,
            description="Use 'help command' or 'command --help' for more "
            "details on a particular command.")

@@ -135,10 +138,7 @@ class Cmdline(object):
    def run(self):
        # Set SIGPIPE to its default handler -- we don't need Python
        # to catch it for us.
-        try:
        signal.signal(signal.SIGPIPE, signal.SIG_DFL)
-        except ValueError: # pragma: no cover
-            pass

        # Clear cached timezone, so that we can pick up timezone changes
        # while running this from the test suite.
@@ -146,8 +146,8 @@ class Cmdline(object):

        # Run parser
        self.parser_setup()
-        if argcomplete: # pragma: no cover
-            argcomplete.autocomplete(self.parser)
+        argcomplete.autocomplete(self.parser, exit_method=sys.exit,
+                                 output_stream=self.complete_output_stream)
        self.args = self.parser.parse_args(self.argv)

        # Run arg verify handler if there is one
@@ -160,7 +160,7 @@ class Cmdline(object):
        # unless the particular command requests that we don't.
        if "no_test_connect" not in self.args:
            try:
-                server_version = self.client.version()
+                self.client.version()
            except nilmdb.client.Error as e:
                self.die("error connecting to server: %s", str(e))

--- a/nilmdb/cmdline/create.py
+++ b/nilmdb/cmdline/create.py
@@ -1,11 +1,11 @@
-from nilmdb.utils.printf import *
+from argparse import RawDescriptionHelpFormatter as raw_form
+
 import nilmdb.client

-from argparse import RawDescriptionHelpFormatter as raw_form

 def setup(self, sub):
    cmd = sub.add_parser("create", help="Create a new stream",
-                         formatter_class = raw_form,
+                         formatter_class=raw_form,
                         description="""
 Create a new empty stream at the specified path and with the specified
 layout type.
@@ -19,7 +19,7 @@ Layout types are of the format: type_count
  For example, 'float32_8' means the data for this stream has 8 columns of
  32-bit floating point values.
 """)
-    cmd.set_defaults(handler = cmd_create)
+    cmd.set_defaults(handler=cmd_create)
    group = cmd.add_argument_group("Required arguments")
    group.add_argument("path",
                       help="Path (in database) of new stream, e.g. /foo/bar",
@@ -29,6 +29,7 @@ Layout types are of the format: type_count
                       ).completer = self.complete.layout
    return cmd

+
 def cmd_create(self):
    """Create new stream"""
    try:
--- a/nilmdb/cmdline/destroy.py
+++ b/nilmdb/cmdline/destroy.py
@@ -1,12 +1,14 @@
-from nilmdb.utils.printf import *
-import nilmdb.client
 import fnmatch

 from argparse import ArgumentDefaultsHelpFormatter as def_form

+from nilmdb.utils.printf import printf
+import nilmdb.client
+
+
 def setup(self, sub):
    cmd = sub.add_parser("destroy", help="Delete a stream and all data",
-                         formatter_class = def_form,
+                         formatter_class=def_form,
                         description="""
                         Destroy the stream at the specified path.
                         The stream must be empty.  All metadata
@@ -14,7 +16,7 @@ def setup(self, sub):

                         Wildcards and multiple paths are supported.
                         """)
-    cmd.set_defaults(handler = cmd_destroy)
+    cmd.set_defaults(handler=cmd_destroy)
    group = cmd.add_argument_group("Options")
    group.add_argument("-R", "--remove", action="store_true",
                       help="Remove all data before destroying stream")
@@ -27,9 +29,10 @@ def setup(self, sub):
                       ).completer = self.complete.path
    return cmd

+
 def cmd_destroy(self):
    """Destroy stream"""
-    streams = [ s[0] for s in self.client.stream_list() ]
+    streams = [s[0] for s in self.client.stream_list()]
    paths = []
    for path in self.args.path:
        new = fnmatch.filter(streams, path)
@@ -43,7 +46,7 @@ def cmd_destroy(self):

        try:
            if self.args.remove:
-                count = self.client.stream_remove(path)
+                self.client.stream_remove(path)
            self.client.stream_destroy(path)
        except nilmdb.client.ClientError as e:
            self.die("error destroying stream: %s", str(e))
--- a/nilmdb/cmdline/extract.py
+++ b/nilmdb/cmdline/extract.py
@@ -1,14 +1,16 @@
-from __future__ import print_function
-from nilmdb.utils.printf import *
+import sys
+
+from nilmdb.utils.printf import printf
 import nilmdb.client

+
 def setup(self, sub):
    cmd = sub.add_parser("extract", help="Extract data",
                         description="""
                         Extract data from a stream.
                         """)
-    cmd.set_defaults(verify = cmd_extract_verify,
-                     handler = cmd_extract)
+    cmd.set_defaults(verify=cmd_extract_verify,
+                     handler=cmd_extract)

    group = cmd.add_argument_group("Data selection")
    group.add_argument("path",
@@ -24,6 +26,8 @@ def setup(self, sub):
                       ).completer = self.complete.time

    group = cmd.add_argument_group("Output format")
+    group.add_argument("-B", "--binary", action="store_true",
+                       help="Raw binary output")
    group.add_argument("-b", "--bare", action="store_true",
                       help="Exclude timestamps from output lines")
    group.add_argument("-a", "--annotate", action="store_true",
@@ -37,11 +41,17 @@ def setup(self, sub):
                       help="Just output a count of matched data points")
    return cmd

+
 def cmd_extract_verify(self):
-    if self.args.start is not None and self.args.end is not None:
    if self.args.start > self.args.end:
        self.parser.error("start is after end")

+    if self.args.binary:
+        if (self.args.bare or self.args.annotate or self.args.markup or
+                self.args.timestamp_raw or self.args.count):
+            self.parser.error("--binary cannot be combined with other options")
+
+
 def cmd_extract(self):
    streams = self.client.stream_list(self.args.path)
    if len(streams) != 1:
@@ -60,16 +70,23 @@ def cmd_extract(self):
        printf("# end: %s\n", time_string(self.args.end))

    printed = False
+    if self.args.binary:
+        printer = sys.stdout.buffer.write
+    else:
+        printer = lambda x: print(x.decode('utf-8'))
+    bare = self.args.bare
+    count = self.args.count
    for dataline in self.client.stream_extract(self.args.path,
                                               self.args.start,
                                               self.args.end,
                                               self.args.count,
-                                               self.args.markup):
-        if self.args.bare and not self.args.count:
+                                               self.args.markup,
+                                               self.args.binary):
+        if bare and not count:
            # Strip timestamp (first element).  Doesn't make sense
            # if we are only returning a count.
-            dataline = ' '.join(dataline.split(' ')[1:])
-        print(dataline)
+            dataline = b' '.join(dataline.split(b' ')[1:])
+        printer(dataline)
        printed = True
    if not printed:
        if self.args.annotate:
--- a/nilmdb/cmdline/help.py
+++ b/nilmdb/cmdline/help.py
@@ -1,7 +1,5 @@
-from nilmdb.utils.printf import *
-
 import argparse
-import sys
+

 def setup(self, sub):
    cmd = sub.add_parser("help", help="Show detailed help for a command",
@@ -9,14 +7,15 @@ def setup(self, sub):
                         Show help for a command. 'help command' is
                         the same as 'command --help'.
                         """)
-    cmd.set_defaults(handler = cmd_help)
-    cmd.set_defaults(no_test_connect = True)
+    cmd.set_defaults(handler=cmd_help)
+    cmd.set_defaults(no_test_connect=True)
    cmd.add_argument("command", nargs="?",
                     help="Command to get help about")
    cmd.add_argument("rest", nargs=argparse.REMAINDER,
                     help=argparse.SUPPRESS)
    return cmd

+
 def cmd_help(self):
    if self.args.command in self.subcmd:
        self.subcmd[self.args.command].print_help()
--- a/nilmdb/cmdline/info.py
+++ b/nilmdb/cmdline/info.py
@@ -1,19 +1,21 @@
+from argparse import ArgumentDefaultsHelpFormatter as def_form
+
 import nilmdb.client
-from nilmdb.utils.printf import *
+from nilmdb.utils.printf import printf
 from nilmdb.utils import human_size

-from argparse import ArgumentDefaultsHelpFormatter as def_form

 def setup(self, sub):
    cmd = sub.add_parser("info", help="Server information",
-                         formatter_class = def_form,
+                         formatter_class=def_form,
                         description="""
                         List information about the server, like
                         version.
                         """)
-    cmd.set_defaults(handler = cmd_info)
+    cmd.set_defaults(handler=cmd_info)
    return cmd

+
 def cmd_info(self):
    """Print info about the server"""
    printf("Client version: %s\n", nilmdb.__version__)
@@ -21,5 +23,8 @@ def cmd_info(self):
    printf("Server URL: %s\n", self.client.geturl())
    dbinfo = self.client.dbinfo()
    printf("Server database path: %s\n", dbinfo["path"])
-    printf("Server database size: %s\n", human_size(dbinfo["size"]))
-    printf("Server database free space: %s\n", human_size(dbinfo["free"]))
+    for (desc, field) in [("used by NilmDB", "size"),
+                          ("used by other", "other"),
+                          ("reserved", "reserved"),
+                          ("free", "free")]:
+        printf("Server disk space %s: %s\n", desc, human_size(dbinfo[field]))
--- a/nilmdb/cmdline/insert.py
+++ b/nilmdb/cmdline/insert.py
@@ -1,17 +1,18 @@
-from nilmdb.utils.printf import *
+import sys
+
+from nilmdb.utils.printf import printf
 import nilmdb.client
 import nilmdb.utils.timestamper as timestamper
 import nilmdb.utils.time

-import sys

 def setup(self, sub):
    cmd = sub.add_parser("insert", help="Insert data",
                         description="""
                         Insert data into a stream.
                         """)
-    cmd.set_defaults(verify = cmd_insert_verify,
-                     handler = cmd_insert)
+    cmd.set_defaults(verify=cmd_insert_verify,
+                     handler=cmd_insert)
    cmd.add_argument("-q", "--quiet", action='store_true',
                     help='suppress unnecessary messages')

@@ -61,21 +62,24 @@ def setup(self, sub):
    group.add_argument("path",
                       help="Path of stream, e.g. /foo/bar",
                       ).completer = self.complete.path
-    group.add_argument("file", nargs = '?', default='-',
+    group.add_argument("file", nargs='?', default='-',
                       help="File to insert (default: - (stdin))")
    return cmd

+
 def cmd_insert_verify(self):
    if self.args.timestamp:
        if not self.args.rate:
            self.die("error: --rate is needed, but was not specified")
        if not self.args.filename and self.args.start is None:
-            self.die("error: need --start or --filename when adding timestamps")
+            self.die("error: need --start or --filename "
+                     "when adding timestamps")
    else:
        if self.args.start is None or self.args.end is None:
            self.die("error: when not adding timestamps, --start and "
                     "--end are required")

+
 def cmd_insert(self):
    # Find requested stream
    streams = self.client.stream_list(self.args.path)
@@ -87,7 +91,7 @@ def cmd_insert(self):
    try:
        filename = arg.file
        if filename == '-':
-            infile = sys.stdin
+            infile = sys.stdin.buffer
        else:
            try:
                infile = open(filename, "rb")
@@ -104,7 +108,7 @@ def cmd_insert(self):
        if arg.timestamp:
            data = timestamper.TimestamperRate(infile, arg.start, arg.rate)
        else:
-            data = iter(lambda: infile.read(1048576), '')
+            data = iter(lambda: infile.read(1048576), b'')

        # Print info
        if not arg.quiet:
--- a/nilmdb/cmdline/intervals.py
+++ b/nilmdb/cmdline/intervals.py
@@ -1,13 +1,13 @@
-from nilmdb.utils.printf import *
-import nilmdb.utils.time
-
-import fnmatch
-import argparse
 from argparse import ArgumentDefaultsHelpFormatter as def_form

+from nilmdb.utils.printf import printf
+import nilmdb.utils.time
+from nilmdb.utils.interval import Interval
+
+
 def setup(self, sub):
    cmd = sub.add_parser("intervals", help="List intervals",
-                         formatter_class = def_form,
+                         formatter_class=def_form,
                         description="""
                         List intervals in a stream, similar to
                         'list --detail path'.
@@ -16,8 +16,8 @@ def setup(self, sub):
                         interval ranges that are present in 'path'
                         and not present in 'diffpath' are printed.
                         """)
-    cmd.set_defaults(verify = cmd_intervals_verify,
-                     handler = cmd_intervals)
+    cmd.set_defaults(verify=cmd_intervals_verify,
+                     handler=cmd_intervals)

    group = cmd.add_argument_group("Stream selection")
    group.add_argument("path", metavar="PATH",
@@ -42,14 +42,18 @@ def setup(self, sub):
    group = cmd.add_argument_group("Misc options")
    group.add_argument("-T", "--timestamp-raw", action="store_true",
                       help="Show raw timestamps when printing times")
+    group.add_argument("-o", "--optimize", action="store_true",
+                       help="Optimize (merge adjacent) intervals")

    return cmd

+
 def cmd_intervals_verify(self):
    if self.args.start is not None and self.args.end is not None:
        if self.args.start >= self.args.end:
            self.parser.error("start must precede end")

+
 def cmd_intervals(self):
    """List intervals in a stream"""
    if self.args.timestamp_raw:
@@ -58,9 +62,15 @@ def cmd_intervals(self):
        time_string = nilmdb.utils.time.timestamp_to_human

    try:
-           for (start, end) in self.client.stream_intervals(
-               self.args.path, self.args.start, self.args.end, self.args.diff):
-               printf("[ %s -> %s ]\n", time_string(start), time_string(end))
+        intervals = (Interval(start, end) for (start, end) in
+                     self.client.stream_intervals(self.args.path,
+                                                  self.args.start,
+                                                  self.args.end,
+                                                  self.args.diff))
+        if self.args.optimize:
+            intervals = nilmdb.utils.interval.optimize(intervals)
+        for i in intervals:
+            printf("[ %s -> %s ]\n", time_string(i.start), time_string(i.end))
+
    except nilmdb.client.ClientError as e:
        self.die("error listing intervals: %s", str(e))
-
--- a/nilmdb/cmdline/list.py
+++ b/nilmdb/cmdline/list.py
@@ -1,21 +1,21 @@
-from nilmdb.utils.printf import *
+import fnmatch
+from argparse import ArgumentDefaultsHelpFormatter as def_form
+
+from nilmdb.utils.printf import printf
 import nilmdb.utils.time

-import fnmatch
-import argparse
-from argparse import ArgumentDefaultsHelpFormatter as def_form

 def setup(self, sub):
    cmd = sub.add_parser("list", help="List streams",
-                         formatter_class = def_form,
+                         formatter_class=def_form,
                         description="""
                         List streams available in the database,
                         optionally filtering by path.  Wildcards
                         are accepted; non-matching paths or wildcards
                         are ignored.
                         """)
-    cmd.set_defaults(verify = cmd_list_verify,
-                     handler = cmd_list)
+    cmd.set_defaults(verify=cmd_list_verify,
+                     handler=cmd_list)

    group = cmd.add_argument_group("Stream filtering")
    group.add_argument("path", metavar="PATH", default=["*"], nargs='*',
@@ -45,9 +45,12 @@ def setup(self, sub):
                       help="Show raw timestamps when printing times")
    group.add_argument("-l", "--layout", action="store_true",
                       help="Show layout type next to path name")
+    group.add_argument("-n", "--no-decim", action="store_true",
+                       help="Skip paths containing \"~decim-\"")

    return cmd

+
 def cmd_list_verify(self):
    if self.args.start is not None and self.args.end is not None:
        if self.args.start >= self.args.end:
@@ -55,11 +58,13 @@ def cmd_list_verify(self):

    if self.args.start is not None or self.args.end is not None:
        if not self.args.detail:
-            self.parser.error("--start and --end only make sense with --detail")
+            self.parser.error("--start and --end only make sense "
+                              "with --detail")
+

 def cmd_list(self):
    """List available streams"""
-    streams = self.client.stream_list(extended = True)
+    streams = self.client.stream_list(extended=True)

    if self.args.timestamp_raw:
        time_string = nilmdb.utils.time.timestamp_to_string
@@ -71,6 +76,8 @@ def cmd_list(self):
            (path, layout, int_min, int_max, rows, time) = stream[:6]
            if not fnmatch.fnmatch(path, argpath):
                continue
+            if self.args.no_decim and "~decim-" in path:
+                continue

            if self.args.layout:
                printf("%s %s\n", path, layout)
--- a/nilmdb/cmdline/metadata.py
+++ b/nilmdb/cmdline/metadata.py
@@ -1,7 +1,8 @@
-from nilmdb.utils.printf import *
+from nilmdb.utils.printf import printf
 import nilmdb
 import nilmdb.client

+
 def setup(self, sub):
    cmd = sub.add_parser("metadata", help="Get or set stream metadata",
                         description="""
@@ -11,7 +12,7 @@ def setup(self, sub):
                         usage="%(prog)s path [-g [key ...] | "
                         "-s key=value [...] | -u key=value [...]] | "
                         "-d [key ...]")
-    cmd.set_defaults(handler = cmd_metadata)
+    cmd.set_defaults(handler=cmd_metadata)

    group = cmd.add_argument_group("Required arguments")
    group.add_argument("path",
@@ -36,6 +37,7 @@ def setup(self, sub):
                     ).completer = self.complete.meta_key
    return cmd

+
 def cmd_metadata(self):
    """Manipulate metadata"""
    if self.args.set is not None or self.args.update is not None:
@@ -62,7 +64,9 @@ def cmd_metadata(self):
            self.die("error setting/updating metadata: %s", str(e))
    elif self.args.delete is not None:
        # Delete (by setting values to empty strings)
-        keys = self.args.delete or None
+        keys = None
+        if self.args.delete:
+            keys = list(self.args.delete)
        try:
            data = self.client.stream_get_metadata(self.args.path, keys)
            for key in data:
@@ -72,7 +76,9 @@ def cmd_metadata(self):
            self.die("error deleting metadata: %s", str(e))
    else:
        # Get (or unspecified)
-        keys = self.args.get or None
+        keys = None
+        if self.args.get:
+            keys = list(self.args.get)
        try:
            data = self.client.stream_get_metadata(self.args.path, keys)
        except nilmdb.client.ClientError as e:
--- a/nilmdb/cmdline/remove.py
+++ b/nilmdb/cmdline/remove.py
@@ -1,15 +1,18 @@
-from nilmdb.utils.printf import *
-import nilmdb.client
 import fnmatch

+from nilmdb.utils.printf import printf
+import nilmdb.client
+
+
 def setup(self, sub):
    cmd = sub.add_parser("remove", help="Remove data",
                         description="""
                         Remove all data from a specified time range within a
-                         stream.  If multiple streams or wildcards are provided,
-                         the same time range is removed from all streams.
+                         stream.  If multiple streams or wildcards are
+                         provided, the same time range is removed from all
+                         streams.
                         """)
-    cmd.set_defaults(handler = cmd_remove)
+    cmd.set_defaults(handler=cmd_remove)

    group = cmd.add_argument_group("Data selection")
    group.add_argument("path", nargs='+',
@@ -32,8 +35,9 @@ def setup(self, sub):
                       help="Output number of data points removed")
    return cmd

+
 def cmd_remove(self):
-    streams = [ s[0] for s in self.client.stream_list() ]
+    streams = [s[0] for s in self.client.stream_list()]
    paths = []
    for path in self.args.path:
        new = fnmatch.filter(streams, path)
@@ -48,7 +52,7 @@ def cmd_remove(self):
            count = self.client.stream_remove(path,
                                              self.args.start, self.args.end)
            if self.args.count:
-                printf("%d\n", count);
+                printf("%d\n", count)
    except nilmdb.client.ClientError as e:
        self.die("error removing data: %s", str(e))

--- a/nilmdb/cmdline/rename.py
+++ b/nilmdb/cmdline/rename.py
@@ -1,18 +1,18 @@
-from nilmdb.utils.printf import *
+from argparse import ArgumentDefaultsHelpFormatter as def_form
+
 import nilmdb.client

-from argparse import ArgumentDefaultsHelpFormatter as def_form

 def setup(self, sub):
    cmd = sub.add_parser("rename", help="Rename a stream",
-                         formatter_class = def_form,
+                         formatter_class=def_form,
                         description="""
                         Rename a stream.

                         Only the stream's path is renamed; no
                         metadata is changed.
                         """)
-    cmd.set_defaults(handler = cmd_rename)
+    cmd.set_defaults(handler=cmd_rename)
    group = cmd.add_argument_group("Required arguments")
    group.add_argument("oldpath",
                       help="Old path, e.g. /foo/old",
@@ -23,6 +23,7 @@ def setup(self, sub):

    return cmd

+
 def cmd_rename(self):
    """Rename a stream"""
    try:
--- a/nilmdb/fsck/init.py
+++ b/nilmdb/fsck/init.py
@@ -0,0 +1,3 @@
+"""nilmdb.fsck"""
+
+from nilmdb.fsck.fsck import Fsck
--- a/nilmdb/fsck/fsck.py
+++ b/nilmdb/fsck/fsck.py
@@ -0,0 +1,466 @@
+# -*- coding: utf-8 -*-
+
+raise Exception("todo: fix path bytes issues")
+
+"""Check database consistency, with some ability to fix problems.
+This should be able to fix cases where a database gets corrupted due
+to unexpected system shutdown, and detect other cases that may cause
+NilmDB to return errors when trying to manipulate the database."""
+
+import nilmdb.utils
+import nilmdb.server
+import nilmdb.client.numpyclient
+from nilmdb.utils.interval import IntervalError
+from nilmdb.server.interval import Interval, IntervalSet
+from nilmdb.utils.printf import printf, fprintf, sprintf
+from nilmdb.utils.time import timestamp_to_string
+
+from collections import defaultdict
+import sqlite3
+import os
+import sys
+import progressbar
+import re
+import time
+import shutil
+import pickle
+import numpy
+
+class FsckError(Exception):
+    """Problem found while checking the database.
+
+    `msg` may be a printf-style format string: when extra positional
+    arguments are supplied, they are substituted into it with sprintf()
+    before the result is handed to Exception.
+    """
+    def __init__(self, msg = "", *args):
+        formatted = sprintf(msg, *args) if args else msg
+        Exception.__init__(self, formatted)
+class FixableFsckError(FsckError):
+    """FsckError whose message gets a hint about "--fix" appended.
+
+    Accepts the same printf-style `msg` plus arguments as FsckError.
+    """
+    def __init__(self, msg = "", *args):
+        text = sprintf(msg, *args) if args else msg
+        FsckError.__init__(self,
+                           "%s\nThis may be fixable with \"--fix\".", text)
+class RetryFsck(FsckError):
+    """Raised to request that a check be run again.
+
+    The fix_* helpers raise this after repairing something; functions
+    decorated with retry_if_raised(RetryFsck, ...) catch it and rerun.
+    """
+    pass
+
+def log(format, *args):
+    """Print a printf-style formatted progress message using printf()."""
+    printf(format, *args)
+
+def err(format, *args):
+    """Write a printf-style formatted message to sys.stderr using fprintf()."""
+    fprintf(sys.stderr, format, *args)
+
+# Decorator that retries a function if it raises a specific exception
+def retry_if_raised(exc, message = None, max_retries = 100):
+    """Return a decorator that calls a function again while it raises `exc`.
+
+    exc         -- exception class (or tuple of classes) that triggers
+                   a retry
+    message     -- if set, logged every time a retry is triggered
+    max_retries -- maximum number of calls to attempt
+
+    The wrapped function's return value is passed through from the
+    first call that doesn't raise `exc`.  Any other exception
+    propagates immediately.  If all `max_retries` calls raise `exc`,
+    a plain Exception is raised.
+    """
+    import functools
+
+    def f1(func):
+        @functools.wraps(func)
+        def f2(*args, **kwargs):
+            for _ in range(max_retries):
+                try:
+                    return func(*args, **kwargs)
+                except exc:
+                    if message:
+                        log("%s\n\n", message)
+            # Fill in the retry count; the format string used to be
+            # raised with its "%d" left unsubstituted.
+            raise Exception("Max number of retries (%d) exceeded; giving up"
+                            % max_retries)
+        return f2
+    return f1
+
+class Progress(object):
+    """Context manager wrapping a progressbar.ProgressBar.
+
+    Entering starts the bar; leaving finishes it on success, or just
+    prints a newline if an exception is propagating.
+    """
+    def __init__(self, maxval):
+        # A maxval of zero is replaced by 1 (presumably so the bar never
+        # has an empty range -- TODO confirm against progressbar).
+        total = 1 if maxval == 0 else maxval
+        widgets = [ progressbar.Percentage(), ' ',
+                    progressbar.Bar(), ' ',
+                    progressbar.ETA() ]
+        self.bar = progressbar.ProgressBar(maxval = total, widgets = widgets)
+        # Fall back to a fixed width when none was detected
+        if self.bar.term_width == 0:
+            self.bar.term_width = 75
+    def __enter__(self):
+        self.bar.start()
+        self.last_update = 0
+        return self
+    def __exit__(self, exc_type, exc_value, traceback):
+        if exc_type is not None:
+            # Leave the partially drawn bar and move to a fresh line
+            printf("\n")
+        else:
+            self.bar.finish()
+    def update(self, val):
+        """Move the bar to position `val`."""
+        self.bar.update(val)
+
+class Fsck(object):
+
+    def __init__(self, path, fix = False):
+        """Record the database location and whether fixes are allowed.
+
+        path -- database directory; the SQL database is looked up as
+                "data.sql", the bulk data as "data", and the lock file
+                as "data.lock" inside it
+        fix  -- if true, the fix_* helpers repair problems instead of
+                raising FixableFsckError
+        """
+        self.basepath = path
+        self.sqlpath = os.path.join(path, "data.sql")
+        self.bulkpath = os.path.join(path, "data")
+        self.bulklock = os.path.join(path, "data.lock")
+        self.fix = fix
+
+    ### Main checks
+
+    @retry_if_raised(RetryFsck, "Something was fixed: restarting fsck")
+    def check(self, skip_data = False):
+        """Run all checks in order: paths, SQL, streams, intervals, data.
+
+        skip_data -- if true, check_data() is skipped (and that is logged)
+
+        Whenever a check raises RetryFsck, the decorator logs its message
+        and reruns this whole method from the start.  Other exceptions
+        (e.g. FsckError) propagate to the caller.
+        """
+        # Start every attempt without handles; check_paths() and
+        # check_sql() open them.
+        self.bulk = None
+        self.sql = None
+        try:
+            self.check_paths()
+            self.check_sql()
+            self.check_streams()
+            self.check_intervals()
+            if skip_data:
+                log("skipped data check\n")
+            else:
+                self.check_data()
+        finally:
+            # Release whatever was opened, whether the checks passed,
+            # failed, or asked for a retry.
+            if self.bulk:
+                self.bulk.close()
+            if self.sql:
+                self.sql.commit()
+                self.sql.close()
+        log("ok\n")
+
+    ### Check basic path structure
+
+    def check_paths(self):
+        """Verify the basic on-disk layout and open the bulk data.
+
+        Raises FsckError if the SQL database file or the bulk data
+        directory is missing, or if the lock file can't be locked
+        exclusively.  On success, self.bulk holds an open BulkData.
+        """
+        log("checking paths\n")
+        # Close any bulk data left open before reopening it below
+        if self.bulk:
+            self.bulk.close()
+        if not os.path.isfile(self.sqlpath):
+            raise FsckError("SQL database missing (%s)", self.sqlpath)
+        if not os.path.isdir(self.bulkpath):
+            raise FsckError("Bulk data directory missing (%s)", self.bulkpath)
+        # Opening with "w" creates the lock file if it doesn't exist yet
+        with open(self.bulklock, "w") as lockfile:
+            if not nilmdb.utils.lock.exclusive_lock(lockfile):
+                raise FsckError('Database already locked by another process\n'
+                                'Make sure all other processes that might be '
+                                'using the database are stopped.\n'
+                                'Restarting apache will cause it to unlock '
+                                'the db until a request is received.')
+            # unlocked immediately
+        self.bulk = nilmdb.server.bulkdata.BulkData(self.basepath)
+
+    ### Check SQL database health
+
+    def check_sql(self):
+        """Open the SQLite database and load its tables into memory.
+
+        Populates:
+          self.sql             -- the open sqlite3 connection
+          self.stream_path     -- {stream id: path}
+          self.stream_layout   -- {stream id: layout}
+          self.stream_interval -- {stream id: [(start_time, end_time,
+                                  start_pos, end_pos), ...]}, ordered
+                                  by start_time
+          self.stream_meta     -- {stream id: {key: value}}
+
+        Raises FsckError if the schema version isn't the newest one
+        known to nilmdb.server, if a stream ID is duplicated, if an
+        interval or metadata row refers to an unknown stream, or if a
+        stream has a duplicated metadata key.
+        """
+        log("checking sqlite database\n")
+
+        self.sql = sqlite3.connect(self.sqlpath)
+        with self.sql:
+            cur = self.sql.cursor()
+            ver = cur.execute("PRAGMA user_version").fetchone()[0]
+            good = max(nilmdb.server.nilmdb._sql_schema_updates.keys())
+            if ver != good:
+                raise FsckError("database version %d too old, should be %d",
+                                ver, good)
+            self.stream_path = {}
+            self.stream_layout = {}
+            log("  loading paths\n")
+            result = cur.execute("SELECT id, path, layout FROM streams")
+            for (sid, path, layout) in result:
+                if sid in self.stream_path:
+                    raise FsckError("duplicated ID %d in stream IDs", sid)
+                self.stream_path[sid] = path
+                self.stream_layout[sid] = layout
+
+            log("  loading intervals\n")
+            self.stream_interval = defaultdict(list)
+            result = cur.execute("SELECT stream_id, start_time, end_time, "
+                                 "start_pos, end_pos FROM ranges "
+                                 "ORDER BY start_time")
+            for (sid, stime, etime, spos, epos) in result:
+                if sid not in self.stream_path:
+                    # Report the offending stream ID (this used to
+                    # reference an undefined name and die with NameError)
+                    raise FsckError("interval ID %d not in streams", sid)
+                self.stream_interval[sid].append((stime, etime, spos, epos))
+
+            log("  loading metadata\n")
+            self.stream_meta = defaultdict(dict)
+            result = cur.execute("SELECT stream_id, key, value FROM metadata")
+            for (sid, key, value) in result:
+                if sid not in self.stream_path:
+                    # Same fix as above: name the stream ID from the row
+                    raise FsckError("metadata ID %d not in streams", sid)
+                if key in self.stream_meta[sid]:
+                    raise FsckError("duplicate metadata key '%s' for stream %d",
+                                    key, sid)
+                self.stream_meta[sid][key] = value
+
+    ### Check streams and basic interval overlap
+
+    def check_streams(self):
+        """Check every stream's path, layout, bulkdata and intervals.
+
+        For each stream loaded by check_sql():
+          - the path must be unique among all streams
+          - the layout must be "<type>_<count>" with a known type and a
+            count from 1 through 1024
+          - the bulkdata directory and table must exist
+          - intervals must not overlap, in time or in file position
+          - the bulkdata must pass check_bulkdata() and be openable
+
+        Raises FsckError on the first problem found.
+        """
+        from collections import Counter
+
+        ids = list(self.stream_path.keys())
+        log("checking %s streams\n", "{:,d}".format(len(ids)))
+        # Tally the paths once, so the uniqueness test below doesn't
+        # have to rescan every path for every stream.
+        path_count = Counter(self.stream_path.values())
+        with Progress(len(ids)) as pbar:
+            for i, sid in enumerate(ids):
+                pbar.update(i)
+                path = self.stream_path[sid]
+
+                # unique path, valid layout
+                if path_count[path] != 1:
+                    raise FsckError("duplicated path %s", path)
+                fields = self.stream_layout[sid].split('_')
+                layout = fields[0]
+                if layout not in ('int8', 'int16', 'int32', 'int64',
+                                  'uint8', 'uint16', 'uint32', 'uint64',
+                                  'float32', 'float64'):
+                    raise FsckError("bad layout %s for %s", layout, path)
+                # A missing or non-numeric count is a database problem,
+                # so report it as one rather than letting an IndexError
+                # or ValueError escape.
+                try:
+                    count = int(fields[1])
+                except (IndexError, ValueError):
+                    raise FsckError("bad layout %s for %s",
+                                    self.stream_layout[sid], path)
+                if count < 1 or count > 1024:
+                    raise FsckError("bad count %d for %s", count, path)
+
+                # must exist in bulkdata
+                bulk = self.bulkpath + path
+                if not os.path.isdir(bulk):
+                    raise FsckError("%s: missing bulkdata dir", path)
+                if not nilmdb.server.bulkdata.Table.exists(bulk):
+                    raise FsckError("%s: bad bulkdata table", path)
+
+                # intervals don't overlap.  Abuse IntervalSet to check
+                # for intervals in file positions, too.
+                timeiset = IntervalSet()
+                posiset = IntervalSet()
+                for (stime, etime, spos, epos) in self.stream_interval[sid]:
+                    new = Interval(stime, etime)
+                    try:
+                        timeiset += new
+                    except IntervalError:
+                        raise FsckError("%s: overlap in intervals:\n"
+                                        "set: %s\nnew: %s",
+                                        path, str(timeiset), str(new))
+                    # Zero-length position ranges are not added to the set
+                    if spos != epos:
+                        new = Interval(spos, epos)
+                        try:
+                            posiset += new
+                        except IntervalError:
+                            raise FsckError("%s: overlap in file offsets:\n"
+                                            "set: %s\nnew: %s",
+                                            path, str(posiset), str(new))
+
+                # check bulkdata
+                self.check_bulkdata(sid, path, bulk)
+
+                # Check that we can open bulkdata
+                try:
+                    tab = None
+                    try:
+                        tab = nilmdb.server.bulkdata.Table(bulk)
+                    except Exception as e:
+                        raise FsckError("%s: can't open bulkdata: %s",
+                                        path, str(e))
+                finally:
+                    if tab:
+                        tab.close()
+
+    ### Check that bulkdata is good enough to be opened
+
+    @retry_if_raised(RetryFsck)
+    def check_bulkdata(self, sid, path, bulk):
+        """Check one stream's bulkdata directory before it gets opened.
+
+        sid  -- stream ID, used to look up the layout recorded in SQL
+        path -- stream path, used in error messages
+        bulk -- the stream's bulkdata directory on disk
+
+        Verifies the "_format" file (version 3, layout matching SQL),
+        that no data subdirectory is empty, and that every data file's
+        size is a multiple of the row size.  Problems are handed to the
+        fix_* helpers; when they raise RetryFsck the decorator reruns
+        this method.  Raises FsckError for problems that can't be fixed.
+        """
+        # NOTE(review): pickle.load can execute arbitrary code, so this
+        # should only ever be run on a database that is trusted.
+        with open(os.path.join(bulk, "_format"), "rb") as f:
+            fmt = pickle.load(f)
+        if fmt["version"] != 3:
+            raise FsckError("%s: bad or unsupported bulkdata version %d",
+                            path, fmt["version"])
+        # These two values aren't used below, but reading them still
+        # fails if the keys are missing or not convertible to int.
+        row_per_file = int(fmt["rows_per_file"])
+        files_per_dir = int(fmt["files_per_dir"])
+        layout = fmt["layout"]
+        if layout != self.stream_layout[sid]:
+            raise FsckError("%s: layout mismatch %s != %s", path,
+                            layout, self.stream_layout[sid])
+
+        # Every file should have a size that's the multiple of the row size
+        rkt = nilmdb.server.rocket.Rocket(layout, None)
+        row_size = rkt.binary_size
+        rkt.close()
+
+        # Find all directories
+        # (names of four or more lowercase hex digits, highest first)
+        regex = re.compile("^[0-9a-f]{4,}$")
+        subdirs = sorted(filter(regex.search, os.listdir(bulk)),
+                         key = lambda x: int(x, 16), reverse = True)
+        for subdir in subdirs:
+            # Find all files in that dir
+            subpath = os.path.join(bulk, subdir)
+            files = list(filter(regex.search, os.listdir(subpath)))
+            if not files:
+                self.fix_empty_subdir(subpath)
+                # fix_empty_subdir() always raises; this is a safeguard
+                raise RetryFsck
+            # Verify that their size is a multiple of the row size
+            for filename in files:
+                filepath = os.path.join(subpath, filename)
+                offset = os.path.getsize(filepath)
+                if offset % row_size:
+                    self.fix_bad_filesize(path, filepath, offset, row_size)
+
+    def fix_empty_subdir(self, subpath):
+        """Handle a bulkdata subdirectory that contains no data files.
+
+        subpath -- the offending subdirectory
+
+        Never returns normally.  Raises FixableFsckError when fixing is
+        disabled, FsckError when the directory holds anything other
+        than ".removed" files, and otherwise deletes the directory and
+        raises RetryFsck.
+        """
+        msg = sprintf("bulkdata path %s is missing data files", subpath)
+        if not self.fix:
+            raise FixableFsckError(msg)
+        # Try to fix it by just deleting whatever is present,
+        # as long as it's only ".removed" files.
+        err("\n%s\n", msg)
+        for fn in os.listdir(subpath):
+            if not fn.endswith(".removed"):
+                raise FsckError("can't fix automatically: please manually "
+                                "remove the file %s and try again",
+                                os.path.join(subpath, fn))
+        # Remove the whole thing
+        err("Removing empty subpath\n")
+        shutil.rmtree(subpath)
+        raise RetryFsck
+
+    def fix_bad_filesize(self, path, filepath, offset, row_size):
+        """Handle a data file whose size isn't a multiple of the row size.
+
+        path     -- stream path, used in messages
+        filepath -- the offending data file
+        offset   -- current size of that file in bytes
+        row_size -- size of one row in bytes
+
+        Never returns normally.  Raises FixableFsckError when fixing is
+        disabled; otherwise truncates the file down to the previous
+        multiple of row_size and raises RetryFsck.
+        """
+        extra = offset % row_size
+        msg = sprintf("%s: size of file %s (%d) is not a multiple" +
+                      " of row size (%d): %d extra bytes present",
+                      path, filepath, offset, row_size, extra)
+        if not self.fix:
+            raise FixableFsckError(msg)
+        # Try to fix it by just truncating the file
+        err("\n%s\n", msg)
+        newsize = offset - extra
+        err("Truncating file to %d bytes and retrying\n", newsize)
+        with open(filepath, "r+b") as f:
+            f.truncate(newsize)
+            raise RetryFsck
+
+    ### Check interval endpoints
+
+    def check_intervals(self):
+        """Check that every interval's endpoints can be read from bulkdata.
+
+        Opens each stream's table in turn and passes its intervals to
+        check_table_intervals(), advancing the progress bar by the
+        number of intervals handled.  Exceptions from opening a table
+        or from the per-table check propagate to the caller.
+        """
+        total_ints = sum(len(x) for x in self.stream_interval.values())
+        log("checking %s intervals\n", "{:,d}".format(total_ints))
+        done = 0
+        with Progress(total_ints) as pbar:
+            for sid in self.stream_interval:
+                bulk = self.bulkpath + self.stream_path[sid]
+                # Open the table before entering the try block: if the
+                # open fails there is nothing to close, and the cleanup
+                # must not hide that error behind an unbound "tab" (or
+                # close the previous stream's table a second time).
+                tab = nilmdb.server.bulkdata.Table(bulk)
+                try:
+                    def update(x):
+                        # x counts intervals within the current stream
+                        pbar.update(done + x)
+                    ints = self.stream_interval[sid]
+                    done += self.check_table_intervals(sid, ints, tab, update)
+                finally:
+                    tab.close()
+
+    def check_table_intervals(self, sid, ints, tab, update):
+        """Check that each interval's first and last rows are readable.
+
+        sid    -- stream ID
+        ints   -- list of (start_time, end_time, start_pos, end_pos)
+        tab    -- open bulkdata table for this stream
+        update -- callback given the index of the interval being checked
+
+        Returns the number of intervals.  If reading a row fails, the
+        interval is passed to fix_bad_interval() and RetryFsck is raised.
+        """
+        # look in the table to make sure we can pick out the interval's
+        # endpoints
+        path = self.stream_path[sid]
+        tab.file_open.cache_remove_all()
+        for (i, intv) in enumerate(ints):
+            update(i)
+            (stime, etime, spos, epos) = intv
+            # An empty interval has no rows to read; it only needs a
+            # position that lies within the table.
+            if spos == epos and spos >= 0 and spos <= tab.nrows:
+                continue
+            try:
+                # The values are unused; only whether the lookups
+                # succeed matters here.
+                srow = tab[spos]
+                erow = tab[epos-1]
+            except Exception as e:
+                self.fix_bad_interval(sid, intv, tab, str(e))
+                raise RetryFsck
+        return len(ints)
+
+    def fix_bad_interval(self, sid, intv, tab, msg):
+        """Handle an interval whose rows can't be read from the table.
+
+        sid  -- stream ID
+        intv -- the (start_time, end_time, start_pos, end_pos) tuple
+        tab  -- open bulkdata table for this stream
+        msg  -- description of the error hit while reading the rows
+
+        Never returns normally.  Raises FixableFsckError when fixing is
+        disabled.  Otherwise the interval is either shortened to end at
+        the table's last row or deleted from SQL, and RetryFsck is
+        raised; FsckError is raised if the SQL change doesn't affect
+        exactly one row.
+        """
+        path = self.stream_path[sid]
+        msg = sprintf("%s: interval %s error accessing rows: %s",
+                      path, str(intv), str(msg))
+        if not self.fix:
+            raise FixableFsckError(msg)
+        err("\n%s\n", msg)
+
+        (stime, etime, spos, epos) = intv
+        # If it's just that the end pos is more than the number of rows
+        # in the table, lower end pos and truncate interval time too.
+        if spos < tab.nrows and epos >= tab.nrows:
+            err("end position is past endrows, but it can be truncated\n")
+            err("old end: time %d, pos %d\n", etime, epos)
+            new_epos = tab.nrows
+            # NOTE(review): this assumes tab[row] yields the row's
+            # timestamp as a number -- confirm against Table.__getitem__
+            new_etime = tab[new_epos-1] + 1
+            err("new end: time %d, pos %d\n", new_etime, new_epos)
+            if stime < new_etime:
+                # Change it in SQL
+                with self.sql:
+                    cur = self.sql.cursor()
+                    cur.execute("UPDATE ranges SET end_time=?, end_pos=? "
+                                "WHERE stream_id=? AND start_time=? AND "
+                                "end_time=? AND start_pos=? AND end_pos=?",
+                                (new_etime, new_epos, sid, stime, etime,
+                                 spos, epos))
+                    if cur.rowcount != 1:
+                        raise FsckError("failed to fix SQL database")
+                raise RetryFsck
+            # Terminate this line like every other message, so the
+            # text that follows doesn't run on from it.
+            err("actually it can't be truncated; times are bad too\n")
+
+        # Otherwise, the only hope is to delete the interval entirely.
+        err("*** Deleting the entire interval from SQL.\n")
+        err("This may leave stale data on disk.  To fix that, copy all\n")
+        err("data from this stream to a new stream, then remove all data\n")
+        err("from and destroy %s.\n", path)
+        with self.sql:
+            cur = self.sql.cursor()
+            cur.execute("DELETE FROM ranges WHERE "
+                        "stream_id=? AND start_time=? AND "
+                        "end_time=? AND start_pos=? AND end_pos=?",
+                        (sid, stime, etime, spos, epos))
+            if cur.rowcount != 1:
+                raise FsckError("failed to remove interval")
+        raise RetryFsck
+
+    ### Check data in each interval
+
+    def check_data(self):
+        """Check the row data of every interval in every stream.
+
+        Opens each stream's table in turn and passes its intervals to
+        check_table_data(), advancing the progress bar by the number of
+        rows checked.  Exceptions from opening a table or from the
+        per-table check propagate to the caller.
+        """
+        # Each interval covers (end_pos - start_pos) rows
+        total_rows = sum(sum((y[3] - y[2]) for y in x)
+                         for x in self.stream_interval.values())
+        log("checking %s rows of data\n", "{:,d}".format(total_rows))
+        done = 0
+        with Progress(total_rows) as pbar:
+            for sid in self.stream_interval:
+                bulk = self.bulkpath + self.stream_path[sid]
+                # Open the table before entering the try block: if the
+                # open fails there is nothing to close, and the cleanup
+                # must not hide that error behind an unbound "tab" (or
+                # close the previous stream's table a second time).
+                tab = nilmdb.server.bulkdata.Table(bulk)
+                try:
+                    def update(x):
+                        # x counts rows within the current stream
+                        pbar.update(done + x)
+                    ints = self.stream_interval[sid]
+                    done += self.check_table_data(sid, ints, tab, update)
+                finally:
+                    tab.close()
+
+    def check_table_data(self, sid, ints, tab, update):
+        """Read every row of the given intervals and verify the timestamps.
+
+        sid    -- stream ID, used to look up the path and layout
+        ints   -- list of (start_time, end_time, start_pos, end_pos)
+        tab    -- open bulkdata table for this stream
+        update -- callback given the number of rows checked so far
+
+        Rows are fetched in chunks of at most `maxrows`.  Timestamps
+        must be strictly increasing within each chunk, and from one
+        chunk to the next within an interval.  Returns the number of
+        rows checked; raises FsckError if rows can't be read or the
+        timestamps aren't monotonic.
+        """
+        # Pull out all of the interval's data and verify that it's
+        # monotonic.
+        maxrows = 100000
+        path = self.stream_path[sid]
+        layout = self.stream_layout[sid]
+        dtype = nilmdb.client.numpyclient.layout_to_dtype(layout)
+        tab.file_open.cache_remove_all()
+        done = 0
+        for (_stime, _etime, spos, epos) in ints:
+            last_ts = None
+
+            # Break interval into maxrows-sized chunks
+            next_start = spos
+            while next_start < epos:
+                start = next_start
+                stop = min(start + maxrows, epos)
+                count = stop - start
+                next_start = stop
+
+                # Get raw data, convert to NumPy array
+                try:
+                    raw = tab.get_data(start, stop, binary = True)
+                    # frombuffer replaces the deprecated binary mode of
+                    # numpy.fromstring; the read-only view it returns
+                    # is fine since the data is only inspected.
+                    data = numpy.frombuffer(raw, dtype)
+                except Exception as e:
+                    raise FsckError("%s: failed to grab rows %d through %d: %s",
+                                    path, start, stop, repr(e))
+
+                # Verify that timestamps are monotonic
+                if (numpy.diff(data['timestamp']) <= 0).any():
+                    raise FsckError("%s: non-monotonic timestamp(s) in rows "
+                                    "%d through %d", path, start, stop)
+                first_ts = data['timestamp'][0]
+                if last_ts is not None and first_ts <= last_ts:
+                    raise FsckError("%s: first interval timestamp %d is not "
+                                    "greater than the previous last interval "
+                                    "timestamp %d, at row %d",
+                                    path, first_ts, last_ts, start)
+                last_ts = data['timestamp'][-1]
+
+                # These are probably fixable, by removing the offending
+                # intervals.  But I'm not going to bother implementing
+                # that yet.
+
+                # Done
+                done += count
+                update(done)
+        return done
--- a/nilmdb/scripts/nilmdb_fsck.py
+++ b/nilmdb/scripts/nilmdb_fsck.py
@@ -0,0 +1,27 @@
+#!/usr/bin/python
+
+import nilmdb.fsck
+import argparse
+import os
+import sys
+
+def main():
+    """Parse the command line and run the 'nilmdb-fsck' consistency check"""
+
+    parser = argparse.ArgumentParser(
+        description='Check database consistency',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument("-v", "--version", action="version",
+                        version=nilmdb.__version__)
+    parser.add_argument("-f", "--fix", action="store_true", default=False,
+                        help='Fix errors when possible '
+                        '(which may involve removing data)')
+    parser.add_argument("-n", "--no-data", action="store_true", default=False,
+                        help='Skip the slow full-data check')
+    parser.add_argument('database', help='Database directory')
+    args = parser.parse_args()
+
+    # Build the checker first, then run it with the requested options
+    checker = nilmdb.fsck.Fsck(args.database, args.fix)
+    checker.check(skip_data=args.no_data)
+
+if __name__ == "__main__":
+    main()
--- a/nilmdb/scripts/nilmdb_server.py
+++ b/nilmdb/scripts/nilmdb_server.py
@@ -1,38 +1,43 @@
 #!/usr/bin/python

-import nilmdb.server
-import argparse
 import os
+import sys
 import socket
+import argparse
+
+import cherrypy
+
+import nilmdb.server
+

 def main():
    """Main entry point for the 'nilmdb-server' command line script"""

    parser = argparse.ArgumentParser(
-        description = 'Run the NilmDB server',
-        formatter_class = argparse.ArgumentDefaultsHelpFormatter)
+        description='Run the NilmDB server',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

-    parser.add_argument("-V", "--version", action="version",
-                        version = nilmdb.__version__)
+    parser.add_argument("-v", "--version", action="version",
+                        version=nilmdb.__version__)

    group = parser.add_argument_group("Standard options")
    group.add_argument('-a', '--address',
-                       help = 'Only listen on the given address',
-                       default = '0.0.0.0')
-    group.add_argument('-p', '--port', help = 'Listen on the given port',
-                       type = int, default = 12380)
-    group.add_argument('-d', '--database', help = 'Database directory',
-                       default = "./db")
-    group.add_argument('-q', '--quiet', help = 'Silence output',
-                       action = 'store_true')
+                       help='Only listen on the given address',
+                       default='0.0.0.0')
+    group.add_argument('-p', '--port', help='Listen on the given port',
+                       type=int, default=12380)
+    group.add_argument('-d', '--database', help='Database directory',
+                       default="./db")
+    group.add_argument('-q', '--quiet', help='Silence output',
+                       action='store_true')
    group.add_argument('-t', '--traceback',
-                       help = 'Provide tracebacks in client errors',
-                       action = 'store_true', default = False)
+                       help='Provide tracebacks in client errors',
+                       action='store_true', default=False)

    group = parser.add_argument_group("Debug options")
-    group.add_argument('-y', '--yappi', help = 'Run under yappi profiler and '
+    group.add_argument('-y', '--yappi', help='Run under yappi profiler and '
                       'invoke interactive shell afterwards',
-                       action = 'store_true')
+                       action='store_true')

    args = parser.parse_args()

@@ -41,47 +46,51 @@ def main():
    db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(args.database)

    # Configure the server
-    if args.quiet:
-        embedded = True
-    else:
-        embedded = False
+    if not args.quiet:
+        cherrypy._cpconfig.environments['embedded']['log.screen'] = True
+
    server = nilmdb.server.Server(db,
-                                  host = args.address,
-                                  port = args.port,
-                                  embedded = embedded,
-                                  force_traceback = args.traceback)
+                                  host=args.address,
+                                  port=args.port,
+                                  force_traceback=args.traceback)

    # Print info
    if not args.quiet:
-        print "Version: %s" % nilmdb.__version__
-        print "Database: %s" % (os.path.realpath(args.database))
+        print("Version: %s" % nilmdb.__version__)
+        print("Database: %s" % (os.path.realpath(args.database)))
        if args.address == '0.0.0.0' or args.address == '::':
            host = socket.getfqdn()
        else:
            host = args.address
-        print "Server URL: http://%s:%d/" % ( host, args.port)
-        print "----"
+        print("Server URL: http://%s:%d/" % (host, args.port))
+        print("----")

    # Run it
+    try:
        if args.yappi:
-        print "Running in yappi"
+            print("Running in yappi")
            try:
                import yappi
                yappi.start()
-            server.start(blocking = True)
+                server.start(blocking=True)
            finally:
                yappi.stop()
-            yappi.print_stats(sort_type = yappi.SORTTYPE_TTOT, limit = 50)
+                stats = yappi.get_func_stats()
+                stats.sort("ttot")
+                stats.print_all()
                from IPython import embed
-            embed(header = "Use the yappi object to explore further, "
-                  "quit to exit")
+                embed(header="Use the `yappi` or `stats` object to explore "
+                      "further, quit to exit")
        else:
-        server.start(blocking = True)
-
-    # Clean up
+            server.start(blocking=True)
+    except nilmdb.server.serverutil.CherryPyExit:
+        print("Exiting due to CherryPy error", file=sys.stderr)
+        raise
+    finally:
        if not args.quiet:
-        print "Closing database"
+            print("Closing database")
            db.close()

+
 if __name__ == "__main__":
    main()
--- a/nilmdb/scripts/nilmtool.py
+++ b/nilmdb/scripts/nilmtool.py
@@ -2,9 +2,11 @@

 import nilmdb.cmdline

+
 def main():
    """Main entry point for the 'nilmtool' command line script"""
    nilmdb.cmdline.Cmdline().run()

+
 if __name__ == "__main__":
    main()
--- a/nilmdb/server/__init__.py
+++ b/nilmdb/server/__init__.py
@@ -1,20 +1,8 @@
 """nilmdb.server"""

-from __future__ import absolute_import
-
-# Try to set up pyximport to automatically rebuild Cython modules.  If
-# this doesn't work, it's OK, as long as the modules were built externally.
-# (e.g. python setup.py build_ext --inplace)
-try: # pragma: no cover
-    import Cython
-    import distutils.version
-    if (distutils.version.LooseVersion(Cython.__version__) <
-        distutils.version.LooseVersion("0.17")): # pragma: no cover
-        raise ImportError("Cython version too old")
-    import pyximport
-    pyximport.install(inplace = True, build_in_temp = False)
-except (ImportError, TypeError): # pragma: no cover
-    pass
+# Set up pyximport to automatically rebuild Cython modules if needed.
+import pyximport
+pyximport.install(inplace=True, build_in_temp=False)

 from nilmdb.server.nilmdb import NilmDB
 from nilmdb.server.server import Server, wsgi_application
--- a/nilmdb/server/bulkdata.py
+++ b/nilmdb/server/bulkdata.py
@@ -1,48 +1,54 @@
 # Fixed record size bulk data storage

-# Need absolute_import so that "import nilmdb" won't pull in
-# nilmdb.py, but will pull the parent nilmdb module instead.
-from __future__ import absolute_import
-from __future__ import division
-from nilmdb.utils.printf import *
-from nilmdb.utils.time import timestamp_to_string as timestamp_to_string
-import nilmdb.utils
-
 import os
-import cPickle as pickle
 import re
 import sys
+import pickle
 import tempfile

+from nilmdb.utils.printf import sprintf
+from nilmdb.utils.time import timestamp_to_string
+import nilmdb.utils
+
 import nilmdb.utils.lock
 from . import rocket

 # Up to 256 open file descriptors at any given time.
 # These variables are global so they can be used in the decorator arguments.
-table_cache_size = 16
-fd_cache_size = 16
+table_cache_size = 32
+fd_cache_size = 8

-@nilmdb.utils.must_close(wrap_verify = False)
-class BulkData(object):
+
+@nilmdb.utils.must_close(wrap_verify=False)
+class BulkData():
    def __init__(self, basepath, **kwargs):
+        if isinstance(basepath, str):
+            self.basepath = self._encode_filename(basepath)
+        else:
            self.basepath = basepath
-        self.root = os.path.join(self.basepath, "data")
-        self.lock = self.root + ".lock"
+        self.root = os.path.join(self.basepath, b"data")
+        self.lock = self.root + b".lock"
        self.lockfile = None

        # Tuneables
-        if "file_size" in kwargs:
+        if "file_size" in kwargs and kwargs["file_size"] is not None:
            self.file_size = kwargs["file_size"]
        else:
            # Default to approximately 128 MiB per file
            self.file_size = 128 * 1024 * 1024

-        if "files_per_dir" in kwargs:
+        if "files_per_dir" in kwargs and kwargs["files_per_dir"] is not None:
            self.files_per_dir = kwargs["files_per_dir"]
        else:
            # 32768 files per dir should work even on FAT32
            self.files_per_dir = 32768

+        if "initial_nrows" in kwargs and kwargs["initial_nrows"] is not None:
+            self.initial_nrows = kwargs["initial_nrows"]
+        else:
+            # First row is 0
+            self.initial_nrows = 0
+
        # Make root path
        if not os.path.isdir(self.root):
            os.mkdir(self.root)
@@ -50,7 +56,8 @@ class BulkData(object):
        # Create the lock
        self.lockfile = open(self.lock, "w")
        if not nilmdb.utils.lock.exclusive_lock(self.lockfile):
-            raise IOError('database at "' + self.basepath +
+            raise IOError('database at "' +
+                          self._decode_filename(self.basepath) +
                          '" is already locked by another process')

    def close(self):
@@ -60,21 +67,21 @@ class BulkData(object):
            self.lockfile.close()
            try:
                os.unlink(self.lock)
-            except OSError: # pragma: no cover
+            except OSError:
                pass
            self.lockfile = None

    def _encode_filename(self, path):
-        # Encode all paths to UTF-8, regardless of sys.getfilesystemencoding(),
-        # because we want to be able to represent all code points and the user
-        # will never be directly exposed to filenames.  We can then do path
-        # manipulations on the UTF-8 directly.
-        if isinstance(path, unicode):
+        # Translate unicode strings to raw bytes, if needed.  We
+        # always manipulate paths internally as bytes.
        return path.encode('utf-8')
-        return path
+
+    def _decode_filename(self, path):
+        # Translate raw bytes to unicode strings, escaping if needed
+        return path.decode('utf-8', errors='backslashreplace')

    def _create_check_ospath(self, ospath):
-        if ospath[-1] == '/':
+        if ospath[-1:] == b'/':
            raise ValueError("invalid path; should not end with a /")
        if Table.exists(ospath):
            raise ValueError("stream already exists at this path")
@@ -82,7 +89,7 @@ class BulkData(object):
            # Look for any files in subdirectories.  Fully empty subdirectories
            # are OK; they might be there during a rename
            for (root, dirs, files) in os.walk(ospath):
-                if len(files):
+                if files:
                    raise ValueError(
                        "non-empty subdirs of this path already exist")

@@ -91,13 +98,13 @@ class BulkData(object):
        don't exist.  Returns a list of elements that got created."""
        path = self._encode_filename(unicodepath)

-        if path[0] != '/':
-            raise ValueError("paths must start with /")
-        [ group, node ] = path.rsplit("/", 1)
-        if group == '':
+        if path[0:1] != b'/':
+            raise ValueError("paths must start with / ")
+        [group, node] = path.rsplit(b"/", 1)
+        if group == b'':
            raise ValueError("invalid path; path must contain at least one "
                             "folder")
-        if node == '':
+        if node == b'':
            raise ValueError("invalid path; should not end with a /")
        if not Table.valid_path(path):
            raise ValueError("path name is invalid or contains reserved words")
@@ -108,7 +115,7 @@ class BulkData(object):
        # os.path.join)

        # Make directories leading up to this one
-        elements = path.lstrip('/').split('/')
+        elements = path.lstrip(b'/').split(b'/')
        made_dirs = []
        try:
            # Make parent elements
@@ -119,15 +126,11 @@ class BulkData(object):
                if not os.path.isdir(ospath):
                    os.mkdir(ospath)
                    made_dirs.append(ospath)
-        except Exception as e:
-            # Try to remove paths that we created; ignore errors
-            exc_info = sys.exc_info()
-            for ospath in reversed(made_dirs): # pragma: no cover (hard to hit)
-                try:
+        except Exception:
+            # Remove paths that we created
+            for ospath in reversed(made_dirs):
                os.rmdir(ospath)
-                except OSError:
-                    pass
-            raise exc_info[1], None, exc_info[2]
+            raise

        return elements

@@ -162,7 +165,7 @@ class BulkData(object):
                os.rmdir(ospath)
            except OSError:
                pass
-            raise exc_info[1], None, exc_info[2]
+            raise exc_info[1].with_traceback(exc_info[2])

        # Success
        return
@@ -170,8 +173,8 @@ class BulkData(object):
    def _remove_leaves(self, unicodepath):
        """Remove empty directories starting at the leaves of unicodepath"""
        path = self._encode_filename(unicodepath)
-        elements = path.lstrip('/').split('/')
-        for i in reversed(range(len(elements))):
+        elements = path.lstrip(b'/').split(b'/')
+        for i in reversed(list(range(len(elements)))):
            ospath = os.path.join(self.root, *elements[0:i+1])
            try:
                os.rmdir(ospath)
@@ -185,18 +188,21 @@ class BulkData(object):
        newpath = self._encode_filename(newunicodepath)

        # Get OS paths
-        oldelements = oldpath.lstrip('/').split('/')
+        oldelements = oldpath.lstrip(b'/').split(b'/')
        oldospath = os.path.join(self.root, *oldelements)
-        newelements = newpath.lstrip('/').split('/')
+        newelements = newpath.lstrip(b'/').split(b'/')
        newospath = os.path.join(self.root, *newelements)

        # Basic checks
        if oldospath == newospath:
            raise ValueError("old and new paths are the same")

+        # Remove Table object at old path from cache
+        self.getnode.cache_remove(self, oldunicodepath)
+
        # Move the table to a temporary location
-        tmpdir = tempfile.mkdtemp(prefix = "rename-", dir = self.root)
-        tmppath = os.path.join(tmpdir, "table")
+        tmpdir = tempfile.mkdtemp(prefix=b"rename-", dir=self.root)
+        tmppath = os.path.join(tmpdir, b"table")
        os.rename(oldospath, tmppath)

        try:
@@ -224,7 +230,7 @@ class BulkData(object):
        path = self._encode_filename(unicodepath)

        # Get OS path
-        elements = path.lstrip('/').split('/')
+        elements = path.lstrip(b'/').split(b'/')
        ospath = os.path.join(self.root, *elements)

        # Remove Table object from cache
@@ -233,7 +239,7 @@ class BulkData(object):
        # Remove the contents of the target directory
        if not Table.exists(ospath):
            raise ValueError("nothing at that path")
-        for (root, dirs, files) in os.walk(ospath, topdown = False):
+        for (root, dirs, files) in os.walk(ospath, topdown=False):
            for name in files:
                os.remove(os.path.join(root, name))
            for name in dirs:
@@ -243,18 +249,19 @@ class BulkData(object):
        self._remove_leaves(unicodepath)

    # Cache open tables
-    @nilmdb.utils.lru_cache(size = table_cache_size,
-                            onremove = lambda x: x.close())
+    @nilmdb.utils.lru_cache(size=table_cache_size,
+                            onremove=lambda x: x.close())
    def getnode(self, unicodepath):
        """Return a Table object corresponding to the given database
        path, which must exist."""
        path = self._encode_filename(unicodepath)
-        elements = path.lstrip('/').split('/')
+        elements = path.lstrip(b'/').split(b'/')
        ospath = os.path.join(self.root, *elements)
-        return Table(ospath)
+        return Table(ospath, self.initial_nrows)

-@nilmdb.utils.must_close(wrap_verify = False)
-class Table(object):
+
+@nilmdb.utils.must_close(wrap_verify=False)
+class Table():
    """Tools to help access a single table (data at a specific OS path)."""
    # See design.md for design details

@@ -262,12 +269,12 @@ class Table(object):
    @classmethod
    def valid_path(cls, root):
        """Return True if a root path is a valid name"""
-        return "_format" not in root.split("/")
+        return b"_format" not in root.split(b"/")

    @classmethod
    def exists(cls, root):
        """Return True if a table appears to exist at this OS path"""
-        return os.path.isfile(os.path.join(root, "_format"))
+        return os.path.isfile(os.path.join(root, b"_format"))

    @classmethod
    def create(cls, root, layout, file_size, files_per_dir):
@@ -280,23 +287,26 @@ class Table(object):
        rows_per_file = max(file_size // rkt.binary_size, 1)
        rkt.close()

-        fmt = { "rows_per_file": rows_per_file,
+        fmt = {
+            "rows_per_file": rows_per_file,
            "files_per_dir": files_per_dir,
            "layout": layout,
-                "version": 3 }
-        with open(os.path.join(root, "_format"), "wb") as f:
+            "version": 3
+        }
+        with open(os.path.join(root, b"_format"), "wb") as f:
            pickle.dump(fmt, f, 2)

    # Normal methods
-    def __init__(self, root):
+    def __init__(self, root, initial_nrows=0):
        """'root' is the full OS path to the directory of this table"""
        self.root = root
+        self.initial_nrows = initial_nrows

        # Load the format
-        with open(os.path.join(self.root, "_format"), "rb") as f:
+        with open(os.path.join(self.root, b"_format"), "rb") as f:
            fmt = pickle.load(f)

-        if fmt["version"] != 3: # pragma: no cover
+        if fmt["version"] != 3:
            # Old versions used floating point timestamps, which aren't
            # valid anymore.
            raise NotImplementedError("old version " + str(fmt["version"]) +
@@ -326,31 +336,38 @@ class Table(object):
        # greater than the row number of any piece of data that
        # currently exists, not necessarily all data that _ever_
        # existed.
-        regex = re.compile("^[0-9a-f]{4,}$")
+        regex = re.compile(b"^[0-9a-f]{4,}$")

        # Find the last directory.  We sort and loop through all of them,
        # starting with the numerically greatest, because the dirs could be
-        # empty if something was deleted.
+        # empty if something was deleted but the directory was unexpectedly
+        # not deleted.
        subdirs = sorted(filter(regex.search, os.listdir(self.root)),
-                         key = lambda x: int(x, 16), reverse = True)
+                         key=lambda x: int(x, 16), reverse=True)

        for subdir in subdirs:
            # Now find the last file in that dir
            path = os.path.join(self.root, subdir)
-            files = filter(regex.search, os.listdir(path))
-            if not files: # pragma: no cover (shouldn't occur)
+            files = list(filter(regex.search, os.listdir(path)))
+            if not files:
                # Empty dir: try the next one
                continue

            # Find the numerical max
-            filename = max(files, key = lambda x: int(x, 16))
+            filename = max(files, key=lambda x: int(x, 16))
            offset = os.path.getsize(os.path.join(self.root, subdir, filename))

            # Convert to row number
            return self._row_from_offset(subdir, filename, offset)

-        # No files, so no data
-        return 0
+        # No files, so no data.  We typically start at row 0 in this
+        # case, although initial_nrows is specified during some tests
+        # to exercise other parts of the code better.  Since we have
+        # no files yet, round initial_nrows up so it points to a row
+        # that would begin a new file.
+        nrows = ((self.initial_nrows + (self.rows_per_file - 1)) //
+                 self.rows_per_file) * self.rows_per_file
+        return nrows

    def _offset_from_row(self, row):
        """Return a (subdir, filename, offset, count) tuple:
@@ -363,8 +380,8 @@ class Table(object):
        filenum = row // self.rows_per_file
        # It's OK if these format specifiers are too short; the filenames
        # will just get longer but will still sort correctly.
-        dirname = sprintf("%04x", filenum // self.files_per_dir)
-        filename = sprintf("%04x", filenum % self.files_per_dir)
+        dirname = sprintf(b"%04x", filenum // self.files_per_dir)
+        filename = sprintf(b"%04x", filenum % self.files_per_dir)
        offset = (row % self.rows_per_file) * self.row_size
        count = self.rows_per_file - (row % self.rows_per_file)
        return (dirname, filename, offset, count)
@@ -372,14 +389,14 @@ class Table(object):
    def _row_from_offset(self, subdir, filename, offset):
        """Return the row number that corresponds to the given
        'subdir/filename' and byte-offset within that file."""
-        if (offset % self.row_size) != 0: # pragma: no cover
+        if (offset % self.row_size) != 0:
            # this shouldn't occur, unless there is some corruption somewhere
            raise ValueError("file offset is not a multiple of data size")
        filenum = int(subdir, 16) * self.files_per_dir + int(filename, 16)
        row = (filenum * self.rows_per_file) + (offset // self.row_size)
        return row

-    def _remove_or_truncate_file(self, subdir, filename, offset = 0):
+    def _remove_or_truncate_file(self, subdir, filename, offset=0):
        """Remove the given file, and remove the subdirectory too
        if it's empty.  If offset is nonzero, truncate the file
        to that size instead."""
@@ -399,8 +416,8 @@ class Table(object):
                pass

    # Cache open files
-    @nilmdb.utils.lru_cache(size = fd_cache_size,
-                            onremove = lambda f: f.close())
+    @nilmdb.utils.lru_cache(size=fd_cache_size,
+                            onremove=lambda f: f.close())
    def file_open(self, subdir, filename):
        """Open and map a given 'subdir/filename' (relative to self.root).
        Will be automatically closed when evicted from the cache."""
@@ -413,12 +430,14 @@ class Table(object):
        return rocket.Rocket(self.layout,
                             os.path.join(self.root, subdir, filename))

-    def append_data(self, data, start, end, binary = False):
+    def append_data(self, data, start, end, binary=False):
        """Parse the formatted string in 'data', according to the
        current layout, and append it to the table.  If any timestamps
        are non-monotonic, or don't fall between 'start' and 'end',
        a ValueError is raised.

+        Note that data is always of 'bytes' type.
+
        If 'binary' is True, the data should be in raw binary format
        instead: little-endian, matching the current table's layout,
        including the int64 timestamp.
@@ -435,7 +454,7 @@ class Table(object):
            while data_offset < len(data):
                # See how many rows we can fit into the current file,
                # and open it
-                (subdir, fname, offset, count) = self._offset_from_row(tot_rows)
+                (subdir, fname, offs, count) = self._offset_from_row(tot_rows)
                f = self.file_open(subdir, fname)

                # Ask the rocket object to parse and append up to "count"
@@ -459,9 +478,9 @@ class Table(object):
                        if binary:
                            raise IndexError
                        bad = data.splitlines()[linenum-1]
-                        bad += '\n' + ' ' * (colnum - 1) + '^'
+                        bad += b'\n' + b' ' * (colnum - 1) + b'^'
                    except IndexError:
-                        bad = ""
+                        bad = b""
                    if errtype == rocket.ERR_NON_MONOTONIC:
                        err = "timestamp is not monotonically increasing"
                    elif errtype == rocket.ERR_OUT_OF_INTERVAL:
@@ -475,16 +494,17 @@ class Table(object):
                                          timestamp_to_string(end))
                    else:
                        err = str(obj)
+                    bad_str = bad.decode('utf-8', errors='backslashreplace')
                    raise ValueError("error parsing input data: " +
-                                     where + err + "\n" + bad)
+                                     where + err + "\n" + bad_str)
                tot_rows += added_rows
        except Exception:
            # Some failure, so try to roll things back by truncating or
            # deleting files that we may have appended data to.
            cleanpos = self.nrows
            while cleanpos <= tot_rows:
-                (subdir, fname, offset, count) = self._offset_from_row(cleanpos)
-                self._remove_or_truncate_file(subdir, fname, offset)
+                (subdir, fname, offs, count) = self._offset_from_row(cleanpos)
+                self._remove_or_truncate_file(subdir, fname, offs)
                cleanpos += count
            # Re-raise original exception
            raise
@@ -492,14 +512,11 @@ class Table(object):
            # Success, so update self.nrows accordingly
            self.nrows = tot_rows

-    def get_data(self, start, stop, binary = False):
+    def get_data(self, start, stop, binary=False):
        """Extract data corresponding to Python range [n:m],
        and returns a formatted string"""
-        if (start is None or
-            stop is None or
-            start > stop or
-            start < 0 or
-            stop > self.nrows):
+        if (start is None or stop is None or
+                start > stop or start < 0 or stop > self.nrows):
            raise IndexError("Index out of range")

        ret = []
@@ -539,7 +556,7 @@ class Table(object):
        # file.  Only when the list covers the entire extent of the
        # file will that file be removed.
        datafile = os.path.join(self.root, subdir, filename)
-        cachefile = datafile + ".removed"
+        cachefile = datafile + b".removed"
        try:
            with open(cachefile, "rb") as f:
                ranges = pickle.load(f)
@@ -566,7 +583,8 @@ class Table(object):
                # Not connected; append previous and start again
                merged.append(prev)
                prev = new
-        if prev is not None:
+        # Last range we were looking at goes into the file.  We know
+        # there was at least one (the one we just removed).
        merged.append(prev)

        # If the range covered the whole file, we can delete it now.
--- a/nilmdb/server/errors.py
+++ b/nilmdb/server/errors.py
@@ -1,12 +1,15 @@
 """Exceptions"""

+
 class NilmDBError(Exception):
    """Base exception for NilmDB errors"""
-    def __init__(self, message = "Unspecified error"):
-        Exception.__init__(self, message)
+    def __init__(self, msg="Unspecified error"):
+        super().__init__(msg)
+

 class StreamError(NilmDBError):
    pass

+
 class OverlapError(NilmDBError):
    pass
--- a/nilmdb/server/interval.pyx
+++ b/nilmdb/server/interval.pyx
@@ -1,3 +1,5 @@
+# cython: language_level=2
+
 """Interval, IntervalSet

 The Interval implemented here is just like
@@ -58,9 +60,19 @@ cdef class Interval:
        return ("[" + timestamp_to_string(self.start) +
                " -> " + timestamp_to_string(self.end) + ")")

-    def __cmp__(self, Interval other):
-        """Compare two intervals.  If non-equal, order by start then end"""
-        return cmp(self.start, other.start) or cmp(self.end, other.end)
+    # Compare two intervals.  If non-equal, order by start then end
+    def __lt__(self, Interval other):
+        return (self.start, self.end) < (other.start, other.end)
+    def __gt__(self, Interval other):
+        return (self.start, self.end) > (other.start, other.end)
+    def __le__(self, Interval other):
+        return (self.start, self.end) <= (other.start, other.end)
+    def __ge__(self, Interval other):
+        return (self.start, self.end) >= (other.start, other.end)
+    def __eq__(self, Interval other):
+        return (self.start, self.end) == (other.start, other.end)
+    def __ne__(self, Interval other):
+        return (self.start, self.end) != (other.start, other.end)

    cpdef intersects(self, Interval other):
        """Return True if two Interval objects intersect"""
--- a/nilmdb/server/nilmdb.py
+++ b/nilmdb/server/nilmdb.py
@@ -7,12 +7,13 @@ Object that represents a NILM database file.
 Manages both the SQL database and the table storage backend.
 """

-# Need absolute_import so that "import nilmdb" won't pull in
-# nilmdb.py, but will pull the parent nilmdb module instead.
-from __future__ import absolute_import
+import os
+import errno
+import sqlite3
+
 import nilmdb.utils
-from nilmdb.utils.printf import *
-from nilmdb.utils.time import timestamp_to_string
+from nilmdb.utils.printf import printf
+from nilmdb.utils.time import timestamp_to_bytes

 from nilmdb.utils.interval import IntervalError
 from nilmdb.server.interval import Interval, DBInterval, IntervalSet
@@ -20,11 +21,6 @@ from nilmdb.server.interval import Interval, DBInterval, IntervalSet
 from nilmdb.server import bulkdata
 from nilmdb.server.errors import NilmDBError, StreamError, OverlapError

-import sqlite3
-import os
-import errno
-import bisect
-
 # Note about performance and transactions:
 #
 # Committing a transaction in the default sync mode (PRAGMA synchronous=FULL)
@@ -38,7 +34,7 @@ import bisect
 # seems that 'PRAGMA synchronous=NORMAL' and 'PRAGMA journal_mode=WAL'
 # give an equivalent speedup more safely.  That is what is used here.
 _sql_schema_updates = {
-    0: { "next": 1, "sql": """
+    0: {"next": 1, "sql": """
    -- All streams
    CREATE TABLE streams(
        id INTEGER PRIMARY KEY,		-- stream ID
@@ -62,29 +58,33 @@ _sql_schema_updates = {
        end_pos INTEGER NOT NULL
    );
    CREATE INDEX _ranges_index ON ranges (stream_id, start_time, end_time);
-    """ },
+    """},

-    1: { "next": 3, "sql": """
+    1: {"next": 3, "sql": """
    -- Generic dictionary-type metadata that can be associated with a stream
    CREATE TABLE metadata(
        stream_id INTEGER NOT NULL,
        key TEXT NOT NULL,
        value TEXT
    );
-    """ },
+    """},

-    2: { "error": "old format with floating-point timestamps requires "
-         "nilmdb 1.3.1 or older" },
+    2: {"error": "old format with floating-point timestamps requires "
+        "nilmdb 1.3.1 or older"},

-    3: { "next": None },
+    3: {"next": None},
 }

+
@nilmdb.utils.must_close()
-class NilmDB(object):
+class NilmDB():
    verbose = 0

-    def __init__(self, basepath, max_results=None,
-                 max_removals=None, bulkdata_args=None):
+    def __init__(self, basepath,
+                 max_results=None,
+                 max_removals=None,
+                 max_int_removals=None,
+                 bulkdata_args=None):
        """Initialize NilmDB at the given basepath.
        Other arguments are for debugging / testing:

@@ -92,7 +92,10 @@ class NilmDB(object):
        stream_intervals or stream_extract response.

        'max_removals' is the max rows to delete at once
-        in stream_move.
+        in stream_remove.
+
+        'max_int_removals' is the max intervals to delete
+        at once in stream_remove.

        'bulkdata_args' is kwargs for the bulkdata module.
        """
@@ -106,9 +109,7 @@ class NilmDB(object):
        try:
            os.makedirs(self.basepath)
        except OSError as e:
-            if e.errno != errno.EEXIST: # pragma: no cover
-                # (no coverage, because it's hard to trigger this case
-                # if tests are run as root)
+            if e.errno != errno.EEXIST:
                raise IOError("can't create tree " + self.basepath)

        # Our data goes inside it
@@ -116,10 +117,10 @@ class NilmDB(object):

        # SQLite database too
        sqlfilename = os.path.join(self.basepath, "data.sql")
-        self.con = sqlite3.connect(sqlfilename, check_same_thread = True)
+        self.con = sqlite3.connect(sqlfilename, check_same_thread=True)
        try:
            self._sql_schema_update()
-        except Exception: # pragma: no cover
+        except Exception:
            self.data.close()
            raise

@@ -134,6 +135,9 @@ class NilmDB(object):
        # Remove up to this many rows per call to stream_remove.
        self.max_removals = max_removals or 1048576

+        # Remove up to this many intervals per call to stream_remove.
+        self.max_int_removals = max_int_removals or 4096
+
    def get_basepath(self):
        return self.basepath

@@ -141,6 +145,7 @@ class NilmDB(object):
        if self.con:
            self.con.commit()
            self.con.close()
+            self.con = None
        self.data.close()

    def _sql_schema_update(self):
@@ -149,18 +154,18 @@ class NilmDB(object):
        oldversion = version

        while True:
-            if version not in _sql_schema_updates: # pragma: no cover
+            if version not in _sql_schema_updates:
                raise Exception(self.basepath + ": unknown database version "
                                + str(version))
            update = _sql_schema_updates[version]
-            if "error" in update: # pragma: no cover
+            if "error" in update:
                raise Exception(self.basepath + ": can't use database version "
                                + str(version) + ": " + update["error"])
            if update["next"] is None:
                break
            cur.executescript(update["sql"])
            version = update["next"]
-            if self.verbose: # pragma: no cover
+            if self.verbose:
                printf("Database schema updated to %d\n", version)

        if version != oldversion:
@@ -176,7 +181,7 @@ class NilmDB(object):
            raise NilmDBError("start must precede end")
        return (start, end)

-    @nilmdb.utils.lru_cache(size = 16)
+    @nilmdb.utils.lru_cache(size=64)
    def _get_intervals(self, stream_id):
        """
        Return a mutable IntervalSet corresponding to the given stream ID.
@@ -191,7 +196,7 @@ class NilmDB(object):
                iset += DBInterval(start_time, end_time,
                                   start_time, end_time,
                                   start_pos, end_pos)
-        except IntervalError: # pragma: no cover
+        except IntervalError:
            raise NilmDBError("unexpected overlap in ranges table!")

        return iset
@@ -218,10 +223,6 @@ class NilmDB(object):
        # Load this stream's intervals
        iset = self._get_intervals(stream_id)

-        # Check for overlap
-        if iset.intersects(interval): # pragma: no cover (gets caught earlier)
-            raise NilmDBError("new interval overlaps existing data")
-
        # Check for adjacency.  If there's a stream in the database
        # that ends exactly when this one starts, and the database
        # rows match up, we can make one interval that covers the
@@ -264,10 +265,6 @@ class NilmDB(object):
         original: original DBInterval; must be already present in DB
        to_remove: DBInterval to remove; must be subset of 'original'
        """
-        # Just return if we have nothing to remove
-        if remove.start == remove.end: # pragma: no cover
-            return
-
        # Load this stream's intervals
        iset = self._get_intervals(stream_id)

@@ -282,7 +279,8 @@ class NilmDB(object):
        # the removed piece was in the middle.
        def add(iset, start, end, start_pos, end_pos):
            iset += DBInterval(start, end, start, end, start_pos, end_pos)
-            self._sql_interval_insert(stream_id, start, end, start_pos, end_pos)
+            self._sql_interval_insert(stream_id, start, end,
+                                      start_pos, end_pos)

        if original.start != remove.start:
            # Interval before the removed region
@@ -299,7 +297,7 @@ class NilmDB(object):

        return

-    def stream_list(self, path = None, layout = None, extended = False):
+    def stream_list(self, path=None, layout=None, extended=False):
        """Return list of lists of all streams in the database.

        If path is specified, include only streams with a path that
@@ -308,10 +306,10 @@ class NilmDB(object):
        If layout is specified, include only streams with a layout
        that matches the given string.

-        If extended = False, returns a list of lists containing
+        If extended=False, returns a list of lists containing
        the path and layout: [ path, layout ]

-        If extended = True, returns a list of lists containing
+        If extended=True, returns a list of lists containing
        more information:
           path
           layout
@@ -338,9 +336,9 @@ class NilmDB(object):
            params += (path,)
        query += " GROUP BY streams.id ORDER BY streams.path"
        result = self.con.execute(query, params).fetchall()
-        return [ list(x) for x in result ]
+        return [list(x) for x in result]

-    def stream_intervals(self, path, start = None, end = None, diffpath = None):
+    def stream_intervals(self, path, start=None, end=None, diffpath=None):
        """
        List all intervals in 'path' between 'start' and 'end'.  If
        'diffpath' is not none, list instead the set-difference
@@ -412,8 +410,8 @@ class NilmDB(object):

    def stream_set_metadata(self, path, data):
        """Set stream metadata from a dictionary, e.g.
-           { description = 'Downstairs lighting',
-             v_scaling = 123.45 }
+           { description: 'Downstairs lighting',
+             v_scaling: 123.45 }
           This replaces all existing metadata.
           """
        stream_id = self._stream_id(path)
@@ -461,7 +459,7 @@ class NilmDB(object):

        # Verify that no intervals are present, and clear the cache
        iset = self._get_intervals(stream_id)
-        if len(iset):
+        if iset:
            raise NilmDBError("all intervals must be removed before "
                              "destroying a stream")
        self._get_intervals.cache_remove(self, stream_id)
@@ -475,7 +473,7 @@ class NilmDB(object):
            con.execute("DELETE FROM ranges WHERE stream_id=?", (stream_id,))
            con.execute("DELETE FROM streams WHERE id=?", (stream_id,))

-    def stream_insert(self, path, start, end, data, binary = False):
+    def stream_insert(self, path, start, end, data, binary=False):
        """Insert new data into the database.
           path: Path at which to add the data
           start: Starting timestamp
@@ -507,6 +505,17 @@ class NilmDB(object):
        # And that's all
        return

+    def _bisect_left(self, a, x, lo, hi):
+        # Pure-Python bisect.bisect_left(a, x, lo, hi); bisect's fast C
+        # implementation chokes on large indices on 32-bit systems.
+        while lo < hi:
+            mid = (lo + hi) // 2
+            if a[mid] < x:
+                lo = mid + 1
+            else:
+                hi = mid
+        return lo
+
    def _find_start(self, table, dbinterval):
        """
        Given a DBInterval, find the row in the database that
@@ -517,7 +526,7 @@ class NilmDB(object):
        # Optimization for the common case where an interval wasn't truncated
        if dbinterval.start == dbinterval.db_start:
            return dbinterval.db_startpos
-        return bisect.bisect_left(table,
+        return self._bisect_left(table,
                                 dbinterval.start,
                                 dbinterval.db_startpos,
                                 dbinterval.db_endpos)
@@ -536,13 +545,13 @@ class NilmDB(object):
        # want to include the given timestamp in the results.  This is
        # so a queries like 1:00 -> 2:00 and 2:00 -> 3:00 return
        # non-overlapping data.
-        return bisect.bisect_left(table,
+        return self._bisect_left(table,
                                 dbinterval.end,
                                 dbinterval.db_startpos,
                                 dbinterval.db_endpos)

-    def stream_extract(self, path, start = None, end = None,
-                       count = False, markup = False, binary = False):
+    def stream_extract(self, path, start=None, end=None,
+                       count=False, markup=False, binary=False):
        """
        Returns (data, restart) tuple.

@@ -598,8 +607,8 @@ class NilmDB(object):

            # Add markup
            if markup:
-                result.append("# interval-start " +
-                              timestamp_to_string(interval.start) + "\n")
+                result.append(b"# interval-start " +
+                              timestamp_to_bytes(interval.start) + b"\n")

            # Gather these results up
            result.append(table.get_data(row_start, row_end, binary))
@@ -610,18 +619,19 @@ class NilmDB(object):
            # Add markup, and exit if restart is set.
            if restart is not None:
                if markup:
-                    result.append("# interval-end " +
-                                  timestamp_to_string(restart) + "\n")
+                    result.append(b"# interval-end " +
+                                  timestamp_to_bytes(restart) + b"\n")
                break
            if markup:
-                result.append("# interval-end " +
-                              timestamp_to_string(interval.end) + "\n")
+                result.append(b"# interval-end " +
+                              timestamp_to_bytes(interval.end) + b"\n")

        if count:
            return matched
-        return ("".join(result), restart)
+        full_result = b"".join(result)
+        return (full_result, restart)

-    def stream_remove(self, path, start = None, end = None):
+    def stream_remove(self, path, start=None, end=None):
        """
        Remove data from the specified time interval within a stream.

@@ -643,13 +653,22 @@ class NilmDB(object):
        to_remove = Interval(start, end)
        removed = 0
        remaining = self.max_removals
+        int_remaining = self.max_int_removals
        restart = None

        # Can't remove intervals from within the iterator, so we need to
        # remember what's currently in the intersection now.
-        all_candidates = list(intervals.intersection(to_remove, orig = True))
+        all_candidates = list(intervals.intersection(to_remove, orig=True))
+
+        remove_start = None
+        remove_end = None

        for (dbint, orig) in all_candidates:
+            # Stop if we've hit the max number of interval removals
+            if int_remaining <= 0:
+                restart = dbint.start
+                break
+
            # Find row start and end
            row_start = self._find_start(table, dbint)
            row_end = self._find_end(table, dbint)
@@ -670,14 +689,29 @@ class NilmDB(object):
            # Remove interval from the database
            self._remove_interval(stream_id, orig, dbint)

-            # Remove data from the underlying table storage
-            table.remove(row_start, row_end)
+            # Remove data from the underlying table storage,
+            # coalescing adjacent removals to reduce the number of calls
+            # to table.remove.
+            if remove_end == row_start:
+                # Extend our coalesced region
+                remove_end = row_end
+            else:
+                # Perform previous removal, then save this one
+                if remove_end is not None:
+                    table.remove(remove_start, remove_end)
+                remove_start = row_start
+                remove_end = row_end

            # Count how many were removed
            removed += row_end - row_start
            remaining -= row_end - row_start
+            int_remaining -= 1

            if restart is not None:
                break

+        # Perform any final coalesced removal
+        if remove_end is not None:
+            table.remove(remove_start, remove_end)
+
        return (removed, restart)
--- a/nilmdb/server/rbtree.pxd
+++ b/nilmdb/server/rbtree.pxd
@@ -1,3 +1,5 @@
+# cython: language_level=2
+
 cdef class RBNode:
    cdef public object obj
    cdef public double start, end
--- a/nilmdb/server/rbtree.pyx
+++ b/nilmdb/server/rbtree.pyx
@@ -1,5 +1,6 @@
 # cython: profile=False
 # cython: cdivision=True
+# cython: language_level=2

 """
 Jim Paris <jim@jtan.com>
--- a/nilmdb/server/rocket.c
+++ b/nilmdb/server/rocket.c
@@ -5,6 +5,9 @@
 #include <ctype.h>
 #include <stdint.h>

+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+
 /* Values missing from stdint.h */
 #define UINT8_MIN 0
 #define UINT16_MIN 0
@@ -19,16 +22,9 @@

 typedef int64_t timestamp_t;

-/* This code probably needs to be double-checked for the case where
-   sizeof(long) != 8, so enforce that here with something that will
-   fail at build time.  We assume that the python integer type can
-   hold an int64_t. */
-const static char __long_ok[1 - 2*!(sizeof(int64_t) ==
-				    sizeof(long int))] = { 0 };
-
 /* Somewhat arbitrary, just so we can use fixed sizes for strings
   etc. */
-static const int MAX_LAYOUT_COUNT = 128;
+static const int MAX_LAYOUT_COUNT = 1024;

 /* Error object and constants */
 static PyObject *ParseError;
@@ -58,7 +54,7 @@ static PyObject *raise_str(int line, int col, int code, const char *string)
 static PyObject *raise_int(int line, int col, int code, int64_t num)
 {
 	PyObject *o;
-	o = Py_BuildValue("(iiil)", line, col, code, num);
+	o = Py_BuildValue("(iiiL)", line, col, code, (long long)num);
 	if (o != NULL) {
 		PyErr_SetObject(ParseError, o);
 		Py_DECREF(o);
@@ -142,7 +138,7 @@ static void Rocket_dealloc(Rocket *self)
 		fclose(self->file);
 		self->file = NULL;
 	}
-	self->ob_type->tp_free((PyObject *)self);
+	Py_TYPE(self)->tp_free((PyObject *)self);
 }

 static PyObject *Rocket_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
@@ -164,13 +160,19 @@ static PyObject *Rocket_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 static int Rocket_init(Rocket *self, PyObject *args, PyObject *kwds)
 {
 	const char *layout, *path;
+        int pathlen;
 	static char *kwlist[] = { "layout", "file", NULL };
-	if (!PyArg_ParseTupleAndKeywords(args, kwds, "sz", kwlist,
-					 &layout, &path))
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "sz#", kwlist,
+					 &layout, &path, &pathlen))
 		return -1;
 	if (!layout)
 		return -1;
 	if (path) {
+                if (strlen(path) != (size_t)pathlen) {
+                        PyErr_SetString(PyExc_ValueError, "path must not "
+                                        "contain NUL characters");
+                        return -1;
+                }
 		if ((self->file = fopen(path, "a+b")) == NULL) {
 			PyErr_SetFromErrno(PyExc_OSError);
 			return -1;
@@ -243,17 +245,17 @@ static PyObject *Rocket_get_file_size(Rocket *self)
 			return NULL;
 		}
 	}
-	return PyInt_FromLong(self->file_size);
+	return PyLong_FromLong(self->file_size);
 }

 /****
 * Append from string
 */
-static inline long int strtol10(const char *nptr, char **endptr) {
-	return strtol(nptr, endptr, 10);
+static inline long long int strtoll10(const char *nptr, char **endptr) {
+	return strtoll(nptr, endptr, 10); /* keep all 64 bits on ILP32 */
 }
-static inline long int strtoul10(const char *nptr, char **endptr) {
-	return strtoul(nptr, endptr, 10);
+static inline unsigned long long int strtoull10(const char *nptr, char **endptr) {
+	return strtoull(nptr, endptr, 10); /* unsigned, full 64-bit range */
 }

 /* .append_string(count, data, offset, linenum, start, end, last_timestamp) */
@@ -264,6 +266,7 @@ static PyObject *Rocket_append_string(Rocket *self, PyObject *args)
 	int offset;
 	const char *linestart;
 	int linenum;
+        long long ll1, ll2, ll3;
 	timestamp_t start;
 	timestamp_t end;
 	timestamp_t last_timestamp;
@@ -276,14 +279,15 @@ static PyObject *Rocket_append_string(Rocket *self, PyObject *args)
 	union64_t t64;
 	int i;

-	/* It would be nice to use 't#' instead of 's' for data,
-	   but we need the null termination for strto*.  If we had
-	   strnto* that took a length, we could use t# and not require
-	   a copy. */
-	if (!PyArg_ParseTuple(args, "isiilll:append_string", &count,
+        /* Input data is bytes.  Using 'y#' instead of 'y' might be
+           preferable, but strto* requires the null terminator. */
+	if (!PyArg_ParseTuple(args, "iyiiLLL:append_string", &count,
 			      &data, &offset, &linenum,
-			      &start, &end, &last_timestamp))
+			      &ll1, &ll2, &ll3))
 		return NULL;
+        start = ll1;
+        end = ll2;
+        last_timestamp = ll3;

 	/* Skip spaces, but don't skip over a newline. */
 #define SKIP_BLANK(buf) do {			\
@@ -372,14 +376,14 @@ static PyObject *Rocket_append_string(Rocket *self, PyObject *args)
 				goto extra_data_on_line;		\
 			break

-			CS(INT8,   strtol10,  t64.i, t8.i,  t8.u,         , 1);
-			CS(UINT8,  strtoul10, t64.u, t8.u,  t8.u,         , 1);
-			CS(INT16,  strtol10,  t64.i, t16.i, t16.u, le16toh, 2);
-			CS(UINT16, strtoul10, t64.u, t16.u, t16.u, le16toh, 2);
-			CS(INT32,  strtol10,  t64.i, t32.i, t32.u, le32toh, 4);
-			CS(UINT32, strtoul10, t64.u, t32.u, t32.u, le32toh, 4);
-			CS(INT64,  strtol10,  t64.i, t64.i, t64.u, le64toh, 8);
-			CS(UINT64, strtoul10, t64.u, t64.u, t64.u, le64toh, 8);
+			CS(INT8,   strtoll10,  t64.i, t8.i,  t8.u,         , 1);
+			CS(UINT8,  strtoull10, t64.u, t8.u,  t8.u,         , 1);
+			CS(INT16,  strtoll10,  t64.i, t16.i, t16.u, le16toh, 2);
+			CS(UINT16, strtoull10, t64.u, t16.u, t16.u, le16toh, 2);
+			CS(INT32,  strtoll10,  t64.i, t32.i, t32.u, le32toh, 4);
+			CS(UINT32, strtoull10, t64.u, t32.u, t32.u, le32toh, 4);
+			CS(INT64,  strtoll10,  t64.i, t64.i, t64.u, le64toh, 8);
+			CS(UINT64, strtoull10, t64.u, t64.u, t64.u, le64toh, 8);
 			CS(FLOAT32, strtod,   t64.d, t32.f, t32.u, le32toh, 4);
 			CS(FLOAT64, strtod,   t64.d, t64.d, t64.u, le64toh, 8);
 #undef CS
@@ -397,7 +401,8 @@ static PyObject *Rocket_append_string(Rocket *self, PyObject *args)
 	/* Build return value and return */
 	offset = buf - data;
 	PyObject *o;
-	o = Py_BuildValue("(iili)", written, offset, last_timestamp, linenum);
+	o = Py_BuildValue("(iiLi)", written, offset,
+                          (long long)last_timestamp, linenum);
 	return o;
 err:
 	PyErr_SetFromErrno(PyExc_OSError);
@@ -431,14 +436,18 @@ static PyObject *Rocket_append_binary(Rocket *self, PyObject *args)
        int data_len;
        int linenum;
 	int offset;
+        long long ll1, ll2, ll3;
 	timestamp_t start;
 	timestamp_t end;
 	timestamp_t last_timestamp;

-	if (!PyArg_ParseTuple(args, "it#iilll:append_binary",
+	if (!PyArg_ParseTuple(args, "iy#iiLLL:append_binary",
                              &count, &data, &data_len, &offset,
-                              &linenum, &start, &end, &last_timestamp))
+                              &linenum, &ll1, &ll2, &ll3))
 		return NULL;
+        start = ll1;
+        end = ll2;
+        last_timestamp = ll3;

        /* Advance to offset */
        if (offset > data_len)
@@ -468,7 +477,7 @@ static PyObject *Rocket_append_binary(Rocket *self, PyObject *args)
        }

        /* Write binary data */
-        if (fwrite(data, data_len, 1, self->file) != 1) {
+        if (fwrite(data, self->binary_size, rows, self->file) != (size_t)rows) {
                PyErr_SetFromErrno(PyExc_OSError);
                return NULL;
        }
@@ -476,13 +485,13 @@ static PyObject *Rocket_append_binary(Rocket *self, PyObject *args)

 	/* Build return value and return */
 	PyObject *o;
-	o = Py_BuildValue("(iili)", rows, offset + rows * self->binary_size,
-                          last_timestamp, linenum);
+	o = Py_BuildValue("(iiLi)", rows, offset + rows * self->binary_size,
+                          (long long)last_timestamp, linenum);
 	return o;
 }

 /****
- * Extract to string
+ * Extract to binary bytes object containing ASCII text-formatted data
 */

 static PyObject *Rocket_extract_string(Rocket *self, PyObject *args)
@@ -534,7 +543,7 @@ static PyObject *Rocket_extract_string(Rocket *self, PyObject *args)
 		if (fread(&t64.u, 8, 1, self->file) != 1)
 			goto err;
 		t64.u = le64toh(t64.u);
-		ret = sprintf(&str[len], "%ld", t64.i);
+		ret = sprintf(&str[len], "%" PRId64, t64.i);
 		if (ret <= 0)
 			goto err;
 		len += ret;
@@ -556,14 +565,14 @@ static PyObject *Rocket_extract_string(Rocket *self, PyObject *args)
 				len += ret;				\
 			}						\
 			break
-			CASE(INT8,   "%hhd",   t8.i,  t8.u,         , 1);
-			CASE(UINT8,  "%hhu",   t8.u,  t8.u,         , 1);
-			CASE(INT16,  "%hd",    t16.i, t16.u, le16toh, 2);
-			CASE(UINT16, "%hu",    t16.u, t16.u, le16toh, 2);
-			CASE(INT32,  "%d",     t32.i, t32.u, le32toh, 4);
-			CASE(UINT32, "%u",     t32.u, t32.u, le32toh, 4);
-			CASE(INT64,  "%ld",    t64.i, t64.u, le64toh, 8);
-			CASE(UINT64, "%lu",    t64.u, t64.u, le64toh, 8);
+			CASE(INT8,   "%" PRId8,  t8.i,  t8.u,         , 1);
+			CASE(UINT8,  "%" PRIu8,  t8.u,  t8.u,         , 1);
+			CASE(INT16,  "%" PRId16, t16.i, t16.u, le16toh, 2);
+			CASE(UINT16, "%" PRIu16, t16.u, t16.u, le16toh, 2);
+			CASE(INT32,  "%" PRId32, t32.i, t32.u, le32toh, 4);
+			CASE(UINT32, "%" PRIu32, t32.u, t32.u, le32toh, 4);
+			CASE(INT64,  "%" PRId64, t64.i, t64.u, le64toh, 8);
+			CASE(UINT64, "%" PRIu64, t64.u, t64.u, le64toh, 8);
 			/* These next two are a bit debatable.  floats
 			   are 6-9 significant figures, so we print 7.
 			   Doubles are 15-19, so we print 17.  This is
@@ -580,7 +589,7 @@ static PyObject *Rocket_extract_string(Rocket *self, PyObject *args)
 		str[len++] = '\n';
 	}

-	PyObject *pystr = PyString_FromStringAndSize(str, len);
+	PyObject *pystr = PyBytes_FromStringAndSize(str, len);
 	free(str);
 	return pystr;
 err:
@@ -590,7 +599,7 @@ err:
 }

 /****
- * Extract to binary string containing raw little-endian binary data
+ * Extract to binary bytes object containing raw little-endian binary data
 */
 static PyObject *Rocket_extract_binary(Rocket *self, PyObject *args)
 {
@@ -619,7 +628,7 @@ static PyObject *Rocket_extract_binary(Rocket *self, PyObject *args)

        /* Data in the file is already in the desired little-endian
           binary format, so just read it directly. */
-        if (fread(str, self->binary_size, count, self->file) != count) {
+        if (fread(str, self->binary_size, count, self->file) != (size_t)count) {
                free(str);
                PyErr_SetFromErrno(PyExc_OSError);
                return NULL;
@@ -653,7 +662,7 @@ static PyObject *Rocket_extract_timestamp(Rocket *self, PyObject *args)

 	/* Convert and return */
 	t64.u = le64toh(t64.u);
-	return Py_BuildValue("l", t64.i);
+	return Py_BuildValue("L", (long long)t64.i);
 }

 /****
@@ -743,7 +752,7 @@ static PyMethodDef Rocket_methods[] = {
 };

 static PyTypeObject RocketType = {
-	PyObject_HEAD_INIT(NULL)
+	PyVarObject_HEAD_INIT(NULL, 0)

 	.tp_name	= "rocket.Rocket",
 	.tp_basicsize	= sizeof(Rocket),
@@ -768,17 +777,23 @@ static PyMethodDef module_methods[] = {
 	{ NULL },
 };

-PyMODINIT_FUNC
-initrocket(void)
+static struct PyModuleDef moduledef = {
+        PyModuleDef_HEAD_INIT,
+        .m_name        = "rocket",      /* must match PyInit_rocket */
+        .m_doc         = "Rocket data parsing and formatting module",
+        .m_size        = -1,            /* module state is kept in globals */
+        .m_methods     = module_methods,
+};
+
+PyMODINIT_FUNC PyInit_rocket(void)
 {
 	PyObject *module;

 	RocketType.tp_new = PyType_GenericNew;
 	if (PyType_Ready(&RocketType) < 0)
-		return;
+		return NULL;

-	module = Py_InitModule3("rocket", module_methods,
-				"Rocket data parsing and formatting module");
+	module = PyModule_Create(&moduledef);
 	Py_INCREF(&RocketType);
 	PyModule_AddObject(module, "Rocket", (PyObject *)&RocketType);

@@ -787,5 +802,5 @@ initrocket(void)
 	PyModule_AddObject(module, "ParseError", ParseError);
 	add_parseerror_codes(module);

-	return;
+	return module;
 }
--- a/nilmdb/server/server.py
+++ b/nilmdb/server/server.py
@@ -1,153 +1,49 @@
 """CherryPy-based server for accessing NILM database via HTTP"""

-# Need absolute_import so that "import nilmdb" won't pull in
-# nilmdb.py, but will pull the nilmdb module instead.
-from __future__ import absolute_import
+import os
+import json
+import socket
+import traceback
+
+import psutil
+import cherrypy
+
 import nilmdb.server
-from nilmdb.utils.printf import *
+from nilmdb.utils.printf import sprintf
 from nilmdb.server.errors import NilmDBError
 from nilmdb.utils.time import string_to_timestamp

-import cherrypy
-import sys
-import os
-import socket
-import simplejson as json
-import decorator
-import psutil
-import traceback
+from nilmdb.server.serverutil import (
+    chunked_response,
+    response_type,
+    exception_to_httperror,
+    CORS_allow,
+    json_to_request_params,
+    json_error_page,
+    cherrypy_start,
+    cherrypy_stop,
+    bool_param,
+    )

-class NilmApp(object):
+# Add CORS_allow tool
+cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow)
+
+
+class NilmApp():
    def __init__(self, db):
        self.db = db

-# Decorators
-def chunked_response(func):
-    """Decorator to enable chunked responses."""
-    # Set this to False to get better tracebacks from some requests
-    # (/stream/extract, /stream/intervals).
-    func._cp_config = { 'response.stream': True }
-    return func
-
-def response_type(content_type):
-    """Return a decorator-generating function that sets the
-    response type to the specified string."""
-    def wrapper(func, *args, **kwargs):
-        cherrypy.response.headers['Content-Type'] = content_type
-        return func(*args, **kwargs)
-    return decorator.decorator(wrapper)
-
-@decorator.decorator
-def workaround_cp_bug_1200(func, *args, **kwargs): # pragma: no cover
-    """Decorator to work around CherryPy bug #1200 in a response
-    generator.
-
-    Even if chunked responses are disabled, LookupError or
-    UnicodeError exceptions may still be swallowed by CherryPy due to
-    bug #1200.  This throws them as generic Exceptions instead so that
-    they make it through.
-    """
-    exc_info = None
-    try:
-        for val in func(*args, **kwargs):
-            yield val
-    except (LookupError, UnicodeError):
-        # Re-raise it, but maintain the original traceback
-        exc_info = sys.exc_info()
-        new_exc = Exception(exc_info[0].__name__ + ": " + str(exc_info[1]))
-        raise new_exc, None, exc_info[2]
-    finally:
-        del exc_info
-
-def exception_to_httperror(*expected):
-    """Return a decorator-generating function that catches expected
-    errors and throws a HTTPError describing it instead.
-
-        @exception_to_httperror(NilmDBError, ValueError)
-        def foo():
-            pass
-    """
-    def wrapper(func, *args, **kwargs):
-        exc_info = None
-        try:
-            return func(*args, **kwargs)
-        except expected:
-            # Re-raise it, but maintain the original traceback
-            exc_info = sys.exc_info()
-            new_exc = cherrypy.HTTPError("400 Bad Request", str(exc_info[1]))
-            raise new_exc, None, exc_info[2]
-        finally:
-            del exc_info
-    # We need to preserve the function's argspecs for CherryPy to
-    # handle argument errors correctly.  Decorator.decorator takes
-    # care of that.
-    return decorator.decorator(wrapper)
-
-# Custom CherryPy tools
-
-def CORS_allow(methods):
-    """This does several things:
-
-    Handles CORS preflight requests.
-    Adds Allow: header to all requests.
-    Raise 405 if request.method not in method.
-
-    It is similar to cherrypy.tools.allow, with the CORS stuff added.
-    """
-    request = cherrypy.request.headers
-    response = cherrypy.response.headers
-
-    if not isinstance(methods, (tuple, list)): # pragma: no cover
-        methods = [ methods ]
-    methods = [ m.upper() for m in methods if m ]
-    if not methods: # pragma: no cover
-        methods = [ 'GET', 'HEAD' ]
-    elif 'GET' in methods and 'HEAD' not in methods: # pragma: no cover
-        methods.append('HEAD')
-    response['Allow'] = ', '.join(methods)
-
-    # Allow all origins
-    if 'Origin' in request:
-        response['Access-Control-Allow-Origin'] = request['Origin']
-
-    # If it's a CORS request, send response.
-    request_method = request.get("Access-Control-Request-Method", None)
-    request_headers = request.get("Access-Control-Request-Headers", None)
-    if (cherrypy.request.method == "OPTIONS" and
-        request_method and request_headers):
-        response['Access-Control-Allow-Headers'] = request_headers
-        response['Access-Control-Allow-Methods'] = ', '.join(methods)
-        # Try to stop further processing and return a 200 OK
-        cherrypy.response.status = "200 OK"
-        cherrypy.response.body = ""
-        cherrypy.request.handler = lambda: ""
-        return
-
-    # Reject methods that were not explicitly allowed
-    if cherrypy.request.method not in methods:
-        raise cherrypy.HTTPError(405)
-
-cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow)
-
-# Helper for json_in tool to process JSON data into normal request
-# parameters.
-def json_to_request_params(body):
-    cherrypy.lib.jsontools.json_processor(body)
-    if not isinstance(cherrypy.request.json, dict):
-        raise cherrypy.HTTPError(415)
-    cherrypy.request.params.update(cherrypy.request.json)

 # CherryPy apps
 class Root(NilmApp):
    """Root application for NILM database"""
-
-    def __init__(self, db):
-        super(Root, self).__init__(db)
-
    # /
    @cherrypy.expose
    def index(self):
-        raise cherrypy.NotFound()
+        cherrypy.response.headers['Content-Type'] = 'text/plain'
+        msg = sprintf("This is NilmDB version %s, running on host %s.\n",
+                      nilmdb.__version__, socket.getfqdn())
+        return msg

    # /favicon.ico
    @cherrypy.expose
@@ -167,9 +63,16 @@ class Root(NilmApp):
        """Return a dictionary with the database path,
        size of the database in bytes, and free disk space in bytes"""
        path = self.db.get_basepath()
-        return { "path": path,
-                 "size": nilmdb.utils.du(path),
-                 "free": psutil.disk_usage(path).free }
+        usage = psutil.disk_usage(path)
+        dbsize = nilmdb.utils.du(path)
+        return {
+            "path": path,
+            "size": dbsize,
+            "other": max(usage.used - dbsize, 0),
+            "reserved": max(usage.total - usage.used - usage.free, 0),
+            "free": usage.free
+        }
+

 class Stream(NilmApp):
    """Stream-specific operations"""
@@ -177,10 +80,19 @@ class Stream(NilmApp):
    # Helpers
    def _get_times(self, start_param, end_param):
        (start, end) = (None, None)
+        try:
            if start_param is not None:
                start = string_to_timestamp(start_param)
+        except Exception:
+            raise cherrypy.HTTPError("400 Bad Request", sprintf(
+                "invalid start (%s): must be a numeric timestamp",
+                start_param))
+        try:
            if end_param is not None:
                end = string_to_timestamp(end_param)
+        except Exception:
+            raise cherrypy.HTTPError("400 Bad Request", sprintf(
+                "invalid end (%s): must be a numeric timestamp", end_param))
        if start is not None and end is not None:
            if start >= end:
                raise cherrypy.HTTPError(
@@ -194,15 +106,15 @@ class Stream(NilmApp):
    # /stream/list?path=/newton/prep&extended=1
    @cherrypy.expose
    @cherrypy.tools.json_out()
-    def list(self, path = None, layout = None, extended = None):
+    def list(self, path=None, layout=None, extended=None):
        """List all streams in the database.  With optional path or
        layout parameter, just list streams that match the given path
        or layout.

-        If extent is not given, returns a list of lists containing
-        the path and layout: [ path, layout ]
+        If extended is missing or zero, returns a list of lists
+        containing the path and layout: [ path, layout ]

-        If extended is provided, returns a list of lists containing
+        If extended is true, returns a list of lists containing
        extended info: [ path, layout, extent_min, extent_max,
        total_rows, total_seconds ].  More data may be added.
        """
@@ -213,7 +125,7 @@ class Stream(NilmApp):
    @cherrypy.tools.json_in()
    @cherrypy.tools.json_out()
    @exception_to_httperror(NilmDBError, ValueError)
-    @cherrypy.tools.CORS_allow(methods = ["POST"])
+    @cherrypy.tools.CORS_allow(methods=["POST"])
    def create(self, path, layout):
        """Create a new stream in the database.  Provide path
        and one of the nilmdb.layout.layouts keys.
@@ -225,7 +137,7 @@ class Stream(NilmApp):
    @cherrypy.tools.json_in()
    @cherrypy.tools.json_out()
    @exception_to_httperror(NilmDBError)
-    @cherrypy.tools.CORS_allow(methods = ["POST"])
+    @cherrypy.tools.CORS_allow(methods=["POST"])
    def destroy(self, path):
        """Delete a stream.  Fails if any data is still present."""
        return self.db.stream_destroy(path)
@@ -235,7 +147,7 @@ class Stream(NilmApp):
    @cherrypy.tools.json_in()
    @cherrypy.tools.json_out()
    @exception_to_httperror(NilmDBError, ValueError)
-    @cherrypy.tools.CORS_allow(methods = ["POST"])
+    @cherrypy.tools.CORS_allow(methods=["POST"])
    def rename(self, oldpath, newpath):
        """Rename a stream."""
        return self.db.stream_rename(oldpath, newpath)
@@ -251,11 +163,11 @@ class Stream(NilmApp):
        try:
            data = self.db.stream_get_metadata(path)
        except nilmdb.server.nilmdb.StreamError as e:
-            raise cherrypy.HTTPError("404 Not Found", e.message)
+            raise cherrypy.HTTPError("404 Not Found", str(e))
        if key is None:  # If no keys specified, return them all
-            key = data.keys()
+            key = list(data.keys())
        elif not isinstance(key, list):
-            key = [ key ]
+            key = [key]
        result = {}
        for k in key:
            if k in data:
@@ -270,11 +182,9 @@ class Stream(NilmApp):
            try:
                data = dict(json.loads(data))
            except TypeError as e:
-                raise NilmDBError("can't parse 'data' parameter: " + e.message)
+                raise NilmDBError("can't parse 'data' parameter: " + str(e))
        for key in data:
-            if not (isinstance(data[key], basestring) or
-                    isinstance(data[key], float) or
-                    isinstance(data[key], int)):
+            if not isinstance(data[key], (str, float, int)):
                raise NilmDBError("metadata values must be a string or number")
        function(path, data)

@@ -283,7 +193,7 @@ class Stream(NilmApp):
    @cherrypy.tools.json_in()
    @cherrypy.tools.json_out()
    @exception_to_httperror(NilmDBError, LookupError)
-    @cherrypy.tools.CORS_allow(methods = ["POST"])
+    @cherrypy.tools.CORS_allow(methods=["POST"])
    def set_metadata(self, path, data):
        """Set metadata for the named stream, replacing any existing
        metadata.  Data can be json-encoded or a plain dictionary."""
@@ -294,7 +204,7 @@ class Stream(NilmApp):
    @cherrypy.tools.json_in()
    @cherrypy.tools.json_out()
    @exception_to_httperror(NilmDBError, LookupError, ValueError)
-    @cherrypy.tools.CORS_allow(methods = ["POST"])
+    @cherrypy.tools.CORS_allow(methods=["POST"])
    def update_metadata(self, path, data):
        """Set metadata for the named stream, replacing any existing
        metadata.  Data can be json-encoded or a plain dictionary."""
@@ -304,8 +214,8 @@ class Stream(NilmApp):
    @cherrypy.expose
    @cherrypy.tools.json_out()
    @exception_to_httperror(NilmDBError, ValueError)
-    @cherrypy.tools.CORS_allow(methods = ["PUT"])
-    def insert(self, path, start, end, binary = False):
+    @cherrypy.tools.CORS_allow(methods=["PUT"])
+    def insert(self, path, start, end, binary=False):
        """
        Insert new data into the database.  Provide textual data
        (matching the path's layout) as a HTTP PUT.
@@ -315,6 +225,8 @@ class Stream(NilmApp):
        little-endian and matches the database types (including an
        int64 timestamp).
        """
+        binary = bool_param(binary)
+
        # Important that we always read the input before throwing any
        # errors, to keep lengths happy for persistent connections.
        # Note that CherryPy 3.2.2 has a bug where this fails for GET
@@ -329,8 +241,11 @@ class Stream(NilmApp):
                                         "application/octet-stream for "
                                         "binary data, not " + content_type)

+        # Note that non-binary data is *not* decoded from bytes to string,
+        # but rather passed directly to stream_insert.
+
        # Check path and get layout
-        if len(self.db.stream_list(path = path)) != 1:
+        if len(self.db.stream_list(path=path)) != 1:
            raise cherrypy.HTTPError("404", "No such stream: " + path)

        # Check limits
@@ -347,10 +262,10 @@ class Stream(NilmApp):
    # /stream/remove?path=/newton/prep&start=1234567890.0&end=1234567899.0
    @cherrypy.expose
    @cherrypy.tools.json_in()
-    @cherrypy.tools.CORS_allow(methods = ["POST"])
+    @cherrypy.tools.CORS_allow(methods=["POST"])
    @chunked_response
    @response_type("application/x-json-stream")
-    def remove(self, path, start = None, end = None):
+    def remove(self, path, start=None, end=None):
        """
        Remove data from the backend database.  Removes all data in
        the interval [start, end).
@@ -362,15 +277,15 @@ class Stream(NilmApp):
        """
        (start, end) = self._get_times(start, end)

-        if len(self.db.stream_list(path = path)) != 1:
+        if len(self.db.stream_list(path=path)) != 1:
            raise cherrypy.HTTPError("404", "No such stream: " + path)

-        @workaround_cp_bug_1200
        def content(start, end):
            # Note: disable chunked responses to see tracebacks from here.
            while True:
                (removed, restart) = self.db.stream_remove(path, start, end)
-                yield json.dumps(removed) + "\r\n"
+                response = json.dumps(removed) + "\r\n"
+                yield response.encode('utf-8')
                if restart is None:
                    break
                start = restart
@@ -382,7 +297,7 @@ class Stream(NilmApp):
    @cherrypy.expose
    @chunked_response
    @response_type("application/x-json-stream")
-    def intervals(self, path, start = None, end = None, diffpath = None):
+    def intervals(self, path, start=None, end=None, diffpath=None):
        """
        Get intervals from backend database.  Streams the resulting
        intervals as JSON strings separated by CR LF pairs.  This may
@@ -399,20 +314,19 @@ class Stream(NilmApp):
        """
        (start, end) = self._get_times(start, end)

-        if len(self.db.stream_list(path = path)) != 1:
+        if len(self.db.stream_list(path=path)) != 1:
            raise cherrypy.HTTPError("404", "No such stream: " + path)

-        if diffpath and len(self.db.stream_list(path = diffpath)) != 1:
+        if diffpath and len(self.db.stream_list(path=diffpath)) != 1:
            raise cherrypy.HTTPError("404", "No such stream: " + diffpath)

-        @workaround_cp_bug_1200
        def content(start, end):
            # Note: disable chunked responses to see tracebacks from here.
            while True:
                (ints, restart) = self.db.stream_intervals(path, start, end,
                                                           diffpath)
-                response = ''.join([ json.dumps(i) + "\r\n" for i in ints ])
-                yield response
+                response = ''.join([json.dumps(i) + "\r\n" for i in ints])
+                yield response.encode('utf-8')
                if restart is None:
                    break
                start = restart
@@ -421,8 +335,8 @@ class Stream(NilmApp):
    # /stream/extract?path=/newton/prep&start=1234567890.0&end=1234567899.0
    @cherrypy.expose
    @chunked_response
-    def extract(self, path, start = None, end = None,
-                count = False, markup = False, binary = False):
+    def extract(self, path, start=None, end=None,
+                count=False, markup=False, binary=False):
        """
        Extract data from backend database.  Streams the resulting
        entries as ASCII text lines separated by newlines.  This may
@@ -439,10 +353,14 @@ class Stream(NilmApp):
        little-endian and matches the database types (including an
        int64 timestamp).
        """
+        binary = bool_param(binary)
+        markup = bool_param(markup)
+        count = bool_param(count)
+
        (start, end) = self._get_times(start, end)

        # Check path and get layout
-        if len(self.db.stream_list(path = path)) != 1:
+        if len(self.db.stream_list(path=path)) != 1:
            raise cherrypy.HTTPError("404", "No such stream: " + path)

        if binary:
@@ -454,19 +372,18 @@ class Stream(NilmApp):
            content_type = "text/plain"
        cherrypy.response.headers['Content-Type'] = content_type

-        @workaround_cp_bug_1200
        def content(start, end):
            # Note: disable chunked responses to see tracebacks from here.
            if count:
                matched = self.db.stream_extract(path, start, end,
-                                                 count = True)
-                yield sprintf("%d\n", matched)
+                                                 count=True)
+                yield sprintf(b"%d\n", matched)
                return

            while True:
                (data, restart) = self.db.stream_extract(
-                    path, start, end, count = False,
-                    markup = markup, binary = binary)
+                    path, start, end, count=False,
+                    markup=markup, binary=binary)
                yield data

                if restart is None:
@@ -474,29 +391,31 @@ class Stream(NilmApp):
                start = restart
        return content(start, end)

-class Exiter(object):
+
+class Exiter():
    """App that exits the server, for testing"""
    @cherrypy.expose
    def index(self):
        cherrypy.response.headers['Content-Type'] = 'text/plain'
-        def content():
-            yield 'Exiting by request'
-            raise SystemExit
-        return content()
-    index._cp_config = { 'response.stream': True }

-class Server(object):
-    def __init__(self, db, host = '127.0.0.1', port = 8080,
-                 stoppable = False,       # whether /exit URL exists
-                 embedded = True,         # hide diagnostics and output, etc
-                 fast_shutdown = False,   # don't wait for clients to disconn.
-                 force_traceback = False, # include traceback in all errors
-                 basepath = '',           # base URL path for cherrypy.tree
+        def content():
+            yield b'Exiting by request'
+            raise SystemExit
+
+        return content()
+    index._cp_config = {'response.stream': True}
+
+
+class Server():
+    def __init__(self, db, host='127.0.0.1', port=8080,
+                 stoppable=False,        # whether /exit URL exists
+                 fast_shutdown=False,    # don't wait for clients to disconn.
+                 force_traceback=False,  # include traceback in all errors
+                 basepath='',            # base URL path for cherrypy.tree
                 ):
        # Save server version, just for verification during tests
        self.version = nilmdb.__version__

-        self.embedded = embedded
        self.db = db
        if not getattr(db, "_thread_safe", None):
            raise KeyError("Database object " + str(db) + " doesn't claim "
@@ -506,13 +425,12 @@ class Server(object):

        # Build up global server configuration
        cherrypy.config.update({
+            'environment': 'embedded',
            'server.socket_host': host,
            'server.socket_port': port,
-            'engine.autoreload_on': False,
+            'engine.autoreload.on': False,
            'server.max_request_body_size': 8*1024*1024,
            })
-        if self.embedded:
-            cherrypy.config.update({ 'environment': 'embedded' })

        # Build up application specific configuration
        app_config = {}
@@ -521,23 +439,23 @@ class Server(object):
            })

        # Some default headers to just help identify that things are working
-        app_config.update({ 'response.headers.X-Jim-Is-Awesome': 'yeah' })
+        app_config.update({'response.headers.X-Jim-Is-Awesome': 'yeah'})

        # Set up Cross-Origin Resource Sharing (CORS) handler so we
        # can correctly respond to browsers' CORS preflight requests.
        # This also limits verbs to GET and HEAD by default.
-        app_config.update({ 'tools.CORS_allow.on': True,
-                            'tools.CORS_allow.methods': ['GET', 'HEAD'] })
+        app_config.update({'tools.CORS_allow.on': True,
+                           'tools.CORS_allow.methods': ['GET', 'HEAD']})

        # Configure the 'json_in' tool to also allow other content-types
        # (like x-www-form-urlencoded), and to treat JSON as a dict that
        # fills requests.param.
-        app_config.update({ 'tools.json_in.force': False,
-                            'tools.json_in.processor': json_to_request_params })
+        app_config.update({'tools.json_in.force': False,
+                           'tools.json_in.processor': json_to_request_params})

        # Send tracebacks in error responses.  They're hidden by the
        # error_page function for client errors (code 400-499).
-        app_config.update({ 'request.show_tracebacks' : True })
+        app_config.update({'request.show_tracebacks': True})
        self.force_traceback = force_traceback

        # Patch CherryPy error handler to never pad out error messages.
@@ -551,13 +469,12 @@ class Server(object):
        if stoppable:
            root.exit = Exiter()
        cherrypy.tree.apps = {}
-        cherrypy.tree.mount(root, basepath, config = { "/" : app_config })
+        cherrypy.tree.mount(root, basepath, config={"/": app_config})

        # Shutdowns normally wait for clients to disconnect.  To speed
        # up tests, set fast_shutdown = True
        if fast_shutdown:
-            # Setting timeout to 0 triggers os._exit(70) at shutdown, grr...
-            cherrypy.server.shutdown_timeout = 0.01
+            cherrypy.server.shutdown_timeout = 0
        else:
            cherrypy.server.shutdown_timeout = 5

@@ -566,77 +483,24 @@ class Server(object):

    def json_error_page(self, status, message, traceback, version):
        """Return a custom error page in JSON so the client can parse it"""
-        errordata = { "status" : status,
-                      "message" : message,
-                      "traceback" : traceback }
-        # Don't send a traceback if the error was 400-499 (client's fault)
-        try:
-            code = int(status.split()[0])
-            if not self.force_traceback:
-                if code >= 400 and code <= 499:
-                    errordata["traceback"] = ""
-        except Exception: # pragma: no cover
-            pass
-        # Override the response type, which was previously set to text/html
-        cherrypy.serving.response.headers['Content-Type'] = (
-            "application/json;charset=utf-8" )
-        # Undo the HTML escaping that cherrypy's get_error_page function applies
-        # (cherrypy issue 1135)
-        for k, v in errordata.iteritems():
-            v = v.replace("&lt;","<")
-            v = v.replace("&gt;",">")
-            v = v.replace("&amp;","&")
-            errordata[k] = v
-        return json.dumps(errordata, separators=(',',':'))
+        return json_error_page(status, message, traceback, version,
+                               self.force_traceback)

-    def start(self, blocking = False, event = None):
-
-        if not self.embedded: # pragma: no cover
-            # Handle signals nicely
-            if hasattr(cherrypy.engine, "signal_handler"):
-                cherrypy.engine.signal_handler.subscribe()
-            if hasattr(cherrypy.engine, "console_control_handler"):
-                cherrypy.engine.console_control_handler.subscribe()
-
-        # Cherrypy stupidly calls os._exit(70) when it can't bind the
-        # port.  At least try to print a reasonable error and continue
-        # in this case, rather than just dying silently (as we would
-        # otherwise do in embedded mode)
-        real_exit = os._exit
-        def fake_exit(code): # pragma: no cover
-            if code == os.EX_SOFTWARE:
-                fprintf(sys.stderr, "error: CherryPy called os._exit!\n")
-            else:
-                real_exit(code)
-        os._exit = fake_exit
-        cherrypy.engine.start()
-        os._exit = real_exit
-
-        # Signal that the engine has started successfully
-        if event is not None:
-            event.set()
-
-        if blocking:
-            try:
-                cherrypy.engine.wait(cherrypy.engine.states.EXITING,
-                                     interval = 0.1, channel = 'main')
-            except (KeyboardInterrupt, IOError): # pragma: no cover
-                cherrypy.engine.log('Keyboard Interrupt: shutting down bus')
-                cherrypy.engine.exit()
-            except SystemExit: # pragma: no cover
-                cherrypy.engine.log('SystemExit raised: shutting down bus')
-                cherrypy.engine.exit()
-                raise
+    def start(self, blocking=False, event=None):
+        cherrypy_start(blocking, event)

    def stop(self):
-        cherrypy.engine.exit()
+        cherrypy_stop()
+

 # Use a single global nilmdb.server.NilmDB and nilmdb.server.Server
 # instance since the database can only be opened once.  For this to
 # work, the web server must use only a single process and single
 # Python interpreter.  Multiple threads are OK.
 _wsgi_server = None
-def wsgi_application(dbpath, basepath): # pragma: no cover
+
+
+def wsgi_application(dbpath, basepath):
    """Return a WSGI application object with a database at the
    specified path.

@@ -651,17 +515,16 @@ def wsgi_application(dbpath, basepath): # pragma: no cover
        if _wsgi_server is None:
            # Try to start the server
            try:
-                db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(dbpath)
+                db = nilmdb.utils.serializer_proxy(
+                    nilmdb.server.NilmDB)(dbpath)
                _wsgi_server = nilmdb.server.Server(
-                    db, embedded = True,
-                    basepath = basepath.rstrip('/'))
+                    db, basepath=basepath.rstrip('/'))
            except Exception:
                # Build an error message on failure
                import pprint
                err = sprintf("Initializing database at path '%s' failed:\n\n",
                              dbpath)
                err += traceback.format_exc()
-                try:
                import pwd
                import grp
                err += sprintf("\nRunning as: uid=%d (%s), gid=%d (%s) "
@@ -669,15 +532,14 @@ def wsgi_application(dbpath, basepath): # pragma: no cover
                               os.getuid(), pwd.getpwuid(os.getuid())[0],
                               os.getgid(), grp.getgrgid(os.getgid())[0],
                               socket.gethostname(), os.getpid())
-                except ImportError:
-                    pass
                err += sprintf("\nEnvironment:\n%s\n", pprint.pformat(environ))
        if _wsgi_server is None:
            # Serve up the error with our own mini WSGI app.
-            headers = [ ('Content-type', 'text/plain'),
-                        ('Content-length', str(len(err))) ]
+            err_b = err.encode('utf-8')
+            headers = [('Content-type', 'text/plain; charset=utf-8'),
+                       ('Content-length', str(len(err_b)))]
            start_response("500 Internal Server Error", headers)
-            return [err]
+            return [err_b]

        # Call the normal application
        return _wsgi_server.wsgi_application(environ, start_response)
--- a/nilmdb/server/serverutil.py
+++ b/nilmdb/server/serverutil.py
@@ -0,0 +1,211 @@
+"""Miscellaneous decorators and other helpers for running a CherryPy
+server"""
+
+import os
+import sys
+import json
+import decorator
+import functools
+
+import cherrypy
+
+
+# Helper to parse parameters into booleans
+def bool_param(s):
+    """Interpret request parameter 's' as a boolean.
+
+    Strings are matched case-insensitively against a fixed set of
+    true/false spellings; a string that matches neither results in a
+    "400 Bad Request" HTTPError.  Anything that can't be handled as a
+    string is converted with bool()."""
+    false_words = ("0", "false", "f", "no", "n")
+    true_words = ("1", "true", "t", "yes", "y")
+    try:
+        lowered = s.lower()
+        if lowered in false_words:
+            return False
+        if lowered in true_words:
+            return True
+    except Exception:
+        # Not string-like; fall back to normal truthiness
+        return bool(s)
+    raise cherrypy.HTTPError("400 Bad Request",
+                             "can't parse parameter: " + lowered)
+
+
+# Decorators
+def chunked_response(func):
+    """Decorator that marks a handler as producing a chunked
+    (streamed) response.  The same function object is returned."""
+    # Change 'response.stream' to False to get better tracebacks from
+    # some requests (/stream/extract, /stream/intervals).
+    streaming_config = {'response.stream': True}
+    func._cp_config = streaming_config
+    return func
+
+
+def response_type(content_type):
+    """Return a decorator that sets the response's Content-Type
+    header to the specified string and then calls the decorated
+    function.
+
+        @response_type("application/x-json-stream")
+        def foo():
+            pass
+    """
+    def wrapper(func, *args, **kwargs):
+        # Set the header first, then hand off to the wrapped function
+        # and pass its result through unchanged.
+        cherrypy.response.headers['Content-Type'] = content_type
+        return func(*args, **kwargs)
+    # decorator.decorator turns the (func, *args, **kwargs) caller into
+    # a decorator that preserves the decorated function's argspec.
+    return decorator.decorator(wrapper)
+
+
+def exception_to_httperror(*expected):
+    """Return a decorator that catches the expected exception types
+    and raises a "400 Bad Request" cherrypy.HTTPError describing the
+    error instead.  Exceptions of other types are not caught here.
+
+        @exception_to_httperror(NilmDBError, ValueError)
+        def foo():
+            pass
+    """
+    def wrapper(func, *args, **kwargs):
+        exc_info = None
+        try:
+            return func(*args, **kwargs)
+        except expected:
+            # Re-raise it, but maintain the original traceback
+            exc_info = sys.exc_info()
+            new_exc = cherrypy.HTTPError("400 Bad Request", str(exc_info[1]))
+            raise new_exc.with_traceback(exc_info[2])
+        finally:
+            # Drop the local reference to the exception info (which
+            # includes the traceback object) on every exit path.
+            del exc_info
+    # We need to preserve the function's argspecs for CherryPy to
+    # handle argument errors correctly.  Decorator.decorator takes
+    # care of that.
+    return decorator.decorator(wrapper)
+
+
+# Custom CherryPy tools
+def CORS_allow(methods):
+    """CherryPy tool combining method filtering with CORS support.
+
+    For every request this:
+
+    - sets the Allow: header to the permitted methods,
+    - echoes any Origin: header back as Access-Control-Allow-Origin,
+    - tries to answer CORS preflight (OPTIONS) requests with an empty
+      200 OK,
+    - raises a 405 error if request.method is not in methods.
+
+    It is similar to cherrypy.tools.allow, with the CORS stuff added.
+
+    Add this to CherryPy with:
+    cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow)
+    """
+    req_headers = cherrypy.request.headers
+    resp_headers = cherrypy.response.headers
+
+    # Normalize to a list of upper-case names, dropping empty entries.
+    # An empty list means GET and HEAD; allowing GET implies HEAD.
+    if isinstance(methods, (tuple, list)):
+        candidates = methods
+    else:
+        candidates = [methods]
+    allowed = [name.upper() for name in candidates if name]
+    if not allowed:
+        allowed = ['GET', 'HEAD']
+    elif 'GET' in allowed and 'HEAD' not in allowed:
+        allowed.append('HEAD')
+    allow_value = ', '.join(allowed)
+    resp_headers['Allow'] = allow_value
+
+    # Allow all origins
+    if 'Origin' in req_headers:
+        resp_headers['Access-Control-Allow-Origin'] = req_headers['Origin']
+
+    # A preflight is an OPTIONS request that carries both of the
+    # Access-Control-Request-* headers.
+    wanted_method = req_headers.get("Access-Control-Request-Method", None)
+    wanted_headers = req_headers.get("Access-Control-Request-Headers", None)
+    is_preflight = (cherrypy.request.method == "OPTIONS" and
+                    wanted_method and wanted_headers)
+    if is_preflight:
+        resp_headers['Access-Control-Allow-Headers'] = wanted_headers
+        resp_headers['Access-Control-Allow-Methods'] = allow_value
+        # Try to stop further processing and return a 200 OK
+        cherrypy.response.status = "200 OK"
+        cherrypy.response.body = b""
+        cherrypy.request.handler = lambda: ""
+        return
+
+    # Reject methods that were not explicitly allowed
+    if cherrypy.request.method not in allowed:
+        raise cherrypy.HTTPError(405)
+
+
+# Helper for json_in tool to process JSON data into normal request
+# parameters.
+def json_to_request_params(body):
+    """Processor for the json_in tool: parse the request body with
+    CherryPy's json_processor and merge the resulting dict into the
+    request parameters.  Raises HTTP 415 if the JSON is not a dict."""
+    cherrypy.lib.jsontools.json_processor(body)
+    parsed = cherrypy.request.json
+    if not isinstance(parsed, dict):
+        raise cherrypy.HTTPError(415)
+    cherrypy.request.params.update(parsed)
+
+
+# Used as an "error_page.default" handler
+def json_error_page(status, message, traceback, version,
+                    force_traceback=False):
+    """Build a JSON error page so that the client can parse it.
+
+    Returns a compact JSON object with the keys "status", "message",
+    "version" and "traceback".  The traceback is blanked for 400-499
+    status codes (client's fault) unless force_traceback is true.
+    Also sets the response Content-Type to JSON.
+    """
+    def unescape(text):
+        # Undo the HTML escaping that cherrypy's get_error_page function
+        # applies (cherrypy issue 1135)
+        text = text.replace("&lt;", "<")
+        text = text.replace("&gt;", ">")
+        return text.replace("&amp;", "&")
+
+    # Don't send a traceback if the error was 400-499 (client's fault)
+    code = int(status.split()[0])
+    if 400 <= code <= 499 and not force_traceback:
+        traceback = ""
+
+    # Override the response type, which was previously set to text/html
+    cherrypy.serving.response.headers['Content-Type'] = (
+        "application/json;charset=utf-8")
+
+    errordata = {"status": unescape(status),
+                 "message": unescape(message),
+                 "version": unescape(version),
+                 "traceback": unescape(traceback)}
+    return json.dumps(errordata, separators=(',', ':'))
+
+
+class CherryPyExit(SystemExit):
+    """SystemExit subclass raised in place of an os._exit() call made
+    while the CherryPy bus is exiting (see cherrypy_patch_exit)."""
+
+
+def cherrypy_patch_exit():
+    """Wrap the CherryPy bus's exit() so that an os._exit() call made
+    during it raises CherryPyExit instead.  Only patches once; later
+    calls return immediately."""
+    # Cherrypy stupidly calls os._exit(70) when it can't bind the port
+    # and exits.  Instead of that, raise a CherryPyExit (derived from
+    # SystemExit).  This exception may not make it back up to the caller
+    # due to internal thread use in the CherryPy engine, but there should
+    # be at least some indication that it happened.
+    bus = cherrypy.process.wspbus.bus
+    # The marker is stored on the bus object itself, so a second call
+    # doesn't wrap bus.exit again.
+    if "_patched_exit" in bus.__dict__:
+        return
+    bus._patched_exit = True
+
+    def patched_exit(orig):
+        # Swap in the fake os._exit only while the original exit runs
+        real_exit = os._exit
+
+        def fake_exit(code):
+            raise CherryPyExit(code)
+        os._exit = fake_exit
+        try:
+            orig()
+        finally:
+            # Always put the real os._exit back, even if orig() raised
+            os._exit = real_exit
+    # Bind the current bus.exit as 'orig' and install the wrapper
+    bus.exit = functools.partial(patched_exit, bus.exit)
+
+
+# Start/stop CherryPy standalone server
+def cherrypy_start(blocking=False, event=None):
+    """Start the CherryPy server, handling errors and signals
+    somewhat gracefully.
+
+    blocking: if true, don't return until the engine reaches the
+    EXITING state.
+
+    event: optional object with a set() method (for example a
+    threading.Event); it is set once the engine has started.
+    """
+
+    cherrypy_patch_exit()
+
+    # Start the server
+    cherrypy.engine.start()
+
+    # Signal that the engine has started successfully.  The default
+    # for 'event' must be None rather than False: False would pass
+    # this check and then fail on False.set().
+    if event is not None:
+        event.set()
+
+    if blocking:
+        try:
+            cherrypy.engine.wait(cherrypy.engine.states.EXITING,
+                                 interval=0.1, channel='main')
+        except (KeyboardInterrupt, IOError):
+            cherrypy.engine.log('Keyboard Interrupt: shutting down')
+            cherrypy.engine.exit()
+        except SystemExit:
+            # Shut the engine down, then let the exit propagate
+            cherrypy.engine.log('SystemExit raised: shutting down')
+            cherrypy.engine.exit()
+            raise
+
+
+# Stop CherryPy server
+def cherrypy_stop():
+    """Stop the CherryPy server by telling the engine to exit."""
+    engine = cherrypy.engine
+    engine.exit()
--- a/nilmdb/utils/__init__.py
+++ b/nilmdb/utils/__init__.py
@@ -1,6 +1,6 @@
 """NilmDB utilities"""

-from __future__ import absolute_import
+
 from nilmdb.utils.timer import Timer
 from nilmdb.utils.serializer import serializer_proxy
 from nilmdb.utils.lrucache import lru_cache
@@ -13,3 +13,4 @@ import nilmdb.utils.time
 import nilmdb.utils.iterator
 import nilmdb.utils.interval
 import nilmdb.utils.lock
+import nilmdb.utils.sort
--- a/nilmdb/utils/atomic.py
+++ b/nilmdb/utils/atomic.py
@@ -2,12 +2,12 @@

 import os

+
 def replace_file(filename, content):
    """Attempt to atomically and durably replace the filename with the
-    given contents.  This is intended to be 'pretty good on most
-    OSes', but not necessarily bulletproof."""
+    given contents"""

-    newfilename = filename + ".new"
+    newfilename = filename + b".new"

    # Write to new file, flush it
    with open(newfilename, "wb") as f:
@@ -16,11 +16,4 @@ def replace_file(filename, content):
        os.fsync(f.fileno())

    # Move new file over old one
-    try:
-        os.rename(newfilename, filename)
-    except OSError: # pragma: no cover
-        # Some OSes might not support renaming over an existing file.
-        # This is definitely NOT atomic!
-        os.remove(filename)
-        os.rename(newfilename, filename)
-
+    os.replace(newfilename, filename)
--- a/nilmdb/utils/datetime_tz/__init__.py
+++ b/nilmdb/utils/datetime_tz/__init__.py
@@ -1,710 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright 2009 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-#
-# Disable the invalid name warning as we are inheriting from a standard library
-# object.
-# pylint: disable-msg=C6409,W0212
-
-"""A version of the datetime module which *cares* about timezones.
-
-This module will never return a naive datetime object. This requires the module
-know your local timezone, which it tries really hard to figure out.
-
-You can override the detection by using the datetime.tzaware.defaulttz_set
-method. It the module is unable to figure out the timezone itself this method
-*must* be called before the normal module is imported. If done before importing
-it can also speed up the time taken to import as the defaulttz will no longer
-try and do the detection.
-"""
-
-__author__ = "tansell@google.com (Tim Ansell)"
-
-import calendar
-import datetime
-import os
-import os.path
-import re
-import time
-import warnings
-import dateutil.parser
-import dateutil.relativedelta
-import dateutil.tz
-import pytz
-import pytz_abbr
-
-
-try:
-  # pylint: disable-msg=C6204
-  import functools
-except ImportError, e:
-
-  class functools(object):
-    """Fake replacement for a full functools."""
-
-    # pylint: disable-msg=W0613
-    @staticmethod
-    def wraps(f, *args, **kw):
-      return f
-
-
-# Need to patch pytz.utc to have a _utcoffset so you can normalize/localize
-# using it.
-pytz.utc._utcoffset = datetime.timedelta()
-
-
-timedelta = datetime.timedelta
-
-
-def _tzinfome(tzinfo):
-  """Gets a tzinfo object from a string.
-
-  Args:
-    tzinfo: A string (or string like) object, or a datetime.tzinfo object.
-
-  Returns:
-    An datetime.tzinfo object.
-
-  Raises:
-    UnknownTimeZoneError: If the timezone given can't be decoded.
-  """
-  if not isinstance(tzinfo, datetime.tzinfo):
-    try:
-      tzinfo = pytz.timezone(tzinfo)
-    except AttributeError:
-      raise pytz.UnknownTimeZoneError("Unknown timezone! %s" % tzinfo)
-  return tzinfo
-
-
-# Our "local" timezone
-_localtz = None
-
-
-def localtz():
-  """Get the local timezone.
-
-  Returns:
-    The localtime timezone as a tzinfo object.
-  """
-  # pylint: disable-msg=W0603
-  global _localtz
-  if _localtz is None:
-    _localtz = detect_timezone()
-  return _localtz
-
-
-def localtz_set(timezone):
-  """Set the local timezone."""
-  # pylint: disable-msg=W0603
-  global _localtz
-  _localtz = _tzinfome(timezone)
-
-
-def detect_timezone():
-  """Try and detect the timezone that Python is currently running in.
-
-  We have a bunch of different methods for trying to figure this out (listed in
-  order they are attempted).
-    * Try TZ environment variable.
-    * Try and find /etc/timezone file (with timezone name).
-    * Try and find /etc/localtime file (with timezone data).
-    * Try and match a TZ to the current dst/offset/shortname.
-
-  Returns:
-    The detected local timezone as a tzinfo object
-
-  Raises:
-    pytz.UnknownTimeZoneError: If it was unable to detect a timezone.
-  """
-  # First we try the TZ variable
-  tz = _detect_timezone_environ()
-  if tz is not None:
-    return tz
-
-  # Second we try /etc/timezone and use the value in that
-  tz = _detect_timezone_etc_timezone()
-  if tz is not None:
-    return tz
-
-  # Next we try and see if something matches the tzinfo in /etc/localtime
-  tz = _detect_timezone_etc_localtime()
-  if tz is not None:
-    return tz
-
-  # Next we try and use a similiar method to what PHP does.
-  # We first try to search on time.tzname, time.timezone, time.daylight to
-  # match a pytz zone.
-  warnings.warn("Had to fall back to worst detection method (the 'PHP' "
-                "method).")
-
-  tz = _detect_timezone_php()
-  if tz is not None:
-    return tz
-
-  raise pytz.UnknownTimeZoneError("Unable to detect your timezone!")
-
-
-def _detect_timezone_environ():
-  if "TZ" in os.environ:
-    try:
-      return pytz.timezone(os.environ["TZ"])
-    except (IOError, pytz.UnknownTimeZoneError):
-      warnings.warn("You provided a TZ environment value (%r) we did not "
-                    "understand!" % os.environ["TZ"])
-
-
-def _detect_timezone_etc_timezone():
-  if os.path.exists("/etc/timezone"):
-    try:
-      tz = file("/etc/timezone").read().strip()
-      try:
-        return pytz.timezone(tz)
-      except (IOError, pytz.UnknownTimeZoneError), ei:
-        warnings.warn("Your /etc/timezone file references a timezone (%r) that"
-                      " is not valid (%r)." % (tz, ei))
-
-    # Problem reading the /etc/timezone file
-    except IOError, eo:
-      warnings.warn("Could not access your /etc/timezone file: %s" % eo)
-
-
-def _detect_timezone_etc_localtime():
-  matches = []
-  if os.path.exists("/etc/localtime"):
-    localtime = pytz.tzfile.build_tzinfo("/etc/localtime",
-                                         file("/etc/localtime"))
-
-    # See if we can find a "Human Name" for this..
-    for tzname in pytz.all_timezones:
-      tz = _tzinfome(tzname)
-
-      if dir(tz) != dir(localtime):
-        continue
-
-      for attrib in dir(tz):
-        # Ignore functions and specials
-        if callable(getattr(tz, attrib)) or attrib.startswith("__"):
-          continue
-
-        # This will always be different
-        if attrib == "zone" or attrib == "_tzinfos":
-          continue
-
-        if getattr(tz, attrib) != getattr(localtime, attrib):
-          break
-
-      # We get here iff break didn't happen, i.e. no meaningful attributes
-      # differ between tz and localtime
-      else:
-        matches.append(tzname)
-
-    if len(matches) == 1:
-      return _tzinfome(matches[0])
-    else:
-      # Warn the person about this!
-      warning = "Could not get a human name for your timezone: "
-      if len(matches) > 1:
-        warning += ("We detected multiple matches for your /etc/localtime. "
-                    "(Matches where %s)" % matches)
-        return _tzinfome(matches[0])
-      else:
-        warning += "We detected no matches for your /etc/localtime."
-      warnings.warn(warning)
-
-      # Register /etc/localtime as the timezone loaded.
-      pytz._tzinfo_cache['/etc/localtime'] = localtime
-      return localtime
-
-
-def _detect_timezone_php():
-  tomatch = (time.tzname[0], time.timezone, time.daylight)
-  now = datetime.datetime.now()
-
-  matches = []
-  for tzname in pytz.all_timezones:
-    try:
-      tz = pytz.timezone(tzname)
-    except IOError:
-      continue
-
-    try:
-      indst = tz.localize(now).timetuple()[-1]
-
-      if tomatch == (tz._tzname, -tz._utcoffset.seconds, indst):
-        matches.append(tzname)
-
-    # pylint: disable-msg=W0704
-    except AttributeError:
-      pass
-
-  if len(matches) > 1:
-    warnings.warn("We detected multiple matches for the timezone, choosing "
-                  "the first %s. (Matches where %s)" % (matches[0], matches))
-    return pytz.timezone(matches[0])
-
-
-class datetime_tz(datetime.datetime):
-  """An extension of the inbuilt datetime adding more functionality.
-
-  The extra functionality includes:
-    * Partial parsing support (IE 2006/02/30 matches %Y/%M/%D %H:%M)
-    * Full integration with pytz (just give it the string of the timezone!)
-    * Proper support for going to/from Unix timestamps (which are in UTC!).
-  """
-  __slots__ = ["is_dst"]
-
-  def __new__(cls, *args, **kw):
-    args = list(args)
-    if not args:
-      raise TypeError("Not enough arguments given.")
-
-    # See if we are given a tzinfo object...
-    tzinfo = None
-    if isinstance(args[-1], (datetime.tzinfo, basestring)):
-      tzinfo = _tzinfome(args.pop(-1))
-    elif kw.get("tzinfo", None) is not None:
-      tzinfo = _tzinfome(kw.pop("tzinfo"))
-
-    # Create a datetime object if we don't have one
-    if isinstance(args[0], datetime.datetime):
-      # Convert the datetime instance to a datetime object.
-      newargs = (list(args[0].timetuple()[0:6]) +
-                 [args[0].microsecond, args[0].tzinfo])
-      dt = datetime.datetime(*newargs)
-
-      if tzinfo is None and dt.tzinfo is None:
-        raise TypeError("Must specify a timezone!")
-
-      if tzinfo is not None and dt.tzinfo is not None:
-        raise TypeError("Can not give a timezone with timezone aware"
-                        " datetime object! (Use localize.)")
-    else:
-      dt = datetime.datetime(*args, **kw)
-
-    if dt.tzinfo is not None:
-      # Re-normalize the dt object
-      dt = dt.tzinfo.normalize(dt)
-
-    else:
-      if tzinfo is None:
-        tzinfo = localtz()
-
-      try:
-        dt = tzinfo.localize(dt, is_dst=None)
-      except pytz.AmbiguousTimeError:
-        is_dst = None
-        if "is_dst" in kw:
-          is_dst = kw.pop("is_dst")
-
-        try:
-          dt = tzinfo.localize(dt, is_dst)
-        except IndexError:
-          raise pytz.AmbiguousTimeError("No such time exists!")
-
-    newargs = list(dt.timetuple()[0:6])+[dt.microsecond, dt.tzinfo]
-    obj = datetime.datetime.__new__(cls, *newargs)
-    obj.is_dst = obj.dst() != datetime.timedelta(0)
-    return obj
-
-  def asdatetime(self, naive=True):
-    """Return this datetime_tz as a datetime object.
-
-    Args:
-      naive: Return *without* any tz info.
-
-    Returns:
-      This datetime_tz as a datetime object.
-    """
-    args = list(self.timetuple()[0:6])+[self.microsecond]
-    if not naive:
-      args.append(self.tzinfo)
-    return datetime.datetime(*args)
-
-  def asdate(self):
-    """Return this datetime_tz as a date object.
-
-    Returns:
-      This datetime_tz as a date object.
-    """
-    return datetime.date(self.year, self.month, self.day)
-
-  def totimestamp(self):
-    """Convert this datetime object back to a unix timestamp.
-
-    The Unix epoch is the time 00:00:00 UTC on January 1, 1970.
-
-    Returns:
-      Unix timestamp.
-    """
-    return calendar.timegm(self.utctimetuple())+1e-6*self.microsecond
-
-  def astimezone(self, tzinfo):
-    """Returns a version of this timestamp converted to the given timezone.
-
-    Args:
-      tzinfo: Either a datetime.tzinfo object or a string (which will be looked
-              up in pytz.
-
-    Returns:
-      A datetime_tz object in the given timezone.
-    """
-    # Assert we are not a naive datetime object
-    assert self.tzinfo is not None
-
-    tzinfo = _tzinfome(tzinfo)
-
-    d = self.asdatetime(naive=False).astimezone(tzinfo)
-    return datetime_tz(d)
-
-  # pylint: disable-msg=C6113
-  def replace(self, **kw):
-    """Return datetime with new specified fields given as arguments.
-
-    For example, dt.replace(days=4) would return a new datetime_tz object with
-    exactly the same as dt but with the days attribute equal to 4.
-
-    Any attribute can be replaced, but tzinfo can not be set to None.
-
-    Args:
-      Any datetime_tz attribute.
-
-    Returns:
-      A datetime_tz object with the attributes replaced.
-
-    Raises:
-      TypeError: If the given replacement is invalid.
-    """
-    if "tzinfo" in kw:
-      if kw["tzinfo"] is None:
-        raise TypeError("Can not remove the timezone use asdatetime()")
-
-    is_dst = None
-    if "is_dst" in kw:
-      is_dst = kw["is_dst"]
-      del kw["is_dst"]
-    else:
-      # Use our own DST setting..
-      is_dst = self.is_dst
-
-    replaced = self.asdatetime().replace(**kw)
-
-    return datetime_tz(replaced, tzinfo=self.tzinfo.zone, is_dst=is_dst)
-
-  # pylint: disable-msg=C6310
-  @classmethod
-  def smartparse(cls, toparse, tzinfo=None):
-    """Method which uses dateutil.parse and extras to try and parse the string.
-
-    Valid dates are found at:
-     http://labix.org/python-dateutil#head-1443e0f14ad5dff07efd465e080d1110920673d8-2
-
-    Other valid formats include:
-      "now" or "today"
-      "yesterday"
-      "tommorrow"
-      "5 minutes ago"
-      "10 hours ago"
-      "10h5m ago"
-      "start of yesterday"
-      "end of tommorrow"
-      "end of 3rd of March"
-
-    Args:
-      toparse: The string to parse.
-      tzinfo: Timezone for the resultant datetime_tz object should be in.
-              (Defaults to your local timezone.)
-
-    Returns:
-      New datetime_tz object.
-
-    Raises:
-      ValueError: If unable to make sense of the input.
-    """
-    # Default for empty fields are:
-    #  year/month/day == now
-    #  hour/minute/second/microsecond == 0
-    toparse = toparse.strip()
-
-    if tzinfo is None:
-      dt = cls.now()
-    else:
-      dt = cls.now(tzinfo)
-
-    default = dt.replace(hour=0, minute=0, second=0, microsecond=0)
-
-    # Remove "start of " and "end of " prefix in the string
-    if toparse.lower().startswith("end of "):
-      toparse = toparse[7:].strip()
-
-      dt += datetime.timedelta(days=1)
-      dt = dt.replace(hour=0, minute=0, second=0, microsecond=0)
-      dt -= datetime.timedelta(microseconds=1)
-
-      default = dt
-
-    elif toparse.lower().startswith("start of "):
-      toparse = toparse[9:].strip()
-
-      dt = dt.replace(hour=0, minute=0, second=0, microsecond=0)
-      default = dt
-
-    # Handle strings with "now", "today", "yesterday", "tomorrow" and "ago".
-    # Need to use lowercase
-    toparselower = toparse.lower()
-
-    if toparselower in ["now", "today"]:
-      pass
-
-    elif toparselower == "yesterday":
-      dt -= datetime.timedelta(days=1)
-
-    elif toparselower == "tommorrow":
-      dt += datetime.timedelta(days=1)
-
-    elif "ago" in toparselower:
-      # Remove the "ago" bit
-      toparselower = toparselower[:-3]
-      # Replace all "a day and an hour" with "1 day 1 hour"
-      toparselower = toparselower.replace("a ", "1 ")
-      toparselower = toparselower.replace("an ", "1 ")
-      toparselower = toparselower.replace(" and ", " ")
-
-      # Match the following
-      # 1 hour ago
-      # 1h ago
-      # 1 h ago
-      # 1 hour ago
-      # 2 hours ago
-      # Same with minutes, seconds, etc.
-
-      tocheck = ("seconds", "minutes", "hours", "days", "weeks", "months",
-                 "years")
-      result = {}
-      for match in re.finditer("([0-9]+)([^0-9]*)", toparselower):
-        amount = int(match.group(1))
-        unit = match.group(2).strip()
-
-        for bit in tocheck:
-          regex = "^([%s]|((%s)s?))$" % (
-              bit[0], bit[:-1])
-
-          bitmatch = re.search(regex, unit)
-          if bitmatch:
-            result[bit] = amount
-            break
-        else:
-          raise ValueError("Was not able to parse date unit %r!" % unit)
-
-      delta = dateutil.relativedelta.relativedelta(**result)
-      dt -= delta
-
-    else:
-      # Handle strings with normal datetime format, use original case.
-      dt = dateutil.parser.parse(toparse, default=default.asdatetime(),
-                                 tzinfos=pytz_abbr.tzinfos)
-      if dt is None:
-        raise ValueError("Was not able to parse date!")
-
-      if dt.tzinfo is pytz_abbr.unknown:
-        dt = dt.replace(tzinfo=None)
-
-      if dt.tzinfo is None:
-        if tzinfo is None:
-          tzinfo = localtz()
-        dt = cls(dt, tzinfo)
-      else:
-        if isinstance(dt.tzinfo, pytz_abbr.tzabbr):
-          abbr = dt.tzinfo
-          dt = dt.replace(tzinfo=None)
-          dt = cls(dt, abbr.zone, is_dst=abbr.dst)
-
-        dt = cls(dt)
-
-    return dt
-
-  @classmethod
-  def utcfromtimestamp(cls, timestamp):
-    """Returns a datetime object of a given timestamp (in UTC)."""
-    obj = datetime.datetime.utcfromtimestamp(timestamp)
-    obj = pytz.utc.localize(obj)
-    return cls(obj)
-
-  @classmethod
-  def fromtimestamp(cls, timestamp):
-    """Returns a datetime object of a given timestamp (in local tz)."""
-    d = cls.utcfromtimestamp(timestamp)
-    return d.astimezone(localtz())
-
-  @classmethod
-  def utcnow(cls):
-    """Return a new datetime representing UTC day and time."""
-    obj = datetime.datetime.utcnow()
-    obj = cls(obj, tzinfo=pytz.utc)
-    return obj
-
-  @classmethod
-  def now(cls, tzinfo=None):
-    """[tz] -> new datetime with tz's local day and time."""
-    obj = cls.utcnow()
-    if tzinfo is None:
-      tzinfo = localtz()
-    return obj.astimezone(tzinfo)
-
-  today = now
-
-  @staticmethod
-  def fromordinal(ordinal):
-    raise SyntaxError("Not enough information to create a datetime_tz object "
-                      "from an ordinal. Please use datetime.date.fromordinal")
-
-
-class iterate(object):
-  """Helpful iterators for working with datetime_tz objects."""
-
-  @staticmethod
-  def between(start, delta, end=None):
-    """Return an iterator between this date till given end point.
-
-    Example usage:
-      >>> d = datetime_tz.smartparse("5 days ago")
-      2008/05/12 11:45
-      >>> for i in d.between(timedelta(days=1), datetime_tz.now()):
-      >>>    print i
-      2008/05/12 11:45
-      2008/05/13 11:45
-      2008/05/14 11:45
-      2008/05/15 11:45
-      2008/05/16 11:45
-
-    Args:
-      start: The date to start at.
-      delta: The interval to iterate with.
-      end: (Optional) Date to end at. If not given the iterator will never
-           terminate.
-
-    Yields:
-      datetime_tz objects.
-    """
-    toyield = start
-    while end is None or toyield < end:
-      yield toyield
-      toyield += delta
-
-  @staticmethod
-  def weeks(start, end=None):
-    """Iterate over the weeks between the given datetime_tzs.
-
-    Args:
-      start: datetime_tz to start from.
-      end: (Optional) Date to end at, if not given the iterator will never
-           terminate.
-
-    Returns:
-      An iterator which generates datetime_tz objects a week apart.
-    """
-    return iterate.between(start, datetime.timedelta(days=7), end)
-
-  @staticmethod
-  def days(start, end=None):
-    """Iterate over the days between the given datetime_tzs.
-
-    Args:
-      start: datetime_tz to start from.
-      end: (Optional) Date to end at, if not given the iterator will never
-           terminate.
-
-    Returns:
-      An iterator which generates datetime_tz objects a day apart.
-    """
-    return iterate.between(start, datetime.timedelta(days=1), end)
-
-  @staticmethod
-  def hours(start, end=None):
-    """Iterate over the hours between the given datetime_tzs.
-
-    Args:
-      start: datetime_tz to start from.
-      end: (Optional) Date to end at, if not given the iterator will never
-           terminate.
-
-    Returns:
-      An iterator which generates datetime_tz objects a hour apart.
-    """
-    return iterate.between(start, datetime.timedelta(hours=1), end)
-
-  @staticmethod
-  def minutes(start, end=None):
-    """Iterate over the minutes between the given datetime_tzs.
-
-    Args:
-      start: datetime_tz to start from.
-      end: (Optional) Date to end at, if not given the iterator will never
-           terminate.
-
-    Returns:
-      An iterator which generates datetime_tz objects a minute apart.
-    """
-    return iterate.between(start, datetime.timedelta(minutes=1), end)
-
-  @staticmethod
-  def seconds(start, end=None):
-    """Iterate over the seconds between the given datetime_tzs.
-
-    Args:
-      start: datetime_tz to start from.
-      end: (Optional) Date to end at, if not given the iterator will never
-           terminate.
-
-    Returns:
-      An iterator which generates datetime_tz objects a second apart.
-    """
-    return iterate.between(start, datetime.timedelta(minutes=1), end)
-
-
-def _wrap_method(name):
-  """Wrap a method.
-
-  Patch a method which might return a datetime.datetime to return a
-  datetime_tz.datetime_tz instead.
-
-  Args:
-    name: The name of the method to patch
-  """
-  method = getattr(datetime.datetime, name)
-
-  # Have to give the second argument as method has no __module__ option.
-  @functools.wraps(method, ("__name__", "__doc__"), ())
-  def wrapper(*args, **kw):
-    r = method(*args, **kw)
-
-    if isinstance(r, datetime.datetime) and not isinstance(r, datetime_tz):
-      r = datetime_tz(r)
-    return r
-
-  setattr(datetime_tz, name, wrapper)
-
-for methodname in ["__add__", "__radd__", "__rsub__", "__sub__", "combine"]:
-
-  # Make sure we have not already got an override for this method
-  assert methodname not in datetime_tz.__dict__
-
-  _wrap_method(methodname)
-
-
-__all__ = ['datetime_tz', 'detect_timezone', 'iterate', 'localtz',
-    'localtz_set', 'timedelta', '_detect_timezone_environ',
-    '_detect_timezone_etc_localtime', '_detect_timezone_etc_timezone',
-    '_detect_timezone_php']
--- a/nilmdb/utils/datetime_tz/pytz_abbr.py
+++ b/nilmdb/utils/datetime_tz/pytz_abbr.py
@@ -1,230 +0,0 @@
-#!/usr/bin/python2.4
-# -*- coding: utf-8 -*-
-#
-# Copyright 2010 Google Inc. All Rights Reserved.
-#
-
-"""
-Common time zone acronyms/abbreviations for use with the datetime_tz module.
-
-*WARNING*: There are lots of caveats when using this module which are listed
-below.
-
-CAVEAT 1: The acronyms/abbreviations are not globally unique, they are not even
-unique within a region. For example, EST can mean any of,
-  Eastern Standard Time in Australia (which is 10 hour ahead of UTC)
-  Eastern Standard Time in North America (which is 5 hours behind UTC)
-
-Where there are two abbreviations the more popular one will appear in the all
-dictionary, while the less common one will only appear in that countries region
-dictionary. IE If using all, EST will be mapped to Eastern Standard Time in
-North America.
-
-CAVEAT 2: Many of the acronyms don't map to a neat Oslon timezones. For example,
-Eastern European Summer Time (EEDT) is used by many different countries in
-Europe *at different times*! If the acronym does not map neatly to one zone it
-is mapped to the Etc/GMT+-XX Oslon zone. This means that any date manipulations
-can end up with idiot things like summer time in the middle of winter.
-
-CAVEAT 3: The Summer/Standard time difference is really important! For an hour
-each year it is needed to determine which time you are actually talking about.
-    2002-10-27 01:20:00 EST != 2002-10-27 01:20:00 EDT
-"""
-
-import datetime
-import pytz
-import pytz.tzfile
-
-
-class tzabbr(datetime.tzinfo):
-  """A timezone abbreviation.
-
-  *WARNING*: This is not a tzinfo implementation! Trying to use this as tzinfo
-  object will result in failure.  We inherit from datetime.tzinfo so we can get
-  through the dateutil checks.
-  """
-  pass
-
-
-# A "marker" tzinfo object which is used to signify an unknown timezone.
-unknown = datetime.tzinfo(0)
-
-
-regions = {'all': {}, 'military': {}}
-# Create a special alias for the all and military regions
-all = regions['all']
-military = regions['military']
-
-
-def tzabbr_register(abbr, name, region, zone, dst):
-  """Register a new timezone abbreviation in the global registry.
-
-  If another abbreviation with the same name has already been registered it new
-  abbreviation will only be registered in region specific dictionary.
-  """
-  newabbr = tzabbr()
-  newabbr.abbr = abbr
-  newabbr.name = name
-  newabbr.region = region
-  newabbr.zone = zone
-  newabbr.dst = dst
-
-  if abbr not in all:
-    all[abbr] = newabbr
-
-  if not region in regions:
-    regions[region] = {}
-
-  assert abbr not in regions[region]
-  regions[region][abbr] = newabbr
-
-
-def tzinfos_create(use_region):
-  abbrs = regions[use_region]
-
-  def tzinfos(abbr, offset):
-    if abbr:
-      if abbr in abbrs:
-        result = abbrs[abbr]
-        if offset:
-          # FIXME: Check the offset matches the abbreviation we just selected.
-          pass
-        return result
-      else:
-        raise ValueError, "Unknown timezone found %s" % abbr
-    if offset == 0:
-      return pytz.utc
-    if offset:
-      return pytz.FixedOffset(offset/60)
-    return unknown
-
-  return tzinfos
-
-
-# Create a special alias for the all tzinfos
-tzinfos = tzinfos_create('all')
-
-
-# Create the abbreviations.
-# *WARNING*: Order matters!
-tzabbr_register("A", u"Alpha Time Zone", u"Military", "Etc/GMT-1", False)
-tzabbr_register("ACDT", u"Australian Central Daylight Time", u"Australia",
-                "Australia/Adelaide", True)
-tzabbr_register("ACST", u"Australian Central Standard Time", u"Australia",
-                "Australia/Adelaide", False)
-tzabbr_register("ADT", u"Atlantic Daylight Time", u"North America",
-                "America/Halifax", True)
-tzabbr_register("AEDT", u"Australian Eastern Daylight Time", u"Australia",
-                "Australia/Sydney", True)
-tzabbr_register("AEST", u"Australian Eastern Standard Time", u"Australia",
-                "Australia/Sydney", False)
-tzabbr_register("AKDT", u"Alaska Daylight Time", u"North America",
-                "US/Alaska", True)
-tzabbr_register("AKST", u"Alaska Standard Time", u"North America",
-                "US/Alaska", False)
-tzabbr_register("AST", u"Atlantic Standard Time", u"North America",
-                "America/Halifax", False)
-tzabbr_register("AWDT", u"Australian Western Daylight Time", u"Australia",
-                "Australia/West", True)
-tzabbr_register("AWST", u"Australian Western Standard Time", u"Australia",
-                "Australia/West", False)
-tzabbr_register("B", u"Bravo Time Zone", u"Military", "Etc/GMT-2", False)
-tzabbr_register("BST", u"British Summer Time", u"Europe", "Europe/London", True)
-tzabbr_register("C", u"Charlie Time Zone", u"Military", "Etc/GMT-2", False)
-tzabbr_register("CDT", u"Central Daylight Time", u"North America",
-                "US/Central", True)
-tzabbr_register("CEDT", u"Central European Daylight Time", u"Europe",
-                "Etc/GMT+2", True)
-tzabbr_register("CEST", u"Central European Summer Time", u"Europe",
-                "Etc/GMT+2", True)
-tzabbr_register("CET", u"Central European Time", u"Europe", "Etc/GMT+1", False)
-tzabbr_register("CST", u"Central Standard Time", u"North America",
-                "US/Central", False)
-tzabbr_register("CXT", u"Christmas Island Time", u"Australia",
-                "Indian/Christmas", False)
-tzabbr_register("D", u"Delta Time Zone", u"Military", "Etc/GMT-2", False)
-tzabbr_register("E", u"Echo Time Zone", u"Military", "Etc/GMT-2", False)
-tzabbr_register("EDT", u"Eastern Daylight Time", u"North America",
-                "US/Eastern", True)
-tzabbr_register("EEDT", u"Eastern European Daylight Time", u"Europe",
-                "Etc/GMT+3", True)
-tzabbr_register("EEST", u"Eastern European Summer Time", u"Europe",
-                "Etc/GMT+3", True)
-tzabbr_register("EET", u"Eastern European Time", u"Europe", "Etc/GMT+2", False)
-tzabbr_register("EST", u"Eastern Standard Time", u"North America",
-                "US/Eastern", False)
-tzabbr_register("F", u"Foxtrot Time Zone", u"Military", "Etc/GMT-6", False)
-tzabbr_register("G", u"Golf Time Zone", u"Military", "Etc/GMT-7", False)
-tzabbr_register("GMT", u"Greenwich Mean Time", u"Europe", pytz.utc, False)
-tzabbr_register("H", u"Hotel Time Zone", u"Military", "Etc/GMT-8", False)
-#tzabbr_register("HAA", u"Heure Avancée de l'Atlantique", u"North America", u"UTC - 3 hours")
-#tzabbr_register("HAC", u"Heure Avancée du Centre", u"North America", u"UTC - 5 hours")
-tzabbr_register("HADT", u"Hawaii-Aleutian Daylight Time", u"North America",
-                "Pacific/Honolulu", True)
-#tzabbr_register("HAE", u"Heure Avancée de l'Est", u"North America", u"UTC - 4 hours")
-#tzabbr_register("HAP", u"Heure Avancée du Pacifique", u"North America", u"UTC - 7 hours")
-#tzabbr_register("HAR", u"Heure Avancée des Rocheuses", u"North America", u"UTC - 6 hours")
-tzabbr_register("HAST", u"Hawaii-Aleutian Standard Time", u"North America",
-                "Pacific/Honolulu", False)
-#tzabbr_register("HAT", u"Heure Avancée de Terre-Neuve", u"North America", u"UTC - 2:30 hours")
-#tzabbr_register("HAY", u"Heure Avancée du Yukon", u"North America", u"UTC - 8 hours")
-tzabbr_register("HDT", u"Hawaii Daylight Time", u"North America",
-                "Pacific/Honolulu", True)
-#tzabbr_register("HNA", u"Heure Normale de l'Atlantique", u"North America", u"UTC - 4 hours")
-#tzabbr_register("HNC", u"Heure Normale du Centre", u"North America", u"UTC - 6 hours")
-#tzabbr_register("HNE", u"Heure Normale de l'Est", u"North America", u"UTC - 5 hours")
-#tzabbr_register("HNP", u"Heure Normale du Pacifique", u"North America", u"UTC - 8 hours")
-#tzabbr_register("HNR", u"Heure Normale des Rocheuses", u"North America", u"UTC - 7 hours")
-#tzabbr_register("HNT", u"Heure Normale de Terre-Neuve", u"North America", u"UTC - 3:30 hours")
-#tzabbr_register("HNY", u"Heure Normale du Yukon", u"North America", u"UTC - 9 hours")
-tzabbr_register("HST", u"Hawaii Standard Time", u"North America",
-                "Pacific/Honolulu", False)
-tzabbr_register("I", u"India Time Zone", u"Military", "Etc/GMT-9", False)
-tzabbr_register("IST", u"Irish Summer Time", u"Europe", "Europe/Dublin", True)
-tzabbr_register("K", u"Kilo Time Zone", u"Military", "Etc/GMT-10", False)
-tzabbr_register("L", u"Lima Time Zone", u"Military", "Etc/GMT-11", False)
-tzabbr_register("M", u"Mike Time Zone", u"Military", "Etc/GMT-12", False)
-tzabbr_register("MDT", u"Mountain Daylight Time", u"North America",
-                "US/Mountain", True)
-#tzabbr_register("MESZ", u"Mitteleuroäische Sommerzeit", u"Europe", u"UTC + 2 hours")
-#tzabbr_register("MEZ", u"Mitteleuropäische Zeit", u"Europe", u"UTC + 1 hour")
-tzabbr_register("MSD", u"Moscow Daylight Time", u"Europe",
-                "Europe/Moscow", True)
-tzabbr_register("MSK", u"Moscow Standard Time", u"Europe",
-                "Europe/Moscow", False)
-tzabbr_register("MST", u"Mountain Standard Time", u"North America",
-                "US/Mountain", False)
-tzabbr_register("N", u"November Time Zone", u"Military", "Etc/GMT+1", False)
-tzabbr_register("NDT", u"Newfoundland Daylight Time", u"North America",
-                "America/St_Johns", True)
-tzabbr_register("NFT", u"Norfolk (Island) Time", u"Australia",
-                "Pacific/Norfolk", False)
-tzabbr_register("NST", u"Newfoundland Standard Time", u"North America",
-                "America/St_Johns", False)
-tzabbr_register("O", u"Oscar Time Zone", u"Military", "Etc/GMT+2", False)
-tzabbr_register("P", u"Papa Time Zone", u"Military", "Etc/GMT+3", False)
-tzabbr_register("PDT", u"Pacific Daylight Time", u"North America",
-                "US/Pacific", True)
-tzabbr_register("PST", u"Pacific Standard Time", u"North America",
-                "US/Pacific", False)
-tzabbr_register("Q", u"Quebec Time Zone", u"Military", "Etc/GMT+4", False)
-tzabbr_register("R", u"Romeo Time Zone", u"Military", "Etc/GMT+5", False)
-tzabbr_register("S", u"Sierra Time Zone", u"Military", "Etc/GMT+6", False)
-tzabbr_register("T", u"Tango Time Zone", u"Military", "Etc/GMT+7", False)
-tzabbr_register("U", u"Uniform Time Zone", u"Military", "Etc/GMT+8", False)
-tzabbr_register("UTC", u"Coordinated Universal Time", u"Europe",
-                pytz.utc, False)
-tzabbr_register("V", u"Victor Time Zone", u"Military", "Etc/GMT+9", False)
-tzabbr_register("W", u"Whiskey Time Zone", u"Military", "Etc/GMT+10", False)
-tzabbr_register("WDT", u"Western Daylight Time", u"Australia",
-                "Australia/West", True)
-tzabbr_register("WEDT", u"Western European Daylight Time", u"Europe",
-                "Etc/GMT+1", True)
-tzabbr_register("WEST", u"Western European Summer Time", u"Europe",
-                "Etc/GMT+1", True)
-tzabbr_register("WET", u"Western European Time", u"Europe", pytz.utc, False)
-tzabbr_register("WST", u"Western Standard Time", u"Australia",
-                "Australia/West", False)
-tzabbr_register("X", u"X-ray Time Zone", u"Military", "Etc/GMT+11", False)
-tzabbr_register("Y", u"Yankee Time Zone", u"Military", "Etc/GMT+12", False)
-tzabbr_register("Z", u"Zulu Time Zone", u"Military", pytz.utc, False)
--- a/nilmdb/utils/diskusage.py
+++ b/nilmdb/utils/diskusage.py
@@ -1,25 +1,36 @@
 import os
+import errno
 from math import log

+
 def human_size(num):
    """Human friendly file size"""
-    unit_list = zip(['bytes', 'kiB', 'MiB', 'GiB', 'TiB'], [0, 0, 1, 2, 2])
-    if num > 1:
+    unit_list = list(zip(['bytes', 'kiB', 'MiB', 'GiB', 'TiB'],
+                         [0, 0, 1, 2, 2]))
+    if num == 0:
+        return '0 bytes'
+    if num == 1:
+        return '1 byte'
    exponent = min(int(log(num, 1024)), len(unit_list) - 1)
    quotient = float(num) / 1024**exponent
    unit, num_decimals = unit_list[exponent]
    format_string = '{:.%sf} {}' % (num_decimals)
    return format_string.format(quotient, unit)
-    if num == 0: # pragma: no cover
-        return '0 bytes'
-    if num == 1: # pragma: no cover
-        return '1 byte'
+

 def du(path):
-    """Like du -sb, returns total size of path in bytes."""
-    size = os.path.getsize(path)
+    """Like du -sb, returns total size of path in bytes.  Ignore
+    errors that might occur if we encounter broken symlinks or
+    files in the process of being removed."""
+    try:
+        st = os.stat(path)
+        size = st.st_blocks * 512
        if os.path.isdir(path):
            for thisfile in os.listdir(path):
                filepath = os.path.join(path, thisfile)
                size += du(filepath)
        return size
+    except OSError as e:
+        if e.errno != errno.ENOENT:
+            raise
+        return 0
--- a/nilmdb/utils/fallocate.py
+++ b/nilmdb/utils/fallocate.py
@@ -1,49 +1,20 @@
 # Implementation of hole punching via fallocate, if the OS
 # and filesystem support it.

-try:
-    import os
-    import ctypes
-    import ctypes.util
+import fallocate

-    def make_fallocate():
-        libc_name = ctypes.util.find_library('c')
-        libc = ctypes.CDLL(libc_name, use_errno=True)

-        _fallocate = libc.fallocate
-        _fallocate.restype = ctypes.c_int
-        _fallocate.argtypes = [ ctypes.c_int, ctypes.c_int,
-                                ctypes.c_int64, ctypes.c_int64 ]
-
-        del libc
-        del libc_name
-
-        def fallocate(fd, mode, offset, len_):
-            res = _fallocate(fd, mode, offset, len_)
-            if res != 0: # pragma: no cover
-                errno = ctypes.get_errno()
-                raise IOError(errno, os.strerror(errno))
-        return fallocate
-
-    fallocate = make_fallocate()
-    del make_fallocate
-except Exception: # pragma: no cover
-    fallocate = None
-
-FALLOC_FL_KEEP_SIZE = 0x01
-FALLOC_FL_PUNCH_HOLE = 0x02
-
-def punch_hole(filename, offset, length, ignore_errors = True):
+def punch_hole(filename, offset, length, ignore_errors=True):
    """Punch a hole in the file.  This isn't well supported, so errors
    are ignored by default."""
    try:
-        if fallocate is None: # pragma: no cover
-            raise IOError("fallocate not available")
        with open(filename, "r+") as f:
-            fallocate(f.fileno(),
-                      FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
-                      offset, length)
-    except IOError: # pragma: no cover
+            fallocate.fallocate(
+                f.fileno(),
+                offset,
+                length,
+                fallocate.FALLOC_FL_KEEP_SIZE | fallocate.FALLOC_FL_PUNCH_HOLE)
+    except Exception:
        if ignore_errors:
            return
        raise
--- a/nilmdb/utils/interval.py
+++ b/nilmdb/utils/interval.py
@@ -1,5 +1,6 @@
 """Interval.  Like nilmdb.server.interval, but re-implemented here
-in plain Python so clients have easier access to it.
+in plain Python so clients have easier access to it, and with a few
+helper functions.

 Intervals are half-open, ie. they include data points with timestamps
 [start, end)
@@ -8,10 +9,12 @@ Intervals are half-open, ie. they include data points with timestamps
 import nilmdb.utils.time
 import nilmdb.utils.iterator

+
 class IntervalError(Exception):
    """Error due to interval overlap, etc"""
    pass

+
 # Interval
 class Interval:
    """Represents an interval of time."""
@@ -21,7 +24,7 @@ class Interval:
        'start' and 'end' are arbitrary numbers that represent time
        """
        if start >= end:
-            # Explicitly disallow zero-width intervals (since they're half-open)
+            # Explicitly disallow zero-width intervals, since they're half-open
            raise IntervalError("start %s must precede end %s" % (start, end))
        self.start = start
        self.end = end
@@ -34,9 +37,28 @@ class Interval:
        return ("[" + nilmdb.utils.time.timestamp_to_string(self.start) +
                " -> " + nilmdb.utils.time.timestamp_to_string(self.end) + ")")

-    def __cmp__(self, other):
-        """Compare two intervals.  If non-equal, order by start then end"""
-        return cmp(self.start, other.start) or cmp(self.end, other.end)
+    def human_string(self):
+        return ("[ " + nilmdb.utils.time.timestamp_to_human(self.start) +
+                " -> " + nilmdb.utils.time.timestamp_to_human(self.end) + " ]")
+
+    # Compare two intervals.  If non-equal, order by start then end
+    def __lt__(self, other):
+        return (self.start, self.end) < (other.start, other.end)
+
+    def __gt__(self, other):
+        return (self.start, self.end) > (other.start, other.end)
+
+    def __le__(self, other):
+        return (self.start, self.end) <= (other.start, other.end)
+
+    def __ge__(self, other):
+        return (self.start, self.end) >= (other.start, other.end)
+
+    def __eq__(self, other):
+        return (self.start, self.end) == (other.start, other.end)
+
+    def __ne__(self, other):
+        return (self.start, self.end) != (other.start, other.end)

    def intersects(self, other):
        """Return True if two Interval objects intersect"""
@@ -53,18 +75,12 @@ class Interval:
            raise IntervalError("not a subset")
        return Interval(start, end)

-def set_difference(a, b):
-    """
-    Compute the difference (a \\ b) between the intervals in 'a' and
-    the intervals in 'b'; i.e., the ranges that are present in 'self'
-    but not 'other'.

-    'a' and 'b' must both be iterables.
-
-    Returns a generator that yields each interval in turn.
-    Output intervals are built as subsets of the intervals in the
-    first argument (a).
-    """
+def _interval_math_helper(a, b, op, subset=True):
+    """Helper for set_difference, intersection functions,
+    to compute interval subsets based on a math operator on ranges
+    present in A and B.  Subsets are computed from A, or new intervals
+    are generated if subset = False."""
    # Iterate through all starts and ends in sorted order.  Add a
    # tag to the iterator so that we can figure out which one they
    # were, after sorting.
@@ -79,28 +95,74 @@ def set_difference(a, b):
    # At each point, evaluate which type of end it is, to determine
    # how to build up the output intervals.
    a_interval = None
-    b_interval = None
+    in_a = False
+    in_b = False
    out_start = None
    for (ts, k, i) in nilmdb.utils.iterator.imerge(a_iter, b_iter):
        if k == 0:
-            # start a interval
            a_interval = i
-            if b_interval is None:
-                out_start = ts
+            in_a = True
        elif k == 1:
-            # start b interval
-            b_interval = i
-            if out_start is not None and out_start != ts:
-                yield a_interval.subset(out_start, ts)
-            out_start = None
+            in_b = True
        elif k == 2:
-            # end a interval
-            if out_start is not None and out_start != ts:
-                yield a_interval.subset(out_start, ts)
-            out_start = None
-            a_interval = None
-        elif k == 3:
-            # end b interval
-            b_interval = None
-            if a_interval:
+            in_a = False
+        else:  # k == 3
+            in_b = False
+        include = op(in_a, in_b)
+        if include and out_start is None:
            out_start = ts
+        elif not include:
+            if out_start is not None and out_start != ts:
+                if subset:
+                    yield a_interval.subset(out_start, ts)
+                else:
+                    yield Interval(out_start, ts)
+            out_start = None
+
+
+def set_difference(a, b):
+    """
+    Compute the difference (a \\ b) between the intervals in 'a' and
+    the intervals in 'b'; i.e., the ranges that are present in 'a'
+    but not 'b'.
+
+    'a' and 'b' must both be iterables.
+
+    Returns a generator that yields each interval in turn.
+    Output intervals are built as subsets of the intervals in the
+    first argument (a).
+    """
+    return _interval_math_helper(a, b, (lambda a, b: a and not b))
+
+
+def intersection(a, b):
+    """
+    Compute the intersection between the intervals in 'a' and the
+    intervals in 'b'; i.e., the ranges that are present in both 'a'
+    and 'b'.
+
+    'a' and 'b' must both be iterables.
+
+    Returns a generator that yields each interval in turn.
+    Output intervals are built as subsets of the intervals in the
+    first argument (a).
+    """
+    return _interval_math_helper(a, b, (lambda a, b: a and b))
+
+
+def optimize(it):
+    """
+    Given an iterable 'it' with intervals, optimize them by joining
+    together intervals that are adjacent in time, and return a generator
+    that yields the new intervals.
+    """
+    saved_int = None
+    for interval in it:
+        if saved_int is not None:
+            if saved_int.end == interval.start:
+                interval.start = saved_int.start
+            else:
+                yield saved_int
+        saved_int = interval
+    if saved_int is not None:
+        yield saved_int
--- a/nilmdb/utils/iterator.py
+++ b/nilmdb/utils/iterator.py
@@ -2,6 +2,8 @@

 # Iterator merging, based on http://code.activestate.com/recipes/491285/
 import heapq
+
+
 def imerge(*iterables):
    '''Merge multiple sorted inputs into a single sorted output.

@@ -17,8 +19,8 @@ def imerge(*iterables):
    h_append = h.append
    for it in map(iter, iterables):
        try:
-            next = it.next
-            h_append([next(), next])
+            nexter = it.__next__
+            h_append([nexter(), nexter])
        except _Stop:
            pass
    heapq.heapify(h)
@@ -26,9 +28,9 @@ def imerge(*iterables):
    while 1:
        try:
            while 1:
-                v, next = s = h[0]      # raises IndexError when h is empty
+                v, nexter = s = h[0]    # raises IndexError when h is empty
                yield v
-                s[0] = next()           # raises StopIteration when exhausted
+                s[0] = nexter()         # raises StopIteration when exhausted
                siftup(h, 0)            # restore heap condition
        except _Stop:
            heappop(h)                  # remove empty iterator
--- a/nilmdb/utils/lock.py
+++ b/nilmdb/utils/lock.py
@@ -1,12 +1,10 @@
 # File locking

-import warnings
+import fcntl
+import errno

-try:
-    import fcntl
-    import errno

-    def exclusive_lock(f):
+def exclusive_lock(f):
    """Acquire an exclusive lock.  Returns True on successful
    lock, or False on error."""
    try:
@@ -14,20 +12,11 @@ try:
    except IOError as e:
        if e.errno in (errno.EACCES, errno.EAGAIN):
            return False
-            else: # pragma: no cover
+        else:
            raise
    return True

-    def exclusive_unlock(f):
+
+def exclusive_unlock(f):
    """Release an exclusive lock."""
    fcntl.flock(f.fileno(), fcntl.LOCK_UN)
-
-except ImportError: # pragma: no cover
-    def exclusive_lock(f):
-        """Dummy lock function -- does not lock!"""
-        warnings.warn("Pretending to lock " + str(f))
-        return True
-
-    def exclusive_unlock(f):
-        """Release an exclusive lock."""
-        return
--- a/nilmdb/utils/lrucache.py
+++ b/nilmdb/utils/lrucache.py
@@ -6,10 +6,11 @@
 import collections
 import decorator

-def lru_cache(size = 10, onremove = None, keys = slice(None)):
+
+def lru_cache(size=10, onremove=None, keys=slice(None)):
    """Least-recently-used cache decorator.

-    @lru_cache(size = 10, onevict = None)
+    @lru_cache(size=10, onremove=None)
    def f(...):
        pass

@@ -53,14 +54,17 @@ def lru_cache(size = 10, onremove = None, keys = slice(None)):
            if key in cache:
                evict(cache.pop(key))
            else:
-                if len(cache) > 0 and len(args) != len(cache.iterkeys().next()):
+                if cache:
+                    if len(args) != len(next(iter(cache.keys()))):
                        raise KeyError("trying to remove from LRU cache, but "
                                       "number of arguments doesn't match the "
                                       "cache key length")

        def cache_remove_all():
+            nonlocal cache
            for key in cache:
-                evict(cache.pop(key))
+                evict(cache[key])
+            cache = collections.OrderedDict()

        def cache_info():
            return (func.cache_hits, func.cache_misses)
--- a/nilmdb/utils/mustclose.py
+++ b/nilmdb/utils/mustclose.py
@@ -1,9 +1,10 @@
-from nilmdb.utils.printf import *
 import sys
 import inspect
 import decorator
+from nilmdb.utils.printf import fprintf

-def must_close(errorfile = sys.stderr, wrap_verify = False):
+
+def must_close(errorfile=sys.stderr, wrap_verify=False):
    """Class decorator that warns on 'errorfile' at deletion time if
    the class's close() member wasn't called.

@@ -12,12 +13,17 @@ def must_close(errorfile = sys.stderr, wrap_verify = False):
    already been called."""
    def class_decorator(cls):

+        def is_method_or_function(x):
+            return inspect.ismethod(x) or inspect.isfunction(x)
+
        def wrap_class_method(wrapper):
            try:
-                orig = getattr(cls, wrapper.__name__).im_func
-            except Exception:
+                orig = getattr(cls, wrapper.__name__)
+            except AttributeError:
                orig = lambda x: None
-            setattr(cls, wrapper.__name__, decorator.decorator(wrapper, orig))
+            if is_method_or_function(orig):
+                setattr(cls, wrapper.__name__,
+                        decorator.decorator(wrapper, orig))

        @wrap_class_method
        def __init__(orig, self, *args, **kwargs):
@@ -28,10 +34,13 @@ def must_close(errorfile = sys.stderr, wrap_verify = False):

        @wrap_class_method
        def __del__(orig, self, *args, **kwargs):
+            try:
                if "_must_close" in self.__dict__:
                    fprintf(errorfile, "error: %s.close() wasn't called!\n",
                            self.__class__.__name__)
                return orig(self, *args, **kwargs)
+            except:
+                pass

        @wrap_class_method
        def close(orig, self, *args, **kwargs):
@@ -46,16 +55,17 @@ def must_close(errorfile = sys.stderr, wrap_verify = False):
                raise AssertionError("called " + str(orig) + " after close")
            return orig(self, *args, **kwargs)
        if wrap_verify:
-            for (name, method) in inspect.getmembers(cls, inspect.ismethod):
-                # Skip class methods
-                if method.__self__ is not None:
-                    continue
+            for (name, method) in inspect.getmembers(cls,
+                                                     is_method_or_function):
                # Skip some methods
-                if name in [ "__del__", "__init__" ]:
+                if name in ["__del__", "__init__"]:
                    continue
                # Set up wrapper
-                setattr(cls, name, decorator.decorator(verifier,
-                                                       method.im_func))
+                if inspect.ismethod(method):
+                    func = method.__func__
+                else:
+                    func = method
+                setattr(cls, name, decorator.decorator(verifier, func))

        return cls
    return class_decorator
--- a/nilmdb/utils/printf.py
+++ b/nilmdb/utils/printf.py
@@ -1,9 +1,13 @@
 """printf, fprintf, sprintf"""

-from __future__ import print_function
+
 def printf(_str, *args):
    print(_str % args, end='')
+
+
 def fprintf(_file, _str, *args):
    print(_str % args, end='', file=_file)
+
+
 def sprintf(_str, *args):
    return (_str % args)
--- a/nilmdb/utils/serializer.py
+++ b/nilmdb/utils/serializer.py
@@ -1,10 +1,6 @@
-import Queue
+import queue
 import threading
 import sys
-import decorator
-import inspect
-import types
-import functools

 # This file provides a class that will wrap an object and serialize
 # all calls to its methods.  All calls to that object will be queued
@@ -13,6 +9,7 @@ import functools

 # Based partially on http://stackoverflow.com/questions/2642515/

+
 class SerializerThread(threading.Thread):
    """Thread that retrieves call information from the queue, makes the
    call, and returns the results."""
@@ -40,6 +37,7 @@ class SerializerThread(threading.Thread):
            result_queue.put((exception, result))
            del exception, result

+
 def serializer_proxy(obj_or_type):
    """Wrap the given object or type in a SerializerObjectProxy.

@@ -49,61 +47,88 @@ def serializer_proxy(obj_or_type):
    The proxied requests, including instantiation, are performed in a
    single thread and serialized between caller threads.
    """
-    class SerializerCallProxy(object):
+    class SerializerCallProxy():
        def __init__(self, call_queue, func, objectproxy):
            self.call_queue = call_queue
            self.func = func
            # Need to hold a reference to object proxy so it doesn't
            # go away (and kill the thread) until after get called.
            self.objectproxy = objectproxy
+
        def __call__(self, *args, **kwargs):
-            result_queue = Queue.Queue()
+            result_queue = queue.Queue()
            self.call_queue.put((result_queue, self.func, args, kwargs))
-            ( exc_info, result ) = result_queue.get()
+            (exc_info, result) = result_queue.get()
            if exc_info is None:
                return result
            else:
-                raise exc_info[0], exc_info[1], exc_info[2]
+                raise exc_info[1].with_traceback(exc_info[2])

-    class SerializerObjectProxy(object):
+    class SerializerObjectProxy():
        def __init__(self, obj_or_type, *args, **kwargs):
            self.__object = obj_or_type
-            try:
-                if type(obj_or_type) in (types.TypeType, types.ClassType):
+            if isinstance(obj_or_type, type):
                classname = obj_or_type.__name__
            else:
                classname = obj_or_type.__class__.__name__
-            except AttributeError: # pragma: no cover
-                classname = "???"
-            self.__call_queue = Queue.Queue()
+            self.__call_queue = queue.Queue()
            self.__thread = SerializerThread(classname, self.__call_queue)
            self.__thread.daemon = True
            self.__thread.start()
            self._thread_safe = True

        def __getattr__(self, key):
-            if key.startswith("_SerializerObjectProxy__"): # pragma: no cover
-                raise AttributeError
+            # If the attribute is a function, we want to return a
+            # proxy that will perform the call through the serializer
+            # when called.  Otherwise, we want to return the value
+            # directly.  This means we need to grab the attribute once,
+            # and therefore self.__object.__getattr__ may be called
+            # in an unsafe way, from the caller's thread.
            attr = getattr(self.__object, key)
            if not callable(attr):
+                # It's not callable, so perform the getattr from within
+                # the serializer thread, then return its value.
+                # That may differ from the "attr" value we just grabbed
+                # from here, due to forced ordering in the serializer.
                getter = SerializerCallProxy(self.__call_queue, getattr, self)
                return getter(self.__object, key)
+            else:
+                # It is callable, so return an object that will proxy through
+                # the serializer when called.
                r = SerializerCallProxy(self.__call_queue, attr, self)
                return r

+        # For an iterable object, on __iter__(), save the object's
+        # iterator and return this proxy.  On next(), call the object's
+        # iterator through this proxy.
+        def __iter__(self):
+            attr = getattr(self.__object, "__iter__")
+            self.__iter = SerializerCallProxy(self.__call_queue, attr, self)()
+            return self
+
+        def __next__(self):
+            return SerializerCallProxy(self.__call_queue,
+                                       self.__iter.__next__, self)()
+
+        def __getitem__(self, key):
+            return self.__getattr__("__getitem__")(key)
+
        def __call__(self, *args, **kwargs):
            """Call this to instantiate the type, if a type was passed
            to serializer_proxy.  Otherwise, pass the call through."""
            ret = SerializerCallProxy(self.__call_queue,
                                      self.__object, self)(*args, **kwargs)
-            if type(self.__object) in (types.TypeType, types.ClassType):
+            if isinstance(self.__object, type):
                # Instantiation
                self.__object = ret
                return self
            return ret

        def __del__(self):
+            try:
+                # Signal thread to exit, but don't wait for it.
                self.__call_queue.put((None, None, None, None))
-            self.__thread.join()
+            except:
+                pass

    return SerializerObjectProxy(obj_or_type)
--- a/nilmdb/utils/sort.py
+++ b/nilmdb/utils/sort.py
@@ -0,0 +1,19 @@
+import re
+
+
+def sort_human(items, key=None):
+    """Human-friendly sort (/stream/2 before /stream/10)"""
+    def to_num(val):
+        try:
+            return int(val)
+        except Exception:
+            return val
+
+    def human_key(text):
+        if key:
+            text = key(text)
+        # Break into character and numeric chunks.
+        chunks = re.split(r'([0-9]+)', text)
+        return [to_num(c) for c in chunks]
+
+    return sorted(items, key=human_key)
--- a/nilmdb/utils/threadsafety.py
+++ b/nilmdb/utils/threadsafety.py
@@ -1,26 +1,25 @@
-from nilmdb.utils.printf import *
 import threading
-import warnings
-import types
+from nilmdb.utils.printf import sprintf

-def verify_proxy(obj_or_type, exception = False, check_thread = True,
-                 check_concurrent = True):
+
+def verify_proxy(obj_or_type, check_thread=True,
+                 check_concurrent=True):
    """Wrap the given object or type in a VerifyObjectProxy.

    Returns a VerifyObjectProxy that proxies all method calls to the
    given object, as well as attribute retrievals.

-    When calling methods, the following checks are performed.  If
-    exception is True, an exception is raised.  Otherwise, a warning
-    is printed.
+    When calling methods, the following checks are performed.  On
+    failure, an exception is raised.

-    check_thread = True     # Warn/fail if two different threads call methods.
-    check_concurrent = True # Warn/fail if two functions are concurrently
+    check_thread = True     # Fail if two different threads call methods.
+    check_concurrent = True # Fail if two functions are concurrently
                            # run through this proxy
    """
-    class Namespace(object):
+    class Namespace():
        pass
-    class VerifyCallProxy(object):
+
+    class VerifyCallProxy():
        def __init__(self, func, parent_namespace):
            self.func = func
            self.parent_namespace = parent_namespace
@@ -42,22 +41,16 @@ def verify_proxy(obj_or_type, exception = False, check_thread = True,
                              " but %s called %s.%s",
                              p.thread.name, p.classname, p.thread_callee,
                              this.name, p.classname, callee)
-                if exception:
                raise AssertionError(err)
-                else: # pragma: no cover
-                    warnings.warn(err)

            need_concur_unlock = False
            if check_concurrent:
-                if p.concur_lock.acquire(False) == False:
+                if not p.concur_lock.acquire(False):
                    err = sprintf("unsafe concurrency: %s called %s.%s "
                                  "while %s is still in %s.%s",
                                  this.name, p.classname, callee,
                                  p.concur_tname, p.classname, p.concur_callee)
-                    if exception:
                    raise AssertionError(err)
-                    else: # pragma: no cover
-                        warnings.warn(err)
                else:
                    p.concur_tname = this.name
                    p.concur_callee = callee
@@ -70,7 +63,7 @@ def verify_proxy(obj_or_type, exception = False, check_thread = True,
                    p.concur_lock.release()
            return ret

-    class VerifyObjectProxy(object):
+    class VerifyObjectProxy():
        def __init__(self, obj_or_type, *args, **kwargs):
            p = Namespace()
            self.__ns = p
@@ -80,17 +73,12 @@ def verify_proxy(obj_or_type, exception = False, check_thread = True,
            p.concur_tname = None
            p.concur_callee = None
            self.__obj = obj_or_type
-            try:
-                if type(obj_or_type) in (types.TypeType, types.ClassType):
+            if isinstance(obj_or_type, type):
                p.classname = self.__obj.__name__
            else:
                p.classname = self.__obj.__class__.__name__
-            except AttributeError: # pragma: no cover
-                p.classname = "???"

        def __getattr__(self, key):
-            if key.startswith("_VerifyObjectProxy__"): # pragma: no cover
-                raise AttributeError
            attr = getattr(self.__obj, key)
            if not callable(attr):
                return VerifyCallProxy(getattr, self.__ns)(self.__obj, key)
@@ -100,7 +88,7 @@ def verify_proxy(obj_or_type, exception = False, check_thread = True,
            """Call this to instantiate the type, if a type was passed
            to verify_proxy.  Otherwise, pass the call through."""
            ret = VerifyCallProxy(self.__obj, self.__ns)(*args, **kwargs)
-            if type(self.__obj) in (types.TypeType, types.ClassType):
+            if isinstance(self.__obj, type):
                # Instantiation
                self.__obj = ret
                return self
--- a/nilmdb/utils/time.py
+++ b/nilmdb/utils/time.py
@@ -1,8 +1,6 @@
-from __future__ import absolute_import
-
-from nilmdb.utils import datetime_tz
 import re
 import time
+import datetime_tz

 # Range
 min_timestamp = (-2**63)
@@ -11,15 +9,17 @@ max_timestamp = (2**63 - 1)
 # Smallest representable step
 epsilon = 1

-def string_to_timestamp(str):
+
+def string_to_timestamp(string):
    """Convert a string that represents an integer number of microseconds
    since epoch."""
    try:
        # Parse a string like "1234567890123456" and return an integer
-        return int(str)
+        return int(string)
    except ValueError:
        # Try parsing as a float, in case it's "1234567890123456.0"
-        return int(round(float(str)))
+        return int(round(float(string)))
+

 def timestamp_to_string(timestamp):
    """Convert a timestamp (integer microseconds since epoch) to a string"""
@@ -28,6 +28,13 @@ def timestamp_to_string(timestamp):
    else:
        return str(timestamp)

+
+def timestamp_to_bytes(timestamp):
+    """Convert a timestamp (integer microseconds since epoch) to a Python
+    bytes object"""
+    return timestamp_to_string(timestamp).encode('utf-8')
+
+
 def timestamp_to_human(timestamp):
    """Convert a timestamp (integer microseconds since epoch) to a
    human-readable string, using the local timezone for display
@@ -39,28 +46,34 @@ def timestamp_to_human(timestamp):
    dt = datetime_tz.datetime_tz.fromtimestamp(timestamp_to_unix(timestamp))
    return dt.strftime("%a, %d %b %Y %H:%M:%S.%f %z")

+
 def unix_to_timestamp(unix):
    """Convert a Unix timestamp (floating point seconds since epoch)
    into a NILM timestamp (integer microseconds since epoch)"""
    return int(round(unix * 1e6))
-seconds_to_timestamp = unix_to_timestamp
+

 def timestamp_to_unix(timestamp):
    """Convert a NILM timestamp (integer microseconds since epoch)
    into a Unix timestamp (floating point seconds since epoch)"""
    return timestamp / 1e6
+
+
+seconds_to_timestamp = unix_to_timestamp
 timestamp_to_seconds = timestamp_to_unix

-def rate_to_period(hz, cycles = 1):
+
+def rate_to_period(hz, cycles=1):
    """Convert a rate (in Hz) to a period (in timestamp units).
    Returns an integer."""
    period = unix_to_timestamp(cycles) / float(hz)
    return int(round(period))

+
 def parse_time(toparse):
    """
    Parse a free-form time string and return a nilmdb timestamp
-    (integer seconds since epoch).  If the string doesn't contain a
+    (integer microseconds since epoch).  If the string doesn't contain a
    timestamp, the current local timezone is assumed (e.g. from the TZ
    env var).
    """
@@ -87,7 +100,7 @@ def parse_time(toparse):
    try:
        return unix_to_timestamp(datetime_tz.datetime_tz.
                                 smartparse(toparse).totimestamp())
-    except (ValueError, OverflowError):
+    except (ValueError, OverflowError, TypeError):
        pass

    # If it's parseable as a float, treat it as a Unix or NILM
@@ -95,9 +108,9 @@ def parse_time(toparse):
    try:
        val = float(toparse)
        # range is from about year 2001 - 2128
-        if val > 1e9 and val < 5e9:
+        if 1e9 < val < 5e9:
            return unix_to_timestamp(val)
-        if val > 1e15 and val < 5e15:
+        if 1e15 < val < 5e15:
            return val
    except ValueError:
        pass
@@ -129,6 +142,7 @@ def parse_time(toparse):
    # just give up for now.
    raise ValueError("unable to parse timestamp")

+
 def now():
    """Return current timestamp"""
    return unix_to_timestamp(time.time())
--- a/nilmdb/utils/timer.py
+++ b/nilmdb/utils/timer.py
@@ -5,18 +5,17 @@
 #   with nilmdb.utils.Timer("flush"):
 #       foo.flush()

-from __future__ import print_function
-from __future__ import absolute_import
 import contextlib
 import time

+
@contextlib.contextmanager
-def Timer(name = None, tosyslog = False):
+def Timer(name=None, tosyslog=False):
    start = time.time()
    yield
    elapsed = int((time.time() - start) * 1000)
    msg = (name or 'elapsed') + ": " + str(elapsed) + " ms"
-    if tosyslog: # pragma: no cover
+    if tosyslog:
        import syslog
        syslog.syslog(msg)
    else:
--- a/nilmdb/utils/timestamper.py
+++ b/nilmdb/utils/timestamper.py
@@ -1,16 +1,17 @@
 """File-like objects that add timestamps to the input lines"""

-from nilmdb.utils.printf import *
+from nilmdb.utils.printf import sprintf
 import nilmdb.utils.time

-class Timestamper(object):
+
+class Timestamper():
    """A file-like object that adds timestamps to lines of an input file."""
    def __init__(self, infile, ts_iter):
        """file: filename, or another file-like object
           ts_iter: iterator that returns a timestamp string for
           each line of the file"""
-        if isinstance(infile, basestring):
-            self.file = open(infile, "r")
+        if isinstance(infile, str):
+            self.file = open(infile, "rb")
        else:
            self.file = infile
        self.ts_iter = ts_iter
@@ -22,17 +23,17 @@ class Timestamper(object):
        while True:
            line = self.file.readline(*args)
            if not line:
-                return ""
-            if line[0] == '#':
+                return b""
+            if line[0:1] == b'#':
                continue
            break
        try:
-            return self.ts_iter.next() + line
+            return next(self.ts_iter) + line
        except StopIteration:
-            return ""
+            return b""

-    def readlines(self, size = None):
-        out = ""
+    def readlines(self, size=None):
+        out = b""
        while True:
            line = self.readline()
            out += line
@@ -43,15 +44,16 @@ class Timestamper(object):
    def __iter__(self):
        return self

-    def next(self):
+    def __next__(self):
        result = self.readline()
        if not result:
            raise StopIteration
        return result

+
 class TimestamperRate(Timestamper):
    """Timestamper that uses a start time and a fixed rate"""
-    def __init__(self, infile, start, rate, end = None):
+    def __init__(self, infile, start, rate, end=None):
        """
        file: file name or object

@@ -61,33 +63,39 @@ class TimestamperRate(Timestamper):

        end: If specified, raise StopIteration before outputting a value
             greater than this."""
-        timestamp_to_string = nilmdb.utils.time.timestamp_to_string
+        timestamp_to_bytes = nilmdb.utils.time.timestamp_to_bytes
        rate_to_period = nilmdb.utils.time.rate_to_period
+
        def iterator(start, rate, end):
            n = 0
            rate = float(rate)
            while True:
                now = start + rate_to_period(rate, n)
                if end and now >= end:
-                    raise StopIteration
-                yield timestamp_to_string(now) + " "
+                    return
+                yield timestamp_to_bytes(now) + b" "
                n += 1
        Timestamper.__init__(self, infile, iterator(start, rate, end))
        self.start = start
        self.rate = rate
+
    def __str__(self):
        return sprintf("TimestamperRate(..., start=\"%s\", rate=%g)",
                       nilmdb.utils.time.timestamp_to_human(self.start),
                       self.rate)

+
 class TimestamperNow(Timestamper):
    """Timestamper that uses current time"""
    def __init__(self, infile):
-        timestamp_to_string = nilmdb.utils.time.timestamp_to_string
+        timestamp_to_bytes = nilmdb.utils.time.timestamp_to_bytes
        get_now = nilmdb.utils.time.now
+
        def iterator():
            while True:
-                yield timestamp_to_string(get_now()) + " "
+                yield timestamp_to_bytes(get_now()) + b" "
+
        Timestamper.__init__(self, infile, iterator())
+
    def __str__(self):
        return "TimestamperNow(...)"
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,16 @@
+argcomplete>=1.10.0
+CherryPy>=18.1.2
+coverage>=4.5.4
+cython>=0.29.13
+decorator>=4.4.0
+fallocate>=1.6.4
+flake8>=3.7.8
+nose>=1.3.7
+numpy>=1.17.0
+progressbar>=2.5
+psutil>=5.6.3
+python-datetime-tz>=0.5.4
+python-dateutil>=2.8.0
+requests>=2.22.0
+tz>=0.2.2
+WebTest>=2.0.33
--- a/setup.cfg
+++ b/setup.cfg
@@ -13,8 +13,6 @@ cover-package=nilmdb
 cover-erase=1
 # this works, puts html output in cover/ dir:
 # cover-html=1
-# need nose 1.1.3 for this:
-# cover-branches=1
 #debug=nose
 #debug-log=nose.log
 stop=1
@@ -39,3 +37,20 @@ tests=tests
 #with-profile=1
 #profile-sort=time
 ##profile-restrict=10  # doesn't work right, treated as string or something
+
+[versioneer]
+VCS=git
+style=pep440
+versionfile_source=nilmdb/_version.py
+versionfile_build=nilmdb/_version.py
+tag_prefix=nilmdb-
+parentdir_prefix=nilmdb-
+
+[flake8]
+exclude=_version.py,fsck.py,nilmdb_fsck.py
+extend-ignore=E731
+per-file-ignores=__init__.py:F401,E402 serializer.py:E722 mustclose.py:E722
+
+[pylint]
+ignore=_version.py,fsck.py,nilmdb_fsck.py
+disable=C0103,C0111,R0913,R0914
--- a/setup.py
+++ b/setup.py
@@ -4,133 +4,59 @@
 #   git tag -a nilmdb-1.1 -m "Version 1.1"
 #   git push --tags
 # Then just package it up:
-#   python setup.py sdist
+#   python3 setup.py sdist

-# This is supposed to be using Distribute:
-#
-#   distutils provides a "setup" method.
-#   setuptools is a set of monkeypatches on top of that.
-#   distribute is a particular version/implementation of setuptools.
-#
-# So we don't really know if this is using the old setuptools or the
-# Distribute-provided version of setuptools.
-
-import traceback
 import sys
 import os
-
-try:
-    from setuptools import setup, find_packages
-    from distutils.extension import Extension
-    import distutils.version
-except ImportError:
-    traceback.print_exc()
-    print "Please install the prerequisites listed in README.txt"
-    sys.exit(1)
+from setuptools import setup
+from distutils.extension import Extension

 # Versioneer manages version numbers from git tags.
 # https://github.com/warner/python-versioneer
 import versioneer
-versioneer.versionfile_source = 'nilmdb/_version.py'
-versioneer.versionfile_build = 'nilmdb/_version.py'
-versioneer.tag_prefix = 'nilmdb-'
-versioneer.parentdir_prefix = 'nilmdb-'
-
-# Hack to workaround logging/multiprocessing issue:
-# https://groups.google.com/d/msg/nose-users/fnJ-kAUbYHQ/_UsLN786ygcJ
-try: import multiprocessing
-except Exception: pass
-
-# Use Cython if it's new enough, otherwise use preexisting C files.
-cython_modules = [ 'nilmdb.server.interval',
-                   'nilmdb.server.rbtree' ]
-try:
-    import Cython
-    from Cython.Build import cythonize
-    if (distutils.version.LooseVersion(Cython.__version__) <
-        distutils.version.LooseVersion("0.16")):
-        print "Cython version", Cython.__version__, "is too old; not using it."
-        raise ImportError()
-    use_cython = True
-except ImportError:
-    use_cython = False

+# External modules that need to be built
 ext_modules = [ Extension('nilmdb.server.rocket', ['nilmdb/server/rocket.c' ]) ]
+
+# Use Cython.
+cython_modules = [ 'nilmdb.server.interval', 'nilmdb.server.rbtree' ]
+import Cython
+from Cython.Build import cythonize
 for modulename in cython_modules:
    filename = modulename.replace('.','/')
-    if use_cython:
    ext_modules.extend(cythonize(filename + ".pyx"))
-    else:
-        cfile = filename + ".c"
-        if not os.path.exists(cfile):
-            raise Exception("Missing source file " + cfile + ".  "
-                            "Try installing cython >= 0.16.")
-        ext_modules.append(Extension(modulename, [ cfile ]))

-# We need a MANIFEST.in.  Generate it here rather than polluting the
-# repository with yet another setup-related file.
-with open("MANIFEST.in", "w") as m:
-    m.write("""
-# Root
-include README.txt
-include setup.cfg
-include setup.py
-include versioneer.py
-include Makefile
-include .coveragerc
-include .pylintrc
-
-# Cython files -- include source.
-recursive-include nilmdb/server *.pyx *.pyxdep *.pxd
-
-# Tests
-recursive-include tests *.py
-recursive-include tests/data *
-include tests/test.order
-
-# Docs
-recursive-include docs Makefile *.md
-
-# Extras
-recursive-include extras *
-""")
+# Get list of requirements to use in `install_requires` below.  Note
+# that we don't make a distinction between things that are actually
+# required for end-users vs developers (or use `test_requires` or
+# anything else) -- just install everything for simplicity.
+install_requires = open('requirements.txt').readlines()

 # Run setup
 setup(name='nilmdb',
      version = versioneer.get_version(),
      cmdclass = versioneer.get_cmdclass(),
-      url = 'https://git.jim.sh/jim/lees/nilmdb.git',
+      url = 'https://git.jim.sh/nilm/nilmdb.git',
      author = 'Jim Paris',
      description = "NILM Database",
      long_description = "NILM Database",
      license = "Proprietary",
      author_email = 'jim@jtan.com',
-      tests_require = [ 'nose',
-                        'coverage',
-                        'numpy',
-                        ],
-      setup_requires = [ 'distribute',
-                         ],
-      install_requires = [ 'decorator',
-                           'cherrypy >= 3.2',
-                           'simplejson',
-                           'python-dateutil',
-                           'pytz',
-                           'psutil >= 0.3.0',
-                           'requests >= 1.1.0, < 2.0.0',
-                           ],
+      setup_requires = [ 'setuptools' ],
+      install_requires = install_requires,
      packages = [ 'nilmdb',
                   'nilmdb.utils',
-                   'nilmdb.utils.datetime_tz',
                   'nilmdb.server',
                   'nilmdb.client',
                   'nilmdb.cmdline',
                   'nilmdb.scripts',
+                   'nilmdb.fsck',
                   ],
      entry_points = {
          'console_scripts': [
              'nilmtool = nilmdb.scripts.nilmtool:main',
              'nilmdb-server = nilmdb.scripts.nilmdb_server:main',
+              'nilmdb-fsck = nilmdb.scripts.nilmdb_fsck:main',
              ],
          },
      ext_modules = ext_modules,
--- a/tests/data/prep-20120323T1000
+++ b/tests/data/prep-20120323T1000
@@ -1,4 +1,4 @@
-# comments are cool?
+# comments are cool?  what if they contain â†’UNICODEâ†<C3A2> or invalid utf-8 like Ã(
 2.66568e+05  2.24029e+05  5.16140e+03  2.52517e+03  8.35084e+03  3.72470e+03  1.35534e+03  2.03900e+03  
 2.57914e+05  2.27183e+05  4.30368e+03  4.13080e+03  7.25535e+03  4.89047e+03  1.63859e+03  1.93496e+03  
 2.51717e+05  2.26047e+05  5.99445e+03  3.49363e+03  8.07250e+03  5.08267e+03  2.26917e+03  2.86231e+03  
--- a/tests/data/timestamped
+++ b/tests/data/timestamped
@@ -0,0 +1,8 @@
+-10000000000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
+-100000000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
+-100000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
+-1000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
+1 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
+1000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
+1000000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
+1000000000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
--- a/tests/test.order
+++ b/tests/test.order
@@ -2,6 +2,7 @@ test_printf.py
 test_threadsafety.py
 test_lrucache.py
 test_mustclose.py
+test_misc.py

 test_serializer.py

@@ -11,6 +12,7 @@ test_interval.py

 test_bulkdata.py
 test_nilmdb.py
+test_wsgi.py
 test_client.py
 test_numpyclient.py
 test_cmdline.py
--- a/tests/test_bulkdata.py
+++ b/tests/test_bulkdata.py
@@ -5,10 +5,12 @@ from nilmdb.utils.printf import *
 from nose.tools import *
 from nose.tools import assert_raises
 import itertools
+import errno
+import pickle

 from testutil.helpers import *

-testdb = "tests/bulkdata-testdb"
+testdb = b"tests/bulkdata-testdb"

 import nilmdb.server.bulkdata
 from nilmdb.server.bulkdata import BulkData
@@ -16,18 +18,25 @@ from nilmdb.server.bulkdata import BulkData
 class TestBulkData(object):

    def test_bulkdata(self):
-        for (size, files, db) in [ ( 0, 0, testdb ),
+        for (size, files, db) in [ ( None, None, testdb ),
                                       ( 25, 1000, testdb ),
                                       ( 1000, 3, testdb.decode("utf-8") ) ]:
            recursive_unlink(db)
            os.mkdir(db)
            self.do_basic(db, size, files)

+    def test_corruption(self):
+        db = testdb
+        recursive_unlink(db)
+        os.mkdir(db)
+
+        # Remove lock before close
+        data = BulkData(db)
+        os.unlink(data.lock)
+        data.close()
+
    def do_basic(self, db, size, files):
        """Do the basic test with variable file_size and files_per_dir"""
-        if not size or not files:
-            data = BulkData(db)
-        else:
        data = BulkData(db, file_size = size, files_per_dir = files)

        # Try opening it again (should result in locking error)
@@ -41,12 +50,19 @@ class TestBulkData(object):
        with assert_raises(ValueError):
            data.create("foo/bar", "uint16_8")
        data.create("/foo/bar", "uint16_8")
-        data.create(u"/foo/baz/quux", "float64_16")
-        with assert_raises(ValueError):
+        data.create("/foo/baz/quux", "float64_16")
+        with assert_raises(ValueError) as e:
            data.create("/foo/bar/baz", "uint16_8")
+        in_("path is subdir of existing node", str(e.exception))
        with assert_raises(ValueError):
            data.create("/foo/baz", "float64_16")

+        # filename too long (tests error paths in _create_parents)
+        with assert_raises(OSError) as e:
+            data.create("/test/long/" + "a"*10000 + "/foo", "int32_1")
+        eq_(e.exception.errno, errno.ENAMETOOLONG)
+
+
        # get node -- see if caching works
        nodes = []
        for i in range(5000):
@@ -57,7 +73,7 @@ class TestBulkData(object):
        def get_node_slice(key):
            if isinstance(key, slice):
                return [ node.get_data(x, x+1) for x in
-                         xrange(*key.indices(node.nrows)) ]
+                         range(*key.indices(node.nrows)) ]
            return node.get_data(key, key+1)

        # Test node
@@ -68,10 +84,10 @@ class TestBulkData(object):
            x = node[0] # timestamp
        raw = []
        for i in range(1000):
-            raw.append("%d 1 2 3 4 5 6 7 8\n" % (10000 + i))
-        node.append_data("".join(raw[0:1]), 0, 50000)
-        node.append_data("".join(raw[1:100]), 0, 50000)
-        node.append_data("".join(raw[100:]), 0, 50000)
+            raw.append(b"%d 1 2 3 4 5 6 7 8\n" % (10000 + i))
+        node.append_data(b"".join(raw[0:1]), 0, 50000)
+        node.append_data(b"".join(raw[1:100]), 0, 50000)
+        node.append_data(b"".join(raw[100:]), 0, 50000)

        misc_slices = [ 0, 100, slice(None), slice(0), slice(10),
                        slice(5,10), slice(3,None), slice(3,-3),
@@ -85,8 +101,8 @@ class TestBulkData(object):
        # Extract misc slices while appending, to make sure the
        # data isn't being added in the middle of the file
        for s in [2, slice(1,5), 2, slice(1,5)]:
-            node.append_data("0 0 0 0 0 0 0 0 0\n", 0, 50000)
-            raw.append("0 0 0 0 0 0 0 0 0\n")
+            node.append_data(b"0 0 0 0 0 0 0 0 0\n", 0, 50000)
+            raw.append(b"0 0 0 0 0 0 0 0 0\n")
            eq_(get_node_slice(s), raw[s])

        # Get some coverage of remove; remove is more fully tested
@@ -95,14 +111,37 @@ class TestBulkData(object):
            node.remove(9999,9998)

        # close, reopen
-        # reopen
        data.close()
-        if not size or not files:
-            data = BulkData(db)
-        else:
        data = BulkData(db, file_size = size, files_per_dir = files)
        node = data.getnode("/foo/bar")

+        # make an empty dir that will get ignored by _get_nrows
+        data.close()
+        os.mkdir(os.path.join(testdb, b"data/foo/bar/0123"))
+        data = BulkData(db, file_size = size, files_per_dir = files)
+        node = data.getnode("/foo/bar")
+
+        # make a corrupted file that's the wrong size
+        data.close()
+        with open(os.path.join(testdb, b"data/foo/bar/0123/0123"), "wb") as f:
+            f.write(b"x"*17)
+        data = BulkData(db, file_size = size, files_per_dir = files)
+        with assert_raises(ValueError) as e:
+            node = data.getnode("/foo/bar")
+        in_("file offset is not a multiple of data size", str(e.exception))
+
+        # mess with format
+        data.close()
+        with open(os.path.join(testdb, b"data/foo/bar/_format"), "rb") as f:
+            fmt = pickle.load(f)
+        fmt["version"] = 2
+        with open(os.path.join(testdb, b"data/foo/bar/_format"), "wb") as f:
+            pickle.dump(fmt, f, 2)
+        data = BulkData(db, file_size = size, files_per_dir = files)
+        with assert_raises(NotImplementedError) as e:
+            node = data.getnode("/foo/bar")
+        in_("old version 2 bulk data store is not supported", str(e.exception))
+
        # Extract slices
        for s in misc_slices:
            eq_(get_node_slice(s), raw[s])
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -5,8 +5,9 @@ import nilmdb.client

 from nilmdb.utils.printf import *
 from nilmdb.utils import timestamper
-from nilmdb.client import ClientError, ServerError
-from nilmdb.utils import datetime_tz
+from nilmdb.client import ClientError, ServerError, Error
+from nilmdb.utils.sort import sort_human
+import datetime_tz

 from nose.plugins.skip import SkipTest
 from nose.tools import *
@@ -16,8 +17,8 @@ import distutils.version
 import os
 import sys
 import threading
-import cStringIO
-import simplejson as json
+import io
+import json
 import unittest
 import warnings
 import resource
@@ -78,6 +79,32 @@ class TestClient(object):
        # Bad URLs should give 404, not 500
        with assert_raises(ClientError):
            client.http.get("/stream/create")
+
+        # Test error handling
+        url = testurl
+        args = { "url": url,
+                 "status": "400",
+                 "message": "Something went wrong",
+                 "traceback": None }
+        with assert_raises(ClientError):
+            client.http._handle_error(url, 400, json.dumps(args))
+        with assert_raises(ClientError):
+            client.http._handle_error(url, 400, "this is not JSON.. {")
+        args["status"] = "500"
+        with assert_raises(ServerError):
+            client.http._handle_error(url, 500, json.dumps(args))
+        args["message"] = None
+        with assert_raises(ServerError):
+            client.http._handle_error(url, 500, json.dumps(args))
+        args["status"] = "600"
+        with assert_raises(Error):
+            client.http._handle_error(url, 600, json.dumps(args))
+
+        # Use get_gen for an endpoint that doesn't have newlines,
+        # for better test coverage.
+        for line in client.http.get_gen("/version"):
+            pass
+
        client.close()

    def test_client_02_createlist(self):
@@ -98,23 +125,29 @@ class TestClient(object):

        # Bad method types
        with assert_raises(ClientError):
-            client.http.put("/stream/list","")
+            client.http.put("/stream/list",b"")
        # Try a bunch of times to make sure the request body is getting consumed
        for x in range(10):
            with assert_raises(ClientError):
                client.http.post("/stream/list")
        client = nilmdb.client.Client(url = testurl)

-        # Create three streams
+        # Create four streams
        client.stream_create("/newton/prep", "float32_8")
        client.stream_create("/newton/raw", "uint16_6")
-        client.stream_create("/newton/zzz/rawnotch", "uint16_9")
+        client.stream_create("/newton/zzz/rawnotch2", "uint16_9")
+        client.stream_create("/newton/zzz/rawnotch11", "uint16_9")

-        # Verify we got 3 streams
+        # Test sort_human (used by stream_list)
+        eq_(sort_human(["/s/10", "/s/2"]), ["/s/2", "/s/10"])
+
+        # Verify we got 4 streams in the right order
        eq_(client.stream_list(), [ ["/newton/prep", "float32_8"],
                                    ["/newton/raw", "uint16_6"],
-                                    ["/newton/zzz/rawnotch", "uint16_9"]
+                                    ["/newton/zzz/rawnotch2", "uint16_9"],
+                                    ["/newton/zzz/rawnotch11", "uint16_9"]
                                    ])
+
        # Match just one type or one path
        eq_(client.stream_list(layout="uint16_6"),
            [ ["/newton/raw", "uint16_6"] ])
@@ -127,8 +160,19 @@ class TestClient(object):
        # we create a table.
        limit = resource.getrlimit(resource.RLIMIT_FSIZE)
        resource.setrlimit(resource.RLIMIT_FSIZE, (1, limit[1]))
+
+        # normal
        with assert_raises(ServerError) as e:
            client.stream_create("/newton/hello", "uint16_6")
+
+        # same but with force_traceback == False, to improve coverage
+        global test_server
+        test_server.force_traceback = False
+        with assert_raises(ServerError) as e:
+            client.stream_create("/newton/world", "uint16_6")
+        test_server.force_traceback = True
+
+        # Reset resource limit
        resource.setrlimit(resource.RLIMIT_FSIZE, limit)

        client.close()
@@ -194,7 +238,7 @@ class TestClient(object):
        data = timestamper.TimestamperRate(testfile, start, 120)
        with assert_raises(ClientError) as e:
            result = client.stream_insert("/newton/no-such-path", data)
-        in_("404 Not Found", str(e.exception))
+        in_("404 Not Found", repr(e.exception))

        # Now try reversed timestamps
        data = timestamper.TimestamperRate(testfile, start, 120)
@@ -206,27 +250,27 @@ class TestClient(object):
             "start must precede end", str(e.exception))

        # Now try empty data (no server request made)
-        empty = cStringIO.StringIO("")
+        empty = io.StringIO("")
        data = timestamper.TimestamperRate(empty, start, 120)
        result = client.stream_insert("/newton/prep", data)
        eq_(result, None)

        # It's OK to insert an empty interval
-        client.http.put("stream/insert", "", { "path": "/newton/prep",
+        client.http.put("stream/insert", b"", { "path": "/newton/prep",
                                               "start": 1, "end": 2 })
        eq_(list(client.stream_intervals("/newton/prep")), [[1, 2]])
        client.stream_remove("/newton/prep")
        eq_(list(client.stream_intervals("/newton/prep")), [])

        # Timestamps can be negative too
-        client.http.put("stream/insert", "", { "path": "/newton/prep",
+        client.http.put("stream/insert", b"", { "path": "/newton/prep",
                                               "start": -2, "end": -1 })
        eq_(list(client.stream_intervals("/newton/prep")), [[-2, -1]])
        client.stream_remove("/newton/prep")
        eq_(list(client.stream_intervals("/newton/prep")), [])

        # Intervals that end at zero shouldn't be any different
-        client.http.put("stream/insert", "", { "path": "/newton/prep",
+        client.http.put("stream/insert", b"", { "path": "/newton/prep",
                                               "start": -1, "end": 0 })
        eq_(list(client.stream_intervals("/newton/prep")), [[-1, 0]])
        client.stream_remove("/newton/prep")
@@ -234,25 +278,37 @@ class TestClient(object):

        # Try forcing a server request with equal start and end
        with assert_raises(ClientError) as e:
-            client.http.put("stream/insert", "", { "path": "/newton/prep",
+            client.http.put("stream/insert", b"", { "path": "/newton/prep",
                                                   "start": 0, "end": 0 })
        in_("400 Bad Request", str(e.exception))
        in_("start must precede end", str(e.exception))

+        # Invalid times in HTTP request
+        with assert_raises(ClientError) as e:
+            client.http.put("stream/insert", b"", { "path": "/newton/prep",
+                                                   "start": "asdf", "end": 0 })
+        in_("400 Bad Request", str(e.exception))
+        in_("invalid start", str(e.exception))
+
+        with assert_raises(ClientError) as e:
+            client.http.put("stream/insert", b"", { "path": "/newton/prep",
+                                                   "start": 0, "end": "asdf" })
+        in_("400 Bad Request", str(e.exception))
+        in_("invalid end", str(e.exception))
+
        # Good content type
        with assert_raises(ClientError) as e:
-            client.http.put("stream/insert", "",
+            client.http.put("stream/insert", b"",
                            { "path": "xxxx", "start": 0, "end": 1,
-                              "binary": 1 },
-                            binary = True)
+                              "binary": 1 })
        in_("No such stream", str(e.exception))

        # Bad content type
        with assert_raises(ClientError) as e:
-            client.http.put("stream/insert", "",
+            client.http.put("stream/insert", b"",
                            { "path": "xxxx", "start": 0, "end": 1,
                              "binary": 1 },
-                            binary = False)
+                            content_type="text/plain; charset=utf-8")
        in_("Content type must be application/octet-stream", str(e.exception))

        # Specify start/end (starts too late)
@@ -276,11 +332,7 @@ class TestClient(object):
                         ">= end time 1332511201000000", str(e.exception))
               is not None)

-        # Now do the real load
-        data = timestamper.TimestamperRate(testfile, start, 120)
-        result = client.stream_insert("/newton/prep", data,
-                                      start, start + 119999777)
-
+        def check_data():
            # Verify the intervals.  Should be just one, even if the data
            # was inserted in chunks, due to nilmdb interval concatenation.
            intervals = list(client.stream_intervals("/newton/prep"))
@@ -293,6 +345,20 @@ class TestClient(object):
            in_("400 Bad Request", str(e.exception))
            in_("verlap", str(e.exception))

+        # Now do the real load
+        data = timestamper.TimestamperRate(testfile, start, 120)
+        result = client.stream_insert("/newton/prep", data,
+                                      start, start + 119999777)
+        check_data()
+
+        # Try inserting directly-passed data
+        client.stream_remove("/newton/prep", start, start + 119999777)
+        data = timestamper.TimestamperRate(testfile, start, 120)
+        data_bytes = b''.join(data)
+        result = client.stream_insert("/newton/prep", data_bytes,
+                                      start, start + 119999777)
+        check_data()
+
        nilmdb.client.client.StreamInserter._max_data = old_max_data
        client.close()

@@ -317,7 +383,7 @@ class TestClient(object):
        with assert_raises(ClientError) as e:
            list(client.stream_extract("/newton/prep",
                                       count = True, binary = True))
-        data = "".join(client.stream_extract("/newton/prep", binary = True))
+        data = b"".join(client.stream_extract("/newton/prep", binary = True))
        # Quick check using struct
        unpacker = struct.Struct("<qffffffff")
        out = []
@@ -327,6 +393,10 @@ class TestClient(object):
                     2525.169921875, 8350.83984375, 3724.699951171875,
                     1355.3399658203125, 2039.0))

+        # Just get some coverage
+        with assert_raises(ClientError) as e:
+            client.http.post("/stream/remove", { "path": "/none" })
+
        client.close()

    def test_client_06_generators(self):
@@ -339,22 +409,18 @@ class TestClient(object):
        end = nilmdb.utils.time.parse_time("20120323T1000")
        for function in [ client.stream_intervals, client.stream_extract ]:
            with assert_raises(ClientError) as e:
-                function("/newton/prep", start, end).next()
+                next(function("/newton/prep", start, end))
            in_("400 Bad Request", str(e.exception))
            in_("start must precede end", str(e.exception))

        # Trigger a curl error in generator
        with assert_raises(ServerError) as e:
-            client.http.get_gen("http://nosuchurl.example.com./").next()
-
-        # Trigger a curl error in generator
-        with assert_raises(ServerError) as e:
-            client.http.get_gen("http://nosuchurl.example.com./").next()
+            next(client.http.get_gen("http://nosuchurl.example.com./"))

        # Check 404 for missing streams
        for function in [ client.stream_intervals, client.stream_extract ]:
            with assert_raises(ClientError) as e:
-                function("/no/such/stream").next()
+                next(function("/no/such/stream"))
            in_("404 Not Found", str(e.exception))
            in_("No such stream", str(e.exception))

@@ -375,7 +441,7 @@ class TestClient(object):

        def headers():
            h = ""
-            for (k, v) in http._last_response.headers.items():
+            for (k, v) in list(http._last_response.headers.items()):
                h += k + ": " + v + "\n"
            return h.lower()

@@ -389,20 +455,16 @@ class TestClient(object):
                                 headers())

        # Extract
-        x = http.get("stream/extract",
-                            { "path": "/newton/prep",
-                              "start": "123",
-                              "end": "124" })
+        x = http.get("stream/extract", { "path": "/newton/prep",
+                                         "start": "123", "end": "124" })
        if "transfer-encoding: chunked" not in headers():
            warnings.warn("Non-chunked HTTP response for /stream/extract")
        if "content-type: text/plain;charset=utf-8" not in headers():
            raise AssertionError("/stream/extract is not text/plain:\n" +
                                 headers())

-        x = http.get("stream/extract",
-                            { "path": "/newton/prep",
-                              "start": "123",
-                              "end": "124",
+        x = http.get("stream/extract", { "path": "/newton/prep",
+                                         "start": "123", "end": "124",
                                         "binary": "1" })
        if "transfer-encoding: chunked" not in headers():
            warnings.warn("Non-chunked HTTP response for /stream/extract")
@@ -410,6 +472,21 @@ class TestClient(object):
            raise AssertionError("/stream/extract is not binary:\n" +
                                 headers())

+        # Make sure a binary of "0" is really off
+        x = http.get("stream/extract", { "path": "/newton/prep",
+                                         "start": "123", "end": "124",
+                                         "binary": "0" })
+        if "content-type: application/octet-stream" in headers():
+                    raise AssertionError("/stream/extract is not text:\n" +
+                                         headers())
+
+        # Invalid parameters
+        with assert_raises(ClientError) as e:
+            x = http.get("stream/extract", { "path": "/newton/prep",
+                                             "start": "123", "end": "124",
+                                             "binary": "asdfasfd" })
+        in_("can't parse parameter", str(e.exception))
+
        client.close()

    def test_client_08_unicode(self):
@@ -427,8 +504,8 @@ class TestClient(object):
            eq_(client.stream_list(), [])

            # Create Unicode stream, match it
-            raw = [ u"/düsseldorf/raw", u"uint16_6" ]
-            prep = [ u"/düsseldorf/prep", u"uint16_6" ]
+            raw = [ "/düsseldorf/raw", "uint16_6" ]
+            prep = [ "/düsseldorf/prep", "uint16_6" ]
            client.stream_create(*raw)
            eq_(client.stream_list(), [raw])
            eq_(client.stream_list(layout=raw[1]), [raw])
@@ -439,10 +516,10 @@ class TestClient(object):
            # Set / get metadata with Unicode keys and values
            eq_(client.stream_get_metadata(raw[0]), {})
            eq_(client.stream_get_metadata(prep[0]), {})
-            meta1 = { u"alpha": u"α",
-                      u"β": u"beta" }
-            meta2 = { u"alpha": u"α" }
-            meta3 = { u"β": u"beta" }
+            meta1 = { "alpha": "α",
+                      "β": "beta" }
+            meta2 = { "alpha": "α" }
+            meta3 = { "β": "beta" }
            client.stream_set_metadata(prep[0], meta1)
            client.stream_update_metadata(prep[0], {})
            client.stream_update_metadata(raw[0], meta2)
@@ -486,68 +563,74 @@ class TestClient(object):
            # override _max_data to trigger frequent server updates
            ctx._max_data = 15

-            ctx.insert("1000 1\n")
+            ctx.insert(b"1000 1\n")

-            ctx.insert("1010 ")
-            ctx.insert("1\n1020 1")
-            ctx.insert("")
-            ctx.insert("\n1030 1\n")
+            ctx.insert(b"1010 ")
+            ctx.insert(b"1\n1020 1")
+            ctx.insert(b"")
+            ctx.insert(b"\n1030 1\n")

-            ctx.insert("1040 1\n")
-            ctx.insert("# hello\n")
-            ctx.insert("   # hello\n")
-            ctx.insert("  1050 1\n")
+            ctx.insert(b"1040 1\n")
+            ctx.insert(b"# hello\n")
+            ctx.insert(b"   # hello\n")
+            ctx.insert(b"  1050 1\n")
            ctx.finalize()

-            ctx.insert("1070 1\n")
+            ctx.insert(b"1070 1\n")
            ctx.update_end(1080)
            ctx.finalize()
            ctx.update_start(1090)
-            ctx.insert("1100 1\n")
-            ctx.insert("1110 1\n")
+            ctx.insert(b"1100 1\n")
+            ctx.insert(b"1110 1\n")
            ctx.send()
-            ctx.insert("1120 1\n")
-            ctx.insert("1130 1\n")
-            ctx.insert("1140 1\n")
+            ctx.insert(b"1120 1\n")
+            ctx.insert(b"1130 1\n")
+            ctx.insert(b"1140 1\n")
            ctx.update_end(1160)
-            ctx.insert("1150 1\n")
+            ctx.insert(b"1150 1\n")
            ctx.update_end(1170)
-            ctx.insert("1160 1\n")
+            ctx.insert(b"1160 1\n")
            ctx.update_end(1180)
-            ctx.insert("1170 1" +
-                       " # this is super long" * 100 +
-                       "\n")
+            ctx.insert(b"1170 1" +
+                       b" # this is super long" * 100 +
+                       b"\n")
            ctx.finalize()
-            ctx.insert("# this is super long" * 100)
+            ctx.insert(b"# this is super long" * 100)
+
+            # override _max_data_after_send to trigger ValueError on a
+            # long nonterminated line
+            ctx._max_data_after_send = 1000
+            with assert_raises(ValueError):
+                ctx.insert(b"# this is super long" * 100)

        with assert_raises(ClientError):
            with client.stream_insert_context("/context/test",
                                              1000, 2000) as ctx:
-                ctx.insert("1180 1\n")
+                ctx.insert(b"1180 1\n")

        with assert_raises(ClientError):
            with client.stream_insert_context("/context/test",
                                              2000, 3000) as ctx:
-                ctx.insert("1180 1\n")
+                ctx.insert(b"1180 1\n")

        with assert_raises(ClientError):
            with client.stream_insert_context("/context/test") as ctx:
-                ctx.insert("bogus data\n")
+                ctx.insert(b"bogus data\n")

        with client.stream_insert_context("/context/test", 2000, 3000) as ctx:
            # make sure our override wasn't permanent
            ne_(ctx._max_data, 15)
-            ctx.insert("2250 1\n")
+            ctx.insert(b"2250 1\n")
            ctx.finalize()

        with assert_raises(ClientError):
            with client.stream_insert_context("/context/test",
                                              3000, 4000) as ctx:
-                ctx.insert("3010 1\n")
-                ctx.insert("3020 2\n")
-                ctx.insert("3030 3\n")
-                ctx.insert("3040 4\n")
-                ctx.insert("3040 4\n") # non-monotonic after a few lines
+                ctx.insert(b"3010 1\n")
+                ctx.insert(b"3020 2\n")
+                ctx.insert(b"3030 3\n")
+                ctx.insert(b"3040 4\n")
+                ctx.insert(b"3040 4\n") # non-monotonic after a few lines
                ctx.finalize()

        eq_(list(client.stream_intervals("/context/test")),
@@ -582,9 +665,9 @@ class TestClient(object):
        # Insert a region with just a few points
        with client.stream_insert_context("/empty/test") as ctx:
            ctx.update_start(100)
-            ctx.insert("140 1\n")
-            ctx.insert("150 1\n")
-            ctx.insert("160 1\n")
+            ctx.insert(b"140 1\n")
+            ctx.insert(b"150 1\n")
+            ctx.insert(b"160 1\n")
            ctx.update_end(200)
            ctx.finalize()

@@ -597,7 +680,7 @@ class TestClient(object):

        # Try also creating a completely empty interval from scratch,
        # in a few different ways.
-        client.stream_insert("/empty/test", "", 300, 350)
+        client.stream_insert("/empty/test", b"", 300, 350)
        client.stream_insert("/empty/test", [], 400, 450)
        with client.stream_insert_context("/empty/test", 500, 550):
            pass
@@ -613,15 +696,19 @@ class TestClient(object):
        with client.stream_insert_context("/empty/test", end = 950):
            pass

+        # Equal start and end is OK as long as there's no data
+        with client.stream_insert_context("/empty/test", start=9, end=9):
+            pass
+
        # Try various things that might cause problems
-        with client.stream_insert_context("/empty/test", 1000, 1050):
+        with client.stream_insert_context("/empty/test", 1000, 1050) as ctx:
            ctx.finalize() # inserts [1000, 1050]
            ctx.finalize() # nothing
            ctx.finalize() # nothing
-            ctx.insert("1100 1\n")
+            ctx.insert(b"1100 1\n")
            ctx.finalize() # inserts [1100, 1101]
            ctx.update_start(1199)
-            ctx.insert("1200 1\n")
+            ctx.insert(b"1200 1\n")
            ctx.update_end(1250)
            ctx.finalize() # inserts [1199, 1250]
            ctx.update_start(1299)
@@ -629,14 +716,14 @@ class TestClient(object):
            ctx.update_end(1350)
            ctx.finalize() # nothing
            ctx.update_start(1400)
-            ctx.insert("# nothing!\n")
+            ctx.insert(b"# nothing!\n")
            ctx.update_end(1450)
            ctx.finalize()
            ctx.update_start(1500)
-            ctx.insert("# nothing!")
+            ctx.insert(b"# nothing!")
            ctx.update_end(1550)
            ctx.finalize()
-            ctx.insert("# nothing!\n" * 10)
+            ctx.insert(b"# nothing!\n" * 10)
            ctx.finalize()
            # implicit last finalize inserts [1400, 1450]

@@ -659,40 +746,15 @@ class TestClient(object):
        client.close()

    def test_client_12_persistent(self):
-        # Check that connections are persistent when they should be.
-        # This is pretty hard to test; we have to poke deep into
-        # the Requests library.
+        # Check that connections are NOT persistent.  Rather than trying
+        # to verify this at the TCP level, just make sure that the response
+        # contained a "Connection: close" header.
        with nilmdb.client.Client(url = testurl) as c:
-            def connections():
-                try:
-                    poolmanager = c.http._last_response.connection.poolmanager
-                    pool = poolmanager.pools[('http','localhost',32180)]
-                    return (pool.num_connections, pool.num_requests)
-                except Exception:
-                    raise SkipTest("can't get connection info")
-
-            # First request makes a connection
            c.stream_create("/persist/test", "uint16_1")
-            eq_(connections(), (1, 1))
+            eq_(c.http._last_response.headers["Connection"], "close")

-            # Non-generator
-            c.stream_list("/persist/test")
-            eq_(connections(), (1, 2))
-            c.stream_list("/persist/test")
-            eq_(connections(), (1, 3))
-
-            # Generators
-            for x in c.stream_intervals("/persist/test"):
-                pass
-            eq_(connections(), (1, 4))
-            for x in c.stream_intervals("/persist/test"):
-                pass
-            eq_(connections(), (1, 5))
-
-            # Clean up
-            c.stream_remove("/persist/test")
            c.stream_destroy("/persist/test")
-            eq_(connections(), (1, 7))
+            eq_(c.http._last_response.headers["Connection"], "close")

    def test_client_13_timestamp_rounding(self):
        # Test potentially bad timestamps (due to floating point
@@ -703,19 +765,19 @@ class TestClient(object):
        client.stream_create("/rounding/test", "uint16_1")
        with client.stream_insert_context("/rounding/test",
                                          100000000, 200000000.1) as ctx:
-            ctx.insert("100000000.1 1\n")
-            ctx.insert("150000000.00003 1\n")
-            ctx.insert("199999999.4 1\n")
+            ctx.insert(b"100000000.1 1\n")
+            ctx.insert(b"150000000.00003 1\n")
+            ctx.insert(b"199999999.4 1\n")
        eq_(list(client.stream_intervals("/rounding/test")),
            [ [ 100000000, 200000000 ] ])

        with assert_raises(ClientError):
            with client.stream_insert_context("/rounding/test",
                                              200000000, 300000000) as ctx:
-                ctx.insert("200000000 1\n")
-                ctx.insert("250000000 1\n")
+                ctx.insert(b"200000000 1\n")
+                ctx.insert(b"250000000 1\n")
                # Server will round this and give an error on finalize()
-                ctx.insert("299999999.99 1\n")
+                ctx.insert(b"299999999.99 1\n")

        client.stream_remove("/rounding/test")
        client.stream_destroy("/rounding/test")
--- a/tests/test_cmdline.py
+++ b/tests/test_cmdline.py
@@ -4,7 +4,7 @@ import nilmdb.server

 from nilmdb.utils.printf import *
 import nilmdb.cmdline
-from nilmdb.utils import datetime_tz
+import datetime_tz

 import unittest
 from nose.tools import *
@@ -13,7 +13,7 @@ import itertools
 import os
 import re
 import sys
-import StringIO
+import io
 import shlex
 import warnings

@@ -21,13 +21,17 @@ from testutil.helpers import *

 testdb = "tests/cmdline-testdb"

-def server_start(max_results = None, max_removals = None, bulkdata_args = {}):
+def server_start(max_results = None,
+                 max_removals = None,
+                 max_int_removals = None,
+                 bulkdata_args = {}):
    global test_server, test_db
    # Start web app on a custom port
    test_db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(
        testdb,
        max_results = max_results,
        max_removals = max_removals,
+        max_int_removals = max_int_removals,
        bulkdata_args = bulkdata_args)
    test_server = nilmdb.server.Server(test_db, host = "127.0.0.1",
                                       port = 32180, stoppable = False,
@@ -50,17 +54,11 @@ def setup_module():
 def teardown_module():
    server_stop()

-# Add an encoding property to StringIO so Python will convert Unicode
-# properly when writing or reading.
-class UTF8StringIO(StringIO.StringIO):
-    encoding = 'utf-8'
-
 class TestCmdline(object):

    def run(self, arg_string, infile=None, outfile=None):
        """Run a cmdline client with the specified argument string,
-        passing the given input.  Returns a tuple with the output and
-        exit code"""
+        passing the given input.  Save the output and exit code."""
        # printf("TZ=UTC ./nilmtool.py %s\n", arg_string)
        os.environ['NILMDB_URL'] = "http://localhost:32180/"
        class stdio_wrapper:
@@ -73,23 +71,29 @@ class TestCmdline(object):
                ( sys.stdin, sys.stdout, sys.stderr ) = self.saved
        # Empty input if none provided
        if infile is None:
-            infile = UTF8StringIO("")
+            infile = io.TextIOWrapper(io.BytesIO(b""))
        # Capture stderr
-        errfile = UTF8StringIO()
+        errfile = io.TextIOWrapper(io.BytesIO())
        if outfile is None:
            # If no output file, capture stdout with stderr
            outfile = errfile
        with stdio_wrapper(infile, outfile, errfile) as s:
            try:
-                # shlex doesn't support Unicode very well.  Encode the
-                # string as UTF-8 explicitly before splitting.
-                args = shlex.split(arg_string.encode('utf-8'))
+                args = shlex.split(arg_string)
                nilmdb.cmdline.Cmdline(args).run()
                sys.exit(0)
            except SystemExit as e:
                exitcode = e.code
-        captured = outfile.getvalue()
-        self.captured = captured
+
+        # Capture raw binary output, and also try to decode a Unicode
+        # string copy.
+        self.captured_binary = outfile.buffer.getvalue()
+        try:
+            outfile.seek(0)
+            self.captured = outfile.read()
+        except UnicodeDecodeError:
+            self.captured = None
+
        self.exitcode = exitcode

    def ok(self, arg_string, infile = None):
@@ -128,16 +132,16 @@ class TestCmdline(object):
        with open(file) as f:
            contents = f.read()
            if contents != self.captured:
-                print "--- reference file (first 1000 bytes):\n"
-                print contents[0:1000] + "\n"
-                print "--- captured data (first 1000 bytes):\n"
-                print self.captured[0:1000] + "\n"
-                zipped = itertools.izip_longest(contents, self.captured)
+                print("--- reference file (first 1000 bytes):\n")
+                print(contents[0:1000] + "\n")
+                print("--- captured data (first 1000 bytes):\n")
+                print(self.captured[0:1000] + "\n")
+                zipped = itertools.zip_longest(contents, self.captured)
                for (n, (a, b)) in enumerate(zipped):
                    if a != b:
-                        print "--- first difference is at offset", n
-                        print "--- reference:", repr(a)
-                        print "---  captured:", repr(b)
+                        print("--- first difference is at offset", n)
+                        print("--- reference:", repr(a))
+                        print("---  captured:", repr(b))
                        break
                raise AssertionError("captured data doesn't match " + file)

@@ -160,6 +164,12 @@ class TestCmdline(object):
        self.ok("--help")
        self.contain("usage:")

+        # version
+        self.ok("--version")
+        ver = self.captured
+        self.ok("list --version")
+        eq_(self.captured, ver)
+
        # fail for no args
        self.fail("")

@@ -245,8 +255,10 @@ class TestCmdline(object):
        self.contain("Client version: " + nilmdb.__version__)
        self.contain("Server version: " + test_server.version)
        self.contain("Server database path")
-        self.contain("Server database size")
-        self.contain("Server database free space")
+        self.contain("Server disk space used by NilmDB")
+        self.contain("Server disk space used by other")
+        self.contain("Server disk space reserved")
+        self.contain("Server disk space free")

    def test_04_createlist(self):
        # Basic stream tests, like those in test_client.
@@ -283,6 +295,7 @@ class TestCmdline(object):
        self.ok("create /newton/zzz/rawnotch uint16_9")
        self.ok("create /newton/prep float32_8")
        self.ok("create /newton/raw uint16_6")
+        self.ok("create /newton/raw~decim-1234 uint16_6")

        # Create a stream that already exists
        self.fail("create /newton/raw uint16_6")
@@ -298,13 +311,23 @@ class TestCmdline(object):
        self.fail("create /newton/zzz float32_8")
        self.contain("subdirs of this path already exist")

-        # Verify we got those 3 streams and they're returned in
+        # Verify we got those 4 streams and they're returned in
        # alphabetical order.
        self.ok("list -l")
        self.match("/newton/prep float32_8\n"
                   "/newton/raw uint16_6\n"
+                   "/newton/raw~decim-1234 uint16_6\n"
                   "/newton/zzz/rawnotch uint16_9\n")

+        # No decimated streams if -n specified
+        self.ok("list -n -l")
+        self.match("/newton/prep float32_8\n"
+                   "/newton/raw uint16_6\n"
+                   "/newton/zzz/rawnotch uint16_9\n")
+
+        # Delete that decimated stream
+        self.ok("destroy /newton/raw~decim-1234")
+
        # Match just one type or one path.  Also check
        # that --path is optional
        self.ok("list --layout /newton/raw")
@@ -337,6 +360,12 @@ class TestCmdline(object):
        self.ok("metadata /newton/raw --update "
                "v_scale=1.234")

+        # unicode
+        self.ok("metadata /newton/raw --set "
+                "a_𝓴𝓮𝔂=value a_key=𝓿𝓪𝓵𝓾𝓮 a_𝗸𝗲𝘆=𝘃𝗮𝗹𝘂𝗲")
+        self.ok("metadata /newton/raw --get")
+        self.match("a_key=𝓿𝓪𝓵𝓾𝓮\na_𝓴𝓮𝔂=value\na_𝗸𝗲𝘆=𝘃𝗮𝗹𝘂𝗲\n")
+
        # various parsing tests
        self.ok("metadata /newton/raw --update foo=")
        self.fail("metadata /newton/raw --update =bar")
@@ -470,9 +499,20 @@ class TestCmdline(object):
        self.ok("insert -t -r 120 --start '03/23/2012 06:05:00' /newton/prep "
                "/dev/null")

+        # --quiet option
+        self.ok("insert --quiet -t -r 120 -s @0 /newton/prep /dev/null")
+        self.match("")
+
        # bad start time
        self.fail("insert -t -r 120 --start 'whatever' /newton/prep /dev/null")

+        # Test negative times
+        self.ok("insert --start @-10000000000 --end @1000000001 /newton/prep"
+                " tests/data/timestamped")
+        self.ok("extract -c /newton/prep --start min --end @1000000001")
+        self.match("8\n")
+        self.ok("remove /newton/prep --start min --end @1000000001")
+
    def test_07_detail_extended(self):
        # Just count the number of lines, it's probably fine
        self.ok("list --detail")
@@ -561,6 +601,13 @@ class TestCmdline(object):
                  exitcode = 2, require_error = False)
        self.contain("no data")

+        # unannotated empty extract is just empty, with an exit code of 2
+        self.fail("extract /newton/prep " +
+                  "--start '23 Mar 2022 10:00:30' " +
+                  "--end '23 Mar 2022 10:00:31'",
+                  exitcode = 2, require_error = False)
+        self.match("")
+
        # but are ok if we're just counting results
        self.ok("extract --count /newton/prep " +
                "--start '23 Mar 2012 20:00:30' " +
@@ -571,6 +618,14 @@ class TestCmdline(object):
                "--end '23 Mar 2012 20:00:30.000002'")
        self.match("0\n")

+        # Extract needs --start and --end
+        self.fail("extract -a /newton/prep")
+        self.contain("arguments are required")
+        self.fail("extract -a /newton/prep --start 2000-01-01")
+        self.contain("arguments are required")
+        self.fail("extract -a /newton/prep --end 2000-01-01")
+        self.contain("arguments are required")
+
        # Check various dumps against stored copies of how they should appear
        def test(file, start, end, extra=""):
            self.ok("extract " + extra + " /newton/prep " +
@@ -601,6 +656,14 @@ class TestCmdline(object):
        self.ok("extract -c /newton/prep --start 2000-01-01 --end 2020-01-01")
        self.match("43200\n")

+        # test binary mode
+        self.fail("extract -c -B /newton/prep -s min -e max")
+        self.contain("binary cannot be combined")
+        self.fail("extract -m -B /newton/prep -s min -e max")
+        self.contain("binary cannot be combined")
+        self.ok("extract -B /newton/prep -s min -e max")
+        eq_(len(self.captured_binary), 43200 * (8 + 8*4))
+
        # markup for 3 intervals, plus extra markup lines whenever we had
        # a "restart" from the nilmdb.stream_extract function
        self.ok("extract -m /newton/prep --start 2000-01-01 --end 2020-01-01")
@@ -719,7 +782,7 @@ class TestCmdline(object):
        self.ok("destroy --help")

        self.fail("destroy")
-        self.contain("too few arguments")
+        self.contain("the following arguments are required")

        self.fail("destroy /no/such/stream")
        self.contain("no stream matched path")
@@ -781,25 +844,28 @@ class TestCmdline(object):
        self.ok("destroy /newton/prep /newton/raw")
        self.ok("destroy /newton/zzz")

-        self.ok(u"create /düsseldorf/raw uint16_6")
+        self.ok("create /düsseldorf/raw uint16_6")
        self.ok("list -l --detail")
-        self.contain(u"/düsseldorf/raw uint16_6")
+        self.contain("/düsseldorf/raw uint16_6")
        self.contain("(no intervals)")

        # Unicode metadata
-        self.ok(u"metadata /düsseldorf/raw --set α=beta 'γ=δ'")
-        self.ok(u"metadata /düsseldorf/raw --update 'α=β ε τ α'")
-        self.ok(u"metadata /düsseldorf/raw")
-        self.match(u"α=β ε τ α\nγ=δ\n")
+        self.ok("metadata /düsseldorf/raw --set α=beta 'γ=δ'")
+        self.ok("metadata /düsseldorf/raw --update 'α=β ε τ α'")
+        self.ok("metadata /düsseldorf/raw")
+        self.match("α=β ε τ α\nγ=δ\n")

-        self.ok(u"destroy /düsseldorf/raw")
+        self.ok("destroy /düsseldorf/raw")

    def test_13_files(self):
        # Test BulkData's ability to split into multiple files,
        # by forcing the file size to be really small.
+        # Also increase the initial nrows, so that start/end positions
+        # in the database are very large (> 32 bit)
        server_stop()
        server_start(bulkdata_args = { "file_size" : 920, # 23 rows per file
-                                       "files_per_dir" : 3 })
+                                       "files_per_dir" : 3,
+                                       "initial_nrows" : 2**40 })

        # Fill data
        self.ok("create /newton/prep float32_8")
@@ -847,14 +913,28 @@ class TestCmdline(object):
        self.ok("destroy -R /newton/prep") # destroy again

    def test_14_remove_files(self):
-        # Test BulkData's ability to remove when data is split into
-        # multiple files.  Should be a fairly comprehensive test of
-        # remove functionality.
-        # Also limit max_removals, to cover more functionality.
+        # Limit max_removals, to cover more functionality.
        server_stop()
        server_start(max_removals = 4321,
                     bulkdata_args = { "file_size" : 920, # 23 rows per file
-                                       "files_per_dir" : 3 })
+                                       "files_per_dir" : 3,
+                                       "initial_nrows" : 2**40 })
+        self.do_remove_files()
+        self.ok("destroy -R /newton/prep") # destroy again
+
+    def test_14b_remove_files_maxint(self):
+        # Limit max_int_removals, to cover more functionality.
+        server_stop()
+        server_start(max_int_removals = 1,
+                     bulkdata_args = { "file_size" : 920, # 23 rows per file
+                                       "files_per_dir" : 3,
+                                       "initial_nrows" : 2**40 })
+        self.do_remove_files()
+
+    def do_remove_files(self):
+        # Test BulkData's ability to remove when data is split into
+        # multiple files.  Should be a fairly comprehensive test of
+        # remove functionality.

        # Insert data.  Just for fun, insert out of order
        self.ok("create /newton/prep float32_8")
@@ -994,6 +1074,18 @@ class TestCmdline(object):
        self.match("[ Thu, 01 Jan 2004 00:00:00.000000 +0000 -"
                   "> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n")

+        # optimize
+        self.ok("insert -s 01-01-2002 -e 01-01-2004 /diff/1 /dev/null")
+        self.ok("intervals /diff/1")
+        self.match("[ Sat, 01 Jan 2000 00:00:00.000000 +0000 -"
+                   "> Thu, 01 Jan 2004 00:00:00.000000 +0000 ]\n"
+                   "[ Thu, 01 Jan 2004 00:00:00.000000 +0000 -"
+                   "> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n")
+        self.ok("intervals /diff/1 --optimize")
+        self.ok("intervals /diff/1 -o")
+        self.match("[ Sat, 01 Jan 2000 00:00:00.000000 +0000 -"
+                   "> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n")
+
        self.ok("destroy -R /diff/1")
        self.ok("destroy -R /diff/2")

@@ -1080,3 +1172,74 @@ class TestCmdline(object):

        server_stop()
        server_start()
+
+    def test_05b_completion(self):
+        # Test bash completion.  This depends on some data put in the DB by
+        # earlier tests, so the execution order is important.
+        def complete(line, expect="<unspecified>"):
+            # set env vars
+            env = {
+                '_ARGCOMPLETE': '1',
+                'COMP_LINE': line,
+                'COMP_POINT': str(len(line)),
+                'COMP_TYPE': '8',
+                'NILMDB_URL': "http://localhost:32180/",
+            }
+            for (k, v) in env.items():
+                os.environ[k] = v
+
+            # create in-memory buffer for completion output
+            output = io.BytesIO()
+
+            # ensure argcomplete won't mess with any FDs
+            def fake_fdopen(fd, mode):
+                return io.BytesIO()
+            old_fdopen = os.fdopen
+            os.fdopen = fake_fdopen
+
+            # run cli
+            cmdline = nilmdb.cmdline.Cmdline([])
+            cmdline.complete_output_stream = output
+            try:
+                cmdline.run()
+                sys.exit(0)
+            except SystemExit as e:
+                exitcode = e.code
+            eq_(exitcode, 0)
+
+            # clean up
+            os.fdopen = old_fdopen
+            for (k, v) in env.items():
+                del os.environ[k]
+
+            # read completion output
+            comp = output.getvalue()
+
+            # replace completion separators with commas, for clarity
+            cleaned = comp.replace(b'\x0b', b',').decode('utf-8')
+
+            # expect the given match or prefix
+            if expect.endswith('*'):
+                if not cleaned.startswith(expect[:-1]):
+                    raise AssertionError(("completions:\n  '%s'\n"
+                                          "don't start with:\n  '%s'") %
+                                         (cleaned, expect[:-1]))
+            else:
+                if cleaned != expect:
+                    raise AssertionError(("completions:\n  '%s'\n"
+                                          "don't match:\n  '%s'") %
+                                         (cleaned, expect))
+
+        complete("nilmtool -u ", "")
+        complete("nilmtool list ", "-h,--help,-E,--ext*")
+        complete("nilmtool list --st", "--start ")
+        complete("nilmtool list --start ", "")
+        complete("nilmtool list /", "/newton/prep,/newton/raw*")
+        complete("nilmtool create /foo int3", "int32_1,int32_2*")
+        complete("nilmtool metadata /newton/raw --get a",
+                 "a_𝓴𝓮𝔂,a_key,a_𝗸𝗲𝘆")
+        complete("nilmtool metadata /newton/raw --set a",
+                 "a_𝓴𝓮𝔂=value,a_key=𝓿𝓪𝓵𝓾𝓮,a_𝗸𝗲𝘆=𝘃𝗮𝗹𝘂𝗲")
+        complete("nilmtool metadata /newton/raw --set a_𝗸", "a_𝗸𝗲𝘆=𝘃𝗮𝗹𝘂𝗲 ")
+        complete("nilmtool metadata '' --set a", "")
+        self.run("list")
--- a/tests/test_interval.py
+++ b/tests/test_interval.py
@@ -2,7 +2,7 @@

 import nilmdb
 from nilmdb.utils.printf import *
-from nilmdb.utils import datetime_tz
+import datetime_tz

 from nose.tools import *
 from nose.tools import assert_raises
@@ -59,6 +59,15 @@ class TestInterval:
        self.test_interval_intersect()
        Interval = NilmdbInterval

+        # Other helpers in nilmdb.utils.interval
+        i = [ UtilsInterval(1,2), UtilsInterval(2,3), UtilsInterval(4,5) ]
+        eq_(list(nilmdb.utils.interval.optimize(i)),
+            [ UtilsInterval(1,3), UtilsInterval(4,5) ])
+        eq_(list(nilmdb.utils.interval.optimize([])), [])
+        eq_(UtilsInterval(1234567890123456, 1234567890654321).human_string(),
+            "[ Fri, 13 Feb 2009 18:31:30.123456 -0500 -> " +
+            "Fri, 13 Feb 2009 18:31:30.654321 -0500 ]")
+
    def test_interval(self):
        # Test Interval class
        os.environ['TZ'] = "America/New_York"
@@ -85,8 +94,13 @@ class TestInterval:

        # compare
        assert(Interval(d1, d2) == Interval(d1, d2))
+        assert(Interval(d1, d2) <= Interval(d1, d2))
+        assert(Interval(d1, d2) >= Interval(d1, d2))
+        assert(Interval(d1, d2) != Interval(d1, d3))
        assert(Interval(d1, d2) < Interval(d1, d3))
+        assert(Interval(d1, d2) <= Interval(d1, d3))
        assert(Interval(d1, d3) > Interval(d1, d2))
+        assert(Interval(d1, d3) >= Interval(d1, d2))
        assert(Interval(d1, d2) < Interval(d2, d3))
        assert(Interval(d1, d3) < Interval(d2, d3))
        assert(Interval(d2, d2+1) > Interval(d1, d3))
@@ -226,13 +240,16 @@ class TestInterval:
            x = makeset("[--)") & 1234

        def do_test(a, b, c, d):
-            # a & b == c
+            # a & b == c (using nilmdb.server.interval)
            ab = IntervalSet()
            for x in b:
                for i in (a & x):
                    ab += i
            eq_(ab,c)

+            # a & b == c (using nilmdb.utils.interval)
+            eq_(IntervalSet(nilmdb.utils.interval.intersection(a,b)), c)
+
            # a \ b == d
            eq_(IntervalSet(nilmdb.utils.interval.set_difference(a,b)), d)

@@ -302,6 +319,22 @@ class TestInterval:
        eq_(nilmdb.utils.interval.set_difference(
            a.intersection(list(c)[0]), b.intersection(list(c)[0])), d)

+        # Fill out test coverage for non-subsets
+        def diff2(a,b, subset):
+            return nilmdb.utils.interval._interval_math_helper(
+                a, b, (lambda a, b: b and not a), subset=subset)
+        with assert_raises(nilmdb.utils.interval.IntervalError):
+            list(diff2(a,b,True))
+        list(diff2(a,b,False))
+
+        # Fill out test coverage with a union operator (not implemented
+        # in interval.py, because nilmdb doesn't need it)
+        def union(a, b):
+            return nilmdb.utils.interval._interval_math_helper(
+                a, b, (lambda a, b: a or b), subset=False)
+        list(union(makeset("[---) "),
+                   makeset(" [---)")))
+
        # Empty second set
        eq_(nilmdb.utils.interval.set_difference(a, IntervalSet()), a)

@@ -357,13 +390,13 @@ class TestIntervalTree:
        # make a set of 100 intervals
        iset = IntervalSet()
        j = 100
-        for i in random.sample(xrange(j),j):
+        for i in random.sample(range(j),j):
            interval = Interval(i, i+1)
            iset += interval
        render(iset, "Random Insertion")

        # remove about half of them
-        for i in random.sample(xrange(j),j):
+        for i in random.sample(range(j),j):
            if random.randint(0,1):
                iset -= Interval(i, i+1)

@@ -375,7 +408,7 @@ class TestIntervalTree:
        # make a set of 100 intervals, inserted in order
        iset = IntervalSet()
        j = 100
-        for i in xrange(j):
+        for i in range(j):
            interval = Interval(i, i+1)
            iset += interval
        render(iset, "In-order insertion")
@@ -388,14 +421,14 @@ class TestIntervalSpeed:
        import random
        import math

-        print
+        print()
        yappi.start()
        speeds = {}
        limit = 22 # was 20
        for j in [ 2**x for x in range(5,limit) ]:
            start = time.time()
            iset = IntervalSet()
-            for i in random.sample(xrange(j),j):
+            for i in random.sample(range(j),j):
                interval = Interval(i, i+1)
                iset += interval
            speed = (time.time() - start) * 1000000.0
@@ -406,4 +439,6 @@ class TestIntervalSpeed:
                   speed / (j*math.log(j))) # should be constant
            speeds[j] = speed
        yappi.stop()
-        yappi.print_stats(sort_type=yappi.SORTTYPE_TTOT, limit=10)
+        stats = yappi.get_func_stats()
+        stats.sort("ttot")
+        stats.print_all()
--- a/tests/test_misc.py
+++ b/tests/test_misc.py
@@ -0,0 +1,139 @@
+from nose.tools import *
+from nose.tools import assert_raises
+from testutil.helpers import *
+
+import io
+import os
+import sys
+import time
+import socket
+import cherrypy
+
+import nilmdb.server
+from nilmdb.utils import timer, lock
+
+class TestMisc(object):
+    def test_timer(self):
+        capture = io.StringIO()
+        old = sys.stdout
+        sys.stdout = capture
+        with nilmdb.utils.Timer("test"):
+            time.sleep(0.01)
+        with nilmdb.utils.Timer("test syslog", tosyslog=True):
+            time.sleep(0.01)
+        sys.stdout = old
+        in_("test: ", capture.getvalue())
+
+    def test_lock(self):
+        with open("/dev/null") as f:
+            eq_(nilmdb.utils.lock.exclusive_lock(f), True)
+            nilmdb.utils.lock.exclusive_unlock(f)
+        # Test error conditions
+        class FakeFile():
+            def __init__(self, fileno):
+                self._fileno = fileno
+            def fileno(self):
+                return self._fileno
+        with assert_raises(TypeError):
+            nilmdb.utils.lock.exclusive_lock(FakeFile('none'))
+        with assert_raises(ValueError):
+            nilmdb.utils.lock.exclusive_lock(FakeFile(-1))
+        with assert_raises(IOError):
+            nilmdb.utils.lock.exclusive_lock(FakeFile(12345))
+
+        # Lock failure is tested in test_bulkdata
+
+    def test_replace_file(self):
+        fn = b"tests/misc-testdb/file"
+        try:
+            os.mkdir(os.path.dirname(fn))
+        except FileExistsError:
+            pass
+        with open(fn, "wb") as f:
+            f.write(b"hello, world")
+        nilmdb.utils.atomic.replace_file(fn, b"goodbye, world")
+        with open(fn, "rb") as f:
+            eq_(f.read(), b"goodbye, world")
+
+    def test_punch(self):
+        fn = b"tests/misc-testdb/punchit"
+        try:
+            os.mkdir(os.path.dirname(fn))
+        except FileExistsError:
+            pass
+        with open(fn, "wb") as f:
+            f.write(b"hello, world")
+        nilmdb.utils.fallocate.punch_hole(fn, 3, 5)
+        with open(fn, "rb") as f:
+            eq_(f.read(), b"hel\0\0\0\0\0orld")
+        with assert_raises(OSError):
+            nilmdb.utils.fallocate.punch_hole(fn, 1, -1, False)
+        with assert_raises(OSError):
+            nilmdb.utils.fallocate.punch_hole("/", 1, 1, False)
+        # no exception because we ignore errors by default
+        nilmdb.utils.fallocate.punch_hole(fn, 1, -1)
+
+    def test_diskusage(self):
+        hs = nilmdb.utils.diskusage.human_size
+        eq_(hs(0), "0 bytes")
+        eq_(hs(1), "1 byte")
+        eq_(hs(1023), "1023 bytes")
+
+        eq_(hs(1024), "1 kiB")
+
+        eq_(hs(1048575), "1024 kiB")
+        eq_(hs(1048576), "1.0 MiB")
+
+        eq_(hs(1073741823), "1024.0 MiB")
+        eq_(hs(1073741824), "1.00 GiB")
+
+        eq_(hs(1099511627775), "1024.00 GiB")
+        eq_(hs(1099511627776), "1.00 TiB")
+
+        eq_(hs(1099511627776 * 5000.1234), "5000.12 TiB")
+
+        nilmdb.utils.diskusage.du("/dev")
+        with assert_raises(OSError):
+            nilmdb.utils.diskusage.du("/dev/null/bogus")
+        nilmdb.utils.diskusage.du("super-bogus-does-not-exist")
+
+    def test_cors_allow(self):
+        # Just to get some test coverage; these code paths aren't actually
+        # used in current code
+        cpy = nilmdb.server.serverutil.cherrypy
+        (req, resp) = (cpy.request, cpy.response)
+        cpy.request.method = "DELETE"
+        with assert_raises(cpy.HTTPError):
+            nilmdb.server.serverutil.CORS_allow(methods="POST")
+        with assert_raises(cpy.HTTPError):
+            nilmdb.server.serverutil.CORS_allow(methods=["POST"])
+        with assert_raises(cpy.HTTPError):
+            nilmdb.server.serverutil.CORS_allow(methods=["GET"])
+        with assert_raises(cpy.HTTPError):
+            nilmdb.server.serverutil.CORS_allow(methods=[])
+        (cpy.request, cpy.response) = (req, resp)
+
+    def test_cherrypy_failure(self):
+        # Test failure of cherrypy to start up because the port is
+        # already in use.  This also tests the functionality of
+        # serverutil:cherrypy_patch_exit()
+        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+        try:
+            sock.bind(("127.0.0.1", 32180))
+            sock.listen(1)
+        except OSError:
+            raise AssertionError("port 32180 must be free for tests")
+
+        nilmdb.server.serverutil.cherrypy_patch_exit()
+        cherrypy.config.update({
+            'environment': 'embedded',
+            'server.socket_host': '127.0.0.1',
+            'server.socket_port': 32180,
+            'engine.autoreload.on': False,
+            })
+        with assert_raises(Exception) as e:
+            cherrypy.engine.start()
+        in_("Address already in use", str(e.exception))
+
+        sock.close()
--- a/tests/test_mustclose.py
+++ b/tests/test_mustclose.py
@@ -8,12 +8,12 @@ from nose.tools import assert_raises
 from testutil.helpers import *

 import sys
-import cStringIO
+import io
 import gc

 import inspect

-err = cStringIO.StringIO()
+err = io.StringIO()

@nilmdb.utils.must_close(errorfile = err)
 class Foo:
@@ -71,6 +71,7 @@ class TestMustClose(object):

        # No error
        err.truncate(0)
+        err.seek(0)
        y = Foo("bye")
        y.close()
        del y
@@ -82,6 +83,7 @@ class TestMustClose(object):

        # Verify function calls when wrap_verify is True
        err.truncate(0)
+        err.seek(0)
        z = Bar()
        eq_(inspect.getargspec(z.blah),
            inspect.ArgSpec(args = ['self', 'arg'],
@@ -90,7 +92,7 @@ class TestMustClose(object):
        z.close()
        with assert_raises(AssertionError) as e:
            z.blah("hello")
-        in_("called <function blah at 0x", str(e.exception))
+        in_("called <function Bar.blah at 0x", str(e.exception))
        in_("> after close", str(e.exception))
        # Since the most recent assertion references 'z',
        # we need to raise another assertion here so that
@@ -107,8 +109,13 @@ class TestMustClose(object):

        # Class with missing methods
        err.truncate(0)
+        err.seek(0)
        w = Baz()
        w.close()
        del w
        eq_(err.getvalue(), "")

+        # Test errors during __del__, by closing the error file so the fprintf fails
+        r = Foo("hi")
+        err.close()
+        del r
--- a/tests/test_nilmdb.py
+++ b/tests/test_nilmdb.py
@@ -3,18 +3,23 @@ import nilmdb.server
 from nose.tools import *
 from nose.tools import assert_raises
 import distutils.version
-import simplejson as json
+import json
 import itertools
 import os
 import sys
 import threading
-import urllib2
-from urllib2 import urlopen, HTTPError
-import cStringIO
+import urllib.request, urllib.error, urllib.parse
+from urllib.request import urlopen
+from urllib.error import HTTPError
+import io
 import time
 import requests
+import socket
+import sqlite3
+import cherrypy

 from nilmdb.utils import serializer_proxy
+from nilmdb.server.interval import Interval

 testdb = "tests/testdb"

@@ -24,6 +29,16 @@ testdb = "tests/testdb"

 from testutil.helpers import *

+def setup_module():
+    # Make sure port is free
+    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+    sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+    try:
+        sock.bind(("127.0.0.1", 32180))
+    except OSError:
+        raise AssertionError("port 32180 must be free for tests")
+    sock.close()
+
 class Test00Nilmdb(object):  # named 00 so it runs first
    def test_NilmDB(self):
        recursive_unlink(testdb)
@@ -32,15 +47,54 @@ class Test00Nilmdb(object):  # named 00 so it runs first
        db.close()
        db = nilmdb.server.NilmDB(testdb)
        db.close()
+        db.close()

-        # test timer, just to get coverage
-        capture = cStringIO.StringIO()
-        old = sys.stdout
-        sys.stdout = capture
-        with nilmdb.utils.Timer("test"):
-            time.sleep(0.01)
-        sys.stdout = old
-        in_("test: ", capture.getvalue())
+    def test_error_cases(self):
+        # Test some misc error cases to get better code coverage
+
+        with assert_raises(OSError) as e:
+            nilmdb.server.NilmDB("/dev/null/bogus")
+        in_("can't create tree", str(e.exception))
+
+        # Version upgrades
+        con = sqlite3.connect(os.path.join(testdb, "data.sql"))
+        con.execute("PRAGMA user_version = 2");
+        con.close()
+        with assert_raises(Exception) as e:
+            db = nilmdb.server.NilmDB(testdb)
+        in_("can't use database version 2", str(e.exception))
+
+        con = sqlite3.connect(os.path.join(testdb, "data.sql"))
+        con.execute("PRAGMA user_version = -1234");
+        con.close()
+        with assert_raises(Exception) as e:
+            db = nilmdb.server.NilmDB(testdb)
+        in_("unknown database version -1234", str(e.exception))
+
+        recursive_unlink(testdb)
+
+        nilmdb.server.NilmDB.verbose = 1
+        (old, sys.stdout) = (sys.stdout, io.StringIO())
+        db = nilmdb.server.NilmDB(testdb)
+        (output, sys.stdout) = (sys.stdout.getvalue(), old)
+        nilmdb.server.NilmDB.verbose = 0
+        db.close()
+        in_("Database schema updated to 1", output)
+
+        # Corrupted database (bad ranges)
+        recursive_unlink(testdb)
+        db = nilmdb.server.NilmDB(testdb)
+        db.con.executescript("""
+        INSERT INTO streams VALUES (1, "/test", "int32_1");
+        INSERT INTO ranges VALUES (1, 100, 200, 100, 200);
+        INSERT INTO ranges VALUES (1, 150, 250, 150, 250);
+        """)
+        db.close()
+        db = nilmdb.server.NilmDB(testdb)
+        with assert_raises(nilmdb.server.NilmDBError):
+            db.stream_intervals("/test")
+        db.close()
+        recursive_unlink(testdb)

    def test_stream(self):
        db = nilmdb.server.NilmDB(testdb)
@@ -67,15 +121,6 @@ class Test00Nilmdb(object):  # named 00 so it runs first
        eq_(db.stream_list(layout="uint16_6"), [ ["/newton/raw", "uint16_6"] ])
        eq_(db.stream_list(path="/newton/raw"), [ ["/newton/raw", "uint16_6"] ])

-        # Verify that columns were made right (pytables specific)
-        if "h5file" in db.data.__dict__:
-            h5file = db.data.h5file
-            eq_(len(h5file.getNode("/newton/prep").cols), 9)
-            eq_(len(h5file.getNode("/newton/raw").cols), 7)
-            eq_(len(h5file.getNode("/newton/zzz/rawnotch").cols), 10)
-            assert(not h5file.getNode("/newton/prep").colindexed["timestamp"])
-            assert(not h5file.getNode("/newton/prep").colindexed["c1"])
-
        # Set / get metadata
        eq_(db.stream_get_metadata("/newton/prep"), {})
        eq_(db.stream_get_metadata("/newton/raw"), {})
@@ -119,6 +164,7 @@ class TestBlockingServer(object):
        self.server = nilmdb.server.Server(self.db, host = "127.0.0.1",
                                           port = 32180, stoppable = True)

+        def start_server():
            # Run it
            event = threading.Event()
            def run_server():
@@ -127,16 +173,29 @@ class TestBlockingServer(object):
            thread.start()
            if not event.wait(timeout = 10):
                raise AssertionError("server didn't start in 10 seconds")
+            return thread

-        # Send request to exit.
+        # Start server and request for it to exit
+        thread = start_server()
        req = urlopen("http://127.0.0.1:32180/exit/", timeout = 1)
-
-        # Wait for it
        thread.join()

+        # Mock some signals that should kill the server
+        def try_signal(sig):
+            old = cherrypy.engine.wait
+            def raise_sig(*args, **kwargs):
+                raise sig()
+            cherrypy.engine.wait = raise_sig
+            thread = start_server()
+            thread.join()
+            cherrypy.engine.wait = old
+        try_signal(SystemExit)
+        try_signal(KeyboardInterrupt)
+
 def geturl(path):
-    req = urlopen("http://127.0.0.1:32180" + path, timeout = 10)
-    return req.read()
+    resp = urlopen("http://127.0.0.1:32180" + path, timeout = 10)
+    body = resp.read()
+    return body.decode(resp.headers.get_content_charset() or 'utf-8')

 def getjson(path):
    return json.loads(geturl(path))
@@ -157,11 +216,14 @@ class TestServer(object):

    def test_server(self):
        # Make sure we can't force an exit, and test other 404 errors
-        for url in [ "/exit", "/", "/favicon.ico" ]:
+        for url in [ "/exit", "/favicon.ico" ]:
            with assert_raises(HTTPError) as e:
                geturl(url)
            eq_(e.exception.code, 404)

+        # Root page
+        in_("This is NilmDB", geturl("/"))
+
        # Check version
        eq_(distutils.version.LooseVersion(getjson("/version")),
            distutils.version.LooseVersion(nilmdb.__version__))
--- a/tests/test_numpyclient.py
+++ b/tests/test_numpyclient.py
@@ -7,7 +7,7 @@ import nilmdb.client.numpyclient
 from nilmdb.utils.printf import *
 from nilmdb.utils import timestamper
 from nilmdb.client import ClientError, ServerError
-from nilmdb.utils import datetime_tz
+import datetime_tz

 from nose.plugins.skip import SkipTest
 from nose.tools import *
@@ -28,7 +28,10 @@ def setup_module():
    recursive_unlink(testdb)

    # Start web app on a custom port
-    test_db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(testdb)
+    test_db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(
+        testdb, bulkdata_args = { "file_size" : 16384,
+                                  "files_per_dir" : 3 } )
+
    test_server = nilmdb.server.Server(test_db, host = "127.0.0.1",
                                       port = 32180, stoppable = False,
                                       fast_shutdown = True,
@@ -102,7 +105,8 @@ class TestNumpyClient(object):
        # Compare.  Will be close but not exact because the conversion
        # to and from ASCII was lossy.
        data = timestamper.TimestamperRate(testfile, start, rate)
-        actual = np.fromstring(" ".join(data), sep=' ').reshape(14400, 9)
+        data_str = b" ".join(data).decode('utf-8', errors='backslashreplace')
+        actual = np.fromstring(data_str, sep=' ').reshape(14400, 9)
        assert(np.allclose(array, actual))

        client.close()
@@ -146,12 +150,14 @@ class TestNumpyClient(object):
            client.stream_extract_numpy(
                "/newton/prep", structured = False, maxrows = 1000))

-        # Structured, and specifying layout
+        # Structured, and specifying layout.
+        # This also tests the final branch in stream_extract_numpy by specifying
+        # a value of maxrows that exactly matches how much data we had inserted.
        client.stream_create("/test/3", "float32_8")
        client.stream_insert_numpy(
            path = "/test/3", layout = "float32_8",
            data = client.stream_extract_numpy(
-                "/newton/prep", structured = True, maxrows = 1000))
+                "/newton/prep", structured = True, maxrows = 14400))

        # Structured, specifying wrong layout
        client.stream_create("/test/4", "float32_8")
@@ -173,12 +179,23 @@ class TestNumpyClient(object):
        in_("error parsing input data", str(e.exception))

        # Make sure the /newton/prep copies are identical
-        a = np.vstack(client.stream_extract_numpy("/newton/prep"))
-        b = np.vstack(client.stream_extract_numpy("/test/2"))
-        c = np.vstack(client.stream_extract_numpy("/test/3"))
+        a = np.vstack(list(client.stream_extract_numpy("/newton/prep")))
+        b = np.vstack(list(client.stream_extract_numpy("/test/2")))
+        c = np.vstack(list(client.stream_extract_numpy("/test/3")))
        assert(np.array_equal(a,b))
        assert(np.array_equal(a,c))

+        # Make sure none of the files are greater than 16384 bytes as
+        # we configured with the bulkdata_args above.
+        datapath = os.path.join(testdb, "data")
+        for (dirpath, dirnames, filenames) in os.walk(datapath):
+            for f in filenames:
+                fn = os.path.join(dirpath, f)
+                size = os.path.getsize(fn)
+                if size > 16384:
+                    raise AssertionError(sprintf("%s is too big: %d > %d\n",
+                                                 fn, size, 16384))
+
        nilmdb.client.numpyclient.StreamInserterNumpy._max_data = old_max_data
        client.close()

@@ -279,7 +296,7 @@ class TestNumpyClient(object):

        # Try also creating a completely empty interval from scratch,
        # in a few different ways.
-        client.stream_insert("/empty/test", "", 300, 350)
+        client.stream_insert("/empty/test", b"", 300, 350)
        client.stream_insert("/empty/test", [], 400, 450)
        with client.stream_insert_numpy_context("/empty/test", 500, 550):
            pass
@@ -295,8 +312,25 @@ class TestNumpyClient(object):
        with client.stream_insert_numpy_context("/empty/test", end = 950):
            pass

+        # Equal start and end is OK as long as there's no data
+        with assert_raises(ClientError) as e:
+            with client.stream_insert_numpy_context("/empty/test",
+                                                    start=9, end=9) as ctx:
+                ctx.insert([[9, 9]])
+                ctx.finalize()
+        in_("have data to send, but invalid start/end times", str(e.exception))
+
+        with client.stream_insert_numpy_context("/empty/test",
+                                                start=9, end=9) as ctx:
+            pass
+
+        # reusing a context object is bad
+        with assert_raises(Exception) as e:
+            ctx.insert([[9, 9]])
+
        # Try various things that might cause problems
-        with client.stream_insert_numpy_context("/empty/test", 1000, 1050):
+        with client.stream_insert_numpy_context("/empty/test",
+                                                1000, 1050) as ctx:
            ctx.finalize() # inserts [1000, 1050]
            ctx.finalize() # nothing
            ctx.finalize() # nothing
--- a/tests/test_printf.py
+++ b/tests/test_printf.py
@@ -3,7 +3,7 @@ from nilmdb.utils.printf import *

 from nose.tools import *
 from nose.tools import assert_raises
-from cStringIO import StringIO
+from io import StringIO
 import sys

 from testutil.helpers import *
--- a/tests/test_rbtree.py
+++ b/tests/test_rbtree.py
@@ -36,12 +36,12 @@ class TestRBTree:
        # make a set of 100 intervals, inserted in order
        rb = RBTree()
        j = 100
-        for i in xrange(j):
+        for i in range(j):
            rb.insert(RBNode(i, i+1))
        render(rb, "in-order insert")

        # remove about half of them
-        for i in random.sample(xrange(j),j):
+        for i in random.sample(range(j),j):
            if random.randint(0,1):
                rb.delete(rb.find(i, i+1))
        render(rb, "in-order insert, random delete")
@@ -49,18 +49,18 @@ class TestRBTree:
        # make a set of 100 intervals, inserted at random
        rb = RBTree()
        j = 100
-        for i in random.sample(xrange(j),j):
+        for i in random.sample(range(j),j):
            rb.insert(RBNode(i, i+1))
        render(rb, "random insert")

        # remove about half of them
-        for i in random.sample(xrange(j),j):
+        for i in random.sample(range(j),j):
            if random.randint(0,1):
                rb.delete(rb.find(i, i+1))
        render(rb, "random insert, random delete")

        # in-order insert of 50 more
-        for i in xrange(50):
+        for i in range(50):
            rb.insert(RBNode(i+500, i+501))
        render(rb, "random insert, random delete, in-order insert")

--- a/tests/test_serializer.py
+++ b/tests/test_serializer.py
@@ -6,6 +6,7 @@ from nose.tools import *
 from nose.tools import assert_raises
 import threading
 import time
+import nilmdb.server

 from testutil.helpers import *

@@ -28,6 +29,9 @@ class Foo(object):
    def t(self):
        pass

+    def reent(self, func):
+        func()
+
    def tester(self, debug = False):
        # purposely not thread-safe
        self.test_thread = threading.current_thread().name
@@ -50,7 +54,7 @@ class Base(object):
        def func(foo):
            foo.test()
        threads = []
-        for i in xrange(20):
+        for i in range(20):
            threads.append(threading.Thread(target = func, args = (self.foo,)))
        for t in threads:
            t.start()
@@ -62,6 +66,28 @@ class Base(object):
        eq_(self.foo.val, 20)
        eq_(self.foo.init_thread, self.foo.test_thread)

+class ListLike(object):
+    def __init__(self):
+        self.thread = threading.current_thread().name
+        self.foo = 0
+
+    def __iter__(self):
+        eq_(threading.current_thread().name, self.thread)
+        self.foo = 0
+        return self
+
+    def __getitem__(self, key):
+        eq_(threading.current_thread().name, self.thread)
+        return key
+
+    def __next__(self):
+        eq_(threading.current_thread().name, self.thread)
+        if self.foo < 5:
+            self.foo += 1
+            return self.foo
+        else:
+            raise StopIteration
+
 class TestUnserialized(Base):
    def setUp(self):
        self.foo = Foo()
@@ -84,3 +110,23 @@ class TestSerializer(Base):
        sp(sp(Foo("x"))).t()
        sp(sp(Foo)("x")).t()
        sp(sp(Foo))("x").t()
+
+    def test_iter(self):
+        sp = nilmdb.utils.serializer_proxy
+        i = sp(ListLike)()
+        eq_(list(i), [1,2,3,4,5])
+        eq_(i[3], 3)
+
+    def test_del(self):
+        sp = nilmdb.utils.serializer_proxy
+        foo = sp(Foo("x"))
+
+        # trigger exception in __del__, which should be ignored
+        foo._SerializerObjectProxy__call_queue = None
+        del foo
+
+    def test_rocket(self):
+        # Serializer works on a C module?
+        sp = nilmdb.utils.serializer_proxy
+        rkt = sp(nilmdb.server.rocket.Rocket("int32_8", None))
+        eq_(rkt.binary_size, 40)
--- a/tests/test_threadsafety.py
+++ b/tests/test_threadsafety.py
@@ -76,14 +76,14 @@ class TestThreadSafety(object):
    def test(self):
        proxy = nilmdb.utils.threadsafety.verify_proxy
        self.tryit(Test(), True, True)
-        self.tryit(proxy(Test(), True, True, True), False, False)
-        self.tryit(proxy(Test(), True, True, False), False, True)
-        self.tryit(proxy(Test(), True, False, True), True, False)
-        self.tryit(proxy(Test(), True, False, False), True, True)
-        self.tryit(proxy(Test, True, True, True)(), False, False)
-        self.tryit(proxy(Test, True, True, False)(), False, True)
-        self.tryit(proxy(Test, True, False, True)(), True, False)
-        self.tryit(proxy(Test, True, False, False)(), True, True)
+        self.tryit(proxy(Test(), True, True), False, False)
+        self.tryit(proxy(Test(), True, False), False, True)
+        self.tryit(proxy(Test(), False, True), True, False)
+        self.tryit(proxy(Test(), False, False), True, True)
+        self.tryit(proxy(Test, True, True)(), False, False)
+        self.tryit(proxy(Test, True, False)(), False, True)
+        self.tryit(proxy(Test, False, True)(), True, False)
+        self.tryit(proxy(Test, False, False)(), True, True)

        proxy(proxy(proxy(Test))()).foo()

--- a/tests/test_timestamper.py
+++ b/tests/test_timestamper.py
@@ -1,11 +1,12 @@
 import nilmdb
 from nilmdb.utils.printf import *
+import datetime_tz

 from nose.tools import *
 from nose.tools import assert_raises
 import os
 import sys
-import cStringIO
+import io

 from testutil.helpers import *

@@ -17,60 +18,62 @@ class TestTimestamper(object):

    def test_timestamper(self):
        def join(list):
-            return "\n".join(list) + "\n"
+            return b"\n".join(list) + b"\n"
+
+        datetime_tz.localtz_set("America/New_York")

        start = nilmdb.utils.time.parse_time("03/24/2012")
-        lines_in  = [ "hello", "world", "hello world", "# commented out" ]
-        lines_out = [ "1332561600000000 hello",
-                      "1332561600000125 world",
-                      "1332561600000250 hello world" ]
+        lines_in  = [ b"hello", b"world", b"hello world", b"# commented out" ]
+        lines_out = [ b"1332561600000000 hello",
+                      b"1332561600000125 world",
+                      b"1332561600000250 hello world" ]

        # full
-        input = cStringIO.StringIO(join(lines_in))
+        input = io.BytesIO(join(lines_in))
        ts = timestamper.TimestamperRate(input, start, 8000)
        foo = ts.readlines()
        eq_(foo, join(lines_out))
        in_("TimestamperRate(..., start=", str(ts))

        # first 30 or so bytes means the first 2 lines
-        input = cStringIO.StringIO(join(lines_in))
+        input = io.BytesIO(join(lines_in))
        ts = timestamper.TimestamperRate(input, start, 8000)
        foo = ts.readlines(30)
        eq_(foo, join(lines_out[0:2]))

        # stop iteration early
-        input = cStringIO.StringIO(join(lines_in))
+        input = io.BytesIO(join(lines_in))
        ts = timestamper.TimestamperRate(input, start, 8000,
                                                1332561600000200)
-        foo = ""
+        foo = b""
        for line in ts:
            foo += line
        eq_(foo, join(lines_out[0:2]))

        # stop iteration early (readlines)
-        input = cStringIO.StringIO(join(lines_in))
+        input = io.BytesIO(join(lines_in))
        ts = timestamper.TimestamperRate(input, start, 8000,
                                                1332561600000200)
        foo = ts.readlines()
        eq_(foo, join(lines_out[0:2]))

        # stop iteration really early
-        input = cStringIO.StringIO(join(lines_in))
+        input = io.BytesIO(join(lines_in))
        ts = timestamper.TimestamperRate(input, start, 8000,
                                                1332561600000000)
        foo = ts.readlines()
-        eq_(foo, "")
+        eq_(foo, b"")

        # use iterator
-        input = cStringIO.StringIO(join(lines_in))
+        input = io.BytesIO(join(lines_in))
        ts = timestamper.TimestamperRate(input, start, 8000)
-        foo = ""
+        foo = b""
        for line in ts:
            foo += line
        eq_(foo, join(lines_out))

        # check that TimestamperNow gives similar result
-        input = cStringIO.StringIO(join(lines_in))
+        input = io.BytesIO(join(lines_in))
        ts = timestamper.TimestamperNow(input)
        foo = ts.readlines()
        ne_(foo, join(lines_out))
--- a/tests/test_wsgi.py
+++ b/tests/test_wsgi.py
@@ -0,0 +1,36 @@
+from nose.tools import *
+from nose.tools import assert_raises
+from testutil.helpers import *
+
+import io
+import os
+import sys
+import time
+
+import nilmdb.server
+import webtest
+
+testdb = "tests/testdb"
+
+# Test WSGI interface
+
+class TestWSGI(object):
+    def test_wsgi(self):
+
+        # Bad database gives debug info
+        app = webtest.TestApp(nilmdb.server.wsgi_application("/dev/null", "/"))
+        resp = app.get('/', expect_errors=True)
+        eq_(resp.status_int, 500)
+        eq_(resp.content_type, "text/plain")
+        body = resp.body.decode('utf-8')
+        in_("Initializing database at path '/dev/null' failed", body)
+        in_("Not a directory: b'/dev/null/data'", body)
+        in_("Running as: uid=", body)
+        in_("Environment:", body)
+
+        # Good database works fine
+        app = webtest.TestApp(nilmdb.server.wsgi_application(testdb, "/nilmdb"))
+        resp = app.get('/version', expect_errors=True)
+        eq_(resp.status_int, 404)
+        resp = app.get('/nilmdb/version')
+        eq_(resp.json, nilmdb.__version__)
--- a/tests/testutil/helpers.py
+++ b/tests/testutil/helpers.py
@@ -3,7 +3,7 @@
 import shutil, os

 def myrepr(x):
-    if isinstance(x, basestring):
+    if isinstance(x, str):
        return '"' + x + '"'
    else:
        return repr(x)
--- a/versioneer.py
+++ b/versioneer.py