Add nilmdb-fsck tool to check database consistency

Documentation fixups
Support "nilmtool cmd --version"
2013-08-03 14:23:14 -04:00 · 2013-08-01 16:24:51 -04:00 · 2013-08-01 15:14:34 -04:00 · 2013-07-31 13:37:04 -04:00 · 2013-07-30 15:31:51 -04:00 · 2013-07-30 15:21:09 -04:00
30 changed files with 867 additions and 300 deletions
--- a/.coveragerc
+++ b/.coveragerc
@@ -7,4 +7,4 @@
 exclude_lines =
 	pragma: no cover
 	if 0:
-omit = nilmdb/utils/datetime_tz*,nilmdb/scripts,nilmdb/_version.py
+omit = nilmdb/utils/datetime_tz*,nilmdb/scripts,nilmdb/_version.py,nilmdb/fsck
--- a/4
+++ b/4
@@ -23,6 +23,10 @@ docs:
 lint:
 	pylint --rcfile=.pylintrc nilmdb

+# Run the consistency checker against an existing database.  The
+# location can be overridden: "make fscktest FSCK_DB=/path/to/db".
+FSCK_DB ?= /home/jim/mnt/bucket/mnt/sharon/data/db
+fscktest:
+	python -c "import nilmdb.fsck; nilmdb.fsck.Fsck('$(FSCK_DB)').check()"
+
 test:
 ifeq ($(INSIDE_EMACS), t)
 # Use the slightly more flexible script
--- a/docs/wsgi.md
+++ b/docs/wsgi.md
@@ -19,12 +19,12 @@ Then, set up Apache with a configuration like:

    <VirtualHost>
        WSGIScriptAlias /nilmdb /home/nilm/nilmdb.wsgi
-        WSGIApplicationGroup nilmdb-appgroup
-        WSGIProcessGroup nilmdb-procgroup
        WSGIDaemonProcess nilmdb-procgroup threads=32 user=nilm group=nilm
-
-        # Access control example:
        <Location /nilmdb>
+            WSGIProcessGroup nilmdb-procgroup
+            WSGIApplicationGroup nilmdb-appgroup
+
+            # Access control example:
            Order deny,allow
            Deny from all
            Allow from 1.2.3.4
--- a/nilmdb/client/client.py
+++ b/nilmdb/client/client.py
@@ -58,6 +58,11 @@ class Client(object):
        return self.http.get("dbinfo")

    def stream_list(self, path = None, layout = None, extended = False):
+        """Return a sorted list of [path, layout] lists.  If 'path' or
+        'layout' is specified, only return streams that match those
+        exact values.  If 'extended' is True, the returned lists have
+        extended info, e.g.: [path, layout, extent_min, extent_max,
+        total_rows, total_seconds]."""
        params = {}
        if path is not None:
            params["path"] = path
@@ -69,6 +74,7 @@ class Client(object):
        return nilmdb.utils.sort.sort_human(streams, key = lambda s: s[0])

    def stream_get_metadata(self, path, keys = None):
+        """Get stream metadata"""
        params = { "path": path }
        if keys is not None:
            params["key"] = keys
--- a/nilmdb/client/httpclient.py
+++ b/nilmdb/client/httpclient.py
@@ -123,14 +123,36 @@ class HTTPClient(object):
        """
        (response, isjson) = self._do_req(method, url, query, body,
                                          stream = True, headers = headers)
+
+        # Like the iter_lines function in Requests, but only splits on
+        # the specified line ending.
+        def lines(source, ending):
+            pending = None
+            for chunk in source:
+                if pending is not None:
+                    chunk = pending + chunk
+                tmp = chunk.split(ending)
+                lines = tmp[:-1]
+                if chunk.endswith(ending):
+                    pending = None
+                else:
+                    pending = tmp[-1]
+                for line in lines:
+                    yield line
+            if pending is not None: # pragma: no cover (missing newline)
+                yield pending
+
+        # Yield the chunks or lines as requested
        if binary:
            for chunk in response.iter_content(chunk_size = 65536):
                yield chunk
        elif isjson:
-            for line in response.iter_lines():
+            for line in lines(response.iter_content(chunk_size = 1),
+                              ending = '\r\n'):
                yield json.loads(line)
        else:
-            for line in response.iter_lines():
+            for line in lines(response.iter_content(chunk_size = 65536),
+                              ending = '\n'):
                yield line

    def get_gen(self, url, params = None, binary = False):
--- a/nilmdb/cmdline/cmdline.py
+++ b/nilmdb/cmdline/cmdline.py
@@ -29,6 +29,14 @@ for cmd in subcommands:
    subcmd_mods[cmd] = __import__("nilmdb.cmdline." + cmd, fromlist = [ cmd ])

 class JimArgumentParser(argparse.ArgumentParser):
+    def parse_args(self, args=None, namespace=None):
+        # Look for --version anywhere and change it to just "nilmtool
+        # --version".  This makes "nilmtool cmd --version" work, which
+        # is needed by help2man.
+        if "--version" in (args or sys.argv[1:]):
+            args = [ "--version" ]
+        return argparse.ArgumentParser.parse_args(self, args, namespace)
+
    def error(self, message):
        self.print_usage(sys.stderr)
        self.exit(2, sprintf("error: %s\n", message))
@@ -72,10 +80,16 @@ class Complete(object): # pragma: no cover
        path = parsed_args.path
        if not path:
            return []
-        return ( self.escape(k + '=' + v)
-                 for (k,v) in client.stream_get_metadata(path).iteritems()
-                 if k.startswith(prefix) )
-
+        results = []
+        # prefix comes in as UTF-8, but results need to be Unicode,
+        # weird.  Still doesn't work in all cases, but that's bugs in
+        # argcomplete.
+        prefix = nilmdb.utils.unicode.decode(prefix)
+        for (k,v) in client.stream_get_metadata(path).iteritems():
+            kv = self.escape(k + '=' + v)
+            if kv.startswith(prefix):
+                results.append(kv)
+        return results

 class Cmdline(object):

--- a/nilmdb/cmdline/info.py
+++ b/nilmdb/cmdline/info.py
@@ -21,5 +21,8 @@ def cmd_info(self):
    printf("Server URL: %s\n", self.client.geturl())
    dbinfo = self.client.dbinfo()
    printf("Server database path: %s\n", dbinfo["path"])
-    printf("Server database size: %s\n", human_size(dbinfo["size"]))
-    printf("Server database free space: %s\n", human_size(dbinfo["free"]))
+    for (desc, field) in [("used by NilmDB", "size"),
+                          ("used by other", "other"),
+                          ("reserved", "reserved"),
+                          ("free", "free")]:
+        printf("Server disk space %s: %s\n", desc, human_size(dbinfo[field]))
--- a/nilmdb/cmdline/intervals.py
+++ b/nilmdb/cmdline/intervals.py
@@ -1,5 +1,6 @@
 from nilmdb.utils.printf import *
 import nilmdb.utils.time
+from nilmdb.utils.interval import Interval

 import fnmatch
 import argparse
@@ -42,6 +43,8 @@ def setup(self, sub):
    group = cmd.add_argument_group("Misc options")
    group.add_argument("-T", "--timestamp-raw", action="store_true",
                       help="Show raw timestamps when printing times")
+    group.add_argument("-o", "--optimize", action="store_true",
+                       help="Optimize (merge adjacent) intervals")

    return cmd

@@ -58,9 +61,16 @@ def cmd_intervals(self):
        time_string = nilmdb.utils.time.timestamp_to_human

    try:
-           for (start, end) in self.client.stream_intervals(
-               self.args.path, self.args.start, self.args.end, self.args.diff):
-               printf("[ %s -> %s ]\n", time_string(start), time_string(end))
+        intervals = ( Interval(start, end) for (start, end) in
+                      self.client.stream_intervals(self.args.path,
+                                                   self.args.start,
+                                                   self.args.end,
+                                                   self.args.diff) )
+        if self.args.optimize:
+            intervals = nilmdb.utils.interval.optimize(intervals)
+        for i in intervals:
+            printf("[ %s -> %s ]\n", time_string(i.start), time_string(i.end))
+
    except nilmdb.client.ClientError as e:
        self.die("error listing intervals: %s", str(e))

--- a/nilmdb/cmdline/metadata.py
+++ b/nilmdb/cmdline/metadata.py
@@ -41,10 +41,10 @@ def cmd_metadata(self):
    if self.args.set is not None or self.args.update is not None:
        # Either set, or update
        if self.args.set is not None:
-            keyvals = self.args.set
+            keyvals = map(nilmdb.utils.unicode.decode, self.args.set)
            handler = self.client.stream_set_metadata
        else:
-            keyvals = self.args.update
+            keyvals = map(nilmdb.utils.unicode.decode, self.args.update)
            handler = self.client.stream_update_metadata

        # Extract key=value pairs
@@ -62,7 +62,9 @@ def cmd_metadata(self):
            self.die("error setting/updating metadata: %s", str(e))
    elif self.args.delete is not None:
        # Delete (by setting values to empty strings)
-        keys = self.args.delete or None
+        keys = None
+        if self.args.delete:
+            keys = map(nilmdb.utils.unicode.decode, self.args.delete)
        try:
            data = self.client.stream_get_metadata(self.args.path, keys)
            for key in data:
@@ -72,7 +74,9 @@ def cmd_metadata(self):
            self.die("error deleting metadata: %s", str(e))
    else:
        # Get (or unspecified)
-        keys = self.args.get or None
+        keys = None
+        if self.args.get:
+            keys = map(nilmdb.utils.unicode.decode, self.args.get)
        try:
            data = self.client.stream_get_metadata(self.args.path, keys)
        except nilmdb.client.ClientError as e:
@@ -81,4 +85,6 @@ def cmd_metadata(self):
            # Print nonexistant keys as having empty value
            if value is None:
                value = ""
-            printf("%s=%s\n", key, value)
+            printf("%s=%s\n",
+                   nilmdb.utils.unicode.encode(key),
+                   nilmdb.utils.unicode.encode(value))
--- a/nilmdb/fsck/.#fsck.py
+++ b/nilmdb/fsck/.#fsck.py
@@ -0,0 +1 @@
+jim@pilot.lees.18066:1373305995
--- a/nilmdb/fsck/init.py
+++ b/nilmdb/fsck/init.py
@@ -0,0 +1,5 @@
+"""nilmdb.fsck"""
+
+from __future__ import absolute_import
+
+from nilmdb.fsck.fsck import Fsck
--- a/nilmdb/fsck/fsck.py
+++ b/nilmdb/fsck/fsck.py
@@ -0,0 +1,194 @@
+# -*- coding: utf-8 -*-
+
+"""Check database consistency"""
+
+import nilmdb.utils
+import nilmdb.server
+from nilmdb.utils.interval import IntervalError
+from nilmdb.server.interval import Interval, IntervalSet
+from nilmdb.utils.printf import *
+from nilmdb.utils.time import timestamp_to_string
+
+from collections import defaultdict
+import sqlite3
+import os
+import progressbar
+import time
+import cPickle as pickle
+
+class FsckError(Exception):
+    def __init__(self, format, *args):
+        Exception.__init__(self, sprintf(format, *args))
+
+def log(format, *args):
+    """Emit a printf-style progress message via printf()."""
+    printf(format, *args)
+
+def err(format, *args):
+    """Emit a printf-style message to sys.stderr via fprintf()."""
+    # NOTE(review): this module has no explicit "import sys"; presumably
+    # the name arrives through "from nilmdb.utils.printf import *" --
+    # confirm, or import it explicitly.
+    fprintf(sys.stderr, format, *args)
+
+class Progress(object):
+    def __init__(self, maxval):
+        self.bar = progressbar.ProgressBar(maxval = maxval)
+        if self.bar.term_width == 0:
+            self.bar.term_width = 75
+    def __enter__(self):
+        self.bar.start()
+        self.last_update = 0
+        return self
+    def __exit__(self, exc_type, exc_value, traceback):
+        if exc_type is None:
+            self.bar.finish()
+        else:
+            printf("\n")
+    def update(self, val):
+        self.bar.update(val)
+        #now = time.time()
+        #if now - self.last_update < 0.005:
+        #   time.sleep(0.005)
+        #self.last_update = now
+
+class Fsck(object):
+
+    def __init__(self, path):
+        self.basepath = path
+        self.sqlpath = os.path.join(path, "data.sql")
+        self.bulkpath = os.path.join(path, "data")
+        self.bulklock = os.path.join(path, "data.lock")
+
+    def check(self):
+        self.check_paths()
+        self.check_sql()
+        self.check_streams()
+        log("ok\n")
+
+    def check_paths(self):
+        log("checking paths\n")
+        if not os.path.isfile(self.sqlpath):
+            raise FsckError("SQL database missing")
+        if not os.path.isdir(self.bulkpath):
+            raise FsckError("Bulk data directory missing")
+        with open(self.bulklock, "w") as lockfile:
+            if not nilmdb.utils.lock.exclusive_lock(lockfile):
+                raise FsckError('database already locked by another process')
+        self.bulk = nilmdb.server.bulkdata.BulkData(self.basepath)
+        # override must_close warning
+        if "_must_close" in dir(self.bulk):
+            del self.bulk._must_close
+
+    def check_sql(self):
+        log("checking sqlite database\n")
+
+        self.sql = sqlite3.connect(self.sqlpath)
+        with self.sql as con:
+            ver = con.execute("PRAGMA user_version").fetchone()[0]
+            good = max(nilmdb.server.nilmdb._sql_schema_updates.keys())
+            if ver != good:
+                raise FsckError("database version %d too old, should be %d",
+                                ver, good)
+            self.stream_path = {}
+            self.stream_layout = {}
+            log("  loading paths\n")
+            result = con.execute("SELECT id, path, layout FROM streams")
+            for r in result:
+                if r[0] in self.stream_path:
+                    raise FsckError("duplicated ID %d in stream IDs", r[0])
+                self.stream_path[r[0]] = r[1]
+                self.stream_layout[r[0]] = r[2]
+
+            log("  loading intervals\n")
+            self.stream_interval = defaultdict(list)
+            result = con.execute("SELECT stream_id, start_time, end_time, "
+                                 "start_pos, end_pos FROM ranges")
+            for r in result:
+                if r[0] not in self.stream_path:
+                    raise FsckError("interval ID %d not in streams", k)
+                self.stream_interval[r[0]].append((r[1], r[2], r[3], r[4]))
+
+            log("  loading metadata\n")
+            self.stream_meta = defaultdict(dict)
+            result = con.execute("SELECT stream_id, key, value FROM metadata")
+            for r in result:
+                if r[0] not in self.stream_path:
+                    raise FsckError("metadata ID %d not in streams", k)
+                if r[1] in self.stream_meta[r[0]]:
+                    raise FsckError("duplicate metadata key '%s' for stream %d",
+                                    r[1], r[0])
+                self.stream_meta[r[0]][r[1]] = r[2]
+
+    def check_streams(self):
+        log("checking streams\n")
+        ids = self.stream_path.keys()
+        with Progress(len(ids)) as pbar:
+            for i, sid in enumerate(ids):
+                pbar.update(i)
+                path = self.stream_path[sid]
+
+                # unique path, valid layout
+                if self.stream_path.values().count(path) != 1:
+                    raise FsckError("duplicated path %s", path)
+                layout = self.stream_layout[sid].split('_')[0]
+                if layout not in ('int8', 'int16', 'int32', 'int64',
+                                  'uint8', 'uint16', 'uint32', 'uint64',
+                                  'float32', 'float64'):
+                    raise FsckError("bad layout %s for %s", layout, path)
+                count = int(self.stream_layout[sid].split('_')[1])
+                if count < 1 or count > 1024:
+                    raise FsckError("bad count %d for %s", count, path)
+
+                # must exist in bulkdata
+                bulk = self.bulkpath + path
+                if not os.path.isdir(bulk):
+                    raise FsckError("%s: missing bulkdata dir", path)
+                if not nilmdb.server.bulkdata.Table.exists(bulk):
+                    raise FsckError("%s: bad bulkdata table", path)
+
+                # intervals don't overlap.  Abuse IntervalSet to check
+                # for intervals in file positions, too.
+                timeiset = IntervalSet()
+                posiset = IntervalSet()
+                for (stime, etime, spos, epos) in self.stream_interval[sid]:
+                    new = Interval(stime, etime)
+                    try:
+                        timeiset += new
+                    except IntervalError:
+                        raise FsckError("%s: overlap in intervals:\n"
+                                        "set: %s\nnew: %s\n",
+                                        path, str(timeiset), str(new))
+                    if spos != epos:
+                        new = Interval(spos, epos)
+                        try:
+                            posiset += new
+                        except IntervalError:
+                            raise FsckError("%s: overlap in file offsets:\n"
+                                            "set: %s\nnew: %s\n",
+                                            path, str(posiset), str(new))
+
+                # check bulkdata
+                self.check_bulkdata(sid, path, bulk)
+
+                continue
+                # verify we can can open it with bulkdata
+                try:
+                    tab = None
+                    try:
+                        tab = nilmdb.server.bulkdata.Table(bulk)
+                    except Exception as e:
+                        raise FsckError("%s: can't open bulkdata: %s",
+                                        path, str(e))
+                    self.check_bulkdata(path, tab)
+                finally:
+                    if tab:
+                        tab.close()
+
+    def check_bulkdata(self, sid, path, bulk):
+        with open(os.path.join(bulk, "_format"), "rb") as f:
+            fmt = pickle.load(f)
+        if fmt["version"] != 3:
+            raise FsckError("%s: bad or unsupported bulkdata version %d",
+                            path, fmt["version"])
+        row_per_file = int(fmt["rows_per_file"])
+        files_per_dir = int(fmt["files_per_dir"])
+        layout = fmt["layout"]
+        if layout != self.stream_layout[sid]:
+            raise FsckError("%s: layout mismatch %s != %s", path,
+                            layout, self.stream_layout[sid])
--- a/nilmdb/scripts/nilmdb_fsck.py
+++ b/nilmdb/scripts/nilmdb_fsck.py
@@ -0,0 +1,23 @@
+#!/usr/bin/python
+
+import nilmdb.fsck
+import argparse
+import os
+import sys
+
+def main():
+    """Main entry point for the 'nilmdb-fsck' command line script"""
+
+    parser = argparse.ArgumentParser(
+        description = 'Check database consistency',
+        formatter_class = argparse.ArgumentDefaultsHelpFormatter)
+    parser.add_argument("-V", "--version", action="version",
+                        version = nilmdb.__version__)
+    parser.add_argument('-d', '--database', help = 'Database directory',
+                        default = "./db")
+    args = parser.parse_args()
+
+    nilmdb.fsck.Fsck(args.database).check()
+
+if __name__ == "__main__":
+    main()
--- a/nilmdb/server/bulkdata.py
+++ b/nilmdb/server/bulkdata.py
@@ -19,8 +19,8 @@ from . import rocket

 # Up to 256 open file descriptors at any given time.
 # These variables are global so they can be used in the decorator arguments.
-table_cache_size = 16
-fd_cache_size = 16
+table_cache_size = 32
+fd_cache_size = 8

@nilmdb.utils.must_close(wrap_verify = False)
 class BulkData(object):
@@ -330,7 +330,8 @@ class Table(object):

        # Find the last directory.  We sort and loop through all of them,
        # starting with the numerically greatest, because the dirs could be
-        # empty if something was deleted.
+        # empty if something was deleted but the directory was unexpectedly
+        # not deleted.
        subdirs = sorted(filter(regex.search, os.listdir(self.root)),
                         key = lambda x: int(x, 16), reverse = True)

--- a/nilmdb/server/nilmdb.py
+++ b/nilmdb/server/nilmdb.py
@@ -176,7 +176,7 @@ class NilmDB(object):
            raise NilmDBError("start must precede end")
        return (start, end)

-    @nilmdb.utils.lru_cache(size = 16)
+    @nilmdb.utils.lru_cache(size = 64)
    def _get_intervals(self, stream_id):
        """
        Return a mutable IntervalSet corresponding to the given stream ID.
--- a/nilmdb/server/rocket.c
+++ b/nilmdb/server/rocket.c
@@ -5,6 +5,9 @@
 #include <ctype.h>
 #include <stdint.h>

+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+
 /* Values missing from stdint.h */
 #define UINT8_MIN 0
 #define UINT16_MIN 0
@@ -19,13 +22,6 @@

 typedef int64_t timestamp_t;

-/* This code probably needs to be double-checked for the case where
-   sizeof(long) != 8, so enforce that here with something that will
-   fail at build time.  We assume that the python integer type can
-   hold an int64_t. */
-const static char __long_ok[1 - 2*!(sizeof(int64_t) ==
-				    sizeof(long int))] = { 0 };
-
 /* Somewhat arbitrary, just so we can use fixed sizes for strings
   etc. */
 static const int MAX_LAYOUT_COUNT = 1024;
@@ -58,7 +54,7 @@ static PyObject *raise_str(int line, int col, int code, const char *string)
 static PyObject *raise_int(int line, int col, int code, int64_t num)
 {
 	PyObject *o;
-	o = Py_BuildValue("(iiil)", line, col, code, num);
+	o = Py_BuildValue("(iiiL)", line, col, code, (long long)num);
 	if (o != NULL) {
 		PyErr_SetObject(ParseError, o);
 		Py_DECREF(o);
@@ -249,11 +245,11 @@ static PyObject *Rocket_get_file_size(Rocket *self)
 /****
 * Append from string
 */
-static inline long int strtol10(const char *nptr, char **endptr) {
-	return strtol(nptr, endptr, 10);
+/* Base-10 wrappers with the two-argument signature of strtod, so they
+   can be used interchangeably in the CS() macro below.  They return
+   the full (unsigned) long long so that 64-bit values survive on
+   platforms where long is only 32 bits wide. */
+static inline long long strtoll10(const char *nptr, char **endptr) {
+	return strtoll(nptr, endptr, 10);
 }
-static inline long int strtoul10(const char *nptr, char **endptr) {
-	return strtoul(nptr, endptr, 10);
+static inline unsigned long long strtoull10(const char *nptr, char **endptr) {
+	return strtoull(nptr, endptr, 10);
 }

 /* .append_string(count, data, offset, linenum, start, end, last_timestamp) */
@@ -264,6 +260,7 @@ static PyObject *Rocket_append_string(Rocket *self, PyObject *args)
 	int offset;
 	const char *linestart;
 	int linenum;
+        long long ll1, ll2, ll3;
 	timestamp_t start;
 	timestamp_t end;
 	timestamp_t last_timestamp;
@@ -280,10 +277,13 @@ static PyObject *Rocket_append_string(Rocket *self, PyObject *args)
 	   but we need the null termination for strto*.  If we had
 	   strnto* that took a length, we could use t# and not require
 	   a copy. */
-	if (!PyArg_ParseTuple(args, "isiilll:append_string", &count,
+	if (!PyArg_ParseTuple(args, "isiiLLL:append_string", &count,
 			      &data, &offset, &linenum,
-			      &start, &end, &last_timestamp))
+			      &ll1, &ll2, &ll3))
 		return NULL;
+        start = ll1;
+        end = ll2;
+        last_timestamp = ll3;

 	/* Skip spaces, but don't skip over a newline. */
 #define SKIP_BLANK(buf) do {			\
@@ -372,14 +372,14 @@ static PyObject *Rocket_append_string(Rocket *self, PyObject *args)
 				goto extra_data_on_line;		\
 			break

-			CS(INT8,   strtol10,  t64.i, t8.i,  t8.u,         , 1);
-			CS(UINT8,  strtoul10, t64.u, t8.u,  t8.u,         , 1);
-			CS(INT16,  strtol10,  t64.i, t16.i, t16.u, le16toh, 2);
-			CS(UINT16, strtoul10, t64.u, t16.u, t16.u, le16toh, 2);
-			CS(INT32,  strtol10,  t64.i, t32.i, t32.u, le32toh, 4);
-			CS(UINT32, strtoul10, t64.u, t32.u, t32.u, le32toh, 4);
-			CS(INT64,  strtol10,  t64.i, t64.i, t64.u, le64toh, 8);
-			CS(UINT64, strtoul10, t64.u, t64.u, t64.u, le64toh, 8);
+			CS(INT8,   strtoll10,  t64.i, t8.i,  t8.u,         , 1);
+			CS(UINT8,  strtoull10, t64.u, t8.u,  t8.u,         , 1);
+			CS(INT16,  strtoll10,  t64.i, t16.i, t16.u, le16toh, 2);
+			CS(UINT16, strtoull10, t64.u, t16.u, t16.u, le16toh, 2);
+			CS(INT32,  strtoll10,  t64.i, t32.i, t32.u, le32toh, 4);
+			CS(UINT32, strtoull10, t64.u, t32.u, t32.u, le32toh, 4);
+			CS(INT64,  strtoll10,  t64.i, t64.i, t64.u, le64toh, 8);
+			CS(UINT64, strtoull10, t64.u, t64.u, t64.u, le64toh, 8);
 			CS(FLOAT32, strtod,   t64.d, t32.f, t32.u, le32toh, 4);
 			CS(FLOAT64, strtod,   t64.d, t64.d, t64.u, le64toh, 8);
 #undef CS
@@ -397,7 +397,8 @@ static PyObject *Rocket_append_string(Rocket *self, PyObject *args)
 	/* Build return value and return */
 	offset = buf - data;
 	PyObject *o;
-	o = Py_BuildValue("(iili)", written, offset, last_timestamp, linenum);
+	o = Py_BuildValue("(iiLi)", written, offset,
+                          (long long)last_timestamp, linenum);
 	return o;
 err:
 	PyErr_SetFromErrno(PyExc_OSError);
@@ -431,14 +432,18 @@ static PyObject *Rocket_append_binary(Rocket *self, PyObject *args)
        int data_len;
        int linenum;
 	int offset;
+        long long ll1, ll2, ll3;
 	timestamp_t start;
 	timestamp_t end;
 	timestamp_t last_timestamp;

-	if (!PyArg_ParseTuple(args, "it#iilll:append_binary",
+	if (!PyArg_ParseTuple(args, "it#iiLLL:append_binary",
                              &count, &data, &data_len, &offset,
-                              &linenum, &start, &end, &last_timestamp))
+                              &linenum, &ll1, &ll2, &ll3))
 		return NULL;
+        start = ll1;
+        end = ll2;
+        last_timestamp = ll3;

        /* Advance to offset */
        if (offset > data_len)
@@ -476,8 +481,8 @@ static PyObject *Rocket_append_binary(Rocket *self, PyObject *args)

 	/* Build return value and return */
 	PyObject *o;
-	o = Py_BuildValue("(iili)", rows, offset + rows * self->binary_size,
-                          last_timestamp, linenum);
+	o = Py_BuildValue("(iiLi)", rows, offset + rows * self->binary_size,
+                          (long long)last_timestamp, linenum);
 	return o;
 }

@@ -534,7 +539,7 @@ static PyObject *Rocket_extract_string(Rocket *self, PyObject *args)
 		if (fread(&t64.u, 8, 1, self->file) != 1)
 			goto err;
 		t64.u = le64toh(t64.u);
-		ret = sprintf(&str[len], "%ld", t64.i);
+		ret = sprintf(&str[len], "%" PRId64, t64.i);
 		if (ret <= 0)
 			goto err;
 		len += ret;
@@ -556,14 +561,14 @@ static PyObject *Rocket_extract_string(Rocket *self, PyObject *args)
 				len += ret;				\
 			}						\
 			break
-			CASE(INT8,   "%hhd",   t8.i,  t8.u,         , 1);
-			CASE(UINT8,  "%hhu",   t8.u,  t8.u,         , 1);
-			CASE(INT16,  "%hd",    t16.i, t16.u, le16toh, 2);
-			CASE(UINT16, "%hu",    t16.u, t16.u, le16toh, 2);
-			CASE(INT32,  "%d",     t32.i, t32.u, le32toh, 4);
-			CASE(UINT32, "%u",     t32.u, t32.u, le32toh, 4);
-			CASE(INT64,  "%ld",    t64.i, t64.u, le64toh, 8);
-			CASE(UINT64, "%lu",    t64.u, t64.u, le64toh, 8);
+			CASE(INT8,   "%" PRId8,  t8.i,  t8.u,         , 1);
+			CASE(UINT8,  "%" PRIu8,  t8.u,  t8.u,         , 1);
+			CASE(INT16,  "%" PRId16, t16.i, t16.u, le16toh, 2);
+			CASE(UINT16, "%" PRIu16, t16.u, t16.u, le16toh, 2);
+			CASE(INT32,  "%" PRId32, t32.i, t32.u, le32toh, 4);
+			CASE(UINT32, "%" PRIu32, t32.u, t32.u, le32toh, 4);
+			CASE(INT64,  "%" PRId64, t64.i, t64.u, le64toh, 8);
+			CASE(UINT64, "%" PRIu64, t64.u, t64.u, le64toh, 8);
 			/* These next two are a bit debatable.  floats
 			   are 6-9 significant figures, so we print 7.
 			   Doubles are 15-19, so we print 17.  This is
@@ -653,7 +658,7 @@ static PyObject *Rocket_extract_timestamp(Rocket *self, PyObject *args)

 	/* Convert and return */
 	t64.u = le64toh(t64.u);
-	return Py_BuildValue("l", t64.i);
+	return Py_BuildValue("L", (long long)t64.i);
 }

 /****
--- a/nilmdb/server/server.py
+++ b/nilmdb/server/server.py
@@ -17,126 +17,26 @@ import decorator
 import psutil
 import traceback

+from nilmdb.server.serverutil import (
+    chunked_response,
+    response_type,
+    workaround_cp_bug_1200,
+    exception_to_httperror,
+    CORS_allow,
+    json_to_request_params,
+    json_error_page,
+    cherrypy_start,
+    cherrypy_stop,
+    bool_param,
+    )
+
+# Add CORS_allow tool
+cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow)
+
 class NilmApp(object):
    def __init__(self, db):
        self.db = db

-# Decorators
-def chunked_response(func):
-    """Decorator to enable chunked responses."""
-    # Set this to False to get better tracebacks from some requests
-    # (/stream/extract, /stream/intervals).
-    func._cp_config = { 'response.stream': True }
-    return func
-
-def response_type(content_type):
-    """Return a decorator-generating function that sets the
-    response type to the specified string."""
-    def wrapper(func, *args, **kwargs):
-        cherrypy.response.headers['Content-Type'] = content_type
-        return func(*args, **kwargs)
-    return decorator.decorator(wrapper)
-
-@decorator.decorator
-def workaround_cp_bug_1200(func, *args, **kwargs): # pragma: no cover
-    """Decorator to work around CherryPy bug #1200 in a response
-    generator.
-
-    Even if chunked responses are disabled, LookupError or
-    UnicodeError exceptions may still be swallowed by CherryPy due to
-    bug #1200.  This throws them as generic Exceptions instead so that
-    they make it through.
-    """
-    exc_info = None
-    try:
-        for val in func(*args, **kwargs):
-            yield val
-    except (LookupError, UnicodeError):
-        # Re-raise it, but maintain the original traceback
-        exc_info = sys.exc_info()
-        new_exc = Exception(exc_info[0].__name__ + ": " + str(exc_info[1]))
-        raise new_exc, None, exc_info[2]
-    finally:
-        del exc_info
-
-def exception_to_httperror(*expected):
-    """Return a decorator-generating function that catches expected
-    errors and throws a HTTPError describing it instead.
-
-        @exception_to_httperror(NilmDBError, ValueError)
-        def foo():
-            pass
-    """
-    def wrapper(func, *args, **kwargs):
-        exc_info = None
-        try:
-            return func(*args, **kwargs)
-        except expected:
-            # Re-raise it, but maintain the original traceback
-            exc_info = sys.exc_info()
-            new_exc = cherrypy.HTTPError("400 Bad Request", str(exc_info[1]))
-            raise new_exc, None, exc_info[2]
-        finally:
-            del exc_info
-    # We need to preserve the function's argspecs for CherryPy to
-    # handle argument errors correctly.  Decorator.decorator takes
-    # care of that.
-    return decorator.decorator(wrapper)
-
-# Custom CherryPy tools
-
-def CORS_allow(methods):
-    """This does several things:
-
-    Handles CORS preflight requests.
-    Adds Allow: header to all requests.
-    Raise 405 if request.method not in method.
-
-    It is similar to cherrypy.tools.allow, with the CORS stuff added.
-    """
-    request = cherrypy.request.headers
-    response = cherrypy.response.headers
-
-    if not isinstance(methods, (tuple, list)): # pragma: no cover
-        methods = [ methods ]
-    methods = [ m.upper() for m in methods if m ]
-    if not methods: # pragma: no cover
-        methods = [ 'GET', 'HEAD' ]
-    elif 'GET' in methods and 'HEAD' not in methods: # pragma: no cover
-        methods.append('HEAD')
-    response['Allow'] = ', '.join(methods)
-
-    # Allow all origins
-    if 'Origin' in request:
-        response['Access-Control-Allow-Origin'] = request['Origin']
-
-    # If it's a CORS request, send response.
-    request_method = request.get("Access-Control-Request-Method", None)
-    request_headers = request.get("Access-Control-Request-Headers", None)
-    if (cherrypy.request.method == "OPTIONS" and
-        request_method and request_headers):
-        response['Access-Control-Allow-Headers'] = request_headers
-        response['Access-Control-Allow-Methods'] = ', '.join(methods)
-        # Try to stop further processing and return a 200 OK
-        cherrypy.response.status = "200 OK"
-        cherrypy.response.body = ""
-        cherrypy.request.handler = lambda: ""
-        return
-
-    # Reject methods that were not explicitly allowed
-    if cherrypy.request.method not in methods:
-        raise cherrypy.HTTPError(405)
-
-cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow)
-
-# Helper for json_in tool to process JSON data into normal request
-# parameters.
-def json_to_request_params(body):
-    cherrypy.lib.jsontools.json_processor(body)
-    if not isinstance(cherrypy.request.json, dict):
-        raise cherrypy.HTTPError(415)
-    cherrypy.request.params.update(cherrypy.request.json)
-
 # CherryPy apps
 class Root(NilmApp):
    """Root application for NILM database"""
@@ -147,7 +47,10 @@ class Root(NilmApp):
    # /
    @cherrypy.expose
    def index(self):
-        raise cherrypy.NotFound()
+        cherrypy.response.headers['Content-Type'] = 'text/plain'
+        msg = sprintf("This is NilmDB version %s, running on host %s.\n",
+                      nilmdb.__version__, socket.getfqdn())
+        return msg

    # /favicon.ico
    @cherrypy.expose
@@ -167,9 +70,13 @@ class Root(NilmApp):
        """Return a dictionary with the database path,
        size of the database in bytes, and free disk space in bytes"""
        path = self.db.get_basepath()
+        usage = psutil.disk_usage(path)
+        dbsize = nilmdb.utils.du(path)
        return { "path": path,
-                 "size": nilmdb.utils.du(path),
-                 "free": psutil.disk_usage(path).free }
+                 "size": dbsize,
+                 "other": usage.used - dbsize,
+                 "reserved": usage.total - usage.used - usage.free,
+                 "free": usage.free }

 class Stream(NilmApp):
    """Stream-specific operations"""
@@ -177,10 +84,18 @@ class Stream(NilmApp):
    # Helpers
    def _get_times(self, start_param, end_param):
        (start, end) = (None, None)
-        if start_param is not None:
-            start = string_to_timestamp(start_param)
-        if end_param is not None:
-            end = string_to_timestamp(end_param)
+        try:
+            if start_param is not None:
+                start = string_to_timestamp(start_param)
+        except Exception:
+            raise cherrypy.HTTPError("400 Bad Request", sprintf(
+                "invalid start (%s): must be a numeric timestamp", start_param))
+        try:
+            if end_param is not None:
+                end = string_to_timestamp(end_param)
+        except Exception:
+            raise cherrypy.HTTPError("400 Bad Request", sprintf(
+                "invalid end (%s): must be a numeric timestamp", end_param))
        if start is not None and end is not None:
            if start >= end:
                raise cherrypy.HTTPError(
@@ -199,10 +114,10 @@ class Stream(NilmApp):
        layout parameter, just list streams that match the given path
        or layout.

-        If extent is not given, returns a list of lists containing
-        the path and layout: [ path, layout ]
+        If extended is missing or zero, returns a list of lists
+        containing the path and layout: [ path, layout ]

-        If extended is provided, returns a list of lists containing
+        If extended is true, returns a list of lists containing
        extended info: [ path, layout, extent_min, extent_max,
        total_rows, total_seconds ].  More data may be added.
        """
@@ -315,6 +230,8 @@ class Stream(NilmApp):
        little-endian and matches the database types (including an
        int64 timestamp).
        """
+        binary = bool_param(binary)
+
        # Important that we always read the input before throwing any
        # errors, to keep lengths happy for persistent connections.
        # Note that CherryPy 3.2.2 has a bug where this fails for GET
@@ -439,6 +356,10 @@ class Stream(NilmApp):
        little-endian and matches the database types (including an
        int64 timestamp).
        """
+        binary = bool_param(binary)
+        markup = bool_param(markup)
+        count = bool_param(count)
+
        (start, end) = self._get_times(start, end)

        # Check path and get layout
@@ -566,70 +487,14 @@ class Server(object):

    def json_error_page(self, status, message, traceback, version):
        """Return a custom error page in JSON so the client can parse it"""
-        errordata = { "status" : status,
-                      "message" : message,
-                      "traceback" : traceback }
-        # Don't send a traceback if the error was 400-499 (client's fault)
-        try:
-            code = int(status.split()[0])
-            if not self.force_traceback:
-                if code >= 400 and code <= 499:
-                    errordata["traceback"] = ""
-        except Exception: # pragma: no cover
-            pass
-        # Override the response type, which was previously set to text/html
-        cherrypy.serving.response.headers['Content-Type'] = (
-            "application/json;charset=utf-8" )
-        # Undo the HTML escaping that cherrypy's get_error_page function applies
-        # (cherrypy issue 1135)
-        for k, v in errordata.iteritems():
-            v = v.replace("&lt;","<")
-            v = v.replace("&gt;",">")
-            v = v.replace("&amp;","&")
-            errordata[k] = v
-        return json.dumps(errordata, separators=(',',':'))
+        return json_error_page(status, message, traceback, version,
+                               self.force_traceback)

    def start(self, blocking = False, event = None):
-
-        if not self.embedded: # pragma: no cover
-            # Handle signals nicely
-            if hasattr(cherrypy.engine, "signal_handler"):
-                cherrypy.engine.signal_handler.subscribe()
-            if hasattr(cherrypy.engine, "console_control_handler"):
-                cherrypy.engine.console_control_handler.subscribe()
-
-        # Cherrypy stupidly calls os._exit(70) when it can't bind the
-        # port.  At least try to print a reasonable error and continue
-        # in this case, rather than just dying silently (as we would
-        # otherwise do in embedded mode)
-        real_exit = os._exit
-        def fake_exit(code): # pragma: no cover
-            if code == os.EX_SOFTWARE:
-                fprintf(sys.stderr, "error: CherryPy called os._exit!\n")
-            else:
-                real_exit(code)
-        os._exit = fake_exit
-        cherrypy.engine.start()
-        os._exit = real_exit
-
-        # Signal that the engine has started successfully
-        if event is not None:
-            event.set()
-
-        if blocking:
-            try:
-                cherrypy.engine.wait(cherrypy.engine.states.EXITING,
-                                     interval = 0.1, channel = 'main')
-            except (KeyboardInterrupt, IOError): # pragma: no cover
-                cherrypy.engine.log('Keyboard Interrupt: shutting down bus')
-                cherrypy.engine.exit()
-            except SystemExit: # pragma: no cover
-                cherrypy.engine.log('SystemExit raised: shutting down bus')
-                cherrypy.engine.exit()
-                raise
+        cherrypy_start(blocking, event, self.embedded)

    def stop(self):
-        cherrypy.engine.exit()
+        cherrypy_stop()

 # Use a single global nilmdb.server.NilmDB and nilmdb.server.Server
 # instance since the database can only be opened once.  For this to
--- a/nilmdb/server/serverutil.py
+++ b/nilmdb/server/serverutil.py
@@ -0,0 +1,214 @@
+"""Miscellaneous decorators and other helpers for running a CherryPy
+server"""
+
+import cherrypy
+import sys
+import os
+import decorator
+import simplejson as json
+
+# Helper to parse parameters into booleans
+def bool_param(s):
+    """Return a bool indicating whether parameter 's' was True or False,
+    supporting a few different types for 's'."""
+    try:
+        ss = s.lower()
+        if ss in [ "0", "false", "f", "no", "n" ]:
+            return False
+        if ss in [ "1", "true", "t", "yes", "y" ]:
+            return True
+    except Exception:
+        return bool(s)
+    raise cherrypy.HTTPError("400 Bad Request",
+                             "can't parse parameter: " + ss)
+
+# Decorators
+def chunked_response(func):
+    """Decorator to enable chunked responses."""
+    # Set this to False to get better tracebacks from some requests
+    # (/stream/extract, /stream/intervals).
+    func._cp_config = { 'response.stream': True }
+    return func
+
+def response_type(content_type):
+    """Return a decorator-generating function that sets the
+    response type to the specified string."""
+    def wrapper(func, *args, **kwargs):
+        cherrypy.response.headers['Content-Type'] = content_type
+        return func(*args, **kwargs)
+    return decorator.decorator(wrapper)
+
+@decorator.decorator
+def workaround_cp_bug_1200(func, *args, **kwargs): # pragma: no cover
+    """Decorator to work around CherryPy bug #1200 in a response
+    generator.
+
+    Even if chunked responses are disabled, LookupError or
+    UnicodeError exceptions may still be swallowed by CherryPy due to
+    bug #1200.  This throws them as generic Exceptions instead so that
+    they make it through.
+    """
+    exc_info = None
+    try:
+        for val in func(*args, **kwargs):
+            yield val
+    except (LookupError, UnicodeError):
+        # Re-raise it, but maintain the original traceback
+        exc_info = sys.exc_info()
+        new_exc = Exception(exc_info[0].__name__ + ": " + str(exc_info[1]))
+        raise new_exc, None, exc_info[2]
+    finally:
+        del exc_info
+
+def exception_to_httperror(*expected):
+    """Return a decorator-generating function that catches expected
+    errors and throws a HTTPError describing it instead.
+
+        @exception_to_httperror(NilmDBError, ValueError)
+        def foo():
+            pass
+    """
+    def wrapper(func, *args, **kwargs):
+        exc_info = None
+        try:
+            return func(*args, **kwargs)
+        except expected:
+            # Re-raise it, but maintain the original traceback
+            exc_info = sys.exc_info()
+            new_exc = cherrypy.HTTPError("400 Bad Request", str(exc_info[1]))
+            raise new_exc, None, exc_info[2]
+        finally:
+            del exc_info
+    # We need to preserve the function's argspecs for CherryPy to
+    # handle argument errors correctly.  Decorator.decorator takes
+    # care of that.
+    return decorator.decorator(wrapper)
+
+# Custom CherryPy tools
+
+def CORS_allow(methods):
+    """This does several things:
+
+    Handles CORS preflight requests.
+    Adds Allow: header to all requests.
+    Raise 405 if request.method not in methods.
+
+    It is similar to cherrypy.tools.allow, with the CORS stuff added.
+
+    Add this to CherryPy with:
+    cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow)
+    """
+    request = cherrypy.request.headers
+    response = cherrypy.response.headers
+
+    if not isinstance(methods, (tuple, list)): # pragma: no cover
+        methods = [ methods ]
+    methods = [ m.upper() for m in methods if m ]
+    if not methods: # pragma: no cover
+        methods = [ 'GET', 'HEAD' ]
+    elif 'GET' in methods and 'HEAD' not in methods: # pragma: no cover
+        methods.append('HEAD')
+    response['Allow'] = ', '.join(methods)
+
+    # Allow all origins
+    if 'Origin' in request:
+        response['Access-Control-Allow-Origin'] = request['Origin']
+
+    # If it's a CORS request, send response.
+    request_method = request.get("Access-Control-Request-Method", None)
+    request_headers = request.get("Access-Control-Request-Headers", None)
+    if (cherrypy.request.method == "OPTIONS" and
+        request_method and request_headers):
+        response['Access-Control-Allow-Headers'] = request_headers
+        response['Access-Control-Allow-Methods'] = ', '.join(methods)
+        # Try to stop further processing and return a 200 OK
+        cherrypy.response.status = "200 OK"
+        cherrypy.response.body = ""
+        cherrypy.request.handler = lambda: ""
+        return
+
+    # Reject methods that were not explicitly allowed
+    if cherrypy.request.method not in methods:
+        raise cherrypy.HTTPError(405)
+
+
+# Helper for json_in tool to process JSON data into normal request
+# parameters.
+def json_to_request_params(body):
+    cherrypy.lib.jsontools.json_processor(body)
+    if not isinstance(cherrypy.request.json, dict):
+        raise cherrypy.HTTPError(415)
+    cherrypy.request.params.update(cherrypy.request.json)
+
+# Used as an "error_page.default" handler
+def json_error_page(status, message, traceback, version,
+                    force_traceback = False):
+    """Return a custom error page in JSON so the client can parse it"""
+    errordata = { "status" : status,
+                  "message" : message,
+                  "traceback" : traceback }
+    # Don't send a traceback if the error was 400-499 (client's fault)
+    try:
+        code = int(status.split()[0])
+        if not force_traceback:
+            if code >= 400 and code <= 499:
+                errordata["traceback"] = ""
+    except Exception: # pragma: no cover
+        pass
+    # Override the response type, which was previously set to text/html
+    cherrypy.serving.response.headers['Content-Type'] = (
+        "application/json;charset=utf-8" )
+    # Undo the HTML escaping that cherrypy's get_error_page function applies
+    # (cherrypy issue 1135)
+    for k, v in errordata.iteritems():
+        v = v.replace("&lt;","<")
+        v = v.replace("&gt;",">")
+        v = v.replace("&amp;","&")
+        errordata[k] = v
+    return json.dumps(errordata, separators=(',',':'))
+
+# Start/stop CherryPy standalone server
+def cherrypy_start(blocking = False, event = False, embedded = False):
+    """Start the CherryPy engine.  If 'event' is given, event.set() is
+    called once the engine has started; if 'blocking', wait until the
+    engine exits.  Signal handlers are installed unless 'embedded'."""
+    if not embedded: # pragma: no cover
+        # Handle signals nicely
+        if hasattr(cherrypy.engine, "signal_handler"):
+            cherrypy.engine.signal_handler.subscribe()
+        if hasattr(cherrypy.engine, "console_control_handler"):
+            cherrypy.engine.console_control_handler.subscribe()
+
+    # Cherrypy stupidly calls os._exit(70) when it can't bind the
+    # port.  At least try to print a reasonable error and continue
+    # in this case, rather than just dying silently (as we would
+    # otherwise do in embedded mode)
+    real_exit = os._exit
+    def fake_exit(code): # pragma: no cover
+        if code == os.EX_SOFTWARE:
+            sys.stderr.write("error: CherryPy called os._exit!\n")
+        else:
+            real_exit(code)
+    os._exit = fake_exit
+    cherrypy.engine.start()
+    os._exit = real_exit
+
+    # Signal that the engine has started ('event' defaults to False)
+    if event:
+        event.set()
+
+    if blocking:
+        try:
+            cherrypy.engine.wait(cherrypy.engine.states.EXITING,
+                                 interval = 0.1, channel = 'main')
+        except (KeyboardInterrupt, IOError): # pragma: no cover
+            cherrypy.engine.log('Keyboard Interrupt: shutting down bus')
+            cherrypy.engine.exit()
+        except SystemExit: # pragma: no cover
+            cherrypy.engine.log('SystemExit raised: shutting down bus')
+            cherrypy.engine.exit()
+            raise
+
+# Stop CherryPy server
+def cherrypy_stop():
+    cherrypy.engine.exit()
--- a/nilmdb/utils/init.py
+++ b/nilmdb/utils/init.py
@@ -14,3 +14,4 @@ import nilmdb.utils.iterator
 import nilmdb.utils.interval
 import nilmdb.utils.lock
 import nilmdb.utils.sort
+import nilmdb.utils.unicode
--- a/nilmdb/utils/interval.py
+++ b/nilmdb/utils/interval.py
@@ -1,5 +1,6 @@
 """Interval.  Like nilmdb.server.interval, but re-implemented here
-in plain Python so clients have easier access to it.
+in plain Python so clients have easier access to it, and with a few
+helper functions.

 Intervals are half-open, ie. they include data points with timestamps
 [start, end)
@@ -34,6 +35,10 @@ class Interval:
        return ("[" + nilmdb.utils.time.timestamp_to_string(self.start) +
                " -> " + nilmdb.utils.time.timestamp_to_string(self.end) + ")")

+    def human_string(self):
+        return ("[ " + nilmdb.utils.time.timestamp_to_human(self.start) +
+                " -> " + nilmdb.utils.time.timestamp_to_human(self.end) + " ]")
+
    def __cmp__(self, other):
        """Compare two intervals.  If non-equal, order by start then end"""
        return cmp(self.start, other.start) or cmp(self.end, other.end)
@@ -53,18 +58,11 @@ class Interval:
            raise IntervalError("not a subset")
        return Interval(start, end)

-def set_difference(a, b):
-    """
-    Compute the difference (a \\ b) between the intervals in 'a' and
-    the intervals in 'b'; i.e., the ranges that are present in 'self'
-    but not 'other'.
-
-    'a' and 'b' must both be iterables.
-
-    Returns a generator that yields each interval in turn.
-    Output intervals are built as subsets of the intervals in the
-    first argument (a).
-    """
+def _interval_math_helper(a, b, op, subset = True):
+    """Helper for set_difference, intersection functions,
+    to compute interval subsets based on a math operator on ranges
+    present in A and B.  Subsets are computed from A, or new intervals
+    are generated if subset = False."""
    # Iterate through all starts and ends in sorted order.  Add a
    # tag to the iterator so that we can figure out which one they
    # were, after sorting.
@@ -79,28 +77,71 @@ def set_difference(a, b):
    # At each point, evaluate which type of end it is, to determine
    # how to build up the output intervals.
    a_interval = None
-    b_interval = None
+    in_a = False
+    in_b = False
    out_start = None
    for (ts, k, i) in nilmdb.utils.iterator.imerge(a_iter, b_iter):
        if k == 0:
-            # start a interval
            a_interval = i
-            if b_interval is None:
-                out_start = ts
+            in_a = True
        elif k == 1:
-            # start b interval
-            b_interval = i
-            if out_start is not None and out_start != ts:
-                yield a_interval.subset(out_start, ts)
-            out_start = None
+            in_b = True
        elif k == 2:
-            # end a interval
-            if out_start is not None and out_start != ts:
-                yield a_interval.subset(out_start, ts)
-            out_start = None
-            a_interval = None
+            in_a = False
        elif k == 3:
-            # end b interval
-            b_interval = None
-            if a_interval:
-                out_start = ts
+            in_b = False
+        include = op(in_a, in_b)
+        if include and out_start is None:
+            out_start = ts
+        elif not include:
+            if out_start is not None and out_start != ts:
+                if subset:
+                    yield a_interval.subset(out_start, ts)
+                else:
+                    yield Interval(out_start, ts)
+            out_start = None
+
+def set_difference(a, b):
+    """
+    Compute the difference (a \\ b) between the intervals in 'a' and
+    the intervals in 'b'; i.e., the ranges that are present in 'a'
+    but not 'b'.
+
+    'a' and 'b' must both be iterables.
+
+    Returns a generator that yields each interval in turn.
+    Output intervals are built as subsets of the intervals in the
+    first argument (a).
+    """
+    return _interval_math_helper(a, b, (lambda a, b: a and not b))
+
+def intersection(a, b):
+    """
+    Compute the intersection between the intervals in 'a' and the
+    intervals in 'b'; i.e., the ranges that are present in both 'a'
+    and 'b'.
+
+    'a' and 'b' must both be iterables.
+
+    Returns a generator that yields each interval in turn.
+    Output intervals are built as subsets of the intervals in the
+    first argument (a).
+    """
+    return _interval_math_helper(a, b, (lambda a, b: a and b))
+
+def optimize(it):
+    """Given an iterable 'it' of intervals, join together those that are
+    adjacent in time (one's end equals the next one's start) and return
+    a generator yielding the results.  Joined intervals are new Interval
+    objects; the input intervals are left unmodified.
+    """
+    saved_int = None
+    for interval in it:
+        if saved_int is not None:
+            if saved_int.end == interval.start:
+                interval = Interval(saved_int.start, interval.end)
+            else:
+                yield saved_int
+        saved_int = interval
+    if saved_int is not None:
+        yield saved_int
--- a/nilmdb/utils/serializer.py
+++ b/nilmdb/utils/serializer.py
@@ -91,6 +91,20 @@ def serializer_proxy(obj_or_type):
            r = SerializerCallProxy(self.__call_queue, attr, self)
            return r

+        # For an iterable object, on __iter__(), save the object's
+        # iterator and return this proxy.  On next(), call the object's
+        # iterator through this proxy.
+        def __iter__(self):
+            attr = getattr(self.__object, "__iter__")
+            self.__iter = SerializerCallProxy(self.__call_queue, attr, self)()
+            return self
+        def next(self):
+            return SerializerCallProxy(self.__call_queue,
+                                       self.__iter.next, self)()
+
+        def __getitem__(self, key):
+            return self.__getattr__("__getitem__")(key)
+
        def __call__(self, *args, **kwargs):
            """Call this to instantiate the type, if a type was passed
            to serializer_proxy.  Otherwise, pass the call through."""
--- a/nilmdb/utils/time.py
+++ b/nilmdb/utils/time.py
@@ -60,7 +60,7 @@ def rate_to_period(hz, cycles = 1):
 def parse_time(toparse):
    """
    Parse a free-form time string and return a nilmdb timestamp
-    (integer seconds since epoch).  If the string doesn't contain a
+    (integer microseconds since epoch).  If the string doesn't contain a
    timestamp, the current local timezone is assumed (e.g. from the TZ
    env var).
    """
--- a/nilmdb/utils/unicode.py
+++ b/nilmdb/utils/unicode.py
@@ -0,0 +1,29 @@
+import sys
+
+if sys.version_info[0] >= 3: # pragma: no cover (future Python3 compat)
+    text_type = str
+else:
+    text_type = unicode
+
+def encode(u):
+    """Try to encode something from Unicode to a string using the
+    default encoding.  If it fails, try encoding as UTF-8."""
+    if not isinstance(u, text_type):
+        return u
+    try:
+        return u.encode()
+    except UnicodeEncodeError:
+        return u.encode("utf-8")
+
+def decode(s):
+    """Try to decode someting from string to Unicode using the
+    default encoding.  If it fails, try decoding as UTF-8."""
+    if isinstance(s, text_type):
+        return s
+    try:
+        return s.decode()
+    except UnicodeDecodeError:
+        try:
+            return s.decode("utf-8")
+        except UnicodeDecodeError:
+            return s # best we can do
--- a/setup.py
+++ b/setup.py
@@ -126,11 +126,13 @@ setup(name='nilmdb',
                   'nilmdb.client',
                   'nilmdb.cmdline',
                   'nilmdb.scripts',
+                   'nilmdb.fsck',
                   ],
      entry_points = {
          'console_scripts': [
              'nilmtool = nilmdb.scripts.nilmtool:main',
              'nilmdb-server = nilmdb.scripts.nilmdb_server:main',
+              'nilmdb-fsck = nilmdb.scripts.nilmdb_fsck:main',
              ],
          },
      ext_modules = ext_modules,
--- a/tests/data/timestamped
+++ b/tests/data/timestamped
@@ -0,0 +1,8 @@
+-10000000000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
+-100000000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
+-100000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
+-1000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
+1 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
+1000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
+1000000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
+1000000000 2.61246e+05  2.22735e+05  4.60340e+03  2.58221e+03  8.42804e+03  3.41890e+03  9.57898e+02  4.00585e+03  
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -242,6 +242,19 @@ class TestClient(object):
        in_("400 Bad Request", str(e.exception))
        in_("start must precede end", str(e.exception))

+        # Invalid times in HTTP request
+        with assert_raises(ClientError) as e:
+            client.http.put("stream/insert", "", { "path": "/newton/prep",
+                                                   "start": "asdf", "end": 0 })
+        in_("400 Bad Request", str(e.exception))
+        in_("invalid start", str(e.exception))
+
+        with assert_raises(ClientError) as e:
+            client.http.put("stream/insert", "", { "path": "/newton/prep",
+                                                   "start": 0, "end": "asdf" })
+        in_("400 Bad Request", str(e.exception))
+        in_("invalid end", str(e.exception))
+
        # Good content type
        with assert_raises(ClientError) as e:
            client.http.put("stream/insert", "",
@@ -354,10 +367,6 @@ class TestClient(object):
        with assert_raises(ServerError) as e:
            client.http.get_gen("http://nosuchurl.example.com./").next()

-        # Trigger a curl error in generator
-        with assert_raises(ServerError) as e:
-            client.http.get_gen("http://nosuchurl.example.com./").next()
-
        # Check 404 for missing streams
        for function in [ client.stream_intervals, client.stream_extract ]:
            with assert_raises(ClientError) as e:
@@ -396,27 +405,38 @@ class TestClient(object):
                                 headers())

        # Extract
-        x = http.get("stream/extract",
-                            { "path": "/newton/prep",
-                              "start": "123",
-                              "end": "124" })
+        x = http.get("stream/extract", { "path": "/newton/prep",
+                                         "start": "123", "end": "124" })
        if "transfer-encoding: chunked" not in headers():
            warnings.warn("Non-chunked HTTP response for /stream/extract")
        if "content-type: text/plain;charset=utf-8" not in headers():
            raise AssertionError("/stream/extract is not text/plain:\n" +
                                 headers())

-        x = http.get("stream/extract",
-                            { "path": "/newton/prep",
-                              "start": "123",
-                              "end": "124",
-                              "binary": "1" })
+        x = http.get("stream/extract", { "path": "/newton/prep",
+                                         "start": "123", "end": "124",
+                                         "binary": "1" })
        if "transfer-encoding: chunked" not in headers():
            warnings.warn("Non-chunked HTTP response for /stream/extract")
        if "content-type: application/octet-stream" not in headers():
            raise AssertionError("/stream/extract is not binary:\n" +
                                 headers())

+        # Make sure a binary of "0" is really off
+        x = http.get("stream/extract", { "path": "/newton/prep",
+                                         "start": "123", "end": "124",
+                                         "binary": "0" })
+        if "content-type: application/octet-stream" in headers():
+                    raise AssertionError("/stream/extract is not text:\n" +
+                                         headers())
+
+        # Invalid parameters
+        with assert_raises(ClientError) as e:
+            x = http.get("stream/extract", { "path": "/newton/prep",
+                                             "start": "123", "end": "124",
+                                             "binary": "asdfasfd" })
+        in_("can't parse parameter", str(e.exception))
+
        client.close()

    def test_client_08_unicode(self):
--- a/tests/test_cmdline.py
+++ b/tests/test_cmdline.py
@@ -59,8 +59,7 @@ class TestCmdline(object):

    def run(self, arg_string, infile=None, outfile=None):
        """Run a cmdline client with the specified argument string,
-        passing the given input.  Returns a tuple with the output and
-        exit code"""
+        passing the given input.  Save the output and exit code."""
        # printf("TZ=UTC ./nilmtool.py %s\n", arg_string)
        os.environ['NILMDB_URL'] = "http://localhost:32180/"
        class stdio_wrapper:
@@ -88,7 +87,7 @@ class TestCmdline(object):
                sys.exit(0)
            except SystemExit as e:
                exitcode = e.code
-        captured = outfile.getvalue()
+        captured = nilmdb.utils.unicode.decode(outfile.getvalue())
        self.captured = captured
        self.exitcode = exitcode

@@ -160,6 +159,12 @@ class TestCmdline(object):
        self.ok("--help")
        self.contain("usage:")

+        # version
+        self.ok("--version")
+        ver = self.captured
+        self.ok("list --version")
+        eq_(self.captured, ver)
+
        # fail for no args
        self.fail("")

@@ -245,8 +250,10 @@ class TestCmdline(object):
        self.contain("Client version: " + nilmdb.__version__)
        self.contain("Server version: " + test_server.version)
        self.contain("Server database path")
-        self.contain("Server database size")
-        self.contain("Server database free space")
+        self.contain("Server disk space used by NilmDB")
+        self.contain("Server disk space used by other")
+        self.contain("Server disk space reserved")
+        self.contain("Server disk space free")

    def test_04_createlist(self):
        # Basic stream tests, like those in test_client.
@@ -473,6 +480,13 @@ class TestCmdline(object):
        # bad start time
        self.fail("insert -t -r 120 --start 'whatever' /newton/prep /dev/null")

+        # Test negative times
+        self.ok("insert --start @-10000000000 --end @1000000001 /newton/prep"
+                " tests/data/timestamped")
+        self.ok("extract -c /newton/prep --start min --end @1000000001")
+        self.match("8\n")
+        self.ok("remove /newton/prep --start min --end @1000000001")
+
    def test_07_detail_extended(self):
        # Just count the number of lines, it's probably fine
        self.ok("list --detail")
@@ -1002,6 +1016,18 @@ class TestCmdline(object):
        self.match("[ Thu, 01 Jan 2004 00:00:00.000000 +0000 -"
                   "> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n")

+        # optimize
+        self.ok("insert -s 01-01-2002 -e 01-01-2004 /diff/1 /dev/null")
+        self.ok("intervals /diff/1")
+        self.match("[ Sat, 01 Jan 2000 00:00:00.000000 +0000 -"
+                   "> Thu, 01 Jan 2004 00:00:00.000000 +0000 ]\n"
+                   "[ Thu, 01 Jan 2004 00:00:00.000000 +0000 -"
+                   "> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n")
+        self.ok("intervals /diff/1 --optimize")
+        self.ok("intervals /diff/1 -o")
+        self.match("[ Sat, 01 Jan 2000 00:00:00.000000 +0000 -"
+                   "> Sat, 01 Jan 2005 00:00:00.000000 +0000 ]\n")
+
        self.ok("destroy -R /diff/1")
        self.ok("destroy -R /diff/2")

--- a/tests/test_interval.py
+++ b/tests/test_interval.py
@@ -59,6 +59,14 @@ class TestInterval:
        self.test_interval_intersect()
        Interval = NilmdbInterval

+        # Other helpers in nilmdb.utils.interval
+        i = [ UtilsInterval(1,2), UtilsInterval(2,3), UtilsInterval(4,5) ]
+        eq_(list(nilmdb.utils.interval.optimize(i)),
+            [ UtilsInterval(1,3), UtilsInterval(4,5) ])
+        eq_(UtilsInterval(1234567890123456, 1234567890654321).human_string(),
+            "[ Fri, 13 Feb 2009 18:31:30.123456 -0500 -> " +
+            "Fri, 13 Feb 2009 18:31:30.654321 -0500 ]")
+
    def test_interval(self):
        # Test Interval class
        os.environ['TZ'] = "America/New_York"
@@ -226,13 +234,16 @@ class TestInterval:
            x = makeset("[--)") & 1234

        def do_test(a, b, c, d):
-            # a & b == c
+            # a & b == c (using nilmdb.server.interval)
            ab = IntervalSet()
            for x in b:
                for i in (a & x):
                    ab += i
            eq_(ab,c)

+            # a & b == c (using nilmdb.utils.interval)
+            eq_(IntervalSet(nilmdb.utils.interval.intersection(a,b)), c)
+
            # a \ b == d
            eq_(IntervalSet(nilmdb.utils.interval.set_difference(a,b)), d)

@@ -302,6 +313,17 @@ class TestInterval:
        eq_(nilmdb.utils.interval.set_difference(
            a.intersection(list(c)[0]), b.intersection(list(c)[0])), d)

+        # Fill out test coverage for non-subsets
+        def diff2(a,b, subset):
+            return nilmdb.utils.interval._interval_math_helper(
+                a, b, (lambda a, b: b and not a), subset=subset)
+        with assert_raises(nilmdb.utils.interval.IntervalError):
+            list(diff2(a,b,True))
+        list(diff2(a,b,False))
+
+        # Empty second set
+        eq_(nilmdb.utils.interval.set_difference(a, IntervalSet()), a)
+
        # Empty second set
        eq_(nilmdb.utils.interval.set_difference(a, IntervalSet()), a)

--- a/tests/test_nilmdb.py
+++ b/tests/test_nilmdb.py
@@ -157,11 +157,14 @@ class TestServer(object):

    def test_server(self):
        # Make sure we can't force an exit, and test other 404 errors
-        for url in [ "/exit", "/", "/favicon.ico" ]:
+        for url in [ "/exit", "/favicon.ico" ]:
            with assert_raises(HTTPError) as e:
                geturl(url)
            eq_(e.exception.code, 404)

+        # Root page
+        in_("This is NilmDB", geturl("/"))
+
        # Check version
        eq_(distutils.version.LooseVersion(getjson("/version")),
            distutils.version.LooseVersion(nilmdb.__version__))
--- a/tests/test_serializer.py
+++ b/tests/test_serializer.py
@@ -62,6 +62,28 @@ class Base(object):
        eq_(self.foo.val, 20)
        eq_(self.foo.init_thread, self.foo.test_thread)

+class ListLike(object):
+    def __init__(self):
+        self.thread = threading.current_thread().name
+        self.foo = 0
+
+    def __iter__(self):
+        eq_(threading.current_thread().name, self.thread)
+        self.foo = 0
+        return self
+
+    def __getitem__(self, key):
+        eq_(threading.current_thread().name, self.thread)
+        return key
+
+    def next(self):
+        eq_(threading.current_thread().name, self.thread)
+        if self.foo < 5:
+            self.foo += 1
+            return self.foo
+        else:
+            raise StopIteration
+
 class TestUnserialized(Base):
    def setUp(self):
        self.foo = Foo()
@@ -84,3 +106,9 @@ class TestSerializer(Base):
        sp(sp(Foo("x"))).t()
        sp(sp(Foo)("x")).t()
        sp(sp(Foo))("x").t()
+
+    def test_iter(self):
+        sp = nilmdb.utils.serializer_proxy
+        i = sp(ListLike)()
+        eq_(list(i), [1,2,3,4,5])
+        eq_(i[3], 3)
Author	SHA1	Message	Date
Jim Paris	71cd7ed9b7	Add nilmdb-fsck tool to check database consistency	2013-08-03 14:23:14 -04:00
Jim Paris	a79d6104d5	Documentation fixups	2013-08-01 16:24:51 -04:00
Jim Paris	8e8ec59e30	Support "nilmtool cmd --version"	2013-08-01 15:14:34 -04:00
Jim Paris	b89b945a0f	Better responses to invalid HTTP times	2013-07-31 13:37:04 -04:00
Jim Paris	bd7bdb2eb8	Add --optimize option to nilmtool intervals	2013-07-30 15:31:51 -04:00
Jim Paris	840cd2fd13	Remove stray print	2013-07-30 15:21:09 -04:00
Jim Paris	bbd59c8b50	Add nilmdb.utils.interval.intersection by generalizing set_difference	2013-07-30 14:48:19 -04:00
Jim Paris	405c110fd7	Doc updates	2013-07-29 15:36:43 -04:00
Jim Paris	274adcd856	Documentation updates	2013-07-27 19:51:09 -04:00
Jim Paris	a1850c9c2c	Misc documentation	2013-07-25 16:08:35 -04:00
Jim Paris	6cd28b67b1	Support iterator protocol in Serializer	2013-07-24 14:52:26 -04:00
Jim Paris	d6d215d53d	Improve boolean HTTP parameter handling	2013-07-15 14:38:28 -04:00
Jim Paris	e02143ddb2	Remove duplicated test	2013-07-14 15:30:53 -04:00
Jim Paris	e275384d03	Fix WSGI docs again	2013-07-11 16:36:32 -04:00
Jim Paris	a6a67ec15c	Update WSGI docs	2013-07-10 14:16:25 -04:00
Jim Paris	fc43107307	Fill out test coverage	2013-07-09 19:06:26 -04:00
Jim Paris	90633413bb	Add nilmdb.utils.interval.human_string function	2013-07-09 19:01:53 -04:00
Jim Paris	c7c3aff0fb	Add nilmdb.utils.interval.optimize function	2013-07-09 17:50:21 -04:00
Jim Paris	e2347c954e	Split more CherrpyPy stuff into serverutil	2013-07-02 11:44:08 -04:00
Jim Paris	222a5c6c53	Move server decorators and other utilities to a separate file. This will help with implementing nilmrun.	2013-07-02 11:32:19 -04:00
Jim Paris	1ca2c143e5	Fix typo	2013-06-29 12:39:00 -04:00
Jim Paris	b5df575c79	Fix tests	2013-05-09 22:27:10 -04:00
Jim Paris	2768a5ad15	Show FQDN rather than hostname.	2013-05-09 13:33:05 -04:00
Jim Paris	a105543c38	Show a more helpful message at the root nilmdb path	2013-05-09 13:30:10 -04:00
Jim Paris	309f38d0ed	Merge branch '32bit'	2013-05-08 17:20:31 -04:00
Jim Paris	9a27b6ef6a	Make rocket code suitable for 32-bit architectures	2013-05-08 16:35:32 -04:00
Jim Paris	99532cf9e0	Fix coverage	2013-05-07 23:00:44 -04:00
Jim Paris	dfdd0e5c74	Fix line parsing in http client	2013-05-07 22:56:00 -04:00
Jim Paris	9a2699adfc	Attempt at fixing up more Unicode issues with metadata.	2013-05-07 13:44:03 -04:00
Jim Paris	9bbb95b18b	Add unicode decode/encode helpers	2013-05-07 12:56:59 -04:00
Jim Paris	6bbed322c5	Fix unicode in completion	2013-05-07 12:49:12 -04:00
Jim Paris	2317894355	Tweak cache sizes to account for large numbers of decimated tables	2013-05-06 11:54:57 -04:00
Jim Paris	539c92226c	Add more disk space info	2013-05-06 11:36:28 -04:00