Compare commits

2 Commits

SHA1 Message Date
9b9f392d43 add .gitignore 2012-11-28 17:21:36 -05:00
3c441de498 WIP moving back to bxintersect, cleaning up bxintersect 2012-11-15 16:05:33 -05:00
52 changed files with 943 additions and 1750 deletions

.gitignore

@@ -1,4 +1,2 @@
db/
tests/*testdb/
.coverage
*.pyc

@@ -1,4 +1,2 @@
sudo apt-get install python-nose python-coverage
sudo apt-get install python-tables python-cherrypy3
sudo apt-get install cython # 0.17.1-1 or newer
sudo apt-get install python-tables cython python-cherrypy3

TODO

@@ -1 +1,5 @@
- Merge adjacent intervals on insert (maybe with client help?)
- Better testing:
- see about getting coverage on layout.pyx
- layout.pyx performance tests, before and after generalization

@@ -103,13 +103,13 @@ Speed
- First approach was quadratic. Adding four hours of data:
$ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-110000 /bpnilm/1/raw
real 24m31.093s
$ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-120001 /bpnilm/1/raw
real 43m44.528s
$ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-130002 /bpnilm/1/raw
real 93m29.713s
$ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-140003 /bpnilm/1/raw
real 166m53.007s
- Disabling pytables indexing didn't help:
@@ -122,19 +122,19 @@ Speed
- Server RAM usage is constant.
- Speed problems were due to IntervalSet speed: parsing intervals
from the database and adding the new one each time.
- First optimization is to cache result of `nilmdb:_get_intervals`,
which gives the best speedup.
- Also switched to internally using bxInterval from bx-python package.
Speed of `tests/test_interval:TestIntervalSpeed` is pretty decent
and seems to be growing logarithmically now. About 85μs per insertion
for inserting 131k entries.
- Storing the interval data in SQL might be better, with a scheme like:
http://www.logarithmic.net/pfh/blog/01235197474
- Next slowdown target is nilmdb.layout.Parser.parse().
- Rewrote parsers using cython and sscanf
- Stats (rev 10831), with _add_interval disabled
@@ -142,14 +142,7 @@ Speed
layout.pyx.parse:63 13913 sec, 5.1g calls
numpy:records.py.fromrecords:569 7410 sec, 262k calls
- Probably OK for now.
- After all updates, now takes about 8.5 minutes to insert an hour of
data, constant after adding 171 hours (4.9 billion data points)
- Data set size: 98 gigs = 20 bytes per data point.
6 uint16 data + 1 uint32 timestamp = 16 bytes per point in the raw
source, but the table stores the timestamp as float64, so an
uncompressed row is 8 + 12 = 20 bytes -- exactly the observed size.
So compression must be off -- will retry with compression forced on.
IntervalSet speed
-----------------
- Initial implementation was pretty slow, even with binary search in
@@ -168,18 +161,6 @@ IntervalSet speed
- Might be algorithmic improvements to be made in Interval.py,
like in `__and__`
- Replaced again with rbtree. Seems decent. Numbers are time per
insert for 2**17 insertions, followed by total wall time and RAM
usage for running "make test" with `test_rbtree` and `test_interval`
with range(5,20):
- old values with bxinterval:
20.2 μs, total 20 s, 177 MB RAM
- rbtree, plain python:
97 μs, total 105 s, 846 MB RAM
- rbtree converted to cython:
26 μs, total 29 s, 320 MB RAM
- rbtree and interval converted to cython:
8.4 μs, total 12 s, 134 MB RAM
Layouts
-------
@@ -189,12 +170,12 @@ just collections and counts of a single type. We'll still use strings
to describe them, with format:
type_count
where type is "uint16", "float32", or "float64", and count is an integer.
nilmdb.layout.named() will parse these strings into the appropriate
handlers. For compatibility:
"RawData" == "uint16_6"
"RawNotchedData" == "uint16_9"
"PrepData" == "float32_8"

@@ -3,10 +3,14 @@
from .nilmdb import NilmDB
from .server import Server
from .client import Client
import pyximport; pyximport.install()
import layout
import interval
from .timer import Timer
import cmdline
import pyximport; pyximport.install()
import layout
import serializer
import timestamper
import interval
import du

@@ -1,310 +0,0 @@
# Fixed record size bulk data storage
from __future__ import absolute_import
from __future__ import division
import nilmdb
from nilmdb.utils.printf import *
import os
import sys
import cPickle as pickle
import struct
import fnmatch
import mmap
# Up to 256 open file descriptors at any given time
# (table_cache_size tables, each caching up to fd_cache_size mmaps)
table_cache_size = 16
fd_cache_size = 16
@nilmdb.utils.must_close()
class BulkData(object):
def __init__(self, basepath):
self.basepath = basepath
self.root = os.path.join(self.basepath, "data")
# Make root path
if not os.path.isdir(self.root):
os.mkdir(self.root)
def close(self):
self.getnode.cache_remove_all()
def _encode_filename(self, path):
# Encode all paths to UTF-8, regardless of sys.getfilesystemencoding(),
# because we want to be able to represent all code points and the user
# will never be directly exposed to filenames. We can then do path
# manipulations on the UTF-8 directly.
if isinstance(path, unicode):
return path.encode('utf-8')
return path
def create(self, unicodepath, layout_name):
"""
unicodepath: path to the data (e.g. u'/newton/prep').
Paths must contain at least two elements, e.g.:
/newton/prep
/newton/raw
/newton/upstairs/prep
/newton/upstairs/raw
layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8'
"""
path = self._encode_filename(unicodepath)
if path[0] != '/':
raise ValueError("paths must start with /")
[ group, node ] = path.rsplit("/", 1)
if group == '':
raise ValueError("invalid path")
# Get layout, and build format string for struct module
try:
layout = nilmdb.layout.get_named(layout_name)
struct_fmt = '<d' # Little endian, double timestamp
struct_mapping = {
"int8": 'b',
"uint8": 'B',
"int16": 'h',
"uint16": 'H',
"int32": 'i',
"uint32": 'I',
"int64": 'q',
"uint64": 'Q',
"float32": 'f',
"float64": 'd',
}
for n in range(layout.count):
struct_fmt += struct_mapping[layout.datatype]
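# For example (illustrative): layout 'float32_8' builds struct_fmt
# '<dffffffff' -- a little-endian float64 timestamp followed by
# eight float32 data values.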
except KeyError:
raise ValueError("no such layout, or bad data types")
# Create the table. Note that we make a distinction here
# between NilmDB paths (always Unix style, split apart
# manually) and OS paths (built up with os.path.join)
try:
# Make directories leading up to this one
elements = path.lstrip('/').split('/')
for i in range(len(elements)):
ospath = os.path.join(self.root, *elements[0:i])
if Table.exists(ospath):
raise ValueError("path is subdir of existing node")
if not os.path.isdir(ospath):
os.mkdir(ospath)
# Make the final dir
ospath = os.path.join(self.root, *elements)
if os.path.isdir(ospath):
raise ValueError("subdirs of this path already exist")
os.mkdir(ospath)
# Write format string to file
Table.create(ospath, struct_fmt)
except OSError as e:
raise ValueError("error creating table at that path: " + e.strerror)
# Open and cache it
self.getnode(unicodepath)
# Success
return
def destroy(self, unicodepath):
"""Fully remove all data at a particular path. No way to undo
it! The group/path structure is removed, too."""
path = self._encode_filename(unicodepath)
# Get OS path
elements = path.lstrip('/').split('/')
ospath = os.path.join(self.root, *elements)
# Remove Table object from cache
self.getnode.cache_remove(self, ospath)
# Remove the contents of the target directory
if not os.path.isfile(os.path.join(ospath, "format")):
raise ValueError("nothing at that path")
for file in os.listdir(ospath):
os.remove(os.path.join(ospath, file))
# Remove empty parent directories
for i in reversed(range(len(elements))):
ospath = os.path.join(self.root, *elements[0:i+1])
try:
os.rmdir(ospath)
except OSError:
break
# Cache open tables
@nilmdb.utils.lru_cache(size = table_cache_size,
onremove = lambda x: x.close())
def getnode(self, unicodepath):
"""Return a Table object corresponding to the given database
path, which must exist."""
path = self._encode_filename(unicodepath)
elements = path.lstrip('/').split('/')
ospath = os.path.join(self.root, *elements)
return Table(ospath)
@nilmdb.utils.must_close()
class Table(object):
"""Tools to help access a single table (data at a specific OS path)"""
# Class methods, to help keep format details in this class.
@classmethod
def exists(cls, root):
"""Return True if a table appears to exist at this OS path"""
return os.path.isfile(os.path.join(root, "format"))
@classmethod
def create(cls, root, struct_fmt):
"""Initialize a table at the given OS path.
'struct_fmt' is a Struct module format description"""
format = { "rows_per_file": 4 * 1024 * 1024,
"struct_fmt": struct_fmt }
with open(os.path.join(root, "format"), "wb") as f:
pickle.dump(format, f, 2)
# Normal methods
def __init__(self, root):
"""'root' is the full OS path to the directory of this table"""
self.root = root
# Load the format and build packer
with open(self._fullpath("format"), "rb") as f:
format = pickle.load(f)
self.rows_per_file = format["rows_per_file"]
self.packer = struct.Struct(format["struct_fmt"])
self.file_size = self.packer.size * self.rows_per_file
# Find nrows by locating the lexicographically last filename
# and using its size.
pattern = '[0-9a-f]' * 8
allfiles = fnmatch.filter(os.listdir(self.root), pattern)
if allfiles:
filename = max(allfiles)
offset = os.path.getsize(self._fullpath(filename))
self.nrows = self._row_from_fnoffset(filename, offset)
else:
self.nrows = 0
def close(self):
self.mmap_open.cache_remove_all()
# Internal helpers
def _fullpath(self, filename):
return os.path.join(self.root, filename)
def _fnoffset_from_row(self, row):
"""Return a (filename, offset, count) tuple:
filename: the filename that contains the specified row
offset: byte offset of the specified row within the file
count: number of rows (starting at offset) that fit in the file
"""
filenum = row // self.rows_per_file
filename = sprintf("%08x", filenum)
offset = (row % self.rows_per_file) * self.packer.size
count = self.rows_per_file - (row % self.rows_per_file)
return (filename, offset, count)
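# Worked example (illustrative numbers): with rows_per_file = 4194304
# and packer.size = 20, row 5000000 gives filenum 1 -> "00000001",
# offset = (5000000 % 4194304) * 20 = 16113920, and
# count = 4194304 - 805696 = 3388608 rows remaining in that file.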
def _row_from_fnoffset(self, filename, offset):
"""Return the row number that corresponds to the given
filename and byte-offset within that file."""
filenum = int(filename, 16)
if (offset % self.packer.size) != 0:
raise ValueError("file offset is not a multiple of data size")
row = (filenum * self.rows_per_file) + (offset // self.packer.size)
return row
# Cache open files
@nilmdb.utils.lru_cache(size = fd_cache_size,
onremove = lambda x: x.close())
def mmap_open(self, file, newsize = None):
"""Open and map a given filename (relative to self.root).
Will be automatically closed when evicted from the cache.
If 'newsize' is provided, the file is truncated to the given
size before the mapping is returned. (Note that the LRU cache
on this function means the truncate will only happen if the
object isn't already cached; mmap.resize should be used too)"""
f = open(os.path.join(self.root, file), "a+", 0)
if newsize is not None:
# mmap can't map a zero-length file, so this allows the
# caller to set the filesize between file creation and
# mmap.
f.truncate(newsize)
mm = mmap.mmap(f.fileno(), 0)
return mm
def append(self, data):
"""Append the data and flush it to disk.
data is a nested Python list [[row],[row],[...]]"""
remaining = len(data)
dataiter = iter(data)
while remaining:
# See how many rows we can fit into the current file, and open it
(filename, offset, count) = self._fnoffset_from_row(self.nrows)
if count > remaining:
count = remaining
newsize = offset + count * self.packer.size
mm = self.mmap_open(filename, newsize)
mm.seek(offset)
# Extend the file to the target length. We specified
# newsize when opening, but that may have been ignored if
# the mmap_open returned a cached object.
mm.resize(newsize)
# Write the data
for i in xrange(count):
row = dataiter.next()
mm.write(self.packer.pack(*row))
remaining -= count
self.nrows += count
def __getitem__(self, key):
"""Extract data and return it. Supports simple indexing
(table[n]) and range slices (table[n:m]). Returns a nested
Python list [[row],[row],[...]]"""
# Handle simple slices
if isinstance(key, slice):
# Fall back to brute force if the slice isn't simple
if ((key.step is not None and key.step != 1) or
key.start is None or
key.stop is None or
key.start >= key.stop or
key.start < 0 or
key.stop > self.nrows):
return [ self[x] for x in xrange(*key.indices(self.nrows)) ]
ret = []
row = key.start
remaining = key.stop - key.start
while remaining:
(filename, offset, count) = self._fnoffset_from_row(row)
if count > remaining:
count = remaining
mm = self.mmap_open(filename)
for i in xrange(count):
ret.append(list(self.packer.unpack_from(mm, offset)))
offset += self.packer.size
remaining -= count
row += count
return ret
# Handle single points
if key < 0 or key >= self.nrows:
raise IndexError("Index out of range")
(filename, offset, count) = self._fnoffset_from_row(key)
mm = self.mmap_open(filename)
# unpack_from ignores the mmap object's current seek position
return self.packer.unpack_from(mm, offset)
class TimestampOnlyTable(object):
"""Helper that lets us pass a Tables object into bisect, by
returning only the timestamp when a particular row is requested."""
def __init__(self, table):
self.table = table
def __getitem__(self, index):
return self.table[index][0]
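# Usage sketch (matching how nilmdb.py calls this; the lo/hi bounds
# here are illustrative): with timestamps sorted in column 0,
#   bisect.bisect_left(TimestampOnlyTable(table), t, lo, hi)
# binary-searches timestamps without unpacking whole rows.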

nilmdb/bxintersect.pyx (new file)

@@ -0,0 +1,352 @@
# cython: profile=False
# This is based on bxintersect in bx-python 554:07aca5a9f6fc (BSD licensed);
# modified to store interval ranges as doubles rather than 32-bit integers,
# use fully closed intervals, support deletion, etc.
#cython: cdivision=True
import operator
cdef extern from "stdlib.h":
int ceil(float f)
float log(float f)
int RAND_MAX
int rand()
int strlen(char *)
int iabs(int)
cdef inline double dmax2(double a, double b):
if b > a: return b
return a
cdef inline double dmax3(double a, double b, double c):
if b > a:
if c > b:
return c
return b
if a > c:
return a
return c
cdef inline double dmin3(double a, double b, double c):
if b < a:
if c < b:
return c
return b
if a < c:
return a
return c
cdef inline double dmin2(double a, double b):
if b < a: return b
return a
cdef float nlog = -1.0 / log(0.5)
cdef class IntervalNode:
"""
A single node of an `IntervalTree`.
NOTE: Unless you really know what you are doing, you probably should use
`IntervalTree` rather than using this directly.
"""
cdef float priority
cdef public object interval
cdef public double start, end
cdef double minend, maxend, minstart
cdef IntervalNode cleft, cright, croot
property left_node:
def __get__(self):
return self.cleft if self.cleft is not EmptyNode else None
property right_node:
def __get__(self):
return self.cright if self.cright is not EmptyNode else None
property root_node:
def __get__(self):
return self.croot if self.croot is not EmptyNode else None
def __repr__(self):
return "IntervalNode(%g, %g)" % (self.start, self.end)
def __cinit__(IntervalNode self, double start, double end, object interval):
# Python lacks the binomial distribution, so we convert a
# uniform into a binomial because it naturally scales with
# tree size. Also, python's uniform is perfect since the
# upper limit is not inclusive, which gives us undefined here.
self.priority = ceil(nlog * log(-1.0/(1.0 * rand()/RAND_MAX - 1)))
self.start = start
self.end = end
self.interval = interval
self.maxend = end
self.minstart = start
self.minend = end
self.cleft = EmptyNode
self.cright = EmptyNode
self.croot = EmptyNode
cpdef IntervalNode insert(IntervalNode self, double start, double end, object interval):
"""
Insert a new IntervalNode into the tree of which this node is
currently the root. The return value is the new root of the tree (which
may or may not be this node!)
"""
cdef IntervalNode croot = self
# If starts are the same, decide which to add interval to based on
# end, thus maintaining sortedness relative to start/end
cdef double decision_endpoint = start
if start == self.start:
decision_endpoint = end
if decision_endpoint > self.start:
# insert to cright tree
if self.cright is not EmptyNode:
self.cright = self.cright.insert( start, end, interval )
else:
self.cright = IntervalNode( start, end, interval )
# rebalance tree
if self.priority < self.cright.priority:
croot = self.rotate_left()
else:
# insert to cleft tree
if self.cleft is not EmptyNode:
self.cleft = self.cleft.insert( start, end, interval)
else:
self.cleft = IntervalNode( start, end, interval)
# rebalance tree
if self.priority < self.cleft.priority:
croot = self.rotate_right()
croot.set_ends()
self.cleft.croot = croot
self.cright.croot = croot
return croot
cdef IntervalNode rotate_right(IntervalNode self):
cdef IntervalNode croot = self.cleft
self.cleft = self.cleft.cright
croot.cright = self
self.set_ends()
return croot
cdef IntervalNode rotate_left(IntervalNode self):
cdef IntervalNode croot = self.cright
self.cright = self.cright.cleft
croot.cleft = self
self.set_ends()
return croot
cdef inline void set_ends(IntervalNode self):
if self.cright is not EmptyNode and self.cleft is not EmptyNode:
self.maxend = dmax3(self.end, self.cright.maxend, self.cleft.maxend)
self.minend = dmin3(self.end, self.cright.minend, self.cleft.minend)
self.minstart = dmin3(self.start, self.cright.minstart, self.cleft.minstart)
elif self.cright is not EmptyNode:
self.maxend = dmax2(self.end, self.cright.maxend)
self.minend = dmin2(self.end, self.cright.minend)
self.minstart = dmin2(self.start, self.cright.minstart)
elif self.cleft is not EmptyNode:
self.maxend = dmax2(self.end, self.cleft.maxend)
self.minend = dmin2(self.end, self.cleft.minend)
self.minstart = dmin2(self.start, self.cleft.minstart)
def intersect( self, double start, double end, sort=True ):
"""
given a start and a end, return a list of features
falling within that range
"""
cdef list results = []
self._intersect( start, end, results )
if sort:
results = sorted(results)
return results
find = intersect
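# Usage sketch (illustrative): root.intersect(2.0, 5.0) returns the
# stored interval objects overlapping (2.0, 5.0), sorted; find() is
# an alias for the same method.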
cdef void _intersect( IntervalNode self, double start, double end, list results):
# Left subtree
if self.cleft is not EmptyNode and self.cleft.maxend > start:
self.cleft._intersect( start, end, results )
# This interval
if ( self.end > start ) and ( self.start < end ):
results.append( self.interval )
# Right subtree
if self.cright is not EmptyNode and self.start < end:
self.cright._intersect( start, end, results )
def traverse(self):
if self.cleft is not EmptyNode:
for node in self.cleft.traverse():
yield node
yield self.interval
if self.cright is not EmptyNode:
for node in self.cright.traverse():
yield node
cdef IntervalNode EmptyNode = IntervalNode( 0, 0, Interval(0, 0))
## ---- Wrappers that retain the old interface -------------------------------
cdef class Interval:
"""
Basic feature, with required start and end properties (stored as doubles).
Also accepts optional strand as +1 or -1 (used for up/downstream queries),
a name, and any arbitrary data is sent in on the info keyword argument
>>> from bx.intervals.intersection import Interval
>>> f1 = Interval(23, 36)
>>> f2 = Interval(34, 48, value={'chr':12, 'anno':'transposon'})
>>> f2
Interval(34, 48, value={'anno': 'transposon', 'chr': 12})
"""
cdef public double start, end
cdef public object value, chrom, strand
def __init__(self, double start, double end, object value=None, object chrom=None, object strand=None ):
assert start <= end, "start must be less than or equal to end"
self.start = start
self.end = end
self.value = value
self.chrom = chrom
self.strand = strand
def __repr__(self):
fstr = "Interval(%g, %g" % (self.start, self.end)
if not self.value is None:
fstr += ", value=" + str(self.value)
fstr += ")"
return fstr
def __richcmp__(self, other, op):
if op == 0:
# <
return self.start < other.start or self.end < other.end
elif op == 1:
# <=
return self == other or self < other
elif op == 2:
# ==
return self.start == other.start and self.end == other.end
elif op == 3:
# !=
return self.start != other.start or self.end != other.end
elif op == 4:
# >
return self.start > other.start or self.end > other.end
elif op == 5:
# >=
return self == other or self > other
cdef class IntervalTree:
"""
Data structure for performing window intersect queries on a set of
of possibly overlapping 1d intervals.
Usage
=====
Create an empty IntervalTree
>>> from bx.intervals.intersection import Interval, IntervalTree
>>> intersecter = IntervalTree()
An interval is a start and end position and a value (possibly None).
You can add any object as an interval:
>>> intersecter.insert( 0, 10, "food" )
>>> intersecter.insert( 3, 7, dict(foo='bar') )
>>> intersecter.find( 2, 5 )
['food', {'foo': 'bar'}]
If the object has start and end attributes (like the Interval class) there
are some shortcuts:
>>> intersecter = IntervalTree()
>>> intersecter.insert_interval( Interval( 0, 10 ) )
>>> intersecter.insert_interval( Interval( 3, 7 ) )
>>> intersecter.insert_interval( Interval( 3, 40 ) )
>>> intersecter.insert_interval( Interval( 13, 50 ) )
>>> intersecter.find( 30, 50 )
[Interval(3, 40), Interval(13, 50)]
>>> intersecter.find( 100, 200 )
[]
Before/after for intervals
>>> intersecter.before_interval( Interval( 10, 20 ) )
[Interval(3, 7)]
>>> intersecter.before_interval( Interval( 5, 20 ) )
[]
Upstream/downstream
>>> intersecter.upstream_of_interval(Interval(11, 12))
[Interval(0, 10)]
>>> intersecter.upstream_of_interval(Interval(11, 12, strand="-"))
[Interval(13, 50)]
>>> intersecter.upstream_of_interval(Interval(1, 2, strand="-"), num_intervals=3)
[Interval(3, 7), Interval(3, 40), Interval(13, 50)]
"""
cdef IntervalNode root
def __cinit__( self ):
self.root = None
# ---- Position based interfaces -----------------------------------------
## KEEP
def insert( self, double start, double end, object value=None ):
"""
Insert the interval [start,end) associated with value `value`.
"""
if self.root is None:
self.root = IntervalNode( start, end, value )
else:
self.root = self.root.insert( start, end, value )
def delete( self, double start, double end, object value=None ):
"""
Delete the interval [start,end) associated with value `value`.
"""
# FIXME (WIP): this body mirrors insert() and does not actually
# remove anything yet; real deletion still needs to be implemented.
if self.root is None:
self.root = IntervalNode( start, end, value )
else:
self.root = self.root.insert( start, end, value )
def find( self, start, end ):
"""
Return a sorted list of all intervals overlapping [start,end).
"""
if self.root is None:
return []
return self.root.find( start, end )
# ---- Interval-like object based interfaces -----------------------------
## KEEP
def insert_interval( self, interval ):
"""
Insert an "interval" like object (one with at least start and end
attributes)
"""
self.insert( interval.start, interval.end, interval )
def traverse(self):
"""
iterator that traverses the tree
"""
if self.root is None:
return iter([])
return self.root.traverse()
# For backward compatibility
Intersecter = IntervalTree

@@ -1,16 +1,13 @@
# -*- coding: utf-8 -*-
"""Class for performing HTTP client requests via libcurl"""
from __future__ import absolute_import
from nilmdb.utils.printf import *
from nilmdb.printf import *
import time
import sys
import re
import os
import simplejson as json
import itertools
import nilmdb.httpclient
@@ -19,10 +16,6 @@ from nilmdb.httpclient import ClientError, ServerError, Error
version = "1.0"
def float_to_string(f):
# Use repr to maintain full precision in the string output.
return repr(float(f))
class Client(object):
"""Main client interface to the Nilm database."""
@@ -91,82 +84,33 @@ class Client(object):
"layout" : layout }
return self.http.get("stream/create", params)
def stream_destroy(self, path):
"""Delete stream and its contents"""
params = { "path": path }
return self.http.get("stream/destroy", params)
def stream_insert(self, path, data, start = None, end = None):
def stream_insert(self, path, data):
"""Insert data into a stream. data should be a file-like object
that provides ASCII data that matches the database layout for path.
start and end are the starting and ending timestamp of this
stream; all timestamps t in the data must satisfy 'start <= t
< end'. If left unspecified, 'start' is the timestamp of the
first line of data, and 'end' is the timestamp on the last line
of data, plus a small delta of 1μs.
"""
that provides ASCII data that matches the database layout for path."""
params = { "path": path }
# See design.md for a discussion of how much data to send.
# These are soft limits -- actual data might be rounded up.
max_data = 1048576
max_time = 30
end_epsilon = 1e-6
def pairwise(iterable):
"s -> (s0,s1), (s1,s2), ..., (sn,None)"
a, b = itertools.tee(iterable)
next(b, None)
return itertools.izip_longest(a, b)
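# e.g. pairwise(["a", "b", "c"]) yields ("a","b"), ("b","c"), ("c",None)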
def extract_timestamp(line):
return float(line.split()[0])
def sendit():
# If we have more data after this, use the timestamp of
# the next line as the end. Otherwise, use the given
# overall end time, or add end_epsilon to the last data
# point.
if nextline:
block_end = extract_timestamp(nextline)
if end and block_end > end:
# This is unexpected, but we'll defer to the server
# to return an error in this case.
block_end = end
elif end:
block_end = end
else:
block_end = extract_timestamp(line) + end_epsilon
result = self.http.put("stream/insert", send_data, params)
params["old_timestamp"] = result[1]
return result
# Send it
params["start"] = float_to_string(block_start)
params["end"] = float_to_string(block_end)
return self.http.put("stream/insert", block_data, params)
clock_start = time.time()
block_data = ""
block_start = start
result = None
for (line, nextline) in pairwise(data):
# If we don't have a starting time, extract it from the first line
if block_start is None:
block_start = extract_timestamp(line)
start = time.time()
send_data = ""
for line in data:
elapsed = time.time() - start
send_data += line
clock_elapsed = time.time() - clock_start
block_data += line
# If we have enough data, or enough time has elapsed,
# send this block to the server, and empty things out
# for the next block.
if (len(block_data) > max_data) or (clock_elapsed > max_time):
if (len(send_data) > max_data) or (elapsed > max_time):
result = sendit()
block_start = None
block_data = ""
clock_start = time.time()
# One last block?
if len(block_data):
send_data = ""
start = time.time()
if len(send_data):
result = sendit()
# Return the most recent JSON result we got back, or None if
@@ -181,9 +125,9 @@ class Client(object):
"path": path
}
if start is not None:
params["start"] = float_to_string(start)
params["start"] = repr(start) # use repr to keep precision
if end is not None:
params["end"] = float_to_string(end)
params["end"] = repr(end)
return self.http.get_gen("stream/intervals", params, retjson = True)
def stream_extract(self, path, start = None, end = None, count = False):
@@ -199,9 +143,9 @@ class Client(object):
"path": path,
}
if start is not None:
params["start"] = float_to_string(start)
params["start"] = repr(start) # use repr to keep precision
if end is not None:
params["end"] = float_to_string(end)
params["end"] = repr(end)
if count:
params["count"] = 1

@@ -1,7 +1,7 @@
"""Command line client functionality"""
from __future__ import absolute_import
from nilmdb.utils.printf import *
from nilmdb.printf import *
import nilmdb.client
import datetime_tz
@@ -15,8 +15,7 @@ version = "0.1"
# Valid subcommands. Defined in separate files just to break
# things up -- they're still called with Cmdline as self.
subcommands = [ "info", "create", "list", "metadata", "insert", "extract",
"destroy" ]
subcommands = [ "info", "create", "list", "metadata", "insert", "extract" ]
# Import the subcommand modules. Equivalent way of doing this would be
# from . import info as cmd_info

@@ -1,5 +1,5 @@
from __future__ import absolute_import
from nilmdb.utils.printf import *
from nilmdb.printf import *
import nilmdb.client
from argparse import ArgumentDefaultsHelpFormatter as def_form

@@ -1,25 +0,0 @@
from __future__ import absolute_import
from nilmdb.utils.printf import *
import nilmdb.client
from argparse import ArgumentDefaultsHelpFormatter as def_form
def setup(self, sub):
cmd = sub.add_parser("destroy", help="Delete a stream and all data",
formatter_class = def_form,
description="""
Destroy the stream at the specified path. All
data and metadata related to the stream is
permanently deleted.
""")
cmd.set_defaults(handler = cmd_destroy)
group = cmd.add_argument_group("Required arguments")
group.add_argument("path",
help="Path of the stream to delete, e.g. /foo/bar")
def cmd_destroy(self):
"""Destroy stream"""
try:
self.client.stream_destroy(self.args.path)
except nilmdb.client.ClientError as e:
self.die("Error deleting stream: %s", str(e))

@@ -1,7 +1,7 @@
from __future__ import absolute_import
from __future__ import print_function
from nilmdb.utils.printf import *
from nilmdb.printf import *
import nilmdb.client
import nilmdb.layout
import sys
def setup(self, sub):
@@ -51,7 +51,7 @@ def cmd_extract(self):
# Strip timestamp (first element). Doesn't make sense
# if we are only returning a count.
dataline = ' '.join(dataline.split(' ')[1:])
print(dataline)
print dataline
printed = True
if not printed:
if self.args.annotate:

@@ -1,5 +1,5 @@
from __future__ import absolute_import
from nilmdb.utils.printf import *
from nilmdb.printf import *
from argparse import ArgumentDefaultsHelpFormatter as def_form

@@ -1,6 +1,7 @@
from __future__ import absolute_import
from nilmdb.utils.printf import *
from nilmdb.printf import *
import nilmdb.client
import nilmdb.layout
import nilmdb.timestamper
import sys

@@ -1,5 +1,5 @@
from __future__ import absolute_import
from nilmdb.utils.printf import *
from nilmdb.printf import *
import nilmdb.client
import fnmatch

@@ -1,5 +1,5 @@
from __future__ import absolute_import
from nilmdb.utils.printf import *
from nilmdb.printf import *
import nilmdb.client
def setup(self, sub):

@@ -1,8 +1,7 @@
"""HTTP client library"""
from __future__ import absolute_import
from nilmdb.utils.printf import *
import nilmdb.utils
from nilmdb.printf import *
import time
import sys
@@ -10,9 +9,12 @@ import re
import os
import simplejson as json
import urlparse
import urllib
import pycurl
import cStringIO
import nilmdb.iteratorizer
class Error(Exception):
"""Base exception for both ClientError and ServerError responses"""
def __init__(self,
@@ -58,8 +60,7 @@ class HTTPClient(object):
def _setup_url(self, url = "", params = ""):
url = urlparse.urljoin(self.baseurl, url)
if params:
url = urlparse.urljoin(
url, "?" + nilmdb.utils.urllib.urlencode(params, True))
url = urlparse.urljoin(url, "?" + urllib.urlencode(params, True))
self.curl.setopt(pycurl.URL, url)
self.url = url
@@ -84,10 +85,6 @@ class HTTPClient(object):
raise ClientError(**args)
else: # pragma: no cover
if code >= 500 and code <= 599:
if args["message"] is None:
args["message"] = ("(no message; try disabling " +
"response.stream option in " +
"nilmdb.server for better debugging)")
raise ServerError(**args)
else:
raise Error(**args)
@@ -112,7 +109,7 @@ class HTTPClient(object):
self.curl.setopt(pycurl.WRITEFUNCTION, callback)
self.curl.perform()
try:
for i in nilmdb.utils.Iteratorizer(func):
for i in nilmdb.iteratorizer.Iteratorizer(func):
if self._status == 200:
# If we had a 200 response, yield the data to the caller.
yield i

@@ -1,9 +1,8 @@
"""Interval, IntervalSet
"""Interval and IntervalSet
Represents an interval of time, and a set of such intervals.
Intervals are half-open, ie. they include data points with timestamps
[start, end)
Intervals are closed, ie. they include timestamps [start, end]
"""
# First implementation kept a sorted list of intervals and used
@@ -19,20 +18,18 @@ Intervals are half-open, ie. they include data points with timestamps
# Fourth version is an optimized rb-tree that stores interval starts
# and ends directly in the tree, like bxinterval did.
cimport rbtree
cdef extern from "stdint.h":
ctypedef unsigned long long uint64_t
# Fifth version is back to modified bxintersect...
import bxintersect
class IntervalError(Exception):
"""Error due to interval overlap, etc"""
pass
cdef class Interval:
class Interval(object):
"""Represents an interval of time."""
cdef public double start, end
def __init__(self, double start, double end):
def __init__(self, start, end):
"""
'start' and 'end' are arbitrary floats that represent time
"""
@@ -46,9 +43,9 @@ cdef class Interval:
return self.__class__.__name__ + "(" + s + ")"
def __str__(self):
return "[" + repr(self.start) + " -> " + repr(self.end) + ")"
return "[" + str(self.start) + " -> " + str(self.end) + "]"
def __cmp__(self, Interval other):
def __cmp__(self, other):
"""Compare two intervals. If non-equal, order by start then end"""
if not isinstance(other, Interval):
raise TypeError("bad type")
@@ -62,20 +59,20 @@ cdef class Interval:
return -1
return 1
cpdef intersects(self, Interval other):
def intersects(self, other):
"""Return True if two Interval objects intersect"""
if (self.end <= other.start or self.start >= other.end):
return False
return True
cpdef subset(self, double start, double end):
def subset(self, start, end):
"""Return a new Interval that is a subset of this one"""
# A subclass that tracks additional data might override this.
if start < self.start or end > self.end:
raise IntervalError("not a subset")
return Interval(start, end)
cdef class DBInterval(Interval):
class DBInterval(Interval):
"""
Like Interval, but also tracks corresponding start/end times and
positions within the database. These are not currently modified
@@ -90,9 +87,6 @@ cdef class DBInterval(Interval):
db_end = 200, db_endpos = 20000
"""
cpdef public double db_start, db_end
cpdef public uint64_t db_startpos, db_endpos
def __init__(self, start, end,
db_start, db_end,
db_startpos, db_endpos):
@@ -117,7 +111,7 @@ cdef class DBInterval(Interval):
s += ", " + repr(self.db_startpos) + ", " + repr(self.db_endpos)
return self.__class__.__name__ + "(" + s + ")"
cpdef subset(self, double start, double end):
def subset(self, start, end):
"""
Return a new DBInterval that is a subset of this one
"""
@@ -127,18 +121,16 @@ cdef class DBInterval(Interval):
self.db_start, self.db_end,
self.db_startpos, self.db_endpos)
cdef class IntervalSet:
class IntervalSet(object):
"""
A non-intersecting set of intervals.
"""
cdef public rbtree.RBTree tree
def __init__(self, source=None):
"""
'source' is an Interval or IntervalSet to add.
"""
self.tree = rbtree.RBTree()
self.tree = bxinterval.IntervalTree()
if source is not None:
self += source
@@ -158,7 +150,7 @@ cdef class IntervalSet:
descs = [ str(x) for x in self ]
return "[" + ", ".join(descs) + "]"
def __match__(self, other):
def __eq__(self, other):
# This isn't particularly efficient, but it shouldn't get used in the
# general case.
"""Test equality of two IntervalSets.
@@ -177,8 +169,8 @@ cdef class IntervalSet:
else:
return False
this = list(self)
that = list(other)
this = [ x for x in self ]
that = [ x for x in other ]
try:
while True:
@@ -209,20 +201,10 @@ cdef class IntervalSet:
except IndexError:
return False
# Use __richcmp__ instead of __eq__, __ne__ for Cython.
def __richcmp__(self, other, int op):
if op == 2: # ==
return self.__match__(other)
elif op == 3: # !=
return not self.__match__(other)
return False
#def __eq__(self, other):
# return self.__match__(other)
#
#def __ne__(self, other):
# return not self.__match__(other)
def __ne__(self, other):
return not self.__eq__(other)
def __iadd__(self, object other not None):
def __iadd__(self, other):
"""Inplace add -- modifies self
This throws an exception if the regions being added intersect."""
@@ -230,19 +212,13 @@ cdef class IntervalSet:
if self.intersects(other):
raise IntervalError("Tried to add overlapping interval "
"to this set")
self.tree.insert(rbtree.RBNode(other.start, other.end, other))
self.tree.insert_interval(other)
else:
for x in other:
self.__iadd__(x)
return self
def iadd_nocheck(self, Interval other not None):
"""Inplace add -- modifies self.
'Optimized' version that doesn't check for intersection and
only inserts the new interval into the tree."""
self.tree.insert(rbtree.RBNode(other.start, other.end, other))
def __isub__(self, Interval other not None):
def __isub__(self, other):
"""Inplace subtract -- modifies self
Removes an interval from the set. Must exist exactly
@@ -253,13 +229,13 @@ cdef class IntervalSet:
self.tree.delete(i)
return self
def __add__(self, other not None):
def __add__(self, other):
"""Add -- returns a new object"""
new = IntervalSet(self)
new += IntervalSet(other)
return new
def __and__(self, other not None):
def __and__(self, other):
"""
Compute a new IntervalSet from the intersection of two others
@@ -270,15 +246,15 @@ cdef class IntervalSet:
if not isinstance(other, IntervalSet):
for i in self.intersection(other):
out.tree.insert(rbtree.RBNode(i.start, i.end, i))
out.tree.insert(rbtree.RBNode(i))
else:
for x in other:
for i in self.intersection(x):
out.tree.insert(rbtree.RBNode(i.start, i.end, i))
out.tree.insert(rbtree.RBNode(i))
return out
def intersection(self, Interval interval not None):
def intersection(self, interval):
"""
Compute a sequence of intervals that correspond to the
intersection between `self` and the provided interval.
@@ -295,24 +271,23 @@ cdef class IntervalSet:
if i:
if i.start >= interval.start and i.end <= interval.end:
yield i
elif i.start > interval.end:
break
else:
subset = i.subset(max(i.start, interval.start),
min(i.end, interval.end))
yield subset
cpdef intersects(self, Interval other):
def intersects(self, other):
### PROBABLY WRONG
"""Return True if this IntervalSet intersects another interval"""
for n in self.tree.intersect(other.start, other.end):
if n.obj.intersects(other):
return True
node = self.tree.find_left(other.start, other.end)
if node is None:
return False
for n in self.tree.inorder(node):
if n.obj:
if n.obj.intersects(other):
return True
if n.obj > other:
break
return False
def find_end(self, double t):
"""
Return an Interval from this tree that ends at time t, or
None if it doesn't exist.
"""
n = self.tree.find_left_end(t)
if n and n.obj.end == t:
return n.obj
return None

@@ -1 +0,0 @@
rbtree.pxd

@@ -1,5 +1,6 @@
# cython: profile=False
import tables
import time
import sys
import inspect
@@ -121,6 +122,15 @@ class Layout:
s += " %d" % d[i+1]
return s + "\n"
# PyTables description
def description(self):
"""Return the PyTables description of this layout"""
desc = {}
desc['timestamp'] = tables.Col.from_type('float64', pos=0)
for n in range(self.count):
desc['c' + str(n+1)] = tables.Col.from_type(self.datatype, pos=n+1)
return tables.Description(desc)
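# For example (illustrative): layout 'float32_8' yields a Description
# with a float64 'timestamp' column at pos 0 followed by float32
# columns 'c1' through 'c8'.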
# Get a layout by name
def get_named(typestring):
try:

@@ -4,16 +4,17 @@
Object that represents a NILM database file.
Manages both the SQL database and the table storage backend.
Manages both the SQL database and the PyTables storage backend.
"""
# Need absolute_import so that "import nilmdb" won't pull in nilmdb.py,
# but will pull the nilmdb module instead.
from __future__ import absolute_import
import nilmdb
from nilmdb.utils.printf import *
from nilmdb.printf import *
import sqlite3
import tables
import time
import sys
import os
@@ -24,8 +25,6 @@ import pyximport
pyximport.install()
from nilmdb.interval import Interval, DBInterval, IntervalSet, IntervalError
from . import bulkdata
# Note about performance and transactions:
#
# Committing a transaction in the default sync mode (PRAGMA synchronous=FULL)
@@ -88,13 +87,19 @@ class StreamError(NilmDBError):
class OverlapError(NilmDBError):
pass
@nilmdb.utils.must_close()
# Helper that lets us pass a PyTables table into bisect
class BisectableTable(object):
def __init__(self, table):
self.table = table
def __getitem__(self, index):
return self.table[index][0]
class NilmDB(object):
verbose = 0
def __init__(self, basepath, sync=True, max_results=None):
# set up path
self.basepath = os.path.abspath(basepath)
self.basepath = os.path.abspath(basepath.rstrip('/'))
# Create the database path if it doesn't exist
try:
@@ -103,16 +108,16 @@ class NilmDB(object):
if e.errno != errno.EEXIST:
raise IOError("can't create tree " + self.basepath)
# Our data goes inside it
self.data = bulkdata.BulkData(self.basepath)
# Our HDF5 file goes inside it
h5filename = os.path.abspath(self.basepath + "/data.h5")
self.h5file = tables.openFile(h5filename, "a", "NILM Database")
# SQLite database too
sqlfilename = os.path.join(self.basepath, "data.sql")
sqlfilename = os.path.abspath(self.basepath + "/data.sql")
# We use check_same_thread = False, assuming that the rest
# of the code (e.g. Server) will be smart and not access this
# database from multiple threads simultaneously. Otherwise
# false positives will occur when the database is only opened
# in one thread, and only accessed in another.
# database from multiple threads simultaneously. That requirement
# may be relaxed later.
self.con = sqlite3.connect(sqlfilename, check_same_thread = False)
self._sql_schema_update()
@@ -129,6 +134,17 @@ class NilmDB(object):
else:
self.max_results = 16384
self.opened = True
# Cached intervals
self._cached_iset = {}
def __del__(self):
if "opened" in self.__dict__: # pragma: no cover
fprintf(sys.stderr,
"error: NilmDB.close() wasn't called, path %s",
self.basepath)
def get_basepath(self):
return self.basepath
@@ -136,7 +152,8 @@ class NilmDB(object):
if self.con:
self.con.commit()
self.con.close()
self.data.close()
self.h5file.close()
del self.opened
def _sql_schema_update(self):
cur = self.con.cursor()
@@ -153,78 +170,58 @@ class NilmDB(object):
with self.con:
cur.execute("PRAGMA user_version = {v:d}".format(v=version))
@nilmdb.utils.lru_cache(size = 16)
def _get_intervals(self, stream_id):
"""
Return a mutable IntervalSet corresponding to the given stream ID.
"""
iset = IntervalSet()
result = self.con.execute("SELECT start_time, end_time, "
"start_pos, end_pos "
"FROM ranges "
"WHERE stream_id=?", (stream_id,))
try:
for (start_time, end_time, start_pos, end_pos) in result:
iset += DBInterval(start_time, end_time,
start_time, end_time,
start_pos, end_pos)
except IntervalError as e: # pragma: no cover
raise NilmDBError("unexpected overlap in ranges table!")
# Load from database if not cached
if stream_id not in self._cached_iset:
iset = IntervalSet()
result = self.con.execute("SELECT start_time, end_time, "
"start_pos, end_pos "
"FROM ranges "
"WHERE stream_id=?", (stream_id,))
try:
for (start_time, end_time, start_pos, end_pos) in result:
iset += DBInterval(start_time, end_time,
start_time, end_time,
start_pos, end_pos)
except IntervalError as e: # pragma: no cover
raise NilmDBError("unexpected overlap in ranges table!")
self._cached_iset[stream_id] = iset
# Return cached value
return self._cached_iset[stream_id]
return iset
# TODO: Split add_interval into two pieces, one to add
# and one to flush to disk?
# Need to think about this. Basic problem is that we can't
# mess with intervals once they're in the IntervalSet,
# without mucking with bxinterval internals.
# Maybe add a separate optimization step?
# Join intervals that have a fairly small gap between them
def _add_interval(self, stream_id, interval, start_pos, end_pos):
"""
Add interval to the internal interval cache, and to the database.
Note: arguments must be ints (not numpy.int64, etc)
"""
# Load this stream's intervals
# Ensure this stream's intervals are cached, and add the new
# interval to that cache.
iset = self._get_intervals(stream_id)
# Check for overlap
if iset.intersects(interval): # pragma: no cover (gets caught earlier)
try:
iset += DBInterval(interval.start, interval.end,
interval.start, interval.end,
start_pos, end_pos)
except IntervalError as e: # pragma: no cover
raise NilmDBError("new interval overlaps existing data")
# Check for adjacency. If there's a stream in the database
# that ends exactly when this one starts, and the database
# rows match up, we can make one interval that covers the
# time range [adjacent.start -> interval.end)
# and database rows [ adjacent.start_pos -> end_pos ].
# Only do this if the resulting interval isn't too large.
max_merged_rows = 30000000 # a bit more than 1 hour at 8 KHz
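# Illustrative numbers (not from the source): if an existing interval
# covers [100 -> 200) at rows 0..8000 and the new one is [200 -> 300)
# at rows 8000..16000, the two merge into [100 -> 300) at rows
# 0..16000, since 16000 - 0 < max_merged_rows.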
adjacent = iset.find_end(interval.start)
if (adjacent is not None and
start_pos == adjacent.db_endpos and
(end_pos - adjacent.db_startpos) < max_merged_rows):
# First delete the old one, both from our iset and the
# database
iset -= adjacent
self.con.execute("DELETE FROM ranges WHERE "
"stream_id=? AND start_time=? AND "
"end_time=? AND start_pos=? AND "
"end_pos=?", (stream_id,
adjacent.db_start,
adjacent.db_end,
adjacent.db_startpos,
adjacent.db_endpos))
# Now update our interval so the fallthrough add is
# correct.
interval.start = adjacent.start
start_pos = adjacent.db_startpos
# Add the new interval to the iset
iset.iadd_nocheck(DBInterval(interval.start, interval.end,
interval.start, interval.end,
start_pos, end_pos))
# Insert into the database
self.con.execute("INSERT INTO ranges "
"(stream_id,start_time,end_time,start_pos,end_pos) "
"VALUES (?,?,?,?,?)",
(stream_id, interval.start, interval.end,
int(start_pos), int(end_pos)))
self.con.commit()
def stream_list(self, path = None, layout = None):
@@ -288,11 +285,38 @@ class NilmDB(object):
layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8'
"""
# Create the bulk storage. Raises ValueError on error, which we
# pass along.
self.data.create(path, layout_name)
if path[0] != '/':
raise ValueError("paths must start with /")
[ group, node ] = path.rsplit("/", 1)
if group == '':
raise ValueError("invalid path")
# Insert into SQL database once the bulk storage is happy
# Make the group structure, one element at a time
group_path = group.lstrip('/').split("/")
for i in range(len(group_path)):
parent = "/" + "/".join(group_path[0:i])
child = group_path[i]
try:
self.h5file.createGroup(parent, child)
except tables.NodeError:
pass
# Get description
try:
desc = nilmdb.layout.get_named(layout_name).description()
except KeyError:
raise ValueError("no such layout")
# Estimated table size (for PyTables optimization purposes): assume
# 3 months worth of data at 8 KHz. It's OK if this is wrong.
exp_rows = 8000 * 60*60*24*30*3
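# (Illustrative arithmetic: 8000 rows/s * 86400 s/day * 90 days
# = 62,208,000,000 expected rows.)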
# Create the table
table = self.h5file.createTable(group, node,
description = desc,
expectedrows = exp_rows)
# Insert into SQL database once the PyTables is happy
with self.con as con:
con.execute("INSERT INTO streams (path, layout) VALUES (?,?)",
(path, layout_name))
@@ -313,7 +337,8 @@ class NilmDB(object):
"""
stream_id = self._stream_id(path)
with self.con as con:
con.execute("DELETE FROM metadata WHERE stream_id=?", (stream_id,))
con.execute("DELETE FROM metadata "
"WHERE stream_id=?", (stream_id,))
for key in data:
if data[key] != '':
con.execute("INSERT INTO metadata VALUES (?, ?, ?)",
@@ -336,47 +361,44 @@ class NilmDB(object):
data.update(newdata)
self.stream_set_metadata(path, data)
def stream_destroy(self, path):
"""Fully remove a table and all of its data from the database.
No way to undo it! Metadata is removed."""
stream_id = self._stream_id(path)
# Delete the cached interval data
self._get_intervals.cache_remove(self, stream_id)
# Delete the data
self.data.destroy(path)
# Delete metadata, stream, intervals
with self.con as con:
con.execute("DELETE FROM metadata WHERE stream_id=?", (stream_id,))
con.execute("DELETE FROM ranges WHERE stream_id=?", (stream_id,))
con.execute("DELETE FROM streams WHERE id=?", (stream_id,))
def stream_insert(self, path, start, end, data):
def stream_insert(self, path, parser, old_timestamp = None):
"""Insert new data into the database.
path: Path at which to add the data
start: Starting timestamp
end: Ending timestamp
data: Rows of data, to be passed to PyTable's table.append
method. E.g. nilmdb.layout.Parser.data
parser: nilmdb.layout.Parser instance full of data to insert
"""
if (not parser.min_timestamp or not parser.max_timestamp or
not len(parser.data)):
raise StreamError("no data provided")
# If we were provided with an old timestamp, the expectation
# is that the client has a contiguous block of time it is sending,
# but it's doing it over multiple calls to stream_insert.
# old_timestamp is the max_timestamp of the previous insert.
# To make things continuous, use that as our starting timestamp
# instead of what the parser found.
if old_timestamp:
min_timestamp = old_timestamp
else:
min_timestamp = parser.min_timestamp
# First check for basic overlap using timestamp info given.
stream_id = self._stream_id(path)
iset = self._get_intervals(stream_id)
interval = Interval(start, end)
interval = Interval(min_timestamp, parser.max_timestamp)
if iset.intersects(interval):
raise OverlapError("new data overlaps existing data at range: "
raise OverlapError("new data overlaps existing data: "
+ str(iset & interval))
# Insert the data
table = self.data.getnode(path)
# Insert the data into pytables
table = self.h5file.getNode(path)
row_start = table.nrows
table.append(data)
table.append(parser.data)
row_end = table.nrows
table.flush()
# Insert the record into the sql database.
self._add_interval(stream_id, interval, row_start, row_end)
# Casts are to convert from numpy.int64.
self._add_interval(stream_id, interval, int(row_start), int(row_end))
# And that's all
return "ok"
@@ -391,7 +413,7 @@ class NilmDB(object):
# Optimization for the common case where an interval wasn't truncated
if interval.start == interval.db_start:
return interval.db_startpos
return bisect.bisect_left(bulkdata.TimestampOnlyTable(table),
return bisect.bisect_left(BisectableTable(table),
interval.start,
interval.db_startpos,
interval.db_endpos)
@@ -410,7 +432,7 @@ class NilmDB(object):
# want to include the given timestamp in the results. This is
# so a queries like 1:00 -> 2:00 and 2:00 -> 3:00 return
# non-overlapping data.
return bisect.bisect_left(bulkdata.TimestampOnlyTable(table),
return bisect.bisect_left(BisectableTable(table),
interval.end,
interval.db_startpos,
interval.db_endpos)
@@ -434,7 +456,7 @@ class NilmDB(object):
than actually fetching the data. It is not limited by
max_results.
"""
table = self.data.getnode(path)
table = self.h5file.getNode(path)
stream_id = self._stream_id(path)
intervals = self._get_intervals(stream_id)
requested = Interval(start or 0, end or 1e12)

@@ -1,23 +0,0 @@
cdef class RBNode:
cdef public object obj
cdef public double start, end
cdef public int red
cdef public RBNode left, right, parent
cdef class RBTree:
cdef public RBNode nil, root
cpdef getroot(RBTree self)
cdef void __rotate_left(RBTree self, RBNode x)
cdef void __rotate_right(RBTree self, RBNode y)
cdef RBNode __successor(RBTree self, RBNode x)
cpdef RBNode successor(RBTree self, RBNode x)
cdef RBNode __predecessor(RBTree self, RBNode x)
cpdef RBNode predecessor(RBTree self, RBNode x)
cpdef insert(RBTree self, RBNode z)
cdef void __insert_fixup(RBTree self, RBNode x)
cpdef delete(RBTree self, RBNode z)
cdef inline void __delete_fixup(RBTree self, RBNode x)
cpdef RBNode find(RBTree self, double start, double end)
cpdef RBNode find_left_end(RBTree self, double t)
cpdef RBNode find_right_start(RBTree self, double t)

@@ -1,27 +1,20 @@
# cython: profile=False
# cython: cdivision=True
"""
Jim Paris <jim@jtan.com>
Red-black tree, where keys are stored as start/end timestamps.
This is a basic interval tree that holds half-open intervals:
[start, end)
Intervals must not overlap. Fixing that would involve making this
into an augmented interval tree as described in CLRS 14.3.
Code that assumes non-overlapping intervals is marked with the
string 'non-overlapping'.
"""
"""Red-black tree, where keys are stored as start/end timestamps."""
import sys
cimport rbtree
cdef class RBNode:
"""One node of the Red/Black tree, containing a key (start, end)
and value (obj)"""
def __init__(self, double start, double end, object obj = None):
class RBNode(object):
"""One node of the Red/Black tree. obj points to any object,
'start' and 'end' are timestamps that represent the key."""
def __init__(self, obj = None, start = None, end = None):
"""If given an object but no start/end times, get the
start/end times from the object.
If given start/end times, obj can be anything, including None."""
self.obj = obj
if start is None:
start = obj.start
if end is None:
end = obj.end
self.start = start
self.end = end
self.red = False
@@ -33,23 +26,21 @@ cdef class RBNode:
color = "R"
else:
color = "B"
if self.start == sys.float_info.min:
return "[node nil]"
return ("[node ("
+ str(self.obj) + ") "
return ("[node "
+ str(self.start) + " -> " + str(self.end) + " "
+ color + "]")
cdef class RBTree:
class RBTree(object):
"""Red/Black tree"""
# Init
def __init__(self):
self.nil = RBNode(start = sys.float_info.min,
end = sys.float_info.min)
self.nil.left = self.nil
self.nil.right = self.nil
self.nil.parent = self.nil
self.nil.nil = True
self.root = RBNode(start = sys.float_info.max,
end = sys.float_info.max)
@@ -57,21 +48,9 @@ cdef class RBTree:
self.root.right = self.nil
self.root.parent = self.nil
# We have a dummy root node to simplify operations, so from an
# external point of view, its left child is the real root.
cpdef getroot(self):
return self.root.left
# Rotations and basic operations
cdef void __rotate_left(self, RBNode x):
"""Rotate left:
#   x             y
#  / \    -->    / \
# z   y         x   w
#    / \       / \
#   v   w     z   v
"""
cdef RBNode y = x.right
def __rotate_left(self, x):
y = x.right
x.right = y.left
if y.left is not self.nil:
y.left.parent = x
@@ -83,15 +62,8 @@ cdef class RBTree:
y.left = x
x.parent = y
cdef void __rotate_right(self, RBNode y):
"""Rotate right:
#     y           x
#    / \   -->   / \
#   x   w       z   y
#  / \             / \
# z   v           v   w
"""
cdef RBNode x = y.left
def __rotate_right(self, y):
x = y.left
y.left = x.right
if x.right is not self.nil:
x.right.parent = y
@@ -103,9 +75,9 @@ cdef class RBTree:
x.right = y
y.parent = x
cdef RBNode __successor(self, RBNode x):
def __successor(self, x):
"""Returns the successor of RBNode x"""
cdef RBNode y = x.right
y = x.right
if y is not self.nil:
while y.left is not self.nil:
y = y.left
@@ -117,14 +89,10 @@ cdef class RBTree:
if y is self.root:
return self.nil
return y
cpdef RBNode successor(self, RBNode x):
"""Returns the successor of RBNode x, or None"""
cdef RBNode y = self.__successor(x)
return y if y is not self.nil else None
cdef RBNode __predecessor(self, RBNode x):
def _predecessor(self, x):
"""Returns the predecessor of RBNode x"""
cdef RBNode y = x.left
y = x.left
if y is not self.nil:
while y.right is not self.nil:
y = y.right
@@ -137,18 +105,14 @@ cdef class RBTree:
x = y
y = y.parent
return y
cpdef RBNode predecessor(self, RBNode x):
"""Returns the predecessor of RBNode x, or None"""
cdef RBNode y = self.__predecessor(x)
return y if y is not self.nil else None
# Insertion
cpdef insert(self, RBNode z):
def insert(self, z):
"""Insert RBNode z into RBTree and rebalance as necessary"""
z.left = self.nil
z.right = self.nil
cdef RBNode y = self.root
cdef RBNode x = self.root.left
y = self.root
x = self.root.left
while x is not self.nil:
y = x
if (x.start > z.start or (x.start == z.start and x.end > z.end)):
@@ -164,7 +128,7 @@ cdef class RBTree:
# relabel/rebalance
self.__insert_fixup(z)
cdef void __insert_fixup(self, RBNode x):
def __insert_fixup(self, x):
"""Rebalance/fix RBTree after a simple insertion of RBNode x"""
x.red = True
while x.parent.red:
@@ -199,11 +163,10 @@ cdef class RBTree:
self.root.left.red = False
# Deletion
cpdef delete(self, RBNode z):
def delete(self, z):
if z.left is None or z.right is None:
raise AttributeError("you can only delete a node object "
+ "from the tree; use find() to get one")
cdef RBNode x, y
if z.left is self.nil or z.right is self.nil:
y = z
else:
@@ -240,10 +203,10 @@ cdef class RBTree:
if not y.red:
self.__delete_fixup(x)
cdef void __delete_fixup(self, RBNode x):
def __delete_fixup(self, x):
"""Rebalance/fix RBTree after a deletion. RBNode x is the
child of the spliced out node."""
cdef RBNode rootLeft = self.root.left
rootLeft = self.root.left
while not x.red and x is not rootLeft:
if x is x.parent.left:
w = x.parent.right
@@ -289,89 +252,141 @@ cdef class RBTree:
x = rootLeft # exit loop
x.red = False
# Rendering
def __render_dot_node(self, node, max_depth = 20):
"""Render a single node and its children into a dot graph fragment"""
from printf import sprintf
if max_depth == 0:
return ""
if node is self.nil:
return ""
def c(red):
if red:
return 'color="#ff0000", style=filled, fillcolor="#ffc0c0"'
else:
return 'color="#000000", style=filled, fillcolor="#c0c0c0"'
s = sprintf("%d [label=\"%g\\n%g\", %s];\n",
id(node),
node.start, node.end,
c(node.red))
if node.left is self.nil:
s += sprintf("L%d [label=\"-\", %s];\n", id(node), c(False))
s += sprintf("%d -> L%d [label=L];\n", id(node), id(node))
else:
s += sprintf("%d -> %d [label=L];\n", id(node), id(node.left))
if node.right is self.nil:
s += sprintf("R%d [label=\"-\", %s];\n", id(node), c(False))
s += sprintf("%d -> R%d [label=R];\n", id(node), id(node))
else:
s += sprintf("%d -> %d [label=R];\n", id(node), id(node.right))
s += self.__render_dot_node(node.left, max_depth-1)
s += self.__render_dot_node(node.right, max_depth-1)
return s
def render_dot(self, title = "RBTree"):
"""Render the entire RBTree as a dot graph"""
return ("digraph rbtree {\n"
+ self.__render_dot_node(self.root.left)
+ "}\n");
def render_dot_live(self, title = "RBTree"):
"""Render the entire RBTree as a dot graph, live GTK view"""
import gtk
import gtk.gdk
sys.path.append("/usr/share/xdot")
import xdot
xdot.Pen.highlighted = lambda pen: pen
s = ("digraph rbtree {\n"
+ self.__render_dot_node(self.root.left)
+ "}\n");
window = xdot.DotWindow()
window.set_dotcode(s)
window.set_title(title + " - any key to close")
window.connect('destroy', gtk.main_quit)
def quit(widget, event):
if not event.is_modifier:
window.destroy()
gtk.main_quit()
window.widget.connect('key-press-event', quit)
gtk.main()
# Walking, searching
def __iter__(self):
return self.inorder()
return self.inorder(self.root.left)
def inorder(self, RBNode x = None):
def inorder(self, x = None):
"""Generator that performs an inorder walk for the tree
rooted at RBNode x"""
starting at RBNode x"""
if x is None:
x = self.getroot()
x = self.root.left
while x.left is not self.nil:
x = x.left
while x is not self.nil:
yield x
x = self.__successor(x)
cpdef RBNode find(self, double start, double end):
"""Return the node with exactly the given start and end."""
cdef RBNode x = self.getroot()
def __find_all(self, start, end, x):
"""Find node with the specified (start,end) key.
Also returns the largest node less than or equal to the key,
and the smallest node greater than or equal to the key."""
if x is None:
x = self.root.left
largest = self.nil
smallest = self.nil
while x is not self.nil:
if start < x.start:
x = x.left
smallest = x
x = x.left # start <
elif start == x.start:
if end == x.end:
break # found it
elif end < x.end:
x = x.left
if end < x.end:
smallest = x
x = x.left # start =, end <
elif end == x.end: # found it
smallest = x
largest = x
break
else:
x = x.right
largest = x
x = x.right # start =, end >
else:
x = x.right
return x if x is not self.nil else None
largest = x
x = x.right # start >
return (x, smallest, largest)
cpdef RBNode find_left_end(self, double t):
"""Find the leftmode node with end >= t. With non-overlapping
intervals, this is the first node that might overlap time t.
def find(self, start, end, x = None):
"""Find node with the key == (start,end), or None"""
y = self.__find_all(start, end, x)[0]  # exact match, or nil
return y if y is not self.nil else None
Note that this relies on non-overlapping intervals, since
it assumes that we can use the endpoints to traverse the
tree even though it was created using the start points."""
cdef RBNode x = self.getroot()
while x is not self.nil:
if t < x.end:
if x.left is self.nil:
break
x = x.left
elif t == x.end:
break
else:
if x.right is self.nil:
x = self.__successor(x)
break
x = x.right
return x if x is not self.nil else None
def find_right(self, start, end, x = None):
"""Find node with the smallest key >= (start,end), or None"""
y = self.__find_all(start, end, x)[1]
return y if y is not self.nil else None
cpdef RBNode find_right_start(self, double t):
"""Find the rightmode node with start <= t. With non-overlapping
intervals, this is the last node that might overlap time t."""
cdef RBNode x = self.getroot()
while x is not self.nil:
if t < x.start:
if x.left is self.nil:
x = self.__predecessor(x)
break
x = x.left
elif t == x.start:
break
else:
if x.right is self.nil:
break
x = x.right
return x if x is not self.nil else None
def find_left(self, start, end, x = None):
"""Find node with the largest key <= (start,end), or None"""
y = self.__find_all(start, end, x)[2]
return y if y is not self.nil else None
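For reference, a quick sketch of how these three lookups relate (the
tree contents are hypothetical; only the find / find_left /
find_right names come from the code above):

    # Suppose the tree holds the keys (1,2), (5,6), and (9,10).
    tree.find(5, 6)        # exact match -> the (5,6) node
    tree.find(4, 4)        # no exact match -> None
    tree.find_left(4, 4)   # largest key <= (4,4) -> the (1,2) node
    tree.find_right(4, 4)  # smallest key >= (4,4) -> the (5,6) node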
# Intersections
def intersect(self, double start, double end):
def intersect(self, start, end):
"""Generator that returns nodes that overlap the given
(start,end) range. Assumes non-overlapping intervals."""
# Start with the leftmost node that ends after start
cdef RBNode n = self.find_left_end(start)
while n is not None:
if n.start >= end:
# this node starts after the requested end; we're done
break
if start < n.end:
# this node overlaps our requested area
yield n
n = self.successor(n)
(start,end) range.
NOTE: this assumes non-overlapping intervals."""
# Start with the leftmost node before the starting point
n = self.find_left(start, start)
# If we didn't find one, look for the leftmost node before the
# ending point instead.
if n is None:
n = self.find_left(end, end)
# If we still didn't find it, there are no intervals that intersect.
if n is None:
return
# Now yield this node and all successors until their start
# points reach the requested end
if False:
yield
return
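The generator body above is cut off mid-rewrite (this is the WIP
commit). A minimal sketch of how the yield loop might be completed,
assuming the walk advances through __successor() the way the removed
version did; this is not part of the diff:

    # Walk successors, yielding overlapping nodes, until a node
    # starts at or past the requested end.
    while n is not None:
        if n.start >= end:
            break                  # past the requested range
        if n.end > start:
            yield n                # overlaps the requested area
        s = self.__successor(n)
        n = s if s is not self.nil else None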


@@ -1 +0,0 @@
rbtree.pxd


@@ -67,6 +67,3 @@ class WrapObject(object):
def __del__(self):
self.__wrap_call_queue.put((None, None, None, None))
self.__wrap_serializer.join()
# Just an alias
Serializer = WrapObject

View File

@@ -3,15 +3,15 @@
# Need absolute_import so that "import nilmdb" won't pull in nilmdb.py,
# but will pull the nilmdb module instead.
from __future__ import absolute_import
from nilmdb.utils.printf import *
import nilmdb
from nilmdb.printf import *
import cherrypy
import sys
import time
import os
import simplejson as json
import functools
try:
import cherrypy
@@ -26,46 +26,6 @@ class NilmApp(object):
version = "1.1"
# Decorators
def chunked_response(func):
"""Decorator to enable chunked responses"""
# Set this to False to get better tracebacks from some requests
# (/stream/extract, /stream/intervals).
func._cp_config = { 'response.stream': True }
return func
def workaround_cp_bug_1200(func): # pragma: no cover (just a workaround)
"""Decorator to work around CherryPy bug #1200 in a response
generator"""
# Even if chunked responses are disabled, you may still miss
# LookupError or UnicodeError exceptions due to CherryPy bug
# #1200. This throws them as generic Exceptions instead.
import traceback
@functools.wraps(func)
def wrapper(*args, **kwargs):
try:
for val in func(*args, **kwargs):
yield val
except (LookupError, UnicodeError) as e:
raise Exception("bug workaround; real exception is:\n" +
traceback.format_exc())
return wrapper
def exception_to_httperror(response = "400 Bad Request"):
"""Return a decorator that catches Exception and throws
a HTTPError describing it instead"""
def decorator(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
try:
return func(*args, **kwargs)
except Exception as e:
message = sprintf("%s: %s", type(e).__name__, str(e))
raise cherrypy.HTTPError(response, message)
return wrapper
return decorator
# CherryPy apps
class Root(NilmApp):
"""Root application for NILM database"""
@@ -99,7 +59,7 @@ class Root(NilmApp):
@cherrypy.expose
@cherrypy.tools.json_out()
def dbsize(self):
return nilmdb.utils.du(self.db.get_basepath())
return nilmdb.du.du(self.db.get_basepath())
class Stream(NilmApp):
"""Stream-specific operations"""
@@ -118,20 +78,15 @@ class Stream(NilmApp):
# /stream/create?path=/newton/prep&layout=PrepData
@cherrypy.expose
@cherrypy.tools.json_out()
@exception_to_httperror()
def create(self, path, layout):
"""Create a new stream in the database. Provide path
and one of the nilmdb.layout.layouts keys.
"""
return self.db.stream_create(path, layout)
# /stream/destroy?path=/newton/prep
@cherrypy.expose
@cherrypy.tools.json_out()
@exception_to_httperror()
def destroy(self, path):
"""Delete a stream and its associated data."""
return self.db.stream_destroy(path)
try:
return self.db.stream_create(path, layout)
except Exception as e:
message = sprintf("%s: %s", type(e).__name__, e.message)
raise cherrypy.HTTPError("400 Bad Request", message)
# /stream/get_metadata?path=/newton/prep
# /stream/get_metadata?path=/newton/prep&key=foo&key=bar
@@ -160,35 +115,49 @@ class Stream(NilmApp):
# /stream/set_metadata?path=/newton/prep&data=<json>
@cherrypy.expose
@cherrypy.tools.json_out()
@exception_to_httperror()
def set_metadata(self, path, data):
"""Set metadata for the named stream, replacing any
existing metadata. Data should be a json-encoded
dictionary"""
data_dict = json.loads(data)
self.db.stream_set_metadata(path, data_dict)
try:
data_dict = json.loads(data)
self.db.stream_set_metadata(path, data_dict)
except Exception as e:
message = sprintf("%s: %s", type(e).__name__, e.message)
raise cherrypy.HTTPError("400 Bad Request", message)
return "ok"
# /stream/update_metadata?path=/newton/prep&data=<json>
@cherrypy.expose
@cherrypy.tools.json_out()
@exception_to_httperror()
def update_metadata(self, path, data):
"""Update metadata for the named stream. Data
should be a json-encoded dictionary"""
data_dict = json.loads(data)
self.db.stream_update_metadata(path, data_dict)
try:
data_dict = json.loads(data)
self.db.stream_update_metadata(path, data_dict)
except Exception as e:
message = sprintf("%s: %s", type(e).__name__, e.message)
raise cherrypy.HTTPError("400 Bad Request", message)
return "ok"
# /stream/insert?path=/newton/prep
@cherrypy.expose
@cherrypy.tools.json_out()
#@cherrypy.tools.disable_prb()
def insert(self, path, start, end):
def insert(self, path, old_timestamp = None):
"""
Insert new data into the database. Provide textual data
(matching the path's layout) as an HTTP PUT.
old_timestamp is used when making multiple, split-up insertions
for a larger contiguous block of data. The first insert
will return the maximum timestamp that it saw, and the second
insert should provide this timestamp as an argument. This is
used to extend the previous database interval rather than
start a new one.
"""
# Important that we always read the input before throwing any
# errors, to keep lengths happy for persistent connections.
# However, CherryPy 3.2.2 has a bug where this fails for GET
@@ -213,36 +182,22 @@ class Stream(NilmApp):
"Error parsing input data: " +
e.message)
if (not parser.min_timestamp or not parser.max_timestamp or
not len(parser.data)):
raise cherrypy.HTTPError("400 Bad Request",
"no data provided")
# Check limits
start = float(start)
end = float(end)
if parser.min_timestamp < start:
raise cherrypy.HTTPError("400 Bad Request", "Data timestamp " +
repr(parser.min_timestamp) +
" < start time " + repr(start))
if parser.max_timestamp >= end:
raise cherrypy.HTTPError("400 Bad Request", "Data timestamp " +
repr(parser.max_timestamp) +
" >= end time " + repr(end))
# Now do the nilmdb insert, passing it the parser full of data.
try:
result = self.db.stream_insert(path, start, end, parser.data)
if old_timestamp:
old_timestamp = float(old_timestamp)
result = self.db.stream_insert(path, parser, old_timestamp)
except nilmdb.nilmdb.NilmDBError as e:
raise cherrypy.HTTPError("400 Bad Request", e.message)
# Done
return "ok"
# Return the maximum timestamp that we saw. The client will
# return this back to us as the old_timestamp parameter, if
# it has more data to send.
return ("ok", parser.max_timestamp)
# /stream/intervals?path=/newton/prep
# /stream/intervals?path=/newton/prep&start=1234567890.0&end=1234567899.0
@cherrypy.expose
@chunked_response
def intervals(self, path, start = None, end = None):
"""
Get intervals from backend database. Streams the resulting
@@ -264,9 +219,9 @@ class Stream(NilmApp):
if len(streams) != 1:
raise cherrypy.HTTPError("404 Not Found", "No such stream")
@workaround_cp_bug_1200
def content(start, end):
# Note: disable chunked responses to see tracebacks from here.
# Note: disable response.stream below to get better debug info
# from tracebacks in this subfunction.
while True:
(intervals, restart) = self.db.stream_intervals(path,start,end)
response = ''.join([ json.dumps(i) + "\n" for i in intervals ])
@@ -275,10 +230,10 @@ class Stream(NilmApp):
break
start = restart
return content(start, end)
intervals._cp_config = { 'response.stream': True } # chunked HTTP response
# /stream/extract?path=/newton/prep&start=1234567890.0&end=1234567899.0
@cherrypy.expose
@chunked_response
def extract(self, path, start = None, end = None, count = False):
"""
Extract data from backend database. Streams the resulting
@@ -308,9 +263,9 @@ class Stream(NilmApp):
# Get formatter
formatter = nilmdb.layout.Formatter(layout)
@workaround_cp_bug_1200
def content(start, end, count):
# Note: disable chunked responses to see tracebacks from here.
# Note: disable response.stream below to get better debug info
# from tracebacks in this subfunction.
if count:
matched = self.db.stream_extract(path, start, end, count)
yield sprintf("%d\n", matched)
@@ -326,6 +281,8 @@ class Stream(NilmApp):
return
start = restart
return content(start, end, count)
extract._cp_config = { 'response.stream': True } # chunked HTTP response
class Exiter(object):
"""App that exits the server, for testing"""
@@ -350,7 +307,7 @@ class Server(object):
# Need to wrap DB object in a serializer because we'll call
# into it from separate threads.
self.embedded = embedded
self.db = nilmdb.utils.Serializer(db)
self.db = nilmdb.serializer.WrapObject(db)
cherrypy.config.update({
'server.socket_host': host,
'server.socket_port': port,


@@ -5,7 +5,6 @@
# with nilmdb.Timer("flush"):
# foo.flush()
from __future__ import print_function
import contextlib
import time
@@ -19,4 +18,4 @@ def Timer(name = None, tosyslog = False):
import syslog
syslog.syslog(msg)
else:
print(msg)
print msg


@@ -1,7 +1,7 @@
"""File-like objects that add timestamps to the input lines"""
from __future__ import absolute_import
from nilmdb.utils.printf import *
from nilmdb.printf import *
import time
import os


@@ -1,9 +0,0 @@
"""NilmDB utilities"""
from .timer import Timer
from .iteratorizer import Iteratorizer
from .serializer import Serializer
from .lrucache import lru_cache
from .diskusage import du
from .mustclose import must_close
from .urllib import urlencode


@@ -1,66 +0,0 @@
# Memoize a function's return value with a least-recently-used cache
# Based on:
# http://code.activestate.com/recipes/498245-lru-and-lfu-cache-decorators/
# with added 'destructor' functionality.
import collections
import functools
def lru_cache(size = 10, onremove = None):
"""Least-recently-used cache decorator.
@lru_cache(size = 10, onremove = None)
def f(...):
pass
Given a function and arguments, memoize its return value.
Up to 'size' elements are cached.
When evicting a value from the cache, call the function
'onremove' with the value that's being evicted.
Call f.cache_remove(...) to evict the cache entry with the given
arguments. Call f.cache_remove_all() to evict all entries.
f.cache_hits and f.cache_misses give statistics.
"""
def decorator(func):
cache = collections.OrderedDict() # order: least- to most-recent
def evict(value):
if onremove:
onremove(value)
@functools.wraps(func)
def wrapper(*args, **kwargs):
key = args + tuple(sorted(kwargs.items()))
try:
value = cache.pop(key)
wrapper.cache_hits += 1
except KeyError:
value = func(*args, **kwargs)
wrapper.cache_misses += 1
if len(cache) >= size:
evict(cache.popitem(0)[1]) # evict LRU cache entry
cache[key] = value # (re-)insert this key at end
return value
def cache_remove(*args, **kwargs):
"""Remove the described key from this cache, if present.
Note that if the original wrapped function was implicitly
passed 'self', you need to pass it as an argument here too."""
key = args + tuple(sorted(kwargs.items()))
if key in cache:
evict(cache.pop(key))
def cache_remove_all():
# pop entries one at a time; mutating the dict while
# iterating over it would raise RuntimeError
while cache:
evict(cache.popitem(0)[1])
wrapper.cache_hits = 0
wrapper.cache_misses = 0
wrapper.cache_remove = cache_remove
wrapper.cache_remove_all = cache_remove_all
return wrapper
return decorator
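A short usage sketch of the decorator being deleted here (the
evicted list and square() function are illustrative; size, onremove,
cache_remove, and the hit/miss counters follow the code above):

    evicted = []

    @lru_cache(size = 2, onremove = evicted.append)
    def square(n):
        return n * n

    square(1); square(2); square(1)   # (1) becomes most recent
    square(3)                         # evicts (2): evicted == [4]
    square.cache_remove(1)            # explicit: evicted == [4, 1]
    # square.cache_hits == 1, square.cache_misses == 3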


@@ -1,42 +0,0 @@
# Class decorator that warns on stderr at deletion time if the class's
# close() member wasn't called.
from nilmdb.utils.printf import *
import sys
def must_close(errorfile = sys.stderr):
def decorator(cls):
def dummy(*args, **kwargs):
pass
if "__init__" not in cls.__dict__:
cls.__init__ = dummy
if "__del__" not in cls.__dict__:
cls.__del__ = dummy
if "close" not in cls.__dict__:
cls.close = dummy
orig_init = cls.__init__
orig_del = cls.__del__
orig_close = cls.close
def __init__(self, *args, **kwargs):
ret = orig_init(self, *args, **kwargs)
self.__dict__["_must_close"] = True
return ret
def __del__(self):
if "_must_close" in self.__dict__:
fprintf(errorfile, "error: %s.close() wasn't called!\n",
self.__class__.__name__)
return orig_del(self)
def close(self, *args, **kwargs):
del self._must_close
return orig_close(self)
cls.__init__ = __init__
cls.__del__ = __del__
cls.close = close
return cls
return decorator
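A minimal illustration of the contract (the Conn class is
hypothetical; as the test file below notes, the warning relies on
CPython collecting the object immediately):

    @must_close()
    class Conn(object):
        def close(self):
            pass

    c = Conn()
    del c        # stderr: "error: Conn.close() wasn't called!"
    c = Conn()
    c.close()
    del c        # silent; close() was called first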


@@ -1,68 +0,0 @@
from __future__ import absolute_import
from urllib import quote_plus, _is_unicode
import sys
# urllib.urlencode insists on encoding Unicode as ASCII. This is an
# exact copy of that function, except we encode it as UTF-8 instead.
def urlencode(query, doseq=0):
"""Encode a sequence of two-element tuples or dictionary into a URL query string.
If any values in the query arg are sequences and doseq is true, each
sequence element is converted to a separate parameter.
If the query arg is a sequence of two-element tuples, the order of the
parameters in the output will match the order of parameters in the
input.
"""
if hasattr(query,"items"):
# mapping objects
query = query.items()
else:
# it's a bother at times that strings and string-like objects are
# sequences...
try:
# non-sequence items should not work with len()
# non-empty strings will fail this
if len(query) and not isinstance(query[0], tuple):
raise TypeError
# zero-length sequences of all types will get here and succeed,
# but that's a minor nit - since the original implementation
# allowed empty dicts that type of behavior probably should be
# preserved for consistency
except TypeError:
ty,va,tb = sys.exc_info()
raise TypeError, "not a valid non-string sequence or mapping object", tb
l = []
if not doseq:
# preserve old behavior
for k, v in query:
k = quote_plus(str(k))
v = quote_plus(str(v))
l.append(k + '=' + v)
else:
for k, v in query:
k = quote_plus(str(k))
if isinstance(v, str):
v = quote_plus(v)
l.append(k + '=' + v)
elif _is_unicode(v):
# is there a reasonable way to convert to ASCII?
# encode generates a string, but "replace" or "ignore"
# lose information and "strict" can raise UnicodeError
v = quote_plus(v.encode("utf-8","strict"))
l.append(k + '=' + v)
else:
try:
# is this a sufficient test for sequence-ness?
len(v)
except TypeError:
# not a sequence
v = quote_plus(str(v))
l.append(k + '=' + v)
else:
# loop over the sequence
for elt in v:
l.append(k + '=' + quote_plus(str(elt)))
return '&'.join(l)


@@ -3,17 +3,14 @@
import nilmdb
import argparse
formatter = argparse.ArgumentDefaultsHelpFormatter
parser = argparse.ArgumentParser(description='Run the NILM server',
formatter_class = formatter)
parser = argparse.ArgumentParser(description='Run the NILM server')
parser.add_argument('-p', '--port', help='Port number', type=int, default=12380)
parser.add_argument('-d', '--database', help='Database directory', default="db")
parser.add_argument('-y', '--yappi', help='Run with yappi profiler',
action='store_true')
args = parser.parse_args()
# Start web app on a custom port
db = nilmdb.NilmDB(args.database)
db = nilmdb.NilmDB("db")
server = nilmdb.Server(db, host = "127.0.0.1",
port = args.port,
embedded = False)


@@ -10,14 +10,10 @@ cover-erase=
##cover-branches= # need nose 1.1.3 for this
stop=
verbosity=2
#tests=tests/test_mustclose.py
#tests=tests/test_lrucache.py
#tests=tests/test_cmdline.py
#tests=tests/test_layout.py
#tests=tests/test_rbtree.py
#tests=tests/test_interval.py
#tests=tests/test_rbtree.py,tests/test_interval.py
#tests=tests/test_interval.py
tests=tests/test_interval.py
#tests=tests/test_client.py
#tests=tests/test_timestamper.py
#tests=tests/test_serializer.py


@@ -1,90 +0,0 @@
import sys
class Renderer(object):
def __init__(self, getleft, getright,
getred, getstart, getend, nil):
self.getleft = getleft
self.getright = getright
self.getred = getred
self.getstart = getstart
self.getend = getend
self.nil = nil
# Rendering
def __render_dot_node(self, node, max_depth = 20):
"""Render a single node and its children into a dot graph fragment"""
from nilmdb.utils.printf import sprintf
if max_depth == 0:
return ""
if node is self.nil:
return ""
def c(red):
if red:
return 'color="#ff0000", style=filled, fillcolor="#ffc0c0"'
else:
return 'color="#000000", style=filled, fillcolor="#c0c0c0"'
s = sprintf("%d [label=\"%g\\n%g\", %s];\n",
id(node),
self.getstart(node), self.getend(node),
c(self.getred(node)))
if self.getleft(node) is self.nil:
s += sprintf("L%d [label=\"-\", %s];\n", id(node), c(False))
s += sprintf("%d -> L%d [label=L];\n", id(node), id(node))
else:
s += sprintf("%d -> %d [label=L];\n",
id(node),id(self.getleft(node)))
if self.getright(node) is self.nil:
s += sprintf("R%d [label=\"-\", %s];\n", id(node), c(False))
s += sprintf("%d -> R%d [label=R];\n", id(node), id(node))
else:
s += sprintf("%d -> %d [label=R];\n",
id(node), id(self.getright(node)))
s += self.__render_dot_node(self.getleft(node), max_depth-1)
s += self.__render_dot_node(self.getright(node), max_depth-1)
return s
def render_dot(self, rootnode, title = "Tree"):
"""Render the entire tree as a dot graph"""
return ("digraph rbtree {\n"
+ self.__render_dot_node(rootnode)
+ "}\n");
def render_dot_live(self, rootnode, title = "Tree"):
"""Render the entiretree as a dot graph, live GTK view"""
import gtk
import gtk.gdk
sys.path.append("/usr/share/xdot")
import xdot
xdot.Pen.highlighted = lambda pen: pen
s = ("digraph rbtree {\n"
+ self.__render_dot_node(rootnode)
+ "}\n");
window = xdot.DotWindow()
window.set_dotcode(s)
window.set_title(title + " - any key to close")
window.connect('destroy', gtk.main_quit)
def quit(widget, event):
if not event.is_modifier:
window.destroy()
gtk.main_quit()
window.widget.connect('key-press-event', quit)
gtk.main()
class RBTreeRenderer(Renderer):
def __init__(self, tree):
Renderer.__init__(self,
lambda node: node.left,
lambda node: node.right,
lambda node: node.red,
lambda node: node.start,
lambda node: node.end,
tree.nil)
self.tree = tree
def render(self, title = "RBTree", live = True):
if live:
return Renderer.render_dot_live(self, self.tree.getroot(), title)
else:
return Renderer.render_dot(self, self.tree.getroot(), title)


@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-
import nilmdb
from nilmdb.utils.printf import *
from nilmdb.printf import *
from nilmdb.client import ClientError, ServerError
import datetime_tz
@@ -84,8 +82,6 @@ class TestClient(object):
# Bad layout type
with assert_raises(ClientError):
client.stream_create("/newton/prep", "NoSuchLayout")
# Create three streams
client.stream_create("/newton/prep", "PrepData")
client.stream_create("/newton/raw", "RawData")
client.stream_create("/newton/zzz/rawnotch", "RawNotchedData")
@@ -135,7 +131,6 @@ class TestClient(object):
testfile = "tests/data/prep-20120323T1000"
start = datetime_tz.datetime_tz.smartparse("20120323T1000")
start = start.totimestamp()
rate = 120
# First try a nonexistent path
@@ -160,41 +155,14 @@ class TestClient(object):
# Try forcing a server request with empty data
with assert_raises(ClientError) as e:
client.http.put("stream/insert", "", { "path": "/newton/prep",
"start": 0, "end": 0 })
client.http.put("stream/insert", "", { "path": "/newton/prep" })
in_("400 Bad Request", str(e.exception))
in_("no data provided", str(e.exception))
# Specify start/end (starts too late)
data = nilmdb.timestamper.TimestamperRate(testfile, start, 120)
with assert_raises(ClientError) as e:
result = client.stream_insert("/newton/prep", data,
start + 5, start + 120)
in_("400 Bad Request", str(e.exception))
in_("Data timestamp 1332511200.0 < start time 1332511205.0",
str(e.exception))
# Specify start/end (ends too early)
data = nilmdb.timestamper.TimestamperRate(testfile, start, 120)
with assert_raises(ClientError) as e:
result = client.stream_insert("/newton/prep", data,
start, start + 1)
in_("400 Bad Request", str(e.exception))
# Client chunks the input, so the exact timestamp here might change
# if the chunk positions change.
in_("Data timestamp 1332511271.016667 >= end time 1332511201.0",
str(e.exception))
# Now do the real load
data = nilmdb.timestamper.TimestamperRate(testfile, start, 120)
result = client.stream_insert("/newton/prep", data,
start, start + 119.999777)
eq_(result, "ok")
# Verify the intervals. Should be just one, even if the data
# was inserted in chunks, due to nilmdb interval concatenation.
intervals = list(client.stream_intervals("/newton/prep"))
eq_(intervals, [[start, start + 119.999777]])
result = client.stream_insert("/newton/prep", data)
eq_(result[0], "ok")
# Try some overlapping data -- just insert it again
data = nilmdb.timestamper.TimestamperRate(testfile, start, 120)
@@ -247,8 +215,7 @@ class TestClient(object):
# Check PUT with generator out
with assert_raises(ClientError) as e:
client.http.put_gen("stream/insert", "",
{ "path": "/newton/prep",
"start": 0, "end": 0 }).next()
{ "path": "/newton/prep" }).next()
in_("400 Bad Request", str(e.exception))
in_("no data provided", str(e.exception))
@@ -271,7 +238,7 @@ class TestClient(object):
# still disable chunked responses for debugging.
x = client.http.get("stream/intervals", { "path": "/newton/prep" },
retjson=False)
lines_(x, 1)
eq_(x.count('\n'), 2)
if "transfer-encoding: chunked" not in client.http._headers.lower():
warnings.warn("Non-chunked HTTP response for /stream/intervals")
@@ -281,40 +248,3 @@ class TestClient(object):
"end": "123" }, retjson=False)
if "transfer-encoding: chunked" not in client.http._headers.lower():
warnings.warn("Non-chunked HTTP response for /stream/extract")
def test_client_7_unicode(self):
# Basic Unicode tests
client = nilmdb.Client(url = "http://localhost:12380/")
# Delete streams that exist
for stream in client.stream_list():
client.stream_destroy(stream[0])
# Database is empty
eq_(client.stream_list(), [])
# Create Unicode stream, match it
raw = [ u"/düsseldorf/raw", u"uint16_6" ]
prep = [ u"/düsseldorf/prep", u"uint16_6" ]
client.stream_create(*raw)
eq_(client.stream_list(), [raw])
eq_(client.stream_list(layout=raw[1]), [raw])
eq_(client.stream_list(path=raw[0]), [raw])
client.stream_create(*prep)
eq_(client.stream_list(), [prep, raw])
# Set / get metadata with Unicode keys and values
eq_(client.stream_get_metadata(raw[0]), {})
eq_(client.stream_get_metadata(prep[0]), {})
meta1 = { u"alpha": u"α",
u"β": u"beta" }
meta2 = { u"alpha": u"α" }
meta3 = { u"β": u"beta" }
client.stream_set_metadata(prep[0], meta1)
client.stream_update_metadata(prep[0], {})
client.stream_update_metadata(raw[0], meta2)
client.stream_update_metadata(raw[0], meta3)
eq_(client.stream_get_metadata(prep[0]), meta1)
eq_(client.stream_get_metadata(raw[0]), meta1)
eq_(client.stream_get_metadata(raw[0], [ "alpha" ]), meta2)
eq_(client.stream_get_metadata(raw[0], [ "alpha", "β" ]), meta1)


@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-
import nilmdb
from nilmdb.utils.printf import *
from nilmdb.printf import *
import nilmdb.cmdline
from nose.tools import *
@@ -15,7 +13,7 @@ import threading
import urllib2
from urllib2 import urlopen, HTTPError
import Queue
import StringIO
import cStringIO
import shlex
from test_helpers import *
@@ -47,18 +45,12 @@ def setup_module():
def teardown_module():
server_stop()
# Add an encoding property to StringIO so Python will convert Unicode
# properly when writing or reading.
class UTF8StringIO(StringIO.StringIO):
encoding = 'utf-8'
class TestCmdline(object):
def run(self, arg_string, infile=None, outfile=None):
"""Run a cmdline client with the specified argument string,
passing the given input. Returns a tuple with the output and
exit code"""
# printf("TZ=UTC ./nilmtool.py %s\n", arg_string)
class stdio_wrapper:
def __init__(self, stdin, stdout, stderr):
self.io = (stdin, stdout, stderr)
@@ -69,18 +61,15 @@ class TestCmdline(object):
( sys.stdin, sys.stdout, sys.stderr ) = self.saved
# Empty input if none provided
if infile is None:
infile = UTF8StringIO("")
infile = cStringIO.StringIO("")
# Capture stderr
errfile = UTF8StringIO()
errfile = cStringIO.StringIO()
if outfile is None:
# If no output file, capture stdout with stderr
outfile = errfile
with stdio_wrapper(infile, outfile, errfile) as s:
try:
# shlex doesn't support Unicode very well. Encode the
# string as UTF-8 explicitly before splitting.
args = shlex.split(arg_string.encode('utf-8'))
nilmdb.cmdline.Cmdline(args).run()
nilmdb.cmdline.Cmdline(shlex.split(arg_string)).run()
sys.exit(0)
except SystemExit as e:
exitcode = e.code
@@ -203,22 +192,11 @@ class TestCmdline(object):
self.contain("no such layout")
# Create a few streams
self.ok("create /newton/zzz/rawnotch RawNotchedData")
self.ok("create /newton/prep PrepData")
self.ok("create /newton/raw RawData")
self.ok("create /newton/zzz/rawnotch RawNotchedData")
# Should not be able to create a stream with another stream as
# its parent
self.fail("create /newton/prep/blah PrepData")
self.contain("path is subdir of existing node")
# Should not be able to create a stream at a location that
# has other nodes as children
self.fail("create /newton/zzz PrepData")
self.contain("subdirs of this path already exist")
# Verify we got those 3 streams and they're returned in
# alphabetical order.
# Verify we got those 3 streams
self.ok("list")
self.match("/newton/prep PrepData\n"
"/newton/raw RawData\n"
@@ -308,9 +286,16 @@ class TestCmdline(object):
eq_(cmd.parse_time("hi there 20120405 1400-0400 testing! 123"), test)
eq_(cmd.parse_time("20120405 1800 UTC"), test)
eq_(cmd.parse_time("20120405 1400-0400 UTC"), test)
for badtime in [ "20120405 1400-9999", "hello", "-", "", "14:00" ]:
with assert_raises(ValueError):
x = cmd.parse_time(badtime)
with assert_raises(ValueError):
print cmd.parse_time("20120405 1400-9999")
with assert_raises(ValueError):
print cmd.parse_time("hello")
with assert_raises(ValueError):
print cmd.parse_time("-")
with assert_raises(ValueError):
print cmd.parse_time("")
with assert_raises(ValueError):
print cmd.parse_time("14:00")
eq_(cmd.parse_time("snapshot-20120405-140000.raw.gz"), test)
eq_(cmd.parse_time("prep-20120405T1400"), test)
@@ -377,36 +362,36 @@ class TestCmdline(object):
def test_cmdline_07_detail(self):
# Just count the number of lines, it's probably fine
self.ok("list --detail")
lines_(self.captured, 8)
eq_(self.captured.count('\n'), 11)
self.ok("list --detail --path *prep")
lines_(self.captured, 4)
eq_(self.captured.count('\n'), 7)
self.ok("list --detail --path *prep --start='23 Mar 2012 10:02'")
lines_(self.captured, 3)
eq_(self.captured.count('\n'), 5)
self.ok("list --detail --path *prep --start='23 Mar 2012 10:05'")
lines_(self.captured, 2)
eq_(self.captured.count('\n'), 3)
self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15'")
lines_(self.captured, 2)
eq_(self.captured.count('\n'), 2)
self.contain("10:05:15.000")
self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15.50'")
lines_(self.captured, 2)
eq_(self.captured.count('\n'), 2)
self.contain("10:05:15.500")
self.ok("list --detail --path *prep --start='23 Mar 2012 19:05:15.50'")
lines_(self.captured, 2)
eq_(self.captured.count('\n'), 2)
self.contain("no intervals")
self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15.50'"
+ " --end='23 Mar 2012 10:05:15.50'")
lines_(self.captured, 2)
eq_(self.captured.count('\n'), 2)
self.contain("10:05:15.500")
self.ok("list --detail")
lines_(self.captured, 8)
eq_(self.captured.count('\n'), 11)
def test_cmdline_08_extract(self):
# nonexistent stream
@@ -459,7 +444,7 @@ class TestCmdline(object):
# all data put in by tests
self.ok("extract -a /newton/prep --start 2000-01-01 --end 2020-01-01")
lines_(self.captured, 43204)
eq_(self.captured.count('\n'), 43204)
self.ok("extract -c /newton/prep --start 2000-01-01 --end 2020-01-01")
self.match("43200\n")
@@ -468,75 +453,6 @@ class TestCmdline(object):
server_stop()
server_start(max_results = 2)
self.ok("list --detail")
lines_(self.captured, 8)
eq_(self.captured.count('\n'), 11)
server_stop()
server_start()
def test_cmdline_10_destroy(self):
# Delete records
self.ok("destroy --help")
self.fail("destroy")
self.contain("too few arguments")
self.fail("destroy /no/such/stream")
self.contain("No stream at path")
self.fail("destroy asdfasdf")
self.contain("No stream at path")
# From previous tests, we have:
self.ok("list")
self.match("/newton/prep PrepData\n"
"/newton/raw RawData\n"
"/newton/zzz/rawnotch RawNotchedData\n")
# Notice how they're not empty
self.ok("list --detail")
lines_(self.captured, 8)
# Delete some
self.ok("destroy /newton/prep")
self.ok("list")
self.match("/newton/raw RawData\n"
"/newton/zzz/rawnotch RawNotchedData\n")
self.ok("destroy /newton/zzz/rawnotch")
self.ok("list")
self.match("/newton/raw RawData\n")
self.ok("destroy /newton/raw")
self.ok("create /newton/raw RawData")
self.ok("destroy /newton/raw")
self.ok("list")
self.match("")
# Re-create a previously deleted location, and some new ones
rebuild = [ "/newton/prep", "/newton/zzz",
"/newton/raw", "/newton/asdf/qwer" ]
for path in rebuild:
# Create the path
self.ok("create " + path + " PrepData")
self.ok("list")
self.contain(path)
# Make sure it was created empty
self.ok("list --detail --path " + path)
self.contain("(no intervals)")
def test_cmdline_11_unicode(self):
# Unicode paths.
self.ok("destroy /newton/asdf/qwer")
self.ok("destroy /newton/prep")
self.ok("destroy /newton/raw")
self.ok("destroy /newton/zzz")
self.ok(u"create /düsseldorf/raw uint16_6")
self.ok("list --detail")
self.contain(u"/düsseldorf/raw uint16_6")
self.contain("(no intervals)")
# Unicode metadata
self.ok(u"metadata /düsseldorf/raw --set α=beta 'γ'")
self.ok(u"metadata /düsseldorf/raw --update 'α=β ε τ α'")
self.ok(u"metadata /düsseldorf/raw")
self.match(u"α=β ε τ α\nγ\n")


@@ -20,12 +20,6 @@ def ne_(a, b):
if not a != b:
raise AssertionError("unexpected %s == %s" % (myrepr(a), myrepr(b)))
def lines_(a, n):
l = a.count('\n')
if not l == n:
raise AssertionError("wanted %d lines, got %d in output: '%s'"
% (n, l, a))
def recursive_unlink(path):
try:
shutil.rmtree(path)


@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import nilmdb
from nilmdb.utils.printf import *
from nilmdb.printf import *
import datetime_tz
from nose.tools import *
@@ -13,19 +13,12 @@ from nilmdb.interval import Interval, DBInterval, IntervalSet, IntervalError
from test_helpers import *
import unittest
# set to False to skip live renders
do_live_renders = False
def render(iset, description = "", live = True):
import renderdot
r = renderdot.RBTreeRenderer(iset.tree)
return r.render(description, live and do_live_renders)
def makeset(string):
"""Build an IntervalSet from a string, for testing purposes
Each character is 1 second
[ = interval start
| = interval end + next start
| = interval end + adjacent start
] = interval end
. = zero-width interval (identical start and end)
anything else is ignored
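For example, in this notation (only the column positions matter;
these calls are illustrative):

    makeset("  [--|-]  ")   # two adjacent intervals sharing an endpoint
    makeset("  [-] [-] ")   # two disjoint intervals
    makeset("  .       ")   # one zero-width interval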
@@ -38,7 +31,7 @@ def makeset(string):
elif (c == "|"):
iset += Interval(start, day)
start = day
elif (c == ")"):
elif (c == "]"):
iset += Interval(start, day)
del start
elif (c == "."):
@@ -78,24 +71,24 @@ class TestInterval:
assert(Interval(d1, d3) < Interval(d2, d3))
assert(Interval(d2, d2) > Interval(d1, d3))
assert(Interval(d3, d3) == Interval(d3, d3))
#with assert_raises(TypeError): # was AttributeError, that's wrong
# x = (i == 123)
with assert_raises(TypeError): # was AttributeError, that's wrong
x = (i == 123)
# subset
eq_(Interval(d1, d3).subset(d1, d2), Interval(d1, d2))
assert(Interval(d1, d3).subset(d1, d2) == Interval(d1, d2))
with assert_raises(IntervalError):
x = Interval(d2, d3).subset(d1, d2)
# big integers and floats
x = Interval(5000111222, 6000111222)
eq_(str(x), "[5000111222.0 -> 6000111222.0)")
eq_(str(x), "[5000111222.0 -> 6000111222.0]")
x = Interval(123.45, 234.56)
eq_(str(x), "[123.45 -> 234.56)")
eq_(str(x), "[123.45 -> 234.56]")
# misc
i = Interval(d1, d2)
eq_(repr(i), repr(eval(repr(i))))
eq_(str(i), "[1332561600.0 -> 1332648000.0)")
eq_(str(i), "[1332561600.0 -> 1332648000.0]")
def test_interval_intersect(self):
# Test Interval intersections
@@ -116,7 +109,7 @@ class TestInterval:
except IntervalError:
assert(i not in should_intersect[True] and
i not in should_intersect[False])
with assert_raises(TypeError):
with assert_raises(AttributeError):
x = i1.intersects(1234)
def test_intervalset_construct(self):
@@ -137,15 +130,6 @@ class TestInterval:
x = iseta != 3
ne_(IntervalSet(a), IntervalSet(b))
# Note that assignment makes a new reference (not a copy)
isetd = IntervalSet(isetb)
isete = isetd
eq_(isetd, isetb)
eq_(isetd, isete)
isetd -= a
ne_(isetd, isetb)
eq_(isetd, isete)
# test iterator
for interval in iseta:
pass
@@ -167,18 +151,11 @@ class TestInterval:
iset = IntervalSet(a)
iset += IntervalSet(b)
eq_(iset, IntervalSet([a, b]))
iset = IntervalSet(a)
iset += b
eq_(iset, IntervalSet([a, b]))
iset = IntervalSet(a)
iset.iadd_nocheck(b)
eq_(iset, IntervalSet([a, b]))
iset = IntervalSet(a) + IntervalSet(b)
eq_(iset, IntervalSet([a, b]))
iset = IntervalSet(b) + a
eq_(iset, IntervalSet([a, b]))
@@ -191,79 +168,61 @@ class TestInterval:
# misc
eq_(repr(iset), repr(eval(repr(iset))))
eq_(str(iset), "[[100.0 -> 200.0), [200.0 -> 300.0)]")
eq_(str(iset), "[[100.0 -> 200.0], [200.0 -> 300.0]]")
def test_intervalset_geniset(self):
# Test basic iset construction
eq_(makeset(" [----) "),
makeset(" [-|--) "))
assert(makeset(" [----] ") ==
makeset(" [-|--] "))
eq_(makeset("[) [--) ") +
makeset(" [) [--)"),
makeset("[|) [-----)"))
assert(makeset("[] [--] ") +
makeset(" [] [--]") ==
makeset("[|] [-----]"))
eq_(makeset(" [-------)"),
makeset(" [-|-----|"))
assert(makeset(" [-------]") ==
makeset(" [-|-----|"))
def test_intervalset_intersect(self):
# Test intersection (&)
with assert_raises(TypeError): # was AttributeError
x = makeset("[--)") & 1234
x = makeset("[--]") & 1234
# Intersection with interval
eq_(makeset("[---|---)[)") &
list(makeset(" [------) "))[0],
makeset(" [-----) "))
assert(makeset("[---------]") &
makeset(" [---] ") ==
makeset(" [---] "))
# Intersection with sets
eq_(makeset("[---------)") &
makeset(" [---) "),
makeset(" [---) "))
assert(makeset(" [---] ") &
makeset("[---------]") ==
makeset(" [---] "))
eq_(makeset(" [---) ") &
makeset("[---------)"),
makeset(" [---) "))
assert(makeset(" [-----]") &
makeset(" [-----] ") ==
makeset(" [--] "))
eq_(makeset(" [-----)") &
makeset(" [-----) "),
makeset(" [--) "))
assert(makeset(" [--] [--]") &
makeset(" [------] ") ==
makeset(" [-] [-] "))
eq_(makeset(" [--) [--)") &
makeset(" [------) "),
makeset(" [-) [-) "))
assert(makeset(" [---]") &
makeset(" [--] ") ==
makeset(" "))
eq_(makeset(" [---)") &
makeset(" [--) "),
makeset(" "))
assert(makeset(" [---]") &
makeset(" [----] ") ==
makeset(" . "))
eq_(makeset(" [-|---)") &
makeset(" [-----|-) "),
makeset(" [----) "))
assert(makeset(" [-|---]") &
makeset(" [-----|-] ") ==
makeset(" [----] "))
eq_(makeset(" [-|-) ") &
makeset(" [-|--|--) "),
makeset(" [---) "))
assert(makeset(" [-|-] ") &
makeset(" [-|--|--] ") ==
makeset(" [---] "))
# Border cases -- will give different results if intervals are
# half open or fully closed. Right now, they are half open,
# although that's a little messy since the database intervals
# often contain a data point at the endpoint.
half_open = True
if half_open:
eq_(makeset(" [---)") &
makeset(" [----) "),
makeset(" "))
eq_(makeset(" [----)[--)") &
makeset("[-) [--) [)"),
makeset(" [) [-) [)"))
else:
eq_(makeset(" [---)") &
makeset(" [----) "),
makeset(" . "))
eq_(makeset(" [----)[--)") &
makeset("[-) [--) [)"),
makeset(" [) [-). [)"))
assert(makeset(" [----][--]") &
makeset("[-] [--] []") ==
makeset(" [] [-]. []"))
class TestIntervalDB:
def test_dbinterval(self):
@@ -314,13 +273,12 @@ class TestIntervalTree:
import random
random.seed(1234)
# make a set of 100 intervals
# make a set of 500 intervals
iset = IntervalSet()
j = 100
j = 500
for i in random.sample(xrange(j),j):
interval = Interval(i, i+1)
iset += interval
render(iset, "Random Insertion")
# remove about half of them
for i in random.sample(xrange(j),j):
@@ -330,15 +288,10 @@ class TestIntervalTree:
# try removing an interval that doesn't exist
with assert_raises(IntervalError):
iset -= Interval(1234,5678)
render(iset, "Random Insertion, deletion")
# make a set of 100 intervals, inserted in order
iset = IntervalSet()
j = 100
for i in xrange(j):
interval = Interval(i, i+1)
iset += interval
render(iset, "In-order insertion")
# show the graph
if False:
iset.tree.render_dot_live()
class TestIntervalSpeed:
@unittest.skip("this is slow")
@@ -347,23 +300,18 @@ class TestIntervalSpeed:
import time
import aplotter
import random
import math
print
yappi.start()
speeds = {}
for j in [ 2**x for x in range(5,20) ]:
for j in [ 2**x for x in range(5,18) ]:
start = time.time()
iset = IntervalSet()
for i in random.sample(xrange(j),j):
interval = Interval(i, i+1)
iset += interval
speed = (time.time() - start) * 1000000.0
printf("%d: %g μs (%g μs each, O(n log n) ratio %g)\n",
j,
speed,
speed/j,
speed / (j*math.log(j))) # should be constant
printf("%d: %g μs (%g μs each)\n", j, speed, speed/j)
speeds[j] = speed
aplotter.plot(speeds.keys(), speeds.values(), plot_slope=True)
yappi.stop()


@@ -1,5 +1,5 @@
import nilmdb
from nilmdb.utils.printf import *
from nilmdb.printf import *
import nose
from nose.tools import *
@@ -9,6 +9,8 @@ import time
from test_helpers import *
import nilmdb.iteratorizer
def func_with_callback(a, b, callback):
callback(a)
callback(b)
@@ -25,18 +27,16 @@ class TestIteratorizer(object):
eq_(self.result, "123")
# Now make it an iterator
it = nilmdb.utils.Iteratorizer(
lambda x:
func_with_callback(1, 2, x))
it = nilmdb.iteratorizer.Iteratorizer(lambda x:
func_with_callback(1, 2, x))
result = ""
for i in it:
result += str(i)
eq_(result, "123")
# Make sure things work when an exception occurs
it = nilmdb.utils.Iteratorizer(
lambda x:
func_with_callback(1, "a", x))
it = nilmdb.iteratorizer.Iteratorizer(lambda x:
func_with_callback(1, "a", x))
result = ""
with assert_raises(TypeError) as e:
for i in it:
@@ -48,8 +48,7 @@ class TestIteratorizer(object):
# itself. This doesn't have a particular result in the test,
# but gains coverage.
def foo():
it = nilmdb.utils.Iteratorizer(
lambda x:
func_with_callback(1, 2, x))
it = nilmdb.iteratorizer.Iteratorizer(lambda x:
func_with_callback(1, 2, x))
it.next()
foo()


@@ -2,7 +2,7 @@
import nilmdb
from nilmdb.utils.printf import *
from nilmdb.printf import *
from nose.tools import *
from nose.tools import assert_raises
@@ -28,13 +28,9 @@ class TestLayouts(object):
# Some nilmdb.layout tests. Not complete, just fills in missing
# coverage.
def test_layouts(self):
x = nilmdb.layout.get_named("PrepData")
y = nilmdb.layout.get_named("float32_8")
eq_(x.count, y.count)
eq_(x.datatype, y.datatype)
y = nilmdb.layout.get_named("float32_7")
ne_(x.count, y.count)
eq_(x.datatype, y.datatype)
x = nilmdb.layout.get_named("PrepData").description()
y = nilmdb.layout.get_named("float32_8").description()
eq_(repr(x), repr(y))
def test_parsing(self):
self.real_t_parsing("PrepData", "RawData", "RawNotchedData")


@@ -1,53 +0,0 @@
import nilmdb
from nilmdb.utils.printf import *
import nose
from nose.tools import *
from nose.tools import assert_raises
import threading
import time
from test_helpers import *
@nilmdb.utils.lru_cache(size = 3)
def foo1(n):
return n
@nilmdb.utils.lru_cache(size = 5)
def foo2(n):
return n
def foo3d(n):
foo3d.destructed.append(n)
foo3d.destructed = []
@nilmdb.utils.lru_cache(size = 3, onremove = foo3d)
def foo3(n):
return n
class TestLRUCache(object):
def test(self):
[ foo1(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ]
eq_((foo1.cache_hits, foo1.cache_misses), (6, 3))
[ foo1(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ]
eq_((foo1.cache_hits, foo1.cache_misses), (15, 3))
[ foo1(n) for n in [ 4, 2, 1, 1, 4 ] ]
eq_((foo1.cache_hits, foo1.cache_misses), (18, 5))
[ foo2(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ]
eq_((foo2.cache_hits, foo2.cache_misses), (6, 3))
[ foo2(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ]
eq_((foo2.cache_hits, foo2.cache_misses), (15, 3))
[ foo2(n) for n in [ 4, 2, 1, 1, 4 ] ]
eq_((foo2.cache_hits, foo2.cache_misses), (19, 4))
[ foo3(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ]
eq_((foo3.cache_hits, foo3.cache_misses), (6, 3))
[ foo3(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ]
eq_((foo3.cache_hits, foo3.cache_misses), (15, 3))
[ foo3(n) for n in [ 4, 2, 1, 1, 4 ] ]
eq_((foo3.cache_hits, foo3.cache_misses), (18, 5))
eq_(foo3d.destructed, [1, 3])
foo3.cache_remove(1)
eq_(foo3d.destructed, [1, 3, 1])
foo3.cache_remove_all()
eq_(foo3d.destructed, [1, 3, 1, 2, 4 ])


@@ -1,59 +0,0 @@
import nilmdb
from nilmdb.utils.printf import *
import nose
from nose.tools import *
from nose.tools import assert_raises
from test_helpers import *
import sys
import cStringIO
err = cStringIO.StringIO()
@nilmdb.utils.must_close(errorfile = err)
class Foo:
def __init__(self):
fprintf(err, "Init\n")
def __del__(self):
fprintf(err, "Deleting\n")
def close(self):
fprintf(err, "Closing\n")
@nilmdb.utils.must_close(errorfile = err)
class Bar:
pass
class TestMustClose(object):
def test(self):
# Note: this test might fail if the Python interpreter doesn't
# garbage collect the object (and call its __del__ function)
# right after a "del x".
x = Foo()
del x
eq_(err.getvalue(),
"Init\n"
"error: Foo.close() wasn't called!\n"
"Deleting\n")
err.truncate(0)
y = Foo()
y.close()
del y
eq_(err.getvalue(),
"Init\n"
"Closing\n"
"Deleting\n")
err.truncate(0)
z = Bar()
z.close()
del z
eq_(err.getvalue(), "")


@@ -14,7 +14,6 @@ import urllib2
from urllib2 import urlopen, HTTPError
import Queue
import cStringIO
import time
testdb = "tests/testdb"
@@ -40,8 +39,8 @@ class Test00Nilmdb(object): # named 00 so it runs first
capture = cStringIO.StringIO()
old = sys.stdout
sys.stdout = capture
with nilmdb.utils.Timer("test"):
time.sleep(0.01)
with nilmdb.Timer("test"):
nilmdb.timer.time.sleep(0.01)
sys.stdout = old
in_("test: ", capture.getvalue())
@@ -70,14 +69,12 @@ class Test00Nilmdb(object): # named 00 so it runs first
eq_(db.stream_list(layout="RawData"), [ ["/newton/raw", "RawData"] ])
eq_(db.stream_list(path="/newton/raw"), [ ["/newton/raw", "RawData"] ])
# Verify that columns were made right (pytables specific)
if "h5file" in db.data.__dict__:
h5file = db.data.h5file
eq_(len(h5file.getNode("/newton/prep").cols), 9)
eq_(len(h5file.getNode("/newton/raw").cols), 7)
eq_(len(h5file.getNode("/newton/zzz/rawnotch").cols), 10)
assert(not h5file.getNode("/newton/prep").colindexed["timestamp"])
assert(not h5file.getNode("/newton/prep").colindexed["c1"])
# Verify that columns were made right
eq_(len(db.h5file.getNode("/newton/prep").cols), 9)
eq_(len(db.h5file.getNode("/newton/raw").cols), 7)
eq_(len(db.h5file.getNode("/newton/zzz/rawnotch").cols), 10)
assert(not db.h5file.getNode("/newton/prep").colindexed["timestamp"])
assert(not db.h5file.getNode("/newton/prep").colindexed["c1"])
# Set / get metadata
eq_(db.stream_get_metadata("/newton/prep"), {})
@@ -199,6 +196,6 @@ class TestServer(object):
# GET instead of POST (no body)
# (actual POST test is done by client code)
with assert_raises(HTTPError) as e:
getjson("/stream/insert?path=/newton/prep&start=0&end=0")
getjson("/stream/insert?path=/newton/prep")
eq_(e.exception.code, 400)


@@ -1,5 +1,5 @@
import nilmdb
from nilmdb.utils.printf import *
from nilmdb.printf import *
from nose.tools import *
from nose.tools import assert_raises


@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
import nilmdb
from nilmdb.utils.printf import *
from nilmdb.printf import *
from nose.tools import *
from nose.tools import assert_raises
@@ -11,149 +11,65 @@ from nilmdb.rbtree import RBTree, RBNode
from test_helpers import *
import unittest
# set to False to skip live renders
do_live_renders = False
def render(tree, description = "", live = True):
import renderdot
r = renderdot.RBTreeRenderer(tree)
return r.render(description, live and do_live_renders)
render = False
class TestRBTree:
def test_rbtree(self):
rb = RBTree()
rb.insert(RBNode(10000, 10001))
rb.insert(RBNode(10004, 10007))
rb.insert(RBNode(10001, 10002))
rb.insert(RBNode(None, 10000, 10001))
rb.insert(RBNode(None, 10004, 10007))
rb.insert(RBNode(None, 10001, 10002))
s = rb.render_dot()
# There was a typo that gave the RBTree a loop in this case.
# Verify that the dot isn't too big.
s = render(rb, live = False)
assert(len(s.splitlines()) < 30)
def test_rbtree_big(self):
import random
random.seed(1234)
# make a set of 100 intervals, inserted in order
# make a set of 500 intervals, inserted in order
rb = RBTree()
j = 100
j = 500
for i in xrange(j):
rb.insert(RBNode(i, i+1))
render(rb, "in-order insert")
rb.insert(RBNode(None, i, i+1))
# show the graph
if render:
rb.render_dot_live("in-order insert")
# remove about half of them
for i in random.sample(xrange(j),j):
if random.randint(0,1):
rb.delete(rb.find(i, i+1))
render(rb, "in-order insert, random delete")
# make a set of 100 intervals, inserted at random
# show the graph
if render:
rb.render_dot_live("in-order insert, random delete")
# make a set of 500 intervals, inserted at random
rb = RBTree()
j = 100
j = 500
for i in random.sample(xrange(j),j):
rb.insert(RBNode(i, i+1))
render(rb, "random insert")
rb.insert(RBNode(None, i, i+1))
# show the graph
if render:
rb.render_dot_live("random insert")
# remove about half of them
for i in random.sample(xrange(j),j):
if random.randint(0,1):
rb.delete(rb.find(i, i+1))
render(rb, "random insert, random delete")
# in-order insert of 50 more
for i in xrange(50):
rb.insert(RBNode(i+500, i+501))
render(rb, "random insert, random delete, in-order insert")
# show the graph
if render:
rb.render_dot_live("random insert, random delete")
def test_rbtree_basics(self):
rb = RBTree()
vals = [ 7, 14, 1, 2, 8, 11, 5, 15, 4]
for n in vals:
rb.insert(RBNode(n, n))
# in-order insert of 250 more
for i in xrange(250):
rb.insert(RBNode(None, i+500, i+501))
# stringify
s = ""
for node in rb:
s += str(node)
in_("[node (None) 1", s)
eq_(str(rb.nil), "[node nil]")
# inorder traversal, successor and predecessor
last = 0
for node in rb:
assert(node.start > last)
last = node.start
successor = rb.successor(node)
if successor:
assert(rb.predecessor(successor) is node)
predecessor = rb.predecessor(node)
if predecessor:
assert(rb.successor(predecessor) is node)
# Delete node not in the tree
with assert_raises(AttributeError):
rb.delete(RBNode(1,2))
# Delete all nodes!
for node in rb:
rb.delete(node)
# Build it up again, make sure it matches
for n in vals:
rb.insert(RBNode(n, n))
s2 = ""
for node in rb:
s2 += str(node)
assert(s == s2)
def test_rbtree_find(self):
# Get a little bit of coverage for some overlapping cases,
# even though the class doesn't fully support it.
rb = RBTree()
nodes = [ RBNode(1, 5), RBNode(1, 10), RBNode(1, 15) ]
for n in nodes:
rb.insert(n)
assert(rb.find(1, 5) is nodes[0])
assert(rb.find(1, 10) is nodes[1])
assert(rb.find(1, 15) is nodes[2])
def test_rbtree_find_leftright(self):
# Now let's get some ranges in there
rb = RBTree()
vals = [ 7, 14, 1, 2, 8, 11, 5, 15, 4]
for n in vals:
rb.insert(RBNode(n*10, n*10+5))
# Check find_left_end, find_right_start
for i in range(160):
left = rb.find_left_end(i)
right = rb.find_right_start(i)
if left:
# endpoint should be at least i
assert(left.end >= i)
# all earlier nodes should have a lower endpoint
for node in rb:
if node is left:
break
assert(node.end < i)
if right:
# startpoint should be at most i
assert(right.start <= i)
# all later nodes should have a higher startpoint
for node in reversed(list(rb)):
if node is right:
break
assert(node.start > i)
def test_rbtree_intersect(self):
# Fill with some ranges
rb = RBTree()
rb.insert(RBNode(10,20))
rb.insert(RBNode(20,25))
rb.insert(RBNode(30,40))
# Just a quick test; test_interval will do better.
eq_(len(list(rb.intersect(1,100))), 3)
eq_(len(list(rb.intersect(10,20))), 1)
eq_(len(list(rb.intersect(5,15))), 1)
eq_(len(list(rb.intersect(15,15))), 1)
eq_(len(list(rb.intersect(20,21))), 1)
eq_(len(list(rb.intersect(19,21))), 2)
# show the graph
if render:
rb.render_dot_live("random insert, random delete, in-order insert")


@@ -1,5 +1,5 @@
import nilmdb
from nilmdb.utils.printf import *
from nilmdb.printf import *
import nose
from nose.tools import *
@@ -57,7 +57,7 @@ class TestUnserialized(Base):
class TestSerialized(Base):
def setUp(self):
self.realfoo = Foo()
self.foo = nilmdb.utils.Serializer(self.realfoo)
self.foo = nilmdb.serializer.WrapObject(self.realfoo)
def tearDown(self):
del self.foo


@@ -1,5 +1,5 @@
import nilmdb
from nilmdb.utils.printf import *
from nilmdb.printf import *
import datetime_tz


@@ -1,22 +1,20 @@
./nilmtool.py destroy /bpnilm/2/raw
./nilmtool.py create /bpnilm/2/raw RawData
if false; then
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-110000 -r 8000 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-120001 -r 8000 /bpnilm/2/raw
if true; then
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-110000 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-120001 /bpnilm/2/raw
else
# 170 hours, about 98 gigs uncompressed:
for i in $(seq 2000 2016); do
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-010001 -r 8000 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-020002 -r 8000 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-030003 -r 8000 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-040004 -r 8000 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-050005 -r 8000 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-060006 -r 8000 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-070007 -r 8000 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-080008 -r 8000 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-090009 -r 8000 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-100010 -r 8000 /bpnilm/2/raw
for i in $(seq 2000 2050); do
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-010001 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-020002 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-030003 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-040004 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-050005 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-060006 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-070007 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-080008 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-090009 /bpnilm/2/raw
time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-100010 /bpnilm/2/raw
done
fi