Improve boolean HTTP parameter handling

Remove duplicated test
Fix WSGI docs again
2013-07-15 14:38:28 -04:00 · 2013-07-14 15:30:53 -04:00 · 2013-07-11 16:36:32 -04:00 · 2013-07-10 14:16:25 -04:00 · 2013-07-09 19:06:26 -04:00 · 2013-07-09 19:01:53 -04:00
125 changed files with 71534 additions and 969 deletions
--- a/.coveragerc
+++ b/.coveragerc
@@ -0,0 +1,10 @@
+# -*- conf -*-
+
+[run]
+# branch = True
+
+[report]
+exclude_lines =
+	pragma: no cover
+	if 0:
+omit = nilmdb/utils/datetime_tz*,nilmdb/scripts,nilmdb/_version.py
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1 @@
+nilmdb/_version.py export-subst
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,25 @@
+# Tests
+tests/*testdb/
+.coverage
+db/
+
+# Compiled / cythonized files
+docs/*.html
+build/
+*.pyc
+nilmdb/server/interval.c
+nilmdb/server/layout.c
+nilmdb/server/rbtree.c
+*.so
+
+# Setup junk
+dist/
+nilmdb.egg-info/
+
+# This gets generated as needed by setup.py
+MANIFEST.in
+MANIFEST
+
+# Misc
+timeit*out
+
--- a/.pylintrc
+++ b/.pylintrc
@@ -0,0 +1,250 @@
+# -*- conf -*-
+[MASTER]
+
+# Specify a configuration file.
+#rcfile=
+
+# Python code to execute, usually for sys.path manipulation such as
+# pygtk.require().
+#init-hook=
+
+# Profiled execution.
+profile=no
+
+# Add files or directories to the blacklist. They should be base names, not
+# paths.
+ignore=datetime_tz
+
+# Pickle collected data for later comparisons.
+persistent=no
+
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+
+[MESSAGES CONTROL]
+
+# Enable the message, report, category or checker with the given id(s). You can
+# either give multiple identifier separated by comma (,) or put this option
+# multiple time.
+#enable=
+
+# Disable the message, report, category or checker with the given id(s). You
+# can either give multiple identifier separated by comma (,) or put this option
+# multiple time (only on the command line, not in the configuration file where
+# it should appear only once).
+disable=C0111,R0903,R0201,R0914,R0912,W0142,W0703,W0702
+
+
+[REPORTS]
+
+# Set the output format. Available formats are text, parseable, colorized, msvs
+# (visual studio) and html
+output-format=parseable
+
+# Include message's id in output
+include-ids=yes
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written in a file name "pylint_global.[txt|html]".
+files-output=no
+
+# Tells whether to display a full report or only the messages
+reports=yes
+
+# Python expression which should return a note less than 10 (10 is the highest
+# note). You have access to the variables errors warning, statement which
+# respectively contain the number of errors / warnings messages and the total
+# number of statements analyzed. This is used by the global evaluation report
+# (RP0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Add a comment according to your evaluation note. This is used by the global
+# evaluation report (RP0004).
+comment=no
+
+
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=4
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+
+[TYPECHECK]
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# List of classes names for which member attributes should not be checked
+# (useful for classes with attributes dynamically set).
+ignored-classes=SQLObject
+
+# When zope mode is activated, add a predefined set of Zope acquired attributes
+# to generated-members.
+zope=no
+
+# List of members which are set dynamically and missed by pylint inference
+# system, and so shouldn't trigger E0201 when accessed. Python regular
+# expressions are accepted.
+generated-members=REQUEST,acl_users,aq_parent
+
+
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=80
+
+# Maximum number of lines in a module
+max-module-lines=1000
+
+# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
+# tab).
+indent-string='    '
+
+
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+
+
+[VARIABLES]
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# A regular expression matching the beginning of the name of dummy variables
+# (i.e. not used).
+dummy-variables-rgx=_|dummy
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid to define new builtins when possible.
+additional-builtins=
+
+
+[BASIC]
+
+# Required attributes for module, separated by a comma
+required-attributes=
+
+# List of builtins function names that should not be used, separated by a comma
+bad-functions=apply,input
+
+# Regular expression which should only match correct module names
+module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
+
+# Regular expression which should only match correct module level names
+const-rgx=(([A-Z_][A-Z0-9_]*)|(__.*__)|version)$
+
+# Regular expression which should only match correct class names
+class-rgx=[A-Z_][a-zA-Z0-9]+$
+
+# Regular expression which should only match correct function names
+function-rgx=[a-z_][a-z0-9_]{0,30}$
+
+# Regular expression which should only match correct method names
+method-rgx=[a-z_][a-z0-9_]{0,30}$
+
+# Regular expression which should only match correct instance attribute names
+attr-rgx=[a-z_][a-z0-9_]{0,30}$
+
+# Regular expression which should only match correct argument names
+argument-rgx=[a-z_][a-z0-9_]{0,30}$
+
+# Regular expression which should only match correct variable names
+variable-rgx=[a-z_][a-z0-9_]{0,30}$
+
+# Regular expression which should only match correct list comprehension /
+# generator expression variable names
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=i,j,k,ex,Run,_
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=foo,bar,baz,toto,tutu,tata
+
+# Regular expression which should only match functions or classes name which do
+# not require a docstring
+no-docstring-rgx=__.*__
+
+
+[CLASSES]
+
+# List of interface methods to ignore, separated by a comma. This is used for
+# instance to not check methods defines in Zope's Interface base class.
+ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,__new__,setUp
+
+# List of valid names for the first argument in a class method.
+valid-classmethod-first-arg=cls
+
+
+[DESIGN]
+
+# Maximum number of arguments for function / method
+max-args=5
+
+# Argument names that match this expression will be ignored. Default to name
+# with leading underscore
+ignored-argument-names=_.*
+
+# Maximum number of locals for function / method body
+max-locals=15
+
+# Maximum number of return / yield for function / method body
+max-returns=6
+
+# Maximum number of branch for function / method body
+max-branchs=12
+
+# Maximum number of statements in function / method body
+max-statements=50
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=7
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=2
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
+
+# Create a graph of every (i.e. internal and external) dependencies in the
+# given file (report RP0402 must not be disabled)
+import-graph=
+
+# Create a graph of external dependencies in the given file (report RP0402 must
+# not be disabled)
+ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report RP0402 must
+# not be disabled)
+int-import-graph=
+
+
+[EXCEPTIONS]
+
+# Exceptions that will emit a warning when being caught. Defaults to
+# "Exception"
+overgeneral-exceptions=Exception
--- a/48
+++ b/48
@@ -1,2 +1,46 @@
-all:
-	nosetests nilmdb/test_interval.py
+# By default, run the tests.
+all: test
+
+version:
+	python setup.py version
+
+build:
+	python setup.py build_ext --inplace
+
+dist: sdist
+sdist:
+	python setup.py sdist
+
+install:
+	python setup.py install
+
+develop:
+	python setup.py develop
+
+# Build the HTML documentation.  $(MAKE) (rather than a literal "make")
+# passes flags such as -j, -n and -k on to the sub-make.
+docs:
+	$(MAKE) -C docs
+
+lint:
+	pylint --rcfile=.pylintrc nilmdb
+
+test:
+ifeq ($(INSIDE_EMACS), t)
+# Use the slightly more flexible script
+	python setup.py build_ext --inplace
+	python tests/runtests.py
+else
+# Let setup.py check dependencies, build stuff, and run the test
+	python setup.py nosetests
+endif
+
+# Remove compiled files, coverage data, test databases and build output,
+# then clean the docs directory.  $(MAKE) passes flags such as -n on to
+# the sub-make, so "make -n clean" stays a dry run.
+clean::
+	find . -name '*pyc' | xargs rm -f
+	rm -f .coverage
+	rm -rf tests/*testdb*
+	rm -rf nilmdb.egg-info/ build/ nilmdb/server/*.so MANIFEST.in
+	$(MAKE) -C docs clean
+
+gitclean::
+	git clean -dXf
+
+# Every target in this file names an action, not a file it produces.
+.PHONY: all version build dist sdist install develop docs lint test clean gitclean
--- a/README.txt
+++ b/README.txt
@@ -1,4 +1,31 @@
-To install, 
+nilmdb: Non-Intrusive Load Monitor Database
+by Jim Paris <jim@jtan.com>

-   python seutp.py install
+Prerequisites:

+  # Runtime and build environments
+  sudo apt-get install python2.7 python2.7-dev python-setuptools cython
+
+  # Base NilmDB dependencies
+  sudo apt-get install python-cherrypy3 python-decorator python-simplejson
+  sudo apt-get install python-requests python-dateutil python-tz python-psutil
+
+  # Other dependencies (required by some modules)
+  sudo apt-get install python-numpy
+
+  # Tools for running tests
+  sudo apt-get install python-nose python-coverage
+
+Test:
+  python setup.py nosetests
+
+Install:
+
+  python setup.py install
+
+Usage:
+
+  nilmdb-server --help
+  nilmtool --help
+
+See docs/wsgi.md for info on setting up a WSGI application in Apache.
--- a/bin/nilm-test.py
+++ b/bin/nilm-test.py
@@ -1,26 +0,0 @@
-#!/usr/bin/python
-
-from nilmdb import Interval
-from optparse import OptionParser
-import sys
-
-version = "1.0"
-
-parser = OptionParser()
-parser.add_option("-d", "--db", dest="database", metavar="DATABASE",
-                  help="location of sqlite database")
-parser.add_option("-V", "--version", dest="version", default=False, action="store_true",
-                  help="print version then exit")
-
-(options, args) = parser.parse_args()
-
-if (options.version):
-    print "This script version: " + version
-    sys.exit(0)
-
-if options.database is None:
-    print "Error: database is mandatory"
-    sys.exit(1)
-
-print "Database is " + options.database
-
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -0,0 +1,9 @@
+# Render every Markdown file in this directory to an HTML file of the
+# same base name.
+ALL_DOCS = $(wildcard *.md)
+
+all: $(ALL_DOCS:.md=.html)
+
+# The shell redirection creates $@ before pandoc runs, so a failed
+# conversion would otherwise leave an empty or partial .html file that
+# looks up to date on the next run.  .DELETE_ON_ERROR removes it.
+.DELETE_ON_ERROR:
+
+%.html: %.md
+	pandoc -s $< > $@
+
+clean:
+	rm -f *.html
+
+# These targets name actions, not files.
+.PHONY: all clean
--- a/docs/TODO.md
+++ b/docs/TODO.md
@@ -0,0 +1,5 @@
+- Documentation
+
+- Machine-readable information in OverflowError, parser errors.
+  Maybe subclass `cherrypy.HTTPError` and override `set_response`
+  to add another JSON field?
--- a/docs/design.md
+++ b/docs/design.md
@@ -0,0 +1,440 @@
+Structure
+---------
+nilmdb.nilmdb is the NILM database interface.  A nilmdb.BulkData
+interface stores data in flat files, and a SQL database tracks
+metadata and ranges.
+
+Access to the nilmdb must be single-threaded.  This is handled with
+the nilmdb.serializer class.  In the future this could probably
+be turned into a per-path serialization.
+
+nilmdb.server is a HTTP server that provides an interface to talk,
+through the serialization layer, to the nilmdb object.
+
+nilmdb.client is a HTTP client that connects to this.
+
+Sqlite performance
+------------------
+
+Committing a transaction in the default sync mode (PRAGMA synchronous=FULL)
+takes about 125msec.  sqlite3 will commit transactions at 3 times:
+
+1. explicit con.commit()
+
+2. between a series of DML commands and non-DML commands, e.g.
+   after a series of INSERT, SELECT, but before a CREATE TABLE or
+   PRAGMA.
+
+3. at the end of an explicit transaction, e.g. "with self.con as con:"
+
+To speed up testing, or if this transaction speed becomes an issue,
+the sync=False option to NilmDB will set PRAGMA synchronous=OFF.
+
+
+Inserting streams
+-----------------
+
+We need to send the contents of "data" as POST.  Do we need chunked
+transfer?
+
+- Don't know the size in advance, so we would need to use chunked if
+  we send the entire thing in one request.
+- But we shouldn't send one chunk per line, so we need to buffer some
+  anyway; why not just make new requests?
+- Consider the infinite-streaming case, we might want to send it
+  immediately?  Not really -- server still should do explicit inserts
+  of fixed-size chunks.
+- Even chunked encoding needs the size of each chunk beforehand, so
+  everything still gets buffered.  Just a tradeoff of buffer size.
+
+Before timestamps are added:
+
+- Raw data is about 440 kB/s    (9 channels)
+- Prep data is about 12.5 kB/s  (1 phase)
+- How do we know how much data to send?
+
+    - Remember that we can only do maybe 8-50 transactions per second on
+      the sqlite database.  So if one block of inserted data is one
+      transaction, we'd need the raw case to be around 64kB per request,
+      ideally more.
+    - Maybe use a range, based on how long it's taking to read the data
+        - If no more data, send it
+        - If data > 1 MB, send it
+    - If more than 10 seconds have elapsed, send it
+    - Should those numbers come from the server?
+
+Converting from ASCII to PyTables:
+
+- For each row getting added, we need to set attributes on a PyTables
+  Row object and call table.append().  This means that there isn't a
+  particularly efficient way of converting from ascii.
+- Could create a function like nilmdb.layout.Layout("foo").fillRow(asciiline)
+    - But this means we're doing parsing on the serialized side
+    - Let's keep parsing on the threaded server side so we can detect
+      errors better, and not block the serialized nilmdb for a slow
+      parsing process.
+- Client sends ASCII data
+- Server converts this ASCII data to a list of values
+    - Maybe:
+
+            # threaded side creates this object
+            parser = nilmdb.layout.Parser("layout_name")
+            # threaded side parses and fills it with data
+            parser.parse(textdata)
+            # serialized side pulls out rows
+            for n in xrange(parser.nrows):
+                parser.fill_row(rowinstance, n)
+                table.append()
+
+
+Inserting streams, inside nilmdb
+--------------------------------
+
+- First check that the new stream doesn't overlap.
+    - Get minimum timestamp, maximum timestamp from data parser.
+        - (extend parser to verify monotonicity and track extents)
+    - Get all intervals for this stream in the database
+    - See if new interval overlaps any existing ones
+        - If so, bail
+    - Question: should we cache intervals inside NilmDB?
+        - Assume database is fast for now, and always rebuild from DB.
+        - Can add a caching layer later if we need to.
+    - `stream_get_ranges(path)` -> return IntervalSet?
+
+Speed
+-----
+
+- First approach was quadratic.  Adding four hours of data:
+
+        $ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-110000 /bpnilm/1/raw
+        real    24m31.093s
+        $ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-120001 /bpnilm/1/raw
+        real    43m44.528s
+        $ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-130002 /bpnilm/1/raw
+        real    93m29.713s
+        $ time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-140003 /bpnilm/1/raw
+        real    166m53.007s
+
+- Disabling pytables indexing didn't help:
+
+        real    31m21.492s
+        real    52m51.963s
+        real    102m8.151s
+        real    176m12.469s
+
+- Server RAM usage is constant.
+
+- Speed problems were due to IntervalSet speed, of parsing intervals
+  from the database and adding the new one each time.
+
+    - First optimization is to cache result of `nilmdb:_get_intervals`,
+      which gives the best speedup.
+
+    - Also switched to internally using bxInterval from bx-python package.
+      Speed of `tests/test_interval:TestIntervalSpeed` is pretty decent
+      and seems to be growing logarithmically now.  About 85μs per insertion
+      for inserting 131k entries.
+
+    - Storing the interval data in SQL might be better, with a scheme like:
+      http://www.logarithmic.net/pfh/blog/01235197474
+
+- Next slowdown target is nilmdb.layout.Parser.parse().
+    - Rewrote parsers using cython and sscanf
+    - Stats (rev 10831), with `_add_interval` disabled
+
+        layout.pyx.Parser.parse:128        6303 sec, 262k calls
+         layout.pyx.parse:63               13913 sec, 5.1g calls
+        numpy:records.py.fromrecords:569   7410 sec, 262k calls
+
+- Probably OK for now.
+
+- After all updates, now takes about 8.5 minutes to insert an hour of
+  data, constant after adding 171 hours (4.9 billion data points)
+
+- Data set size: 98 gigs = 20 bytes per data point.
+  6 uint16 data + 1 uint32 timestamp = 16 bytes per point
+  So compression must be off -- will retry with compression forced on.
+
+IntervalSet speed
+-----------------
+- Initial implementation was pretty slow, even with binary search in
+  sorted list
+
+- Replaced with bxInterval; now takes about log n time for an insertion
+    - TestIntervalSpeed with range(17,18) and profiling
+        - 85 μs each
+        - 131072 calls to `__iadd__`
+        - 131072 to bx.insert_interval
+        - 131072 to bx.insert:395
+        - 2355835 to bx.insert:106  (18x as many?)
+
+- Tried blist too, worse than bxinterval.
+
+- Might be algorithmic improvements to be made in Interval.py,
+  like in `__and__`
+
+- Replaced again with rbtree.  Seems decent.  Numbers are time per
+  insert for 2**17 insertions, followed by total wall time and RAM
+  usage for running "make test" with `test_rbtree` and `test_interval`
+  with range(5,20):
+    - old values with bxinterval:
+      20.2 μs, total 20 s, 177 MB RAM
+    - rbtree, plain python:
+      97 μs, total 105 s, 846 MB RAM
+    - rbtree converted to cython:
+      26 μs, total 29 s, 320 MB RAM
+    - rbtree and interval converted to cython:
+      8.4 μs, total 12 s, 134 MB RAM
+
+- Would like to move Interval itself back to Python so other
+  non-cythonized code like client code can use it more easily.
+  Testing speed with just `test_interval` being tested, with
+  `range(5,22)`, using `/usr/bin/time -v python tests/runtests.py`,
+  times recorded for 2097152:
+    - 52ae397 (Interval in cython):
+      12.6133 μs each, ratio 0.866533, total 47 sec, 399 MB RAM
+    - 9759dcf (Interval in python):
+      21.2937 μs each, ratio 1.462870, total 83 sec, 1107 MB RAM
+  That's a huge difference!  Instead, will keep Interval and DBInterval
+  cythonized inside nilmdb, and just have an additional copy in
+  nilmdb.utils for clients to use.
+
+Layouts
+-------
+Current/old design has specific layouts: RawData, PrepData, RawNotchedData.
+Let's get rid of this entirely and switch to simpler data types that are
+just collections and counts of a single type.  We'll still use strings
+to describe them, with format:
+
+    type_count
+
+where type is "uint16", "float32", or "float64", and count is an integer.
+
+nilmdb.layout.named() will parse these strings into the appropriate
+handlers.  For compatibility:
+
+    "RawData" == "uint16_6"
+    "RawNotchedData" == "uint16_9"
+    "PrepData" == "float32_8"
+
+
+BulkData design
+---------------
+
+BulkData is a custom bulk data storage system that was written to
+replace PyTables.  The general structure is a `data` subdirectory in
+the main NilmDB directory.  Within `data`, paths are created for each
+created stream.  These locations are called tables.  For example,
+tables might be located at
+
+    nilmdb/data/newton/raw/
+    nilmdb/data/newton/prep/
+    nilmdb/data/cottage/raw/
+
+Each table contains:
+
+- An unchanging `_format` file (Python pickle format) that describes
+  parameters of how the data is broken up, like files per directory,
+  rows per file, and the binary data format
+
+- Hex named subdirectories (`"%04x"`, although more than 65536 can exist)
+
+- Hex named files within those subdirectories, like:
+
+        /nilmdb/data/newton/raw/000b/010a
+
+    The data format of these files is raw binary, interpreted by the
+    Python `struct` module according to the format string in the
+    `_format` file.
+
+- Same as above, with `.removed` suffix, is an optional file (Python
+  pickle format) containing a list of row numbers that have been
+  logically removed from the file.  If this range covers the entire
+  file, the entire file will be removed.
+
+- Note that the `bulkdata.nrows` variable is calculated once in
+  `BulkData.__init__()`, and only ever incremented during use.  Thus,
+  even if all data is removed, `nrows` can remain high.  However, if
+  the server is restarted, the newly calculated `nrows` may be lower
+  than in a previous run due to deleted data.  To be specific, this
+  sequence of events:
+
+    - insert data
+    - remove all data
+    - insert data
+
+    will result in having different row numbers in the database, and
+    differently numbered files on the filesystem, than the sequence:
+
+    - insert data
+    - remove all data
+    - restart server
+    - insert data
+
+    This is okay!  Everything should remain consistent both in the
+    `BulkData` and `NilmDB`.  Not attempting to readjust `nrows` during
+    deletion makes the code quite a bit simpler.
+
+- Similarly, data files are never truncated shorter.  Removing data
+  from the end of the file will not shorten it; it will only be
+  deleted when it has been fully filled and all of the data has been
+  subsequently removed.
+
+
+Rocket
+------
+
+Original design had the nilmdb.nilmdb thread (through bulkdata)
+convert from on-disk layout to a Python list, and then the
+nilmdb.server thread (from cherrypy) converts to ASCII.  For at least
+the extraction side of things, it's easy to pass the bulkdata a layout
+name instead, and have it convert directly from on-disk to ASCII
+format, because this conversion can then be shoved into a C module.
+This module, which provides a means for converting directly from
+on-disk format to ASCII or Python lists, is the "rocket" interface.
+Python is still used to manage the files and figure out where the
+data should go; rocket just puts binary data directly in or out of
+those files at specified locations.
+
+Before rocket, testing speed with uint16_6 data, with an end-to-end
+test (extracting data with nilmtool):
+
+- insert: 65 klines/sec
+- extract: 120 klines/sec
+
+After switching to the rocket design, but using the Python version
+(pyrocket):
+
+- insert: 57 klines/sec
+- extract: 120 klines/sec
+
+After switching to a C extension module (rocket.c)
+
+- insert: 74 klines/sec through insert.py; 99.6 klines/sec through nilmtool
+- extract: 335 klines/sec
+
+After client block updates (described below):
+
+- insert: 180 klines/sec through nilmtool (pre-timestamped)
+- extract: 390 klines/sec through nilmtool
+
+Using "insert --timestamp" or "extract --bare" cuts the speed in half.
+
+Blocks versus lines
+-------------------
+
+Generally want to avoid parsing the bulk of the data as lines if
+possible, and transfer things in bigger blocks at once.
+
+Current places where we use lines:
+
+- All data returned by `client.stream_extract`, since it comes from
+  `httpclient.get_gen`, which iterates over lines.  Not sure if this
+  should be changed, because a `nilmtool extract` is just about the
+  same speed as `curl -q .../stream/extract`!
+
+- `client.StreamInserter.insert_iter` and
+  `client.StreamInserter.insert_line`, which should probably get
+  replaced with block versions.  There's no real need to keep
+  updating the timestamp every time we get a new line of data.
+
+  - Finished.  Just a single insert() that takes any length string and
+    does very little processing until it's time to send it to the
+    server.
+
+Timestamps
+----------
+
+Timestamps are currently double-precision floats (64 bit).  Since the
+mantissa is 53-bit, this can only represent about 15-17 significant
+figures, and microsecond Unix timestamps like 1222333444.000111 are
+already 16 significant figures.  Rounding is therefore an issue;
+it's hard to be sure that converting from ASCII, then back to ASCII,
+will always give the same result.
+
+Also, if the client provides a floating point value like 1.9999999999,
+we need to be careful that we don't store it as 1.9999999999 but later
+print it as 2.000000, because then round-trips change the data.
+
+Possible solutions:
+
+- When the client provides a floating point value to the server,
+  always round to the 6th decimal digit before verifying & storing.
+  Good for compatibility and simplicity.  But still might have rounding
+  issues, and clients will also need to round when doing their own
+  verification.  Having every piece of code need to know which digit
+  to round at is not ideal.
+
+- Always store int64 timestamps on the server, representing
+  microseconds since epoch.  int64 timestamps are used in all HTTP
+  parameters, in insert/extract ASCII strings, client API, commandline
+  raw timestamps, etc.  Pretty big change.
+
+  This is what we'll go with...
+
+  - Client programs that interpret the timestamps as doubles instead
+    of ints will remain accurate until 2^53 microseconds, or year
+    2255.
+
+  - On insert, maybe it's OK to send floating point microsecond values
+    (1234567890123456.0), just to cope with clients that want to print
+    everything as a double.  Server could try parsing as int64, and if
+    that fails, parse as double and truncate to int64.  However, this
+    wouldn't catch imprecise inputs like "1.23456789012e+15".  But
+    maybe that can just be ignored; it's likely to cause a
+    non-monotonic error at the client.
+
+  - Timestamps like 1234567890.123456 never show up anywhere, except
+    for interfacing to datetime_tz etc.  Command line "raw timestamps"
+    are always printed as int64 values, and a new format
+    "@1234567890123456" is added to the parser for specifying them
+    exactly.
+
+Binary interface
+----------------
+
+The ASCII interface is too slow for high-bandwidth processing, like
+sinefits, prep, etc.  A binary interface was added so that you can
+extract the raw binary out of the bulkdata storage.  This binary is
+a little-endian format, e.g. in C a uint16_6 stream would be:
+
+    #include <endian.h>
+    #include <stdint.h>
+    struct {
+        int64_t timestamp_le;
+        uint16_t data_le[6];
+    } __attribute__((packed));
+
+Remember to byteswap (with e.g. `le64toh` and `le16toh` in C)!
+
+This interface is used by the new `nilmdb.client.numpyclient.NumpyClient`
+class, which is a subclass of the normal `nilmdb.client.client.Client`
+and has all of the same functions.  It adds three new functions:
+
+- `stream_extract_numpy` to extract data as a Numpy array
+
+- `stream_insert_numpy` to insert data as a Numpy array
+
+- `stream_insert_numpy_context` is the context manager for
+  incrementally inserting data
+
+It is significantly faster!  It is about 20 times faster to decimate a
+stream with `nilm-decimate` when the filter code is using the new
+binary/numpy interface.
+
+
+WSGI interface & chunked requests
+---------------------------------
+
+mod_wsgi requires "WSGIChunkedRequest On" to handle
+"Transfer-encoding: Chunked" requests.  However, `/stream/insert`
+doesn't handle this correctly right now, because:
+
+- The `cherrypy.request.body.read()` call needs to be fixed for chunked requests
+
+- We don't want to just buffer endlessly in the server, and it will
+  require some thought on how to handle data in chunks (what to do about
+  interval endpoints).
+
+It is probably better to just keep the endpoint management on the client
+side, so leave "WSGIChunkedRequest off" for now.
--- a/docs/wsgi.md
+++ b/docs/wsgi.md
@@ -0,0 +1,32 @@
+WSGI Application in Apache
+--------------------------
+
+Install `apache2` and `libapache2-mod-wsgi`
+
+We'll set up the database server at URL `http://myhost.com/nilmdb`.
+The database will be stored in `/home/nilm/db`, and the process will
+run as user `nilm`, group `nilm`.
+
+First, create a WSGI script `/home/nilm/nilmdb.wsgi` containing:
+
+    import nilmdb.server
+    application = nilmdb.server.wsgi_application("/home/nilm/db", "/nilmdb")
+
+The first parameter is the local filesystem path, and the second
+parameter is the path part of the URL.
+
+Then, set up Apache with a configuration like:
+
+    <VirtualHost *:80>
+        WSGIScriptAlias /nilmdb /home/nilm/nilmdb.wsgi
+        WSGIDaemonProcess nilmdb-procgroup threads=32 user=nilm group=nilm
+        <Location /nilmdb>
+            WSGIProcessGroup nilmdb-procgroup
+            WSGIApplicationGroup nilmdb-appgroup
+
+            # Access control example:
+            Order deny,allow
+            Deny from all
+            Allow from 1.2.3.4
+        </Location>
+    </VirtualHost>
--- a/extras/fix-oversize-files.py
+++ b/extras/fix-oversize-files.py
@@ -0,0 +1,50 @@
+#!/usr/bin/python
+
+import os
+import sys
+import cPickle as pickle
+import argparse
+import fcntl
+import re
+from nilmdb.client.numpyclient import layout_to_dtype
+
+# Command-line interface: a required database root path, plus --yes to
+# actually truncate files.  Without --yes the script only reports.
+parser = argparse.ArgumentParser(
+    description = """
+Fix database corruption where binary writes caused too much data to be
+written to the file.  Truncates files to the correct length.  This was
+fixed by b98ff1331a515ad47fd3203615e835b529b039f9.
+""")
+parser.add_argument("path", action="store", help='Database root path')
+parser.add_argument("-y", "--yes", action="store_true", help='Fix them')
+args = parser.parse_args()
+
+# Hold an exclusive lock on <path>/data.lock for the whole run.  LOCK_NB
+# makes flock() raise immediately instead of waiting if the lock is taken.
+# NOTE(review): presumably this is the lock a running server holds -- confirm.
+lock = os.path.join(args.path, "data.lock")
+with open(lock, "w") as f:
+    fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+
+    # Maps each table directory to the maximum allowed data file size (bytes).
+    fix = {}
+
+    # Pass 1: every directory containing a "_format" file is treated as a
+    # table.  Its maximum file size is (bytes per row) * (rows per file),
+    # both taken from the unpickled format description.
+    # NOTE(review): pickle.load executes whatever the file describes; only
+    # run this against a database directory you trust.
+    for (path, dirs, files) in os.walk(args.path):
+        if "_format" in files:
+            with open(os.path.join(path, "_format")) as format:
+                fmt = pickle.load(format)
+                rowsize = layout_to_dtype(fmt["layout"]).itemsize
+                maxsize = rowsize * fmt["rows_per_file"]
+                fix[path] = maxsize
+                # Abort before touching anything if the computed limit is
+                # below 128000000 bytes.
+                if maxsize < 128000000: # sanity check
+                    raise Exception("bad maxsize " + str(maxsize))
+
+    # Pass 2: under each table directory, look at files whose names are
+    # four or more lowercase hex digits and compare their size to the limit.
+    for fixpath in fix:
+        for (path, dirs, files) in os.walk(fixpath):
+            for fn in files:
+                if not re.match("^[0-9a-f]{4,}$", fn):
+                    continue
+                fn = os.path.join(path, fn)
+                size = os.path.getsize(fn)
+                maxsize = fix[fixpath]
+                if size > maxsize:
+                    diff = size - maxsize
+                    # Report the excess byte count and the file name.
+                    print diff, "too big:", fn
+                    if args.yes:
+                        # "a+" opens without discarding existing contents;
+                        # truncate() then cuts the file down to maxsize.
+                        with open(fn, "a+") as dbfile:
+                            dbfile.truncate(maxsize)
--- a/extras/nilmtool-bash-completion.sh
+++ b/extras/nilmtool-bash-completion.sh
@@ -0,0 +1,20 @@
+# To enable bash completion:
+#
+# 1. Ensure python-argcomplete is installed:
+#       pip install argcomplete
+# 2. Source this file:
+#       . nilmtool-bash-completion.sh
+
+# Completion function for nilmtool.  It re-runs the program being
+# completed ("$1") with _ARGCOMPLETE=1 and the current completion state
+# (COMP_LINE, COMP_POINT, COMP_WORDBREAKS) in its environment, and stores
+# whatever the program writes to file descriptor 8 in COMPREPLY.
+_nilmtool_argcomplete() {
+    # Split the captured output on a vertical tab (octal 013) only.
+    local IFS=$(printf "\013")
+    # Inside the command substitution, fd 8 is pointed at the captured
+    # stdout and fd 9 at the original stderr, while the program's own
+    # stdout and stderr are discarded.
+    # NOTE(review): presumably argcomplete writes its candidates to fd 8
+    # and debug output to fd 9 -- confirm against the argcomplete docs.
+    COMPREPLY=( $(IFS="$IFS" \
+                  COMP_LINE="$COMP_LINE" \
+	          COMP_WORDBREAKS="$COMP_WORDBREAKS" \
+                  COMP_POINT="$COMP_POINT" \
+                  _ARGCOMPLETE=1 \
+                  "$1" 8>&1 9>&2 1>/dev/null 2>/dev/null) )
+    # If the program exited with a non-zero status, drop any partial
+    # result so no completions are offered.
+    if [[ $? != 0 ]]; then
+        unset COMPREPLY
+    fi
+}
+# Register the function for "nilmtool"; -o nospace stops bash from
+# appending a space after the completed word.
+complete -o nospace -F _nilmtool_argcomplete nilmtool
--- a/nilmdb/init.py
+++ b/nilmdb/init.py
@@ -1,2 +1,10 @@
-from nilmdb.interval import *
-from nilmdb.fileinterval import *
+"""Main NilmDB import"""
+
+# These aren't imported automatically, because loading the server
+# stuff isn't always necessary.
+#from nilmdb.server import NilmDB, Server
+#from nilmdb.client import Client
+
+# Publish the package version computed by nilmdb._version.get_versions()
+# as nilmdb.__version__, then remove the helper from the package
+# namespace so only __version__ is exposed.
+from nilmdb._version import get_versions
+__version__ = get_versions()['version']
+del get_versions
--- a/nilmdb/_version.py
+++ b/nilmdb/_version.py
@@ -0,0 +1,197 @@
+
+# True in this full-length _version.py.  versions_from_vcs() and
+# versions_from_parentdir() below read this flag to decide how to locate
+# the root of the source tree.
+IN_LONG_VERSION_PY = True
+# This file helps to compute a version number in source trees obtained
+# from a git-archive tarball (such as those provided by GitHub's
+# download-from-tag feature). Distribution tarballs (built by setup.py
+# sdist) and build directories (produced by setup.py build) will contain
+# a much shorter file that just contains the computed version number.
+
+# This file is released into the public domain. Generated by
+# versioneer-0.7+ (https://github.com/warner/python-versioneer)
+
+# These strings will be replaced by git during git-archive (the file is
+# marked "export-subst" in .gitattributes).  Until then they keep their
+# literal "$Format:...$" form, which versions_from_expanded_variables()
+# detects and treats as "not expanded".
+git_refnames = "$Format:%d$"
+git_full = "$Format:%H$"
+
+
+import subprocess
+import sys
+
+def run_command(args, cwd=None, verbose=False):
+    """Run a command and return its standard output with surrounding
+    whitespace stripped.
+
+    'args' is the argument list handed to subprocess.Popen (no shell is
+    involved), and 'cwd' is the working directory to run it in.
+
+    Returns None if the command could not be started or exited with a
+    non-zero status; when 'verbose' is true a short explanation is
+    printed in those cases.  On Python 3 the output is decoded to text.
+    """
+    try:
+        # remember shell=False, so use git.cmd on windows, not just git
+        p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd)
+    except EnvironmentError:
+        e = sys.exc_info()[1]
+        if verbose:
+            print("unable to run %s" % args[0])
+            print(e)
+        return None
+    stdout = p.communicate()[0].strip()
+    # Test the numeric major version.  Comparing the sys.version string
+    # lexically would misorder a two-digit major version ("10" < "3").
+    if sys.version_info[0] >= 3:
+        stdout = stdout.decode()
+    if p.returncode != 0:
+        if verbose:
+            print("unable to run %s (error)" % args[0])
+        return None
+    return stdout
+
+
+import sys
+import re
+import os.path
+
+def get_expanded_variables(versionfile_source):
+    """Extract the git_refnames / git_full strings from a _version.py
+    file without importing it.
+
+    Returns a dictionary that may contain the keys "refnames" and
+    "full", each holding the text found between the double quotes on
+    the corresponding assignment line.  If the file cannot be opened
+    or read, whatever was collected so far (normally an empty
+    dictionary) is returned.
+    """
+    # the code embedded in _version.py can just fetch the value of these
+    # variables. When used from setup.py, we don't want to import
+    # _version.py, so we do it with a regexp instead. This function is not
+    # used from _version.py.
+    variables = {}
+    try:
+        f = open(versionfile_source, "r")
+        try:
+            for line in f.readlines():
+                if line.strip().startswith("git_refnames ="):
+                    mo = re.search(r'=\s*"(.*)"', line)
+                    if mo:
+                        variables["refnames"] = mo.group(1)
+                if line.strip().startswith("git_full ="):
+                    mo = re.search(r'=\s*"(.*)"', line)
+                    if mo:
+                        variables["full"] = mo.group(1)
+        finally:
+            # Always release the file handle, even if reading fails
+            f.close()
+    except EnvironmentError:
+        pass
+    return variables
+
+def versions_from_expanded_variables(variables, tag_prefix, verbose=False):
+    """Derive version information from git-archive expanded variables.
+
+    'variables' must provide "refnames" and "full".  Returns {} when
+    "refnames" still starts with "$Format" (i.e. it was never expanded).
+    Otherwise returns a dictionary with "version" and "full": the
+    version is the first ref, in sorted order, that starts with
+    'tag_prefix' (with the prefix removed), or the full revision id if
+    no ref qualifies.
+    """
+    refnames = variables["refnames"].strip()
+    if refnames.startswith("$Format"):
+        # unexpanded, so not in an unpacked git-archive tarball
+        if verbose:
+            print("variables are unexpanded, not using")
+        return {}
+
+    full_id = variables["full"].strip()
+    refs = set([name.strip() for name in refnames.strip("()").split(",")])
+
+    # Assume every version tag contains a digit.  git's %d expansion
+    # strips the refs/heads/ and refs/tags/ prefixes, so branches and
+    # tags can't be told apart; dropping digit-free names filters out
+    # common branch names such as "release", "HEAD" and "master".
+    digitless = [name for name in refs if not re.search(r'\d', name)]
+    for name in digitless:
+        if verbose:
+            print("discarding '%s', no digits" % name)
+        refs.discard(name)
+    if verbose:
+        print("remaining refs: %s" % ",".join(sorted(refs)))
+
+    # sorting will prefer e.g. "2.0" over "2.0rc1"
+    tagged = [name for name in sorted(refs) if name.startswith(tag_prefix)]
+    if tagged:
+        picked = tagged[0][len(tag_prefix):]
+        if verbose:
+            print("picking %s" % picked)
+        return { "version": picked,
+                 "full": full_id }
+
+    # no suitable tags, so we use the full revision id
+    if verbose:
+        print("no suitable tags, using full revision id")
+    return { "version": full_id,
+             "full": full_id }
+
+def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):
+    """Ask git for the version of the checked-out source tree.
+
+    The tree root is found from __file__: when IN_LONG_VERSION_PY is
+    set, one directory level is stripped for every "/"-separated
+    component of 'versionfile_source' (the path of this file relative
+    to the root); otherwise the directory containing this file is the
+    root.  This is only reached when the git-archive variables were not
+    expanded and _version.py has not been replaced by the short form.
+
+    Returns {"version": ..., "full": ...} on success, or {} if __file__
+    is unavailable, the root has no ".git", git fails, or the described
+    tag does not start with 'tag_prefix'.
+    """
+    try:
+        this_file = os.path.abspath(__file__)
+    except NameError:
+        # some py2exe/bbfreeze/non-CPython implementations don't do __file__
+        return {} # not always correct
+
+    if IN_LONG_VERSION_PY:
+        # Climb one directory per path component of versionfile_source
+        top = this_file
+        for _ in versionfile_source.split("/"):
+            top = os.path.dirname(top)
+    else:
+        top = os.path.dirname(this_file)
+
+    if not os.path.exists(os.path.join(top, ".git")):
+        if verbose:
+            print("no .git in %s" % top)
+        return {}
+
+    if sys.platform == "win32":
+        git = "git.cmd"
+    else:
+        git = "git"
+
+    described = run_command([git, "describe", "--tags", "--dirty", "--always"],
+                            cwd=top)
+    if described is None:
+        return {}
+    if not described.startswith(tag_prefix):
+        if verbose:
+            print("tag '%s' doesn't start with prefix '%s'" % (described, tag_prefix))
+        return {}
+    version = described[len(tag_prefix):]
+
+    revision = run_command([git, "rev-parse", "HEAD"], cwd=top)
+    if revision is None:
+        return {}
+    revision = revision.strip()
+    # Carry the dirty marker over to the full revision id as well
+    if version.endswith("-dirty"):
+        revision += "-dirty"
+    return {"version": version, "full": revision}
+
+
+def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False):
+    """Guess the version from the name of the source tree's root
+    directory.
+
+    Source tarballs conventionally unpack into a directory named with
+    both the project name and a version string.  If the root directory
+    name starts with 'parentdir_prefix', the remainder is returned as
+    {"version": ..., "full": ""}.  Returns None if the name does not
+    match, and {} if __file__ is not available.
+    """
+    if not IN_LONG_VERSION_PY:
+        # we're running from versioneer.py, which means we're running from
+        # the setup.py in a source tree. sys.argv[0] is setup.py in the root.
+        root = os.path.dirname(os.path.abspath(sys.argv[0]))
+    else:
+        # We're running from _version.py.  In a source tree we can work
+        # upwards to the root; in an installed application there's no
+        # hope of finding a meaningful parent directory.
+        try:
+            root = os.path.abspath(__file__)
+        except NameError:
+            # py2exe/bbfreeze/non-CPython don't have __file__
+            return {} # without __file__, we have no hope
+        # versionfile_source is the relative path from the top of the
+        # tree to _version.py; strip one level per path component.
+        for _ in versionfile_source.split("/"):
+            root = os.path.dirname(root)
+
+    dirname = os.path.basename(root)
+    if dirname.startswith(parentdir_prefix):
+        return {"version": dirname[len(parentdir_prefix):], "full": ""}
+
+    if verbose:
+        print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" %
+              (root, dirname, parentdir_prefix))
+    return None
+
+# Project-specific settings consumed by get_versions() below.
+# Prefix that a git tag must carry to be treated as a version tag; it
+# is stripped from the tag to form the version string.
+tag_prefix = "nilmdb-"
+# Prefix that the source root directory name must carry for
+# versions_from_parentdir() to extract a version from it.
+parentdir_prefix = "nilmdb-"
+# Path of this file relative to the top of the source tree.
+versionfile_source = "nilmdb/_version.py"
+
+def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
+    """Return a dictionary with "version" and "full" entries describing
+    this source tree.
+
+    Three sources are tried in order, stopping at the first one that
+    yields a non-empty result: the git-archive expanded variables, the
+    git checkout itself, and the name of the parent directory.  If all
+    of them fail, 'default' is used.
+    """
+    variables = { "refnames": git_refnames, "full": git_full }
+    ver = versions_from_expanded_variables(variables, tag_prefix, verbose)
+    if not ver:
+        ver = versions_from_vcs(tag_prefix, versionfile_source, verbose)
+    if not ver:
+        ver = versions_from_parentdir(parentdir_prefix, versionfile_source,
+                                      verbose)
+    if not ver:
+        # Hand back a copy: the default-argument dict is shared between
+        # calls, so returning it directly would let one caller's
+        # modifications leak into every later call.
+        if isinstance(default, dict):
+            ver = dict(default)
+        else:
+            ver = default
+    return ver
+
--- a/nilmdb/client/init.py
+++ b/nilmdb/client/init.py
@@ -0,0 +1,4 @@
+"""nilmdb.client"""
+
+from nilmdb.client.client import Client
+from nilmdb.client.errors import ClientError, ServerError, Error
--- a/nilmdb/client/client.py
+++ b/nilmdb/client/client.py
@@ -0,0 +1,464 @@
+# -*- coding: utf-8 -*-
+
+"""Class for performing HTTP client requests via libcurl"""
+
+import nilmdb.utils
+import nilmdb.client.httpclient
+from nilmdb.client.errors import ClientError
+
+import time
+import simplejson as json
+import contextlib
+
+from nilmdb.utils.time import timestamp_to_string, string_to_timestamp
+
+def extract_timestamp(line):
+    """Parse and return the timestamp held in the first
+    whitespace-separated field of a line of data text"""
+    first_field = line.split()[0]
+    return string_to_timestamp(first_field)
+
+class Client(object):
+    """Main client interface to the Nilm database.
+
+    Every request is delegated to an HTTPClient instance stored in
+    self.http.  The object can be used as a context manager, in which
+    case close() is called on exit."""
+
+    def __init__(self, url, post_json = False):
+        """Initialize client with given URL.  If post_json is true,
+        POST requests are sent with Content-Type 'application/json'
+        instead of the default 'x-www-form-urlencoded'."""
+        self.http = nilmdb.client.httpclient.HTTPClient(url, post_json)
+        self.post_json = post_json
+
+    # __enter__/__exit__ allow this class to be a context manager
+    def __enter__(self):
+        """Return the client itself as the context object"""
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        """Close the connection when the 'with' block ends.  Nothing is
+        returned, so an exception from the block is not suppressed."""
+        self.close()
+
+    def _json_post_param(self, data):
+        """Return compact json-encoded version of parameter"""
+        if self.post_json:
+            # If we're posting as JSON, we don't need to encode it further here
+            return data
+        # Compact separators: no whitespace after ',' or ':'
+        return json.dumps(data, separators=(',',':'))
+
+    def close(self):
+        """Close the connection; safe to call multiple times"""
+        self.http.close()
+
+    def geturl(self):
+        """Return the URL we're using"""
+        return self.http.baseurl
+
+    def version(self):
+        """Return server version"""
+        return self.http.get("version")
+
+    def dbinfo(self):
+        """Return server database info (path, size, free space)
+        as a dictionary."""
+        return self.http.get("dbinfo")
+
+    def stream_list(self, path = None, layout = None, extended = False):
+        """Return the result of the server's "stream/list" request,
+        sorted with nilmdb.utils.sort.sort_human using the first
+        element of each entry as the key.
+
+        'path' and 'layout' are sent to the server only when they are
+        not None, and 'extended' is sent (as 1) only when true."""
+        params = {}
+        if path is not None:
+            params["path"] = path
+        if layout is not None:
+            params["layout"] = layout
+        if extended:
+            params["extended"] = 1
+        streams = self.http.get("stream/list", params)
+        return nilmdb.utils.sort.sort_human(streams, key = lambda s: s[0])
+
+    def stream_get_metadata(self, path, keys = None):
+        """Return the result of the server's "stream/get_metadata"
+        request for 'path'.  If 'keys' is not None it is sent as the
+        "key" parameter."""
+        params = { "path": path }
+        if keys is not None:
+            params["key"] = keys
+        return self.http.get("stream/get_metadata", params)
+
+    def stream_set_metadata(self, path, data):
+        """Set stream metadata from a dictionary, replacing all existing
+        metadata."""
+        params = {
+            "path": path,
+            "data": self._json_post_param(data)
+            }
+        return self.http.post("stream/set_metadata", params)
+
+    def stream_update_metadata(self, path, data):
+        """Update stream metadata from a dictionary"""
+        params = {
+            "path": path,
+            "data": self._json_post_param(data)
+            }
+        return self.http.post("stream/update_metadata", params)
+
+    def stream_create(self, path, layout):
+        """Create a new stream"""
+        params = { "path": path,
+                   "layout" : layout }
+        return self.http.post("stream/create", params)
+
+    def stream_destroy(self, path):
+        """Delete stream.  Fails if any data is still present."""
+        params = { "path": path }
+        return self.http.post("stream/destroy", params)
+
+    def stream_rename(self, oldpath, newpath):
+        """Rename a stream."""
+        params = { "oldpath": oldpath,
+                   "newpath": newpath }
+        return self.http.post("stream/rename", params)
+
+    def stream_remove(self, path, start = None, end = None):
+        """Remove data from the specified time range.  Returns the sum
+        of the counts yielded by the server's response."""
+        params = {
+            "path": path
+        }
+        if start is not None:
+            params["start"] = timestamp_to_string(start)
+        if end is not None:
+            params["end"] = timestamp_to_string(end)
+        # The response is consumed piece by piece; each piece is
+        # converted to an integer and accumulated.
+        total = 0
+        for count in self.http.post_gen("stream/remove", params):
+            total += int(count)
+        return total
+
+    @contextlib.contextmanager
+    def stream_insert_context(self, path, start = None, end = None):
+        """Return a context manager that allows data to be efficiently
+        inserted into a stream in a piecewise manner.  Data is
+        provided as ASCII lines, and is aggregated and sent to the
+        server in larger or smaller chunks as necessary.  Data lines
+        must match the database layout for the given path, and end
+        with a newline.
+
+        Example:
+          with client.stream_insert_context('/path', start, end) as ctx:
+            ctx.insert('1234567890.0 1 2 3 4\\n')
+            ctx.insert('1234567891.0 1 2 3 4\\n')
+
+        For more details, see help for nilmdb.client.client.StreamInserter
+
+        This may make multiple requests to the server, if the data is
+        large enough or enough time has passed between insertions.
+        """
+        ctx = StreamInserter(self, path, start, end)
+        yield ctx
+        # Only reached when the 'with' body finished without raising:
+        # flush the remaining data, then disable the inserter.
+        ctx.finalize()
+        ctx.destroy()
+
+    def stream_insert(self, path, data, start = None, end = None):
+        """Insert rows of data into a stream.  data should be a string
+        or iterable that provides ASCII data that matches the database
+        layout for path.  Data is passed through stream_insert_context,
+        so it will be broken into reasonably-sized chunks and
+        start/end will be deduced if missing."""
+        with self.stream_insert_context(path, start, end) as ctx:
+            if isinstance(data, basestring):
+                ctx.insert(data)
+            else:
+                for chunk in data:
+                    ctx.insert(chunk)
+        # last_response is a plain attribute, so it is still readable
+        # after the context manager has destroyed the inserter.
+        return ctx.last_response
+
+    def stream_insert_block(self, path, data, start, end, binary = False):
+        """Insert a single fixed block of data into the stream.  It is
+        sent directly to the server in one block with no further
+        processing.
+
+        If 'binary' is True, provide raw binary data in little-endian
+        format matching the path layout, including an int64 timestamp.
+        Otherwise, provide ASCII data matching the layout."""
+        params = {
+            "path": path,
+            "start": timestamp_to_string(start),
+            "end": timestamp_to_string(end),
+        }
+        if binary:
+            params["binary"] = 1
+        return self.http.put("stream/insert", data, params, binary = binary)
+
+    def stream_intervals(self, path, start = None, end = None, diffpath = None):
+        """
+        Return a generator that yields each stream interval.
+
+        If 'diffpath' is not None, yields only interval ranges that are
+        present in 'path' but not in 'diffpath'.
+        """
+        params = {
+            "path": path
+        }
+        if diffpath is not None:
+            params["diffpath"] = diffpath
+        if start is not None:
+            params["start"] = timestamp_to_string(start)
+        if end is not None:
+            params["end"] = timestamp_to_string(end)
+        return self.http.get_gen("stream/intervals", params)
+
+    def stream_extract(self, path, start = None, end = None,
+                       count = False, markup = False, binary = False):
+        """
+        Extract data from a stream.  Returns a generator that yields
+        lines of ASCII-formatted data that matches the database
+        layout for the given path.
+
+        If 'count' is True, return a count of matching data points
+        rather than the actual data.  The output format is unchanged.
+
+        If 'markup' is True, include comments in the returned data
+        that indicate interval starts and ends.
+
+        If 'binary' is True, return chunks of raw binary data, rather
+        than lines of ASCII-formatted data.  Raw binary data is
+        little-endian and matches the database types (including an
+        int64 timestamp).
+        """
+        params = {
+            "path": path,
+        }
+        if start is not None:
+            params["start"] = timestamp_to_string(start)
+        if end is not None:
+            params["end"] = timestamp_to_string(end)
+        # Boolean options are only sent when set, always as 1
+        if count:
+            params["count"] = 1
+        if markup:
+            params["markup"] = 1
+        if binary:
+            params["binary"] = 1
+        return self.http.get_gen("stream/extract", params, binary = binary)
+
+    def stream_count(self, path, start = None, end = None):
+        """
+        Return the number of rows of data in the stream that satisfy
+        the given timestamps.
+        """
+        # With count = True the first item yielded holds the count
+        counts = list(self.stream_extract(path, start, end, count = True))
+        return int(counts[0])
+
+class StreamInserter(object):
+    """Object returned by stream_insert_context() that manages
+    the insertion of rows of data into a particular path.
+
+    The basic data flow is that we are filling a contiguous interval
+    on the server, with no gaps, that extends from timestamp 'start'
+    to timestamp 'end'.  Data timestamps satisfy 'start <= t < end'.
+
+    Data is provided to .insert() as ASCII formatted data separated by
+    newlines.  The chunks of data passed to .insert() do not need to
+    match up with the newlines; less or more than one line can be passed.
+
+    1. The first inserted line begins a new interval that starts at
+    'start'.  If 'start' is not given, it is deduced from the first
+    line's timestamp.
+
+    2. Subsequent lines go into the same contiguous interval.  As lines
+    are inserted, this routine may make multiple insertion requests to
+    the server, but will structure the timestamps to leave no gaps.
+
+    3. The current contiguous interval can be completed by manually
+    calling .finalize(), which the context manager will also do
+    automatically.  This will send any remaining data to the server,
+    using the 'end' timestamp to end the interval.  If no 'end'
+    was provided, it is deduced from the last timestamp seen,
+    plus a small delta.
+
+    After a .finalize(), inserting new data goes back to step 1.
+
+    .update_start() can be called before step 1 to change the start
+    time for the interval.  .update_end() can be called before step 3
+    to change the end time for the interval.
+
+    The 'destroyed' attribute is False until .destroy() is called.
+    """
+
+    # See design.md for a discussion of how much data to send.  This
+    # is a soft limit -- we might send up to twice as much or so
+    _max_data = 2 * 1024 * 1024
+    # Upper bound on the data allowed to remain buffered right after
+    # an intermediate block was sent; see insert().
+    _max_data_after_send = 64 * 1024
+
+    def __init__(self, client, path, start, end):
+        """'client' is the client object.  'path' is the database
+        path to insert to.  'start' and 'end' are used for the first
+        contiguous interval and may be None."""
+        self.last_response = None
+
+        self._client = client
+        self._path = path
+
+        # Start and end for the overall contiguous interval we're
+        # filling
+        self._interval_start = start
+        self._interval_end = end
+
+        # Current data we're building up to send.  Each string
+        # goes into the array, and gets joined all at once.
+        # _block_len is the total length of the strings in _block_data.
+        self._block_data = []
+        self._block_len = 0
+
+        self.destroyed = False
+
+    def destroy(self):
+        """Ensure this object can't be used again without raising
+        an error, and record that fact in self.destroyed"""
+        def error(*args, **kwargs):
+            raise Exception("don't reuse this context object")
+        self._send_block = self.insert = self.finalize = self.send = error
+        # Keep the flag set up by __init__ in sync with the real state
+        self.destroyed = True
+
+    def insert(self, data):
+        """Insert a chunk of ASCII formatted data in string form.  The
+        overall data must consist of lines terminated by '\\n'.
+
+        Raises ValueError if too much data is still buffered after an
+        intermediate block was sent."""
+        length = len(data)
+        maxdata = self._max_data
+
+        if length > maxdata:
+            # This could make our buffer more than twice what we
+            # wanted to send, so split it up.  This is a bit
+            # inefficient, but the user really shouldn't be providing
+            # this much data at once.
+            for cut in range(0, length, maxdata):
+                self.insert(data[cut:(cut + maxdata)])
+            return
+
+        # Append this string to our list
+        self._block_data.append(data)
+        self._block_len += length
+
+        # Send the block once we have enough data
+        if self._block_len >= maxdata:
+            self._send_block(final = False)
+            if self._block_len >= self._max_data_after_send: # pragma: no cover
+                raise ValueError("too much data left over after trying"
+                                 " to send intermediate block; is it"
+                                 " missing newlines or malformed?")
+
+    def update_start(self, start):
+        """Update the start time for the next contiguous interval.
+        Call this before starting to insert data for a new interval,
+        for example, after .finalize()"""
+        self._interval_start = start
+
+    def update_end(self, end):
+        """Update the end time for the current contiguous interval.
+        Call this before .finalize()"""
+        self._interval_end = end
+
+    def finalize(self):
+        """Stop filling the current contiguous interval.
+        All outstanding data will be sent, and the interval end
+        time of the interval will be taken from the 'end' argument
+        used when initializing this class, or the most recent
+        value passed to update_end(), or the last timestamp plus
+        a small epsilon value if no other endpoint was provided.
+
+        If more data is inserted after a finalize(), it will become
+        part of a new interval and there may be a gap left in-between."""
+        self._send_block(final = True)
+
+    def send(self):
+        """Send any data that we might have buffered up.  Does not affect
+        any other treatment of timestamps or endpoints."""
+        self._send_block(final = False)
+
+    def _get_first_noncomment(self, block):
+        """Return the (start, end) indices of the first full line in
+        block that isn't a comment, or raise IndexError if
+        there isn't one.  'end' is the index just past the line's
+        terminating newline."""
+        start = 0
+        while True:
+            end = block.find('\n', start)
+            if end < 0:
+                raise IndexError
+            if block[start] != '#':
+                return (start, (end + 1))
+            start = end + 1
+
+    def _get_last_noncomment(self, block):
+        """Return the (start, end) indices of the last full line in
+        block that isn't a comment, or raise IndexError if
+        there isn't one.  'end' is the index of the line's terminating
+        newline, so block[start:end] excludes the newline."""
+        end = block.rfind('\n')
+        if end <= 0:
+            raise IndexError
+        while True:
+            # 'start' is the newline that ends the previous line, or
+            # -1 when the candidate is the first line of the block
+            start = block.rfind('\n', 0, end)
+            if block[start + 1] != '#':
+                return ((start + 1), end)
+            if start == -1:
+                raise IndexError
+            end = start
+
+    def _send_block(self, final = False):
+        """Send data currently in the block.  The data sent will
+        consist of full lines only, so some might be left over.
+
+        If 'final' is true, the whole buffer is sent and the interval
+        is ended; otherwise the last full line and anything after it
+        are kept back for the next block.
+
+        Raises ClientError if there is non-comment data to send but no
+        usable start/end timestamps."""
+        # Build the full string to send
+        block = "".join(self._block_data)
+
+        start_ts = self._interval_start
+        if start_ts is None:
+            # Pull start from the first line
+            try:
+                (spos, epos) = self._get_first_noncomment(block)
+                start_ts = extract_timestamp(block[spos:epos])
+            except (ValueError, IndexError):
+                pass # no timestamp is OK, if we have no data
+
+        if final:
+            # For a final block, it must end in a newline, and the
+            # ending timestamp is either the user-provided end,
+            # or the timestamp of the last line plus epsilon.
+            end_ts = self._interval_end
+            try:
+                if block[-1] != '\n':
+                    raise ValueError("final block didn't end with a newline")
+                if end_ts is None:
+                    (spos, epos) = self._get_last_noncomment(block)
+                    end_ts = extract_timestamp(block[spos:epos])
+                    end_ts += nilmdb.utils.time.epsilon
+            except (ValueError, IndexError):
+                # NOTE(review): this also catches the ValueError raised
+                # just above for a missing trailing newline, so that
+                # error never propagates from here.
+                pass # no timestamp is OK, if we have no data
+            self._block_data = []
+            self._block_len = 0
+
+            # Next block is completely fresh
+            self._interval_start = None
+            self._interval_end = None
+        else:
+            # An intermediate block, e.g. "line1\nline2\nline3\nline4"
+            # We need to save "line3\nline4" for the next block, and
+            # use the timestamp from "line3" as the ending timestamp
+            # for this one.
+            try:
+                (spos, epos) = self._get_last_noncomment(block)
+                end_ts = extract_timestamp(block[spos:epos])
+            except (ValueError, IndexError):
+                # If we found no timestamp, give up; we could send this
+                # block later when we have more data.
+                return
+            if spos == 0:
+                # Not enough data to send an intermediate block
+                return
+            if self._interval_end is not None and end_ts > self._interval_end:
+                # User gave us bad endpoints; send it anyway, and let
+                # the server complain so that the error is the same
+                # as if we hadn't done this chunking.
+                end_ts = self._interval_end
+            # Keep everything from the last full line onwards.  The
+            # length must cover all of the retained text (the line,
+            # its newline and any partial line after it), not just
+            # the last full line, or _block_len would undercount.
+            leftover = block[spos:]
+            self._block_data = [ leftover ]
+            self._block_len = len(leftover)
+            block = block[:spos]
+
+            # Next block continues where this one ended
+            self._interval_start = end_ts
+
+        # Double check endpoints
+        if (start_ts is None or end_ts is None) or (start_ts == end_ts):
+            # If the block has no non-comment lines, it's OK
+            try:
+                self._get_first_noncomment(block)
+            except IndexError:
+                return
+            raise ClientError("have data to send, but no start/end times")
+
+        # Send it
+        self.last_response = self._client.stream_insert_block(
+            self._path, block, start_ts, end_ts, binary = False)
+
+        return
--- a/nilmdb/client/errors.py
+++ b/nilmdb/client/errors.py
@@ -0,0 +1,33 @@
+"""HTTP client errors"""
+
+from nilmdb.utils.printf import *
+
+class Error(Exception):
+    """Common base class of the ClientError and ServerError exceptions.
+
+    Carries the status line, an optional textual message, the URL
+    that was requested and an optional server-side traceback."""
+    def __init__(self,
+                 status = "Unspecified error",
+                 message = None,
+                 url = None,
+                 traceback = None):
+        super(Error, self).__init__(status)
+        # e.g. "400 Bad Request"
+        self.status = status
+        # textual message from the server
+        self.message = message
+        # URL we were requesting
+        self.url = url
+        # server traceback, if available
+        self.traceback = traceback
+
+    def _format_error(self, show_url):
+        """Build the display string: the status in brackets, followed
+        by the message, the URL (only if 'show_url' is true) and the
+        server traceback, each included only when it is set."""
+        pieces = [ sprintf("[%s]", self.status) ]
+        if self.message:
+            pieces.append(sprintf(" %s", self.message))
+        if show_url and self.url: # pragma: no cover
+            pieces.append(sprintf(" (%s)", self.url))
+        if self.traceback: # pragma: no cover
+            pieces.append(sprintf("\nServer traceback:\n%s", self.traceback))
+        return "".join(pieces)
+
+    def __str__(self):
+        """Display string without the URL"""
+        return self._format_error(show_url = False)
+
+    def __repr__(self): # pragma: no cover
+        """Display string including the URL"""
+        return self._format_error(show_url = True)
+# Raised by HTTPClient._handle_error for HTTP status codes 400-499.
+class ClientError(Error):
+    pass
+# NOTE(review): presumably raised for HTTP 5xx responses -- confirm
+# against HTTPClient._handle_error.
+class ServerError(Error):
+    pass
--- a/nilmdb/client/httpclient.py
+++ b/nilmdb/client/httpclient.py
@@ -0,0 +1,172 @@
+"""HTTP client library"""
+
+import nilmdb.utils
+from nilmdb.client.errors import ClientError, ServerError, Error
+
+import simplejson as json
+import urlparse
+import requests
+
+class HTTPClient(object):
+    """Class to manage and perform HTTP requests from the client"""
+    def __init__(self, baseurl = "", post_json = False):
+        """If baseurl is supplied, all other functions that take
+        a URL can be given a relative URL instead."""
+        # Verify / clean up URL
+        reparsed = urlparse.urlparse(baseurl).geturl()
+        if '://' not in reparsed:
+            reparsed = urlparse.urlparse("http://" + baseurl).geturl()
+        self.baseurl = reparsed.rstrip('/') + '/'
+
+        # Build Requests session object, enable SSL verification
+        self.session = requests.Session()
+        self.session.verify = True
+
+        # Saved response, so that tests can verify a few things.
+        self._last_response = {}
+
+        # Whether to send application/json POST bodies (versus
+        # x-www-form-urlencoded)
+        self.post_json = post_json
+
+    def _handle_error(self, url, code, body):
+        # Default variables for exception.  We use the entire body as
+        # the default message, in case we can't extract it from a JSON
+        # response.
+        args = { "url" : url,
+                 "status" : str(code),
+                 "message" : body,
+                 "traceback" : None }
+        try:
+            # Fill with server-provided data if we can
+            jsonerror = json.loads(body)
+            args["status"] = jsonerror["status"]
+            args["message"] = jsonerror["message"]
+            args["traceback"] = jsonerror["traceback"]
+        except Exception: # pragma: no cover
+            pass
+        if code >= 400 and code <= 499:
+            raise ClientError(**args)
+        else: # pragma: no cover
+            if code >= 500 and code <= 599:
+                if args["message"] is None:
+                    args["message"] = ("(no message; try disabling " +
+                                       "response.stream option in " +
+                                       "nilmdb.server for better debugging)")
+                raise ServerError(**args)
+            else:
+                raise Error(**args)
+
+    def close(self):
+        self.session.close()
+
+    def _do_req(self, method, url, query_data, body_data, stream, headers):
+        url = urlparse.urljoin(self.baseurl, url)
+        try:
+            response = self.session.request(method, url,
+                                            params = query_data,
+                                            data = body_data,
+                                            stream = stream,
+                                            headers = headers)
+        except requests.RequestException as e:
+            raise ServerError(status = "502 Error", url = url,
+                              message = str(e.message))
+        if response.status_code != 200:
+            self._handle_error(url, response.status_code, response.content)
+        self._last_response = response
+        if response.headers["content-type"] in ("application/json",
+                                                "application/x-json-stream"):
+            return (response, True)
+        else:
+            return (response, False)
+
+    # Normal versions that return data directly
+    def _req(self, method, url, query = None, body = None, headers = None):
+        """
+        Make a request and return the body data as a string or parsed
+        JSON object, or raise an error if it contained an error.
+        """
+        (response, isjson) = self._do_req(method, url, query, body,
+                                          stream = False, headers = headers)
+        if isjson:
+            return json.loads(response.content)
+        return response.content
+
+    def get(self, url, params = None):
+        """Simple GET (parameters in URL)"""
+        return self._req("GET", url, params, None)
+
+    def post(self, url, params = None):
+        """Simple POST (parameters in body)"""
+        if self.post_json:
+            return self._req("POST", url, None,
+                             json.dumps(params),
+                             { 'Content-type': 'application/json' })
+        else:
+            return self._req("POST", url, None, params)
+
+    def put(self, url, data, params = None, binary = False):
+        """Simple PUT (parameters in URL, data in body)"""
+        if binary:
+            h = { 'Content-type': 'application/octet-stream' }
+        else:
+            h = { 'Content-type': 'text/plain; charset=utf-8' }
+        return self._req("PUT", url, query = params, body = data, headers = h)
+
+    # Generator versions that return data one line at a time.
+    def _req_gen(self, method, url, query = None, body = None,
+                 headers = None, binary = False):
+        """
+        Make a request and return a generator that gives back strings
+        or JSON decoded lines of the body data, or raise an error if
+        it contained an eror.
+        """
+        (response, isjson) = self._do_req(method, url, query, body,
+                                          stream = True, headers = headers)
+
+        # Like the iter_lines function in Requests, but only splits on
+        # the specified line ending.
+        def lines(source, ending):
+            pending = None
+            for chunk in source:
+                if pending is not None:
+                    chunk = pending + chunk
+                tmp = chunk.split(ending)
+                lines = tmp[:-1]
+                if chunk.endswith(ending):
+                    pending = None
+                else:
+                    pending = tmp[-1]
+                for line in lines:
+                    yield line
+            if pending is not None: # pragma: no cover (missing newline)
+                yield pending
+
+        # Yield the chunks or lines as requested
+        if binary:
+            for chunk in response.iter_content(chunk_size = 65536):
+                yield chunk
+        elif isjson:
+            for line in lines(response.iter_content(chunk_size = 1),
+                              ending = '\r\n'):
+                yield json.loads(line)
+        else:
+            for line in lines(response.iter_content(chunk_size = 65536),
+                              ending = '\n'):
+                yield line
+
+    def get_gen(self, url, params = None, binary = False):
+        """Simple GET (parameters in URL) returning a generator"""
+        return self._req_gen("GET", url, params, binary = binary)
+
+    def post_gen(self, url, params = None):
+        """Simple POST (parameters in body) returning a generator"""
+        if self.post_json:
+            return self._req_gen("POST", url, None,
+                                 json.dumps(params),
+                                 { 'Content-type': 'application/json' })
+        else:
+            return self._req_gen("POST", url, None, params)
+
+    # Not much use for a POST or PUT generator, since they don't
+    # return much data.
--- a/nilmdb/client/numpyclient.py
+++ b/nilmdb/client/numpyclient.py
@@ -0,0 +1,258 @@
+# -*- coding: utf-8 -*-
+
+"""Provide a NumpyClient class that is based on normal Client, but has
+additional methods for extracting and inserting data via Numpy arrays."""
+
+import nilmdb.utils
+import nilmdb.client.client
+import nilmdb.client.httpclient
+from nilmdb.client.errors import ClientError
+
+import contextlib
+from nilmdb.utils.time import timestamp_to_string, string_to_timestamp
+
+import numpy
+import cStringIO
+
+def layout_to_dtype(layout):
+    ltype = layout.split('_')[0]
+    lcount = int(layout.split('_')[1])
+    if ltype.startswith('int'):
+        atype = '<i' + str(int(ltype[3:]) / 8)
+    elif ltype.startswith('uint'):
+        atype = '<u' + str(int(ltype[4:]) / 8)
+    elif ltype.startswith('float'):
+        atype = '<f' + str(int(ltype[5:]) / 8)
+    else:
+        raise ValueError("bad layout")
+    return numpy.dtype([('timestamp', '<i8'), ('data', atype, lcount)])
+
+class NumpyClient(nilmdb.client.client.Client):
+    """Subclass of nilmdb.client.Client that adds additional methods for
+    extracting and inserting data via Numpy arrays."""
+
+    def _get_dtype(self, path, layout):
+        if layout is None:
+            streams = self.stream_list(path)
+            if len(streams) != 1:
+                raise ClientError("can't get layout for path: " + path)
+            layout = streams[0][1]
+        return layout_to_dtype(layout)
+
+    def stream_extract_numpy(self, path, start = None, end = None,
+                             layout = None, maxrows = 100000,
+                             structured = False):
+        """
+        Extract data from a stream.  Returns a generator that yields
+        Numpy arrays of up to 'maxrows' of data each.
+
+        If 'layout' is None, it is read using stream_info.
+
+        If 'structured' is False, all data is converted to float64
+        and returned in a flat 2D array.  Otherwise, data is returned
+        as a structured dtype in a 1D array.
+        """
+        dtype = self._get_dtype(path, layout)
+
+        def to_numpy(data):
+            a = numpy.fromstring(data, dtype)
+            if structured:
+                return a
+            return numpy.c_[a['timestamp'], a['data']]
+
+        chunks = []
+        total_len = 0
+        maxsize = dtype.itemsize * maxrows
+        for data in self.stream_extract(path, start, end, binary = True):
+            # Add this block of binary data
+            chunks.append(data)
+            total_len += len(data)
+
+            # See if we have enough to make the requested Numpy array
+            while total_len >= maxsize:
+                assembled = "".join(chunks)
+                total_len -= maxsize
+                chunks = [ assembled[maxsize:] ]
+                block = assembled[:maxsize]
+                yield to_numpy(block)
+
+        if total_len:
+            yield to_numpy("".join(chunks))
+
+    @contextlib.contextmanager
+    def stream_insert_numpy_context(self, path, start = None, end = None,
+                                    layout = None):
+        """Return a context manager that allows data to be efficiently
+        inserted into a stream in a piecewise manner.  Data is
+        provided as Numpy arrays, and is aggregated and sent to the
+        server in larger or smaller chunks as necessary.  Data format
+        must match the database layout for the given path.
+
+        For more details, see help for
+        nilmdb.client.numpyclient.StreamInserterNumpy
+
+        If 'layout' is not None, use it as the layout rather than
+        querying the database.
+        """
+        dtype = self._get_dtype(path, layout)
+        ctx = StreamInserterNumpy(self, path, start, end, dtype)
+        yield ctx
+        ctx.finalize()
+        ctx.destroy()
+
+    def stream_insert_numpy(self, path, data, start = None, end = None,
+                            layout = None):
+        """Insert data into a stream.  data should be a Numpy array
+        which will be passed through stream_insert_numpy_context to
+        break it into chunks etc.  See the help for that function
+        for details."""
+        with self.stream_insert_numpy_context(path, start, end, layout) as ctx:
+            if isinstance(data, numpy.ndarray):
+                ctx.insert(data)
+            else:
+                for chunk in data:
+                    ctx.insert(chunk)
+        return ctx.last_response
+
+class StreamInserterNumpy(nilmdb.client.client.StreamInserter):
+    """Object returned by stream_insert_numpy_context() that manages
+    the insertion of rows of data into a particular path.
+
+    See help for nilmdb.client.client.StreamInserter for details.
+    The only difference is that, instead of ASCII formatted data,
+    this context manager can take Numpy arrays, which are either
+    structured (1D with complex dtype) or flat (2D with simple dtype).
+    """
+
+    # Soft limit of how many bytes to send per HTTP request.
+    _max_data = 2 * 1024 * 1024
+
+    def __init__(self, client, path, start, end, dtype):
+        """
+        'client' is the client object.  'path' is the database path
+        to insert to.  'start' and 'end' are used for the first
+        contiguous interval and may be None.  'dtype' is the Numpy
+        dtype for this stream.
+        """
+        super(StreamInserterNumpy, self).__init__(client, path, start, end)
+        self._dtype = dtype
+
+        # Max rows to send at once
+        self._max_rows = self._max_data // self._dtype.itemsize
+
+        # List of the current arrays we're building up to send
+        self._block_arrays = []
+        self._block_rows = 0
+
+    def insert(self, array):
+        """Insert Numpy data, which must match the layout type."""
+        if type(array) != numpy.ndarray:
+            array = numpy.array(array)
+        if array.ndim == 1:
+            # Already a structured array; just verify the type
+            if array.dtype != self._dtype:
+                raise ValueError("wrong dtype for 1D (structured) array")
+        elif array.ndim == 2:
+            # Convert to structured array
+            sarray = numpy.zeros(array.shape[0], dtype=self._dtype)
+            try:
+                sarray['timestamp'] = array[:,0]
+                # Need the squeeze in case sarray['data'] is 1 dimensional
+                sarray['data'] = numpy.squeeze(array[:,1:])
+            except (IndexError, ValueError):
+                raise ValueError("wrong number of fields for this data type")
+            array = sarray
+        else:
+            raise ValueError("wrong number of dimensions in array")
+
+        length = len(array)
+        maxrows = self._max_rows
+
+        if length == 0:
+            return
+        if length > maxrows:
+            # This is more than twice what we wanted to send, so split
+            # it up.  This is a bit inefficient, but the user really
+            # shouldn't be providing this much data at once.
+            for cut in range(0, length, maxrows):
+                self.insert(array[cut:(cut + maxrows)])
+            return
+
+        # Add this array to our list
+        self._block_arrays.append(array)
+        self._block_rows += length
+
+        # Send if it's too long
+        if self._block_rows >= maxrows:
+            self._send_block(final = False)
+
+    def _send_block(self, final = False):
+        """Send the data currently stored up.  One row might be left
+        over if we need its timestamp saved.
+
+        If 'final' is true, every stored row is sent, and the interval
+        state is reset for a completely new interval.  Otherwise the
+        last row is held back, and its timestamp is used both as the
+        end of this block and as the start of the next one; nothing
+        is sent at all if fewer than 2 rows are stored.
+
+        The server's response is saved in self.last_response.  Raises
+        ClientError if there is data to send but the start or end
+        time is missing, or the two are equal."""
+
+        # Build the full array to send
+        if self._block_rows == 0:
+            array = numpy.zeros(0, dtype = self._dtype)
+        else:
+            array = numpy.hstack(self._block_arrays)
+
+        # Get starting timestamp
+        start_ts = self._interval_start
+        if start_ts is None:
+            # Pull start from the first row
+            try:
+                start_ts = array['timestamp'][0]
+            except IndexError:
+                pass # no timestamp is OK, if we have no data
+
+        # Get ending timestamp
+        if final:
+            # For a final block, the timestamp is either the
+            # user-provided end, or the timestamp of the last line
+            # plus epsilon.
+            end_ts = self._interval_end
+            if end_ts is None:
+                try:
+                    end_ts = array['timestamp'][-1]
+                    end_ts += nilmdb.utils.time.epsilon
+                except IndexError:
+                    pass # no timestamp is OK, if we have no data
+            # Everything stored is going out with this block
+            self._block_arrays = []
+            self._block_rows = 0
+
+            # Next block is completely fresh
+            self._interval_start = None
+            self._interval_end = None
+        else:
+            # An intermediate block.  We need to save the last row
+            # for the next block, and use its timestamp as the ending
+            # timestamp for this one.
+            if len(array) < 2:
+                # Not enough data to send an intermediate block
+                return
+            end_ts = array['timestamp'][-1]
+            if self._interval_end is not None and end_ts > self._interval_end:
+                # User gave us bad endpoints; send it anyway, and let
+                # the server complain so that the error is the same
+                # as if we hadn't done this chunking.
+                end_ts = self._interval_end
+            # Keep the last row stored, and send all of the others
+            self._block_arrays = [ array[-1:] ]
+            self._block_rows = 1
+            array = array[:-1]
+
+            # Next block continues where this one ended
+            self._interval_start = end_ts
+
+        # If we have no endpoints, or equal endpoints, it's OK as long
+        # as there's no data to send
+        if (start_ts is None or end_ts is None) or (start_ts == end_ts):
+            if len(array) == 0:
+                return
+            raise ClientError("have data to send, but invalid start/end times")
+
+        # Send it
+        data = array.tostring()
+        self.last_response = self._client.stream_insert_block(
+            self._path, data, start_ts, end_ts, binary = True)
+
+        return
--- a/nilmdb/cmdline/__init__.py
+++ b/nilmdb/cmdline/__init__.py
@@ -0,0 +1,3 @@
+"""nilmdb.cmdline"""
+
+from nilmdb.cmdline.cmdline import Cmdline
--- a/nilmdb/cmdline/cmdline.py
+++ b/nilmdb/cmdline/cmdline.py
@@ -0,0 +1,179 @@
+"""Command line client functionality"""
+
+import nilmdb.client
+
+from nilmdb.utils.printf import *
+from nilmdb.utils import datetime_tz
+import nilmdb.utils.time
+
+import sys
+import os
+import argparse
+from argparse import ArgumentDefaultsHelpFormatter as def_form
+import signal
+
+try: # pragma: no cover
+    import argcomplete
+except ImportError: # pragma: no cover
+    argcomplete = None
+
+# Valid subcommands.  Defined in separate files just to break
+# things up -- they're still called with Cmdline as self.
+# Each name here is also the name of the module under nilmdb.cmdline
+# that implements the command.
+subcommands = [ "help", "info", "create", "list", "metadata",
+                "insert", "extract", "remove", "destroy",
+                "intervals", "rename" ]
+
+# Import the subcommand modules
+# (subcmd_mods maps each subcommand name to its imported module.  The
+# non-empty fromlist makes __import__ return that submodule itself,
+# rather than the top-level package.)
+subcmd_mods = {}
+for cmd in subcommands:
+    subcmd_mods[cmd] = __import__("nilmdb.cmdline." + cmd, fromlist = [ cmd ])
+
+class JimArgumentParser(argparse.ArgumentParser):
+    def error(self, message):
+        self.print_usage(sys.stderr)
+        self.exit(2, sprintf("error: %s\n", message))
+
+class Complete(object): # pragma: no cover
+    # Completion helpers, for using argcomplete (see
+    # extras/nilmtool-bash-completion.sh)
+    def escape(self, s):
+        quote_chars = [ "\\", "\"", "'", " " ]
+        for char in quote_chars:
+            s = s.replace(char, "\\" + char)
+        return s
+
+    def none(self, prefix, parsed_args, **kwargs):
+        return []
+    rate = none
+    time = none
+    url = none
+
+    def path(self, prefix, parsed_args, **kwargs):
+        client = nilmdb.client.Client(parsed_args.url)
+        return ( self.escape(s[0])
+                 for s in client.stream_list()
+                 if s[0].startswith(prefix) )
+
+    def layout(self, prefix, parsed_args, **kwargs):
+        types = [ "int8", "int16", "int32", "int64",
+                  "uint8", "uint16", "uint32", "uint64",
+                  "float32", "float64" ]
+        layouts = []
+        for i in range(1,10):
+            layouts.extend([(t + "_" + str(i)) for t in types])
+        return ( l for l in layouts if l.startswith(prefix) )
+
+    def meta_key(self, prefix, parsed_args, **kwargs):
+        return (kv.split('=')[0] for kv
+                in self.meta_keyval(prefix, parsed_args, **kwargs))
+
+    def meta_keyval(self, prefix, parsed_args, **kwargs):
+        client = nilmdb.client.Client(parsed_args.url)
+        path = parsed_args.path
+        if not path:
+            return []
+        results = []
+        # prefix comes in as UTF-8, but results need to be Unicode,
+        # weird.  Still doesn't work in all cases, but that's bugs in
+        # argcomplete.
+        prefix = nilmdb.utils.unicode.decode(prefix)
+        for (k,v) in client.stream_get_metadata(path).iteritems():
+            kv = self.escape(k + '=' + v)
+            if kv.startswith(prefix):
+                results.append(kv)
+        return results
+
+class Cmdline(object):
+
+    def __init__(self, argv = None):
+        self.argv = argv or sys.argv[1:]
+        try:
+            # Assume command line arguments are encoded with stdin's encoding,
+            # and reverse it.  Won't be needed in Python 3, but for now..
+            self.argv = [ x.decode(sys.stdin.encoding) for x in self.argv ]
+        except Exception: # pragma: no cover
+            pass
+        self.client = None
+        self.def_url = os.environ.get("NILMDB_URL", "http://localhost/nilmdb/")
+        self.subcmd = {}
+        self.complete = Complete()
+
+    def arg_time(self, toparse):
+        """Parse a time string argument"""
+        try:
+            return nilmdb.utils.time.parse_time(toparse)
+        except ValueError as e:
+            raise argparse.ArgumentTypeError(sprintf("%s \"%s\"",
+                                                     str(e), toparse))
+
+    # Set up the parser
+    def parser_setup(self):
+        self.parser = JimArgumentParser(add_help = False,
+                                        formatter_class = def_form)
+
+        group = self.parser.add_argument_group("General options")
+        group.add_argument("-h", "--help", action='help',
+                           help='show this help message and exit')
+        group.add_argument("-V", "--version", action="version",
+                           version = nilmdb.__version__)
+
+        group = self.parser.add_argument_group("Server")
+        group.add_argument("-u", "--url", action="store",
+                           default=self.def_url,
+                           help="NilmDB server URL (default: %(default)s)"
+                           ).completer = self.complete.url
+
+        sub = self.parser.add_subparsers(
+            title="Commands", dest="command",
+            description="Use 'help command' or 'command --help' for more "
+            "details on a particular command.")
+
+        # Set up subcommands (defined in separate files)
+        for cmd in subcommands:
+            self.subcmd[cmd] = subcmd_mods[cmd].setup(self, sub)
+
+    def die(self, formatstr, *args):
+        fprintf(sys.stderr, formatstr + "\n", *args)
+        if self.client:
+            self.client.close()
+        sys.exit(-1)
+
+    def run(self):
+        # Set SIGPIPE to its default handler -- we don't need Python
+        # to catch it for us.
+        try:
+            signal.signal(signal.SIGPIPE, signal.SIG_DFL)
+        except ValueError: # pragma: no cover
+            pass
+
+        # Clear cached timezone, so that we can pick up timezone changes
+        # while running this from the test suite.
+        datetime_tz._localtz = None
+
+        # Run parser
+        self.parser_setup()
+        if argcomplete: # pragma: no cover
+            argcomplete.autocomplete(self.parser)
+        self.args = self.parser.parse_args(self.argv)
+
+        # Run arg verify handler if there is one
+        if "verify" in self.args:
+            self.args.verify(self)
+
+        self.client = nilmdb.client.Client(self.args.url)
+
+        # Make a test connection to make sure things work,
+        # unless the particular command requests that we don't.
+        if "no_test_connect" not in self.args:
+            try:
+                server_version = self.client.version()
+            except nilmdb.client.Error as e:
+                self.die("error connecting to server: %s", str(e))
+
+        # Now dispatch client request to appropriate function.  Parser
+        # should have ensured that we don't have any unknown commands
+        # here.
+        retval = self.args.handler(self) or 0
+
+        self.client.close()
+        sys.exit(retval)
--- a/nilmdb/cmdline/create.py
+++ b/nilmdb/cmdline/create.py
@@ -0,0 +1,37 @@
+from nilmdb.utils.printf import *
+import nilmdb.client
+
+from argparse import RawDescriptionHelpFormatter as raw_form
+
+def setup(self, sub):
+    cmd = sub.add_parser("create", help="Create a new stream",
+                         formatter_class = raw_form,
+                         description="""
+Create a new empty stream at the specified path and with the specified
+layout type.
+
+Layout types are of the format: type_count
+
+  'type' is a data type like 'float32', 'float64', 'uint16', 'int32', etc.
+
+  'count' is the number of columns of this type.
+
+  For example, 'float32_8' means the data for this stream has 8 columns of
+  32-bit floating point values.
+""")
+    cmd.set_defaults(handler = cmd_create)
+    group = cmd.add_argument_group("Required arguments")
+    group.add_argument("path",
+                       help="Path (in database) of new stream, e.g. /foo/bar",
+                       ).completer = self.complete.path
+    group.add_argument("layout",
+                       help="Layout type for new stream, e.g. float32_8",
+                       ).completer = self.complete.layout
+    return cmd
+
+def cmd_create(self):
+    """Create new stream"""
+    try:
+        self.client.stream_create(self.args.path, self.args.layout)
+    except nilmdb.client.ClientError as e:
+        self.die("error creating stream: %s", str(e))
--- a/nilmdb/cmdline/destroy.py
+++ b/nilmdb/cmdline/destroy.py
@@ -0,0 +1,49 @@
+from nilmdb.utils.printf import *
+import nilmdb.client
+import fnmatch
+
+from argparse import ArgumentDefaultsHelpFormatter as def_form
+
+def setup(self, sub):
+    cmd = sub.add_parser("destroy", help="Delete a stream and all data",
+                         formatter_class = def_form,
+                         description="""
+                         Destroy the stream at the specified path.
+                         The stream must be empty.  All metadata
+                         related to the stream is permanently deleted.
+
+                         Wildcards and multiple paths are supported.
+                         """)
+    cmd.set_defaults(handler = cmd_destroy)
+    group = cmd.add_argument_group("Options")
+    group.add_argument("-R", "--remove", action="store_true",
+                       help="Remove all data before destroying stream")
+    group.add_argument("-q", "--quiet", action="store_true",
+                       help="Don't display names when destroying "
+                       "multiple paths")
+    group = cmd.add_argument_group("Required arguments")
+    group.add_argument("path", nargs='+',
+                       help="Path of the stream to delete, e.g. /foo/bar/*",
+                       ).completer = self.complete.path
+    return cmd
+
+def cmd_destroy(self):
+    """Destroy stream"""
+    streams = [ s[0] for s in self.client.stream_list() ]
+    paths = []
+    for path in self.args.path:
+        new = fnmatch.filter(streams, path)
+        if not new:
+            self.die("error: no stream matched path: %s", path)
+        paths.extend(new)
+
+    for path in paths:
+        if not self.args.quiet and len(paths) > 1:
+            printf("Destroying %s\n", path)
+
+        try:
+            if self.args.remove:
+                count = self.client.stream_remove(path)
+            self.client.stream_destroy(path)
+        except nilmdb.client.ClientError as e:
+            self.die("error destroying stream: %s", str(e))
--- a/nilmdb/cmdline/extract.py
+++ b/nilmdb/cmdline/extract.py
@@ -0,0 +1,94 @@
+from __future__ import print_function
+from nilmdb.utils.printf import *
+import nilmdb.client
+import sys
+
+def setup(self, sub):
+    cmd = sub.add_parser("extract", help="Extract data",
+                         description="""
+                         Extract data from a stream.
+                         """)
+    cmd.set_defaults(verify = cmd_extract_verify,
+                     handler = cmd_extract)
+
+    group = cmd.add_argument_group("Data selection")
+    group.add_argument("path",
+                       help="Path of stream, e.g. /foo/bar",
+                       ).completer = self.complete.path
+    group.add_argument("-s", "--start", required=True,
+                       metavar="TIME", type=self.arg_time,
+                       help="Starting timestamp (free-form, inclusive)",
+                       ).completer = self.complete.time
+    group.add_argument("-e", "--end", required=True,
+                       metavar="TIME", type=self.arg_time,
+                       help="Ending timestamp (free-form, noninclusive)",
+                       ).completer = self.complete.time
+
+    group = cmd.add_argument_group("Output format")
+    group.add_argument("-B", "--binary", action="store_true",
+                       help="Raw binary output")
+    group.add_argument("-b", "--bare", action="store_true",
+                       help="Exclude timestamps from output lines")
+    group.add_argument("-a", "--annotate", action="store_true",
+                       help="Include comments with some information "
+                       "about the stream")
+    group.add_argument("-m", "--markup", action="store_true",
+                       help="Include comments with interval starts and ends")
+    group.add_argument("-T", "--timestamp-raw", action="store_true",
+                       help="Show raw timestamps in annotated information")
+    group.add_argument("-c", "--count", action="store_true",
+                       help="Just output a count of matched data points")
+    return cmd
+
+def cmd_extract_verify(self):
+    if self.args.start is not None and self.args.end is not None:
+        if self.args.start > self.args.end:
+            self.parser.error("start is after end")
+
+    if self.args.binary:
+        if (self.args.bare or self.args.annotate or self.args.markup or
+            self.args.timestamp_raw or self.args.count):
+            self.parser.error("--binary cannot be combined with other options")
+
+def cmd_extract(self):
+    streams = self.client.stream_list(self.args.path)
+    if len(streams) != 1:
+        self.die("error getting stream info for path %s", self.args.path)
+    layout = streams[0][1]
+
+    if self.args.timestamp_raw:
+        time_string = nilmdb.utils.time.timestamp_to_string
+    else:
+        time_string = nilmdb.utils.time.timestamp_to_human
+
+    if self.args.annotate:
+        printf("# path: %s\n", self.args.path)
+        printf("# layout: %s\n", layout)
+        printf("# start: %s\n", time_string(self.args.start))
+        printf("# end: %s\n", time_string(self.args.end))
+
+    printed = False
+    if self.args.binary:
+        printer = sys.stdout.write
+    else:
+        printer = print
+    bare = self.args.bare
+    count = self.args.count
+    for dataline in self.client.stream_extract(self.args.path,
+                                               self.args.start,
+                                               self.args.end,
+                                               self.args.count,
+                                               self.args.markup,
+                                               self.args.binary):
+        if bare and not count:
+            # Strip timestamp (first element).  Doesn't make sense
+            # if we are only returning a count.
+            dataline = ' '.join(dataline.split(' ')[1:])
+        printer(dataline)
+        printed = True
+    if not printed:
+        if self.args.annotate:
+            printf("# no data\n")
+        return 2
+
+    return 0
--- a/nilmdb/cmdline/help.py
+++ b/nilmdb/cmdline/help.py
@@ -0,0 +1,32 @@
+from nilmdb.utils.printf import *
+
+import argparse
+import sys
+
+def setup(self, sub):
+    """Register the 'help' subcommand on the subparsers object 'sub'.
+
+    Returns the newly created parser.
+    """
+    cmd = sub.add_parser("help", help="Show detailed help for a command",
+                         description="""
+                         Show help for a command. 'help command' is
+                         the same as 'command --help'.
+                         """)
+    # no_test_connect is stored on the parsed args next to the handler
+    cmd.set_defaults(handler = cmd_help, no_test_connect = True)
+    cmd.add_argument("command", nargs="?",
+                     help="Command to get help about")
+    # Swallow any trailing arguments and keep them out of the help text
+    cmd.add_argument("rest", nargs=argparse.REMAINDER,
+                     help=argparse.SUPPRESS)
+    return cmd
+
+def cmd_help(self):
+    """Print help for args.command if it names a known subcommand,
+    and the top-level help otherwise."""
+    command = self.args.command
+    if command not in self.subcmd:
+        self.parser.print_help()
+        return
+    self.subcmd[command].print_help()
--- a/nilmdb/cmdline/info.py
+++ b/nilmdb/cmdline/info.py
@@ -0,0 +1,33 @@
+import nilmdb.client
+from nilmdb.utils.printf import *
+from nilmdb.utils import human_size
+
+from argparse import ArgumentDefaultsHelpFormatter as def_form
+
+def setup(self, sub):
+    """Register the 'info' subcommand on 'sub' and return its parser."""
+    cmd = sub.add_parser("info", help="Server information",
+                         formatter_class = def_form,
+                         description="""
+                         List information about the server, like
+                         version.
+                         """)
+    cmd.set_defaults(handler = cmd_info)
+    return cmd
+
+def cmd_info(self):
+    """Print client and server versions, the server URL, and the
+    server's database path and disk space usage."""
+    client = self.client
+    printf("Client version: %s\n", nilmdb.__version__)
+    printf("Server version: %s\n", client.version())
+    printf("Server URL: %s\n", client.geturl())
+    dbinfo = client.dbinfo()
+    printf("Server database path: %s\n", dbinfo["path"])
+    # (label, dbinfo key) for each disk space figure, in print order
+    space_fields = (("used by NilmDB", "size"),
+                    ("used by other", "other"),
+                    ("reserved", "reserved"),
+                    ("free", "free"))
+    for (label, key) in space_fields:
+        printf("Server disk space %s: %s\n", label, human_size(dbinfo[key]))
--- a/nilmdb/cmdline/insert.py
+++ b/nilmdb/cmdline/insert.py
@@ -0,0 +1,136 @@
+from nilmdb.utils.printf import *
+import nilmdb.client
+import nilmdb.utils.timestamper as timestamper
+import nilmdb.utils.time
+
+import sys
+
+def setup(self, sub):
+    """Register the 'insert' subcommand on 'sub' and return its parser."""
+    cmd = sub.add_parser("insert", help="Insert data",
+                         description="""
+                         Insert data into a stream.
+                         """)
+    cmd.set_defaults(verify = cmd_insert_verify,
+                     handler = cmd_insert)
+    cmd.add_argument("-q", "--quiet", action='store_true',
+                     help='suppress unnecessary messages')
+
+    group = cmd.add_argument_group("Timestamping",
+                                   description="""
+                                   To add timestamps, specify the
+                                   arguments --timestamp and --rate,
+                                   and provide a starting time.
+                                   """)
+
+    group.add_argument("-t", "--timestamp", action="store_true",
+                       help="Add timestamps to each line")
+    group.add_argument("-r", "--rate", type=float,
+                       help="Data rate, in Hz",
+                       ).completer = self.complete.rate
+
+    group = cmd.add_argument_group("Start time",
+                                   description="""
+                                   Start time may be manually
+                                   specified with --start, or guessed
+                                   from the filenames using
+                                   --filename.  Set the TZ environment
+                                   variable to change the default
+                                   timezone.""")
+
+    exc = group.add_mutually_exclusive_group()
+    exc.add_argument("-s", "--start",
+                     metavar="TIME", type=self.arg_time,
+                     help="Starting timestamp (free-form)",
+                     ).completer = self.complete.time
+    exc.add_argument("-f", "--filename", action="store_true",
+                     help="Use filename to determine start time")
+
+    group = cmd.add_argument_group("End time",
+                                   description="""
+                                   End time for the overall stream.
+                                   (required when not using --timestamp).
+                                   Set the TZ environment
+                                   variable to change the default
+                                   timezone.""")
+    group.add_argument("-e", "--end",
+                       metavar="TIME", type=self.arg_time,
+                       help="Ending timestamp (free-form)",
+                       ).completer = self.complete.time
+
+    group = cmd.add_argument_group("Required parameters")
+    group.add_argument("path",
+                       help="Path of stream, e.g. /foo/bar",
+                       ).completer = self.complete.path
+    group.add_argument("file", nargs = '?', default='-',
+                       help="File to insert (default: - (stdin))")
+    return cmd
+
+def cmd_insert_verify(self):
+    """Check that the timestamp, rate, start, and end options were
+    given in a usable combination; call self.die otherwise."""
+    if self.args.timestamp:
+        if not self.args.rate:
+            self.die("error: --rate is needed, but was not specified")
+        if not self.args.filename and self.args.start is None:
+            self.die("error: need --start or --filename when adding timestamps")
+    else:
+        if self.args.start is None or self.args.end is None:
+            self.die("error: when not adding timestamps, --start and "
+                     "--end are required")
+
+def cmd_insert(self):
+    """Insert data from args.file (or stdin) into the stream at args.path"""
+    # Find requested stream
+    streams = self.client.stream_list(self.args.path)
+    if len(streams) != 1:
+        self.die("error getting stream info for path %s", self.args.path)
+
+    arg = self.args
+    filename = arg.file
+    to_human = nilmdb.utils.time.timestamp_to_human
+    infile = None
+    try:
+        if filename == '-':
+            infile = sys.stdin
+        else:
+            try:
+                infile = open(filename, "rb")
+            except IOError:
+                self.die("error opening input file %s", filename)
+
+        if arg.start is None:
+            try:
+                arg.start = nilmdb.utils.time.parse_time(filename)
+            except ValueError:
+                self.die("error extracting start time from filename '%s'",
+                         filename)
+
+        if arg.timestamp:
+            data = timestamper.TimestamperRate(infile, arg.start, arg.rate)
+        else:
+            data = iter(lambda: infile.read(1048576), '')
+
+        # Print info
+        if not arg.quiet:
+            printf(" Input file: %s\n", filename)
+            printf(" Start time: %s\n", to_human(arg.start))
+            # Compare against None so that a timestamp of 0 is still shown
+            if arg.end is not None:
+                printf("   End time: %s\n", to_human(arg.end))
+            if arg.timestamp:
+                printf("Timestamper: %s\n", str(data))
+
+        # Insert the data
+        self.client.stream_insert(arg.path, data, arg.start, arg.end)
+    except nilmdb.client.Error as e:
+        # TODO: It would be nice to be able to offer better errors
+        # here, particularly in the case of overlap, which just shows
+        # ugly bracketed ranges of 16-digit numbers and a mangled URL.
+        # Need to consider adding something like e.prettyprint()
+        # that is smarter about the contents of the error.
+        self.die("error inserting data: %s", str(e))
+    finally:
+        # Close a file we opened ourselves; stdin is left alone
+        if infile is not None and infile is not sys.stdin:
+            infile.close()
--- a/nilmdb/cmdline/intervals.py
+++ b/nilmdb/cmdline/intervals.py
@@ -0,0 +1,67 @@
+from nilmdb.utils.printf import *
+import nilmdb.utils.time
+
+import fnmatch
+import argparse
+from argparse import ArgumentDefaultsHelpFormatter as def_form
+
+def setup(self, sub):
+    """Register the 'intervals' subcommand on 'sub' and return its parser."""
+    cmd = sub.add_parser("intervals", help="List intervals",
+                         formatter_class = def_form,
+                         description="""
+                         List intervals in a stream, similar to
+                         'list --detail path'.
+
+                         If '--diff diffpath' is provided, only
+                         interval ranges that are present in 'path'
+                         and not present in 'diffpath' are printed.
+                         """)
+    cmd.set_defaults(verify = cmd_intervals_verify, handler = cmd_intervals)
+
+    group = cmd.add_argument_group("Stream selection")
+    group.add_argument("path", metavar="PATH",
+                       help="List intervals for this path",
+                       ).completer = self.complete.path
+    group.add_argument("-d", "--diff", metavar="PATH",
+                       help="Subtract intervals from this path",
+                       ).completer = self.complete.path
+
+    group = cmd.add_argument_group("Interval details")
+    group.add_argument("-s", "--start",
+                       metavar="TIME", type=self.arg_time,
+                       help="Starting timestamp for intervals "
+                       "(free-form, inclusive)",
+                       ).completer = self.complete.time
+    group.add_argument("-e", "--end",
+                       metavar="TIME", type=self.arg_time,
+                       help="Ending timestamp for intervals "
+                       "(free-form, noninclusive)",
+                       ).completer = self.complete.time
+
+    group = cmd.add_argument_group("Misc options")
+    group.add_argument("-T", "--timestamp-raw", action="store_true",
+                       help="Show raw timestamps when printing times")
+
+    return cmd
+
+def cmd_intervals_verify(self):
+    """Reject a time range whose start does not precede its end"""
+    (start, end) = (self.args.start, self.args.end)
+    if start is not None and end is not None and start >= end:
+        self.parser.error("start must precede end")
+
+def cmd_intervals(self):
+    """List intervals in a stream"""
+    args = self.args
+    to_text = (nilmdb.utils.time.timestamp_to_string if args.timestamp_raw
+               else nilmdb.utils.time.timestamp_to_human)
+
+    try:
+        intervals = self.client.stream_intervals(
+            args.path, args.start, args.end, args.diff)
+        for (start, end) in intervals:
+            printf("[ %s -> %s ]\n", to_text(start), to_text(end))
+    except nilmdb.client.ClientError as e:
+        self.die("error listing intervals: %s", str(e))
+
--- a/nilmdb/cmdline/list.py
+++ b/nilmdb/cmdline/list.py
@@ -0,0 +1,102 @@
+from nilmdb.utils.printf import *
+import nilmdb.utils.time
+
+import fnmatch
+import argparse
+from argparse import ArgumentDefaultsHelpFormatter as def_form
+
+def setup(self, sub):
+    """Register the 'list' subcommand on 'sub' and return its parser."""
+    cmd = sub.add_parser("list", help="List streams",
+                         formatter_class = def_form,
+                         description="""
+                         List streams available in the database,
+                         optionally filtering by path.  Wildcards
+                         are accepted; non-matching paths or wildcards
+                         are ignored.
+                         """)
+    cmd.set_defaults(verify = cmd_list_verify, handler = cmd_list)
+
+    group = cmd.add_argument_group("Stream filtering")
+    group.add_argument("path", metavar="PATH", default=["*"], nargs='*',
+                       ).completer = self.complete.path
+
+    group = cmd.add_argument_group("Interval info")
+    group.add_argument("-E", "--ext", action="store_true",
+                       help="Show extended stream info, like interval "
+                       "extents and row count")
+
+    group = cmd.add_argument_group("Interval details")
+    group.add_argument("-d", "--detail", action="store_true",
+                       help="Show available data time intervals")
+    group.add_argument("-s", "--start",
+                       metavar="TIME", type=self.arg_time,
+                       help="Starting timestamp for intervals "
+                       "(free-form, inclusive)",
+                       ).completer = self.complete.time
+    group.add_argument("-e", "--end",
+                       metavar="TIME", type=self.arg_time,
+                       help="Ending timestamp for intervals "
+                       "(free-form, noninclusive)",
+                       ).completer = self.complete.time
+
+    group = cmd.add_argument_group("Misc options")
+    group.add_argument("-T", "--timestamp-raw", action="store_true",
+                       help="Show raw timestamps when printing times")
+    group.add_argument("-l", "--layout", action="store_true",
+                       help="Show layout type next to path name")
+
+    return cmd
+
+def cmd_list_verify(self):
+    """Validate the interval options: start must precede end, and a
+    time range is only accepted together with --detail."""
+    (start, end) = (self.args.start, self.args.end)
+    if start is not None and end is not None and start >= end:
+        self.parser.error("start must precede end")
+
+    if (start is not None or end is not None) and not self.args.detail:
+        self.parser.error("--start and --end only make sense with --detail")
+
+def cmd_list(self):
+    """List the streams matching each PATH pattern, with optional
+    extended info (--ext) and interval listing (--detail)."""
+    args = self.args
+    streams = self.client.stream_list(extended = True)
+
+    time_string = (nilmdb.utils.time.timestamp_to_string
+                   if args.timestamp_raw
+                   else nilmdb.utils.time.timestamp_to_human)
+
+    for pattern in args.path:
+        # Every pattern is matched against the full stream list
+        for stream in streams:
+            (path, layout, int_min, int_max, rows, time) = stream[:6]
+            if not fnmatch.fnmatch(path, pattern):
+                continue
+
+            if args.layout:
+                printf("%s %s\n", path, layout)
+            else:
+                printf("%s\n", path)
+
+            if args.ext:
+                if int_min is None or int_max is None:
+                    printf("  interval extents: (no data)\n")
+                else:
+                    printf("  interval extents: %s -> %s\n",
+                           time_string(int_min), time_string(int_max))
+                printf("        total data: %d rows, %.6f seconds\n",
+                       rows or 0,
+                       nilmdb.utils.time.timestamp_to_seconds(time or 0))
+
+            if args.detail:
+                intervals = self.client.stream_intervals(
+                    path, args.start, args.end)
+                shown = 0
+                for (start, end) in intervals:
+                    printf("  [ %s -> %s ]\n",
+                           time_string(start), time_string(end))
+                    shown += 1
+                if shown == 0:
+                    printf("  (no intervals)\n")
--- a/nilmdb/cmdline/metadata.py
+++ b/nilmdb/cmdline/metadata.py
@@ -0,0 +1,95 @@
+from nilmdb.utils.printf import *
+import nilmdb
+import nilmdb.client
+
+def setup(self, sub):
+    """Register the 'metadata' subcommand on 'sub' and return its parser."""
+    cmd = sub.add_parser("metadata", help="Get or set stream metadata",
+                         description="""
+                         Get or set key=value metadata associated with
+                         a stream.
+                         """,
+                         usage="%(prog)s path [-g [key ...] | "
+                         "-s key=value [...] | -u key=value [...]] | "
+                         "-d [key ...]")
+    cmd.set_defaults(handler = cmd_metadata)
+
+    group = cmd.add_argument_group("Required arguments")
+    group.add_argument("path",
+                       help="Path of stream, e.g. /foo/bar",
+                       ).completer = self.complete.path
+
+    group = cmd.add_argument_group("Actions")
+    exc = group.add_mutually_exclusive_group()
+    exc.add_argument("-g", "--get", nargs="*", metavar="key",
+                     help="Get metadata for specified keys (default all)",
+                     ).completer = self.complete.meta_key
+    exc.add_argument("-s", "--set", nargs="+", metavar="key=value",
+                     help="Replace all metadata with provided "
+                     "key=value pairs",
+                     ).completer = self.complete.meta_keyval
+    exc.add_argument("-u", "--update", nargs="+", metavar="key=value",
+                     help="Update metadata using provided "
+                     "key=value pairs",
+                     ).completer = self.complete.meta_keyval
+    exc.add_argument("-d", "--delete", nargs="*", metavar="key",
+                     help="Delete metadata for specified keys (default all)",
+                     ).completer = self.complete.meta_key
+    return cmd
+
+def cmd_metadata(self):
+    """Get, set, update, or delete metadata for the stream at args.path.
+
+    With no action given, the get branch runs with no key filter.
+    """
+    if self.args.set is not None or self.args.update is not None:
+        # Either set, or update
+        if self.args.set is not None:
+            keyvals = map(nilmdb.utils.unicode.decode, self.args.set)
+            handler = self.client.stream_set_metadata
+        else:
+            keyvals = map(nilmdb.utils.unicode.decode, self.args.update)
+            handler = self.client.stream_update_metadata
+
+        # Extract key=value pairs.  Split on the first '=' only, so
+        # that a value may itself contain '=' characters.
+        data = {}
+        for keyval in keyvals:
+            kv = keyval.split('=', 1)
+            if len(kv) != 2 or kv[0] == "":
+                self.die("error parsing key=value argument '%s'", keyval)
+            data[kv[0]] = kv[1]
+
+        # Make the call
+        try:
+            handler(self.args.path, data)
+        except nilmdb.client.ClientError as e:
+            self.die("error setting/updating metadata: %s", str(e))
+    elif self.args.delete is not None:
+        # Delete (by setting values to empty strings)
+        keys = None
+        if self.args.delete:
+            keys = map(nilmdb.utils.unicode.decode, self.args.delete)
+        try:
+            data = self.client.stream_get_metadata(self.args.path, keys)
+            for key in data:
+                data[key] = ""
+            self.client.stream_update_metadata(self.args.path, data)
+        except nilmdb.client.ClientError as e:
+            self.die("error deleting metadata: %s", str(e))
+    else:
+        # Get (or unspecified)
+        keys = None
+        if self.args.get:
+            keys = map(nilmdb.utils.unicode.decode, self.args.get)
+        try:
+            data = self.client.stream_get_metadata(self.args.path, keys)
+        except nilmdb.client.ClientError as e:
+            self.die("error getting metadata: %s", str(e))
+        for key, value in sorted(data.items()):
+            # Print nonexistent keys as having empty value
+            if value is None:
+                value = ""
+            printf("%s=%s\n",
+                   nilmdb.utils.unicode.encode(key),
+                   nilmdb.utils.unicode.encode(value))
--- a/nilmdb/cmdline/remove.py
+++ b/nilmdb/cmdline/remove.py
@@ -0,0 +1,60 @@
+from nilmdb.utils.printf import *
+import nilmdb.client
+import fnmatch
+
+def setup(self, sub):
+    """Register the 'remove' subcommand on 'sub' and return its parser."""
+    cmd = sub.add_parser("remove", help="Remove data",
+                         description="""
+                         Remove all data from a specified time range within a
+                         stream.  If multiple streams or wildcards are provided,
+                         the same time range is removed from all streams.
+                         """)
+    cmd.set_defaults(handler = cmd_remove)
+
+    group = cmd.add_argument_group("Data selection")
+    group.add_argument("path", nargs='+',
+                       help="Path of stream, e.g. /foo/bar/*",
+                       ).completer = self.complete.path
+    group.add_argument("-s", "--start", required=True,
+                       metavar="TIME", type=self.arg_time,
+                       help="Starting timestamp (free-form, inclusive)",
+                       ).completer = self.complete.time
+    group.add_argument("-e", "--end", required=True,
+                       metavar="TIME", type=self.arg_time,
+                       help="Ending timestamp (free-form, noninclusive)",
+                       ).completer = self.complete.time
+
+    group = cmd.add_argument_group("Output format")
+    group.add_argument("-q", "--quiet", action="store_true",
+                       help="Don't display names when removing "
+                       "from multiple paths")
+    group.add_argument("-c", "--count", action="store_true",
+                       help="Output number of data points removed")
+    return cmd
+
+def cmd_remove(self):
+    """Remove the time range args.start..args.end from every stream
+    matched by the given path patterns.  Returns 0."""
+    known = [ s[0] for s in self.client.stream_list() ]
+    targets = []
+    for pattern in self.args.path:
+        matched = fnmatch.filter(known, pattern)
+        if not matched:
+            self.die("error: no stream matched path: %s", pattern)
+        targets.extend(matched)
+
+    # Only name each stream when more than one is being processed
+    announce = not self.args.quiet and len(targets) > 1
+    try:
+        for path in targets:
+            if announce:
+                printf("Removing from %s\n", path)
+            count = self.client.stream_remove(path,
+                                              self.args.start, self.args.end)
+            if self.args.count:
+                printf("%d\n", count)
+    except nilmdb.client.ClientError as e:
+        self.die("error removing data: %s", str(e))
+
+    return 0
--- a/nilmdb/cmdline/rename.py
+++ b/nilmdb/cmdline/rename.py
@@ -0,0 +1,33 @@
+from nilmdb.utils.printf import *
+import nilmdb.client
+
+from argparse import ArgumentDefaultsHelpFormatter as def_form
+
+def setup(self, sub):
+    """Register the 'rename' subcommand on 'sub' and return its parser."""
+    cmd = sub.add_parser("rename", help="Rename a stream",
+                         formatter_class = def_form,
+                         description="""
+                         Rename a stream.
+
+                         Only the stream's path is renamed; no
+                         metadata is changed.
+                         """)
+    cmd.set_defaults(handler = cmd_rename)
+    group = cmd.add_argument_group("Required arguments")
+    group.add_argument("oldpath",
+                       help="Old path, e.g. /foo/old",
+                       ).completer = self.complete.path
+    group.add_argument("newpath",
+                       help="New path, e.g. /foo/bar/new",
+                       ).completer = self.complete.path
+
+    return cmd
+
+def cmd_rename(self):
+    """Rename the stream at args.oldpath to args.newpath"""
+    (old, new) = (self.args.oldpath, self.args.newpath)
+    try:
+        self.client.stream_rename(old, new)
+    except nilmdb.client.ClientError as e:
+        self.die("error renaming stream: %s", str(e))
--- a/nilmdb/fileinterval.py
+++ b/nilmdb/fileinterval.py
@@ -1,37 +0,0 @@
-"""FileInterval
-
-An Interval that is backed with file data storage"""
-
-from nilmdb.interval import Interval, IntervalSet, IntervalError
-from datetime import datetime
-import bisect
-
-class FileInterval(Interval):
-    """Represents an interval of time and its corresponding data"""
-
-    def __init__(self, start, end, 
-                 filename, 
-                 start_offset = None, end_offset = None):
-        self.start = start
-        self.end = end
-        self.filename = filename
-        if start_offset is None:
-            start_offset = 0
-        self.start_offset = start_offset
-        if end_offset is None:
-            f = open(filename, 'rb')
-            f.seek(0, os.SEEK_END)
-            end_offset = f.tell()
-        self.end_offset = end_offset
-
-    def __setattr__(self, name, value):
-        pass
-
-    def subset(self, start, end):
-        """Return a new Interval that is a subset of this one"""
-        # TODO: Any magic regarding file/offset/length mapping for subsets
-        if (start < self.start or end > self.end):
-            raise IntervalError("not a subset")
-        return FileInterval(start, end)            
-
-    
--- a/nilmdb/interval.py
+++ b/nilmdb/interval.py
@@ -1,205 +0,0 @@
-"""Interval and IntervalSet
-
-Represents an interval of time, and a sorted set of such intervals"""
-
-from datetime import datetime
-import bisect
-
-class IntervalError(Exception):
-    """Error due to interval overlap, etc"""
-    pass
-
-class Interval(object):
-    """Represents an interval of time"""
-
-    start = None
-    end = None
-
-    def __init__(self, start, end):
-        self.start = start
-        self.end = end
-
-    def __repr__(self):
-        return "Interval(" + repr(self.start) + ", " + repr(self.end) + ")"
-
-    def __str__(self):
-        return "[" + str(self.start) + " -> " + str(self.end) + "]"
-
-    def __setattr__(self, name, value):
-        """Set attribute"""
-        # TODO: If we need to manipulate file names, offsets, lengths, etc,
-        # based on start and end time changing, maybe this is the right spot?
-        # Or we could just disallow changing it here.
-        if not isinstance(value, datetime):
-            raise IntervalError("Must set datetime values")
-        self.__dict__[name] = value
-        if (type(self.start) is type(self.end)):
-            if (self.start > self.end):
-                raise IntervalError("Interval start must precede interval end")
-
-    def __cmp__(self, other):
-        """Compare two intervals.  If non-equal, order by start then end"""
-        if not isinstance(other, Interval):
-            raise TypeError("Can't compare to non-interval")
-        if (self.start == other.start):
-            if (self.end < other.end):
-                return -1
-            if (self.end > other.end):
-                return 1
-            return 0
-        if (self.start < other.start):
-            return -1
-        return 1
-                
-    def intersects(self, other):
-        """Return True if two Interval objects intersect"""
-        if (not isinstance(other, Interval)):
-            raise TypeError("need Interval for intersection test")
-        if (self.end <= other.start or
-            self.start >= other.end):
-            return False
-        else:
-            return True
-
-    def is_adjacent(self, other):
-        """Return True if two Intervals are adjacent (same end or start)"""
-        if (not isinstance(other, Interval)):
-            raise TypeError("need Interval for adjacency test")
-        if (self.end == other.start or
-            self.start == other.end):
-            return True
-        else:
-            return False
-
-    def subset(self, start, end):
-        """Return a new Interval that is a subset of this one"""
-        # TODO: Any magic regarding file/offset/length mapping for subsets
-        if (start < self.start or end > self.end):
-            raise IntervalError("not a subset")
-        return Interval(start, end)            
-
-class IntervalSet(object):
-    """A non-intersecting set of intervals
-
-    Kept sorted internally"""
-
-    def __init__(self, iterable=None):
-        self.data = []
-        if iterable is not None:
-            if isinstance(iterable, Interval):
-                iterable = [iterable]
-            self._add_intervals(iterable)
-
-    def __iter__(self):
-        return self.data.__iter__()
-
-    def __repr__(self):
-        return "IntervalSet(" + repr(list(self.data)) + ")"
-
-    def __cmp__(self, other):
-        # compare isn't supported, they don't really have an ordering
-        raise TypeError("can't compare IntervalSets with cmp()")
-
-    def __eq__(self, other):
-        """Test equality of two IntervalSets.
-
-        Treats adjacent Intervals as equivalent to one long interval,
-        so this function really tests whether the IntervalSets cover
-        the same spans of time."""
-        if not isinstance(other, IntervalSet):
-            return False
-        i = 0
-        j = 0
-        outside = True
-        try:
-            while True:
-                if (outside):
-                    # To match, we need to be finished this set
-                    if (i >= len(self) and j >= len(other)):
-                        return True
-                    # Or the starts need to match
-                    if (self[i].start != other[j].start):
-                        return False
-                    outside = False
-                else:
-                    # We can move on if the two interval ends match
-                    if (self[i].end == other[j].end):
-                        i += 1
-                        j += 1
-                        outside = True
-                    else:
-                        # Whichever ends first needs to be adjacent to the next
-                        if (self[i].end < other[j].end):
-                            if (not self[i].is_adjacent(self[i+1])):
-                                return False
-                            i += 1
-                        else:
-                            if (not other[j].is_adjacent(other[j+1])):
-                                return False
-                            j += 1
-        except IndexError:
-            return False
-
-    def __ne__(self, other):
-        return not self.__eq__(other)
-
-    def __len__(self):
-        return len(self.data)
-
-    def __getitem__(self, key):
-        return self.data.__getitem__(key)
-
-    def __iadd__(self, other):
-        """Inplace add -- modifies self
-
-        This throws an exception if the regions being added intersect."""
-        if isinstance(other, Interval):
-            other = [other]
-        self._add_intervals(other)
-        return self
-        
-    def __add__(self, other):
-        """Add -- returns a new object
-
-        This throws an exception if the regions being added intersect."""
-        new = IntervalSet(self)
-        new += IntervalSet(other)
-        return new
-
-    def __and__(self, other):
-        """Compute a new IntervalSet from the intersection of two others
-
-        Output intervals are built as subsets of the intervals in the
-        first argument (self)."""
-        # If we were given a set, intersect with each interval in that set
-        if isinstance(other, IntervalSet):
-            out = IntervalSet()
-            for interval in other.data:
-                out += self & interval
-            return out
-
-        if not isinstance(other, Interval):
-            raise TypeError("can't intersect with that type")
-
-        out = IntervalSet()
-        for this in self.data:
-            # If there's any overlap, add the overlapping region
-            if (this.end > other.start and this.start < other.end):
-                out += this.subset(max(this.start, other.start),
-                                   min(this.end, other.end))
-        return out
-    
-    def _add_intervals(self, iterable):
-        """Add each Interval from an interable to this set"""
-        for element in iter(iterable):
-            self._add_single_interval(element)
-
-    def _add_single_interval(self, interval):
-        """Add one Interval to this set"""
-        if (not isinstance(interval, Interval)):
-            raise TypeError("can only add Intervals")
-        for existing in self.data:
-            if existing.intersects(interval):
-                raise IntervalError("Tried to add overlapping interval "
-                                    "to this set")
-        bisect.insort(self.data, interval)
--- a/nilmdb/scripts/init.py
+++ b/nilmdb/scripts/init.py
@@ -0,0 +1 @@
+# Command line scripts
--- a/nilmdb/scripts/nilmdb_server.py
+++ b/nilmdb/scripts/nilmdb_server.py
@@ -0,0 +1,87 @@
+#!/usr/bin/python
+
+import nilmdb.server
+import argparse
+import os
+import socket
+
+def main():
+    """Main entry point for the 'nilmdb-server' command line script.
+
+    Parses the command line, opens the database, runs the server
+    until it stops, and always closes the database afterwards (in
+    quiet mode too, and also when the server raises)."""
+
+    parser = argparse.ArgumentParser(
+        description = 'Run the NilmDB server',
+        formatter_class = argparse.ArgumentDefaultsHelpFormatter)
+
+    parser.add_argument("-V", "--version", action="version",
+                        version = nilmdb.__version__)
+
+    group = parser.add_argument_group("Standard options")
+    group.add_argument('-a', '--address',
+                       help = 'Only listen on the given address',
+                       default = '0.0.0.0')
+    group.add_argument('-p', '--port', help = 'Listen on the given port',
+                       type = int, default = 12380)
+    group.add_argument('-d', '--database', help = 'Database directory',
+                       default = "./db")
+    group.add_argument('-q', '--quiet', help = 'Silence output',
+                       action = 'store_true')
+    group.add_argument('-t', '--traceback',
+                       help = 'Provide tracebacks in client errors',
+                       action = 'store_true', default = False)
+
+    group = parser.add_argument_group("Debug options")
+    group.add_argument('-y', '--yappi', help = 'Run under yappi profiler and '
+                       'invoke interactive shell afterwards',
+                       action = 'store_true')
+
+    args = parser.parse_args()
+
+    # Create database object.  Needs to be serialized before passing
+    # to the Server.
+    db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(args.database)
+
+    try:
+        # Configure the server; quiet mode runs it with embedded = True
+        server = nilmdb.server.Server(db,
+                                      host = args.address,
+                                      port = args.port,
+                                      embedded = args.quiet,
+                                      force_traceback = args.traceback)
+
+        # Print info
+        if not args.quiet:
+            print "Version: %s" % nilmdb.__version__
+            print "Database: %s" % (os.path.realpath(args.database))
+            if args.address == '0.0.0.0' or args.address == '::':
+                # Listening on all interfaces, so show our own hostname
+                host = socket.getfqdn()
+            else:
+                host = args.address
+            print "Server URL: http://%s:%d/" % ( host, args.port)
+            print "----"
+
+        # Run it
+        if args.yappi:
+            print "Running in yappi"
+            # Import and start the profiler before entering the try
+            # block, so that a missing yappi is reported as an
+            # ImportError instead of a NameError from the cleanup code.
+            import yappi
+            yappi.start()
+            try:
+                server.start(blocking = True)
+            finally:
+                yappi.stop()
+                yappi.print_stats(sort_type = yappi.SORTTYPE_TTOT, limit = 50)
+                from IPython import embed
+                embed(header = "Use the yappi object to explore further, "
+                      "quit to exit")
+        else:
+            server.start(blocking = True)
+    finally:
+        # Clean up.  Only the message depends on --quiet; the database
+        # itself must always be closed.
+        if not args.quiet:
+            print "Closing database"
+        db.close()
+
+if __name__ == "__main__":
+    main()
--- a/nilmdb/scripts/nilmtool.py
+++ b/nilmdb/scripts/nilmtool.py
@@ -0,0 +1,10 @@
+#!/usr/bin/python
+
+import nilmdb.cmdline
+
+def main():
+    """Entry point for the 'nilmtool' command line script: build the
+    command line handler and run it."""
+    tool = nilmdb.cmdline.Cmdline()
+    tool.run()
+
+if __name__ == "__main__":
+    main()
--- a/nilmdb/server/init.py
+++ b/nilmdb/server/init.py
@@ -0,0 +1,21 @@
+"""nilmdb.server"""
+
+from __future__ import absolute_import
+
+# Try to set up pyximport to automatically rebuild Cython modules.  If
+# this doesn't work, it's OK, as long as the modules were built externally.
+# (e.g. python setup.py build_ext --inplace)
+try: # pragma: no cover
+    import Cython
+    import distutils.version
+    # pyximport is only installed when Cython is at least version 0.17;
+    # an older Cython is treated the same as a missing one.
+    if (distutils.version.LooseVersion(Cython.__version__) <
+        distutils.version.LooseVersion("0.17")): # pragma: no cover
+        raise ImportError("Cython version too old")
+    import pyximport
+    pyximport.install(inplace = True, build_in_temp = False)
+# NOTE(review): TypeError is swallowed as well; presumably it covers a
+# pyximport.install() that doesn't accept these keyword arguments -- confirm.
+except (ImportError, TypeError): # pragma: no cover
+    pass
+
+from nilmdb.server.nilmdb import NilmDB
+from nilmdb.server.server import Server, wsgi_application
+from nilmdb.server.errors import NilmDBError, StreamError, OverlapError
--- a/nilmdb/server/bulkdata.py
+++ b/nilmdb/server/bulkdata.py
@@ -0,0 +1,617 @@
+# Fixed record size bulk data storage
+
+# Need absolute_import so that "import nilmdb" won't pull in
+# nilmdb.py, but will pull the parent nilmdb module instead.
+from __future__ import absolute_import
+from __future__ import division
+from nilmdb.utils.printf import *
+from nilmdb.utils.time import timestamp_to_string as timestamp_to_string
+import nilmdb.utils
+
+import os
+import cPickle as pickle
+import re
+import sys
+import tempfile
+
+import nilmdb.utils.lock
+from . import rocket
+
+# Up to 256 open file descriptors at any given time.
+# These variables are global so they can be used in the decorator arguments.
+table_cache_size = 32
+fd_cache_size = 8
+
+@nilmdb.utils.must_close(wrap_verify = False)
+class BulkData(object):
+    def __init__(self, basepath, **kwargs):
+        """Open the bulk data store below 'basepath', creating its
+        "data" directory if needed, and take the exclusive lock on it.
+
+        The optional keyword arguments 'file_size' and 'files_per_dir'
+        override the default tuneables.  Raises IOError if the lock
+        can't be acquired."""
+        self.basepath = basepath
+        self.root = os.path.join(self.basepath, "data")
+        self.lock = self.root + ".lock"
+        self.lockfile = None
+
+        # Tuneables: default to approximately 128 MiB per file, and to
+        # 32768 files per dir, which should work even on FAT32
+        self.file_size = kwargs.get("file_size", 128 * 1024 * 1024)
+        self.files_per_dir = kwargs.get("files_per_dir", 32768)
+
+        # Make root path
+        if not os.path.isdir(self.root):
+            os.mkdir(self.root)
+
+        # Create the lock
+        self.lockfile = open(self.lock, "w")
+        locked = nilmdb.utils.lock.exclusive_lock(self.lockfile)
+        if not locked:
+            raise IOError('database at "' + self.basepath +
+                          '" is already locked by another process')
+
+    def close(self):
+        """Drop every cached table, then release and delete the lock
+        file if this object still holds one."""
+        # Empty the getnode cache (presumably running its onremove
+        # hook, which closes each cached Table -- confirm in lru_cache)
+        self.getnode.cache_remove_all()
+        if self.lockfile:
+            nilmdb.utils.lock.exclusive_unlock(self.lockfile)
+            self.lockfile.close()
+            # Removing the lock file is best-effort
+            try:
+                os.unlink(self.lock)
+            except OSError: # pragma: no cover
+                pass
+            # Mark as released so that a second close() skips this part
+            self.lockfile = None
+
+    def _encode_filename(self, path):
+        # Unicode paths are always encoded as UTF-8, no matter what
+        # sys.getfilesystemencoding() says: every code point must be
+        # representable, and users never see the filenames directly.
+        # Path manipulations can then be done on the UTF-8 bytes.
+        # Anything that isn't unicode is passed through untouched.
+        if not isinstance(path, unicode):
+            return path
+        return path.encode('utf-8')
+
+    def _create_check_ospath(self, ospath):
+        """Verify that a new table may be placed at OS path 'ospath'.
+
+        Raises ValueError if the path ends with a slash, if a table
+        already exists there, or if any directory below it holds files."""
+        if ospath[-1] == '/':
+            raise ValueError("invalid path; should not end with a /")
+        if Table.exists(ospath):
+            raise ValueError("stream already exists at this path")
+        if not os.path.isdir(ospath):
+            return
+        # Fully empty subdirectories are OK (they might be there during
+        # a rename), but a file anywhere below this path is not.
+        for (_root, _dirs, filenames) in os.walk(ospath):
+            if filenames:
+                raise ValueError(
+                    "non-empty subdirs of this path already exist")
+
+    def _create_parents(self, unicodepath):
+        """Verify the path name, and create parent directories if they
+        don't exist.
+
+        Returns the list of all path elements (the path split on '/'),
+        whether or not their directories had to be created.  Raises
+        ValueError if the path is malformed, uses a reserved name, or
+        lies below an existing table."""
+        path = self._encode_filename(unicodepath)
+
+        if path[0] != '/':
+            raise ValueError("paths must start with /")
+        [ group, node ] = path.rsplit("/", 1)
+        if group == '':
+            raise ValueError("invalid path; path must contain at least one "
+                             "folder")
+        if node == '':
+            raise ValueError("invalid path; should not end with a /")
+        if not Table.valid_path(path):
+            raise ValueError("path name is invalid or contains reserved words")
+
+        # Create the table's base dir.  Note that we make a
+        # distinction here between NilmDB paths (always Unix style,
+        # split apart manually) and OS paths (built up with
+        # os.path.join)
+
+        # Make directories leading up to this one
+        elements = path.lstrip('/').split('/')
+        made_dirs = []
+        try:
+            # Make parent elements.  elements[0:i] stops short of the
+            # final element, so the table's own directory is not made here.
+            for i in range(len(elements)):
+                ospath = os.path.join(self.root, *elements[0:i])
+                if Table.exists(ospath):
+                    raise ValueError("path is subdir of existing node")
+                if not os.path.isdir(ospath):
+                    os.mkdir(ospath)
+                    made_dirs.append(ospath)
+        except Exception as e:
+            # Try to remove paths that we created; ignore errors
+            exc_info = sys.exc_info()
+            # Deepest directory first, so that each one is empty by the
+            # time it gets removed
+            for ospath in reversed(made_dirs): # pragma: no cover (hard to hit)
+                try:
+                    os.rmdir(ospath)
+                except OSError:
+                    pass
+            # Re-raise the original exception with its original traceback
+            raise exc_info[1], None, exc_info[2]
+
+        return elements
+
+    def create(self, unicodepath, layout_name):
+        """
+        Create a new, empty table.
+
+        unicodepath: path to the data (e.g. u'/newton/prep').
+        Paths must contain at least two elements, e.g.:
+           /newton/prep
+           /newton/raw
+           /newton/upstairs/prep
+           /newton/upstairs/raw
+
+        layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8'
+
+        Raises ValueError if the path is invalid or already in use.  If
+        the table can't be set up, the partially created table is
+        removed again and the original exception is re-raised.
+        """
+        elements = self._create_parents(unicodepath)
+
+        # Make the final dir
+        ospath = os.path.join(self.root, *elements)
+        self._create_check_ospath(ospath)
+        os.mkdir(ospath)
+
+        try:
+            # Write format string to file
+            Table.create(ospath, layout_name, self.file_size,
+                         self.files_per_dir)
+
+            # Open and cache it
+            self.getnode(unicodepath)
+        except Exception:
+            exc_info = sys.exc_info()
+            # Undo what we did.  The format file has to go first: if it
+            # were left behind, the rmdir would fail, and the directory
+            # would keep looking like an existing table, blocking any
+            # later attempt to create this path.  Errors are ignored.
+            cleanup = [ (os.remove, os.path.join(ospath, "_format")),
+                        (os.rmdir, ospath) ]
+            for (remover, target) in cleanup:
+                try:
+                    remover(target)
+                except OSError:
+                    pass
+            # Re-raise the original exception with its original traceback
+            raise exc_info[1], None, exc_info[2]
+
+        # Success
+        return
+
+    def _remove_leaves(self, unicodepath):
+        """Walk from the deepest directory of unicodepath up to its
+        first element, removing each directory that is empty"""
+        elements = self._encode_filename(unicodepath).lstrip('/').split('/')
+        for depth in range(len(elements), 0, -1):
+            ospath = os.path.join(self.root, *elements[0:depth])
+            try:
+                os.rmdir(ospath)
+            except OSError:
+                # Not empty (or not there); leave it alone
+                pass
+
+    def rename(self, oldunicodepath, newunicodepath):
+        """Move entire tree from 'oldunicodepath' to
+        'newunicodepath'.
+
+        Raises ValueError if both paths are the same or if the new
+        path is invalid or already in use.  On failure, the table is
+        moved back to its original location."""
+        oldpath = self._encode_filename(oldunicodepath)
+        newpath = self._encode_filename(newunicodepath)
+
+        # Get OS paths
+        oldelements = oldpath.lstrip('/').split('/')
+        oldospath = os.path.join(self.root, *oldelements)
+        newelements = newpath.lstrip('/').split('/')
+        newospath = os.path.join(self.root, *newelements)
+
+        # Basic checks
+        if oldospath == newospath:
+            raise ValueError("old and new paths are the same")
+
+        # Remove Table object at old path from cache, like destroy()
+        # does.  Otherwise a cached Table would keep referring to the
+        # old directory after the move.
+        self.getnode.cache_remove(self, oldunicodepath)
+
+        # Move the table to a temporary location
+        tmpdir = tempfile.mkdtemp(prefix = "rename-", dir = self.root)
+        tmppath = os.path.join(tmpdir, "table")
+        try:
+            os.rename(oldospath, tmppath)
+        except Exception:
+            # Nothing was moved (e.g. the old path doesn't exist), so
+            # just get rid of the temporary directory again
+            os.rmdir(tmpdir)
+            raise
+
+        try:
+            # Check destination path
+            self._create_check_ospath(newospath)
+
+            # Create parent dirs for new location
+            self._create_parents(newunicodepath)
+
+            # Move table into new location
+            os.rename(tmppath, newospath)
+        except Exception:
+            # On failure, move the table back to original path
+            os.rename(tmppath, oldospath)
+            os.rmdir(tmpdir)
+            raise
+
+        # Prune old dirs
+        self._remove_leaves(oldunicodepath)
+        os.rmdir(tmpdir)
+
+    def destroy(self, unicodepath):
+        """Fully remove all data at a particular path.  No way to undo
+        it!  The group/path structure is removed, too.
+
+        Raises ValueError if there is no table at that path."""
+        path = self._encode_filename(unicodepath)
+
+        # Get OS path
+        elements = path.lstrip('/').split('/')
+        ospath = os.path.join(self.root, *elements)
+
+        # Remove Table object from cache
+        self.getnode.cache_remove(self, unicodepath)
+
+        # Remove the contents of the target directory
+        if not Table.exists(ospath):
+            raise ValueError("nothing at that path")
+        # Bottom-up walk, so that each directory has already been
+        # emptied by the time it is removed
+        for (root, dirs, files) in os.walk(ospath, topdown = False):
+            for name in files:
+                os.remove(os.path.join(root, name))
+            for name in dirs:
+                os.rmdir(os.path.join(root, name))
+
+        # Remove leftover empty directories (the table's own directory
+        # and any parents that are now empty)
+        self._remove_leaves(unicodepath)
+
+    # Cache open tables
+    @nilmdb.utils.lru_cache(size = table_cache_size,
+                            onremove = lambda x: x.close())
+    def getnode(self, unicodepath):
+        """Open the table stored at the given database path (which
+        must exist) and return it as a Table object."""
+        elements = self._encode_filename(unicodepath).lstrip('/').split('/')
+        return Table(os.path.join(self.root, *elements))
+
+@nilmdb.utils.must_close(wrap_verify = False)
+class Table(object):
+    """Tools to help access a single table (data at a specific OS path)."""
+    # See design.md for design details
+
+    # Class methods, to help keep format details in this class.
+    @classmethod
+    def valid_path(cls, root):
+        """Return True if no element of the path 'root' is the
+        reserved name '_format'"""
+        components = root.split("/")
+        return components.count("_format") == 0
+
+    @classmethod
+    def exists(cls, root):
+        """Return True if the OS path 'root' contains a '_format'
+        file, i.e. if it looks like a table"""
+        format_file = os.path.join(root, "_format")
+        return os.path.isfile(format_file)
+
+    @classmethod
+    def create(cls, root, layout, file_size, files_per_dir):
+        """Set up a new table in the existing directory 'root' by
+        writing its '_format' file for the given layout string"""
+
+        # Pick the number of rows per file so that a full file takes
+        # approximately file_size bytes, but holds at least one row.
+        rkt = rocket.Rocket(layout, None)
+        row_bytes = rkt.binary_size
+        rows_per_file = max(file_size // row_bytes, 1)
+        rkt.close()
+
+        fmt = {}
+        fmt["rows_per_file"] = rows_per_file
+        fmt["files_per_dir"] = files_per_dir
+        fmt["layout"] = layout
+        fmt["version"] = 3
+        format_file = os.path.join(root, "_format")
+        with open(format_file, "wb") as f:
+            pickle.dump(fmt, f, 2)
+
+    # Normal methods
+    def __init__(self, root):
+        """Open the table stored in the directory 'root' (a full OS
+        path), reading its parameters from the '_format' file"""
+        self.root = root
+
+        # Load the format
+        format_file = os.path.join(self.root, "_format")
+        with open(format_file, "rb") as f:
+            fmt = pickle.load(f)
+
+        version = fmt["version"]
+        if version != 3: # pragma: no cover
+            # Old versions used floating point timestamps, which aren't
+            # valid anymore.
+            raise NotImplementedError("old version " + str(version) +
+                                      " bulk data store is not supported")
+
+        self.rows_per_file = fmt["rows_per_file"]
+        self.files_per_dir = fmt["files_per_dir"]
+        self.layout = fmt["layout"]
+
+        # Ask rocket for the row size; the size of a full file follows
+        rkt = rocket.Rocket(self.layout, None)
+        self.row_size = rkt.binary_size
+        self.file_size = self.row_size * self.rows_per_file
+        rkt.close()
+
+        # Find nrows
+        self.nrows = self._get_nrows()
+
+    def close(self):
+        """Drop every entry from the file_open cache (presumably
+        closing the cached files through its onremove hook -- confirm
+        in lru_cache)."""
+        self.file_open.cache_remove_all()
+
+    # Internal helpers
+    def _get_nrows(self):
+        """Find nrows by locating the numerically last data file (in
+        the numerically last non-empty subdirectory) and using its
+        size.  Returns 0 if there are no data files."""
+        # Note that this just finds a 'nrows' that is guaranteed to be
+        # greater than the row number of any piece of data that
+        # currently exists, not necessarily all data that _ever_
+        # existed.
+        # Data subdirs and files are named with 4 or more lowercase hex
+        # digits; anything else (e.g. "_format") is skipped.
+        regex = re.compile("^[0-9a-f]{4,}$")
+
+        # Find the last directory.  We sort and loop through all of them,
+        # starting with the numerically greatest, because the dirs could be
+        # empty if something was deleted.
+        subdirs = sorted(filter(regex.search, os.listdir(self.root)),
+                         key = lambda x: int(x, 16), reverse = True)
+
+        for subdir in subdirs:
+            # Now find the last file in that dir
+            path = os.path.join(self.root, subdir)
+            files = filter(regex.search, os.listdir(path))
+            if not files: # pragma: no cover (shouldn't occur)
+                # Empty dir: try the next one
+                continue
+
+            # Find the numerical max
+            filename = max(files, key = lambda x: int(x, 16))
+            # The size of that file is the byte offset just past its
+            # last row
+            offset = os.path.getsize(os.path.join(self.root, subdir, filename))
+
+            # Convert to row number
+            return self._row_from_offset(subdir, filename, offset)
+
+        # No files, so no data
+        return 0
+
+    def _offset_from_row(self, row):
+        """Locate a row.  Returns a (subdir, filename, offset, count)
+        tuple:
+
+          subdir: subdirectory for the file
+        filename: the filename that contains the specified row
+          offset: byte offset of the specified row within the file
+           count: number of rows (starting at offset) that fit in the file
+        """
+        (filenum, row_in_file) = divmod(row, self.rows_per_file)
+        (dirnum, file_in_dir) = divmod(filenum, self.files_per_dir)
+        # It's OK if these format specifiers are too short; the filenames
+        # will just get longer but will still sort correctly.
+        dirname = sprintf("%04x", dirnum)
+        filename = sprintf("%04x", file_in_dir)
+        offset = row_in_file * self.row_size
+        count = self.rows_per_file - row_in_file
+        return (dirname, filename, offset, count)
+
+    def _row_from_offset(self, subdir, filename, offset):
+        """Convert a byte offset within the file 'subdir/filename'
+        into the corresponding row number of the table."""
+        (row_in_file, leftover) = divmod(offset, self.row_size)
+        if leftover != 0: # pragma: no cover
+            # this shouldn't occur, unless there is some corruption somewhere
+            raise ValueError("file offset is not a multiple of data size")
+        # Both names are hexadecimal numbers
+        filenum = int(subdir, 16) * self.files_per_dir + int(filename, 16)
+        return (filenum * self.rows_per_file) + row_in_file
+
+    def _remove_or_truncate_file(self, subdir, filename, offset = 0):
+        """With a nonzero offset, truncate the given file to that
+        size.  Otherwise remove the file, and remove its subdirectory
+        as well if that leaves it empty."""
+        # Close potentially open file in file_open LRU cache
+        self.file_open.cache_remove(self, subdir, filename)
+        dirpath = os.path.join(self.root, subdir)
+        filepath = os.path.join(self.root, subdir, filename)
+        if offset:
+            # Truncate it
+            with open(filepath, "r+b") as f:
+                f.truncate(offset)
+            return
+        # Remove file
+        os.remove(filepath)
+        # Try deleting subdir, too; this fails if it isn't empty
+        try:
+            os.rmdir(dirpath)
+        except Exception:
+            pass
+
+    # Cache open files
+    @nilmdb.utils.lru_cache(size = fd_cache_size,
+                            onremove = lambda f: f.close())
+    def file_open(self, subdir, filename):
+        """Open and map a given 'subdir/filename' (relative to self.root).
+        Will be automatically closed when evicted from the cache.
+
+        The subdirectory is created first if it is missing.  Returns a
+        rocket.Rocket object for the file."""
+        # Create path if it doesn't exist
+        try:
+            os.mkdir(os.path.join(self.root, subdir))
+        except OSError:
+            # Ignored on purpose: most likely the directory is already there
+            pass
+        # Return a rocket.Rocket object, which contains the open file
+        return rocket.Rocket(self.layout,
+                             os.path.join(self.root, subdir, filename))
+
+    def append_data(self, data, start, end, binary = False):
+        """Parse the formatted string in 'data', according to the
+        current layout, and append it to the table.  If any timestamps
+        are non-monotonic, or don't fall between 'start' and 'end',
+        a ValueError is raised.
+
+        If 'binary' is True, the data should be in raw binary format
+        instead: little-endian, matching the current table's layout,
+        including the int64 timestamp.
+
+        If this function succeeds, it returns normally.  Otherwise,
+        the table is reverted back to its original state by truncating
+        or deleting files as necessary."""
+        data_offset = 0
+        last_timestamp = nilmdb.utils.time.min_timestamp
+        # Row number at which the next row will be stored; self.nrows
+        # itself is only updated once everything has succeeded
+        tot_rows = self.nrows
+        count = 0
+        linenum = 0
+        try:
+            # Each pass fills (at most) the rest of one file
+            while data_offset < len(data):
+                # See how many rows we can fit into the current file,
+                # and open it
+                (subdir, fname, offset, count) = self._offset_from_row(tot_rows)
+                f = self.file_open(subdir, fname)
+
+                # Ask the rocket object to parse and append up to "count"
+                # rows of data, verifying things along the way.
+                try:
+                    if binary:
+                        appender = f.append_binary
+                    else:
+                        appender = f.append_string
+                    (added_rows, data_offset, last_timestamp, linenum
+                     ) = appender(count, data, data_offset, linenum,
+                                  start, end, last_timestamp)
+                except rocket.ParseError as e:
+                    # Turn the parser's error into a readable ValueError
+                    (linenum, colnum, errtype, obj) = e.args
+                    if binary:
+                        where = "byte %d: " % (linenum)
+                    else:
+                        where = "line %d, column %d: " % (linenum, colnum)
+                    # Extract out the error line, add column marker
+                    try:
+                        if binary:
+                            # There's no text line to show for binary data
+                            raise IndexError
+                        bad = data.splitlines()[linenum-1]
+                        bad += '\n' + ' ' * (colnum - 1) + '^'
+                    except IndexError:
+                        bad = ""
+                    if errtype == rocket.ERR_NON_MONOTONIC:
+                        err = "timestamp is not monotonically increasing"
+                    elif errtype == rocket.ERR_OUT_OF_INTERVAL:
+                        if obj < start:
+                            err = sprintf("Data timestamp %s < start time %s",
+                                          timestamp_to_string(obj),
+                                          timestamp_to_string(start))
+                        else:
+                            err = sprintf("Data timestamp %s >= end time %s",
+                                          timestamp_to_string(obj),
+                                          timestamp_to_string(end))
+                    else:
+                        err = str(obj)
+                    raise ValueError("error parsing input data: " +
+                                     where + err + "\n" + bad)
+                tot_rows += added_rows
+        except Exception:
+            # Some failure, so try to roll things back by truncating or
+            # deleting files that we may have appended data to.
+            cleanpos = self.nrows
+            while cleanpos <= tot_rows:
+                # The first file is cut back to where the table ended
+                # before this call (or removed, if that is offset 0).
+                # cleanpos then moves to the start of the next file, so
+                # all later files have offset 0 and are removed.
+                (subdir, fname, offset, count) = self._offset_from_row(cleanpos)
+                self._remove_or_truncate_file(subdir, fname, offset)
+                cleanpos += count
+            # Re-raise original exception
+            raise
+        else:
+            # Success, so update self.nrows accordingly
+            self.nrows = tot_rows
+
+    def get_data(self, start, stop, binary = False):
+        """Extract the rows in the Python-style range [start:stop] and
+        return them as one string: formatted text, or raw binary data
+        if 'binary' is True.  Raises IndexError for a bad range."""
+        if start is None or stop is None:
+            raise IndexError("Index out of range")
+        if start > stop or start < 0 or stop > self.nrows:
+            raise IndexError("Index out of range")
+
+        chunks = []
+        row = start
+        while row < stop:
+            # Take as many rows as this file has, but no more than we
+            # still need
+            (subdir, filename, offset, count) = self._offset_from_row(row)
+            count = min(count, stop - row)
+            f = self.file_open(subdir, filename)
+            if binary:
+                extract = f.extract_binary
+            else:
+                extract = f.extract_string
+            chunks.append(extract(offset, count))
+            row += count
+        return b"".join(chunks)
+
+    def __getitem__(self, row):
+        """table[n] notation: return the timestamp of row n.  Raises
+        IndexError if there is no such row."""
+        if not (0 <= row < self.nrows):
+            raise IndexError("Index out of range")
+        (subdir, filename, offset, _count) = self._offset_from_row(row)
+        return self.file_open(subdir, filename).extract_timestamp(offset)
+
+    def _remove_rows(self, subdir, filename, start, stop):
+        """Helper to mark specific rows as being removed from a
+        file, and potentially remove or truncate the file itself.
+
+        'start' and 'stop' are row numbers relative to the beginning of
+        the file 'subdir/filename', describing the range [start, stop)."""
+        # Close potentially open file in file_open LRU cache
+        self.file_open.cache_remove(self, subdir, filename)
+
+        # We keep a file like 0000.removed that contains a list of
+        # which rows have been "removed".  Note that we never have to
+        # remove entries from this list, because we never decrease
+        # self.nrows, and so we will never overwrite those locations in the
+        # file.  Only when the list covers the entire extent of the
+        # file will that file be removed.
+        datafile = os.path.join(self.root, subdir, filename)
+        cachefile = datafile + ".removed"
+        try:
+            with open(cachefile, "rb") as f:
+                ranges = pickle.load(f)
+            cachefile_present = True
+        except Exception:
+            # A missing or unreadable list is treated as "nothing has
+            # been removed yet"
+            ranges = []
+            cachefile_present = False
+
+        # Append our new range and sort
+        ranges.append((start, stop))
+        ranges.sort()
+
+        # Merge adjacent ranges into "merged".  Only ranges that touch
+        # exactly (one's end equal to the next one's start) are joined.
+        merged = []
+        prev = None
+        for new in ranges:
+            if prev is None:
+                # No previous range, so remember this one
+                prev = new
+            elif prev[1] == new[0]:
+                # Previous range connected to this new one; extend prev
+                prev = (prev[0], new[1])
+            else:
+                # Not connected; append previous and start again
+                merged.append(prev)
+                prev = new
+        if prev is not None:
+            merged.append(prev)
+
+        # If the range covered the whole file, we can delete it now.
+        # Note that the last file in a table may be only partially
+        # full (smaller than self.rows_per_file).  We purposely leave
+        # those files around rather than deleting them, because the
+        # remainder will be filled on a subsequent append(), and things
+        # are generally easier if we don't have to special-case that.
+        if (len(merged) == 1 and
+            merged[0][0] == 0 and merged[0][1] == self.rows_per_file):
+            # Delete files
+            if cachefile_present:
+                os.remove(cachefile)
+            self._remove_or_truncate_file(subdir, filename, 0)
+        else:
+            # File needs to stick around.  This means we can get
+            # degenerate cases where we have large files containing as
+            # little as one row.  Try to punch a hole in the file,
+            # so that this region doesn't take up filesystem space.
+            # Convert the row range to a byte offset and byte count
+            offset = start * self.row_size
+            count = (stop - start) * self.row_size
+            nilmdb.utils.fallocate.punch_hole(datafile, offset, count)
+
+            # Update cache.  Try to do it atomically.
+            nilmdb.utils.atomic.replace_file(cachefile,
+                                             pickle.dumps(merged, 2))
+
+    def remove(self, start, stop):
+        """Remove the rows [start, stop) from this table.
+
+        A file whose rows have all been removed is deleted.  For any
+        other file, a parallel data file records which rows have been
+        removed, and the file itself is otherwise left untouched.
+        Raises IndexError for a bad range."""
+        if start < 0 or start > stop or stop > self.nrows:
+            raise IndexError("Index out of range")
+
+        row = start
+        while row < stop:
+            # Handle one file per pass: take as many rows as it has,
+            # but no more than are still left to remove
+            (subdir, filename, offset, count) = self._offset_from_row(row)
+            count = min(count, stop - row)
+            # Mark the rows as being removed, using row numbers that
+            # are relative to the start of the file
+            first = offset // self.row_size
+            self._remove_rows(subdir, filename, first, first + count)
+            row += count
--- a/nilmdb/server/errors.py
+++ b/nilmdb/server/errors.py
@@ -0,0 +1,12 @@
+"""Exceptions"""
+
+class NilmDBError(Exception):
+    """Common base class of the NilmDB exceptions; the message
+    defaults to "Unspecified error" """
+    def __init__(self, message = "Unspecified error"):
+        super(NilmDBError, self).__init__(message)
+
+class StreamError(NilmDBError):
+    """NilmDBError subclass that adds no behavior of its own; it
+    exists so that callers can catch it specifically."""
+    pass
+
+class OverlapError(NilmDBError):
+    """NilmDBError subclass that adds no behavior of its own; it
+    exists so that callers can catch it specifically."""
+    pass
--- a/nilmdb/server/interval.pyx
+++ b/nilmdb/server/interval.pyx
@@ -0,0 +1,317 @@
+"""Interval, IntervalSet
+
+The Interval implemented here is just like
+nilmdb.utils.interval.Interval, except implemented in Cython for
+speed.
+
+Represents an interval of time, and a set of such intervals.
+
+Intervals are half-open, ie. they include data points with timestamps
+[start, end)
+"""
+
+# First implementation kept a sorted list of intervals and used
+# bisect() to optimize some operations, but this was too slow.
+
+# Second version was based on the quicksect implementation from
+# python-bx, modified slightly to handle floating point intervals.
+# This didn't support deletion.
+
+# Third version is more similar to the first version, using a rb-tree
+# instead of a simple sorted list to maintain O(log n) operations.
+
+# Fourth version is an optimized rb-tree that stores interval starts
+# and ends directly in the tree, like bxinterval did.
+
+from ..utils.time import min_timestamp as nilmdb_min_timestamp
+from ..utils.time import max_timestamp as nilmdb_max_timestamp
+from ..utils.time import timestamp_to_string
+from ..utils.iterator import imerge
+from ..utils.interval import IntervalError
+import itertools
+
+cimport rbtree
+from libc.stdint cimport uint64_t, int64_t
+
+ctypedef int64_t timestamp_t
+
+cdef class Interval:
+    """Represents an interval of time."""
+
+    cdef public timestamp_t start, end
+
+    def __init__(self, timestamp_t start, timestamp_t end):
+        """
+        'start' and 'end' are arbitrary numbers that represent time
+        """
+        if start >= end:
+            # Explicitly disallow zero-width intervals (since they're half-open)
+            raise IntervalError("start %s must precede end %s" % (start, end))
+        self.start = start
+        self.end = end
+
+    def __repr__(self):
+        s = repr(self.start) + ", " + repr(self.end)
+        return self.__class__.__name__ + "(" + s + ")"
+
+    def __str__(self):
+        return ("[" + timestamp_to_string(self.start) +
+                " -> " + timestamp_to_string(self.end) + ")")
+
+    def __cmp__(self, Interval other):
+        """Compare two intervals.  If non-equal, order by start then end"""
+        return cmp(self.start, other.start) or cmp(self.end, other.end)
+
+    cpdef intersects(self, Interval other):
+        """Return True if two Interval objects intersect"""
+        if (self.end <= other.start or self.start >= other.end):
+            return False
+        return True
+
+    cpdef subset(self, timestamp_t start, timestamp_t end):
+        """Return a new Interval that is a subset of this one"""
+        # A subclass that tracks additional data might override this.
+        if start < self.start or end > self.end:
+            raise IntervalError("not a subset")
+        return Interval(start, end)
+
+cdef class DBInterval(Interval):
+    """
+    Like Interval, but also tracks corresponding start/end times and
+    positions within the database.  These are not currently modified
+    when subsets are taken, but can be used later to help zero in on
+    database positions.
+
+    The actual 'start' and 'end' will always fall within the database
+    start and end, e.g.:
+        db_start = 100, db_startpos = 10000
+        start = 123
+        end = 150
+        db_end = 200, db_endpos = 20000
+    """
+
+    cdef public timestamp_t db_start, db_end
+    cdef public uint64_t db_startpos, db_endpos
+
+    def __init__(self, start, end,
+                 db_start, db_end,
+                 db_startpos, db_endpos):
+        """
+        'db_start' and 'db_end' are arbitrary numbers that represent
+        time.  They must span (enclose or equal) the interval covered
+        by 'start' and 'end', else IntervalError is raised.  'db_startpos'
+        and 'db_endpos' are arbitrary database position indicators
+        that correspond to those points.
+        """
+        Interval.__init__(self, start, end)
+        self.db_start = db_start
+        self.db_end = db_end
+        self.db_startpos = db_startpos
+        self.db_endpos = db_endpos
+        if db_start > start or db_end < end:
+            raise IntervalError("database times must span the interval times")
+
+    def __repr__(self):
+        s = repr(self.start) + ", " + repr(self.end)
+        s += ", " + repr(self.db_start) + ", " + repr(self.db_end)
+        s += ", " + repr(self.db_startpos) + ", " + repr(self.db_endpos)
+        return self.__class__.__name__ + "(" + s + ")"
+
+    cpdef subset(self, timestamp_t start, timestamp_t end):
+        """
+        Return a new DBInterval that is a subset of this one
+        """
+        if start < self.start or end > self.end:
+            raise IntervalError("not a subset")
+        return DBInterval(start, end,
+                          self.db_start, self.db_end,
+                          self.db_startpos, self.db_endpos)
+
+cdef class IntervalSet:
+    """
+    A non-intersecting set of intervals.
+    """
+
+    cdef public rbtree.RBTree tree
+
+    def __init__(self, source=None):
+        """
+        'source' is an Interval or IntervalSet to add.
+        """
+        self.tree = rbtree.RBTree()
+        if source is not None:
+            self += source
+
+    def __iter__(self):
+        for node in self.tree:
+            if node.obj:
+                yield node.obj
+
+    def __len__(self):
+        return sum(1 for x in self)
+
+    def __repr__(self):
+        descs = [ repr(x) for x in self ]
+        return self.__class__.__name__ + "([" + ", ".join(descs) + "])"
+
+    def __str__(self):
+        descs = [ str(x) for x in self ]
+        return  "[" + ", ".join(descs) + "]"
+
+    def __match__(self, other):
+        # This isn't particularly efficient, but it shouldn't get used in the
+        # general case.
+        """Test equality of two IntervalSets.
+
+        Treats adjacent Intervals as equivalent to one long interval,
+        so this function really tests whether the IntervalSets cover
+        the same spans of time."""
+        i = 0
+        j = 0
+        outside = True
+
+        def is_adjacent(a, b):
+            """Return True if two Intervals are adjacent (same end or start)"""
+            if a.end == b.start or b.end == a.start:
+                return True
+            else:
+                return False
+
+        this = list(self)
+        that = list(other)
+
+        try:
+            while True:
+                if (outside):
+                    # To match, we need to be finished both sets
+                    if (i >= len(this) and j >= len(that)):
+                        return True
+                    # Or the starts need to match
+                    if (this[i].start != that[j].start):
+                        return False
+                    outside = False
+                else:
+                    # We can move on if the two interval ends match
+                    if (this[i].end == that[j].end):
+                        i += 1
+                        j += 1
+                        outside = True
+                    else:
+                        # Whichever ends first needs to be adjacent to the next
+                        if (this[i].end < that[j].end):
+                            if (not is_adjacent(this[i],this[i+1])):
+                                return False
+                            i += 1
+                        else:
+                            if (not is_adjacent(that[j],that[j+1])):
+                                return False
+                            j += 1
+        except IndexError:
+            return False
+
+    # Use __richcmp__ instead of __eq__, __ne__ for Cython; no ordering.
+    def __richcmp__(self, other, int op):
+        if op == 2: # ==
+            return self.__match__(other)
+        elif op == 3: # !=
+            return not self.__match__(other)
+        return NotImplemented
+    #def __eq__(self, other):
+    #    return self.__match__(other)
+    #
+    #def __ne__(self, other):
+    #    return not self.__match__(other)
+
+    def __iadd__(self, object other not None):
+        """Inplace add -- modifies self
+
+        This throws an exception if the regions being added intersect."""
+        if isinstance(other, Interval):
+            if self.intersects(other):
+                raise IntervalError("Tried to add overlapping interval "
+                                    "to this set")
+            self.tree.insert(rbtree.RBNode(other.start, other.end, other))
+        else:
+            for x in other:
+                self.__iadd__(x)
+        return self
+
+    def iadd_nocheck(self, Interval other not None):
+        """Inplace add -- modifies self.
+        'Optimized' version that doesn't check for intersection and
+        only inserts the new interval into the tree."""
+        self.tree.insert(rbtree.RBNode(other.start, other.end, other))
+
+    def __isub__(self, Interval other not None):
+        """Inplace subtract -- modifies self
+
+        Removes an interval from the set.  Must exist exactly
+        as provided -- cannot remove a subset of an existing interval."""
+        i = self.tree.find(other.start, other.end)
+        if i is None:
+            raise IntervalError("interval " + str(other) + " not in tree")
+        self.tree.delete(i)
+        return self
+
+    def __add__(self, other not None):
+        """Add -- returns a new object"""
+        new = IntervalSet(self)
+        new += IntervalSet(other)
+        return new
+
+    def __and__(self, other not None):
+        """
+        Compute a new IntervalSet from the intersection of this
+        IntervalSet with one other interval.
+
+        Output intervals are built as subsets of the intervals in the
+        first argument (self).
+        """
+        out = IntervalSet()
+        for i in self.intersection(other):
+            out.tree.insert(rbtree.RBNode(i.start, i.end, i))
+        return out
+
+    def intersection(self, Interval interval not None, orig = False):
+        """
+        Compute a sequence of intervals that correspond to the
+        intersection between `self` and the provided interval.
+        Returns a generator that yields each of these intervals
+        in turn.
+
+        Output intervals are built as subsets of the intervals in the
+        first argument (self).
+
+        If orig = True, also return the original interval that was
+        (potentially) subsetted to make the one that is being
+        returned.
+        """
+        if orig:
+            for n in self.tree.intersect(interval.start, interval.end):
+                i = n.obj
+                subset = i.subset(max(i.start, interval.start),
+                                  min(i.end, interval.end))
+                yield (subset, i)
+        else:
+            for n in self.tree.intersect(interval.start, interval.end):
+                i = n.obj
+                subset = i.subset(max(i.start, interval.start),
+                                  min(i.end, interval.end))
+                yield subset
+
+    cpdef intersects(self, Interval other):
+        """Return True if this IntervalSet intersects another interval"""
+        for n in self.tree.intersect(other.start, other.end):
+            if n.obj.intersects(other):
+                return True
+        return False
+
+    def find_end(self, timestamp_t t):
+        """
+        Return an Interval from this tree that ends at time t, or
+        None if it doesn't exist.
+        """
+        n = self.tree.find_left_end(t)
+        if n and n.obj.end == t:
+            return n.obj
+        return None
--- a/nilmdb/server/interval.pyxdep
+++ b/nilmdb/server/interval.pyxdep
@@ -0,0 +1 @@
+rbtree.pxd
--- a/nilmdb/server/nilmdb.py
+++ b/nilmdb/server/nilmdb.py
@@ -0,0 +1,683 @@
+# -*- coding: utf-8 -*-
+
+"""NilmDB
+
+Object that represents a NILM database file.
+
+Manages both the SQL database and the table storage backend.
+"""
+
+# Need absolute_import so that "import nilmdb" won't pull in
+# nilmdb.py, but will pull the parent nilmdb module instead.
+from __future__ import absolute_import
+import nilmdb.utils
+from nilmdb.utils.printf import *
+from nilmdb.utils.time import timestamp_to_string
+
+from nilmdb.utils.interval import IntervalError
+from nilmdb.server.interval import Interval, DBInterval, IntervalSet
+
+from nilmdb.server import bulkdata
+from nilmdb.server.errors import NilmDBError, StreamError, OverlapError
+
+import sqlite3
+import os
+import errno
+import bisect
+
+# Note about performance and transactions:
+#
+# Committing a transaction in the default sync mode (PRAGMA synchronous=FULL)
+# takes about 125msec.  sqlite3 will commit transactions at 3 times:
+# 1: explicit con.commit()
+# 2: between a series of DML commands and non-DML commands, e.g.
+#    after a series of INSERT, SELECT, but before a CREATE TABLE or PRAGMA.
+# 3: at the end of an explicit transaction, e.g. "with self.con as con:"
+#
+# To speed things up, we can set 'PRAGMA synchronous=OFF'.  Or, it
+# seems that 'PRAGMA synchronous=NORMAL' and 'PRAGMA journal_mode=WAL'
+# give an equivalent speedup more safely.  That is what is used here.
+_sql_schema_updates = {
+    0: { "next": 1, "sql": """
+    -- All streams
+    CREATE TABLE streams(
+        id INTEGER PRIMARY KEY,		-- stream ID
+        path TEXT UNIQUE NOT NULL,	-- path, e.g. '/newton/prep'
+        layout TEXT NOT NULL		-- layout name, e.g. float32_8
+    );
+
+    -- Individual timestamped ranges in those streams.
+    -- For a given start_time and end_time, this tells us that the
+    -- data is stored between start_pos and end_pos.
+    -- Times are stored as μs since Unix epoch
+    -- Positions are opaque: PyTables rows, file offsets, etc.
+    --
+    -- Note: end_pos points to the row _after_ end_time, so end_pos-1
+    -- is the last valid row.
+    CREATE TABLE ranges(
+        stream_id INTEGER NOT NULL,
+        start_time INTEGER NOT NULL,
+        end_time INTEGER NOT NULL,
+        start_pos INTEGER NOT NULL,
+        end_pos INTEGER NOT NULL
+    );
+    CREATE INDEX _ranges_index ON ranges (stream_id, start_time, end_time);
+    """ },
+
+    1: { "next": 3, "sql": """
+    -- Generic dictionary-type metadata that can be associated with a stream
+    CREATE TABLE metadata(
+    	stream_id INTEGER NOT NULL,
+        key TEXT NOT NULL,
+        value TEXT
+    );
+    """ },
+
+    2: { "error": "old format with floating-point timestamps requires "
+         "nilmdb 1.3.1 or older" },
+
+    3: { "next": None },
+}
+
+@nilmdb.utils.must_close()
+class NilmDB(object):
+    verbose = 0
+
+    def __init__(self, basepath, max_results=None,
+                 max_removals=None, bulkdata_args=None):
+        """Initialize NilmDB at the given basepath.
+        Other arguments are for debugging / testing:
+
+        'max_results' is the max rows to send in a single
+        stream_intervals or stream_extract response.
+
+        'max_removals' is the max rows to delete at once
+        in stream_remove.
+
+        'bulkdata_args' is kwargs for the bulkdata module.
+        """
+        if bulkdata_args is None:
+            bulkdata_args = {}
+
+        # set up path
+        self.basepath = os.path.abspath(basepath)
+
+        # Create the database path if it doesn't exist
+        try:
+            os.makedirs(self.basepath)
+        except OSError as e:
+            if e.errno != errno.EEXIST: # pragma: no cover
+                # (no coverage: hard to trigger if tests are run as root)
+                raise IOError("can't create tree " + self.basepath)
+
+        # Our data goes inside it
+        self.data = bulkdata.BulkData(self.basepath, **bulkdata_args)
+
+        # SQLite database too
+        sqlfilename = os.path.join(self.basepath, "data.sql")
+        self.con = sqlite3.connect(sqlfilename, check_same_thread = True)
+        try:
+            self._sql_schema_update()
+        except Exception: # pragma: no cover
+            self.con.close()
+            self.data.close()
+            raise
+
+        # See big comment at top about the performance implications of this
+        self.con.execute("PRAGMA synchronous=NORMAL")
+        self.con.execute("PRAGMA journal_mode=WAL")
+
+        # Approximate largest number of elements that we want to send
+        # in a single reply (for stream_intervals, stream_extract).
+        self.max_results = max_results or 16384
+
+        # Remove up to this many rows per call to stream_remove.
+        self.max_removals = max_removals or 1048576
+
+    def get_basepath(self):
+        return self.basepath
+
+    def close(self):
+        if self.con:
+            self.con.commit()
+            self.con.close()
+        self.data.close()
+
+    def _sql_schema_update(self):
+        cur = self.con.cursor()
+        version = cur.execute("PRAGMA user_version").fetchone()[0]
+        oldversion = version
+
+        while True:
+            if version not in _sql_schema_updates: # pragma: no cover
+                raise Exception(self.basepath + ": unknown database version "
+                                + str(version))
+            update = _sql_schema_updates[version]
+            if "error" in update: # pragma: no cover
+                raise Exception(self.basepath + ": can't use database version "
+                                + str(version) + ": " + update["error"])
+            if update["next"] is None:
+                break
+            cur.executescript(update["sql"])
+            version = update["next"]
+            if self.verbose: # pragma: no cover
+                printf("Database schema updated to %d\n", version)
+
+        if version != oldversion:
+            with self.con:
+                cur.execute("PRAGMA user_version = {v:d}".format(v=version))
+
+    def _check_user_times(self, start, end):
+        if start is None:
+            start = nilmdb.utils.time.min_timestamp
+        if end is None:
+            end = nilmdb.utils.time.max_timestamp
+        if start >= end:
+            raise NilmDBError("start must precede end")
+        return (start, end)
+
+    @nilmdb.utils.lru_cache(size = 64)
+    def _get_intervals(self, stream_id):
+        """
+        Return a mutable IntervalSet corresponding to the given stream ID.
+        """
+        iset = IntervalSet()
+        result = self.con.execute("SELECT start_time, end_time, "
+                                  "start_pos, end_pos "
+                                  "FROM ranges "
+                                  "WHERE stream_id=?", (stream_id,))
+        try:
+            for (start_time, end_time, start_pos, end_pos) in result:
+                iset += DBInterval(start_time, end_time,
+                                   start_time, end_time,
+                                   start_pos, end_pos)
+        except IntervalError: # pragma: no cover
+            raise NilmDBError("unexpected overlap in ranges table!")
+
+        return iset
+
+    def _sql_interval_insert(self, id, start, end, start_pos, end_pos):
+        """Helper that adds interval to the SQL database only"""
+        self.con.execute("INSERT INTO ranges "
+                         "(stream_id,start_time,end_time,start_pos,end_pos) "
+                         "VALUES (?,?,?,?,?)",
+                         (id, start, end, start_pos, end_pos))
+
+    def _sql_interval_delete(self, id, start, end, start_pos, end_pos):
+        """Helper that removes interval from the SQL database only"""
+        self.con.execute("DELETE FROM ranges WHERE "
+                         "stream_id=? AND start_time=? AND "
+                         "end_time=? AND start_pos=? AND end_pos=?",
+                         (id, start, end, start_pos, end_pos))
+
+    def _add_interval(self, stream_id, interval, start_pos, end_pos):
+        """
+        Add interval to the internal interval cache, and to the database.
+        Note: arguments must be ints (not numpy.int64, etc)
+        """
+        # Load this stream's intervals
+        iset = self._get_intervals(stream_id)
+
+        # Check for overlap
+        if iset.intersects(interval): # pragma: no cover (gets caught earlier)
+            raise NilmDBError("new interval overlaps existing data")
+
+        # Check for adjacency.  If there's a stream in the database
+        # that ends exactly when this one starts, and the database
+        # rows match up, we can make one interval that covers the
+        # time range [adjacent.start -> interval.end)
+        # and database rows [ adjacent.start_pos -> end_pos ].
+        # Only do this if the resulting interval isn't too large.
+        max_merged_rows = 8000 * 60 * 60 * 1.05 # 1.05 hours at 8 KHz
+        adjacent = iset.find_end(interval.start)
+        if (adjacent is not None and
+            start_pos == adjacent.db_endpos and
+            (end_pos - adjacent.db_startpos) < max_merged_rows):
+            # First delete the old one, both from our iset and the
+            # database
+            iset -= adjacent
+            self._sql_interval_delete(stream_id,
+                                      adjacent.db_start, adjacent.db_end,
+                                      adjacent.db_startpos, adjacent.db_endpos)
+
+            # Now update our interval so the fallthrough add is
+            # correct.
+            interval.start = adjacent.start
+            start_pos = adjacent.db_startpos
+
+        # Add the new interval to the iset
+        iset.iadd_nocheck(DBInterval(interval.start, interval.end,
+                                     interval.start, interval.end,
+                                     start_pos, end_pos))
+
+        # Insert into the database
+        self._sql_interval_insert(stream_id, interval.start, interval.end,
+                                  int(start_pos), int(end_pos))
+
+        self.con.commit()
+
+    def _remove_interval(self, stream_id, original, remove):
+        """
+        Remove an interval from the internal cache and the database.
+
+        stream_id: id of stream
+         original: original DBInterval; must be already present in DB
+           remove: DBInterval to remove; must be subset of 'original'
+        """
+        # Just return if we have nothing to remove
+        if remove.start == remove.end: # pragma: no cover
+            return
+
+        # Load this stream's intervals
+        iset = self._get_intervals(stream_id)
+
+        # Remove existing interval from the cached set and the database
+        iset -= original
+        self._sql_interval_delete(stream_id,
+                                  original.db_start, original.db_end,
+                                  original.db_startpos, original.db_endpos)
+
+        # Add back the intervals that would be left over if the
+        # requested interval is removed.  There may be two of them, if
+        # the removed piece was in the middle.
+        def add(iset, start, end, start_pos, end_pos):
+            iset += DBInterval(start, end, start, end, start_pos, end_pos)
+            self._sql_interval_insert(stream_id, start, end, start_pos, end_pos)
+
+        if original.start != remove.start:
+            # Interval before the removed region
+            add(iset, original.start, remove.start,
+                original.db_startpos, remove.db_startpos)
+
+        if original.end != remove.end:
+            # Interval after the removed region
+            add(iset, remove.end, original.end,
+                remove.db_endpos, original.db_endpos)
+
+        # Commit SQL changes
+        self.con.commit()
+
+        return
+
+    def stream_list(self, path = None, layout = None, extended = False):
+        """Return list of lists of all streams in the database.
+
+        If path is specified, include only streams with a path that
+        matches the given string.
+
+        If layout is specified, include only streams with a layout
+        that matches the given string.
+
+        If extended = False, returns a list of lists containing
+        the path and layout: [ path, layout ]
+
+        If extended = True, returns a list of lists containing
+        more information:
+           path
+           layout
+           interval_min (earliest interval start)
+           interval_max (latest interval end)
+           rows         (total number of rows of data)
+           time         (total time covered by this stream, in timestamp units)
+        """
+        params = ()
+        query = "SELECT streams.path, streams.layout"
+        if extended:
+            query += ", min(ranges.start_time), max(ranges.end_time) "
+            query += ", coalesce(sum(ranges.end_pos - ranges.start_pos), 0) "
+            query += ", coalesce(sum(ranges.end_time - ranges.start_time), 0) "
+        query += " FROM streams"
+        if extended:
+            query += " LEFT JOIN ranges ON streams.id = ranges.stream_id"
+        query += " WHERE 1=1"
+        if layout is not None:
+            query += " AND streams.layout=?"
+            params += (layout,)
+        if path is not None:
+            query += " AND streams.path=?"
+            params += (path,)
+        query += " GROUP BY streams.id ORDER BY streams.path"
+        result = self.con.execute(query, params).fetchall()
+        return [ list(x) for x in result ]
+
+    def stream_intervals(self, path, start = None, end = None, diffpath = None):
+        """
+        List all intervals in 'path' between 'start' and 'end'.  If
+        'diffpath' is not none, list instead the set-difference
+        between the intervals in the two streams; i.e. all interval
+        ranges that are present in 'path' but not 'diffpath'.
+
+        Returns (intervals, restart) tuple.
+
+        'intervals' is a list of [start,end] timestamps of all intervals
+        that exist for path, between start and end.
+
+        'restart', if not None, means that there were too many results
+        to return in a single request.  The data is complete from the
+        starting timestamp to the point at which it was truncated, and
+        a new request with a start time of 'restart' will fetch the
+        next block of data.
+        """
+        stream_id = self._stream_id(path)
+        intervals = self._get_intervals(stream_id)
+        if diffpath:
+            diffstream_id = self._stream_id(diffpath)
+            diffintervals = self._get_intervals(diffstream_id)
+        (start, end) = self._check_user_times(start, end)
+        requested = Interval(start, end)
+        result = []
+        if diffpath:
+            getter = nilmdb.utils.interval.set_difference(
+                intervals.intersection(requested),
+                diffintervals.intersection(requested))
+        else:
+            getter = intervals.intersection(requested)
+        for n, i in enumerate(getter):
+            if n >= self.max_results:
+                restart = i.start
+                break
+            result.append([i.start, i.end])
+        else:
+            restart = None
+        return (result, restart)
+
+    def stream_create(self, path, layout_name):
+        """Create a new table in the database.
+
+        path: path to the data (e.g. '/newton/prep').
+        Paths must contain at least two elements, e.g.:
+           /newton/prep
+           /newton/raw
+           /newton/upstairs/prep
+           /newton/upstairs/raw
+
+        layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8'
+        """
+        # Create the bulk storage.  Raises ValueError on error, which we
+        # pass along.
+        self.data.create(path, layout_name)
+
+        # Insert into SQL database once the bulk storage is happy
+        with self.con as con:
+            con.execute("INSERT INTO streams (path, layout) VALUES (?,?)",
+                        (path, layout_name))
+
+    def _stream_id(self, path):
+        """Return unique stream ID"""
+        result = self.con.execute("SELECT id FROM streams WHERE path=?",
+                                  (path,)).fetchone()
+        if result is None:
+            raise StreamError("No stream at path " + path)
+        return result[0]
+
+    def stream_set_metadata(self, path, data):
+        """Set stream metadata from a dictionary, e.g.
+           { description = 'Downstairs lighting',
+             v_scaling = 123.45 }
+           This replaces all existing metadata.
+           """
+        stream_id = self._stream_id(path)
+        with self.con as con:
+            con.execute("DELETE FROM metadata WHERE stream_id=?", (stream_id,))
+            for key in data:
+                if data[key] != '':
+                    con.execute("INSERT INTO metadata VALUES (?, ?, ?)",
+                                (stream_id, key, data[key]))
+
+    def stream_get_metadata(self, path):
+        """Return stream metadata as a dictionary."""
+        stream_id = self._stream_id(path)
+        result = self.con.execute("SELECT metadata.key, metadata.value "
+                                  "FROM metadata "
+                                  "WHERE metadata.stream_id=?", (stream_id,))
+        data = {}
+        for (key, value) in result:
+            data[key] = value
+        return data
+
+    def stream_update_metadata(self, path, newdata):
+        """Update stream metadata from a dictionary"""
+        data = self.stream_get_metadata(path)
+        data.update(newdata)
+        self.stream_set_metadata(path, data)
+
+    def stream_rename(self, oldpath, newpath):
+        """Rename a stream."""
+        stream_id = self._stream_id(oldpath)
+
+        # Rename the data
+        self.data.rename(oldpath, newpath)
+
+        # Rename the stream in the database
+        with self.con as con:
+            con.execute("UPDATE streams SET path=? WHERE id=?",
+                        (newpath, stream_id))
+
+    def stream_destroy(self, path):
+        """Fully remove a table from the database.  Raises NilmDBError
+        if any intervals are still present; remove them first.  Metadata
+        is also removed."""
+        stream_id = self._stream_id(path)
+
+        # Verify that no intervals are present, and clear the cache
+        iset = self._get_intervals(stream_id)
+        if len(iset):
+            raise NilmDBError("all intervals must be removed before "
+                              "destroying a stream")
+        self._get_intervals.cache_remove(self, stream_id)
+
+        # Delete the bulkdata storage
+        self.data.destroy(path)
+
+        # Delete metadata, stream, intervals (should be none)
+        with self.con as con:
+            con.execute("DELETE FROM metadata WHERE stream_id=?", (stream_id,))
+            con.execute("DELETE FROM ranges WHERE stream_id=?", (stream_id,))
+            con.execute("DELETE FROM streams WHERE id=?", (stream_id,))
+
+    def stream_insert(self, path, start, end, data, binary = False):
+        """Insert new data into the database.
+           path: Path at which to add the data
+           start: Starting timestamp
+           end: Ending timestamp
+           data: Textual data, formatted according to the layout of path
+
+           'binary', if True, means that 'data' is raw binary:
+           little-endian, matching the current table's layout,
+           including the int64 timestamp.
+           """
+        # First check for basic overlap using timestamp info given.
+        stream_id = self._stream_id(path)
+        iset = self._get_intervals(stream_id)
+        interval = Interval(start, end)
+        if iset.intersects(interval):
+            raise OverlapError("new data overlaps existing data at range: "
+                               + str(iset & interval))
+
+        # Tentatively append the data.  This will raise a ValueError if
+        # there are any parse errors.
+        table = self.data.getnode(path)
+        row_start = table.nrows
+        table.append_data(data, start, end, binary)
+        row_end = table.nrows
+
+        # Insert the record into the sql database.
+        self._add_interval(stream_id, interval, row_start, row_end)
+
+        # And that's all
+        return
+
+    def _find_start(self, table, dbinterval):
+        """
+        Given a DBInterval, find the row in the database that
+        corresponds to the start time.  Return the first database
+        position with a timestamp (first element) greater than or
+        equal to 'start'.
+        """
+        # Optimization for the common case where an interval wasn't truncated
+        if dbinterval.start == dbinterval.db_start:
+            return dbinterval.db_startpos
+        return bisect.bisect_left(table,
+                                  dbinterval.start,
+                                  dbinterval.db_startpos,
+                                  dbinterval.db_endpos)
+
+    def _find_end(self, table, dbinterval):
+        """
+        Given a DBInterval, find the row in the database that follows
+        the end time.  Return the first database position after the
+        row with timestamp (first element) greater than or equal
+        to 'end'.
+        """
+        # Optimization for the common case where an interval wasn't truncated
+        if dbinterval.end == dbinterval.db_end:
+            return dbinterval.db_endpos
+        # Note that we still use bisect_left here, because we don't
+        # want to include the given timestamp in the results.  This is
+        # so a queries like 1:00 -> 2:00 and 2:00 -> 3:00 return
+        # non-overlapping data.
+        return bisect.bisect_left(table,
+                                  dbinterval.end,
+                                  dbinterval.db_startpos,
+                                  dbinterval.db_endpos)
+
+    def stream_extract(self, path, start = None, end = None,
+                       count = False, markup = False, binary = False):
+        """
+        Returns (data, restart) tuple.
+
+        'data' is ASCII-formatted data from the database, formatted
+        according to the layout of the stream.
+
+        'restart', if not None, means that there were too many results to
+        return in a single request.  The data is complete from the
+        starting timestamp to the point at which it was truncated,
+        and a new request with a start time of 'restart' will fetch
+        the next block of data.
+
+        'count', if true, means to not return raw data, but just the count
+        of rows that would have been returned.  This is much faster
+        than actually fetching the data.  It is not limited by
+        max_results.
+
+        'markup', if true, indicates that returned data should be
+        marked with a comment denoting when a particular interval
+        starts, and another comment when an interval ends.
+
+        'binary', if true, means to return raw binary rather than
+        ASCII-formatted data.
+        """
+        stream_id = self._stream_id(path)
+        table = self.data.getnode(path)
+        intervals = self._get_intervals(stream_id)
+        (start, end) = self._check_user_times(start, end)
+        requested = Interval(start, end)
+        result = []
+        matched = 0
+        remaining = self.max_results
+        restart = None
+        if binary and (markup or count):
+            raise NilmDBError("binary mode can't be used with markup or count")
+        for interval in intervals.intersection(requested):
+            # Reading single rows from the table is too slow, so
+            # we use two bisections to find both the starting and
+            # ending row for this particular interval, then
+            # read the entire range as one slice.
+            row_start = self._find_start(table, interval)
+            row_end = self._find_end(table, interval)
+
+            if count:
+                matched += row_end - row_start
+                continue
+
+            # Shorten it if we'll hit the maximum number of results
+            row_max = row_start + remaining
+            if row_max < row_end:
+                row_end = row_max
+                restart = table[row_max]
+
+            # Add markup
+            if markup:
+                result.append("# interval-start " +
+                              timestamp_to_string(interval.start) + "\n")
+
+            # Gather these results up
+            result.append(table.get_data(row_start, row_end, binary))
+
+            # Count them
+            remaining -= row_end - row_start
+
+            # Add markup, and exit if restart is set.
+            if restart is not None:
+                if markup:
+                    result.append("# interval-end " +
+                                  timestamp_to_string(restart) + "\n")
+                break
+            if markup:
+                result.append("# interval-end " +
+                              timestamp_to_string(interval.end) + "\n")
+
+        if count:
+            return matched
+        return ("".join(result), restart)
+
+    def stream_remove(self, path, start = None, end = None):
+        """
+        Remove data from the specified time interval within a stream.
+
+        Removes data in the interval [start, end), and intervals are
+        truncated or split appropriately.
+
+        Returns a (removed, restart) tuple.
+
+        'removed' is the number of data points that were removed.
+
+        'restart', if not None, means there were too many rows to
+        remove in a single request.  This function should be called
+        again with a start time of 'restart' to complete the removal.
+
+        At most self.max_removals rows are removed per call.
+        """
+        stream_id = self._stream_id(path)
+        table = self.data.getnode(path)
+        intervals = self._get_intervals(stream_id)
+        (start, end) = self._check_user_times(start, end)
+        to_remove = Interval(start, end)
+        removed = 0
+        remaining = self.max_removals
+        restart = None
+
+        # Can't remove intervals from within the iterator, so we need to
+        # remember what's currently in the intersection now.
+        all_candidates = list(intervals.intersection(to_remove, orig = True))
+
+        # Each candidate is a (dbint, orig) pair; dbint is the part to
+        # remove and orig is passed to _remove_interval alongside it.
+        for (dbint, orig) in all_candidates:
+            # Find row start and end
+            row_start = self._find_start(table, dbint)
+            row_end = self._find_end(table, dbint)
+
+            # Shorten it if we'll hit the maximum number of removals
+            row_max = row_start + remaining
+            if row_max < row_end:
+                row_end = row_max
+                # The shortened interval ends at the first row that is
+                # kept; that timestamp is also where the caller resumes.
+                dbint.end = table[row_max]
+                restart = dbint.end
+
+            # Adjust the DBInterval to match the newly found ends
+            # (this must happen after the lookups above, which compare
+            # start/end against the old db_start/db_end values)
+            dbint.db_start = dbint.start
+            dbint.db_end = dbint.end
+            dbint.db_startpos = row_start
+            dbint.db_endpos = row_end
+
+            # Remove interval from the database
+            self._remove_interval(stream_id, orig, dbint)
+
+            # Remove data from the underlying table storage
+            table.remove(row_start, row_end)
+
+            # Count how many were removed
+            removed += row_end - row_start
+            remaining -= row_end - row_start
+
+            # The removal limit was reached; stop and let the caller retry
+            if restart is not None:
+                break
+
+        return (removed, restart)
--- a/nilmdb/server/rbtree.pxd
+++ b/nilmdb/server/rbtree.pxd
@@ -0,0 +1,23 @@
+cdef class RBNode:
+    # Value carried by the node.
+    cdef public object obj
+    # Key: start and end timestamps of the interval.
+    cdef public double start, end
+    # Node color; rbtree.pyx stores True for red and False for black.
+    cdef public int red
+    # Links to the neighbouring nodes in the tree.
+    cdef public RBNode left, right, parent
+
+cdef class RBTree:
+    # Sentinels: 'nil' stands in for every missing child, and 'root'
+    # is a dummy node whose left child is the real root of the tree.
+    cdef public RBNode nil, root
+
+    # The "__" methods are C-only (cdef); the cpdef methods are also
+    # callable from Python.
+    cpdef getroot(RBTree self)
+    # Rotations and in-order neighbours
+    cdef void __rotate_left(RBTree self, RBNode x)
+    cdef void __rotate_right(RBTree self, RBNode y)
+    cdef RBNode __successor(RBTree self, RBNode x)
+    cpdef RBNode successor(RBTree self, RBNode x)
+    cdef RBNode __predecessor(RBTree self, RBNode x)
+    cpdef RBNode predecessor(RBTree self, RBNode x)
+    # Insertion and deletion, each with its rebalancing helper
+    cpdef insert(RBTree self, RBNode z)
+    cdef void __insert_fixup(RBTree self, RBNode x)
+    cpdef delete(RBTree self, RBNode z)
+    cdef inline void __delete_fixup(RBTree self, RBNode x)
+    # Searching
+    cpdef RBNode find(RBTree self, double start, double end)
+    cpdef RBNode find_left_end(RBTree self, double t)
+    cpdef RBNode find_right_start(RBTree self, double t)
--- a/nilmdb/server/rbtree.pyx
+++ b/nilmdb/server/rbtree.pyx
@@ -0,0 +1,377 @@
+# cython: profile=False
+# cython: cdivision=True
+
+"""
+Jim Paris <jim@jtan.com>
+
+Red-black tree, where keys are stored as start/end timestamps.
+This is a basic interval tree that holds half-open intervals:
+  [start, end)
+Intervals must not overlap.  Fixing that would involve making this
+into an augmented interval tree as described in CLRS 14.3.
+
+Code that assumes non-overlapping intervals is marked with the
+string 'non-overlapping'.
+"""
+
+import sys
+cimport rbtree
+
+cdef class RBNode:
+    """A single Red/Black tree node: the key is the (start, end)
+    pair and the value is 'obj'."""
+    def __init__(self, double start, double end, object obj = None):
+        # Key and value
+        self.start = start
+        self.end = end
+        self.obj = obj
+        # New nodes are black and unlinked until the tree inserts them
+        self.red = False
+        self.left = None
+        self.right = None
+
+    def __str__(self):
+        # The tree's nil sentinel is recognized by its start key.
+        if self.start == sys.float_info.min:
+            return "[node nil]"
+        color = "R" if self.red else "B"
+        return " ".join(["[node (" + str(self.obj) + ")",
+                         str(self.start),
+                         "->",
+                         str(self.end),
+                         color + "]"])
+
+cdef class RBTree:
+    """Red/Black tree"""
+
+    # Init
+    def __init__(self):
+        """Create an empty tree consisting only of the two sentinels."""
+        # 'nil' replaces every missing child and links back to itself.
+        nil = RBNode(start = sys.float_info.min,
+                     end = sys.float_info.min)
+        nil.left = nil
+        nil.right = nil
+        nil.parent = nil
+        self.nil = nil
+
+        # 'root' is a dummy node; the real root hangs off its left link.
+        root = RBNode(start = sys.float_info.max,
+                      end = sys.float_info.max)
+        root.left = nil
+        root.right = nil
+        root.parent = nil
+        self.root = root
+
+    # We have a dummy root node to simplify operations, so from an
+    # external point of view, its left child is the real root.
+    cpdef getroot(self):
+        """Return the real root of the tree, i.e. the left child of
+        the dummy root node.  For an empty tree this is self.nil."""
+        return self.root.left
+
+    # Rotations and basic operations
+    cdef void __rotate_left(self, RBNode x):
+        """Rotate left:
+        #   x           y
+        #  / \   -->   / \
+        # z   y       x   w
+        #    / \     / \
+        #   v   w   z   v
+        """
+        cdef RBNode y = x.right
+        # y's left subtree (v) becomes x's right subtree
+        x.right = y.left
+        if y.left is not self.nil:
+            # never rewrite the shared nil sentinel's parent here
+            y.left.parent = x
+        # y takes x's place under x's old parent
+        y.parent = x.parent
+        if x is x.parent.left:
+            x.parent.left = y
+        else:
+            x.parent.right = y
+        # x becomes y's left child
+        y.left = x
+        x.parent = y
+
+    cdef void __rotate_right(self, RBNode y):
+        """Rotate right:
+        #     y           x
+        #    / \   -->   / \
+        #   x   w       z   y
+        #  / \             / \
+        # z   v           v   w
+        """
+        cdef RBNode x = y.left
+        # x's right subtree (v) becomes y's left subtree
+        y.left = x.right
+        if x.right is not self.nil:
+            # never rewrite the shared nil sentinel's parent here
+            x.right.parent = y
+        # x takes y's place under y's old parent
+        x.parent = y.parent
+        if y is y.parent.left:
+            y.parent.left = x
+        else:
+            y.parent.right = x
+        # y becomes x's right child
+        x.right = y
+        y.parent = x
+
+    cdef RBNode __successor(self, RBNode x):
+        """Return the in-order successor of RBNode x, or self.nil if
+        x is the last node"""
+        cdef RBNode y = x.right
+        if y is not self.nil:
+            # Leftmost node of the right subtree
+            while y.left is not self.nil:
+                y = y.left
+            return y
+        # No right subtree: climb until we arrive from a left child
+        y = x.parent
+        while x is y.right:
+            x = y
+            y = y.parent
+        # Reaching the dummy root means there is nothing after x
+        if y is self.root:
+            return self.nil
+        return y
+    cpdef RBNode successor(self, RBNode x):
+        """Return the node following RBNode x in key order, or None
+        when there is no such node"""
+        cdef RBNode following = self.__successor(x)
+        if following is self.nil:
+            return None
+        return following
+
+    cdef RBNode __predecessor(self, RBNode x):
+        """Return the in-order predecessor of RBNode x, or self.nil
+        if x is the first node"""
+        cdef RBNode y = x.left
+        if y is not self.nil:
+            # Rightmost node of the left subtree
+            while y.right is not self.nil:
+                y = y.right
+            return y
+        # No left subtree: climb until we arrive from a right child
+        y = x.parent
+        while x is y.left:
+            # Arriving at the dummy root means nothing precedes x
+            if y is self.root:
+                return self.nil
+            x = y
+            y = y.parent
+        return y
+    cpdef RBNode predecessor(self, RBNode x):
+        """Return the node preceding RBNode x in key order, or None
+        when there is no such node"""
+        cdef RBNode preceding = self.__predecessor(x)
+        if preceding is self.nil:
+            return None
+        return preceding
+
+    # Insertion
+    cpdef insert(self, RBNode z):
+        """Insert RBNode z into RBTree and rebalance as necessary.
+
+        Nodes are ordered by start, with ties broken by end."""
+        z.left = self.nil
+        z.right = self.nil
+        # Walk down from the real root; y trails x as its parent
+        cdef RBNode y = self.root
+        cdef RBNode x = self.root.left
+        while x is not self.nil:
+            y = x
+            if (x.start > z.start or (x.start == z.start and x.end > z.end)):
+                x = x.left
+            else:
+                x = x.right
+        z.parent = y
+        # In an empty tree y is still the dummy root, whose left child
+        # is by convention the real root
+        if (y is self.root or
+            (y.start > z.start or (y.start == z.start and y.end > z.end))):
+            y.left = z
+        else:
+            y.right = z
+        # relabel/rebalance
+        self.__insert_fixup(z)
+
+    cdef void __insert_fixup(self, RBNode x):
+        """Rebalance/fix RBTree after a simple insertion of RBNode x"""
+        # The new node starts out red; the loop repairs any red node
+        # that ends up with a red parent.
+        x.red = True
+        while x.parent.red:
+            if x.parent is x.parent.parent.left:
+                # y is x's uncle
+                y = x.parent.parent.right
+                if y.red:
+                    # Red uncle: recolor and continue from the grandparent
+                    x.parent.red = False
+                    y.red = False
+                    x.parent.parent.red = True
+                    x = x.parent.parent
+                else:
+                    # Black uncle: rotate x into the outer position if
+                    # needed, then recolor and rotate the grandparent
+                    if x is x.parent.right:
+                        x = x.parent
+                        self.__rotate_left(x)
+                    x.parent.red = False
+                    x.parent.parent.red = True
+                    self.__rotate_right(x.parent.parent)
+            else: # same as above, left/right switched
+                y = x.parent.parent.left
+                if y.red:
+                    x.parent.red = False
+                    y.red = False
+                    x.parent.parent.red = True
+                    x = x.parent.parent
+                else:
+                    if x is x.parent.left:
+                        x = x.parent
+                        self.__rotate_right(x)
+                    x.parent.red = False
+                    x.parent.parent.red = True
+                    self.__rotate_left(x.parent.parent)
+        # The real root is always left black
+        self.root.left.red = False
+
+    # Deletion
+    cpdef delete(self, RBNode z):
+        """Remove RBNode z from the tree and rebalance as necessary.
+
+        Raises AttributeError if z has no child links set, which is
+        the state of a node that was constructed but never inserted."""
+        if z.left is None or z.right is None:
+            raise AttributeError("you can only delete a node object "
+                                 + "from the tree; use find() to get one")
+        cdef RBNode x, y
+        # y is the node that gets spliced out: z itself when it has at
+        # most one real child, otherwise z's successor
+        if z.left is self.nil or z.right is self.nil:
+            y = z
+        else:
+            y = self.__successor(z)
+        # x is y's only child (or nil)
+        if y.left is self.nil:
+            x = y.right
+        else:
+            x = y.left
+        # Link x to y's parent.  This is done even when x is the nil
+        # sentinel; __delete_fixup reads x.parent.
+        x.parent = y.parent
+        if x.parent is self.root:
+            self.root.left = x
+        else:
+            if y is y.parent.left:
+                y.parent.left = x
+            else:
+                y.parent.right = x
+        if y is not z:
+            # y is the node to splice out, x is its child
+            # Put y where z was, taking over z's links and color
+            y.left = z.left
+            y.right = z.right
+            y.parent = z.parent
+            z.left.parent = y
+            z.right.parent = y
+            if z is z.parent.left:
+                z.parent.left = y
+            else:
+                z.parent.right = y
+            # Only the removal of a black node requires rebalancing
+            if not y.red:
+                y.red = z.red
+                self.__delete_fixup(x)
+            else:
+                y.red = z.red
+        else:
+            if not y.red:
+                self.__delete_fixup(x)
+
+    cdef void __delete_fixup(self, RBNode x):
+        """Rebalance/fix RBTree after a deletion.  RBNode x is the
+        child of the spliced out node."""
+        # Note: rootLeft is read once, before any rotation below
+        cdef RBNode rootLeft = self.root.left
+        while not x.red and x is not rootLeft:
+            if x is x.parent.left:
+                # w is x's sibling
+                w = x.parent.right
+                if w.red:
+                    # Red sibling: rotate so that x gets a black sibling
+                    w.red = False
+                    x.parent.red = True
+                    self.__rotate_left(x.parent)
+                    w = x.parent.right
+                if not w.right.red and not w.left.red:
+                    # Both of w's children are black: recolor w and
+                    # continue from the parent
+                    w.red = True
+                    x = x.parent
+                else:
+                    if not w.right.red:
+                        # Only w's left child is red: rotate it outward
+                        w.left.red = False
+                        w.red = True
+                        self.__rotate_right(w)
+                        w = x.parent.right
+                    # w's right child is red: recolor, rotate, and finish
+                    w.red = x.parent.red
+                    x.parent.red = False
+                    w.right.red = False
+                    self.__rotate_left(x.parent)
+                    x = rootLeft # exit loop
+            else: # same as above, left/right switched
+                w = x.parent.left
+                if w.red:
+                    w.red = False
+                    x.parent.red = True
+                    self.__rotate_right(x.parent)
+                    w = x.parent.left
+                if not w.left.red and not w.right.red:
+                    w.red = True
+                    x = x.parent
+                else:
+                    if not w.left.red:
+                        w.right.red = False
+                        w.red = True
+                        self.__rotate_left(w)
+                        w = x.parent.left
+                    w.red = x.parent.red
+                    x.parent.red = False
+                    w.left.red = False
+                    self.__rotate_right(x.parent)
+                    x = rootLeft # exit loop
+        # Whichever node the loop stopped on is colored black
+        x.red = False
+
+    # Walking, searching
+    def __iter__(self):
+        """Iterate over all nodes of the tree in key order"""
+        return self.inorder()
+
+    def inorder(self, RBNode x = None):
+        """Generator that performs an inorder walk, starting at the
+        leftmost node of the subtree rooted at RBNode x (the whole
+        tree if x is None).
+
+        The walk follows successors until the end of the tree, so for
+        a subtree it also yields the nodes that come after it."""
+        if x is None:
+            x = self.getroot()
+        # Descend to the smallest node below x
+        while x.left is not self.nil:
+            x = x.left
+        while x is not self.nil:
+            yield x
+            x = self.__successor(x)
+
+    cpdef RBNode find(self, double start, double end):
+        """Look up the node whose key is exactly (start, end).
+        Returns None if the tree holds no such node."""
+        cdef RBNode node = self.getroot()
+        while node is not self.nil:
+            if start == node.start and end == node.end:
+                return node
+            # Same ordering as insert(): by start, then by end
+            if start < node.start or (start == node.start
+                                      and end < node.end):
+                node = node.left
+            else:
+                node = node.right
+        return None
+
+    cpdef RBNode find_left_end(self, double t):
+        """Find the leftmost node with end >= t.  With non-overlapping
+        intervals, this is the first node that might overlap time t.
+        Returns None if there is no such node.
+
+        Note that this relies on non-overlapping intervals, since
+        it assumes that we can use the endpoints to traverse the
+        tree even though it was created using the start points."""
+        cdef RBNode x = self.getroot()
+        while x is not self.nil:
+            if t < x.end:
+                # x qualifies; keep looking left for an earlier match
+                if x.left is self.nil:
+                    break
+                x = x.left
+            elif t == x.end:
+                break
+            else:
+                # x ends before t; with no right child, the answer is
+                # x's successor (nil if x is the last node)
+                if x.right is self.nil:
+                    x = self.__successor(x)
+                    break
+                x = x.right
+        return x if x is not self.nil else None
+
+    cpdef RBNode find_right_start(self, double t):
+        """Find the rightmost node with start <= t.  With non-overlapping
+        intervals, this is the last node that might overlap time t.
+        Returns None if there is no such node."""
+        cdef RBNode x = self.getroot()
+        while x is not self.nil:
+            if t < x.start:
+                # x starts after t; with no left child, the answer is
+                # x's predecessor (nil if x is the first node)
+                if x.left is self.nil:
+                    x = self.__predecessor(x)
+                    break
+                x = x.left
+            elif t == x.start:
+                break
+            else:
+                # x qualifies; keep looking right for a later match
+                if x.right is self.nil:
+                    break
+                x = x.right
+        return x if x is not self.nil else None
+
+    # Intersections
+    def intersect(self, double start, double end):
+        """Generator yielding, in key order, each node that overlaps
+        the given (start,end) range.  Assumes non-overlapping
+        intervals."""
+        # Begin at the leftmost node that ends at or after 'start' and
+        # walk forward until a node starts at or beyond 'end'.
+        cdef RBNode node = self.find_left_end(start)
+        while node is not None and not (node.start >= end):
+            # A node ending exactly at 'start' does not overlap
+            if start < node.end:
+                yield node
+            node = self.successor(node)
--- a/nilmdb/server/rbtree.pyxdep
+++ b/nilmdb/server/rbtree.pyxdep
@@ -0,0 +1 @@
+rbtree.pxd
--- a/nilmdb/server/rocket.c
+++ b/nilmdb/server/rocket.c
@@ -0,0 +1,796 @@
+#include <Python.h>
+#include <structmember.h>
+#include <endian.h>
+
+#include <ctype.h>
+#include <stdint.h>
+
+#define __STDC_FORMAT_MACROS
+#include <inttypes.h>
+
+/* Values missing from stdint.h */
+#define UINT8_MIN 0
+#define UINT16_MIN 0
+#define UINT32_MIN 0
+#define UINT64_MIN 0
+
+/* Marker values (if min == max, skip range check) */
+#define FLOAT32_MIN 0
+#define FLOAT32_MAX 0
+#define FLOAT64_MIN 0
+#define FLOAT64_MAX 0
+
+typedef int64_t timestamp_t;
+
+/* Somewhat arbitrary, just so we can use fixed sizes for strings
+   etc. */
+static const int MAX_LAYOUT_COUNT = 1024;
+
+/* Error object and constants */
+static PyObject *ParseError;
+/* Error codes carried as the third element of a ParseError's value. */
+typedef enum {
+	/* Any other failure; reported together with a message string */
+	ERR_OTHER,
+	/* A timestamp was not greater than the one before it */
+	ERR_NON_MONOTONIC,
+	/* A timestamp was below 'start' or not below 'end' */
+	ERR_OUT_OF_INTERVAL,
+} parseerror_code_t;
+static void add_parseerror_codes(PyObject *module)
+{
+	/* Publish every parseerror_code_t value as an integer constant
+	   of the module, under the same name as the C enumerator. */
+	static const struct {
+		const char *name;
+		long value;
+	} codes[] = {
+		{ "ERR_OTHER",           ERR_OTHER },
+		{ "ERR_NON_MONOTONIC",   ERR_NON_MONOTONIC },
+		{ "ERR_OUT_OF_INTERVAL", ERR_OUT_OF_INTERVAL },
+	};
+	size_t i;
+
+	for (i = 0; i < sizeof(codes) / sizeof(codes[0]); i++)
+		PyModule_AddIntConstant(module, codes[i].name,
+					codes[i].value);
+}
+
+/* Helpers to raise ParseErrors.  Use "return raise_str(...)" etc. */
+static PyObject *raise_str(int line, int col, int code, const char *string)
+{
+	/* Raise ParseError with the value (line, col, code, string).
+	   Always returns NULL so callers can "return raise_str(...)". */
+	PyObject *value = Py_BuildValue("(iiis)", line, col, code, string);
+
+	if (value == NULL)
+		return NULL;
+	PyErr_SetObject(ParseError, value);
+	Py_DECREF(value);
+	return NULL;
+}
+static PyObject *raise_int(int line, int col, int code, int64_t num)
+{
+	/* Raise ParseError with the value (line, col, code, num).
+	   Always returns NULL so callers can "return raise_int(...)". */
+	PyObject *value = Py_BuildValue("(iiiL)", line, col, code,
+					(long long)num);
+
+	if (value == NULL)
+		return NULL;
+	PyErr_SetObject(ParseError, value);
+	Py_DECREF(value);
+	return NULL;
+}
+
+/****
+ * Layout and type helpers
+ */
+/* Fixed-width scratch unions, one per value size.  The parser stores
+   a value through the signed/float member and writes it to disk
+   through the unsigned member of the same union. */
+typedef union {
+	int8_t i;
+	uint8_t u;
+} union8_t;
+typedef union {
+	int16_t i;
+	uint16_t u;
+} union16_t;
+typedef union {
+	int32_t i;
+	uint32_t u;
+	float f;
+} union32_t;
+typedef union {
+	int64_t i;
+	uint64_t u;
+	double d;
+} union64_t;
+
+/* Data type of the values in a row.  LAYOUT_TYPE_NONE is what a
+   Rocket holds before its layout string has been parsed. */
+typedef enum {
+	LAYOUT_TYPE_NONE,
+	LAYOUT_TYPE_INT8,
+	LAYOUT_TYPE_UINT8,
+	LAYOUT_TYPE_INT16,
+	LAYOUT_TYPE_UINT16,
+	LAYOUT_TYPE_INT32,
+	LAYOUT_TYPE_UINT32,
+	LAYOUT_TYPE_INT64,
+	LAYOUT_TYPE_UINT64,
+	LAYOUT_TYPE_FLOAT32,
+	LAYOUT_TYPE_FLOAT64,
+} layout_type_t;
+
+/* Maps the type name used in a layout string (the part before the
+   underscore) to its layout_type_t and to the size in bytes of one
+   value.  The table ends with an entry whose string is NULL. */
+struct {
+	char *string;
+	layout_type_t layout;
+	int size;
+} type_lookup[] = {
+	{ "int8",    LAYOUT_TYPE_INT8,    1 },
+	{ "uint8",   LAYOUT_TYPE_UINT8,   1 },
+	{ "int16",   LAYOUT_TYPE_INT16,   2 },
+	{ "uint16",  LAYOUT_TYPE_UINT16,  2 },
+	{ "int32",   LAYOUT_TYPE_INT32,   4 },
+	{ "uint32",  LAYOUT_TYPE_UINT32,  4 },
+	{ "int64",   LAYOUT_TYPE_INT64,   8 },
+	{ "uint64",  LAYOUT_TYPE_UINT64,  8 },
+	{ "float32", LAYOUT_TYPE_FLOAT32, 4 },
+	{ "float64", LAYOUT_TYPE_FLOAT64, 8 },
+	{ NULL }
+};
+
+/****
+ * Object definition, init, etc
+ */
+
+/* Rocket object */
+typedef struct {
+	PyObject_HEAD
+	/* Type of each value in a row */
+	layout_type_t layout_type;
+	/* Number of values in a row, not counting the timestamp */
+	int layout_count;
+	/* Bytes per row on disk: 8 for the timestamp plus the values */
+	int binary_size;
+	/* Backing file, or NULL if none is open */
+	FILE *file;
+	/* Cached size of the file in bytes; negative until computed */
+	int file_size;
+} Rocket;
+
+/* Dealloc / new */
+static void Rocket_dealloc(Rocket *self)
+{
+	/* A file that is still open here means .close() was never
+	   called; complain, then close it on the caller's behalf. */
+	FILE *leftover = self->file;
+
+	if (leftover != NULL) {
+		fprintf(stderr, "rocket: file wasn't closed\n");
+		self->file = NULL;
+		fclose(leftover);
+	}
+	Py_TYPE(self)->tp_free((PyObject *)self);
+}
+
+static PyObject *Rocket_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+	/* Allocate the object and put it in the "no layout, no file"
+	   state; Rocket_init fills in the real values. */
+	Rocket *self = (Rocket *)type->tp_alloc(type, 0);
+
+	if (self != NULL) {
+		self->file = NULL;
+		self->file_size = -1;
+		self->layout_type = LAYOUT_TYPE_NONE;
+		self->layout_count = 0;
+		self->binary_size = 0;
+	}
+	return (PyObject *)self;
+}
+
+/* .__init__(layout, file) */
+static int Rocket_init(Rocket *self, PyObject *args, PyObject *kwds)
+{
+	/* Parse a layout string of the form "<type>_<count>" (for
+	   example "float32_8") and, if 'file' is not None, open that
+	   file for appending.
+
+	   Returns 0 on success.  Returns -1 with an exception set on
+	   failure: OSError if the file can't be opened, ValueError if
+	   the layout is invalid.  No file is left open on failure. */
+	const char *layout, *path;
+	static char *kwlist[] = { "layout", "file", NULL };
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "sz", kwlist,
+					 &layout, &path))
+		return -1;
+	if (!layout)
+		return -1;
+
+	/* __init__ may be called more than once on the same object;
+	   don't leak the file from an earlier call. */
+	if (self->file) {
+		fclose(self->file);
+		self->file = NULL;
+	}
+	if (path) {
+		if ((self->file = fopen(path, "a+b")) == NULL) {
+			PyErr_SetFromErrno(PyExc_OSError);
+			return -1;
+		}
+		self->file_size = -1;
+	}
+
+	const char *under;
+	char *tmp;
+	under = strchr(layout, '_');
+	if (!under) {
+		PyErr_SetString(PyExc_ValueError, "no such layout: "
+				"badly formatted string");
+		goto fail;
+	}
+
+	/* Parse the count as a signed long and range-check it before
+	   narrowing, so that out-of-range input can't wrap around
+	   into the valid range. */
+	long count = strtol(under + 1, &tmp, 10);
+	if (count < 1 || *tmp != '\0') {
+		PyErr_SetString(PyExc_ValueError, "no such layout: "
+				"bad count");
+		goto fail;
+	}
+	if (count >= MAX_LAYOUT_COUNT) {
+		PyErr_SetString(PyExc_ValueError, "no such layout: "
+				"count too high");
+		goto fail;
+	}
+	self->layout_count = (int)count;
+
+	/* The type name must match a table entry exactly.  Comparing
+	   only the first (under - layout) characters would also accept
+	   prefixes such as "int_4", or an empty name such as "_4". */
+	size_t type_len = (size_t)(under - layout);
+	int i;
+	for (i = 0; type_lookup[i].string; i++)
+		if (strlen(type_lookup[i].string) == type_len &&
+		    strncmp(layout, type_lookup[i].string, type_len) == 0)
+			break;
+	if (!type_lookup[i].string) {
+		PyErr_SetString(PyExc_ValueError, "no such layout: "
+				"bad data type");
+		goto fail;
+	}
+	self->layout_type = type_lookup[i].layout;
+	/* Each row is an 8-byte timestamp followed by the values */
+	self->binary_size = 8 + (type_lookup[i].size * self->layout_count);
+
+	return 0;
+
+fail:
+	/* Don't keep a file open for an object that failed to init */
+	if (self->file) {
+		fclose(self->file);
+		self->file = NULL;
+	}
+	return -1;
+}
+
+/* .close() */
+static PyObject *Rocket_close(Rocket *self)
+{
+	/* Close the backing file if one is open; calling this on an
+	   already-closed Rocket does nothing.  Returns None. */
+	FILE *fp = self->file;
+
+	if (fp != NULL) {
+		self->file = NULL;
+		fclose(fp);
+	}
+	Py_RETURN_NONE;
+}
+
+/* .file_size property */
+static PyObject *Rocket_get_file_size(Rocket *self)
+{
+	/* Return the size of the backing file in bytes.
+
+	   Raises AttributeError if no file is open, and OSError if the
+	   size can't be determined.  The result is cached in
+	   self->file_size once it has been computed. */
+	long oldpos, size;
+
+	if (!self->file) {
+		PyErr_SetString(PyExc_AttributeError, "no file");
+		return NULL;
+	}
+	if (self->file_size >= 0)
+		return PyInt_FromLong(self->file_size);
+
+	/* Seek to the end to learn the size, then restore the previous
+	   position.  ftell() returns a long, so keep positions in
+	   longs rather than truncating them to int. */
+	if (((oldpos = ftell(self->file)) < 0) ||
+	    (fseek(self->file, 0, SEEK_END) < 0) ||
+	    ((size = ftell(self->file)) < 0) ||
+	    (fseek(self->file, oldpos, SEEK_SET) < 0)) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+
+	/* The cache field is an int; a size that doesn't fit is still
+	   returned correctly, it just isn't cached. */
+	if (size <= INT_MAX)
+		self->file_size = (int)size;
+	return PyInt_FromLong(size);
+}
+
+/****
+ * Append from string
+ */
+/* Base-10 wrappers around strtoll/strtoull, with the two-argument
+   signature the parsing macro in Rocket_append_string expects.  They
+   return the full-width result; returning "long int" would truncate
+   64-bit values on platforms where long is 32 bits, and would make
+   the unsigned variant signed. */
+static inline long long strtoll10(const char *nptr, char **endptr) {
+	return strtoll(nptr, endptr, 10);
+}
+static inline unsigned long long strtoull10(const char *nptr, char **endptr) {
+	return strtoull(nptr, endptr, 10);
+}
+
+/* .append_string(count, data, offset, linenum, start, end, last_timestamp) */
+static PyObject *Rocket_append_string(Rocket *self, PyObject *args)
+{
+	/* Parse up to 'count' rows of ASCII data, starting at
+	   data[offset], and append them to the file in binary form.
+
+	   args: (count, data, offset, linenum, start, end,
+	          last_timestamp)
+
+	   Each row is a timestamp followed by layout_count values.
+	   Lines starting with '#' are skipped.  Every timestamp must
+	   be greater than the previous one (initially last_timestamp),
+	   at least 'start', and below 'end'.
+
+	   Returns (written, offset, last_timestamp, linenum), where
+	   'offset' is the position in 'data' at which parsing stopped.
+
+	   Raises ParseError with the value (line, column, code,
+	   detail) for bad input, OSError if writing fails, and
+	   TypeError for an unknown layout type.  Values written
+	   before an error was detected remain in the file. */
+	int count;
+	const char *data;
+	int offset;
+	const char *linestart;
+	int linenum;
+        long long ll1, ll2, ll3;
+	timestamp_t start;
+	timestamp_t end;
+	timestamp_t last_timestamp;
+
+	int written = 0;
+	char *endptr;
+	union8_t t8;
+	union16_t t16;
+	union32_t t32;
+	union64_t t64;
+	int i;
+
+	/* It would be nice to use 't#' instead of 's' for data,
+	   but we need the null termination for strto*.  If we had
+	   strnto* that took a length, we could use t# and not require
+	   a copy. */
+	if (!PyArg_ParseTuple(args, "isiiLLL:append_string", &count,
+			      &data, &offset, &linenum,
+			      &ll1, &ll2, &ll3))
+		return NULL;
+        start = ll1;
+        end = ll2;
+        last_timestamp = ll3;
+
+	/* Skip spaces, but don't skip over a newline. */
+#define SKIP_BLANK(buf) do {			\
+	while (isspace(*buf)) {			\
+		if (*buf == '\n')		\
+			break;			\
+		buf++;				\
+	} } while(0)
+
+	const char *buf = &data[offset];
+	while (written < count && *buf)
+	{
+		linestart = buf;
+		linenum++;
+
+		/* Skip leading whitespace and commented lines */
+		SKIP_BLANK(buf);
+		if (*buf == '#') {
+			while (*buf && *buf != '\n')
+				buf++;
+			if (*buf)
+				buf++;
+			continue;
+		}
+
+		/* Extract timestamp */
+		t64.i = strtoll(buf, &endptr, 10);
+		if (endptr == buf || !isspace(*endptr)) {
+			/* Try parsing as a double instead */
+			t64.d = strtod(buf, &endptr);
+			if (endptr == buf)
+				goto bad_timestamp;
+			if (!isspace(*endptr))
+				goto cant_parse_value;
+			t64.i = round(t64.d);
+		}
+		if (t64.i <= last_timestamp)
+			return raise_int(linenum, buf - linestart + 1,
+					 ERR_NON_MONOTONIC, t64.i);
+		last_timestamp = t64.i;
+		if (t64.i < start || t64.i >= end)
+			return raise_int(linenum, buf - linestart + 1,
+					 ERR_OUT_OF_INTERVAL, t64.i);
+		/* Timestamps are stored little-endian */
+		t64.u = le64toh(t64.u);
+		if (fwrite(&t64.u, 8, 1, self->file) != 1)
+			goto err;
+		buf = endptr;
+
+		/* Parse all values in the line.
+
+		   CS() expands to one switch case per layout type:
+		     parsefunc: (buf, &endptr) -> parsed number
+		     parsetype: wide variable receiving the parsed number
+		     realtype:  variable of the value's real type
+		     disktype:  unsigned view of realtype, written to disk
+		     letoh:     byte-order conversion (empty for 1 byte)
+		     bytes:     size of the value on disk */
+		switch (self->layout_type) {
+#define CS(type, parsefunc, parsetype, realtype, disktype, letoh, bytes) \
+		case LAYOUT_TYPE_##type:				\
+			/* parse and write in a loop */			\
+			for (i = 0; i < self->layout_count; i++) {	\
+				/* skip non-newlines */			\
+				SKIP_BLANK(buf);			\
+				/* The row is too short if the line,	\
+				   or the whole input, ends here.  At	\
+				   end of input the parse function	\
+				   would consume nothing and yield 0. */ \
+				if (*buf == '\n' || *buf == '\0')	\
+					goto wrong_number_of_values;	\
+				/* parse number */			\
+				parsetype = parsefunc(buf, &endptr);	\
+				if (*endptr && !isspace(*endptr))	\
+					goto cant_parse_value;		\
+				/* check limits */			\
+				if (type##_MIN != type##_MAX &&		\
+				    (parsetype < type##_MIN ||		\
+				     parsetype > type##_MAX))		\
+					goto value_out_of_range;	\
+				/* convert to disk representation */	\
+				realtype = parsetype;			\
+				disktype = letoh(disktype);		\
+				/* write it */				\
+				if (fwrite(&disktype, bytes,		\
+					   1, self->file) != 1)		\
+					goto err;			\
+				/* advance buf */			\
+				buf = endptr;				\
+			}						\
+			/* Skip trailing whitespace and comments */	\
+			SKIP_BLANK(buf);				\
+			if (*buf == '#')				\
+				while (*buf && *buf != '\n')		\
+					buf++;				\
+			if (*buf == '\n')				\
+				buf++;					\
+			else if (*buf != '\0')				\
+				goto extra_data_on_line;		\
+			break
+
+			CS(INT8,   strtoll10,  t64.i, t8.i,  t8.u,         , 1);
+			CS(UINT8,  strtoull10, t64.u, t8.u,  t8.u,         , 1);
+			CS(INT16,  strtoll10,  t64.i, t16.i, t16.u, le16toh, 2);
+			CS(UINT16, strtoull10, t64.u, t16.u, t16.u, le16toh, 2);
+			CS(INT32,  strtoll10,  t64.i, t32.i, t32.u, le32toh, 4);
+			CS(UINT32, strtoull10, t64.u, t32.u, t32.u, le32toh, 4);
+			CS(INT64,  strtoll10,  t64.i, t64.i, t64.u, le64toh, 8);
+			CS(UINT64, strtoull10, t64.u, t64.u, t64.u, le64toh, 8);
+			CS(FLOAT32, strtod,   t64.d, t32.f, t32.u, le32toh, 4);
+			CS(FLOAT64, strtod,   t64.d, t64.d, t64.u, le64toh, 8);
+#undef CS
+		default:
+			PyErr_SetString(PyExc_TypeError, "unknown type");
+			return NULL;
+		}
+
+		/* Done this line */
+		written++;
+	}
+
+	fflush(self->file);
+
+	/* Build return value and return */
+	offset = buf - data;
+	PyObject *o;
+	o = Py_BuildValue("(iiLi)", written, offset,
+                          (long long)last_timestamp, linenum);
+	return o;
+err:
+	PyErr_SetFromErrno(PyExc_OSError);
+	return NULL;
+bad_timestamp:
+	return raise_str(linenum, buf - linestart + 1,
+			 ERR_OTHER, "bad timestamp");
+cant_parse_value:
+	return raise_str(linenum, buf - linestart + 1,
+			 ERR_OTHER, "can't parse value");
+wrong_number_of_values:
+	return raise_str(linenum, buf - linestart + 1,
+			 ERR_OTHER, "wrong number of values");
+value_out_of_range:
+	return raise_str(linenum, buf - linestart + 1,
+			 ERR_OTHER, "value out of range");
+extra_data_on_line:
+	return raise_str(linenum, buf - linestart + 1,
+			 ERR_OTHER, "extra data on line");
+}
+
+/****
+ * Append from binary data
+ */
+
+/* .append_binary(count, data, offset, linenum, start, end, last_timestamp) */
+/*
+ * Validate and append raw little-endian binary rows to the open file.
+ *
+ * Python arguments: (count, data, offset, linenum, start, end,
+ * last_timestamp).  At most 'count' whole rows of self->binary_size
+ * bytes each are taken from 'data', starting 'offset' bytes in.  The
+ * first 8 bytes of every row are read as a little-endian timestamp,
+ * which must be strictly greater than the previous timestamp and lie
+ * in [start, end).
+ *
+ * Returns the tuple (rows_written, new_offset, last_timestamp,
+ * linenum); linenum is passed through unchanged.  Returns NULL with
+ * an exception set on a bad offset, a timestamp violation (reported
+ * through raise_str / raise_int), a missing file, or a failed write.
+ */
+static PyObject *Rocket_append_binary(Rocket *self, PyObject *args)
+{
+	int count;
+	const uint8_t *data;
+	int data_len;
+	int linenum;
+	int offset;
+	long long ll1, ll2, ll3;
+	timestamp_t start;
+	timestamp_t end;
+	timestamp_t last_timestamp;
+
+	if (!PyArg_ParseTuple(args, "it#iiLLL:append_binary",
+			      &count, &data, &data_len, &offset,
+			      &linenum, &ll1, &ll2, &ll3))
+		return NULL;
+	start = ll1;
+	end = ll2;
+	last_timestamp = ll3;
+
+	/* Same guard as the extract_* methods: never touch a NULL file */
+	if (!self->file) {
+		PyErr_SetString(PyExc_Exception, "no file");
+		return NULL;
+	}
+
+	/* Advance to offset.  A negative offset must be rejected too,
+	   otherwise 'data' would point before the start of the buffer. */
+	if (offset < 0 || offset > data_len)
+		return raise_str(0, 0, ERR_OTHER, "bad offset");
+	data += offset;
+	data_len -= offset;
+
+	/* Figure out max number of rows to insert.  A negative count is
+	   treated as "no rows" so it can never reach fwrite() as a huge
+	   unsigned value. */
+	int rows = data_len / self->binary_size;
+	if (rows > count)
+		rows = count;
+	if (rows < 0)
+		rows = 0;
+
+	/* Check timestamps */
+	timestamp_t ts;
+	int i;
+	for (i = 0; i < rows; i++) {
+		/* Read raw timestamp, byteswap if needed */
+		memcpy(&ts, &data[i * self->binary_size], 8);
+		ts = le64toh(ts);
+
+		/* Check limits */
+		if (ts <= last_timestamp)
+			return raise_int(i, 0, ERR_NON_MONOTONIC, ts);
+		last_timestamp = ts;
+		if (ts < start || ts >= end)
+			return raise_int(i, 0, ERR_OUT_OF_INTERVAL, ts);
+	}
+
+	/* Write binary data; all rows were validated above, so nothing
+	   is written if any timestamp was rejected. */
+	if (fwrite(data, self->binary_size, rows, self->file)
+	    != (size_t)rows) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+	fflush(self->file);
+
+	/* Build return value and return */
+	PyObject *o;
+	o = Py_BuildValue("(iiLi)", rows, offset + rows * self->binary_size,
+			  (long long)last_timestamp, linenum);
+	return o;
+}
+
+/****
+ * Extract to string
+ */
+
+/*
+ * Read 'count' rows from the file, starting at byte 'offset', and
+ * return them as one ASCII string.
+ *
+ * Python arguments: (offset, count).  Each output line holds the
+ * row's timestamp printed as a signed decimal integer, followed by
+ * self->layout_count values, each preceded by a single space and
+ * formatted according to self->layout_type, then a newline.
+ *
+ * Returns a new Python string, or NULL with an exception set:
+ * Exception if no file is open, TypeError for an unknown layout
+ * type, and OSError (built from errno) for seek, read, allocation
+ * or formatting failures.
+ */
+static PyObject *Rocket_extract_string(Rocket *self, PyObject *args)
+{
+	long count;
+	long offset;
+
+	if (!PyArg_ParseTuple(args, "ll", &offset, &count))
+		return NULL;
+	if (!self->file) {
+		PyErr_SetString(PyExc_Exception, "no file");
+		return NULL;
+	}
+	/* Seek to target location */
+	if (fseek(self->file, offset, SEEK_SET) < 0) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+
+	/* Output buffer: 'str' has 'len_alloc' bytes allocated, of which
+	   the first 'len' are in use.  It stays NULL until the first row. */
+	char *str = NULL, *new;
+	long len_alloc = 0;
+	long len = 0;
+	int ret;
+
+	/* min space free in string (and the maximum length of one
+	   line); this is generous */
+	const int min_free = 32 * MAX_LAYOUT_COUNT;
+
+	/* how much to allocate at once */
+	const int alloc_size = 1048576;
+
+	int row, i;
+	/* Scratch unions used to reinterpret the raw on-disk bytes as
+	   the signed, unsigned or floating-point member being printed */
+	union8_t t8;
+	union16_t t16;
+	union32_t t32;
+	union64_t t64;
+	for (row = 0; row < count; row++) {
+		/* Make sure there's space for a line */
+		if ((len_alloc - len) < min_free) {
+			/* grow by 1 meg at a time */
+			len_alloc += alloc_size;
+			new = realloc(str, len_alloc);
+			if (new == NULL)
+				goto err;
+			str = new;
+		}
+
+		/* Read and print timestamp */
+		if (fread(&t64.u, 8, 1, self->file) != 1)
+			goto err;
+		t64.u = le64toh(t64.u);
+		ret = sprintf(&str[len], "%" PRId64, t64.i);
+		if (ret <= 0)
+			goto err;
+		len += ret;
+
+		/* Read and print values */
+		switch (self->layout_type) {
+		/* CASE expands to one switch case per layout type:
+		     type     - suffix of the LAYOUT_TYPE_ constant
+		     fmt      - printf format for a single value
+		     fmttype  - union member passed to sprintf
+		     disktype - union member the raw bytes are read into
+		     letoh    - little-endian-to-host conversion (empty
+		                for single-byte types)
+		     bytes    - size of one value on disk */
+#define CASE(type, fmt, fmttype, disktype, letoh, bytes)		\
+		case LAYOUT_TYPE_##type:				\
+			/* read and format in a loop */			\
+			for (i = 0; i < self->layout_count; i++) {	\
+				if (fread(&disktype, bytes,		\
+					  1, self->file) != 1)		\
+					goto err;			\
+				disktype = letoh(disktype);		\
+				ret = sprintf(&str[len], " " fmt,	\
+					      fmttype);			\
+				if (ret <= 0)				\
+					goto err;			\
+				len += ret;				\
+			}						\
+			break
+			CASE(INT8,   "%" PRId8,  t8.i,  t8.u,         , 1);
+			CASE(UINT8,  "%" PRIu8,  t8.u,  t8.u,         , 1);
+			CASE(INT16,  "%" PRId16, t16.i, t16.u, le16toh, 2);
+			CASE(UINT16, "%" PRIu16, t16.u, t16.u, le16toh, 2);
+			CASE(INT32,  "%" PRId32, t32.i, t32.u, le32toh, 4);
+			CASE(UINT32, "%" PRIu32, t32.u, t32.u, le32toh, 4);
+			CASE(INT64,  "%" PRId64, t64.i, t64.u, le64toh, 8);
+			CASE(UINT64, "%" PRIu64, t64.u, t64.u, le64toh, 8);
+			/* These next two are a bit debatable.  Floats
+			   carry 6-9 significant decimal digits, so we
+			   print 7 ("%.6e").  Doubles carry 15-17, so we
+			   print 17 ("%.16e").  This is similar to the
+			   old prep format for float32.
+			*/
+			CASE(FLOAT32, "%.6e",  t32.f, t32.u, le32toh, 4);
+			CASE(FLOAT64, "%.16e", t64.d, t64.u, le64toh, 8);
+#undef CASE
+		default:
+			PyErr_SetString(PyExc_TypeError, "unknown type");
+			if (str) free(str);
+			return NULL;
+		}
+		/* Terminate the row */
+		str[len++] = '\n';
+	}
+
+	/* Copy the text into a Python string; our buffer is then
+	   no longer needed */
+	PyObject *pystr = PyString_FromStringAndSize(str, len);
+	free(str);
+	return pystr;
+err:
+	/* Shared failure path: release the buffer and report errno */
+	if (str) free(str);
+	PyErr_SetFromErrno(PyExc_OSError);
+	return NULL;
+}
+
+/****
+ * Extract to binary string containing raw little-endian binary data
+ */
+/*
+ * Read 'count' rows from the file, starting at byte 'offset', and
+ * return them unmodified as a Python byte string.
+ *
+ * Python arguments: (offset, count).  Each row is self->binary_size
+ * bytes.  Returns NULL with an exception set: Exception if no file
+ * is open, ValueError for a negative count, OverflowError if the
+ * total size cannot be represented, and OSError (built from errno)
+ * for seek, allocation or read failures.
+ */
+static PyObject *Rocket_extract_binary(Rocket *self, PyObject *args)
+{
+	long count;
+	long offset;
+
+	if (!PyArg_ParseTuple(args, "ll", &offset, &count))
+		return NULL;
+	if (!self->file) {
+		PyErr_SetString(PyExc_Exception, "no file");
+		return NULL;
+	}
+	/* A negative count would become a huge size once converted
+	   to an unsigned type below, so reject it up front */
+	if (count < 0) {
+		PyErr_SetString(PyExc_ValueError,
+				"count must not be negative");
+		return NULL;
+	}
+	/* Make sure count * binary_size fits in a Py_ssize_t, rather
+	   than silently truncating it into an int */
+	if (self->binary_size > 0 &&
+	    (size_t)count > (size_t)PY_SSIZE_T_MAX /
+			    (size_t)self->binary_size) {
+		PyErr_SetString(PyExc_OverflowError,
+				"requested data is too large");
+		return NULL;
+	}
+	/* Seek to target location */
+	if (fseek(self->file, offset, SEEK_SET) < 0) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+
+	uint8_t *str;
+	size_t len = (size_t)count * (size_t)self->binary_size;
+	/* malloc(0) is allowed to return NULL; always ask for at least
+	   one byte so that a NULL result really means failure */
+	str = malloc(len ? len : 1);
+	if (str == NULL) {
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+
+	/* Data in the file is already in the desired little-endian
+	   binary format, so just read it directly. */
+	if (fread(str, self->binary_size, count, self->file)
+	    != (size_t)count) {
+		free(str);
+		PyErr_SetFromErrno(PyExc_OSError);
+		return NULL;
+	}
+
+	PyObject *pystr = PyBytes_FromStringAndSize((char *)str,
+						    (Py_ssize_t)len);
+	free(str);
+	return pystr;
+}
+
+/****
+ * Extract timestamp
+ */
+/*
+ * Return the 64-bit timestamp stored at byte position 'offset' of
+ * the open file.
+ *
+ * Python arguments: (offset,).  The 8 bytes found there are treated
+ * as little-endian and returned as a signed Python integer.  Returns
+ * NULL with Exception set if no file is open, or with OSError set if
+ * the seek or the read fails.
+ */
+static PyObject *Rocket_extract_timestamp(Rocket *self, PyObject *args)
+{
+	union64_t raw;
+	long pos;
+
+	if (!PyArg_ParseTuple(args, "l", &pos))
+		return NULL;
+
+	if (self->file == NULL) {
+		PyErr_SetString(PyExc_Exception, "no file");
+		return NULL;
+	}
+
+	/* Position the file first; only read if the seek worked */
+	if (fseek(self->file, pos, SEEK_SET) < 0)
+		goto io_error;
+	if (fread(&raw.u, 8, 1, self->file) != 1)
+		goto io_error;
+
+	/* Byteswap to host order and hand back the signed view */
+	raw.u = le64toh(raw.u);
+	return Py_BuildValue("L", (long long)raw.i);
+
+io_error:
+	PyErr_SetFromErrno(PyExc_OSError);
+	return NULL;
+}
+
+/****
+ * Module and type setup
+ */
+
+/* Computed attributes of Rocket objects.  "file_size" has a getter
+   but no setter (NULL), so it cannot be assigned from Python.  The
+   table ends with an all-NULL sentinel entry. */
+static PyGetSetDef Rocket_getsetters[] = {
+	{ "file_size", (getter)Rocket_get_file_size, NULL,
+	  "file size in bytes", NULL },
+	{ NULL },
+};
+
+/* Struct fields of Rocket exposed directly as Python attributes:
+   "binary_size" maps onto the int field Rocket.binary_size.  The
+   table ends with an all-NULL sentinel entry. */
+static PyMemberDef Rocket_members[] = {
+	{ "binary_size", T_INT, offsetof(Rocket, binary_size), 0,
+	  "binary size per row" },
+	{ NULL },
+};
+
+/* Method table for Rocket objects.  Each entry gives the Python
+   method name, the C function implementing it, its calling
+   convention, and the docstring shown by help().  The table ends
+   with an all-NULL sentinel entry. */
+static PyMethodDef Rocket_methods[] = {
+	{ "close",
+          (PyCFunction)Rocket_close, METH_NOARGS,
+	  "close(self)\n\n"
+	  "Close file handle" },
+
+	{ "append_string",
+          (PyCFunction)Rocket_append_string, METH_VARARGS,
+	  "append_string(self, count, data, offset, line, start, end, ts)\n\n"
+          "Parse string and append data.\n"
+	  "\n"
+	  "  count: maximum number of rows to add\n"
+          "  data: string data\n"
+          "  offset: byte offset into data to start parsing\n"
+          "  line: current line number of data\n"
+          "  start: starting timestamp for interval\n"
+          "  end: end timestamp for interval\n"
+          "  ts: last timestamp that was previously parsed\n"
+	  "\n"
+	  "Raises ParseError if timestamps are non-monotonic, outside\n"
+	  "the start/end interval etc.\n"
+	  "\n"
+          "On success, return a tuple:\n"
+          "  added_rows: how many rows were added from the file\n"
+          "  data_offset: current offset into the data string\n"
+          "  last_timestamp: last timestamp we parsed\n"
+          "  linenum: current line number" },
+
+	{ "append_binary",
+	  (PyCFunction)Rocket_append_binary, METH_VARARGS,
+	  "append_binary(self, count, data, offset, line, start, end, ts)\n\n"
+          "Append binary data, which must match the data layout.\n"
+	  "\n"
+	  "  count: maximum number of rows to add\n"
+          "  data: binary data\n"
+          "  offset: byte offset into data to start adding\n"
+          "  line: current line number (unused)\n"
+          "  start: starting timestamp for interval\n"
+          "  end: end timestamp for interval\n"
+          "  ts: last timestamp that was previously parsed\n"
+	  "\n"
+	  "Raises ParseError if timestamps are non-monotonic, outside\n"
+	  "the start/end interval etc.\n"
+	  "\n"
+          "On success, return a tuple:\n"
+          "  added_rows: how many rows were added from the file\n"
+          "  data_offset: current offset into the data string\n"
+          "  last_timestamp: last timestamp we parsed\n"
+          "  linenum: current line number (copied from argument)" },
+
+	{ "extract_string",
+          (PyCFunction)Rocket_extract_string, METH_VARARGS,
+	  "extract_string(self, offset, count)\n\n"
+	  "Extract count rows of data from the file at offset offset.\n"
+	  "Return an ascii formatted string according to the layout" },
+
+	{ "extract_binary",
+	  (PyCFunction)Rocket_extract_binary, METH_VARARGS,
+	  "extract_binary(self, offset, count)\n\n"
+	  "Extract count rows of data from the file at offset offset.\n"
+	  "Return a raw binary string of data matching the data layout." },
+
+	{ "extract_timestamp",
+	  (PyCFunction)Rocket_extract_timestamp, METH_VARARGS,
+	  "extract_timestamp(self, offset)\n\n"
+	  "Extract a single timestamp from the file" },
+
+	{ NULL },
+};
+
+/* Type object for rocket.Rocket.  Only the slots named below are
+   set; the designated initializers leave every other slot zeroed.
+   Py_TPFLAGS_BASETYPE allows the type to be subclassed from Python.
+   NOTE(review): initrocket() assigns PyType_GenericNew to tp_new
+   before calling PyType_Ready(), replacing the Rocket_new set here
+   -- confirm which constructor is intended. */
+static PyTypeObject RocketType = {
+	PyObject_HEAD_INIT(NULL)
+
+	.tp_name	= "rocket.Rocket",
+	.tp_basicsize	= sizeof(Rocket),
+	.tp_flags	= Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
+
+	.tp_new		= Rocket_new,
+	.tp_dealloc	= (destructor)Rocket_dealloc,
+	.tp_init	= (initproc)Rocket_init,
+	.tp_methods	= Rocket_methods,
+	.tp_members	= Rocket_members,
+	.tp_getset	= Rocket_getsetters,
+
+	.tp_doc		= ("rocket.Rocket(layout, file)\n\n"
+			   "C implementation of the \"rocket\" data parsing\n"
+			   "interface, which translates between the binary\n"
+			   "format on disk and the ASCII or Python list\n"
+			   "format used when communicating with the rest of\n"
+			   "the system.")
+};
+
+/* Module-level function table.  It holds only the NULL sentinel, so
+   the module defines no functions of its own; everything is reached
+   through the objects added in initrocket(). */
+static PyMethodDef module_methods[] = {
+	{ NULL },
+};
+
+/*
+ * Module initialization, called by Python when "rocket" is imported.
+ *
+ * Finalizes the Rocket type, creates the module, and adds the
+ * "Rocket" type, the "ParseError" exception and the parse error
+ * codes to it.  On any failure it returns early, leaving the
+ * exception set by the failing call for the import machinery.
+ */
+PyMODINIT_FUNC
+initrocket(void)
+{
+	PyObject *module;
+
+	/* NOTE(review): this overrides the .tp_new = Rocket_new set in
+	   the RocketType initializer, so Rocket_new is never installed
+	   -- confirm that this is intentional. */
+	RocketType.tp_new = PyType_GenericNew;
+	if (PyType_Ready(&RocketType) < 0)
+		return;
+
+	module = Py_InitModule3("rocket", module_methods,
+				"Rocket data parsing and formatting module");
+	/* Py_InitModule3 returns NULL on failure; adding objects to a
+	   NULL module would crash */
+	if (module == NULL)
+		return;
+
+	/* PyModule_AddObject steals a reference, hence the INCREFs */
+	Py_INCREF(&RocketType);
+	PyModule_AddObject(module, "Rocket", (PyObject *)&RocketType);
+
+	ParseError = PyErr_NewException("rocket.ParseError", NULL, NULL);
+	/* Py_INCREF must never be applied to NULL */
+	if (ParseError == NULL)
+		return;
+	Py_INCREF(ParseError);
+	PyModule_AddObject(module, "ParseError", ParseError);
+	add_parseerror_codes(module);
+
+	return;
+}
--- a/nilmdb/server/server.py
+++ b/nilmdb/server/server.py
@@ -0,0 +1,541 @@
+"""CherryPy-based server for accessing NILM database via HTTP"""
+
+# Need absolute_import so that "import nilmdb" won't pull in
+# nilmdb.py, but will pull the nilmdb module instead.
+from __future__ import absolute_import
+import nilmdb.server
+from nilmdb.utils.printf import *
+from nilmdb.server.errors import NilmDBError
+from nilmdb.utils.time import string_to_timestamp
+
+import cherrypy
+import sys
+import os
+import socket
+import simplejson as json
+import decorator
+import psutil
+import traceback
+
+from nilmdb.server.serverutil import (
+    chunked_response,
+    response_type,
+    workaround_cp_bug_1200,
+    exception_to_httperror,
+    CORS_allow,
+    json_to_request_params,
+    json_error_page,
+    cherrypy_start,
+    cherrypy_stop,
+    bool_param,
+    )
+
+# Add CORS_allow tool
+cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow)
+
+class NilmApp(object):
+    """Common base for the CherryPy application classes; it just
+    stores the database object for the request handlers to use."""
+    def __init__(self, db):
+        # Kept as self.db; Root and Stream call methods on it
+        self.db = db
+
+# CherryPy apps
+class Root(NilmApp):
+    """Top-level CherryPy application: serves the greeting page,
+    the version number and the database disk usage summary."""
+
+    def __init__(self, db):
+        super(Root, self).__init__(db)
+
+    # /
+    @cherrypy.expose
+    def index(self):
+        """Return a plain-text greeting with the version and host name."""
+        cherrypy.response.headers['Content-Type'] = 'text/plain'
+        return sprintf("This is NilmDB version %s, running on host %s.\n",
+                       nilmdb.__version__, socket.getfqdn())
+
+    # /favicon.ico
+    @cherrypy.expose
+    def favicon_ico(self):
+        """There is no favicon; always answer with a 404."""
+        raise cherrypy.NotFound()
+
+    # /version
+    @cherrypy.expose
+    @cherrypy.tools.json_out()
+    def version(self):
+        """Return the NilmDB version string as JSON."""
+        return nilmdb.__version__
+
+    # /dbinfo
+    @cherrypy.expose
+    @cherrypy.tools.json_out()
+    def dbinfo(self):
+        """Return a dictionary describing disk usage for the database:
+        its path, its own size in bytes, the bytes used by other data,
+        the reserved bytes and the free bytes on the same disk."""
+        basepath = self.db.get_basepath()
+        disk = psutil.disk_usage(basepath)
+        db_bytes = nilmdb.utils.du(basepath)
+        info = {}
+        info["path"] = basepath
+        info["size"] = db_bytes
+        # Space in use on the disk that isn't ours
+        info["other"] = disk.used - db_bytes
+        # Whatever is neither used nor free
+        info["reserved"] = disk.total - disk.used - disk.free
+        info["free"] = disk.free
+        return info
+
+class Stream(NilmApp):
+    """Stream-specific operations, mounted under /stream.  Each
+    exposed method maps one HTTP endpoint onto the matching call on
+    the database object stored in self.db."""
+
+    # Helpers
+    def _get_times(self, start_param, end_param):
+        """Convert the 'start' and 'end' request parameters into
+        timestamps using string_to_timestamp.  Either may be None, in
+        which case None is returned in its place.  Raises a 400 error
+        if both are given and start does not precede end."""
+        (start, end) = (None, None)
+        if start_param is not None:
+            start = string_to_timestamp(start_param)
+        if end_param is not None:
+            end = string_to_timestamp(end_param)
+        if start is not None and end is not None:
+            if start >= end:
+                raise cherrypy.HTTPError(
+                    "400 Bad Request",
+                    sprintf("start must precede end (%s >= %s)",
+                            start_param, end_param))
+        return (start, end)
+
+    # /stream/list
+    # /stream/list?layout=float32_8
+    # /stream/list?path=/newton/prep&extended=1
+    @cherrypy.expose
+    @cherrypy.tools.json_out()
+    def list(self, path = None, layout = None, extended = None):
+        """List all streams in the database.  With optional path or
+        layout parameter, just list streams that match the given path
+        or layout.
+
+        If extended is missing or zero, returns a list of lists
+        containing the path and layout: [ path, layout ]
+
+        If extended is true, returns a list of lists containing
+        extended info: [ path, layout, extent_min, extent_max,
+        total_rows, total_seconds ].  More data may be added.
+        """
+        # Query parameters arrive as strings, and bool("0") is True,
+        # so parse the flag the same way insert and extract do.
+        return self.db.stream_list(path, layout, bool_param(extended))
+
+    # /stream/create?path=/newton/prep&layout=float32_8
+    @cherrypy.expose
+    @cherrypy.tools.json_in()
+    @cherrypy.tools.json_out()
+    @exception_to_httperror(NilmDBError, ValueError)
+    @cherrypy.tools.CORS_allow(methods = ["POST"])
+    def create(self, path, layout):
+        """Create a new stream in the database.  Provide path
+        and one of the nilmdb.layout.layouts keys.
+        """
+        return self.db.stream_create(path, layout)
+
+    # /stream/destroy?path=/newton/prep
+    @cherrypy.expose
+    @cherrypy.tools.json_in()
+    @cherrypy.tools.json_out()
+    @exception_to_httperror(NilmDBError)
+    @cherrypy.tools.CORS_allow(methods = ["POST"])
+    def destroy(self, path):
+        """Delete a stream.  Fails if any data is still present."""
+        return self.db.stream_destroy(path)
+
+    # /stream/rename?oldpath=/newton/prep&newpath=/newton/prep/1
+    @cherrypy.expose
+    @cherrypy.tools.json_in()
+    @cherrypy.tools.json_out()
+    @exception_to_httperror(NilmDBError, ValueError)
+    @cherrypy.tools.CORS_allow(methods = ["POST"])
+    def rename(self, oldpath, newpath):
+        """Rename a stream."""
+        return self.db.stream_rename(oldpath, newpath)
+
+    # /stream/get_metadata?path=/newton/prep
+    # /stream/get_metadata?path=/newton/prep&key=foo&key=bar
+    @cherrypy.expose
+    @cherrypy.tools.json_out()
+    def get_metadata(self, path, key=None):
+        """Get metadata for the named stream.  If optional
+        key parameters are specified, only return metadata
+        matching the given keys."""
+        try:
+            data = self.db.stream_get_metadata(path)
+        except nilmdb.server.nilmdb.StreamError as e:
+            raise cherrypy.HTTPError("404 Not Found", e.message)
+        if key is None:  # If no keys specified, return them all
+            key = data.keys()
+        elif not isinstance(key, list):
+            key = [ key ]
+        result = {}
+        for k in key:
+            if k in data:
+                result[k] = data[k]
+            else: # Return "None" for keys with no matching value
+                result[k] = None
+        return result
+
+    # Helper for set_metadata and update_metadata
+    def _metadata_helper(self, function, path, data):
+        """Validate 'data' and pass it to 'function' along with 'path'.
+
+        'data' may be a dictionary already, or a JSON-encoded string
+        describing one.  Raises NilmDBError if it cannot be decoded
+        into a dictionary, or if any value is not a string or number."""
+        if not isinstance(data, dict):
+            try:
+                data = dict(json.loads(data))
+            except (TypeError, ValueError) as e:
+                # Malformed JSON raises ValueError rather than
+                # TypeError; report both as a NilmDBError so the
+                # client gets a proper error response.
+                raise NilmDBError("can't parse 'data' parameter: " + str(e))
+        for key in data:
+            if not (isinstance(data[key], basestring) or
+                    isinstance(data[key], float) or
+                    isinstance(data[key], int)):
+                raise NilmDBError("metadata values must be a string or number")
+        function(path, data)
+
+    # /stream/set_metadata?path=/newton/prep&data=<json>
+    @cherrypy.expose
+    @cherrypy.tools.json_in()
+    @cherrypy.tools.json_out()
+    @exception_to_httperror(NilmDBError, LookupError)
+    @cherrypy.tools.CORS_allow(methods = ["POST"])
+    def set_metadata(self, path, data):
+        """Set metadata for the named stream, replacing any existing
+        metadata.  Data can be json-encoded or a plain dictionary."""
+        self._metadata_helper(self.db.stream_set_metadata, path, data)
+
+    # /stream/update_metadata?path=/newton/prep&data=<json>
+    @cherrypy.expose
+    @cherrypy.tools.json_in()
+    @cherrypy.tools.json_out()
+    @exception_to_httperror(NilmDBError, LookupError, ValueError)
+    @cherrypy.tools.CORS_allow(methods = ["POST"])
+    def update_metadata(self, path, data):
+        """Update metadata for the named stream by passing the data
+        to the database's stream_update_metadata (presumably merging
+        it into the existing metadata rather than replacing it --
+        confirm against the database implementation).  Data can be
+        json-encoded or a plain dictionary."""
+        self._metadata_helper(self.db.stream_update_metadata, path, data)
+
+    # /stream/insert?path=/newton/prep
+    @cherrypy.expose
+    @cherrypy.tools.json_out()
+    @exception_to_httperror(NilmDBError, ValueError)
+    @cherrypy.tools.CORS_allow(methods = ["PUT"])
+    def insert(self, path, start, end, binary = False):
+        """
+        Insert new data into the database.  Provide textual data
+        (matching the path's layout) as a HTTP PUT.
+
+        If 'binary' is True, expect raw binary data, rather than lines
+        of ASCII-formatted data.  Raw binary data is always
+        little-endian and matches the database types (including an
+        int64 timestamp).
+        """
+        binary = bool_param(binary)
+
+        # Important that we always read the input before throwing any
+        # errors, to keep lengths happy for persistent connections.
+        # Note that CherryPy 3.2.2 has a bug where this fails for GET
+        # requests, if we ever want to handle those (issue #1134)
+        body = cherrypy.request.body.read()
+
+        # Verify content type for binary data
+        content_type = cherrypy.request.headers.get('content-type')
+        if binary and content_type:
+            if content_type != "application/octet-stream":
+                raise cherrypy.HTTPError("400", "Content type must be "
+                                         "application/octet-stream for "
+                                         "binary data, not " + content_type)
+
+        # Check path and get layout
+        if len(self.db.stream_list(path = path)) != 1:
+            raise cherrypy.HTTPError("404", "No such stream: " + path)
+
+        # Check limits
+        (start, end) = self._get_times(start, end)
+
+        # Pass the data directly to nilmdb, which will parse it and
+        # raise a ValueError if there are any problems.
+        self.db.stream_insert(path, start, end, body, binary)
+
+        # Done
+        return
+
+    # /stream/remove?path=/newton/prep
+    # /stream/remove?path=/newton/prep&start=1234567890.0&end=1234567899.0
+    @cherrypy.expose
+    @cherrypy.tools.json_in()
+    @cherrypy.tools.CORS_allow(methods = ["POST"])
+    @chunked_response
+    @response_type("application/x-json-stream")
+    def remove(self, path, start = None, end = None):
+        """
+        Remove data from the backend database.  Removes all data in
+        the interval [start, end).
+
+        Returns the number of data points removed.  Since this is a potentially
+        long-running operation, multiple numbers may be returned as the
+        data gets removed from the backend database.  The total number of
+        points removed is the sum of all of these numbers.
+        """
+        (start, end) = self._get_times(start, end)
+
+        if len(self.db.stream_list(path = path)) != 1:
+            raise cherrypy.HTTPError("404", "No such stream: " + path)
+
+        @workaround_cp_bug_1200
+        def content(start, end):
+            # Note: disable chunked responses to see tracebacks from here.
+            while True:
+                (removed, restart) = self.db.stream_remove(path, start, end)
+                yield json.dumps(removed) + "\r\n"
+                if restart is None:
+                    break
+                start = restart
+        return content(start, end)
+
+    # /stream/intervals?path=/newton/prep
+    # /stream/intervals?path=/newton/prep&start=1234567890.0&end=1234567899.0
+    # /stream/intervals?path=/newton/prep&diffpath=/newton/prep2
+    @cherrypy.expose
+    @chunked_response
+    @response_type("application/x-json-stream")
+    def intervals(self, path, start = None, end = None, diffpath = None):
+        """
+        Get intervals from backend database.  Streams the resulting
+        intervals as JSON strings separated by CR LF pairs.  This may
+        make multiple requests to the nilmdb backend to avoid causing
+        it to block for too long.
+
+        Returns intervals between 'start' and 'end' belonging to
+        'path'.  If 'diffpath' is provided, the set-difference between
+        intervals in 'path' and intervals in 'diffpath' are
+        returned instead.
+
+        Note that the response type is the non-standard
+        'application/x-json-stream' for lack of a better option.
+        """
+        (start, end) = self._get_times(start, end)
+
+        if len(self.db.stream_list(path = path)) != 1:
+            raise cherrypy.HTTPError("404", "No such stream: " + path)
+
+        if diffpath and len(self.db.stream_list(path = diffpath)) != 1:
+            raise cherrypy.HTTPError("404", "No such stream: " + diffpath)
+
+        @workaround_cp_bug_1200
+        def content(start, end):
+            # Note: disable chunked responses to see tracebacks from here.
+            while True:
+                (ints, restart) = self.db.stream_intervals(path, start, end,
+                                                           diffpath)
+                response = ''.join([ json.dumps(i) + "\r\n" for i in ints ])
+                yield response
+                if restart is None:
+                    break
+                start = restart
+        return content(start, end)
+
+    # /stream/extract?path=/newton/prep&start=1234567890.0&end=1234567899.0
+    @cherrypy.expose
+    @chunked_response
+    def extract(self, path, start = None, end = None,
+                count = False, markup = False, binary = False):
+        """
+        Extract data from backend database.  Streams the resulting
+        entries as ASCII text lines separated by newlines.  This may
+        make multiple requests to the nilmdb backend to avoid causing
+        it to block for too long.
+
+        If 'count' is True, returns a count rather than actual data.
+
+        If 'markup' is True, adds comments to the stream denoting each
+        interval's start and end timestamp.
+
+        If 'binary' is True, return raw binary data, rather than lines
+        of ASCII-formatted data.  Raw binary data is always
+        little-endian and matches the database types (including an
+        int64 timestamp).
+        """
+        binary = bool_param(binary)
+        markup = bool_param(markup)
+        count = bool_param(count)
+
+        (start, end) = self._get_times(start, end)
+
+        # Check path and get layout
+        if len(self.db.stream_list(path = path)) != 1:
+            raise cherrypy.HTTPError("404", "No such stream: " + path)
+
+        if binary:
+            content_type = "application/octet-stream"
+            if markup or count:
+                raise cherrypy.HTTPError("400", "can't mix binary and "
+                                         "markup or count modes")
+        else:
+            content_type = "text/plain"
+        cherrypy.response.headers['Content-Type'] = content_type
+
+        @workaround_cp_bug_1200
+        def content(start, end):
+            # Note: disable chunked responses to see tracebacks from here.
+            if count:
+                matched = self.db.stream_extract(path, start, end,
+                                                 count = True)
+                yield sprintf("%d\n", matched)
+                return
+
+            while True:
+                (data, restart) = self.db.stream_extract(
+                    path, start, end, count = False,
+                    markup = markup, binary = binary)
+                yield data
+
+                if restart is None:
+                    return
+                start = restart
+        return content(start, end)
+
+class Exiter(object):
+    """Application whose only page shuts the server down; meant
+    for use by tests."""
+    @cherrypy.expose
+    def index(self):
+        """Stream a short message, then raise SystemExit from inside
+        the response generator."""
+        def farewell():
+            # The message goes out first; the exit is raised when
+            # the generator is resumed for the next chunk.
+            yield 'Exiting by request'
+            raise SystemExit
+        cherrypy.response.headers['Content-Type'] = 'text/plain'
+        return farewell()
+    # Stream the response so the generator runs as output is sent
+    index._cp_config = { 'response.stream': True }
+
+class Server(object):
+    """Configures CherryPy to serve a NilmDB database over HTTP and
+    mounts the Root / Stream (and optionally Exiter) applications."""
+
+    def __init__(self, db, host = '127.0.0.1', port = 8080,
+                 stoppable = False,       # whether /exit URL exists
+                 embedded = True,         # hide diagnostics and output, etc
+                 fast_shutdown = False,   # don't wait for clients to disconn.
+                 force_traceback = False, # include traceback in all errors
+                 basepath = '',           # base URL path for cherrypy.tree
+                 ):
+        """Set up the global CherryPy configuration and mount the
+        application at 'basepath'.  Raises KeyError if 'db' does not
+        have a true '_thread_safe' attribute."""
+        # The version is kept only so tests can verify it
+        self.version = nilmdb.__version__
+        self.embedded = embedded
+        self.db = db
+        # Consulted by json_error_page when building error responses
+        self.force_traceback = force_traceback
+
+        thread_safe = getattr(db, "_thread_safe", None)
+        if not thread_safe:
+            raise KeyError("Database object " + str(db) + " doesn't claim "
+                           "to be thread safe.  You should pass "
+                           "nilmdb.utils.serializer_proxy(NilmDB)(args) "
+                           "rather than NilmDB(args).")
+
+        # Global server configuration
+        global_config = {
+            'server.socket_host': host,
+            'server.socket_port': port,
+            'engine.autoreload_on': False,
+            'server.max_request_body_size': 8*1024*1024,
+            }
+        cherrypy.config.update(global_config)
+        if self.embedded:
+            cherrypy.config.update({ 'environment': 'embedded' })
+
+        # Application specific configuration
+        app_config = {
+            # Errors are reported as JSON so clients can parse them
+            'error_page.default': self.json_error_page,
+
+            # A default header that just helps identify that things
+            # are working
+            'response.headers.X-Jim-Is-Awesome': 'yeah',
+
+            # Cross-Origin Resource Sharing (CORS) handler, so that
+            # browsers' CORS preflight requests get a correct reply.
+            # This also limits verbs to GET and HEAD by default.
+            'tools.CORS_allow.on': True,
+            'tools.CORS_allow.methods': ['GET', 'HEAD'],
+
+            # Let the 'json_in' tool accept other content-types too
+            # (like x-www-form-urlencoded), and have it treat JSON
+            # as a dict that fills request.params.
+            'tools.json_in.force': False,
+            'tools.json_in.processor': json_to_request_params,
+
+            # Send tracebacks in error responses.  The error_page
+            # function hides them for client errors (code 400-499).
+            'request.show_tracebacks' : True,
+            }
+
+        # Patch the CherryPy error handler so it never pads out error
+        # messages.  Not strictly necessary, but neither is padding.
+        cherrypy._cperror._ie_friendly_error_sizes = {}
+
+        # Assemble the application tree and mount it, discarding
+        # anything that was mounted before
+        root = Root(self.db)
+        root.stream = Stream(self.db)
+        if stoppable:
+            root.exit = Exiter()
+        cherrypy.tree.apps = {}
+        cherrypy.tree.mount(root, basepath, config = { "/" : app_config })
+
+        # Shutdowns normally wait for clients to disconnect; tests can
+        # ask for a fast shutdown instead.  A timeout of exactly 0
+        # triggers os._exit(70) at shutdown, hence the small value.
+        cherrypy.server.shutdown_timeout = 0.01 if fast_shutdown else 5
+
+        # WSGI application pointer for external programs
+        self.wsgi_application = cherrypy.tree
+
+    def json_error_page(self, status, message, traceback, version):
+        """Return a custom error page in JSON so the client can parse it"""
+        return json_error_page(status, message, traceback, version,
+                               self.force_traceback)
+
+    def start(self, blocking = False, event = None):
+        """Start the server through cherrypy_start, passing along
+        'blocking', 'event' and the embedded flag."""
+        cherrypy_start(blocking, event, self.embedded)
+
+    def stop(self):
+        """Stop the server through cherrypy_stop."""
+        cherrypy_stop()
+
+# Use a single global nilmdb.server.NilmDB and nilmdb.server.Server
+# instance since the database can only be opened once.  For this to
+# work, the web server must use only a single process and single
+# Python interpreter.  Multiple threads are OK.
+_wsgi_server = None
+def wsgi_application(dbpath, basepath): # pragma: no cover
+    """Return a WSGI application object with a database at the
+    specified path.
+
+    'dbpath' is a filesystem location, e.g. /home/nilm/db
+
+    'basepath' is the URL path of the application base, which
+    is the same as the first argument to Apache's WSGIScriptAlias
+    directive.
+
+    The database and server are created on the first request and
+    kept in the module-level _wsgi_server.  If that fails, the
+    request is answered with a plain-text 500 page describing the
+    failure, and the next request tries again.
+    """
+    def application(environ, start_response):
+        global _wsgi_server
+        if _wsgi_server is None:
+            # Try to start the server
+            try:
+                db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(dbpath)
+                _wsgi_server = nilmdb.server.Server(
+                    db, embedded = True,
+                    basepath = basepath.rstrip('/'))
+            except Exception:
+                # Build an error message on failure
+                import pprint
+                err = sprintf("Initializing database at path '%s' failed:\n\n",
+                              dbpath)
+                err += traceback.format_exc()
+                try:
+                    import pwd
+                    import grp
+                    err += sprintf("\nRunning as: uid=%d (%s), gid=%d (%s) "
+                                   "on host %s, pid %d\n",
+                                   os.getuid(), pwd.getpwuid(os.getuid())[0],
+                                   os.getgid(), grp.getgrgid(os.getgid())[0],
+                                   socket.gethostname(), os.getpid())
+                except (ImportError, KeyError):
+                    # ImportError: pwd/grp are not available here.
+                    # KeyError: the uid or gid has no database entry.
+                    # Either way, just leave out the identity details
+                    # rather than losing the whole error report.
+                    pass
+                err += sprintf("\nEnvironment:\n%s\n", pprint.pformat(environ))
+
+                # Serve up the error with our own mini WSGI app.
+                headers = [ ('Content-type', 'text/plain'),
+                            ('Content-length', str(len(err))) ]
+                start_response("500 Internal Server Error", headers)
+                return [err]
+
+        # Call the normal application
+        return _wsgi_server.wsgi_application(environ, start_response)
+    return application
--- a/nilmdb/server/serverutil.py
+++ b/nilmdb/server/serverutil.py
@@ -0,0 +1,214 @@
+"""Miscellaneous decorators and other helpers for running a CherryPy
+server"""
+
+import cherrypy
+import sys
+import os
+import decorator
+import simplejson as json
+
+# Helper to parse parameters into booleans
+def bool_param(s):
+    """Interpret the request parameter 's' as a boolean.
+
+    Strings are compared case-insensitively against a fixed set of
+    false/true spellings.  Anything without a usable lower() method
+    (for example an actual bool) is passed through bool().  A string
+    that matches neither set raises a "400 Bad Request" HTTPError."""
+    try:
+        lowered = s.lower()
+        spellings = (
+            (False, ( "0", "false", "f", "no", "n" )),
+            (True, ( "1", "true", "t", "yes", "y" )),
+        )
+        for verdict, words in spellings:
+            if lowered in words:
+                return verdict
+    except Exception:
+        # Not string-like: fall back to Python truthiness
+        return bool(s)
+    raise cherrypy.HTTPError("400 Bad Request",
+                             "can't parse parameter: " + lowered)
+
+# Decorators
+def chunked_response(func):
+    """Decorator to enable chunked responses, by attaching a
+    _cp_config with 'response.stream' set to the handler 'func'."""
+    # Change the value below to False to get better tracebacks from
+    # some requests (/stream/extract, /stream/intervals).
+    stream_config = { 'response.stream': True }
+    func._cp_config = stream_config
+    return func
+
+def response_type(content_type):
+    """Build a decorator that sets the response's Content-Type header
+    to 'content_type' just before the decorated handler runs."""
+    def wrapper(func, *args, **kwargs):
+        headers = cherrypy.response.headers
+        headers['Content-Type'] = content_type
+        result = func(*args, **kwargs)
+        return result
+    make_decorator = decorator.decorator
+    return make_decorator(wrapper)
+
+@decorator.decorator
+def workaround_cp_bug_1200(func, *args, **kwargs): # pragma: no cover
+    """Decorator to work around CherryPy bug #1200 in a response
+    generator.
+
+    Even if chunked responses are disabled, LookupError or
+    UnicodeError exceptions may still be swallowed by CherryPy due to
+    bug #1200.  This throws them as generic Exceptions instead so that
+    they make it through.
+
+    The values produced by iterating over func(*args, **kwargs) are
+    re-yielded unchanged, so the decorated handler is a generator.
+    """
+    exc_info = None
+    try:
+        for val in func(*args, **kwargs):
+            yield val
+    except (LookupError, UnicodeError):
+        # Re-raise it, but maintain the original traceback
+        exc_info = sys.exc_info()
+        # The replacement message keeps the original exception's class
+        # name and text, e.g. "KeyError: 'foo'"
+        new_exc = Exception(exc_info[0].__name__ + ": " + str(exc_info[1]))
+        # Python 2 three-argument raise: new exception object, but the
+        # traceback of the original one
+        raise new_exc, None, exc_info[2]
+    finally:
+        # Drop the local reference to the exception info (presumably to
+        # avoid keeping the traceback alive through this frame)
+        del exc_info
+
+def exception_to_httperror(*expected):
+    """Return a decorator-generating function that catches expected
+    errors and throws a HTTPError describing it instead.
+
+        @exception_to_httperror(NilmDBError, ValueError)
+        def foo():
+            pass
+
+    'expected' is the tuple of exception classes to convert.  Each
+    one caught is re-raised as a "400 Bad Request" HTTPError whose
+    message is str() of the original exception.  Exceptions not listed
+    in 'expected' propagate unchanged.
+    """
+    def wrapper(func, *args, **kwargs):
+        exc_info = None
+        try:
+            return func(*args, **kwargs)
+        except expected:
+            # Re-raise it, but maintain the original traceback
+            exc_info = sys.exc_info()
+            new_exc = cherrypy.HTTPError("400 Bad Request", str(exc_info[1]))
+            # Python 2 three-argument raise: new exception object, but
+            # the traceback of the original one
+            raise new_exc, None, exc_info[2]
+        finally:
+            # Drop the local reference to the exception info (presumably
+            # to avoid keeping the traceback alive through this frame)
+            del exc_info
+    # We need to preserve the function's argspecs for CherryPy to
+    # handle argument errors correctly.  Decorator.decorator takes
+    # care of that.
+    return decorator.decorator(wrapper)
+
+# Custom CherryPy tools
+
+def CORS_allow(methods):
+    """This does several things:
+
+    Handles CORS preflight requests.
+    Adds Allow: header to all requests.
+    Raise 405 if request.method not in methods.
+
+    It is similar to cherrypy.tools.allow, with the CORS stuff added.
+
+    'methods' is a single method name or a tuple/list of them; names
+    are upper-cased, an empty list means GET and HEAD, and HEAD is
+    added automatically whenever GET is allowed.
+
+    Add this to CherryPy with:
+    cherrypy.tools.CORS_allow = cherrypy.Tool('on_start_resource', CORS_allow)
+    """
+    request = cherrypy.request.headers
+    response = cherrypy.response.headers
+
+    if not isinstance(methods, (tuple, list)): # pragma: no cover
+        methods = [ methods ]
+    # This builds a new list, so the caller's sequence is never modified
+    methods = [ m.upper() for m in methods if m ]
+    if not methods: # pragma: no cover
+        methods = [ 'GET', 'HEAD' ]
+    elif 'GET' in methods and 'HEAD' not in methods: # pragma: no cover
+        methods.append('HEAD')
+    response['Allow'] = ', '.join(methods)
+
+    # Allow all origins
+    if 'Origin' in request:
+        response['Access-Control-Allow-Origin'] = request['Origin']
+
+    # If it's a CORS preflight request, send response.  A preflight is
+    # an OPTIONS request carrying Access-Control-Request-Method;
+    # Access-Control-Request-Headers is optional (browsers leave it
+    # out when the real request has no non-safelisted headers), so it
+    # must not be required in order to recognize the preflight.
+    request_method = request.get("Access-Control-Request-Method", None)
+    request_headers = request.get("Access-Control-Request-Headers", None)
+    if cherrypy.request.method == "OPTIONS" and request_method:
+        if request_headers:
+            # Echo back whatever headers the client asked to use
+            response['Access-Control-Allow-Headers'] = request_headers
+        response['Access-Control-Allow-Methods'] = ', '.join(methods)
+        # Try to stop further processing and return a 200 OK
+        cherrypy.response.status = "200 OK"
+        cherrypy.response.body = ""
+        cherrypy.request.handler = lambda: ""
+        return
+
+    # Reject methods that were not explicitly allowed
+    if cherrypy.request.method not in methods:
+        raise cherrypy.HTTPError(405)
+
+
+# Helper for json_in tool to process JSON data into normal request
+# parameters.
+def json_to_request_params(body):
+    """Run CherryPy's JSON processor on 'body', then merge the decoded
+    object into the request parameters.  Raises HTTPError 415 if the
+    decoded JSON is not a dict."""
+    cherrypy.lib.jsontools.json_processor(body)
+    decoded = cherrypy.request.json
+    if isinstance(decoded, dict):
+        cherrypy.request.params.update(decoded)
+        return
+    raise cherrypy.HTTPError(415)
+
+# Used as an "error_page.default" handler
+def json_error_page(status, message, traceback, version,
+                    force_traceback = False):
+    """Build the body of an error page as compact JSON so that the
+    client can parse it.
+
+    The result has "status", "message" and "traceback" keys.  The
+    traceback is blanked for 400-499 statuses unless 'force_traceback'
+    is set.  'version' is accepted but not used here."""
+    errordata = { "status" : status,
+                  "message" : message,
+                  "traceback" : traceback }
+    # Errors in the 400-499 range are the client's fault, so they get
+    # no traceback.  A status that can't be parsed is left alone.
+    try:
+        code = int(status.split()[0])
+        if not force_traceback and 400 <= code <= 499:
+            errordata["traceback"] = ""
+    except Exception: # pragma: no cover
+        pass
+    # The response type was previously set to text/html; override it
+    response_headers = cherrypy.serving.response.headers
+    response_headers['Content-Type'] = "application/json;charset=utf-8"
+    # Undo the HTML escaping that cherrypy's get_error_page function applies
+    # (cherrypy issue 1135).  "&amp;" goes last so that already-unescaped
+    # text is not unescaped a second time.
+    entities = ( ("&lt;","<"), ("&gt;",">"), ("&amp;","&") )
+    for key in list(errordata):
+        text = errordata[key]
+        for entity, char in entities:
+            text = text.replace(entity, char)
+        errordata[key] = text
+    return json.dumps(errordata, separators=(',',':'))
+
+# Start/stop CherryPy standalone server
+def cherrypy_start(blocking = False, event = False, embedded = False):
+    """Start the CherryPy server, handling errors and signals
+    somewhat gracefully.
+
+    blocking: if true, wait here until the engine reaches the EXITING
+              state, shutting the bus down on KeyboardInterrupt,
+              IOError or SystemExit.
+    event:    optional object with a set() method, called once the
+              engine has started.  None or False (the default) means
+              there is nothing to signal.
+    embedded: if false, subscribe CherryPy's signal and console
+              control handlers where the engine provides them.
+    """
+
+    if not embedded: # pragma: no cover
+        # Handle signals nicely
+        if hasattr(cherrypy.engine, "signal_handler"):
+            cherrypy.engine.signal_handler.subscribe()
+        if hasattr(cherrypy.engine, "console_control_handler"):
+            cherrypy.engine.console_control_handler.subscribe()
+
+    # Cherrypy stupidly calls os._exit(70) when it can't bind the
+    # port.  At least try to print a reasonable error and continue
+    # in this case, rather than just dying silently (as we would
+    # otherwise do in embedded mode)
+    real_exit = os._exit
+    def fake_exit(code): # pragma: no cover
+        if code == os.EX_SOFTWARE:
+            # Written directly to stderr: this module imports no
+            # fprintf helper.
+            sys.stderr.write("error: CherryPy called os._exit!\n")
+        else:
+            real_exit(code)
+    os._exit = fake_exit
+    try:
+        cherrypy.engine.start()
+    finally:
+        # Always put the real os._exit back, even if start() raised
+        os._exit = real_exit
+
+    # Signal that the engine has started successfully.  The default
+    # for 'event' is False, not None, so both must be treated as
+    # "no event supplied".
+    if event is not None and event is not False:
+        event.set()
+
+    if blocking:
+        try:
+            cherrypy.engine.wait(cherrypy.engine.states.EXITING,
+                                 interval = 0.1, channel = 'main')
+        except (KeyboardInterrupt, IOError): # pragma: no cover
+            cherrypy.engine.log('Keyboard Interrupt: shutting down bus')
+            cherrypy.engine.exit()
+        except SystemExit: # pragma: no cover
+            cherrypy.engine.log('SystemExit raised: shutting down bus')
+            cherrypy.engine.exit()
+            raise
+
+# Stop CherryPy server
+def cherrypy_stop():
+    """Tell the CherryPy engine to exit."""
+    engine = cherrypy.engine
+    engine.exit()
--- a/nilmdb/test_fileinterval.py
+++ b/nilmdb/test_fileinterval.py
@@ -1,46 +0,0 @@
-from nilmdb import Interval, IntervalSet, IntervalError, FileInterval
-from datetime import datetime
-from nose.tools import assert_raises
-
-from test_interval import iset
-
-def fiset(string):
-    """Like iset, but builds with FileIntervals instead of Intervals"""
-    iset = IntervalSet()
-    for i, c in enumerate(string):
-        day = datetime.strptime("{0:04d}".format(i+2000), "%Y")
-        if (c == "["):
-            start = day
-        elif (c == "|"):
-            iset += FileInterval(start, day, "test.dat")
-            start = day
-        elif (c == "]"):
-            iset += FileInterval(start, day, "test.dat")
-            del start
-    return iset
-
-def test_fileinterval_vs_interval():
-    """Test FileInterval/Interval inheritance"""
-
-    i = iset("[--]")
-    f = fiset("[--]")
-
-    # check types
-    assert(isinstance(i[0], Interval))
-    assert(not isinstance(i[0], FileInterval))
-    assert(isinstance(f[0], Interval))
-    assert(isinstance(f[0], FileInterval))
-
-    # when doing an intersection, result should be a subset of the first arg
-    u = (i & f)
-    assert(isinstance(u[0], Interval))
-    assert(not isinstance(u[0], FileInterval))
-    u = (f & i)
-    assert(isinstance(u[0], Interval))
-    assert(isinstance(u[0], FileInterval))
-
-    # they're still the same though
-    assert(i == f == u)
-
-    # just for coverage
-    assert_raises(IntervalError, fiset("[]")[0].subset, f[0].start, f[0].end)
--- a/nilmdb/test_interval.py
+++ b/nilmdb/test_interval.py
@@ -1,189 +0,0 @@
-from nilmdb import Interval, IntervalSet, IntervalError
-from datetime import datetime
-from nose.tools import assert_raises
-import itertools 
-
-def test_interval():
-    """Test the Interval class"""
-    d1 = datetime.strptime("19801205","%Y%m%d")
-    d2 = datetime.strptime("19900216","%Y%m%d")
-    d3 = datetime.strptime("20111205","%Y%m%d")
-
-    # basic construction
-    i = Interval(d1, d1)
-    i = Interval(d1, d3)
-    assert(i.start == d1)
-    assert(i.end == d3)
-
-    # assignment should work
-    i.start = d2
-    try:
-        i.end = d1
-        raise Exception("should have died there")
-    except IntervalError:
-        pass
-    i.start = d1
-    i.end = d2
-
-    # end before start
-    assert_raises(IntervalError, Interval, d3, d1)
-
-    # wrong type
-    assert_raises(IntervalError, Interval, 1, 2)
-
-    # compare
-    assert(Interval(d1, d2) == Interval(d1, d2))
-    assert(Interval(d1, d2) < Interval(d1, d3))
-    assert(Interval(d1, d3) > Interval(d1, d2))
-    assert(Interval(d1, d2) < Interval(d2, d3))
-    assert(Interval(d1, d3) < Interval(d2, d3))
-    assert(Interval(d2, d2) > Interval(d1, d3))
-    assert(Interval(d3, d3) == Interval(d3, d3))
-    assert_raises(TypeError, cmp, i, 123)
-
-    # subset
-    assert(Interval(d1, d3).subset(d1, d2) == Interval(d1, d2))
-    assert_raises(IntervalError, Interval(d2, d3).subset, d1, d2)
-
-    # append
-    assert(Interval(d1, d2).is_adjacent(Interval(d2,d3)))
-    assert(Interval(d2, d3).is_adjacent(Interval(d1,d2)))
-    assert(not Interval(d2, d3).is_adjacent(Interval(d1,d3)))
-    assert_raises(TypeError, Interval(d1, d2).is_adjacent, 1)
-
-    # misc
-    assert(repr(i) == repr(eval(repr(i).replace("datetime.",""))))
-    assert(str(i) == "[1980-12-05 00:00:00 -> 1990-02-16 00:00:00]")
-
-def test_interval_intersect():
-    """Test Interval intersections"""
-    dates = [ datetime.strptime(year, "%y") for year in [ "00", "01", "02", "03" ] ]
-    perm = list(itertools.permutations(dates, 2))
-    prod = list(itertools.product(perm, perm))
-    should_intersect = {
-        False: [4, 5, 8, 20, 48, 56, 60, 96, 97, 100],
-        True: [0, 1, 2, 12, 13, 14, 16, 17, 24, 25, 26, 28, 29,
-               32, 49, 50, 52, 53, 61, 62, 64, 65, 68, 98, 101, 104]}
-    for i,((a,b),(c,d)) in enumerate(prod):
-        try:
-            i1 = Interval(a, b)
-            i2 = Interval(c, d)
-            assert(i1.intersects(i2) == i2.intersects(i1))
-            assert(i in should_intersect[i1.intersects(i2)])
-        except IntervalError:
-            assert(i not in should_intersect[True] and
-                   i not in should_intersect[False])
-    assert_raises(TypeError, i1.intersects, 1234)
-
-def test_intervalset_construct():
-    """Test interval set construction"""
-    dates = [ datetime.strptime(year, "%y") for year in [ "00", "01", "02", "03" ]]
-
-    a = Interval(dates[0], dates[1])
-    b = Interval(dates[1], dates[2])
-    c = Interval(dates[0], dates[2])
-    d = Interval(dates[2], dates[3])
-
-    iseta = IntervalSet(a)
-    isetb = IntervalSet([a, b])
-    isetc = IntervalSet([a])
-    assert(iseta != isetb)
-    assert(iseta == isetc)
-    assert(iseta != 3)
-    assert(IntervalSet(a) != IntervalSet(b))
-
-    print iseta == None
-    assert_raises(TypeError, cmp, iseta, isetb)
-    assert_raises(IntervalError, IntervalSet, [a, b, c])
-    assert_raises(TypeError, IntervalSet, [1, 2])
-
-    iset = IntervalSet(isetb)   # test iterator
-    assert(iset == isetb)
-    assert(len(iset) == 2)
-    assert(len(IntervalSet()) == 0)
-
-    # Test adding
-    iset = IntervalSet(a)
-    iset += IntervalSet(b)
-    assert(iset == IntervalSet([a, b]))
-    iset = IntervalSet(a)
-    iset += b
-    assert(iset == IntervalSet([a, b]))
-    iset = IntervalSet(a) + IntervalSet(b)
-    assert(iset == IntervalSet([a, b]))
-    iset = IntervalSet(b) + a
-    assert(iset == IntervalSet([a, b]))
-
-    # A set consisting of [0-1],[1-2] should match a set consisting of [0-2]
-    assert(IntervalSet([a,b]) == IntervalSet([c]))
-    # Etc
-    assert(IntervalSet([a,d]) != IntervalSet([c]))
-    assert(IntervalSet([c]) != IntervalSet([a,d]))
-    assert(IntervalSet([c,d]) != IntervalSet([b,d]))
-    
-    # misc
-    assert(repr(iset) == repr(eval(repr(iset).replace("datetime.",""))))
-
-def iset(string):
-    """Build an IntervalSet from a string, for testing purposes
-
-    Each character is a year
-    [ = interval start
-    | = interval end + adjacent start
-    ] = interval end
-    anything else is ignored
-    """
-    iset = IntervalSet()
-    for i, c in enumerate(string):
-        day = datetime.strptime("{0:04d}".format(i+2000), "%Y")
-        if (c == "["):
-            start = day
-        elif (c == "|"):
-            iset += Interval(start, day)
-            start = day
-        elif (c == "]"):
-            iset += Interval(start, day)
-            del start
-    return iset
-
-def test_intervalset_iset():
-    """Test basic iset construction"""
-    assert(iset("  [----]   ") ==
-           iset("  [-|--]   "))
-
-    assert(iset("[]  [--]   ") +
-           iset(" []    [--]") ==
-           iset("[|] [-----]"))
-
-def test_intervalset_intsersect():
-    """Test intersection (&)"""
-    assert_raises(TypeError, iset("[--]").__and__, 1234)
-    
-    assert(iset("[---------]") &
-           iset(" [---]     ") ==
-           iset(" [---]     "))
-
-    assert(iset(" [---]     ") &
-           iset("[---------]") ==
-           iset(" [---]     "))
-
-    assert(iset("    [-----]") &
-           iset(" [-----]   ") ==
-           iset("    [--]   "))
-
-    assert(iset("      [---]") &
-           iset(" [--]      ") ==
-           iset("           "))
-
-    assert(iset("    [-|---]") &
-           iset(" [-----|-] ") ==
-           iset("    [----] "))
-
-    assert(iset("    [-|-]  ") &
-           iset(" [-|--|--] ") ==
-           iset("    [---]  "))
-
-    assert(iset(" [----][--]") &
-           iset("[-] [--] []") ==
-           iset(" [] [-]  []"))
-
--- a/nilmdb/utils/init.py
+++ b/nilmdb/utils/init.py
@@ -0,0 +1,17 @@
+"""NilmDB utilities"""
+
+from __future__ import absolute_import
+from nilmdb.utils.timer import Timer
+from nilmdb.utils.serializer import serializer_proxy
+from nilmdb.utils.lrucache import lru_cache
+from nilmdb.utils.diskusage import du, human_size
+from nilmdb.utils.mustclose import must_close
+from nilmdb.utils import atomic
+import nilmdb.utils.threadsafety
+import nilmdb.utils.fallocate
+import nilmdb.utils.time
+import nilmdb.utils.iterator
+import nilmdb.utils.interval
+import nilmdb.utils.lock
+import nilmdb.utils.sort
+import nilmdb.utils.unicode
--- a/nilmdb/utils/atomic.py
+++ b/nilmdb/utils/atomic.py
@@ -0,0 +1,26 @@
+# Atomic file writing helper.
+
+import os
+
+def replace_file(filename, content):
+    """Replace 'filename' with 'content', trying to be both atomic and
+    durable.  The data goes to "<filename>.new" first and is then
+    renamed into place.  Meant to be 'pretty good on most OSes'
+    rather than bulletproof."""
+
+    staging_name = filename + ".new"
+
+    # Stage the data, pushing it through Python's buffer and then
+    # through the OS with fsync
+    with open(staging_name, "wb") as staged:
+        staged.write(content)
+        staged.flush()
+        os.fsync(staged.fileno())
+
+    # Swap the staged file in over the old one
+    try:
+        os.rename(staging_name, filename)
+    except OSError: # pragma: no cover
+        # Renaming over an existing file may be unsupported on some
+        # OSes.  Removing the target first is definitely NOT atomic!
+        os.remove(filename)
+        os.rename(staging_name, filename)
+
--- a/nilmdb/utils/datetime_tz/init.py
+++ b/nilmdb/utils/datetime_tz/init.py
@@ -0,0 +1,710 @@
+#!/usr/bin/python
+#
+# Copyright 2009 Google Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+#
+# Disable the invalid name warning as we are inheriting from a standard library
+# object.
+# pylint: disable-msg=C6409,W0212
+
+"""A version of the datetime module which *cares* about timezones.
+
+This module will never return a naive datetime object. This requires the module
+know your local timezone, which it tries really hard to figure out.
+
+You can override the detection by using the datetime.tzaware.defaulttz_set
+method. If the module is unable to figure out the timezone itself this method
+*must* be called before the normal module is imported. If done before importing
+it can also speed up the time taken to import as the defaulttz will no longer
+try and do the detection.
+"""
+
+__author__ = "tansell@google.com (Tim Ansell)"
+
+import calendar
+import datetime
+import os
+import os.path
+import re
+import time
+import warnings
+import dateutil.parser
+import dateutil.relativedelta
+import dateutil.tz
+import pytz
+import pytz_abbr
+
+
+try:
+  # pylint: disable-msg=C6204
+  import functools
+except ImportError, e:
+
+  class functools(object):
+    """Fake replacement for a full functools."""
+
+    # pylint: disable-msg=W0613
+    @staticmethod
+    def wraps(f, *args, **kw):
+      return f
+
+
+# Need to patch pytz.utc to have a _utcoffset so you can normalize/localize
+# using it.
+pytz.utc._utcoffset = datetime.timedelta()
+
+
+timedelta = datetime.timedelta
+
+
+def _tzinfome(tzinfo):
+  """Turn a timezone name into a tzinfo object.
+
+  Args:
+    tzinfo: A string (or string like) object, or a datetime.tzinfo object.
+
+  Returns:
+    A datetime.tzinfo object; tzinfo instances are returned untouched.
+
+  Raises:
+    UnknownTimeZoneError: If the timezone given can't be decoded.
+  """
+  if isinstance(tzinfo, datetime.tzinfo):
+    return tzinfo
+  try:
+    return pytz.timezone(tzinfo)
+  except AttributeError:
+    raise pytz.UnknownTimeZoneError("Unknown timezone! %s" % tzinfo)
+
+
+# Our "local" timezone
+_localtz = None
+
+
+def localtz():
+  """Return the local timezone, detecting and caching it on first use.
+
+  Returns:
+    The localtime timezone as a tzinfo object.
+  """
+  # pylint: disable-msg=W0603
+  global _localtz
+  if _localtz is not None:
+    return _localtz
+  _localtz = detect_timezone()
+  return _localtz
+
+
+def localtz_set(timezone):
+  """Override the cached local timezone with 'timezone' (name or tzinfo)."""
+  # pylint: disable-msg=W0603
+  global _localtz
+  resolved = _tzinfome(timezone)
+  _localtz = resolved
+
+
+def detect_timezone():
+  """Work out which timezone Python is currently running in.
+
+  Detection strategies, in the order they are attempted:
+    * The TZ environment variable.
+    * The /etc/timezone file (with timezone name).
+    * The /etc/localtime file (with timezone data).
+    * Matching a TZ to the current dst/offset/shortname.
+
+  Returns:
+    The detected local timezone as a tzinfo object
+
+  Raises:
+    pytz.UnknownTimeZoneError: If it was unable to detect a timezone.
+  """
+  # Preferred strategies; the first one to give an answer wins.
+  preferred = (_detect_timezone_environ,
+               _detect_timezone_etc_timezone,
+               _detect_timezone_etc_localtime)
+  for strategy in preferred:
+    found = strategy()
+    if found is not None:
+      return found
+
+  # Last resort, similar to what PHP does: search on time.tzname,
+  # time.timezone, time.daylight to match a pytz zone.
+  warnings.warn("Had to fall back to worst detection method (the 'PHP' "
+                "method).")
+
+  found = _detect_timezone_php()
+  if found is not None:
+    return found
+
+  raise pytz.UnknownTimeZoneError("Unable to detect your timezone!")
+
+
+def _detect_timezone_environ():
+  """Look up the TZ environment variable in pytz.
+
+  Returns None when TZ is unset, or (after a warning) when pytz does not
+  understand its value.
+  """
+  if "TZ" not in os.environ:
+    return None
+  try:
+    return pytz.timezone(os.environ["TZ"])
+  except (IOError, pytz.UnknownTimeZoneError):
+    warnings.warn("You provided a TZ environment value (%r) we did not "
+                  "understand!" % os.environ["TZ"])
+  return None
+
+
+def _detect_timezone_etc_timezone():
+  """Look up the timezone name stored in /etc/timezone in pytz.
+
+  Returns None (implicitly) when the file does not exist, and also,
+  after a warning, when it cannot be read or names a zone pytz rejects.
+  """
+  if os.path.exists("/etc/timezone"):
+    try:
+      # Close the handle explicitly instead of leaving it to the
+      # garbage collector.
+      tzfile = open("/etc/timezone")
+      try:
+        tz = tzfile.read().strip()
+      finally:
+        tzfile.close()
+      try:
+        return pytz.timezone(tz)
+      except (IOError, pytz.UnknownTimeZoneError), ei:
+        warnings.warn("Your /etc/timezone file references a timezone (%r) that"
+                      " is not valid (%r)." % (tz, ei))
+
+    # Problem reading the /etc/timezone file
+    except IOError, eo:
+      warnings.warn("Could not access your /etc/timezone file: %s" % eo)
+
+
+def _detect_timezone_etc_localtime():
+  """Match the timezone data in /etc/localtime against pytz's zones.
+
+  Returns:
+    None (implicitly) if /etc/localtime does not exist.  Otherwise the
+    pytz zone whose non-callable attributes all equal those of the parsed
+    file: the only match, or the first of several.  With no match at all,
+    a warning is issued and the tzinfo built from the file itself is
+    registered in pytz's cache and returned.
+  """
+  matches = []
+  if os.path.exists("/etc/localtime"):
+    # Close the handle explicitly once pytz has parsed it, instead of
+    # leaving it to the garbage collector.
+    fileobj = open("/etc/localtime", "rb")
+    try:
+      localtime = pytz.tzfile.build_tzinfo("/etc/localtime", fileobj)
+    finally:
+      fileobj.close()
+
+    # See if we can find a "Human Name" for this..
+    for tzname in pytz.all_timezones:
+      tz = _tzinfome(tzname)
+
+      if dir(tz) != dir(localtime):
+        continue
+
+      for attrib in dir(tz):
+        # Ignore functions and specials
+        if callable(getattr(tz, attrib)) or attrib.startswith("__"):
+          continue
+
+        # This will always be different
+        if attrib == "zone" or attrib == "_tzinfos":
+          continue
+
+        if getattr(tz, attrib) != getattr(localtime, attrib):
+          break
+
+      # We get here iff break didn't happen, i.e. no meaningful attributes
+      # differ between tz and localtime
+      else:
+        matches.append(tzname)
+
+    if len(matches) == 1:
+      return _tzinfome(matches[0])
+    else:
+      # Warn the person about this!
+      warning = "Could not get a human name for your timezone: "
+      if len(matches) > 1:
+        warning += ("We detected multiple matches for your /etc/localtime. "
+                    "(Matches where %s)" % matches)
+        # NOTE(review): this return comes before warnings.warn() below, so
+        # the multiple-match message built above is never shown.  Left
+        # as-is to keep the output unchanged -- confirm whether the
+        # warning was meant to be emitted.
+        return _tzinfome(matches[0])
+      else:
+        warning += "We detected no matches for your /etc/localtime."
+      warnings.warn(warning)
+
+      # Register /etc/localtime as the timezone loaded.
+      pytz._tzinfo_cache['/etc/localtime'] = localtime
+      return localtime
+
+
+def _detect_timezone_php():
+  """Guess the timezone from time.tzname, time.timezone and time.daylight.
+
+  Every pytz zone is compared against the (name, offset, dst) triple
+  reported by the time module.
+
+  Returns:
+    The first matching pytz zone (with a warning if there were several),
+    or None when nothing matched.
+  """
+  tomatch = (time.tzname[0], time.timezone, time.daylight)
+  now = datetime.datetime.now()
+
+  matches = []
+  for tzname in pytz.all_timezones:
+    try:
+      tz = pytz.timezone(tzname)
+    except IOError:
+      continue
+
+    try:
+      indst = tz.localize(now).timetuple()[-1]
+
+      # NOTE(review): timedelta.seconds is never negative (a negative
+      # offset is stored as days=-1 plus positive seconds), so this
+      # comparison looks unable to match zones west of UTC -- confirm.
+      if tomatch == (tz._tzname, -tz._utcoffset.seconds, indst):
+        matches.append(tzname)
+
+    # Zones lacking _tzname/_utcoffset are simply skipped
+    # pylint: disable-msg=W0704
+    except AttributeError:
+      pass
+
+  if len(matches) > 1:
+    warnings.warn("We detected multiple matches for the timezone, choosing "
+                  "the first %s. (Matches where %s)" % (matches[0], matches))
+  # Return the first match whether it was the only candidate or one of
+  # several; previously a single match was discarded.
+  if matches:
+    return pytz.timezone(matches[0])
+  return None
+
+
+class datetime_tz(datetime.datetime):
+  """An extension of the inbuilt datetime adding more functionality.
+
+  The extra functionality includes:
+    * Partial parsing support (IE 2006/02/30 matches %Y/%M/%D %H:%M)
+    * Full integration with pytz (just give it the string of the timezone!)
+    * Proper support for going to/from Unix timestamps (which are in UTC!).
+  """
+  __slots__ = ["is_dst"]
+
+  def __new__(cls, *args, **kw):
+    """Build a timezone-aware datetime_tz.
+
+    Accepts either an existing datetime.datetime as the first positional
+    argument, or the usual datetime.datetime constructor arguments.  A
+    timezone may be given as the last positional argument or as the
+    'tzinfo' keyword, as a tzinfo object or a string; if no timezone is
+    available for constructor-style arguments, localtz() is used.
+
+    Raises:
+      TypeError: If no arguments are given, if a naive datetime is passed
+          without a timezone, or if an aware datetime is passed together
+          with a timezone.
+      pytz.AmbiguousTimeError: If the retry with is_dst fails with
+          IndexError.
+    """
+    args = list(args)
+    if not args:
+      raise TypeError("Not enough arguments given.")
+
+    # See if we are given a tzinfo object...
+    tzinfo = None
+    if isinstance(args[-1], (datetime.tzinfo, basestring)):
+      tzinfo = _tzinfome(args.pop(-1))
+    elif kw.get("tzinfo", None) is not None:
+      tzinfo = _tzinfome(kw.pop("tzinfo"))
+
+    # Create a datetime object if we don't have one
+    if isinstance(args[0], datetime.datetime):
+      # Convert the datetime instance to a datetime object.
+      newargs = (list(args[0].timetuple()[0:6]) +
+                 [args[0].microsecond, args[0].tzinfo])
+      dt = datetime.datetime(*newargs)
+
+      if tzinfo is None and dt.tzinfo is None:
+        raise TypeError("Must specify a timezone!")
+
+      if tzinfo is not None and dt.tzinfo is not None:
+        raise TypeError("Can not give a timezone with timezone aware"
+                        " datetime object! (Use localize.)")
+    else:
+      # NOTE(review): any 'is_dst' keyword is still in kw here and is
+      # passed on to datetime.datetime -- confirm that is intended.
+      dt = datetime.datetime(*args, **kw)
+
+    if dt.tzinfo is not None:
+      # Re-normalize the dt object
+      dt = dt.tzinfo.normalize(dt)
+
+    else:
+      if tzinfo is None:
+        tzinfo = localtz()
+
+      # First attempt is strict (is_dst=None); only when pytz reports the
+      # time as ambiguous is the caller's is_dst hint used.
+      try:
+        dt = tzinfo.localize(dt, is_dst=None)
+      except pytz.AmbiguousTimeError:
+        is_dst = None
+        if "is_dst" in kw:
+          is_dst = kw.pop("is_dst")
+
+        try:
+          dt = tzinfo.localize(dt, is_dst)
+        except IndexError:
+          raise pytz.AmbiguousTimeError("No such time exists!")
+
+    # Rebuild as an instance of cls, and record whether DST is in effect
+    newargs = list(dt.timetuple()[0:6])+[dt.microsecond, dt.tzinfo]
+    obj = datetime.datetime.__new__(cls, *newargs)
+    obj.is_dst = obj.dst() != datetime.timedelta(0)
+    return obj
+
+  def asdatetime(self, naive=True):
+    """Convert this datetime_tz into a plain datetime object.
+
+    Args:
+      naive: When true (the default) the result carries no tz info;
+             otherwise this object's tzinfo is kept.
+
+    Returns:
+      This datetime_tz as a datetime object.
+    """
+    fields = list(self.timetuple()[0:6])
+    fields.append(self.microsecond)
+    if not naive:
+      fields.append(self.tzinfo)
+    return datetime.datetime(*fields)
+
+  def asdate(self):
+    """Convert this datetime_tz into a date object.
+
+    Returns:
+      A datetime.date with this object's year, month and day.
+    """
+    year, month, day = self.year, self.month, self.day
+    return datetime.date(year, month, day)
+
+  def totimestamp(self):
+    """Express this datetime as a unix timestamp.
+
+    The Unix epoch is the time 00:00:00 UTC on January 1, 1970.
+
+    Returns:
+      Unix timestamp: whole seconds from the UTC time tuple, plus the
+      microseconds as a fraction.
+    """
+    whole_seconds = calendar.timegm(self.utctimetuple())
+    return whole_seconds+1e-6*self.microsecond
+
+  def astimezone(self, tzinfo):
+    """Convert this timestamp to the given timezone.
+
+    Args:
+      tzinfo: Either a datetime.tzinfo object or a string (which will be
+              looked up in pytz).
+
+    Returns:
+      A datetime_tz object in the given timezone.
+    """
+    # We must not be a naive datetime object
+    assert self.tzinfo is not None
+
+    target = _tzinfome(tzinfo)
+    aware = self.asdatetime(naive=False)
+    return datetime_tz(aware.astimezone(target))
+
+  # pylint: disable-msg=C6113
+  def replace(self, **kw):
+    """Return a copy of this datetime_tz with the given fields replaced.
+
+    The replacement is applied to the naive form of this object, and the
+    result is localized again in this object's own zone.  An 'is_dst'
+    keyword is consumed here and passed to the constructor; without it
+    this object's own is_dst setting is used.
+
+    Args:
+      Any datetime_tz attribute.
+
+    Returns:
+      A datetime_tz object with the attributes replaced.
+
+    Raises:
+      TypeError: If the given replacement is invalid; in particular,
+          tzinfo can not be set to None.
+    """
+    if "tzinfo" in kw and kw["tzinfo"] is None:
+      raise TypeError("Can not remove the timezone use asdatetime()")
+
+    if "is_dst" in kw:
+      is_dst = kw.pop("is_dst")
+    else:
+      # Use our own DST setting..
+      is_dst = self.is_dst
+
+    naive = self.asdatetime()
+    replaced = naive.replace(**kw)
+
+    return datetime_tz(replaced, tzinfo=self.tzinfo.zone, is_dst=is_dst)
+
+  # pylint: disable-msg=C6310
+  @classmethod
+  def smartparse(cls, toparse, tzinfo=None):
+    """Method which uses dateutil.parse and extras to try and parse the string.
+
+    Valid dates are found at:
+     http://labix.org/python-dateutil#head-1443e0f14ad5dff07efd465e080d1110920673d8-2
+
+    Other valid formats include:
+      "now" or "today"
+      "yesterday"
+      "tomorrow" (the historical misspelling "tommorrow" is also accepted)
+      "5 minutes ago"
+      "10 hours ago"
+      "10h5m ago"
+      "start of yesterday"
+      "end of tomorrow"
+      "end of 3rd of March"
+
+    Args:
+      toparse: The string to parse.
+      tzinfo: Timezone for the resultant datetime_tz object should be in.
+              (Defaults to your local timezone.)
+
+    Returns:
+      New datetime_tz object.
+
+    Raises:
+      ValueError: If unable to make sense of the input.
+    """
+    # Default for empty fields are:
+    #  year/month/day == now
+    #  hour/minute/second/microsecond == 0
+    toparse = toparse.strip()
+
+    if tzinfo is None:
+      dt = cls.now()
+    else:
+      dt = cls.now(tzinfo)
+
+    default = dt.replace(hour=0, minute=0, second=0, microsecond=0)
+
+    # Remove "start of " and "end of " prefix in the string
+    if toparse.lower().startswith("end of "):
+      toparse = toparse[7:].strip()
+
+      # Last microsecond of today
+      dt += datetime.timedelta(days=1)
+      dt = dt.replace(hour=0, minute=0, second=0, microsecond=0)
+      dt -= datetime.timedelta(microseconds=1)
+
+      default = dt
+
+    elif toparse.lower().startswith("start of "):
+      toparse = toparse[9:].strip()
+
+      dt = dt.replace(hour=0, minute=0, second=0, microsecond=0)
+      default = dt
+
+    # Handle strings with "now", "today", "yesterday", "tomorrow" and "ago".
+    # Need to use lowercase
+    toparselower = toparse.lower()
+
+    if toparselower in ["now", "today"]:
+      pass
+
+    elif toparselower == "yesterday":
+      dt -= datetime.timedelta(days=1)
+
+    # Accept the correct spelling as well as the misspelling that this
+    # method has always recognized.
+    elif toparselower in ["tomorrow", "tommorrow"]:
+      dt += datetime.timedelta(days=1)
+
+    elif "ago" in toparselower:
+      # Remove the "ago" bit
+      toparselower = toparselower[:-3]
+      # Replace all "a day and an hour" with "1 day 1 hour"
+      toparselower = toparselower.replace("a ", "1 ")
+      toparselower = toparselower.replace("an ", "1 ")
+      toparselower = toparselower.replace(" and ", " ")
+
+      # Match the following
+      # 1 hour ago
+      # 1h ago
+      # 1 h ago
+      # 1 hour ago
+      # 2 hours ago
+      # Same with minutes, seconds, etc.
+
+      tocheck = ("seconds", "minutes", "hours", "days", "weeks", "months",
+                 "years")
+      result = {}
+      for match in re.finditer("([0-9]+)([^0-9]*)", toparselower):
+        amount = int(match.group(1))
+        unit = match.group(2).strip()
+
+        for bit in tocheck:
+          # Accept the unit's first letter, its singular or its plural
+          regex = "^([%s]|((%s)s?))$" % (
+              bit[0], bit[:-1])
+
+          bitmatch = re.search(regex, unit)
+          if bitmatch:
+            result[bit] = amount
+            break
+        else:
+          raise ValueError("Was not able to parse date unit %r!" % unit)
+
+      delta = dateutil.relativedelta.relativedelta(**result)
+      dt -= delta
+
+    else:
+      # Handle strings with normal datetime format, use original case.
+      dt = dateutil.parser.parse(toparse, default=default.asdatetime(),
+                                 tzinfos=pytz_abbr.tzinfos)
+      if dt is None:
+        raise ValueError("Was not able to parse date!")
+
+      if dt.tzinfo is pytz_abbr.unknown:
+        dt = dt.replace(tzinfo=None)
+
+      if dt.tzinfo is None:
+        if tzinfo is None:
+          tzinfo = localtz()
+        dt = cls(dt, tzinfo)
+      else:
+        if isinstance(dt.tzinfo, pytz_abbr.tzabbr):
+          abbr = dt.tzinfo
+          dt = dt.replace(tzinfo=None)
+          dt = cls(dt, abbr.zone, is_dst=abbr.dst)
+
+        dt = cls(dt)
+
+    return dt
+
+  @classmethod
+  def utcfromtimestamp(cls, timestamp):
+    """Build an instance, localized to UTC, from a timestamp."""
+    naive = datetime.datetime.utcfromtimestamp(timestamp)
+    aware = pytz.utc.localize(naive)
+    return cls(aware)
+
+  @classmethod
+  def fromtimestamp(cls, timestamp):
+    """Build an instance from a timestamp, converted to localtz()."""
+    utc_value = cls.utcfromtimestamp(timestamp)
+    return utc_value.astimezone(localtz())
+
+  @classmethod
+  def utcnow(cls):
+    """Return a new instance for the current UTC day and time."""
+    # utcnow() yields a naive value, so pytz.utc is attached explicitly.
+    naive_utc = datetime.datetime.utcnow()
+    return cls(naive_utc, tzinfo=pytz.utc)
+
+  @classmethod
+  def now(cls, tzinfo=None):
+    """[tz] -> new datetime with tz's local day and time."""
+    if tzinfo is None:
+      # No zone requested: use whatever localtz() reports.
+      tzinfo = localtz()
+    return cls.utcnow().astimezone(tzinfo)
+
+  today = now
+
+  @staticmethod
+  def fromordinal(ordinal):
+    raise SyntaxError("Not enough information to create a datetime_tz object"
+                      " from an ordinal. Please use datetime.date.fromordinal")
+
+
+class iterate(object):
+  """Helpful iterators for working with datetime_tz objects."""
+
+  @staticmethod
+  def between(start, delta, end=None):
+    """Return an iterator between this date till given end point.
+
+    Example usage:
+      >>> d = datetime_tz.smartparse("5 days ago")
+      2008/05/12 11:45
+      >>> for i in d.between(timedelta(days=1), datetime_tz.now()):
+      >>>    print i
+      2008/05/12 11:45
+      2008/05/13 11:45
+      2008/05/14 11:45
+      2008/05/15 11:45
+      2008/05/16 11:45
+
+    Args:
+      start: The date to start at.
+      delta: The interval to iterate with.
+      end: (Optional) Date to end at. If not given the iterator will never
+           terminate.
+
+    Yields:
+      datetime_tz objects.
+    """
+    toyield = start
+    while end is None or toyield < end:
+      yield toyield
+      toyield += delta
+
+  @staticmethod
+  def weeks(start, end=None):
+    """Iterate over the weeks between the given datetime_tzs.
+
+    Args:
+      start: datetime_tz to start from.
+      end: (Optional) Date to end at, if not given the iterator will never
+           terminate.
+
+    Returns:
+      An iterator which generates datetime_tz objects a week apart.
+    """
+    return iterate.between(start, datetime.timedelta(days=7), end)
+
+  @staticmethod
+  def days(start, end=None):
+    """Iterate over the days between the given datetime_tzs.
+
+    Args:
+      start: datetime_tz to start from.
+      end: (Optional) Date to end at, if not given the iterator will never
+           terminate.
+
+    Returns:
+      An iterator which generates datetime_tz objects a day apart.
+    """
+    return iterate.between(start, datetime.timedelta(days=1), end)
+
+  @staticmethod
+  def hours(start, end=None):
+    """Iterate over the hours between the given datetime_tzs.
+
+    Args:
+      start: datetime_tz to start from.
+      end: (Optional) Date to end at, if not given the iterator will never
+           terminate.
+
+    Returns:
+      An iterator which generates datetime_tz objects an hour apart.
+    """
+    return iterate.between(start, datetime.timedelta(hours=1), end)
+
+  @staticmethod
+  def minutes(start, end=None):
+    """Iterate over the minutes between the given datetime_tzs.
+
+    Args:
+      start: datetime_tz to start from.
+      end: (Optional) Date to end at, if not given the iterator will never
+           terminate.
+
+    Returns:
+      An iterator which generates datetime_tz objects a minute apart.
+    """
+    return iterate.between(start, datetime.timedelta(minutes=1), end)
+
+  @staticmethod
+  def seconds(start, end=None):
+    """Iterate over the seconds between the given datetime_tzs.
+
+    Args:
+      start: datetime_tz to start from.
+      end: (Optional) Date to end at, if not given the iterator will never
+           terminate.
+
+    Returns:
+      An iterator which generates datetime_tz objects a second apart.
+    """
+    return iterate.between(start, datetime.timedelta(seconds=1), end)
+
+
+def _wrap_method(name):
+  """Wrap a method.
+
+  Install on datetime_tz a wrapper around datetime.datetime.<name> that
+  turns a plain datetime.datetime result into a datetime_tz.
+
+  Args:
+    name: The name of the method to patch
+  """
+  original = getattr(datetime.datetime, name)
+
+  # Only __name__ and __doc__ are copied: the method has no __module__.
+  @functools.wraps(original, ("__name__", "__doc__"), ())
+  def wrapper(*args, **kw):
+    result = original(*args, **kw)
+
+    needs_upgrade = (isinstance(result, datetime.datetime) and
+                     not isinstance(result, datetime_tz))
+    return datetime_tz(result) if needs_upgrade else result
+
+  setattr(datetime_tz, name, wrapper)
+
+for methodname in ["__add__", "__radd__", "__rsub__", "__sub__", "combine"]:
+
+  # datetime_tz must not define this itself, or _wrap_method would replace it
+  assert methodname not in datetime_tz.__dict__
+
+  _wrap_method(methodname)
+
+
+__all__ = ['datetime_tz', 'detect_timezone', 'iterate', 'localtz',
+    'localtz_set', 'timedelta', '_detect_timezone_environ',
+    '_detect_timezone_etc_localtime', '_detect_timezone_etc_timezone',
+    '_detect_timezone_php']
--- a/nilmdb/utils/datetime_tz/pytz_abbr.py
+++ b/nilmdb/utils/datetime_tz/pytz_abbr.py
@@ -0,0 +1,230 @@
+#!/usr/bin/python2.4
+# -*- coding: utf-8 -*-
+#
+# Copyright 2010 Google Inc. All Rights Reserved.
+#
+
+"""
+Common time zone acronyms/abbreviations for use with the datetime_tz module.
+
+*WARNING*: There are lots of caveats when using this module which are listed
+below.
+
+CAVEAT 1: The acronyms/abbreviations are not globally unique, they are not even
+unique within a region. For example, EST can mean any of,
+  Eastern Standard Time in Australia (which is 10 hour ahead of UTC)
+  Eastern Standard Time in North America (which is 5 hours behind UTC)
+
+Where there are two abbreviations the more popular one will appear in the all
+dictionary, while the less common one will only appear in that country's region
+dictionary. I.e., if using all, EST will be mapped to Eastern Standard Time in
+North America.
+
+CAVEAT 2: Many of the acronyms don't map to a neat Olson timezone. For example,
+Eastern European Summer Time (EEST) is used by many different countries in
+Europe *at different times*! If the acronym does not map neatly to one zone it
+is mapped to the Etc/GMT+-XX Olson zone. This means that any date manipulations
+can end up with idiotic things like summer time in the middle of winter.
+
+CAVEAT 3: The Summer/Standard time difference is really important! For an hour
+each year it is needed to determine which time you are actually talking about.
+    2002-10-27 01:20:00 EST != 2002-10-27 01:20:00 EDT
+"""
+
+import datetime
+import pytz
+import pytz.tzfile
+
+
+class tzabbr(datetime.tzinfo):
+  """A timezone abbreviation.
+
+  *WARNING*: This is not a tzinfo implementation! Trying to use this as a
+  tzinfo object will result in failure.  We inherit from datetime.tzinfo so
+  we can get through the dateutil checks.
+  """
+  pass
+
+
+# A "marker" tzinfo object which is used to signify an unknown timezone.
+unknown = datetime.tzinfo(0)
+
+
+regions = {'all': {}, 'military': {}}
+# Aliases for the all and military regions (zones register as "Military")
+all = regions['all']
+military = regions['Military'] = regions['military']
+
+
+def tzabbr_register(abbr, name, region, zone, dst):
+  """Register a new timezone abbreviation in the global registry.
+
+  If another abbreviation with the same name has already been registered, the
+  new abbreviation will only be registered in the region specific dictionary.
+  """
+  entry = tzabbr()
+  entry.abbr = abbr
+  entry.name = name
+  entry.region = region
+  entry.zone = zone
+  entry.dst = dst
+
+  # The first registration of an abbreviation keeps its slot in 'all'.
+  all.setdefault(abbr, entry)
+
+  # The per-region table is created the first time a region is seen.
+  region_abbrs = regions.setdefault(region, {})
+
+  assert abbr not in region_abbrs
+  region_abbrs[abbr] = entry
+
+
+def tzinfos_create(use_region):
+  """Return a tzinfos(abbr, offset) lookup bound to one region's table."""
+  abbrs = regions[use_region]
+
+  def tzinfos(abbr, offset):
+    """Resolve an abbreviation, or failing that a numeric offset."""
+    if abbr:
+      if abbr not in abbrs:
+        raise ValueError("Unknown timezone found %s" % abbr)
+      # FIXME: Check the offset matches the abbreviation we just selected.
+      return abbrs[abbr]
+    if offset == 0:
+      return pytz.utc
+    if offset:
+      # NOTE(review): offset is presumably in seconds (dateutil's tzinfos
+      # contract) while FixedOffset takes whole minutes -- confirm.
+      return pytz.FixedOffset(offset // 60)
+    return unknown
+
+  return tzinfos
+
+
+# Create a special alias for the all tzinfos
+tzinfos = tzinfos_create('all')
+
+
+# Create the abbreviations.
+# *WARNING*: Order matters!
+tzabbr_register("A", u"Alpha Time Zone", u"Military", "Etc/GMT-1", False)
+tzabbr_register("ACDT", u"Australian Central Daylight Time", u"Australia",
+                "Australia/Adelaide", True)
+tzabbr_register("ACST", u"Australian Central Standard Time", u"Australia",
+                "Australia/Adelaide", False)
+tzabbr_register("ADT", u"Atlantic Daylight Time", u"North America",
+                "America/Halifax", True)
+tzabbr_register("AEDT", u"Australian Eastern Daylight Time", u"Australia",
+                "Australia/Sydney", True)
+tzabbr_register("AEST", u"Australian Eastern Standard Time", u"Australia",
+                "Australia/Sydney", False)
+tzabbr_register("AKDT", u"Alaska Daylight Time", u"North America",
+                "US/Alaska", True)
+tzabbr_register("AKST", u"Alaska Standard Time", u"North America",
+                "US/Alaska", False)
+tzabbr_register("AST", u"Atlantic Standard Time", u"North America",
+                "America/Halifax", False)
+tzabbr_register("AWDT", u"Australian Western Daylight Time", u"Australia",
+                "Australia/West", True)
+tzabbr_register("AWST", u"Australian Western Standard Time", u"Australia",
+                "Australia/West", False)
+tzabbr_register("B", u"Bravo Time Zone", u"Military", "Etc/GMT-2", False)
+tzabbr_register("BST", u"British Summer Time", u"Europe", "Europe/London", True)
+tzabbr_register("C", u"Charlie Time Zone", u"Military", "Etc/GMT-3", False)
+tzabbr_register("CDT", u"Central Daylight Time", u"North America",
+                "US/Central", True)
+tzabbr_register("CEDT", u"Central European Daylight Time", u"Europe",
+                "Etc/GMT-2", True)
+tzabbr_register("CEST", u"Central European Summer Time", u"Europe",
+                "Etc/GMT-2", True)
+tzabbr_register("CET", u"Central European Time", u"Europe", "Etc/GMT-1", False)
+tzabbr_register("CST", u"Central Standard Time", u"North America",
+                "US/Central", False)
+tzabbr_register("CXT", u"Christmas Island Time", u"Australia",
+                "Indian/Christmas", False)
+tzabbr_register("D", u"Delta Time Zone", u"Military", "Etc/GMT-4", False)
+tzabbr_register("E", u"Echo Time Zone", u"Military", "Etc/GMT-5", False)
+tzabbr_register("EDT", u"Eastern Daylight Time", u"North America",
+                "US/Eastern", True)
+tzabbr_register("EEDT", u"Eastern European Daylight Time", u"Europe",
+                "Etc/GMT-3", True)
+tzabbr_register("EEST", u"Eastern European Summer Time", u"Europe",
+                "Etc/GMT-3", True)
+tzabbr_register("EET", u"Eastern European Time", u"Europe", "Etc/GMT-2", False)
+tzabbr_register("EST", u"Eastern Standard Time", u"North America",
+                "US/Eastern", False)
+tzabbr_register("F", u"Foxtrot Time Zone", u"Military", "Etc/GMT-6", False)
+tzabbr_register("G", u"Golf Time Zone", u"Military", "Etc/GMT-7", False)
+tzabbr_register("GMT", u"Greenwich Mean Time", u"Europe", pytz.utc, False)
+tzabbr_register("H", u"Hotel Time Zone", u"Military", "Etc/GMT-8", False)
+#tzabbr_register("HAA", u"Heure Avancée de l'Atlantique", u"North America", u"UTC - 3 hours")
+#tzabbr_register("HAC", u"Heure Avancée du Centre", u"North America", u"UTC - 5 hours")
+tzabbr_register("HADT", u"Hawaii-Aleutian Daylight Time", u"North America",
+                "Pacific/Honolulu", True)
+#tzabbr_register("HAE", u"Heure Avancée de l'Est", u"North America", u"UTC - 4 hours")
+#tzabbr_register("HAP", u"Heure Avancée du Pacifique", u"North America", u"UTC - 7 hours")
+#tzabbr_register("HAR", u"Heure Avancée des Rocheuses", u"North America", u"UTC - 6 hours")
+tzabbr_register("HAST", u"Hawaii-Aleutian Standard Time", u"North America",
+                "Pacific/Honolulu", False)
+#tzabbr_register("HAT", u"Heure Avancée de Terre-Neuve", u"North America", u"UTC - 2:30 hours")
+#tzabbr_register("HAY", u"Heure Avancée du Yukon", u"North America", u"UTC - 8 hours")
+tzabbr_register("HDT", u"Hawaii Daylight Time", u"North America",
+                "Pacific/Honolulu", True)
+#tzabbr_register("HNA", u"Heure Normale de l'Atlantique", u"North America", u"UTC - 4 hours")
+#tzabbr_register("HNC", u"Heure Normale du Centre", u"North America", u"UTC - 6 hours")
+#tzabbr_register("HNE", u"Heure Normale de l'Est", u"North America", u"UTC - 5 hours")
+#tzabbr_register("HNP", u"Heure Normale du Pacifique", u"North America", u"UTC - 8 hours")
+#tzabbr_register("HNR", u"Heure Normale des Rocheuses", u"North America", u"UTC - 7 hours")
+#tzabbr_register("HNT", u"Heure Normale de Terre-Neuve", u"North America", u"UTC - 3:30 hours")
+#tzabbr_register("HNY", u"Heure Normale du Yukon", u"North America", u"UTC - 9 hours")
+tzabbr_register("HST", u"Hawaii Standard Time", u"North America",
+                "Pacific/Honolulu", False)
+tzabbr_register("I", u"India Time Zone", u"Military", "Etc/GMT-9", False)
+tzabbr_register("IST", u"Irish Summer Time", u"Europe", "Europe/Dublin", True)
+tzabbr_register("K", u"Kilo Time Zone", u"Military", "Etc/GMT-10", False)
+tzabbr_register("L", u"Lima Time Zone", u"Military", "Etc/GMT-11", False)
+tzabbr_register("M", u"Mike Time Zone", u"Military", "Etc/GMT-12", False)
+tzabbr_register("MDT", u"Mountain Daylight Time", u"North America",
+                "US/Mountain", True)
+#tzabbr_register("MESZ", u"Mitteleuroäische Sommerzeit", u"Europe", u"UTC + 2 hours")
+#tzabbr_register("MEZ", u"Mitteleuropäische Zeit", u"Europe", u"UTC + 1 hour")
+tzabbr_register("MSD", u"Moscow Daylight Time", u"Europe",
+                "Europe/Moscow", True)
+tzabbr_register("MSK", u"Moscow Standard Time", u"Europe",
+                "Europe/Moscow", False)
+tzabbr_register("MST", u"Mountain Standard Time", u"North America",
+                "US/Mountain", False)
+tzabbr_register("N", u"November Time Zone", u"Military", "Etc/GMT+1", False)
+tzabbr_register("NDT", u"Newfoundland Daylight Time", u"North America",
+                "America/St_Johns", True)
+tzabbr_register("NFT", u"Norfolk (Island) Time", u"Australia",
+                "Pacific/Norfolk", False)
+tzabbr_register("NST", u"Newfoundland Standard Time", u"North America",
+                "America/St_Johns", False)
+tzabbr_register("O", u"Oscar Time Zone", u"Military", "Etc/GMT+2", False)
+tzabbr_register("P", u"Papa Time Zone", u"Military", "Etc/GMT+3", False)
+tzabbr_register("PDT", u"Pacific Daylight Time", u"North America",
+                "US/Pacific", True)
+tzabbr_register("PST", u"Pacific Standard Time", u"North America",
+                "US/Pacific", False)
+tzabbr_register("Q", u"Quebec Time Zone", u"Military", "Etc/GMT+4", False)
+tzabbr_register("R", u"Romeo Time Zone", u"Military", "Etc/GMT+5", False)
+tzabbr_register("S", u"Sierra Time Zone", u"Military", "Etc/GMT+6", False)
+tzabbr_register("T", u"Tango Time Zone", u"Military", "Etc/GMT+7", False)
+tzabbr_register("U", u"Uniform Time Zone", u"Military", "Etc/GMT+8", False)
+tzabbr_register("UTC", u"Coordinated Universal Time", u"Europe",
+                pytz.utc, False)
+tzabbr_register("V", u"Victor Time Zone", u"Military", "Etc/GMT+9", False)
+tzabbr_register("W", u"Whiskey Time Zone", u"Military", "Etc/GMT+10", False)
+tzabbr_register("WDT", u"Western Daylight Time", u"Australia",
+                "Australia/West", True)
+tzabbr_register("WEDT", u"Western European Daylight Time", u"Europe",
+                "Etc/GMT-1", True)
+tzabbr_register("WEST", u"Western European Summer Time", u"Europe",
+                "Etc/GMT-1", True)
+tzabbr_register("WET", u"Western European Time", u"Europe", pytz.utc, False)
+tzabbr_register("WST", u"Western Standard Time", u"Australia",
+                "Australia/West", False)
+tzabbr_register("X", u"X-ray Time Zone", u"Military", "Etc/GMT+11", False)
+tzabbr_register("Y", u"Yankee Time Zone", u"Military", "Etc/GMT+12", False)
+tzabbr_register("Z", u"Zulu Time Zone", u"Military", pytz.utc, False)
--- a/nilmdb/utils/diskusage.py
+++ b/nilmdb/utils/diskusage.py
@@ -0,0 +1,33 @@
+import os
+import errno
+from math import log
+
+def human_size(num):
+    """Human friendly file size"""
+    units = (('bytes', 0), ('kiB', 0), ('MiB', 1), ('GiB', 2), ('TiB', 2))
+    if num == 0: # pragma: no cover
+        return '0 bytes'
+    if num == 1: # pragma: no cover
+        return '1 byte'
+    # Anything not above one byte is reported in bytes, never as None
+    exponent = 0
+    if num > 1:
+        exponent = min(int(log(num, 1024)), len(units) - 1)
+    unit, num_decimals = units[exponent]
+    return '{:.{}f} {}'.format(float(num) / 1024**exponent, num_decimals, unit)
+
+def du(path):
+    """Return the total size of path in bytes, like du -sb.  A path
+    that has vanished (broken symlink, file in the process of being
+    removed) counts as zero bytes; any other OSError propagates."""
+    try:
+        total = os.path.getsize(path)
+        if not os.path.isdir(path):
+            return total
+        for entry in os.listdir(path):
+            total += du(os.path.join(path, entry))
+        return total
+    except OSError as err: # pragma: no cover
+        if err.errno == errno.ENOENT:
+            return 0
+        raise
--- a/nilmdb/utils/fallocate.py
+++ b/nilmdb/utils/fallocate.py
@@ -0,0 +1,49 @@
+# Implementation of hole punching via fallocate, if the OS
+# and filesystem support it.
+
+try:
+    import os
+    import ctypes
+    import ctypes.util
+
+    def make_fallocate():
+        """Return a Python wrapper around the C library's fallocate, raising
+        IOError(errno, message) whenever the C call returns non-zero."""
+        libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)
+
+        c_fallocate = libc.fallocate
+        c_fallocate.restype = ctypes.c_int
+        c_fallocate.argtypes = [ ctypes.c_int, ctypes.c_int,
+                                 ctypes.c_int64, ctypes.c_int64 ]
+
+        def fallocate(fd, mode, offset, len_):
+            if c_fallocate(fd, mode, offset, len_) == 0:
+                return
+            code = ctypes.get_errno() # pragma: no cover
+            raise IOError(code, os.strerror(code)) # pragma: no cover
+        return fallocate
+
+    # Build the wrapper once, then drop the factory from the namespace.
+    fallocate = make_fallocate()
+    del make_fallocate
+except Exception: # pragma: no cover
+    # Setup failed for any reason: expose None instead of a function.
+    fallocate = None
+
+FALLOC_FL_KEEP_SIZE = 0x01
+FALLOC_FL_PUNCH_HOLE = 0x02
+
+def punch_hole(filename, offset, length, ignore_errors = True):
+    """Punch a hole of 'length' bytes at 'offset' in the named file.
+    This isn't well supported, so IOError is ignored by default."""
+    try:
+        if fallocate is None: # pragma: no cover
+            raise IOError("fallocate not available")
+        # fallocate(2) requires KEEP_SIZE whenever PUNCH_HOLE is given.
+        flags = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE
+        with open(filename, "r+") as target:
+            fallocate(target.fileno(), flags, offset, length)
+    except IOError: # pragma: no cover
+        if not ignore_errors:
+            raise
+        return
--- a/nilmdb/utils/interval.py
+++ b/nilmdb/utils/interval.py
@@ -0,0 +1,128 @@
+"""Interval.  Like nilmdb.server.interval, but re-implemented here
+in plain Python so clients have easier access to it, and with a few
+helper functions.
+
+Intervals are half-open, ie. they include data points with timestamps
+[start, end)
+"""
+
+import nilmdb.utils.time
+import nilmdb.utils.iterator
+
+class IntervalError(Exception):
+    """Error due to invalid interval bounds, interval overlap, etc"""
+    pass
+
+# Interval
+class Interval:
+    """A span of time from 'start' up to, but not including, 'end'."""
+
+    def __init__(self, start, end):
+        """
+        'start' and 'end' are arbitrary numbers that represent time
+        """
+        if start >= end:
+            # Explicitly disallow zero-width intervals (since they're half-open)
+            raise IntervalError("start %s must precede end %s" % (start, end))
+        self.start, self.end = start, end
+
+    def __repr__(self):
+        """Return e.g. 'Interval(1, 2)', built from the runtime class name."""
+        bounds = ", ".join([repr(self.start), repr(self.end)])
+        return "%s(%s)" % (self.__class__.__name__, bounds)
+
+    def __str__(self):
+        to_string = nilmdb.utils.time.timestamp_to_string
+        return "[" + to_string(self.start) + " -> " + to_string(self.end) + ")"
+
+    def human_string(self):
+        to_human = nilmdb.utils.time.timestamp_to_human
+        return "[ " + to_human(self.start) + " -> " + to_human(self.end) + " ]"
+
+    def __cmp__(self, other):
+        """Compare two intervals.  If non-equal, order by start then end"""
+        by_start = cmp(self.start, other.start)
+        return by_start or cmp(self.end, other.end)
+
+    def intersects(self, other):
+        """Return True if two Interval objects intersect"""
+        if not isinstance(other, Interval):
+            raise TypeError("need an Interval")
+        # Half-open spans that merely touch at an endpoint do not intersect.
+        return not (self.end <= other.start or self.start >= other.end)
+
+    def subset(self, start, end):
+        """Return a new Interval that is a subset of this one"""
+        # A subclass that tracks additional data might override this.
+        if not (start < self.start or end > self.end):
+            return Interval(start, end)
+        raise IntervalError("not a subset")
+
+def set_difference(a, b):
+    """
+    Compute the difference (a \\ b) between the intervals in 'a' and
+    the intervals in 'b'; i.e., the ranges that are present in 'a'
+    but not in 'b'.
+
+    'a' and 'b' must both be iterables.
+
+    Returns a generator that yields each interval in turn.
+    Output intervals are built as subsets of the intervals in the
+    first argument (a).
+    """
+    # Iterate through all starts and ends in sorted order.  Add a
+    # tag to the iterator so that we can figure out which one they
+    # were, after sorting.
+    def decorate(it, key_start, key_end):
+        for i in it:
+            yield i.start, key_start, i
+            yield i.end, key_end, i
+    a_iter = decorate(iter(a), 0, 2)
+    b_iter = decorate(iter(b), 1, 3)
+
+    # Now iterate over the timestamps of each start and end.
+    # At each point, evaluate which type of end it is, to determine
+    # how to build up the output intervals.
+    a_interval = None
+    b_interval = None
+    out_start = None
+    for (ts, k, i) in nilmdb.utils.iterator.imerge(a_iter, b_iter):
+        if k == 0:
+            # start of an 'a' interval
+            a_interval = i
+            if b_interval is None:
+                out_start = ts
+        elif k == 1:
+            # start of a 'b' interval
+            b_interval = i
+            if out_start is not None and out_start != ts:
+                yield a_interval.subset(out_start, ts)
+            out_start = None
+        elif k == 2:
+            # end of an 'a' interval
+            if out_start is not None and out_start != ts:
+                yield a_interval.subset(out_start, ts)
+            out_start = None
+            a_interval = None
+        elif k == 3:
+            # end of a 'b' interval
+            b_interval = None
+            if a_interval:
+                out_start = ts
+
+def optimize(it):
+    """
+    Given an iterable 'it' with intervals, join together intervals that
+    are adjacent in time, and return a generator that yields the new
+    intervals.  Joining overwrites 'start' on the input objects.
+    """
+    saved_int = None
+    for interval in it:
+        if saved_int is not None:
+            if saved_int.end == interval.start:
+                interval.start = saved_int.start
+            else:
+                yield saved_int
+        saved_int = interval
+    if saved_int is not None:
+        yield saved_int
--- a/nilmdb/utils/iterator.py
+++ b/nilmdb/utils/iterator.py
@@ -0,0 +1,36 @@
+# Misc iterator tools
+
+# Iterator merging, based on http://code.activestate.com/recipes/491285/
+import heapq
+def imerge(*iterables):
+    '''Merge multiple sorted inputs into a single sorted output.
+
+    Generator equivalent of:  sorted(itertools.chain(*iterables))
+
+    >>> list(imerge([1,3,5,7], [0,2,4,8], [5,10,15,20], [], [25]))
+    [0, 1, 2, 3, 4, 5, 5, 7, 8, 10, 15, 20, 25]
+
+    '''
+    heappop, siftup, _Stop = heapq.heappop, heapq._siftup, StopIteration
+
+    h = []
+    h_append = h.append
+    for it in map(iter, iterables):
+        try:
+            next = it.next
+            h_append([next(), next])
+        except _Stop:
+            pass
+    heapq.heapify(h)
+
+    while 1:
+        try:
+            while 1:
+                v, next = s = h[0]      # raises IndexError when h is empty
+                yield v
+                s[0] = next()           # raises StopIteration when exhausted
+                siftup(h, 0)            # restore heap condition
+        except _Stop:
+            heappop(h)                  # remove exhausted iterator's entry
+        except IndexError:
+            return
--- a/nilmdb/utils/lock.py
+++ b/nilmdb/utils/lock.py
@@ -0,0 +1,33 @@
+# File locking
+
+import warnings
+
+try:
+    import fcntl
+    import errno
+
+    def exclusive_lock(f):
+        """Try to take an exclusive lock without blocking.  Returns
+        True when the lock was acquired, or False when it was refused."""
+        try:
+            fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+            return True
+        except IOError as err:
+            # Only these two errno values are reported as a failed lock.
+            if err.errno not in (errno.EACCES, errno.EAGAIN): # pragma: no cover
+                raise
+            return False
+
+    def exclusive_unlock(f):
+        """Release an exclusive lock."""
+        fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+
+except ImportError: # pragma: no cover
+    def exclusive_lock(f):
+        """Dummy lock function -- does not lock!"""
+        warnings.warn("Pretending to lock " + str(f))
+        return True
+
+    def exclusive_unlock(f):
+        """Release an exclusive lock."""
+        return
--- a/nilmdb/utils/lrucache.py
+++ b/nilmdb/utils/lrucache.py
@@ -0,0 +1,76 @@
+# Memoize a function's return value with a least-recently-used cache
+# Based on:
+#   http://code.activestate.com/recipes/498245-lru-and-lfu-cache-decorators/
+# with added 'destructor' functionality.
+
+import collections
+import decorator
+
+def lru_cache(size = 10, onremove = None, keys = slice(None)):
+    """Least-recently-used cache decorator.
+
+    @lru_cache(size = 10, onremove = None)
+    def f(...):
+        pass
+
+    Given a function and arguments, memoize its return value.  Up to
+    'size' elements are cached.  'keys' is a slice object that
+    represents which arguments are used as the cache key.
+
+    When evicting a value from the cache, call the function
+    'onremove' with the value that's being evicted.
+
+    Call f.cache_remove(...) to evict the cache entry with the given
+    arguments.  Call f.cache_remove_all() to evict all entries.
+    f.cache_hits and f.cache_misses give statistics.
+    """
+
+    def decorate(func):
+        cache = collections.OrderedDict()   # order: least- to most-recent
+
+        def evict(value):
+            if onremove:
+                onremove(value)
+
+        def wrapper(orig, *args, **kwargs):
+            if kwargs:
+                raise NotImplementedError("kwargs not supported")
+            key = args[keys]
+            try:
+                value = cache.pop(key)
+                orig.cache_hits += 1
+            except KeyError:
+                value = orig(*args)
+                orig.cache_misses += 1
+                if len(cache) >= size:
+                    evict(cache.popitem(0)[1])  # evict LRU cache entry
+            cache[key] = value                  # (re-)insert this key at end
+            return value
+
+        def cache_remove(*args):
+            """Remove the described key from this cache, if present."""
+            key = args
+            if key in cache:
+                evict(cache.pop(key))
+            else:
+                if len(cache) > 0 and len(args) != len(cache.iterkeys().next()):
+                    raise KeyError("trying to remove from LRU cache, but "
+                                   "number of arguments doesn't match the "
+                                   "cache key length")
+
+        def cache_remove_all():
+            while cache:    # drain oldest-first; no iteration while mutating
+                evict(cache.popitem(0)[1])
+
+        def cache_info():
+            return (func.cache_hits, func.cache_misses)
+
+        new = decorator.decorator(wrapper, func)
+        func.cache_hits = 0
+        func.cache_misses = 0
+        new.cache_info = cache_info
+        new.cache_remove = cache_remove
+        new.cache_remove_all = cache_remove_all
+        return new
+
+    return decorate
--- a/nilmdb/utils/mustclose.py
+++ b/nilmdb/utils/mustclose.py
@@ -0,0 +1,61 @@
+from nilmdb.utils.printf import *
+import sys
+import inspect
+import decorator
+
+def must_close(errorfile = sys.stderr, wrap_verify = False):
+    """Class decorator that warns on 'errorfile' at deletion time if
+    the class's close() member wasn't called.
+
+    If 'wrap_verify' is True, every instance method except __init__
+    and __del__ is wrapped with a verifier that raises AssertionError
+    if .close() has already been called on that object."""
+    def class_decorator(cls):
+
+        def wrap_class_method(wrapper):
+            try:
+                orig = getattr(cls, wrapper.__name__).im_func
+            except Exception:
+                orig = lambda x: None
+            setattr(cls, wrapper.__name__, decorator.decorator(wrapper, orig))
+
+        @wrap_class_method
+        def __init__(orig, self, *args, **kwargs):
+            ret = orig(self, *args, **kwargs)
+            self.__dict__["_must_close"] = True
+            self.__dict__["_must_close_initialized"] = True
+            return ret
+
+        @wrap_class_method
+        def __del__(orig, self, *args, **kwargs):
+            if "_must_close" in self.__dict__:
+                fprintf(errorfile, "error: %s.close() wasn't called!\n",
+                        self.__class__.__name__)
+            return orig(self, *args, **kwargs)
+
+        @wrap_class_method
+        def close(orig, self, *args, **kwargs):
+            if "_must_close" in self.__dict__:
+                del self._must_close
+            return orig(self, *args, **kwargs)
+
+        # Optionally wrap all other functions
+        def verifier(orig, self, *args, **kwargs):
+            if ("_must_close" not in self.__dict__ and
+                "_must_close_initialized" in self.__dict__):
+                raise AssertionError("called " + str(orig) + " after close")
+            return orig(self, *args, **kwargs)
+        if wrap_verify:
+            for (name, method) in inspect.getmembers(cls, inspect.ismethod):
+                # Skip class methods
+                if method.__self__ is not None:
+                    continue
+                # Skip some methods
+                if name in [ "__del__", "__init__" ]:
+                    continue
+                # Set up wrapper
+                setattr(cls, name, decorator.decorator(verifier,
+                                                       method.im_func))
+
+        return cls
+    return class_decorator
--- a/nilmdb/utils/printf.py
+++ b/nilmdb/utils/printf.py
@@ -0,0 +1,9 @@
+"""printf, fprintf, sprintf"""
+
+from __future__ import print_function
+def printf(_str, *args):
+    print(_str % args, end='')
+def fprintf(_file, _str, *args):
+    print(_str % args, end='', file=_file)
+def sprintf(_str, *args):
+    return (_str % args)
--- a/nilmdb/utils/serializer.py
+++ b/nilmdb/utils/serializer.py
@@ -0,0 +1,109 @@
+import Queue
+import threading
+import sys
+import decorator
+import inspect
+import types
+import functools
+
+# This file provides a class that will wrap an object and serialize
+# all calls to its methods.  All calls to that object will be queued
+# and executed from a single thread, regardless of which thread makes
+# the call.
+
+# Based partially on http://stackoverflow.com/questions/2642515/
+
+class SerializerThread(threading.Thread):
+    """Thread that retrieves call information from the queue, makes the
+    call, and returns the results."""
+    def __init__(self, classname, call_queue):
+        threading.Thread.__init__(self)
+        self.name = "Serializer-" + classname + "-" + self.name
+        self.call_queue = call_queue
+
+    def run(self):
+        while True:
+            result_queue, func, args, kwargs = self.call_queue.get()
+            # Terminate if result_queue is None
+            if result_queue is None:
+                return
+            exception = None
+            result = None
+            try:
+                result = func(*args, **kwargs) # wrapped
+            except:
+                exception = sys.exc_info()
+            # Ensure we delete these before returning a result, so
+            # we don't unnecessarily hold onto a reference while
+            # we're waiting for the next call.
+            del func, args, kwargs
+            result_queue.put((exception, result))
+            del exception, result
+
+def serializer_proxy(obj_or_type):
+    """Wrap the given object or type in a SerializerObjectProxy.
+
+    Returns a SerializerObjectProxy object that proxies all method
+    calls to the object, as well as attribute retrievals.
+
+    The proxied requests, including instantiation, are performed in a
+    single thread and serialized between caller threads.
+    """
+    class SerializerCallProxy(object):
+        def __init__(self, call_queue, func, objectproxy):
+            self.call_queue = call_queue
+            self.func = func
+            # Need to hold a reference to the object proxy so it doesn't
+            # go away (and kill the thread) until after we get called.
+            self.objectproxy = objectproxy
+        def __call__(self, *args, **kwargs):
+            result_queue = Queue.Queue()
+            self.call_queue.put((result_queue, self.func, args, kwargs))
+            ( exc_info, result ) = result_queue.get()
+            if exc_info is None:
+                return result
+            else:
+                raise exc_info[0], exc_info[1], exc_info[2]
+
+    class SerializerObjectProxy(object):
+        def __init__(self, obj_or_type, *args, **kwargs):
+            self.__object = obj_or_type
+            try:
+                if type(obj_or_type) in (types.TypeType, types.ClassType):
+                    classname = obj_or_type.__name__
+                else:
+                    classname = obj_or_type.__class__.__name__
+            except AttributeError: # pragma: no cover
+                classname = "???"
+            self.__call_queue = Queue.Queue()
+            self.__thread = SerializerThread(classname, self.__call_queue)
+            self.__thread.daemon = True
+            self.__thread.start()
+            self._thread_safe = True
+
+        def __getattr__(self, key):
+            if key.startswith("_SerializerObjectProxy__"): # pragma: no cover
+                raise AttributeError
+            attr = getattr(self.__object, key)
+            if not callable(attr):
+                getter = SerializerCallProxy(self.__call_queue, getattr, self)
+                return getter(self.__object, key)
+            r = SerializerCallProxy(self.__call_queue, attr, self)
+            return r
+
+        def __call__(self, *args, **kwargs):
+            """Call this to instantiate the type, if a type was passed
+            to serializer_proxy.  Otherwise, pass the call through."""
+            ret = SerializerCallProxy(self.__call_queue,
+                                      self.__object, self)(*args, **kwargs)
+            if type(self.__object) in (types.TypeType, types.ClassType):
+                # Instantiation
+                self.__object = ret
+                return self
+            return ret
+
+        def __del__(self):
+            self.__call_queue.put((None, None, None, None))
+            self.__thread.join()
+
+    return SerializerObjectProxy(obj_or_type)
--- a/nilmdb/utils/sort.py
+++ b/nilmdb/utils/sort.py
@@ -0,0 +1,18 @@
+import re
+
+def sort_human(items, key = None):
+    """Human-friendly sort (/stream/2 before /stream/10)"""
+    def to_num(val):
+        try:
+            return int(val)
+        except Exception:
+            return val
+
+    def human_key(text):
+        if key:
+            text = key(text)
+        # Break into character and numeric chunks.
+        chunks = re.split(r'([0-9]+)', text)
+        return [ to_num(c) for c in chunks ]
+
+    return sorted(items, key = human_key)
--- a/nilmdb/utils/threadsafety.py
+++ b/nilmdb/utils/threadsafety.py
@@ -0,0 +1,109 @@
+from nilmdb.utils.printf import *
+import threading
+import warnings
+import types
+
+def verify_proxy(obj_or_type, exception = False, check_thread = True,
+                 check_concurrent = True):
+    """Wrap the given object or type in a VerifyObjectProxy.
+
+    Returns a VerifyObjectProxy that proxies all method calls to the
+    given object, as well as attribute retrievals.
+
+    When calling methods, the following checks are performed.  If
+    exception is True, an exception is raised.  Otherwise, a warning
+    is printed.
+
+    check_thread = True     # Warn/fail if two different threads call methods.
+    check_concurrent = True # Warn/fail if two functions are concurrently
+                            # run through this proxy
+    """
+    class Namespace(object):
+        pass
+    class VerifyCallProxy(object):
+        def __init__(self, func, parent_namespace):
+            self.func = func
+            self.parent_namespace = parent_namespace
+
+        def __call__(self, *args, **kwargs):
+            p = self.parent_namespace
+            this = threading.current_thread()
+            try:
+                callee = self.func.__name__
+            except AttributeError:
+                callee = "???"
+
+            if p.thread is None:
+                p.thread = this
+                p.thread_callee = callee
+
+            if check_thread and p.thread != this:
+                err = sprintf("unsafe threading: %s called %s.%s,"
+                              " but %s called %s.%s",
+                              p.thread.name, p.classname, p.thread_callee,
+                              this.name, p.classname, callee)
+                if exception:
+                    raise AssertionError(err)
+                else: # pragma: no cover
+                    warnings.warn(err)
+
+            need_concur_unlock = False
+            if check_concurrent:
+                if p.concur_lock.acquire(False) == False:
+                    err = sprintf("unsafe concurrency: %s called %s.%s "
+                                  "while %s is still in %s.%s",
+                                  this.name, p.classname, callee,
+                                  p.concur_tname, p.classname, p.concur_callee)
+                    if exception:
+                        raise AssertionError(err)
+                    else: # pragma: no cover
+                        warnings.warn(err)
+                else:
+                    p.concur_tname = this.name
+                    p.concur_callee = callee
+                    need_concur_unlock = True
+
+            try:
+                ret = self.func(*args, **kwargs)
+            finally:
+                if need_concur_unlock:
+                    p.concur_lock.release()
+            return ret
+
+    class VerifyObjectProxy(object):
+        def __init__(self, obj_or_type, *args, **kwargs):
+            p = Namespace()
+            self.__ns = p
+            p.thread = None
+            p.thread_callee = None
+            p.concur_lock = threading.Lock()
+            p.concur_tname = None
+            p.concur_callee = None
+            self.__obj = obj_or_type
+            try:
+                if type(obj_or_type) in (types.TypeType, types.ClassType):
+                    p.classname = self.__obj.__name__
+                else:
+                    p.classname = self.__obj.__class__.__name__
+            except AttributeError: # pragma: no cover
+                p.classname = "???"
+
+        def __getattr__(self, key):
+            if key.startswith("_VerifyObjectProxy__"): # pragma: no cover
+                raise AttributeError
+            attr = getattr(self.__obj, key)
+            if not callable(attr):
+                return VerifyCallProxy(getattr, self.__ns)(self.__obj, key)
+            return VerifyCallProxy(attr, self.__ns)
+
+        def __call__(self, *args, **kwargs):
+            """Call this to instantiate the type, if a type was passed
+            to verify_proxy.  Otherwise, pass the call through."""
+            ret = VerifyCallProxy(self.__obj, self.__ns)(*args, **kwargs)
+            if type(self.__obj) in (types.TypeType, types.ClassType):
+                # Instantiation
+                self.__obj = ret
+                return self
+            return ret
+
+    return VerifyObjectProxy(obj_or_type)
--- a/nilmdb/utils/time.py
+++ b/nilmdb/utils/time.py
@@ -0,0 +1,134 @@
+from __future__ import absolute_import
+
+from nilmdb.utils import datetime_tz
+import re
+import time
+
+# Range
+min_timestamp = (-2**63)
+max_timestamp = (2**63 - 1)
+
+# Smallest representable step
+epsilon = 1
+
+def string_to_timestamp(str):
+    """Convert a string that represents an integer number of microseconds
+    since epoch."""
+    try:
+        # Parse a string like "1234567890123456" and return an integer
+        return int(str)
+    except ValueError:
+        # Try parsing as a float, in case it's "1234567890123456.0"
+        return int(round(float(str)))
+
+def timestamp_to_string(timestamp):
+    """Convert a timestamp (integer microseconds since epoch) to a string"""
+    if isinstance(timestamp, float):
+        return str(int(round(timestamp)))
+    else:
+        return str(timestamp)
+
+def timestamp_to_human(timestamp):
+    """Convert a timestamp (integer microseconds since epoch) to a
+    human-readable string, using the local timezone for display
+    (e.g. from the TZ env var)."""
+    if timestamp == min_timestamp:
+        return "(minimum)"
+    if timestamp == max_timestamp:
+        return "(maximum)"
+    dt = datetime_tz.datetime_tz.fromtimestamp(timestamp_to_unix(timestamp))
+    return dt.strftime("%a, %d %b %Y %H:%M:%S.%f %z")
+
+def unix_to_timestamp(unix):
+    """Convert a Unix timestamp (floating point seconds since epoch)
+    into a NILM timestamp (integer microseconds since epoch)"""
+    return int(round(unix * 1e6))
+seconds_to_timestamp = unix_to_timestamp
+
+def timestamp_to_unix(timestamp):
+    """Convert a NILM timestamp (integer microseconds since epoch)
+    into a Unix timestamp (floating point seconds since epoch)"""
+    return timestamp / 1e6
+timestamp_to_seconds = timestamp_to_unix
+
+def rate_to_period(hz, cycles = 1):
+    """Convert a rate (in Hz) to a period (in timestamp units).
+    Returns an integer."""
+    period = unix_to_timestamp(cycles) / float(hz)
+    return int(round(period))
+
+def parse_time(toparse):
+    """
+    Parse a free-form time string and return a nilmdb timestamp
+    (integer microseconds since epoch).  If the string has no timezone,
+    the current local timezone is assumed (e.g. from the TZ env var).
+    Raises ValueError if no timestamp can be extracted.
+    """
+    if toparse == "min":
+        return min_timestamp
+    if toparse == "max":
+        return max_timestamp
+
+    # If it starts with @, treat it as a NILM timestamp
+    # (integer microseconds since epoch)
+    try:
+        if toparse[0] == '@':
+            return int(toparse[1:])
+    except (ValueError, KeyError, IndexError):
+        pass
+
+    # If string isn't "now" and doesn't contain at least 4 digits,
+    # consider it invalid.  smartparse might otherwise accept
+    # empty strings and strings with just separators.
+    if toparse != "now" and len(re.findall(r"\d", toparse)) < 4:
+        raise ValueError("not enough digits for a timestamp")
+
+    # Try to just parse the time as given
+    try:
+        return unix_to_timestamp(datetime_tz.datetime_tz.
+                                 smartparse(toparse).totimestamp())
+    except (ValueError, OverflowError):
+        pass
+
+    # If it's parseable as a float, treat it as a Unix or NILM
+    # timestamp based on its range.
+    try:
+        val = float(toparse)
+        # range is from about year 2001 - 2128
+        if val > 1e9 and val < 5e9:
+            return unix_to_timestamp(val)
+        if val > 1e15 and val < 5e15:
+            return int(val)  # NILM timestamps are integers, not floats
+    except ValueError:
+        pass
+
+    # Try to extract a substring in a condensed format that we expect
+    # to see in a filename or header comment
+    res = re.search(r"(^|[^\d])("            # non-numeric or SOL
+                    r"(199\d|2\d\d\d)"       # year
+                    r"[-/]?"                 # separator
+                    r"(0[1-9]|1[012])"       # month
+                    r"[-/]?"                 # separator
+                    r"([012]\d|3[01])"       # day
+                    r"[-T ]?"                # separator
+                    r"([01]\d|2[0-3])"       # hour
+                    r"[:]?"                  # separator
+                    r"([0-5]\d)"             # minute
+                    r"[:]?"                  # separator
+                    r"([0-5]\d)?"            # second
+                    r"([-+]\d\d\d\d)?"       # timezone
+                    r")", toparse)
+    if res is not None:
+        try:
+            return unix_to_timestamp(datetime_tz.datetime_tz.
+                                     smartparse(res.group(2)).totimestamp())
+        except ValueError:
+            pass
+
+    # Could also try to successively parse substrings, but let's
+    # just give up for now.
+    raise ValueError("unable to parse timestamp")
+
+def now():
+    """Return current timestamp"""
+    return unix_to_timestamp(time.time())
--- a/nilmdb/utils/timer.py
+++ b/nilmdb/utils/timer.py
@@ -0,0 +1,23 @@
+# -*- coding: utf-8 -*-
+
+# Simple timer to time a block of code, for optimization debugging
+# use like:
+#   with nilmdb.utils.Timer("flush"):
+#       foo.flush()
+
+from __future__ import print_function
+from __future__ import absolute_import
+import contextlib
+import time
+
+@contextlib.contextmanager
+def Timer(name = None, tosyslog = False):
+    start = time.time()
+    yield
+    elapsed = int((time.time() - start) * 1000)
+    msg = (name or 'elapsed') + ": " + str(elapsed) + " ms"
+    if tosyslog: # pragma: no cover
+        import syslog
+        syslog.syslog(msg)
+    else:
+        print(msg)
--- a/nilmdb/utils/timestamper.py
+++ b/nilmdb/utils/timestamper.py
@@ -0,0 +1,93 @@
+"""File-like objects that add timestamps to the input lines"""
+
+from nilmdb.utils.printf import *
+import nilmdb.utils.time
+
+class Timestamper(object):
+    """A file-like object that adds timestamps to lines of an input file."""
+    def __init__(self, infile, ts_iter):
+        """infile: filename, or another file-like object
+           ts_iter: iterator that returns a timestamp string for
+           each line of the file"""
+        if isinstance(infile, basestring):
+            self.file = open(infile, "r")
+        else:
+            self.file = infile
+        self.ts_iter = ts_iter
+
+    def close(self):
+        self.file.close()
+
+    def readline(self, *args):
+        while True:
+            line = self.file.readline(*args)
+            if not line:
+                return ""
+            if line[0] == '#':
+                continue
+            break
+        try:
+            return self.ts_iter.next() + line
+        except StopIteration:
+            return ""
+
+    def readlines(self, size = None):
+        out = ""
+        while True:
+            line = self.readline()
+            out += line
+            if not line or (size and len(out) >= size):
+                break
+        return out
+
+    def __iter__(self):
+        return self
+
+    def next(self):
+        result = self.readline()
+        if not result:
+            raise StopIteration
+        return result
+
+class TimestamperRate(Timestamper):
+    """Timestamper that uses a start time and a fixed rate"""
+    def __init__(self, infile, start, rate, end = None):
+        """
+        infile: file name or file-like object
+
+        start: timestamp (integer microseconds since epoch) of line 0
+
+        rate: line rate in Hz; line n gets start + rate_to_period(rate, n)
+
+        end: If specified, stop before outputting a timestamp greater
+             than or equal to this."""
+        timestamp_to_string = nilmdb.utils.time.timestamp_to_string
+        rate_to_period = nilmdb.utils.time.rate_to_period
+        def iterator(start, rate, end):
+            n = 0
+            rate = float(rate)
+            while True:
+                now = start + rate_to_period(rate, n)
+                if end and now >= end:
+                    return  # ends the generator, same as StopIteration
+                yield timestamp_to_string(now) + " "
+                n += 1
+        Timestamper.__init__(self, infile, iterator(start, rate, end))
+        self.start = start
+        self.rate = rate
+    def __str__(self):
+        return sprintf("TimestamperRate(..., start=\"%s\", rate=%g)",
+                       nilmdb.utils.time.timestamp_to_human(self.start),
+                       self.rate)
+
+class TimestamperNow(Timestamper):
+    """Timestamper that uses current time"""
+    def __init__(self, infile):
+        timestamp_to_string = nilmdb.utils.time.timestamp_to_string
+        get_now = nilmdb.utils.time.now
+        def iterator():
+            while True:
+                yield timestamp_to_string(get_now()) + " "
+        Timestamper.__init__(self, infile, iterator())
+    def __str__(self):
+        return "TimestamperNow(...)"
--- a/nilmdb/utils/unicode.py
+++ b/nilmdb/utils/unicode.py
@@ -0,0 +1,29 @@
+import sys
+
+if sys.version_info[0] >= 3: # pragma: no cover (future Python3 compat)
+    text_type = str
+else:
+    text_type = unicode
+
+def encode(u):
+    """Try to encode something from Unicode to a string using the
+    default encoding.  If it fails, try encoding as UTF-8."""
+    if not isinstance(u, text_type):
+        return u
+    try:
+        return u.encode()
+    except UnicodeEncodeError:
+        return u.encode("utf-8")
+
+def decode(s):
+    """Try to decode something from string to Unicode using the
+    default encoding.  If it fails, try decoding as UTF-8."""
+    if isinstance(s, text_type):
+        return s
+    try:
+        return s.decode()
+    except UnicodeDecodeError:
+        try:
+            return s.decode("utf-8")
+        except UnicodeDecodeError:
+            return s # best we can do
--- a/pytables-test/Makefile
+++ b/pytables-test/Makefile
@@ -1,5 +0,0 @@
-all:
-	time python test-indexed-read.py
-
-clean:
-	rm -f *pyc
--- a/pytables-test/README.jim
+++ b/pytables-test/README.jim
@@ -1,2 +0,0 @@
-New version from:
-  http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=641485#15
--- a/pytables-test/TODO.txt
+++ b/pytables-test/TODO.txt
@@ -1,12 +0,0 @@
- Make CherryPy server that can handle simple GET/POST,
-  and a httplib client that can talk to that server.
-  Steps:
-  - Make server handle GET
-  - Make client send request, get response
-  - Add request streaming to server
-  - Add request streaming to client
-  - Make server handle POST
-  - Make client send request, get response
-  - Add request streaming to server
-  - Add request streaming to client
-  - Integrate into a server process that also keeps database open.
--- a/pytables-test/indexing-notes
+++ b/pytables-test/indexing-notes
@@ -1,3 +0,0 @@
-Indexing time64 doesn't seem to work -- needed to do "time >= 1243052015" even though the actual database times
-should be something like 1243052015.847000.  Let's switch to just using a 64-bit integer counting e.g.
-microseconds since 1970-01-01 
--- a/pytables-test/sample-query
+++ b/pytables-test/sample-query
@@ -1,3 +0,0 @@
-timestamp > 1243052015
-took 394.5 minutes in vitables
-(2340 rows matched)
--- a/pytables-test/server.py
+++ b/pytables-test/server.py
@@ -1,53 +0,0 @@
-import sys
-import tables
-import nilmdb
-
-try:
-    import cherrypy
-    cherrypy.tools.json_out
-except:
-    sys.stderr.write("Cherrypy 3.2+ required\n")
-    sys.exit(1)
-
-class NilmApp:
-    def __init__(self, db):
-        self.db = db
-
-class Root(NilmApp):
-    """NILM Database"""
-
-    server_version = "1.0"
-
-    @cherrypy.expose
-    def index(self):
-        raise cherrypy.NotFound()
-
-    @cherrypy.expose
-    def favicon_ico(self):
-        raise cherrypy.NotFound()
-
-    @cherrypy.expose
-    @cherrypy.tools.json_out()
-    def version(self):
-        return self.server_version
-
-class Stream(NilmApp):
-    """Stream operations"""
-
-    @cherrypy.expose
-    @cherrypy.tools.json_out()
-    def list(self):
-        return 
-    
-cherrypy.config.update({
-    'server.socket_host': '127.0.0.1',
-    'server.socket_port': 12380
-    })
-
-db = nilmdb.nilmdb()
-cherrypy.tree.mount(Root(db), "/")
-cherrypy.tree.mount(Stream(db), "/stream")
-
-if __name__ == "__main__":
-    cherrypy.engine.start()
-    cherrypy.engine.block()
--- a/pytables-test/speed-pytables.py
+++ b/pytables-test/speed-pytables.py
@@ -1,16 +0,0 @@
-import tables
-import numpy
-
-class RawSample(tables.IsDescription):
-    timestamp = tables.UInt64Col()
-    voltage   = tables.UInt16Col(shape = 3)
-    current   = tables.UInt16Col(shape = 3)
-
-h5file = tables.openFile("test.h5", mode = "w", title = "Test")
-group = h5file.createGroup("/", "raw", "Raw Data")
-table = h5file.createTable(group, "nilm1", RawSample, "NILM 1")
-
-print repr(h5file)
-
-# write rows
-
--- a/pytables-test/test-indexing.py
+++ b/pytables-test/test-indexing.py
@@ -1,54 +0,0 @@
-#!/usr/bin/python
-
-from tables import *
-import re
-import time
-
-# A class to describe our data
-class PrepData(IsDescription):
-    timestamp = Int64Col()
-    p1 = Float32Col()
-    q1 = Float32Col()
-    p3 = Float32Col()
-    q3 = Float32Col()
-    p5 = Float32Col()
-    q5 = Float32Col()
-    p7 = Float32Col()
-    q7 = Float32Col()
-
-filename = "test.h5"
-h5file = openFile(filename, mode = "w", title = "NILM Test")
-
-group = h5file.createGroup("/", "newton", "Newton school")
-table = h5file.createTable(group, "prep", PrepData, "Prep Data", expectedrows = 120 * 86400 * 90)
-
-table.cols.timestamp.createIndex()
-
-for i in range(0, 80):
-    # Open file
-    data = open("data/alldata")
-    count = 0
-    oldtime = time.time()
-    prep = table.row
-    for line in data:
-        count = count + 1
-        if count % 1000000 == 0:
-            print str(i) + ": " + str((time.time() - oldtime)) + ", total " + str(count/1000000) + "m lines"
-            oldtime = time.time()
-        v = re.split('\s+', line)
-        prep['timestamp'] = int(v[0]) + 500000000 * i
-        prep['p1'] = v[1]
-        prep['q1'] = v[2]
-        prep['p3'] = v[3]
-        prep['q3'] = v[4]
-        prep['p5'] = v[5]
-        prep['q5'] = v[6]
-        prep['p7'] = v[7]
-        prep['q7'] = v[8]
-        prep.append()
-    data.close()
-
-h5file.close()
-
-
-
--- a/pytables-test/test-write.py
+++ b/pytables-test/test-write.py
@@ -1,54 +0,0 @@
-#!/usr/bin/python
-
-from tables import *
-import re
-import time
-
-# A class to describe our data
-class PrepData(IsDescription):
-    timestamp = Time64Col()
-    p1 = Float32Col()
-    q1 = Float32Col()
-    p3 = Float32Col()
-    q3 = Float32Col()
-    p5 = Float32Col()
-    q5 = Float32Col()
-    p7 = Float32Col()
-    q7 = Float32Col()
-
-filename = "test.h5"
-h5file = openFile(filename, mode = "w", title = "NILM Test")
-
-group = h5file.createGroup("/", "newton", "Newton school")
-table = h5file.createTable(group, "prep", PrepData, "Prep Data")
-
-table.cols.timestamp.createIndex()
-
-for i in range(0, 80):
-    # Open file
-    data = open("data/alldata")
-    count = 0
-    oldtime = time.time()
-    prep = table.row
-    for line in data:
-        count = count + 1
-        if count % 1000000 == 0:
-            print str(i) + ": " + str((time.time() - oldtime)) + ", total " + str(count/1000000) + "m lines"
-            oldtime = time.time()
-        v = re.split('\s+', line)
-        prep['timestamp'] = float(v[0]) / 1000.0 + 500000 * i
-        prep['p1'] = v[1]
-        prep['q1'] = v[2]
-        prep['p3'] = v[3]
-        prep['q3'] = v[4]
-        prep['p5'] = v[5]
-        prep['q5'] = v[6]
-        prep['p7'] = v[7]
-        prep['q7'] = v[8]
-        prep.append()
-    data.close()
-
-h5file.close()
-
-
-
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,9 +1,41 @@
+[aliases]
+test = nosetests
+
 [nosetests]
+# Note: values must be set to 1, and have no comments on the same line,
+# for "python setup.py nosetests" to work correctly.
 nocapture=1
+# Comment this out to see CherryPy logs on failure:
+nologcapture=1
 with-coverage=1
 cover-inclusive=1
 cover-package=nilmdb
 cover-erase=1
+# this works, puts html output in cover/ dir:
+# cover-html=1
+# need nose 1.1.3 for this:
+# cover-branches=1
+#debug=nose
+#debug-log=nose.log
 stop=1
 verbosity=2
-
+tests=tests
+#tests=tests/test_threadsafety.py
+#tests=tests/test_bulkdata.py
+#tests=tests/test_mustclose.py
+#tests=tests/test_lrucache.py
+#tests=tests/test_cmdline.py
+#tests=tests/test_layout.py
+#tests=tests/test_rbtree.py
+#tests=tests/test_interval.py
+#tests=tests/test_rbtree.py,tests/test_interval.py
+#tests=tests/test_interval.py
+#tests=tests/test_client.py
+#tests=tests/test_timestamper.py
+#tests=tests/test_serializer.py
+#tests=tests/test_iteratorizer.py
+#tests=tests/test_client.py:TestClient.test_client_nilmdb
+#tests=tests/test_nilmdb.py
+#with-profile=1
+#profile-sort=time
+##profile-restrict=10  # doesn't work right, treated as string or something
--- a/setup.py
+++ b/setup.py
@@ -1,9 +1,138 @@
 #!/usr/bin/python

-from distutils.core import setup
+# To release a new version, tag it:
+#   git tag -a nilmdb-1.1 -m "Version 1.1"
+#   git push --tags
+# Then just package it up:
+#   python setup.py sdist

-setup(name = 'nilmdb',
-      version = '1.0',
-      scripts = [ 'bin/nilm-test.py' ],
-      packages = [ 'nilmdb' ],
+# This is supposed to be using Distribute:
+#
+#   distutils provides a "setup" method.
+#   setuptools is a set of monkeypatches on top of that.
+#   distribute is a particular version/implementation of setuptools.
+#
+# So we don't really know if this is using the old setuptools or the
+# Distribute-provided version of setuptools.
+
+import traceback
+import sys
+import os
+
+try:
+    from setuptools import setup, find_packages
+    from distutils.extension import Extension
+    import distutils.version
+except ImportError:
+    traceback.print_exc()
+    print "Please install the prerequisites listed in README.txt"
+    sys.exit(1)
+
+# Versioneer manages version numbers from git tags.
+# https://github.com/warner/python-versioneer
+import versioneer
+versioneer.versionfile_source = 'nilmdb/_version.py'
+versioneer.versionfile_build = 'nilmdb/_version.py'
+versioneer.tag_prefix = 'nilmdb-'
+versioneer.parentdir_prefix = 'nilmdb-'
+
+# Hack to workaround logging/multiprocessing issue:
+# https://groups.google.com/d/msg/nose-users/fnJ-kAUbYHQ/_UsLN786ygcJ
+try: import multiprocessing
+except Exception: pass
+
+# Use Cython if it's new enough, otherwise use preexisting C files.
+cython_modules = [ 'nilmdb.server.interval',
+                   'nilmdb.server.rbtree' ]
+try:
+    import Cython
+    from Cython.Build import cythonize
+    if (distutils.version.LooseVersion(Cython.__version__) <
+        distutils.version.LooseVersion("0.16")):
+        print "Cython version", Cython.__version__, "is too old; not using it."
+        raise ImportError()
+    use_cython = True
+except ImportError:
+    use_cython = False
+
+ext_modules = [ Extension('nilmdb.server.rocket', ['nilmdb/server/rocket.c' ]) ]
+for modulename in cython_modules:
+    filename = modulename.replace('.','/')
+    if use_cython:
+        ext_modules.extend(cythonize(filename + ".pyx"))
+    else:
+        cfile = filename + ".c"
+        if not os.path.exists(cfile):
+            raise Exception("Missing source file " + cfile + ".  "
+                            "Try installing cython >= 0.16.")
+        ext_modules.append(Extension(modulename, [ cfile ]))
+
+# We need a MANIFEST.in.  Generate it here rather than polluting the
+# repository with yet another setup-related file.
+with open("MANIFEST.in", "w") as m:
+    m.write("""
+# Root
+include README.txt
+include setup.cfg
+include setup.py
+include versioneer.py
+include Makefile
+include .coveragerc
+include .pylintrc
+
+# Cython files -- include source.
+recursive-include nilmdb/server *.pyx *.pyxdep *.pxd
+
+# Tests
+recursive-include tests *.py
+recursive-include tests/data *
+include tests/test.order
+
+# Docs
+recursive-include docs Makefile *.md
+
+# Extras
+recursive-include extras *
+""")
+
+# Run setup
+setup(name='nilmdb',
+      version = versioneer.get_version(),
+      cmdclass = versioneer.get_cmdclass(),
+      url = 'https://git.jim.sh/jim/lees/nilmdb.git',
+      author = 'Jim Paris',
+      description = "NILM Database",
+      long_description = "NILM Database",
+      license = "Proprietary",
+      author_email = 'jim@jtan.com',
+      tests_require = [ 'nose',
+                        'coverage',
+                        'numpy',
+                        ],
+      setup_requires = [ 'distribute',
+                         ],
+      install_requires = [ 'decorator',
+                           'cherrypy >= 3.2',
+                           'simplejson',
+                           'python-dateutil',
+                           'pytz',
+                           'psutil >= 0.3.0',
+                           'requests >= 1.1.0, < 2.0.0',
+                           ],
+      packages = [ 'nilmdb',
+                   'nilmdb.utils',
+                   'nilmdb.utils.datetime_tz',
+                   'nilmdb.server',
+                   'nilmdb.client',
+                   'nilmdb.cmdline',
+                   'nilmdb.scripts',
+                   ],
+      entry_points = {
+          'console_scripts': [
+              'nilmtool = nilmdb.scripts.nilmtool:main',
+              'nilmdb-server = nilmdb.scripts.nilmdb_server:main',
+              ],
+          },
+      ext_modules = ext_modules,
+      zip_safe = False,
      )
--- a/test/Makefile
+++ b/test/Makefile
@@ -1,5 +0,0 @@
-all:
-	python speed-readascii.py
-
-clean:
-	rm -f *pyc
--- a/test/printf.py
+++ b/test/printf.py
@@ -1,4 +0,0 @@
-from __future__ import print_function
-def printf(str, *args):
-    print(str % args, end='')
-    
--- a/test/speed-readascii.py
+++ b/test/speed-readascii.py
@@ -1,67 +0,0 @@
-#!/usr/bin/python
-
-from printf import printf
-import time
-import re
-import numpy as np
-import itertools
-
-class Timer():
-    def __init__(self, arg):
-        self.arg = arg
-    def __enter__(self): self.start = time.time()
-    def __exit__(self, *args): printf("%s: %f lines/sec\n", self.arg, 1e6 / (time.time() - self.start))
-
-def test_split():
-    for n, line in enumerate(open('1m.raw', 'r')):
-        out = [0]*6
-        tmp = [ int(i) for i in line.partition('#')[0].split() ]
-        out[0:len(tmp)] = tmp
-        if (n % 100000 == 0):
-            printf("line %d = %s\n", n, str(out))
-
-def test_split2():
-    for n, line in enumerate(open('1m.raw', 'r')):
-        out = [0]*6
-        tmp = [ int(i,10) for i in line.partition('#')[0].split() ]
-        out[0:len(tmp)] = tmp
-        if (n % 100000 == 0):
-            printf("line %d = %s\n", n, str(out))
-
-def test_regex():
-    for n, line in enumerate(open('1m.raw', 'r')):
-        out = [0]*6
-        tmp = [ int(x) for x in re.findall('(\d+)\s+',line.partition('#')[0]) ]
-        out[0:len(tmp)] = tmp
-        if (n % 100000 == 0):
-            printf("line %d = %s\n", n, str(out))
-
-def test_bigregex():
-    regex = re.compile('^(?:\s*)' + '(?:(\d+)\s+)?' * 6)
-    for n, line in enumerate(open('1m.raw', 'r')):
-        out = [ int(x or 0) for x in re.match(regex, line).groups() ]
-        if (n % 100000 == 0):
-            printf("line %d = %s\n", n, str(out))
-
-def test_numpy():
-    out = np.genfromtxt(open('1m.raw', 'r'),
-                        dtype = np.dtype('i2,i2,i2,i2,i2,i2'))
-    
-with Timer("numpy"):
-    test_numpy() # 106k/sec
-
-with Timer("regex"):
-    test_regex() # 121k/sec
-
-with Timer("split"):
-    test_split() # 219k/sec
-
-with Timer("split2"):
-    test_split2() # 328k/sec
-
-with Timer("bigregex"):
-    test_bigregex() # 130k/sec
-
-# The "int" operation takes quite a while -- int(x,10) is twice as fast
-# Perl does about 500k/sec
-
--- a/test/speed-readbinary.py
+++ b/test/speed-readbinary.py
@@ -1,74 +0,0 @@
-#!/usr/bin/python
-
-from printf import printf
-import time
-import re
-import numpy as np
-import itertools
-import struct
-import array
-import os
-import mmap
-
-class Timer():
-    def __init__(self, arg):
-        self.arg = arg
-    def __enter__(self): self.start = time.time()
-    def __exit__(self, *args): printf("%s: %f klines/sec\n", self.arg, 1e3 / (time.time() - self.start))
-
-def test_struct1():
-    """read with struct.unpack"""
-    f = open('1m.bin', 'rb')
-    f.seek(0,os.SEEK_END)
-    filesize = f.tell()
-    f.seek(0,os.SEEK_SET)
-    packer = struct.Struct('!dHHHHHH')
-    items = filesize / packer.size
-    for n in xrange(items):
-        s = f.read(packer.size)
-        out = packer.unpack(s)
-        if (n % 100000 == 0):
-            printf("line %d = %s\n", n, str(out))
-
-def test_struct2():
-    """read with struct.unpack, convert to string"""
-    f = open('1m.bin', 'rb')
-    f.seek(0,os.SEEK_END)
-    filesize = f.tell()
-    f.seek(0,os.SEEK_SET)
-    packer = struct.Struct('!dHHHHHH')
-    items = filesize / packer.size
-    for n in xrange(items):
-        s = f.read(packer.size)
-        out = packer.unpack(s)
-        x = str(out)
-        if (n % 100000 == 0):
-            printf("line %d = %s\n", n, str(out))
-
-def test_mmap():
-    """struct.unpack with mmap"""
-    with open('1m.bin', 'rb') as f:
-        f.seek(0,os.SEEK_END)
-        filesize = f.tell()
-        f.seek(0,os.SEEK_SET)
-        m = mmap.mmap(f.fileno(), filesize, access=mmap.ACCESS_READ)
-        packer = struct.Struct('!dHHHHHH')
-        items = filesize / packer.size
-        for n in xrange(items):
-            out = packer.unpack(m[packer.size*n : packer.size*(n+1)])
-            if (n % 100000 == 0):
-                printf("line %d = %s\n", n, str(out))
-
-with Timer("mmap"):
-    test_mmap()  # 1600k
-
-with Timer("struct1"):
-    test_struct1()  # 1460k
-
-with Timer("struct2"):
-    test_struct2()  # 210k
-
-# Reading from the file is again much quicker than converting to string
-# Use mmap, it's good
-
-
--- a/test/speed-writebinary.py
+++ b/test/speed-writebinary.py
@@ -1,76 +0,0 @@
-#!/usr/bin/python
-
-from printf import printf
-import time
-import re
-import numpy as np
-import itertools
-import struct
-import array
-
-class Timer():
-    def __init__(self, arg):
-        self.arg = arg
-    def __enter__(self): self.start = time.time()
-    def __exit__(self, *args): printf("%s: %f klines/sec\n", self.arg, 1e3 / (time.time() - self.start))
-
-def read_ascii():
-    for n in xrange(1000000):
-        yield (1234, 2345, 3456, 4576, 5678, 6789)
-#    for n, line in enumerate(open('1m.raw', 'r')):
-#        out = [0]*6
-#        tmp = [ int(i,10) for i in line.partition('#')[0].split() ]
-#        out[0:len(tmp)] = tmp
-#        if (n % 100000 == 0):
-#            printf("line %d = %s\n", n, str(out))
-#        yield out
-
-def test_struct1():
-    """write with struct.pack"""
-    f = open('1m.bin', 'wb')
-    for out in read_ascii():
-        s = struct.pack('!HHHHHH', *out)
-        f.write(s)
-
-def test_struct2():
-    """use constant format string"""
-    f = open('1m.bin', 'wb')
-    packer = struct.Struct('!HHHHHH')
-    for out in read_ascii():
-        f.write(packer.pack(*out))
-    f.close()
-    printf("size was %d\n", packer.size)
-
-def test_struct3():
-    """like struct1, with timestamp"""
-    f = open('1m.bin', 'wb')
-    for out in read_ascii():
-        s = struct.pack('!dHHHHHH', time.time(), *out)
-        f.write(s)
-
-def test_struct4():
-    """like struct2, with timestamp"""
-    f = open('1m.bin', 'wb')
-    packer = struct.Struct('!dHHHHHH')
-    for out in read_ascii():
-        f.write(packer.pack(time.time(), *out))
-    f.close()
-    printf("size was %d\n", packer.size)
-
-#raise Exception('done')
-
-with Timer("struct1"):
-    test_struct1() # 1089k
-
-with Timer("struct2"):
-    test_struct2() # 1249k
-
-with Timer("struct3"):
-    test_struct3() # 845k
-
-with Timer("struct4"):
-    test_struct4() # 922k
-
-# This seems fast enough for writing new data, since it's faster than
-# we read ascii data anyway.  Use e.g. struct4
-
--- a/test/test-struct-pack.py
+++ b/test/test-struct-pack.py
@@ -1,11 +0,0 @@
-#!/usr/bin/python
-
-import struct
-import mmap
-
-f = open("test.dat", "rb+")
-mm = mmap.mmap(f.fileno(),3)
-
-print len(mm)
-print "first 3 bytes: " + mm[0:3];
-
--- a/test/test.dat
+++ b/test/test.dat
@@ -1 +0,0 @@
-
--- a/test/todo.md
+++ b/test/todo.md
@@ -1,7 +0,0 @@
- Have a class representing the file contents
- Looks like an array
-  - len(), get(), index
-  - some form of bisect search
-  - get_extents = return [0].timestamp, [-1].timestamp
-  - 
- Can append?  Sure, why not.  Just write to the file, extend mmap accordingly.
--- a/tests/data/extract-1
+++ b/tests/data/extract-1
@@ -0,0 +1,124 @@
+# path: /newton/prep
+# layout: float32_8
+# start: Fri, 23 Mar 2012 10:00:30.000000 +0000
+# end: Fri, 23 Mar 2012 10:00:31.000000 +0000
+1332496830000000 2.517740e+05 2.242410e+05 5.688100e+03 1.915530e+03 9.329220e+03 4.183710e+03 1.212350e+03 2.641790e+03
+1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03
+1332496830016667 2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03
+1332496830025000 2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03
+1332496830033333 2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03
+1332496830041667 2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03
+1332496830050000 2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03
+1332496830058333 2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03
+1332496830066667 2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03
+1332496830075000 2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03
+1332496830083333 2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03
+1332496830091667 2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03
+1332496830100000 2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03
+1332496830108333 2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03
+1332496830116667 2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03
+1332496830125000 2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03
+1332496830133333 2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03
+1332496830141667 2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03
+1332496830150000 2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03
+1332496830158333 2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03
+1332496830166667 2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03
+1332496830175000 2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03
+1332496830183333 2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03
+1332496830191667 2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03
+1332496830200000 2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03
+1332496830208333 2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03
+1332496830216667 2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03
+1332496830225000 2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03
+1332496830233333 2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03
+1332496830241667 2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03
+1332496830250000 2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03
+1332496830258333 2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03
+1332496830266667 2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03
+1332496830275000 2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03
+1332496830283333 2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03
+1332496830291667 2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03
+1332496830300000 2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03
+1332496830308333 2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03
+1332496830316667 2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03
+1332496830325000 2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03
+1332496830333333 2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03
+1332496830341667 2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03
+1332496830350000 2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03
+1332496830358333 2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03
+1332496830366667 2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03
+1332496830375000 2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03
+1332496830383333 2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03
+1332496830391667 2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03
+1332496830400000 2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03
+1332496830408333 2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03
+1332496830416667 2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03
+1332496830425000 2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03
+1332496830433333 2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03
+1332496830441667 2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03
+1332496830450000 2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03
+1332496830458333 2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03
+1332496830466667 2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03
+1332496830475000 2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03
+1332496830483333 2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03
+1332496830491667 2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03
+1332496830500000 2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03
+1332496830508333 2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03
+1332496830516667 2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03
+1332496830525000 2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03
+1332496830533333 2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03
+1332496830541667 2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03
+1332496830550000 2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03
+1332496830558333 2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03
+1332496830566667 2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03
+1332496830575000 2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03
+1332496830583333 2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03
+1332496830591667 2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03
+1332496830600000 2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03
+1332496830608333 2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03
+1332496830616667 2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03
+1332496830625000 2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03
+1332496830633333 2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03
+1332496830641667 2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03
+1332496830650000 2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03
+1332496830658333 2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03
+1332496830666667 2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03
+1332496830675000 2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03
+1332496830683333 2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03
+1332496830691667 2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03
+1332496830700000 2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03
+1332496830708333 2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03
+1332496830716667 2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03
+1332496830725000 2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03
+1332496830733333 2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03
+1332496830741667 2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03
+1332496830750000 2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03
+1332496830758333 2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03
+1332496830766667 2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03
+1332496830775000 2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03
+1332496830783333 2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03
+1332496830791667 2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03
+1332496830800000 2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03
+1332496830808333 2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03
+1332496830816667 2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03
+1332496830825000 2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03
+1332496830833333 2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03
+1332496830841667 2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03
+1332496830850000 2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03
+1332496830858333 2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03
+1332496830866667 2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03
+1332496830875000 2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03
+1332496830883333 2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03
+1332496830891667 2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03
+1332496830900000 2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03
+1332496830908333 2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03
+1332496830916667 2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03
+1332496830925000 2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03
+1332496830933333 2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03
+1332496830941667 2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03
+1332496830950000 2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03
+1332496830958333 2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03
+1332496830966667 2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03
+1332496830975000 2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03
+1332496830983333 2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03
+1332496830991667 2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03
--- a/tests/data/extract-2
+++ b/tests/data/extract-2
@@ -0,0 +1,119 @@
+1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03
+1332496830016667 2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03
+1332496830025000 2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03
+1332496830033333 2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03
+1332496830041667 2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03
+1332496830050000 2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03
+1332496830058333 2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03
+1332496830066667 2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03
+1332496830075000 2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03
+1332496830083333 2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03
+1332496830091667 2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03
+1332496830100000 2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03
+1332496830108333 2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03
+1332496830116667 2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03
+1332496830125000 2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03
+1332496830133333 2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03
+1332496830141667 2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03
+1332496830150000 2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03
+1332496830158333 2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03
+1332496830166667 2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03
+1332496830175000 2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03
+1332496830183333 2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03
+1332496830191667 2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03
+1332496830200000 2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03
+1332496830208333 2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03
+1332496830216667 2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03
+1332496830225000 2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03
+1332496830233333 2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03
+1332496830241667 2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03
+1332496830250000 2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03
+1332496830258333 2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03
+1332496830266667 2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03
+1332496830275000 2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03
+1332496830283333 2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03
+1332496830291667 2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03
+1332496830300000 2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03
+1332496830308333 2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03
+1332496830316667 2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03
+1332496830325000 2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03
+1332496830333333 2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03
+1332496830341667 2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03
+1332496830350000 2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03
+1332496830358333 2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03
+1332496830366667 2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03
+1332496830375000 2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03
+1332496830383333 2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03
+1332496830391667 2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03
+1332496830400000 2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03
+1332496830408333 2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03
+1332496830416667 2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03
+1332496830425000 2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03
+1332496830433333 2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03
+1332496830441667 2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03
+1332496830450000 2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03
+1332496830458333 2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03
+1332496830466667 2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03
+1332496830475000 2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03
+1332496830483333 2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03
+1332496830491667 2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03
+1332496830500000 2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03
+1332496830508333 2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03
+1332496830516667 2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03
+1332496830525000 2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03
+1332496830533333 2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03
+1332496830541667 2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03
+1332496830550000 2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03
+1332496830558333 2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03
+1332496830566667 2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03
+1332496830575000 2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03
+1332496830583333 2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03
+1332496830591667 2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03
+1332496830600000 2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03
+1332496830608333 2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03
+1332496830616667 2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03
+1332496830625000 2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03
+1332496830633333 2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03
+1332496830641667 2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03
+1332496830650000 2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03
+1332496830658333 2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03
+1332496830666667 2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03
+1332496830675000 2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03
+1332496830683333 2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03
+1332496830691667 2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03
+1332496830700000 2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03
+1332496830708333 2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03
+1332496830716667 2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03
+1332496830725000 2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03
+1332496830733333 2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03
+1332496830741667 2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03
+1332496830750000 2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03
+1332496830758333 2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03
+1332496830766667 2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03
+1332496830775000 2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03
+1332496830783333 2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03
+1332496830791667 2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03
+1332496830800000 2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03
+1332496830808333 2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03
+1332496830816667 2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03
+1332496830825000 2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03
+1332496830833333 2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03
+1332496830841667 2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03
+1332496830850000 2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03
+1332496830858333 2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03
+1332496830866667 2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03
+1332496830875000 2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03
+1332496830883333 2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03
+1332496830891667 2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03
+1332496830900000 2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03
+1332496830908333 2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03
+1332496830916667 2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03
+1332496830925000 2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03
+1332496830933333 2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03
+1332496830941667 2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03
+1332496830950000 2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03
+1332496830958333 2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03
+1332496830966667 2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03
+1332496830975000 2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03
+1332496830983333 2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03
+1332496830991667 2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03
--- a/tests/data/extract-3
+++ b/tests/data/extract-3
@@ -0,0 +1 @@
+1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03
--- a/tests/data/extract-4
+++ b/tests/data/extract-4
@@ -0,0 +1,2 @@
+1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03
+1332496830016667 2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03
--- a/tests/data/extract-5
+++ b/tests/data/extract-5
@@ -0,0 +1,124 @@
+# path: /newton/prep
+# layout: float32_8
+# start: Fri, 23 Mar 2012 10:00:30.000000 +0000
+# end: Fri, 23 Mar 2012 10:00:31.000000 +0000
+2.517740e+05 2.242410e+05 5.688100e+03 1.915530e+03 9.329220e+03 4.183710e+03 1.212350e+03 2.641790e+03
+2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03
+2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03
+2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03
+2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03
+2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03
+2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03
+2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03
+2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03
+2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03
+2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03
+2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03
+2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03
+2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03
+2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03
+2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03
+2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03
+2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03
+2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03
+2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03
+2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03
+2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03
+2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03
+2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03
+2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03
+2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03
+2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03
+2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03
+2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03
+2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03
+2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03
+2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03
+2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03
+2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03
+2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03
+2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03
+2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03
+2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03
+2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03
+2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03
+2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03
+2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03
+2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03
+2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03
+2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03
+2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03
+2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03
+2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03
+2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03
+2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03
+2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03
+2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03
+2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03
+2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03
+2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03
+2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03
+2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03
+2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03
+2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03
+2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03
+2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03
+2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03
+2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03
+2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03
+2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03
+2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03
+2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03
+2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03
+2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03
+2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03
+2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03
+2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03
+2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03
+2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03
+2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03
+2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03
+2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03
+2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03
+2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03
+2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03
+2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03
+2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03
+2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03
+2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03
+2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03
+2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03
+2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03
+2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03
+2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03
+2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03
+2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03
+2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03
+2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03
+2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03
+2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03
+2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03
+2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03
+2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03
+2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03
+2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03
+2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03
+2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03
+2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03
+2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03
+2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03
+2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03
+2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03
+2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03
+2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03
+2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03
+2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03
+2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03
+2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03
+2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03
+2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03
+2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03
+2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03
+2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03
+2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03
+2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03
--- a/tests/data/extract-6
+++ b/tests/data/extract-6
@@ -0,0 +1,120 @@
+2.517740e+05 2.242410e+05 5.688100e+03 1.915530e+03 9.329220e+03 4.183710e+03 1.212350e+03 2.641790e+03
+2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03
+2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03
+2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03
+2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03
+2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03
+2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03
+2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03
+2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03
+2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03
+2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03
+2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03
+2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03
+2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03
+2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03
+2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03
+2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03
+2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03
+2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03
+2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03
+2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03
+2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03
+2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03
+2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03
+2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03
+2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03
+2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03
+2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03
+2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03
+2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03
+2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03
+2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03
+2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03
+2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03
+2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03
+2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03
+2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03
+2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03
+2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03
+2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03
+2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03
+2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03
+2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03
+2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03
+2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03
+2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03
+2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03
+2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03
+2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03
+2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03
+2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03
+2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03
+2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03
+2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03
+2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03
+2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03
+2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03
+2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03
+2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03
+2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03
+2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03
+2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03
+2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03
+2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03
+2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03
+2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03
+2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03
+2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03
+2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03
+2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03
+2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03
+2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03
+2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03
+2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03
+2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03
+2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03
+2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03
+2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03
+2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03
+2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03
+2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03
+2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03
+2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03
+2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03
+2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03
+2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03
+2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03
+2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03
+2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03
+2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03
+2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03
+2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03
+2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03
+2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03
+2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03
+2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03
+2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03
+2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03
+2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03
+2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03
+2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03
+2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03
+2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03
+2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03
+2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03
+2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03
+2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03
+2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03
+2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03
+2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03
+2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03
+2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03
+2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03
+2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03
+2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03
+2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03
+2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03
+2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03
+2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03
+2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03
--- a/tests/data/extract-7
+++ b/tests/data/extract-7
@@ -0,0 +1,124 @@
+# path: /newton/prep
+# layout: float32_8
+# start: 1332496830000000
+# end: 1332496830999000
+1332496830000000 2.517740e+05 2.242410e+05 5.688100e+03 1.915530e+03 9.329220e+03 4.183710e+03 1.212350e+03 2.641790e+03
+1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03
+1332496830016667 2.630730e+05 2.233040e+05 4.961640e+03 2.197120e+03 7.687310e+03 4.861860e+03 2.732780e+03 3.008540e+03
+1332496830025000 2.576140e+05 2.233230e+05 5.003660e+03 3.525140e+03 7.165310e+03 4.685620e+03 1.715380e+03 3.440480e+03
+1332496830033333 2.557800e+05 2.219150e+05 6.357310e+03 2.145290e+03 8.426970e+03 3.775350e+03 1.475390e+03 3.797240e+03
+1332496830041667 2.601660e+05 2.230080e+05 6.702590e+03 1.484960e+03 9.288100e+03 3.330830e+03 1.228500e+03 3.214320e+03
+1332496830050000 2.612310e+05 2.264260e+05 4.980060e+03 2.982380e+03 8.499630e+03 4.267670e+03 9.940890e+02 2.292890e+03
+1332496830058333 2.551170e+05 2.266420e+05 4.584410e+03 4.656440e+03 7.860150e+03 5.317310e+03 1.473600e+03 2.111690e+03
+1332496830066667 2.533000e+05 2.235540e+05 6.455090e+03 3.036650e+03 8.869750e+03 4.986310e+03 2.607360e+03 2.839590e+03
+1332496830075000 2.610610e+05 2.212630e+05 6.951980e+03 1.500240e+03 9.386100e+03 3.791680e+03 2.677010e+03 3.980630e+03
+1332496830083333 2.665030e+05 2.231980e+05 5.189610e+03 2.594560e+03 8.571530e+03 3.175000e+03 9.198400e+02 3.792010e+03
+1332496830091667 2.606920e+05 2.251840e+05 3.782480e+03 4.642880e+03 7.662960e+03 3.917790e+03 -2.510970e+02 2.907060e+03
+1332496830100000 2.539630e+05 2.250810e+05 5.123530e+03 3.839550e+03 8.669030e+03 4.877820e+03 9.437240e+02 2.527450e+03
+1332496830108333 2.565550e+05 2.241690e+05 5.930600e+03 2.298540e+03 8.906710e+03 5.331680e+03 2.549910e+03 3.053560e+03
+1332496830116667 2.608890e+05 2.250100e+05 4.681130e+03 2.971870e+03 7.900040e+03 4.874080e+03 2.322430e+03 3.649120e+03
+1332496830125000 2.579440e+05 2.249230e+05 3.291140e+03 4.357090e+03 7.131590e+03 4.385560e+03 1.077050e+03 3.664040e+03
+1332496830133333 2.550090e+05 2.230180e+05 4.584820e+03 2.864000e+03 8.469490e+03 3.625580e+03 9.855570e+02 3.504230e+03
+1332496830141667 2.601140e+05 2.219470e+05 5.676190e+03 1.210340e+03 9.393780e+03 3.390240e+03 1.654020e+03 3.018700e+03
+1332496830150000 2.642770e+05 2.244380e+05 4.446620e+03 2.176720e+03 8.142090e+03 4.584880e+03 2.327830e+03 2.615800e+03
+1332496830158333 2.592210e+05 2.264710e+05 2.734440e+03 4.182760e+03 6.389550e+03 5.540520e+03 1.958880e+03 2.720120e+03
+1332496830166667 2.526500e+05 2.248310e+05 4.163640e+03 2.989990e+03 7.179200e+03 5.213060e+03 1.929550e+03 3.457660e+03
+1332496830175000 2.570830e+05 2.220480e+05 5.759040e+03 7.024410e+02 8.566550e+03 3.552020e+03 1.832940e+03 3.956190e+03
+1332496830183333 2.631300e+05 2.229670e+05 5.141140e+03 1.166120e+03 8.666960e+03 2.720370e+03 9.713740e+02 3.479730e+03
+1332496830191667 2.602360e+05 2.252650e+05 3.425140e+03 3.339080e+03 7.853610e+03 3.674950e+03 5.259080e+02 2.443310e+03
+1332496830200000 2.535030e+05 2.245270e+05 4.398130e+03 2.927430e+03 8.110280e+03 4.842470e+03 1.513870e+03 2.467100e+03
+1332496830208333 2.561260e+05 2.226930e+05 6.043530e+03 6.562240e+02 8.797560e+03 4.832410e+03 2.832370e+03 3.426140e+03
+1332496830216667 2.616770e+05 2.236080e+05 5.830460e+03 1.033910e+03 8.123940e+03 3.980690e+03 1.927960e+03 4.092720e+03
+1332496830225000 2.594570e+05 2.255360e+05 4.015570e+03 2.995990e+03 7.135440e+03 3.713550e+03 3.072200e+02 3.849430e+03
+1332496830233333 2.533520e+05 2.242160e+05 4.650560e+03 3.196620e+03 8.131280e+03 3.586160e+03 7.083230e+01 3.074180e+03
+1332496830241667 2.561240e+05 2.215130e+05 6.100480e+03 8.219800e+02 9.757540e+03 3.474510e+03 1.647520e+03 2.559860e+03
+1332496830250000 2.630240e+05 2.215590e+05 5.789960e+03 6.994170e+02 9.129740e+03 4.153080e+03 2.829250e+03 2.677270e+03
+1332496830258333 2.617200e+05 2.240150e+05 4.358500e+03 2.645360e+03 7.414110e+03 4.810670e+03 2.225990e+03 3.185990e+03
+1332496830266667 2.547560e+05 2.242400e+05 4.857380e+03 3.229680e+03 7.539310e+03 4.769140e+03 1.507130e+03 3.668260e+03
+1332496830275000 2.568890e+05 2.226580e+05 6.473420e+03 1.214110e+03 9.010760e+03 3.848730e+03 1.303840e+03 3.778500e+03
+1332496830283333 2.642080e+05 2.233160e+05 5.700450e+03 1.116560e+03 9.087610e+03 3.846680e+03 1.293590e+03 2.891560e+03
+1332496830291667 2.633100e+05 2.257190e+05 3.936120e+03 3.252360e+03 7.552850e+03 4.897860e+03 1.156630e+03 2.037160e+03
+1332496830300000 2.550790e+05 2.250860e+05 4.536450e+03 3.960110e+03 7.454590e+03 5.479070e+03 1.596360e+03 2.190800e+03
+1332496830308333 2.544870e+05 2.225080e+05 6.635860e+03 1.758850e+03 8.732970e+03 4.466970e+03 2.650360e+03 3.139310e+03
+1332496830316667 2.612410e+05 2.224320e+05 6.702270e+03 1.085130e+03 8.989230e+03 3.112990e+03 1.933560e+03 3.828410e+03
+1332496830325000 2.621190e+05 2.255870e+05 4.714950e+03 2.892360e+03 8.107820e+03 2.961310e+03 2.399780e+02 3.273720e+03
+1332496830333333 2.549990e+05 2.265140e+05 4.532090e+03 4.126900e+03 8.200130e+03 3.872590e+03 5.608900e+01 2.370580e+03
+1332496830341667 2.542890e+05 2.240330e+05 6.538810e+03 2.251440e+03 9.419430e+03 4.564450e+03 2.077810e+03 2.508170e+03
+1332496830350000 2.618900e+05 2.219600e+05 6.846090e+03 1.475270e+03 9.125590e+03 4.598290e+03 3.299220e+03 3.475420e+03
+1332496830358333 2.645020e+05 2.230850e+05 5.066380e+03 3.270560e+03 7.933170e+03 4.173710e+03 1.908910e+03 3.867460e+03
+1332496830366667 2.578890e+05 2.236560e+05 4.201660e+03 4.473640e+03 7.688340e+03 4.161580e+03 6.875790e+02 3.653690e+03
+1332496830375000 2.542700e+05 2.231510e+05 5.715140e+03 2.752140e+03 9.273320e+03 3.772950e+03 8.964040e+02 3.256060e+03
+1332496830383333 2.582570e+05 2.242170e+05 6.114310e+03 1.856860e+03 9.604320e+03 4.200490e+03 1.764380e+03 2.939220e+03
+1332496830391667 2.600200e+05 2.268680e+05 4.237530e+03 3.605880e+03 8.066220e+03 5.430250e+03 2.138580e+03 2.696710e+03
+1332496830400000 2.550830e+05 2.259240e+05 3.350310e+03 4.853070e+03 7.045820e+03 5.925200e+03 1.893610e+03 2.897340e+03
+1332496830408333 2.544530e+05 2.221270e+05 5.271330e+03 2.491500e+03 8.436680e+03 5.032080e+03 2.436050e+03 3.724590e+03
+1332496830416667 2.625880e+05 2.199500e+05 5.994620e+03 7.892740e+02 9.029650e+03 3.515740e+03 1.953570e+03 4.014520e+03
+1332496830425000 2.656100e+05 2.233330e+05 4.391410e+03 2.400960e+03 8.146460e+03 3.536960e+03 5.302320e+02 3.133920e+03
+1332496830433333 2.574700e+05 2.269770e+05 2.975320e+03 4.633530e+03 7.278560e+03 4.640100e+03 -5.015020e+01 2.024960e+03
+1332496830441667 2.506870e+05 2.263310e+05 4.517860e+03 3.183800e+03 8.072600e+03 5.281660e+03 1.605140e+03 2.335140e+03
+1332496830450000 2.555630e+05 2.244950e+05 5.551000e+03 1.101300e+03 8.461490e+03 4.725700e+03 2.726670e+03 3.480540e+03
+1332496830458333 2.613350e+05 2.246450e+05 4.764680e+03 1.557020e+03 7.833350e+03 3.524810e+03 1.577410e+03 4.038620e+03
+1332496830466667 2.602690e+05 2.240080e+05 3.558030e+03 2.987610e+03 7.362440e+03 3.279230e+03 5.624420e+02 3.786550e+03
+1332496830475000 2.574350e+05 2.217770e+05 4.972600e+03 2.166880e+03 8.481440e+03 3.328720e+03 1.037130e+03 3.271370e+03
+1332496830483333 2.610460e+05 2.215500e+05 5.816180e+03 5.902170e+02 9.120930e+03 3.895400e+03 2.382670e+03 2.824170e+03
+1332496830491667 2.627660e+05 2.244730e+05 4.835050e+03 1.785770e+03 7.880760e+03 4.745620e+03 2.443660e+03 3.229550e+03
+1332496830500000 2.565090e+05 2.264130e+05 3.758870e+03 3.461200e+03 6.743770e+03 4.928960e+03 1.536620e+03 3.546690e+03
+1332496830508333 2.507930e+05 2.243720e+05 5.218490e+03 2.865260e+03 7.803960e+03 4.351090e+03 1.333820e+03 3.680490e+03
+1332496830516667 2.563190e+05 2.220660e+05 6.403970e+03 7.323450e+02 9.627760e+03 3.089300e+03 1.516780e+03 3.653690e+03
+1332496830525000 2.633430e+05 2.232350e+05 5.200430e+03 1.388580e+03 9.372850e+03 3.371230e+03 1.450390e+03 2.678910e+03
+1332496830533333 2.609030e+05 2.251100e+05 3.722580e+03 3.246660e+03 7.876540e+03 4.716810e+03 1.498440e+03 2.116520e+03
+1332496830541667 2.544160e+05 2.237690e+05 4.841650e+03 2.956400e+03 8.115920e+03 5.392360e+03 2.142810e+03 2.652320e+03
+1332496830550000 2.566980e+05 2.221720e+05 6.471230e+03 9.703960e+02 8.834980e+03 4.816840e+03 2.376630e+03 3.605860e+03
+1332496830558333 2.618410e+05 2.235370e+05 5.500740e+03 1.189660e+03 8.365730e+03 4.016470e+03 1.042270e+03 3.821200e+03
+1332496830566667 2.595030e+05 2.258400e+05 3.827930e+03 3.088840e+03 7.676140e+03 3.978310e+03 -3.570070e+02 3.016420e+03
+1332496830575000 2.534570e+05 2.246360e+05 4.914610e+03 3.097450e+03 8.224900e+03 4.321440e+03 1.713740e+02 2.412360e+03
+1332496830583333 2.560290e+05 2.222210e+05 6.841800e+03 1.028500e+03 9.252300e+03 4.387570e+03 2.418140e+03 2.510100e+03
+1332496830591667 2.628400e+05 2.225500e+05 6.210250e+03 1.410730e+03 8.538900e+03 4.152580e+03 3.009300e+03 3.219760e+03
+1332496830600000 2.616330e+05 2.250650e+05 4.284530e+03 3.357210e+03 7.282170e+03 3.823590e+03 1.402840e+03 3.644670e+03
+1332496830608333 2.545910e+05 2.251090e+05 4.693160e+03 3.647740e+03 7.745160e+03 3.686380e+03 4.901610e+02 3.448860e+03
+1332496830616667 2.547800e+05 2.235990e+05 6.527380e+03 1.569870e+03 9.438430e+03 3.456580e+03 1.162520e+03 3.252010e+03
+1332496830625000 2.606390e+05 2.241070e+05 6.531050e+03 1.633050e+03 9.283720e+03 4.174020e+03 2.089550e+03 2.775750e+03
+1332496830633333 2.611080e+05 2.254720e+05 4.968260e+03 3.527850e+03 7.692870e+03 5.137100e+03 2.207390e+03 2.436660e+03
+1332496830641667 2.557750e+05 2.237080e+05 4.963450e+03 4.017370e+03 7.701420e+03 5.269650e+03 2.284400e+03 2.842080e+03
+1332496830650000 2.573980e+05 2.209470e+05 6.767500e+03 1.645710e+03 9.107070e+03 4.000180e+03 2.548860e+03 3.624770e+03
+1332496830658333 2.649240e+05 2.215590e+05 6.471460e+03 1.110330e+03 9.459650e+03 3.108170e+03 1.696970e+03 3.893440e+03
+1332496830666667 2.653390e+05 2.257330e+05 4.348800e+03 3.459510e+03 8.475300e+03 4.031240e+03 5.733470e+02 2.910270e+03
+1332496830675000 2.568140e+05 2.269950e+05 3.479540e+03 4.949790e+03 7.499910e+03 5.624710e+03 7.516560e+02 2.347710e+03
+1332496830683333 2.533160e+05 2.251610e+05 5.147060e+03 3.218430e+03 8.460160e+03 5.869300e+03 2.336320e+03 2.987960e+03
+1332496830691667 2.593600e+05 2.231010e+05 5.549120e+03 1.869950e+03 8.740760e+03 4.668940e+03 2.457910e+03 3.758820e+03
+1332496830700000 2.620120e+05 2.240160e+05 4.173610e+03 3.004130e+03 8.157040e+03 3.704730e+03 9.879640e+02 3.652750e+03
+1332496830708333 2.571760e+05 2.244200e+05 3.517300e+03 4.118750e+03 7.822240e+03 3.718230e+03 3.726490e+01 2.953680e+03
+1332496830716667 2.551460e+05 2.233220e+05 4.923980e+03 2.330680e+03 9.095910e+03 3.792400e+03 1.013070e+03 2.711240e+03
+1332496830725000 2.605240e+05 2.236510e+05 5.413630e+03 1.146210e+03 8.817170e+03 4.419650e+03 2.446650e+03 2.832050e+03
+1332496830733333 2.620980e+05 2.257520e+05 4.262980e+03 2.270970e+03 7.135480e+03 5.067120e+03 2.294680e+03 3.376620e+03
+1332496830741667 2.568890e+05 2.253790e+05 3.606460e+03 3.568190e+03 6.552650e+03 4.970270e+03 1.516380e+03 3.662570e+03
+1332496830750000 2.539480e+05 2.226310e+05 5.511700e+03 2.066300e+03 7.952660e+03 4.019910e+03 1.513140e+03 3.752630e+03
+1332496830758333 2.597990e+05 2.220670e+05 5.873500e+03 6.085840e+02 9.253780e+03 2.870740e+03 1.348240e+03 3.344200e+03
+1332496830766667 2.625470e+05 2.249010e+05 4.346080e+03 1.928100e+03 8.590970e+03 3.455460e+03 9.043910e+02 2.379270e+03
+1332496830775000 2.561370e+05 2.267610e+05 3.423560e+03 3.379080e+03 7.471150e+03 4.894170e+03 1.153540e+03 2.031410e+03
+1332496830783333 2.503260e+05 2.250130e+05 5.519980e+03 2.423970e+03 7.991760e+03 5.117950e+03 2.098790e+03 3.099240e+03
+1332496830791667 2.554540e+05 2.229920e+05 6.547950e+03 4.964960e+02 8.751340e+03 3.900560e+03 2.132290e+03 4.076810e+03
+1332496830800000 2.612860e+05 2.234890e+05 5.152850e+03 1.501510e+03 8.425610e+03 2.888030e+03 7.761140e+02 3.786360e+03
+1332496830808333 2.589690e+05 2.240690e+05 3.832610e+03 3.001980e+03 7.979260e+03 3.182310e+03 5.271600e+01 2.874800e+03
+1332496830816667 2.549460e+05 2.220350e+05 5.317880e+03 2.139800e+03 9.103140e+03 3.955610e+03 1.235170e+03 2.394150e+03
+1332496830825000 2.586760e+05 2.212050e+05 6.594910e+03 5.053440e+02 9.423360e+03 4.562470e+03 2.913740e+03 2.892350e+03
+1332496830833333 2.621250e+05 2.235660e+05 5.116750e+03 1.773600e+03 8.082200e+03 4.776370e+03 2.386390e+03 3.659730e+03
+1332496830841667 2.578350e+05 2.259180e+05 3.714300e+03 3.477080e+03 7.205370e+03 4.554610e+03 7.115390e+02 3.878420e+03
+1332496830850000 2.536600e+05 2.243710e+05 5.022450e+03 2.592430e+03 8.277200e+03 4.119370e+03 4.865080e+02 3.666740e+03
+1332496830858333 2.595030e+05 2.220610e+05 6.589950e+03 6.599360e+02 9.596920e+03 3.598100e+03 1.702490e+03 3.036600e+03
+1332496830866667 2.654950e+05 2.228430e+05 5.541850e+03 1.728430e+03 8.459960e+03 4.492000e+03 2.231970e+03 2.430620e+03
+1332496830875000 2.609290e+05 2.249960e+05 4.000950e+03 3.745990e+03 6.983790e+03 5.430860e+03 1.855260e+03 2.533380e+03
+1332496830883333 2.527160e+05 2.243350e+05 5.086560e+03 3.401150e+03 7.597970e+03 5.196120e+03 1.755720e+03 3.079760e+03
+1332496830891667 2.541100e+05 2.231110e+05 6.822190e+03 1.229080e+03 9.164340e+03 3.761230e+03 1.679390e+03 3.584880e+03
+1332496830900000 2.599690e+05 2.246930e+05 6.183950e+03 1.538500e+03 9.222080e+03 3.139170e+03 9.499020e+02 3.180800e+03
+1332496830908333 2.590780e+05 2.269130e+05 4.388890e+03 3.694820e+03 8.195020e+03 3.933000e+03 4.260800e+02 2.388450e+03
+1332496830916667 2.545630e+05 2.247600e+05 5.168440e+03 4.020940e+03 8.450270e+03 4.758910e+03 1.458900e+03 2.286430e+03
+1332496830925000 2.580590e+05 2.212170e+05 6.883460e+03 1.649530e+03 9.232780e+03 4.457650e+03 3.057820e+03 3.031950e+03
+1332496830933333 2.646670e+05 2.211770e+05 6.218510e+03 1.645730e+03 8.657180e+03 3.663500e+03 2.528280e+03 3.978340e+03
+1332496830941667 2.629250e+05 2.243820e+05 4.627500e+03 3.635930e+03 7.892800e+03 3.431320e+03 6.045090e+02 3.901370e+03
+1332496830950000 2.547080e+05 2.254480e+05 4.408250e+03 4.461040e+03 8.197170e+03 3.953750e+03 -4.453460e+01 3.154870e+03
+1332496830958333 2.537020e+05 2.246350e+05 5.825770e+03 2.577050e+03 9.590050e+03 4.569250e+03 1.460270e+03 2.785170e+03
+1332496830966667 2.602060e+05 2.241400e+05 5.387980e+03 1.951160e+03 8.789510e+03 5.131660e+03 2.706380e+03 2.972480e+03
+1332496830975000 2.612400e+05 2.247370e+05 3.860810e+03 3.418310e+03 7.414530e+03 5.284520e+03 2.271380e+03 3.183150e+03
+1332496830983333 2.561400e+05 2.232520e+05 3.850010e+03 3.957140e+03 7.262650e+03 4.964640e+03 1.499510e+03 3.453130e+03
+1332496830991667 2.561160e+05 2.213490e+05 5.594480e+03 2.054400e+03 8.835130e+03 3.662010e+03 1.485510e+03 3.613010e+03
--- a/tests/data/extract-8
+++ b/tests/data/extract-8
@@ -0,0 +1,28 @@
+# interval-start 1332496919900000
+1332496919900000 2.523050e+05 2.254020e+05 4.779410e+03 3.638030e+03 8.138070e+03 4.334460e+03 1.083780e+03 3.743730e+03
+1332496919908333 2.551190e+05 2.237870e+05 5.965640e+03 2.076350e+03 9.468790e+03 3.693880e+03 1.247860e+03 3.393680e+03
+1332496919916667 2.616370e+05 2.247980e+05 4.848970e+03 2.315620e+03 9.323300e+03 4.225460e+03 1.805780e+03 2.593050e+03
+1332496919925000 2.606460e+05 2.251300e+05 3.061360e+03 3.951840e+03 7.662910e+03 5.341410e+03 1.986520e+03 2.276780e+03
+1332496919933333 2.559710e+05 2.235030e+05 4.096030e+03 3.296970e+03 7.827080e+03 5.452120e+03 2.492520e+03 2.929450e+03
+1332496919941667 2.579260e+05 2.217080e+05 5.472320e+03 1.555700e+03 8.495760e+03 4.491140e+03 2.379780e+03 3.741710e+03
+1332496919950000 2.610180e+05 2.242350e+05 4.669770e+03 1.876190e+03 8.366680e+03 3.677510e+03 9.021690e+02 3.549040e+03
+1332496919958333 2.569150e+05 2.274650e+05 2.785070e+03 3.751930e+03 7.440320e+03 3.964860e+03 -3.227860e+02 2.460890e+03
+1332496919966667 2.509510e+05 2.262000e+05 3.772710e+03 3.131950e+03 8.159860e+03 4.539860e+03 7.375190e+02 2.126750e+03
+1332496919975000 2.556710e+05 2.223720e+05 5.826200e+03 8.715560e+02 9.120240e+03 4.545110e+03 2.804310e+03 2.721000e+03
+1332496919983333 2.649730e+05 2.214860e+05 5.839130e+03 4.659180e+02 8.628300e+03 3.934870e+03 2.972490e+03 3.773730e+03
+1332496919991667 2.652170e+05 2.233920e+05 3.718770e+03 2.834970e+03 7.209900e+03 3.460260e+03 1.324930e+03 4.075960e+03
+# interval-end 1332496919991668
+# interval-start 1332496920000000
+1332496920000000 2.564370e+05 2.244300e+05 4.011610e+03 3.475340e+03 7.495890e+03 3.388940e+03 2.613970e+02 3.731260e+03
+1332496920008333 2.539630e+05 2.241670e+05 5.621070e+03 1.548010e+03 9.165170e+03 3.522930e+03 1.058930e+03 2.996960e+03
+1332496920016667 2.585080e+05 2.249300e+05 6.011400e+03 8.188660e+02 9.039950e+03 4.482440e+03 2.490390e+03 2.679340e+03
+1332496920025000 2.596270e+05 2.260220e+05 4.474500e+03 2.423020e+03 7.414190e+03 5.071970e+03 2.439380e+03 2.962960e+03
+1332496920033333 2.551870e+05 2.246320e+05 4.738570e+03 3.398040e+03 7.395120e+03 4.726450e+03 1.839030e+03 3.393530e+03
+1332496920041667 2.571020e+05 2.216230e+05 6.144130e+03 1.441090e+03 8.756480e+03 3.495320e+03 1.869940e+03 3.752530e+03
+1332496920050000 2.636530e+05 2.217700e+05 6.221770e+03 7.389620e+02 9.547600e+03 2.666820e+03 1.462660e+03 3.332570e+03
+1332496920058333 2.636130e+05 2.252560e+05 4.477120e+03 2.437450e+03 8.510210e+03 3.855630e+03 9.594420e+02 2.387180e+03
+1332496920066667 2.553500e+05 2.262640e+05 4.283720e+03 3.923940e+03 7.912470e+03 5.466520e+03 1.284990e+03 2.093720e+03
+1332496920075000 2.527270e+05 2.246090e+05 5.851930e+03 2.491980e+03 8.540630e+03 5.623050e+03 2.339780e+03 3.007140e+03
+1332496920083333 2.584750e+05 2.235780e+05 5.924870e+03 1.394480e+03 8.779620e+03 4.544180e+03 2.132030e+03 3.849760e+03
+1332496920091667 2.615630e+05 2.246090e+05 4.336140e+03 2.455750e+03 8.055380e+03 3.469110e+03 6.278730e+02 3.664200e+03
+# interval-end 1332496920100000
--- a/tests/data/prep-20120323T1000
+++ b/tests/data/prep-20120323T1000
--- a/tests/data/prep-20120323T1002
+++ b/tests/data/prep-20120323T1002
--- a/Show More
+++ b/Show More
				`@@ -0,0 +1 @@`
				`1332496830008333 2.595670e+05 2.226980e+05 6.207600e+03 6.786720e+02 9.380230e+03 4.575580e+03 2.830610e+03 2.688630e+03`