Browse Source

Branch

git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@10372 ddd99763-3ecb-0310-9145-efcb8ce7c51f
tags/bxinterval-last
Jim Paris 12 years ago
parent
commit
4ca726439b
36 changed files with 598 additions and 961 deletions
  1. +3
    -0
      Makefile
  2. +1
    -1
      README.txt
  3. +1
    -0
      TODO
  4. +0
    -26
      bin/nilm-test.py
  5. +5
    -2
      nilmdb/__init__.py
  6. +0
    -37
      nilmdb/fileinterval.py
  7. +0
    -205
      nilmdb/interval.py
  8. +67
    -0
      nilmdb/layout.py
  9. +89
    -0
      nilmdb/nilmdb.py
  10. +7
    -0
      nilmdb/printf.py
  11. +69
    -0
      nilmdb/serializer.py
  12. +111
    -0
      nilmdb/server.py
  13. +0
    -46
      nilmdb/test_fileinterval.py
  14. +0
    -188
      nilmdb/test_interval.py
  15. +0
    -5
      pytables-test/Makefile
  16. +0
    -2
      pytables-test/README.jim
  17. +0
    -12
      pytables-test/TODO.txt
  18. +0
    -3
      pytables-test/indexing-notes
  19. +0
    -3
      pytables-test/sample-query
  20. +0
    -53
      pytables-test/server.py
  21. +0
    -16
      pytables-test/speed-pytables.py
  22. +0
    -54
      pytables-test/test-indexing.py
  23. +0
    -54
      pytables-test/test-write.py
  24. +0
    -9
      setup.py
  25. +0
    -5
      test/Makefile
  26. +0
    -4
      test/printf.py
  27. +0
    -60
      test/speed-readascii.py
  28. +0
    -74
      test/speed-readbinary.py
  29. +0
    -76
      test/speed-writebinary.py
  30. +0
    -11
      test/test-struct-pack.py
  31. +0
    -1
      test/test.dat
  32. +0
    -7
      test/todo.md
  33. +150
    -0
      tests/test_nilmdb.py
  34. +25
    -0
      tests/test_printf.py
  35. +70
    -0
      tests/test_serializer.py
  36. +0
    -7
      todo.txt

+ 3
- 0
Makefile View File

@@ -1,2 +1,5 @@
all:
nosetests

clean:
find . -name '*pyc' | xargs rm

+ 1
- 1
README.txt View File

@@ -1,4 +1,4 @@
To install,

python seutp.py install
python setup.py install


+ 1
- 0
TODO View File

@@ -0,0 +1 @@
- More stream operations: insert & extract

+ 0
- 26
bin/nilm-test.py View File

@@ -1,26 +0,0 @@
#!/usr/bin/python

from nilmdb import Interval
from optparse import OptionParser
import sys

version = "1.0"

parser = OptionParser()
parser.add_option("-d", "--db", dest="database", metavar="DATABASE",
help="location of sqlite database")
parser.add_option("-V", "--version", dest="version", default=False, action="store_true",
help="print version then exit")

(options, args) = parser.parse_args()

if (options.version):
print "This script version: " + version
sys.exit(0)

if options.database is None:
print "Error: database is mandatory"
sys.exit(1)

print "Database is " + options.database


+ 5
- 2
nilmdb/__init__.py View File

@@ -1,2 +1,5 @@
from nilmdb.interval import *
from nilmdb.fileinterval import *
# empty
from nilmdb import NilmDB
from server import Server
from layout import *


+ 0
- 37
nilmdb/fileinterval.py View File

@@ -1,37 +0,0 @@
"""FileInterval

An Interval that is backed with file data storage"""

from nilmdb.interval import Interval, IntervalSet, IntervalError
from datetime import datetime
import bisect

class FileInterval(Interval):
"""Represents an interval of time and its corresponding data"""

def __init__(self, start, end,
filename,
start_offset = None, end_offset = None):
self.start = start
self.end = end
self.filename = filename
if start_offset is None:
start_offset = 0
self.start_offset = start_offset
if end_offset is None:
f = open(filename, 'rb')
f.seek(0, os.SEEK_END)
end_offset = f.tell()
self.end_offset = end_offset

def __setattr__(self, name, value):
pass

def subset(self, start, end):
"""Return a new Interval that is a subset of this one"""
# TODO: Any magic regarding file/offset/length mapping for subsets
if (start < self.start or end > self.end):
raise IntervalError("not a subset")
return FileInterval(start, end)


+ 0
- 205
nilmdb/interval.py View File

@@ -1,205 +0,0 @@
"""Interval and IntervalSet

Represents an interval of time, and a sorted set of such intervals"""

from datetime import datetime
import bisect

class IntervalError(Exception):
"""Error due to interval overlap, etc"""
pass

class Interval(object):
"""Represents an interval of time"""

start = None
end = None

def __init__(self, start, end):
self.start = start
self.end = end

def __repr__(self):
return "Interval(" + repr(self.start) + ", " + repr(self.end) + ")"

def __str__(self):
return "[" + str(self.start) + " -> " + str(self.end) + "]"

def __setattr__(self, name, value):
"""Set attribute"""
# TODO: If we need to manipulate file names, offsets, lengths, etc,
# based on start and end time changing, maybe this is the right spot?
# Or we could just disallow changing it here.
if not isinstance(value, datetime):
raise IntervalError("Must set datetime values")
self.__dict__[name] = value
if (type(self.start) is type(self.end)):
if (self.start > self.end):
raise IntervalError("Interval start must precede interval end")

def __cmp__(self, other):
"""Compare two intervals. If non-equal, order by start then end"""
if not isinstance(other, Interval):
raise TypeError("Can't compare to non-interval")
if (self.start == other.start):
if (self.end < other.end):
return -1
if (self.end > other.end):
return 1
return 0
if (self.start < other.start):
return -1
return 1
def intersects(self, other):
"""Return True if two Interval objects intersect"""
if (not isinstance(other, Interval)):
raise TypeError("need Interval for intersection test")
if (self.end <= other.start or
self.start >= other.end):
return False
else:
return True

def is_adjacent(self, other):
"""Return True if two Intervals are adjacent (same end or start)"""
if (not isinstance(other, Interval)):
raise TypeError("need Interval for adjacency test")
if (self.end == other.start or
self.start == other.end):
return True
else:
return False

def subset(self, start, end):
"""Return a new Interval that is a subset of this one"""
# TODO: Any magic regarding file/offset/length mapping for subsets
if (start < self.start or end > self.end):
raise IntervalError("not a subset")
return Interval(start, end)

class IntervalSet(object):
"""A non-intersecting set of intervals

Kept sorted internally"""

def __init__(self, iterable=None):
self.data = []
if iterable is not None:
if isinstance(iterable, Interval):
iterable = [iterable]
self._add_intervals(iterable)

def __iter__(self):
return self.data.__iter__()

def __repr__(self):
return "IntervalSet(" + repr(list(self.data)) + ")"

def __cmp__(self, other):
# compare isn't supported, they don't really have an ordering
raise TypeError("can't compare IntervalSets with cmp()")

def __eq__(self, other):
"""Test equality of two IntervalSets.

Treats adjacent Intervals as equivalent to one long interval,
so this function really tests whether the IntervalSets cover
the same spans of time."""
if not isinstance(other, IntervalSet):
return False
i = 0
j = 0
outside = True
try:
while True:
if (outside):
# To match, we need to be finished this set
if (i >= len(self) and j >= len(other)):
return True
# Or the starts need to match
if (self[i].start != other[j].start):
return False
outside = False
else:
# We can move on if the two interval ends match
if (self[i].end == other[j].end):
i += 1
j += 1
outside = True
else:
# Whichever ends first needs to be adjacent to the next
if (self[i].end < other[j].end):
if (not self[i].is_adjacent(self[i+1])):
return False
i += 1
else:
if (not other[j].is_adjacent(other[j+1])):
return False
j += 1
except IndexError:
return False

def __ne__(self, other):
return not self.__eq__(other)

def __len__(self):
return len(self.data)

def __getitem__(self, key):
return self.data.__getitem__(key)

def __iadd__(self, other):
"""Inplace add -- modifies self

This throws an exception if the regions being added intersect."""
if isinstance(other, Interval):
other = [other]
self._add_intervals(other)
return self
def __add__(self, other):
"""Add -- returns a new object

This throws an exception if the regions being added intersect."""
new = IntervalSet(self)
new += IntervalSet(other)
return new

def __and__(self, other):
"""Compute a new IntervalSet from the intersection of two others

Output intervals are built as subsets of the intervals in the
first argument (self)."""
# If we were given a set, intersect with each interval in that set
if isinstance(other, IntervalSet):
out = IntervalSet()
for interval in other.data:
out += self & interval
return out

if not isinstance(other, Interval):
raise TypeError("can't intersect with that type")

out = IntervalSet()
for this in self.data:
# If there's any overlap, add the overlapping region
if (this.end > other.start and this.start < other.end):
out += this.subset(max(this.start, other.start),
min(this.end, other.end))
return out
def _add_intervals(self, iterable):
"""Add each Interval from an interable to this set"""
for element in iter(iterable):
self._add_single_interval(element)

def _add_single_interval(self, interval):
"""Add one Interval to this set"""
if (not isinstance(interval, Interval)):
raise TypeError("can only add Intervals")
for existing in self.data:
if existing.intersects(interval):
raise IntervalError("Tried to add overlapping interval "
"to this set")
bisect.insort(self.data, interval)

+ 67
- 0
nilmdb/layout.py View File

@@ -0,0 +1,67 @@
import tables

# Note: 'expected_daily_rows' is an estimate, used to help PyTables
# choose a proper chunk size for the table.

# Registry of the fixed table layouts this database understands.
# Each entry maps a layout name to:
#   'expected_daily_rows' - rough rows/day (sample rate * 86400 s/day)
#   'description'         - PyTables column definitions; 'pos' fixes
#                           the column order within the table.
layouts = {
    # Typical prep output
    "PrepData": {
        'expected_daily_rows': 120 * 86400,   # 120 Hz
        'description': {
            'timestamp': tables.Int64Col(pos=1),
            'p1': tables.Float32Col(pos=2),
            'q1': tables.Float32Col(pos=3),
            'p3': tables.Float32Col(pos=4),
            'q3': tables.Float32Col(pos=5),
            'p5': tables.Float32Col(pos=6),
            'q5': tables.Float32Col(pos=7),
            'p7': tables.Float32Col(pos=8),
            'q7': tables.Float32Col(pos=9),
            } },

    # Raw data
    "RawData": {
        'expected_daily_rows': 8000 * 86400,  # 8 kHz
        'description': {
            'timestamp': tables.Int64Col(pos=1),
            'va': tables.UInt16Col(pos=2),
            'vb': tables.UInt16Col(pos=3),
            'vc': tables.UInt16Col(pos=4),
            'ia': tables.UInt16Col(pos=5),
            'ib': tables.UInt16Col(pos=6),
            'ic': tables.UInt16Col(pos=7),
            } },

    # Raw data plus 60 Hz notched current
    "RawNotchedData": {
        'expected_daily_rows': 8000 * 86400,  # 8 kHz
        'description': {
            'timestamp': tables.Int64Col(pos=1),
            'va': tables.UInt16Col(pos=2),
            'vb': tables.UInt16Col(pos=3),
            'vc': tables.UInt16Col(pos=4),
            'ia': tables.UInt16Col(pos=5),
            'ib': tables.UInt16Col(pos=6),
            'ic': tables.UInt16Col(pos=7),
            'notch_ia': tables.UInt16Col(pos=8),
            'notch_ib': tables.UInt16Col(pos=9),
            'notch_ic': tables.UInt16Col(pos=10),
            } },
    }

def description(layout):
    """Build and return a tables.Description for the named layout.

    Raises KeyError if *layout* is not a known layout name."""
    column_defs = layouts[layout]["description"]
    return tables.Description(column_defs)

def expected_daily_rows(layout):
    """Return the estimated number of rows per day for the named layout.

    Raises KeyError if *layout* is not a known layout name."""
    info = layouts[layout]
    return info["expected_daily_rows"]

def desc_to_layout(match_desc):
    """
    Match a tables.Description to our fixed list of layouts, and
    return a string representing the one that matched, or None
    if nothing matches.

    Two descriptions match when their column-definition dictionaries
    (tables.Description._v_colObjects) compare equal.
    """
    # Iterate the dict directly; .keys() is redundant here.
    for layout in layouts:
        if description(layout)._v_colObjects == match_desc._v_colObjects:
            return layout
    return None

+ 89
- 0
nilmdb/nilmdb.py View File

@@ -0,0 +1,89 @@
"""NilmDB

Object that represents a NILM database file"""

# Need absolute_import so that "import nilmdb" won't pull in nilmdb.py,
# but will pull the nilmdb module instead.
from __future__ import absolute_import
import nilmdb
from nilmdb.printf import *

import tables
import time
import sys

class NilmDB(object):
    """Object that represents a NILM database, backed by a single
    PyTables (HDF5) file.  Streams are stored as Table nodes whose
    column layout must match one of the entries in nilmdb.layout."""

    def __init__(self, filename):
        # Open or create the file ("a" = append/create mode)
        self.filename = filename
        self.h5file = tables.openFile(filename, "a", "NILM Database")
        # Flag used by __del__ to detect a missing close() call
        self.opened = True

    def __del__(self):
        # Warn (rather than raise) if the caller forgot close(); a
        # destructor is too late to do proper cleanup reliably.
        if "opened" in self.__dict__: # pragma: no cover
            fprintf(sys.stderr,
                    "error: NilmDB.close() wasn't called, file %s",
                    self.filename)

    def close(self):
        """Close the underlying HDF5 file.  Must be called before the
        object is dropped, or __del__ prints a warning."""
        self.h5file.close()
        del self.opened

    def stream_list(self, layout = None):
        """Return list of information for all tables in the database
        that match the given layout, or all tables if layout is None.

        Format of returned data is a list of tuples:
        (path, layout, nrows)
        """
        paths = []
        # Walk every Table node in the file, starting at the root
        for node in self.h5file.walkNodes('/', 'Table'):
            path = node._v_pathname
            # Reverse-map the node's column description to a layout name
            this_layout = nilmdb.layout.desc_to_layout(node.description)
            if layout is None or layout == this_layout:
                # NOTE(review): 'tuple' shadows the builtin; harmless
                # here but worth renaming eventually.
                tuple = (path, this_layout, node.nrows)
                paths.append(tuple)
        return sorted(paths)

    def stream_create(self, path, layout_name, index = None):
        """Create a table at the given path, with the contents
        matching the given layout_name (e.g. 'PrepData').
        Columns listed in 'index' are marked as indices.  If index =
        None, the 'timestamp' column is indexed if it exists.  Pass
        an empty list to prevent indexing.

        Raises ValueError if 'path' has no group component, and
        KeyError if an index column doesn't exist (in which case the
        newly created table is removed again)."""
        # Split "/a/b/c" into group "/a/b" and node name "c"
        [ group, node ] = path.rsplit("/", 1)
        if group == '':
            raise ValueError("Invalid path")

        # Make the group structure, one element at a time
        group_path = group.lstrip('/').split("/")
        for i in range(len(group_path)):
            parent = "/" + "/".join(group_path[0:i])
            child = group_path[i]
            try:
                self.h5file.createGroup(parent, child)
            except tables.NodeError:
                # Group already exists; that's fine
                pass

        # Get description
        desc = nilmdb.layout.description(layout_name)

        # Estimated table size (for PyTables optimization purposes): assume
        # 3 months worth of data.  It's OK if this is wrong.
        exp_rows = nilmdb.layout.expected_daily_rows(layout_name) * 90

        table = self.h5file.createTable(group, node,
                                        description = desc,
                                        expectedrows = exp_rows)

        # Create indices
        try:
            if index is None and "timestamp" in table.colnames:
                index = [ "timestamp" ]
            for ind in index:
                table.cols._f_col(ind).createIndex()
        except KeyError, e:
            # Remove this table if we got an error
            self.h5file.removeNode(group, node)
            raise e

+ 7
- 0
nilmdb/printf.py View File

@@ -0,0 +1,7 @@
from __future__ import print_function
def printf(str, *args):
    """C-style printf: write str % args to stdout, no trailing newline."""
    print(str % args, end='')
def fprintf(file, str, *args):
    """C-style fprintf: write str % args to the given file object."""
    print(str % args, end='', file=file)
def sprintf(str, *args):
    """C-style sprintf: return str % args as a string."""
    return (str % args)

+ 69
- 0
nilmdb/serializer.py View File

@@ -0,0 +1,69 @@
import Queue
import threading
import sys

# This file provides a class that will wrap an object and serialize
# all calls to its methods. All calls to that object will be queued
# and executed from a single thread, regardless of which thread makes
# the call.

# Based partially on http://stackoverflow.com/questions/2642515/

class SerializerThread(threading.Thread):
    """Worker thread for the serializer.

    Pulls (result_queue, func, args, kwargs) tuples off the shared
    call queue, invokes func, and pushes an (exc_info, result) pair
    onto the per-call result queue.  A tuple whose result_queue is
    None is the shutdown sentinel."""

    def __init__(self, call_queue):
        threading.Thread.__init__(self)
        self.call_queue = call_queue

    def run(self):
        while True:
            (result_queue, func, args, kwargs) = self.call_queue.get()
            # A None result_queue tells us to terminate
            if result_queue is None:
                return
            try:
                value = func(*args, **kwargs) # wrapped
            except:
                # Report the full exception info back to the caller
                result_queue.put((sys.exc_info(), None))
            else:
                result_queue.put((None, value))

class WrapCall(object):
    """Wrap a callable using the given queues.

    Calling the instance enqueues (result_queue, func, args, kwargs)
    on call_queue for the serializer thread, then blocks until the
    (exc_info, result) reply arrives on result_queue.  An exception
    raised by the wrapped callable is re-raised here with its
    original traceback (Python 2 three-expression raise)."""

    def __init__(self, call_queue, result_queue, func):
        self.call_queue = call_queue
        self.result_queue = result_queue
        self.func = func

    def __call__(self, *args, **kwargs):
        self.call_queue.put((self.result_queue, self.func, args, kwargs))
        # Block until the serializer thread has run the call
        ( exc_info, result ) = self.result_queue.get()
        if exc_info is None:
            return result
        else:
            raise exc_info[0], exc_info[1], exc_info[2]

class WrapObject(object):
    """Wrap all calls to methods in a target object with WrapCall.

    All attribute lookups on this object return WrapCall wrappers
    around the target's methods, so every call is funneled through a
    single serializer thread regardless of the calling thread.
    Attributes use double-underscore name mangling so they don't
    collide with (or get forwarded to) the target's namespace."""

    def __init__(self, target):
        self.__wrap_target = target
        self.__wrap_call_queue = Queue.Queue()
        # Daemon thread so an unclosed wrapper can't hang interpreter exit
        self.__wrap_serializer = SerializerThread(self.__wrap_call_queue)
        self.__wrap_serializer.daemon = True
        self.__wrap_serializer.start()

    def __getattr__(self, key):
        """Wrap methods of self.__wrap_target in a WrapCall instance"""
        # Only reached for names not found on WrapObject itself
        func = getattr(self.__wrap_target, key)
        if not callable(func):
            raise TypeError("Can't serialize attribute %r (type: %s)"
                            % (key, type(func)))
        # Fresh result queue per lookup, so concurrent callers from
        # different threads don't share replies
        result_queue = Queue.Queue()
        return WrapCall(self.__wrap_call_queue, result_queue, func)

    def __del__(self):
        # Send the shutdown sentinel and wait for the thread to finish
        self.__wrap_call_queue.put((None, None, None, None))
        self.__wrap_serializer.join()

+ 111
- 0
nilmdb/server.py View File

@@ -0,0 +1,111 @@
"""CherryPy-based server for accessing NILM database via HTTP"""

# Need absolute_import so that "import nilmdb" won't pull in nilmdb.py,
# but will pull the nilmdb module instead.
from __future__ import absolute_import
import nilmdb

from nilmdb.printf import *

import cherrypy
import sys
import os
import traceback

try:
import cherrypy
cherrypy.tools.json_out
except: # pragma: no cover
sys.stderr.write("Cherrypy 3.2+ required\n")
sys.exit(1)

class NilmApp(object):
    """Base class for the CherryPy applications below; holds the
    shared database object."""
    def __init__(self, db):
        self.db = db

class Root(NilmApp):
    """Root application for NILM database"""

    def __init__(self, db, version):
        super(Root, self).__init__(db)
        # Reported by the /version endpoint
        self.server_version = version

    # /
    @cherrypy.expose
    def index(self):
        # No index page; return 404
        raise cherrypy.NotFound()

    # /favicon.ico
    @cherrypy.expose
    def favicon_ico(self):
        # Keep browsers' favicon requests out of the error log path
        raise cherrypy.NotFound()

    # /version
    @cherrypy.expose
    @cherrypy.tools.json_out()
    def version(self):
        # JSON-encoded server version string
        return self.server_version

class Stream(NilmApp):
    """Stream-specific operations"""

    # /stream/list
    # /stream/list?layout=PrepData
    @cherrypy.expose
    @cherrypy.tools.json_out()
    def list(self, layout = None):
        # Delegates to NilmDB.stream_list; returns JSON list of
        # (path, layout, nrows) tuples, optionally filtered by layout
        return self.db.stream_list(layout)

class Exiter(object):
    """App that exits the server, for testing"""
    @cherrypy.expose
    def index(self):
        cherrypy.response.headers['Content-Type'] = 'text/plain'
        # Stream the body so the client receives the message before
        # the generator raises SystemExit to kill the server.
        def content():
            yield 'Exiting by request'
            raise SystemExit
        return content()
    # Response streaming must be enabled for the trick above to work
    index._cp_config = { 'response.stream': True }

class Server(object):
    """Embedded CherryPy server exposing a NilmDB over HTTP.

    Mounts Root at /, Stream at /stream, and (if stoppable) the
    testing-only Exiter at /exit."""

    version = "1.0"
    def __init__(self, db, host = '127.0.0.1', port = 8080, stoppable = False):
        # Need to wrap DB object in a serializer because we'll call into it from separate threads.
        self.db = nilmdb.serializer.WrapObject(db)
        cherrypy.config.update({
            'server.socket_host': host,
            'server.socket_port': port,
            'engine.autoreload_on': False,
            'environment': 'embedded',
            })
        # Clear any apps mounted by a previous Server instance
        cherrypy.tree.apps = {}
        cherrypy.tree.mount(Root(self.db, self.version), "/")
        cherrypy.tree.mount(Stream(self.db), "/stream")
        if stoppable:
            cherrypy.tree.mount(Exiter(), "/exit")

    def start(self, blocking = False, event = None):
        """Start the engine.  If 'event' is given it is set once the
        engine is up; if 'blocking', wait until the engine exits."""

        # Cherrypy stupidly calls os._exit(70) when it can't bind the
        # port.  At least try to print a reasonable error and continue
        # in this case, rather than just dying silently (as we would
        # otherwise do in embedded mode)
        real_exit = os._exit
        def fake_exit(code): # pragma: no cover
            if code == os.EX_SOFTWARE:
                fprintf(sys.stderr, "error: CherryPy called os._exit!\n")
            else:
                real_exit(code)
        # Temporarily intercept os._exit only for the duration of start()
        os._exit = fake_exit
        cherrypy.engine.start()
        os._exit = real_exit
        if event is not None:
            event.set()
        if blocking:
            cherrypy.engine.wait(cherrypy.engine.states.EXITING,
                                 interval = 0.1, channel = 'main')

    def stop(self):
        """Shut the CherryPy engine down."""
        cherrypy.engine.exit()

+ 0
- 46
nilmdb/test_fileinterval.py View File

@@ -1,46 +0,0 @@
from nilmdb import Interval, IntervalSet, IntervalError, FileInterval
from datetime import datetime
from nose.tools import assert_raises

from test_interval import iset

def fiset(string):
"""Like iset, but builds with FileIntervals instead of Intervals"""
iset = IntervalSet()
for i, c in enumerate(string):
day = datetime.strptime("{0:04d}".format(i+2000), "%Y")
if (c == "["):
start = day
elif (c == "|"):
iset += FileInterval(start, day, "test.dat")
start = day
elif (c == "]"):
iset += FileInterval(start, day, "test.dat")
del start
return iset

def test_fileinterval_vs_interval():
"""Test FileInterval/Interval inheritance"""

i = iset("[--]")
f = fiset("[--]")

# check types
assert(isinstance(i[0], Interval))
assert(not isinstance(i[0], FileInterval))
assert(isinstance(f[0], Interval))
assert(isinstance(f[0], FileInterval))

# when doing an intersection, result should be a subset of the first arg
u = (i & f)
assert(isinstance(u[0], Interval))
assert(not isinstance(u[0], FileInterval))
u = (f & i)
assert(isinstance(u[0], Interval))
assert(isinstance(u[0], FileInterval))

# they're still the same though
assert(i == f == u)

# just for coverage
assert_raises(IntervalError, fiset("[]")[0].subset, f[0].start, f[0].end)

+ 0
- 188
nilmdb/test_interval.py View File

@@ -1,188 +0,0 @@
from nilmdb import Interval, IntervalSet, IntervalError
from datetime import datetime
from nose.tools import assert_raises
import itertools

def test_interval():
"""Test the Interval class"""
d1 = datetime.strptime("19801205","%Y%m%d")
d2 = datetime.strptime("19900216","%Y%m%d")
d3 = datetime.strptime("20111205","%Y%m%d")

# basic construction
i = Interval(d1, d1)
i = Interval(d1, d3)
assert(i.start == d1)
assert(i.end == d3)

# assignment should work
i.start = d2
try:
i.end = d1
raise Exception("should have died there")
except IntervalError:
pass
i.start = d1
i.end = d2

# end before start
assert_raises(IntervalError, Interval, d3, d1)

# wrong type
assert_raises(IntervalError, Interval, 1, 2)

# compare
assert(Interval(d1, d2) == Interval(d1, d2))
assert(Interval(d1, d2) < Interval(d1, d3))
assert(Interval(d1, d3) > Interval(d1, d2))
assert(Interval(d1, d2) < Interval(d2, d3))
assert(Interval(d1, d3) < Interval(d2, d3))
assert(Interval(d2, d2) > Interval(d1, d3))
assert(Interval(d3, d3) == Interval(d3, d3))
assert_raises(TypeError, cmp, i, 123)

# subset
assert(Interval(d1, d3).subset(d1, d2) == Interval(d1, d2))
assert_raises(IntervalError, Interval(d2, d3).subset, d1, d2)

# append
assert(Interval(d1, d2).is_adjacent(Interval(d2,d3)))
assert(Interval(d2, d3).is_adjacent(Interval(d1,d2)))
assert(not Interval(d2, d3).is_adjacent(Interval(d1,d3)))
assert_raises(TypeError, Interval(d1, d2).is_adjacent, 1)

# misc
assert(repr(i) == repr(eval(repr(i).replace("datetime.",""))))
assert(str(i) == "[1980-12-05 00:00:00 -> 1990-02-16 00:00:00]")

def test_interval_intersect():
"""Test Interval intersections"""
dates = [ datetime.strptime(year, "%y") for year in [ "00", "01", "02", "03" ] ]
perm = list(itertools.permutations(dates, 2))
prod = list(itertools.product(perm, perm))
should_intersect = {
False: [4, 5, 8, 20, 48, 56, 60, 96, 97, 100],
True: [0, 1, 2, 12, 13, 14, 16, 17, 24, 25, 26, 28, 29,
32, 49, 50, 52, 53, 61, 62, 64, 65, 68, 98, 101, 104]}
for i,((a,b),(c,d)) in enumerate(prod):
try:
i1 = Interval(a, b)
i2 = Interval(c, d)
assert(i1.intersects(i2) == i2.intersects(i1))
assert(i in should_intersect[i1.intersects(i2)])
except IntervalError:
assert(i not in should_intersect[True] and
i not in should_intersect[False])
assert_raises(TypeError, i1.intersects, 1234)

def test_intervalset_construct():
"""Test interval set construction"""
dates = [ datetime.strptime(year, "%y") for year in [ "00", "01", "02", "03" ]]

a = Interval(dates[0], dates[1])
b = Interval(dates[1], dates[2])
c = Interval(dates[0], dates[2])
d = Interval(dates[2], dates[3])

iseta = IntervalSet(a)
isetb = IntervalSet([a, b])
isetc = IntervalSet([a])
assert(iseta != isetb)
assert(iseta == isetc)
assert(iseta != 3)
assert(IntervalSet(a) != IntervalSet(b))
assert_raises(TypeError, cmp, iseta, isetb)
assert_raises(IntervalError, IntervalSet, [a, b, c])
assert_raises(TypeError, IntervalSet, [1, 2])

iset = IntervalSet(isetb) # test iterator
assert(iset == isetb)
assert(len(iset) == 2)
assert(len(IntervalSet()) == 0)

# Test adding
iset = IntervalSet(a)
iset += IntervalSet(b)
assert(iset == IntervalSet([a, b]))
iset = IntervalSet(a)
iset += b
assert(iset == IntervalSet([a, b]))
iset = IntervalSet(a) + IntervalSet(b)
assert(iset == IntervalSet([a, b]))
iset = IntervalSet(b) + a
assert(iset == IntervalSet([a, b]))

# A set consisting of [0-1],[1-2] should match a set consisting of [0-2]
assert(IntervalSet([a,b]) == IntervalSet([c]))
# Etc
assert(IntervalSet([a,d]) != IntervalSet([c]))
assert(IntervalSet([c]) != IntervalSet([a,d]))
assert(IntervalSet([c,d]) != IntervalSet([b,d]))
# misc
assert(repr(iset) == repr(eval(repr(iset).replace("datetime.",""))))

def iset(string):
"""Build an IntervalSet from a string, for testing purposes

Each character is a year
[ = interval start
| = interval end + adjacent start
] = interval end
anything else is ignored
"""
iset = IntervalSet()
for i, c in enumerate(string):
day = datetime.strptime("{0:04d}".format(i+2000), "%Y")
if (c == "["):
start = day
elif (c == "|"):
iset += Interval(start, day)
start = day
elif (c == "]"):
iset += Interval(start, day)
del start
return iset

def test_intervalset_iset():
"""Test basic iset construction"""
assert(iset(" [----] ") ==
iset(" [-|--] "))

assert(iset("[] [--] ") +
iset(" [] [--]") ==
iset("[|] [-----]"))

def test_intervalset_intsersect():
"""Test intersection (&)"""
assert_raises(TypeError, iset("[--]").__and__, 1234)
assert(iset("[---------]") &
iset(" [---] ") ==
iset(" [---] "))

assert(iset(" [---] ") &
iset("[---------]") ==
iset(" [---] "))

assert(iset(" [-----]") &
iset(" [-----] ") ==
iset(" [--] "))

assert(iset(" [---]") &
iset(" [--] ") ==
iset(" "))

assert(iset(" [-|---]") &
iset(" [-----|-] ") ==
iset(" [----] "))

assert(iset(" [-|-] ") &
iset(" [-|--|--] ") ==
iset(" [---] "))

assert(iset(" [----][--]") &
iset("[-] [--] []") ==
iset(" [] [-] []"))


+ 0
- 5
pytables-test/Makefile View File

@@ -1,5 +0,0 @@
all:
time python test-indexed-read.py

clean:
rm -f *pyc

+ 0
- 2
pytables-test/README.jim View File

@@ -1,2 +0,0 @@
New version from:
http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=641485#15

+ 0
- 12
pytables-test/TODO.txt View File

@@ -1,12 +0,0 @@
- Make CherryPy server that can handle simple GET/POST,
and a httplib client that can talk to that server.
Steps:
- Make server handle GET
- Make client send request, get response
- Add request streaming to server
- Add request streaming to client
- Make server handle POST
- Make client send request, get response
- Add request streaming to server
- Add request streaming to client
- Integrate into a server process that also keeps database open.

+ 0
- 3
pytables-test/indexing-notes View File

@@ -1,3 +0,0 @@
Indexing time64 doesn't seem to work -- needed to do "time >= 1243052015" even though the actual database times
should be something like 1243052015.847000. Let's switch to just using a 64-bit integer counting e.g.
microseconds since 1970-01-01

+ 0
- 3
pytables-test/sample-query View File

@@ -1,3 +0,0 @@
timestamp > 1243052015
took 394.5 minutes in vitables
(2340 rows matched)

+ 0
- 53
pytables-test/server.py View File

@@ -1,53 +0,0 @@
import sys
import tables
import nilmdb

try:
import cherrypy
cherrypy.tools.json_out
except:
sys.stderr.write("Cherrypy 3.2+ required\n")
sys.exit(1)

class NilmApp:
def __init__(self, db):
self.db = db

class Root(NilmApp):
"""NILM Database"""

server_version = "1.0"

@cherrypy.expose
def index(self):
raise cherrypy.NotFound()

@cherrypy.expose
def favicon_ico(self):
raise cherrypy.NotFound()

@cherrypy.expose
@cherrypy.tools.json_out()
def version(self):
return self.server_version

class Stream(NilmApp):
"""Stream operations"""

@cherrypy.expose
@cherrypy.tools.json_out()
def list(self):
return
cherrypy.config.update({
'server.socket_host': '127.0.0.1',
'server.socket_port': 12380
})

db = nilmdb.nilmdb()
cherrypy.tree.mount(Root(db), "/")
cherrypy.tree.mount(Stream(db), "/stream")

if __name__ == "__main__":
cherrypy.engine.start()
cherrypy.engine.block()

+ 0
- 16
pytables-test/speed-pytables.py View File

@@ -1,16 +0,0 @@
import tables
import numpy

class RawSample(tables.IsDescription):
timestamp = tables.UInt64Col()
voltage = tables.UInt16Col(shape = 3)
current = tables.UInt16Col(shape = 3)

h5file = tables.openFile("test.h5", mode = "w", title = "Test")
group = h5file.createGroup("/", "raw", "Raw Data")
table = h5file.createTable(group, "nilm1", RawSample, "NILM 1")

print repr(h5file)

# write rows


+ 0
- 54
pytables-test/test-indexing.py View File

@@ -1,54 +0,0 @@
#!/usr/bin/python

from tables import *
import re
import time

# A class to describe our data
class PrepData(IsDescription):
timestamp = Int64Col()
p1 = Float32Col()
q1 = Float32Col()
p3 = Float32Col()
q3 = Float32Col()
p5 = Float32Col()
q5 = Float32Col()
p7 = Float32Col()
q7 = Float32Col()

filename = "test.h5"
h5file = openFile(filename, mode = "w", title = "NILM Test")

group = h5file.createGroup("/", "newton", "Newton school")
table = h5file.createTable(group, "prep", PrepData, "Prep Data", expectedrows = 120 * 86400 * 90)

table.cols.timestamp.createIndex()

for i in range(0, 80):
# Open file
data = open("data/alldata")
count = 0
oldtime = time.time()
prep = table.row
for line in data:
count = count + 1
if count % 1000000 == 0:
print str(i) + ": " + str((time.time() - oldtime)) + ", total " + str(count/1000000) + "m lines"
oldtime = time.time()
v = re.split('\s+', line)
prep['timestamp'] = int(v[0]) + 500000000 * i
prep['p1'] = v[1]
prep['q1'] = v[2]
prep['p3'] = v[3]
prep['q3'] = v[4]
prep['p5'] = v[5]
prep['q5'] = v[6]
prep['p7'] = v[7]
prep['q7'] = v[8]
prep.append()
data.close()

h5file.close()




+ 0
- 54
pytables-test/test-write.py View File

@@ -1,54 +0,0 @@
#!/usr/bin/python

from tables import *
import re
import time

# A class to describe our data
class PrepData(IsDescription):
timestamp = Time64Col()
p1 = Float32Col()
q1 = Float32Col()
p3 = Float32Col()
q3 = Float32Col()
p5 = Float32Col()
q5 = Float32Col()
p7 = Float32Col()
q7 = Float32Col()

filename = "test.h5"
h5file = openFile(filename, mode = "w", title = "NILM Test")

group = h5file.createGroup("/", "newton", "Newton school")
table = h5file.createTable(group, "prep", PrepData, "Prep Data")

table.cols.timestamp.createIndex()

for i in range(0, 80):
# Open file
data = open("data/alldata")
count = 0
oldtime = time.time()
prep = table.row
for line in data:
count = count + 1
if count % 1000000 == 0:
print str(i) + ": " + str((time.time() - oldtime)) + ", total " + str(count/1000000) + "m lines"
oldtime = time.time()
v = re.split('\s+', line)
prep['timestamp'] = float(v[0]) / 1000.0 + 500000 * i
prep['p1'] = v[1]
prep['q1'] = v[2]
prep['p3'] = v[3]
prep['q3'] = v[4]
prep['p5'] = v[5]
prep['q5'] = v[6]
prep['p7'] = v[7]
prep['q7'] = v[8]
prep.append()
data.close()

h5file.close()




+ 0
- 9
setup.py View File

@@ -1,9 +0,0 @@
#!/usr/bin/python

from distutils.core import setup

setup(name = 'nilmdb',
version = '1.0',
scripts = [ 'bin/nilm-test.py' ],
packages = [ 'nilmdb' ],
)

+ 0
- 5
test/Makefile View File

@@ -1,5 +0,0 @@
all:
python speed-readbinary.py

clean:
rm -f *pyc

+ 0
- 4
test/printf.py View File

@@ -1,4 +0,0 @@
from __future__ import print_function
def printf(str, *args):
print(str % args, end='')

+ 0
- 60
test/speed-readascii.py View File

@@ -1,60 +0,0 @@
#!/usr/bin/python

from printf import printf
import time
import re
import numpy as np
import itertools

class Timer():
def __init__(self, arg):
self.arg = arg
def __enter__(self): self.start = time.time()
def __exit__(self, *args): printf("%s: %f lines/sec\n", self.arg, 1e6 / (time.time() - self.start))

def test_split():
for n, line in enumerate(open('1m.raw', 'r')):
out = [0]*6
tmp = [ int(i) for i in line.partition('#')[0].split() ]
out[0:len(tmp)] = tmp
if (n % 100000 == 0):
printf("line %d = %s\n", n, str(out))

def test_split2():
for n, line in enumerate(open('1m.raw', 'r')):
out = [0]*6
tmp = [ int(i,10) for i in line.partition('#')[0].split() ]
out[0:len(tmp)] = tmp
if (n % 100000 == 0):
printf("line %d = %s\n", n, str(out))

def test_regex():
for n, line in enumerate(open('1m.raw', 'r')):
out = [0]*6
tmp = [ int(x) for x in re.findall('(\d+)\s+',line.partition('#')[0]) ]
out[0:len(tmp)] = tmp
if (n % 100000 == 0):
printf("line %d = %s\n", n, str(out))

def test_bigregex():
regex = re.compile('^(?:\s*)' + '(?:(\d+)\s+)?' * 6)
for n, line in enumerate(open('1m.raw', 'r')):
out = [ int(x or 0) for x in re.match(regex, line).groups() ]
if (n % 100000 == 0):
printf("line %d = %s\n", n, str(out))

with Timer("regex"):
test_regex() # 102k/sec

with Timer("split"):
test_split() # 175k/sec

with Timer("split2"):
test_split2() # 275k/sec

with Timer("bigregex"):
test_bigregex() # 110k/sec

# The "int" operation takes quite a while -- int(x,10) is twice as fast
# Perl does about 500k/sec


+ 0
- 74
test/speed-readbinary.py View File

@@ -1,74 +0,0 @@
#!/usr/bin/python

from printf import printf
import time
import re
import numpy as np
import itertools
import struct
import array
import os
import mmap

class Timer():
    """Context manager that times its body and prints a klines/sec rate,
    assuming the body processed one million records."""
    def __init__(self, arg):
        self.arg = arg
    def __enter__(self):
        self.start = time.time()
    def __exit__(self, *args):
        elapsed = time.time() - self.start
        printf("%s: %f klines/sec\n", self.arg, 1e3 / elapsed)

def test_struct1():
    """read fixed-size records one at a time with struct.unpack"""
    f = open('1m.bin', 'rb')
    f.seek(0, os.SEEK_END)
    filesize = f.tell()
    f.seek(0, os.SEEK_SET)
    packer = struct.Struct('!dHHHHHH')
    count = filesize // packer.size
    for n in xrange(count):
        record = packer.unpack(f.read(packer.size))
        if n % 100000 == 0:
            printf("line %d = %s\n", n, str(record))

def test_struct2():
    """read with struct.unpack, plus a str() conversion per record
    (the conversion cost is what this variant measures)"""
    f = open('1m.bin', 'rb')
    f.seek(0, os.SEEK_END)
    filesize = f.tell()
    f.seek(0, os.SEEK_SET)
    packer = struct.Struct('!dHHHHHH')
    count = filesize // packer.size
    for n in xrange(count):
        record = packer.unpack(f.read(packer.size))
        as_text = str(record)   # extra work being benchmarked
        if n % 100000 == 0:
            printf("line %d = %s\n", n, str(record))

def test_mmap():
    """struct.unpack out of a read-only mmap of the whole file"""
    with open('1m.bin', 'rb') as f:
        f.seek(0, os.SEEK_END)
        filesize = f.tell()
        f.seek(0, os.SEEK_SET)
        m = mmap.mmap(f.fileno(), filesize, access=mmap.ACCESS_READ)
        packer = struct.Struct('!dHHHHHH')
        count = filesize // packer.size
        for n in xrange(count):
            offset = packer.size * n
            record = packer.unpack(m[offset : offset + packer.size])
            if n % 100000 == 0:
                printf("line %d = %s\n", n, str(record))

# Run the read benchmarks; rates from a previous run are noted.
for label, benchmark in [("mmap", test_mmap),         # 1600k
                         ("struct1", test_struct1),   # 1460k
                         ("struct2", test_struct2)]:  # 210k
    with Timer(label):
        benchmark()

# Reading from the file is again much quicker than converting to string
# Use mmap, it's good



+ 0
- 76
test/speed-writebinary.py View File

@@ -1,76 +0,0 @@
#!/usr/bin/python

from printf import printf
import time
import re
import numpy as np
import itertools
import struct
import array

class Timer():
    """Context manager that times its body and prints a klines/sec rate,
    assuming the body wrote one million records."""
    def __init__(self, arg):
        self.arg = arg
    def __enter__(self):
        self.start = time.time()
    def __exit__(self, *args):
        elapsed = time.time() - self.start
        printf("%s: %f klines/sec\n", self.arg, 1e3 / elapsed)

def read_ascii():
    """Yield one million constant 6-tuples, standing in for parsed
    ASCII input so only the binary write path is measured."""
    row = (1234, 2345, 3456, 4576, 5678, 6789)
    for _ in xrange(1000000):
        yield row

def test_struct1():
    """write with struct.pack, format string supplied on every call"""
    f = open('1m.bin', 'wb')
    for row in read_ascii():
        f.write(struct.pack('!HHHHHH', *row))

def test_struct2():
    """write with a precompiled struct.Struct (constant format)"""
    f = open('1m.bin', 'wb')
    packer = struct.Struct('!HHHHHH')
    for row in read_ascii():
        f.write(packer.pack(*row))
    f.close()
    printf("size was %d\n", packer.size)

def test_struct3():
    """like struct1, but prepend a timestamp to each record"""
    f = open('1m.bin', 'wb')
    for row in read_ascii():
        f.write(struct.pack('!dHHHHHH', time.time(), *row))

def test_struct4():
    """like struct2, but prepend a timestamp to each record"""
    f = open('1m.bin', 'wb')
    packer = struct.Struct('!dHHHHHH')
    for row in read_ascii():
        f.write(packer.pack(time.time(), *row))
    f.close()
    printf("size was %d\n", packer.size)

#raise Exception('done')

# Run the write benchmarks; rates from a previous run are noted.
for label, benchmark in [("struct1", test_struct1),   # 1089k
                         ("struct2", test_struct2),   # 1249k
                         ("struct3", test_struct3),   # 845k
                         ("struct4", test_struct4)]:  # 922k
    with Timer(label):
        benchmark()

# This seems fast enough for writing new data, since it's faster than
# we read ascii data anyway. Use e.g. struct4


+ 0
- 11
test/test-struct-pack.py View File

@@ -1,11 +0,0 @@
#!/usr/bin/python

import struct
import mmap

# Map the first 3 bytes of test.dat and demonstrate that an mmap object
# supports len() and string-style slicing (Python 2 script).
f = open("test.dat", "rb+")
mm = mmap.mmap(f.fileno(),3)

# Python 2 print statements; mm[0:3] yields a str of the mapped bytes.
print len(mm)
print "first 3 bytes: " + mm[0:3];


+ 0
- 1
test/test.dat View File

@@ -1 +0,0 @@


+ 0
- 7
test/todo.md View File

@@ -1,7 +0,0 @@
- Have a class representing the file contents
- Looks like an array
- len(), get(), index
- some form of bisect search
- get_extents = return [0].timestamp, [-1].timestamp
-
- Can append? Sure, why not. Just write to the file, extend mmap accordingly.

+ 150
- 0
tests/test_nilmdb.py View File

@@ -0,0 +1,150 @@
import nilmdb

from nose.tools import *
from nose.tools import assert_raises
from distutils.version import StrictVersion as V
import json
import itertools
import os
import sys
import cherrypy
import threading
import urllib2
import Queue

testdb = "tests/test.db"

#@atexit.register
#def cleanup():
# os.unlink(testdb)

class Test00Nilmdb(object):  # named 00 so it runs first
    def test_NilmDB(self):
        # Start from a clean slate; the test database may not exist yet.
        try:
            os.unlink(testdb)
        except:
            pass

        # A path in a nonexistent directory must fail with IOError.
        with assert_raises(IOError):
            nilmdb.NilmDB("/nonexistant-db/foo")

        # Open twice in a row: first call creates, second reopens.
        db = nilmdb.NilmDB(testdb)
        db.close()
        db = nilmdb.NilmDB(testdb)
        db.close()

    def test_stream(self):
        db = nilmdb.NilmDB(testdb)
        eq_(db.stream_list(), [])

        # Bad path (no leading /newton-style hierarchy accepted here)
        with assert_raises(ValueError):
            db.stream_create("/foo", "PrepData")
        # Bad layout type
        with assert_raises(KeyError):
            db.stream_create("/newton/prep", "NoSuchLayout")
        # Bad index columns
        with assert_raises(KeyError):
            db.stream_create("/newton/prep", "PrepData", ["nonexistant"])
        db.stream_create("/newton/prep", "PrepData")
        db.stream_create("/newton/raw", "RawData")
        db.stream_create("/newton/zzz/rawnotch", "RawNotchedData")

        # Verify we got 3 streams (path, layout name, row count)
        eq_(db.stream_list(), [ ("/newton/prep", "PrepData", 0),
                                ("/newton/raw", "RawData", 0),
                                ("/newton/zzz/rawnotch", "RawNotchedData", 0)
                                ])
        # Match just one type
        eq_(db.stream_list("RawData"), [ ("/newton/raw", "RawData", 0) ])

        # Verify returned types if a layout is missing: temporarily hide
        # "RawData" from the registry and expect None for that stream.
        save = nilmdb.layout.layouts.copy()
        del nilmdb.layout.layouts["RawData"]
        eq_(db.stream_list(), [ ("/newton/prep", "PrepData", 0),
                                ("/newton/raw", None, 0),
                                ("/newton/zzz/rawnotch", "RawNotchedData", 0)
                                ])
        nilmdb.layout.layouts = save

        # Verify that columns were made right: column counts and which
        # columns are indexed come from the layout definitions.
        eq_(len(db.h5file.getNode("/newton/prep").cols), 9)
        eq_(len(db.h5file.getNode("/newton/raw").cols), 7)
        eq_(len(db.h5file.getNode("/newton/zzz/rawnotch").cols), 10)
        assert(db.h5file.getNode("/newton/prep").colindexed["timestamp"])
        assert(not db.h5file.getNode("/newton/prep").colindexed["p1"])
        db.close()

class TestBlockingServer(object):
    def setUp(self):
        self.db = nilmdb.NilmDB(testdb)

    def tearDown(self):
        self.db.close()

    def test_blocking_server(self):
        # Start web app on a custom port; stoppable=True enables the
        # /exit URL so the blocking server can be shut down remotely.
        self.server = nilmdb.Server(self.db, host = "127.0.0.1",
                                    port = 12380, stoppable = True)

        # Run it in a background thread.  NOTE(review): `event` is
        # presumably set by start() once the server is listening —
        # confirm against nilmdb.Server.
        event = threading.Event()
        def run_server():
            self.server.start(blocking = True, event = event)
        thread = threading.Thread(target = run_server)
        thread.start()
        event.wait(timeout = 2)

        # Send request to exit.
        req = urllib2.urlopen("http://127.0.0.1:12380/exit/", timeout = 1)

        # Wait for the server thread to finish.
        thread.join()
def geturl(path):
    """GET the given path from the test server and return the body."""
    response = urllib2.urlopen("http://127.0.0.1:12380" + path, timeout = 10)
    return response.read()

def getjson(path):
    """GET the given path and decode the response body as JSON."""
    body = geturl(path)
    return json.loads(body)

class TestServer(object):

    def setUp(self):
        # Start web app on a custom port; stoppable=False means there is
        # no /exit URL, and start() returns immediately (non-blocking).
        self.db = nilmdb.NilmDB(testdb)
        self.server = nilmdb.Server(self.db, host = "127.0.0.1",
                                    port = 12380, stoppable = False)
        self.server.start(blocking = False)

    def tearDown(self):
        # Close web app
        self.server.stop()
        self.db.close()

    def test_server(self):
        # Make sure we can't force an exit, and test other 404 errors
        for url in [ "/exit", "/", "/favicon.ico" ]:
            with assert_raises(urllib2.HTTPError) as e:
                geturl(url)
            eq_(e.exception.code, 404)

        # Check version: /version must report the server's own version
        eq_(V(getjson("/version")), V(self.server.version))

    def test_stream_list(self):
        # Known streams that got populated by an earlier test (test_nilmdb)
        streams = getjson("/stream/list")

        eq_(streams, [
            ['/newton/prep', 'PrepData', 0],
            ['/newton/raw', 'RawData', 0],
            ['/newton/zzz/rawnotch', 'RawNotchedData', 0],
            ])

        # Filtering by layout returns only matching streams; an unknown
        # layout yields an empty list rather than an error.
        streams = getjson("/stream/list?layout=RawData")
        eq_(streams, [['/newton/raw', 'RawData', 0]])

        streams = getjson("/stream/list?layout=NoSuchLayout")
        eq_(streams, [])


+ 25
- 0
tests/test_printf.py View File

@@ -0,0 +1,25 @@
import nilmdb
from nilmdb.printf import *

from nose.tools import *
from nose.tools import assert_raises
from cStringIO import StringIO
import sys

class TestPrintf(object):
    def test_printf(self):
        """Exercise printf (stdout), fprintf (explicit file) and sprintf
        (returned string) with the same format and arguments."""
        old_stdout = sys.stdout
        # printf writes to sys.stdout, so capture it in a StringIO.
        sys.stdout = test1 = StringIO()
        test2 = StringIO()
        test3 = ""
        try:
            printf("hello, world: %d", 123)
            fprintf(test2, "hello too: %d", 123)
            test3 = sprintf("hello three: %d", 123)
        finally:
            # Always restore stdout, raise or not.  (The original used a
            # bare `except` and restored stdout in two separate places.)
            sys.stdout = old_stdout
        eq_(test1.getvalue(), "hello, world: 123")
        eq_(test2.getvalue(), "hello too: 123")
        eq_(test3, "hello three: 123")

+ 70
- 0
tests/test_serializer.py View File

@@ -0,0 +1,70 @@
import nilmdb
from nilmdb.printf import *

import nose
from nose.tools import *
from nose.tools import assert_raises
import threading
import time

#raise nose.exc.SkipTest("Skip these")

class Foo(object):
    """Test target whose increment is deliberately racy: it reads the
    counter, sleeps, then writes back, so unsynchronized concurrent
    callers lose updates."""
    val = 0

    def fail(self):
        raise Exception("you asked me to do this")

    def test(self, debug = False):
        # purposely not thread-safe: read-modify-write with a sleep in
        # the middle to widen the race window
        before = self.val
        after = before + 1
        time.sleep(0.05)
        self.val = after
        if debug:
            printf("[%s] value changed: %d -> %d\n",
                   threading.current_thread().name, before, after)

class Base(object):
    """Shared test cases; subclasses supply self.foo and verify_result()."""

    def test_wrapping(self):
        # A normal call goes through; an exception propagates back out.
        self.foo.test()
        with assert_raises(Exception):
            self.foo.fail()

    def test_threaded(self):
        # Hammer foo.test() from 20 threads, then let the subclass
        # decide whether the final count proves a race or serialization.
        def worker(target):
            target.test()
        pool = [threading.Thread(target = worker, args = (self.foo,))
                for _ in xrange(20)]
        for t in pool:
            t.start()
        for t in pool:
            t.join()
        self.verify_result()

class TestUnserialized(Base):
def setUp(self):
self.foo = Foo()

def verify_result(self):
# This should have failed to increment properly
assert(self.foo.val != 20)

class TestSerialized(Base):
    def setUp(self):
        # Keep a handle on the real object so results can be checked
        # without going through the wrapper.
        self.realfoo = Foo()
        self.foo = nilmdb.serializer.WrapObject(self.realfoo)

    def tearDown(self):
        # NOTE(review): presumably dropping the wrapper shuts down the
        # serializer's worker thread — confirm against nilmdb.serializer.
        del self.foo

    def verify_result(self):
        # Serialized calls cannot interleave, so all 20 increments land.
        eq_(self.realfoo.val, 20)

    def test_attribute(self):
        # Can't wrap attributes yet
        with assert_raises(TypeError):
            self.foo.val

+ 0
- 7
todo.txt View File

@@ -1,7 +0,0 @@
- test performance of binary storage
- write
- read
- seek to timestamp?

- implement binary storage classes
- fileinterval, binaryfileinterval, etc?

Loading…
Cancel
Save