More test coverage

Test everything; remove debugging
Bulkdata extract works now.
2013-01-02 00:00:05 -05:00 · 2013-01-01 23:46:54 -05:00 · 2013-01-01 23:44:52 -05:00 · 2013-01-01 23:03:53 -05:00 · 2013-01-01 21:04:35 -05:00 · 2013-01-01 21:03:33 -05:00
43 changed files with 701 additions and 1286 deletions
--- a/design.md
+++ b/design.md
@@ -143,6 +143,13 @@ Speed
 	 numpy:records.py.fromrecords:569   7410 sec, 262k calls
  - Probably OK for now.

+- After all updates, now takes about 8.5 minutes to insert an hour of
+  data, constant after adding 171 hours (4.9 billion data points)
+
+- Data set size: 98 gigs = 20 bytes per data point.
+  6 uint16 data + 1 uint32 timestamp = 16 bytes per point
+  So compression must be off -- will retry with compression forced on.
+
 IntervalSet speed
 -----------------
 - Initial implementation was pretty slow, even with binary search in
--- a/nilmdb/RedBlackTree.cc
+++ b/nilmdb/RedBlackTree.cc
@@ -1,605 +0,0 @@
-
-
-
-// The RedBlackEntry class is an Abstract Base Class.  This means that no
-// instance of the RedBlackEntry class can exist.  Only classes which
-// inherit from the RedBlackEntry class can exist.  Furthermore any class
-// which inherits from the RedBlackEntry class must define the member
-// function GetKey().  The Print() member function does not have to
-// be defined because a default definition exists.
-//
-// The GetKey() function should return an integer key for that entry.
-// The key for an entry should never change otherwise bad things might occur.
-
-class RedBlackEntry {
-public:
-  RedBlackEntry();
-  virtual ~RedBlackEntry();
-  virtual int GetKey() const = 0;
-  virtual void Print() const;
-};
-
-class RedBlackTreeNode {
-  friend class RedBlackTree;
-public:
-  void Print(RedBlackTreeNode*,
-	     RedBlackTreeNode*) const;
-  RedBlackTreeNode();
-  RedBlackTreeNode(RedBlackEntry *);
-  RedBlackEntry * GetEntry() const;
-  ~RedBlackTreeNode();
-protected:
-  RedBlackEntry * storedEntry;
-  int key;
-  int red; /* if red=0 then the node is black */
-  RedBlackTreeNode * left;
-  RedBlackTreeNode * right;
-  RedBlackTreeNode * parent;
-};
-
-
-
-class RedBlackTree {
-public:
-  RedBlackTree();
-  ~RedBlackTree();
-  void Print() const;
-  RedBlackEntry * DeleteNode(RedBlackTreeNode *);
-  RedBlackTreeNode * Insert(RedBlackEntry *);
-  RedBlackTreeNode * GetPredecessorOf(RedBlackTreeNode *) const;
-  RedBlackTreeNode * GetSuccessorOf(RedBlackTreeNode *) const;
-  RedBlackTreeNode * Search(int key);
-  TemplateStack<RedBlackTreeNode *> * Enumerate(int low, int high) ;
-  void CheckAssumptions() const;
-protected:
-  /*  A sentinel is used for root and for nil.  These sentinels are */
-  /*  created when RedBlackTreeCreate is caled.  root->left should always */
-  /*  point to the node which is the root of the tree.  nil points to a */
-  /*  node which should always be black but has aribtrary children and */
-  /*  parent and no key or info.  The point of using these sentinels is so */
-  /*  that the root and nil nodes do not require special cases in the code */
-  RedBlackTreeNode * root;
-  RedBlackTreeNode * nil;
-  void LeftRotate(RedBlackTreeNode *);
-  void RightRotate(RedBlackTreeNode *);
-  void TreeInsertHelp(RedBlackTreeNode *);
-  void TreePrintHelper(RedBlackTreeNode *) const;
-  void FixUpMaxHigh(RedBlackTreeNode *);
-  void DeleteFixUp(RedBlackTreeNode *);
-};
-
-const int MIN_INT=-MAX_INT;
-
-RedBlackTreeNode::RedBlackTreeNode(){
-};
-
-RedBlackTreeNode::RedBlackTreeNode(RedBlackEntry * newEntry)
-  : storedEntry (newEntry) , key(newEntry->GetKey()) {
-};
-
-RedBlackTreeNode::~RedBlackTreeNode(){
-};
-
-RedBlackEntry * RedBlackTreeNode::GetEntry() const {return storedEntry;}
-
-RedBlackEntry::RedBlackEntry(){
-};
-RedBlackEntry::~RedBlackEntry(){
-};
-void RedBlackEntry::Print() const {
-  cout << "No Print Method defined. Using Default: " << GetKey() << endl;
-}
-
-RedBlackTree::RedBlackTree()
-{
-  nil = new RedBlackTreeNode;
-  nil->left = nil->right = nil->parent = nil;
-  nil->red = 0;
-  nil->key = MIN_INT;
-  nil->storedEntry = NULL;
-
-  root = new RedBlackTreeNode;
-  root->parent = root->left = root->right = nil;
-  root->key = MAX_INT;
-  root->red=0;
-  root->storedEntry = NULL;
-}
-
-/***********************************************************************/
-/*  FUNCTION:  LeftRotate */
-/**/
-/*  INPUTS:  the node to rotate on */
-/**/
-/*  OUTPUT:  None */
-/**/
-/*  Modifies Input: this, x */
-/**/
-/*  EFFECTS:  Rotates as described in _Introduction_To_Algorithms by */
-/*            Cormen, Leiserson, Rivest (Chapter 14).  Basically this */
-/*            makes the parent of x be to the left of x, x the parent of */
-/*            its parent before the rotation and fixes other pointers */
-/*            accordingly.  */
-/***********************************************************************/
-
-void RedBlackTree::LeftRotate(RedBlackTreeNode* x) {
-  RedBlackTreeNode* y;
-
-  /*  I originally wrote this function to use the sentinel for */
-  /*  nil to avoid checking for nil.  However this introduces a */
-  /*  very subtle bug because sometimes this function modifies */
-  /*  the parent pointer of nil.  This can be a problem if a */
-  /*  function which calls LeftRotate also uses the nil sentinel */
-  /*  and expects the nil sentinel's parent pointer to be unchanged */
-  /*  after calling this function.  For example, when DeleteFixUP */
-  /*  calls LeftRotate it expects the parent pointer of nil to be */
-  /*  unchanged. */
-
-  y=x->right;
-  x->right=y->left;
-
-  if (y->left != nil) y->left->parent=x; /* used to use sentinel here */
-  /* and do an unconditional assignment instead of testing for nil */
-
-  y->parent=x->parent;
-
-  /* instead of checking if x->parent is the root as in the book, we */
-  /* count on the root sentinel to implicitly take care of this case */
-  if( x == x->parent->left) {
-    x->parent->left=y;
-  } else {
-    x->parent->right=y;
-  }
-  y->left=x;
-  x->parent=y;
-}
-
-/***********************************************************************/
-/*  FUNCTION:  RighttRotate */
-/**/
-/*  INPUTS:  node to rotate on */
-/**/
-/*  OUTPUT:  None */
-/**/
-/*  Modifies Input?: this, y */
-/**/
-/*  EFFECTS:  Rotates as described in _Introduction_To_Algorithms by */
-/*            Cormen, Leiserson, Rivest (Chapter 14).  Basically this */
-/*            makes the parent of x be to the left of x, x the parent of */
-/*            its parent before the rotation and fixes other pointers */
-/*            accordingly.  */
-/***********************************************************************/
-
-void RedBlackTree::RightRotate(RedBlackTreeNode* y) {
-  RedBlackTreeNode* x;
-
-  /*  I originally wrote this function to use the sentinel for */
-  /*  nil to avoid checking for nil.  However this introduces a */
-  /*  very subtle bug because sometimes this function modifies */
-  /*  the parent pointer of nil.  This can be a problem if a */
-  /*  function which calls LeftRotate also uses the nil sentinel */
-  /*  and expects the nil sentinel's parent pointer to be unchanged */
-  /*  after calling this function.  For example, when DeleteFixUP */
-  /*  calls LeftRotate it expects the parent pointer of nil to be */
-  /*  unchanged. */
-
-  x=y->left;
-  y->left=x->right;
-
-  if (nil != x->right)  x->right->parent=y; /*used to use sentinel here */
-  /* and do an unconditional assignment instead of testing for nil */
-
-  /* instead of checking if x->parent is the root as in the book, we */
-  /* count on the root sentinel to implicitly take care of this case */
-  x->parent=y->parent;
-  if( y == y->parent->left) {
-    y->parent->left=x;
-  } else {
-    y->parent->right=x;
-  }
-  x->right=y;
-  y->parent=x;
-}
-
-/***********************************************************************/
-/*  FUNCTION:  TreeInsertHelp  */
-/**/
-/*  INPUTS:  z is the node to insert */
-/**/
-/*  OUTPUT:  none */
-/**/
-/*  Modifies Input:  this, z */
-/**/
-/*  EFFECTS:  Inserts z into the tree as if it were a regular binary tree */
-/*            using the algorithm described in _Introduction_To_Algorithms_ */
-/*            by Cormen et al.  This funciton is only intended to be called */
-/*            by the Insert function and not by the user */
-/***********************************************************************/
-
-void RedBlackTree::TreeInsertHelp(RedBlackTreeNode* z) {
-  /*  This function should only be called by RedBlackTree::Insert */
-  RedBlackTreeNode* x;
-  RedBlackTreeNode* y;
-
-  z->left=z->right=nil;
-  y=root;
-  x=root->left;
-  while( x != nil) {
-    y=x;
-    if ( x->key > z->key) {
-      x=x->left;
-    } else { /* x->key <= z->key */
-      x=x->right;
-    }
-  }
-  z->parent=y;
-  if ( (y == root) ||
-       (y->key > z->key) ) {
-    y->left=z;
-  } else {
-    y->right=z;
-  }
-}
-
-/*  Before calling InsertNode  the node x should have its key set */
-
-/***********************************************************************/
-/*  FUNCTION:  InsertNode */
-/**/
-/*  INPUTS:  newEntry is the entry to insert*/
-/**/
-/*  OUTPUT:  This function returns a pointer to the newly inserted node */
-/*           which is guarunteed to be valid until this node is deleted. */
-/*           What this means is if another data structure stores this */
-/*           pointer then the tree does not need to be searched when this */
-/*           is to be deleted. */
-/**/
-/*  Modifies Input: tree */
-/**/
-/*  EFFECTS:  Creates a node node which contains the appropriate key and */
-/*            info pointers and inserts it into the tree. */
-/***********************************************************************/
-/* jim */
-RedBlackTreeNode * RedBlackTree::Insert(RedBlackEntry * newEntry)
-{
-  RedBlackTreeNode * y;
-  RedBlackTreeNode * x;
-  RedBlackTreeNode * newNode;
-
-  x = new RedBlackTreeNode(newEntry);
-  TreeInsertHelp(x);
-  newNode = x;
-  x->red=1;
-  while(x->parent->red) { /* use sentinel instead of checking for root */
-    if (x->parent == x->parent->parent->left) {
-      y=x->parent->parent->right;
-      if (y->red) {
-	x->parent->red=0;
-	y->red=0;
-	x->parent->parent->red=1;
-	x=x->parent->parent;
-      } else {
-	if (x == x->parent->right) {
-	  x=x->parent;
-	  LeftRotate(x);
-	}
-	x->parent->red=0;
-	x->parent->parent->red=1;
-	RightRotate(x->parent->parent);
-      }
-    } else { /* case for x->parent == x->parent->parent->right */
-             /* this part is just like the section above with */
-             /* left and right interchanged */
-      y=x->parent->parent->left;
-      if (y->red) {
-	x->parent->red=0;
-	y->red=0;
-	x->parent->parent->red=1;
-	x=x->parent->parent;
-      } else {
-	if (x == x->parent->left) {
-	  x=x->parent;
-	  RightRotate(x);
-	}
-	x->parent->red=0;
-	x->parent->parent->red=1;
-	LeftRotate(x->parent->parent);
-      }
-    }
-  }
-  root->left->red=0;
-  return(newNode);
-}
-
-/***********************************************************************/
-/*  FUNCTION:  GetSuccessorOf  */
-/**/
-/*    INPUTS:  x is the node we want the succesor of */
-/**/
-/*    OUTPUT:  This function returns the successor of x or NULL if no */
-/*             successor exists. */
-/**/
-/*    Modifies Input: none */
-/**/
-/*    Note:  uses the algorithm in _Introduction_To_Algorithms_ */
-/***********************************************************************/
-
-RedBlackTreeNode * RedBlackTree::GetSuccessorOf(RedBlackTreeNode * x) const
-{
-  RedBlackTreeNode* y;
-
-  if (nil != (y = x->right)) { /* assignment to y is intentional */
-    while(y->left != nil) { /* returns the minium of the right subtree of x */
-      y=y->left;
-    }
-    return(y);
-  } else {
-    y=x->parent;
-    while(x == y->right) { /* sentinel used instead of checking for nil */
-      x=y;
-      y=y->parent;
-    }
-    if (y == root) return(nil);
-    return(y);
-  }
-}
-
-/***********************************************************************/
-/*  FUNCTION:  GetPredecessorOf  */
-/**/
-/*    INPUTS:  x is the node to get predecessor of */
-/**/
-/*    OUTPUT:  This function returns the predecessor of x or NULL if no */
-/*             predecessor exists. */
-/**/
-/*    Modifies Input: none */
-/**/
-/*    Note:  uses the algorithm in _Introduction_To_Algorithms_ */
-/***********************************************************************/
-
-RedBlackTreeNode * RedBlackTree::GetPredecessorOf(RedBlackTreeNode * x) const {
-  RedBlackTreeNode* y;
-
-  if (nil != (y = x->left)) { /* assignment to y is intentional */
-    while(y->right != nil) { /* returns the maximum of the left subtree of x */
-      y=y->right;
-    }
-    return(y);
-  } else {
-    y=x->parent;
-    while(x == y->left) {
-      if (y == root) return(nil);
-      x=y;
-      y=y->parent;
-    }
-    return(y);
-  }
-}
-
-/***********************************************************************/
-/*  FUNCTION:  Print */
-/**/
-/*    INPUTS:  none */
-/**/
-/*    OUTPUT:  none  */
-/**/
-/*    EFFECTS:  This function recursively prints the nodes of the tree */
-/*              inorder. */
-/**/
-/*    Modifies Input: none */
-/**/
-/*    Note:    This function should only be called from ITTreePrint */
-/***********************************************************************/
-
-void RedBlackTreeNode::Print(RedBlackTreeNode * nil,
-			     RedBlackTreeNode * root) const {
-  storedEntry->Print();
-  printf(", key=%i ",key);
-  printf("  l->key=");
-  if( left == nil) printf("NULL"); else printf("%i",left->key);
-  printf("  r->key=");
-  if( right == nil) printf("NULL"); else printf("%i",right->key);
-  printf("  p->key=");
-  if( parent == root) printf("NULL"); else printf("%i",parent->key);
-  printf("  red=%i\n",red);
-}
-
-void RedBlackTree::TreePrintHelper( RedBlackTreeNode* x) const {
-
-  if (x != nil) {
-    TreePrintHelper(x->left);
-    x->Print(nil,root);
-    TreePrintHelper(x->right);
-  }
-}
-
-/***********************************************************************/
-/*  FUNCTION:  Print */
-/**/
-/*    INPUTS:  none */
-/**/
-/*    OUTPUT:  none */
-/**/
-/*    EFFECT:  This function recursively prints the nodes of the tree */
-/*             inorder. */
-/**/
-/*    Modifies Input: none */
-/**/
-/***********************************************************************/
-
-void RedBlackTree::Print() const {
-  TreePrintHelper(root->left);
-}
-
-/***********************************************************************/
-/*  FUNCTION:  DeleteFixUp */
-/**/
-/*    INPUTS:  x is the child of the spliced */
-/*             out node in DeleteNode. */
-/**/
-/*    OUTPUT:  none */
-/**/
-/*    EFFECT:  Performs rotations and changes colors to restore red-black */
-/*             properties after a node is deleted */
-/**/
-/*    Modifies Input: this, x */
-/**/
-/*    The algorithm from this function is from _Introduction_To_Algorithms_ */
-/***********************************************************************/
-
-void RedBlackTree::DeleteFixUp(RedBlackTreeNode* x) {
-  RedBlackTreeNode * w;
-  RedBlackTreeNode * rootLeft = root->left;
-
-  while( (!x->red) && (rootLeft != x)) {
-    if (x == x->parent->left) {
-
-//
-      w=x->parent->right;
-      if (w->red) {
-	w->red=0;
-	x->parent->red=1;
-	LeftRotate(x->parent);
-	w=x->parent->right;
-      }
-      if ( (!w->right->red) && (!w->left->red) ) {
-	w->red=1;
-	x=x->parent;
-      } else {
-	if (!w->right->red) {
-	  w->left->red=0;
-	  w->red=1;
-	  RightRotate(w);
-	  w=x->parent->right;
-	}
-	w->red=x->parent->red;
-	x->parent->red=0;
-	w->right->red=0;
-	LeftRotate(x->parent);
-	x=rootLeft; /* this is to exit while loop */
-      }
-//
-
-    } else { /* the code below is has left and right switched from above */
-      w=x->parent->left;
-      if (w->red) {
-	w->red=0;
-	x->parent->red=1;
-	RightRotate(x->parent);
-	w=x->parent->left;
-      }
-      if ( (!w->right->red) && (!w->left->red) ) {
-	w->red=1;
-	x=x->parent;
-      } else {
-	if (!w->left->red) {
-	  w->right->red=0;
-	  w->red=1;
-	  LeftRotate(w);
-	  w=x->parent->left;
-	}
-	w->red=x->parent->red;
-	x->parent->red=0;
-	w->left->red=0;
-	RightRotate(x->parent);
-	x=rootLeft; /* this is to exit while loop */
-      }
-    }
-  }
-  x->red=0;
-
-}
-
-
-/***********************************************************************/
-/*  FUNCTION:  DeleteNode */
-/**/
-/*    INPUTS:  tree is the tree to delete node z from */
-/**/
-/*    OUTPUT:  returns the RedBlackEntry stored at deleted node */
-/**/
-/*    EFFECT:  Deletes z from tree and but don't call destructor */
-/**/
-/*    Modifies Input:  z */
-/**/
-/*    The algorithm from this function is from _Introduction_To_Algorithms_ */
-/***********************************************************************/
-
-RedBlackEntry * RedBlackTree::DeleteNode(RedBlackTreeNode * z){
-  RedBlackTreeNode* y;
-  RedBlackTreeNode* x;
-  RedBlackEntry * returnValue = z->storedEntry;
-
-  y= ((z->left == nil) || (z->right == nil)) ? z : GetSuccessorOf(z);
-  x= (y->left == nil) ? y->right : y->left;
-  if (root == (x->parent = y->parent)) { /* assignment of y->p to x->p is intentional */
-    root->left=x;
-  } else {
-    if (y == y->parent->left) {
-      y->parent->left=x;
-    } else {
-      y->parent->right=x;
-    }
-  }
-  if (y != z) { /* y should not be nil in this case */
-
-    /* y is the node to splice out and x is its child */
-
-    y->left=z->left;
-    y->right=z->right;
-    y->parent=z->parent;
-    z->left->parent=z->right->parent=y;
-    if (z == z->parent->left) {
-      z->parent->left=y;
-    } else {
-      z->parent->right=y;
-    }
-    if (!(y->red)) {
-      y->red = z->red;
-      DeleteFixUp(x);
-    } else
-      y->red = z->red;
-    delete z;
-  } else {
-    if (!(y->red)) DeleteFixUp(x);
-    delete y;
-  }
-  return returnValue;
-}
-
-
-/***********************************************************************/
-/*  FUNCTION:  Enumerate */
-/**/
-/*    INPUTS:  tree is the tree to look for keys between [low,high] */
-/**/
-/*    OUTPUT:  stack containing pointers to the nodes between [low,high] */
-/**/
-/*    Modifies Input: none */
-/**/
-/*    EFFECT:  Returns a stack containing pointers to nodes containing */
-/*             keys which in [low,high]/ */
-/**/
-/***********************************************************************/
-
-TemplateStack<RedBlackTreeNode *> * RedBlackTree::Enumerate(int low,
-							    int high)  {
-  TemplateStack<RedBlackTreeNode *> * enumResultStack =
-    new TemplateStack<RedBlackTreeNode *>(4);
-
-  RedBlackTreeNode* x=root->left;
-  RedBlackTreeNode* lastBest=NULL;
-
-  while(nil != x) {
-    if ( x->key > high ) {
-      x=x->left;
-    } else {
-      lastBest=x;
-      x=x->right;
-    }
-  }
-  while ( (lastBest) && (low <= lastBest->key) ) {
-    enumResultStack->Push(lastBest);
-    lastBest=GetPredecessorOf(lastBest);
-  }
-  return(enumResultStack);
-}
--- a/nilmdb/init.py
+++ b/nilmdb/init.py
@@ -3,14 +3,10 @@
 from .nilmdb import NilmDB
 from .server import Server
 from .client import Client
-from .timer import Timer
-
-import cmdline

 import pyximport; pyximport.install()
 import layout
-
-import serializer
-import timestamper
 import interval
-import du
+
+import cmdline
+
--- a/nilmdb/bulkdata.py
+++ b/nilmdb/bulkdata.py
@@ -0,0 +1,297 @@
+# Fixed record size bulk data storage
+
+from __future__ import absolute_import
+from __future__ import division
+import nilmdb
+from nilmdb.utils.printf import *
+
+import os
+import sys
+import cPickle as pickle
+import struct
+import fnmatch
+import mmap
+
+# Up to 256 open file descriptors at any given time
+table_cache_size = 16
+fd_cache_size = 16
+
+@nilmdb.utils.must_close()
+class BulkData(object):
+    def __init__(self, basepath):
+        self.basepath = basepath
+        self.root = os.path.join(self.basepath, "data")
+
+        # Make root path
+        if not os.path.isdir(self.root):
+            os.mkdir(self.root)
+
+    def close(self):
+        self.getnode.cache_remove_all()
+
+    def create(self, path, layout_name):
+        """
+        path: path to the data (e.g. '/newton/prep').
+        Paths must contain at least two elements, e.g.:
+           /newton/prep
+           /newton/raw
+           /newton/upstairs/prep
+           /newton/upstairs/raw
+
+        layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8'
+        """
+        if path[0] != '/':
+            raise ValueError("paths must start with /")
+        [ group, node ] = path.rsplit("/", 1)
+        if group == '':
+            raise ValueError("invalid path")
+
+        # Get layout, and build format string for struct module
+        try:
+            layout = nilmdb.layout.get_named(layout_name)
+            struct_fmt = '<d'  # Little endian, double timestamp
+            struct_mapping = {
+                "int8": 'b',
+                "uint8": 'B',
+                "int16": 'h',
+                "uint16": 'H',
+                "int32": 'i',
+                "uint32": 'I',
+                "int64": 'q',
+                "uint64": 'Q',
+                "float32": 'f',
+                "float64": 'd',
+                }
+            for n in range(layout.count):
+                struct_fmt += struct_mapping[layout.datatype]
+        except KeyError:
+            raise ValueError("no such layout, or bad data types")
+
+        # Create the table.  Note that we make a distinction here
+        # between NilmDB paths (always Unix style, split apart
+        # manually) and OS paths (built up with os.path.join)
+        try:
+            # Make directories leading up to this one
+            elements = path.lstrip('/').split('/')
+            for i in range(len(elements)):
+                ospath = os.path.join(self.root, *elements[0:i])
+                if Table.exists(ospath):
+                    raise ValueError("path is subdir of existing node")
+                if not os.path.isdir(ospath):
+                    os.mkdir(ospath)
+
+            # Make the final dir
+            ospath = os.path.join(self.root, *elements)
+            if os.path.isdir(ospath):
+                raise ValueError("subdirs of this path already exist")
+            os.mkdir(ospath)
+
+            # Write format string to file
+            Table.create(ospath, struct_fmt)
+        except OSError as e:
+            raise ValueError("error creating table at that path: " + e.strerror)
+
+        # Open and cache it
+        self.getnode(path)
+
+        # Success
+        return
+
+    def destroy(self, path):
+        """Fully remove all data at a particular path.  No way to undo
+        it!  The group/path structure is removed, too."""
+
+        # Get OS path
+        elements = path.lstrip('/').split('/')
+        ospath = os.path.join(self.root, *elements)
+
+        # Remove Table object from cache
+        self.getnode.cache_remove(self, ospath)
+
+        # Remove the contents of the target directory
+        if not os.path.isfile(os.path.join(ospath, "format")):
+            raise ValueError("nothing at that path")
+        for file in os.listdir(ospath):
+            os.remove(os.path.join(ospath, file))
+
+        # Remove empty parent directories
+        for i in reversed(range(len(elements))):
+            ospath = os.path.join(self.root, *elements[0:i+1])
+            try:
+                os.rmdir(ospath)
+            except OSError:
+                break
+
+    # Cache open tables
+    @nilmdb.utils.lru_cache(size = table_cache_size,
+                            onremove = lambda x: x.close())
+    def getnode(self, path):
+        """Return a Table object corresponding to the given database
+        path, which must exist."""
+        elements = path.lstrip('/').split('/')
+        ospath = os.path.join(self.root, *elements)
+        return Table(ospath)
+
+@nilmdb.utils.must_close()
+class Table(object):
+    """Tools to help access a single table (data at a specific OS path)"""
+
+    # Class methods, to help keep format details in this class.
+    @classmethod
+    def exists(cls, root):
+        """Return True if a table appears to exist at this OS path"""
+        return os.path.isfile(os.path.join(root, "format"))
+
+    @classmethod
+    def create(cls, root, struct_fmt):
+        """Initialize a table at the given OS path.
+        'struct_fmt' is a Struct module format description"""
+        format = { "rows_per_file": 4 * 1024 * 1024,
+                   "struct_fmt": struct_fmt }
+        with open(os.path.join(root, "format"), "wb") as f:
+            pickle.dump(format, f, 2)
+
+    # Normal methods
+    def __init__(self, root):
+        """'root' is the full OS path to the directory of this table"""
+        self.root = root
+
+        # Load the format and build packer
+        with open(self._fullpath("format"), "rb") as f:
+            format = pickle.load(f)
+        self.rows_per_file = format["rows_per_file"]
+        self.packer = struct.Struct(format["struct_fmt"])
+        self.file_size = self.packer.size * self.rows_per_file
+
+        # Find nrows by locating the lexicographically last filename
+        # and using its size.
+        pattern = '[0-9a-f]' * 8
+        allfiles = fnmatch.filter(os.listdir(self.root), pattern)
+        if allfiles:
+            filename = max(allfiles)
+            offset = os.path.getsize(self._fullpath(filename))
+            self.nrows = self._row_from_fnoffset(filename, offset)
+        else:
+            self.nrows = 0
+
+    def close(self):
+        self.mmap_open.cache_remove_all()
+
+    # Internal helpers
+    def _fullpath(self, filename):
+        return os.path.join(self.root, filename)
+
+    def _fnoffset_from_row(self, row):
+        """Return a (filename, offset, count) tuple:
+
+        filename: the filename that contains the specified row
+          offset: byte offset of the specified row within the file
+           count: number of rows (starting at offste) that fit in the file
+        """
+        filenum = row // self.rows_per_file
+        filename = sprintf("%08x", filenum)
+        offset = (row % self.rows_per_file) * self.packer.size
+        count = self.rows_per_file - (row % self.rows_per_file)
+        return (filename, offset, count)
+
+    def _row_from_fnoffset(self, filename, offset):
+        """Return the row number that corresponds to the given
+        filename and byte-offset within that file."""
+        filenum = int(filename, 16)
+        if (offset % self.packer.size) != 0:
+            raise ValueError("file offset is not a multiple of data size")
+        row = (filenum * self.rows_per_file) + (offset // self.packer.size)
+        return row
+
+    # Cache open files
+    @nilmdb.utils.lru_cache(size = fd_cache_size,
+                            onremove = lambda x: x.close())
+    def mmap_open(self, file, newsize = None):
+        """Open and map a given filename (relative to self.root).
+        Will be automatically closed when evicted from the cache.
+
+        If 'newsize' is provided, the file is truncated to the given
+        size before the mapping is returned.  (Note that the LRU cache
+        on this function means the truncate will only happen if the
+        object isn't already cached; mmap.resize should be used too)"""
+        f = open(os.path.join(self.root, file), "a+", 0)
+        if newsize is not None:
+            # mmap can't map a zero-length file, so this allows the
+            # caller to set the filesize between file creation and
+            # mmap.
+            f.truncate(newsize)
+        mm = mmap.mmap(f.fileno(), 0)
+        return mm
+
+    def append(self, data):
+        """Append the data and flush it to disk.
+        data is a nested Python list [[row],[row],[...]]"""
+        remaining = len(data)
+        dataiter = iter(data)
+        while remaining:
+            # See how many rows we can fit into the current file, and open it
+            (filename, offset, count) = self._fnoffset_from_row(self.nrows)
+            if count > remaining:
+                count = remaining
+            newsize = offset + count * self.packer.size
+            mm = self.mmap_open(filename, newsize)
+            mm.seek(offset)
+
+            # Extend the file to the target length.  We specified
+            # newsize when opening, but that may have been ignored if
+            # the mmap_open returned a cached object.
+            mm.resize(newsize)
+
+            # Write the data
+            for i in xrange(count):
+                row = dataiter.next()
+                mm.write(self.packer.pack(*row))
+            remaining -= count
+            self.nrows += count
+
+    def __getitem__(self, key):
+        """Extract data and return it.  Supports simple indexing
+        (table[n]) and range slices (table[n:m]).  Returns a nested
+        Python list [[row],[row],[...]]"""
+
+        # Handle simple slices
+        if isinstance(key, slice):
+            # Fall back to brute force if the slice isn't simple
+            if ((key.step is not None and key.step != 1) or
+                key.start is None or
+                key.stop is None or
+                key.start >= key.stop or
+                key.start < 0 or
+                key.stop > self.nrows):
+                return [ self[x] for x in xrange(*key.indices(self.nrows)) ]
+
+            ret = []
+            row = key.start
+            remaining = key.stop - key.start
+            while remaining:
+                (filename, offset, count) = self._fnoffset_from_row(row)
+                if count > remaining:
+                    count = remaining
+                mm = self.mmap_open(filename)
+                for i in xrange(count):
+                    ret.append(list(self.packer.unpack_from(mm, offset)))
+                    offset += self.packer.size
+                remaining -= count
+                row += count
+            return ret
+
+        # Handle single points
+        if key < 0 or key >= self.nrows:
+            raise IndexError("Index out of range")
+        (filename, offset, count) = self._fnoffset_from_row(key)
+        mm = self.mmap_open(filename)
+        # unpack_from ignores the mmap object's current seek position
+        return self.packer.unpack_from(mm, offset)
+
+class TimestampOnlyTable(object):
+    """Helper that lets us pass a Tables object into bisect, by
+    returning only the timestamp when a particular row is requested."""
+    def __init__(self, table):
+        self.table = table
+    def __getitem__(self, index):
+        return self.table[index][0]
--- a/nilmdb/bxintersect.pyx
+++ b/nilmdb/bxintersect.pyx
@@ -1,495 +0,0 @@
-# cython: profile=False
-# This is from bx-python 554:07aca5a9f6fc (BSD licensed), modified to
-# store interval ranges as doubles rather than 32-bit integers.
-
-"""
-Data structure for performing intersect queries on a set of intervals which
-preserves all information about the intervals (unlike bitset projection methods).
-
-:Authors: James Taylor (james@jamestaylor.org),
-          Ian Schenk (ian.schenck@gmail.com),
-          Brent Pedersen (bpederse@gmail.com)
-"""
-
-# Historical note:
-#    This module original contained an implementation based on sorted endpoints
-#    and a binary search, using an idea from Scott Schwartz and Piotr Berman.
-#    Later an interval tree implementation was implemented by Ian for Galaxy's
-#    join tool (see `bx.intervals.operations.quicksect.py`). This was then
-#    converted to Cython by Brent, who also added support for
-#    upstream/downstream/neighbor queries. This was modified by James to
-#    handle half-open intervals strictly, to maintain sort order, and to
-#    implement the same interface as the original Intersecter.
-
-#cython: cdivision=True
-
-import operator
-
-cdef extern from "stdlib.h":
-    int ceil(float f)
-    float log(float f)
-    int RAND_MAX
-    int rand()
-    int strlen(char *)
-    int iabs(int)
-
-cdef inline double dmax2(double a, double b):
-    if b > a: return b
-    return a
-
-cdef inline double dmax3(double a, double b, double c):
-    if b > a:
-        if c > b:
-            return c
-        return b
-    if a > c:
-        return a
-    return c
-
-cdef inline double dmin3(double a, double b, double c):
-    if b < a:
-        if c < b:
-            return c
-        return b
-    if a < c:
-        return a
-    return c
-
-cdef inline double dmin2(double a, double b):
-    if b < a: return b
-    return a
-
-cdef float nlog = -1.0 / log(0.5)
-
-cdef class IntervalNode:
-    """
-    A single node of an `IntervalTree`.
-
-    NOTE: Unless you really know what you are doing, you probably should us
-          `IntervalTree` rather than using this directly.
-    """
-    cdef float priority
-    cdef public object interval
-    cdef public double start, end
-    cdef double minend, maxend, minstart
-    cdef IntervalNode cleft, cright, croot
-
-    property left_node:
-        def __get__(self):
-            return self.cleft if self.cleft is not EmptyNode else None
-    property right_node:
-        def __get__(self):
-            return self.cright if self.cright is not EmptyNode else None
-    property root_node:
-        def __get__(self):
-            return self.croot if self.croot is not EmptyNode else None
-
-    def __repr__(self):
-        return "IntervalNode(%g, %g)" % (self.start, self.end)
-
-    def __cinit__(IntervalNode self, double start, double end, object interval):
-        # Python lacks the binomial distribution, so we convert a
-        # uniform into a binomial because it naturally scales with
-        # tree size.  Also, python's uniform is perfect since the
-        # upper limit is not inclusive, which gives us undefined here.
-        self.priority = ceil(nlog * log(-1.0/(1.0 * rand()/RAND_MAX - 1)))
-        self.start    = start
-        self.end      = end
-        self.interval = interval
-        self.maxend   = end
-        self.minstart = start
-        self.minend   = end
-        self.cleft    = EmptyNode
-        self.cright   = EmptyNode
-        self.croot    = EmptyNode
-
-    cpdef IntervalNode insert(IntervalNode self, double start, double end, object interval):
-        """
-        Insert a new IntervalNode into the tree of which this node is
-        currently the root. The return value is the new root of the tree (which
-        may or may not be this node!)
-        """
-        cdef IntervalNode croot = self
-        # If starts are the same, decide which to add interval to based on
-        # end, thus maintaining sortedness relative to start/end
-        cdef double decision_endpoint = start
-        if start == self.start:
-            decision_endpoint = end
-
-        if decision_endpoint > self.start:
-            # insert to cright tree
-            if self.cright is not EmptyNode:
-                self.cright = self.cright.insert( start, end, interval )
-            else:
-                self.cright = IntervalNode( start, end, interval )
-            # rebalance tree
-            if self.priority < self.cright.priority:
-                croot = self.rotate_left()
-        else:
-            # insert to cleft tree
-            if self.cleft is not EmptyNode:
-                self.cleft = self.cleft.insert( start, end, interval)
-            else:
-                self.cleft = IntervalNode( start, end, interval)
-            # rebalance tree
-            if self.priority < self.cleft.priority:
-                croot = self.rotate_right()
-
-        croot.set_ends()
-        self.cleft.croot  = croot
-        self.cright.croot = croot
-        return croot
-
-    cdef IntervalNode rotate_right(IntervalNode self):
-        cdef IntervalNode croot = self.cleft
-        self.cleft  = self.cleft.cright
-        croot.cright = self
-        self.set_ends()
-        return croot
-
-    cdef IntervalNode rotate_left(IntervalNode self):
-        cdef IntervalNode croot = self.cright
-        self.cright = self.cright.cleft
-        croot.cleft  = self
-        self.set_ends()
-        return croot
-
-    cdef inline void set_ends(IntervalNode self):
-        if self.cright is not EmptyNode and self.cleft is not EmptyNode:
-            self.maxend = dmax3(self.end, self.cright.maxend, self.cleft.maxend)
-            self.minend = dmin3(self.end, self.cright.minend, self.cleft.minend)
-            self.minstart = dmin3(self.start, self.cright.minstart, self.cleft.minstart)
-        elif self.cright is not EmptyNode:
-            self.maxend = dmax2(self.end, self.cright.maxend)
-            self.minend = dmin2(self.end, self.cright.minend)
-            self.minstart = dmin2(self.start, self.cright.minstart)
-        elif self.cleft is not EmptyNode:
-            self.maxend = dmax2(self.end, self.cleft.maxend)
-            self.minend = dmin2(self.end, self.cleft.minend)
-            self.minstart = dmin2(self.start, self.cleft.minstart)
-
-
-    def intersect( self, double start, double end, sort=True ):
-        """
-        given a start and a end, return a list of features
-        falling within that range
-        """
-        cdef list results = []
-        self._intersect( start, end, results )
-        if sort:
-            results = sorted(results)
-        return results
-
-    find = intersect
-
-    cdef void _intersect( IntervalNode self, double start, double end, list results):
-        # Left subtree
-        if self.cleft is not EmptyNode and self.cleft.maxend > start:
-            self.cleft._intersect( start, end, results )
-        # This interval
-        if ( self.end > start ) and ( self.start < end ):
-            results.append( self.interval )
-        # Right subtree
-        if self.cright is not EmptyNode and self.start < end:
-            self.cright._intersect( start, end, results )
-
-
-    cdef void _seek_left(IntervalNode self, double position, list results, int n, double max_dist):
-        # we know we can bail in these 2 cases.
-        if self.maxend + max_dist < position:
-            return
-        if self.minstart > position:
-            return
-
-        # the ordering of these 3 blocks makes it so the results are
-        # ordered nearest to farest from the query position
-        if self.cright is not EmptyNode:
-            self.cright._seek_left(position, results, n, max_dist)
-
-        if -1 < position - self.end < max_dist:
-            results.append(self.interval)
-
-        # TODO: can these conditionals be more stringent?
-        if self.cleft is not EmptyNode:
-                self.cleft._seek_left(position, results, n, max_dist)
-
-
-
-    cdef void _seek_right(IntervalNode self, double position, list results, int n, double max_dist):
-        # we know we can bail in these 2 cases.
-        if self.maxend < position: return
-        if self.minstart - max_dist > position: return
-
-        #print "SEEK_RIGHT:",self, self.cleft, self.maxend, self.minstart, position
-
-        # the ordering of these 3 blocks makes it so the results are
-        # ordered nearest to farest from the query position
-        if self.cleft is not EmptyNode:
-                self.cleft._seek_right(position, results, n, max_dist)
-
-        if -1 < self.start - position < max_dist:
-            results.append(self.interval)
-
-        if self.cright is not EmptyNode:
-                self.cright._seek_right(position, results, n, max_dist)
-
-
-    cpdef left(self, position, int n=1, double max_dist=2500):
-        """
-        find n features with a start > than `position`
-        f: a Interval object (or anything with an `end` attribute)
-        n: the number of features to return
-        max_dist: the maximum distance to look before giving up.
-        """
-        cdef list results = []
-        # use start - 1 becuase .left() assumes strictly left-of
-        self._seek_left( position - 1, results, n, max_dist )
-        if len(results) == n: return results
-        r = results
-        r.sort(key=operator.attrgetter('end'), reverse=True)
-        return r[:n]
-
-    cpdef right(self, position, int n=1, double max_dist=2500):
-        """
-        find n features with a end < than position
-        f: a Interval object (or anything with a `start` attribute)
-        n: the number of features to return
-        max_dist: the maximum distance to look before giving up.
-        """
-        cdef list results = []
-        # use end + 1 becuase .right() assumes strictly right-of
-        self._seek_right(position + 1, results, n, max_dist)
-        if len(results) == n: return results
-        r = results
-        r.sort(key=operator.attrgetter('start'))
-        return r[:n]
-
-    def traverse(self):
-        if self.cleft is not EmptyNode:
-            for node in self.cleft.traverse():
-                yield node
-        yield self.interval
-        if self.cright is not EmptyNode:
-            for node in self.cright.traverse():
-                yield node
-
-cdef IntervalNode EmptyNode = IntervalNode( 0, 0, Interval(0, 0))
-
-## ---- Wrappers that retain the old interface -------------------------------
-
-cdef class Interval:
-    """
-    Basic feature, with required integer start and end properties.
-    Also accepts optional strand as +1 or -1 (used for up/downstream queries),
-    a name, and any arbitrary data is sent in on the info keyword argument
-
-    >>> from bx.intervals.intersection import Interval
-
-    >>> f1 = Interval(23, 36)
-    >>> f2 = Interval(34, 48, value={'chr':12, 'anno':'transposon'})
-    >>> f2
-    Interval(34, 48, value={'anno': 'transposon', 'chr': 12})
-
-    """
-    cdef public double start, end
-    cdef public object value, chrom, strand
-
-    def __init__(self, double start, double end, object value=None, object chrom=None, object strand=None ):
-        assert start <= end, "start must be less than end"
-        self.start  = start
-        self.end   = end
-        self.value = value
-        self.chrom = chrom
-        self.strand = strand
-
-    def __repr__(self):
-        fstr = "Interval(%g, %g" % (self.start, self.end)
-        if not self.value is None:
-            fstr += ", value=" + str(self.value)
-        fstr += ")"
-        return fstr
-
-    def __richcmp__(self, other, op):
-        if op == 0:
-            # <
-            return self.start < other.start or self.end < other.end
-        elif op == 1:
-            # <=
-            return self == other or self < other
-        elif op == 2:
-            # ==
-            return self.start == other.start and self.end == other.end
-        elif op == 3:
-            # !=
-            return self.start != other.start or self.end != other.end
-        elif op == 4:
-            # >
-            return self.start > other.start or self.end > other.end
-        elif op == 5:
-            # >=
-            return self == other or self > other
-
-cdef class IntervalTree:
-    """
-    Data structure for performing window intersect queries on a set of
-    of possibly overlapping 1d intervals.
-
-    Usage
-    =====
-
-    Create an empty IntervalTree
-
-    >>> from bx.intervals.intersection import Interval, IntervalTree
-    >>> intersecter = IntervalTree()
-
-    An interval is a start and end position and a value (possibly None).
-    You can add any object as an interval:
-
-    >>> intersecter.insert( 0, 10, "food" )
-    >>> intersecter.insert( 3, 7, dict(foo='bar') )
-
-    >>> intersecter.find( 2, 5 )
-    ['food', {'foo': 'bar'}]
-
-    If the object has start and end attributes (like the Interval class) there
-    is are some shortcuts:
-
-    >>> intersecter = IntervalTree()
-    >>> intersecter.insert_interval( Interval( 0, 10 ) )
-    >>> intersecter.insert_interval( Interval( 3, 7 ) )
-    >>> intersecter.insert_interval( Interval( 3, 40 ) )
-    >>> intersecter.insert_interval( Interval( 13, 50 ) )
-
-    >>> intersecter.find( 30, 50 )
-    [Interval(3, 40), Interval(13, 50)]
-    >>> intersecter.find( 100, 200 )
-    []
-
-    Before/after for intervals
-
-    >>> intersecter.before_interval( Interval( 10, 20 ) )
-    [Interval(3, 7)]
-    >>> intersecter.before_interval( Interval( 5, 20 ) )
-    []
-
-    Upstream/downstream
-
-    >>> intersecter.upstream_of_interval(Interval(11, 12))
-    [Interval(0, 10)]
-    >>> intersecter.upstream_of_interval(Interval(11, 12, strand="-"))
-    [Interval(13, 50)]
-
-    >>> intersecter.upstream_of_interval(Interval(1, 2, strand="-"), num_intervals=3)
-    [Interval(3, 7), Interval(3, 40), Interval(13, 50)]
-
-
-    """
-
-    cdef IntervalNode root
-
-    def __cinit__( self ):
-        root = None
-
-    # ---- Position based interfaces -----------------------------------------
-
-    def insert( self, double start, double end, object value=None ):
-        """
-        Insert the interval [start,end) associated with value `value`.
-        """
-        if self.root is None:
-            self.root = IntervalNode( start, end, value )
-        else:
-            self.root = self.root.insert( start, end, value )
-
-    add = insert
-
-
-    def find( self, start, end ):
-        """
-        Return a sorted list of all intervals overlapping [start,end).
-        """
-        if self.root is None:
-            return []
-        return self.root.find( start, end )
-
-    def before( self, position, num_intervals=1, max_dist=2500 ):
-        """
-        Find `num_intervals` intervals that lie before `position` and are no
-        further than `max_dist` positions away
-        """
-        if self.root is None:
-            return []
-        return self.root.left( position, num_intervals, max_dist )
-
-    def after( self, position, num_intervals=1, max_dist=2500 ):
-        """
-        Find `num_intervals` intervals that lie after `position` and are no
-        further than `max_dist` positions away
-        """
-        if self.root is None:
-            return []
-        return self.root.right( position, num_intervals, max_dist )
-
-    # ---- Interval-like object based interfaces -----------------------------
-
-    def insert_interval( self, interval ):
-        """
-        Insert an "interval" like object (one with at least start and end
-        attributes)
-        """
-        self.insert( interval.start, interval.end, interval )
-
-    add_interval = insert_interval
-
-    def before_interval( self, interval, num_intervals=1, max_dist=2500 ):
-        """
-        Find `num_intervals` intervals that lie completely before `interval`
-        and are no further than `max_dist` positions away
-        """
-        if self.root is None:
-            return []
-        return self.root.left( interval.start, num_intervals, max_dist )
-
-    def after_interval( self, interval, num_intervals=1, max_dist=2500 ):
-        """
-        Find `num_intervals` intervals that lie completely after `interval` and
-        are no further than `max_dist` positions away
-        """
-        if self.root is None:
-            return []
-        return self.root.right( interval.end, num_intervals, max_dist )
-
-    def upstream_of_interval( self, interval, num_intervals=1, max_dist=2500 ):
-        """
-        Find `num_intervals` intervals that lie completely upstream of
-        `interval` and are no further than `max_dist` positions away
-        """
-        if self.root is None:
-            return []
-        if interval.strand == -1 or interval.strand == "-":
-            return self.root.right( interval.end, num_intervals, max_dist )
-        else:
-            return self.root.left( interval.start, num_intervals, max_dist )
-
-    def downstream_of_interval( self, interval, num_intervals=1, max_dist=2500 ):
-        """
-        Find `num_intervals` intervals that lie completely downstream of
-        `interval` and are no further than `max_dist` positions away
-        """
-        if self.root is None:
-            return []
-        if interval.strand == -1 or interval.strand == "-":
-            return self.root.left( interval.start, num_intervals, max_dist )
-        else:
-            return self.root.right( interval.end, num_intervals, max_dist )
-
-    def traverse(self):
-        """
-        iterator that traverses the tree
-        """
-        if self.root is None:
-            return iter([])
-        return self.root.traverse()
-
-# For backward compatibility
-Intersecter = IntervalTree
--- a/nilmdb/client.py
+++ b/nilmdb/client.py
@@ -3,7 +3,7 @@
 """Class for performing HTTP client requests via libcurl"""

 from __future__ import absolute_import
-from nilmdb.printf import *
+from nilmdb.utils.printf import *

 import time
 import sys
--- a/nilmdb/cmdline/cmdline.py
+++ b/nilmdb/cmdline/cmdline.py
@@ -1,7 +1,7 @@
 """Command line client functionality"""

 from __future__ import absolute_import
-from nilmdb.printf import *
+from nilmdb.utils.printf import *
 import nilmdb.client

 import datetime_tz
--- a/nilmdb/cmdline/create.py
+++ b/nilmdb/cmdline/create.py
@@ -1,5 +1,5 @@
 from __future__ import absolute_import
-from nilmdb.printf import *
+from nilmdb.utils.printf import *
 import nilmdb.client

 from argparse import ArgumentDefaultsHelpFormatter as def_form
--- a/nilmdb/cmdline/destroy.py
+++ b/nilmdb/cmdline/destroy.py
@@ -1,5 +1,5 @@
 from __future__ import absolute_import
-from nilmdb.printf import *
+from nilmdb.utils.printf import *
 import nilmdb.client

 from argparse import ArgumentDefaultsHelpFormatter as def_form
--- a/nilmdb/cmdline/extract.py
+++ b/nilmdb/cmdline/extract.py
@@ -1,5 +1,5 @@
 from __future__ import absolute_import
-from nilmdb.printf import *
+from nilmdb.utils.printf import *
 import nilmdb.client
 import sys

--- a/nilmdb/cmdline/info.py
+++ b/nilmdb/cmdline/info.py
@@ -1,5 +1,5 @@
 from __future__ import absolute_import
-from nilmdb.printf import *
+from nilmdb.utils.printf import *

 from argparse import ArgumentDefaultsHelpFormatter as def_form

--- a/nilmdb/cmdline/insert.py
+++ b/nilmdb/cmdline/insert.py
@@ -1,5 +1,5 @@
 from __future__ import absolute_import
-from nilmdb.printf import *
+from nilmdb.utils.printf import *
 import nilmdb.client
 import nilmdb.timestamper

--- a/nilmdb/cmdline/list.py
+++ b/nilmdb/cmdline/list.py
@@ -1,5 +1,5 @@
 from __future__ import absolute_import
-from nilmdb.printf import *
+from nilmdb.utils.printf import *
 import nilmdb.client

 import fnmatch
--- a/nilmdb/cmdline/metadata.py
+++ b/nilmdb/cmdline/metadata.py
@@ -1,5 +1,5 @@
 from __future__ import absolute_import
-from nilmdb.printf import *
+from nilmdb.utils.printf import *
 import nilmdb.client

 def setup(self, sub):
--- a/nilmdb/httpclient.py
+++ b/nilmdb/httpclient.py
@@ -1,7 +1,8 @@
 """HTTP client library"""

 from __future__ import absolute_import
-from nilmdb.printf import *
+from nilmdb.utils.printf import *
+import nilmdb.utils

 import time
 import sys
@@ -13,8 +14,6 @@ import urllib
 import pycurl
 import cStringIO

-import nilmdb.iteratorizer
-
 class Error(Exception):
    """Base exception for both ClientError and ServerError responses"""
    def __init__(self,
@@ -85,6 +84,10 @@ class HTTPClient(object):
            raise ClientError(**args)
        else: # pragma: no cover
            if code >= 500 and code <= 599:
+                if args["message"] is None:
+                    args["message"] = ("(no message; try disabling " +
+                                       "response.stream option in " +
+                                       "nilmdb.server for better debugging)")
                raise ServerError(**args)
            else:
                raise Error(**args)
@@ -109,7 +112,7 @@ class HTTPClient(object):
            self.curl.setopt(pycurl.WRITEFUNCTION, callback)
            self.curl.perform()
        try:
-            for i in nilmdb.iteratorizer.Iteratorizer(func):
+            for i in nilmdb.utils.Iteratorizer(func):
                if self._status == 200:
                    # If we had a 200 response, yield the data to the caller.
                    yield i
--- a/nilmdb/layout.pyx
+++ b/nilmdb/layout.pyx
@@ -1,6 +1,5 @@
 # cython: profile=False

-import tables
 import time
 import sys
 import inspect
@@ -122,15 +121,6 @@ class Layout:
            s += " %d" % d[i+1]
        return s + "\n"

-    # PyTables description
-    def description(self):
-        """Return the PyTables description of this layout"""
-        desc = {}
-        desc['timestamp'] = tables.Col.from_type('float64', pos=0)
-        for n in range(self.count):
-            desc['c' + str(n+1)] = tables.Col.from_type(self.datatype, pos=n+1)
-        return tables.Description(desc)
-
 # Get a layout by name
 def get_named(typestring):
    try:
--- a/nilmdb/nilmdb.py
+++ b/nilmdb/nilmdb.py
@@ -4,17 +4,16 @@

 Object that represents a NILM database file.

-Manages both the SQL database and the PyTables storage backend.
+Manages both the SQL database and the table storage backend.
 """

 # Need absolute_import so that "import nilmdb" won't pull in nilmdb.py,
 # but will pull the nilmdb module instead.
 from __future__ import absolute_import
 import nilmdb
-from nilmdb.printf import *
+from nilmdb.utils.printf import *

 import sqlite3
-import tables
 import time
 import sys
 import os
@@ -25,6 +24,8 @@ import pyximport
 pyximport.install()
 from nilmdb.interval import Interval, DBInterval, IntervalSet, IntervalError

+from . import bulkdata
+
 # Note about performance and transactions:
 #
 # Committing a transaction in the default sync mode (PRAGMA synchronous=FULL)
@@ -87,19 +88,13 @@ class StreamError(NilmDBError):
 class OverlapError(NilmDBError):
    pass

-# Helper that lets us pass a Pytables table into bisect
-class BisectableTable(object):
-    def __init__(self, table):
-        self.table = table
-    def __getitem__(self, index):
-        return self.table[index][0]
-
+@nilmdb.utils.must_close()
 class NilmDB(object):
    verbose = 0

    def __init__(self, basepath, sync=True, max_results=None):
        # set up path
-        self.basepath = os.path.abspath(basepath.rstrip('/'))
+        self.basepath = os.path.abspath(basepath)

        # Create the database path if it doesn't exist
        try:
@@ -108,16 +103,16 @@ class NilmDB(object):
            if e.errno != errno.EEXIST:
                raise IOError("can't create tree " + self.basepath)

-        # Our HD5 file goes inside it
-        h5filename = os.path.abspath(self.basepath + "/data.h5")
-        self.h5file = tables.openFile(h5filename, "a", "NILM Database")
+        # Our data goes inside it
+        self.data = bulkdata.BulkData(self.basepath)

        # SQLite database too
-        sqlfilename = os.path.abspath(self.basepath + "/data.sql")
+        sqlfilename = os.path.join(self.basepath, "data.sql")
        # We use check_same_thread = False, assuming that the rest
        # of the code (e.g. Server) will be smart and not access this
-        # database from multiple threads simultaneously.  That requirement
-        # may be relaxed later.
+        # database from multiple threads simultaneously.  Otherwise
+        # false positives will occur when the database is only opened
+        # in one thread, and only accessed in another.
        self.con = sqlite3.connect(sqlfilename, check_same_thread = False)
        self._sql_schema_update()

@@ -134,17 +129,6 @@ class NilmDB(object):
        else:
            self.max_results = 16384

-        self.opened = True
-
-        # Cached intervals
-        self._cached_iset = {}
-
-    def __del__(self):
-        if "opened" in self.__dict__: # pragma: no cover
-            fprintf(sys.stderr,
-                    "error: NilmDB.close() wasn't called, path %s",
-                    self.basepath)
-
    def get_basepath(self):
        return self.basepath

@@ -152,8 +136,7 @@ class NilmDB(object):
        if self.con:
            self.con.commit()
            self.con.close()
-        self.h5file.close()
-        del self.opened
+        self.data.close()

    def _sql_schema_update(self):
        cur = self.con.cursor()
@@ -170,12 +153,11 @@ class NilmDB(object):
            with self.con:
                cur.execute("PRAGMA user_version = {v:d}".format(v=version))

+    @nilmdb.utils.lru_cache(size = 16)
    def _get_intervals(self, stream_id):
        """
        Return a mutable IntervalSet corresponding to the given stream ID.
        """
-        # Load from database if not cached
-        if stream_id not in self._cached_iset:
        iset = IntervalSet()
        result = self.con.execute("SELECT start_time, end_time, "
                                  "start_pos, end_pos "
@@ -188,16 +170,15 @@ class NilmDB(object):
                                   start_pos, end_pos)
        except IntervalError as e: # pragma: no cover
            raise NilmDBError("unexpected overlap in ranges table!")
-            self._cached_iset[stream_id] = iset
-        # Return cached value
-        return self._cached_iset[stream_id]
+
+        return iset

    def _add_interval(self, stream_id, interval, start_pos, end_pos):
        """
        Add interval to the internal interval cache, and to the database.
        Note: arguments must be ints (not numpy.int64, etc)
        """
-        # Ensure this stream's intervals are cached
+        # Load this stream's intervals
        iset = self._get_intervals(stream_id)

        # Check for overlap
@@ -215,7 +196,7 @@ class NilmDB(object):
        if (adjacent is not None and
            start_pos == adjacent.db_endpos and
            (end_pos - adjacent.db_startpos) < max_merged_rows):
-            # First delete the old one, both from our cache and the
+            # First delete the old one, both from our iset and the
            # database
            iset -= adjacent
            self.con.execute("DELETE FROM ranges WHERE "
@@ -232,7 +213,7 @@ class NilmDB(object):
            interval.start = adjacent.start
            start_pos = adjacent.db_startpos

-        # Add the new interval to the cache
+        # Add the new interval to the iset
        iset.iadd_nocheck(DBInterval(interval.start, interval.end,
                                     interval.start, interval.end,
                                     start_pos, end_pos))
@@ -307,34 +288,11 @@ class NilmDB(object):

        layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8'
        """
-        if path[0] != '/':
-            raise ValueError("paths must start with /")
-        [ group, node ] = path.rsplit("/", 1)
-        if group == '':
-            raise ValueError("invalid path")
+        # Create the bulk storage.  Raises ValueError on error, which we
+        # pass along.
+        self.data.create(path, layout_name)

-        # Get description
-        try:
-            desc = nilmdb.layout.get_named(layout_name).description()
-        except KeyError:
-            raise ValueError("no such layout")
-
-        # Estimated table size (for PyTables optimization purposes): assume
-        # 3 months worth of data at 8 KHz.  It's OK if this is wrong.
-        exp_rows = 8000 * 60*60*24*30*3
-
-        # Create the table
-        try:
-            table = self.h5file.createTable(group, node,
-                                            description = desc,
-                                            expectedrows = exp_rows,
-                                            createparents = True)
-        except AttributeError:
-            # Trying to create e.g. /foo/bar/baz when /foo/bar is already
-            # a table raises this error.
-            raise ValueError("error creating table at that path")
-
-        # Insert into SQL database once the PyTables is happy
+        # Insert into SQL database once the bulk storage is happy
        with self.con as con:
            con.execute("INSERT INTO streams (path, layout) VALUES (?,?)",
                        (path, layout_name))
@@ -380,24 +338,14 @@ class NilmDB(object):

    def stream_destroy(self, path):
        """Fully remove a table and all of its data from the database.
-        No way to undo it!  The group structure is removed, if there
-        are no other tables in it.  Metadata is removed."""
+        No way to undo it!  Metadata is removed."""
        stream_id = self._stream_id(path)

        # Delete the cached interval data
-        if stream_id in self._cached_iset:
-            del self._cached_iset[stream_id]
+        self._get_intervals.cache_remove(self, stream_id)

-        # Delete the data node, and all parent nodes (if they have no
-        # remaining children)
-        split_path = path.lstrip('/').split("/")
-        while split_path:
-            name = split_path.pop()
-            where = "/" + "/".join(split_path)
-            try:
-                self.h5file.removeNode(where, name, recursive = False)
-            except tables.NodeError:
-                break
+        # Delete the data
+        self.data.destroy(path)

        # Delete metadata, stream, intervals
        with self.con as con:
@@ -421,16 +369,14 @@ class NilmDB(object):
            raise OverlapError("new data overlaps existing data at range: "
                               + str(iset & interval))

-        # Insert the data into pytables
-        table = self.h5file.getNode(path)
+        # Insert the data
+        table = self.data.getnode(path)
        row_start = table.nrows
        table.append(data)
        row_end = table.nrows
-        table.flush()

        # Insert the record into the sql database.
-        # Casts are to convert from numpy.int64.
-        self._add_interval(stream_id, interval, int(row_start), int(row_end))
+        self._add_interval(stream_id, interval, row_start, row_end)

        # And that's all
        return "ok"
@@ -445,7 +391,7 @@ class NilmDB(object):
        # Optimization for the common case where an interval wasn't truncated
        if interval.start == interval.db_start:
            return interval.db_startpos
-        return bisect.bisect_left(BisectableTable(table),
+        return bisect.bisect_left(bulkdata.TimestampOnlyTable(table),
                                  interval.start,
                                  interval.db_startpos,
                                  interval.db_endpos)
@@ -464,7 +410,7 @@ class NilmDB(object):
        # want to include the given timestamp in the results.  This is
        # so a queries like 1:00 -> 2:00 and 2:00 -> 3:00 return
        # non-overlapping data.
-        return bisect.bisect_left(BisectableTable(table),
+        return bisect.bisect_left(bulkdata.TimestampOnlyTable(table),
                                  interval.end,
                                  interval.db_startpos,
                                  interval.db_endpos)
@@ -488,7 +434,7 @@ class NilmDB(object):
        than actually fetching the data.  It is not limited by
        max_results.
        """
-        table = self.h5file.getNode(path)
+        table = self.data.getnode(path)
        stream_id = self._stream_id(path)
        intervals = self._get_intervals(stream_id)
        requested = Interval(start or 0, end or 1e12)
--- a/nilmdb/server.py
+++ b/nilmdb/server.py
@@ -3,10 +3,9 @@
 # Need absolute_import so that "import nilmdb" won't pull in nilmdb.py,
 # but will pull the nilmdb module instead.
 from __future__ import absolute_import
+from nilmdb.utils.printf import *
 import nilmdb

-from nilmdb.printf import *
-
 import cherrypy
 import sys
 import time
@@ -26,6 +25,33 @@ class NilmApp(object):

 version = "1.1"

+# Decorators
+def chunked_response(func):
+    """Decorator to enable chunked responses"""
+    # Set this to False to get better tracebacks from some requests
+    # (/stream/extract, /stream/intervals).
+    func._cp_config = { 'response.stream': True }
+    return func
+
+def workaround_cp_bug_1200(func): # pragma: no cover (just a workaround)
+    """Decorator to work around CherryPy bug #1200 in a response
+    generator"""
+    # Even if chunked responses are disabled, you may still miss miss
+    # LookupError, or UnicodeError exceptions due to CherryPy bug
+    # #1200.  This throws them as generic Exceptions insteads.
+    import functools
+    import traceback
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        try:
+            for val in func(*args, **kwargs):
+                yield val
+        except (LookupError, UnicodeError) as e:
+            raise Exception("bug workaround; real exception is:\n" +
+                            traceback.format_exc())
+    return wrapper
+
+# CherryPy apps
 class Root(NilmApp):
    """Root application for NILM database"""

@@ -59,7 +85,7 @@ class Root(NilmApp):
    @cherrypy.expose
    @cherrypy.tools.json_out()
    def dbsize(self):
-        return nilmdb.du.du(self.db.get_basepath())
+        return nilmdb.utils.du(self.db.get_basepath())

 class Stream(NilmApp):
    """Stream-specific operations"""
@@ -214,6 +240,7 @@ class Stream(NilmApp):
    # /stream/intervals?path=/newton/prep
    # /stream/intervals?path=/newton/prep&start=1234567890.0&end=1234567899.0
    @cherrypy.expose
+    @chunked_response
    def intervals(self, path, start = None, end = None):
        """
        Get intervals from backend database.  Streams the resulting
@@ -235,9 +262,9 @@ class Stream(NilmApp):
        if len(streams) != 1:
            raise cherrypy.HTTPError("404 Not Found", "No such stream")

+        @workaround_cp_bug_1200
        def content(start, end):
-            # Note: disable response.stream below to get better debug info
-            # from tracebacks in this subfunction.
+            # Note: disable chunked responses to see tracebacks from here.
            while True:
                (intervals, restart) = self.db.stream_intervals(path,start,end)
                response = ''.join([ json.dumps(i) + "\n" for i in intervals ])
@@ -246,10 +273,10 @@ class Stream(NilmApp):
                    break
                start = restart
        return content(start, end)
-    intervals._cp_config = { 'response.stream': True } # chunked HTTP response

    # /stream/extract?path=/newton/prep&start=1234567890.0&end=1234567899.0
    @cherrypy.expose
+    @chunked_response
    def extract(self, path, start = None, end = None, count = False):
        """
        Extract data from backend database.  Streams the resulting
@@ -279,9 +306,9 @@ class Stream(NilmApp):
        # Get formatter
        formatter = nilmdb.layout.Formatter(layout)

+        @workaround_cp_bug_1200
        def content(start, end, count):
-            # Note: disable response.stream below to get better debug info
-            # from tracebacks in this subfunction.
+            # Note: disable chunked responses to see tracebacks from here.
            if count:
                matched = self.db.stream_extract(path, start, end, count)
                yield sprintf("%d\n", matched)
@@ -297,8 +324,6 @@ class Stream(NilmApp):
                    return
                start = restart
        return content(start, end, count)
-    extract._cp_config = { 'response.stream': True } # chunked HTTP response
-

 class Exiter(object):
    """App that exits the server, for testing"""
@@ -323,7 +348,7 @@ class Server(object):
        # Need to wrap DB object in a serializer because we'll call
        # into it from separate threads.
        self.embedded = embedded
-        self.db = nilmdb.serializer.WrapObject(db)
+        self.db = nilmdb.utils.Serializer(db)
        cherrypy.config.update({
            'server.socket_host': host,
            'server.socket_port': port,
--- a/nilmdb/timestamper.py
+++ b/nilmdb/timestamper.py
@@ -1,7 +1,7 @@
 """File-like objects that add timestamps to the input lines"""

 from __future__ import absolute_import
-from nilmdb.printf import *
+from nilmdb.utils.printf import *

 import time
 import os
--- a/nilmdb/utils/init.py
+++ b/nilmdb/utils/init.py
@@ -0,0 +1,8 @@
+"""NilmDB utilities"""
+
+from .timer import Timer
+from .iteratorizer import Iteratorizer
+from .serializer import Serializer
+from .lrucache import lru_cache
+from .diskusage import du
+from .mustclose import must_close
--- a/nilmdb/utils/diskusage.py
+++ b/nilmdb/utils/diskusage.py
--- a/nilmdb/utils/iteratorizer.py
+++ b/nilmdb/utils/iteratorizer.py
--- a/nilmdb/utils/lrucache.py
+++ b/nilmdb/utils/lrucache.py
@@ -0,0 +1,66 @@
+# Memoize a function's return value with a least-recently-used cache
+# Based on:
+#   http://code.activestate.com/recipes/498245-lru-and-lfu-cache-decorators/
+# with added 'destructor' functionality.
+
+import collections
+import functools
+
+def lru_cache(size = 10, onremove = None):
+    """Least-recently-used cache decorator.
+
+    @lru_cache(size = 10, onevict = None)
+    def f(...):
+        pass
+
+    Given a function and arguments, memoize its return value.
+    Up to 'size' elements are cached.
+
+    When evicting a value from the cache, call the function
+    'onremove' with the value that's being evicted.
+
+    Call f.cache_remove(...) to evict the cache entry with the given
+    arguments.  Call f.cache_remove_all() to evict all entries.
+    f.cache_hits and f.cache_misses give statistics.
+    """
+
+    def decorator(func):
+        cache = collections.OrderedDict()	# order: least- to most-recent
+
+        def evict(value):
+            if onremove:
+                onremove(value)
+
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            key = args + tuple(sorted(kwargs.items()))
+            try:
+                value = cache.pop(key)
+                wrapper.cache_hits += 1
+            except KeyError:
+                value = func(*args, **kwargs)
+                wrapper.cache_misses += 1
+                if len(cache) >= size:
+                    evict(cache.popitem(0)[1])	# evict LRU cache entry
+            cache[key] = value              	# (re-)insert this key at end
+            return value
+
+        def cache_remove(*args, **kwargs):
+            """Remove the described key from this cache, if present.
+            Note that if the original wrapped function was implicitly
+            passed 'self', you need to pass it as an argument here too."""
+            key = args + tuple(sorted(kwargs.items()))
+            if key in cache:
+                evict(cache.pop(key))
+
+        def cache_remove_all():
+            for key in cache:
+                evict(cache.pop(key))
+
+        wrapper.cache_hits = 0
+        wrapper.cache_misses = 0
+        wrapper.cache_remove = cache_remove
+        wrapper.cache_remove_all = cache_remove_all
+
+        return wrapper
+    return decorator
--- a/nilmdb/utils/mustclose.py
+++ b/nilmdb/utils/mustclose.py
@@ -0,0 +1,42 @@
+# Class decorator that warns on stderr at deletion time if the class's
+# close() member wasn't called.
+
+from nilmdb.utils.printf import *
+import sys
+
+def must_close(errorfile = sys.stderr):
+    def decorator(cls):
+        def dummy(*args, **kwargs):
+            pass
+        if "__init__" not in cls.__dict__:
+            cls.__init__ = dummy
+        if "__del__" not in cls.__dict__:
+            cls.__del__ = dummy
+        if "close" not in cls.__dict__:
+            cls.close = dummy
+
+        orig_init = cls.__init__
+        orig_del = cls.__del__
+        orig_close = cls.close
+
+        def __init__(self, *args, **kwargs):
+            ret = orig_init(self, *args, **kwargs)
+            self.__dict__["_must_close"] = True
+            return ret
+
+        def __del__(self):
+            if "_must_close" in self.__dict__:
+                fprintf(errorfile, "error: %s.close() wasn't called!\n",
+                        self.__class__.__name__)
+            return orig_del(self)
+
+        def close(self, *args, **kwargs):
+            del self._must_close
+            return orig_close(self)
+
+        cls.__init__ = __init__
+        cls.__del__ = __del__
+        cls.close = close
+
+        return cls
+    return decorator
--- a/nilmdb/utils/printf.py
+++ b/nilmdb/utils/printf.py
--- a/nilmdb/utils/serializer.py
+++ b/nilmdb/utils/serializer.py
@@ -67,3 +67,6 @@ class WrapObject(object):
    def __del__(self):
        self.__wrap_call_queue.put((None, None, None, None))
        self.__wrap_serializer.join()
+
+# Just an alias
+Serializer = WrapObject
--- a/nilmdb/utils/timer.py
+++ b/nilmdb/utils/timer.py
--- a/runserver.py
+++ b/runserver.py
@@ -3,14 +3,17 @@
 import nilmdb
 import argparse

-parser = argparse.ArgumentParser(description='Run the NILM server')
+formatter = argparse.ArgumentDefaultsHelpFormatter
+parser = argparse.ArgumentParser(description='Run the NILM server',
+                                 formatter_class = formatter)
 parser.add_argument('-p', '--port', help='Port number', type=int, default=12380)
+parser.add_argument('-d', '--database', help='Database directory', default="db")
 parser.add_argument('-y', '--yappi', help='Run with yappi profiler',
                    action='store_true')
 args = parser.parse_args()

 # Start web app on a custom port
-db = nilmdb.NilmDB("db")
+db = nilmdb.NilmDB(args.database)
 server = nilmdb.Server(db, host = "127.0.0.1",
                       port = args.port,
                       embedded = False)
--- a/setup.cfg
+++ b/setup.cfg
@@ -10,6 +10,8 @@ cover-erase=
 ##cover-branches=     # need nose 1.1.3 for this
 stop=
 verbosity=2
+#tests=tests/test_mustclose.py
+#tests=tests/test_lrucache.py
 #tests=tests/test_cmdline.py
 #tests=tests/test_layout.py
 #tests=tests/test_rbtree.py
--- a/tests/renderdot.py
+++ b/tests/renderdot.py
@@ -13,7 +13,7 @@ class Renderer(object):

    # Rendering
    def __render_dot_node(self, node, max_depth = 20):
-        from nilmdb.printf import sprintf
+        from nilmdb.utils.printf import sprintf
        """Render a single node and its children into a dot graph fragment"""
        if max_depth == 0:
            return ""
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -1,5 +1,5 @@
 import nilmdb
-from nilmdb.printf import *
+from nilmdb.utils.printf import *
 from nilmdb.client import ClientError, ServerError

 import datetime_tz
--- a/tests/test_cmdline.py
+++ b/tests/test_cmdline.py
@@ -1,5 +1,5 @@
 import nilmdb
-from nilmdb.printf import *
+from nilmdb.utils.printf import *
 import nilmdb.cmdline

 from nose.tools import *
@@ -51,6 +51,7 @@ class TestCmdline(object):
        """Run a cmdline client with the specified argument string,
        passing the given input.  Returns a tuple with the output and
        exit code"""
+        #print "TZ=UTC ./nilmtool.py " + arg_string
        class stdio_wrapper:
            def __init__(self, stdin, stdout, stderr):
                self.io = (stdin, stdout, stderr)
@@ -199,7 +200,12 @@ class TestCmdline(object):
        # Should not be able to create a stream with another stream as
        # its parent
        self.fail("create /newton/prep/blah PrepData")
-        self.contain("error creating table at that path")
+        self.contain("path is subdir of existing node")
+
+        # Should not be able to create a stream at a location that
+        # has other nodes as children
+        self.fail("create /newton/zzz PrepData")
+        self.contain("subdirs of this path already exist")

        # Verify we got those 3 streams and they're returned in
        # alphabetical order.
--- a/tests/test_interval.py
+++ b/tests/test_interval.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 import nilmdb
-from nilmdb.printf import *
+from nilmdb.utils.printf import *
 import datetime_tz

 from nose.tools import *
--- a/tests/test_iteratorizer.py
+++ b/tests/test_iteratorizer.py
@@ -1,5 +1,5 @@
 import nilmdb
-from nilmdb.printf import *
+from nilmdb.utils.printf import *

 import nose
 from nose.tools import *
@@ -9,8 +9,6 @@ import time

 from test_helpers import *

-import nilmdb.iteratorizer
-
 def func_with_callback(a, b, callback):
    callback(a)
    callback(b)
@@ -27,7 +25,8 @@ class TestIteratorizer(object):
        eq_(self.result, "123")

        # Now make it an iterator
-        it = nilmdb.iteratorizer.Iteratorizer(lambda x:
+        it = nilmdb.utils.Iteratorizer(
+            lambda x:
            func_with_callback(1, 2, x))
        result = ""
        for i in it:
@@ -35,7 +34,8 @@ class TestIteratorizer(object):
        eq_(result, "123")

        # Make sure things work when an exception occurs
-        it = nilmdb.iteratorizer.Iteratorizer(lambda x:
+        it = nilmdb.utils.Iteratorizer(
+            lambda x:
            func_with_callback(1, "a", x))
        result = ""
        with assert_raises(TypeError) as e:
@@ -48,7 +48,8 @@ class TestIteratorizer(object):
        # itself.  This doesn't have a particular result in the test,
        # but gains coverage.
        def foo():
-            it = nilmdb.iteratorizer.Iteratorizer(lambda x:
+            it = nilmdb.utils.Iteratorizer(
+                lambda x:
                func_with_callback(1, 2, x))
            it.next()
        foo()
--- a/tests/test_layout.py
+++ b/tests/test_layout.py
@@ -2,7 +2,7 @@

 import nilmdb

-from nilmdb.printf import *
+from nilmdb.utils.printf import *

 from nose.tools import *
 from nose.tools import assert_raises
@@ -28,9 +28,13 @@ class TestLayouts(object):
    # Some nilmdb.layout tests.  Not complete, just fills in missing
    # coverage.
    def test_layouts(self):
-        x = nilmdb.layout.get_named("PrepData").description()
-        y = nilmdb.layout.get_named("float32_8").description()
-        eq_(repr(x), repr(y))
+        x = nilmdb.layout.get_named("PrepData")
+        y = nilmdb.layout.get_named("float32_8")
+        eq_(x.count, y.count)
+        eq_(x.datatype, y.datatype)
+        y = nilmdb.layout.get_named("float32_7")
+        ne_(x.count, y.count)
+        eq_(x.datatype, y.datatype)

    def test_parsing(self):
        self.real_t_parsing("PrepData", "RawData", "RawNotchedData")
--- a/tests/test_lrucache.py
+++ b/tests/test_lrucache.py
@@ -0,0 +1,53 @@
+import nilmdb
+from nilmdb.utils.printf import *
+
+import nose
+from nose.tools import *
+from nose.tools import assert_raises
+import threading
+import time
+
+from test_helpers import *
+
+@nilmdb.utils.lru_cache(size = 3)
+def foo1(n):
+    return n
+
+@nilmdb.utils.lru_cache(size = 5)
+def foo2(n):
+    return n
+
+def foo3d(n):
+    foo3d.destructed.append(n)
+foo3d.destructed = []
+@nilmdb.utils.lru_cache(size = 3, onremove = foo3d)
+def foo3(n):
+    return n
+
+class TestLRUCache(object):
+    def test(self):
+        [ foo1(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ]
+        eq_((foo1.cache_hits, foo1.cache_misses), (6, 3))
+        [ foo1(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ]
+        eq_((foo1.cache_hits, foo1.cache_misses), (15, 3))
+        [ foo1(n) for n in [ 4, 2, 1, 1, 4 ] ]
+        eq_((foo1.cache_hits, foo1.cache_misses), (18, 5))
+
+        [ foo2(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ]
+        eq_((foo2.cache_hits, foo2.cache_misses), (6, 3))
+        [ foo2(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ]
+        eq_((foo2.cache_hits, foo2.cache_misses), (15, 3))
+        [ foo2(n) for n in [ 4, 2, 1, 1, 4 ] ]
+        eq_((foo2.cache_hits, foo2.cache_misses), (19, 4))
+
+        [ foo3(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ]
+        eq_((foo3.cache_hits, foo3.cache_misses), (6, 3))
+        [ foo3(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ]
+        eq_((foo3.cache_hits, foo3.cache_misses), (15, 3))
+        [ foo3(n) for n in [ 4, 2, 1, 1, 4 ] ]
+        eq_((foo3.cache_hits, foo3.cache_misses), (18, 5))
+        eq_(foo3d.destructed, [1, 3])
+        foo3.cache_remove(1)
+        eq_(foo3d.destructed, [1, 3, 1])
+        foo3.cache_remove_all()
+        eq_(foo3d.destructed, [1, 3, 1, 2, 4 ])
--- a/tests/test_mustclose.py
+++ b/tests/test_mustclose.py
@@ -0,0 +1,59 @@
+import nilmdb
+from nilmdb.utils.printf import *
+
+import nose
+from nose.tools import *
+from nose.tools import assert_raises
+
+from test_helpers import *
+
+import sys
+import cStringIO
+
+err = cStringIO.StringIO()
+
+@nilmdb.utils.must_close(errorfile = err)
+class Foo:
+    def __init__(self):
+        fprintf(err, "Init\n")
+
+    def __del__(self):
+        fprintf(err, "Deleting\n")
+
+    def close(self):
+        fprintf(err, "Closing\n")
+
+@nilmdb.utils.must_close(errorfile = err)
+class Bar:
+    pass
+
+class TestMustClose(object):
+    def test(self):
+
+        # Note: this test might fail if the Python interpreter doesn't
+        # garbage collect the object (and call its __del__ function)
+        # right after a "del x".
+
+        x = Foo()
+        del x
+        eq_(err.getvalue(),
+            "Init\n"
+            "error: Foo.close() wasn't called!\n"
+            "Deleting\n")
+
+        err.truncate(0)
+
+        y = Foo()
+        y.close()
+        del y
+        eq_(err.getvalue(),
+            "Init\n"
+            "Closing\n"
+            "Deleting\n")
+
+        err.truncate(0)
+
+        z = Bar()
+        z.close()
+        del z
+        eq_(err.getvalue(), "")
--- a/tests/test_nilmdb.py
+++ b/tests/test_nilmdb.py
@@ -14,6 +14,7 @@ import urllib2
 from urllib2 import urlopen, HTTPError
 import Queue
 import cStringIO
+import time

 testdb = "tests/testdb"

@@ -39,8 +40,8 @@ class Test00Nilmdb(object):  # named 00 so it runs first
        capture = cStringIO.StringIO()
        old = sys.stdout
        sys.stdout = capture
-        with nilmdb.Timer("test"):
-            nilmdb.timer.time.sleep(0.01)
+        with nilmdb.utils.Timer("test"):
+            time.sleep(0.01)
        sys.stdout = old
        in_("test: ", capture.getvalue())

@@ -69,12 +70,14 @@ class Test00Nilmdb(object):  # named 00 so it runs first
        eq_(db.stream_list(layout="RawData"), [ ["/newton/raw", "RawData"] ])
        eq_(db.stream_list(path="/newton/raw"), [ ["/newton/raw", "RawData"] ])

-        # Verify that columns were made right
-        eq_(len(db.h5file.getNode("/newton/prep").cols), 9)
-        eq_(len(db.h5file.getNode("/newton/raw").cols), 7)
-        eq_(len(db.h5file.getNode("/newton/zzz/rawnotch").cols), 10)
-        assert(not db.h5file.getNode("/newton/prep").colindexed["timestamp"])
-        assert(not db.h5file.getNode("/newton/prep").colindexed["c1"])
+        # Verify that columns were made right (pytables specific)
+        if "h5file" in db.data.__dict__:
+            h5file = db.data.h5file
+            eq_(len(h5file.getNode("/newton/prep").cols), 9)
+            eq_(len(h5file.getNode("/newton/raw").cols), 7)
+            eq_(len(h5file.getNode("/newton/zzz/rawnotch").cols), 10)
+            assert(not h5file.getNode("/newton/prep").colindexed["timestamp"])
+            assert(not h5file.getNode("/newton/prep").colindexed["c1"])

        # Set / get metadata
        eq_(db.stream_get_metadata("/newton/prep"), {})
--- a/tests/test_printf.py
+++ b/tests/test_printf.py
@@ -1,5 +1,5 @@
 import nilmdb
-from nilmdb.printf import *
+from nilmdb.utils.printf import *

 from nose.tools import *
 from nose.tools import assert_raises
--- a/tests/test_rbtree.py
+++ b/tests/test_rbtree.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-

 import nilmdb
-from nilmdb.printf import *
+from nilmdb.utils.printf import *

 from nose.tools import *
 from nose.tools import assert_raises
--- a/tests/test_serializer.py
+++ b/tests/test_serializer.py
@@ -1,5 +1,5 @@
 import nilmdb
-from nilmdb.printf import *
+from nilmdb.utils.printf import *

 import nose
 from nose.tools import *
@@ -57,7 +57,7 @@ class TestUnserialized(Base):
 class TestSerialized(Base):
    def setUp(self):
        self.realfoo = Foo()
-        self.foo = nilmdb.serializer.WrapObject(self.realfoo)
+        self.foo = nilmdb.utils.Serializer(self.realfoo)

    def tearDown(self):
        del self.foo
--- a/tests/test_timestamper.py
+++ b/tests/test_timestamper.py
@@ -1,5 +1,5 @@
 import nilmdb
-from nilmdb.printf import *
+from nilmdb.utils.printf import *

 import datetime_tz

--- a/timeit.sh
+++ b/timeit.sh
@@ -1,21 +1,22 @@
 ./nilmtool.py destroy /bpnilm/2/raw
 ./nilmtool.py create /bpnilm/2/raw RawData

-if true; then
+if false; then
    time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-110000 -r 8000 /bpnilm/2/raw
    time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-120001 -r 8000 /bpnilm/2/raw
 else
-    for i in $(seq 2000 2050); do
-	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-010001 /bpnilm/2/raw
-	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-020002 /bpnilm/2/raw
-	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-030003 /bpnilm/2/raw
-	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-040004 /bpnilm/2/raw
-	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-050005 /bpnilm/2/raw
-	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-060006 /bpnilm/2/raw
-	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-070007 /bpnilm/2/raw
-	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-080008 /bpnilm/2/raw
-	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-090009 /bpnilm/2/raw
-	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-100010 /bpnilm/2/raw
+    # 170 hours, about 98 gigs uncompressed:
+    for i in $(seq 2000 2016); do
+	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-010001 -r 8000 /bpnilm/2/raw
+	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-020002 -r 8000 /bpnilm/2/raw
+	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-030003 -r 8000 /bpnilm/2/raw
+	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-040004 -r 8000 /bpnilm/2/raw
+	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-050005 -r 8000 /bpnilm/2/raw
+	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-060006 -r 8000 /bpnilm/2/raw
+	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-070007 -r 8000 /bpnilm/2/raw
+	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-080008 -r 8000 /bpnilm/2/raw
+	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-090009 -r 8000 /bpnilm/2/raw
+	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-100010 -r 8000 /bpnilm/2/raw
    done
 fi
Author	SHA1	Message	Date
Jim Paris	10d58f6a47	More test coverage	2013-01-02 00:00:05 -05:00
Jim Paris	e2464efc12	Test everything; remove debugging	2013-01-01 23:46:54 -05:00
Jim Paris	1beae5024e	Bulkdata extract works now.	2013-01-01 23:44:52 -05:00
Jim Paris	c7c65b6542	Work around CherryPy bug #1200 ; related cleanups Spent way too long trying to track down a cryptic error that turned out to be a CherryPy bug. Now we catch this using a decorator in the 'extract' and 'intervals' generators that transforms exceptions that trigger the bugs into one that does not. fun!	2013-01-01 23:03:53 -05:00
Jim Paris	f41ff0a6e8	Inserting bulk data is essentially done, not tested	2013-01-01 21:04:35 -05:00
Jim Paris	389c1d189f	Make option to turn off chunked encoding for debugging more clear.	2013-01-01 21:03:33 -05:00
Jim Paris	487298986e	More work towards bulkdata	2012-12-31 18:44:57 -05:00
Jim Paris	d4cd045c48	Fix path stuff, build packer in bulkdata.Table	2012-12-31 17:22:30 -05:00
Jim Paris	3816645313	More work on BulkData	2012-12-31 17:22:30 -05:00
Jim Paris	83b937c720	More Pytables -> bulkdata conversion	2012-12-31 17:22:30 -05:00
Jim Paris	b3e6e8976f	More work towards flat bulk data storage. Cleaned up OS-specific path handling in nilmdb, bulkdata.	2012-12-31 17:22:30 -05:00
Jim Paris	c890ea93cb	WIP switching away from PyTables	2012-12-31 17:22:29 -05:00
Jim Paris	84c68c6913	Better documentation, cache Tables	2012-12-31 17:22:29 -05:00
Jim Paris	6f1e6fe232	Isolate all PyTables stuff to a single file. This will make migrating to my own data storage engine easier.	2012-12-31 17:22:29 -05:00
Jim Paris	b0d76312d1	Add must_close() decorator, use it in nilmdb Warns at runtime if a class's close() method wasn't called before the object was destroyed.	2012-12-31 17:21:19 -05:00
Jim Paris	19c846c71c	Remove outdated files	2012-12-31 15:55:43 -05:00
Jim Paris	f355c73209	Refactor utility classes into nilmdb.utils subdir/namespace There's some bug with the testing harness where placing e.g. from du import du in nilmdb/utils/__init__.py doesn't quite work -- sometimes the module "du" replaces the function "du". Not exactly sure why; we work around that by just renaming files so they don't match the imported names directly.	2012-12-31 15:55:36 -05:00
Jim Paris	173014ba19	Use nilmdb.lrucache for caching interval sets	2012-12-31 14:52:46 -05:00
Jim Paris	24d4752bc3	Add LRU cache memoizing decorator for functions	2012-12-31 14:39:16 -05:00
Jim Paris	a85b273e2e	Remove compression. Messes up extraction, since we random access for the timestamp binary search. In the future, maybe switching to multiple tables (one for timestamp, one for compressed data) would be smart.	2012-12-14 17:19:23 -05:00
Jim Paris	7f73b4b304	Use compression in pytables	2012-12-14 17:17:52 -05:00
Jim Paris	f3eb6d1b79	Time it!	2012-12-14 16:57:02 -05:00