Compare commits
	
		
			34 Commits
		
	
	
		
			before-ins
			...
			unicode
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| c083d63c96 | |||
| 0221e3ea21 | |||
| f5fd2b064e | |||
| 06e91a6a98 | |||
| 41b3f3c018 | |||
| 842076fef4 | |||
| 10d58f6a47 | |||
| e2464efc12 | |||
| 1beae5024e | |||
| c7c65b6542 | |||
| f41ff0a6e8 | |||
| 389c1d189f | |||
| 487298986e | |||
| d4cd045c48 | |||
| 3816645313 | |||
| 83b937c720 | |||
| b3e6e8976f | |||
| c890ea93cb | |||
| 84c68c6913 | |||
| 6f1e6fe232 | |||
| b0d76312d1 | |||
| 19c846c71c | |||
| f355c73209 | |||
| 173014ba19 | |||
| 24d4752bc3 | |||
| a85b273e2e | |||
| 7f73b4b304 | |||
| f3eb6d1b79 | |||
| 9082cc9f44 | |||
| bf64a40472 | |||
| 32dbeebc09 | |||
| 66ddc79b15 | |||
| 7a8bd0bf41 | |||
| ee552de740 | 
| @@ -143,6 +143,13 @@ Speed | ||||
| 	 numpy:records.py.fromrecords:569   7410 sec, 262k calls | ||||
|   - Probably OK for now. | ||||
|  | ||||
| - After all updates, now takes about 8.5 minutes to insert an hour of | ||||
|   data, constant after adding 171 hours (4.9 billion data points) | ||||
|  | ||||
| - Data set size: 98 gigs = 20 bytes per data point. | ||||
|   6 uint16 data + 1 uint32 timestamp = 16 bytes per point | ||||
|   So compression must be off -- will retry with compression forced on. | ||||
|  | ||||
| IntervalSet speed | ||||
| ----------------- | ||||
| - Initial implementation was pretty slow, even with binary search in | ||||
|   | ||||
| @@ -1,605 +0,0 @@ | ||||
|  | ||||
|  | ||||
|  | ||||
| // The RedBlackEntry class is an Abstract Base Class.  This means that no | ||||
| // instance of the RedBlackEntry class can exist.  Only classes which | ||||
| // inherit from the RedBlackEntry class can exist.  Furthermore any class | ||||
| // which inherits from the RedBlackEntry class must define the member | ||||
| // function GetKey().  The Print() member function does not have to | ||||
| // be defined because a default definition exists. | ||||
| // | ||||
| // The GetKey() function should return an integer key for that entry. | ||||
| // The key for an entry should never change otherwise bad things might occur. | ||||
|  | ||||
| class RedBlackEntry { | ||||
| public: | ||||
|   RedBlackEntry(); | ||||
|   virtual ~RedBlackEntry(); | ||||
|   virtual int GetKey() const = 0; | ||||
|   virtual void Print() const; | ||||
| }; | ||||
|  | ||||
| class RedBlackTreeNode { | ||||
|   friend class RedBlackTree; | ||||
| public: | ||||
|   void Print(RedBlackTreeNode*, | ||||
| 	     RedBlackTreeNode*) const; | ||||
|   RedBlackTreeNode(); | ||||
|   RedBlackTreeNode(RedBlackEntry *); | ||||
|   RedBlackEntry * GetEntry() const; | ||||
|   ~RedBlackTreeNode(); | ||||
| protected: | ||||
|   RedBlackEntry * storedEntry; | ||||
|   int key; | ||||
|   int red; /* if red=0 then the node is black */ | ||||
|   RedBlackTreeNode * left; | ||||
|   RedBlackTreeNode * right; | ||||
|   RedBlackTreeNode * parent; | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
| class RedBlackTree { | ||||
| public: | ||||
|   RedBlackTree(); | ||||
|   ~RedBlackTree(); | ||||
|   void Print() const; | ||||
|   RedBlackEntry * DeleteNode(RedBlackTreeNode *); | ||||
|   RedBlackTreeNode * Insert(RedBlackEntry *); | ||||
|   RedBlackTreeNode * GetPredecessorOf(RedBlackTreeNode *) const; | ||||
|   RedBlackTreeNode * GetSuccessorOf(RedBlackTreeNode *) const; | ||||
|   RedBlackTreeNode * Search(int key); | ||||
|   TemplateStack<RedBlackTreeNode *> * Enumerate(int low, int high) ; | ||||
|   void CheckAssumptions() const; | ||||
| protected: | ||||
|   /*  A sentinel is used for root and for nil.  These sentinels are */ | ||||
|   /*  created when RedBlackTreeCreate is caled.  root->left should always */ | ||||
|   /*  point to the node which is the root of the tree.  nil points to a */ | ||||
|   /*  node which should always be black but has aribtrary children and */ | ||||
|   /*  parent and no key or info.  The point of using these sentinels is so */ | ||||
|   /*  that the root and nil nodes do not require special cases in the code */ | ||||
|   RedBlackTreeNode * root; | ||||
|   RedBlackTreeNode * nil; | ||||
|   void LeftRotate(RedBlackTreeNode *); | ||||
|   void RightRotate(RedBlackTreeNode *); | ||||
|   void TreeInsertHelp(RedBlackTreeNode *); | ||||
|   void TreePrintHelper(RedBlackTreeNode *) const; | ||||
|   void FixUpMaxHigh(RedBlackTreeNode *); | ||||
|   void DeleteFixUp(RedBlackTreeNode *); | ||||
| }; | ||||
|  | ||||
| const int MIN_INT=-MAX_INT; | ||||
|  | ||||
| RedBlackTreeNode::RedBlackTreeNode(){ | ||||
| }; | ||||
|  | ||||
| RedBlackTreeNode::RedBlackTreeNode(RedBlackEntry * newEntry) | ||||
|   : storedEntry (newEntry) , key(newEntry->GetKey()) { | ||||
| }; | ||||
|  | ||||
| RedBlackTreeNode::~RedBlackTreeNode(){ | ||||
| }; | ||||
|  | ||||
| RedBlackEntry * RedBlackTreeNode::GetEntry() const {return storedEntry;} | ||||
|  | ||||
| RedBlackEntry::RedBlackEntry(){ | ||||
| }; | ||||
| RedBlackEntry::~RedBlackEntry(){ | ||||
| }; | ||||
| void RedBlackEntry::Print() const { | ||||
|   cout << "No Print Method defined. Using Default: " << GetKey() << endl; | ||||
| } | ||||
|  | ||||
| RedBlackTree::RedBlackTree() | ||||
| { | ||||
|   nil = new RedBlackTreeNode; | ||||
|   nil->left = nil->right = nil->parent = nil; | ||||
|   nil->red = 0; | ||||
|   nil->key = MIN_INT; | ||||
|   nil->storedEntry = NULL; | ||||
|  | ||||
|   root = new RedBlackTreeNode; | ||||
|   root->parent = root->left = root->right = nil; | ||||
|   root->key = MAX_INT; | ||||
|   root->red=0; | ||||
|   root->storedEntry = NULL; | ||||
| } | ||||
|  | ||||
| /***********************************************************************/ | ||||
| /*  FUNCTION:  LeftRotate */ | ||||
| /**/ | ||||
| /*  INPUTS:  the node to rotate on */ | ||||
| /**/ | ||||
| /*  OUTPUT:  None */ | ||||
| /**/ | ||||
| /*  Modifies Input: this, x */ | ||||
| /**/ | ||||
| /*  EFFECTS:  Rotates as described in _Introduction_To_Algorithms by */ | ||||
| /*            Cormen, Leiserson, Rivest (Chapter 14).  Basically this */ | ||||
| /*            makes the parent of x be to the left of x, x the parent of */ | ||||
| /*            its parent before the rotation and fixes other pointers */ | ||||
| /*            accordingly.  */ | ||||
| /***********************************************************************/ | ||||
|  | ||||
| void RedBlackTree::LeftRotate(RedBlackTreeNode* x) { | ||||
|   RedBlackTreeNode* y; | ||||
|  | ||||
|   /*  I originally wrote this function to use the sentinel for */ | ||||
|   /*  nil to avoid checking for nil.  However this introduces a */ | ||||
|   /*  very subtle bug because sometimes this function modifies */ | ||||
|   /*  the parent pointer of nil.  This can be a problem if a */ | ||||
|   /*  function which calls LeftRotate also uses the nil sentinel */ | ||||
|   /*  and expects the nil sentinel's parent pointer to be unchanged */ | ||||
|   /*  after calling this function.  For example, when DeleteFixUP */ | ||||
|   /*  calls LeftRotate it expects the parent pointer of nil to be */ | ||||
|   /*  unchanged. */ | ||||
|  | ||||
|   y=x->right; | ||||
|   x->right=y->left; | ||||
|  | ||||
|   if (y->left != nil) y->left->parent=x; /* used to use sentinel here */ | ||||
|   /* and do an unconditional assignment instead of testing for nil */ | ||||
|  | ||||
|   y->parent=x->parent; | ||||
|  | ||||
|   /* instead of checking if x->parent is the root as in the book, we */ | ||||
|   /* count on the root sentinel to implicitly take care of this case */ | ||||
|   if( x == x->parent->left) { | ||||
|     x->parent->left=y; | ||||
|   } else { | ||||
|     x->parent->right=y; | ||||
|   } | ||||
|   y->left=x; | ||||
|   x->parent=y; | ||||
| } | ||||
|  | ||||
| /***********************************************************************/ | ||||
| /*  FUNCTION:  RighttRotate */ | ||||
| /**/ | ||||
| /*  INPUTS:  node to rotate on */ | ||||
| /**/ | ||||
| /*  OUTPUT:  None */ | ||||
| /**/ | ||||
| /*  Modifies Input?: this, y */ | ||||
| /**/ | ||||
| /*  EFFECTS:  Rotates as described in _Introduction_To_Algorithms by */ | ||||
| /*            Cormen, Leiserson, Rivest (Chapter 14).  Basically this */ | ||||
| /*            makes the parent of x be to the left of x, x the parent of */ | ||||
| /*            its parent before the rotation and fixes other pointers */ | ||||
| /*            accordingly.  */ | ||||
| /***********************************************************************/ | ||||
|  | ||||
| void RedBlackTree::RightRotate(RedBlackTreeNode* y) { | ||||
|   RedBlackTreeNode* x; | ||||
|  | ||||
|   /*  I originally wrote this function to use the sentinel for */ | ||||
|   /*  nil to avoid checking for nil.  However this introduces a */ | ||||
|   /*  very subtle bug because sometimes this function modifies */ | ||||
|   /*  the parent pointer of nil.  This can be a problem if a */ | ||||
|   /*  function which calls LeftRotate also uses the nil sentinel */ | ||||
|   /*  and expects the nil sentinel's parent pointer to be unchanged */ | ||||
|   /*  after calling this function.  For example, when DeleteFixUP */ | ||||
|   /*  calls LeftRotate it expects the parent pointer of nil to be */ | ||||
|   /*  unchanged. */ | ||||
|  | ||||
|   x=y->left; | ||||
|   y->left=x->right; | ||||
|  | ||||
|   if (nil != x->right)  x->right->parent=y; /*used to use sentinel here */ | ||||
|   /* and do an unconditional assignment instead of testing for nil */ | ||||
|  | ||||
|   /* instead of checking if x->parent is the root as in the book, we */ | ||||
|   /* count on the root sentinel to implicitly take care of this case */ | ||||
|   x->parent=y->parent; | ||||
|   if( y == y->parent->left) { | ||||
|     y->parent->left=x; | ||||
|   } else { | ||||
|     y->parent->right=x; | ||||
|   } | ||||
|   x->right=y; | ||||
|   y->parent=x; | ||||
| } | ||||
|  | ||||
| /***********************************************************************/ | ||||
| /*  FUNCTION:  TreeInsertHelp  */ | ||||
| /**/ | ||||
| /*  INPUTS:  z is the node to insert */ | ||||
| /**/ | ||||
| /*  OUTPUT:  none */ | ||||
| /**/ | ||||
| /*  Modifies Input:  this, z */ | ||||
| /**/ | ||||
| /*  EFFECTS:  Inserts z into the tree as if it were a regular binary tree */ | ||||
| /*            using the algorithm described in _Introduction_To_Algorithms_ */ | ||||
| /*            by Cormen et al.  This funciton is only intended to be called */ | ||||
| /*            by the Insert function and not by the user */ | ||||
| /***********************************************************************/ | ||||
|  | ||||
| void RedBlackTree::TreeInsertHelp(RedBlackTreeNode* z) { | ||||
|   /*  This function should only be called by RedBlackTree::Insert */ | ||||
|   RedBlackTreeNode* x; | ||||
|   RedBlackTreeNode* y; | ||||
|  | ||||
|   z->left=z->right=nil; | ||||
|   y=root; | ||||
|   x=root->left; | ||||
|   while( x != nil) { | ||||
|     y=x; | ||||
|     if ( x->key > z->key) { | ||||
|       x=x->left; | ||||
|     } else { /* x->key <= z->key */ | ||||
|       x=x->right; | ||||
|     } | ||||
|   } | ||||
|   z->parent=y; | ||||
|   if ( (y == root) || | ||||
|        (y->key > z->key) ) { | ||||
|     y->left=z; | ||||
|   } else { | ||||
|     y->right=z; | ||||
|   } | ||||
| } | ||||
|  | ||||
| /*  Before calling InsertNode  the node x should have its key set */ | ||||
|  | ||||
| /***********************************************************************/ | ||||
| /*  FUNCTION:  InsertNode */ | ||||
| /**/ | ||||
| /*  INPUTS:  newEntry is the entry to insert*/ | ||||
| /**/ | ||||
| /*  OUTPUT:  This function returns a pointer to the newly inserted node */ | ||||
| /*           which is guarunteed to be valid until this node is deleted. */ | ||||
| /*           What this means is if another data structure stores this */ | ||||
| /*           pointer then the tree does not need to be searched when this */ | ||||
| /*           is to be deleted. */ | ||||
| /**/ | ||||
| /*  Modifies Input: tree */ | ||||
| /**/ | ||||
| /*  EFFECTS:  Creates a node node which contains the appropriate key and */ | ||||
| /*            info pointers and inserts it into the tree. */ | ||||
| /***********************************************************************/ | ||||
| /* jim */ | ||||
| RedBlackTreeNode * RedBlackTree::Insert(RedBlackEntry * newEntry) | ||||
| { | ||||
|   RedBlackTreeNode * y; | ||||
|   RedBlackTreeNode * x; | ||||
|   RedBlackTreeNode * newNode; | ||||
|  | ||||
|   x = new RedBlackTreeNode(newEntry); | ||||
|   TreeInsertHelp(x); | ||||
|   newNode = x; | ||||
|   x->red=1; | ||||
|   while(x->parent->red) { /* use sentinel instead of checking for root */ | ||||
|     if (x->parent == x->parent->parent->left) { | ||||
|       y=x->parent->parent->right; | ||||
|       if (y->red) { | ||||
| 	x->parent->red=0; | ||||
| 	y->red=0; | ||||
| 	x->parent->parent->red=1; | ||||
| 	x=x->parent->parent; | ||||
|       } else { | ||||
| 	if (x == x->parent->right) { | ||||
| 	  x=x->parent; | ||||
| 	  LeftRotate(x); | ||||
| 	} | ||||
| 	x->parent->red=0; | ||||
| 	x->parent->parent->red=1; | ||||
| 	RightRotate(x->parent->parent); | ||||
|       } | ||||
|     } else { /* case for x->parent == x->parent->parent->right */ | ||||
|              /* this part is just like the section above with */ | ||||
|              /* left and right interchanged */ | ||||
|       y=x->parent->parent->left; | ||||
|       if (y->red) { | ||||
| 	x->parent->red=0; | ||||
| 	y->red=0; | ||||
| 	x->parent->parent->red=1; | ||||
| 	x=x->parent->parent; | ||||
|       } else { | ||||
| 	if (x == x->parent->left) { | ||||
| 	  x=x->parent; | ||||
| 	  RightRotate(x); | ||||
| 	} | ||||
| 	x->parent->red=0; | ||||
| 	x->parent->parent->red=1; | ||||
| 	LeftRotate(x->parent->parent); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   root->left->red=0; | ||||
|   return(newNode); | ||||
| } | ||||
|  | ||||
| /***********************************************************************/ | ||||
| /*  FUNCTION:  GetSuccessorOf  */ | ||||
| /**/ | ||||
| /*    INPUTS:  x is the node we want the succesor of */ | ||||
| /**/ | ||||
| /*    OUTPUT:  This function returns the successor of x or NULL if no */ | ||||
| /*             successor exists. */ | ||||
| /**/ | ||||
| /*    Modifies Input: none */ | ||||
| /**/ | ||||
| /*    Note:  uses the algorithm in _Introduction_To_Algorithms_ */ | ||||
| /***********************************************************************/ | ||||
|  | ||||
| RedBlackTreeNode * RedBlackTree::GetSuccessorOf(RedBlackTreeNode * x) const | ||||
| { | ||||
|   RedBlackTreeNode* y; | ||||
|  | ||||
|   if (nil != (y = x->right)) { /* assignment to y is intentional */ | ||||
|     while(y->left != nil) { /* returns the minium of the right subtree of x */ | ||||
|       y=y->left; | ||||
|     } | ||||
|     return(y); | ||||
|   } else { | ||||
|     y=x->parent; | ||||
|     while(x == y->right) { /* sentinel used instead of checking for nil */ | ||||
|       x=y; | ||||
|       y=y->parent; | ||||
|     } | ||||
|     if (y == root) return(nil); | ||||
|     return(y); | ||||
|   } | ||||
| } | ||||
|  | ||||
| /***********************************************************************/ | ||||
| /*  FUNCTION:  GetPredecessorOf  */ | ||||
| /**/ | ||||
| /*    INPUTS:  x is the node to get predecessor of */ | ||||
| /**/ | ||||
| /*    OUTPUT:  This function returns the predecessor of x or NULL if no */ | ||||
| /*             predecessor exists. */ | ||||
| /**/ | ||||
| /*    Modifies Input: none */ | ||||
| /**/ | ||||
| /*    Note:  uses the algorithm in _Introduction_To_Algorithms_ */ | ||||
| /***********************************************************************/ | ||||
|  | ||||
| RedBlackTreeNode * RedBlackTree::GetPredecessorOf(RedBlackTreeNode * x) const { | ||||
|   RedBlackTreeNode* y; | ||||
|  | ||||
|   if (nil != (y = x->left)) { /* assignment to y is intentional */ | ||||
|     while(y->right != nil) { /* returns the maximum of the left subtree of x */ | ||||
|       y=y->right; | ||||
|     } | ||||
|     return(y); | ||||
|   } else { | ||||
|     y=x->parent; | ||||
|     while(x == y->left) { | ||||
|       if (y == root) return(nil); | ||||
|       x=y; | ||||
|       y=y->parent; | ||||
|     } | ||||
|     return(y); | ||||
|   } | ||||
| } | ||||
|  | ||||
| /***********************************************************************/ | ||||
| /*  FUNCTION:  Print */ | ||||
| /**/ | ||||
| /*    INPUTS:  none */ | ||||
| /**/ | ||||
| /*    OUTPUT:  none  */ | ||||
| /**/ | ||||
| /*    EFFECTS:  This function recursively prints the nodes of the tree */ | ||||
| /*              inorder. */ | ||||
| /**/ | ||||
| /*    Modifies Input: none */ | ||||
| /**/ | ||||
| /*    Note:    This function should only be called from ITTreePrint */ | ||||
| /***********************************************************************/ | ||||
|  | ||||
| void RedBlackTreeNode::Print(RedBlackTreeNode * nil, | ||||
| 			     RedBlackTreeNode * root) const { | ||||
|   storedEntry->Print(); | ||||
|   printf(", key=%i ",key); | ||||
|   printf("  l->key="); | ||||
|   if( left == nil) printf("NULL"); else printf("%i",left->key); | ||||
|   printf("  r->key="); | ||||
|   if( right == nil) printf("NULL"); else printf("%i",right->key); | ||||
|   printf("  p->key="); | ||||
|   if( parent == root) printf("NULL"); else printf("%i",parent->key); | ||||
|   printf("  red=%i\n",red); | ||||
| } | ||||
|  | ||||
| void RedBlackTree::TreePrintHelper( RedBlackTreeNode* x) const { | ||||
|  | ||||
|   if (x != nil) { | ||||
|     TreePrintHelper(x->left); | ||||
|     x->Print(nil,root); | ||||
|     TreePrintHelper(x->right); | ||||
|   } | ||||
| } | ||||
|  | ||||
| /***********************************************************************/ | ||||
| /*  FUNCTION:  Print */ | ||||
| /**/ | ||||
| /*    INPUTS:  none */ | ||||
| /**/ | ||||
| /*    OUTPUT:  none */ | ||||
| /**/ | ||||
| /*    EFFECT:  This function recursively prints the nodes of the tree */ | ||||
| /*             inorder. */ | ||||
| /**/ | ||||
| /*    Modifies Input: none */ | ||||
| /**/ | ||||
| /***********************************************************************/ | ||||
|  | ||||
| void RedBlackTree::Print() const { | ||||
|   TreePrintHelper(root->left); | ||||
| } | ||||
|  | ||||
| /***********************************************************************/ | ||||
| /*  FUNCTION:  DeleteFixUp */ | ||||
| /**/ | ||||
| /*    INPUTS:  x is the child of the spliced */ | ||||
| /*             out node in DeleteNode. */ | ||||
| /**/ | ||||
| /*    OUTPUT:  none */ | ||||
| /**/ | ||||
| /*    EFFECT:  Performs rotations and changes colors to restore red-black */ | ||||
| /*             properties after a node is deleted */ | ||||
| /**/ | ||||
| /*    Modifies Input: this, x */ | ||||
| /**/ | ||||
| /*    The algorithm from this function is from _Introduction_To_Algorithms_ */ | ||||
| /***********************************************************************/ | ||||
|  | ||||
| void RedBlackTree::DeleteFixUp(RedBlackTreeNode* x) { | ||||
|   RedBlackTreeNode * w; | ||||
|   RedBlackTreeNode * rootLeft = root->left; | ||||
|  | ||||
|   while( (!x->red) && (rootLeft != x)) { | ||||
|     if (x == x->parent->left) { | ||||
|  | ||||
| // | ||||
|       w=x->parent->right; | ||||
|       if (w->red) { | ||||
| 	w->red=0; | ||||
| 	x->parent->red=1; | ||||
| 	LeftRotate(x->parent); | ||||
| 	w=x->parent->right; | ||||
|       } | ||||
|       if ( (!w->right->red) && (!w->left->red) ) { | ||||
| 	w->red=1; | ||||
| 	x=x->parent; | ||||
|       } else { | ||||
| 	if (!w->right->red) { | ||||
| 	  w->left->red=0; | ||||
| 	  w->red=1; | ||||
| 	  RightRotate(w); | ||||
| 	  w=x->parent->right; | ||||
| 	} | ||||
| 	w->red=x->parent->red; | ||||
| 	x->parent->red=0; | ||||
| 	w->right->red=0; | ||||
| 	LeftRotate(x->parent); | ||||
| 	x=rootLeft; /* this is to exit while loop */ | ||||
|       } | ||||
| // | ||||
|  | ||||
|     } else { /* the code below is has left and right switched from above */ | ||||
|       w=x->parent->left; | ||||
|       if (w->red) { | ||||
| 	w->red=0; | ||||
| 	x->parent->red=1; | ||||
| 	RightRotate(x->parent); | ||||
| 	w=x->parent->left; | ||||
|       } | ||||
|       if ( (!w->right->red) && (!w->left->red) ) { | ||||
| 	w->red=1; | ||||
| 	x=x->parent; | ||||
|       } else { | ||||
| 	if (!w->left->red) { | ||||
| 	  w->right->red=0; | ||||
| 	  w->red=1; | ||||
| 	  LeftRotate(w); | ||||
| 	  w=x->parent->left; | ||||
| 	} | ||||
| 	w->red=x->parent->red; | ||||
| 	x->parent->red=0; | ||||
| 	w->left->red=0; | ||||
| 	RightRotate(x->parent); | ||||
| 	x=rootLeft; /* this is to exit while loop */ | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   x->red=0; | ||||
|  | ||||
| } | ||||
|  | ||||
|  | ||||
| /***********************************************************************/ | ||||
| /*  FUNCTION:  DeleteNode */ | ||||
| /**/ | ||||
| /*    INPUTS:  tree is the tree to delete node z from */ | ||||
| /**/ | ||||
| /*    OUTPUT:  returns the RedBlackEntry stored at deleted node */ | ||||
| /**/ | ||||
| /*    EFFECT:  Deletes z from tree and but don't call destructor */ | ||||
| /**/ | ||||
| /*    Modifies Input:  z */ | ||||
| /**/ | ||||
| /*    The algorithm from this function is from _Introduction_To_Algorithms_ */ | ||||
| /***********************************************************************/ | ||||
|  | ||||
| RedBlackEntry * RedBlackTree::DeleteNode(RedBlackTreeNode * z){ | ||||
|   RedBlackTreeNode* y; | ||||
|   RedBlackTreeNode* x; | ||||
|   RedBlackEntry * returnValue = z->storedEntry; | ||||
|  | ||||
|   y= ((z->left == nil) || (z->right == nil)) ? z : GetSuccessorOf(z); | ||||
|   x= (y->left == nil) ? y->right : y->left; | ||||
|   if (root == (x->parent = y->parent)) { /* assignment of y->p to x->p is intentional */ | ||||
|     root->left=x; | ||||
|   } else { | ||||
|     if (y == y->parent->left) { | ||||
|       y->parent->left=x; | ||||
|     } else { | ||||
|       y->parent->right=x; | ||||
|     } | ||||
|   } | ||||
|   if (y != z) { /* y should not be nil in this case */ | ||||
|  | ||||
|     /* y is the node to splice out and x is its child */ | ||||
|  | ||||
|     y->left=z->left; | ||||
|     y->right=z->right; | ||||
|     y->parent=z->parent; | ||||
|     z->left->parent=z->right->parent=y; | ||||
|     if (z == z->parent->left) { | ||||
|       z->parent->left=y; | ||||
|     } else { | ||||
|       z->parent->right=y; | ||||
|     } | ||||
|     if (!(y->red)) { | ||||
|       y->red = z->red; | ||||
|       DeleteFixUp(x); | ||||
|     } else | ||||
|       y->red = z->red; | ||||
|     delete z; | ||||
|   } else { | ||||
|     if (!(y->red)) DeleteFixUp(x); | ||||
|     delete y; | ||||
|   } | ||||
|   return returnValue; | ||||
| } | ||||
|  | ||||
|  | ||||
| /***********************************************************************/ | ||||
| /*  FUNCTION:  Enumerate */ | ||||
| /**/ | ||||
| /*    INPUTS:  tree is the tree to look for keys between [low,high] */ | ||||
| /**/ | ||||
| /*    OUTPUT:  stack containing pointers to the nodes between [low,high] */ | ||||
| /**/ | ||||
| /*    Modifies Input: none */ | ||||
| /**/ | ||||
| /*    EFFECT:  Returns a stack containing pointers to nodes containing */ | ||||
| /*             keys which in [low,high]/ */ | ||||
| /**/ | ||||
| /***********************************************************************/ | ||||
|  | ||||
| TemplateStack<RedBlackTreeNode *> * RedBlackTree::Enumerate(int low, | ||||
| 							    int high)  { | ||||
|   TemplateStack<RedBlackTreeNode *> * enumResultStack = | ||||
|     new TemplateStack<RedBlackTreeNode *>(4); | ||||
|  | ||||
|   RedBlackTreeNode* x=root->left; | ||||
|   RedBlackTreeNode* lastBest=NULL; | ||||
|  | ||||
|   while(nil != x) { | ||||
|     if ( x->key > high ) { | ||||
|       x=x->left; | ||||
|     } else { | ||||
|       lastBest=x; | ||||
|       x=x->right; | ||||
|     } | ||||
|   } | ||||
|   while ( (lastBest) && (low <= lastBest->key) ) { | ||||
|     enumResultStack->Push(lastBest); | ||||
|     lastBest=GetPredecessorOf(lastBest); | ||||
|   } | ||||
|   return(enumResultStack); | ||||
| } | ||||
| @@ -3,14 +3,10 @@ | ||||
| from .nilmdb import NilmDB | ||||
| from .server import Server | ||||
| from .client import Client | ||||
| from .timer import Timer | ||||
|  | ||||
| import cmdline | ||||
|  | ||||
| import pyximport; pyximport.install() | ||||
| import layout | ||||
|  | ||||
| import serializer | ||||
| import timestamper | ||||
| import interval | ||||
| import du | ||||
|  | ||||
| import cmdline | ||||
|  | ||||
|   | ||||
							
								
								
									
										310
									
								
								nilmdb/bulkdata.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										310
									
								
								nilmdb/bulkdata.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,310 @@ | ||||
| # Fixed record size bulk data storage | ||||
|  | ||||
| from __future__ import absolute_import | ||||
| from __future__ import division | ||||
| import nilmdb | ||||
| from nilmdb.utils.printf import * | ||||
|  | ||||
| import os | ||||
| import sys | ||||
| import cPickle as pickle | ||||
| import struct | ||||
| import fnmatch | ||||
| import mmap | ||||
|  | ||||
| # Up to 256 open file descriptors at any given time | ||||
| table_cache_size = 16 | ||||
| fd_cache_size = 16 | ||||
|  | ||||
| @nilmdb.utils.must_close() | ||||
| class BulkData(object): | ||||
|     def __init__(self, basepath): | ||||
|         self.basepath = basepath | ||||
|         self.root = os.path.join(self.basepath, "data") | ||||
|  | ||||
|         # Make root path | ||||
|         if not os.path.isdir(self.root): | ||||
|             os.mkdir(self.root) | ||||
|  | ||||
|     def close(self): | ||||
|         self.getnode.cache_remove_all() | ||||
|  | ||||
|     def _encode_filename(self, path): | ||||
|         # Encode all paths to UTF-8, regardless of sys.getfilesystemencoding(), | ||||
|         # because we want to be able to represent all code points and the user | ||||
|         # will never be directly exposed to filenames.  We can then do path | ||||
|         # manipulations on the UTF-8 directly. | ||||
|         if isinstance(path, unicode): | ||||
|             return path.encode('utf-8') | ||||
|         return path | ||||
|  | ||||
|     def create(self, unicodepath, layout_name): | ||||
|         """ | ||||
|         unicodepath: path to the data (e.g. u'/newton/prep'). | ||||
|         Paths must contain at least two elements, e.g.: | ||||
|            /newton/prep | ||||
|            /newton/raw | ||||
|            /newton/upstairs/prep | ||||
|            /newton/upstairs/raw | ||||
|  | ||||
|         layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8' | ||||
|         """ | ||||
|         path = self._encode_filename(unicodepath) | ||||
|  | ||||
|         if path[0] != '/': | ||||
|             raise ValueError("paths must start with /") | ||||
|         [ group, node ] = path.rsplit("/", 1) | ||||
|         if group == '': | ||||
|             raise ValueError("invalid path") | ||||
|  | ||||
|         # Get layout, and build format string for struct module | ||||
|         try: | ||||
|             layout = nilmdb.layout.get_named(layout_name) | ||||
|             struct_fmt = '<d'  # Little endian, double timestamp | ||||
|             struct_mapping = { | ||||
|                 "int8": 'b', | ||||
|                 "uint8": 'B', | ||||
|                 "int16": 'h', | ||||
|                 "uint16": 'H', | ||||
|                 "int32": 'i', | ||||
|                 "uint32": 'I', | ||||
|                 "int64": 'q', | ||||
|                 "uint64": 'Q', | ||||
|                 "float32": 'f', | ||||
|                 "float64": 'd', | ||||
|                 } | ||||
|             for n in range(layout.count): | ||||
|                 struct_fmt += struct_mapping[layout.datatype] | ||||
|         except KeyError: | ||||
|             raise ValueError("no such layout, or bad data types") | ||||
|  | ||||
|         # Create the table.  Note that we make a distinction here | ||||
|         # between NilmDB paths (always Unix style, split apart | ||||
|         # manually) and OS paths (built up with os.path.join) | ||||
|         try: | ||||
|             # Make directories leading up to this one | ||||
|             elements = path.lstrip('/').split('/') | ||||
|             for i in range(len(elements)): | ||||
|                 ospath = os.path.join(self.root, *elements[0:i]) | ||||
|                 if Table.exists(ospath): | ||||
|                     raise ValueError("path is subdir of existing node") | ||||
|                 if not os.path.isdir(ospath): | ||||
|                     os.mkdir(ospath) | ||||
|  | ||||
|             # Make the final dir | ||||
|             ospath = os.path.join(self.root, *elements) | ||||
|             if os.path.isdir(ospath): | ||||
|                 raise ValueError("subdirs of this path already exist") | ||||
|             os.mkdir(ospath) | ||||
|  | ||||
|             # Write format string to file | ||||
|             Table.create(ospath, struct_fmt) | ||||
|         except OSError as e: | ||||
|             raise ValueError("error creating table at that path: " + e.strerror) | ||||
|  | ||||
|         # Open and cache it | ||||
|         self.getnode(unicodepath) | ||||
|  | ||||
|         # Success | ||||
|         return | ||||
|  | ||||
|     def destroy(self, unicodepath): | ||||
|         """Fully remove all data at a particular path.  No way to undo | ||||
|         it!  The group/path structure is removed, too.""" | ||||
|         path = self._encode_filename(unicodepath) | ||||
|  | ||||
|         # Get OS path | ||||
|         elements = path.lstrip('/').split('/') | ||||
|         ospath = os.path.join(self.root, *elements) | ||||
|  | ||||
|         # Remove Table object from cache | ||||
|         self.getnode.cache_remove(self, ospath) | ||||
|  | ||||
|         # Remove the contents of the target directory | ||||
|         if not os.path.isfile(os.path.join(ospath, "format")): | ||||
|             raise ValueError("nothing at that path") | ||||
|         for file in os.listdir(ospath): | ||||
|             os.remove(os.path.join(ospath, file)) | ||||
|  | ||||
|         # Remove empty parent directories | ||||
|         for i in reversed(range(len(elements))): | ||||
|             ospath = os.path.join(self.root, *elements[0:i+1]) | ||||
|             try: | ||||
|                 os.rmdir(ospath) | ||||
|             except OSError: | ||||
|                 break | ||||
|  | ||||
|     # Cache open tables | ||||
|     @nilmdb.utils.lru_cache(size = table_cache_size, | ||||
|                             onremove = lambda x: x.close()) | ||||
|     def getnode(self, unicodepath): | ||||
|         """Return a Table object corresponding to the given database | ||||
|         path, which must exist.""" | ||||
|         path = self._encode_filename(unicodepath) | ||||
|         elements = path.lstrip('/').split('/') | ||||
|         ospath = os.path.join(self.root, *elements) | ||||
|         return Table(ospath) | ||||
|  | ||||
| @nilmdb.utils.must_close() | ||||
| class Table(object): | ||||
|     """Tools to help access a single table (data at a specific OS path)""" | ||||
|  | ||||
|     # Class methods, to help keep format details in this class. | ||||
|     @classmethod | ||||
|     def exists(cls, root): | ||||
|         """Return True if a table appears to exist at this OS path""" | ||||
|         return os.path.isfile(os.path.join(root, "format")) | ||||
|  | ||||
|     @classmethod | ||||
|     def create(cls, root, struct_fmt): | ||||
|         """Initialize a table at the given OS path. | ||||
|         'struct_fmt' is a Struct module format description""" | ||||
|         format = { "rows_per_file": 4 * 1024 * 1024, | ||||
|                    "struct_fmt": struct_fmt } | ||||
|         with open(os.path.join(root, "format"), "wb") as f: | ||||
|             pickle.dump(format, f, 2) | ||||
|  | ||||
|     # Normal methods | ||||
|     def __init__(self, root): | ||||
|         """'root' is the full OS path to the directory of this table""" | ||||
|         self.root = root | ||||
|  | ||||
|         # Load the format and build packer | ||||
|         with open(self._fullpath("format"), "rb") as f: | ||||
|             format = pickle.load(f) | ||||
|         self.rows_per_file = format["rows_per_file"] | ||||
|         self.packer = struct.Struct(format["struct_fmt"]) | ||||
|         self.file_size = self.packer.size * self.rows_per_file | ||||
|  | ||||
|         # Find nrows by locating the lexicographically last filename | ||||
|         # and using its size. | ||||
|         pattern = '[0-9a-f]' * 8 | ||||
|         allfiles = fnmatch.filter(os.listdir(self.root), pattern) | ||||
|         if allfiles: | ||||
|             filename = max(allfiles) | ||||
|             offset = os.path.getsize(self._fullpath(filename)) | ||||
|             self.nrows = self._row_from_fnoffset(filename, offset) | ||||
|         else: | ||||
|             self.nrows = 0 | ||||
|  | ||||
|     def close(self): | ||||
|         self.mmap_open.cache_remove_all() | ||||
|  | ||||
|     # Internal helpers | ||||
|     def _fullpath(self, filename): | ||||
|         return os.path.join(self.root, filename) | ||||
|  | ||||
|     def _fnoffset_from_row(self, row): | ||||
|         """Return a (filename, offset, count) tuple: | ||||
|  | ||||
|         filename: the filename that contains the specified row | ||||
|           offset: byte offset of the specified row within the file | ||||
|            count: number of rows (starting at offste) that fit in the file | ||||
|         """ | ||||
|         filenum = row // self.rows_per_file | ||||
|         filename = sprintf("%08x", filenum) | ||||
|         offset = (row % self.rows_per_file) * self.packer.size | ||||
|         count = self.rows_per_file - (row % self.rows_per_file) | ||||
|         return (filename, offset, count) | ||||
|  | ||||
|     def _row_from_fnoffset(self, filename, offset): | ||||
|         """Return the row number that corresponds to the given | ||||
|         filename and byte-offset within that file.""" | ||||
|         filenum = int(filename, 16) | ||||
|         if (offset % self.packer.size) != 0: | ||||
|             raise ValueError("file offset is not a multiple of data size") | ||||
|         row = (filenum * self.rows_per_file) + (offset // self.packer.size) | ||||
|         return row | ||||
|  | ||||
|     # Cache open files | ||||
|     @nilmdb.utils.lru_cache(size = fd_cache_size, | ||||
|                             onremove = lambda x: x.close()) | ||||
|     def mmap_open(self, file, newsize = None): | ||||
|         """Open and map a given filename (relative to self.root). | ||||
|         Will be automatically closed when evicted from the cache. | ||||
|  | ||||
|         If 'newsize' is provided, the file is truncated to the given | ||||
|         size before the mapping is returned.  (Note that the LRU cache | ||||
|         on this function means the truncate will only happen if the | ||||
|         object isn't already cached; mmap.resize should be used too)""" | ||||
|         f = open(os.path.join(self.root, file), "a+", 0) | ||||
|         if newsize is not None: | ||||
|             # mmap can't map a zero-length file, so this allows the | ||||
|             # caller to set the filesize between file creation and | ||||
|             # mmap. | ||||
|             f.truncate(newsize) | ||||
|         mm = mmap.mmap(f.fileno(), 0) | ||||
|         return mm | ||||
|  | ||||
|     def append(self, data): | ||||
|         """Append the data and flush it to disk. | ||||
|         data is a nested Python list [[row],[row],[...]]""" | ||||
|         remaining = len(data) | ||||
|         dataiter = iter(data) | ||||
|         while remaining: | ||||
|             # See how many rows we can fit into the current file, and open it | ||||
|             (filename, offset, count) = self._fnoffset_from_row(self.nrows) | ||||
|             if count > remaining: | ||||
|                 count = remaining | ||||
|             newsize = offset + count * self.packer.size | ||||
|             mm = self.mmap_open(filename, newsize) | ||||
|             mm.seek(offset) | ||||
|  | ||||
|             # Extend the file to the target length.  We specified | ||||
|             # newsize when opening, but that may have been ignored if | ||||
|             # the mmap_open returned a cached object. | ||||
|             mm.resize(newsize) | ||||
|  | ||||
|             # Write the data | ||||
|             for i in xrange(count): | ||||
|                 row = dataiter.next() | ||||
|                 mm.write(self.packer.pack(*row)) | ||||
|             remaining -= count | ||||
|             self.nrows += count | ||||
|  | ||||
|     def __getitem__(self, key): | ||||
|         """Extract data and return it.  Supports simple indexing | ||||
|         (table[n]) and range slices (table[n:m]).  Returns a nested | ||||
|         Python list [[row],[row],[...]]""" | ||||
|  | ||||
|         # Handle simple slices | ||||
|         if isinstance(key, slice): | ||||
|             # Fall back to brute force if the slice isn't simple | ||||
|             if ((key.step is not None and key.step != 1) or | ||||
|                 key.start is None or | ||||
|                 key.stop is None or | ||||
|                 key.start >= key.stop or | ||||
|                 key.start < 0 or | ||||
|                 key.stop > self.nrows): | ||||
|                 return [ self[x] for x in xrange(*key.indices(self.nrows)) ] | ||||
|  | ||||
|             ret = [] | ||||
|             row = key.start | ||||
|             remaining = key.stop - key.start | ||||
|             while remaining: | ||||
|                 (filename, offset, count) = self._fnoffset_from_row(row) | ||||
|                 if count > remaining: | ||||
|                     count = remaining | ||||
|                 mm = self.mmap_open(filename) | ||||
|                 for i in xrange(count): | ||||
|                     ret.append(list(self.packer.unpack_from(mm, offset))) | ||||
|                     offset += self.packer.size | ||||
|                 remaining -= count | ||||
|                 row += count | ||||
|             return ret | ||||
|  | ||||
|         # Handle single points | ||||
|         if key < 0 or key >= self.nrows: | ||||
|             raise IndexError("Index out of range") | ||||
|         (filename, offset, count) = self._fnoffset_from_row(key) | ||||
|         mm = self.mmap_open(filename) | ||||
|         # unpack_from ignores the mmap object's current seek position | ||||
|         return self.packer.unpack_from(mm, offset) | ||||
|  | ||||
| class TimestampOnlyTable(object): | ||||
|     """Helper that lets us pass a Tables object into bisect, by | ||||
|     returning only the timestamp when a particular row is requested.""" | ||||
|     def __init__(self, table): | ||||
|         self.table = table | ||||
|     def __getitem__(self, index): | ||||
|         return self.table[index][0] | ||||
| @@ -1,495 +0,0 @@ | ||||
| # cython: profile=False | ||||
| # This is from bx-python 554:07aca5a9f6fc (BSD licensed), modified to | ||||
| # store interval ranges as doubles rather than 32-bit integers. | ||||
|  | ||||
| """ | ||||
| Data structure for performing intersect queries on a set of intervals which | ||||
| preserves all information about the intervals (unlike bitset projection methods). | ||||
|  | ||||
| :Authors: James Taylor (james@jamestaylor.org), | ||||
|           Ian Schenk (ian.schenck@gmail.com), | ||||
|           Brent Pedersen (bpederse@gmail.com) | ||||
| """ | ||||
|  | ||||
| # Historical note: | ||||
| #    This module original contained an implementation based on sorted endpoints | ||||
| #    and a binary search, using an idea from Scott Schwartz and Piotr Berman. | ||||
| #    Later an interval tree implementation was implemented by Ian for Galaxy's | ||||
| #    join tool (see `bx.intervals.operations.quicksect.py`). This was then | ||||
| #    converted to Cython by Brent, who also added support for | ||||
| #    upstream/downstream/neighbor queries. This was modified by James to | ||||
| #    handle half-open intervals strictly, to maintain sort order, and to | ||||
| #    implement the same interface as the original Intersecter. | ||||
|  | ||||
| #cython: cdivision=True | ||||
|  | ||||
| import operator | ||||
|  | ||||
| cdef extern from "stdlib.h": | ||||
|     int ceil(float f) | ||||
|     float log(float f) | ||||
|     int RAND_MAX | ||||
|     int rand() | ||||
|     int strlen(char *) | ||||
|     int iabs(int) | ||||
|  | ||||
| cdef inline double dmax2(double a, double b): | ||||
|     if b > a: return b | ||||
|     return a | ||||
|  | ||||
| cdef inline double dmax3(double a, double b, double c): | ||||
|     if b > a: | ||||
|         if c > b: | ||||
|             return c | ||||
|         return b | ||||
|     if a > c: | ||||
|         return a | ||||
|     return c | ||||
|  | ||||
| cdef inline double dmin3(double a, double b, double c): | ||||
|     if b < a: | ||||
|         if c < b: | ||||
|             return c | ||||
|         return b | ||||
|     if a < c: | ||||
|         return a | ||||
|     return c | ||||
|  | ||||
| cdef inline double dmin2(double a, double b): | ||||
|     if b < a: return b | ||||
|     return a | ||||
|  | ||||
| cdef float nlog = -1.0 / log(0.5) | ||||
|  | ||||
| cdef class IntervalNode: | ||||
|     """ | ||||
|     A single node of an `IntervalTree`. | ||||
|  | ||||
|     NOTE: Unless you really know what you are doing, you probably should us | ||||
|           `IntervalTree` rather than using this directly. | ||||
|     """ | ||||
|     cdef float priority | ||||
|     cdef public object interval | ||||
|     cdef public double start, end | ||||
|     cdef double minend, maxend, minstart | ||||
|     cdef IntervalNode cleft, cright, croot | ||||
|  | ||||
|     property left_node: | ||||
|         def __get__(self): | ||||
|             return self.cleft if self.cleft is not EmptyNode else None | ||||
|     property right_node: | ||||
|         def __get__(self): | ||||
|             return self.cright if self.cright is not EmptyNode else None | ||||
|     property root_node: | ||||
|         def __get__(self): | ||||
|             return self.croot if self.croot is not EmptyNode else None | ||||
|  | ||||
|     def __repr__(self): | ||||
|         return "IntervalNode(%g, %g)" % (self.start, self.end) | ||||
|  | ||||
|     def __cinit__(IntervalNode self, double start, double end, object interval): | ||||
|         # Python lacks the binomial distribution, so we convert a | ||||
|         # uniform into a binomial because it naturally scales with | ||||
|         # tree size.  Also, python's uniform is perfect since the | ||||
|         # upper limit is not inclusive, which gives us undefined here. | ||||
|         self.priority = ceil(nlog * log(-1.0/(1.0 * rand()/RAND_MAX - 1))) | ||||
|         self.start    = start | ||||
|         self.end      = end | ||||
|         self.interval = interval | ||||
|         self.maxend   = end | ||||
|         self.minstart = start | ||||
|         self.minend   = end | ||||
|         self.cleft    = EmptyNode | ||||
|         self.cright   = EmptyNode | ||||
|         self.croot    = EmptyNode | ||||
|  | ||||
|     cpdef IntervalNode insert(IntervalNode self, double start, double end, object interval): | ||||
|         """ | ||||
|         Insert a new IntervalNode into the tree of which this node is | ||||
|         currently the root. The return value is the new root of the tree (which | ||||
|         may or may not be this node!) | ||||
|         """ | ||||
|         cdef IntervalNode croot = self | ||||
|         # If starts are the same, decide which to add interval to based on | ||||
|         # end, thus maintaining sortedness relative to start/end | ||||
|         cdef double decision_endpoint = start | ||||
|         if start == self.start: | ||||
|             decision_endpoint = end | ||||
|  | ||||
|         if decision_endpoint > self.start: | ||||
|             # insert to cright tree | ||||
|             if self.cright is not EmptyNode: | ||||
|                 self.cright = self.cright.insert( start, end, interval ) | ||||
|             else: | ||||
|                 self.cright = IntervalNode( start, end, interval ) | ||||
|             # rebalance tree | ||||
|             if self.priority < self.cright.priority: | ||||
|                 croot = self.rotate_left() | ||||
|         else: | ||||
|             # insert to cleft tree | ||||
|             if self.cleft is not EmptyNode: | ||||
|                 self.cleft = self.cleft.insert( start, end, interval) | ||||
|             else: | ||||
|                 self.cleft = IntervalNode( start, end, interval) | ||||
|             # rebalance tree | ||||
|             if self.priority < self.cleft.priority: | ||||
|                 croot = self.rotate_right() | ||||
|  | ||||
|         croot.set_ends() | ||||
|         self.cleft.croot  = croot | ||||
|         self.cright.croot = croot | ||||
|         return croot | ||||
|  | ||||
|     cdef IntervalNode rotate_right(IntervalNode self): | ||||
|         cdef IntervalNode croot = self.cleft | ||||
|         self.cleft  = self.cleft.cright | ||||
|         croot.cright = self | ||||
|         self.set_ends() | ||||
|         return croot | ||||
|  | ||||
|     cdef IntervalNode rotate_left(IntervalNode self): | ||||
|         cdef IntervalNode croot = self.cright | ||||
|         self.cright = self.cright.cleft | ||||
|         croot.cleft  = self | ||||
|         self.set_ends() | ||||
|         return croot | ||||
|  | ||||
|     cdef inline void set_ends(IntervalNode self): | ||||
|         if self.cright is not EmptyNode and self.cleft is not EmptyNode: | ||||
|             self.maxend = dmax3(self.end, self.cright.maxend, self.cleft.maxend) | ||||
|             self.minend = dmin3(self.end, self.cright.minend, self.cleft.minend) | ||||
|             self.minstart = dmin3(self.start, self.cright.minstart, self.cleft.minstart) | ||||
|         elif self.cright is not EmptyNode: | ||||
|             self.maxend = dmax2(self.end, self.cright.maxend) | ||||
|             self.minend = dmin2(self.end, self.cright.minend) | ||||
|             self.minstart = dmin2(self.start, self.cright.minstart) | ||||
|         elif self.cleft is not EmptyNode: | ||||
|             self.maxend = dmax2(self.end, self.cleft.maxend) | ||||
|             self.minend = dmin2(self.end, self.cleft.minend) | ||||
|             self.minstart = dmin2(self.start, self.cleft.minstart) | ||||
|  | ||||
|  | ||||
|     def intersect( self, double start, double end, sort=True ): | ||||
|         """ | ||||
|         given a start and a end, return a list of features | ||||
|         falling within that range | ||||
|         """ | ||||
|         cdef list results = [] | ||||
|         self._intersect( start, end, results ) | ||||
|         if sort: | ||||
|             results = sorted(results) | ||||
|         return results | ||||
|  | ||||
|     find = intersect | ||||
|  | ||||
|     cdef void _intersect( IntervalNode self, double start, double end, list results): | ||||
|         # Left subtree | ||||
|         if self.cleft is not EmptyNode and self.cleft.maxend > start: | ||||
|             self.cleft._intersect( start, end, results ) | ||||
|         # This interval | ||||
|         if ( self.end > start ) and ( self.start < end ): | ||||
|             results.append( self.interval ) | ||||
|         # Right subtree | ||||
|         if self.cright is not EmptyNode and self.start < end: | ||||
|             self.cright._intersect( start, end, results ) | ||||
|  | ||||
|  | ||||
|     cdef void _seek_left(IntervalNode self, double position, list results, int n, double max_dist): | ||||
|         # we know we can bail in these 2 cases. | ||||
|         if self.maxend + max_dist < position: | ||||
|             return | ||||
|         if self.minstart > position: | ||||
|             return | ||||
|  | ||||
|         # the ordering of these 3 blocks makes it so the results are | ||||
|         # ordered nearest to farest from the query position | ||||
|         if self.cright is not EmptyNode: | ||||
|             self.cright._seek_left(position, results, n, max_dist) | ||||
|  | ||||
|         if -1 < position - self.end < max_dist: | ||||
|             results.append(self.interval) | ||||
|  | ||||
|         # TODO: can these conditionals be more stringent? | ||||
|         if self.cleft is not EmptyNode: | ||||
|                 self.cleft._seek_left(position, results, n, max_dist) | ||||
|  | ||||
|  | ||||
|  | ||||
|     cdef void _seek_right(IntervalNode self, double position, list results, int n, double max_dist): | ||||
|         # we know we can bail in these 2 cases. | ||||
|         if self.maxend < position: return | ||||
|         if self.minstart - max_dist > position: return | ||||
|  | ||||
|         #print "SEEK_RIGHT:",self, self.cleft, self.maxend, self.minstart, position | ||||
|  | ||||
|         # the ordering of these 3 blocks makes it so the results are | ||||
|         # ordered nearest to farest from the query position | ||||
|         if self.cleft is not EmptyNode: | ||||
|                 self.cleft._seek_right(position, results, n, max_dist) | ||||
|  | ||||
|         if -1 < self.start - position < max_dist: | ||||
|             results.append(self.interval) | ||||
|  | ||||
|         if self.cright is not EmptyNode: | ||||
|                 self.cright._seek_right(position, results, n, max_dist) | ||||
|  | ||||
|  | ||||
|     cpdef left(self, position, int n=1, double max_dist=2500): | ||||
|         """ | ||||
|         find n features with a start > than `position` | ||||
|         f: a Interval object (or anything with an `end` attribute) | ||||
|         n: the number of features to return | ||||
|         max_dist: the maximum distance to look before giving up. | ||||
|         """ | ||||
|         cdef list results = [] | ||||
|         # use start - 1 becuase .left() assumes strictly left-of | ||||
|         self._seek_left( position - 1, results, n, max_dist ) | ||||
|         if len(results) == n: return results | ||||
|         r = results | ||||
|         r.sort(key=operator.attrgetter('end'), reverse=True) | ||||
|         return r[:n] | ||||
|  | ||||
|     cpdef right(self, position, int n=1, double max_dist=2500): | ||||
|         """ | ||||
|         find n features with a end < than position | ||||
|         f: a Interval object (or anything with a `start` attribute) | ||||
|         n: the number of features to return | ||||
|         max_dist: the maximum distance to look before giving up. | ||||
|         """ | ||||
|         cdef list results = [] | ||||
|         # use end + 1 becuase .right() assumes strictly right-of | ||||
|         self._seek_right(position + 1, results, n, max_dist) | ||||
|         if len(results) == n: return results | ||||
|         r = results | ||||
|         r.sort(key=operator.attrgetter('start')) | ||||
|         return r[:n] | ||||
|  | ||||
|     def traverse(self): | ||||
|         if self.cleft is not EmptyNode: | ||||
|             for node in self.cleft.traverse(): | ||||
|                 yield node | ||||
|         yield self.interval | ||||
|         if self.cright is not EmptyNode: | ||||
|             for node in self.cright.traverse(): | ||||
|                 yield node | ||||
|  | ||||
| cdef IntervalNode EmptyNode = IntervalNode( 0, 0, Interval(0, 0)) | ||||
|  | ||||
| ## ---- Wrappers that retain the old interface ------------------------------- | ||||
|  | ||||
| cdef class Interval: | ||||
|     """ | ||||
|     Basic feature, with required integer start and end properties. | ||||
|     Also accepts optional strand as +1 or -1 (used for up/downstream queries), | ||||
|     a name, and any arbitrary data is sent in on the info keyword argument | ||||
|  | ||||
|     >>> from bx.intervals.intersection import Interval | ||||
|  | ||||
|     >>> f1 = Interval(23, 36) | ||||
|     >>> f2 = Interval(34, 48, value={'chr':12, 'anno':'transposon'}) | ||||
|     >>> f2 | ||||
|     Interval(34, 48, value={'anno': 'transposon', 'chr': 12}) | ||||
|  | ||||
|     """ | ||||
|     cdef public double start, end | ||||
|     cdef public object value, chrom, strand | ||||
|  | ||||
|     def __init__(self, double start, double end, object value=None, object chrom=None, object strand=None ): | ||||
|         assert start <= end, "start must be less than end" | ||||
|         self.start  = start | ||||
|         self.end   = end | ||||
|         self.value = value | ||||
|         self.chrom = chrom | ||||
|         self.strand = strand | ||||
|  | ||||
|     def __repr__(self): | ||||
|         fstr = "Interval(%g, %g" % (self.start, self.end) | ||||
|         if not self.value is None: | ||||
|             fstr += ", value=" + str(self.value) | ||||
|         fstr += ")" | ||||
|         return fstr | ||||
|  | ||||
|     def __richcmp__(self, other, op): | ||||
|         if op == 0: | ||||
|             # < | ||||
|             return self.start < other.start or self.end < other.end | ||||
|         elif op == 1: | ||||
|             # <= | ||||
|             return self == other or self < other | ||||
|         elif op == 2: | ||||
|             # == | ||||
|             return self.start == other.start and self.end == other.end | ||||
|         elif op == 3: | ||||
|             # != | ||||
|             return self.start != other.start or self.end != other.end | ||||
|         elif op == 4: | ||||
|             # > | ||||
|             return self.start > other.start or self.end > other.end | ||||
|         elif op == 5: | ||||
|             # >= | ||||
|             return self == other or self > other | ||||
|  | ||||
| cdef class IntervalTree: | ||||
|     """ | ||||
|     Data structure for performing window intersect queries on a set of | ||||
|     of possibly overlapping 1d intervals. | ||||
|  | ||||
|     Usage | ||||
|     ===== | ||||
|  | ||||
|     Create an empty IntervalTree | ||||
|  | ||||
|     >>> from bx.intervals.intersection import Interval, IntervalTree | ||||
|     >>> intersecter = IntervalTree() | ||||
|  | ||||
|     An interval is a start and end position and a value (possibly None). | ||||
|     You can add any object as an interval: | ||||
|  | ||||
|     >>> intersecter.insert( 0, 10, "food" ) | ||||
|     >>> intersecter.insert( 3, 7, dict(foo='bar') ) | ||||
|  | ||||
|     >>> intersecter.find( 2, 5 ) | ||||
|     ['food', {'foo': 'bar'}] | ||||
|  | ||||
|     If the object has start and end attributes (like the Interval class) there | ||||
|     is are some shortcuts: | ||||
|  | ||||
|     >>> intersecter = IntervalTree() | ||||
|     >>> intersecter.insert_interval( Interval( 0, 10 ) ) | ||||
|     >>> intersecter.insert_interval( Interval( 3, 7 ) ) | ||||
|     >>> intersecter.insert_interval( Interval( 3, 40 ) ) | ||||
|     >>> intersecter.insert_interval( Interval( 13, 50 ) ) | ||||
|  | ||||
|     >>> intersecter.find( 30, 50 ) | ||||
|     [Interval(3, 40), Interval(13, 50)] | ||||
|     >>> intersecter.find( 100, 200 ) | ||||
|     [] | ||||
|  | ||||
|     Before/after for intervals | ||||
|  | ||||
|     >>> intersecter.before_interval( Interval( 10, 20 ) ) | ||||
|     [Interval(3, 7)] | ||||
|     >>> intersecter.before_interval( Interval( 5, 20 ) ) | ||||
|     [] | ||||
|  | ||||
|     Upstream/downstream | ||||
|  | ||||
|     >>> intersecter.upstream_of_interval(Interval(11, 12)) | ||||
|     [Interval(0, 10)] | ||||
|     >>> intersecter.upstream_of_interval(Interval(11, 12, strand="-")) | ||||
|     [Interval(13, 50)] | ||||
|  | ||||
|     >>> intersecter.upstream_of_interval(Interval(1, 2, strand="-"), num_intervals=3) | ||||
|     [Interval(3, 7), Interval(3, 40), Interval(13, 50)] | ||||
|  | ||||
|  | ||||
|     """ | ||||
|  | ||||
|     cdef IntervalNode root | ||||
|  | ||||
|     def __cinit__( self ): | ||||
|         root = None | ||||
|  | ||||
|     # ---- Position based interfaces ----------------------------------------- | ||||
|  | ||||
|     def insert( self, double start, double end, object value=None ): | ||||
|         """ | ||||
|         Insert the interval [start,end) associated with value `value`. | ||||
|         """ | ||||
|         if self.root is None: | ||||
|             self.root = IntervalNode( start, end, value ) | ||||
|         else: | ||||
|             self.root = self.root.insert( start, end, value ) | ||||
|  | ||||
|     add = insert | ||||
|  | ||||
|  | ||||
|     def find( self, start, end ): | ||||
|         """ | ||||
|         Return a sorted list of all intervals overlapping [start,end). | ||||
|         """ | ||||
|         if self.root is None: | ||||
|             return [] | ||||
|         return self.root.find( start, end ) | ||||
|  | ||||
|     def before( self, position, num_intervals=1, max_dist=2500 ): | ||||
|         """ | ||||
|         Find `num_intervals` intervals that lie before `position` and are no | ||||
|         further than `max_dist` positions away | ||||
|         """ | ||||
|         if self.root is None: | ||||
|             return [] | ||||
|         return self.root.left( position, num_intervals, max_dist ) | ||||
|  | ||||
|     def after( self, position, num_intervals=1, max_dist=2500 ): | ||||
|         """ | ||||
|         Find `num_intervals` intervals that lie after `position` and are no | ||||
|         further than `max_dist` positions away | ||||
|         """ | ||||
|         if self.root is None: | ||||
|             return [] | ||||
|         return self.root.right( position, num_intervals, max_dist ) | ||||
|  | ||||
|     # ---- Interval-like object based interfaces ----------------------------- | ||||
|  | ||||
|     def insert_interval( self, interval ): | ||||
|         """ | ||||
|         Insert an "interval" like object (one with at least start and end | ||||
|         attributes) | ||||
|         """ | ||||
|         self.insert( interval.start, interval.end, interval ) | ||||
|  | ||||
|     add_interval = insert_interval | ||||
|  | ||||
|     def before_interval( self, interval, num_intervals=1, max_dist=2500 ): | ||||
|         """ | ||||
|         Find `num_intervals` intervals that lie completely before `interval` | ||||
|         and are no further than `max_dist` positions away | ||||
|         """ | ||||
|         if self.root is None: | ||||
|             return [] | ||||
|         return self.root.left( interval.start, num_intervals, max_dist ) | ||||
|  | ||||
|     def after_interval( self, interval, num_intervals=1, max_dist=2500 ): | ||||
|         """ | ||||
|         Find `num_intervals` intervals that lie completely after `interval` and | ||||
|         are no further than `max_dist` positions away | ||||
|         """ | ||||
|         if self.root is None: | ||||
|             return [] | ||||
|         return self.root.right( interval.end, num_intervals, max_dist ) | ||||
|  | ||||
|     def upstream_of_interval( self, interval, num_intervals=1, max_dist=2500 ): | ||||
|         """ | ||||
|         Find `num_intervals` intervals that lie completely upstream of | ||||
|         `interval` and are no further than `max_dist` positions away | ||||
|         """ | ||||
|         if self.root is None: | ||||
|             return [] | ||||
|         if interval.strand == -1 or interval.strand == "-": | ||||
|             return self.root.right( interval.end, num_intervals, max_dist ) | ||||
|         else: | ||||
|             return self.root.left( interval.start, num_intervals, max_dist ) | ||||
|  | ||||
|     def downstream_of_interval( self, interval, num_intervals=1, max_dist=2500 ): | ||||
|         """ | ||||
|         Find `num_intervals` intervals that lie completely downstream of | ||||
|         `interval` and are no further than `max_dist` positions away | ||||
|         """ | ||||
|         if self.root is None: | ||||
|             return [] | ||||
|         if interval.strand == -1 or interval.strand == "-": | ||||
|             return self.root.left( interval.start, num_intervals, max_dist ) | ||||
|         else: | ||||
|             return self.root.right( interval.end, num_intervals, max_dist ) | ||||
|  | ||||
|     def traverse(self): | ||||
|         """ | ||||
|         iterator that traverses the tree | ||||
|         """ | ||||
|         if self.root is None: | ||||
|             return iter([]) | ||||
|         return self.root.traverse() | ||||
|  | ||||
| # For backward compatibility | ||||
| Intersecter = IntervalTree | ||||
| @@ -1,13 +1,16 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| """Class for performing HTTP client requests via libcurl""" | ||||
|  | ||||
| from __future__ import absolute_import | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
|  | ||||
| import time | ||||
| import sys | ||||
| import re | ||||
| import os | ||||
| import simplejson as json | ||||
| import itertools | ||||
|  | ||||
| import nilmdb.httpclient | ||||
|  | ||||
| @@ -16,6 +19,10 @@ from nilmdb.httpclient import ClientError, ServerError, Error | ||||
|  | ||||
| version = "1.0" | ||||
|  | ||||
| def float_to_string(f): | ||||
|     # Use repr to maintain full precision in the string output. | ||||
|     return repr(float(f)) | ||||
|  | ||||
| class Client(object): | ||||
|     """Main client interface to the Nilm database.""" | ||||
|  | ||||
| @@ -89,33 +96,77 @@ class Client(object): | ||||
|         params = { "path": path } | ||||
|         return self.http.get("stream/destroy", params) | ||||
|  | ||||
|     def stream_insert(self, path, data): | ||||
|     def stream_insert(self, path, data, start = None, end = None): | ||||
|         """Insert data into a stream.  data should be a file-like object | ||||
|         that provides ASCII data that matches the database layout for path.""" | ||||
|         that provides ASCII data that matches the database layout for path. | ||||
|  | ||||
|         start and end are the starting and ending timestamp of this | ||||
|         stream; all timestamps t in the data must satisfy 'start <= t | ||||
|         < end'.  If left unspecified, 'start' is the timestamp of the | ||||
|         first line of data, and 'end' is the timestamp on the last line | ||||
|         of data, plus a small delta of 1μs. | ||||
|         """ | ||||
|         params = { "path": path } | ||||
|  | ||||
|         # See design.md for a discussion of how much data to send. | ||||
|         # These are soft limits -- actual data might be rounded up. | ||||
|         max_data = 1048576 | ||||
|         max_time = 30 | ||||
|         end_epsilon = 1e-6 | ||||
|  | ||||
|         def pairwise(iterable): | ||||
|             "s -> (s0,s1), (s1,s2), ..., (sn,None)" | ||||
|             a, b = itertools.tee(iterable) | ||||
|             next(b, None) | ||||
|             return itertools.izip_longest(a, b) | ||||
|  | ||||
|         def extract_timestamp(line): | ||||
|             return float(line.split()[0]) | ||||
|  | ||||
|         def sendit(): | ||||
|             result = self.http.put("stream/insert", send_data, params) | ||||
|             params["old_timestamp"] = result[1] | ||||
|             return result | ||||
|             # If we have more data after this, use the timestamp of | ||||
|             # the next line as the end.  Otherwise, use the given | ||||
|             # overall end time, or add end_epsilon to the last data | ||||
|             # point. | ||||
|             if nextline: | ||||
|                 block_end = extract_timestamp(nextline) | ||||
|                 if end and block_end > end: | ||||
|                     # This is unexpected, but we'll defer to the server | ||||
|                     # to return an error in this case. | ||||
|                     block_end = end | ||||
|             elif end: | ||||
|                 block_end = end | ||||
|             else: | ||||
|                 block_end = extract_timestamp(line) + end_epsilon | ||||
|  | ||||
|             # Send it | ||||
|             params["start"] = float_to_string(block_start) | ||||
|             params["end"] = float_to_string(block_end) | ||||
|             return self.http.put("stream/insert", block_data, params) | ||||
|  | ||||
|         clock_start = time.time() | ||||
|         block_data = "" | ||||
|         block_start = start | ||||
|         result = None | ||||
|         start = time.time() | ||||
|         send_data = "" | ||||
|         for line in data: | ||||
|             elapsed = time.time() - start | ||||
|             send_data += line | ||||
|         for (line, nextline) in pairwise(data): | ||||
|             # If we don't have a starting time, extract it from the first line | ||||
|             if block_start is None: | ||||
|                 block_start = extract_timestamp(line) | ||||
|  | ||||
|             if (len(send_data) > max_data) or (elapsed > max_time): | ||||
|             clock_elapsed = time.time() - clock_start | ||||
|             block_data += line | ||||
|  | ||||
|             # If we have enough data, or enough time has elapsed, | ||||
|             # send this block to the server, and empty things out | ||||
|             # for the next block. | ||||
|             if (len(block_data) > max_data) or (clock_elapsed > max_time): | ||||
|                 result = sendit() | ||||
|                 send_data = "" | ||||
|                 start = time.time() | ||||
|         if len(send_data): | ||||
|                 block_start = None | ||||
|                 block_data = "" | ||||
|                 clock_start = time.time() | ||||
|  | ||||
|         # One last block? | ||||
|         if len(block_data): | ||||
|             result = sendit() | ||||
|  | ||||
|         # Return the most recent JSON result we got back, or None if | ||||
| @@ -130,9 +181,9 @@ class Client(object): | ||||
|             "path": path | ||||
|         } | ||||
|         if start is not None: | ||||
|             params["start"] = repr(start)  # use repr to keep precision | ||||
|             params["start"] = float_to_string(start) | ||||
|         if end is not None: | ||||
|             params["end"] = repr(end) | ||||
|             params["end"] = float_to_string(end) | ||||
|         return self.http.get_gen("stream/intervals", params, retjson = True) | ||||
|  | ||||
|     def stream_extract(self, path, start = None, end = None, count = False): | ||||
| @@ -148,9 +199,9 @@ class Client(object): | ||||
|             "path": path, | ||||
|         } | ||||
|         if start is not None: | ||||
|             params["start"] = repr(start)  # use repr to keep precision | ||||
|             params["start"] = float_to_string(start) | ||||
|         if end is not None: | ||||
|             params["end"] = repr(end) | ||||
|             params["end"] = float_to_string(end) | ||||
|         if count: | ||||
|             params["count"] = 1 | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| """Command line client functionality""" | ||||
|  | ||||
| from __future__ import absolute_import | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
| import nilmdb.client | ||||
|  | ||||
| import datetime_tz | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| from __future__ import absolute_import | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
| import nilmdb.client | ||||
|  | ||||
| from argparse import ArgumentDefaultsHelpFormatter as def_form | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| from __future__ import absolute_import | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
| import nilmdb.client | ||||
|  | ||||
| from argparse import ArgumentDefaultsHelpFormatter as def_form | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| from __future__ import absolute_import | ||||
| from nilmdb.printf import * | ||||
| from __future__ import print_function | ||||
| from nilmdb.utils.printf import * | ||||
| import nilmdb.client | ||||
| import nilmdb.layout | ||||
| import sys | ||||
|  | ||||
| def setup(self, sub): | ||||
| @@ -51,7 +51,7 @@ def cmd_extract(self): | ||||
|             # Strip timestamp (first element).  Doesn't make sense | ||||
|             # if we are only returning a count. | ||||
|             dataline = ' '.join(dataline.split(' ')[1:]) | ||||
|         print dataline | ||||
|         print(dataline) | ||||
|         printed = True | ||||
|     if not printed: | ||||
|         if self.args.annotate: | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| from __future__ import absolute_import | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
|  | ||||
| from argparse import ArgumentDefaultsHelpFormatter as def_form | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,6 @@ | ||||
| from __future__ import absolute_import | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
| import nilmdb.client | ||||
| import nilmdb.layout | ||||
| import nilmdb.timestamper | ||||
|  | ||||
| import sys | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| from __future__ import absolute_import | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
| import nilmdb.client | ||||
|  | ||||
| import fnmatch | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| from __future__ import absolute_import | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
| import nilmdb.client | ||||
|  | ||||
| def setup(self, sub): | ||||
|   | ||||
| @@ -1,7 +1,8 @@ | ||||
| """HTTP client library""" | ||||
|  | ||||
| from __future__ import absolute_import | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
| import nilmdb.utils | ||||
|  | ||||
| import time | ||||
| import sys | ||||
| @@ -9,12 +10,9 @@ import re | ||||
| import os | ||||
| import simplejson as json | ||||
| import urlparse | ||||
| import urllib | ||||
| import pycurl | ||||
| import cStringIO | ||||
|  | ||||
| import nilmdb.iteratorizer | ||||
|  | ||||
| class Error(Exception): | ||||
|     """Base exception for both ClientError and ServerError responses""" | ||||
|     def __init__(self, | ||||
| @@ -60,7 +58,8 @@ class HTTPClient(object): | ||||
|     def _setup_url(self, url = "", params = ""): | ||||
|         url = urlparse.urljoin(self.baseurl, url) | ||||
|         if params: | ||||
|             url = urlparse.urljoin(url, "?" + urllib.urlencode(params, True)) | ||||
|             url = urlparse.urljoin( | ||||
|                 url, "?" + nilmdb.utils.urllib.urlencode(params, True)) | ||||
|         self.curl.setopt(pycurl.URL, url) | ||||
|         self.url = url | ||||
|  | ||||
| @@ -85,6 +84,10 @@ class HTTPClient(object): | ||||
|             raise ClientError(**args) | ||||
|         else: # pragma: no cover | ||||
|             if code >= 500 and code <= 599: | ||||
|                 if args["message"] is None: | ||||
|                     args["message"] = ("(no message; try disabling " + | ||||
|                                        "response.stream option in " + | ||||
|                                        "nilmdb.server for better debugging)") | ||||
|                 raise ServerError(**args) | ||||
|             else: | ||||
|                 raise Error(**args) | ||||
| @@ -109,7 +112,7 @@ class HTTPClient(object): | ||||
|             self.curl.setopt(pycurl.WRITEFUNCTION, callback) | ||||
|             self.curl.perform() | ||||
|         try: | ||||
|             for i in nilmdb.iteratorizer.Iteratorizer(func): | ||||
|             for i in nilmdb.utils.Iteratorizer(func): | ||||
|                 if self._status == 200: | ||||
|                     # If we had a 200 response, yield the data to the caller. | ||||
|                     yield i | ||||
|   | ||||
| @@ -177,8 +177,8 @@ cdef class IntervalSet: | ||||
|             else: | ||||
|                 return False | ||||
|  | ||||
|         this = [ x for x in self ] | ||||
|         that = [ x for x in other ] | ||||
|         this = list(self) | ||||
|         that = list(other) | ||||
|  | ||||
|         try: | ||||
|             while True: | ||||
| @@ -236,6 +236,12 @@ cdef class IntervalSet: | ||||
|                 self.__iadd__(x) | ||||
|         return self | ||||
|  | ||||
|     def iadd_nocheck(self, Interval other not None): | ||||
|         """Inplace add -- modifies self. | ||||
|         'Optimized' version that doesn't check for intersection and | ||||
|         only inserts the new interval into the tree.""" | ||||
|         self.tree.insert(rbtree.RBNode(other.start, other.end, other)) | ||||
|  | ||||
|     def __isub__(self, Interval other not None): | ||||
|         """Inplace subtract -- modifies self | ||||
|  | ||||
| @@ -300,3 +306,13 @@ cdef class IntervalSet: | ||||
|             if n.obj.intersects(other): | ||||
|                 return True | ||||
|         return False | ||||
|  | ||||
|     def find_end(self, double t): | ||||
|         """ | ||||
|         Return an Interval from this tree that ends at time t, or | ||||
|         None if it doesn't exist. | ||||
|         """ | ||||
|         n = self.tree.find_left_end(t) | ||||
|         if n and n.obj.end == t: | ||||
|             return n.obj | ||||
|         return None | ||||
|   | ||||
| @@ -1,6 +1,5 @@ | ||||
| # cython: profile=False | ||||
|  | ||||
| import tables | ||||
| import time | ||||
| import sys | ||||
| import inspect | ||||
| @@ -122,15 +121,6 @@ class Layout: | ||||
|             s += " %d" % d[i+1] | ||||
|         return s + "\n" | ||||
|  | ||||
|     # PyTables description | ||||
|     def description(self): | ||||
|         """Return the PyTables description of this layout""" | ||||
|         desc = {} | ||||
|         desc['timestamp'] = tables.Col.from_type('float64', pos=0) | ||||
|         for n in range(self.count): | ||||
|             desc['c' + str(n+1)] = tables.Col.from_type(self.datatype, pos=n+1) | ||||
|         return tables.Description(desc) | ||||
|  | ||||
| # Get a layout by name | ||||
| def get_named(typestring): | ||||
|     try: | ||||
|   | ||||
							
								
								
									
										192
									
								
								nilmdb/nilmdb.py
									
									
									
									
									
								
							
							
						
						
									
										192
									
								
								nilmdb/nilmdb.py
									
									
									
									
									
								
							| @@ -4,17 +4,16 @@ | ||||
|  | ||||
| Object that represents a NILM database file. | ||||
|  | ||||
| Manages both the SQL database and the PyTables storage backend. | ||||
| Manages both the SQL database and the table storage backend. | ||||
| """ | ||||
|  | ||||
| # Need absolute_import so that "import nilmdb" won't pull in nilmdb.py, | ||||
| # but will pull the nilmdb module instead. | ||||
| from __future__ import absolute_import | ||||
| import nilmdb | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
|  | ||||
| import sqlite3 | ||||
| import tables | ||||
| import time | ||||
| import sys | ||||
| import os | ||||
| @@ -25,6 +24,8 @@ import pyximport | ||||
| pyximport.install() | ||||
| from nilmdb.interval import Interval, DBInterval, IntervalSet, IntervalError | ||||
|  | ||||
| from . import bulkdata | ||||
|  | ||||
| # Note about performance and transactions: | ||||
| # | ||||
| # Committing a transaction in the default sync mode (PRAGMA synchronous=FULL) | ||||
| @@ -87,19 +88,13 @@ class StreamError(NilmDBError): | ||||
| class OverlapError(NilmDBError): | ||||
|     pass | ||||
|  | ||||
| # Helper that lets us pass a Pytables table into bisect | ||||
| class BisectableTable(object): | ||||
|     def __init__(self, table): | ||||
|         self.table = table | ||||
|     def __getitem__(self, index): | ||||
|         return self.table[index][0] | ||||
|  | ||||
| @nilmdb.utils.must_close() | ||||
| class NilmDB(object): | ||||
|     verbose = 0 | ||||
|  | ||||
|     def __init__(self, basepath, sync=True, max_results=None): | ||||
|         # set up path | ||||
|         self.basepath = os.path.abspath(basepath.rstrip('/')) | ||||
|         self.basepath = os.path.abspath(basepath) | ||||
|  | ||||
|         # Create the database path if it doesn't exist | ||||
|         try: | ||||
| @@ -108,16 +103,16 @@ class NilmDB(object): | ||||
|             if e.errno != errno.EEXIST: | ||||
|                 raise IOError("can't create tree " + self.basepath) | ||||
|  | ||||
|         # Our HD5 file goes inside it | ||||
|         h5filename = os.path.abspath(self.basepath + "/data.h5") | ||||
|         self.h5file = tables.openFile(h5filename, "a", "NILM Database") | ||||
|         # Our data goes inside it | ||||
|         self.data = bulkdata.BulkData(self.basepath) | ||||
|  | ||||
|         # SQLite database too | ||||
|         sqlfilename = os.path.abspath(self.basepath + "/data.sql") | ||||
|         sqlfilename = os.path.join(self.basepath, "data.sql") | ||||
|         # We use check_same_thread = False, assuming that the rest | ||||
|         # of the code (e.g. Server) will be smart and not access this | ||||
|         # database from multiple threads simultaneously.  That requirement | ||||
|         # may be relaxed later. | ||||
|         # database from multiple threads simultaneously.  Otherwise | ||||
|         # false positives will occur when the database is only opened | ||||
|         # in one thread, and only accessed in another. | ||||
|         self.con = sqlite3.connect(sqlfilename, check_same_thread = False) | ||||
|         self._sql_schema_update() | ||||
|  | ||||
| @@ -134,17 +129,6 @@ class NilmDB(object): | ||||
|         else: | ||||
|             self.max_results = 16384 | ||||
|  | ||||
|         self.opened = True | ||||
|  | ||||
|         # Cached intervals | ||||
|         self._cached_iset = {} | ||||
|  | ||||
|     def __del__(self): | ||||
|         if "opened" in self.__dict__: # pragma: no cover | ||||
|             fprintf(sys.stderr, | ||||
|                     "error: NilmDB.close() wasn't called, path %s", | ||||
|                     self.basepath) | ||||
|  | ||||
|     def get_basepath(self): | ||||
|         return self.basepath | ||||
|  | ||||
| @@ -152,8 +136,7 @@ class NilmDB(object): | ||||
|         if self.con: | ||||
|             self.con.commit() | ||||
|             self.con.close() | ||||
|         self.h5file.close() | ||||
|         del self.opened | ||||
|         self.data.close() | ||||
|  | ||||
|     def _sql_schema_update(self): | ||||
|         cur = self.con.cursor() | ||||
| @@ -170,12 +153,11 @@ class NilmDB(object): | ||||
|             with self.con: | ||||
|                 cur.execute("PRAGMA user_version = {v:d}".format(v=version)) | ||||
|  | ||||
|     @nilmdb.utils.lru_cache(size = 16) | ||||
|     def _get_intervals(self, stream_id): | ||||
|         """ | ||||
|         Return a mutable IntervalSet corresponding to the given stream ID. | ||||
|         """ | ||||
|         # Load from database if not cached | ||||
|         if stream_id not in self._cached_iset: | ||||
|         iset = IntervalSet() | ||||
|         result = self.con.execute("SELECT start_time, end_time, " | ||||
|                                   "start_pos, end_pos " | ||||
| @@ -188,40 +170,61 @@ class NilmDB(object): | ||||
|                                    start_pos, end_pos) | ||||
|         except IntervalError as e: # pragma: no cover | ||||
|             raise NilmDBError("unexpected overlap in ranges table!") | ||||
|             self._cached_iset[stream_id] = iset | ||||
|         # Return cached value | ||||
|         return self._cached_iset[stream_id] | ||||
|  | ||||
|     # TODO: Split add_interval into two pieces, one to add | ||||
|     # and one to flush to disk? | ||||
|     # Need to think about this.  Basic problem is that we can't | ||||
|     # mess with intervals once they're in the IntervalSet, | ||||
|     # without mucking with bxinterval internals. | ||||
|  | ||||
|     # Maybe add a separate optimization step? | ||||
|     # Join intervals that have a fairly small gap between them | ||||
|         return iset | ||||
|  | ||||
|     def _add_interval(self, stream_id, interval, start_pos, end_pos): | ||||
|         """ | ||||
|         Add interval to the internal interval cache, and to the database. | ||||
|         Note: arguments must be ints (not numpy.int64, etc) | ||||
|         """ | ||||
|         # Ensure this stream's intervals are cached, and add the new | ||||
|         # interval to that cache. | ||||
|         # Load this stream's intervals | ||||
|         iset = self._get_intervals(stream_id) | ||||
|         try: | ||||
|             iset += DBInterval(interval.start, interval.end, | ||||
|                                interval.start, interval.end, | ||||
|                                start_pos, end_pos) | ||||
|         except IntervalError as e: # pragma: no cover | ||||
|  | ||||
|         # Check for overlap | ||||
|         if iset.intersects(interval): # pragma: no cover (gets caught earlier) | ||||
|             raise NilmDBError("new interval overlaps existing data") | ||||
|  | ||||
|         # Check for adjacency.  If there's a stream in the database | ||||
|         # that ends exactly when this one starts, and the database | ||||
|         # rows match up, we can make one interval that covers the | ||||
|         # time range [adjacent.start -> interval.end) | ||||
|         # and database rows [ adjacent.start_pos -> end_pos ]. | ||||
|         # Only do this if the resulting interval isn't too large. | ||||
|         max_merged_rows = 30000000 # a bit more than 1 hour at 8 KHz | ||||
|         adjacent = iset.find_end(interval.start) | ||||
|         if (adjacent is not None and | ||||
|             start_pos == adjacent.db_endpos and | ||||
|             (end_pos - adjacent.db_startpos) < max_merged_rows): | ||||
|             # First delete the old one, both from our iset and the | ||||
|             # database | ||||
|             iset -= adjacent | ||||
|             self.con.execute("DELETE FROM ranges WHERE " | ||||
|                              "stream_id=? AND start_time=? AND " | ||||
|                              "end_time=? AND start_pos=? AND " | ||||
|                              "end_pos=?", (stream_id, | ||||
|                                            adjacent.db_start, | ||||
|                                            adjacent.db_end, | ||||
|                                            adjacent.db_startpos, | ||||
|                                            adjacent.db_endpos)) | ||||
|  | ||||
|             # Now update our interval so the fallthrough add is | ||||
|             # correct. | ||||
|             interval.start = adjacent.start | ||||
|             start_pos = adjacent.db_startpos | ||||
|  | ||||
|         # Add the new interval to the iset | ||||
|         iset.iadd_nocheck(DBInterval(interval.start, interval.end, | ||||
|                                      interval.start, interval.end, | ||||
|                                      start_pos, end_pos)) | ||||
|  | ||||
|         # Insert into the database | ||||
|         self.con.execute("INSERT INTO ranges " | ||||
|                          "(stream_id,start_time,end_time,start_pos,end_pos) " | ||||
|                          "VALUES (?,?,?,?,?)", | ||||
|                          (stream_id, interval.start, interval.end, | ||||
|                           int(start_pos), int(end_pos))) | ||||
|  | ||||
|         self.con.commit() | ||||
|  | ||||
|     def stream_list(self, path = None, layout = None): | ||||
| @@ -285,34 +288,11 @@ class NilmDB(object): | ||||
|  | ||||
|         layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8' | ||||
|         """ | ||||
|         if path[0] != '/': | ||||
|             raise ValueError("paths must start with /") | ||||
|         [ group, node ] = path.rsplit("/", 1) | ||||
|         if group == '': | ||||
|             raise ValueError("invalid path") | ||||
|         # Create the bulk storage.  Raises ValueError on error, which we | ||||
|         # pass along. | ||||
|         self.data.create(path, layout_name) | ||||
|  | ||||
|         # Get description | ||||
|         try: | ||||
|             desc = nilmdb.layout.get_named(layout_name).description() | ||||
|         except KeyError: | ||||
|             raise ValueError("no such layout") | ||||
|  | ||||
|         # Estimated table size (for PyTables optimization purposes): assume | ||||
|         # 3 months worth of data at 8 KHz.  It's OK if this is wrong. | ||||
|         exp_rows = 8000 * 60*60*24*30*3 | ||||
|  | ||||
|         # Create the table | ||||
|         try: | ||||
|             table = self.h5file.createTable(group, node, | ||||
|                                             description = desc, | ||||
|                                             expectedrows = exp_rows, | ||||
|                                             createparents = True) | ||||
|         except AttributeError: | ||||
|             # Trying to create e.g. /foo/bar/baz when /foo/bar is already | ||||
|             # a table raises this error. | ||||
|             raise ValueError("error creating table at that path") | ||||
|  | ||||
|         # Insert into SQL database once the PyTables is happy | ||||
|         # Insert into SQL database once the bulk storage is happy | ||||
|         with self.con as con: | ||||
|             con.execute("INSERT INTO streams (path, layout) VALUES (?,?)", | ||||
|                         (path, layout_name)) | ||||
| @@ -358,24 +338,14 @@ class NilmDB(object): | ||||
|  | ||||
|     def stream_destroy(self, path): | ||||
|         """Fully remove a table and all of its data from the database. | ||||
|         No way to undo it!  The group structure is removed, if there | ||||
|         are no other tables in it.  Metadata is removed.""" | ||||
|         No way to undo it!  Metadata is removed.""" | ||||
|         stream_id = self._stream_id(path) | ||||
|  | ||||
|         # Delete the cached interval data | ||||
|         if stream_id in self._cached_iset: | ||||
|             del self._cached_iset[stream_id] | ||||
|         self._get_intervals.cache_remove(self, stream_id) | ||||
|  | ||||
|         # Delete the data node, and all parent nodes (if they have no | ||||
|         # remaining children) | ||||
|         split_path = path.lstrip('/').split("/") | ||||
|         while split_path: | ||||
|             name = split_path.pop() | ||||
|             where = "/" + "/".join(split_path) | ||||
|             try: | ||||
|                 self.h5file.removeNode(where, name, recursive = False) | ||||
|             except tables.NodeError: | ||||
|                 break | ||||
|         # Delete the data | ||||
|         self.data.destroy(path) | ||||
|  | ||||
|         # Delete metadata, stream, intervals | ||||
|         with self.con as con: | ||||
| @@ -383,44 +353,30 @@ class NilmDB(object): | ||||
|             con.execute("DELETE FROM ranges WHERE stream_id=?", (stream_id,)) | ||||
|             con.execute("DELETE FROM streams WHERE id=?", (stream_id,)) | ||||
|  | ||||
|     def stream_insert(self, path, parser, old_timestamp = None): | ||||
|     def stream_insert(self, path, start, end, data): | ||||
|         """Insert new data into the database. | ||||
|            path: Path at which to add the data | ||||
|            parser: nilmdb.layout.Parser instance full of data to insert | ||||
|            start: Starting timestamp | ||||
|            end: Ending timestamp | ||||
|            data: Rows of data, to be passed to PyTable's table.append | ||||
|                  method.  E.g. nilmdb.layout.Parser.data | ||||
|            """ | ||||
|         if (not parser.min_timestamp or not parser.max_timestamp or | ||||
|             not len(parser.data)): | ||||
|             raise StreamError("no data provided") | ||||
|  | ||||
|         # If we were provided with an old timestamp, the expectation | ||||
|         # is that the client has a contiguous block of time it is sending, | ||||
|         # but it's doing it over multiple calls to stream_insert. | ||||
|         # old_timestamp is the max_timestamp of the previous insert. | ||||
|         # To make things continuous, use that as our starting timestamp | ||||
|         # instead of what the parser found. | ||||
|         if old_timestamp: | ||||
|             min_timestamp = old_timestamp | ||||
|         else: | ||||
|             min_timestamp = parser.min_timestamp | ||||
|  | ||||
|         # First check for basic overlap using timestamp info given. | ||||
|         stream_id = self._stream_id(path) | ||||
|         iset = self._get_intervals(stream_id) | ||||
|         interval = Interval(min_timestamp, parser.max_timestamp) | ||||
|         interval = Interval(start, end) | ||||
|         if iset.intersects(interval): | ||||
|             raise OverlapError("new data overlaps existing data at range: " | ||||
|                                + str(iset & interval)) | ||||
|  | ||||
|         # Insert the data into pytables | ||||
|         table = self.h5file.getNode(path) | ||||
|         # Insert the data | ||||
|         table = self.data.getnode(path) | ||||
|         row_start = table.nrows | ||||
|         table.append(parser.data) | ||||
|         table.append(data) | ||||
|         row_end = table.nrows | ||||
|         table.flush() | ||||
|  | ||||
|         # Insert the record into the sql database. | ||||
|         # Casts are to convert from numpy.int64. | ||||
|         self._add_interval(stream_id, interval, int(row_start), int(row_end)) | ||||
|         self._add_interval(stream_id, interval, row_start, row_end) | ||||
|  | ||||
|         # And that's all | ||||
|         return "ok" | ||||
| @@ -435,7 +391,7 @@ class NilmDB(object): | ||||
|         # Optimization for the common case where an interval wasn't truncated | ||||
|         if interval.start == interval.db_start: | ||||
|             return interval.db_startpos | ||||
|         return bisect.bisect_left(BisectableTable(table), | ||||
|         return bisect.bisect_left(bulkdata.TimestampOnlyTable(table), | ||||
|                                   interval.start, | ||||
|                                   interval.db_startpos, | ||||
|                                   interval.db_endpos) | ||||
| @@ -454,7 +410,7 @@ class NilmDB(object): | ||||
|         # want to include the given timestamp in the results.  This is | ||||
|         # so a queries like 1:00 -> 2:00 and 2:00 -> 3:00 return | ||||
|         # non-overlapping data. | ||||
|         return bisect.bisect_left(BisectableTable(table), | ||||
|         return bisect.bisect_left(bulkdata.TimestampOnlyTable(table), | ||||
|                                   interval.end, | ||||
|                                   interval.db_startpos, | ||||
|                                   interval.db_endpos) | ||||
| @@ -478,7 +434,7 @@ class NilmDB(object): | ||||
|         than actually fetching the data.  It is not limited by | ||||
|         max_results. | ||||
|         """ | ||||
|         table = self.h5file.getNode(path) | ||||
|         table = self.data.getnode(path) | ||||
|         stream_id = self._stream_id(path) | ||||
|         intervals = self._get_intervals(stream_id) | ||||
|         requested = Interval(start or 0, end or 1e12) | ||||
|   | ||||
							
								
								
									
										118
									
								
								nilmdb/server.py
									
									
									
									
									
								
							
							
						
						
									
										118
									
								
								nilmdb/server.py
									
									
									
									
									
								
							| @@ -3,15 +3,15 @@ | ||||
| # Need absolute_import so that "import nilmdb" won't pull in nilmdb.py, | ||||
| # but will pull the nilmdb module instead. | ||||
| from __future__ import absolute_import | ||||
| from nilmdb.utils.printf import * | ||||
| import nilmdb | ||||
|  | ||||
| from nilmdb.printf import * | ||||
|  | ||||
| import cherrypy | ||||
| import sys | ||||
| import time | ||||
| import os | ||||
| import simplejson as json | ||||
| import functools | ||||
|  | ||||
| try: | ||||
|     import cherrypy | ||||
| @@ -26,6 +26,46 @@ class NilmApp(object): | ||||
|  | ||||
| version = "1.1" | ||||
|  | ||||
| # Decorators | ||||
| def chunked_response(func): | ||||
|     """Decorator to enable chunked responses""" | ||||
|     # Set this to False to get better tracebacks from some requests | ||||
|     # (/stream/extract, /stream/intervals). | ||||
|     func._cp_config = { 'response.stream': True } | ||||
|     return func | ||||
|  | ||||
| def workaround_cp_bug_1200(func): # pragma: no cover (just a workaround) | ||||
|     """Decorator to work around CherryPy bug #1200 in a response | ||||
|     generator""" | ||||
|     # Even if chunked responses are disabled, you may still miss miss | ||||
|     # LookupError, or UnicodeError exceptions due to CherryPy bug | ||||
|     # #1200.  This throws them as generic Exceptions insteads. | ||||
|     import traceback | ||||
|     @functools.wraps(func) | ||||
|     def wrapper(*args, **kwargs): | ||||
|         try: | ||||
|             for val in func(*args, **kwargs): | ||||
|                 yield val | ||||
|         except (LookupError, UnicodeError) as e: | ||||
|             raise Exception("bug workaround; real exception is:\n" + | ||||
|                             traceback.format_exc()) | ||||
|     return wrapper | ||||
|  | ||||
| def exception_to_httperror(response = "400 Bad Request"): | ||||
|     """Return a decorator that catches Exception and throws | ||||
|     a HTTPError describing it instead""" | ||||
|     def decorator(func): | ||||
|         @functools.wraps(func) | ||||
|         def wrapper(*args, **kwargs): | ||||
|             try: | ||||
|                 return func(*args, **kwargs) | ||||
|             except Exception as e: | ||||
|                 message = sprintf("%s: %s", type(e).__name__, str(e)) | ||||
|                 raise cherrypy.HTTPError(response, message) | ||||
|         return wrapper | ||||
|     return decorator | ||||
|  | ||||
| # CherryPy apps | ||||
| class Root(NilmApp): | ||||
|     """Root application for NILM database""" | ||||
|  | ||||
| @@ -59,7 +99,7 @@ class Root(NilmApp): | ||||
|     @cherrypy.expose | ||||
|     @cherrypy.tools.json_out() | ||||
|     def dbsize(self): | ||||
|         return nilmdb.du.du(self.db.get_basepath()) | ||||
|         return nilmdb.utils.du(self.db.get_basepath()) | ||||
|  | ||||
| class Stream(NilmApp): | ||||
|     """Stream-specific operations""" | ||||
| @@ -78,26 +118,20 @@ class Stream(NilmApp): | ||||
|     # /stream/create?path=/newton/prep&layout=PrepData | ||||
|     @cherrypy.expose | ||||
|     @cherrypy.tools.json_out() | ||||
|     @exception_to_httperror() | ||||
|     def create(self, path, layout): | ||||
|         """Create a new stream in the database.  Provide path | ||||
|         and one of the nilmdb.layout.layouts keys. | ||||
|         """ | ||||
|         try: | ||||
|         return self.db.stream_create(path, layout) | ||||
|         except Exception as e: | ||||
|             message = sprintf("%s: %s", type(e).__name__, e.message) | ||||
|             raise cherrypy.HTTPError("400 Bad Request", message) | ||||
|  | ||||
|     # /stream/destroy?path=/newton/prep | ||||
|     @cherrypy.expose | ||||
|     @cherrypy.tools.json_out() | ||||
|     @exception_to_httperror() | ||||
|     def destroy(self, path): | ||||
|         """Delete a stream and its associated data.""" | ||||
|         try: | ||||
|         return self.db.stream_destroy(path) | ||||
|         except Exception as e: | ||||
|             message = sprintf("%s: %s", type(e).__name__, e.message) | ||||
|             raise cherrypy.HTTPError("400 Bad Request", message) | ||||
|  | ||||
|     # /stream/get_metadata?path=/newton/prep | ||||
|     # /stream/get_metadata?path=/newton/prep&key=foo&key=bar | ||||
| @@ -126,49 +160,35 @@ class Stream(NilmApp): | ||||
|     # /stream/set_metadata?path=/newton/prep&data=<json> | ||||
|     @cherrypy.expose | ||||
|     @cherrypy.tools.json_out() | ||||
|     @exception_to_httperror() | ||||
|     def set_metadata(self, path, data): | ||||
|         """Set metadata for the named stream, replacing any | ||||
|         existing metadata.  Data should be a json-encoded | ||||
|         dictionary""" | ||||
|         try: | ||||
|         data_dict = json.loads(data) | ||||
|         self.db.stream_set_metadata(path, data_dict) | ||||
|         except Exception as e: | ||||
|             message = sprintf("%s: %s", type(e).__name__, e.message) | ||||
|             raise cherrypy.HTTPError("400 Bad Request", message) | ||||
|         return "ok" | ||||
|  | ||||
|     # /stream/update_metadata?path=/newton/prep&data=<json> | ||||
|     @cherrypy.expose | ||||
|     @cherrypy.tools.json_out() | ||||
|     @exception_to_httperror() | ||||
|     def update_metadata(self, path, data): | ||||
|         """Update metadata for the named stream.  Data | ||||
|         should be a json-encoded dictionary""" | ||||
|         try: | ||||
|         data_dict = json.loads(data) | ||||
|         self.db.stream_update_metadata(path, data_dict) | ||||
|         except Exception as e: | ||||
|             message = sprintf("%s: %s", type(e).__name__, e.message) | ||||
|             raise cherrypy.HTTPError("400 Bad Request", message) | ||||
|         return "ok" | ||||
|  | ||||
|     # /stream/insert?path=/newton/prep | ||||
|     @cherrypy.expose | ||||
|     @cherrypy.tools.json_out() | ||||
|     #@cherrypy.tools.disable_prb() | ||||
|     def insert(self, path, old_timestamp = None): | ||||
|     def insert(self, path, start, end): | ||||
|         """ | ||||
|         Insert new data into the database.  Provide textual data | ||||
|         (matching the path's layout) as a HTTP PUT. | ||||
|  | ||||
|         old_timestamp is used when making multiple, split-up insertions | ||||
|         for a larger contiguous block of data.  The first insert | ||||
|         will return the maximum timestamp that it saw, and the second | ||||
|         insert should provide this timestamp as an argument.  This is | ||||
|         used to extend the previous database interval rather than | ||||
|         start a new one. | ||||
|         """ | ||||
|  | ||||
|         # Important that we always read the input before throwing any | ||||
|         # errors, to keep lengths happy for persistent connections. | ||||
|         # However, CherryPy 3.2.2 has a bug where this fails for GET | ||||
| @@ -193,22 +213,36 @@ class Stream(NilmApp): | ||||
|                                      "Error parsing input data: " + | ||||
|                                      e.message) | ||||
|  | ||||
|         if (not parser.min_timestamp or not parser.max_timestamp or | ||||
|             not len(parser.data)): | ||||
|             raise cherrypy.HTTPError("400 Bad Request", | ||||
|                                      "no data provided") | ||||
|  | ||||
|         # Check limits | ||||
|         start = float(start) | ||||
|         end = float(end) | ||||
|         if parser.min_timestamp < start: | ||||
|             raise cherrypy.HTTPError("400 Bad Request", "Data timestamp " + | ||||
|                                      repr(parser.min_timestamp) + | ||||
|                                      " < start time " + repr(start)) | ||||
|         if parser.max_timestamp >= end: | ||||
|             raise cherrypy.HTTPError("400 Bad Request", "Data timestamp " + | ||||
|                                      repr(parser.max_timestamp) + | ||||
|                                      " >= end time " + repr(end)) | ||||
|  | ||||
|         # Now do the nilmdb insert, passing it the parser full of data. | ||||
|         try: | ||||
|             if old_timestamp: | ||||
|                 old_timestamp = float(old_timestamp) | ||||
|             result = self.db.stream_insert(path, parser, old_timestamp) | ||||
|             result = self.db.stream_insert(path, start, end, parser.data) | ||||
|         except nilmdb.nilmdb.NilmDBError as e: | ||||
|             raise cherrypy.HTTPError("400 Bad Request", e.message) | ||||
|  | ||||
|         # Return the maximum timestamp that we saw.  The client will | ||||
|         # return this back to us as the old_timestamp parameter, if | ||||
|         # it has more data to send. | ||||
|         return ("ok", parser.max_timestamp) | ||||
|         # Done | ||||
|         return "ok" | ||||
|  | ||||
|     # /stream/intervals?path=/newton/prep | ||||
|     # /stream/intervals?path=/newton/prep&start=1234567890.0&end=1234567899.0 | ||||
|     @cherrypy.expose | ||||
|     @chunked_response | ||||
|     def intervals(self, path, start = None, end = None): | ||||
|         """ | ||||
|         Get intervals from backend database.  Streams the resulting | ||||
| @@ -230,9 +264,9 @@ class Stream(NilmApp): | ||||
|         if len(streams) != 1: | ||||
|             raise cherrypy.HTTPError("404 Not Found", "No such stream") | ||||
|  | ||||
|         @workaround_cp_bug_1200 | ||||
|         def content(start, end): | ||||
|             # Note: disable response.stream below to get better debug info | ||||
|             # from tracebacks in this subfunction. | ||||
|             # Note: disable chunked responses to see tracebacks from here. | ||||
|             while True: | ||||
|                 (intervals, restart) = self.db.stream_intervals(path,start,end) | ||||
|                 response = ''.join([ json.dumps(i) + "\n" for i in intervals ]) | ||||
| @@ -241,10 +275,10 @@ class Stream(NilmApp): | ||||
|                     break | ||||
|                 start = restart | ||||
|         return content(start, end) | ||||
|     intervals._cp_config = { 'response.stream': True } # chunked HTTP response | ||||
|  | ||||
|     # /stream/extract?path=/newton/prep&start=1234567890.0&end=1234567899.0 | ||||
|     @cherrypy.expose | ||||
|     @chunked_response | ||||
|     def extract(self, path, start = None, end = None, count = False): | ||||
|         """ | ||||
|         Extract data from backend database.  Streams the resulting | ||||
| @@ -274,9 +308,9 @@ class Stream(NilmApp): | ||||
|         # Get formatter | ||||
|         formatter = nilmdb.layout.Formatter(layout) | ||||
|  | ||||
|         @workaround_cp_bug_1200 | ||||
|         def content(start, end, count): | ||||
|             # Note: disable response.stream below to get better debug info | ||||
|             # from tracebacks in this subfunction. | ||||
|             # Note: disable chunked responses to see tracebacks from here. | ||||
|             if count: | ||||
|                 matched = self.db.stream_extract(path, start, end, count) | ||||
|                 yield sprintf("%d\n", matched) | ||||
| @@ -292,8 +326,6 @@ class Stream(NilmApp): | ||||
|                     return | ||||
|                 start = restart | ||||
|         return content(start, end, count) | ||||
|     extract._cp_config = { 'response.stream': True } # chunked HTTP response | ||||
|  | ||||
|  | ||||
| class Exiter(object): | ||||
|     """App that exits the server, for testing""" | ||||
| @@ -318,7 +350,7 @@ class Server(object): | ||||
|         # Need to wrap DB object in a serializer because we'll call | ||||
|         # into it from separate threads. | ||||
|         self.embedded = embedded | ||||
|         self.db = nilmdb.serializer.WrapObject(db) | ||||
|         self.db = nilmdb.utils.Serializer(db) | ||||
|         cherrypy.config.update({ | ||||
|             'server.socket_host': host, | ||||
|             'server.socket_port': port, | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| """File-like objects that add timestamps to the input lines""" | ||||
|  | ||||
| from __future__ import absolute_import | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
|  | ||||
| import time | ||||
| import os | ||||
|   | ||||
							
								
								
									
										9
									
								
								nilmdb/utils/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								nilmdb/utils/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| """NilmDB utilities""" | ||||
|  | ||||
| from .timer import Timer | ||||
| from .iteratorizer import Iteratorizer | ||||
| from .serializer import Serializer | ||||
| from .lrucache import lru_cache | ||||
| from .diskusage import du | ||||
| from .mustclose import must_close | ||||
| from .urllib import urlencode | ||||
							
								
								
									
										66
									
								
								nilmdb/utils/lrucache.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								nilmdb/utils/lrucache.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,66 @@ | ||||
| # Memoize a function's return value with a least-recently-used cache | ||||
| # Based on: | ||||
| #   http://code.activestate.com/recipes/498245-lru-and-lfu-cache-decorators/ | ||||
| # with added 'destructor' functionality. | ||||
|  | ||||
| import collections | ||||
| import functools | ||||
|  | ||||
| def lru_cache(size = 10, onremove = None): | ||||
|     """Least-recently-used cache decorator. | ||||
|  | ||||
|     @lru_cache(size = 10, onevict = None) | ||||
|     def f(...): | ||||
|         pass | ||||
|  | ||||
|     Given a function and arguments, memoize its return value. | ||||
|     Up to 'size' elements are cached. | ||||
|  | ||||
|     When evicting a value from the cache, call the function | ||||
|     'onremove' with the value that's being evicted. | ||||
|  | ||||
|     Call f.cache_remove(...) to evict the cache entry with the given | ||||
|     arguments.  Call f.cache_remove_all() to evict all entries. | ||||
|     f.cache_hits and f.cache_misses give statistics. | ||||
|     """ | ||||
|  | ||||
|     def decorator(func): | ||||
|         cache = collections.OrderedDict()	# order: least- to most-recent | ||||
|  | ||||
|         def evict(value): | ||||
|             if onremove: | ||||
|                 onremove(value) | ||||
|  | ||||
|         @functools.wraps(func) | ||||
|         def wrapper(*args, **kwargs): | ||||
|             key = args + tuple(sorted(kwargs.items())) | ||||
|             try: | ||||
|                 value = cache.pop(key) | ||||
|                 wrapper.cache_hits += 1 | ||||
|             except KeyError: | ||||
|                 value = func(*args, **kwargs) | ||||
|                 wrapper.cache_misses += 1 | ||||
|                 if len(cache) >= size: | ||||
|                     evict(cache.popitem(0)[1])	# evict LRU cache entry | ||||
|             cache[key] = value              	# (re-)insert this key at end | ||||
|             return value | ||||
|  | ||||
|         def cache_remove(*args, **kwargs): | ||||
|             """Remove the described key from this cache, if present. | ||||
|             Note that if the original wrapped function was implicitly | ||||
|             passed 'self', you need to pass it as an argument here too.""" | ||||
|             key = args + tuple(sorted(kwargs.items())) | ||||
|             if key in cache: | ||||
|                 evict(cache.pop(key)) | ||||
|  | ||||
|         def cache_remove_all(): | ||||
|             for key in cache: | ||||
|                 evict(cache.pop(key)) | ||||
|  | ||||
|         wrapper.cache_hits = 0 | ||||
|         wrapper.cache_misses = 0 | ||||
|         wrapper.cache_remove = cache_remove | ||||
|         wrapper.cache_remove_all = cache_remove_all | ||||
|  | ||||
|         return wrapper | ||||
|     return decorator | ||||
							
								
								
									
										42
									
								
								nilmdb/utils/mustclose.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								nilmdb/utils/mustclose.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| # Class decorator that warns on stderr at deletion time if the class's | ||||
| # close() member wasn't called. | ||||
|  | ||||
| from nilmdb.utils.printf import * | ||||
| import sys | ||||
|  | ||||
| def must_close(errorfile = sys.stderr): | ||||
|     def decorator(cls): | ||||
|         def dummy(*args, **kwargs): | ||||
|             pass | ||||
|         if "__init__" not in cls.__dict__: | ||||
|             cls.__init__ = dummy | ||||
|         if "__del__" not in cls.__dict__: | ||||
|             cls.__del__ = dummy | ||||
|         if "close" not in cls.__dict__: | ||||
|             cls.close = dummy | ||||
|  | ||||
|         orig_init = cls.__init__ | ||||
|         orig_del = cls.__del__ | ||||
|         orig_close = cls.close | ||||
|  | ||||
|         def __init__(self, *args, **kwargs): | ||||
|             ret = orig_init(self, *args, **kwargs) | ||||
|             self.__dict__["_must_close"] = True | ||||
|             return ret | ||||
|  | ||||
|         def __del__(self): | ||||
|             if "_must_close" in self.__dict__: | ||||
|                 fprintf(errorfile, "error: %s.close() wasn't called!\n", | ||||
|                         self.__class__.__name__) | ||||
|             return orig_del(self) | ||||
|  | ||||
|         def close(self, *args, **kwargs): | ||||
|             del self._must_close | ||||
|             return orig_close(self) | ||||
|  | ||||
|         cls.__init__ = __init__ | ||||
|         cls.__del__ = __del__ | ||||
|         cls.close = close | ||||
|  | ||||
|         return cls | ||||
|     return decorator | ||||
| @@ -67,3 +67,6 @@ class WrapObject(object): | ||||
|     def __del__(self): | ||||
|         self.__wrap_call_queue.put((None, None, None, None)) | ||||
|         self.__wrap_serializer.join() | ||||
| 
 | ||||
| # Just an alias | ||||
| Serializer = WrapObject | ||||
| @@ -5,6 +5,7 @@ | ||||
| #   with nilmdb.Timer("flush"): | ||||
| #       foo.flush() | ||||
| 
 | ||||
| from __future__ import print_function | ||||
| import contextlib | ||||
| import time | ||||
| 
 | ||||
| @@ -18,4 +19,4 @@ def Timer(name = None, tosyslog = False): | ||||
|         import syslog | ||||
|         syslog.syslog(msg) | ||||
|     else: | ||||
|         print msg | ||||
|         print(msg) | ||||
							
								
								
									
										68
									
								
								nilmdb/utils/urllib.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								nilmdb/utils/urllib.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,68 @@ | ||||
| from __future__ import absolute_import | ||||
| from urllib import quote_plus, _is_unicode | ||||
|  | ||||
| # urllib.urlencode insists on encoding Unicode as ASCII.  This is an | ||||
| # exact copy of that function, except we encode it as UTF-8 instead. | ||||
|  | ||||
| def urlencode(query, doseq=0): | ||||
|     """Encode a sequence of two-element tuples or dictionary into a URL query string. | ||||
|  | ||||
|     If any values in the query arg are sequences and doseq is true, each | ||||
|     sequence element is converted to a separate parameter. | ||||
|  | ||||
|     If the query arg is a sequence of two-element tuples, the order of the | ||||
|     parameters in the output will match the order of parameters in the | ||||
|     input. | ||||
|     """ | ||||
|  | ||||
|     if hasattr(query,"items"): | ||||
|         # mapping objects | ||||
|         query = query.items() | ||||
|     else: | ||||
|         # it's a bother at times that strings and string-like objects are | ||||
|         # sequences... | ||||
|         try: | ||||
|             # non-sequence items should not work with len() | ||||
|             # non-empty strings will fail this | ||||
|             if len(query) and not isinstance(query[0], tuple): | ||||
|                 raise TypeError | ||||
|             # zero-length sequences of all types will get here and succeed, | ||||
|             # but that's a minor nit - since the original implementation | ||||
|             # allowed empty dicts that type of behavior probably should be | ||||
|             # preserved for consistency | ||||
|         except TypeError: | ||||
|             ty,va,tb = sys.exc_info() | ||||
|             raise TypeError, "not a valid non-string sequence or mapping object", tb | ||||
|  | ||||
|     l = [] | ||||
|     if not doseq: | ||||
|         # preserve old behavior | ||||
|         for k, v in query: | ||||
|             k = quote_plus(str(k)) | ||||
|             v = quote_plus(str(v)) | ||||
|             l.append(k + '=' + v) | ||||
|     else: | ||||
|         for k, v in query: | ||||
|             k = quote_plus(str(k)) | ||||
|             if isinstance(v, str): | ||||
|                 v = quote_plus(v) | ||||
|                 l.append(k + '=' + v) | ||||
|             elif _is_unicode(v): | ||||
|                 # is there a reasonable way to convert to ASCII? | ||||
|                 # encode generates a string, but "replace" or "ignore" | ||||
|                 # lose information and "strict" can raise UnicodeError | ||||
|                 v = quote_plus(v.encode("utf-8","strict")) | ||||
|                 l.append(k + '=' + v) | ||||
|             else: | ||||
|                 try: | ||||
|                     # is this a sufficient test for sequence-ness? | ||||
|                     len(v) | ||||
|                 except TypeError: | ||||
|                     # not a sequence | ||||
|                     v = quote_plus(str(v)) | ||||
|                     l.append(k + '=' + v) | ||||
|                 else: | ||||
|                     # loop over the sequence | ||||
|                     for elt in v: | ||||
|                         l.append(k + '=' + quote_plus(str(elt))) | ||||
|     return '&'.join(l) | ||||
| @@ -3,14 +3,17 @@ | ||||
| import nilmdb | ||||
| import argparse | ||||
|  | ||||
| parser = argparse.ArgumentParser(description='Run the NILM server') | ||||
| formatter = argparse.ArgumentDefaultsHelpFormatter | ||||
| parser = argparse.ArgumentParser(description='Run the NILM server', | ||||
|                                  formatter_class = formatter) | ||||
| parser.add_argument('-p', '--port', help='Port number', type=int, default=12380) | ||||
| parser.add_argument('-d', '--database', help='Database directory', default="db") | ||||
| parser.add_argument('-y', '--yappi', help='Run with yappi profiler', | ||||
|                     action='store_true') | ||||
| args = parser.parse_args() | ||||
|  | ||||
| # Start web app on a custom port | ||||
| db = nilmdb.NilmDB("db") | ||||
| db = nilmdb.NilmDB(args.database) | ||||
| server = nilmdb.Server(db, host = "127.0.0.1", | ||||
|                        port = args.port, | ||||
|                        embedded = False) | ||||
|   | ||||
| @@ -10,6 +10,8 @@ cover-erase= | ||||
| ##cover-branches=     # need nose 1.1.3 for this | ||||
| stop= | ||||
| verbosity=2 | ||||
| #tests=tests/test_mustclose.py | ||||
| #tests=tests/test_lrucache.py | ||||
| #tests=tests/test_cmdline.py | ||||
| #tests=tests/test_layout.py | ||||
| #tests=tests/test_rbtree.py | ||||
|   | ||||
| @@ -13,7 +13,7 @@ class Renderer(object): | ||||
|  | ||||
|     # Rendering | ||||
|     def __render_dot_node(self, node, max_depth = 20): | ||||
|         from nilmdb.printf import sprintf | ||||
|         from nilmdb.utils.printf import sprintf | ||||
|         """Render a single node and its children into a dot graph fragment""" | ||||
|         if max_depth == 0: | ||||
|             return "" | ||||
|   | ||||
| @@ -1,5 +1,7 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| import nilmdb | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
| from nilmdb.client import ClientError, ServerError | ||||
|  | ||||
| import datetime_tz | ||||
| @@ -82,6 +84,8 @@ class TestClient(object): | ||||
|         # Bad layout type | ||||
|         with assert_raises(ClientError): | ||||
|             client.stream_create("/newton/prep", "NoSuchLayout") | ||||
|  | ||||
|         # Create three streams | ||||
|         client.stream_create("/newton/prep", "PrepData") | ||||
|         client.stream_create("/newton/raw", "RawData") | ||||
|         client.stream_create("/newton/zzz/rawnotch", "RawNotchedData") | ||||
| @@ -131,6 +135,7 @@ class TestClient(object): | ||||
|  | ||||
|         testfile = "tests/data/prep-20120323T1000" | ||||
|         start = datetime_tz.datetime_tz.smartparse("20120323T1000") | ||||
|         start = start.totimestamp() | ||||
|         rate = 120 | ||||
|  | ||||
|         # First try a nonexistent path | ||||
| @@ -155,14 +160,41 @@ class TestClient(object): | ||||
|  | ||||
|         # Try forcing a server request with empty data | ||||
|         with assert_raises(ClientError) as e: | ||||
|             client.http.put("stream/insert", "", { "path": "/newton/prep" }) | ||||
|             client.http.put("stream/insert", "", { "path": "/newton/prep", | ||||
|                                                    "start": 0, "end": 0 }) | ||||
|         in_("400 Bad Request", str(e.exception)) | ||||
|         in_("no data provided", str(e.exception)) | ||||
|  | ||||
|         # Specify start/end (starts too late) | ||||
|         data = nilmdb.timestamper.TimestamperRate(testfile, start, 120) | ||||
|         with assert_raises(ClientError) as e: | ||||
|             result = client.stream_insert("/newton/prep", data, | ||||
|                                           start + 5, start + 120) | ||||
|         in_("400 Bad Request", str(e.exception)) | ||||
|         in_("Data timestamp 1332511200.0 < start time 1332511205.0", | ||||
|             str(e.exception)) | ||||
|  | ||||
|         # Specify start/end (ends too early) | ||||
|         data = nilmdb.timestamper.TimestamperRate(testfile, start, 120) | ||||
|         with assert_raises(ClientError) as e: | ||||
|             result = client.stream_insert("/newton/prep", data, | ||||
|                                           start, start + 1) | ||||
|         in_("400 Bad Request", str(e.exception)) | ||||
|         # Client chunks the input, so the exact timestamp here might change | ||||
|         # if the chunk positions change. | ||||
|         in_("Data timestamp 1332511271.016667 >= end time 1332511201.0", | ||||
|             str(e.exception)) | ||||
|  | ||||
|         # Now do the real load | ||||
|         data = nilmdb.timestamper.TimestamperRate(testfile, start, 120) | ||||
|         result = client.stream_insert("/newton/prep", data) | ||||
|         eq_(result[0], "ok") | ||||
|         result = client.stream_insert("/newton/prep", data, | ||||
|                                       start, start + 119.999777) | ||||
|         eq_(result, "ok") | ||||
|  | ||||
|         # Verify the intervals.  Should be just one, even if the data | ||||
|         # was inserted in chunks, due to nilmdb interval concatenation. | ||||
|         intervals = list(client.stream_intervals("/newton/prep")) | ||||
|         eq_(intervals, [[start, start + 119.999777]]) | ||||
|  | ||||
|         # Try some overlapping data -- just insert it again | ||||
|         data = nilmdb.timestamper.TimestamperRate(testfile, start, 120) | ||||
| @@ -215,7 +247,8 @@ class TestClient(object): | ||||
|         # Check PUT with generator out | ||||
|         with assert_raises(ClientError) as e: | ||||
|             client.http.put_gen("stream/insert", "", | ||||
|                                 { "path": "/newton/prep" }).next() | ||||
|                                 { "path": "/newton/prep", | ||||
|                                   "start": 0, "end": 0 }).next() | ||||
|         in_("400 Bad Request", str(e.exception)) | ||||
|         in_("no data provided", str(e.exception)) | ||||
|  | ||||
| @@ -238,7 +271,7 @@ class TestClient(object): | ||||
|         # still disable chunked responses for debugging. | ||||
|         x = client.http.get("stream/intervals", { "path": "/newton/prep" }, | ||||
|                             retjson=False) | ||||
|         eq_(x.count('\n'), 2) | ||||
|         lines_(x, 1) | ||||
|         if "transfer-encoding: chunked" not in client.http._headers.lower(): | ||||
|             warnings.warn("Non-chunked HTTP response for /stream/intervals") | ||||
|  | ||||
| @@ -248,3 +281,40 @@ class TestClient(object): | ||||
|                               "end": "123" }, retjson=False) | ||||
|         if "transfer-encoding: chunked" not in client.http._headers.lower(): | ||||
|             warnings.warn("Non-chunked HTTP response for /stream/extract") | ||||
|  | ||||
|     def test_client_7_unicode(self): | ||||
|         # Basic Unicode tests | ||||
|         client = nilmdb.Client(url = "http://localhost:12380/") | ||||
|  | ||||
|         # Delete streams that exist | ||||
|         for stream in client.stream_list(): | ||||
|             client.stream_destroy(stream[0]) | ||||
|  | ||||
|         # Database is empty | ||||
|         eq_(client.stream_list(), []) | ||||
|  | ||||
|         # Create Unicode stream, match it | ||||
|         raw = [ u"/düsseldorf/raw", u"uint16_6" ] | ||||
|         prep = [ u"/düsseldorf/prep", u"uint16_6" ] | ||||
|         client.stream_create(*raw) | ||||
|         eq_(client.stream_list(), [raw]) | ||||
|         eq_(client.stream_list(layout=raw[1]), [raw]) | ||||
|         eq_(client.stream_list(path=raw[0]), [raw]) | ||||
|         client.stream_create(*prep) | ||||
|         eq_(client.stream_list(), [prep, raw]) | ||||
|  | ||||
|         # Set / get metadata with Unicode keys and values | ||||
|         eq_(client.stream_get_metadata(raw[0]), {}) | ||||
|         eq_(client.stream_get_metadata(prep[0]), {}) | ||||
|         meta1 = { u"alpha": u"α", | ||||
|                   u"β": u"beta" } | ||||
|         meta2 = { u"alpha": u"α" } | ||||
|         meta3 = { u"β": u"beta" } | ||||
|         client.stream_set_metadata(prep[0], meta1) | ||||
|         client.stream_update_metadata(prep[0], {}) | ||||
|         client.stream_update_metadata(raw[0], meta2) | ||||
|         client.stream_update_metadata(raw[0], meta3) | ||||
|         eq_(client.stream_get_metadata(prep[0]), meta1) | ||||
|         eq_(client.stream_get_metadata(raw[0]), meta1) | ||||
|         eq_(client.stream_get_metadata(raw[0], [ "alpha" ]), meta2) | ||||
|         eq_(client.stream_get_metadata(raw[0], [ "alpha", "β" ]), meta1) | ||||
|   | ||||
| @@ -1,5 +1,7 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| import nilmdb | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
| import nilmdb.cmdline | ||||
|  | ||||
| from nose.tools import * | ||||
| @@ -13,7 +15,7 @@ import threading | ||||
| import urllib2 | ||||
| from urllib2 import urlopen, HTTPError | ||||
| import Queue | ||||
| import cStringIO | ||||
| import StringIO | ||||
| import shlex | ||||
|  | ||||
| from test_helpers import * | ||||
| @@ -45,12 +47,18 @@ def setup_module(): | ||||
| def teardown_module(): | ||||
|     server_stop() | ||||
|  | ||||
| # Add an encoding property to StringIO so Python will convert Unicode | ||||
| # properly when writing or reading. | ||||
| class UTF8StringIO(StringIO.StringIO): | ||||
|     encoding = 'utf-8' | ||||
|  | ||||
| class TestCmdline(object): | ||||
|  | ||||
|     def run(self, arg_string, infile=None, outfile=None): | ||||
|         """Run a cmdline client with the specified argument string, | ||||
|         passing the given input.  Returns a tuple with the output and | ||||
|         exit code""" | ||||
|         # printf("TZ=UTC ./nilmtool.py %s\n", arg_string) | ||||
|         class stdio_wrapper: | ||||
|             def __init__(self, stdin, stdout, stderr): | ||||
|                 self.io = (stdin, stdout, stderr) | ||||
| @@ -61,15 +69,18 @@ class TestCmdline(object): | ||||
|                 ( sys.stdin, sys.stdout, sys.stderr ) = self.saved | ||||
|         # Empty input if none provided | ||||
|         if infile is None: | ||||
|             infile = cStringIO.StringIO("") | ||||
|             infile = UTF8StringIO("") | ||||
|         # Capture stderr | ||||
|         errfile = cStringIO.StringIO() | ||||
|         errfile = UTF8StringIO() | ||||
|         if outfile is None: | ||||
|             # If no output file, capture stdout with stderr | ||||
|             outfile = errfile | ||||
|         with stdio_wrapper(infile, outfile, errfile) as s: | ||||
|             try: | ||||
|                 nilmdb.cmdline.Cmdline(shlex.split(arg_string)).run() | ||||
|                 # shlex doesn't support Unicode very well.  Encode the | ||||
|                 # string as UTF-8 explicitly before splitting. | ||||
|                 args = shlex.split(arg_string.encode('utf-8')) | ||||
|                 nilmdb.cmdline.Cmdline(args).run() | ||||
|                 sys.exit(0) | ||||
|             except SystemExit as e: | ||||
|                 exitcode = e.code | ||||
| @@ -199,7 +210,12 @@ class TestCmdline(object): | ||||
|         # Should not be able to create a stream with another stream as | ||||
|         # its parent | ||||
|         self.fail("create /newton/prep/blah PrepData") | ||||
|         self.contain("error creating table at that path") | ||||
|         self.contain("path is subdir of existing node") | ||||
|  | ||||
|         # Should not be able to create a stream at a location that | ||||
|         # has other nodes as children | ||||
|         self.fail("create /newton/zzz PrepData") | ||||
|         self.contain("subdirs of this path already exist") | ||||
|  | ||||
|         # Verify we got those 3 streams and they're returned in | ||||
|         # alphabetical order. | ||||
| @@ -292,16 +308,9 @@ class TestCmdline(object): | ||||
|         eq_(cmd.parse_time("hi there 20120405 1400-0400 testing! 123"), test) | ||||
|         eq_(cmd.parse_time("20120405 1800 UTC"), test) | ||||
|         eq_(cmd.parse_time("20120405 1400-0400 UTC"), test) | ||||
|         for badtime in [ "20120405 1400-9999", "hello", "-", "", "14:00" ]: | ||||
|             with assert_raises(ValueError): | ||||
|             print cmd.parse_time("20120405 1400-9999") | ||||
|         with assert_raises(ValueError): | ||||
|             print cmd.parse_time("hello") | ||||
|         with assert_raises(ValueError): | ||||
|             print cmd.parse_time("-") | ||||
|         with assert_raises(ValueError): | ||||
|             print cmd.parse_time("") | ||||
|         with assert_raises(ValueError): | ||||
|             print cmd.parse_time("14:00") | ||||
|                 x = cmd.parse_time(badtime) | ||||
|         eq_(cmd.parse_time("snapshot-20120405-140000.raw.gz"), test) | ||||
|         eq_(cmd.parse_time("prep-20120405T1400"), test) | ||||
|  | ||||
| @@ -368,36 +377,36 @@ class TestCmdline(object): | ||||
|     def test_cmdline_07_detail(self): | ||||
|         # Just count the number of lines, it's probably fine | ||||
|         self.ok("list --detail") | ||||
|         eq_(self.captured.count('\n'), 11) | ||||
|         lines_(self.captured, 8) | ||||
|  | ||||
|         self.ok("list --detail --path *prep") | ||||
|         eq_(self.captured.count('\n'), 7) | ||||
|         lines_(self.captured, 4) | ||||
|  | ||||
|         self.ok("list --detail --path *prep --start='23 Mar 2012 10:02'") | ||||
|         eq_(self.captured.count('\n'), 5) | ||||
|         lines_(self.captured, 3) | ||||
|  | ||||
|         self.ok("list --detail --path *prep --start='23 Mar 2012 10:05'") | ||||
|         eq_(self.captured.count('\n'), 3) | ||||
|         lines_(self.captured, 2) | ||||
|  | ||||
|         self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15'") | ||||
|         eq_(self.captured.count('\n'), 2) | ||||
|         lines_(self.captured, 2) | ||||
|         self.contain("10:05:15.000") | ||||
|  | ||||
|         self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15.50'") | ||||
|         eq_(self.captured.count('\n'), 2) | ||||
|         lines_(self.captured, 2) | ||||
|         self.contain("10:05:15.500") | ||||
|  | ||||
|         self.ok("list --detail --path *prep --start='23 Mar 2012 19:05:15.50'") | ||||
|         eq_(self.captured.count('\n'), 2) | ||||
|         lines_(self.captured, 2) | ||||
|         self.contain("no intervals") | ||||
|  | ||||
|         self.ok("list --detail --path *prep --start='23 Mar 2012 10:05:15.50'" | ||||
|                 + " --end='23 Mar 2012 10:05:15.50'") | ||||
|         eq_(self.captured.count('\n'), 2) | ||||
|         lines_(self.captured, 2) | ||||
|         self.contain("10:05:15.500") | ||||
|  | ||||
|         self.ok("list --detail") | ||||
|         eq_(self.captured.count('\n'), 11) | ||||
|         lines_(self.captured, 8) | ||||
|  | ||||
|     def test_cmdline_08_extract(self): | ||||
|         # nonexistent stream | ||||
| @@ -450,7 +459,7 @@ class TestCmdline(object): | ||||
|  | ||||
|         # all data put in by tests | ||||
|         self.ok("extract -a /newton/prep --start 2000-01-01 --end 2020-01-01") | ||||
|         eq_(self.captured.count('\n'), 43204) | ||||
|         lines_(self.captured, 43204) | ||||
|         self.ok("extract -c /newton/prep --start 2000-01-01 --end 2020-01-01") | ||||
|         self.match("43200\n") | ||||
|  | ||||
| @@ -459,7 +468,7 @@ class TestCmdline(object): | ||||
|         server_stop() | ||||
|         server_start(max_results = 2) | ||||
|         self.ok("list --detail") | ||||
|         eq_(self.captured.count('\n'), 11) | ||||
|         lines_(self.captured, 8) | ||||
|         server_stop() | ||||
|         server_start() | ||||
|  | ||||
| @@ -484,7 +493,7 @@ class TestCmdline(object): | ||||
|  | ||||
|         # Notice how they're not empty | ||||
|         self.ok("list --detail") | ||||
|         eq_(self.captured.count('\n'), 11) | ||||
|         lines_(self.captured, 8) | ||||
|  | ||||
|         # Delete some | ||||
|         self.ok("destroy /newton/prep") | ||||
| @@ -513,3 +522,21 @@ class TestCmdline(object): | ||||
|             # Make sure it was created empty | ||||
|             self.ok("list --detail --path " + path) | ||||
|             self.contain("(no intervals)") | ||||
|  | ||||
|     def test_cmdline_11_unicode(self): | ||||
|         # Unicode paths. | ||||
|         self.ok("destroy /newton/asdf/qwer") | ||||
|         self.ok("destroy /newton/prep") | ||||
|         self.ok("destroy /newton/raw") | ||||
|         self.ok("destroy /newton/zzz") | ||||
|  | ||||
|         self.ok(u"create /düsseldorf/raw uint16_6") | ||||
|         self.ok("list --detail") | ||||
|         self.contain(u"/düsseldorf/raw uint16_6") | ||||
|         self.contain("(no intervals)") | ||||
|  | ||||
|         # Unicode metadata | ||||
|         self.ok(u"metadata /düsseldorf/raw --set α=beta 'γ=δ'") | ||||
|         self.ok(u"metadata /düsseldorf/raw --update 'α=β ε τ α'") | ||||
|         self.ok(u"metadata /düsseldorf/raw") | ||||
|         self.match(u"α=β ε τ α\nγ=δ\n") | ||||
|   | ||||
| @@ -20,6 +20,12 @@ def ne_(a, b): | ||||
|     if not a != b: | ||||
|         raise AssertionError("unexpected %s == %s" % (myrepr(a), myrepr(b))) | ||||
|  | ||||
| def lines_(a, n): | ||||
|     l = a.count('\n') | ||||
|     if not l == n: | ||||
|         raise AssertionError("wanted %d lines, got %d in output: '%s'" | ||||
|                              % (n, l, a)) | ||||
|  | ||||
| def recursive_unlink(path): | ||||
|     try: | ||||
|         shutil.rmtree(path) | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| import nilmdb | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
| import datetime_tz | ||||
|  | ||||
| from nose.tools import * | ||||
| @@ -137,6 +137,15 @@ class TestInterval: | ||||
|             x = iseta != 3 | ||||
|         ne_(IntervalSet(a), IntervalSet(b)) | ||||
|  | ||||
|         # Note that assignment makes a new reference (not a copy) | ||||
|         isetd = IntervalSet(isetb) | ||||
|         isete = isetd | ||||
|         eq_(isetd, isetb) | ||||
|         eq_(isetd, isete) | ||||
|         isetd -= a | ||||
|         ne_(isetd, isetb) | ||||
|         eq_(isetd, isete) | ||||
|  | ||||
|         # test iterator | ||||
|         for interval in iseta: | ||||
|             pass | ||||
| @@ -158,11 +167,18 @@ class TestInterval: | ||||
|         iset = IntervalSet(a) | ||||
|         iset += IntervalSet(b) | ||||
|         eq_(iset, IntervalSet([a, b])) | ||||
|  | ||||
|         iset = IntervalSet(a) | ||||
|         iset += b | ||||
|         eq_(iset, IntervalSet([a, b])) | ||||
|  | ||||
|         iset = IntervalSet(a) | ||||
|         iset.iadd_nocheck(b) | ||||
|         eq_(iset, IntervalSet([a, b])) | ||||
|  | ||||
|         iset = IntervalSet(a) + IntervalSet(b) | ||||
|         eq_(iset, IntervalSet([a, b])) | ||||
|  | ||||
|         iset = IntervalSet(b) + a | ||||
|         eq_(iset, IntervalSet([a, b])) | ||||
|  | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| import nilmdb | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
|  | ||||
| import nose | ||||
| from nose.tools import * | ||||
| @@ -9,8 +9,6 @@ import time | ||||
|  | ||||
| from test_helpers import * | ||||
|  | ||||
| import nilmdb.iteratorizer | ||||
|  | ||||
| def func_with_callback(a, b, callback): | ||||
|     callback(a) | ||||
|     callback(b) | ||||
| @@ -27,7 +25,8 @@ class TestIteratorizer(object): | ||||
|         eq_(self.result, "123") | ||||
|  | ||||
|         # Now make it an iterator | ||||
|         it = nilmdb.iteratorizer.Iteratorizer(lambda x: | ||||
|         it = nilmdb.utils.Iteratorizer( | ||||
|             lambda x: | ||||
|             func_with_callback(1, 2, x)) | ||||
|         result = "" | ||||
|         for i in it: | ||||
| @@ -35,7 +34,8 @@ class TestIteratorizer(object): | ||||
|         eq_(result, "123") | ||||
|  | ||||
|         # Make sure things work when an exception occurs | ||||
|         it = nilmdb.iteratorizer.Iteratorizer(lambda x: | ||||
|         it = nilmdb.utils.Iteratorizer( | ||||
|             lambda x: | ||||
|             func_with_callback(1, "a", x)) | ||||
|         result = "" | ||||
|         with assert_raises(TypeError) as e: | ||||
| @@ -48,7 +48,8 @@ class TestIteratorizer(object): | ||||
|         # itself.  This doesn't have a particular result in the test, | ||||
|         # but gains coverage. | ||||
|         def foo(): | ||||
|             it = nilmdb.iteratorizer.Iteratorizer(lambda x: | ||||
|             it = nilmdb.utils.Iteratorizer( | ||||
|                 lambda x: | ||||
|                 func_with_callback(1, 2, x)) | ||||
|             it.next() | ||||
|         foo() | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| import nilmdb | ||||
|  | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
|  | ||||
| from nose.tools import * | ||||
| from nose.tools import assert_raises | ||||
| @@ -28,9 +28,13 @@ class TestLayouts(object): | ||||
|     # Some nilmdb.layout tests.  Not complete, just fills in missing | ||||
|     # coverage. | ||||
|     def test_layouts(self): | ||||
|         x = nilmdb.layout.get_named("PrepData").description() | ||||
|         y = nilmdb.layout.get_named("float32_8").description() | ||||
|         eq_(repr(x), repr(y)) | ||||
|         x = nilmdb.layout.get_named("PrepData") | ||||
|         y = nilmdb.layout.get_named("float32_8") | ||||
|         eq_(x.count, y.count) | ||||
|         eq_(x.datatype, y.datatype) | ||||
|         y = nilmdb.layout.get_named("float32_7") | ||||
|         ne_(x.count, y.count) | ||||
|         eq_(x.datatype, y.datatype) | ||||
|  | ||||
|     def test_parsing(self): | ||||
|         self.real_t_parsing("PrepData", "RawData", "RawNotchedData") | ||||
|   | ||||
							
								
								
									
										53
									
								
								tests/test_lrucache.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								tests/test_lrucache.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| import nilmdb | ||||
| from nilmdb.utils.printf import * | ||||
|  | ||||
| import nose | ||||
| from nose.tools import * | ||||
| from nose.tools import assert_raises | ||||
| import threading | ||||
| import time | ||||
|  | ||||
| from test_helpers import * | ||||
|  | ||||
| @nilmdb.utils.lru_cache(size = 3) | ||||
| def foo1(n): | ||||
|     return n | ||||
|  | ||||
| @nilmdb.utils.lru_cache(size = 5) | ||||
| def foo2(n): | ||||
|     return n | ||||
|  | ||||
| def foo3d(n): | ||||
|     foo3d.destructed.append(n) | ||||
| foo3d.destructed = [] | ||||
| @nilmdb.utils.lru_cache(size = 3, onremove = foo3d) | ||||
| def foo3(n): | ||||
|     return n | ||||
|  | ||||
| class TestLRUCache(object): | ||||
|     def test(self): | ||||
|         [ foo1(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ] | ||||
|         eq_((foo1.cache_hits, foo1.cache_misses), (6, 3)) | ||||
|         [ foo1(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ] | ||||
|         eq_((foo1.cache_hits, foo1.cache_misses), (15, 3)) | ||||
|         [ foo1(n) for n in [ 4, 2, 1, 1, 4 ] ] | ||||
|         eq_((foo1.cache_hits, foo1.cache_misses), (18, 5)) | ||||
|  | ||||
|         [ foo2(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ] | ||||
|         eq_((foo2.cache_hits, foo2.cache_misses), (6, 3)) | ||||
|         [ foo2(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ] | ||||
|         eq_((foo2.cache_hits, foo2.cache_misses), (15, 3)) | ||||
|         [ foo2(n) for n in [ 4, 2, 1, 1, 4 ] ] | ||||
|         eq_((foo2.cache_hits, foo2.cache_misses), (19, 4)) | ||||
|  | ||||
|         [ foo3(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ] | ||||
|         eq_((foo3.cache_hits, foo3.cache_misses), (6, 3)) | ||||
|         [ foo3(n) for n in [ 1, 2, 3, 1, 2, 3, 1, 2, 3 ] ] | ||||
|         eq_((foo3.cache_hits, foo3.cache_misses), (15, 3)) | ||||
|         [ foo3(n) for n in [ 4, 2, 1, 1, 4 ] ] | ||||
|         eq_((foo3.cache_hits, foo3.cache_misses), (18, 5)) | ||||
|         eq_(foo3d.destructed, [1, 3]) | ||||
|         foo3.cache_remove(1) | ||||
|         eq_(foo3d.destructed, [1, 3, 1]) | ||||
|         foo3.cache_remove_all() | ||||
|         eq_(foo3d.destructed, [1, 3, 1, 2, 4 ]) | ||||
							
								
								
									
										59
									
								
								tests/test_mustclose.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								tests/test_mustclose.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,59 @@ | ||||
| import nilmdb | ||||
| from nilmdb.utils.printf import * | ||||
|  | ||||
| import nose | ||||
| from nose.tools import * | ||||
| from nose.tools import assert_raises | ||||
|  | ||||
| from test_helpers import * | ||||
|  | ||||
| import sys | ||||
| import cStringIO | ||||
|  | ||||
| err = cStringIO.StringIO() | ||||
|  | ||||
| @nilmdb.utils.must_close(errorfile = err) | ||||
| class Foo: | ||||
|     def __init__(self): | ||||
|         fprintf(err, "Init\n") | ||||
|  | ||||
|     def __del__(self): | ||||
|         fprintf(err, "Deleting\n") | ||||
|  | ||||
|     def close(self): | ||||
|         fprintf(err, "Closing\n") | ||||
|  | ||||
| @nilmdb.utils.must_close(errorfile = err) | ||||
| class Bar: | ||||
|     pass | ||||
|  | ||||
| class TestMustClose(object): | ||||
|     def test(self): | ||||
|  | ||||
|         # Note: this test might fail if the Python interpreter doesn't | ||||
|         # garbage collect the object (and call its __del__ function) | ||||
|         # right after a "del x". | ||||
|  | ||||
|         x = Foo() | ||||
|         del x | ||||
|         eq_(err.getvalue(), | ||||
|             "Init\n" | ||||
|             "error: Foo.close() wasn't called!\n" | ||||
|             "Deleting\n") | ||||
|  | ||||
|         err.truncate(0) | ||||
|  | ||||
|         y = Foo() | ||||
|         y.close() | ||||
|         del y | ||||
|         eq_(err.getvalue(), | ||||
|             "Init\n" | ||||
|             "Closing\n" | ||||
|             "Deleting\n") | ||||
|  | ||||
|         err.truncate(0) | ||||
|  | ||||
|         z = Bar() | ||||
|         z.close() | ||||
|         del z | ||||
|         eq_(err.getvalue(), "") | ||||
| @@ -14,6 +14,7 @@ import urllib2 | ||||
| from urllib2 import urlopen, HTTPError | ||||
| import Queue | ||||
| import cStringIO | ||||
| import time | ||||
|  | ||||
| testdb = "tests/testdb" | ||||
|  | ||||
| @@ -39,8 +40,8 @@ class Test00Nilmdb(object):  # named 00 so it runs first | ||||
|         capture = cStringIO.StringIO() | ||||
|         old = sys.stdout | ||||
|         sys.stdout = capture | ||||
|         with nilmdb.Timer("test"): | ||||
|             nilmdb.timer.time.sleep(0.01) | ||||
|         with nilmdb.utils.Timer("test"): | ||||
|             time.sleep(0.01) | ||||
|         sys.stdout = old | ||||
|         in_("test: ", capture.getvalue()) | ||||
|  | ||||
| @@ -69,12 +70,14 @@ class Test00Nilmdb(object):  # named 00 so it runs first | ||||
|         eq_(db.stream_list(layout="RawData"), [ ["/newton/raw", "RawData"] ]) | ||||
|         eq_(db.stream_list(path="/newton/raw"), [ ["/newton/raw", "RawData"] ]) | ||||
|  | ||||
|         # Verify that columns were made right | ||||
|         eq_(len(db.h5file.getNode("/newton/prep").cols), 9) | ||||
|         eq_(len(db.h5file.getNode("/newton/raw").cols), 7) | ||||
|         eq_(len(db.h5file.getNode("/newton/zzz/rawnotch").cols), 10) | ||||
|         assert(not db.h5file.getNode("/newton/prep").colindexed["timestamp"]) | ||||
|         assert(not db.h5file.getNode("/newton/prep").colindexed["c1"]) | ||||
|         # Verify that columns were made right (pytables specific) | ||||
|         if "h5file" in db.data.__dict__: | ||||
|             h5file = db.data.h5file | ||||
|             eq_(len(h5file.getNode("/newton/prep").cols), 9) | ||||
|             eq_(len(h5file.getNode("/newton/raw").cols), 7) | ||||
|             eq_(len(h5file.getNode("/newton/zzz/rawnotch").cols), 10) | ||||
|             assert(not h5file.getNode("/newton/prep").colindexed["timestamp"]) | ||||
|             assert(not h5file.getNode("/newton/prep").colindexed["c1"]) | ||||
|  | ||||
|         # Set / get metadata | ||||
|         eq_(db.stream_get_metadata("/newton/prep"), {}) | ||||
| @@ -196,6 +199,6 @@ class TestServer(object): | ||||
|         # GET instead of POST (no body) | ||||
|         # (actual POST test is done by client code) | ||||
|         with assert_raises(HTTPError) as e: | ||||
|             getjson("/stream/insert?path=/newton/prep") | ||||
|             getjson("/stream/insert?path=/newton/prep&start=0&end=0") | ||||
|         eq_(e.exception.code, 400) | ||||
|  | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| import nilmdb | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
|  | ||||
| from nose.tools import * | ||||
| from nose.tools import assert_raises | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| import nilmdb | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
|  | ||||
| from nose.tools import * | ||||
| from nose.tools import assert_raises | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| import nilmdb | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
|  | ||||
| import nose | ||||
| from nose.tools import * | ||||
| @@ -57,7 +57,7 @@ class TestUnserialized(Base): | ||||
| class TestSerialized(Base): | ||||
|     def setUp(self): | ||||
|         self.realfoo = Foo() | ||||
|         self.foo = nilmdb.serializer.WrapObject(self.realfoo) | ||||
|         self.foo = nilmdb.utils.Serializer(self.realfoo) | ||||
|  | ||||
|     def tearDown(self): | ||||
|         del self.foo | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| import nilmdb | ||||
| from nilmdb.printf import * | ||||
| from nilmdb.utils.printf import * | ||||
|  | ||||
| import datetime_tz | ||||
|  | ||||
|   | ||||
							
								
								
									
										25
									
								
								timeit.sh
									
									
									
									
									
								
							
							
						
						
									
										25
									
								
								timeit.sh
									
									
									
									
									
								
							| @@ -1,21 +1,22 @@ | ||||
| ./nilmtool.py destroy /bpnilm/2/raw | ||||
| ./nilmtool.py create /bpnilm/2/raw RawData | ||||
|  | ||||
| if true; then | ||||
| if false; then | ||||
|     time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-110000 -r 8000 /bpnilm/2/raw | ||||
|     time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s 20110513-120001 -r 8000 /bpnilm/2/raw | ||||
| else | ||||
|     for i in $(seq 2000 2050); do | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-010001 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-020002 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-030003 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-040004 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-050005 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-060006 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-070007 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-080008 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-090009 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-100010 /bpnilm/2/raw | ||||
|     # 170 hours, about 98 gigs uncompressed: | ||||
|     for i in $(seq 2000 2016); do | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-010001 -r 8000 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-020002 -r 8000 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-030003 -r 8000 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-040004 -r 8000 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-050005 -r 8000 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-060006 -r 8000 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-070007 -r 8000 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-080008 -r 8000 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-090009 -r 8000 /bpnilm/2/raw | ||||
| 	time zcat /home/jim/bpnilm-data/snapshot-1-20110513-110002.raw.gz | ./nilmtool.py insert -s ${i}0101-100010 -r 8000 /bpnilm/2/raw | ||||
|     done | ||||
| fi | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user