@@ -1,605 +0,0 @@ | |||
// The RedBlackEntry class is an Abstract Base Class. This means that no | |||
// instance of the RedBlackEntry class can exist. Only classes which | |||
// inherit from the RedBlackEntry class can exist. Furthermore any class | |||
// which inherits from the RedBlackEntry class must define the member | |||
// function GetKey(). The Print() member function does not have to | |||
// be defined because a default definition exists. | |||
// | |||
// The GetKey() function should return an integer key for that entry. | |||
// The key for an entry should never change otherwise bad things might occur. | |||
class RedBlackEntry { | |||
public: | |||
RedBlackEntry(); | |||
virtual ~RedBlackEntry(); | |||
virtual int GetKey() const = 0; | |||
virtual void Print() const; | |||
}; | |||
class RedBlackTreeNode { | |||
friend class RedBlackTree; | |||
public: | |||
void Print(RedBlackTreeNode*, | |||
RedBlackTreeNode*) const; | |||
RedBlackTreeNode(); | |||
RedBlackTreeNode(RedBlackEntry *); | |||
RedBlackEntry * GetEntry() const; | |||
~RedBlackTreeNode(); | |||
protected: | |||
RedBlackEntry * storedEntry; | |||
int key; | |||
int red; /* if red=0 then the node is black */ | |||
RedBlackTreeNode * left; | |||
RedBlackTreeNode * right; | |||
RedBlackTreeNode * parent; | |||
}; | |||
class RedBlackTree { | |||
public: | |||
RedBlackTree(); | |||
~RedBlackTree(); | |||
void Print() const; | |||
RedBlackEntry * DeleteNode(RedBlackTreeNode *); | |||
RedBlackTreeNode * Insert(RedBlackEntry *); | |||
RedBlackTreeNode * GetPredecessorOf(RedBlackTreeNode *) const; | |||
RedBlackTreeNode * GetSuccessorOf(RedBlackTreeNode *) const; | |||
RedBlackTreeNode * Search(int key); | |||
TemplateStack<RedBlackTreeNode *> * Enumerate(int low, int high) ; | |||
void CheckAssumptions() const; | |||
protected: | |||
/* A sentinel is used for root and for nil. These sentinels are */ | |||
/* created when RedBlackTreeCreate is caled. root->left should always */ | |||
/* point to the node which is the root of the tree. nil points to a */ | |||
/* node which should always be black but has aribtrary children and */ | |||
/* parent and no key or info. The point of using these sentinels is so */ | |||
/* that the root and nil nodes do not require special cases in the code */ | |||
RedBlackTreeNode * root; | |||
RedBlackTreeNode * nil; | |||
void LeftRotate(RedBlackTreeNode *); | |||
void RightRotate(RedBlackTreeNode *); | |||
void TreeInsertHelp(RedBlackTreeNode *); | |||
void TreePrintHelper(RedBlackTreeNode *) const; | |||
void FixUpMaxHigh(RedBlackTreeNode *); | |||
void DeleteFixUp(RedBlackTreeNode *); | |||
}; | |||
const int MIN_INT=-MAX_INT; | |||
RedBlackTreeNode::RedBlackTreeNode(){ | |||
}; | |||
RedBlackTreeNode::RedBlackTreeNode(RedBlackEntry * newEntry) | |||
: storedEntry (newEntry) , key(newEntry->GetKey()) { | |||
}; | |||
RedBlackTreeNode::~RedBlackTreeNode(){ | |||
}; | |||
RedBlackEntry * RedBlackTreeNode::GetEntry() const {return storedEntry;} | |||
RedBlackEntry::RedBlackEntry(){ | |||
}; | |||
RedBlackEntry::~RedBlackEntry(){ | |||
}; | |||
void RedBlackEntry::Print() const { | |||
cout << "No Print Method defined. Using Default: " << GetKey() << endl; | |||
} | |||
RedBlackTree::RedBlackTree() | |||
{ | |||
nil = new RedBlackTreeNode; | |||
nil->left = nil->right = nil->parent = nil; | |||
nil->red = 0; | |||
nil->key = MIN_INT; | |||
nil->storedEntry = NULL; | |||
root = new RedBlackTreeNode; | |||
root->parent = root->left = root->right = nil; | |||
root->key = MAX_INT; | |||
root->red=0; | |||
root->storedEntry = NULL; | |||
} | |||
/***********************************************************************/ | |||
/* FUNCTION: LeftRotate */ | |||
/**/ | |||
/* INPUTS: the node to rotate on */ | |||
/**/ | |||
/* OUTPUT: None */ | |||
/**/ | |||
/* Modifies Input: this, x */ | |||
/**/ | |||
/* EFFECTS: Rotates as described in _Introduction_To_Algorithms by */ | |||
/* Cormen, Leiserson, Rivest (Chapter 14). Basically this */ | |||
/* makes the parent of x be to the left of x, x the parent of */ | |||
/* its parent before the rotation and fixes other pointers */ | |||
/* accordingly. */ | |||
/***********************************************************************/ | |||
void RedBlackTree::LeftRotate(RedBlackTreeNode* x) { | |||
RedBlackTreeNode* y; | |||
/* I originally wrote this function to use the sentinel for */ | |||
/* nil to avoid checking for nil. However this introduces a */ | |||
/* very subtle bug because sometimes this function modifies */ | |||
/* the parent pointer of nil. This can be a problem if a */ | |||
/* function which calls LeftRotate also uses the nil sentinel */ | |||
/* and expects the nil sentinel's parent pointer to be unchanged */ | |||
/* after calling this function. For example, when DeleteFixUP */ | |||
/* calls LeftRotate it expects the parent pointer of nil to be */ | |||
/* unchanged. */ | |||
y=x->right; | |||
x->right=y->left; | |||
if (y->left != nil) y->left->parent=x; /* used to use sentinel here */ | |||
/* and do an unconditional assignment instead of testing for nil */ | |||
y->parent=x->parent; | |||
/* instead of checking if x->parent is the root as in the book, we */ | |||
/* count on the root sentinel to implicitly take care of this case */ | |||
if( x == x->parent->left) { | |||
x->parent->left=y; | |||
} else { | |||
x->parent->right=y; | |||
} | |||
y->left=x; | |||
x->parent=y; | |||
} | |||
/***********************************************************************/ | |||
/* FUNCTION: RighttRotate */ | |||
/**/ | |||
/* INPUTS: node to rotate on */ | |||
/**/ | |||
/* OUTPUT: None */ | |||
/**/ | |||
/* Modifies Input?: this, y */ | |||
/**/ | |||
/* EFFECTS: Rotates as described in _Introduction_To_Algorithms by */ | |||
/* Cormen, Leiserson, Rivest (Chapter 14). Basically this */ | |||
/* makes the parent of x be to the left of x, x the parent of */ | |||
/* its parent before the rotation and fixes other pointers */ | |||
/* accordingly. */ | |||
/***********************************************************************/ | |||
void RedBlackTree::RightRotate(RedBlackTreeNode* y) { | |||
RedBlackTreeNode* x; | |||
/* I originally wrote this function to use the sentinel for */ | |||
/* nil to avoid checking for nil. However this introduces a */ | |||
/* very subtle bug because sometimes this function modifies */ | |||
/* the parent pointer of nil. This can be a problem if a */ | |||
/* function which calls LeftRotate also uses the nil sentinel */ | |||
/* and expects the nil sentinel's parent pointer to be unchanged */ | |||
/* after calling this function. For example, when DeleteFixUP */ | |||
/* calls LeftRotate it expects the parent pointer of nil to be */ | |||
/* unchanged. */ | |||
x=y->left; | |||
y->left=x->right; | |||
if (nil != x->right) x->right->parent=y; /*used to use sentinel here */ | |||
/* and do an unconditional assignment instead of testing for nil */ | |||
/* instead of checking if x->parent is the root as in the book, we */ | |||
/* count on the root sentinel to implicitly take care of this case */ | |||
x->parent=y->parent; | |||
if( y == y->parent->left) { | |||
y->parent->left=x; | |||
} else { | |||
y->parent->right=x; | |||
} | |||
x->right=y; | |||
y->parent=x; | |||
} | |||
/***********************************************************************/ | |||
/* FUNCTION: TreeInsertHelp */ | |||
/**/ | |||
/* INPUTS: z is the node to insert */ | |||
/**/ | |||
/* OUTPUT: none */ | |||
/**/ | |||
/* Modifies Input: this, z */ | |||
/**/ | |||
/* EFFECTS: Inserts z into the tree as if it were a regular binary tree */ | |||
/* using the algorithm described in _Introduction_To_Algorithms_ */ | |||
/* by Cormen et al. This funciton is only intended to be called */ | |||
/* by the Insert function and not by the user */ | |||
/***********************************************************************/ | |||
void RedBlackTree::TreeInsertHelp(RedBlackTreeNode* z) { | |||
/* This function should only be called by RedBlackTree::Insert */ | |||
RedBlackTreeNode* x; | |||
RedBlackTreeNode* y; | |||
z->left=z->right=nil; | |||
y=root; | |||
x=root->left; | |||
while( x != nil) { | |||
y=x; | |||
if ( x->key > z->key) { | |||
x=x->left; | |||
} else { /* x->key <= z->key */ | |||
x=x->right; | |||
} | |||
} | |||
z->parent=y; | |||
if ( (y == root) || | |||
(y->key > z->key) ) { | |||
y->left=z; | |||
} else { | |||
y->right=z; | |||
} | |||
} | |||
/* Before calling InsertNode the node x should have its key set */ | |||
/***********************************************************************/ | |||
/* FUNCTION: InsertNode */ | |||
/**/ | |||
/* INPUTS: newEntry is the entry to insert*/ | |||
/**/ | |||
/* OUTPUT: This function returns a pointer to the newly inserted node */ | |||
/* which is guarunteed to be valid until this node is deleted. */ | |||
/* What this means is if another data structure stores this */ | |||
/* pointer then the tree does not need to be searched when this */ | |||
/* is to be deleted. */ | |||
/**/ | |||
/* Modifies Input: tree */ | |||
/**/ | |||
/* EFFECTS: Creates a node node which contains the appropriate key and */ | |||
/* info pointers and inserts it into the tree. */ | |||
/***********************************************************************/ | |||
/* jim */ | |||
RedBlackTreeNode * RedBlackTree::Insert(RedBlackEntry * newEntry) | |||
{ | |||
RedBlackTreeNode * y; | |||
RedBlackTreeNode * x; | |||
RedBlackTreeNode * newNode; | |||
x = new RedBlackTreeNode(newEntry); | |||
TreeInsertHelp(x); | |||
newNode = x; | |||
x->red=1; | |||
while(x->parent->red) { /* use sentinel instead of checking for root */ | |||
if (x->parent == x->parent->parent->left) { | |||
y=x->parent->parent->right; | |||
if (y->red) { | |||
x->parent->red=0; | |||
y->red=0; | |||
x->parent->parent->red=1; | |||
x=x->parent->parent; | |||
} else { | |||
if (x == x->parent->right) { | |||
x=x->parent; | |||
LeftRotate(x); | |||
} | |||
x->parent->red=0; | |||
x->parent->parent->red=1; | |||
RightRotate(x->parent->parent); | |||
} | |||
} else { /* case for x->parent == x->parent->parent->right */ | |||
/* this part is just like the section above with */ | |||
/* left and right interchanged */ | |||
y=x->parent->parent->left; | |||
if (y->red) { | |||
x->parent->red=0; | |||
y->red=0; | |||
x->parent->parent->red=1; | |||
x=x->parent->parent; | |||
} else { | |||
if (x == x->parent->left) { | |||
x=x->parent; | |||
RightRotate(x); | |||
} | |||
x->parent->red=0; | |||
x->parent->parent->red=1; | |||
LeftRotate(x->parent->parent); | |||
} | |||
} | |||
} | |||
root->left->red=0; | |||
return(newNode); | |||
} | |||
/***********************************************************************/ | |||
/* FUNCTION: GetSuccessorOf */ | |||
/**/ | |||
/* INPUTS: x is the node we want the succesor of */ | |||
/**/ | |||
/* OUTPUT: This function returns the successor of x or NULL if no */ | |||
/* successor exists. */ | |||
/**/ | |||
/* Modifies Input: none */ | |||
/**/ | |||
/* Note: uses the algorithm in _Introduction_To_Algorithms_ */ | |||
/***********************************************************************/ | |||
RedBlackTreeNode * RedBlackTree::GetSuccessorOf(RedBlackTreeNode * x) const | |||
{ | |||
RedBlackTreeNode* y; | |||
if (nil != (y = x->right)) { /* assignment to y is intentional */ | |||
while(y->left != nil) { /* returns the minium of the right subtree of x */ | |||
y=y->left; | |||
} | |||
return(y); | |||
} else { | |||
y=x->parent; | |||
while(x == y->right) { /* sentinel used instead of checking for nil */ | |||
x=y; | |||
y=y->parent; | |||
} | |||
if (y == root) return(nil); | |||
return(y); | |||
} | |||
} | |||
/***********************************************************************/ | |||
/* FUNCTION: GetPredecessorOf */ | |||
/**/ | |||
/* INPUTS: x is the node to get predecessor of */ | |||
/**/ | |||
/* OUTPUT: This function returns the predecessor of x or NULL if no */ | |||
/* predecessor exists. */ | |||
/**/ | |||
/* Modifies Input: none */ | |||
/**/ | |||
/* Note: uses the algorithm in _Introduction_To_Algorithms_ */ | |||
/***********************************************************************/ | |||
RedBlackTreeNode * RedBlackTree::GetPredecessorOf(RedBlackTreeNode * x) const { | |||
RedBlackTreeNode* y; | |||
if (nil != (y = x->left)) { /* assignment to y is intentional */ | |||
while(y->right != nil) { /* returns the maximum of the left subtree of x */ | |||
y=y->right; | |||
} | |||
return(y); | |||
} else { | |||
y=x->parent; | |||
while(x == y->left) { | |||
if (y == root) return(nil); | |||
x=y; | |||
y=y->parent; | |||
} | |||
return(y); | |||
} | |||
} | |||
/***********************************************************************/ | |||
/* FUNCTION: Print */ | |||
/**/ | |||
/* INPUTS: none */ | |||
/**/ | |||
/* OUTPUT: none */ | |||
/**/ | |||
/* EFFECTS: This function recursively prints the nodes of the tree */ | |||
/* inorder. */ | |||
/**/ | |||
/* Modifies Input: none */ | |||
/**/ | |||
/* Note: This function should only be called from ITTreePrint */ | |||
/***********************************************************************/ | |||
void RedBlackTreeNode::Print(RedBlackTreeNode * nil, | |||
RedBlackTreeNode * root) const { | |||
storedEntry->Print(); | |||
printf(", key=%i ",key); | |||
printf(" l->key="); | |||
if( left == nil) printf("NULL"); else printf("%i",left->key); | |||
printf(" r->key="); | |||
if( right == nil) printf("NULL"); else printf("%i",right->key); | |||
printf(" p->key="); | |||
if( parent == root) printf("NULL"); else printf("%i",parent->key); | |||
printf(" red=%i\n",red); | |||
} | |||
void RedBlackTree::TreePrintHelper( RedBlackTreeNode* x) const { | |||
if (x != nil) { | |||
TreePrintHelper(x->left); | |||
x->Print(nil,root); | |||
TreePrintHelper(x->right); | |||
} | |||
} | |||
/***********************************************************************/ | |||
/* FUNCTION: Print */ | |||
/**/ | |||
/* INPUTS: none */ | |||
/**/ | |||
/* OUTPUT: none */ | |||
/**/ | |||
/* EFFECT: This function recursively prints the nodes of the tree */ | |||
/* inorder. */ | |||
/**/ | |||
/* Modifies Input: none */ | |||
/**/ | |||
/***********************************************************************/ | |||
void RedBlackTree::Print() const { | |||
TreePrintHelper(root->left); | |||
} | |||
/***********************************************************************/ | |||
/* FUNCTION: DeleteFixUp */ | |||
/**/ | |||
/* INPUTS: x is the child of the spliced */ | |||
/* out node in DeleteNode. */ | |||
/**/ | |||
/* OUTPUT: none */ | |||
/**/ | |||
/* EFFECT: Performs rotations and changes colors to restore red-black */ | |||
/* properties after a node is deleted */ | |||
/**/ | |||
/* Modifies Input: this, x */ | |||
/**/ | |||
/* The algorithm from this function is from _Introduction_To_Algorithms_ */ | |||
/***********************************************************************/ | |||
void RedBlackTree::DeleteFixUp(RedBlackTreeNode* x) { | |||
RedBlackTreeNode * w; | |||
RedBlackTreeNode * rootLeft = root->left; | |||
while( (!x->red) && (rootLeft != x)) { | |||
if (x == x->parent->left) { | |||
// | |||
w=x->parent->right; | |||
if (w->red) { | |||
w->red=0; | |||
x->parent->red=1; | |||
LeftRotate(x->parent); | |||
w=x->parent->right; | |||
} | |||
if ( (!w->right->red) && (!w->left->red) ) { | |||
w->red=1; | |||
x=x->parent; | |||
} else { | |||
if (!w->right->red) { | |||
w->left->red=0; | |||
w->red=1; | |||
RightRotate(w); | |||
w=x->parent->right; | |||
} | |||
w->red=x->parent->red; | |||
x->parent->red=0; | |||
w->right->red=0; | |||
LeftRotate(x->parent); | |||
x=rootLeft; /* this is to exit while loop */ | |||
} | |||
// | |||
} else { /* the code below is has left and right switched from above */ | |||
w=x->parent->left; | |||
if (w->red) { | |||
w->red=0; | |||
x->parent->red=1; | |||
RightRotate(x->parent); | |||
w=x->parent->left; | |||
} | |||
if ( (!w->right->red) && (!w->left->red) ) { | |||
w->red=1; | |||
x=x->parent; | |||
} else { | |||
if (!w->left->red) { | |||
w->right->red=0; | |||
w->red=1; | |||
LeftRotate(w); | |||
w=x->parent->left; | |||
} | |||
w->red=x->parent->red; | |||
x->parent->red=0; | |||
w->left->red=0; | |||
RightRotate(x->parent); | |||
x=rootLeft; /* this is to exit while loop */ | |||
} | |||
} | |||
} | |||
x->red=0; | |||
} | |||
/***********************************************************************/ | |||
/* FUNCTION: DeleteNode */ | |||
/**/ | |||
/* INPUTS: tree is the tree to delete node z from */ | |||
/**/ | |||
/* OUTPUT: returns the RedBlackEntry stored at deleted node */ | |||
/**/ | |||
/* EFFECT: Deletes z from tree and but don't call destructor */ | |||
/**/ | |||
/* Modifies Input: z */ | |||
/**/ | |||
/* The algorithm from this function is from _Introduction_To_Algorithms_ */ | |||
/***********************************************************************/ | |||
RedBlackEntry * RedBlackTree::DeleteNode(RedBlackTreeNode * z){ | |||
RedBlackTreeNode* y; | |||
RedBlackTreeNode* x; | |||
RedBlackEntry * returnValue = z->storedEntry; | |||
y= ((z->left == nil) || (z->right == nil)) ? z : GetSuccessorOf(z); | |||
x= (y->left == nil) ? y->right : y->left; | |||
if (root == (x->parent = y->parent)) { /* assignment of y->p to x->p is intentional */ | |||
root->left=x; | |||
} else { | |||
if (y == y->parent->left) { | |||
y->parent->left=x; | |||
} else { | |||
y->parent->right=x; | |||
} | |||
} | |||
if (y != z) { /* y should not be nil in this case */ | |||
/* y is the node to splice out and x is its child */ | |||
y->left=z->left; | |||
y->right=z->right; | |||
y->parent=z->parent; | |||
z->left->parent=z->right->parent=y; | |||
if (z == z->parent->left) { | |||
z->parent->left=y; | |||
} else { | |||
z->parent->right=y; | |||
} | |||
if (!(y->red)) { | |||
y->red = z->red; | |||
DeleteFixUp(x); | |||
} else | |||
y->red = z->red; | |||
delete z; | |||
} else { | |||
if (!(y->red)) DeleteFixUp(x); | |||
delete y; | |||
} | |||
return returnValue; | |||
} | |||
/***********************************************************************/ | |||
/* FUNCTION: Enumerate */ | |||
/**/ | |||
/* INPUTS: tree is the tree to look for keys between [low,high] */ | |||
/**/ | |||
/* OUTPUT: stack containing pointers to the nodes between [low,high] */ | |||
/**/ | |||
/* Modifies Input: none */ | |||
/**/ | |||
/* EFFECT: Returns a stack containing pointers to nodes containing */ | |||
/* keys which in [low,high]/ */ | |||
/**/ | |||
/***********************************************************************/ | |||
TemplateStack<RedBlackTreeNode *> * RedBlackTree::Enumerate(int low, | |||
int high) { | |||
TemplateStack<RedBlackTreeNode *> * enumResultStack = | |||
new TemplateStack<RedBlackTreeNode *>(4); | |||
RedBlackTreeNode* x=root->left; | |||
RedBlackTreeNode* lastBest=NULL; | |||
while(nil != x) { | |||
if ( x->key > high ) { | |||
x=x->left; | |||
} else { | |||
lastBest=x; | |||
x=x->right; | |||
} | |||
} | |||
while ( (lastBest) && (low <= lastBest->key) ) { | |||
enumResultStack->Push(lastBest); | |||
lastBest=GetPredecessorOf(lastBest); | |||
} | |||
return(enumResultStack); | |||
} |
@@ -1,495 +0,0 @@ | |||
# cython: profile=False | |||
# This is from bx-python 554:07aca5a9f6fc (BSD licensed), modified to | |||
# store interval ranges as doubles rather than 32-bit integers. | |||
""" | |||
Data structure for performing intersect queries on a set of intervals which | |||
preserves all information about the intervals (unlike bitset projection methods). | |||
:Authors: James Taylor (james@jamestaylor.org), | |||
Ian Schenk (ian.schenck@gmail.com), | |||
Brent Pedersen (bpederse@gmail.com) | |||
""" | |||
# Historical note: | |||
# This module original contained an implementation based on sorted endpoints | |||
# and a binary search, using an idea from Scott Schwartz and Piotr Berman. | |||
# Later an interval tree implementation was implemented by Ian for Galaxy's | |||
# join tool (see `bx.intervals.operations.quicksect.py`). This was then | |||
# converted to Cython by Brent, who also added support for | |||
# upstream/downstream/neighbor queries. This was modified by James to | |||
# handle half-open intervals strictly, to maintain sort order, and to | |||
# implement the same interface as the original Intersecter. | |||
#cython: cdivision=True | |||
import operator | |||
cdef extern from "stdlib.h": | |||
int ceil(float f) | |||
float log(float f) | |||
int RAND_MAX | |||
int rand() | |||
int strlen(char *) | |||
int iabs(int) | |||
cdef inline double dmax2(double a, double b): | |||
if b > a: return b | |||
return a | |||
cdef inline double dmax3(double a, double b, double c): | |||
if b > a: | |||
if c > b: | |||
return c | |||
return b | |||
if a > c: | |||
return a | |||
return c | |||
cdef inline double dmin3(double a, double b, double c): | |||
if b < a: | |||
if c < b: | |||
return c | |||
return b | |||
if a < c: | |||
return a | |||
return c | |||
cdef inline double dmin2(double a, double b): | |||
if b < a: return b | |||
return a | |||
cdef float nlog = -1.0 / log(0.5) | |||
cdef class IntervalNode: | |||
""" | |||
A single node of an `IntervalTree`. | |||
NOTE: Unless you really know what you are doing, you probably should us | |||
`IntervalTree` rather than using this directly. | |||
""" | |||
cdef float priority | |||
cdef public object interval | |||
cdef public double start, end | |||
cdef double minend, maxend, minstart | |||
cdef IntervalNode cleft, cright, croot | |||
property left_node: | |||
def __get__(self): | |||
return self.cleft if self.cleft is not EmptyNode else None | |||
property right_node: | |||
def __get__(self): | |||
return self.cright if self.cright is not EmptyNode else None | |||
property root_node: | |||
def __get__(self): | |||
return self.croot if self.croot is not EmptyNode else None | |||
def __repr__(self): | |||
return "IntervalNode(%g, %g)" % (self.start, self.end) | |||
def __cinit__(IntervalNode self, double start, double end, object interval): | |||
# Python lacks the binomial distribution, so we convert a | |||
# uniform into a binomial because it naturally scales with | |||
# tree size. Also, python's uniform is perfect since the | |||
# upper limit is not inclusive, which gives us undefined here. | |||
self.priority = ceil(nlog * log(-1.0/(1.0 * rand()/RAND_MAX - 1))) | |||
self.start = start | |||
self.end = end | |||
self.interval = interval | |||
self.maxend = end | |||
self.minstart = start | |||
self.minend = end | |||
self.cleft = EmptyNode | |||
self.cright = EmptyNode | |||
self.croot = EmptyNode | |||
cpdef IntervalNode insert(IntervalNode self, double start, double end, object interval): | |||
""" | |||
Insert a new IntervalNode into the tree of which this node is | |||
currently the root. The return value is the new root of the tree (which | |||
may or may not be this node!) | |||
""" | |||
cdef IntervalNode croot = self | |||
# If starts are the same, decide which to add interval to based on | |||
# end, thus maintaining sortedness relative to start/end | |||
cdef double decision_endpoint = start | |||
if start == self.start: | |||
decision_endpoint = end | |||
if decision_endpoint > self.start: | |||
# insert to cright tree | |||
if self.cright is not EmptyNode: | |||
self.cright = self.cright.insert( start, end, interval ) | |||
else: | |||
self.cright = IntervalNode( start, end, interval ) | |||
# rebalance tree | |||
if self.priority < self.cright.priority: | |||
croot = self.rotate_left() | |||
else: | |||
# insert to cleft tree | |||
if self.cleft is not EmptyNode: | |||
self.cleft = self.cleft.insert( start, end, interval) | |||
else: | |||
self.cleft = IntervalNode( start, end, interval) | |||
# rebalance tree | |||
if self.priority < self.cleft.priority: | |||
croot = self.rotate_right() | |||
croot.set_ends() | |||
self.cleft.croot = croot | |||
self.cright.croot = croot | |||
return croot | |||
cdef IntervalNode rotate_right(IntervalNode self): | |||
cdef IntervalNode croot = self.cleft | |||
self.cleft = self.cleft.cright | |||
croot.cright = self | |||
self.set_ends() | |||
return croot | |||
cdef IntervalNode rotate_left(IntervalNode self): | |||
cdef IntervalNode croot = self.cright | |||
self.cright = self.cright.cleft | |||
croot.cleft = self | |||
self.set_ends() | |||
return croot | |||
cdef inline void set_ends(IntervalNode self): | |||
if self.cright is not EmptyNode and self.cleft is not EmptyNode: | |||
self.maxend = dmax3(self.end, self.cright.maxend, self.cleft.maxend) | |||
self.minend = dmin3(self.end, self.cright.minend, self.cleft.minend) | |||
self.minstart = dmin3(self.start, self.cright.minstart, self.cleft.minstart) | |||
elif self.cright is not EmptyNode: | |||
self.maxend = dmax2(self.end, self.cright.maxend) | |||
self.minend = dmin2(self.end, self.cright.minend) | |||
self.minstart = dmin2(self.start, self.cright.minstart) | |||
elif self.cleft is not EmptyNode: | |||
self.maxend = dmax2(self.end, self.cleft.maxend) | |||
self.minend = dmin2(self.end, self.cleft.minend) | |||
self.minstart = dmin2(self.start, self.cleft.minstart) | |||
def intersect( self, double start, double end, sort=True ): | |||
""" | |||
given a start and a end, return a list of features | |||
falling within that range | |||
""" | |||
cdef list results = [] | |||
self._intersect( start, end, results ) | |||
if sort: | |||
results = sorted(results) | |||
return results | |||
find = intersect | |||
cdef void _intersect( IntervalNode self, double start, double end, list results): | |||
# Left subtree | |||
if self.cleft is not EmptyNode and self.cleft.maxend > start: | |||
self.cleft._intersect( start, end, results ) | |||
# This interval | |||
if ( self.end > start ) and ( self.start < end ): | |||
results.append( self.interval ) | |||
# Right subtree | |||
if self.cright is not EmptyNode and self.start < end: | |||
self.cright._intersect( start, end, results ) | |||
cdef void _seek_left(IntervalNode self, double position, list results, int n, double max_dist): | |||
# we know we can bail in these 2 cases. | |||
if self.maxend + max_dist < position: | |||
return | |||
if self.minstart > position: | |||
return | |||
# the ordering of these 3 blocks makes it so the results are | |||
# ordered nearest to farest from the query position | |||
if self.cright is not EmptyNode: | |||
self.cright._seek_left(position, results, n, max_dist) | |||
if -1 < position - self.end < max_dist: | |||
results.append(self.interval) | |||
# TODO: can these conditionals be more stringent? | |||
if self.cleft is not EmptyNode: | |||
self.cleft._seek_left(position, results, n, max_dist) | |||
cdef void _seek_right(IntervalNode self, double position, list results, int n, double max_dist): | |||
# we know we can bail in these 2 cases. | |||
if self.maxend < position: return | |||
if self.minstart - max_dist > position: return | |||
#print "SEEK_RIGHT:",self, self.cleft, self.maxend, self.minstart, position | |||
# the ordering of these 3 blocks makes it so the results are | |||
# ordered nearest to farest from the query position | |||
if self.cleft is not EmptyNode: | |||
self.cleft._seek_right(position, results, n, max_dist) | |||
if -1 < self.start - position < max_dist: | |||
results.append(self.interval) | |||
if self.cright is not EmptyNode: | |||
self.cright._seek_right(position, results, n, max_dist) | |||
cpdef left(self, position, int n=1, double max_dist=2500): | |||
""" | |||
find n features with a start > than `position` | |||
f: a Interval object (or anything with an `end` attribute) | |||
n: the number of features to return | |||
max_dist: the maximum distance to look before giving up. | |||
""" | |||
cdef list results = [] | |||
# use start - 1 becuase .left() assumes strictly left-of | |||
self._seek_left( position - 1, results, n, max_dist ) | |||
if len(results) == n: return results | |||
r = results | |||
r.sort(key=operator.attrgetter('end'), reverse=True) | |||
return r[:n] | |||
cpdef right(self, position, int n=1, double max_dist=2500): | |||
""" | |||
find n features with a end < than position | |||
f: a Interval object (or anything with a `start` attribute) | |||
n: the number of features to return | |||
max_dist: the maximum distance to look before giving up. | |||
""" | |||
cdef list results = [] | |||
# use end + 1 becuase .right() assumes strictly right-of | |||
self._seek_right(position + 1, results, n, max_dist) | |||
if len(results) == n: return results | |||
r = results | |||
r.sort(key=operator.attrgetter('start')) | |||
return r[:n] | |||
def traverse(self): | |||
if self.cleft is not EmptyNode: | |||
for node in self.cleft.traverse(): | |||
yield node | |||
yield self.interval | |||
if self.cright is not EmptyNode: | |||
for node in self.cright.traverse(): | |||
yield node | |||
cdef IntervalNode EmptyNode = IntervalNode( 0, 0, Interval(0, 0)) | |||
## ---- Wrappers that retain the old interface ------------------------------- | |||
cdef class Interval: | |||
""" | |||
Basic feature, with required integer start and end properties. | |||
Also accepts optional strand as +1 or -1 (used for up/downstream queries), | |||
a name, and any arbitrary data is sent in on the info keyword argument | |||
>>> from bx.intervals.intersection import Interval | |||
>>> f1 = Interval(23, 36) | |||
>>> f2 = Interval(34, 48, value={'chr':12, 'anno':'transposon'}) | |||
>>> f2 | |||
Interval(34, 48, value={'anno': 'transposon', 'chr': 12}) | |||
""" | |||
cdef public double start, end | |||
cdef public object value, chrom, strand | |||
def __init__(self, double start, double end, object value=None, object chrom=None, object strand=None ): | |||
assert start <= end, "start must be less than end" | |||
self.start = start | |||
self.end = end | |||
self.value = value | |||
self.chrom = chrom | |||
self.strand = strand | |||
def __repr__(self): | |||
fstr = "Interval(%g, %g" % (self.start, self.end) | |||
if not self.value is None: | |||
fstr += ", value=" + str(self.value) | |||
fstr += ")" | |||
return fstr | |||
def __richcmp__(self, other, op): | |||
if op == 0: | |||
# < | |||
return self.start < other.start or self.end < other.end | |||
elif op == 1: | |||
# <= | |||
return self == other or self < other | |||
elif op == 2: | |||
# == | |||
return self.start == other.start and self.end == other.end | |||
elif op == 3: | |||
# != | |||
return self.start != other.start or self.end != other.end | |||
elif op == 4: | |||
# > | |||
return self.start > other.start or self.end > other.end | |||
elif op == 5: | |||
# >= | |||
return self == other or self > other | |||
cdef class IntervalTree: | |||
""" | |||
Data structure for performing window intersect queries on a set of | |||
of possibly overlapping 1d intervals. | |||
Usage | |||
===== | |||
Create an empty IntervalTree | |||
>>> from bx.intervals.intersection import Interval, IntervalTree | |||
>>> intersecter = IntervalTree() | |||
An interval is a start and end position and a value (possibly None). | |||
You can add any object as an interval: | |||
>>> intersecter.insert( 0, 10, "food" ) | |||
>>> intersecter.insert( 3, 7, dict(foo='bar') ) | |||
>>> intersecter.find( 2, 5 ) | |||
['food', {'foo': 'bar'}] | |||
If the object has start and end attributes (like the Interval class) there | |||
is are some shortcuts: | |||
>>> intersecter = IntervalTree() | |||
>>> intersecter.insert_interval( Interval( 0, 10 ) ) | |||
>>> intersecter.insert_interval( Interval( 3, 7 ) ) | |||
>>> intersecter.insert_interval( Interval( 3, 40 ) ) | |||
>>> intersecter.insert_interval( Interval( 13, 50 ) ) | |||
>>> intersecter.find( 30, 50 ) | |||
[Interval(3, 40), Interval(13, 50)] | |||
>>> intersecter.find( 100, 200 ) | |||
[] | |||
Before/after for intervals | |||
>>> intersecter.before_interval( Interval( 10, 20 ) ) | |||
[Interval(3, 7)] | |||
>>> intersecter.before_interval( Interval( 5, 20 ) ) | |||
[] | |||
Upstream/downstream | |||
>>> intersecter.upstream_of_interval(Interval(11, 12)) | |||
[Interval(0, 10)] | |||
>>> intersecter.upstream_of_interval(Interval(11, 12, strand="-")) | |||
[Interval(13, 50)] | |||
>>> intersecter.upstream_of_interval(Interval(1, 2, strand="-"), num_intervals=3) | |||
[Interval(3, 7), Interval(3, 40), Interval(13, 50)] | |||
""" | |||
cdef IntervalNode root | |||
def __cinit__( self ): | |||
root = None | |||
# ---- Position based interfaces ----------------------------------------- | |||
def insert( self, double start, double end, object value=None ): | |||
""" | |||
Insert the interval [start,end) associated with value `value`. | |||
""" | |||
if self.root is None: | |||
self.root = IntervalNode( start, end, value ) | |||
else: | |||
self.root = self.root.insert( start, end, value ) | |||
add = insert | |||
def find( self, start, end ): | |||
""" | |||
Return a sorted list of all intervals overlapping [start,end). | |||
""" | |||
if self.root is None: | |||
return [] | |||
return self.root.find( start, end ) | |||
def before( self, position, num_intervals=1, max_dist=2500 ): | |||
""" | |||
Find `num_intervals` intervals that lie before `position` and are no | |||
further than `max_dist` positions away | |||
""" | |||
if self.root is None: | |||
return [] | |||
return self.root.left( position, num_intervals, max_dist ) | |||
def after( self, position, num_intervals=1, max_dist=2500 ): | |||
""" | |||
Find `num_intervals` intervals that lie after `position` and are no | |||
further than `max_dist` positions away | |||
""" | |||
if self.root is None: | |||
return [] | |||
return self.root.right( position, num_intervals, max_dist ) | |||
# ---- Interval-like object based interfaces ----------------------------- | |||
def insert_interval( self, interval ): | |||
""" | |||
Insert an "interval" like object (one with at least start and end | |||
attributes) | |||
""" | |||
self.insert( interval.start, interval.end, interval ) | |||
add_interval = insert_interval | |||
def before_interval( self, interval, num_intervals=1, max_dist=2500 ): | |||
""" | |||
Find `num_intervals` intervals that lie completely before `interval` | |||
and are no further than `max_dist` positions away | |||
""" | |||
if self.root is None: | |||
return [] | |||
return self.root.left( interval.start, num_intervals, max_dist ) | |||
def after_interval( self, interval, num_intervals=1, max_dist=2500 ): | |||
""" | |||
Find `num_intervals` intervals that lie completely after `interval` and | |||
are no further than `max_dist` positions away | |||
""" | |||
if self.root is None: | |||
return [] | |||
return self.root.right( interval.end, num_intervals, max_dist ) | |||
def upstream_of_interval( self, interval, num_intervals=1, max_dist=2500 ): | |||
""" | |||
Find `num_intervals` intervals that lie completely upstream of | |||
`interval` and are no further than `max_dist` positions away | |||
""" | |||
if self.root is None: | |||
return [] | |||
if interval.strand == -1 or interval.strand == "-": | |||
return self.root.right( interval.end, num_intervals, max_dist ) | |||
else: | |||
return self.root.left( interval.start, num_intervals, max_dist ) | |||
def downstream_of_interval( self, interval, num_intervals=1, max_dist=2500 ): | |||
""" | |||
Find `num_intervals` intervals that lie completely downstream of | |||
`interval` and are no further than `max_dist` positions away | |||
""" | |||
if self.root is None: | |||
return [] | |||
if interval.strand == -1 or interval.strand == "-": | |||
return self.root.left( interval.start, num_intervals, max_dist ) | |||
else: | |||
return self.root.right( interval.end, num_intervals, max_dist ) | |||
def traverse(self): | |||
""" | |||
iterator that traverses the tree | |||
""" | |||
if self.root is None: | |||
return iter([]) | |||
return self.root.traverse() | |||
# For backward compatibility | |||
Intersecter = IntervalTree |