Browse Source

New bxinterval implementation is at least linear -- not much better in
the TestIntervalSpeed versus the previous bisect version, but should
be better in general.



git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@10748 ddd99763-3ecb-0310-9145-efcb8ce7c51f
tags/bxinterval-last
Jim Paris 12 years ago
parent
commit
b84ffddd50
3 changed files with 69 additions and 72 deletions
  1. +14
    -11
      nilmdb/bxintersect.pyx
  2. +46
    -56
      nilmdb/interval.py
  3. +9
    -5
      tests/test_interval.py

+ 14
- 11
nilmdb/bxintersect.pyx View File

@@ -175,6 +175,8 @@ cdef class IntervalNode:
"""
cdef list results = []
self._intersect( start, end, results )
if sort:
results = sorted(results)
return results

find = intersect
@@ -261,13 +263,14 @@ cdef class IntervalNode:
r.sort(key=operator.attrgetter('start'))
return r[:n]

def traverse(self, func):
self._traverse(func)

cdef void _traverse(IntervalNode self, object func):
if self.cleft is not EmptyNode: self.cleft._traverse(func)
func(self)
if self.cright is not EmptyNode: self.cright._traverse(func)
def traverse(self):
if self.cleft is not EmptyNode:
for node in self.cleft.traverse():
yield node
yield self.interval
if self.cright is not EmptyNode:
for node in self.cright.traverse():
yield node

cdef IntervalNode EmptyNode = IntervalNode( 0, 0, Interval(0, 0))

@@ -479,13 +482,13 @@ cdef class IntervalTree:
else:
return self.root.right( interval.end, num_intervals, max_dist )
def traverse(self, fn):
def traverse(self):
"""
call fn for each element in the tree
iterator that traverses the tree
"""
if self.root is None:
return None
return self.root.traverse(fn)
return iter([])
return self.root.traverse()

# For backward compatibility
Intersecter = IntervalTree

+ 46
- 56
nilmdb/interval.py View File

@@ -27,8 +27,6 @@ class Interval(bxintersect.Interval):
"""
if start > end:
raise IntervalError("start must precede end")
if start != int(start) or end != int(end):
raise IntervalError("arguments must be integers")
bxintersect.Interval.__init__(self, start, end)

def __repr__(self):
@@ -53,30 +51,34 @@ class Interval(bxintersect.Interval):

class IntervalSet(object):
"""
A sorted, non-intersecting set of intervals.
A non-intersecting set of intervals.
"""

def __init__(self, source=None):
"""
'source' is an Interval or IntervalSet to add.
"""
self.data = []
self.tree = bxintersect.IntervalTree()
if source is not None:
if isinstance(source, Interval):
self._add_single_interval(source)
else:
self._add_intervals(source)
self += source

def __iter__(self):
return self.data.__iter__()
return self.tree.traverse()

def __len__(self):
return sum(1 for x in self)

def __repr__(self):
return self.__class__.__name__ + "(" + repr(self.data) + ")"
descs = [ repr(x) for x in self ]
return self.__class__.__name__ + "([" + ", ".join(descs) + "])"

def __str__(self):
return "[" + ", ".join([ str(x) for x in self.data ]) + "]"
descs = [ str(x) for x in self ]
return "[" + ", ".join(descs) + "]"

def __eq__(self, other):
# This isn't particularly efficient, but it shouldn't get used in the
# general case.
"""Test equality of two IntervalSets.

Treats adjacent Intervals as equivalent to one long interval,
@@ -93,30 +95,33 @@ class IntervalSet(object):
else:
return False

this = [ x for x in self ]
that = [ x for x in other ]

try:
while True:
if (outside):
# To match, we need to be finished both sets
if (i >= len(self) and j >= len(other)):
if (i >= len(this) and j >= len(that)):
return True
# Or the starts need to match
if (self[i].start != other[j].start):
if (this[i].start != that[j].start):
return False
outside = False
else:
# We can move on if the two interval ends match
if (self[i].end == other[j].end):
if (this[i].end == that[j].end):
i += 1
j += 1
outside = True
else:
# Whichever ends first needs to be adjacent to the next
if (self[i].end < other[j].end):
if (not is_adjacent(self[i],self[i+1])):
if (this[i].end < that[j].end):
if (not is_adjacent(this[i],this[i+1])):
return False
i += 1
else:
if (not is_adjacent(other[j],other[j+1])):
if (not is_adjacent(that[j],that[j+1])):
return False
j += 1
except IndexError:
@@ -125,26 +130,22 @@ class IntervalSet(object):
def __ne__(self, other):
return not self.__eq__(other)

def __len__(self):
return len(self.data)

def __getitem__(self, key):
return self.data.__getitem__(key)

def __iadd__(self, other):
"""Inplace add -- modifies self

This throws an exception if the regions being added intersect."""
if isinstance(other, Interval):
self._add_single_interval(other)
if self.intersects(other):
raise IntervalError("Tried to add overlapping interval "
"to this set")
self.tree.insert_interval(other)
else:
self._add_intervals(other)
for x in other:
self.__iadd__(x)
return self

def __add__(self, other):
"""Add -- returns a new object

This throws an exception if the regions being added intersect."""
"""Add -- returns a new object"""
new = IntervalSet(self)
new += IntervalSet(other)
return new
@@ -158,36 +159,25 @@ class IntervalSet(object):
"""
out = IntervalSet()

if isinstance(other, IntervalSet):
# We were given a set -- intersect with each interval inside
for interval in other.data:
out += self & interval
else:
if not isinstance(other, IntervalSet):
other = [ other ]

for x in other:
# Intersecting with a just a single interval.
# Use a bisect as a quick optimization to find the earliest
start = max(bisect.bisect_left(self.data, other)-1, 0)
for n in xrange(start, len(self.data)):
this = self.data[n]
if this.start > other.end:
break
# If there's any overlap, add the overlapping region
if this.intersects(other):
out += this.subset(max(this.start, other.start),
min(this.end, other.end))
all_intersecting = self.tree.find(x.start, x.end)
for i in all_intersecting:
out += i.subset(max(i.start, x.start),
min(i.end, x.end))
return out

def intersects(self, other):
"""Return True if this IntervalSet intersects another"""
return len(self & other) > 0

def _add_intervals(self, iterable):
"""Add each Interval from an interable to this set"""
for element in iter(iterable):
self._add_single_interval(element)

def _add_single_interval(self, interval):
"""Add one Interval to this set"""
if self.intersects(interval):
raise IntervalError("Tried to add overlapping interval "
"to this set")
bisect.insort(self.data, interval)
if isinstance(other, Interval):
return len(self.tree.find(other.start, other.end)) > 0
else:
for x in other:
if self.intersects(x):
return True
else:
return False


+ 9
- 5
tests/test_interval.py View File

@@ -125,12 +125,16 @@ class TestInterval:
x = iseta != 3
ne_(IntervalSet(a), IntervalSet(b))

# test iterator
for interval in iseta:
pass

# overlap
with assert_raises(IntervalError):
x = IntervalSet([a, b, c])

# bad types
with assert_raises(AttributeError):
with assert_raises(Exception):
x = IntervalSet([1, 2])

iset = IntervalSet(isetb) # test iterator
@@ -207,7 +211,7 @@ class TestInterval:
makeset(" [] [-] []"))

class TestIntervalSpeed:
#@unittest.skip("slow")
@unittest.skip("this is slow")
def test_interval_speed(self):
import yappi
import time
@@ -216,14 +220,14 @@ class TestIntervalSpeed:
print
yappi.start()
speeds = {}
for j in [ 2**x for x in range(5,20) ]:
for j in [ 2**x for x in range(5,22) ]:
start = time.time()
iset = IntervalSet()
for i in xrange(j):
interval = Interval(i, i+1)
iset += interval
speed = int((time.time() - start) * 1000000)
printf("%d: %f μs\n", j, speed)
speed = (time.time() - start) * 1000000.0
printf("%d: %g μs (%g μs each)\n", j, speed, speed/j)
speeds[j] = speed
aplotter.plot(speeds.keys(), speeds.values(), plot_slope=True)
yappi.stop()


Loading…
Cancel
Save