Move Interval set_difference inside nilmdb.utils for clients

Clients might need to to Interval math too, so move a simple Interval
class and start putting helpers in there.
This commit is contained in:
Jim Paris 2013-03-15 21:11:26 -04:00
parent c54d8041c3
commit 4f06d6ae68
5 changed files with 127 additions and 81 deletions

View File

@ -1,5 +1,9 @@
"""Interval, IntervalSet
The Interval implemented here is just like
nilmdb.utils.interval.Interval, except implemented in Cython for
speed.
Represents an interval of time, and a set of such intervals.
Intervals are half-open, ie. they include data points with timestamps
@ -23,6 +27,7 @@ from ..utils.time import min_timestamp as nilmdb_min_timestamp
from ..utils.time import max_timestamp as nilmdb_max_timestamp
from ..utils.time import timestamp_to_string
from ..utils.iterator import imerge
from ..utils.interval import IntervalError
import itertools
cimport rbtree
@ -30,10 +35,6 @@ from libc.stdint cimport uint64_t, int64_t
ctypedef int64_t timestamp_t
class IntervalError(Exception):
"""Error due to interval overlap, etc"""
pass
cdef class Interval:
"""Represents an interval of time."""
@ -59,17 +60,7 @@ cdef class Interval:
def __cmp__(self, Interval other):
"""Compare two intervals. If non-equal, order by start then end"""
if not isinstance(other, Interval):
raise TypeError("bad type")
if self.start == other.start:
if self.end < other.end:
return -1
if self.end > other.end:
return 1
return 0
if self.start < other.start:
return -1
return 1
return cmp(self.start, other.start) or cmp(self.end, other.end)
cpdef intersects(self, Interval other):
"""Return True if two Interval objects intersect"""
@ -313,63 +304,6 @@ cdef class IntervalSet:
else:
yield subset
def set_difference(self, IntervalSet other not None,
Interval bounds = None):
"""
Compute the difference (self \\ other) between this
IntervalSet and the given IntervalSet; i.e., the ranges
that are present in 'self' but not 'other'.
If 'bounds' is not None, results are limited to the range
specified by the interval 'bounds'.
Returns a generator that yields each interval in turn.
Output intervals are built as subsets of the intervals in the
first argument (self).
"""
# Iterate through all starts and ends in sorted order. Add a
# tag to the iterator so that we can figure out which one they
# were, after sorting.
def decorate(it, key_start, key_end):
for i in it:
yield i.start, key_start, i
yield i.end, key_end, i
if bounds is None:
bounds = Interval(nilmdb_min_timestamp,
nilmdb_max_timestamp)
self_iter = decorate(self.intersection(bounds), 0, 2)
other_iter = decorate(other.intersection(bounds), 1, 3)
# Now iterate over the timestamps of each start and end.
# At each point, evaluate which type of end it is, to determine
# how to build up the output intervals.
self_interval = None
other_interval = None
out_start = None
for (ts, k, i) in imerge(self_iter, other_iter):
if k == 0:
# start self interval
self_interval = i
if other_interval is None:
out_start = ts
elif k == 1:
# start other interval
other_interval = i
if out_start is not None and out_start != ts:
yield self_interval.subset(out_start, ts)
out_start = None
elif k == 2:
# end self interval
if out_start is not None and out_start != ts:
yield self_interval.subset(out_start, ts)
out_start = None
self_interval = None
elif k == 3:
# end other interval
other_interval = None
if self_interval:
out_start = ts
cpdef intersects(self, Interval other):
"""Return True if this IntervalSet intersects another interval"""
for n in self.tree.intersect(other.start, other.end):

View File

@ -12,8 +12,10 @@ Manages both the SQL database and the table storage backend.
from __future__ import absolute_import
import nilmdb.utils
from nilmdb.utils.printf import *
from nilmdb.server.interval import (Interval, DBInterval,
IntervalSet, IntervalError)
from nilmdb.utils.interval import IntervalError
from nilmdb.server.interval import Interval, DBInterval, IntervalSet
from nilmdb.server import bulkdata
from nilmdb.server.errors import NilmDBError, StreamError, OverlapError
@ -328,7 +330,7 @@ class NilmDB(object):
List all intervals in 'path' between 'start' and 'end'. If
'diffpath' is not none, list instead the set-difference
between the intervals in the two streams; i.e. all interval
ranges that are present in 'path' but not 'path2'.
ranges that are present in 'path' but not 'diffpath'.
Returns (intervals, restart) tuple.
@ -350,7 +352,9 @@ class NilmDB(object):
requested = Interval(start, end)
result = []
if diffpath:
getter = intervals.set_difference(diffintervals, requested)
getter = nilmdb.utils.interval.set_difference(
intervals.intersection(requested),
diffintervals.intersection(requested))
else:
getter = intervals.intersection(requested)
for n, i in enumerate(getter):

View File

@ -11,3 +11,4 @@ import nilmdb.utils.threadsafety
import nilmdb.utils.fallocate
import nilmdb.utils.time
import nilmdb.utils.iterator
import nilmdb.utils.interval

106
nilmdb/utils/interval.py Normal file
View File

@ -0,0 +1,106 @@
"""Interval. Like nilmdb.server.interval, but re-implemented here
in plain Python so clients have easier access to it.
Intervals are half-open, ie. they include data points with timestamps
[start, end)
"""
import nilmdb.utils.time
import nilmdb.utils.iterator
class IntervalError(Exception):
"""Error due to interval overlap, etc"""
pass
# Interval
class Interval:
"""Represents an interval of time."""
def __init__(self, start, end):
"""
'start' and 'end' are arbitrary numbers that represent time
"""
if start >= end:
# Explicitly disallow zero-width intervals (since they're half-open)
raise IntervalError("start %s must precede end %s" % (start, end))
self.start = start
self.end = end
def __repr__(self):
s = repr(self.start) + ", " + repr(self.end)
return self.__class__.__name__ + "(" + s + ")"
def __str__(self):
return ("[" + nilmdb.utils.time.timestamp_to_string(self.start) +
" -> " + nilmdb.utils.time.timestamp_to_string(self.end) + ")")
def __cmp__(self, other):
"""Compare two intervals. If non-equal, order by start then end"""
return cmp(self.start, other.start) or cmp(self.end, other.end)
def intersects(self, other):
"""Return True if two Interval objects intersect"""
if not isinstance(other, Interval):
raise TypeError("need an Interval")
if self.end <= other.start or self.start >= other.end:
return False
return True
def subset(self, start, end):
"""Return a new Interval that is a subset of this one"""
# A subclass that tracks additional data might override this.
if start < self.start or end > self.end:
raise IntervalError("not a subset")
return Interval(start, end)
def set_difference(a, b):
"""
Compute the difference (a \\ b) between the intervals in 'a' and
the intervals in 'b'; i.e., the ranges that are present in 'self'
but not 'other'.
'a' and 'b' must both be iterables.
Returns a generator that yields each interval in turn.
Output intervals are built as subsets of the intervals in the
first argument (a).
"""
# Iterate through all starts and ends in sorted order. Add a
# tag to the iterator so that we can figure out which one they
# were, after sorting.
def decorate(it, key_start, key_end):
for i in it:
yield i.start, key_start, i
yield i.end, key_end, i
a_iter = decorate(iter(a), 0, 2)
b_iter = decorate(iter(b), 1, 3)
# Now iterate over the timestamps of each start and end.
# At each point, evaluate which type of end it is, to determine
# how to build up the output intervals.
a_interval = None
b_interval = None
out_start = None
for (ts, k, i) in nilmdb.utils.iterator.imerge(a_iter, b_iter):
if k == 0:
# start a interval
a_interval = i
if b_interval is None:
out_start = ts
elif k == 1:
# start b interval
b_interval = i
if out_start is not None and out_start != ts:
yield a_interval.subset(out_start, ts)
out_start = None
elif k == 2:
# end a interval
if out_start is not None and out_start != ts:
yield a_interval.subset(out_start, ts)
out_start = None
a_interval = None
elif k == 3:
# end b interval
b_interval = None
if a_interval:
out_start = ts

View File

@ -8,8 +8,8 @@ from nose.tools import *
from nose.tools import assert_raises
import itertools
from nilmdb.server.interval import (Interval, DBInterval,
IntervalSet, IntervalError)
from nilmdb.utils.interval import IntervalError
from nilmdb.server.interval import Interval, DBInterval, IntervalSet
from testutil.helpers import *
import unittest
@ -222,7 +222,7 @@ class TestInterval:
eq_(ab,c)
# a \ b == d
eq_(IntervalSet(a.set_difference(b)), d)
eq_(IntervalSet(nilmdb.utils.interval.set_difference(a,b)), d)
# Intersection with intervals
do_test(makeset("[---|---)[)"),
@ -287,10 +287,11 @@ class TestInterval:
b = makeset("[-) [--) [)")
c = makeset("[----) ")
d = makeset(" [-) ")
eq_(a.set_difference(b, list(c)[0]), d)
eq_(nilmdb.utils.interval.set_difference(
a.intersection(list(c)[0]), b.intersection(list(c)[0])), d)
# Empty second set
eq_(a.set_difference(IntervalSet()), a)
eq_(nilmdb.utils.interval.set_difference(a, IntervalSet()), a)
class TestIntervalDB:
def test_dbinterval(self):