Move Interval set_difference inside nilmdb.utils for clients
Clients might need to do Interval math too, so move a simple Interval class and start putting helpers in there.
This commit is contained in:
parent
c54d8041c3
commit
4f06d6ae68
|
@ -1,5 +1,9 @@
|
|||
"""Interval, IntervalSet
|
||||
|
||||
The Interval implemented here is just like
|
||||
nilmdb.utils.interval.Interval, except implemented in Cython for
|
||||
speed.
|
||||
|
||||
Represents an interval of time, and a set of such intervals.
|
||||
|
||||
Intervals are half-open, ie. they include data points with timestamps
|
||||
|
@ -23,6 +27,7 @@ from ..utils.time import min_timestamp as nilmdb_min_timestamp
|
|||
from ..utils.time import max_timestamp as nilmdb_max_timestamp
|
||||
from ..utils.time import timestamp_to_string
|
||||
from ..utils.iterator import imerge
|
||||
from ..utils.interval import IntervalError
|
||||
import itertools
|
||||
|
||||
cimport rbtree
|
||||
|
@ -30,10 +35,6 @@ from libc.stdint cimport uint64_t, int64_t
|
|||
|
||||
ctypedef int64_t timestamp_t
|
||||
|
||||
class IntervalError(Exception):
|
||||
"""Error due to interval overlap, etc"""
|
||||
pass
|
||||
|
||||
cdef class Interval:
|
||||
"""Represents an interval of time."""
|
||||
|
||||
|
@ -59,17 +60,7 @@ cdef class Interval:
|
|||
|
||||
def __cmp__(self, Interval other):
|
||||
"""Compare two intervals. If non-equal, order by start then end"""
|
||||
if not isinstance(other, Interval):
|
||||
raise TypeError("bad type")
|
||||
if self.start == other.start:
|
||||
if self.end < other.end:
|
||||
return -1
|
||||
if self.end > other.end:
|
||||
return 1
|
||||
return 0
|
||||
if self.start < other.start:
|
||||
return -1
|
||||
return 1
|
||||
return cmp(self.start, other.start) or cmp(self.end, other.end)
|
||||
|
||||
cpdef intersects(self, Interval other):
|
||||
"""Return True if two Interval objects intersect"""
|
||||
|
@ -313,63 +304,6 @@ cdef class IntervalSet:
|
|||
else:
|
||||
yield subset
|
||||
|
||||
def set_difference(self, IntervalSet other not None,
|
||||
Interval bounds = None):
|
||||
"""
|
||||
Compute the difference (self \\ other) between this
|
||||
IntervalSet and the given IntervalSet; i.e., the ranges
|
||||
that are present in 'self' but not 'other'.
|
||||
|
||||
If 'bounds' is not None, results are limited to the range
|
||||
specified by the interval 'bounds'.
|
||||
|
||||
Returns a generator that yields each interval in turn.
|
||||
Output intervals are built as subsets of the intervals in the
|
||||
first argument (self).
|
||||
"""
|
||||
# Iterate through all starts and ends in sorted order. Add a
|
||||
# tag to the iterator so that we can figure out which one they
|
||||
# were, after sorting.
|
||||
def decorate(it, key_start, key_end):
|
||||
for i in it:
|
||||
yield i.start, key_start, i
|
||||
yield i.end, key_end, i
|
||||
if bounds is None:
|
||||
bounds = Interval(nilmdb_min_timestamp,
|
||||
nilmdb_max_timestamp)
|
||||
self_iter = decorate(self.intersection(bounds), 0, 2)
|
||||
other_iter = decorate(other.intersection(bounds), 1, 3)
|
||||
|
||||
# Now iterate over the timestamps of each start and end.
|
||||
# At each point, evaluate which type of end it is, to determine
|
||||
# how to build up the output intervals.
|
||||
self_interval = None
|
||||
other_interval = None
|
||||
out_start = None
|
||||
for (ts, k, i) in imerge(self_iter, other_iter):
|
||||
if k == 0:
|
||||
# start self interval
|
||||
self_interval = i
|
||||
if other_interval is None:
|
||||
out_start = ts
|
||||
elif k == 1:
|
||||
# start other interval
|
||||
other_interval = i
|
||||
if out_start is not None and out_start != ts:
|
||||
yield self_interval.subset(out_start, ts)
|
||||
out_start = None
|
||||
elif k == 2:
|
||||
# end self interval
|
||||
if out_start is not None and out_start != ts:
|
||||
yield self_interval.subset(out_start, ts)
|
||||
out_start = None
|
||||
self_interval = None
|
||||
elif k == 3:
|
||||
# end other interval
|
||||
other_interval = None
|
||||
if self_interval:
|
||||
out_start = ts
|
||||
|
||||
cpdef intersects(self, Interval other):
|
||||
"""Return True if this IntervalSet intersects another interval"""
|
||||
for n in self.tree.intersect(other.start, other.end):
|
||||
|
|
|
@ -12,8 +12,10 @@ Manages both the SQL database and the table storage backend.
|
|||
from __future__ import absolute_import
|
||||
import nilmdb.utils
|
||||
from nilmdb.utils.printf import *
|
||||
from nilmdb.server.interval import (Interval, DBInterval,
|
||||
IntervalSet, IntervalError)
|
||||
|
||||
from nilmdb.utils.interval import IntervalError
|
||||
from nilmdb.server.interval import Interval, DBInterval, IntervalSet
|
||||
|
||||
from nilmdb.server import bulkdata
|
||||
from nilmdb.server.errors import NilmDBError, StreamError, OverlapError
|
||||
|
||||
|
@ -328,7 +330,7 @@ class NilmDB(object):
|
|||
List all intervals in 'path' between 'start' and 'end'. If
|
||||
'diffpath' is not none, list instead the set-difference
|
||||
between the intervals in the two streams; i.e. all interval
|
||||
ranges that are present in 'path' but not 'path2'.
|
||||
ranges that are present in 'path' but not 'diffpath'.
|
||||
|
||||
Returns (intervals, restart) tuple.
|
||||
|
||||
|
@ -350,7 +352,9 @@ class NilmDB(object):
|
|||
requested = Interval(start, end)
|
||||
result = []
|
||||
if diffpath:
|
||||
getter = intervals.set_difference(diffintervals, requested)
|
||||
getter = nilmdb.utils.interval.set_difference(
|
||||
intervals.intersection(requested),
|
||||
diffintervals.intersection(requested))
|
||||
else:
|
||||
getter = intervals.intersection(requested)
|
||||
for n, i in enumerate(getter):
|
||||
|
|
|
@ -11,3 +11,4 @@ import nilmdb.utils.threadsafety
|
|||
import nilmdb.utils.fallocate
|
||||
import nilmdb.utils.time
|
||||
import nilmdb.utils.iterator
|
||||
import nilmdb.utils.interval
|
||||
|
|
106
nilmdb/utils/interval.py
Normal file
106
nilmdb/utils/interval.py
Normal file
|
@ -0,0 +1,106 @@
|
|||
"""Interval. Like nilmdb.server.interval, but re-implemented here
|
||||
in plain Python so clients have easier access to it.
|
||||
|
||||
Intervals are half-open, ie. they include data points with timestamps
|
||||
[start, end)
|
||||
"""
|
||||
|
||||
import nilmdb.utils.time
|
||||
import nilmdb.utils.iterator
|
||||
|
||||
class IntervalError(Exception):
    """Raised for invalid interval operations (overlap, bad bounds, etc)."""
|
||||
|
||||
# Interval
|
||||
class Interval:
    """Represents a half-open interval of time, [start, end).

    Intervals compare by value: they order by 'start' first, then by
    'end'.  Comparison is duck-typed; the other operand only needs
    'start' and 'end' attributes.
    """

    # Intervals are mutable and compare by value, so keep them
    # explicitly unhashable rather than hashing by identity.
    __hash__ = None

    def __init__(self, start, end):
        """
        'start' and 'end' are arbitrary numbers that represent time.

        Raises IntervalError unless start < end.
        """
        if start >= end:
            # Explicitly disallow zero-width intervals (since they're half-open)
            raise IntervalError("start %s must precede end %s" % (start, end))
        self.start = start
        self.end = end

    def __repr__(self):
        s = repr(self.start) + ", " + repr(self.end)
        return self.__class__.__name__ + "(" + s + ")"

    def __str__(self):
        return ("[" + nilmdb.utils.time.timestamp_to_string(self.start) +
                " -> " + nilmdb.utils.time.timestamp_to_string(self.end) + ")")

    def __cmp__(self, other):
        """Compare two intervals.  If non-equal, order by start then end.

        Returns a negative, zero, or positive integer.
        """
        # (x > y) - (x < y) is the portable spelling of cmp(x, y); the
        # cmp() builtin only exists in Python 2.
        return (((self.start > other.start) - (self.start < other.start))
                or ((self.end > other.end) - (self.end < other.end)))

    # Rich comparisons, all defined in terms of __cmp__.  Python 3
    # never calls __cmp__ itself, so without these, ordering and value
    # equality would silently stop working there.
    def __eq__(self, other):
        return self.__cmp__(other) == 0

    def __ne__(self, other):
        return self.__cmp__(other) != 0

    def __lt__(self, other):
        return self.__cmp__(other) < 0

    def __le__(self, other):
        return self.__cmp__(other) <= 0

    def __gt__(self, other):
        return self.__cmp__(other) > 0

    def __ge__(self, other):
        return self.__cmp__(other) >= 0

    def intersects(self, other):
        """Return True if two Interval objects intersect.

        Raises TypeError if 'other' is not an Interval.
        """
        if not isinstance(other, Interval):
            raise TypeError("need an Interval")
        # Half-open: intervals that merely touch at an endpoint do
        # not intersect.
        if self.end <= other.start or self.start >= other.end:
            return False
        return True

    def subset(self, start, end):
        """Return a new Interval that is a subset of this one.

        Raises IntervalError if [start, end) is not contained in
        this interval.
        """
        # A subclass that tracks additional data might override this.
        if start < self.start or end > self.end:
            raise IntervalError("not a subset")
        return Interval(start, end)
|
||||
|
||||
def set_difference(a, b):
    """
    Compute the difference (a \\ b) between the intervals in 'a' and
    the intervals in 'b'; i.e., the ranges that are present in 'a'
    but not 'b'.

    'a' and 'b' must both be iterables.  Only one open interval is
    tracked per input at a time, so each iterable is presumably
    expected to yield its intervals in sorted, non-overlapping order.

    Returns a generator that yields each interval in turn.
    Output intervals are built as subsets of the intervals in the
    first argument (a).
    """
    # Iterate through all starts and ends in sorted order.  Add a
    # tag to the iterator so that we can figure out which one they
    # were, after sorting.
    def decorate(it, key_start, key_end):
        for i in it:
            yield i.start, key_start, i
            yield i.end, key_end, i
    a_iter = decorate(iter(a), 0, 2)
    b_iter = decorate(iter(b), 1, 3)

    # Now iterate over the timestamps of each start and end.
    # At each point, evaluate which type of end it is, to determine
    # how to build up the output intervals.
    a_interval = None     # interval from 'a' that is currently open
    b_interval = None     # interval from 'b' that is currently open
    out_start = None      # start of the pending output range, if any
    for (ts, k, i) in nilmdb.utils.iterator.imerge(a_iter, b_iter):
        if k == 0:
            # start a interval
            a_interval = i
            if b_interval is None:
                out_start = ts
        elif k == 1:
            # start b interval
            b_interval = i
            # Skip zero-width output (out_start == ts)
            if out_start is not None and out_start != ts:
                yield a_interval.subset(out_start, ts)
            out_start = None
        elif k == 2:
            # end a interval
            if out_start is not None and out_start != ts:
                yield a_interval.subset(out_start, ts)
            out_start = None
            a_interval = None
        elif k == 3:
            # end b interval
            b_interval = None
            # Explicit None test: don't depend on the truthiness of
            # whatever interval type the caller passed in.
            if a_interval is not None:
                out_start = ts
|
|
@ -8,8 +8,8 @@ from nose.tools import *
|
|||
from nose.tools import assert_raises
|
||||
import itertools
|
||||
|
||||
from nilmdb.server.interval import (Interval, DBInterval,
|
||||
IntervalSet, IntervalError)
|
||||
from nilmdb.utils.interval import IntervalError
|
||||
from nilmdb.server.interval import Interval, DBInterval, IntervalSet
|
||||
|
||||
from testutil.helpers import *
|
||||
import unittest
|
||||
|
@ -222,7 +222,7 @@ class TestInterval:
|
|||
eq_(ab,c)
|
||||
|
||||
# a \ b == d
|
||||
eq_(IntervalSet(a.set_difference(b)), d)
|
||||
eq_(IntervalSet(nilmdb.utils.interval.set_difference(a,b)), d)
|
||||
|
||||
# Intersection with intervals
|
||||
do_test(makeset("[---|---)[)"),
|
||||
|
@ -287,10 +287,11 @@ class TestInterval:
|
|||
b = makeset("[-) [--) [)")
|
||||
c = makeset("[----) ")
|
||||
d = makeset(" [-) ")
|
||||
eq_(a.set_difference(b, list(c)[0]), d)
|
||||
eq_(nilmdb.utils.interval.set_difference(
|
||||
a.intersection(list(c)[0]), b.intersection(list(c)[0])), d)
|
||||
|
||||
# Empty second set
|
||||
eq_(a.set_difference(IntervalSet()), a)
|
||||
eq_(nilmdb.utils.interval.set_difference(a, IntervalSet()), a)
|
||||
|
||||
class TestIntervalDB:
|
||||
def test_dbinterval(self):
|
||||
|
|
Loading…
Reference in New Issue
Block a user