You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

322 lines
11 KiB

  1. # cython: language_level=2
  2. """Interval, IntervalSet
  3. The Interval implemented here is just like
  4. nilmdb.utils.interval.Interval, except implemented in Cython for
  5. speed.
  6. Represents an interval of time, and a set of such intervals.
  7. Intervals are half-open, ie. they include data points with timestamps
  8. [start, end)
  9. """
  10. # First implementation kept a sorted list of intervals and used
  11. # biesct() to optimize some operations, but this was too slow.
  12. # Second version was based on the quicksect implementation from
  13. # python-bx, modified slightly to handle floating point intervals.
  14. # This didn't support deletion.
  15. # Third version is more similar to the first version, using a rb-tree
  16. # instead of a simple sorted list to maintain O(log n) operations.
  17. # Fourth version is an optimized rb-tree that stores interval starts
  18. # and ends directly in the tree, like bxinterval did.
  19. from ..utils.time import min_timestamp as nilmdb_min_timestamp
  20. from ..utils.time import max_timestamp as nilmdb_max_timestamp
  21. from ..utils.time import timestamp_to_string
  22. from ..utils.iterator import imerge
  23. from ..utils.interval import IntervalError
  24. import itertools
  25. cimport rbtree
  26. from libc.stdint cimport uint64_t, int64_t
  27. ctypedef int64_t timestamp_t
  28. cdef class Interval:
  29. """Represents an interval of time."""
  30. cdef public timestamp_t start, end
  31. def __init__(self, timestamp_t start, timestamp_t end):
  32. """
  33. 'start' and 'end' are arbitrary numbers that represent time
  34. """
  35. if start >= end:
  36. # Explicitly disallow zero-width intervals (since they're half-open)
  37. raise IntervalError("start %s must precede end %s" % (start, end))
  38. self.start = start
  39. self.end = end
  40. def __repr__(self):
  41. s = repr(self.start) + ", " + repr(self.end)
  42. return self.__class__.__name__ + "(" + s + ")"
  43. def __str__(self):
  44. return ("[" + timestamp_to_string(self.start) +
  45. " -> " + timestamp_to_string(self.end) + ")")
  46. def __cmp__(self, Interval other):
  47. """Compare two intervals. If non-equal, order by start then end"""
  48. def cmp(a, b):
  49. return (a > b) - (a < b)
  50. return cmp(self.start, other.start) or cmp(self.end, other.end)
  51. cpdef intersects(self, Interval other):
  52. """Return True if two Interval objects intersect"""
  53. if (self.end <= other.start or self.start >= other.end):
  54. return False
  55. return True
  56. cpdef subset(self, timestamp_t start, timestamp_t end):
  57. """Return a new Interval that is a subset of this one"""
  58. # A subclass that tracks additional data might override this.
  59. if start < self.start or end > self.end:
  60. raise IntervalError("not a subset")
  61. return Interval(start, end)
  62. cdef class DBInterval(Interval):
  63. """
  64. Like Interval, but also tracks corresponding start/end times and
  65. positions within the database. These are not currently modified
  66. when subsets are taken, but can be used later to help zero in on
  67. database positions.
  68. The actual 'start' and 'end' will always fall within the database
  69. start and end, e.g.:
  70. db_start = 100, db_startpos = 10000
  71. start = 123
  72. end = 150
  73. db_end = 200, db_endpos = 20000
  74. """
  75. cpdef public timestamp_t db_start, db_end
  76. cpdef public uint64_t db_startpos, db_endpos
  77. def __init__(self, start, end,
  78. db_start, db_end,
  79. db_startpos, db_endpos):
  80. """
  81. 'db_start' and 'db_end' are arbitrary numbers that represent
  82. time. They must be a strict superset of the time interval
  83. covered by 'start' and 'end'. The 'db_startpos' and
  84. 'db_endpos' are arbitrary database position indicators that
  85. correspond to those points.
  86. """
  87. Interval.__init__(self, start, end)
  88. self.db_start = db_start
  89. self.db_end = db_end
  90. self.db_startpos = db_startpos
  91. self.db_endpos = db_endpos
  92. if db_start > start or db_end < end:
  93. raise IntervalError("database times must span the interval times")
  94. def __repr__(self):
  95. s = repr(self.start) + ", " + repr(self.end)
  96. s += ", " + repr(self.db_start) + ", " + repr(self.db_end)
  97. s += ", " + repr(self.db_startpos) + ", " + repr(self.db_endpos)
  98. return self.__class__.__name__ + "(" + s + ")"
  99. cpdef subset(self, timestamp_t start, timestamp_t end):
  100. """
  101. Return a new DBInterval that is a subset of this one
  102. """
  103. if start < self.start or end > self.end:
  104. raise IntervalError("not a subset")
  105. return DBInterval(start, end,
  106. self.db_start, self.db_end,
  107. self.db_startpos, self.db_endpos)
  108. cdef class IntervalSet:
  109. """
  110. A non-intersecting set of intervals.
  111. """
  112. cdef public rbtree.RBTree tree
  113. def __init__(self, source=None):
  114. """
  115. 'source' is an Interval or IntervalSet to add.
  116. """
  117. self.tree = rbtree.RBTree()
  118. if source is not None:
  119. self += source
  120. def __iter__(self):
  121. for node in self.tree:
  122. if node.obj:
  123. yield node.obj
  124. def __len__(self):
  125. return sum(1 for x in self)
  126. def __repr__(self):
  127. descs = [ repr(x) for x in self ]
  128. return self.__class__.__name__ + "([" + ", ".join(descs) + "])"
  129. def __str__(self):
  130. descs = [ str(x) for x in self ]
  131. return "[" + ", ".join(descs) + "]"
  132. def __match__(self, other):
  133. # This isn't particularly efficient, but it shouldn't get used in the
  134. # general case.
  135. """Test equality of two IntervalSets.
  136. Treats adjacent Intervals as equivalent to one long interval,
  137. so this function really tests whether the IntervalSets cover
  138. the same spans of time."""
  139. i = 0
  140. j = 0
  141. outside = True
  142. def is_adjacent(a, b):
  143. """Return True if two Intervals are adjacent (same end or start)"""
  144. if a.end == b.start or b.end == a.start:
  145. return True
  146. else:
  147. return False
  148. this = list(self)
  149. that = list(other)
  150. try:
  151. while True:
  152. if (outside):
  153. # To match, we need to be finished both sets
  154. if (i >= len(this) and j >= len(that)):
  155. return True
  156. # Or the starts need to match
  157. if (this[i].start != that[j].start):
  158. return False
  159. outside = False
  160. else:
  161. # We can move on if the two interval ends match
  162. if (this[i].end == that[j].end):
  163. i += 1
  164. j += 1
  165. outside = True
  166. else:
  167. # Whichever ends first needs to be adjacent to the next
  168. if (this[i].end < that[j].end):
  169. if (not is_adjacent(this[i],this[i+1])):
  170. return False
  171. i += 1
  172. else:
  173. if (not is_adjacent(that[j],that[j+1])):
  174. return False
  175. j += 1
  176. except IndexError:
  177. return False
  178. # Use __richcmp__ instead of __eq__, __ne__ for Cython.
  179. def __richcmp__(self, other, int op):
  180. if op == 2: # ==
  181. return self.__match__(other)
  182. elif op == 3: # !=
  183. return not self.__match__(other)
  184. return False
  185. #def __eq__(self, other):
  186. # return self.__match__(other)
  187. #
  188. #def __ne__(self, other):
  189. # return not self.__match__(other)
  190. def __iadd__(self, object other not None):
  191. """Inplace add -- modifies self
  192. This throws an exception if the regions being added intersect."""
  193. if isinstance(other, Interval):
  194. if self.intersects(other):
  195. raise IntervalError("Tried to add overlapping interval "
  196. "to this set")
  197. self.tree.insert(rbtree.RBNode(other.start, other.end, other))
  198. else:
  199. for x in other:
  200. self.__iadd__(x)
  201. return self
  202. def iadd_nocheck(self, Interval other not None):
  203. """Inplace add -- modifies self.
  204. 'Optimized' version that doesn't check for intersection and
  205. only inserts the new interval into the tree."""
  206. self.tree.insert(rbtree.RBNode(other.start, other.end, other))
  207. def __isub__(self, Interval other not None):
  208. """Inplace subtract -- modifies self
  209. Removes an interval from the set. Must exist exactly
  210. as provided -- cannot remove a subset of an existing interval."""
  211. i = self.tree.find(other.start, other.end)
  212. if i is None:
  213. raise IntervalError("interval " + str(other) + " not in tree")
  214. self.tree.delete(i)
  215. return self
  216. def __add__(self, other not None):
  217. """Add -- returns a new object"""
  218. new = IntervalSet(self)
  219. new += IntervalSet(other)
  220. return new
  221. def __and__(self, other not None):
  222. """
  223. Compute a new IntervalSet from the intersection of this
  224. IntervalSet with one other interval.
  225. Output intervals are built as subsets of the intervals in the
  226. first argument (self).
  227. """
  228. out = IntervalSet()
  229. for i in self.intersection(other):
  230. out.tree.insert(rbtree.RBNode(i.start, i.end, i))
  231. return out
  232. def intersection(self, Interval interval not None, orig = False):
  233. """
  234. Compute a sequence of intervals that correspond to the
  235. intersection between `self` and the provided interval.
  236. Returns a generator that yields each of these intervals
  237. in turn.
  238. Output intervals are built as subsets of the intervals in the
  239. first argument (self).
  240. If orig = True, also return the original interval that was
  241. (potentially) subsetted to make the one that is being
  242. returned.
  243. """
  244. if orig:
  245. for n in self.tree.intersect(interval.start, interval.end):
  246. i = n.obj
  247. subset = i.subset(max(i.start, interval.start),
  248. min(i.end, interval.end))
  249. yield (subset, i)
  250. else:
  251. for n in self.tree.intersect(interval.start, interval.end):
  252. i = n.obj
  253. subset = i.subset(max(i.start, interval.start),
  254. min(i.end, interval.end))
  255. yield subset
  256. cpdef intersects(self, Interval other):
  257. """Return True if this IntervalSet intersects another interval"""
  258. for n in self.tree.intersect(other.start, other.end):
  259. if n.obj.intersects(other):
  260. return True
  261. return False
  262. def find_end(self, timestamp_t t):
  263. """
  264. Return an Interval from this tree that ends at time t, or
  265. None if it doesn't exist.
  266. """
  267. n = self.tree.find_left_end(t)
  268. if n and n.obj.end == t:
  269. return n.obj
  270. return None