You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

389 lines
14 KiB

  1. """Interval, IntervalSet
  2. Represents an interval of time, and a set of such intervals.
  3. Intervals are half-open, i.e., they include data points with timestamps
  4. in the range [start, end).
  5. """
  6. # First implementation kept a sorted list of intervals and used
  7. # bisect() to optimize some operations, but this was too slow.
  8. # Second version was based on the quicksect implementation from
  9. # python-bx, modified slightly to handle floating point intervals.
  10. # This didn't support deletion.
  11. # Third version is more similar to the first version, using a rb-tree
  12. # instead of a simple sorted list to maintain O(log n) operations.
  13. # Fourth version is an optimized rb-tree that stores interval starts
  14. # and ends directly in the tree, like bxinterval did.
  15. from ..utils.time import min_timestamp as nilmdb_min_timestamp
  16. from ..utils.time import max_timestamp as nilmdb_max_timestamp
  17. from ..utils.time import timestamp_to_string
  18. from ..utils.iterator import imerge
  19. import itertools
  20. cimport rbtree
  21. from libc.stdint cimport uint64_t, int64_t
  22. ctypedef int64_t timestamp_t
  23. class IntervalError(Exception):
  24. """Error due to interval overlap, etc"""
  25. pass
  26. cdef class Interval:
  27. """Represents an interval of time."""
  28. cdef public timestamp_t start, end
  29. def __init__(self, timestamp_t start, timestamp_t end):
  30. """
  31. 'start' and 'end' are arbitrary numbers that represent time
  32. """
  33. if start >= end:
  34. # Explicitly disallow zero-width intervals (since they're half-open)
  35. raise IntervalError("start %s must precede end %s" % (start, end))
  36. self.start = start
  37. self.end = end
  38. def __repr__(self):
  39. s = repr(self.start) + ", " + repr(self.end)
  40. return self.__class__.__name__ + "(" + s + ")"
  41. def __str__(self):
  42. return ("[" + timestamp_to_string(self.start) +
  43. " -> " + timestamp_to_string(self.end) + ")")
  44. def __cmp__(self, Interval other):
  45. """Compare two intervals. If non-equal, order by start then end"""
  46. if not isinstance(other, Interval):
  47. raise TypeError("bad type")
  48. if self.start == other.start:
  49. if self.end < other.end:
  50. return -1
  51. if self.end > other.end:
  52. return 1
  53. return 0
  54. if self.start < other.start:
  55. return -1
  56. return 1
  57. cpdef intersects(self, Interval other):
  58. """Return True if two Interval objects intersect"""
  59. if (self.end <= other.start or self.start >= other.end):
  60. return False
  61. return True
  62. cpdef subset(self, timestamp_t start, timestamp_t end):
  63. """Return a new Interval that is a subset of this one"""
  64. # A subclass that tracks additional data might override this.
  65. if start < self.start or end > self.end:
  66. raise IntervalError("not a subset")
  67. return Interval(start, end)
  68. cdef class DBInterval(Interval):
  69. """
  70. Like Interval, but also tracks corresponding start/end times and
  71. positions within the database. These are not currently modified
  72. when subsets are taken, but can be used later to help zero in on
  73. database positions.
  74. The actual 'start' and 'end' will always fall within the database
  75. start and end, e.g.:
  76. db_start = 100, db_startpos = 10000
  77. start = 123
  78. end = 150
  79. db_end = 200, db_endpos = 20000
  80. """
  81. cpdef public timestamp_t db_start, db_end
  82. cpdef public uint64_t db_startpos, db_endpos
  83. def __init__(self, start, end,
  84. db_start, db_end,
  85. db_startpos, db_endpos):
  86. """
  87. 'db_start' and 'db_end' are arbitrary numbers that represent
  88. time. They must be a strict superset of the time interval
  89. covered by 'start' and 'end'. The 'db_startpos' and
  90. 'db_endpos' are arbitrary database position indicators that
  91. correspond to those points.
  92. """
  93. Interval.__init__(self, start, end)
  94. self.db_start = db_start
  95. self.db_end = db_end
  96. self.db_startpos = db_startpos
  97. self.db_endpos = db_endpos
  98. if db_start > start or db_end < end:
  99. raise IntervalError("database times must span the interval times")
  100. def __repr__(self):
  101. s = repr(self.start) + ", " + repr(self.end)
  102. s += ", " + repr(self.db_start) + ", " + repr(self.db_end)
  103. s += ", " + repr(self.db_startpos) + ", " + repr(self.db_endpos)
  104. return self.__class__.__name__ + "(" + s + ")"
  105. cpdef subset(self, timestamp_t start, timestamp_t end):
  106. """
  107. Return a new DBInterval that is a subset of this one
  108. """
  109. if start < self.start or end > self.end:
  110. raise IntervalError("not a subset")
  111. return DBInterval(start, end,
  112. self.db_start, self.db_end,
  113. self.db_startpos, self.db_endpos)
  114. cdef class IntervalSet:
  115. """
  116. A non-intersecting set of intervals.
  117. """
  118. cdef public rbtree.RBTree tree
  119. def __init__(self, source=None):
  120. """
  121. 'source' is an Interval or IntervalSet to add.
  122. """
  123. self.tree = rbtree.RBTree()
  124. if source is not None:
  125. self += source
  126. def __iter__(self):
  127. for node in self.tree:
  128. if node.obj:
  129. yield node.obj
  130. def __len__(self):
  131. return sum(1 for x in self)
  132. def __repr__(self):
  133. descs = [ repr(x) for x in self ]
  134. return self.__class__.__name__ + "([" + ", ".join(descs) + "])"
  135. def __str__(self):
  136. descs = [ str(x) for x in self ]
  137. return "[" + ", ".join(descs) + "]"
  138. def __match__(self, other):
  139. # This isn't particularly efficient, but it shouldn't get used in the
  140. # general case.
  141. """Test equality of two IntervalSets.
  142. Treats adjacent Intervals as equivalent to one long interval,
  143. so this function really tests whether the IntervalSets cover
  144. the same spans of time."""
  145. i = 0
  146. j = 0
  147. outside = True
  148. def is_adjacent(a, b):
  149. """Return True if two Intervals are adjacent (same end or start)"""
  150. if a.end == b.start or b.end == a.start:
  151. return True
  152. else:
  153. return False
  154. this = list(self)
  155. that = list(other)
  156. try:
  157. while True:
  158. if (outside):
  159. # To match, we need to be finished both sets
  160. if (i >= len(this) and j >= len(that)):
  161. return True
  162. # Or the starts need to match
  163. if (this[i].start != that[j].start):
  164. return False
  165. outside = False
  166. else:
  167. # We can move on if the two interval ends match
  168. if (this[i].end == that[j].end):
  169. i += 1
  170. j += 1
  171. outside = True
  172. else:
  173. # Whichever ends first needs to be adjacent to the next
  174. if (this[i].end < that[j].end):
  175. if (not is_adjacent(this[i],this[i+1])):
  176. return False
  177. i += 1
  178. else:
  179. if (not is_adjacent(that[j],that[j+1])):
  180. return False
  181. j += 1
  182. except IndexError:
  183. return False
  184. # Use __richcmp__ instead of __eq__, __ne__ for Cython.
  185. def __richcmp__(self, other, int op):
  186. if op == 2: # ==
  187. return self.__match__(other)
  188. elif op == 3: # !=
  189. return not self.__match__(other)
  190. return False
  191. #def __eq__(self, other):
  192. # return self.__match__(other)
  193. #
  194. #def __ne__(self, other):
  195. # return not self.__match__(other)
  196. def __iadd__(self, object other not None):
  197. """Inplace add -- modifies self
  198. This throws an exception if the regions being added intersect."""
  199. if isinstance(other, Interval):
  200. if self.intersects(other):
  201. raise IntervalError("Tried to add overlapping interval "
  202. "to this set")
  203. self.tree.insert(rbtree.RBNode(other.start, other.end, other))
  204. else:
  205. for x in other:
  206. self.__iadd__(x)
  207. return self
  208. def iadd_nocheck(self, Interval other not None):
  209. """Inplace add -- modifies self.
  210. 'Optimized' version that doesn't check for intersection and
  211. only inserts the new interval into the tree."""
  212. self.tree.insert(rbtree.RBNode(other.start, other.end, other))
  213. def __isub__(self, Interval other not None):
  214. """Inplace subtract -- modifies self
  215. Removes an interval from the set. Must exist exactly
  216. as provided -- cannot remove a subset of an existing interval."""
  217. i = self.tree.find(other.start, other.end)
  218. if i is None:
  219. raise IntervalError("interval " + str(other) + " not in tree")
  220. self.tree.delete(i)
  221. return self
  222. def __add__(self, other not None):
  223. """Add -- returns a new object"""
  224. new = IntervalSet(self)
  225. new += IntervalSet(other)
  226. return new
  227. def __and__(self, other not None):
  228. """
  229. Compute a new IntervalSet from the intersection of this
  230. IntervalSet with one other interval.
  231. Output intervals are built as subsets of the intervals in the
  232. first argument (self).
  233. """
  234. out = IntervalSet()
  235. for i in self.intersection(other):
  236. out.tree.insert(rbtree.RBNode(i.start, i.end, i))
  237. return out
  238. def intersection(self, Interval interval not None, orig = False):
  239. """
  240. Compute a sequence of intervals that correspond to the
  241. intersection between `self` and the provided interval.
  242. Returns a generator that yields each of these intervals
  243. in turn.
  244. Output intervals are built as subsets of the intervals in the
  245. first argument (self).
  246. If orig = True, also return the original interval that was
  247. (potentially) subsetted to make the one that is being
  248. returned.
  249. """
  250. if not isinstance(interval, Interval):
  251. raise TypeError("bad type")
  252. for n in self.tree.intersect(interval.start, interval.end):
  253. i = n.obj
  254. if i:
  255. if i.start >= interval.start and i.end <= interval.end:
  256. if orig:
  257. yield (i, i)
  258. else:
  259. yield i
  260. else:
  261. subset = i.subset(max(i.start, interval.start),
  262. min(i.end, interval.end))
  263. if orig:
  264. yield (subset, i)
  265. else:
  266. yield subset
  267. def set_difference(self, IntervalSet other not None,
  268. Interval bounds = None):
  269. """
  270. Compute the difference (self \\ other) between this
  271. IntervalSet and the given IntervalSet; i.e., the ranges
  272. that are present in 'self' but not 'other'.
  273. If 'bounds' is not None, results are limited to the range
  274. specified by the interval 'bounds'.
  275. Returns a generator that yields each interval in turn.
  276. Output intervals are built as subsets of the intervals in the
  277. first argument (self).
  278. """
  279. # Iterate through all starts and ends in sorted order. Add a
  280. # tag to the iterator so that we can figure out which one they
  281. # were, after sorting.
  282. def decorate(it, key_start, key_end):
  283. for i in it:
  284. yield i.start, key_start, i
  285. yield i.end, key_end, i
  286. if bounds is None:
  287. bounds = Interval(nilmdb_min_timestamp,
  288. nilmdb_max_timestamp)
  289. self_iter = decorate(self.intersection(bounds), 0, 2)
  290. other_iter = decorate(other.intersection(bounds), 1, 3)
  291. # Now iterate over the timestamps of each start and end.
  292. # At each point, evaluate which type of end it is, to determine
  293. # how to build up the output intervals.
  294. self_interval = None
  295. other_interval = None
  296. out_start = None
  297. for (ts, k, i) in imerge(self_iter, other_iter):
  298. if k == 0:
  299. # start self interval
  300. self_interval = i
  301. if other_interval is None:
  302. out_start = ts
  303. elif k == 1:
  304. # start other interval
  305. other_interval = i
  306. if out_start is not None and out_start != ts:
  307. yield self_interval.subset(out_start, ts)
  308. out_start = None
  309. elif k == 2:
  310. # end self interval
  311. if out_start is not None and out_start != ts:
  312. yield self_interval.subset(out_start, ts)
  313. out_start = None
  314. self_interval = None
  315. elif k == 3:
  316. # end other interval
  317. other_interval = None
  318. if self_interval:
  319. out_start = ts
  320. cpdef intersects(self, Interval other):
  321. """Return True if this IntervalSet intersects another interval"""
  322. for n in self.tree.intersect(other.start, other.end):
  323. if n.obj.intersects(other):
  324. return True
  325. return False
  326. def find_end(self, timestamp_t t):
  327. """
  328. Return an Interval from this tree that ends at time t, or
  329. None if it doesn't exist.
  330. """
  331. n = self.tree.find_left_end(t)
  332. if n and n.obj.end == t:
  333. return n.obj
  334. return None