You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

283 lines
9.5 KiB

  1. """Interval and IntervalSet
  2. Represents an interval of time, and a set of such intervals.
  3. Intervals are closed, i.e. they include timestamps [start, end]
  4. """
  5. # First implementation kept a sorted list of intervals and used
  6. # bisect() to optimize some operations, but this was too slow.
  7. # Second version was based on the quicksect implementation from
  8. # python-bx, modified slightly to handle floating point intervals.
  9. # This didn't support deletion.
  10. # Third version is more similar to the first version, using a rb-tree
  11. # instead of a simple sorted list to maintain O(log n) operations.
  12. # Fourth version is an optimized rb-tree that stores interval starts
  13. # and ends directly in the tree, like bxinterval did.
  14. import rbtree
  15. class IntervalError(Exception):
  16. """Error due to interval overlap, etc"""
  17. pass
  18. class Interval(object):
  19. """Represents an interval of time."""
  20. def __init__(self, start, end):
  21. """
  22. 'start' and 'end' are arbitrary floats that represent time
  23. """
  24. if start > end:
  25. raise IntervalError("start %s must precede end %s" % (start, end))
  26. self.start = float(start)
  27. self.end = float(end)
  28. def __repr__(self):
  29. s = repr(self.start) + ", " + repr(self.end)
  30. return self.__class__.__name__ + "(" + s + ")"
  31. def __str__(self):
  32. return "[" + str(self.start) + " -> " + str(self.end) + "]"
  33. def __cmp__(self, other):
  34. """Compare two intervals. If non-equal, order by start then end"""
  35. if not isinstance(other, Interval):
  36. raise TypeError("bad type")
  37. if self.start == other.start:
  38. if self.end < other.end:
  39. return -1
  40. if self.end > other.end:
  41. return 1
  42. return 0
  43. if self.start < other.start:
  44. return -1
  45. return 1
  46. def intersects(self, other):
  47. """Return True if two Interval objects intersect"""
  48. if (self.end <= other.start or self.start >= other.end):
  49. return False
  50. return True
  51. def subset(self, start, end):
  52. """Return a new Interval that is a subset of this one"""
  53. # A subclass that tracks additional data might override this.
  54. if start < self.start or end > self.end:
  55. raise IntervalError("not a subset")
  56. return Interval(start, end)
  57. class DBInterval(Interval):
  58. """
  59. Like Interval, but also tracks corresponding start/end times and
  60. positions within the database. These are not currently modified
  61. when subsets are taken, but can be used later to help zero in on
  62. database positions.
  63. The actual 'start' and 'end' will always fall within the database
  64. start and end, e.g.:
  65. db_start = 100, db_startpos = 10000
  66. start = 123
  67. end = 150
  68. db_end = 200, db_endpos = 20000
  69. """
  70. def __init__(self, start, end,
  71. db_start, db_end,
  72. db_startpos, db_endpos):
  73. """
  74. 'db_start' and 'db_end' are arbitrary floats that represent
  75. time. They must be a strict superset of the time interval
  76. covered by 'start' and 'end'. The 'db_startpos' and
  77. 'db_endpos' are arbitrary database position indicators that
  78. correspond to those points.
  79. """
  80. Interval.__init__(self, start, end)
  81. self.db_start = db_start
  82. self.db_end = db_end
  83. self.db_startpos = db_startpos
  84. self.db_endpos = db_endpos
  85. if db_start > start or db_end < end:
  86. raise IntervalError("database times must span the interval times")
  87. def __repr__(self):
  88. s = repr(self.start) + ", " + repr(self.end)
  89. s += ", " + repr(self.db_start) + ", " + repr(self.db_end)
  90. s += ", " + repr(self.db_startpos) + ", " + repr(self.db_endpos)
  91. return self.__class__.__name__ + "(" + s + ")"
  92. def subset(self, start, end):
  93. """
  94. Return a new DBInterval that is a subset of this one
  95. """
  96. if start < self.start or end > self.end:
  97. raise IntervalError("not a subset")
  98. return DBInterval(start, end,
  99. self.db_start, self.db_end,
  100. self.db_startpos, self.db_endpos)
  101. class IntervalSet(object):
  102. """
  103. A non-intersecting set of intervals.
  104. """
  105. def __init__(self, source=None):
  106. """
  107. 'source' is an Interval or IntervalSet to add.
  108. """
  109. self.tree = rbtree.RBTree()
  110. if source is not None:
  111. self += source
  112. def __iter__(self):
  113. for node in self.tree:
  114. if node.obj:
  115. yield node.obj
  116. def __len__(self):
  117. return sum(1 for x in self)
  118. def __repr__(self):
  119. descs = [ repr(x) for x in self ]
  120. return self.__class__.__name__ + "([" + ", ".join(descs) + "])"
  121. def __str__(self):
  122. descs = [ str(x) for x in self ]
  123. return "[" + ", ".join(descs) + "]"
  124. def __eq__(self, other):
  125. # This isn't particularly efficient, but it shouldn't get used in the
  126. # general case.
  127. """Test equality of two IntervalSets.
  128. Treats adjacent Intervals as equivalent to one long interval,
  129. so this function really tests whether the IntervalSets cover
  130. the same spans of time."""
  131. i = 0
  132. j = 0
  133. outside = True
  134. def is_adjacent(a, b):
  135. """Return True if two Intervals are adjacent (same end or start)"""
  136. if a.end == b.start or b.end == a.start:
  137. return True
  138. else:
  139. return False
  140. this = [ x for x in self ]
  141. that = [ x for x in other ]
  142. try:
  143. while True:
  144. if (outside):
  145. # To match, we need to be finished both sets
  146. if (i >= len(this) and j >= len(that)):
  147. return True
  148. # Or the starts need to match
  149. if (this[i].start != that[j].start):
  150. return False
  151. outside = False
  152. else:
  153. # We can move on if the two interval ends match
  154. if (this[i].end == that[j].end):
  155. i += 1
  156. j += 1
  157. outside = True
  158. else:
  159. # Whichever ends first needs to be adjacent to the next
  160. if (this[i].end < that[j].end):
  161. if (not is_adjacent(this[i],this[i+1])):
  162. return False
  163. i += 1
  164. else:
  165. if (not is_adjacent(that[j],that[j+1])):
  166. return False
  167. j += 1
  168. except IndexError:
  169. return False
  170. def __ne__(self, other):
  171. return not self.__eq__(other)
  172. def __iadd__(self, other):
  173. """Inplace add -- modifies self
  174. This throws an exception if the regions being added intersect."""
  175. if isinstance(other, Interval):
  176. if self.intersects(other):
  177. raise IntervalError("Tried to add overlapping interval "
  178. "to this set")
  179. self.tree.insert(rbtree.RBNode(other.start, other.end, other))
  180. else:
  181. for x in other:
  182. self.__iadd__(x)
  183. return self
  184. def __isub__(self, other):
  185. """Inplace subtract -- modifies self
  186. Removes an interval from the set. Must exist exactly
  187. as provided -- cannot remove a subset of an existing interval."""
  188. i = self.tree.find(other.start, other.end)
  189. if i is None:
  190. raise IntervalError("interval " + str(other) + " not in tree")
  191. self.tree.delete(i)
  192. return self
  193. def __add__(self, other):
  194. """Add -- returns a new object"""
  195. new = IntervalSet(self)
  196. new += IntervalSet(other)
  197. return new
  198. def __and__(self, other):
  199. """
  200. Compute a new IntervalSet from the intersection of two others
  201. Output intervals are built as subsets of the intervals in the
  202. first argument (self).
  203. """
  204. out = IntervalSet()
  205. if not isinstance(other, IntervalSet):
  206. for i in self.intersection(other):
  207. out.tree.insert(rbtree.RBNode(i.start, i.end, i))
  208. else:
  209. for x in other:
  210. for i in self.intersection(x):
  211. out.tree.insert(rbtree.RBNode(i.start, i.end, i))
  212. return out
  213. def intersection(self, interval):
  214. """
  215. Compute a sequence of intervals that correspond to the
  216. intersection between `self` and the provided interval.
  217. Returns a generator that yields each of these intervals
  218. in turn.
  219. Output intervals are built as subsets of the intervals in the
  220. first argument (self).
  221. """
  222. if not isinstance(interval, Interval):
  223. raise TypeError("bad type")
  224. for n in self.tree.intersect(interval.start, interval.end):
  225. i = n.obj
  226. if i:
  227. if i.start >= interval.start and i.end <= interval.end:
  228. yield i
  229. else:
  230. subset = i.subset(max(i.start, interval.start),
  231. min(i.end, interval.end))
  232. yield subset
  233. def intersects(self, other):
  234. """Return True if this IntervalSet intersects another interval"""
  235. for n in self.tree.intersect(other.start, other.end):
  236. if n.obj.intersects(other):
  237. return True
  238. return False