You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

332 lines
11 KiB

  1. """Interval, IntervalSet
  2. Represents an interval of time, and a set of such intervals.
  3. Intervals are half-open, i.e. they include data points with timestamps
  4. in the range [start, end).
  5. """
  6. # First implementation kept a sorted list of intervals and used
  7. # bisect() to optimize some operations, but this was too slow.
  8. # Second version was based on the quicksect implementation from
  9. # python-bx, modified slightly to handle floating point intervals.
  10. # This didn't support deletion.
  11. # Third version is more similar to the first version, using a rb-tree
  12. # instead of a simple sorted list to maintain O(log n) operations.
  13. # Fourth version is an optimized rb-tree that stores interval starts
  14. # and ends directly in the tree, like bxinterval did.
  15. from ..utils.time import float_time_to_string as ftts
  16. cimport rbtree
  17. cdef extern from "stdint.h":
  18. ctypedef unsigned long long uint64_t
  class IntervalError(Exception):
      """Raised for invalid interval operations, e.g. overlapping intervals."""
  cdef class Interval:
      """A half-open span of time, [start, end)."""

      cdef public double start, end

      def __init__(self, double start, double end):
          """
          Build an interval running from 'start' (inclusive) up to
          'end' (exclusive).  Both are arbitrary floats representing time.

          Raises IntervalError unless 'start' comes before 'end'.
          """
          # Zero-width intervals are rejected as well: being half-open,
          # an interval with start == end would contain nothing.
          if start >= end:
              raise IntervalError("start %s must precede end %s" % (start, end))
          self.start = start
          self.end = end

      def __repr__(self):
          bounds = ", ".join((repr(self.start), repr(self.end)))
          return "".join((self.__class__.__name__, "(", bounds, ")"))

      def __str__(self):
          return "".join(("[", ftts(self.start), " -> ", ftts(self.end), ")"))

      def __cmp__(self, Interval other):
          """
          Three-way comparison: order by start, then by end.

          Returns -1, 0 or 1.  Raises TypeError when 'other' is not an
          Interval.
          """
          if not isinstance(other, Interval):
              raise TypeError("bad type")
          if self.start == other.start:
              # Same start: the end time breaks the tie.
              if self.end < other.end:
                  return -1
              return 1 if self.end > other.end else 0
          return -1 if self.start < other.start else 1

      cpdef intersects(self, Interval other):
          """Return True if this interval and 'other' overlap."""
          # Touching end-to-start does not count as overlap, because the
          # end of an interval is excluded.
          return not (self.end <= other.start or self.start >= other.end)

      cpdef subset(self, double start, double end):
          """
          Return a new Interval covering [start, end), which must lie
          within this interval; raises IntervalError otherwise.
          """
          # A subclass that tracks additional data might override this.
          if not (start < self.start or end > self.end):
              return Interval(start, end)
          raise IntervalError("not a subset")
  cdef class DBInterval(Interval):
      """
      Like Interval, but also tracks corresponding start/end times and
      positions within the database.  These are not currently modified
      when subsets are taken, but can be used later to help zero in on
      database positions.

      The actual 'start' and 'end' will always fall within the database
      start and end, e.g.:
          db_start = 100, db_startpos = 10000
          start = 123
          end = 150
          db_end = 200, db_endpos = 20000
      """

      # Attributes are declared with 'cdef public'; 'cpdef' only applies
      # to functions and methods, not to variables.
      cdef public double db_start, db_end
      cdef public uint64_t db_startpos, db_endpos

      def __init__(self, start, end,
                   db_start, db_end,
                   db_startpos, db_endpos):
          """
          'start' and 'end' bound the interval itself (see Interval).

          'db_start' and 'db_end' are arbitrary floats that represent
          time.  Together they must cover the interval given by 'start'
          and 'end'; equal bounds are accepted.  'db_startpos' and
          'db_endpos' are arbitrary database position indicators that
          correspond to those points.

          Raises IntervalError if start >= end, or if the database
          times do not span the interval times.
          """
          Interval.__init__(self, start, end)
          self.db_start = db_start
          self.db_end = db_end
          self.db_startpos = db_startpos
          self.db_endpos = db_endpos
          if db_start > start or db_end < end:
              raise IntervalError("database times must span the interval times")

      def __repr__(self):
          fields = (self.start, self.end,
                    self.db_start, self.db_end,
                    self.db_startpos, self.db_endpos)
          args = ", ".join([repr(f) for f in fields])
          return self.__class__.__name__ + "(" + args + ")"

      cpdef subset(self, double start, double end):
          """
          Return a new DBInterval covering [start, end), which must lie
          within this interval.  The database times and positions are
          copied over unchanged.

          Raises IntervalError if the requested range is not a subset.
          """
          if start < self.start or end > self.end:
              raise IntervalError("not a subset")
          return DBInterval(start, end,
                            self.db_start, self.db_end,
                            self.db_startpos, self.db_endpos)
  cdef class IntervalSet:
      """
      A non-intersecting set of intervals, stored in an rbtree.RBTree.
      """

      cdef public rbtree.RBTree tree

      def __init__(self, source=None):
          """
          'source' is an Interval or IntervalSet to add.
          """
          self.tree = rbtree.RBTree()
          if source is not None:
              self += source

      def __iter__(self):
          """Yield the stored intervals in the tree's iteration order."""
          for node in self.tree:
              # Nodes whose 'obj' is falsy carry no interval; skip them.
              if node.obj:
                  yield node.obj

      def __len__(self):
          """Return the number of intervals (walks the whole tree)."""
          return sum(1 for _ in self)

      def __repr__(self):
          items = ", ".join([repr(x) for x in self])
          return self.__class__.__name__ + "([" + items + "])"

      def __str__(self):
          return "[" + ", ".join([str(x) for x in self]) + "]"

      def __match__(self, other):
          """Test equality of two IntervalSets.

          Treats adjacent Intervals as equivalent to one long interval,
          so this function really tests whether the IntervalSets cover
          the same spans of time.

          This isn't particularly efficient, but it shouldn't get used
          in the general case.
          """
          mine = list(self)
          theirs = list(other)
          a = 0
          b = 0
          # False while positioned between spans, True while walking
          # through a span that both sets have started.
          in_span = False
          try:
              while True:
                  if not in_span:
                      # To match, we need to be finished with both sets...
                      if a >= len(mine) and b >= len(theirs):
                          return True
                      # ...or the next spans need to start together.  If
                      # only one side is exhausted, the lookup raises
                      # IndexError, which is handled below.
                      if mine[a].start != theirs[b].start:
                          return False
                      in_span = True
                  elif mine[a].end == theirs[b].end:
                      # Both spans end together; move past them.
                      a += 1
                      b += 1
                      in_span = False
                  elif mine[a].end < theirs[b].end:
                      # Our interval ends first, so it must be adjacent
                      # to our next one for the span to continue.
                      cur = mine[a]
                      nxt = mine[a + 1]
                      if not (cur.end == nxt.start or nxt.end == cur.start):
                          return False
                      a += 1
                  else:
                      # Their interval ends first; same adjacency rule.
                      cur = theirs[b]
                      nxt = theirs[b + 1]
                      if not (cur.end == nxt.start or nxt.end == cur.start):
                          return False
                      b += 1
          except IndexError:
              # One side ran out of intervals before the other.
              return False

      # Use __richcmp__ instead of __eq__, __ne__ for Cython.
      def __richcmp__(self, other, int op):
          """
          Rich comparison.  Only == (op 2) and != (op 3) are meaningful
          and are answered by __match__; every other operator returns
          False.
          """
          if op == 2:    # ==
              return self.__match__(other)
          if op == 3:    # !=
              return not self.__match__(other)
          return False

      def __iadd__(self, object other not None):
          """Inplace add -- modifies self

          'other' is either a single Interval or an iterable of things
          that can themselves be added.

          This throws an IntervalError if the regions being added
          intersect what is already in the set."""
          if isinstance(other, Interval):
              if self.intersects(other):
                  raise IntervalError("Tried to add overlapping interval "
                                      "to this set")
              self.tree.insert(rbtree.RBNode(other.start, other.end, other))
          else:
              for x in other:
                  self.__iadd__(x)
          return self

      def iadd_nocheck(self, Interval other not None):
          """Inplace add -- modifies self.

          'Optimized' version that doesn't check for intersection and
          only inserts the new interval into the tree."""
          self.tree.insert(rbtree.RBNode(other.start, other.end, other))

      def __isub__(self, Interval other not None):
          """Inplace subtract -- modifies self

          Removes an interval from the set.  Must exist exactly
          as provided -- cannot remove a subset of an existing interval.

          Raises IntervalError if no node with the same start and end
          is found."""
          node = self.tree.find(other.start, other.end)
          if node is None:
              raise IntervalError("interval " + str(other) + " not in tree")
          self.tree.delete(node)
          return self

      def __add__(self, other not None):
          """Add -- returns a new object"""
          new = IntervalSet(self)
          new += IntervalSet(other)
          return new

      def __and__(self, other not None):
          """
          Compute a new IntervalSet from the intersection of two others

          'other' may be an IntervalSet or a single Interval.

          Output intervals are built as subsets of the intervals in the
          first argument (self).
          """
          out = IntervalSet()
          if isinstance(other, IntervalSet):
              for x in other:
                  for piece in self.intersection(x):
                      out.tree.insert(
                          rbtree.RBNode(piece.start, piece.end, piece))
          else:
              for piece in self.intersection(other):
                  out.tree.insert(rbtree.RBNode(piece.start, piece.end, piece))
          return out

      def intersection(self, Interval interval not None, orig = False):
          """
          Compute a sequence of intervals that correspond to the
          intersection between `self` and the provided interval.
          Returns a generator that yields each of these intervals
          in turn.

          Output intervals are built as subsets of the intervals in the
          first argument (self).

          If orig = True, also return the original interval that was
          (potentially) subsetted to make the one that is being
          returned.
          """
          # The typed 'not None' argument already restricts 'interval' to
          # Interval instances, so no extra isinstance check is needed.
          for n in self.tree.intersect(interval.start, interval.end):
              i = n.obj
              if not i:
                  continue
              if i.start >= interval.start and i.end <= interval.end:
                  # Entirely inside the query range: hand it back as-is.
                  piece = i
              else:
                  # Clip to the part shared with the query range.
                  piece = i.subset(max(i.start, interval.start),
                                   min(i.end, interval.end))
              if orig:
                  yield (piece, i)
              else:
                  yield piece

      cpdef intersects(self, Interval other):
          """Return True if this IntervalSet intersects another interval"""
          for n in self.tree.intersect(other.start, other.end):
              # Apply the same falsy-'obj' guard that __iter__ and
              # intersection() use before touching the stored interval.
              if n.obj and n.obj.intersects(other):
                  return True
          return False

      def find_end(self, double t):
          """
          Return an Interval from this tree that ends at time t, or
          None if it doesn't exist.
          """
          n = self.tree.find_left_end(t)
          # Guard against a missing node and against a node without a
          # stored interval before comparing end times.
          if n and n.obj and n.obj.end == t:
              return n.obj
          return None