You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

445 lines
15 KiB

  1. # -*- coding: utf-8 -*-
  2. import nilmdb
  3. from nilmdb.utils.printf import *
  4. import datetime_tz
  5. from nose.tools import *
  6. from nose.tools import assert_raises
  7. import itertools
  8. from nilmdb.utils.interval import IntervalError
  9. from nilmdb.server.interval import Interval, DBInterval, IntervalSet
  10. # so we can test them separately
  11. from nilmdb.utils.interval import Interval as UtilsInterval
  12. from testutil.helpers import *
  13. import unittest
# Module-wide switch consulted by render(): the renderer is only asked for
# a live render when this is True.  Set to False to skip live renders.
do_live_renders = False
  16. def render(iset, description = "", live = True):
  17. import testutil.renderdot as renderdot
  18. r = renderdot.RBTreeRenderer(iset.tree)
  19. return r.render(description, live and do_live_renders)
  20. def makeset(string):
  21. """Build an IntervalSet from a string, for testing purposes
  22. Each character is 1 second
  23. [ = interval start
  24. | = interval end + next start
  25. ] = interval end
  26. . = zero-width interval (identical start and end)
  27. anything else is ignored
  28. """
  29. iset = IntervalSet()
  30. for i, c in enumerate(string):
  31. day = i + 10000
  32. if (c == "["):
  33. start = day
  34. elif (c == "|"):
  35. iset += Interval(start, day)
  36. start = day
  37. elif (c == ")"):
  38. iset += Interval(start, day)
  39. del start
  40. elif (c == "."):
  41. iset += Interval(day, day)
  42. return iset
  43. class TestInterval:
  44. def test_client_interval(self):
  45. # Run interval tests against the Python version of Interval.
  46. global Interval
  47. NilmdbInterval = Interval
  48. Interval = UtilsInterval
  49. self.test_interval()
  50. self.test_interval_intersect()
  51. Interval = NilmdbInterval
  52. # Other helpers in nilmdb.utils.interval
  53. i = [ UtilsInterval(1,2), UtilsInterval(2,3), UtilsInterval(4,5) ]
  54. eq_(list(nilmdb.utils.interval.optimize(i)),
  55. [ UtilsInterval(1,3), UtilsInterval(4,5) ])
  56. eq_(list(nilmdb.utils.interval.optimize([])), [])
  57. eq_(UtilsInterval(1234567890123456, 1234567890654321).human_string(),
  58. "[ Fri, 13 Feb 2009 18:31:30.123456 -0500 -> " +
  59. "Fri, 13 Feb 2009 18:31:30.654321 -0500 ]")
  60. def test_interval(self):
  61. # Test Interval class
  62. os.environ['TZ'] = "America/New_York"
  63. datetime_tz._localtz = None
  64. (d1, d2, d3) = [ nilmdb.utils.time.parse_time(x)
  65. for x in [ "03/24/2012", "03/25/2012", "03/26/2012" ] ]
  66. # basic construction
  67. i = Interval(d1, d2)
  68. i = Interval(d1, d3)
  69. eq_(i.start, d1)
  70. eq_(i.end, d3)
  71. # assignment is allowed, but not verified
  72. i.start = d2
  73. #with assert_raises(IntervalError):
  74. # i.end = d1
  75. i.start = d1
  76. i.end = d2
  77. # end before start
  78. with assert_raises(IntervalError):
  79. i = Interval(d3, d1)
  80. # compare
  81. assert(Interval(d1, d2) == Interval(d1, d2))
  82. assert(Interval(d1, d2) <= Interval(d1, d2))
  83. assert(Interval(d1, d2) >= Interval(d1, d2))
  84. assert(Interval(d1, d2) != Interval(d1, d3))
  85. assert(Interval(d1, d2) < Interval(d1, d3))
  86. assert(Interval(d1, d2) <= Interval(d1, d3))
  87. assert(Interval(d1, d3) > Interval(d1, d2))
  88. assert(Interval(d1, d3) >= Interval(d1, d2))
  89. assert(Interval(d1, d2) < Interval(d2, d3))
  90. assert(Interval(d1, d3) < Interval(d2, d3))
  91. assert(Interval(d2, d2+1) > Interval(d1, d3))
  92. assert(Interval(d3, d3+1) == Interval(d3, d3+1))
  93. #with assert_raises(TypeError): # was AttributeError, that's wrong
  94. # x = (i == 123)
  95. # subset
  96. eq_(Interval(d1, d3).subset(d1, d2), Interval(d1, d2))
  97. with assert_raises(IntervalError):
  98. x = Interval(d2, d3).subset(d1, d2)
  99. # big integers, negative integers
  100. x = Interval(5000111222000000, 6000111222000000)
  101. eq_(str(x), "[5000111222000000 -> 6000111222000000)")
  102. x = Interval(-5000111222000000, -4000111222000000)
  103. eq_(str(x), "[-5000111222000000 -> -4000111222000000)")
  104. # misc
  105. i = Interval(d1, d2)
  106. eq_(repr(i), repr(eval(repr(i))))
  107. eq_(str(i), "[1332561600000000 -> 1332648000000000)")
  108. def test_interval_intersect(self):
  109. # Test Interval intersections
  110. dates = [ 100, 200, 300, 400 ]
  111. perm = list(itertools.permutations(dates, 2))
  112. prod = list(itertools.product(perm, perm))
  113. should_intersect = {
  114. False: [4, 5, 8, 20, 48, 56, 60, 96, 97, 100],
  115. True: [0, 1, 2, 12, 13, 14, 16, 17, 24, 25, 26, 28, 29,
  116. 32, 49, 50, 52, 53, 61, 62, 64, 65, 68, 98, 101, 104]
  117. }
  118. for i,((a,b),(c,d)) in enumerate(prod):
  119. try:
  120. i1 = Interval(a, b)
  121. i2 = Interval(c, d)
  122. eq_(i1.intersects(i2), i2.intersects(i1))
  123. in_(i, should_intersect[i1.intersects(i2)])
  124. except IntervalError:
  125. assert(i not in should_intersect[True] and
  126. i not in should_intersect[False])
  127. with assert_raises(TypeError):
  128. x = i1.intersects(1234)
  129. def test_intervalset_construct(self):
  130. # Test IntervalSet construction
  131. dates = [ 100, 200, 300, 400 ]
  132. a = Interval(dates[0], dates[1])
  133. b = Interval(dates[1], dates[2])
  134. c = Interval(dates[0], dates[2])
  135. d = Interval(dates[2], dates[3])
  136. iseta = IntervalSet(a)
  137. isetb = IntervalSet([a, b])
  138. isetc = IntervalSet([a])
  139. ne_(iseta, isetb)
  140. eq_(iseta, isetc)
  141. with assert_raises(TypeError):
  142. x = iseta != 3
  143. ne_(IntervalSet(a), IntervalSet(b))
  144. # Note that assignment makes a new reference (not a copy)
  145. isetd = IntervalSet(isetb)
  146. isete = isetd
  147. eq_(isetd, isetb)
  148. eq_(isetd, isete)
  149. isetd -= a
  150. ne_(isetd, isetb)
  151. eq_(isetd, isete)
  152. # test iterator
  153. for interval in iseta:
  154. pass
  155. # overlap
  156. with assert_raises(IntervalError):
  157. x = IntervalSet([a, b, c])
  158. # bad types
  159. with assert_raises(Exception):
  160. x = IntervalSet([1, 2])
  161. iset = IntervalSet(isetb) # test iterator
  162. eq_(iset, isetb)
  163. eq_(len(iset), 2)
  164. eq_(len(IntervalSet()), 0)
  165. # Test adding
  166. iset = IntervalSet(a)
  167. iset += IntervalSet(b)
  168. eq_(iset, IntervalSet([a, b]))
  169. iset = IntervalSet(a)
  170. iset += b
  171. eq_(iset, IntervalSet([a, b]))
  172. iset = IntervalSet(a)
  173. iset.iadd_nocheck(b)
  174. eq_(iset, IntervalSet([a, b]))
  175. iset = IntervalSet(a) + IntervalSet(b)
  176. eq_(iset, IntervalSet([a, b]))
  177. iset = IntervalSet(b) + a
  178. eq_(iset, IntervalSet([a, b]))
  179. # A set consisting of [0-1],[1-2] should match a set consisting of [0-2]
  180. eq_(IntervalSet([a,b]), IntervalSet([c]))
  181. # Etc
  182. ne_(IntervalSet([a,d]), IntervalSet([c]))
  183. ne_(IntervalSet([c]), IntervalSet([a,d]))
  184. ne_(IntervalSet([c,d]), IntervalSet([b,d]))
  185. # misc
  186. eq_(repr(iset), repr(eval(repr(iset))))
  187. eq_(str(iset),
  188. "[[100 -> 200), [200 -> 300)]")
  189. def test_intervalset_geniset(self):
  190. # Test basic iset construction
  191. eq_(makeset(" [----) "),
  192. makeset(" [-|--) "))
  193. eq_(makeset("[) [--) ") +
  194. makeset(" [) [--)"),
  195. makeset("[|) [-----)"))
  196. eq_(makeset(" [-------)"),
  197. makeset(" [-|-----|"))
  198. def test_intervalset_intersect_difference(self):
  199. # Test intersection (&)
  200. with assert_raises(TypeError): # was AttributeError
  201. x = makeset("[--)") & 1234
  202. def do_test(a, b, c, d):
  203. # a & b == c (using nilmdb.server.interval)
  204. ab = IntervalSet()
  205. for x in b:
  206. for i in (a & x):
  207. ab += i
  208. eq_(ab,c)
  209. # a & b == c (using nilmdb.utils.interval)
  210. eq_(IntervalSet(nilmdb.utils.interval.intersection(a,b)), c)
  211. # a \ b == d
  212. eq_(IntervalSet(nilmdb.utils.interval.set_difference(a,b)), d)
  213. # Intersection with intervals
  214. do_test(makeset("[---|---)[)"),
  215. makeset(" [------) "),
  216. makeset(" [-----) "), # intersection
  217. makeset("[-) [)")) # difference
  218. do_test(makeset("[---------)"),
  219. makeset(" [---) "),
  220. makeset(" [---) "), # intersection
  221. makeset("[) [----)")) # difference
  222. do_test(makeset(" [---) "),
  223. makeset("[---------)"),
  224. makeset(" [---) "), # intersection
  225. makeset(" ")) # difference
  226. do_test(makeset(" [-----)"),
  227. makeset(" [-----) "),
  228. makeset(" [--) "), # intersection
  229. makeset(" [--)")) # difference
  230. do_test(makeset(" [--) [--)"),
  231. makeset(" [------) "),
  232. makeset(" [-) [-) "), # intersection
  233. makeset(" [) [)")) # difference
  234. do_test(makeset(" [---)"),
  235. makeset(" [--) "),
  236. makeset(" "), # intersection
  237. makeset(" [---)")) # difference
  238. do_test(makeset(" [-|---)"),
  239. makeset(" [-----|-) "),
  240. makeset(" [----) "), # intersection
  241. makeset(" [)")) # difference
  242. do_test(makeset(" [-|-) "),
  243. makeset(" [-|--|--) "),
  244. makeset(" [---) "), # intersection
  245. makeset(" ")) # difference
  246. do_test(makeset("[-)[-)[-)[)"),
  247. makeset(" [) [|)[) "),
  248. makeset(" [) [) "), # intersection
  249. makeset("[) [-) [)[)")) # difference
  250. # Border cases -- will give different results if intervals are
  251. # half open or fully closed. In nilmdb, they are half open.
  252. do_test(makeset(" [---)"),
  253. makeset(" [----) "),
  254. makeset(" "), # intersection
  255. makeset(" [---)")) # difference
  256. do_test(makeset(" [----)[--)"),
  257. makeset("[-) [--) [)"),
  258. makeset(" [) [-) [)"), # intersection
  259. makeset(" [-) [-) ")) # difference
  260. # Set difference with bounds
  261. a = makeset(" [----)[--)")
  262. b = makeset("[-) [--) [)")
  263. c = makeset("[----) ")
  264. d = makeset(" [-) ")
  265. eq_(nilmdb.utils.interval.set_difference(
  266. a.intersection(list(c)[0]), b.intersection(list(c)[0])), d)
  267. # Fill out test coverage for non-subsets
  268. def diff2(a,b, subset):
  269. return nilmdb.utils.interval._interval_math_helper(
  270. a, b, (lambda a, b: b and not a), subset=subset)
  271. with assert_raises(nilmdb.utils.interval.IntervalError):
  272. list(diff2(a,b,True))
  273. list(diff2(a,b,False))
  274. # Fill out test coverage with a union operator (not implemented
  275. # in interval.py, because nilmdb doesn't need it)
  276. def union(a, b):
  277. return nilmdb.utils.interval._interval_math_helper(
  278. a, b, (lambda a, b: a or b), subset=False)
  279. list(union(makeset("[---) "),
  280. makeset(" [---)")))
  281. # Empty second set
  282. eq_(nilmdb.utils.interval.set_difference(a, IntervalSet()), a)
  283. class TestIntervalDB:
  284. def test_dbinterval(self):
  285. # Test DBInterval class
  286. i = DBInterval(100, 200, 100, 200, 10000, 20000)
  287. eq_(i.start, 100)
  288. eq_(i.end, 200)
  289. eq_(i.db_start, 100)
  290. eq_(i.db_end, 200)
  291. eq_(i.db_startpos, 10000)
  292. eq_(i.db_endpos, 20000)
  293. eq_(repr(i), repr(eval(repr(i))))
  294. # end before start
  295. with assert_raises(IntervalError):
  296. i = DBInterval(200, 100, 100, 200, 10000, 20000)
  297. # db_start too late
  298. with assert_raises(IntervalError):
  299. i = DBInterval(100, 200, 150, 200, 10000, 20000)
  300. # db_end too soon
  301. with assert_raises(IntervalError):
  302. i = DBInterval(100, 200, 100, 150, 10000, 20000)
  303. # actual start, end can be a subset
  304. a = DBInterval(150, 200, 100, 200, 10000, 20000)
  305. b = DBInterval(100, 150, 100, 200, 10000, 20000)
  306. c = DBInterval(150, 160, 100, 200, 10000, 20000)
  307. # Make a set of DBIntervals
  308. iseta = IntervalSet([a, b])
  309. isetc = IntervalSet(c)
  310. assert(iseta.intersects(a))
  311. assert(iseta.intersects(b))
  312. # Test subset
  313. with assert_raises(IntervalError):
  314. x = a.subset(150, 250)
  315. # Subset of those IntervalSets should still contain DBIntervals
  316. for i in IntervalSet(iseta.intersection(Interval(125,250))):
  317. assert(isinstance(i, DBInterval))
  318. class TestIntervalTree:
  319. def test_interval_tree(self):
  320. import random
  321. random.seed(1234)
  322. # make a set of 100 intervals
  323. iset = IntervalSet()
  324. j = 100
  325. for i in random.sample(range(j),j):
  326. interval = Interval(i, i+1)
  327. iset += interval
  328. render(iset, "Random Insertion")
  329. # remove about half of them
  330. for i in random.sample(range(j),j):
  331. if random.randint(0,1):
  332. iset -= Interval(i, i+1)
  333. # try removing an interval that doesn't exist
  334. with assert_raises(IntervalError):
  335. iset -= Interval(1234,5678)
  336. render(iset, "Random Insertion, deletion")
  337. # make a set of 100 intervals, inserted in order
  338. iset = IntervalSet()
  339. j = 100
  340. for i in range(j):
  341. interval = Interval(i, i+1)
  342. iset += interval
  343. render(iset, "In-order insertion")
  344. class TestIntervalSpeed:
  345. @unittest.skip("this is slow")
  346. def test_interval_speed(self):
  347. import yappi
  348. import time
  349. import random
  350. import math
  351. print()
  352. yappi.start()
  353. speeds = {}
  354. limit = 22 # was 20
  355. for j in [ 2**x for x in range(5,limit) ]:
  356. start = time.time()
  357. iset = IntervalSet()
  358. for i in random.sample(range(j),j):
  359. interval = Interval(i, i+1)
  360. iset += interval
  361. speed = (time.time() - start) * 1000000.0
  362. printf("%d: %g μs (%g μs each, O(n log n) ratio %g)\n",
  363. j,
  364. speed,
  365. speed/j,
  366. speed / (j*math.log(j))) # should be constant
  367. speeds[j] = speed
  368. yappi.stop()
  369. stats = yappi.get_func_stats()
  370. stats.sort("ttot")
  371. stats.print_all()