Use a pure-python version of bisect_left, to fix 32-bit issues

The default bisect module includes a fast C implementation, which
requires that array indices fit within the system "long" type.  For
32-bit systems, that's not acceptable, as the table indices for raw
data can exceed 2^32 very quickly.  A pure python version works fine.
This commit is contained in:
Jim Paris 2015-01-20 18:31:58 -05:00
parent 45c81d2019
commit ba55ad82f0

View File

@ -23,7 +23,6 @@ from nilmdb.server.errors import NilmDBError, StreamError, OverlapError
import sqlite3 import sqlite3
import os import os
import errno import errno
import bisect
# Note about performance and transactions: # Note about performance and transactions:
# #
@ -516,6 +515,17 @@ class NilmDB(object):
# And that's all # And that's all
return return
def _bisect_left(self, a, x, lo, hi):
# Like bisect.bisect_left, but doesn't choke on large indices on
# 32-bit systems, like bisect's fast C implementation does.
while lo < hi:
mid = (lo + hi) / 2
if a[mid] < x:
lo = mid + 1
else:
hi = mid
return lo
def _find_start(self, table, dbinterval): def _find_start(self, table, dbinterval):
""" """
Given a DBInterval, find the row in the database that Given a DBInterval, find the row in the database that
@ -526,10 +536,10 @@ class NilmDB(object):
# Optimization for the common case where an interval wasn't truncated # Optimization for the common case where an interval wasn't truncated
if dbinterval.start == dbinterval.db_start: if dbinterval.start == dbinterval.db_start:
return dbinterval.db_startpos return dbinterval.db_startpos
return bisect.bisect_left(table, return self._bisect_left(table,
dbinterval.start, dbinterval.start,
dbinterval.db_startpos, dbinterval.db_startpos,
dbinterval.db_endpos) dbinterval.db_endpos)
def _find_end(self, table, dbinterval): def _find_end(self, table, dbinterval):
""" """
@ -545,10 +555,10 @@ class NilmDB(object):
# want to include the given timestamp in the results. This is # want to include the given timestamp in the results. This is
# so that queries like 1:00 -> 2:00 and 2:00 -> 3:00 return # so that queries like 1:00 -> 2:00 and 2:00 -> 3:00 return
# non-overlapping data. # non-overlapping data.
return bisect.bisect_left(table, return self._bisect_left(table,
dbinterval.end, dbinterval.end,
dbinterval.db_startpos, dbinterval.db_startpos,
dbinterval.db_endpos) dbinterval.db_endpos)
def stream_extract(self, path, start = None, end = None, def stream_extract(self, path, start = None, end = None,
count = False, markup = False, binary = False): count = False, markup = False, binary = False):