Compare commits
8 Commits
nilmdb-1.1
...
python2
Author | SHA1 | Date | |
---|---|---|---|
8125d9c840 | |||
ba55ad82f0 | |||
45c81d2019 | |||
78cfda32e3 | |||
3658d3876b | |||
022b50950f | |||
e5efbadc8e | |||
74f633c9da |
@@ -425,11 +425,15 @@ class Fsck(object):
|
|||||||
for intv in ints:
|
for intv in ints:
|
||||||
last_ts = None
|
last_ts = None
|
||||||
(stime, etime, spos, epos) = intv
|
(stime, etime, spos, epos) = intv
|
||||||
if spos == epos:
|
|
||||||
continue
|
# Break interval into maxrows-sized chunks
|
||||||
for start in xrange(*slice(spos, epos, maxrows).indices(epos)):
|
next_start = spos
|
||||||
|
while next_start < epos:
|
||||||
|
start = next_start
|
||||||
stop = min(start + maxrows, epos)
|
stop = min(start + maxrows, epos)
|
||||||
count = stop - start
|
count = stop - start
|
||||||
|
next_start = stop
|
||||||
|
|
||||||
# Get raw data, convert to NumPy arary
|
# Get raw data, convert to NumPy arary
|
||||||
try:
|
try:
|
||||||
raw = tab.get_data(start, stop, binary = True)
|
raw = tab.get_data(start, stop, binary = True)
|
||||||
|
@@ -43,6 +43,12 @@ class BulkData(object):
|
|||||||
# 32768 files per dir should work even on FAT32
|
# 32768 files per dir should work even on FAT32
|
||||||
self.files_per_dir = 32768
|
self.files_per_dir = 32768
|
||||||
|
|
||||||
|
if "initial_nrows" in kwargs:
|
||||||
|
self.initial_nrows = kwargs["initial_nrows"]
|
||||||
|
else:
|
||||||
|
# First row is 0
|
||||||
|
self.initial_nrows = 0
|
||||||
|
|
||||||
# Make root path
|
# Make root path
|
||||||
if not os.path.isdir(self.root):
|
if not os.path.isdir(self.root):
|
||||||
os.mkdir(self.root)
|
os.mkdir(self.root)
|
||||||
@@ -254,7 +260,7 @@ class BulkData(object):
|
|||||||
path = self._encode_filename(unicodepath)
|
path = self._encode_filename(unicodepath)
|
||||||
elements = path.lstrip('/').split('/')
|
elements = path.lstrip('/').split('/')
|
||||||
ospath = os.path.join(self.root, *elements)
|
ospath = os.path.join(self.root, *elements)
|
||||||
return Table(ospath)
|
return Table(ospath, self.initial_nrows)
|
||||||
|
|
||||||
@nilmdb.utils.must_close(wrap_verify = False)
|
@nilmdb.utils.must_close(wrap_verify = False)
|
||||||
class Table(object):
|
class Table(object):
|
||||||
@@ -291,9 +297,10 @@ class Table(object):
|
|||||||
pickle.dump(fmt, f, 2)
|
pickle.dump(fmt, f, 2)
|
||||||
|
|
||||||
# Normal methods
|
# Normal methods
|
||||||
def __init__(self, root):
|
def __init__(self, root, initial_nrows = 0):
|
||||||
"""'root' is the full OS path to the directory of this table"""
|
"""'root' is the full OS path to the directory of this table"""
|
||||||
self.root = root
|
self.root = root
|
||||||
|
self.initial_nrows = initial_nrows
|
||||||
|
|
||||||
# Load the format
|
# Load the format
|
||||||
with open(os.path.join(self.root, "_format"), "rb") as f:
|
with open(os.path.join(self.root, "_format"), "rb") as f:
|
||||||
@@ -353,8 +360,14 @@ class Table(object):
|
|||||||
# Convert to row number
|
# Convert to row number
|
||||||
return self._row_from_offset(subdir, filename, offset)
|
return self._row_from_offset(subdir, filename, offset)
|
||||||
|
|
||||||
# No files, so no data
|
# No files, so no data. We typically start at row 0 in this
|
||||||
return 0
|
# case, although initial_nrows is specified during some tests
|
||||||
|
# to exercise other parts of the code better. Since we have
|
||||||
|
# no files yet, round initial_nrows up so it points to a row
|
||||||
|
# that would begin a new file.
|
||||||
|
nrows = ((self.initial_nrows + (self.rows_per_file - 1)) //
|
||||||
|
self.rows_per_file) * self.rows_per_file
|
||||||
|
return nrows
|
||||||
|
|
||||||
def _offset_from_row(self, row):
|
def _offset_from_row(self, row):
|
||||||
"""Return a (subdir, filename, offset, count) tuple:
|
"""Return a (subdir, filename, offset, count) tuple:
|
||||||
|
@@ -23,7 +23,6 @@ from nilmdb.server.errors import NilmDBError, StreamError, OverlapError
|
|||||||
import sqlite3
|
import sqlite3
|
||||||
import os
|
import os
|
||||||
import errno
|
import errno
|
||||||
import bisect
|
|
||||||
|
|
||||||
# Note about performance and transactions:
|
# Note about performance and transactions:
|
||||||
#
|
#
|
||||||
@@ -516,6 +515,17 @@ class NilmDB(object):
|
|||||||
# And that's all
|
# And that's all
|
||||||
return
|
return
|
||||||
|
|
||||||
|
def _bisect_left(self, a, x, lo, hi):
|
||||||
|
# Like bisect.bisect_left, but doesn't choke on large indices on
|
||||||
|
# 32-bit systems, like bisect's fast C implementation does.
|
||||||
|
while lo < hi:
|
||||||
|
mid = (lo + hi) / 2
|
||||||
|
if a[mid] < x:
|
||||||
|
lo = mid + 1
|
||||||
|
else:
|
||||||
|
hi = mid
|
||||||
|
return lo
|
||||||
|
|
||||||
def _find_start(self, table, dbinterval):
|
def _find_start(self, table, dbinterval):
|
||||||
"""
|
"""
|
||||||
Given a DBInterval, find the row in the database that
|
Given a DBInterval, find the row in the database that
|
||||||
@@ -526,10 +536,10 @@ class NilmDB(object):
|
|||||||
# Optimization for the common case where an interval wasn't truncated
|
# Optimization for the common case where an interval wasn't truncated
|
||||||
if dbinterval.start == dbinterval.db_start:
|
if dbinterval.start == dbinterval.db_start:
|
||||||
return dbinterval.db_startpos
|
return dbinterval.db_startpos
|
||||||
return bisect.bisect_left(table,
|
return self._bisect_left(table,
|
||||||
dbinterval.start,
|
dbinterval.start,
|
||||||
dbinterval.db_startpos,
|
dbinterval.db_startpos,
|
||||||
dbinterval.db_endpos)
|
dbinterval.db_endpos)
|
||||||
|
|
||||||
def _find_end(self, table, dbinterval):
|
def _find_end(self, table, dbinterval):
|
||||||
"""
|
"""
|
||||||
@@ -545,10 +555,10 @@ class NilmDB(object):
|
|||||||
# want to include the given timestamp in the results. This is
|
# want to include the given timestamp in the results. This is
|
||||||
# so a queries like 1:00 -> 2:00 and 2:00 -> 3:00 return
|
# so a queries like 1:00 -> 2:00 and 2:00 -> 3:00 return
|
||||||
# non-overlapping data.
|
# non-overlapping data.
|
||||||
return bisect.bisect_left(table,
|
return self._bisect_left(table,
|
||||||
dbinterval.end,
|
dbinterval.end,
|
||||||
dbinterval.db_startpos,
|
dbinterval.db_startpos,
|
||||||
dbinterval.db_endpos)
|
dbinterval.db_endpos)
|
||||||
|
|
||||||
def stream_extract(self, path, start = None, end = None,
|
def stream_extract(self, path, start = None, end = None,
|
||||||
count = False, markup = False, binary = False):
|
count = False, markup = False, binary = False):
|
||||||
|
@@ -429,7 +429,7 @@ class Server(object):
|
|||||||
cherrypy.config.update({
|
cherrypy.config.update({
|
||||||
'server.socket_host': host,
|
'server.socket_host': host,
|
||||||
'server.socket_port': port,
|
'server.socket_port': port,
|
||||||
'engine.autoreload_on': False,
|
'engine.autoreload.on': False,
|
||||||
'server.max_request_body_size': 8*1024*1024,
|
'server.max_request_body_size': 8*1024*1024,
|
||||||
})
|
})
|
||||||
if self.embedded:
|
if self.embedded:
|
||||||
|
@@ -87,7 +87,7 @@ def parse_time(toparse):
|
|||||||
try:
|
try:
|
||||||
return unix_to_timestamp(datetime_tz.datetime_tz.
|
return unix_to_timestamp(datetime_tz.datetime_tz.
|
||||||
smartparse(toparse).totimestamp())
|
smartparse(toparse).totimestamp())
|
||||||
except (ValueError, OverflowError):
|
except (ValueError, OverflowError, TypeError):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# If it's parseable as a float, treat it as a Unix or NILM
|
# If it's parseable as a float, treat it as a Unix or NILM
|
||||||
|
11
setup.py
11
setup.py
@@ -6,15 +6,6 @@
|
|||||||
# Then just package it up:
|
# Then just package it up:
|
||||||
# python setup.py sdist
|
# python setup.py sdist
|
||||||
|
|
||||||
# This is supposed to be using Distribute:
|
|
||||||
#
|
|
||||||
# distutils provides a "setup" method.
|
|
||||||
# setuptools is a set of monkeypatches on top of that.
|
|
||||||
# distribute is a particular version/implementation of setuptools.
|
|
||||||
#
|
|
||||||
# So we don't really know if this is using the old setuptools or the
|
|
||||||
# Distribute-provided version of setuptools.
|
|
||||||
|
|
||||||
import traceback
|
import traceback
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
@@ -109,7 +100,7 @@ setup(name='nilmdb',
|
|||||||
'coverage',
|
'coverage',
|
||||||
'numpy',
|
'numpy',
|
||||||
],
|
],
|
||||||
setup_requires = [ 'distribute',
|
setup_requires = [ 'setuptools',
|
||||||
],
|
],
|
||||||
install_requires = [ 'decorator',
|
install_requires = [ 'decorator',
|
||||||
'cherrypy >= 3.2',
|
'cherrypy >= 3.2',
|
||||||
|
@@ -834,9 +834,12 @@ class TestCmdline(object):
|
|||||||
def test_13_files(self):
|
def test_13_files(self):
|
||||||
# Test BulkData's ability to split into multiple files,
|
# Test BulkData's ability to split into multiple files,
|
||||||
# by forcing the file size to be really small.
|
# by forcing the file size to be really small.
|
||||||
|
# Also increase the initial nrows, so that start/end positions
|
||||||
|
# in the database are very large (> 32 bit)
|
||||||
server_stop()
|
server_stop()
|
||||||
server_start(bulkdata_args = { "file_size" : 920, # 23 rows per file
|
server_start(bulkdata_args = { "file_size" : 920, # 23 rows per file
|
||||||
"files_per_dir" : 3 })
|
"files_per_dir" : 3,
|
||||||
|
"initial_nrows" : 2**40 })
|
||||||
|
|
||||||
# Fill data
|
# Fill data
|
||||||
self.ok("create /newton/prep float32_8")
|
self.ok("create /newton/prep float32_8")
|
||||||
@@ -888,7 +891,8 @@ class TestCmdline(object):
|
|||||||
server_stop()
|
server_stop()
|
||||||
server_start(max_removals = 4321,
|
server_start(max_removals = 4321,
|
||||||
bulkdata_args = { "file_size" : 920, # 23 rows per file
|
bulkdata_args = { "file_size" : 920, # 23 rows per file
|
||||||
"files_per_dir" : 3 })
|
"files_per_dir" : 3,
|
||||||
|
"initial_nrows" : 2**40 })
|
||||||
self.do_remove_files()
|
self.do_remove_files()
|
||||||
self.ok("destroy -R /newton/prep") # destroy again
|
self.ok("destroy -R /newton/prep") # destroy again
|
||||||
|
|
||||||
@@ -897,7 +901,8 @@ class TestCmdline(object):
|
|||||||
server_stop()
|
server_stop()
|
||||||
server_start(max_int_removals = 1,
|
server_start(max_int_removals = 1,
|
||||||
bulkdata_args = { "file_size" : 920, # 23 rows per file
|
bulkdata_args = { "file_size" : 920, # 23 rows per file
|
||||||
"files_per_dir" : 3 })
|
"files_per_dir" : 3,
|
||||||
|
"initial_nrows" : 2**40 })
|
||||||
self.do_remove_files()
|
self.do_remove_files()
|
||||||
|
|
||||||
def do_remove_files(self):
|
def do_remove_files(self):
|
||||||
|
@@ -1,5 +1,6 @@
|
|||||||
import nilmdb
|
import nilmdb
|
||||||
from nilmdb.utils.printf import *
|
from nilmdb.utils.printf import *
|
||||||
|
from nilmdb.utils import datetime_tz
|
||||||
|
|
||||||
from nose.tools import *
|
from nose.tools import *
|
||||||
from nose.tools import assert_raises
|
from nose.tools import assert_raises
|
||||||
@@ -19,6 +20,8 @@ class TestTimestamper(object):
|
|||||||
def join(list):
|
def join(list):
|
||||||
return "\n".join(list) + "\n"
|
return "\n".join(list) + "\n"
|
||||||
|
|
||||||
|
datetime_tz.localtz_set("America/New_York")
|
||||||
|
|
||||||
start = nilmdb.utils.time.parse_time("03/24/2012")
|
start = nilmdb.utils.time.parse_time("03/24/2012")
|
||||||
lines_in = [ "hello", "world", "hello world", "# commented out" ]
|
lines_in = [ "hello", "world", "hello world", "# commented out" ]
|
||||||
lines_out = [ "1332561600000000 hello",
|
lines_out = [ "1332561600000000 hello",
|
||||||
|
Reference in New Issue
Block a user