Browse Source

Branch

git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@10372 ddd99763-3ecb-0310-9145-efcb8ce7c51f
tags/bxinterval-last
Jim Paris 12 years ago
parent
commit
4ca726439b
36 changed files with 598 additions and 961 deletions
  1. +3
    -0
      Makefile
  2. +1
    -1
      README.txt
  3. +1
    -0
      TODO
  4. +0
    -26
      bin/nilm-test.py
  5. +5
    -2
      nilmdb/__init__.py
  6. +0
    -37
      nilmdb/fileinterval.py
  7. +0
    -205
      nilmdb/interval.py
  8. +67
    -0
      nilmdb/layout.py
  9. +89
    -0
      nilmdb/nilmdb.py
  10. +7
    -0
      nilmdb/printf.py
  11. +69
    -0
      nilmdb/serializer.py
  12. +111
    -0
      nilmdb/server.py
  13. +0
    -46
      nilmdb/test_fileinterval.py
  14. +0
    -188
      nilmdb/test_interval.py
  15. +0
    -5
      pytables-test/Makefile
  16. +0
    -2
      pytables-test/README.jim
  17. +0
    -12
      pytables-test/TODO.txt
  18. +0
    -3
      pytables-test/indexing-notes
  19. +0
    -3
      pytables-test/sample-query
  20. +0
    -53
      pytables-test/server.py
  21. +0
    -16
      pytables-test/speed-pytables.py
  22. +0
    -54
      pytables-test/test-indexing.py
  23. +0
    -54
      pytables-test/test-write.py
  24. +0
    -9
      setup.py
  25. +0
    -5
      test/Makefile
  26. +0
    -4
      test/printf.py
  27. +0
    -60
      test/speed-readascii.py
  28. +0
    -74
      test/speed-readbinary.py
  29. +0
    -76
      test/speed-writebinary.py
  30. +0
    -11
      test/test-struct-pack.py
  31. +0
    -1
      test/test.dat
  32. +0
    -7
      test/todo.md
  33. +150
    -0
      tests/test_nilmdb.py
  34. +25
    -0
      tests/test_printf.py
  35. +70
    -0
      tests/test_serializer.py
  36. +0
    -7
      todo.txt

+ 3
- 0
Makefile View File

@@ -1,2 +1,5 @@
all:
nosetests

clean:
find . -name '*pyc' | xargs rm

+ 1
- 1
README.txt View File

@@ -1,4 +1,4 @@
To install,

python seutp.py install
python setup.py install


+ 1
- 0
TODO View File

@@ -0,0 +1 @@
- More stream operations: insert & extract

+ 0
- 26
bin/nilm-test.py View File

@@ -1,26 +0,0 @@
#!/usr/bin/python

from nilmdb import Interval
from optparse import OptionParser
import sys

version = "1.0"

parser = OptionParser()
parser.add_option("-d", "--db", dest="database", metavar="DATABASE",
help="location of sqlite database")
parser.add_option("-V", "--version", dest="version", default=False, action="store_true",
help="print version then exit")

(options, args) = parser.parse_args()

if (options.version):
print "This script version: " + version
sys.exit(0)

if options.database is None:
print "Error: database is mandatory"
sys.exit(1)

print "Database is " + options.database


+ 5
- 2
nilmdb/__init__.py View File

@@ -1,2 +1,5 @@
from nilmdb.interval import *
from nilmdb.fileinterval import *
# empty
from nilmdb import NilmDB
from server import Server
from layout import *


+ 0
- 37
nilmdb/fileinterval.py View File

@@ -1,37 +0,0 @@
"""FileInterval

An Interval that is backed with file data storage"""

from nilmdb.interval import Interval, IntervalSet, IntervalError
from datetime import datetime
import bisect

class FileInterval(Interval):
"""Represents an interval of time and its corresponding data"""

def __init__(self, start, end,
filename,
start_offset = None, end_offset = None):
self.start = start
self.end = end
self.filename = filename
if start_offset is None:
start_offset = 0
self.start_offset = start_offset
if end_offset is None:
f = open(filename, 'rb')
f.seek(0, os.SEEK_END)
end_offset = f.tell()
self.end_offset = end_offset

def __setattr__(self, name, value):
pass

def subset(self, start, end):
"""Return a new Interval that is a subset of this one"""
# TODO: Any magic regarding file/offset/length mapping for subsets
if (start < self.start or end > self.end):
raise IntervalError("not a subset")
return FileInterval(start, end)


+ 0
- 205
nilmdb/interval.py View File

@@ -1,205 +0,0 @@
"""Interval and IntervalSet

Represents an interval of time, and a sorted set of such intervals"""

from datetime import datetime
import bisect

class IntervalError(Exception):
"""Error due to interval overlap, etc"""
pass

class Interval(object):
"""Represents an interval of time"""

start = None
end = None

def __init__(self, start, end):
self.start = start
self.end = end

def __repr__(self):
return "Interval(" + repr(self.start) + ", " + repr(self.end) + ")"

def __str__(self):
return "[" + str(self.start) + " -> " + str(self.end) + "]"

def __setattr__(self, name, value):
"""Set attribute"""
# TODO: If we need to manipulate file names, offsets, lengths, etc,
# based on start and end time changing, maybe this is the right spot?
# Or we could just disallow changing it here.
if not isinstance(value, datetime):
raise IntervalError("Must set datetime values")
self.__dict__[name] = value
if (type(self.start) is type(self.end)):
if (self.start > self.end):
raise IntervalError("Interval start must precede interval end")

def __cmp__(self, other):
"""Compare two intervals. If non-equal, order by start then end"""
if not isinstance(other, Interval):
raise TypeError("Can't compare to non-interval")
if (self.start == other.start):
if (self.end < other.end):
return -1
if (self.end > other.end):
return 1
return 0
if (self.start < other.start):
return -1
return 1
def intersects(self, other):
"""Return True if two Interval objects intersect"""
if (not isinstance(other, Interval)):
raise TypeError("need Interval for intersection test")
if (self.end <= other.start or
self.start >= other.end):
return False
else:
return True

def is_adjacent(self, other):
"""Return True if two Intervals are adjacent (same end or start)"""
if (not isinstance(other, Interval)):
raise TypeError("need Interval for adjacency test")
if (self.end == other.start or
self.start == other.end):
return True
else:
return False

def subset(self, start, end):
"""Return a new Interval that is a subset of this one"""
# TODO: Any magic regarding file/offset/length mapping for subsets
if (start < self.start or end > self.end):
raise IntervalError("not a subset")
return Interval(start, end)

class IntervalSet(object):
"""A non-intersecting set of intervals

Kept sorted internally"""

def __init__(self, iterable=None):
self.data = []
if iterable is not None:
if isinstance(iterable, Interval):
iterable = [iterable]
self._add_intervals(iterable)

def __iter__(self):
return self.data.__iter__()

def __repr__(self):
return "IntervalSet(" + repr(list(self.data)) + ")"

def __cmp__(self, other):
# compare isn't supported, they don't really have an ordering
raise TypeError("can't compare IntervalSets with cmp()")

def __eq__(self, other):
"""Test equality of two IntervalSets.

Treats adjacent Intervals as equivalent to one long interval,
so this function really tests whether the IntervalSets cover
the same spans of time."""
if not isinstance(other, IntervalSet):
return False
i = 0
j = 0
outside = True
try:
while True:
if (outside):
# To match, we need to be finished this set
if (i >= len(self) and j >= len(other)):
return True
# Or the starts need to match
if (self[i].start != other[j].start):
return False
outside = False
else:
# We can move on if the two interval ends match
if (self[i].end == other[j].end):
i += 1
j += 1
outside = True
else:
# Whichever ends first needs to be adjacent to the next
if (self[i].end < other[j].end):
if (not self[i].is_adjacent(self[i+1])):
return False
i += 1
else:
if (not other[j].is_adjacent(other[j+1])):
return False
j += 1
except IndexError:
return False

def __ne__(self, other):
return not self.__eq__(other)

def __len__(self):
return len(self.data)

def __getitem__(self, key):
return self.data.__getitem__(key)

def __iadd__(self, other):
"""Inplace add -- modifies self

This throws an exception if the regions being added intersect."""
if isinstance(other, Interval):
other = [other]
self._add_intervals(other)
return self
def __add__(self, other):
"""Add -- returns a new object

This throws an exception if the regions being added intersect."""
new = IntervalSet(self)
new += IntervalSet(other)
return new

def __and__(self, other):
"""Compute a new IntervalSet from the intersection of two others

Output intervals are built as subsets of the intervals in the
first argument (self)."""
# If we were given a set, intersect with each interval in that set
if isinstance(other, IntervalSet):
out = IntervalSet()
for interval in other.data:
out += self & interval
return out

if not isinstance(other, Interval):
raise TypeError("can't intersect with that type")

out = IntervalSet()
for this in self.data:
# If there's any overlap, add the overlapping region
if (this.end > other.start and this.start < other.end):
out += this.subset(max(this.start, other.start),
min(this.end, other.end))
return out
def _add_intervals(self, iterable):
"""Add each Interval from an interable to this set"""
for element in iter(iterable):
self._add_single_interval(element)

def _add_single_interval(self, interval):
"""Add one Interval to this set"""
if (not isinstance(interval, Interval)):
raise TypeError("can only add Intervals")
for existing in self.data:
if existing.intersects(interval):
raise IntervalError("Tried to add overlapping interval "
"to this set")
bisect.insort(self.data, interval)

+ 67
- 0
nilmdb/layout.py View File

@@ -0,0 +1,67 @@
import tables

# Note: 'expected_daily_rows' is an estimate, used to help PyTables
# choose a proper chunk size for the table.

# Registry of the fixed table layouts this database understands.
# Each entry maps a layout name to:
#   'expected_daily_rows' - rough rows/day (sample rate * 86400 s/day)
#   'description'         - PyTables column definitions; 'pos' fixes
#                           the column order within the table.
layouts = {
    # Typical prep output
    "PrepData": {
        'expected_daily_rows': 120 * 86400,   # 120 Hz
        'description': {
            'timestamp': tables.Int64Col(pos=1),
            'p1': tables.Float32Col(pos=2),
            'q1': tables.Float32Col(pos=3),
            'p3': tables.Float32Col(pos=4),
            'q3': tables.Float32Col(pos=5),
            'p5': tables.Float32Col(pos=6),
            'q5': tables.Float32Col(pos=7),
            'p7': tables.Float32Col(pos=8),
            'q7': tables.Float32Col(pos=9),
            } },

    # Raw data
    "RawData": {
        'expected_daily_rows': 8000 * 86400,  # 8 kHz
        'description': {
            'timestamp': tables.Int64Col(pos=1),
            'va': tables.UInt16Col(pos=2),
            'vb': tables.UInt16Col(pos=3),
            'vc': tables.UInt16Col(pos=4),
            'ia': tables.UInt16Col(pos=5),
            'ib': tables.UInt16Col(pos=6),
            'ic': tables.UInt16Col(pos=7),
            } },

    # Raw data plus 60 Hz notched current
    "RawNotchedData": {
        'expected_daily_rows': 8000 * 86400,  # 8 kHz
        'description': {
            'timestamp': tables.Int64Col(pos=1),
            'va': tables.UInt16Col(pos=2),
            'vb': tables.UInt16Col(pos=3),
            'vc': tables.UInt16Col(pos=4),
            'ia': tables.UInt16Col(pos=5),
            'ib': tables.UInt16Col(pos=6),
            'ic': tables.UInt16Col(pos=7),
            'notch_ia': tables.UInt16Col(pos=8),
            'notch_ib': tables.UInt16Col(pos=9),
            'notch_ic': tables.UInt16Col(pos=10),
            } },
    }

def description(layout):
    """Build and return a tables.Description for the named layout.

    Raises KeyError if *layout* is not a known layout name."""
    column_defs = layouts[layout]["description"]
    return tables.Description(column_defs)

def expected_daily_rows(layout):
    """Return the estimated number of rows per day for the named layout.

    Raises KeyError if *layout* is not a known layout name."""
    info = layouts[layout]
    return info["expected_daily_rows"]

def desc_to_layout(match_desc):
    """
    Match a tables.Description to our fixed list of layouts, and
    return a string representing the one that matched, or None
    if nothing matches.

    Two descriptions match when their column-definition dictionaries
    (tables.Description._v_colObjects) compare equal.
    """
    # Iterate the dict directly; .keys() is redundant here.
    for layout in layouts:
        if description(layout)._v_colObjects == match_desc._v_colObjects:
            return layout
    return None

+ 89
- 0
nilmdb/nilmdb.py View File

@@ -0,0 +1,89 @@
"""NilmDB

Object that represents a NILM database file"""

# Need absolute_import so that "import nilmdb" won't pull in nilmdb.py,
# but will pull the nilmdb module instead.
from __future__ import absolute_import
import nilmdb
from nilmdb.printf import *

import tables
import time
import sys

class NilmDB(object):
    """Object that represents a NILM database, backed by a single
    PyTables (HDF5) file.  Streams are stored as Table nodes whose
    column layout must match one of the entries in nilmdb.layout."""

    def __init__(self, filename):
        # Open or create the file ("a" = append/create mode)
        self.filename = filename
        self.h5file = tables.openFile(filename, "a", "NILM Database")
        # Flag used by __del__ to detect a missing close() call
        self.opened = True

    def __del__(self):
        # Warn (rather than raise) if the caller forgot close(); a
        # destructor is too late to do proper cleanup reliably.
        if "opened" in self.__dict__: # pragma: no cover
            fprintf(sys.stderr,
                    "error: NilmDB.close() wasn't called, file %s",
                    self.filename)

    def close(self):
        """Close the underlying HDF5 file.  Must be called before the
        object is dropped, or __del__ prints a warning."""
        self.h5file.close()
        del self.opened

    def stream_list(self, layout = None):
        """Return list of information for all tables in the database
        that match the given layout, or all tables if layout is None.

        Format of returned data is a list of tuples:
        (path, layout, nrows)
        """
        paths = []
        # Walk every Table node in the file, starting at the root
        for node in self.h5file.walkNodes('/', 'Table'):
            path = node._v_pathname
            # Reverse-map the node's column description to a layout name
            this_layout = nilmdb.layout.desc_to_layout(node.description)
            if layout is None or layout == this_layout:
                # NOTE(review): 'tuple' shadows the builtin; harmless
                # here but worth renaming eventually.
                tuple = (path, this_layout, node.nrows)
                paths.append(tuple)
        return sorted(paths)

    def stream_create(self, path, layout_name, index = None):
        """Create a table at the given path, with the contents
        matching the given layout_name (e.g. 'PrepData').
        Columns listed in 'index' are marked as indices.  If index =
        None, the 'timestamp' column is indexed if it exists.  Pass
        an empty list to prevent indexing.

        Raises ValueError if 'path' has no group component, and
        KeyError if an index column doesn't exist (in which case the
        newly created table is removed again)."""
        # Split "/a/b/c" into group "/a/b" and node name "c"
        [ group, node ] = path.rsplit("/", 1)
        if group == '':
            raise ValueError("Invalid path")

        # Make the group structure, one element at a time
        group_path = group.lstrip('/').split("/")
        for i in range(len(group_path)):
            parent = "/" + "/".join(group_path[0:i])
            child = group_path[i]
            try:
                self.h5file.createGroup(parent, child)
            except tables.NodeError:
                # Group already exists; that's fine
                pass

        # Get description
        desc = nilmdb.layout.description(layout_name)

        # Estimated table size (for PyTables optimization purposes): assume
        # 3 months worth of data.  It's OK if this is wrong.
        exp_rows = nilmdb.layout.expected_daily_rows(layout_name) * 90

        table = self.h5file.createTable(group, node,
                                        description = desc,
                                        expectedrows = exp_rows)

        # Create indices
        try:
            if index is None and "timestamp" in table.colnames:
                index = [ "timestamp" ]
            for ind in index:
                table.cols._f_col(ind).createIndex()
        except KeyError, e:
            # Remove this table if we got an error
            self.h5file.removeNode(group, node)
            raise e

+ 7
- 0
nilmdb/printf.py View File

@@ -0,0 +1,7 @@
from __future__ import print_function
def printf(str, *args):
    """C-style printf: write str % args to stdout, no trailing newline."""
    print(str % args, end='')
def fprintf(file, str, *args):
    """C-style fprintf: write str % args to the given file object."""
    print(str % args, end='', file=file)
def sprintf(str, *args):
    """C-style sprintf: return str % args as a string."""
    return (str % args)

+ 69
- 0
nilmdb/serializer.py View File

@@ -0,0 +1,69 @@
import Queue
import threading
import sys

# This file provides a class that will wrap an object and serialize
# all calls to its methods. All calls to that object will be queued
# and executed from a single thread, regardless of which thread makes
# the call.

# Based partially on http://stackoverflow.com/questions/2642515/

class SerializerThread(threading.Thread):
    """Worker thread for the serializer.

    Pulls (result_queue, func, args, kwargs) tuples off the shared
    call queue, invokes func, and pushes an (exc_info, result) pair
    onto the per-call result queue.  A tuple whose result_queue is
    None is the shutdown sentinel."""

    def __init__(self, call_queue):
        threading.Thread.__init__(self)
        self.call_queue = call_queue

    def run(self):
        while True:
            (result_queue, func, args, kwargs) = self.call_queue.get()
            # A None result_queue tells us to terminate
            if result_queue is None:
                return
            try:
                value = func(*args, **kwargs) # wrapped
            except:
                # Report the full exception info back to the caller
                result_queue.put((sys.exc_info(), None))
            else:
                result_queue.put((None, value))

class WrapCall(object):
    """Wrap a callable using the given queues.

    Calling the instance enqueues (result_queue, func, args, kwargs)
    on call_queue for the serializer thread, then blocks until the
    (exc_info, result) reply arrives on result_queue.  An exception
    raised by the wrapped callable is re-raised here with its
    original traceback (Python 2 three-expression raise)."""

    def __init__(self, call_queue, result_queue, func):
        self.call_queue = call_queue
        self.result_queue = result_queue
        self.func = func

    def __call__(self, *args, **kwargs):
        self.call_queue.put((self.result_queue, self.func, args, kwargs))
        # Block until the serializer thread has run the call
        ( exc_info, result ) = self.result_queue.get()
        if exc_info is None:
            return result
        else:
            raise exc_info[0], exc_info[1], exc_info[2]

class WrapObject(object):
    """Wrap all calls to methods in a target object with WrapCall.

    All attribute lookups on this object return WrapCall wrappers
    around the target's methods, so every call is funneled through a
    single serializer thread regardless of the calling thread.
    Attributes use double-underscore name mangling so they don't
    collide with (or get forwarded to) the target's namespace."""

    def __init__(self, target):
        self.__wrap_target = target
        self.__wrap_call_queue = Queue.Queue()
        # Daemon thread so an unclosed wrapper can't hang interpreter exit
        self.__wrap_serializer = SerializerThread(self.__wrap_call_queue)
        self.__wrap_serializer.daemon = True
        self.__wrap_serializer.start()

    def __getattr__(self, key):
        """Wrap methods of self.__wrap_target in a WrapCall instance"""
        # Only reached for names not found on WrapObject itself
        func = getattr(self.__wrap_target, key)
        if not callable(func):
            raise TypeError("Can't serialize attribute %r (type: %s)"
                            % (key, type(func)))
        # Fresh result queue per lookup, so concurrent callers from
        # different threads don't share replies
        result_queue = Queue.Queue()
        return WrapCall(self.__wrap_call_queue, result_queue, func)

    def __del__(self):
        # Send the shutdown sentinel and wait for the thread to finish
        self.__wrap_call_queue.put((None, None, None, None))
        self.__wrap_serializer.join()

+ 111
- 0
nilmdb/server.py View File

@@ -0,0 +1,111 @@
"""CherryPy-based server for accessing NILM database via HTTP"""

# Need absolute_import so that "import nilmdb" won't pull in nilmdb.py,
# but will pull the nilmdb module instead.
from __future__ import absolute_import
import nilmdb

from nilmdb.printf import *

import cherrypy
import sys
import os
import traceback

try:
import cherrypy
cherrypy.tools.json_out
except: # pragma: no cover
sys.stderr.write("Cherrypy 3.2+ required\n")
sys.exit(1)

class NilmApp(object):
    """Base class for the CherryPy applications below; holds the
    shared database object."""
    def __init__(self, db):
        self.db = db

class Root(NilmApp):
    """Root application for NILM database"""

    def __init__(self, db, version):
        super(Root, self).__init__(db)
        # Reported by the /version endpoint
        self.server_version = version

    # /
    @cherrypy.expose
    def index(self):
        # No index page; return 404
        raise cherrypy.NotFound()

    # /favicon.ico
    @cherrypy.expose
    def favicon_ico(self):
        # Keep browsers' favicon requests out of the error log path
        raise cherrypy.NotFound()

    # /version
    @cherrypy.expose
    @cherrypy.tools.json_out()
    def version(self):
        # JSON-encoded server version string
        return self.server_version

class Stream(NilmApp):
    """Stream-specific operations"""

    # /stream/list
    # /stream/list?layout=PrepData
    @cherrypy.expose
    @cherrypy.tools.json_out()
    def list(self, layout = None):
        # Delegates to NilmDB.stream_list; returns JSON list of
        # (path, layout, nrows) tuples, optionally filtered by layout
        return self.db.stream_list(layout)

class Exiter(object):
    """App that exits the server, for testing"""
    @cherrypy.expose
    def index(self):
        cherrypy.response.headers['Content-Type'] = 'text/plain'
        # Stream the body so the client receives the message before
        # the generator raises SystemExit to kill the server.
        def content():
            yield 'Exiting by request'
            raise SystemExit
        return content()
    # Response streaming must be enabled for the trick above to work
    index._cp_config = { 'response.stream': True }

class Server(object):
    """Embedded CherryPy server exposing a NilmDB over HTTP.

    Mounts Root at /, Stream at /stream, and (if stoppable) the
    testing-only Exiter at /exit."""

    version = "1.0"
    def __init__(self, db, host = '127.0.0.1', port = 8080, stoppable = False):
        # Need to wrap DB object in a serializer because we'll call into it from separate threads.
        self.db = nilmdb.serializer.WrapObject(db)
        cherrypy.config.update({
            'server.socket_host': host,
            'server.socket_port': port,
            'engine.autoreload_on': False,
            'environment': 'embedded',
            })
        # Clear any apps mounted by a previous Server instance
        cherrypy.tree.apps = {}
        cherrypy.tree.mount(Root(self.db, self.version), "/")
        cherrypy.tree.mount(Stream(self.db), "/stream")
        if stoppable:
            cherrypy.tree.mount(Exiter(), "/exit")

    def start(self, blocking = False, event = None):
        """Start the engine.  If 'event' is given it is set once the
        engine is up; if 'blocking', wait until the engine exits."""

        # Cherrypy stupidly calls os._exit(70) when it can't bind the
        # port.  At least try to print a reasonable error and continue
        # in this case, rather than just dying silently (as we would
        # otherwise do in embedded mode)
        real_exit = os._exit
        def fake_exit(code): # pragma: no cover
            if code == os.EX_SOFTWARE:
                fprintf(sys.stderr, "error: CherryPy called os._exit!\n")
            else:
                real_exit(code)
        # Temporarily intercept os._exit only for the duration of start()
        os._exit = fake_exit
        cherrypy.engine.start()
        os._exit = real_exit
        if event is not None:
            event.set()
        if blocking:
            cherrypy.engine.wait(cherrypy.engine.states.EXITING,
                                 interval = 0.1, channel = 'main')

    def stop(self):
        """Shut the CherryPy engine down."""
        cherrypy.engine.exit()

+ 0
- 46
nilmdb/test_fileinterval.py View File

@@ -1,46 +0,0 @@
from nilmdb import Interval, IntervalSet, IntervalError, FileInterval
from datetime import datetime
from nose.tools import assert_raises

from test_interval import iset

def fiset(string):
"""Like iset, but builds with FileIntervals instead of Intervals"""
iset = IntervalSet()
for i, c in enumerate(string):
day = datetime.strptime("{0:04d}".format(i+2000), "%Y")
if (c == "["):
start = day
elif (c == "|"):
iset += FileInterval(start, day, "test.dat")
start = day
elif (c == "]"):
iset += FileInterval(start, day, "test.dat")
del start
return iset

def test_fileinterval_vs_interval():
"""Test FileInterval/Interval inheritance"""

i = iset("[--]")
f = fiset("[--]")

# check types
assert(isinstance(i[0], Interval))
assert(not isinstance(i[0], FileInterval))
assert(isinstance(f[0], Interval))
assert(isinstance(f[0], FileInterval))

# when doing an intersection, result should be a subset of the first arg
u = (i & f)
assert(isinstance(u[0], Interval))
assert(not isinstance(u[0], FileInterval))
u = (f & i)
assert(isinstance(u[0], Interval))
assert(isinstance(u[0], FileInterval))

# they're still the same though
assert(i == f == u)

# just for coverage
assert_raises(IntervalError, fiset("[]")[0].subset, f[0].start, f[0].end)

+ 0
- 188
nilmdb/test_interval.py View File

@@ -1,188 +0,0 @@
from nilmdb import Interval, IntervalSet, IntervalError
from datetime import datetime
from nose.tools import assert_raises
import itertools

def test_interval():
"""Test the Interval class"""
d1 = datetime.strptime("19801205","%Y%m%d")
d2 = datetime.strptime("19900216","%Y%m%d")
d3 = datetime.strptime("20111205","%Y%m%d")

# basic construction
i = Interval(d1, d1)
i = Interval(d1, d3)
assert(i.start == d1)
assert(i.end == d3)

# assignment should work
i.start = d2
try:
i.end = d1
raise Exception("should have died there")
except IntervalError:
pass
i.start = d1
i.end = d2

# end before start
assert_raises(IntervalError, Interval, d3, d1)

# wrong type
assert_raises(IntervalError, Interval, 1, 2)

# compare
assert(Interval(d1, d2) == Interval(d1, d2))
assert(Interval(d1, d2) < Interval(d1, d3))
assert(Interval(d1, d3) > Interval(d1, d2))
assert(Interval(d1, d2) < Interval(d2, d3))
assert(Interval(d1, d3) < Interval(d2, d3))
assert(Interval(d2, d2) > Interval(d1, d3))
assert(Interval(d3, d3) == Interval(d3, d3))
assert_raises(TypeError, cmp, i, 123)

# subset
assert(Interval(d1, d3).subset(d1, d2) == Interval(d1, d2))
assert_raises(IntervalError, Interval(d2, d3).subset, d1, d2)

# append
assert(Interval(d1, d2).is_adjacent(Interval(d2,d3)))
assert(Interval(d2, d3).is_adjacent(Interval(d1,d2)))
assert(not Interval(d2, d3).is_adjacent(Interval(d1,d3)))
assert_raises(TypeError, Interval(d1, d2).is_adjacent, 1)

# misc
assert(repr(i) == repr(eval(repr(i).replace("datetime.",""))))
assert(str(i) == "[1980-12-05 00:00:00 -> 1990-02-16 00:00:00]")

def test_interval_intersect():
"""Test Interval intersections"""
dates = [ datetime.strptime(year, "%y") for year in [ "00", "01", "02", "03" ] ]
perm = list(itertools.permutations(dates, 2))
prod = list(itertools.product(perm, perm))
should_intersect = {
False: [4, 5, 8, 20, 48, 56, 60, 96, 97, 100],
True: [0, 1, 2, 12, 13, 14, 16, 17, 24, 25, 26, 28, 29,
32, 49, 50, 52, 53, 61, 62, 64, 65, 68, 98, 101, 104]}
for i,((a,b),(c,d)) in enumerate(prod):
try:
i1 = Interval(a, b)
i2 = Interval(c, d)
assert(i1.intersects(i2) == i2.intersects(i1))
assert(i in should_intersect[i1.intersects(i2)])
except IntervalError:
assert(i not in should_intersect[True] and
i not in should_intersect[False])
assert_raises(TypeError, i1.intersects, 1234)

def test_intervalset_construct():
"""Test interval set construction"""
dates = [ datetime.strptime(year, "%y") for year in [ "00", "01", "02", "03" ]]

a = Interval(dates[0], dates[1])
b = Interval(dates[1], dates[2])
c = Interval(dates[0], dates[2])
d = Interval(dates[2], dates[3])

iseta = IntervalSet(a)
isetb = IntervalSet([a, b])
isetc = IntervalSet([a])
assert(iseta != isetb)
assert(iseta == isetc)
assert(iseta != 3)
assert(IntervalSet(a) != IntervalSet(b))
assert_raises(TypeError, cmp, iseta, isetb)
assert_raises(IntervalError, IntervalSet, [a, b, c])
assert_raises(TypeError, IntervalSet, [1, 2])

iset = IntervalSet(isetb) # test iterator
assert(iset == isetb)
assert(len(iset) == 2)
assert(len(IntervalSet()) == 0)

# Test adding
iset = IntervalSet(a)
iset += IntervalSet(b)
assert(iset == IntervalSet([a, b]))
iset = IntervalSet(a)
iset += b
assert(iset == IntervalSet([a, b]))
iset = IntervalSet(a) + IntervalSet(b)
assert(iset == IntervalSet([a, b]))
iset = IntervalSet(b) + a
assert(iset == IntervalSet([a, b]))

# A set consisting of [0-1],[1-2] should match a set consisting of [0-2]
assert(IntervalSet([a,b]) == IntervalSet([c]))
# Etc
assert(IntervalSet([a,d]) != IntervalSet([c]))
assert(IntervalSet([c]) != IntervalSet([a,d]))
assert(IntervalSet([c,d]) != IntervalSet([b,d]))
# misc
assert(repr(iset) == repr(eval(repr(iset).replace("datetime.",""))))

def iset(string):
"""Build an IntervalSet from a string, for testing purposes

Each character is a year
[ = interval start
| = interval end + adjacent start
] = interval end
anything else is ignored
"""
iset = IntervalSet()
for i, c in enumerate(string):
day = datetime.strptime("{0:04d}".format(i+2000), "%Y")
if (c == "["):
start = day
elif (c == "|"):
iset += Interval(start, day)
start = day
elif (c == "]"):
iset += Interval(start, day)
del start
return iset

def test_intervalset_iset():
"""Test basic iset construction"""
assert(iset(" [----] ") ==
iset(" [-|--] "))

assert(iset("[] [--] ") +
iset(" [] [--]") ==
iset("[|] [-----]"))

def test_intervalset_intsersect():
"""Test intersection (&)"""
assert_raises(TypeError, iset("[--]").__and__, 1234)
assert(iset("[---------]") &
iset(" [---] ") ==
iset(" [---] "))

assert(iset(" [---] ") &
iset("[---------]") ==
iset(" [---] "))

assert(iset(" [-----]") &
iset(" [-----] ") ==
iset(" [--] "))

assert(iset(" [---]") &
iset(" [--] ") ==
iset(" "))

assert(iset(" [-|---]") &
iset(" [-----|-] ") ==
iset(" [----] "))

assert(iset(" [-|-] ") &
iset(" [-|--|--] ") ==
iset(" [---] "))

assert(iset(" [----][--]") &
iset("[-] [--] []") ==
iset(" [] [-] []"))


+ 0
- 5
pytables-test/Makefile View File

@@ -1,5 +0,0 @@
all:
time python test-indexed-read.py

clean:
rm -f *pyc

+ 0
- 2
pytables-test/README.jim View File

@@ -1,2 +0,0 @@
New version from:
http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=641485#15

+ 0
- 12
pytables-test/TODO.txt View File

@@ -1,12 +0,0 @@
- Make CherryPy server that can handle simple GET/POST,
and a httplib client that can talk to that server.
Steps:
- Make server handle GET
- Make client send request, get response
- Add request streaming to server
- Add request streaming to client
- Make server handle POST
- Make client send request, get response
- Add request streaming to server
- Add request streaming to client
- Integrate into a server process that also keeps database open.

+ 0
- 3
pytables-test/indexing-notes View File

@@ -1,3 +0,0 @@
Indexing time64 doesn't seem to work -- needed to do "time >= 1243052015" even though the actual database times
should be something like 1243052015.847000. Let's switch to just using a 64-bit integer counting e.g.
microseconds since 1970-01-01

+ 0
- 3
pytables-test/sample-query View File

@@ -1,3 +0,0 @@
timestamp > 1243052015
took 394.5 minutes in vitables
(2340 rows matched)

+ 0
- 53
pytables-test/server.py View File

@@ -1,53 +0,0 @@
import sys
import tables
import nilmdb

try:
import cherrypy
cherrypy.tools.json_out
except:
sys.stderr.write("Cherrypy 3.2+ required\n")
sys.exit(1)

class NilmApp:
def __init__(self, db):
self.db = db

class Root(NilmApp):
"""NILM Database"""

server_version = "1.0"

@cherrypy.expose
def index(self):
raise cherrypy.NotFound()

@cherrypy.expose
def favicon_ico(self):
raise cherrypy.NotFound()

@cherrypy.expose
@cherrypy.tools.json_out()
def version(self):
return self.server_version

class Stream(NilmApp):
"""Stream operations"""

@cherrypy.expose
@cherrypy.tools.json_out()
def list(self):
return
cherrypy.config.update({
'server.socket_host': '127.0.0.1',
'server.socket_port': 12380
})

db = nilmdb.nilmdb()
cherrypy.tree.mount(Root(db), "/")
cherrypy.tree.mount(Stream(db), "/stream")

if __name__ == "__main__":
cherrypy.engine.start()
cherrypy.engine.block()

+ 0
- 16
pytables-test/speed-pytables.py View File

@@ -1,16 +0,0 @@
import tables
import numpy

class RawSample(tables.IsDescription):
timestamp = tables.UInt64Col()
voltage = tables.UInt16Col(shape = 3)
current = tables.UInt16Col(shape = 3)

h5file = tables.openFile("test.h5", mode = "w", title = "Test")
group = h5file.createGroup("/", "raw", "Raw Data")
table = h5file.createTable(group, "nilm1", RawSample, "NILM 1")

print repr(h5file)

# write rows


+ 0
- 54
pytables-test/test-indexing.py View File

@@ -1,54 +0,0 @@
#!/usr/bin/python

from tables import *
import re
import time

# A class to describe our data
class PrepData(IsDescription):
timestamp = Int64Col()
p1 = Float32Col()
q1 = Float32Col()
p3 = Float32Col()
q3 = Float32Col()
p5 = Float32Col()
q5 = Float32Col()
p7 = Float32Col()
q7 = Float32Col()

filename = "test.h5"
h5file = openFile(filename, mode = "w", title = "NILM Test")

group = h5file.createGroup("/", "newton", "Newton school")
table = h5file.createTable(group, "prep", PrepData, "Prep Data", expectedrows = 120 * 86400 * 90)

table.cols.timestamp.createIndex()

for i in range(0, 80):
# Open file
data = open("data/alldata")
count = 0
oldtime = time.time()
prep = table.row
for line in data:
count = count + 1
if count % 1000000 == 0:
print str(i) + ": " + str((time.time() - oldtime)) + ", total " + str(count/1000000) + "m lines"
oldtime = time.time()
v = re.split('\s+', line)
prep['timestamp'] = int(v[0]) + 500000000 * i
prep['p1'] = v[1]
prep['q1'] = v[2]
prep['p3'] = v[3]
prep['q3'] = v[4]
prep['p5'] = v[5]
prep['q5'] = v[6]
prep['p7'] = v[7]
prep['q7'] = v[8]
prep.append()
data.close()

h5file.close()




+ 0
- 54
pytables-test/test-write.py View File

@@ -1,54 +0,0 @@
#!/usr/bin/python

from tables import *
import re
import time

# A class to describe our data
class PrepData(IsDescription):
timestamp = Time64Col()
p1 = Float32Col()
q1 = Float32Col()
p3 = Float32Col()
q3 = Float32Col()
p5 = Float32Col()
q5 = Float32Col()
p7 = Float32Col()
q7 = Float32Col()

filename = "test.h5"
h5file = openFile(filename, mode = "w", title = "NILM Test")

group = h5file.createGroup("/", "newton", "Newton school")
table = h5file.createTable(group, "prep", PrepData, "Prep Data")

table.cols.timestamp.createIndex()

for i in range(0, 80):
# Open file
data = open("data/alldata")
count = 0
oldtime = time.time()
prep = table.row
for line in data:
count = count + 1
if count % 1000000 == 0:
print str(i) + ": " + str((time.time() - oldtime)) + ", total " + str(count/1000000) + "m lines"
oldtime = time.time()
v = re.split('\s+', line)
prep['timestamp'] = float(v[0]) / 1000.0 + 500000 * i
prep['p1'] = v[1]
prep['q1'] = v[2]
prep['p3'] = v[3]
prep['q3'] = v[4]
prep['p5'] = v[5]
prep['q5'] = v[6]
prep['p7'] = v[7]
prep['q7'] = v[8]
prep.append()
data.close()

h5file.close()




+ 0
- 9
setup.py View File

@@ -1,9 +0,0 @@
#!/usr/bin/python

from distutils.core import setup

setup(name = 'nilmdb',
version = '1.0',
scripts = [ 'bin/nilm-test.py' ],
packages = [ 'nilmdb' ],
)

+ 0
- 5
test/Makefile View File

@@ -1,5 +0,0 @@
all:
python speed-readbinary.py

clean:
rm -f *pyc

+ 0
- 4
test/printf.py View File

@@ -1,4 +0,0 @@
from __future__ import print_function
def printf(str, *args):
print(str % args, end='')

+ 0
- 60
test/speed-readascii.py View File

@@ -1,60 +0,0 @@
#!/usr/bin/python

from printf import printf
import time
import re
import numpy as np
import itertools

class Timer():
def __init__(self, arg):
self.arg = arg
def __enter__(self): self.start = time.time()
def __exit__(self, *args): printf("%s: %f lines/sec\n", self.arg, 1e6 / (time.time() - self.start))

def test_split():
for n, line in enumerate(open('1m.raw', 'r')):
out = [0]*6
tmp = [ int(i) for i in line.partition('#')[0].split() ]
out[0:len(tmp)] = tmp
if (n % 100000 == 0):
printf("line %d = %s\n", n, str(out))

def test_split2():
for n, line in enumerate(open('1m.raw', 'r')):
out = [0]*6
tmp = [ int(i,10) for i in line.partition('#')[0].split() ]
out[0:len(tmp)] = tmp
if (n % 100000 == 0):
printf("line %d = %s\n", n, str(out))

def test_regex():
for n, line in enumerate(open('1m.raw', 'r')):
out = [0]*6
tmp = [ int(x) for x in re.findall('(\d+)\s+',line.partition('#')[0]) ]
out[0:len(tmp)] = tmp
if (n % 100000 == 0):
printf("line %d = %s\n", n, str(out))

def test_bigregex():
regex = re.compile('^(?:\s*)' + '(?:(\d+)\s+)?' * 6)
for n, line in enumerate(open('1m.raw', 'r')):
out = [ int(x or 0) for x in re.match(regex, line).groups() ]
if (n % 100000 == 0):
printf("line %d = %s\n", n, str(out))

with Timer("regex"):
test_regex() # 102k/sec

with Timer("split"):
test_split() # 175k/sec

with Timer("split2"):
test_split2() # 275k/sec

with Timer("bigregex"):
test_bigregex() # 110k/sec

# The "int" operation takes quite a while -- int(x,10) is twice as fast
# Perl does about 500k/sec


+ 0
- 74
test/speed-readbinary.py View File

@@ -1,74 +0,0 @@
#!/usr/bin/python

from printf import printf
import time
import re
import numpy as np
import itertools
import struct
import array
import os
import mmap

class Timer():
    """Context manager that times its body and prints a klines/sec rate,
    assuming the body processed one million records."""
    def __init__(self, arg):
        self.arg = arg
    def __enter__(self):
        self.start = time.time()
    def __exit__(self, *args):
        elapsed = time.time() - self.start
        printf("%s: %f klines/sec\n", self.arg, 1e3 / elapsed)

def test_struct1():
    """read fixed-size records one at a time with struct.unpack"""
    f = open('1m.bin', 'rb')
    f.seek(0, os.SEEK_END)
    filesize = f.tell()
    f.seek(0, os.SEEK_SET)
    packer = struct.Struct('!dHHHHHH')
    count = filesize // packer.size
    for n in xrange(count):
        record = packer.unpack(f.read(packer.size))
        if n % 100000 == 0:
            printf("line %d = %s\n", n, str(record))

def test_struct2():
    """read with struct.unpack, plus a str() conversion per record
    (the conversion cost is what this variant measures)"""
    f = open('1m.bin', 'rb')
    f.seek(0, os.SEEK_END)
    filesize = f.tell()
    f.seek(0, os.SEEK_SET)
    packer = struct.Struct('!dHHHHHH')
    count = filesize // packer.size
    for n in xrange(count):
        record = packer.unpack(f.read(packer.size))
        as_text = str(record)   # extra work being benchmarked
        if n % 100000 == 0:
            printf("line %d = %s\n", n, str(record))

def test_mmap():
    """struct.unpack out of a read-only mmap of the whole file"""
    with open('1m.bin', 'rb') as f:
        f.seek(0, os.SEEK_END)
        filesize = f.tell()
        f.seek(0, os.SEEK_SET)
        m = mmap.mmap(f.fileno(), filesize, access=mmap.ACCESS_READ)
        packer = struct.Struct('!dHHHHHH')
        count = filesize // packer.size
        for n in xrange(count):
            offset = packer.size * n
            record = packer.unpack(m[offset : offset + packer.size])
            if n % 100000 == 0:
                printf("line %d = %s\n", n, str(record))

# Run the read benchmarks; rates from a previous run are noted.
for label, benchmark in [("mmap", test_mmap),         # 1600k
                         ("struct1", test_struct1),   # 1460k
                         ("struct2", test_struct2)]:  # 210k
    with Timer(label):
        benchmark()

# Reading from the file is again much quicker than converting to string
# Use mmap, it's good



+ 0
- 76
test/speed-writebinary.py View File

@@ -1,76 +0,0 @@
#!/usr/bin/python

from printf import printf
import time
import re
import numpy as np
import itertools
import struct
import array

class Timer():
    """Context manager that times its body and prints a klines/sec rate,
    assuming the body wrote one million records."""
    def __init__(self, arg):
        self.arg = arg
    def __enter__(self):
        self.start = time.time()
    def __exit__(self, *args):
        elapsed = time.time() - self.start
        printf("%s: %f klines/sec\n", self.arg, 1e3 / elapsed)

def read_ascii():
    """Yield one million constant 6-tuples, standing in for parsed
    ASCII input so only the binary write path is measured."""
    row = (1234, 2345, 3456, 4576, 5678, 6789)
    for _ in xrange(1000000):
        yield row

def test_struct1():
    """write with struct.pack, format string supplied on every call"""
    f = open('1m.bin', 'wb')
    for row in read_ascii():
        f.write(struct.pack('!HHHHHH', *row))

def test_struct2():
    """write with a precompiled struct.Struct (constant format)"""
    f = open('1m.bin', 'wb')
    packer = struct.Struct('!HHHHHH')
    for row in read_ascii():
        f.write(packer.pack(*row))
    f.close()
    printf("size was %d\n", packer.size)

def test_struct3():
    """like struct1, but prepend a timestamp to each record"""
    f = open('1m.bin', 'wb')
    for row in read_ascii():
        f.write(struct.pack('!dHHHHHH', time.time(), *row))

def test_struct4():
    """like struct2, but prepend a timestamp to each record"""
    f = open('1m.bin', 'wb')
    packer = struct.Struct('!dHHHHHH')
    for row in read_ascii():
        f.write(packer.pack(time.time(), *row))
    f.close()
    printf("size was %d\n", packer.size)

#raise Exception('done')

# Run the write benchmarks; rates from a previous run are noted.
for label, benchmark in [("struct1", test_struct1),   # 1089k
                         ("struct2", test_struct2),   # 1249k
                         ("struct3", test_struct3),   # 845k
                         ("struct4", test_struct4)]:  # 922k
    with Timer(label):
        benchmark()

# This seems fast enough for writing new data, since it's faster than
# we read ascii data anyway. Use e.g. struct4


+ 0
- 11
test/test-struct-pack.py View File

@@ -1,11 +0,0 @@
#!/usr/bin/python

import struct
import mmap

# Map the first 3 bytes of test.dat and demonstrate that an mmap object
# supports len() and string-style slicing (Python 2 script).
f = open("test.dat", "rb+")
mm = mmap.mmap(f.fileno(),3)

# Python 2 print statements; mm[0:3] yields a str of the mapped bytes.
print len(mm)
print "first 3 bytes: " + mm[0:3];


+ 0
- 1
test/test.dat View File

@@ -1 +0,0 @@


+ 0
- 7
test/todo.md View File

@@ -1,7 +0,0 @@
- Have a class representing the file contents
- Looks like an array
- len(), get(), index
- some form of bisect search
- get_extents = return [0].timestamp, [-1].timestamp
-
- Can append? Sure, why not. Just write to the file, extend mmap accordingly.

+ 150
- 0
tests/test_nilmdb.py View File

@@ -0,0 +1,150 @@
import nilmdb

from nose.tools import *
from nose.tools import assert_raises
from distutils.version import StrictVersion as V
import json
import itertools
import os
import sys
import cherrypy
import threading
import urllib2
import Queue

testdb = "tests/test.db"

#@atexit.register
#def cleanup():
# os.unlink(testdb)

class Test00Nilmdb(object):  # named 00 so it runs first
    def test_NilmDB(self):
        # Start from a clean slate; the test database may not exist yet.
        try:
            os.unlink(testdb)
        except:
            pass

        # A path in a nonexistent directory must fail with IOError.
        with assert_raises(IOError):
            nilmdb.NilmDB("/nonexistant-db/foo")

        # Open twice in a row: first call creates, second reopens.
        db = nilmdb.NilmDB(testdb)
        db.close()
        db = nilmdb.NilmDB(testdb)
        db.close()

    def test_stream(self):
        db = nilmdb.NilmDB(testdb)
        eq_(db.stream_list(), [])

        # Bad path (no leading /newton-style hierarchy accepted here)
        with assert_raises(ValueError):
            db.stream_create("/foo", "PrepData")
        # Bad layout type
        with assert_raises(KeyError):
            db.stream_create("/newton/prep", "NoSuchLayout")
        # Bad index columns
        with assert_raises(KeyError):
            db.stream_create("/newton/prep", "PrepData", ["nonexistant"])
        db.stream_create("/newton/prep", "PrepData")
        db.stream_create("/newton/raw", "RawData")
        db.stream_create("/newton/zzz/rawnotch", "RawNotchedData")

        # Verify we got 3 streams (path, layout name, row count)
        eq_(db.stream_list(), [ ("/newton/prep", "PrepData", 0),
                                ("/newton/raw", "RawData", 0),
                                ("/newton/zzz/rawnotch", "RawNotchedData", 0)
                                ])
        # Match just one type
        eq_(db.stream_list("RawData"), [ ("/newton/raw", "RawData", 0) ])

        # Verify returned types if a layout is missing: temporarily hide
        # "RawData" from the registry and expect None for that stream.
        save = nilmdb.layout.layouts.copy()
        del nilmdb.layout.layouts["RawData"]
        eq_(db.stream_list(), [ ("/newton/prep", "PrepData", 0),
                                ("/newton/raw", None, 0),
                                ("/newton/zzz/rawnotch", "RawNotchedData", 0)
                                ])
        nilmdb.layout.layouts = save

        # Verify that columns were made right: column counts and which
        # columns are indexed come from the layout definitions.
        eq_(len(db.h5file.getNode("/newton/prep").cols), 9)
        eq_(len(db.h5file.getNode("/newton/raw").cols), 7)
        eq_(len(db.h5file.getNode("/newton/zzz/rawnotch").cols), 10)
        assert(db.h5file.getNode("/newton/prep").colindexed["timestamp"])
        assert(not db.h5file.getNode("/newton/prep").colindexed["p1"])
        db.close()

class TestBlockingServer(object):
    def setUp(self):
        self.db = nilmdb.NilmDB(testdb)

    def tearDown(self):
        self.db.close()

    def test_blocking_server(self):
        # Start web app on a custom port; stoppable=True enables the
        # /exit URL so the blocking server can be shut down remotely.
        self.server = nilmdb.Server(self.db, host = "127.0.0.1",
                                    port = 12380, stoppable = True)

        # Run it in a background thread.  NOTE(review): `event` is
        # presumably set by start() once the server is listening —
        # confirm against nilmdb.Server.
        event = threading.Event()
        def run_server():
            self.server.start(blocking = True, event = event)
        thread = threading.Thread(target = run_server)
        thread.start()
        event.wait(timeout = 2)

        # Send request to exit.
        req = urllib2.urlopen("http://127.0.0.1:12380/exit/", timeout = 1)

        # Wait for the server thread to finish.
        thread.join()
def geturl(path):
    """GET the given path from the test server and return the body."""
    response = urllib2.urlopen("http://127.0.0.1:12380" + path, timeout = 10)
    return response.read()

def getjson(path):
    """GET the given path and decode the response body as JSON."""
    body = geturl(path)
    return json.loads(body)

class TestServer(object):

    def setUp(self):
        # Start web app on a custom port; stoppable=False means there is
        # no /exit URL, and start() returns immediately (non-blocking).
        self.db = nilmdb.NilmDB(testdb)
        self.server = nilmdb.Server(self.db, host = "127.0.0.1",
                                    port = 12380, stoppable = False)
        self.server.start(blocking = False)

    def tearDown(self):
        # Close web app
        self.server.stop()
        self.db.close()

    def test_server(self):
        # Make sure we can't force an exit, and test other 404 errors
        for url in [ "/exit", "/", "/favicon.ico" ]:
            with assert_raises(urllib2.HTTPError) as e:
                geturl(url)
            eq_(e.exception.code, 404)

        # Check version: /version must report the server's own version
        eq_(V(getjson("/version")), V(self.server.version))

    def test_stream_list(self):
        # Known streams that got populated by an earlier test (test_nilmdb)
        streams = getjson("/stream/list")

        eq_(streams, [
            ['/newton/prep', 'PrepData', 0],
            ['/newton/raw', 'RawData', 0],
            ['/newton/zzz/rawnotch', 'RawNotchedData', 0],
            ])

        # Filtering by layout returns only matching streams; an unknown
        # layout yields an empty list rather than an error.
        streams = getjson("/stream/list?layout=RawData")
        eq_(streams, [['/newton/raw', 'RawData', 0]])

        streams = getjson("/stream/list?layout=NoSuchLayout")
        eq_(streams, [])


+ 25
- 0
tests/test_printf.py View File

@@ -0,0 +1,25 @@
import nilmdb
from nilmdb.printf import *

from nose.tools import *
from nose.tools import assert_raises
from cStringIO import StringIO
import sys

class TestPrintf(object):
    def test_printf(self):
        """Exercise printf (stdout), fprintf (explicit file) and sprintf
        (returned string) with the same format and arguments."""
        old_stdout = sys.stdout
        # printf writes to sys.stdout, so capture it in a StringIO.
        sys.stdout = test1 = StringIO()
        test2 = StringIO()
        test3 = ""
        try:
            printf("hello, world: %d", 123)
            fprintf(test2, "hello too: %d", 123)
            test3 = sprintf("hello three: %d", 123)
        finally:
            # Always restore stdout, raise or not.  (The original used a
            # bare `except` and restored stdout in two separate places.)
            sys.stdout = old_stdout
        eq_(test1.getvalue(), "hello, world: 123")
        eq_(test2.getvalue(), "hello too: 123")
        eq_(test3, "hello three: 123")

+ 70
- 0
tests/test_serializer.py View File

@@ -0,0 +1,70 @@
import nilmdb
from nilmdb.printf import *

import nose
from nose.tools import *
from nose.tools import assert_raises
import threading
import time

#raise nose.exc.SkipTest("Skip these")

class Foo(object):
    """Test target whose increment is deliberately racy: it reads the
    counter, sleeps, then writes back, so unsynchronized concurrent
    callers lose updates."""
    val = 0

    def fail(self):
        raise Exception("you asked me to do this")

    def test(self, debug = False):
        # purposely not thread-safe: read-modify-write with a sleep in
        # the middle to widen the race window
        before = self.val
        after = before + 1
        time.sleep(0.05)
        self.val = after
        if debug:
            printf("[%s] value changed: %d -> %d\n",
                   threading.current_thread().name, before, after)

class Base(object):
    """Shared test cases; subclasses supply self.foo and verify_result()."""

    def test_wrapping(self):
        # A normal call goes through; an exception propagates back out.
        self.foo.test()
        with assert_raises(Exception):
            self.foo.fail()

    def test_threaded(self):
        # Hammer foo.test() from 20 threads, then let the subclass
        # decide whether the final count proves a race or serialization.
        def worker(target):
            target.test()
        pool = [threading.Thread(target = worker, args = (self.foo,))
                for _ in xrange(20)]
        for t in pool:
            t.start()
        for t in pool:
            t.join()
        self.verify_result()

class TestUnserialized(Base):
def setUp(self):
self.foo = Foo()

def verify_result(self):
# This should have failed to increment properly
assert(self.foo.val != 20)

class TestSerialized(Base):
    def setUp(self):
        # Keep a handle on the real object so results can be checked
        # without going through the wrapper.
        self.realfoo = Foo()
        self.foo = nilmdb.serializer.WrapObject(self.realfoo)

    def tearDown(self):
        # NOTE(review): presumably dropping the wrapper shuts down the
        # serializer's worker thread — confirm against nilmdb.serializer.
        del self.foo

    def verify_result(self):
        # Serialized calls cannot interleave, so all 20 increments land.
        eq_(self.realfoo.val, 20)

    def test_attribute(self):
        # Can't wrap attributes yet
        with assert_raises(TypeError):
            self.foo.val

+ 0
- 7
todo.txt View File

@@ -1,7 +0,0 @@
- test performance of binary storage
- write
- read
- seek to timestamp?

- implement binary storage classes
- fileinterval, binaryfileinterval, etc?

Loading…
Cancel
Save