Compare commits
6 Commits
nilmdb-1.4
...
nilmdb-1.4
Author | SHA1 | Date | |
---|---|---|---|
7429c1788d | |||
0ef71c193b | |||
4a50dd015e | |||
22274550ab | |||
4f06d6ae68 | |||
c54d8041c3 |
@@ -186,6 +186,19 @@ IntervalSet speed
|
|||||||
- rbtree and interval converted to cython:
|
- rbtree and interval converted to cython:
|
||||||
8.4 μs, total 12 s, 134 MB RAM
|
8.4 μs, total 12 s, 134 MB RAM
|
||||||
|
|
||||||
|
- Would like to move Interval itself back to Python so other
|
||||||
|
non-cythonized code like client code can use it more easily.
|
||||||
|
Testing speed with just `test_interval` being tested, with
|
||||||
|
`range(5,22)`, using `/usr/bin/time -v python tests/runtests.py`,
|
||||||
|
times recorded for 2097152:
|
||||||
|
- 52ae397 (Interval in cython):
|
||||||
|
12.6133 μs each, ratio 0.866533, total 47 sec, 399 MB RAM
|
||||||
|
- 9759dcf (Interval in python):
|
||||||
|
21.2937 μs each, ratio 1.462870, total 83 sec, 1107 MB RAM
|
||||||
|
That's a huge difference! Instead, will keep Interval and DBInterval
|
||||||
|
cythonized inside nilmdb, and just have an additional copy in
|
||||||
|
nilmdb.utils for clients to use.
|
||||||
|
|
||||||
Layouts
|
Layouts
|
||||||
-------
|
-------
|
||||||
Current/old design has specific layouts: RawData, PrepData, RawNotchedData.
|
Current/old design has specific layouts: RawData, PrepData, RawNotchedData.
|
||||||
|
@@ -1,5 +1,9 @@
|
|||||||
"""Interval, IntervalSet
|
"""Interval, IntervalSet
|
||||||
|
|
||||||
|
The Interval implemented here is just like
|
||||||
|
nilmdb.utils.interval.Interval, except implemented in Cython for
|
||||||
|
speed.
|
||||||
|
|
||||||
Represents an interval of time, and a set of such intervals.
|
Represents an interval of time, and a set of such intervals.
|
||||||
|
|
||||||
Intervals are half-open, i.e., they include data points with timestamps
|
Intervals are half-open, i.e., they include data points with timestamps
|
||||||
@@ -23,6 +27,7 @@ from ..utils.time import min_timestamp as nilmdb_min_timestamp
|
|||||||
from ..utils.time import max_timestamp as nilmdb_max_timestamp
|
from ..utils.time import max_timestamp as nilmdb_max_timestamp
|
||||||
from ..utils.time import timestamp_to_string
|
from ..utils.time import timestamp_to_string
|
||||||
from ..utils.iterator import imerge
|
from ..utils.iterator import imerge
|
||||||
|
from ..utils.interval import IntervalError
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
cimport rbtree
|
cimport rbtree
|
||||||
@@ -30,10 +35,6 @@ from libc.stdint cimport uint64_t, int64_t
|
|||||||
|
|
||||||
ctypedef int64_t timestamp_t
|
ctypedef int64_t timestamp_t
|
||||||
|
|
||||||
class IntervalError(Exception):
|
|
||||||
"""Error due to interval overlap, etc"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
cdef class Interval:
|
cdef class Interval:
|
||||||
"""Represents an interval of time."""
|
"""Represents an interval of time."""
|
||||||
|
|
||||||
@@ -59,17 +60,7 @@ cdef class Interval:
|
|||||||
|
|
||||||
def __cmp__(self, Interval other):
|
def __cmp__(self, Interval other):
|
||||||
"""Compare two intervals. If non-equal, order by start then end"""
|
"""Compare two intervals. If non-equal, order by start then end"""
|
||||||
if not isinstance(other, Interval):
|
return cmp(self.start, other.start) or cmp(self.end, other.end)
|
||||||
raise TypeError("bad type")
|
|
||||||
if self.start == other.start:
|
|
||||||
if self.end < other.end:
|
|
||||||
return -1
|
|
||||||
if self.end > other.end:
|
|
||||||
return 1
|
|
||||||
return 0
|
|
||||||
if self.start < other.start:
|
|
||||||
return -1
|
|
||||||
return 1
|
|
||||||
|
|
||||||
cpdef intersects(self, Interval other):
|
cpdef intersects(self, Interval other):
|
||||||
"""Return True if two Interval objects intersect"""
|
"""Return True if two Interval objects intersect"""
|
||||||
@@ -313,63 +304,6 @@ cdef class IntervalSet:
|
|||||||
else:
|
else:
|
||||||
yield subset
|
yield subset
|
||||||
|
|
||||||
def set_difference(self, IntervalSet other not None,
|
|
||||||
Interval bounds = None):
|
|
||||||
"""
|
|
||||||
Compute the difference (self \\ other) between this
|
|
||||||
IntervalSet and the given IntervalSet; i.e., the ranges
|
|
||||||
that are present in 'self' but not 'other'.
|
|
||||||
|
|
||||||
If 'bounds' is not None, results are limited to the range
|
|
||||||
specified by the interval 'bounds'.
|
|
||||||
|
|
||||||
Returns a generator that yields each interval in turn.
|
|
||||||
Output intervals are built as subsets of the intervals in the
|
|
||||||
first argument (self).
|
|
||||||
"""
|
|
||||||
# Iterate through all starts and ends in sorted order. Add a
|
|
||||||
# tag to the iterator so that we can figure out which one they
|
|
||||||
# were, after sorting.
|
|
||||||
def decorate(it, key_start, key_end):
|
|
||||||
for i in it:
|
|
||||||
yield i.start, key_start, i
|
|
||||||
yield i.end, key_end, i
|
|
||||||
if bounds is None:
|
|
||||||
bounds = Interval(nilmdb_min_timestamp,
|
|
||||||
nilmdb_max_timestamp)
|
|
||||||
self_iter = decorate(self.intersection(bounds), 0, 2)
|
|
||||||
other_iter = decorate(other.intersection(bounds), 1, 3)
|
|
||||||
|
|
||||||
# Now iterate over the timestamps of each start and end.
|
|
||||||
# At each point, evaluate which type of end it is, to determine
|
|
||||||
# how to build up the output intervals.
|
|
||||||
self_interval = None
|
|
||||||
other_interval = None
|
|
||||||
out_start = None
|
|
||||||
for (ts, k, i) in imerge(self_iter, other_iter):
|
|
||||||
if k == 0:
|
|
||||||
# start self interval
|
|
||||||
self_interval = i
|
|
||||||
if other_interval is None:
|
|
||||||
out_start = ts
|
|
||||||
elif k == 1:
|
|
||||||
# start other interval
|
|
||||||
other_interval = i
|
|
||||||
if out_start is not None and out_start != ts:
|
|
||||||
yield self_interval.subset(out_start, ts)
|
|
||||||
out_start = None
|
|
||||||
elif k == 2:
|
|
||||||
# end self interval
|
|
||||||
if out_start is not None and out_start != ts:
|
|
||||||
yield self_interval.subset(out_start, ts)
|
|
||||||
out_start = None
|
|
||||||
self_interval = None
|
|
||||||
elif k == 3:
|
|
||||||
# end other interval
|
|
||||||
other_interval = None
|
|
||||||
if self_interval:
|
|
||||||
out_start = ts
|
|
||||||
|
|
||||||
cpdef intersects(self, Interval other):
|
cpdef intersects(self, Interval other):
|
||||||
"""Return True if this IntervalSet intersects another interval"""
|
"""Return True if this IntervalSet intersects another interval"""
|
||||||
for n in self.tree.intersect(other.start, other.end):
|
for n in self.tree.intersect(other.start, other.end):
|
||||||
|
@@ -1,204 +0,0 @@
|
|||||||
# cython: profile=False
|
|
||||||
|
|
||||||
import time
|
|
||||||
import sys
|
|
||||||
import inspect
|
|
||||||
import cStringIO
|
|
||||||
|
|
||||||
from ..utils.time import min_timestamp as nilmdb_min_timestamp
|
|
||||||
|
|
||||||
cdef enum:
|
|
||||||
max_value_count = 64
|
|
||||||
|
|
||||||
cimport cython
|
|
||||||
cimport libc.stdlib
|
|
||||||
cimport libc.stdio
|
|
||||||
cimport libc.string
|
|
||||||
|
|
||||||
class ParserError(Exception):
|
|
||||||
def __init__(self, line, message):
|
|
||||||
self.message = "line " + str(line) + ": " + message
|
|
||||||
Exception.__init__(self, self.message)
|
|
||||||
|
|
||||||
class FormatterError(Exception):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class Layout:
|
|
||||||
"""Represents a NILM database layout"""
|
|
||||||
|
|
||||||
def __init__(self, typestring):
|
|
||||||
"""Initialize this Layout object to handle the specified
|
|
||||||
type string"""
|
|
||||||
try:
|
|
||||||
[ datatype, count ] = typestring.split("_")
|
|
||||||
except:
|
|
||||||
raise KeyError("invalid layout string")
|
|
||||||
|
|
||||||
try:
|
|
||||||
self.count = int(count)
|
|
||||||
except ValueError:
|
|
||||||
raise KeyError("invalid count")
|
|
||||||
if self.count < 1 or self.count > max_value_count:
|
|
||||||
raise KeyError("invalid count")
|
|
||||||
|
|
||||||
if datatype == 'uint16':
|
|
||||||
self.parse = self.parse_uint16
|
|
||||||
self.format_str = "%.6f" + " %d" * self.count
|
|
||||||
self.format = self.format_generic
|
|
||||||
elif datatype == 'float32':
|
|
||||||
self.parse = self.parse_float64
|
|
||||||
self.format_str = "%.6f" + " %.6e" * self.count
|
|
||||||
self.format = self.format_generic
|
|
||||||
elif datatype == 'float64':
|
|
||||||
self.parse = self.parse_float64
|
|
||||||
self.format_str = "%.6f" + " %.16e" * self.count
|
|
||||||
self.format = self.format_generic
|
|
||||||
else:
|
|
||||||
raise KeyError("invalid type")
|
|
||||||
|
|
||||||
self.datatype = datatype
|
|
||||||
|
|
||||||
# Parsers
|
|
||||||
def parse_float64(self, char *text):
|
|
||||||
cdef int n
|
|
||||||
cdef double ts
|
|
||||||
# Return doubles even in float32 case, since they're going into
|
|
||||||
# a Python array which would upconvert to double anyway.
|
|
||||||
result = [0] * (self.count + 1)
|
|
||||||
cdef char *end
|
|
||||||
ts = libc.stdlib.strtod(text, &end)
|
|
||||||
if end == text:
|
|
||||||
raise ValueError("bad timestamp")
|
|
||||||
result[0] = ts
|
|
||||||
for n in range(self.count):
|
|
||||||
text = end
|
|
||||||
result[n+1] = libc.stdlib.strtod(text, &end)
|
|
||||||
if end == text:
|
|
||||||
raise ValueError("wrong number of values")
|
|
||||||
n = 0
|
|
||||||
while end[n] == ' ':
|
|
||||||
n += 1
|
|
||||||
if end[n] != '\n' and end[n] != '#' and end[n] != '\0':
|
|
||||||
raise ValueError("extra data on line")
|
|
||||||
return (ts, result)
|
|
||||||
|
|
||||||
def parse_uint16(self, char *text):
|
|
||||||
cdef int n
|
|
||||||
cdef double ts
|
|
||||||
cdef int v
|
|
||||||
cdef char *end
|
|
||||||
result = [0] * (self.count + 1)
|
|
||||||
ts = libc.stdlib.strtod(text, &end)
|
|
||||||
if end == text:
|
|
||||||
raise ValueError("bad timestamp")
|
|
||||||
result[0] = ts
|
|
||||||
for n in range(self.count):
|
|
||||||
text = end
|
|
||||||
v = libc.stdlib.strtol(text, &end, 10)
|
|
||||||
if v < 0 or v > 65535:
|
|
||||||
raise ValueError("value out of range")
|
|
||||||
result[n+1] = v
|
|
||||||
if end == text:
|
|
||||||
raise ValueError("wrong number of values")
|
|
||||||
n = 0
|
|
||||||
while end[n] == ' ':
|
|
||||||
n += 1
|
|
||||||
if end[n] != '\n' and end[n] != '#' and end[n] != '\0':
|
|
||||||
raise ValueError("extra data on line")
|
|
||||||
return (ts, result)
|
|
||||||
|
|
||||||
# Formatters
|
|
||||||
def format_generic(self, d):
|
|
||||||
n = len(d) - 1
|
|
||||||
if n != self.count:
|
|
||||||
raise ValueError("wrong number of values for layout type: "
|
|
||||||
"got %d, wanted %d" % (n, self.count))
|
|
||||||
return (self.format_str % tuple(d)) + "\n"
|
|
||||||
|
|
||||||
# Get a layout by name
|
|
||||||
def get_named(typestring):
|
|
||||||
try:
|
|
||||||
return Layout(typestring)
|
|
||||||
except KeyError:
|
|
||||||
compat = { "PrepData": "float32_8",
|
|
||||||
"RawData": "uint16_6",
|
|
||||||
"RawNotchedData": "uint16_9" }
|
|
||||||
return Layout(compat[typestring])
|
|
||||||
|
|
||||||
class Parser(object):
|
|
||||||
"""Object that parses and stores ASCII data for inclusion into the
|
|
||||||
database"""
|
|
||||||
|
|
||||||
def __init__(self, layout):
|
|
||||||
if issubclass(layout.__class__, Layout):
|
|
||||||
self.layout = layout
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
self.layout = get_named(layout)
|
|
||||||
except KeyError:
|
|
||||||
raise TypeError("unknown layout")
|
|
||||||
|
|
||||||
self.data = []
|
|
||||||
self.min_timestamp = None
|
|
||||||
self.max_timestamp = None
|
|
||||||
|
|
||||||
def parse(self, textdata):
|
|
||||||
"""
|
|
||||||
Parse the data, provided as lines of text, using the current
|
|
||||||
layout, into an internal data structure suitable for a
|
|
||||||
pytables 'table.append(parser.data)'.
|
|
||||||
"""
|
|
||||||
cdef double last_ts = nilmdb_min_timestamp
|
|
||||||
cdef double ts
|
|
||||||
cdef int n = 0, i
|
|
||||||
cdef char *line
|
|
||||||
|
|
||||||
indata = cStringIO.StringIO(textdata)
|
|
||||||
# Assume any parsing error is a real error.
|
|
||||||
# In the future we might want to skip completely empty lines,
|
|
||||||
# or partial lines right before EOF?
|
|
||||||
try:
|
|
||||||
self.data = []
|
|
||||||
for pyline in indata:
|
|
||||||
line = pyline
|
|
||||||
n += 1
|
|
||||||
if line[0] == '\#':
|
|
||||||
continue
|
|
||||||
(ts, row) = self.layout.parse(line)
|
|
||||||
if ts <= last_ts:
|
|
||||||
raise ValueError("timestamp is not "
|
|
||||||
"monotonically increasing")
|
|
||||||
last_ts = ts
|
|
||||||
self.data.append(row)
|
|
||||||
except (ValueError, IndexError, TypeError) as e:
|
|
||||||
raise ParserError(n, "error: " + e.message)
|
|
||||||
|
|
||||||
# Mark timestamp ranges
|
|
||||||
if len(self.data):
|
|
||||||
self.min_timestamp = self.data[0][0]
|
|
||||||
self.max_timestamp = self.data[-1][0]
|
|
||||||
|
|
||||||
class Formatter(object):
|
|
||||||
"""Object that formats database data into ASCII"""
|
|
||||||
|
|
||||||
def __init__(self, layout):
|
|
||||||
if issubclass(layout.__class__, Layout):
|
|
||||||
self.layout = layout
|
|
||||||
else:
|
|
||||||
try:
|
|
||||||
self.layout = get_named(layout)
|
|
||||||
except KeyError:
|
|
||||||
raise TypeError("unknown layout")
|
|
||||||
|
|
||||||
def format(self, data):
|
|
||||||
"""
|
|
||||||
Format raw data from the database, using the current layout,
|
|
||||||
as lines of ASCII text.
|
|
||||||
"""
|
|
||||||
text = cStringIO.StringIO()
|
|
||||||
try:
|
|
||||||
for row in data:
|
|
||||||
text.write(self.layout.format(row))
|
|
||||||
except (ValueError, IndexError, TypeError) as e:
|
|
||||||
raise FormatterError("formatting error: " + e.message)
|
|
||||||
return text.getvalue()
|
|
@@ -12,8 +12,10 @@ Manages both the SQL database and the table storage backend.
|
|||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
import nilmdb.utils
|
import nilmdb.utils
|
||||||
from nilmdb.utils.printf import *
|
from nilmdb.utils.printf import *
|
||||||
from nilmdb.server.interval import (Interval, DBInterval,
|
|
||||||
IntervalSet, IntervalError)
|
from nilmdb.utils.interval import IntervalError
|
||||||
|
from nilmdb.server.interval import Interval, DBInterval, IntervalSet
|
||||||
|
|
||||||
from nilmdb.server import bulkdata
|
from nilmdb.server import bulkdata
|
||||||
from nilmdb.server.errors import NilmDBError, StreamError, OverlapError
|
from nilmdb.server.errors import NilmDBError, StreamError, OverlapError
|
||||||
|
|
||||||
@@ -328,7 +330,7 @@ class NilmDB(object):
|
|||||||
List all intervals in 'path' between 'start' and 'end'. If
|
List all intervals in 'path' between 'start' and 'end'. If
|
||||||
'diffpath' is not None, list instead the set-difference
|
'diffpath' is not None, list instead the set-difference
|
||||||
between the intervals in the two streams; i.e. all interval
|
between the intervals in the two streams; i.e. all interval
|
||||||
ranges that are present in 'path' but not 'path2'.
|
ranges that are present in 'path' but not 'diffpath'.
|
||||||
|
|
||||||
Returns (intervals, restart) tuple.
|
Returns (intervals, restart) tuple.
|
||||||
|
|
||||||
@@ -350,7 +352,9 @@ class NilmDB(object):
|
|||||||
requested = Interval(start, end)
|
requested = Interval(start, end)
|
||||||
result = []
|
result = []
|
||||||
if diffpath:
|
if diffpath:
|
||||||
getter = intervals.set_difference(diffintervals, requested)
|
getter = nilmdb.utils.interval.set_difference(
|
||||||
|
intervals.intersection(requested),
|
||||||
|
diffintervals.intersection(requested))
|
||||||
else:
|
else:
|
||||||
getter = intervals.intersection(requested)
|
getter = intervals.intersection(requested)
|
||||||
for n, i in enumerate(getter):
|
for n, i in enumerate(getter):
|
||||||
|
@@ -11,3 +11,4 @@ import nilmdb.utils.threadsafety
|
|||||||
import nilmdb.utils.fallocate
|
import nilmdb.utils.fallocate
|
||||||
import nilmdb.utils.time
|
import nilmdb.utils.time
|
||||||
import nilmdb.utils.iterator
|
import nilmdb.utils.iterator
|
||||||
|
import nilmdb.utils.interval
|
||||||
|
106
nilmdb/utils/interval.py
Normal file
106
nilmdb/utils/interval.py
Normal file
@@ -0,0 +1,106 @@
|
|||||||
|
"""Interval. Like nilmdb.server.interval, but re-implemented here
|
||||||
|
in plain Python so clients have easier access to it.
|
||||||
|
|
||||||
|
Intervals are half-open, i.e., they include data points with timestamps
|
||||||
|
[start, end)
|
||||||
|
"""
|
||||||
|
|
||||||
|
import nilmdb.utils.time
|
||||||
|
import nilmdb.utils.iterator
|
||||||
|
|
||||||
|
class IntervalError(Exception):
|
||||||
|
"""Error due to interval overlap, etc"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Interval
|
||||||
|
class Interval:
|
||||||
|
"""Represents an interval of time."""
|
||||||
|
|
||||||
|
def __init__(self, start, end):
|
||||||
|
"""
|
||||||
|
'start' and 'end' are arbitrary numbers that represent time
|
||||||
|
"""
|
||||||
|
if start >= end:
|
||||||
|
# Explicitly disallow zero-width intervals (since they're half-open)
|
||||||
|
raise IntervalError("start %s must precede end %s" % (start, end))
|
||||||
|
self.start = start
|
||||||
|
self.end = end
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
s = repr(self.start) + ", " + repr(self.end)
|
||||||
|
return self.__class__.__name__ + "(" + s + ")"
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return ("[" + nilmdb.utils.time.timestamp_to_string(self.start) +
|
||||||
|
" -> " + nilmdb.utils.time.timestamp_to_string(self.end) + ")")
|
||||||
|
|
||||||
|
def __cmp__(self, other):
|
||||||
|
"""Compare two intervals. If non-equal, order by start then end"""
|
||||||
|
return cmp(self.start, other.start) or cmp(self.end, other.end)
|
||||||
|
|
||||||
|
def intersects(self, other):
|
||||||
|
"""Return True if two Interval objects intersect"""
|
||||||
|
if not isinstance(other, Interval):
|
||||||
|
raise TypeError("need an Interval")
|
||||||
|
if self.end <= other.start or self.start >= other.end:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def subset(self, start, end):
|
||||||
|
"""Return a new Interval that is a subset of this one"""
|
||||||
|
# A subclass that tracks additional data might override this.
|
||||||
|
if start < self.start or end > self.end:
|
||||||
|
raise IntervalError("not a subset")
|
||||||
|
return Interval(start, end)
|
||||||
|
|
||||||
|
def set_difference(a, b):
|
||||||
|
"""
|
||||||
|
Compute the difference (a \\ b) between the intervals in 'a' and
|
||||||
|
the intervals in 'b'; i.e., the ranges that are present in 'a'
|
||||||
|
but not 'b'.
|
||||||
|
|
||||||
|
'a' and 'b' must both be iterables.
|
||||||
|
|
||||||
|
Returns a generator that yields each interval in turn.
|
||||||
|
Output intervals are built as subsets of the intervals in the
|
||||||
|
first argument (a).
|
||||||
|
"""
|
||||||
|
# Iterate through all starts and ends in sorted order. Add a
|
||||||
|
# tag to the iterator so that we can figure out which one they
|
||||||
|
# were, after sorting.
|
||||||
|
def decorate(it, key_start, key_end):
|
||||||
|
for i in it:
|
||||||
|
yield i.start, key_start, i
|
||||||
|
yield i.end, key_end, i
|
||||||
|
a_iter = decorate(iter(a), 0, 2)
|
||||||
|
b_iter = decorate(iter(b), 1, 3)
|
||||||
|
|
||||||
|
# Now iterate over the timestamps of each start and end.
|
||||||
|
# At each point, evaluate which type of end it is, to determine
|
||||||
|
# how to build up the output intervals.
|
||||||
|
a_interval = None
|
||||||
|
b_interval = None
|
||||||
|
out_start = None
|
||||||
|
for (ts, k, i) in nilmdb.utils.iterator.imerge(a_iter, b_iter):
|
||||||
|
if k == 0:
|
||||||
|
# start a interval
|
||||||
|
a_interval = i
|
||||||
|
if b_interval is None:
|
||||||
|
out_start = ts
|
||||||
|
elif k == 1:
|
||||||
|
# start b interval
|
||||||
|
b_interval = i
|
||||||
|
if out_start is not None and out_start != ts:
|
||||||
|
yield a_interval.subset(out_start, ts)
|
||||||
|
out_start = None
|
||||||
|
elif k == 2:
|
||||||
|
# end a interval
|
||||||
|
if out_start is not None and out_start != ts:
|
||||||
|
yield a_interval.subset(out_start, ts)
|
||||||
|
out_start = None
|
||||||
|
a_interval = None
|
||||||
|
elif k == 3:
|
||||||
|
# end b interval
|
||||||
|
b_interval = None
|
||||||
|
if a_interval:
|
||||||
|
out_start = ts
|
@@ -1,5 +1,6 @@
|
|||||||
from nilmdb.utils import datetime_tz
|
from nilmdb.utils import datetime_tz
|
||||||
import re
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
# Range
|
# Range
|
||||||
min_timestamp = (-2**63)
|
min_timestamp = (-2**63)
|
||||||
@@ -36,6 +37,7 @@ def unix_to_timestamp(unix):
|
|||||||
"""Convert a Unix timestamp (floating point seconds since epoch)
|
"""Convert a Unix timestamp (floating point seconds since epoch)
|
||||||
into a NILM timestamp (integer microseconds since epoch)"""
|
into a NILM timestamp (integer microseconds since epoch)"""
|
||||||
return int(round(unix * 1e6))
|
return int(round(unix * 1e6))
|
||||||
|
seconds_to_timestamp = unix_to_timestamp
|
||||||
|
|
||||||
def timestamp_to_unix(timestamp):
|
def timestamp_to_unix(timestamp):
|
||||||
"""Convert a NILM timestamp (integer microseconds since epoch)
|
"""Convert a NILM timestamp (integer microseconds since epoch)
|
||||||
@@ -118,4 +120,4 @@ def parse_time(toparse):
|
|||||||
|
|
||||||
def now():
|
def now():
|
||||||
"""Return current timestamp"""
|
"""Return current timestamp"""
|
||||||
return unix_to_timestamp(datetime_tz.datetime_tz.utcnow().totimestamp())
|
return unix_to_timestamp(time.time())
|
||||||
|
1
setup.py
1
setup.py
@@ -43,7 +43,6 @@ except: pass
|
|||||||
|
|
||||||
# Use Cython if it's new enough, otherwise use preexisting C files.
|
# Use Cython if it's new enough, otherwise use preexisting C files.
|
||||||
cython_modules = [ 'nilmdb.server.interval',
|
cython_modules = [ 'nilmdb.server.interval',
|
||||||
'nilmdb.server.layout',
|
|
||||||
'nilmdb.server.rbtree' ]
|
'nilmdb.server.rbtree' ]
|
||||||
try:
|
try:
|
||||||
import Cython
|
import Cython
|
||||||
|
@@ -7,7 +7,6 @@ test_serializer.py
|
|||||||
test_iteratorizer.py
|
test_iteratorizer.py
|
||||||
|
|
||||||
test_timestamper.py
|
test_timestamper.py
|
||||||
test_layout.py
|
|
||||||
test_rbtree.py
|
test_rbtree.py
|
||||||
test_interval.py
|
test_interval.py
|
||||||
|
|
||||||
|
@@ -8,8 +8,11 @@ from nose.tools import *
|
|||||||
from nose.tools import assert_raises
|
from nose.tools import assert_raises
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from nilmdb.server.interval import (Interval, DBInterval,
|
from nilmdb.utils.interval import IntervalError
|
||||||
IntervalSet, IntervalError)
|
from nilmdb.server.interval import Interval, DBInterval, IntervalSet
|
||||||
|
|
||||||
|
# so we can test them separately
|
||||||
|
from nilmdb.utils.interval import Interval as UtilsInterval
|
||||||
|
|
||||||
from testutil.helpers import *
|
from testutil.helpers import *
|
||||||
import unittest
|
import unittest
|
||||||
@@ -47,6 +50,15 @@ def makeset(string):
|
|||||||
return iset
|
return iset
|
||||||
|
|
||||||
class TestInterval:
|
class TestInterval:
|
||||||
|
def test_client_interval(self):
|
||||||
|
# Run interval tests against the Python version of Interval.
|
||||||
|
global Interval
|
||||||
|
NilmdbInterval = Interval
|
||||||
|
Interval = UtilsInterval
|
||||||
|
self.test_interval()
|
||||||
|
self.test_interval_intersect()
|
||||||
|
Interval = NilmdbInterval
|
||||||
|
|
||||||
def test_interval(self):
|
def test_interval(self):
|
||||||
# Test Interval class
|
# Test Interval class
|
||||||
os.environ['TZ'] = "America/New_York"
|
os.environ['TZ'] = "America/New_York"
|
||||||
@@ -222,7 +234,7 @@ class TestInterval:
|
|||||||
eq_(ab,c)
|
eq_(ab,c)
|
||||||
|
|
||||||
# a \ b == d
|
# a \ b == d
|
||||||
eq_(IntervalSet(a.set_difference(b)), d)
|
eq_(IntervalSet(nilmdb.utils.interval.set_difference(a,b)), d)
|
||||||
|
|
||||||
# Intersection with intervals
|
# Intersection with intervals
|
||||||
do_test(makeset("[---|---)[)"),
|
do_test(makeset("[---|---)[)"),
|
||||||
@@ -287,10 +299,11 @@ class TestInterval:
|
|||||||
b = makeset("[-) [--) [)")
|
b = makeset("[-) [--) [)")
|
||||||
c = makeset("[----) ")
|
c = makeset("[----) ")
|
||||||
d = makeset(" [-) ")
|
d = makeset(" [-) ")
|
||||||
eq_(a.set_difference(b, list(c)[0]), d)
|
eq_(nilmdb.utils.interval.set_difference(
|
||||||
|
a.intersection(list(c)[0]), b.intersection(list(c)[0])), d)
|
||||||
|
|
||||||
# Empty second set
|
# Empty second set
|
||||||
eq_(a.set_difference(IntervalSet()), a)
|
eq_(nilmdb.utils.interval.set_difference(a, IntervalSet()), a)
|
||||||
|
|
||||||
class TestIntervalDB:
|
class TestIntervalDB:
|
||||||
def test_dbinterval(self):
|
def test_dbinterval(self):
|
||||||
@@ -379,7 +392,7 @@ class TestIntervalSpeed:
|
|||||||
print
|
print
|
||||||
yappi.start()
|
yappi.start()
|
||||||
speeds = {}
|
speeds = {}
|
||||||
limit = 10 # was 20
|
limit = 22 # was 20
|
||||||
for j in [ 2**x for x in range(5,limit) ]:
|
for j in [ 2**x for x in range(5,limit) ]:
|
||||||
start = time.time()
|
start = time.time()
|
||||||
iset = IntervalSet()
|
iset = IntervalSet()
|
||||||
|
@@ -1,266 +0,0 @@
|
|||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
import nilmdb
|
|
||||||
|
|
||||||
from nilmdb.utils.printf import *
|
|
||||||
|
|
||||||
from nose.tools import *
|
|
||||||
from nose.tools import assert_raises
|
|
||||||
import distutils.version
|
|
||||||
import itertools
|
|
||||||
import os
|
|
||||||
import sys
|
|
||||||
import random
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
from testutil.helpers import *
|
|
||||||
|
|
||||||
from nilmdb.server.layout import *
|
|
||||||
|
|
||||||
class TestLayouts(object):
|
|
||||||
# Some nilmdb.layout tests. Not complete, just fills in missing
|
|
||||||
# coverage.
|
|
||||||
def test_layouts(self):
|
|
||||||
x = nilmdb.server.layout.get_named("float32_8")
|
|
||||||
y = nilmdb.server.layout.get_named("float32_8")
|
|
||||||
eq_(x.count, y.count)
|
|
||||||
eq_(x.datatype, y.datatype)
|
|
||||||
y = nilmdb.server.layout.get_named("float32_7")
|
|
||||||
ne_(x.count, y.count)
|
|
||||||
eq_(x.datatype, y.datatype)
|
|
||||||
|
|
||||||
def test_parsing(self):
|
|
||||||
self.real_t_parsing("float32_8", "uint16_6", "uint16_9")
|
|
||||||
self.real_t_parsing("float32_8", "uint16_6", "uint16_9")
|
|
||||||
def real_t_parsing(self, name_prep, name_raw, name_rawnotch):
|
|
||||||
# invalid layouts
|
|
||||||
with assert_raises(TypeError) as e:
|
|
||||||
parser = Parser("NoSuchLayout")
|
|
||||||
with assert_raises(TypeError) as e:
|
|
||||||
parser = Parser("float32")
|
|
||||||
|
|
||||||
# too little data
|
|
||||||
parser = Parser(name_prep)
|
|
||||||
data = ( "1234567890.000000 1.1 2.2 3.3 4.4 5.5\n" +
|
|
||||||
"1234567890.100000 1.1 2.2 3.3 4.4 5.5\n")
|
|
||||||
with assert_raises(ParserError) as e:
|
|
||||||
parser.parse(data)
|
|
||||||
in_("error", str(e.exception))
|
|
||||||
|
|
||||||
# too much data
|
|
||||||
parser = Parser(name_prep)
|
|
||||||
data = ( "1234567890.000000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8 9.9\n" +
|
|
||||||
"1234567890.100000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8 9.9\n")
|
|
||||||
with assert_raises(ParserError) as e:
|
|
||||||
parser.parse(data)
|
|
||||||
in_("error", str(e.exception))
|
|
||||||
|
|
||||||
# just right
|
|
||||||
parser = Parser(name_prep)
|
|
||||||
data = ( "1234567890.000000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8\n" +
|
|
||||||
"1234567890.100000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8\n")
|
|
||||||
parser.parse(data)
|
|
||||||
eq_(parser.min_timestamp, 1234567890.0)
|
|
||||||
eq_(parser.max_timestamp, 1234567890.1)
|
|
||||||
eq_(parser.data, [[1234567890.0,1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8],
|
|
||||||
[1234567890.1,1.1,2.2,3.3,4.4,5.5,6.6,7.7,8.8]])
|
|
||||||
|
|
||||||
# try uint16_6 too, with clamping
|
|
||||||
parser = Parser(name_raw)
|
|
||||||
data = ( "1234567890.000000 1 2 3 4 5 6\n" +
|
|
||||||
"1234567890.100000 1 2 3 4 5 6\n" )
|
|
||||||
parser.parse(data)
|
|
||||||
eq_(parser.data, [[1234567890.0,1,2,3,4,5,6],
|
|
||||||
[1234567890.1,1,2,3,4,5,6]])
|
|
||||||
|
|
||||||
# pass an instantiated class
|
|
||||||
parser = Parser(get_named(name_rawnotch))
|
|
||||||
data = ( "1234567890.000000 1 2 3 4 5 6 7 8 9\n" +
|
|
||||||
"1234567890.100000 1 2 3 4 5 6 7 8 9\n" )
|
|
||||||
parser.parse(data)
|
|
||||||
|
|
||||||
# non-monotonic
|
|
||||||
parser = Parser(name_raw)
|
|
||||||
data = ( "1234567890.100000 1 2 3 4 5 6\n" +
|
|
||||||
"1234567890.099999 1 2 3 4 5 6\n" )
|
|
||||||
with assert_raises(ParserError) as e:
|
|
||||||
parser.parse(data)
|
|
||||||
in_("not monotonically increasing", str(e.exception))
|
|
||||||
|
|
||||||
parser = Parser(name_raw)
|
|
||||||
data = ( "1234567890.100000 1 2 3 4 5 6\n" +
|
|
||||||
"1234567890.100000 1 2 3 4 5 6\n" )
|
|
||||||
with assert_raises(ParserError) as e:
|
|
||||||
parser.parse(data)
|
|
||||||
in_("not monotonically increasing", str(e.exception))
|
|
||||||
|
|
||||||
parser = Parser(name_raw)
|
|
||||||
data = ( "1234567890.100000 1 2 3 4 5 6\n" +
|
|
||||||
"1234567890.100001 1 2 3 4 5 6\n" )
|
|
||||||
parser.parse(data)
|
|
||||||
|
|
||||||
# uint16_6 with values out of bounds
|
|
||||||
parser = Parser(name_raw)
|
|
||||||
data = ( "1234567890.000000 1 2 3 4 500000 6\n" +
|
|
||||||
"1234567890.100000 1 2 3 4 5 6\n" )
|
|
||||||
with assert_raises(ParserError) as e:
|
|
||||||
parser.parse(data)
|
|
||||||
in_("value out of range", str(e.exception))
|
|
||||||
|
|
||||||
# Empty data should work but is useless
|
|
||||||
parser = Parser(name_raw)
|
|
||||||
data = ""
|
|
||||||
parser.parse(data)
|
|
||||||
assert(parser.min_timestamp is None)
|
|
||||||
assert(parser.max_timestamp is None)
|
|
||||||
|
|
||||||
def test_formatting(self):
|
|
||||||
self.real_t_formatting("float32_8", "uint16_6", "uint16_9")
|
|
||||||
self.real_t_formatting("float32_8", "uint16_6", "uint16_9")
|
|
||||||
def real_t_formatting(self, name_prep, name_raw, name_rawnotch):
|
|
||||||
# invalid layout
|
|
||||||
with assert_raises(TypeError) as e:
|
|
||||||
formatter = Formatter("NoSuchLayout")
|
|
||||||
|
|
||||||
# too little data
|
|
||||||
formatter = Formatter(name_prep)
|
|
||||||
data = [ [ 1234567890.000000, 1.1, 2.2, 3.3, 4.4, 5.5 ],
|
|
||||||
[ 1234567890.100000, 1.1, 2.2, 3.3, 4.4, 5.5 ] ]
|
|
||||||
with assert_raises(FormatterError) as e:
|
|
||||||
formatter.format(data)
|
|
||||||
in_("error", str(e.exception))
|
|
||||||
|
|
||||||
# too much data
|
|
||||||
formatter = Formatter(name_prep)
|
|
||||||
data = [ [ 1234567890.000000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ],
|
|
||||||
[ 1234567890.100000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ] ]
|
|
||||||
with assert_raises(FormatterError) as e:
|
|
||||||
formatter.format(data)
|
|
||||||
in_("error", str(e.exception))
|
|
||||||
|
|
||||||
# just right
|
|
||||||
formatter = Formatter(name_prep)
|
|
||||||
data = [ [ 1234567890.000000, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8 ],
|
|
||||||
[ 1234567890.100000, 1.1, 2.2, 3.3, 4.4, 5.5, 6.6, 7.7, 8.8 ] ]
|
|
||||||
text = formatter.format(data)
|
|
||||||
eq_(text,
|
|
||||||
"1234567890.000000 1.100000e+00 2.200000e+00 3.300000e+00 "
|
|
||||||
"4.400000e+00 5.500000e+00 6.600000e+00 7.700000e+00 "
|
|
||||||
"8.800000e+00\n" +
|
|
||||||
"1234567890.100000 1.100000e+00 2.200000e+00 3.300000e+00 "
|
|
||||||
"4.400000e+00 5.500000e+00 6.600000e+00 7.700000e+00 "
|
|
||||||
"8.800000e+00\n")
|
|
||||||
|
|
||||||
# try uint16_6 too
|
|
||||||
formatter = Formatter(name_raw)
|
|
||||||
data = [ [ 1234567890.000000, 1, 2, 3, 4, 5, 6 ],
|
|
||||||
[ 1234567890.100000, 1, 2, 3, 4, 5, 6 ] ]
|
|
||||||
text = formatter.format(data)
|
|
||||||
eq_(text,
|
|
||||||
"1234567890.000000 1 2 3 4 5 6\n" +
|
|
||||||
"1234567890.100000 1 2 3 4 5 6\n")
|
|
||||||
|
|
||||||
# pass an instantiated class
|
|
||||||
formatter = Formatter(get_named(name_rawnotch))
|
|
||||||
data = [ [ 1234567890.000000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ],
|
|
||||||
[ 1234567890.100000, 1, 2, 3, 4, 5, 6, 7, 8, 9 ] ]
|
|
||||||
text = formatter.format(data)
|
|
||||||
eq_(text,
|
|
||||||
"1234567890.000000 1 2 3 4 5 6 7 8 9\n" +
|
|
||||||
"1234567890.100000 1 2 3 4 5 6 7 8 9\n")
|
|
||||||
|
|
||||||
# Empty data should work but is useless
|
|
||||||
formatter = Formatter(name_raw)
|
|
||||||
data = []
|
|
||||||
text = formatter.format(data)
|
|
||||||
eq_(text, "")
|
|
||||||
|
|
||||||
def test_roundtrip(self):
|
|
||||||
self.real_t_roundtrip("float32_8", "uint16_6", "uint16_9")
|
|
||||||
self.real_t_roundtrip("float32_8", "uint16_6", "uint16_9")
|
|
||||||
def real_t_roundtrip(self, name_prep, name_raw, name_rawnotch):
|
|
||||||
# Verify that textual data passed into the Parser, and then
|
|
||||||
# back through the Formatter, then back into the Parser,
|
|
||||||
# gives identical parsed representations
|
|
||||||
random.seed(12345)
|
|
||||||
|
|
||||||
def do_roundtrip(layout, datagen):
|
|
||||||
for i in range(100):
|
|
||||||
rows = random.randint(1,100)
|
|
||||||
data = ""
|
|
||||||
ts = 1234567890
|
|
||||||
for r in range(rows):
|
|
||||||
ts += random.uniform(0,1)
|
|
||||||
row = sprintf("%f", ts) + " "
|
|
||||||
row += " ".join(datagen())
|
|
||||||
row += "\n"
|
|
||||||
data += row
|
|
||||||
parser1 = Parser(layout)
|
|
||||||
formatter = Formatter(layout)
|
|
||||||
parser2 = Parser(layout)
|
|
||||||
parser1.parse(data)
|
|
||||||
parser2.parse(formatter.format(parser1.data))
|
|
||||||
eq_(parser1.data, parser2.data)
|
|
||||||
|
|
||||||
def datagen():
|
|
||||||
return [ sprintf("%.6e", random.uniform(-1000,1000))
|
|
||||||
for x in range(8) ]
|
|
||||||
do_roundtrip(name_prep, datagen)
|
|
||||||
|
|
||||||
def datagen():
|
|
||||||
return [ sprintf("%d", random.randint(0,65535))
|
|
||||||
for x in range(6) ]
|
|
||||||
do_roundtrip(name_raw, datagen)
|
|
||||||
|
|
||||||
def datagen():
|
|
||||||
return [ sprintf("%d", random.randint(0,65535))
|
|
||||||
for x in range(9) ]
|
|
||||||
do_roundtrip(name_rawnotch, datagen)
|
|
||||||
|
|
||||||
class TestLayoutSpeed:
|
|
||||||
@unittest.skip("this is slow")
|
|
||||||
def test_layout_speed(self):
|
|
||||||
import time
|
|
||||||
|
|
||||||
random.seed(54321)
|
|
||||||
|
|
||||||
def do_speedtest(layout, datagen, rows = 5000, times = 100):
|
|
||||||
# Build data once
|
|
||||||
data = ""
|
|
||||||
ts = 1234567890
|
|
||||||
for r in range(rows):
|
|
||||||
ts += random.uniform(0,1)
|
|
||||||
row = sprintf("%f", ts) + " "
|
|
||||||
row += " ".join(datagen())
|
|
||||||
row += "\n"
|
|
||||||
data += row
|
|
||||||
|
|
||||||
# Do lots of roundtrips
|
|
||||||
start = time.time()
|
|
||||||
for i in range(times):
|
|
||||||
parser = Parser(layout)
|
|
||||||
formatter = Formatter(layout)
|
|
||||||
parser.parse(data)
|
|
||||||
formatter.format(parser.data)
|
|
||||||
elapsed = time.time() - start
|
|
||||||
printf("roundtrip %s: %d ms, %.1f μs/row, %d rows/sec\n",
|
|
||||||
layout,
|
|
||||||
elapsed * 1e3,
|
|
||||||
(elapsed * 1e6) / (rows * times),
|
|
||||||
(rows * times) / elapsed)
|
|
||||||
|
|
||||||
print ""
|
|
||||||
def datagen():
|
|
||||||
return [ sprintf("%.6e", random.uniform(-1000,1000))
|
|
||||||
for x in range(10) ]
|
|
||||||
do_speedtest("float32_10", datagen)
|
|
||||||
|
|
||||||
def datagen():
|
|
||||||
return [ sprintf("%d", random.randint(0,65535))
|
|
||||||
for x in range(10) ]
|
|
||||||
do_speedtest("uint16_10", datagen)
|
|
||||||
|
|
||||||
def datagen():
|
|
||||||
return [ sprintf("%d", random.randint(0,65535))
|
|
||||||
for x in range(6) ]
|
|
||||||
do_speedtest("uint16_6", datagen)
|
|
Reference in New Issue
Block a user