Browse Source

Start reworking the layout types.

Current/old design has specific layouts: RawData, PrepData,
RawNotchedData.
Let's get rid of this entirely and switch to simpler data types that are
just collections and counts of a single type.  We'll still use strings
to describe them, with format:

    type_count
  
where type is "uint16", "float32", or "float64", and count is an integer.

nilmdb.layout.get_named() will parse these strings into the appropriate
handlers.  For compatibility:
  
    "RawData" == "uint16_6"
    "RawNotchedData" == "uint16_9"
    "PrepData" == "float32_8"


git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@10981 ddd99763-3ecb-0310-9145-efcb8ce7c51f
tags/bxinterval-last
Jim Paris 11 years ago
parent
commit
5130ab7e6a
7 changed files with 152 additions and 133 deletions
  1. +19
    -0
      design.md
  2. +5
    -10
      nilmdb/cmdline/insert.py
  3. +110
    -108
      nilmdb/layout.pyx
  4. +5
    -5
      nilmdb/nilmdb.py
  5. +1
    -1
      nilmdb/server.py
  6. +11
    -8
      tests/test_cmdline.py
  7. +1
    -1
      tests/test_layout.py

+ 19
- 0
design.md View File

@@ -160,3 +160,22 @@ IntervalSet speed

- Might be algorithmic improvements to be made in Interval.py,
like in `__and__`


Layouts
-------
Current/old design has specific layouts: RawData, PrepData, RawNotchedData.
Let's get rid of this entirely and switch to simpler data types that are
just collections and counts of a single type. We'll still use strings
to describe them, with format:

    type_count

where type is "uint16", "float32", or "float64", and count is an integer
(for example, "float32_8").

nilmdb.layout.get_named() will parse these strings into the appropriate
handlers. For compatibility:

    "RawData" == "uint16_6"
    "RawNotchedData" == "uint16_9"
    "PrepData" == "float32_8"

+ 5
- 10
nilmdb/cmdline/insert.py View File

@@ -28,8 +28,7 @@ def setup(self, sub):

group.add_argument("-r", "--rate", type=float,
help="""
If needed, rate in Hz (default: based on
stream layout)
If needed, rate in Hz (required when using --start)
""")
exc = group.add_mutually_exclusive_group()
exc.add_argument("-s", "--start",
@@ -73,14 +72,6 @@ def cmd_insert(self):
if self.args.none:
ts = nilmdb.timestamper.TimestamperNull(infile)
else:
# If no rate, see if we can get it from nilmdb.layout
if not self.args.rate:
try:
self.args.rate = nilmdb.layout.named[layout].rate_hz
except KeyError: # pragma: no cover
self.die("Need to specify --rate")
rate = self.args.rate

if self.args.start:
start = self.args.start
else:
@@ -90,6 +81,10 @@ def cmd_insert(self):
self.die("Error extracting time from filename '%s'",
filename)

if not self.args.rate:
self.die("Need to specify --rate")
rate = self.args.rate

ts = nilmdb.timestamper.TimestamperRate(infile, start, rate)

# Print info


+ 110
- 108
nilmdb/layout.pyx View File

@@ -7,6 +7,9 @@ import inspect
import cStringIO
import numpy as np

cdef enum:
max_value_count = 64

cimport cython
cimport libc.stdlib
cimport libc.stdio
@@ -22,118 +25,117 @@ class FormatterError(Exception):

class Layout:
"""Represents a NILM database layout"""
def description(self):
"""Return the PyTables description of this layout"""
desc = {}
for (n, (name, type)) in enumerate(self.fields):
desc[name] = tables.Col.from_type(type, pos=n+1)
return tables.Description(desc)

def parse(self, char *text):
raise ParserError("n/a", "no parser for this layout")

def format(self, char *text):
raise FormatterError("no formatter for this layout")

class PrepData(Layout):
rate_hz = 120
fields = [ ( 'timestamp', 'float64' ),
( 'p1', 'float32' ),
( 'q1', 'float32' ),
( 'p3', 'float32' ),
( 'q3', 'float32' ),
( 'p5', 'float32' ),
( 'q5', 'float32' ),
( 'p7', 'float32' ),
( 'q7', 'float32' ) ]

def parse(self, char *text):
def __init__(self, typestring):
    """Initialize this Layout object to handle the specified type string.

    typestring has the form "<type>_<count>" (for example "float32_8"),
    where <type> is "uint16", "float32" or "float64" and <count> is an
    integer between 1 and max_value_count inclusive.

    On success the instance gets:
      self.count    -- number of data values per row (excluding timestamp)
      self.datatype -- the <type> portion of the string
      self.parse    -- bound parser for this type
      self.format   -- bound formatter for this type

    Raises KeyError if the string is malformed, the count is not a valid
    integer in range, or the type is not recognized.
    """
    try:
        datatype, count = typestring.split("_")
    except (AttributeError, TypeError, ValueError):
        # AttributeError/TypeError: typestring is not a usable string.
        # ValueError: it did not split into exactly two fields.
        # Deliberately not a bare "except:", which would also swallow
        # KeyboardInterrupt and SystemExit.
        raise KeyError("invalid layout string")

    try:
        self.count = int(count)
    except ValueError:
        raise KeyError("invalid count")
    if self.count < 1 or self.count > max_value_count:
        raise KeyError("invalid count")

    if datatype == 'uint16':
        self.parse = self.parse_uint16
        self.format = self.format_uint16
    elif datatype == 'float32' or datatype == 'float64':
        # Both float widths share the float64 parser and formatter.
        self.parse = self.parse_float64
        self.format = self.format_float64
    else:
        raise KeyError("invalid type")

    self.datatype = datatype

# Parsers
def parse_float64(self, char *text):
cdef int n
cdef double ts
# return doubles instead of float32, since they're going into
# Return doubles even in float32 case, since they're going into
# a Python array which would upconvert to double anyway.
cdef double v[8]
cdef char dummy
n = libc.stdio.sscanf(text, " %lf %lf %lf %lf %lf %lf %lf %lf %lf %c",
&ts, &v[0], &v[1], &v[2], &v[3], &v[4],
&v[5], &v[6], &v[7], &dummy)
if (n < 9) or (n > 9 and (dummy != '#' and dummy != '\n')):
raise ValueError("wrong number of values: wanted 9, got " + str(n))
return (ts, [ts, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]])

def format(self, d):
n = len(d)
if n != 9:
raise ValueError("wrong number of values: wanted 9, got " + str(n))
return ("%.6f %f %f %f %f %f %f %f %f\n" %
(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8]))

class RawData(Layout):
rate_hz = 8000
fields = [ ( 'timestamp', 'float64' ),
( 'va', 'uint16' ),
( 'vb', 'uint16' ),
( 'vc', 'uint16' ),
( 'ia', 'uint16' ),
( 'ib', 'uint16' ),
( 'ic', 'uint16' ) ]

def parse(self, char *text):
result = []
cdef char *end
ts = libc.stdlib.strtod(text, &end)
if end == text:
raise ValueError("bad timestamp")
result.append(ts)
for n in range(self.count):
text = end
result.append(libc.stdlib.strtod(text, &end))
if end == text:
raise ValueError("wrong number of values")
n = 0
while end[n] == ' ':
n += 1
if end[n] != '\n' and end[n] != '#' and end[n] != '\0':
raise ValueError("extra data on line")
return (ts, result)

def parse_uint16(self, char *text):
cdef int n
cdef double ts
cdef int v[6]
cdef char dummy
n = libc.stdio.sscanf(text, " %lf %u %u %u %u %u %u %c",
&ts, &v[0], &v[1], &v[2],
&v[3], &v[4], &v[5], &dummy)
if (n < 7) or (n > 7 and (dummy != '#' and dummy != '\n')):
raise ValueError("wrong number of values: wanted 7, got " + str(n))
for i in range(6):
if v[i] < 0 or v[i] > 65535:
raise ValueError("value out of range: " + str(v[i]))
return (ts, [ts, v[0], v[1], v[2], v[3], v[4], v[5]])

def format(self, d):
n = len(d)
if n != 7:
raise ValueError("wrong number of values: wanted 7, got " + str(n))
return ("%.6f %d %d %d %d %d %d\n" %
(d[0], d[1], d[2], d[3], d[4], d[5], d[6]))

class RawNotchedData(RawData):
rate_hz = 8000
fields = RawData.fields + [
( 'notch_ia', 'uint16' ),
( 'notch_ib', 'uint16' ),
( 'notch_ic', 'uint16' ) ]

def parse(self, char *text):
cdef int n
cdef double ts
cdef int v[9]
cdef char dummy
n = libc.stdio.sscanf(text, " %lf %u %u %u %u %u %u %u %u %u %c",
&ts, &v[0], &v[1], &v[2], &v[3], &v[4],
&v[5], &v[6], &v[7], &v[8], &dummy)
if (n < 10) or (n > 10 and (dummy != '#' and dummy != '\n')):
raise ValueError("wrong number of values: wanted 10, got " + str(n))
for i in range(9):
if v[i] < 0 or v[i] > 65535:
raise ValueError("value out of range: " + str(v[i]))
return (ts, [ts, v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7], v[8]])

def format(self, d):
n = len(d)
if n != 10:
raise ValueError("wrong number of values: wanted 10, got " + str(n))
return ("%.6f %d %d %d %d %d %d %d %d %d\n" %
(d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8], d[9]))

# Instantiate all layouts, indexed by their name
named = {}
for name, obj in inspect.getmembers(sys.modules[__name__]):
if inspect.isclass(obj) and issubclass(obj, Layout):
named[name] = obj()
result = []
cdef char *end
ts = libc.stdlib.strtod(text, &end)
if end == text:
raise ValueError("bad timestamp")
result.append(ts)
for n in range(self.count):
text = end
result.append(libc.stdlib.strtol(text, &end, 10))
if end == text:
raise ValueError("wrong number of values")
n = 0
while end[n] == ' ':
n += 1
if end[n] != '\n' and end[n] != '#' and end[n] != '\0':
raise ValueError("extra data on line")
return (ts, result)

# Formatters
def format_float64(self, d):
    """Format one row of floating-point data as a line of ASCII text.

    d is an indexable sequence whose first element is the timestamp and
    whose remaining elements are the data values; it must hold exactly
    self.count data values.

    Returns the timestamp formatted with "%.6f" followed by each value
    formatted with "%f", separated by single spaces and terminated by a
    newline.

    Raises ValueError if the number of data values is not self.count.
    """
    n = len(d) - 1
    if n != self.count:
        raise ValueError("wrong number of values for layout type: "
                         "got %d, wanted %d" % (n, self.count))
    # Collect the fields and join once, rather than growing the string
    # with repeated concatenation.
    fields = ["%.6f" % d[0]]
    fields.extend("%f" % d[i] for i in range(1, n + 1))
    return " ".join(fields) + "\n"

def format_uint16(self, d):
    """Format one row of integer data as a line of ASCII text.

    d is an indexable sequence whose first element is the timestamp and
    whose remaining elements are the data values; it must hold exactly
    self.count data values.

    Returns the timestamp formatted with "%.6f" followed by each value
    formatted with "%d", separated by single spaces and terminated by a
    newline.

    Raises ValueError if the number of data values is not self.count.
    """
    n = len(d) - 1
    if n != self.count:
        raise ValueError("wrong number of values for layout type: "
                         "got %d, wanted %d" % (n, self.count))
    # Collect the fields and join once, rather than growing the string
    # with repeated concatenation.
    fields = ["%.6f" % d[0]]
    fields.extend("%d" % d[i] for i in range(1, n + 1))
    return " ".join(fields) + "\n"

# PyTables description
def description(self):
    """Build and return the PyTables description for this layout.

    The description has a float64 'timestamp' column at position 0,
    followed by self.count columns named 'c1', 'c2', ... of type
    self.datatype at positions 1..self.count.
    """
    columns = {'timestamp': tables.Col.from_type('float64', pos=0)}
    for index in range(1, self.count + 1):
        column_name = 'c' + str(index)
        columns[column_name] = tables.Col.from_type(self.datatype, pos=index)
    return tables.Description(columns)

# Get a layout by name
def get_named(typestring):
    """Return a Layout for the given layout name.

    typestring is first tried directly as a "<type>_<count>" string.  If
    Layout rejects it with KeyError, it is looked up as one of the legacy
    names ("PrepData", "RawData", "RawNotchedData") and the equivalent
    new-style layout is returned.

    Raises KeyError if typestring is neither form.
    """
    legacy_names = {
        "PrepData": "float32_8",
        "RawData": "uint16_6",
        "RawNotchedData": "uint16_9",
    }
    try:
        layout = Layout(typestring)
    except KeyError:
        # Not a valid new-style string; fall back to the old names.
        layout = Layout(legacy_names[typestring])
    return layout

class Parser(object):
"""Object that parses and stores ASCII data for inclusion into the
@@ -144,7 +146,7 @@ class Parser(object):
self.layout = layout
else:
try:
self.layout = named[layout]
self.layout = get_named(layout)
except KeyError:
raise TypeError("unknown layout")

@@ -195,7 +197,7 @@ class Formatter(object):
self.layout = layout
else:
try:
self.layout = named[layout]
self.layout = get_named(layout)
except KeyError:
raise TypeError("unknown layout")



+ 5
- 5
nilmdb/nilmdb.py View File

@@ -45,7 +45,7 @@ _sql_schema_updates = {
CREATE TABLE streams(
id INTEGER PRIMARY KEY, -- stream ID
path TEXT UNIQUE NOT NULL, -- path, e.g. '/newton/prep'
layout TEXT NOT NULL -- one of nilmdb.layout.layouts
layout TEXT NOT NULL -- layout name, e.g. float32_8
);

-- Individual timestamped ranges in those streams.
@@ -275,7 +275,7 @@ class NilmDB(object):
/newton/upstairs/prep
/newton/upstairs/raw

layout_name: one of the nilmdb.layout.layouts keys, e.g. 'PrepData'
layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8'
"""
if path[0] != '/':
raise ValueError("paths must start with /")
@@ -295,13 +295,13 @@ class NilmDB(object):

# Get description
try:
desc = nilmdb.layout.named[layout_name].description()
desc = nilmdb.layout.get_named(layout_name).description()
except KeyError:
raise ValueError("no such layout")

# Estimated table size (for PyTables optimization purposes): assume
# 3 months worth of data. It's OK if this is wrong.
exp_rows = nilmdb.layout.named[layout_name].rate_hz * 60*60*24*30*3
# 3 months worth of data at 8 KHz. It's OK if this is wrong.
exp_rows = 8000 * 60*60*24*30*3

# Create the table
table = self.h5file.createTable(group, node,


+ 1
- 1
nilmdb/server.py View File

@@ -267,7 +267,7 @@ class Stream(NilmApp):
return
start = restart
return content(start, end, count)
extract._cp_config = { 'response.stream': True } # chunked HTTP response
extract._cp_config = { 'response.stream': False } # chunked HTTP response


class Exiter(object):


+ 11
- 8
tests/test_cmdline.py View File

@@ -101,7 +101,10 @@ class TestCmdline(object):
def matchfile(self, file):
# Captured data should match file contents exactly
with open(file) as f:
if f.read() != self.captured:
contents = f.read()
if contents != self.captured:
#print contents[1:1000] + "\n"
#print self.captured[1:1000] + "\n"
raise AssertionError("captured data doesn't match " + file)

def matchfilecount(self, file):
@@ -317,13 +320,13 @@ class TestCmdline(object):

# insert data with normal timestamper from filename
os.environ['TZ'] = "UTC"
self.ok("insert /newton/prep "
self.ok("insert --rate 120 /newton/prep "
"tests/data/prep-20120323T1000 "
"tests/data/prep-20120323T1002")

# overlap
os.environ['TZ'] = "UTC"
self.fail("insert /newton/prep "
self.fail("insert --rate 120 /newton/prep "
"tests/data/prep-20120323T1004")
self.contain("overlap")

@@ -335,22 +338,22 @@ class TestCmdline(object):

# still an overlap if we specify a different start
os.environ['TZ'] = "America/New_York"
self.fail("insert --start '03/23/2012 06:05:00' /newton/prep "
"tests/data/prep-20120323T1004")
self.fail("insert --rate 120 --start '03/23/2012 06:05:00' /newton/prep"
" tests/data/prep-20120323T1004")
self.contain("overlap")

# wrong format
os.environ['TZ'] = "UTC"
self.fail("insert /newton/raw "
self.fail("insert --rate 120 /newton/raw "
"tests/data/prep-20120323T1004")
self.contain("Error parsing input data")

# empty data does nothing
self.ok("insert --start '03/23/2012 06:05:00' /newton/prep "
self.ok("insert --rate 120 --start '03/23/2012 06:05:00' /newton/prep "
"/dev/null")

# bad start time
self.fail("insert --start 'whatever' /newton/prep /dev/null")
self.fail("insert --rate 120 --start 'whatever' /newton/prep /dev/null")

def test_cmdline_07_detail(self):
# Just count the number of lines, it's probably fine


+ 1
- 1
tests/test_layout.py View File

@@ -26,7 +26,7 @@ class TestLayouts(object):
# Some nilmdb.layout tests. Not complete, just fills in missing
# coverage.
def test_layouts(self):
x = nilmdb.layout.named["PrepData"].description()
x = nilmdb.layout.get_named("PrepData").description()

def test_parsing(self):
# invalid layout


Loading…
Cancel
Save