Browse Source

Use instantiated classes for layouts.

Change expected_daily_rows into rate_hz just for simplicity

nilmdb.layout.Parser now validates that timestamps are monotonically
increasing, and remembers the min and max seen.


git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@10660 ddd99763-3ecb-0310-9145-efcb8ce7c51f
tags/bxinterval-last
Jim Paris 12 years ago
parent
commit
3dfd187710
3 changed files with 47 additions and 16 deletions
  1. +32
    -13
      nilmdb/layout.py
  2. +1
    -1
      nilmdb/nilmdb.py
  3. +14
    -2
      tests/test_client.py

+ 32
- 13
nilmdb/layout.py View File

@@ -6,20 +6,19 @@ import cStringIO

class Layout(object):
"""Represents a NILM database layout"""
@classmethod
def description(cls):
def description(self):
"""Return the PyTables description of this layout"""
desc = {}
for (n, (name, type)) in enumerate(cls.fields):
for (n, (name, type)) in enumerate(self.fields):
desc[name] = tables.Col.from_type(type, pos=n+1)
return tables.Description(desc)

@classmethod
def parse(cls, in_fields):
def parse(self, in_fields):
"""Given in_fields as text, return a list of values
converted to the correct types"""
# Consider overriding this in subclasses for speed?
out=[]
for (n, (name, type)) in enumerate(cls.fields):
for (n, (name, type)) in enumerate(self.fields):
if name == 'timestamp':
# special case: parse float, save as int
out.append(int(float(in_fields[n]) * 1e6))
@@ -29,9 +28,10 @@ class Layout(object):
out.append(max(0, min(65535, int(in_fields[n], 10))))
else:
raise TypeError("Can't parse type " + type)
return out

class PrepData(Layout):
expected_daily_rows = 120 * 86400
rate_hz = 120
fields = [ ( 'timestamp', 'int64' ),
( 'p1', 'float32' ),
( 'q1', 'float32'),
@@ -43,7 +43,7 @@ class PrepData(Layout):
( 'q7', 'float32') ]

class RawData(Layout):
expected_daily_rows = 8000 * 86400
rate_hz = 8000
fields = [ ( 'timestamp', 'int64'),
( 'va', 'uint16'),
( 'vb', 'uint16'),
@@ -53,16 +53,16 @@ class RawData(Layout):
( 'ic', 'uint16') ]

class RawNotchedData(Layout):
expected_daily_rows = 8000 * 86400
rate_hz = 8000
fields = RawData.fields + [ ( 'notch_ia', 'uint16' ),
( 'notch_ib', 'uint16' ),
( 'notch_ic', 'uint16' ) ]

# Build list of all layouts, so we can look them up by name
# Instantiate all layouts, indexed by their name
named = {}
for name, obj in inspect.getmembers(sys.modules[__name__]):
if inspect.isclass(obj) and issubclass(obj, Layout):
named[name] = obj
named[name] = obj()

class Parser(object):
"""Object that parses and stores ASCII data for inclusion into the database"""
@@ -73,6 +73,10 @@ class Parser(object):
self.layout = named[layout]
self.data = []
self.nrows = 0
self.min_timestamp = None
self.max_timestamp = None
# Assume timestamp is always the first field, for now
self.ts_field = 0

def parse(self, textdata):
"""Parse the data, provided as lines of text, using the current
@@ -86,12 +90,27 @@ class Parser(object):
# In the future we might want to skip completely empty lines,
# or partial lines right before EOF?
try:
last_ts = None
for line in indata:
self.nrows += 1

# Parse and append
fields = line.partition('#')[0].split()
self.data.append(self.layout.parse(fields))
out = self.layout.parse(fields)
self.data.append(out)

# Verify timestamp
if self.ts_field is not None:
if last_ts is not None and out[self.ts_field] < last_ts:
raise ValueError("timestamp is not monotonically increasing")
last_ts = out[self.ts_field]
except (ValueError, TypeError, IndexError) as e:
raise TypeError("line " + self.nrows + ": " + e.message)
raise TypeError("line " + str(self.nrows) + ": " + e.message)

# Mark timestamp ranges
if len(self.data) and self.ts_field is not None:
self.min_timestamp = self.data[0][self.ts_field]
self.max_timestamp = self.data[-1][self.ts_field]

def fillrow(self, tablerow, rownum):
"""Fill a PyTables row object with the parsed data.


+ 1
- 1
nilmdb/nilmdb.py View File

@@ -198,7 +198,7 @@ class NilmDB(object):

# Estimated table size (for PyTables optimization purposes): assume
# 3 months worth of data. It's OK if this is wrong.
exp_rows = nilmdb.layout.named[layout_name].expected_daily_rows * 90
exp_rows = nilmdb.layout.named[layout_name].rate_hz * 60 * 60 * 24 * 30 * 3

table = self.h5file.createTable(group, node,
description = desc,


+ 14
- 2
tests/test_client.py View File

@@ -20,6 +20,10 @@ def eq_(a, b):
if not a == b:
raise AssertionError("%r != %r" % (a, b))

def in_(a, b):
    """Assert that `a` is contained in `b`.

    Test helper: does nothing when `a in b` holds, otherwise raises
    AssertionError with a message showing the repr of both values.
    """
    if a not in b:
        raise AssertionError("%r not in %r" % (a, b))

def ne_(a, b):
    """Assert that `a` and `b` compare unequal.

    Test helper: does nothing when `a != b` holds, otherwise raises
    AssertionError with a message showing the repr of both values.
    """
    # Evaluates `a != b` (not `a == b`), so the inequality operator is
    # the one actually being exercised.
    if not a != b:
        raise AssertionError("unexpected %r == %r" % (a, b))
@@ -86,7 +90,7 @@ class TestClient(object):
# Bad index column
with assert_raises(ClientError) as e:
client.stream_create("/newton/prep", "PrepData", ["nonexistant"])
assert("KeyError: nonexistant" in str(e.exception))
in_("KeyError: nonexistant", str(e.exception))
client.stream_create("/newton/prep", "PrepData")
client.stream_create("/newton/raw", "RawData")
client.stream_create("/newton/zzz/rawnotch", "RawNotchedData")
@@ -136,8 +140,16 @@ class TestClient(object):
data = nilmdb.timestamper.TimestamperRate(testfile, start, 120)
with assert_raises(ClientError) as e:
result = client.stream_insert("/newton/no-such-path", data)
assert("404" in e.exception.status)
in_("404 Not Found", str(e.exception))

# Now try reversed timestamps
data = nilmdb.timestamper.TimestamperRate(testfile, start, 120)
data = reversed(list(data))
with assert_raises(ClientError) as e:
result = client.stream_insert("/newton/prep", data)
in_("400 Bad Request", str(e.exception))
in_("timestamp is not monotonically increasing", str(e.exception))
# Now do the real load
data = nilmdb.timestamper.TimestamperRate(testfile, start, 120)
result = client.stream_insert("/newton/prep", data)


Loading…
Cancel
Save