Browse Source

work on improving layout parsing

git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@10663 ddd99763-3ecb-0310-9145-efcb8ce7c51f
tags/bxinterval-last
Jim Paris 12 years ago
parent
commit
7a9012c3e9
4 changed files with 90 additions and 14 deletions
  1. +55
    -11
      nilmdb/layout.py
  2. +14
    -1
      nilmdb/nilmdb.py
  3. +2
    -1
      nilmdb/server.py
  4. +19
    -1
      tests/test_layout.py

+ 55
- 11
nilmdb/layout.py View File

@@ -25,11 +25,9 @@ class Layout(object):
def parse(self, in_fields):
"""Given in_fields as text, return a list of values
converted to the correct types"""
# Consider overriding this in subclasses for speed?
# Consider overriding this in subclasses for speed.
# In general it takes about 2/3 the time that way.
out=[]
if len(self.fields) != len(in_fields):
raise IndexError(sprintf("wanted %d fields, got %d",
len(self.fields), len(in_fields)))
for (n, (name, type)) in enumerate(self.fields):
if name == 'timestamp':
# special case: parse float, save as int
@@ -37,15 +35,24 @@ class Layout(object):
elif type == 'float32':
out.append(float(in_fields[n]))
elif type == 'uint16':
out.append(max(0, min(65535, int(in_fields[n], 10))))
x = int(in_fields[n], 10)
if x < 0 or x > 65535:
raise ValueError("data out of range")
out.append(x)
else:
raise TypeError("can't parse type " + repr(type))
return out

def parse_uint16(string):
    """Convert a base-10 text field to an integer that fits in a uint16.

    Returns the parsed integer.  Raises ValueError if the text is not
    a valid base-10 integer, or if the value lies outside 0..65535.
    """
    value = int(string, 10)
    # Reject rather than clamp: out-of-range input is treated as an error.
    if not 0 <= value <= 65535:
        raise ValueError("data out of range")
    return value

class PrepData(Layout):
rate_hz = 120
fields = [ ( 'timestamp', 'int64' ),
( 'p1', 'float32' ),
( 'p1', 'float32'),
( 'q1', 'float32'),
( 'p3', 'float32'),
( 'q3', 'float32'),
@@ -53,6 +60,16 @@ class PrepData(Layout):
( 'q5', 'float32'),
( 'p7', 'float32'),
( 'q7', 'float32') ]
def parse(self, in_fields):
    """Convert one row of text fields into typed values.

    Field 0 is parsed as a float, scaled by 1e6 and truncated to an
    int (presumably seconds to microseconds -- confirm against the
    timestamp convention).  Fields 1 through 8 are parsed as floats.
    Any fields past index 8 are ignored; a shorter row raises
    IndexError and unparsable text raises ValueError.
    """
    timestamp = int(float(in_fields[0]) * 1e6)
    # Index each column explicitly (not a slice) so that a short row
    # still fails with IndexError.
    values = [float(in_fields[column]) for column in range(1, 9)]
    return [timestamp] + values

class RawData(Layout):
rate_hz = 8000
@@ -63,12 +80,38 @@ class RawData(Layout):
( 'ia', 'uint16'),
( 'ib', 'uint16'),
( 'ic', 'uint16') ]
def parse(self, in_fields):
    """Convert one row of text fields into typed values.

    Field 0 is parsed as a float, scaled by 1e6 and truncated to an
    int.  Fields 1 through 6 are converted with parse_uint16, which
    raises ValueError for values outside the uint16 range.  Any
    fields past index 6 are ignored.
    """
    row = [int(float(in_fields[0]) * 1e6)]
    for column in range(1, 7):
        row.append(parse_uint16(in_fields[column]))
    return row

class RawNotchedData(Layout):
rate_hz = 8000
fields = RawData.fields + [ ( 'notch_ia', 'uint16' ),
( 'notch_ib', 'uint16' ),
( 'notch_ic', 'uint16' ) ]
fields = [ ( 'timestamp', 'int64'),
( 'va', 'uint16'),
( 'vb', 'uint16'),
( 'vc', 'uint16'),
( 'ia', 'uint16'),
( 'ib', 'uint16'),
( 'ic', 'uint16'),
( 'notch_ia', 'uint16'),
( 'notch_ib', 'uint16'),
( 'notch_ic', 'uint16') ]
def parse(self, in_fields):
    """Convert one row of text fields into typed values.

    Field 0 is parsed as a float, scaled by 1e6 and truncated to an
    int.  Fields 1 through 9 are converted with parse_uint16, which
    raises ValueError for values outside the uint16 range.  Any
    fields past index 9 are ignored.
    """
    row = [int(float(in_fields[0]) * 1e6)]
    for column in range(1, 10):
        row.append(parse_uint16(in_fields[column]))
    return row

# Instantiate all layouts, indexed by their name
named = {}
@@ -97,8 +140,6 @@ class Parser(object):
"""Parse the data, provided as lines of text, using the current
layout, into an internal data structure."""

# This currently takes about 0.1 seconds for 1 megabyte of prep data,
# 85 klines/sec. Could clearly be optimized a lot...
indata = cStringIO.StringIO(textdata)
n = 0
# Assume any parsing error is a real error.
@@ -111,6 +152,9 @@ class Parser(object):

# Parse and append
fields = line.partition('#')[0].split()
if len(fields) != len(self.layout.fields):
raise IndexError(sprintf("wanted %d fields, got %d",
len(self.layout.fields), len(fields)))
out = self.layout.parse(fields)
self.data.append(out)



+ 14
- 1
nilmdb/nilmdb.py View File

@@ -290,14 +290,27 @@ class NilmDB(object):
raise OverlapError("New data overlaps existing data: "
+ str(iset & interval))

# TODO: Check through layout and see if there's a better way
# to handle the data parsing now that we can use
# table.append(). Probably not a good idea to pass strings to
# it, though, to reduce time on the serialized nilmdb side of
# things.
# Either way, start using table.append() below, then
# figure out the row tracking, insert intervals into the database,
# and do tests of multiple inserts, overlapping data, etc.

# Insert the data into pytables
table = self.h5file.getNode(path)
print "rows", table.nrows
with nilmdb.Timer("append"):
table.append(parser.data)
print "rows", table.nrows
table.flush()
# with nilmdb.Timer("fill"):
# parser.fill_table(table)
# with nilmdb.Timer("flush"):
# table.flush()

# with nilmdb.Timer("append"):
# table.append()
# with nilmdb.Timer("flush"):
# table.flush()


+ 2
- 1
nilmdb/server.py View File

@@ -164,7 +164,8 @@ class Stream(NilmApp):
# Parse the input data
try:
parser = nilmdb.layout.Parser(layout)
parser.parse(body)
with nilmdb.Timer("parse"):
parser.parse(body)
except nilmdb.layout.ParserError as e:
raise cherrypy.HTTPError("400 Bad Request",
"Error parsing input data: " +


+ 19
- 1
tests/test_layout.py View File

@@ -51,8 +51,10 @@ class TestLayouts(object):
data = ( "1234567890.000000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8\n" +
"1234567890.100000 1.1 2.2 3.3 4.4 5.5 6.6 7.7 8.8\n")
parser.parse(data)
eq_(parser.min_timestamp, 1234567890000000)
eq_(parser.max_timestamp, 1234567890100000)
# try RawData too
# try RawData too, with in-range values (out-of-range data now raises)
parser = Parser("RawData")
data = ( "1234567890.000000 1 2 3 4 5 6\n" +
"1234567890.100000 1 2 3 4 5 6\n" )
@@ -69,6 +71,7 @@ class TestLayouts(object):
pass
x = CrappyLayout()
x.fields = x.fields + [("fakename", "faketype")]
x.parse = super(RawData, x).parse
parser = Parser(x)
data = ( "1234567890.000000 1 2 3 4 5 6 fake\n" +
"1234567890.100000 1 2 3 4 5 6 fake\n" )
@@ -83,3 +86,18 @@ class TestLayouts(object):
with assert_raises(ParserError) as e:
parser.parse(data)
in_("not monotonically increasing", str(e.exception))

# RawData with values out of bounds
parser = Parser("RawData")
data = ( "1234567890.000000 1 2 3 4 500000 6\n" +
"1234567890.100000 1 2 3 4 5 6\n" )
with assert_raises(ParserError) as e:
parser.parse(data)
in_("data out of range", str(e.exception))

# Empty data should work but is useless
parser = Parser("RawData")
data = ""
parser.parse(data)
assert(parser.min_timestamp is None)
assert(parser.max_timestamp is None)

Loading…
Cancel
Save