|
- from __future__ import absolute_import
- import nilmdb
- from nilmdb.printf import *
-
- import tables
- import time
- import sys
- import inspect
- import cStringIO
-
class ParserError(Exception):
    """Error raised for a problem found on a specific line of input.

    The text "line <line>: <message>" is used as the exception argument
    and is also stored on the ``message`` attribute.
    """
    def __init__(self, line, message):
        text = sprintf("line %d: %s", line, message)
        self.message = text
        super(ParserError, self).__init__(text)
-
class Layout(object):
    """Represents a NILM database layout.

    Both methods read ``self.fields``, an ordered sequence of
    ``(name, type)`` pairs.  This class does not define ``fields``
    itself, so it has to be supplied by a subclass (or the instance).
    """

    def description(self):
        """Return the PyTables description of this layout.

        One column is created per entry of ``self.fields``; column
        positions are numbered from 1 in field order.
        """
        desc = {}
        for pos, (name, col_type) in enumerate(self.fields, 1):
            desc[name] = tables.Col.from_type(col_type, pos=pos)
        return tables.Description(desc)

    def parse(self, in_fields):
        """Given in_fields as text, return a list of values
        converted to the correct types.

        in_fields: sequence of strings, one per entry of ``self.fields``.

        Conversion rules, checked in this order for each column:
          * a column named 'timestamp' is read as a float and stored as
            that value times 1e6, rounded to the nearest integer
          * 'float32' columns are converted with float()
          * 'uint16' columns are read as base-10 integers and clamped
            to the range 0..65535

        Raises IndexError if the number of fields is wrong, TypeError
        for a column type not listed above, and ValueError (from the
        underlying conversions) for text that is not a valid number.
        """
        # Consider overriding this in subclasses for speed?
        if len(self.fields) != len(in_fields):
            raise IndexError(sprintf("wanted %d fields, got %d",
                                     len(self.fields), len(in_fields)))
        out = []
        for (name, col_type), text in zip(self.fields, in_fields):
            if name == 'timestamp':
                # special case: parse float, save as int.  Round rather
                # than truncate: the float product can land just below
                # the intended integer, and truncation would then store
                # a value that is one too small.
                out.append(int(round(float(text) * 1e6)))
            elif col_type == 'float32':
                out.append(float(text))
            elif col_type == 'uint16':
                # Out-of-range values are clamped, not rejected.
                out.append(max(0, min(65535, int(text, 10))))
            else:
                raise TypeError("can't parse type " + repr(col_type))
        return out
-
class PrepData(Layout):
    """Layout with a timestamp followed by eight float32 columns."""
    # Rate in Hz, going by the attribute name; nothing in this class
    # reads it.
    rate_hz = 120
    # (name, type) pairs, in column order.
    fields = [
        ('timestamp', 'int64'),
        ('p1', 'float32'), ('q1', 'float32'),
        ('p3', 'float32'), ('q3', 'float32'),
        ('p5', 'float32'), ('q5', 'float32'),
        ('p7', 'float32'), ('q7', 'float32'),
    ]
-
class RawData(Layout):
    """Layout with a timestamp followed by six uint16 columns."""
    # Rate in Hz, going by the attribute name; nothing in this class
    # reads it.
    rate_hz = 8000
    # (name, type) pairs, in column order.
    fields = [
        ('timestamp', 'int64'),
        ('va', 'uint16'), ('vb', 'uint16'), ('vc', 'uint16'),
        ('ia', 'uint16'), ('ib', 'uint16'), ('ic', 'uint16'),
    ]
-
class RawNotchedData(Layout):
    """Layout with the RawData columns plus three extra uint16 columns."""
    # Rate in Hz, going by the attribute name; nothing in this class
    # reads it.
    rate_hz = 8000
    # Concatenation builds a new list, so RawData.fields is not modified.
    fields = RawData.fields + [
        ('notch_ia', 'uint16'),
        ('notch_ib', 'uint16'),
        ('notch_ic', 'uint16'),
    ]
-
# Registry of layouts: one instance of every Layout class that exists in
# this module at this point, keyed by class name.  Layout itself passes
# the issubclass() test, so it is registered as well.
named = {}
for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass):
    if issubclass(obj, Layout):
        named[name] = obj()
-
class Parser(object):
    """Object that parses and stores ASCII data for inclusion into the
    database.

    Attributes:
      layout         Layout instance used to convert each line
      data           list of parsed rows (each a list of values)
      min_timestamp  timestamp of the first stored row, or None
      max_timestamp  timestamp of the last stored row, or None
      ts_field       index of the timestamp within a row, or None to
                     disable timestamp handling
    """

    def __init__(self, layout):
        """Create a parser for the given layout.

        layout: either a Layout instance, or a key of the module-level
        ``named`` registry.

        Raises TypeError("unknown layout") if the key is not registered.
        """
        if isinstance(layout, Layout):
            self.layout = layout
        else:
            try:
                self.layout = named[layout]
            except KeyError:
                raise TypeError("unknown layout")

        self.data = []
        self.min_timestamp = None
        self.max_timestamp = None
        # Assume timestamp is always the first field, for now
        self.ts_field = 0

    def parse(self, textdata):
        """Parse the data, provided as lines of text, using the current
        layout, into an internal data structure.

        Everything from a '#' to the end of a line is ignored, and the
        remainder is split on whitespace.  Rows are appended to
        ``self.data`` and the timestamp range is updated only if the
        whole input parses; on failure the parser is left unchanged.

        Raises ParserError, carrying the 1-based line number, when the
        layout raises ValueError, IndexError or TypeError for a line, or
        when a timestamp is lower than that of the preceding line.
        """
        # Split on "\n" only.  A final newline (or empty input) does not
        # start another line, so drop the trailing empty piece.
        lines = textdata.split('\n')
        if lines[-1] == '':
            lines.pop()

        # Assume any parsing error is a real error.
        # In the future we might want to skip completely empty lines,
        # or partial lines right before EOF?
        rows = []
        last_ts = None
        n = 0
        try:
            for n, line in enumerate(lines, 1):
                fields = line.partition('#')[0].split()
                out = self.layout.parse(fields)

                # Verify timestamp (equal timestamps are accepted)
                if self.ts_field is not None:
                    ts = out[self.ts_field]
                    if last_ts is not None and ts < last_ts:
                        raise ValueError("timestamp is not "
                                         "monotonically increasing")
                    last_ts = ts
                rows.append(out)
        # str(e) instead of e.message: the attribute is deprecated and
        # is empty for exceptions constructed with several arguments.
        except ValueError as e:
            raise ParserError(n, "value error: " + str(e))
        except IndexError as e:
            raise ParserError(n, "index error: " + str(e))
        except TypeError as e:
            raise ParserError(n, "type error: " + str(e))

        # Commit only after every line was accepted
        self.data.extend(rows)

        # Mark timestamp ranges
        if self.data and self.ts_field is not None:
            self.min_timestamp = self.data[0][self.ts_field]
            self.max_timestamp = self.data[-1][self.ts_field]

    def fill_table(self, table):
        """Fill a PyTables table object with the parsed data.
        The table's rows must match the parser's layout.

        For every stored row, each layout column is assigned by name on
        ``table.row`` and the row is then appended.
        """
        row = table.row
        column_names = [name for (name, _) in self.layout.fields]
        for values in self.data:
            for n, name in enumerate(column_names):
                row[name] = values[n]
            row.append()
|