|
- import tables
- import time
- import sys
- import inspect
- import cStringIO
-
class Layout(object):
    """Represents a NILM database layout.

    Subclasses are expected to define ``fields``: an ordered list of
    ``(name, type)`` pairs, where ``type`` is a type-name string such as
    ``'int64'``, ``'float32'`` or ``'uint16'``.
    """

    @classmethod
    def description(cls):
        """Return the PyTables description of this layout.

        One column is created per entry of ``cls.fields``; column
        positions follow the order of ``cls.fields``, starting at 1.
        """
        desc = {}
        for (n, (name, ftype)) in enumerate(cls.fields):
            desc[name] = tables.Col.from_type(ftype, pos=n+1)
        return tables.Description(desc)

    @classmethod
    def parse(cls, in_fields):
        """Given in_fields as text, return a list of values
        converted to the correct types.

        ``in_fields`` is an indexable sequence of strings, one per entry
        of ``cls.fields`` and in the same order.  Entries beyond the
        number of fields are ignored.

        Raises ValueError if a string cannot be converted, IndexError if
        ``in_fields`` has too few entries, and TypeError if a field has a
        type this parser does not know how to convert.
        """
        out = []
        for (n, (name, ftype)) in enumerate(cls.fields):
            if name == 'timestamp':
                # special case: parse float, save as int
                # (scaled by 1e6 and truncated toward zero)
                out.append(int(float(in_fields[n]) * 1e6))
            elif ftype == 'float32':
                out.append(float(in_fields[n]))
            elif ftype == 'uint16':
                # Clamp into the representable uint16 range instead of
                # letting out-of-range values wrap or fail later.
                out.append(max(0, min(65535, int(in_fields[n], 10))))
            else:
                raise TypeError("Can't parse type " + ftype)
        # The converted values must be handed back to the caller; without
        # this return the method would always yield None.
        return out
-
class PrepData(Layout):
    """Layout with an int64 timestamp followed by eight float32 columns
    (p1/q1, p3/q3, p5/q5, p7/q7)."""

    # 120 * 86400; presumably rows per second times seconds per day.
    expected_daily_rows = 120 * 86400

    # Ordered (column name, type name) pairs.
    fields = [
        ('timestamp', 'int64'),
        ('p1', 'float32'),
        ('q1', 'float32'),
        ('p3', 'float32'),
        ('q3', 'float32'),
        ('p5', 'float32'),
        ('q5', 'float32'),
        ('p7', 'float32'),
        ('q7', 'float32'),
    ]
-
class RawData(Layout):
    """Layout with an int64 timestamp followed by six uint16 columns
    (va, vb, vc, ia, ib, ic)."""

    # 8000 * 86400; presumably rows per second times seconds per day.
    expected_daily_rows = 8000 * 86400

    # Ordered (column name, type name) pairs.
    fields = [
        ('timestamp', 'int64'),
        ('va', 'uint16'),
        ('vb', 'uint16'),
        ('vc', 'uint16'),
        ('ia', 'uint16'),
        ('ib', 'uint16'),
        ('ic', 'uint16'),
    ]
-
class RawNotchedData(Layout):
    """Layout with the columns of RawData followed by three extra uint16
    columns (notch_ia, notch_ib, notch_ic)."""

    # 8000 * 86400; presumably rows per second times seconds per day.
    expected_daily_rows = 8000 * 86400

    # Start from a fresh copy of the RawData columns so that RawData's own
    # list is left untouched, then append the notch columns.
    fields = list(RawData.fields)
    fields.extend([
        ('notch_ia', 'uint16'),
        ('notch_ib', 'uint16'),
        ('notch_ic', 'uint16'),
    ])
-
# Registry of layouts: maps each class name found in this module to the
# class itself, for every class that is Layout or a subclass of it, so a
# layout can be looked up by name.
named = {}
for name, obj in inspect.getmembers(sys.modules[__name__]):
    if not inspect.isclass(obj):
        continue
    if not issubclass(obj, Layout):
        continue
    named[name] = obj
-
class Parser(object):
    """Object that parses and stores ASCII data for inclusion into the
    database.

    Attributes:
        layout: the layout class looked up in ``named``.
        data:   list of parsed rows, one entry per parsed line.
        nrows:  number of lines processed by the most recent parse() call.
    """

    def __init__(self, layout):
        """Create a parser for the layout registered under the name
        ``layout``.

        Raises TypeError if ``layout`` is not a key of ``named``.
        """
        if layout not in named:
            raise TypeError("unknown layout")

        self.layout = named[layout]
        self.data = []
        self.nrows = 0

    def parse(self, textdata):
        """Parse the data, provided as lines of text, using the current
        layout, into an internal data structure.

        Each line is cut at the first '#', split on whitespace, and the
        resulting fields are converted by ``self.layout.parse``; the
        result is appended to ``self.data``.

        Raises TypeError, with the 1-based line number and the underlying
        error text, if any line fails to parse.  Rows parsed before the
        failing line remain in ``self.data``.
        """
        # Split on '\n' only.  A trailing newline (or empty input) leaves
        # one empty piece at the end which is not a line of its own.
        lines = textdata.split('\n')
        if lines[-1] == '':
            lines.pop()

        # NOTE(review): nrows restarts at zero here but self.data keeps
        # rows from earlier parse() calls -- confirm that accumulation
        # across calls is intended.
        self.nrows = 0
        # Assume any parsing error is a real error.
        # In the future we might want to skip completely empty lines,
        # or partial lines right before EOF?
        try:
            for line in lines:
                self.nrows += 1
                fields = line.partition('#')[0].split()
                self.data.append(self.layout.parse(fields))
        except (ValueError, TypeError, IndexError) as e:
            # nrows is an int and must be converted explicitly; str(e)
            # carries the original error text.
            raise TypeError("line " + str(self.nrows) + ": " + str(e))

    def fillrow(self, tablerow, rownum):
        """Fill a PyTables row object with the parsed data.
        The row must match the parser's layout.

        Copies each value of ``self.data[rownum]`` into ``tablerow`` under
        the corresponding field name of the layout.
        """
        values = self.data[rownum]
        for (n, field) in enumerate(self.layout.fields):
            tablerow[field[0]] = values[n]
|