|
- # cython: profile=False
-
- import tables
- import time
- import sys
- import inspect
- import cStringIO
- import numpy as np
-
- cimport cython
- cimport libc.stdlib
- cimport libc.stdio
- cimport libc.string
-
class ParserError(Exception):
    """Error raised when input text cannot be parsed.

    The line number is folded into the text as ``"line <line>: <message>"``.
    The combined text is stored on ``self.message`` and is also the
    exception's sole argument.
    """
    def __init__(self, line, message):
        prefix = "line " + str(line) + ": "
        self.message = prefix + message
        super(ParserError, self).__init__(self.message)
-
class FormatterError(Exception):
    """Error raised when database rows cannot be formatted as text."""
-
class Layout:
    """Represents a NILM database layout

    This base class supplies no ``fields`` list and no working parser;
    the concrete layouts in this file provide both.
    """

    def description(self):
        """Return the PyTables description of this layout

        One column is created per ``(name, type)`` pair in ``self.fields``,
        with column positions numbered from 1 in list order.
        """
        columns = {}
        for (index, (col_name, col_type)) in enumerate(self.fields):
            columns[col_name] = tables.Col.from_type(col_type, pos=index+1)
        return tables.Description(columns)

    def parse(self, char *text):
        """Placeholder parser for layouts that do not define their own.

        Raises:
            ValueError: always.  ParserError cannot be raised here because
                it requires a line number that this method does not have.
                ValueError matches what the concrete layout parsers raise,
                so Parser.parse wraps it with the right line number.
        """
        raise ValueError("no parser for this layout")
-
class PrepData(Layout):
    """Layout with a float64 timestamp followed by eight float32 columns."""
    rate_hz = 120
    fields = [
        ( 'timestamp', 'float64' ),
        ( 'p1', 'float32' ),
        ( 'q1', 'float32' ),
        ( 'p3', 'float32' ),
        ( 'q3', 'float32' ),
        ( 'p5', 'float32' ),
        ( 'q5', 'float32' ),
        ( 'p7', 'float32' ),
        ( 'q7', 'float32' ),
    ]

    def parse(self, char *text):
        """Parse one line of text into ``(timestamp, row)``.

        ``row`` is a list holding the timestamp followed by the eight
        values.  Raises ValueError when fewer than nine numbers are read,
        or when something other than '#' or a newline follows them.
        """
        cdef int count
        cdef int idx
        cdef double stamp
        # Values are scanned as doubles rather than float32: they end up
        # in a Python list, which would widen them to double anyway.
        cdef double vals[8]
        cdef char extra
        count = libc.stdio.sscanf(
            text, " %lf %lf %lf %lf %lf %lf %lf %lf %lf %c",
            &stamp,
            &vals[0], &vals[1], &vals[2], &vals[3],
            &vals[4], &vals[5], &vals[6], &vals[7],
            &extra)

        # 'extra' is only meaningful when sscanf converted a tenth item.
        if count > 9:
            count_ok = (extra == '#' or extra == '\n')
        else:
            count_ok = (count == 9)
        if not count_ok:
            raise ValueError("wrong number of values: wanted 9, got " + str(count))

        row = [stamp]
        for idx in range(8):
            row.append(vals[idx])
        return (stamp, row)
-
class RawData(Layout):
    """Layout with a float64 timestamp followed by six uint16 columns."""
    rate_hz = 8000
    fields = [
        ( 'timestamp', 'float64' ),
        ( 'va', 'uint16' ),
        ( 'vb', 'uint16' ),
        ( 'vc', 'uint16' ),
        ( 'ia', 'uint16' ),
        ( 'ib', 'uint16' ),
        ( 'ic', 'uint16' ),
    ]

    def parse(self, char *text):
        """Parse one line of text into ``(timestamp, row)``.

        ``row`` is a list holding the timestamp followed by the six
        integer values.  Raises ValueError when fewer than seven numbers
        are read, when something other than '#' or a newline follows
        them, or when any integer lies outside 0..65535.
        """
        cdef int count
        cdef int idx
        cdef int sample
        cdef double stamp
        # NOTE(review): the values are scanned with %u but stored in signed
        # ints; the "< 0" half of the range check below relies on that.
        cdef int vals[6]
        cdef char extra
        count = libc.stdio.sscanf(
            text, " %lf %u %u %u %u %u %u %c",
            &stamp,
            &vals[0], &vals[1], &vals[2],
            &vals[3], &vals[4], &vals[5],
            &extra)

        # 'extra' is only meaningful when sscanf converted an eighth item.
        if count > 7:
            count_ok = (extra == '#' or extra == '\n')
        else:
            count_ok = (count == 7)
        if not count_ok:
            raise ValueError("wrong number of values: wanted 7, got " + str(count))

        row = [stamp]
        for idx in range(6):
            sample = vals[idx]
            if not (0 <= sample <= 65535):
                raise ValueError("value out of range: " + str(sample))
            row.append(sample)
        return (stamp, row)
-
class RawNotchedData(RawData):
    """RawData layout extended with three uint16 ``notch_*`` columns."""
    rate_hz = 8000
    fields = RawData.fields + [
        ( 'notch_ia', 'uint16' ),
        ( 'notch_ib', 'uint16' ),
        ( 'notch_ic', 'uint16' ),
    ]

    def parse(self, char *text):
        """Parse one line of text into ``(timestamp, row)``.

        ``row`` is a list holding the timestamp followed by the nine
        integer values.  Raises ValueError when fewer than ten numbers
        are read, when something other than '#' or a newline follows
        them, or when any integer lies outside 0..65535.
        """
        cdef int count
        cdef int idx
        cdef int sample
        cdef double stamp
        # NOTE(review): the values are scanned with %u but stored in signed
        # ints; the "< 0" half of the range check below relies on that.
        cdef int vals[9]
        cdef char extra
        count = libc.stdio.sscanf(
            text, " %lf %u %u %u %u %u %u %u %u %u %c",
            &stamp,
            &vals[0], &vals[1], &vals[2], &vals[3], &vals[4],
            &vals[5], &vals[6], &vals[7], &vals[8],
            &extra)

        # 'extra' is only meaningful when sscanf converted an eleventh item.
        if count > 10:
            count_ok = (extra == '#' or extra == '\n')
        else:
            count_ok = (count == 10)
        if not count_ok:
            raise ValueError("wrong number of values: wanted 10, got " + str(count))

        row = [stamp]
        for idx in range(9):
            sample = vals[idx]
            if not (0 <= sample <= 65535):
                raise ValueError("value out of range: " + str(sample))
            row.append(sample)
        return (stamp, row)
-
# Registry of layout instances, keyed by class name.  Every class found
# in this module at this point that is a subclass of Layout (Layout
# itself included) is instantiated once.
named = {}
for name, obj in inspect.getmembers(sys.modules[__name__]):
    if not inspect.isclass(obj):
        continue
    if not issubclass(obj, Layout):
        continue
    named[name] = obj()
-
class Parser(object):
    """Object that parses and stores ASCII data for inclusion into the
    database"""

    def __init__(self, layout):
        """Create a parser for a layout.

        ``layout`` is either a Layout instance or a key of the
        module-level ``named`` registry.  Raises TypeError("unknown
        layout") when the key is not in the registry.
        """
        if issubclass(layout.__class__, Layout):
            self.layout = layout
        else:
            try:
                self.layout = named[layout]
            except KeyError:
                raise TypeError("unknown layout")

        self.data = []
        self.min_timestamp = None
        self.max_timestamp = None

    def parse(self, textdata):
        """
        Parse the data, provided as lines of text, using the current
        layout, into an internal data structure suitable for a
        pytables 'table.append(parser.data)'.

        Lines whose first character is '#' are skipped.  After a
        successful parse, ``min_timestamp`` and ``max_timestamp`` hold
        the first column of the first and last stored rows; they stay
        None when no rows were stored.

        Raises ParserError, carrying the 1-based line number, when the
        layout's parser raises ValueError, IndexError or TypeError, or
        when a timestamp is lower than the one before it.
        """
        cdef double last_ts = 0, ts
        cdef int n = 0
        cdef char *line

        indata = cStringIO.StringIO(textdata)

        # Reset all results up front so that an empty or failed parse
        # never leaves timestamps from a previous call behind.
        self.data = []
        self.min_timestamp = None
        self.max_timestamp = None

        # Assume any parsing error is a real error.
        # In the future we might want to skip completely empty lines,
        # or partial lines right before EOF?
        try:
            for pyline in indata:
                line = pyline
                n += 1
                # Compare against a plain '#': the previous '\#' literal
                # is a two-character string (backslash + hash) and so
                # never matched a single character.
                if line[0] == '#':
                    continue
                (ts, row) = self.layout.parse(line)
                if ts < last_ts:
                    raise ValueError("timestamp is not "
                                     "monotonically increasing")
                last_ts = ts
                self.data.append(row)
        except (ValueError, IndexError, TypeError) as e:
            raise ParserError(n, "error: " + e.message)

        # Mark timestamp ranges
        if self.data:
            self.min_timestamp = self.data[0][0]
            self.max_timestamp = self.data[-1][0]
-
class Formatter(object):
    """Object that formats database data into ASCII"""

    def __init__(self, layout):
        """Create a formatter for a layout.

        ``layout`` is either a Layout instance or a key of the
        module-level ``named`` registry.  Raises TypeError("unknown
        layout") when the key is not in the registry.
        """
        if not issubclass(layout.__class__, Layout):
            # Not a Layout instance: treat it as a registry key.
            try:
                layout = named[layout]
            except KeyError:
                raise TypeError("unknown layout")
        self.layout = layout

    def format(self, data):
        """
        Format raw data from the database, using the current layout,
        as lines of ASCII text.

        Each row of ``data`` is passed to ``self.layout.format`` and the
        results are concatenated into one string.  ValueError,
        IndexError and TypeError raised along the way are re-raised as
        FormatterError.

        NOTE(review): none of the layouts visible in this section define
        a ``format`` method -- confirm where it is provided.
        """
        buf = cStringIO.StringIO()
        try:
            for record in data:
                rendered = self.layout.format(record)
                buf.write(rendered)
        except (ValueError, IndexError, TypeError) as err:
            raise FormatterError("formatting error: " + err.message)
        return buf.getvalue()
|