|
- # cython: profile=False
-
- import time
- import sys
- import inspect
- import cStringIO
- import numpy as np
-
- cdef enum:  # anonymous enum: declares an integer constant for this module
- max_value_count = 64  # upper bound that Layout.__init__ enforces on a layout's value count
-
- cimport cython
- cimport libc.stdlib
- cimport libc.stdio
- cimport libc.string
-
class ParserError(Exception):
    """Parse failure tagged with the input line number it occurred on.

    The final text, "line <line>: <message>", is stored in the ``message``
    attribute and is also passed to ``Exception`` as its only argument.
    """

    def __init__(self, line, message):
        prefix = "line " + str(line) + ": "
        self.message = prefix + message
        Exception.__init__(self, self.message)
-
class FormatterError(Exception):
    """Raised when rows cannot be rendered as ASCII text."""
-
class Layout:
    """Represents a NILM database layout.

    A layout is named by a type string of the form "<datatype>_<count>",
    where <datatype> is 'uint16', 'float32' or 'float64' and <count> is the
    number of values per row (1 .. max_value_count).  After construction,
    ``self.parse`` and ``self.format`` are bound to the parser/formatter
    pair that matches the datatype.
    """

    def __init__(self, typestring):
        """Initialize this Layout object to handle the specified
        type string.

        Raises KeyError if the string is malformed, the count is not an
        integer in range, or the datatype is unknown.
        """
        try:
            datatype, count = typestring.split("_")
        except (AttributeError, TypeError, ValueError):
            # Not a string, or not exactly one "_" separator.
            raise KeyError("invalid layout string")

        try:
            self.count = int(count)
        except ValueError:
            raise KeyError("invalid count")
        if self.count < 1 or self.count > max_value_count:
            raise KeyError("invalid count")

        if datatype == 'uint16':
            self.parse = self.parse_uint16
            self.format = self.format_uint16
        elif datatype == 'float32' or datatype == 'float64':
            self.parse = self.parse_float64
            self.format = self.format_float64
        else:
            raise KeyError("invalid type")

        self.datatype = datatype

    # Parsers
    def parse_float64(self, char *text):
        """Parse one line: a timestamp followed by self.count floats.

        Returns (ts, row) where row is [ts, v1, ..., vN].  Only spaces,
        then a newline, a '#' or the end of the string may follow the
        last value.  Raises ValueError on any malformed line.
        """
        cdef int n
        cdef double ts
        cdef double v
        cdef char *end
        # Return doubles even in float32 case, since they're going into
        # a Python array which would upconvert to double anyway.
        result = []
        ts = libc.stdlib.strtod(text, &end)
        if end == text:
            raise ValueError("bad timestamp")
        result.append(ts)
        for n in range(self.count):
            text = end
            v = libc.stdlib.strtod(text, &end)
            # strtod leaves end == text when it could not convert anything.
            if end == text:
                raise ValueError("wrong number of values")
            result.append(v)
        n = 0
        while end[n] == ' ':
            n += 1
        if end[n] != '\n' and end[n] != '#' and end[n] != '\0':
            raise ValueError("extra data on line")
        return (ts, result)

    def parse_uint16(self, char *text):
        """Parse one line: a timestamp followed by self.count integers,
        each in the range 0..65535.

        Returns (ts, row) where row is [ts, v1, ..., vN].  Only spaces,
        then a newline, a '#' or the end of the string may follow the
        last value.  Raises ValueError on any malformed line.
        """
        cdef int n
        cdef double ts
        # strtol returns a long; keeping the full width means an oversized
        # input cannot be truncated into the valid range before the check.
        cdef long v
        cdef char *end
        result = []
        ts = libc.stdlib.strtod(text, &end)
        if end == text:
            raise ValueError("bad timestamp")
        result.append(ts)
        for n in range(self.count):
            text = end
            v = libc.stdlib.strtol(text, &end, 10)
            # strtol leaves end == text when it could not convert anything.
            if end == text:
                raise ValueError("wrong number of values")
            if v < 0 or v > 65535:
                raise ValueError("value out of range")
            result.append(v)
        n = 0
        while end[n] == ' ':
            n += 1
        if end[n] != '\n' and end[n] != '#' and end[n] != '\0':
            raise ValueError("extra data on line")
        return (ts, result)

    # Formatters
    def format_float64(self, d):
        """Format row d ([ts, v1, ..., vN]) as one newline-terminated line.

        The timestamp is written with "%.6f" and each value with "%f".
        Raises ValueError if d does not hold exactly self.count values
        after the timestamp.
        """
        n = len(d) - 1
        if n != self.count:
            raise ValueError("wrong number of values for layout type: "
                             "got %d, wanted %d" % (n, self.count))
        fields = ["%.6f" % d[0]]
        fields.extend(["%f" % d[i] for i in range(1, n + 1)])
        return " ".join(fields) + "\n"

    def format_uint16(self, d):
        """Format row d ([ts, v1, ..., vN]) as one newline-terminated line.

        The timestamp is written with "%.6f" and each value with "%d".
        Raises ValueError if d does not hold exactly self.count values
        after the timestamp.
        """
        n = len(d) - 1
        if n != self.count:
            raise ValueError("wrong number of values for layout type: "
                             "got %d, wanted %d" % (n, self.count))
        fields = ["%.6f" % d[0]]
        fields.extend(["%d" % d[i] for i in range(1, n + 1)])
        return " ".join(fields) + "\n"
-
- # Get a layout by name
def get_named(typestring):
    """Return a Layout for typestring.

    The string is first tried as a regular layout type string; if Layout
    rejects it with KeyError, it is looked up as one of the legacy layout
    names below.  A name that is neither raises KeyError.
    """
    legacy_names = {
        "PrepData": "float32_8",
        "RawData": "uint16_6",
        "RawNotchedData": "uint16_9",
    }
    try:
        layout = Layout(typestring)
    except KeyError:
        layout = Layout(legacy_names[typestring])
    return layout
-
class Parser(object):
    """Object that parses and stores ASCII data for inclusion into the
    database"""

    def __init__(self, layout):
        """Create a parser for the given layout.

        layout is either a Layout instance or a layout name that
        get_named() can resolve.  Raises TypeError("unknown layout") if
        the name cannot be resolved.
        """
        if isinstance(layout, Layout):
            self.layout = layout
        else:
            try:
                self.layout = get_named(layout)
            except KeyError:
                raise TypeError("unknown layout")

        self.data = []
        self.min_timestamp = None
        self.max_timestamp = None

    def parse(self, textdata):
        """
        Parse the data, provided as lines of text, using the current
        layout, into an internal data structure suitable for a
        pytables 'table.append(parser.data)'.

        On success self.data holds one row per non-comment line and
        self.min_timestamp / self.max_timestamp hold the first and last
        timestamps (None if no rows were parsed).  Raises ParserError,
        carrying the 1-based line number, on any malformed line or if
        timestamps do not strictly increase.
        """
        # NOTE: last_ts starts at 0, so a first timestamp <= 0 is rejected
        # as not monotonically increasing.
        cdef double last_ts = 0, ts
        cdef int n = 0
        cdef char *line

        indata = cStringIO.StringIO(textdata)

        # Reset all results together so the timestamp range never
        # describes data from an earlier call.
        self.data = []
        self.min_timestamp = None
        self.max_timestamp = None

        # Assume any parsing error is a real error.
        # In the future we might want to skip completely empty lines,
        # or partial lines right before EOF?
        try:
            for pyline in indata:
                line = pyline
                n += 1
                # Skip whole-line comments.  Use a plain '#' character
                # literal, the same one the layout parsers test for.
                if line[0] == '#':
                    continue
                (ts, row) = self.layout.parse(line)
                if ts <= last_ts:
                    raise ValueError("timestamp is not "
                                     "monotonically increasing")
                last_ts = ts
                self.data.append(row)
        except (ValueError, IndexError, TypeError) as e:
            # str(e) rather than the deprecated e.message attribute.
            raise ParserError(n, "error: " + str(e))

        # Mark timestamp ranges
        if self.data:
            self.min_timestamp = self.data[0][0]
            self.max_timestamp = self.data[-1][0]
-
class Formatter(object):
    """Object that formats database data into ASCII"""

    def __init__(self, layout):
        """Create a formatter for the given layout.

        layout is either a Layout instance or a layout name that
        get_named() can resolve.  Raises TypeError("unknown layout") if
        the name cannot be resolved.
        """
        if isinstance(layout, Layout):
            self.layout = layout
        else:
            try:
                self.layout = get_named(layout)
            except KeyError:
                raise TypeError("unknown layout")

    def format(self, data):
        """
        Format raw data from the database, using the current layout,
        as lines of ASCII text.

        data is an iterable of rows; each row is passed to the layout's
        formatter and the results are concatenated.  Raises
        FormatterError if any row cannot be formatted.
        """
        text = cStringIO.StringIO()
        try:
            for row in data:
                text.write(self.layout.format(row))
        except (ValueError, IndexError, TypeError) as e:
            # str(e) rather than the deprecated e.message attribute.
            raise FormatterError("formatting error: " + str(e))
        return text.getvalue()
|