You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

202 lines
6.2 KiB

  1. # cython: profile=False
  2. import time
  3. import sys
  4. import inspect
  5. import cStringIO
  6. cdef enum:
  7. max_value_count = 64
  8. cimport cython
  9. cimport libc.stdlib
  10. cimport libc.stdio
  11. cimport libc.string
  class ParserError(Exception):
      """Error raised when input text cannot be parsed.

      The error text, prefixed with the offending line number, is kept
      in the `message` attribute and is also passed to Exception.
      """

      def __init__(self, line, message):
          prefix = "line " + str(line) + ": "
          self.message = prefix + message
          Exception.__init__(self, self.message)
  class FormatterError(Exception):
      """Error raised when rows cannot be formatted as ASCII text."""
  class Layout(object):
      """Represents a NILM database layout.

      A layout is selected by a type string of the form
      "<datatype>_<count>" (for example "uint16_6").  Each row consists
      of one timestamp followed by `count` values.

      Attributes set by the constructor:
        datatype   -- 'uint16', 'float32' or 'float64'
        count      -- number of values per row, 1..max_value_count
        parse      -- bound method that parses one line of text
        format     -- bound method that formats one row
        format_str -- '%'-style format string used by `format`
      """

      def __init__(self, typestring):
          """Initialize this Layout object to handle the specified
          type string.

          Raises KeyError if the string does not split into exactly two
          "_"-separated fields, if the count is not an integer in the
          range 1..max_value_count, or if the datatype is unknown.
          """
          try:
              [ datatype, count ] = typestring.split("_")
          except (ValueError, AttributeError, TypeError):
              # ValueError: wrong number of "_"-separated fields.
              # AttributeError / TypeError: typestring is not a usable
              # string.  A bare "except:" would also have swallowed
              # KeyboardInterrupt and SystemExit.
              raise KeyError("invalid layout string")

          try:
              self.count = int(count)
          except ValueError:
              raise KeyError("invalid count")
          if self.count < 1 or self.count > max_value_count:
              raise KeyError("invalid count")

          if datatype == 'uint16':
              self.parse = self.parse_uint16
              self.format_str = "%.6f" + " %d" * self.count
              self.format = self.format_generic
          elif datatype == 'float32':
              # float32 values are parsed as doubles; see parse_float64.
              self.parse = self.parse_float64
              self.format_str = "%.6f" + " %.6e" * self.count
              self.format = self.format_generic
          elif datatype == 'float64':
              self.parse = self.parse_float64
              self.format_str = "%.6f" + " %.16e" * self.count
              self.format = self.format_generic
          else:
              raise KeyError("invalid type")

          self.datatype = datatype

      # Parsers
      def parse_float64(self, char *text):
          """Parse one line holding a timestamp followed by `self.count`
          floating-point values.

          Returns a tuple (ts, row) where row is the list
          [ts, v1, ..., vN].  After the last value, only spaces followed
          by a newline, a '#' or the end of the string are accepted.

          Raises ValueError if the timestamp or any value cannot be
          converted, or if extra data follows the last value.
          """
          cdef int n
          cdef double ts
          cdef char *end

          # Return doubles even in float32 case, since they're going into
          # a Python array which would upconvert to double anyway.
          result = [0] * (self.count + 1)

          ts = libc.stdlib.strtod(text, &end)
          if end == text:
              # strtod consumed nothing: no number at the start of line
              raise ValueError("bad timestamp")
          result[0] = ts

          for n in range(self.count):
              text = end
              result[n+1] = libc.stdlib.strtod(text, &end)
              if end == text:
                  raise ValueError("wrong number of values")

          # Skip trailing spaces, then require end of line, the start of
          # a comment, or end of string.
          n = 0
          while end[n] == ' ':
              n += 1
          if end[n] != '\n' and end[n] != '#' and end[n] != '\0':
              raise ValueError("extra data on line")
          return (ts, result)

      def parse_uint16(self, char *text):
          """Parse one line holding a timestamp followed by `self.count`
          base-10 integers, each in the range 0..65535.

          Returns a tuple (ts, row) where row is the list
          [ts, v1, ..., vN].  After the last value, only spaces followed
          by a newline, a '#' or the end of the string are accepted.

          Raises ValueError if the timestamp or any value cannot be
          converted, if a value is out of range, or if extra data
          follows the last value.
          """
          cdef int n
          cdef double ts
          # strtol returns a C long.  Keeping the full width means a
          # large input is rejected by the range check below instead of
          # being silently truncated to a small int.
          cdef long v
          cdef char *end

          result = [0] * (self.count + 1)

          ts = libc.stdlib.strtod(text, &end)
          if end == text:
              raise ValueError("bad timestamp")
          result[0] = ts

          for n in range(self.count):
              text = end
              v = libc.stdlib.strtol(text, &end, 10)
              if end == text:
                  # strtol consumed nothing: the line ran out of values
                  raise ValueError("wrong number of values")
              if v < 0 or v > 65535:
                  raise ValueError("value out of range")
              result[n+1] = v

          # Skip trailing spaces, then require end of line, the start of
          # a comment, or end of string.
          n = 0
          while end[n] == ' ':
              n += 1
          if end[n] != '\n' and end[n] != '#' and end[n] != '\0':
              raise ValueError("extra data on line")
          return (ts, result)

      # Formatters
      def format_generic(self, d):
          """Format one row `d` (timestamp followed by `self.count`
          values) using `self.format_str`, terminated by a newline.

          Raises ValueError if `d` does not hold exactly
          `self.count` values after the timestamp.
          """
          n = len(d) - 1
          if n != self.count:
              raise ValueError("wrong number of values for layout type: "
                               "got %d, wanted %d" % (n, self.count))
          return (self.format_str % tuple(d)) + "\n"
  102. # Get a layout by name
  def get_named(typestring):
      """Return a Layout for `typestring`.

      The string is first tried directly as a layout type string.  If
      Layout rejects it with KeyError, it is looked up in a table of
      compatibility names instead; a KeyError from that lookup
      propagates to the caller.
      """
      compat_names = {
          "PrepData": "float32_8",
          "RawData": "uint16_6",
          "RawNotchedData": "uint16_9",
      }
      try:
          layout = Layout(typestring)
      except KeyError:
          layout = Layout(compat_names[typestring])
      return layout
  class Parser(object):
      """Object that parses and stores ASCII data for inclusion into the
      database.

      Attributes:
        layout        -- the Layout used to parse each line
        data          -- list of parsed rows from the last parse() call
        min_timestamp -- timestamp of the first parsed row, or None
        max_timestamp -- timestamp of the last parsed row, or None
      """

      def __init__(self, layout):
          """Create a parser for `layout`, which is either a Layout
          instance or a layout name accepted by get_named().

          Raises TypeError if the name is not a known layout.
          """
          if isinstance(layout, Layout):
              self.layout = layout
          else:
              try:
                  self.layout = get_named(layout)
              except KeyError:
                  raise TypeError("unknown layout")

          self.data = []
          self.min_timestamp = None
          self.max_timestamp = None

      def parse(self, textdata):
          """
          Parse the data, provided as lines of text, using the current
          layout, into an internal data structure suitable for a
          pytables 'table.append(parser.data)'.

          Lines whose first character is '#' are skipped.  Timestamps
          must be strictly increasing from one parsed line to the next.

          Raises ParserError, carrying the 1-based line number, on any
          ValueError, IndexError or TypeError raised while parsing.
          """
          cdef double last_ts = -1e12, ts
          cdef int n = 0
          cdef char *line

          indata = cStringIO.StringIO(textdata)

          # Start from a clean state so that an empty or failed parse
          # does not leave timestamps from a previous call behind.
          self.data = []
          self.min_timestamp = None
          self.max_timestamp = None

          # Assume any parsing error is a real error.
          # In the future we might want to skip completely empty lines,
          # or partial lines right before EOF?
          try:
              for pyline in indata:
                  line = pyline
                  n += 1
                  # Plain '#': the escape-like '\#' is not a recognised
                  # escape sequence and would not be a single character.
                  if line[0] == '#':
                      continue
                  (ts, row) = self.layout.parse(line)
                  if ts <= last_ts:
                      raise ValueError("timestamp is not "
                                       "monotonically increasing")
                  last_ts = ts
                  self.data.append(row)
          except (ValueError, IndexError, TypeError) as e:
              # str(e) instead of e.message: .message is deprecated and
              # is empty for exceptions built with several arguments.
              raise ParserError(n, "error: " + str(e))

          # Mark timestamp ranges
          if self.data:
              self.min_timestamp = self.data[0][0]
              self.max_timestamp = self.data[-1][0]
  class Formatter(object):
      """Object that formats database data into ASCII.

      Attributes:
        layout -- the Layout whose format() renders each row
      """

      def __init__(self, layout):
          """Create a formatter for `layout`, which is either a Layout
          instance or a layout name accepted by get_named().

          Raises TypeError if the name is not a known layout.
          """
          if isinstance(layout, Layout):
              self.layout = layout
          else:
              try:
                  self.layout = get_named(layout)
              except KeyError:
                  raise TypeError("unknown layout")

      def format(self, data):
          """
          Format raw data from the database, using the current layout,
          as lines of ASCII text.

          `data` is an iterable of rows; each row is passed to the
          layout's format() and the results are concatenated.

          Raises FormatterError on any ValueError, IndexError or
          TypeError raised while formatting.
          """
          try:
              # Build every line first and join once, rather than
              # writing row by row into a string buffer.
              return "".join([self.layout.format(row) for row in data])
          except (ValueError, IndexError, TypeError) as e:
              # str(e) instead of e.message: .message is deprecated and
              # is empty for exceptions built with several arguments.
              raise FormatterError("formatting error: " + str(e))
  178. return text.getvalue()