You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

138 lines
4.7 KiB

  1. # Python implementation of the "rocket" data parsing interface.
  2. # This interface translates between the binary format on disk
  3. # and the ASCII format used when communicating with clients.
  4. # This is slow! Use the C version instead.
  5. from __future__ import absolute_import
  6. import struct
  7. import cStringIO
  8. import itertools
  9. from . import layout as _layout
  10. import nilmdb.utils
  11. from nilmdb.utils.time import float_time_to_string as ftts
  12. ERR_UNKNOWN = 0
  13. ERR_NON_MONOTONIC = 1
  14. ERR_OUT_OF_INTERVAL = 2
  15. class ParseError(Exception):
  16. pass
  17. @nilmdb.utils.must_close(wrap_verify = False)
  18. class Rocket(object):
  19. def __init__(self, layout, filename):
  20. self.layout = layout
  21. if filename:
  22. self.file = open(filename, "a+b")
  23. else:
  24. self.file = None
  25. # For packing/unpacking into a binary file.
  26. # This will change in the C version
  27. try:
  28. (self.ltype, lcount) = layout.split('_', 2)
  29. self.lcount = int(lcount)
  30. except:
  31. raise ValueError("no such layout: badly formatted string")
  32. if self.lcount < 1:
  33. raise ValueError("no such layout: bad count")
  34. try:
  35. struct_fmt = '<d' # Little endian, double timestamp
  36. struct_mapping = {
  37. "int8": 'b',
  38. "uint8": 'B',
  39. "int16": 'h',
  40. "uint16": 'H',
  41. "int32": 'i',
  42. "uint32": 'I',
  43. "int64": 'q',
  44. "uint64": 'Q',
  45. "float32": 'f',
  46. "float64": 'd',
  47. }
  48. struct_fmt += struct_mapping[self.ltype] * self.lcount
  49. except KeyError:
  50. raise ValueError("no such layout: bad data type")
  51. self.packer = struct.Struct(struct_fmt)
  52. # For packing/unpacking from strings.
  53. self.layoutparser = _layout.Layout(self.layout)
  54. self.formatter = _layout.Formatter(self.layout)
  55. def close(self):
  56. if self.file:
  57. self.file.close()
  58. @property
  59. def binary_size(self):
  60. """Return size of one row of data in the binary file, in bytes"""
  61. return self.packer.size
  62. def append_iter(self, maxrows, data):
  63. """Append the list data to the file"""
  64. # We assume the file is opened in append mode,
  65. # so all writes go to the end.
  66. written = 0
  67. for row in itertools.islice(data, maxrows):
  68. self.file.write(self.packer.pack(*row))
  69. written += 1
  70. self.file.flush()
  71. return written
  72. def append_string(self, count, data, data_offset, linenum,
  73. start, end, last_timestamp):
  74. """Parse string and append data.
  75. count: maximum number of rows to add
  76. data: string data
  77. data_offset: byte offset into data to start parsing
  78. linenum: current line number of data
  79. start: starting timestamp for interval
  80. end: end timestamp for interval
  81. last_timestamp: last timestamp that was previously parsed
  82. Raises ParseError if timestamps are non-monotonic, outside the
  83. start/end interval, etc.
  84. On success, return a tuple with three values:
  85. added_rows: how many rows were added from the file
  86. data_offset: current offset into the data string
  87. last_timestamp: last timestamp we parsed
  88. """
  89. # Parse the input data
  90. indata = cStringIO.StringIO(data)
  91. indata.seek(data_offset)
  92. written = 0
  93. while written < count:
  94. line = indata.readline()
  95. linenum += 1
  96. if line == "":
  97. break
  98. try:
  99. (ts, row) = self.layoutparser.parse(line)
  100. except ValueError as e:
  101. raise ParseError(linenum, ERR_UNKNOWN, e)
  102. if ts <= last_timestamp:
  103. raise ParseError(linenum, ERR_NON_MONOTONIC, ts)
  104. last_timestamp = ts
  105. if ts < start or ts >= end:
  106. raise ParseError(linenum, ERR_OUT_OF_INTERVAL, ts)
  107. self.append_iter(1, [row])
  108. written += 1
  109. return (written, indata.tell(), last_timestamp, linenum)
  110. def extract_list(self, offset, count):
  111. """Extract count rows of data from the file at offset offset.
  112. Return a list of lists [[row],[row],...]"""
  113. ret = []
  114. self.file.seek(offset)
  115. for i in xrange(count):
  116. data = self.file.read(self.binary_size)
  117. ret.append(list(self.packer.unpack(data)))
  118. return ret
  119. def extract_string(self, offset, count):
  120. """Extract count rows of data from the file at offset offset.
  121. Return an ascii formatted string according to the layout"""
  122. return self.formatter.format(self.extract_list(offset, count))