You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

78 lines
2.7 KiB

  1. # -*- coding: utf-8 -*-
  2. """Provide a NumpyClient class that is based on normal Client, but has
  3. additional methods for extracting and inserting data via Numpy arrays."""
  4. import nilmdb.utils
  5. import nilmdb.client.client
  6. import nilmdb.client.httpclient
  7. from nilmdb.client.errors import ClientError
  8. import contextlib
  9. from nilmdb.utils.time import timestamp_to_string, string_to_timestamp
  10. import numpy
  11. import cStringIO
  12. def layout_to_dtype(layout):
  13. ltype = layout.split('_')[0]
  14. lcount = int(layout.split('_')[1])
  15. if ltype.startswith('int'):
  16. atype = '<i' + str(int(ltype[3:]) / 8)
  17. elif ltype.startswith('uint'):
  18. atype = '<u' + str(int(ltype[4:]) / 8)
  19. elif ltype.startswith('float'):
  20. atype = '<f' + str(int(ltype[5:]) / 8)
  21. else:
  22. raise ValueError("bad layout")
  23. return numpy.dtype([('timestamp', '<i8'), ('data', atype, lcount)])
  24. class NumpyClient(nilmdb.client.client.Client):
  25. """Subclass of nilmdb.client.Client that adds additional methods for
  26. extracting and inserting data via Numpy arrays."""
  27. def stream_extract_numpy(self, path, start = None, end = None,
  28. layout = None, maxrows = 100000,
  29. structured = False):
  30. """
  31. Extract data from a stream. Returns a generator that yields
  32. Numpy arrays of up to 'maxrows' of data each.
  33. If 'layout' is None, it is read using stream_info.
  34. If 'structured' is False, all data is converted to float64
  35. and returned in a flat 2D array. Otherwise, data is returned
  36. as a structured dtype in a 1D array.
  37. """
  38. if layout is None:
  39. streams = self.stream_list(path)
  40. if len(streams) != 1:
  41. raise ClientError("can't get layout for path: " + path)
  42. layout = streams[0][1]
  43. dtype = layout_to_dtype(layout)
  44. def to_numpy(data):
  45. a = numpy.fromstring(data, dtype)
  46. if structured:
  47. return a
  48. return numpy.c_[a['timestamp'], a['data']]
  49. chunks = []
  50. total_len = 0
  51. maxsize = dtype.itemsize * maxrows
  52. for data in self.stream_extract(path, start, end, binary = True):
  53. # Add this block of binary data
  54. chunks.append(data)
  55. total_len += len(data)
  56. # See if we have enough to make the requested Numpy array
  57. while total_len >= maxsize:
  58. assembled = "".join(chunks)
  59. total_len -= maxsize
  60. chunks = [ assembled[maxsize:] ]
  61. block = assembled[:maxsize]
  62. yield to_numpy(block)
  63. if total_len:
  64. yield to_numpy("".join(chunks))