|
- # -*- coding: utf-8 -*-
-
- """Provide a NumpyClient class that is based on normal Client, but has
- additional methods for extracting and inserting data via Numpy arrays."""
-
- import nilmdb.utils
- import nilmdb.client.client
- import nilmdb.client.httpclient
- from nilmdb.client.errors import ClientError
-
- import contextlib
- from nilmdb.utils.time import timestamp_to_string, string_to_timestamp
-
- import numpy
- import cStringIO
-
def layout_to_dtype(layout):
    """Convert a NilmDB layout string into a Numpy structured dtype.

    A layout looks like 'float32_8' or 'uint16_3': a base type name
    with a bit width, an underscore, and a column count.  The result
    is a structured dtype with a little-endian int64 'timestamp'
    field and a 'data' field of the corresponding type and count.

    Raises ValueError if the base type is not int/uint/float.
    """
    parts = layout.split('_')
    ltype = parts[0]
    lcount = int(parts[1])
    # The numeric suffix of the type name is a bit width; Numpy
    # typecodes want a byte width, so divide by 8.  Floor division
    # keeps this an int under Python 3 / future-division semantics
    # (plain '/' would yield '<f4.0', an invalid typecode).
    if ltype.startswith('int'):
        atype = '<i' + str(int(ltype[3:]) // 8)
    elif ltype.startswith('uint'):
        atype = '<u' + str(int(ltype[4:]) // 8)
    elif ltype.startswith('float'):
        atype = '<f' + str(int(ltype[5:]) // 8)
    else:
        raise ValueError("bad layout")
    return numpy.dtype([('timestamp', '<i8'), ('data', atype, lcount)])
-
class NumpyClient(nilmdb.client.client.Client):
    """Subclass of nilmdb.client.Client that adds additional methods for
    extracting and inserting data via Numpy arrays."""

    def stream_extract_numpy(self, path, start = None, end = None,
                             layout = None, maxrows = 100000,
                             structured = False):
        """
        Extract data from a stream. Returns a generator that yields
        Numpy arrays of up to 'maxrows' of data each.

        If 'layout' is None, it is read using stream_info.

        If 'structured' is False, all data is converted to float64
        and returned in a flat 2D array. Otherwise, data is returned
        as a structured dtype in a 1D array.
        """
        # Look up the layout from the server when the caller didn't
        # supply one; the path must match exactly one stream.
        if layout is None:
            matches = self.stream_list(path)
            if len(matches) != 1:
                raise ClientError("can't get layout for path: " + path)
            layout = matches[0][1]
        dtype = layout_to_dtype(layout)

        def as_array(raw):
            # Parse a binary chunk into either a structured 1D array
            # or a flat float64 2D array, per the 'structured' flag.
            parsed = numpy.fromstring(raw, dtype)
            return parsed if structured else \
                numpy.c_[parsed['timestamp'], parsed['data']]

        blocksize = dtype.itemsize * maxrows
        leftovers = []      # binary pieces not yet emitted
        buffered = 0        # total byte count held in 'leftovers'
        for raw in self.stream_extract(path, start, end, binary = True):
            # Accumulate pieces in a list (joined lazily) rather than
            # concatenating strings, which would be quadratic.
            leftovers.append(raw)
            buffered += len(raw)

            # Emit full blocks of 'maxrows' rows while we have them
            while buffered >= blocksize:
                joined = "".join(leftovers)
                yield as_array(joined[:blocksize])
                leftovers = [ joined[blocksize:] ]
                buffered -= blocksize

        # Flush whatever partial block remains at end of stream
        if buffered:
            yield as_array("".join(leftovers))
|