# -*- coding: utf-8 -*-

"""Provide a NumpyClient class that is based on normal Client, but has
additional methods for extracting and inserting data via Numpy arrays."""

import nilmdb.utils
import nilmdb.client.client
import nilmdb.client.httpclient
from nilmdb.client.errors import ClientError

import contextlib
from nilmdb.utils.time import timestamp_to_string, string_to_timestamp

import numpy
import cStringIO

def layout_to_dtype(layout):
    """Return the structured Numpy dtype that matches a stream layout.

    'layout' is a string of the form '<type><bits>_<count>', such as
    'float32_8' or 'uint16_6', where <type> is 'int', 'uint', or
    'float'.  The returned dtype has two fields:

      'timestamp' -- one little-endian 64-bit signed integer
      'data'      -- <count> little-endian values of <bits>/8 bytes each

    Raises ValueError if the type is not recognized or if the bit
    width or count is not an integer, and IndexError if the layout
    has no '_' separator.
    """
    fields = layout.split('_')
    ltype = fields[0]
    lcount = int(fields[1])
    # Floor division keeps the byte width an integer.  True division
    # (Python 3, or "from __future__ import division") would build a
    # type string like '<f4.0', which Numpy does not accept.
    if ltype.startswith('int'):
        atype = '<i' + str(int(ltype[3:]) // 8)
    elif ltype.startswith('uint'):
        atype = '<u' + str(int(ltype[4:]) // 8)
    elif ltype.startswith('float'):
        atype = '<f' + str(int(ltype[5:]) // 8)
    else:
        raise ValueError("bad layout")
    return numpy.dtype([('timestamp', '<i8'), ('data', atype, lcount)])

class NumpyClient(nilmdb.client.client.Client):
    """Subclass of nilmdb.client.Client that adds additional methods for
    extracting and inserting data via Numpy arrays."""

    def stream_extract_numpy(self, path, start = None, end = None,
                             layout = None, maxrows = 100000,
                             structured = False):
        """
        Extract data from a stream.  Returns a generator that yields
        Numpy arrays of up to 'maxrows' of data each.

        'path', 'start', and 'end' are passed through unchanged to
        stream_extract, which is called with binary = True.

        If 'layout' is None, it is read using stream_list; ClientError
        is raised unless exactly one stream matches 'path'.

        If 'structured' is True, each array is 1D with the structured
        dtype returned by layout_to_dtype ('timestamp' and 'data'
        fields).  Otherwise each array is 2D, with the timestamp in
        column 0 followed by the data columns, all coerced by Numpy to
        one common dtype.  That is float64 for float layouts; integer
        layouts follow Numpy's type promotion and are not forced to
        float64.

        'maxrows' must be at least 1, or ValueError is raised.

        This is a generator, so argument checks and the layout lookup
        only run once iteration begins.
        """
        if maxrows < 1:
            # A block size of zero or less would never let the
            # chunking loop below finish.
            raise ValueError("maxrows must be at least 1")

        if layout is None:
            streams = self.stream_list(path)
            if len(streams) != 1:
                raise ClientError("can't get layout for path: " + path)
            layout = streams[0][1]
        dtype = layout_to_dtype(layout)

        def to_numpy(data):
            # frombuffer replaces the deprecated binary mode of
            # numpy.fromstring.  It returns a read-only view of 'data'.
            a = numpy.frombuffer(data, dtype)
            if structured:
                # Copy so the caller gets a writable array that owns
                # its memory, as numpy.fromstring provided.
                return a.copy()
            # numpy.c_ builds a new array, so no extra copy is needed.
            return numpy.c_[a['timestamp'], a['data']]

        pending = []       # binary pieces received but not yet yielded
        pending_len = 0    # total number of bytes held in 'pending'
        maxsize = dtype.itemsize * maxrows
        for data in self.stream_extract(path, start, end, binary = True):
            # Add this block of binary data
            pending.append(data)
            pending_len += len(data)

            # See if we have enough to make the requested Numpy array
            if pending_len < maxsize:
                continue

            # Join once, then cut out every complete block by offset.
            # This avoids re-copying the whole remainder for each
            # block when one piece of data spans many blocks.
            assembled = b"".join(pending)
            whole = pending_len - (pending_len % maxsize)
            for offset in range(0, whole, maxsize):
                yield to_numpy(assembled[offset:offset + maxsize])

            # Keep whatever is left over for the next round
            pending = [ assembled[whole:] ]
            pending_len -= whole

        # Whatever remains is shorter than a full block
        if pending_len:
            yield to_numpy(b"".join(pending))