nilmtools/src/decimate.py

#!/usr/bin/python

import nilmtools.filter
import nilmdb.client
import numpy as np
import operator

def main(argv=None):
    """Command-line entry point: decimate a source stream into a destination.

    Builds the argument parser through nilmtools.filter.Filter, adds the
    -f/--factor option, and hands the decimate() callback to
    Filter.process_numpy together with (factor, again).

    argv -- argument list forwarded to Filter.parse_args (default None).

    Raises SystemExit(1), after printing a suggested "nilmtool create"
    command, when parsing raises nilmtools.filter.MissingDestination.
    Raises Exception when the decimation factor is smaller than 2.
    """
    f = nilmtools.filter.Filter()
    parser = f.setup_parser("Decimate a stream")
    group = parser.add_argument_group("Decimate options")
    group.add_argument('-f', '--factor', action='store', default=4, type=int,
                       help='Decimation factor (default: %(default)s)')

    # Parse arguments
    try:
        args = f.parse_args(argv)
    except nilmtools.filter.MissingDestination as e:
        # If no destination, suggest how to create it by figuring out
        # a recommended layout.
        src = e.src
        dest = e.dest
        # Single-argument print(...) calls behave the same on Python 2
        # and Python 3; the bare print statement only parses on Python 2.
        print("Source is %s (%s)" % (src.path, src.layout))
        print("Destination %s doesn't exist" % (dest.path))
        if "decimate_source" in f.client_src.stream_get_metadata(src.path):
            # Source is already decimated: decimate() keeps the column
            # count unchanged in that case, so the same layout fits.
            rec = src.layout
        elif ('int32' in src.layout_type or
              'int64' in src.layout_type or
              'float64' in src.layout_type):
            # float32 cannot represent every 32-bit or 64-bit integer, so
            # wide integer sources get the same float64 recommendation as
            # float64 sources.  Each source column becomes three columns
            # (mean, min, max).
            rec = 'float64_' + str(src.layout_count * 3)
        else:
            rec = 'float32_' + str(src.layout_count * 3)
        print("You could make it with a command like:")
        print("  nilmtool -u %s create %s %s" % (dest.url, dest.path, rec))
        raise SystemExit(1)

    if args.factor < 2:
        raise Exception("factor needs to be 2 or more")

    f.check_dest_metadata({ "decimate_source": f.src.path,
                            "decimate_factor": args.factor })

    # If source is decimated, we have to decimate a bit differently:
    # decimate() then reads the existing mean/min/max column groups.
    again = "decimate_source" in f.client_src.stream_get_metadata(args.srcpath)
    f.process_numpy(decimate, args = (args.factor, again))

def decimate(data, interval, args, insert_function, final):
    """Collapse each run of 'factor' rows into one mean/min/max row.

    data            -- 2-D array; column 0 is treated as the timestamp.
    interval        -- not used by this function.
    args            -- (factor, again) tuple.  'again' is true when the
                       input already holds mean/min/max column groups
                       from an earlier decimation.
    insert_function -- called once with the decimated 2-D array.
    final           -- not used by this function.

    Returns the number of input rows that were consumed, which is the
    row count rounded down to a multiple of factor.
    """
    factor, again = args
    rows, cols = data.shape

    # Choose the source columns for each statistic.  The timestamp
    # (column 0) is always part of the mean group, so the output
    # timestamp is the average of the grouped timestamps.
    if not again:
        # First pass: every value column feeds mean, min and max alike.
        mean_col = slice(0, cols)
        min_col = slice(1, cols)
        max_col = slice(1, cols)
    else:
        # Repeat pass.  With per_stat = 3 the input columns are:
        #   ts mean1 mean2 mean3 min1 min2 min3 max1 max2 max3
        per_stat = (cols - 1) // 3
        mean_col = slice(0, per_stat + 1)
        min_col = slice(per_stat + 1, 2 * per_stat + 1)
        max_col = slice(2 * per_stat + 1, 3 * per_stat + 1)

    # Only whole groups of 'factor' rows are processed; trailing rows
    # that do not fill a group are left out.
    groups = rows // factor
    used = groups * factor

    # View the kept rows as (group, row-within-group, column) so each
    # statistic is a single reduction over axis 1.
    blocks = data[:used, :].reshape(groups, factor, cols)

    means = np.mean(blocks[:, :, mean_col], axis=1)
    minima = np.min(blocks[:, :, min_col], axis=1)
    maxima = np.max(blocks[:, :, max_col], axis=1)

    insert_function(np.concatenate((means, minima, maxima), axis=1))
    return used

# Run the decimation tool only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()