Update decimate to use process_numpy

11 years ago · 60f09427cf
--- a/+ 3
+++ b/+ 3
@@ -9,6 +9,9 @@ else
 endif

 test:
 	src/decimate.py

 test_insert:
 	@make install >/dev/null
 	src/insert.py --file --dry-run  /test/foo </dev/null

--- a/src/decimate.py
+++ b/src/decimate.py
@@ -41,41 +41,45 @@ def main(argv = None):

    # If source is decimated, we have to decimate a bit differently
    if "decimate_source" in f.client_src.stream_get_metadata(args.srcpath):
        n = f.src.layout_count // 3
        f.process_python(function = decimate_again, rows = args.factor,
                         args = (n,))
        again = True
    else:
        n = f.src.layout_count
        f.process_python(function = decimate_first, rows = args.factor,
                         args = (n,))
        again = False
    f.process_numpy(decimate, args = (args.factor, again))

 def decimate_first(data, n):
    """Decimate original data -- result has 3 times as many columns

    Collapses all rows of `data` into one output row made of the
    column-wise means of every column, followed by the column-wise
    minimums and then the column-wise maximums of every column except
    the first.

    data -- sized, iterable collection of rows; must contain at least
            one row, because the first row seeds the accumulators
    n    -- not referenced in this function body

    Returns a list containing that single combined row.
    """
    # For this simple calculation, converting to a Numpy array
    # and doing the math is slower than just doing it directly.
    rows = iter(data)
    # Seed all three accumulators with the first row.  `.next()` is the
    # Python 2 iterator method, so this code is Python 2 only.
    r_sum = r_min = r_max = rows.next()
    for row in rows:
        # Each map() call yields a fresh per-column result, so the three
        # accumulators stop sharing the first row after one iteration.
        r_sum = map(operator.add, r_sum, row)
        r_min = map(min, r_min, row)
        r_max = map(max, r_max, row)
    # Turn the per-column sums into per-column means
    r_mean = [ x / len(data) for x in r_sum ]
    # Column 0 (presumably the timestamp -- confirm) contributes only its
    # mean; every other column contributes mean, min and max.
    return [ [ r_mean[0] ] + r_mean[1:] + r_min[1:] + r_max[1:] ]
 def decimate(data, interval, args, insert_function, final):
    """Decimate data

    data            -- 2-D array; its shape is unpacked as
                       (row count, column count)
    interval        -- not referenced in the lines shown here
    args            -- tuple (factor, again); main() passes
                       (args.factor, again) through process_numpy
    insert_function -- not referenced in the lines shown here
    final           -- not referenced in the lines shown here
    """
    # factor: number of input rows per group
    # again: true when the source was already decimated (set in main())
    (factor, again) = args
    # n = number of rows, m = number of columns
    (n, m) = data.shape

 def decimate_again(data, n):
    """Decimate already-decimated data -- result has the same number
    of columns

    Each input row is read as three consecutive groups: the first
    n + 1 values are averaged, the next n values are reduced with
    min, and the following n values are reduced with max.

    data -- sized, iterable collection of sliceable rows; must contain
            at least one row, because the first row seeds the
            accumulators
    n    -- width of each of the min and max groups

    Returns a list containing the single combined row.
    """
    rows = iter(data)
    # Seed the accumulators from the first row.  `.next()` is the
    # Python 2 iterator method, so this code is Python 2 only.
    r = rows.next()
    r_sum = r[0:(n+1)]
    r_min = r[(n+1):(2*n+1)]
    r_max = r[(2*n+1):(3*n+1)]
    for r in rows:
        # Fold each further row into the matching group: sums for the
        # first group, running min and max for the other two.
        r_sum = map(operator.add, r_sum, r[0:(n+1)])
        r_min = map(min, r_min, r[(n+1):(2*n+1)])
        r_max = map(max, r_max, r[(2*n+1):(3*n+1)])
    # Turn the sums of the first group into means
    r_mean = [ x / len(data) for x in r_sum ]
    return [ r_mean + r_min + r_max ]
    # NOTE(review): everything below follows the return above at the same
    # indentation, so as laid out here it can never run.  It uses `again`,
    # `m`, `factor` and `insert_function`, which are only bound in
    # decimate(), so it is presumably the remainder of that function's
    # body -- confirm against the real src/decimate.py.
    # Figure out which columns to use as the source for mean, min, and max,
    # depending on whether this is the first decimation or we're decimating
    # again.  Note that we include the timestamp in the means.
    if again:
        c = (m - 1) // 3
        # e.g. c = 3
        # ts mean1 mean2 mean3 min1 min2 min3 max1 max2 max3
        mean_col = slice(0, c + 1)
        min_col = slice(c + 1, 2 * c + 1)
        max_col = slice(2 * c + 1, 3 * c + 1)
    else:
        # First decimation: the means cover every column, while min and
        # max both read columns 1..m-1 (everything but column 0).
        mean_col = slice(0, m)
        min_col = slice(1, m)
        max_col = slice(1, m)

    # Discard extra rows that aren't a multiple of factor
    n = n // factor * factor
    data = data[:n,:]

    # Reshape it into 3D so we can process 'factor' rows at a time
    data.shape = (n // factor, factor, m)

    # Fill the result
    # Reducing over axis 1 collapses each group of 'factor' rows into one
    # row; np.c_ then joins the mean, min and max results column-wise.
    out = np.c_[ np.mean(data[:,:,mean_col], axis=1),
                 np.min(data[:,:,min_col], axis=1),
                 np.max(data[:,:,max_col], axis=1) ]

    insert_function(out)
    # n is now the number of rows kept after truncation -- presumably the
    # caller uses it as the count of rows consumed; confirm.
    return n

 if __name__ == "__main__":
    main()