Browse Source

Decimate seems to work pretty well right now

tags/nilmtools-0.2^0
Jim Paris 8 years ago
parent
commit
54f8c34f8e
3 changed files with 141 additions and 49 deletions
  1. +4
    -1
      Makefile
  2. +40
    -32
      nilmtools/decimate.py
  3. +97
    -16
      nilmtools/filter.py

+ 4
- 1
Makefile View File

@@ -1,5 +1,8 @@
test:
python nilmtools/decimate.py /lees-compressor/noleak/raw /lees-compressor/noleak/raw~4
nilmtool remove /lees-compressor/noleak/raw~4 -s 2000 -e 2020
nilmtool remove /lees-compressor/noleak/raw~16 -s 2000 -e 2020
python nilmtools/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/noleak/raw /lees-compressor/noleak/raw~4
python nilmtools/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/noleak/raw~4 /lees-compressor/noleak/raw~16

all:
@echo "Try 'make install'"


+ 40
- 32
nilmtools/decimate.py View File

@@ -2,9 +2,7 @@

import nilmtools.filter
import nilmdb.client

def DecimateException(Exception):
pass
import numpy as np

def main():
f = nilmtools.filter.Filter()
@@ -16,10 +14,10 @@ def main():
try:
args = f.parse_args()
except nilmtools.filter.MissingDestination as e:
# If no destination, suggest how to create it.
# If no destination, suggest how to create it by figuring out
# a recommended layout.
print "Source is %s (%s)" % (e.src, e.layout)
print "Destination %s doesn't exist" % (e.dest)
# Figure out a recommended layout
if "decimate_source" in f.client.stream_get_metadata(e.src):
rec = e.layout
elif 'int32' in e.layout_type or 'float64' in e.layout_type:
@@ -30,33 +28,43 @@ def main():
print " nilmtool create", e.dest, rec
raise SystemExit(1)

# See if the metadata jives, and complain if it doesn't
dest_metadata = f.client.stream_get_metadata(args.destpath)
try:
rows = f.destinfo[4] # don't complain unless there's data
tmp = dest_metadata.get("decimate_source", args.srcpath)
if tmp != args.srcpath and rows > 0:
raise DecimateException("storing decimated data from %s" % tmp)
tmp = int(dest_metadata.get("decimate_factor", args.factor))
if tmp != args.factor and rows > 0:
raise DecimateException("storing data decimated at "
"a different factor (%d)", tmp)
except DecimateException as e:
print "The destination seems to already be " + str(e)
print "Refusing to change it. You can change the stream's"
print "decimate_* metadata, or remove all data in the stream,"
print "to prevent this error."

# Fill in the metadata in case it's missing
f.client.stream_update_metadata(args.destpath,
{ "decimate_source": args.srcpath,
"decimate_factor": args.factor })

# Process it
f.process(maxlen = 600, function = decimate, args = (factor,))

def decimate(data, start, end, factor):
pass
f.check_dest_metadata({ "decimate_source": args.srcpath,
"decimate_factor": args.factor })

# If source is decimated, we have to decimate a bit differently
if "decimate_source" in f.client.stream_get_metadata(args.srcpath):
f.process(function = decimate_again, rows = args.factor)
else:
f.process(function = decimate_first, rows = args.factor)

def decimate_first(data):
    """Decimate original data -- result has 3 times as many columns"""
    # Output layout: [ mean(timestamp), mean(col 1..n), min(col 1..n),
    # max(col 1..n) ], reduced over all input rows.
    arr = np.array(data)
    values = arr[:, 1:]          # every column except the timestamp
    out = np.concatenate([
        [np.mean(arr[:, 0], 0)],
        np.mean(values, 0),
        np.min(values, 0),
        np.max(values, 0),
    ])
    return [out]

def decimate_again(data):
    """Decimate already-decimated data -- result has the same number
    of columns"""
    # Columns are grouped as [ ts, mean*n, min*n, max*n ]; each group is
    # reduced with its matching operation so the layout is preserved.
    arr = np.array(data)
    n = (arr.shape[1] - 1) // 3
    out = np.zeros(1 + 3 * n)
    out[0] = np.mean(arr[:, 0], 0)
    for (sl, reduce_fn) in ((slice(1, n + 1), np.mean),
                            (slice(n + 1, 2 * n + 1), np.min),
                            (slice(2 * n + 1, 3 * n + 1), np.max)):
        out[sl] = reduce_fn(arr[:, sl], 0)
    return [out]

# Run the decimation filter when this module is executed as a script.
if __name__ == "__main__":
    main()

+ 97
- 16
nilmtools/filter.py View File

@@ -5,6 +5,8 @@ from nilmdb.utils.printf import *
from nilmdb.utils.time import parse_time, format_time

import nilmtools

import itertools
import time
import sys
import re
@@ -48,6 +50,14 @@ class Filter(object):
default = False,
help="Just print intervals that would be "
"processed")
group.add_argument("-s", "--start",
metavar="TIME", type=self.arg_time,
help="Starting timestamp for intervals "
"(free-form, inclusive)")
group.add_argument("-e", "--end",
metavar="TIME", type=self.arg_time,
help="Ending timestamp for intervals "
"(free-form, noninclusive)")
group.add_argument("srcpath", action="store",
help="Path of source stream, e.g. /foo/bar")
group.add_argument("destpath", action="store",
@@ -89,11 +99,20 @@ class Filter(object):
"""Generate all the intervals that this filter should process"""
self._using_client = True
for i in self._client.stream_intervals(
self._args.srcpath, diffpath = self._args.destpath):
self._args.srcpath, diffpath = self._args.destpath,
start = self._args.start, end = self._args.end):
yield i
self._using_client = False

# Misc helpers
def arg_time(self, toparse):
    """Parse a free-form time string argument into a timestamp.

    Used as an argparse 'type=' hook for -s/-e options."""
    try:
        parsed = nilmdb.utils.time.parse_time(toparse).totimestamp()
    except ValueError as e:
        # argparse displays ArgumentTypeError text directly to the user.
        raise argparse.ArgumentTypeError(sprintf("%s \"%s\"",
                                                 str(e), toparse))
    return parsed

def stream_info_string(self, info):
"""Print stream info as a string"""
return sprintf("%s (%s), %.2fM rows, %.2f hours",
@@ -104,27 +123,89 @@ class Filter(object):
return sprintf("[ %s -> %s ]", format_time(interval[0]),
format_time(interval[1]))

def check_dest_metadata(self, data):
    """See if the metadata jives, and complain if it doesn't.  If
    there's no conflict, update the metadata to match 'data'."""
    metadata = self._client.stream_get_metadata(self._args.destpath)
    # destinfo[4] is used as "number of rows in the destination", so a
    # metadata mismatch is only an error once real data exists --
    # TODO confirm index 4 is the row count of the stream-info tuple.
    rows = self.destinfo[4]
    for key in data:
        wanted = str(data[key])
        # A missing key defaults to the wanted value, so absent
        # metadata never counts as a conflict.
        val = metadata.get(key, wanted)
        if val != wanted and rows > 0:
            m = "Metadata in destination stream:\n"
            m += " %s = %s\n" % (key, val)
            m += "doesn't match desired data:\n"
            m += " %s = %s\n" % (key, wanted)
            m += "Refusing to change it. You can change the stream's "
            m += "metadata manually, or\n"
            m += "remove existing data from the stream, to prevent "
            m += "this error.\n"
            raise Exception(m)
    # All good -- write the metadata in case it's not already there
    self._client.stream_update_metadata(self._args.destpath, data)

# Main processing helper
def process(self, function, maxlen, args):
"""Process data in chunks.
def process(self, function, rows, partial = True, args = None):
"""Process data in chunks of 'rows' data at a time.

function: function to process the data
maxlen: maximum length of data to pass to function, in seconds
args: tuple containing extra arguments to pass to function
rows: maximum number of rows to pass to 'function' at once
args: tuple containing extra arguments to pass to 'function'
partial: if true, less than 'rows' may be passed to 'function'.
if false, partial data at the end of an interval will
be dropped.

'function' should be defined like:
function(data, start, end, *args)
It will be passed a block of data from the source stream,
the start and end times of that block, and any arguments
that were passed to process in 'args'. The total
length of the interval will be at most 'maxlen' seconds.

'function' should transform the data as desired, and return
a new list of data, which will be inserted into the
destination stream."""
function(data, *args)
It will be passed an array containing up to 'rows' rows of
data from the source stream, and any arguments passed in
'args'. It should transform the data as desired, and return a
new array of data, which will be inserted into the destination
stream.
"""
if args is None:
args = []
extractor = nilmdb.client.Client(self._args.url).stream_extract
inserter = nilmdb.client.Client(self._args.url).stream_insert_context
src = self._args.srcpath
dest = self._args.destpath
islice = itertools.islice

# Figure out how to format output data
dest_layout = self.destinfo[1].split('_')[1]
def int_formatter(row):
return ("%.6f " % row[0]) + " ".join(str(int(x)) for x in row[1:])
def float_formatter(row):
return ("%.6f " % row[0]) + " ".join(repr(x) for x in row[1:])
if "int" in dest_layout:
formatter = int_formatter
else:
formatter = float_formatter

for (start, end) in self.intervals():
if (end - start)
return
print "Processing", self.interval_string((start, end))
with inserter(dest, start, end) as insert_ctx:
src_array = []
for line in extractor(src, start, end):
# Read in data
src_array.append([ float(x) for x in line.split() ])

if len(src_array) == rows:
# Pass through filter function
dest_array = function(src_array, *args)

# Write result to destination
out = [ formatter(row) for row in dest_array ]
insert_ctx.insert("\n".join(out) + "\n")

# Clear source array
src_array = []

# Take care of partial chunk
if len(src_array) and partial:
dest_array = function(src_array, *args)
out = [ formatter(row) for row in dest_array ]
insert_ctx.insert("\n".join(out) + "\n")

def main():
# This is just a dummy function; actual filters can use the other


Loading…
Cancel
Save