Compare commits
35 Commits
nilmtools-...nilmtools-
SHA1:
25c35a56f6
d610deaef0
d7d5ccc9a7
f28753ff5c
c9c2e0d5a8
5a2a32bec5
706c3933f9
cfd1719152
c62fb45980
57d856f2fa
5d83d93019
5f847a0513
29cd7eb6c7
62c8af41ea
4f6bc48619
cf9eb0ed48
32066fc260
739da3f973
83ad18ebf6
c76d527f95
b8a73278e7
ce0691d6c4
4da658e960
8ab31eafc2
979ab13bff
f4fda837ae
5547d266d0
372e977e4a
640a680704
2e74e6cd63
de2a794e00
065a40f265
65fa43aff1
57c23c3792
d4c8e4acb4
Makefile (36 changed lines)
@@ -8,22 +8,30 @@ else
 	@echo "Try 'make install'"
 endif
 
-test: test_cleanup
+test: test_trainola
+
+test_trainola:
+	-nilmtool -u http://bucket/nilmdb remove -s min -e max \
+		/sharon/prep-a-matches
+	nilmtools/trainola.py "$$(cat extras/trainola-test-param.js)"
 
 test_cleanup:
-	src/cleanup.py -e extras/cleanup.cfg
-	src/cleanup.py extras/cleanup.cfg
+	nilmtools/cleanup.py -e extras/cleanup.cfg
+	nilmtools/cleanup.py extras/cleanup.cfg
 
 test_insert:
 	@make install >/dev/null
-	src/insert.py --file --dry-run /test/foo </dev/null
+	nilmtools/insert.py --file --dry-run /test/foo </dev/null
 
 test_copy:
 	@make install >/dev/null
-	src/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*
+	nilmtools/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*
 
-test_prep:
+/tmp/raw.dat:
+	octave --eval 'fs = 8000;' \
+	       --eval 't = (0:fs*10)*2*pi*60/fs;' \
+	       --eval 'raw = transpose([sin(t); 0.3*sin(3*t)+sin(t)]);' \
+	       --eval 'save("-ascii","/tmp/raw.dat","raw");'
+
+test_prep: /tmp/raw.dat
 	@make install >/dev/null
 	-nilmtool destroy -R /test/raw
 	-nilmtool destroy -R /test/sinefit
 	-nilmtool destroy -R /test/prep
@@ -31,8 +39,8 @@ test_prep:
 	nilmtool create /test/sinefit float32_3
 	nilmtool create /test/prep float32_8
 	nilmtool insert -s '@0' -t -r 8000 /test/raw /tmp/raw.dat
-	src/sinefit.py -c 1 /test/raw /test/sinefit
-	src/prep.py -c 2 /test/raw /test/sinefit /test/prep
+	nilmtools/sinefit.py -a 0.5 -c 1 /test/raw /test/sinefit
+	nilmtools/prep.py -c 2 /test/raw /test/sinefit /test/prep
 	nilmtool extract -s min -e max /test/prep | head -20
 
 test_decimate:
@@ -40,8 +48,8 @@ test_decimate:
 	-@nilmtool destroy /lees-compressor/no-leak/raw/16 || true
 	-@nilmtool create /lees-compressor/no-leak/raw/4 float32_18 || true
 	-@nilmtool create /lees-compressor/no-leak/raw/16 float32_18 || true
-	time python src/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/1 /lees-compressor/no-leak/raw/4
-	python src/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/4 /lees-compressor/no-leak/raw/16
+	time python nilmtools/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/1 /lees-compressor/no-leak/raw/4
+	python nilmtools/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/4 /lees-compressor/no-leak/raw/16
 
 version:
 	python setup.py version
@@ -63,4 +71,4 @@ clean::
 gitclean::
 	git clean -dXf
 
-.PHONY: all version dist sdist install clean gitclean
+.PHONY: all version dist sdist install clean gitclean test
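The new /tmp/raw.dat rule generates ten seconds of a 60 Hz test signal sampled at 8 kHz: column 1 is a pure sine, column 2 adds a 30% third harmonic. A rough Python equivalent of the Octave one-liner, for reference (illustrative only, not part of the commit):

    import numpy as np

    fs = 8000
    t = np.arange(fs * 10 + 1) * 2 * np.pi * 60 / fs    # phase ramp for 60 Hz at fs
    raw = np.column_stack([np.sin(t),                    # pure fundamental
                           0.3 * np.sin(3 * t) + np.sin(t)])  # plus third harmonic
    np.savetxt("/tmp/raw.dat", raw)                      # ASCII, like save("-ascii", ...)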
README
@@ -5,10 +5,10 @@ by Jim Paris <jim@jtan.com>
 Prerequisites:
 
 	# Runtime and build environments
-	sudo apt-get install python2.7 python2.7-dev python-setuptools
-	sudo apt-get install python-numpy python-scipy python-matplotlib
+	sudo apt-get install python2.7 python2.7-dev python-setuptools python-pip
+	sudo apt-get install python-numpy python-scipy
 
-	nilmdb (1.5.0+)
+	nilmdb (1.8.1+)
 
 Install:
extras/trainola-test-param.js (new file, 31 lines)
@@ -0,0 +1,31 @@
{ "url": "http://bucket.mit.edu/nilmdb",
  "dest_stream": "/sharon/prep-a-matches",
  "stream": "/sharon/prep-a",
  "start": 1366111383280463,
  "end": 1366126163457797,
  "columns": [ { "name": "P1", "index": 0 },
               { "name": "Q1", "index": 1 },
               { "name": "P3", "index": 2 } ],
  "exemplars": [
    { "name": "Boiler Pump ON",
      "url": "http://bucket.mit.edu/nilmdb",
      "stream": "/sharon/prep-a",
      "start": 1366260494269078,
      "end": 1366260608185031,
      "dest_column": 0,
      "columns": [ { "name": "P1", "index": 0 },
                   { "name": "Q1", "index": 1 }
                 ]
    },
    { "name": "Boiler Pump OFF",
      "url": "http://bucket.mit.edu/nilmdb",
      "stream": "/sharon/prep-a",
      "start": 1366260864215764,
      "end": 1366260870882998,
      "dest_column": 1,
      "columns": [ { "name": "P1", "index": 0 },
                   { "name": "Q1", "index": 1 }
                 ]
    }
  ]
}
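This parameter file is what the new test_trainola Makefile target feeds to trainola.py. Since trainola's main() (added below) accepts either a JSON string or an already-parsed dictionary, a sketch of driving it from Python might look like this (assuming the file above is on disk and nilmtools is installed):

    import simplejson as json
    import nilmtools.trainola

    with open("extras/trainola-test-param.js") as f:
        conf = json.load(f)               # parsed dict, as NilmRun would pass it
    nilmtools.trainola.trainola(conf)     # or: nilmtools.trainola.main([conf])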
nilmtools/_version.py
@@ -181,7 +181,7 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False)
 
 tag_prefix = "nilmtools-"
 parentdir_prefix = "nilmtools-"
-versionfile_source = "src/_version.py"
+versionfile_source = "nilmtools/_version.py"
 
 def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
     variables = { "refnames": git_refnames, "full": git_full }
nilmtools/cleanup.py
@@ -238,12 +238,15 @@ def main(argv = None):
                        timestamp_to_seconds(total)))
             continue
         printf("  removing data before %s\n", timestamp_to_human(remove_before))
-        if args.yes:
-            client.stream_remove(path, None, remove_before)
-        for ap in streams[path].also_clean_paths:
-            printf("  also removing from %s\n", ap)
+        # Clean in reverse order.  Since we only use the primary stream and not
+        # the decimated streams to figure out which data to remove, removing
+        # the primary stream last means that we might recover more nicely if
+        # we are interrupted and restarted.
+        clean_paths = list(reversed(streams[path].also_clean_paths)) + [ path ]
+        for p in clean_paths:
+            printf("  removing from %s\n", p)
             if args.yes:
-                client.stream_remove(ap, None, remove_before)
+                client.stream_remove(p, None, remove_before)
 
     # All done
     if not args.yes:
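To make the reverse ordering concrete, a small sketch with hypothetical stream paths:

    # Hypothetical example: suppose the config yields
    #   path = "/sharon/raw"
    #   also_clean_paths = ["/sharon/raw~decim-4", "/sharon/raw~decim-16"]
    # then the new code removes data in this order:
    #   "/sharon/raw~decim-16", "/sharon/raw~decim-4", "/sharon/raw"
    # If the run is interrupted, the primary stream (which drives the
    # "what to remove" decision) is still intact, so a restart re-derives
    # the same removal range and finishes the decimated streams first.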
nilmtools/decimate_auto.py
@@ -4,15 +4,19 @@ import nilmtools.filter
 import nilmtools.decimate
 import nilmdb.client
 import argparse
+import fnmatch
 
 def main(argv = None):
     parser = argparse.ArgumentParser(
         formatter_class = argparse.RawDescriptionHelpFormatter,
-        version = "1.0",
+        version = nilmtools.__version__,
         description = """\
     Automatically create multiple decimations from a single source
     stream, continuing until the last decimated level contains fewer
     than 500 points total.
+
+    Wildcards and multiple paths are accepted.  Decimated paths are
+    ignored when matching wildcards.
     """)
     parser.add_argument("-u", "--url", action="store",
                         default="http://localhost/nilmdb/",
@@ -23,20 +27,36 @@ def main(argv = None):
                         default = False,
                         help="Force metadata changes if the dest "
                         "doesn't match")
-    parser.add_argument("path", action="store",
+    parser.add_argument("path", action="store", nargs='+',
                         help='Path of base stream')
     args = parser.parse_args(argv)
 
-    # Pull out info about the base stream
     client = nilmdb.client.Client(args.url)
 
-    info = nilmtools.filter.get_stream_info(client, args.path)
-    if not info:
-        raise Exception("path " + args.path + " not found")
+    # Find list of paths to process
+    streams = [ unicode(s[0]) for s in client.stream_list() ]
+    streams = [ s for s in streams if "~decim-" not in s ]
+    paths = []
+    for path in args.path:
+        new = fnmatch.filter(streams, unicode(path))
+        if not new:
+            print "error: no stream matched path:", path
+            raise SystemExit(1)
+        paths.extend(new)
 
-    meta = client.stream_get_metadata(args.path)
+    for path in paths:
+        do_decimation(client, args, path)
+
+def do_decimation(client, args, path):
+    print "Decimating", path
+    info = nilmtools.filter.get_stream_info(client, path)
+    if not info:
+        raise Exception("path " + path + " not found")
+
+    meta = client.stream_get_metadata(path)
     if "decimate_source" in meta:
-        print "Stream", args.path, "was decimated from", meta["decimate_source"]
+        print "Stream", path, "was decimated from", meta["decimate_source"]
         print "You need to pass the base stream instead"
         raise SystemExit(1)
@@ -53,7 +73,7 @@ def main(argv = None):
         if info.rows <= 500:
             break
         factor *= args.factor
-        new_path = "%s~decim-%d" % (args.path, factor)
+        new_path = "%s~decim-%d" % (path, factor)
 
         # Create the stream if needed
         new_info = nilmtools.filter.get_stream_info(client, new_path)
@@ -72,5 +92,7 @@ def main(argv = None):
         # Update info using the newly decimated stream
         info = nilmtools.filter.get_stream_info(client, new_path)
 
+    return
+
 if __name__ == "__main__":
     main()
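The wildcard handling above filters out decimated streams before matching, so a pattern like /test/* cannot accidentally select /test/raw~decim-4. A small sketch of that logic in isolation (stream names are examples):

    import fnmatch

    streams = [u"/test/raw", u"/test/raw~decim-4", u"/sharon/prep-a"]
    streams = [s for s in streams if u"~decim-" not in s]   # drop decimated paths
    print fnmatch.filter(streams, u"/test/*")               # -> [u'/test/raw']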
nilmtools/filter.py
@@ -19,6 +19,10 @@ import re
 import argparse
 import numpy as np
 import cStringIO
+import functools
+
+class ArgumentError(Exception):
+    pass
 
 class MissingDestination(Exception):
     def __init__(self, args, src, dest):
@@ -65,9 +69,73 @@ def get_stream_info(client, path):
         return None
     return StreamInfo(client.geturl(), streams[0])
 
+# Filter processing for a single interval of data.
+def process_numpy_interval(interval, extractor, inserter, warn_rows,
+                           function, args = None):
+    """For the given 'interval' of data, extract data, process it
+    through 'function', and insert the result.
+
+    'extractor' should be a function like NumpyClient.stream_extract_numpy
+    but with the the interval 'start' and 'end' as the only parameters,
+    e.g.:
+      extractor = functools.partial(NumpyClient.stream_extract_numpy,
+                                    src_path, layout = l, maxrows = m)
+
+    'inserter' should be a function like NumpyClient.stream_insert_context
+    but with the interval 'start' and 'end' as the only parameters, e.g.:
+      inserter = functools.partial(NumpyClient.stream_insert_context,
+                                   dest_path)
+
+    If 'warn_rows' is not None, print a warning to stdout when the
+    number of unprocessed rows exceeds this amount.
+
+    See process_numpy for details on 'function' and 'args'.
+    """
+    if args is None:
+        args = []
+
+    with inserter(interval.start, interval.end) as insert_ctx:
+        insert_func = insert_ctx.insert
+        old_array = np.array([])
+        for new_array in extractor(interval.start, interval.end):
+            # If we still had old data left, combine it
+            if old_array.shape[0] != 0:
+                array = np.vstack((old_array, new_array))
+            else:
+                array = new_array
+
+            # Pass the data to the user provided function
+            processed = function(array, interval, args, insert_func, False)
+
+            # Send any pending data that the user function inserted
+            insert_ctx.send()
+
+            # Save the unprocessed parts
+            if processed >= 0:
+                old_array = array[processed:]
+            else:
+                raise Exception(
+                    sprintf("%s return value %s must be >= 0",
+                            str(function), str(processed)))
+
+            # Warn if there's too much data remaining
+            if warn_rows is not None and old_array.shape[0] > warn_rows:
+                printf("warning: %d unprocessed rows in buffer\n",
+                       old_array.shape[0])
+
+        # Last call for this contiguous interval
+        if old_array.shape[0] != 0:
+            processed = function(old_array, interval, args,
+                                 insert_func, True)
+            if processed != old_array.shape[0]:
+                # Truncate the interval we're inserting at the first
+                # unprocessed data point.  This ensures that
+                # we'll not miss any data when we run again later.
+                insert_ctx.update_end(old_array[processed][0])
+
 class Filter(object):
 
-    def __init__(self):
+    def __init__(self, parser_description = None):
        self._parser = None
        self._client_src = None
        self._client_dest = None
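A minimal sketch of calling process_numpy_interval directly, following its docstring (the URL, stream paths, and layout are examples; the inserter uses stream_insert_numpy_context, as the in-tree callers do):

    import functools
    import nilmdb.client.numpyclient
    import nilmdb.utils.interval
    import nilmtools.filter

    client = nilmdb.client.numpyclient.NumpyClient("http://localhost/nilmdb/")
    extractor = functools.partial(client.stream_extract_numpy, "/test/raw",
                                  layout = "float32_8", maxrows = 100000)
    inserter = functools.partial(client.stream_insert_numpy_context, "/test/copy")

    def passthrough(data, interval, args, insert_func, final):
        insert_func(data)        # insert everything we were given
        return data.shape[0]     # report all rows as processed

    # Process every interval present in /test/raw but missing from /test/copy:
    for (s, e) in client.stream_intervals("/test/raw", diffpath = "/test/copy"):
        nilmtools.filter.process_numpy_interval(
            nilmdb.utils.interval.Interval(s, e), extractor, inserter,
            300000, passthrough)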
@@ -78,6 +146,9 @@ class Filter(object):
         self.end = None
         self.interhost = False
         self.force_metadata = False
+        if parser_description is not None:
+            self.setup_parser(parser_description)
+            self.parse_args()
 
     @property
     def client_src(self):
@@ -131,63 +202,52 @@ class Filter(object):
         self._parser = parser
         return parser
 
-    def interval_string(self, interval):
-        return sprintf("[ %s -> %s ]",
-                       timestamp_to_human(interval.start),
-                       timestamp_to_human(interval.end))
-
-    def parse_args(self, argv = None):
-        args = self._parser.parse_args(argv)
-
-        if args.dest_url is None:
-            args.dest_url = args.url
-        if args.url != args.dest_url:
+    def set_args(self, url, dest_url, srcpath, destpath, start, end,
+                 parsed_args = None, quiet = True):
+        """Set arguments directly from parameters"""
+        if dest_url is None:
+            dest_url = url
+        if url != dest_url:
             self.interhost = True
 
-        self._client_src = Client(args.url)
-        self._client_dest = Client(args.dest_url)
+        self._client_src = Client(url)
+        self._client_dest = Client(dest_url)
 
-        if (not self.interhost) and (args.srcpath == args.destpath):
-            self._parser.error("source and destination path must be different")
+        if (not self.interhost) and (srcpath == destpath):
+            raise ArgumentError("source and destination path must be different")
 
-        # Open and print info about the streams
-        self.src = get_stream_info(self._client_src, args.srcpath)
+        # Open the streams
+        self.src = get_stream_info(self._client_src, srcpath)
         if not self.src:
-            self._parser.error("source path " + args.srcpath + " not found")
+            raise ArgumentError("source path " + srcpath + " not found")
 
-        self.dest = get_stream_info(self._client_dest, args.destpath)
+        self.dest = get_stream_info(self._client_dest, destpath)
         if not self.dest:
-            raise MissingDestination(args, self.src,
-                                     StreamInfo(args.dest_url, [args.destpath]))
+            raise MissingDestination(parsed_args, self.src,
+                                     StreamInfo(dest_url, [destpath]))
 
-        print "Source:", self.src.string(self.interhost)
-        print "  Dest:", self.dest.string(self.interhost)
+        self.start = start
+        self.end = end
 
-        if args.dry_run:
-            for interval in self.intervals():
-                print self.interval_string(interval)
-            raise SystemExit(0)
+        # Print info
+        if not quiet:
+            print "Source:", self.src.string(self.interhost)
+            print "  Dest:", self.dest.string(self.interhost)
+
+    def parse_args(self, argv = None):
+        """Parse arguments from a command line"""
+        args = self._parser.parse_args(argv)
+
+        self.set_args(args.url, args.dest_url, args.srcpath, args.destpath,
+                      args.start, args.end, quiet = False, parsed_args = args)
+
+        self.force_metadata = args.force_metadata
+
+        if args.dry_run:
+            for interval in self.intervals():
+                print interval.human_string()
+            raise SystemExit(0)
         return args
 
-    def _optimize_int(self, it):
-        """Join and yield adjacent intervals from the iterator 'it'"""
-        saved_int = None
-        for interval in it:
-            if saved_int is not None:
-                if saved_int.end == interval.start:
-                    interval.start = saved_int.start
-                else:
-                    yield saved_int
-            saved_int = interval
-        if saved_int is not None:
-            yield saved_int
-
     def intervals(self):
         """Generate all the intervals that this filter should process"""
         self._using_client = True
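The set_args() refactoring lets a script configure a Filter without building a command line, raising ArgumentError instead of calling parser.error(). A sketch, with an example URL and paths:

    import nilmtools.filter

    f = nilmtools.filter.Filter()
    f.set_args("http://localhost/nilmdb/", None,     # dest_url None -> same server
               "/test/raw", "/test/out", None, None) # start/end None -> full range
    for interval in f.intervals():
        print "would process", interval.human_string()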
@@ -214,12 +274,13 @@ class Filter(object):
                           self.src.path, diffpath = self.dest.path,
                           start = self.start, end = self.end) )
         # Optimize intervals: join intervals that are adjacent
-        for interval in self._optimize_int(intervals):
+        for interval in nilmdb.utils.interval.optimize(intervals):
             yield interval
         self._using_client = False
 
     # Misc helpers
-    def arg_time(self, toparse):
+    @staticmethod
+    def arg_time(toparse):
         """Parse a time string argument"""
         try:
             return nilmdb.utils.time.parse_time(toparse)
@@ -233,8 +294,14 @@ class Filter(object):
         metadata = self._client_dest.stream_get_metadata(self.dest.path)
         if not self.force_metadata:
             for key in data:
-                wanted = str(data[key])
+                wanted = data[key]
+                if not isinstance(wanted, basestring):
+                    wanted = str(wanted)
                 val = metadata.get(key, wanted)
+                # Force UTF-8 encoding for comparison and display
+                wanted = wanted.encode('utf-8')
+                val = val.encode('utf-8')
+                key = key.encode('utf-8')
                 if val != wanted and self.dest.rows > 0:
                     m = "Metadata in destination stream:\n"
                     m += "  %s = %s\n" % (key, val)
@@ -250,13 +317,16 @@ class Filter(object):
 
     # The main filter processing method.
     def process_numpy(self, function, args = None, rows = 100000):
-        """For all intervals that exist in self.src but don't exist in
-        self.dest, call 'function' with a Numpy array corresponding to
-        the data.  The data is converted to a Numpy array in chunks of
-        'rows' rows at a time.
+        """Calls process_numpy_interval for each interval that currently
+        exists in self.src, but doesn't exist in self.dest.  It will
+        process the data in chunks as follows:
+
+        For each chunk of data, call 'function' with a Numpy array
+        corresponding to the data.  The data is converted to a Numpy
+        array in chunks of 'rows' rows at a time.
 
         'function' should be defined as:
-        def function(data, interval, args, insert_func, final)
+        # def function(data, interval, args, insert_func, final)
 
         'data': array of data to process -- may be empty
 
@@ -275,51 +345,23 @@ class Filter(object):
         Return value of 'function' is the number of data rows processed.
         Unprocessed data will be provided again in a subsequent call
         (unless 'final' is True).
+
+        If unprocessed data remains after 'final' is True, the interval
+        being inserted will be ended at the timestamp of the first
+        unprocessed data point.
         """
         if args is None:
             args = []
         extractor = NumpyClient(self.src.url).stream_extract_numpy
         inserter = NumpyClient(self.dest.url).stream_insert_numpy_context
 
-        for interval in self.intervals():
-            print "Processing", self.interval_string(interval)
-            with inserter(self.dest.path,
-                          interval.start, interval.end) as insert_ctx:
-                insert_function = insert_ctx.insert
-                old_array = np.array([])
-                for new_array in extractor(self.src.path,
-                                           interval.start, interval.end,
-                                           layout = self.src.layout,
-                                           maxrows = rows):
-                    # If we still had old data left, combine it
-                    if old_array.shape[0] != 0:
-                        array = np.vstack((old_array, new_array))
-                    else:
-                        array = new_array
-
-                    # Pass it to the process function
-                    processed = function(array, interval, args,
-                                         insert_function, False)
-
-                    # Send any pending data
-                    insert_ctx.send()
-
-                    # Save the unprocessed parts
-                    if processed >= 0:
-                        old_array = array[processed:]
-                    else:
-                        raise Exception(
-                            sprintf("%s return value %s must be >= 0",
-                                    str(function), str(processed)))
-
-                    # Warn if there's too much data remaining
-                    if old_array.shape[0] > 3 * rows:
-                        printf("warning: %d unprocessed rows in buffer\n",
-                               old_array.shape[0])
-
-                # Last call for this contiguous interval
-                if old_array.shape[0] != 0:
-                    function(old_array, interval, args, insert_function, True)
+        extractor_func = functools.partial(extractor, self.src.path,
+                                           layout = self.src.layout,
+                                           maxrows = rows)
+        inserter_func = functools.partial(inserter, self.dest.path)
+
+        for interval in self.intervals():
+            print "Processing", interval.human_string()
+            process_numpy_interval(interval, extractor_func, inserter_func,
+                                   rows * 3, function, args)
 
 def main(argv = None):
     # This is just a dummy function; actual filters can use the other
@@ -328,7 +370,7 @@ def main(argv = None):
     parser = f.setup_parser()
     args = f.parse_args(argv)
     for i in f.intervals():
-        print "Generic filter: need to handle", f.interval_string(i)
+        print "Generic filter: need to handle", i.human_string()
 
 if __name__ == "__main__":
     main()
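Putting the 'function' contract together, a toy filter built on process_numpy might look like the sketch below (nilmtools/median.py, the next file, is the real in-tree example of the same pattern; the metadata key and scale factor here are made up):

    import nilmtools.filter

    def scale_filter(data, interval, args, insert_func, final):
        (factor,) = args
        data[:, 1:] *= factor      # scale all data columns, leave timestamps
        insert_func(data)
        return data.shape[0]       # every row consumed; nothing carried over

    def main(argv = None):
        f = nilmtools.filter.Filter()
        f.setup_parser("Scale filter (illustrative)")
        args = f.parse_args(argv)
        f.check_dest_metadata({ "scale_source": f.src.path })
        f.process_numpy(scale_filter, args = (2.0,))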
nilmtools/median.py (new executable file, 43 lines)
@@ -0,0 +1,43 @@
#!/usr/bin/python
import nilmtools.filter, scipy.signal

def main(argv = None):
    f = nilmtools.filter.Filter()
    parser = f.setup_parser("Median Filter")
    group = parser.add_argument_group("Median filter options")
    group.add_argument("-z", "--size", action="store", type=int, default=25,
                       help = "median filter size (default %(default)s)")
    group.add_argument("-d", "--difference", action="store_true",
                       help = "store difference rather than filtered values")

    try:
        args = f.parse_args(argv)
    except nilmtools.filter.MissingDestination as e:
        print "Source is %s (%s)" % (e.src.path, e.src.layout)
        print "Destination %s doesn't exist" % (e.dest.path)
        print "You could make it with a command like:"
        print "  nilmtool -u %s create %s %s" % (e.dest.url,
                                                 e.dest.path, e.src.layout)
        raise SystemExit(1)

    meta = f.client_src.stream_get_metadata(f.src.path)
    f.check_dest_metadata({ "median_filter_source": f.src.path,
                            "median_filter_size": args.size,
                            "median_filter_difference": repr(args.difference) })

    f.process_numpy(median_filter, args = (args.size, args.difference))

def median_filter(data, interval, args, insert, final):
    (size, diff) = args
    (rows, cols) = data.shape
    for i in range(cols - 1):
        filtered = scipy.signal.medfilt(data[:, i+1], size)
        if diff:
            data[:, i+1] -= filtered
        else:
            data[:, i+1] = filtered
    insert(data)
    return rows

if __name__ == "__main__":
    main()
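With the nilm-median entry point added in setup.py (below), the filter runs like any other nilmtools command. A hypothetical invocation, mirroring the Makefile's command style (the stream paths are examples, and the destination must already exist with the same layout as the source):

    nilmtool -u http://localhost/nilmdb/ create /sharon/prep-a-median float32_8
    nilm-median -u http://localhost/nilmdb/ -z 25 /sharon/prep-a /sharon/prep-a-median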
nilmtools/prep.py
@@ -3,6 +3,8 @@
 # Spectral envelope preprocessor.
 # Requires two streams as input: the original raw data, and sinefit data.
 
+from nilmdb.utils.printf import *
+from nilmdb.utils.time import timestamp_to_human
 import nilmtools.filter
 import nilmdb.client
 from numpy import *
@@ -77,7 +79,8 @@ def main(argv = None):
     # Check and set metadata in prep stream
     f.check_dest_metadata({ "prep_raw_source": f.src.path,
                             "prep_sinefit_source": sinefit.path,
-                            "prep_column": args.column })
+                            "prep_column": args.column,
+                            "prep_rotation": repr(rotation) })
 
     # Run the processing function on all data
     f.process_numpy(process, args = (client_sinefit, sinefit.path, args.column,
@@ -105,7 +108,6 @@ def process(data, interval, args, insert_function, final):
     # Pull out sinefit data for the entire time range of this block
     for sinefit_line in client.stream_extract(sinefit_path,
                                               data[0, 0], data[rows-1, 0]):
-
         def prep_period(t_min, t_max, rot):
             """
             Compute prep coefficients from time t_min to t_max, which
@@ -162,7 +164,15 @@ def process(data, interval, args, insert_function, final):
             break
         processed = idx_max
 
-    print "Processed", processed, "of", rows, "rows"
+    # If we processed no data but there's lots in here, pretend we
+    # processed half of it.
+    if processed == 0 and rows > 10000:
+        processed = rows / 2
+        printf("%s: warning: no periods found; skipping %d rows\n",
+               timestamp_to_human(data[0][0]), processed)
+    else:
+        printf("%s: processed %d of %d rows\n",
+               timestamp_to_human(data[0][0]), processed, rows)
     return processed
 
 if __name__ == "__main__":
nilmtools/sinefit.py (new executable file, 262 lines)
@@ -0,0 +1,262 @@
#!/usr/bin/python

# Sine wave fitting.
from nilmdb.utils.printf import *
import nilmtools.filter
import nilmdb.client
from nilmdb.utils.time import (timestamp_to_human,
                               timestamp_to_seconds,
                               seconds_to_timestamp)

from numpy import *
from scipy import *
#import pylab as p
import operator
import sys

def main(argv = None):
    f = nilmtools.filter.Filter()
    parser = f.setup_parser("Sine wave fitting")
    group = parser.add_argument_group("Sine fit options")
    group.add_argument('-c', '--column', action='store', type=int,
                       help='Column number (first data column is 1)')
    group.add_argument('-f', '--frequency', action='store', type=float,
                       default=60.0,
                       help='Approximate frequency (default: %(default)s)')
    group.add_argument('-m', '--min-freq', action='store', type=float,
                       help='Minimum valid frequency '
                       '(default: approximate frequency / 2))')
    group.add_argument('-M', '--max-freq', action='store', type=float,
                       help='Maximum valid frequency '
                       '(default: approximate frequency * 2))')
    group.add_argument('-a', '--min-amp', action='store', type=float,
                       default=20.0,
                       help='Minimum signal amplitude (default: %(default)s)')

    # Parse arguments
    try:
        args = f.parse_args(argv)
    except nilmtools.filter.MissingDestination as e:
        rec = "float32_3"
        print "Source is %s (%s)" % (e.src.path, e.src.layout)
        print "Destination %s doesn't exist" % (e.dest.path)
        print "You could make it with a command like:"
        print "  nilmtool -u %s create %s %s" % (e.dest.url, e.dest.path, rec)
        raise SystemExit(1)

    if args.column is None or args.column < 1:
        parser.error("need a column number >= 1")
    if args.frequency < 0.1:
        parser.error("frequency must be >= 0.1")
    if args.min_freq is None:
        args.min_freq = args.frequency / 2
    if args.max_freq is None:
        args.max_freq = args.frequency * 2
    if (args.min_freq > args.max_freq or
        args.min_freq > args.frequency or
        args.max_freq < args.frequency):
        parser.error("invalid min or max frequency")
    if args.min_amp < 0:
        parser.error("min amplitude must be >= 0")

    f.check_dest_metadata({ "sinefit_source": f.src.path,
                            "sinefit_column": args.column })
    f.process_numpy(process, args = (args.column, args.frequency, args.min_amp,
                                     args.min_freq, args.max_freq))

class SuppressibleWarning(object):
    def __init__(self, maxcount = 10, maxsuppress = 100):
        self.maxcount = maxcount
        self.maxsuppress = maxsuppress
        self.count = 0
        self.last_msg = ""

    def _write(self, sec, msg):
        if sec:
            now = timestamp_to_human(seconds_to_timestamp(sec)) + ": "
        else:
            now = ""
        sys.stderr.write(now + msg)

    def warn(self, msg, seconds = None):
        self.count += 1
        if self.count <= self.maxcount:
            self._write(seconds, msg)
        if (self.count - self.maxcount) >= self.maxsuppress:
            self.reset(seconds)

    def reset(self, seconds = None):
        if self.count > self.maxcount:
            self._write(seconds, sprintf("(%d warnings suppressed)\n",
                                         self.count - self.maxcount))
        self.count = 0

def process(data, interval, args, insert_function, final):
    (column, f_expected, a_min, f_min, f_max) = args
    rows = data.shape[0]

    # Estimate sampling frequency from timestamps
    fs = (rows-1) / (timestamp_to_seconds(data[-1][0]) -
                     timestamp_to_seconds(data[0][0]))

    # Pull out about 3.5 periods of data at once;
    # we'll expect to match 3 zero crossings in each window
    N = max(int(3.5 * fs / f_expected), 10)

    # If we don't have enough data, don't bother processing it
    if rows < N:
        return 0

    warn = SuppressibleWarning(3, 1000)

    # Process overlapping windows
    start = 0
    num_zc = 0
    last_inserted_timestamp = None
    while start < (rows - N):
        this = data[start:start+N, column]
        t_min = timestamp_to_seconds(data[start, 0])
        t_max = timestamp_to_seconds(data[start+N-1, 0])

        # Do 4-parameter sine wave fit
        (A, f0, phi, C) = sfit4(this, fs)

        # Check bounds.  If frequency is too crazy, ignore this window
        if f0 < f_min or f0 > f_max:
            warn.warn(sprintf("frequency %s outside valid range %s - %s\n",
                              str(f0), str(f_min), str(f_max)), t_min)
            start += N
            continue

        # If amplitude is too low, results are probably just noise
        if A < a_min:
            warn.warn(sprintf("amplitude %s below minimum threshold %s\n",
                              str(A), str(a_min)), t_min)
            start += N
            continue

        #p.plot(arange(N), this)
        #p.plot(arange(N), A * sin(f0/fs * 2 * pi * arange(N) + phi) + C, 'g')

        # Period starts when the argument of sine is 0 degrees,
        # so we're looking for sample number:
        #   n = (0 - phi) / (f0/fs * 2 * pi)
        zc_n = (0 - phi) / (f0 / fs * 2 * pi)
        period_n = fs/f0

        # Add periods to make N positive
        while zc_n < 0:
            zc_n += period_n

        last_zc = None
        # Mark the zero crossings until we're a half period away
        # from the end of the window
        while zc_n < (N - period_n/2):
            #p.plot(zc_n, C, 'ro')
            t = t_min + zc_n / fs
            if (last_inserted_timestamp is None or
                t > last_inserted_timestamp):
                insert_function([[seconds_to_timestamp(t), f0, A, C]])
                last_inserted_timestamp = t
                warn.reset(t)
            else:
                warn.warn("timestamp overlap\n", t)
            num_zc += 1
            last_zc = zc_n
            zc_n += period_n

        # Advance the window one quarter period past the last marked
        # zero crossing, or advance the window by half its size if we
        # didn't mark any.
        if last_zc is not None:
            advance = min(last_zc + period_n/4, N)
        else:
            advance = N/2
        #p.plot(advance, C, 'go')
        #p.show()

        start = int(round(start + advance))

    # Return the number of rows we've processed
    warn.reset(last_inserted_timestamp)
    if last_inserted_timestamp:
        now = timestamp_to_human(seconds_to_timestamp(
            last_inserted_timestamp)) + ": "
    else:
        now = ""
    printf("%sMarked %d zero-crossings in %d rows\n", now, num_zc, start)
    return start

def sfit4(data, fs):
    """(A, f0, phi, C) = sfit4(data, fs)

    Compute 4-parameter (unknown-frequency) least-squares fit to
    sine-wave data, according to IEEE Std 1241-2010 Annex B

    Input:
      data   vector of input samples
      fs     sampling rate (Hz)

    Output:
      Parameters [A, f0, phi, C] to fit the equation
        x[n] = A * sin(f0/fs * 2 * pi * n + phi) + C
      where n is sample number.  Or, as a function of time:
        x(t) = A * sin(f0 * 2 * pi * t + phi) + C

    by Jim Paris
    (Verified to match sfit4.m)
    """
    N = len(data)
    t = linspace(0, (N-1) / float(fs), N)

    ## Estimate frequency using FFT (step b)
    Fc = fft(data)
    F = abs(Fc)
    F[0] = 0 # eliminate DC

    # Find pair of spectral lines with largest amplitude:
    # resulting values are in F(i) and F(i+1)
    i = argmax(F[0:int(N/2)] + F[1:int(N/2+1)])

    # Interpolate FFT to get a better result (from Markus [B37])
    U1 = real(Fc[i])
    U2 = real(Fc[i+1])
    V1 = imag(Fc[i])
    V2 = imag(Fc[i+1])
    n = 2 * pi / N
    ni1 = n * i
    ni2 = n * (i+1)
    K = ((V2-V1)*sin(ni1) + (U2-U1)*cos(ni1)) / (U2-U1)
    Z1 = V1 * (K - cos(ni1)) / sin(ni1) + U1
    Z2 = V2 * (K - cos(ni2)) / sin(ni2) + U2
    i = arccos((Z2*cos(ni2) - Z1*cos(ni1)) / (Z2-Z1)) / n

    # Convert to Hz
    f0 = i * float(fs) / N

    # Fit it.  We'll catch exceptions here and just returns zeros
    # if something fails with the least squares fit, etc.
    try:
        # first guess for A0, B0 using 3-parameter fit (step c)
        s = zeros(3)
        w = 2*pi*f0

        # Now iterate 7 times (step b, plus 6 iterations of step i)
        for idx in range(7):
            D = c_[cos(w*t), sin(w*t), ones(N),
                   -s[0] * t * sin(w*t) + s[1] * t * cos(w*t) ] # eqn B.16
            s = linalg.lstsq(D, data)[0] # eqn B.18
            w = w + s[3] # update frequency estimate

        ## Extract results
        A = sqrt(s[0]*s[0] + s[1]*s[1]) # eqn B.21
        f0 = w / (2*pi)
        phi = arctan2(s[0], s[1]) # eqn B.22 (flipped for sin instead of cos)
        C = s[2]
        return (A, f0, phi, C)
    except Exception as e:
        # something broke down, just return zeros
        return (0, 0, 0, 0)

if __name__ == "__main__":
    main()
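A quick numerical check of sfit4() on synthetic data (a sketch; the amplitude, phase, and offset are chosen arbitrarily):

    from numpy import arange, sin, pi
    import nilmtools.sinefit

    fs = 8000.0
    n = arange(8192)
    # x[n] = A * sin(f0/fs * 2*pi*n + phi) + C with A=120, f0=60, phi=0.5, C=2
    data = 120.0 * sin(60.0 / fs * 2 * pi * n + 0.5) + 2.0
    (A, f0, phi, C) = nilmtools.sinefit.sfit4(data, fs)
    print A, f0, phi, C    # should recover approximately (120, 60, 0.5, 2)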
nilmtools/trainola.py (new executable file, 279 lines)
@@ -0,0 +1,279 @@
#!/usr/bin/python

from nilmdb.utils.printf import *
import nilmdb.client
import nilmtools.filter
from nilmdb.utils.time import (timestamp_to_human,
                               timestamp_to_seconds,
                               seconds_to_timestamp)
from nilmdb.utils.interval import Interval

import numpy as np
import scipy
import scipy.signal
from numpy.core.umath_tests import inner1d
import nilmrun
from collections import OrderedDict
import sys
import functools
import collections

class DataError(ValueError):
    pass

def build_column_mapping(colinfo, streaminfo):
    """Given the 'columns' list from the JSON data, verify and
    pull out a dictionary mapping for the column names/numbers."""
    columns = OrderedDict()
    for c in colinfo:
        if (c['name'] in columns.keys() or
            c['index'] in columns.values()):
            raise DataError("duplicated columns")
        if (c['index'] < 0 or c['index'] >= streaminfo.layout_count):
            raise DataError("bad column number")
        columns[c['name']] = c['index']
    if not len(columns):
        raise DataError("no columns")
    return columns

class Exemplar(object):
    def __init__(self, exinfo, min_rows = 10, max_rows = 100000):
        """Given a dictionary entry from the 'exemplars' input JSON,
        verify the stream, columns, etc.  Then, fetch all the data
        into self.data."""

        self.name = exinfo['name']
        self.url = exinfo['url']
        self.stream = exinfo['stream']
        self.start = exinfo['start']
        self.end = exinfo['end']
        self.dest_column = exinfo['dest_column']

        # Get stream info
        self.client = nilmdb.client.numpyclient.NumpyClient(self.url)
        self.info = nilmtools.filter.get_stream_info(self.client, self.stream)

        # Build up name => index mapping for the columns
        self.columns = build_column_mapping(exinfo['columns'], self.info)

        # Count points
        self.count = self.client.stream_count(self.stream, self.start, self.end)

        # Verify count
        if self.count == 0:
            raise DataError("No data in this exemplar!")
        if self.count < min_rows:
            raise DataError("Too few data points: " + str(self.count))
        if self.count > max_rows:
            raise DataError("Too many data points: " + str(self.count))

        # Extract the data
        datagen = self.client.stream_extract_numpy(self.stream,
                                                   self.start, self.end,
                                                   self.info.layout,
                                                   maxrows = self.count)
        self.data = list(datagen)[0]

        # Discard timestamp
        self.data = self.data[:,1:]

        # Subtract the mean from each column
        self.data = self.data - self.data.mean(axis=0)

        # Get scale factors for each column by computing dot product
        # of each column with itself.
        self.scale = inner1d(self.data.T, self.data.T)

        # Ensure a minimum (nonzero) scale and convert to list
        self.scale = np.maximum(self.scale, [1e-9]).tolist()

    def __str__(self):
        return sprintf("\"%s\" %s [%s] %s rows",
                       self.name, self.stream, ",".join(self.columns.keys()),
                       self.count)

def peak_detect(data, delta):
    """Simple min/max peak detection algorithm, taken from my code
    in the disagg.m from the 10-8-5 paper"""
    mins = [];
    maxs = [];
    cur_min = (None, np.inf)
    cur_max = (None, -np.inf)
    lookformax = False
    for (n, p) in enumerate(data):
        if p > cur_max[1]:
            cur_max = (n, p)
        if p < cur_min[1]:
            cur_min = (n, p)
        if lookformax:
            if p < (cur_max[1] - delta):
                maxs.append(cur_max)
                cur_min = (n, p)
                lookformax = False
        else:
            if p > (cur_min[1] + delta):
                mins.append(cur_min)
                cur_max = (n, p)
                lookformax = True
    return (mins, maxs)

def trainola_matcher(data, interval, args, insert_func, final_chunk):
    """Perform cross-correlation match"""
    ( src_columns, dest_count, exemplars ) = args
    nrows = data.shape[0]

    # We want at least 10% more points than the widest exemplar.
    widest = max([ x.count for x in exemplars ])
    if (widest * 1.1) > nrows:
        return 0

    # This is how many points we'll consider valid in the
    # cross-correlation.
    valid = nrows + 1 - widest
    matches = collections.defaultdict(list)

    # Try matching against each of the exemplars
    for e in exemplars:
        corrs = []

        # Compute cross-correlation for each column
        for col_name in e.columns:
            a = data[:, src_columns[col_name] + 1]
            b = e.data[:, e.columns[col_name]]
            corr = scipy.signal.fftconvolve(a, np.flipud(b), 'valid')[0:valid]

            # Scale by the norm of the exemplar
            corr = corr / e.scale[e.columns[col_name]]
            corrs.append(corr)

        # Find the peaks using the column with the largest amplitude
        biggest = e.scale.index(max(e.scale))
        peaks_minmax = peak_detect(corrs[biggest], 0.1)
        peaks = [ p[0] for p in peaks_minmax[1] ]

        # Now look at every peak
        for row in peaks:
            # Correlation for each column must be close enough to 1.
            for (corr, scale) in zip(corrs, e.scale):
                # The accepted distance from 1 is based on the relative
                # amplitude of the column.  Use a linear mapping:
                # scale 1.0 -> distance 0.1
                # scale 0.0 -> distance 1.0
                distance = 1 - 0.9 * (scale / e.scale[biggest])
                if abs(corr[row] - 1) > distance:
                    # No match
                    break
            else:
                # Successful match
                matches[row].append(e)

    # Insert matches into destination stream.
    matched_rows = sorted(matches.keys())
    out = np.zeros((len(matched_rows), dest_count + 1))

    for n, row in enumerate(matched_rows):
        # Fill timestamp
        out[n][0] = data[row, 0]

        # Mark matched exemplars
        for exemplar in matches[row]:
            out[n, exemplar.dest_column + 1] = 1.0

    # Insert it
    insert_func(out)

    # Return how many rows we processed
    return max(valid, 0)

def trainola(conf):
    print "Trainola", nilmtools.__version__

    # Load main stream data
    url = conf['url']
    src_path = conf['stream']
    dest_path = conf['dest_stream']
    start = conf['start']
    end = conf['end']

    # Get info for the src and dest streams
    src_client = nilmdb.client.numpyclient.NumpyClient(url)
    src = nilmtools.filter.get_stream_info(src_client, src_path)
    if not src:
        raise DataError("source path '" + src_path + "' does not exist")
    src_columns = build_column_mapping(conf['columns'], src)

    dest_client = nilmdb.client.numpyclient.NumpyClient(url)
    dest = nilmtools.filter.get_stream_info(dest_client, dest_path)
    if not dest:
        raise DataError("destination path '" + dest_path + "' does not exist")

    printf("Source:\n")
    printf("  %s [%s]\n", src.path, ",".join(src_columns.keys()))
    printf("Destination:\n")
    printf("  %s (%s columns)\n", dest.path, dest.layout_count)

    # Pull in the exemplar data
    exemplars = []
    for n, exinfo in enumerate(conf['exemplars']):
        printf("Loading exemplar %d:\n", n)
        e = Exemplar(exinfo)
        col = e.dest_column
        if col < 0 or col >= dest.layout_count:
            raise DataError(sprintf("bad destination column number %d\n" +
                                    "dest stream only has 0 through %d",
                                    col, dest.layout_count - 1))
        printf("  %s, output column %d\n", str(e), col)
        exemplars.append(e)
    if len(exemplars) == 0:
        raise DataError("missing exemplars")

    # Verify that the exemplar columns are all represented in the main data
    for n, ex in enumerate(exemplars):
        for col in ex.columns:
            if col not in src_columns:
                raise DataError(sprintf("Exemplar %d column %s is not "
                                        "available in source data", n, col))

    # Figure out which intervals we should process
    intervals = ( Interval(s, e) for (s, e) in
                  src_client.stream_intervals(src_path,
                                              diffpath = dest_path,
                                              start = start, end = end) )
    intervals = nilmdb.utils.interval.optimize(intervals)

    # Do the processing
    rows = 100000
    extractor = functools.partial(src_client.stream_extract_numpy,
                                  src.path, layout = src.layout, maxrows = rows)
    inserter = functools.partial(dest_client.stream_insert_numpy_context,
                                 dest.path)
    for interval in intervals:
        printf("Processing interval:\n")
        printf("  %s\n", interval.human_string())
        nilmtools.filter.process_numpy_interval(
            interval, extractor, inserter, rows * 3,
            trainola_matcher, (src_columns, dest.layout_count, exemplars))

    return "done"

def main(argv = None):
    import simplejson as json
    import sys

    if argv is None:
        argv = sys.argv[1:]
    if len(argv) != 1:
        raise DataError("need one argument, either a dictionary or JSON string")

    try:
        # Passed in a JSON string (e.g. on the command line)
        conf = json.loads(argv[0])
    except TypeError as e:
        # Passed in the config dictionary (e.g. from NilmRun)
        conf = argv[0]

    return trainola(conf)

if __name__ == "__main__":
    main()
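The peak detector above is the piece that turns the normalized cross-correlation into candidate match rows. Its behavior on a toy series, hand-checked with delta = 0.5 (a sketch, assuming nilmtools is installed):

    import nilmtools.trainola

    data = [0, 1, 0.2, 0.8, 0.1, 0.9, 0]
    (mins, maxs) = nilmtools.trainola.peak_detect(data, 0.5)
    print maxs    # -> [(1, 1), (3, 0.8), (5, 0.9)]  (index, value) of local maxima
    print mins    # -> [(0, 0), (2, 0.2), (4, 0.1)]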
setup.py (9 changed lines)
@@ -30,7 +30,7 @@ except ImportError:
 # Versioneer manages version numbers from git tags.
 # https://github.com/warner/python-versioneer
 import versioneer
-versioneer.versionfile_source = 'src/_version.py'
+versioneer.versionfile_source = 'nilmtools/_version.py'
 versioneer.versionfile_build = 'nilmtools/_version.py'
 versioneer.tag_prefix = 'nilmtools-'
 versioneer.parentdir_prefix = 'nilmtools-'
@@ -61,14 +61,13 @@ setup(name='nilmtools',
       long_description = "NILM Database Tools",
       license = "Proprietary",
       author_email = 'jim@jtan.com',
-      install_requires = [ 'nilmdb >= 1.5.0',
+      install_requires = [ 'nilmdb >= 1.8.1',
                            'numpy',
                            'scipy',
-                           'matplotlib',
+                           #'matplotlib',
                            ],
       packages = [ 'nilmtools',
                    ],
-      package_dir = { 'nilmtools': 'src' },
       entry_points = {
           'console_scripts': [
               'nilm-decimate = nilmtools.decimate:main',
@@ -79,6 +78,8 @@ setup(name='nilmtools',
               'nilm-copy-wildcard = nilmtools.copy_wildcard:main',
               'nilm-sinefit = nilmtools.sinefit:main',
               'nilm-cleanup = nilmtools.cleanup:main',
+              'nilm-median = nilmtools.median:main',
+              'nilm-trainola = nilmtools.trainola:main',
           ],
       },
       zip_safe = False,
src/sinefit.py (deleted file, 187 lines)
@@ -1,187 +0,0 @@
#!/usr/bin/python

# Sine wave fitting.  This runs about 5x faster than realtime on raw data.

import nilmtools.filter
import nilmdb.client
from numpy import *
from scipy import *
#import pylab as p
import operator

def main(argv = None):
    f = nilmtools.filter.Filter()
    parser = f.setup_parser("Sine wave fitting")
    group = parser.add_argument_group("Sine fit options")
    group.add_argument('-c', '--column', action='store', type=int,
                       help='Column number (first data column is 1)')
    group.add_argument('-f', '--frequency', action='store', type=float,
                       default=60.0,
                       help='Approximate frequency (default: %(default)s)')

    # Parse arguments
    try:
        args = f.parse_args(argv)
    except nilmtools.filter.MissingDestination as e:
        rec = "float32_3"
        print "Source is %s (%s)" % (e.src.path, e.src.layout)
        print "Destination %s doesn't exist" % (e.dest.path)
        print "You could make it with a command like:"
        print "  nilmtool -u %s create %s %s" % (e.dest.url, e.dest.path, rec)
        raise SystemExit(1)

    if args.column is None or args.column < 1:
        parser.error("need a column number >= 1")
    if args.frequency < 0.1:
        parser.error("frequency must be >= 0.1")

    f.check_dest_metadata({ "sinefit_source": f.src.path,
                            "sinefit_column": args.column })
    f.process_numpy(process, args = (args.column, args.frequency))

def process(data, interval, args, insert_function, final):
    (column, f_expected) = args
    rows = data.shape[0]

    # Estimate sampling frequency from timestamps
    fs = 1e6 * (rows-1) / (data[-1][0] - data[0][0])

    # Pull out about 3.5 periods of data at once;
    # we'll expect to match 3 zero crossings in each window
    N = max(int(3.5 * fs / f_expected), 10)

    # If we don't have enough data, don't bother processing it
    if rows < N:
        return 0

    # Process overlapping windows
    start = 0
    num_zc = 0
    while start < (rows - N):
        this = data[start:start+N, column]
        t_min = data[start, 0]/1e6
        t_max = data[start+N-1, 0]/1e6

        # Do 4-parameter sine wave fit
        (A, f0, phi, C) = sfit4(this, fs)

        # Check bounds.  If frequency is too crazy, ignore this window
        if f0 < (f_expected/2) or f0 > (f_expected*2):
            print "frequency", f0, "too far from expected value", f_expected
            start += N
            continue

        #p.plot(arange(N), this)
        #p.plot(arange(N), A * cos(f0/fs * 2 * pi * arange(N) + phi) + C, 'g')

        # Period starts when the argument of cosine is 3*pi/2 degrees,
        # so we're looking for sample number:
        #   n = (3 * pi / 2 - phi) / (f0/fs * 2 * pi)
        zc_n = (3 * pi / 2 - phi) / (f0 / fs * 2 * pi)
        period_n = fs/f0

        # Add periods to make N positive
        while zc_n < 0:
            zc_n += period_n

        last_zc = None
        # Mark the zero crossings until we're a half period away
        # from the end of the window
        while zc_n < (N - period_n/2):
            #p.plot(zc_n, C, 'ro')
            t = t_min + zc_n / fs
            insert_function([[t * 1e6, f0, A, C]])
            num_zc += 1
            last_zc = zc_n
            zc_n += period_n

        # Advance the window one quarter period past the last marked
        # zero crossing, or advance the window by half its size if we
        # didn't mark any.
        if last_zc is not None:
            advance = min(last_zc + period_n/4, N)
        else:
            advance = N/2
        #p.plot(advance, C, 'go')
        #p.show()

        start = int(round(start + advance))

    # Return the number of rows we've processed
    print "Marked", num_zc, "zero-crossings in", start, "rows"
    return start

def sfit4(data, fs):
    """(A, f0, phi, C) = sfit4(data, fs)

    Compute 4-parameter (unknown-frequency) least-squares fit to
    sine-wave data, according to IEEE Std 1241-2010 Annex B

    Input:
      data   vector of input samples
      fs     sampling rate (Hz)

    Output:
      Parameters [A, f0, phi, C] to fit the equation
        x[n] = A * cos(f0/fs * 2 * pi * n + phi) + C
      where n is sample number.  Or, as a function of time:
        x(t) = A * cos(f0 * 2 * pi * t + phi) + C

    by Jim Paris
    (Verified to match sfit4.m)
    """
    N = len(data)
    t = linspace(0, (N-1) / fs, N)

    ## Estimate frequency using FFT (step b)
    Fc = fft(data)
    F = abs(Fc)
    F[0] = 0 # eliminate DC

    # Find pair of spectral lines with largest amplitude:
    # resulting values are in F(i) and F(i+1)
    i = argmax(F[0:int(N/2)] + F[1:int(N/2+1)])

    # Interpolate FFT to get a better result (from Markus [B37])
    U1 = real(Fc[i])
    U2 = real(Fc[i+1])
    V1 = imag(Fc[i])
    V2 = imag(Fc[i+1])
    n = 2 * pi / N
    ni1 = n * i
    ni2 = n * (i+1)
    K = ((V2-V1)*sin(ni1) + (U2-U1)*cos(ni1)) / (U2-U1)
    Z1 = V1 * (K - cos(ni1)) / sin(ni1) + U1
    Z2 = V2 * (K - cos(ni2)) / sin(ni2) + U2
    i = arccos((Z2*cos(ni2) - Z1*cos(ni1)) / (Z2-Z1)) / n

    # Convert to Hz
    f0 = i * fs / N

    ## Fit it
    # first guess for A0, B0 using 3-parameter fit (step c)
    w = 2*pi*f0
    D = c_[cos(w*t), sin(w*t), ones(N)]
    s = linalg.lstsq(D, data)[0]

    # Now iterate 6 times (step i)
    for idx in range(6):
        D = c_[cos(w*t), sin(w*t), ones(N),
               -s[0] * t * sin(w*t) + s[1] * t * cos(w*t) ] # eqn B.16
        s = linalg.lstsq(D, data)[0] # eqn B.18
        w = w + s[3] # update frequency estimate

    ## Extract results
    A = sqrt(s[0]*s[0] + s[1]*s[1]) # eqn B.21
    f0 = w / (2*pi)
    try:
        phi = -arctan2(s[1], s[0]) # eqn B.22
    except TypeError:
        # something broke down, just return zeros
        return (0, 0, 0, 0)
    C = s[2]

    return (A, f0, phi, C)

if __name__ == "__main__":
    main()