Compare commits

33 commits, nilmtools-… → nilmtools-… (tag names truncated in this capture):
a4d4bc22fc, 6090dd6112, 9c0d9ad324, 8b9c5d4898, cf2c28b0fb, 87a26c907b,
def465b57c, 0589b8d316, 9c5f07106d, 62e11a11c0, 2bdcee2c36, 6dce8c5296,
25c35a56f6, d610deaef0, d7d5ccc9a7, f28753ff5c, c9c2e0d5a8, 5a2a32bec5,
706c3933f9, cfd1719152, c62fb45980, 57d856f2fa, 5d83d93019, 5f847a0513,
29cd7eb6c7, 62c8af41ea, 4f6bc48619, cf9eb0ed48, 32066fc260, 739da3f973,
83ad18ebf6, c76d527f95, b8a73278e7
Makefile (44 changed lines):

@@ -8,22 +8,37 @@ else
 	@echo "Try 'make install'"
 endif
 
-test: test_cleanup
+test: test_insert
 
+test_pipewatch:
+	nilmtools/pipewatch.py -t 3 "seq 10 20" "seq 20 30"
+
+test_trainola:
+	-nilmtool -u http://bucket/nilmdb remove -s min -e max \
+		/sharon/prep-a-matches
+	nilmtools/trainola.py "$$(cat extras/trainola-test-param-2.js)"
+	-nilmtool -u http://bucket/nilmdb remove -s min -e max \
+		/sharon/prep-a-matches
+	nilmtools/trainola.py "$$(cat extras/trainola-test-param.js)"
+
 test_cleanup:
-	src/cleanup.py -e extras/cleanup.cfg
-	src/cleanup.py extras/cleanup.cfg
+	nilmtools/cleanup.py -e extras/cleanup.cfg
+	nilmtools/cleanup.py extras/cleanup.cfg
 
 test_insert:
-	@make install >/dev/null
-	src/insert.py --file --dry-run /test/foo </dev/null
+	nilmtools/insert.py --skip --file --dry-run /foo/bar ~/data/20130311T2100.prep1.gz ~/data/20130311T2100.prep1.gz ~/data/20130311T2200.prep1.gz
 
 test_copy:
-	@make install >/dev/null
-	src/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*
+	nilmtools/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*
 
-test_prep:
-	@make install >/dev/null
+/tmp/raw.dat:
+	octave --eval 'fs = 8000;' \
+	       --eval 't = (0:fs*10)*2*pi*60/fs;' \
+	       --eval 'raw = transpose([sin(t); 0.3*sin(3*t)+sin(t)]);' \
+	       --eval 'save("-ascii","/tmp/raw.dat","raw");'
+
+test_prep: /tmp/raw.dat
 	-nilmtool destroy -R /test/raw
 	-nilmtool destroy -R /test/sinefit
 	-nilmtool destroy -R /test/prep

@@ -31,8 +46,9 @@ test_prep:
 	nilmtool create /test/sinefit float32_3
 	nilmtool create /test/prep float32_8
 	nilmtool insert -s '@0' -t -r 8000 /test/raw /tmp/raw.dat
-	src/sinefit.py -c 1 /test/raw /test/sinefit
-	src/prep.py -c 2 /test/raw /test/sinefit /test/prep
+	nilmtools/sinefit.py -a 0.5 -c 1 -s '@0' -e '@5000000' /test/raw /test/sinefit
+	nilmtools/prep.py -c 2 /test/raw /test/sinefit /test/prep
+	nilmtools/prep.py -c 2 /test/raw /test/sinefit /test/prep
 	nilmtool extract -s min -e max /test/prep | head -20
 
 test_decimate:

@@ -40,8 +56,8 @@ test_decimate:
 	-@nilmtool destroy /lees-compressor/no-leak/raw/16 || true
 	-@nilmtool create /lees-compressor/no-leak/raw/4 float32_18 || true
 	-@nilmtool create /lees-compressor/no-leak/raw/16 float32_18 || true
-	time python src/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/1 /lees-compressor/no-leak/raw/4
-	python src/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/4 /lees-compressor/no-leak/raw/16
+	time python nilmtools/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/1 /lees-compressor/no-leak/raw/4
+	python nilmtools/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/4 /lees-compressor/no-leak/raw/16
 
 version:
 	python setup.py version

@@ -63,4 +79,4 @@ clean::
 gitclean::
 	git clean -dXf
 
-.PHONY: all version dist sdist install clean gitclean
+.PHONY: all version dist sdist install clean gitclean test
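For readers without Octave: the /tmp/raw.dat recipe above generates ten seconds of synthetic 60 Hz test data (a pure fundamental and a fundamental plus third harmonic). A rough numpy equivalent, illustrative only and not part of this commit:

    import numpy as np

    fs = 8000                                          # sample rate
    t = np.arange(fs * 10 + 1) * 2 * np.pi * 60 / fs   # 60 Hz phase ramp
    raw = np.column_stack((np.sin(t),                  # fundamental only
                           0.3 * np.sin(3 * t) + np.sin(t)))  # + 3rd harmonic
    np.savetxt("/tmp/raw.dat", raw)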
README:

@@ -6,9 +6,9 @@ Prerequisites:
 
 	# Runtime and build environments
 	sudo apt-get install python2.7 python2.7-dev python-setuptools
-	sudo apt-get install python-numpy python-scipy python-matplotlib
+	sudo apt-get install python-numpy python-scipy python-daemon
 
-	nilmdb (1.5.0+)
+	nilmdb (1.8.5+)
 
 Install:
 
extras/sample-cron-scripts/capture.sh (new executable file, 10 lines):

#!/bin/bash

# Start the ethstream capture using nilm-pipewatch

# Bail out on errors
set -e

nilm-pipewatch --daemon --lock "/tmp/nilmdb-capture.lock" --timeout 30 \
	"ethstream -a 192.168.1.209 -n 9 -r 8000 -N" \
	"nilm-insert -m 10 -r 8000 --live /sharon/raw"
extras/sample-cron-scripts/cleanup.cfg (new file, 8 lines):

[/sharon/prep-*]
keep = 1y

[/sharon/raw]
keep = 2w

[/sharon/sinefit]
keep = 1y
extras/sample-cron-scripts/crontab (new file, 9 lines):

# Install this by running "crontab crontab" (will replace existing crontab)

# m h dom mon dow cmd

# Run NilmDB processing every 5 minutes
*/5 * * * * chronic /home/nilm/data/process.sh

# Check the capture process every minute
*/1 * * * * chronic /home/nilm/data/capture.sh
extras/sample-cron-scripts/process.sh (new executable file, 28 lines):

#!/bin/bash
# Run all necessary processing on NilmDB data.

# Bail out on errors
set -e

# Ensure only one copy of this code runs at a time:
LOCKFILE="/tmp/nilmdb-process.lock"
exec 99>"$LOCKFILE"
if ! flock -n -x 99 ; then
    echo "NilmDB processing already running, giving up..."
    exit 0
fi
trap 'rm -f "$LOCKFILE"' 0

# sinefit on phase A voltage
nilm-sinefit -c 5 /sharon/raw /sharon/sinefit

# prep on A, B, C with appropriate rotations
nilm-prep -c 1 -r 0 /sharon/raw /sharon/sinefit /sharon/prep-a
nilm-prep -c 2 -r 120 /sharon/raw /sharon/sinefit /sharon/prep-b
nilm-prep -c 3 -r 240 /sharon/raw /sharon/sinefit /sharon/prep-c

# decimate raw and prep data
nilm-decimate-auto /sharon/raw /sharon/prep*

# run cleanup
nilm-cleanup --yes /home/nilm/data/cleanup.cfg
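process.sh serializes itself with flock(1) so overlapping cron runs give up cleanly. A minimal Python sketch of the same single-instance pattern, for anyone porting the idea (illustrative only, not part of this commit):

    import fcntl
    import sys

    lockfile = open("/tmp/nilmdb-process.lock", "w")
    try:
        # Non-blocking exclusive lock; a second copy fails immediately
        fcntl.flock(lockfile, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except IOError:
        print "NilmDB processing already running, giving up..."
        sys.exit(0)
    # ... processing steps run here; the lock is released on exit ...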
extras/trainola-test-param-2.js (new file, 29 lines):

{ "columns" : [ { "index" : 0, "name" : "P1" },
                { "index" : 1, "name" : "Q1" },
                { "index" : 2, "name" : "P3" } ],
  "stream" : "/sharon/prep-a",
  "url" : "http://bucket.mit.edu/nilmdb",
  "dest_stream" : "/sharon/prep-a-matches",
  "start" : 1365153062643133.5,
  "end" : 1365168814443575.5,
  "exemplars" : [ { "columns" : [ { "index" : 0,
                                    "name" : "P1"
                                  } ],
                    "dest_column" : 0,
                    "end" : 1365073657682000,
                    "name" : "Turn ON",
                    "start" : 1365073654321000,
                    "stream" : "/sharon/prep-a",
                    "url" : "http://bucket.mit.edu/nilmdb"
                  },
                  { "columns" : [ { "index" : 2, "name" : "P3" },
                                  { "index" : 0, "name" : "P1" } ],
                    "dest_column" : 1,
                    "end" : 1365176528818000,
                    "name" : "Type 2 turn ON",
                    "start" : 1365176520030000,
                    "stream" : "/sharon/prep-a",
                    "url" : "http://bucket.mit.edu/nilmdb"
                  }
                ]
}
extras/trainola-test-param.js (new file, 31 lines):

{ "url": "http://bucket.mit.edu/nilmdb",
  "dest_stream": "/sharon/prep-a-matches",
  "stream": "/sharon/prep-a",
  "start": 1366111383280463,
  "end": 1366126163457797,
  "columns": [ { "name": "P1", "index": 0 },
               { "name": "Q1", "index": 1 },
               { "name": "P3", "index": 2 } ],
  "exemplars": [
    { "name": "Boiler Pump ON",
      "url": "http://bucket.mit.edu/nilmdb",
      "stream": "/sharon/prep-a",
      "start": 1366260494269078,
      "end": 1366260608185031,
      "dest_column": 0,
      "columns": [ { "name": "P1", "index": 0 },
                   { "name": "Q1", "index": 1 }
                 ]
    },
    { "name": "Boiler Pump OFF",
      "url": "http://bucket.mit.edu/nilmdb",
      "stream": "/sharon/prep-a",
      "start": 1366260864215764,
      "end": 1366260870882998,
      "dest_column": 1,
      "columns": [ { "name": "P1", "index": 0 },
                   { "name": "Q1", "index": 1 }
                 ]
    }
  ]
}
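These two parameter files feed the Makefile's test_trainola target. As a sketch, the same configuration could also drive the new trainola entry point from Python; this assumes a reachable NilmDB server and the streams named in the file:

    import simplejson as json
    import nilmtools.trainola

    with open("extras/trainola-test-param.js") as f:
        conf = json.load(f)
    # main() accepts either a config dictionary or a JSON string
    nilmtools.trainola.main([conf])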
_version.py (versioneer):

@@ -181,7 +181,7 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False)
 
 tag_prefix = "nilmtools-"
 parentdir_prefix = "nilmtools-"
-versionfile_source = "src/_version.py"
+versionfile_source = "nilmtools/_version.py"
 
 def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
     variables = { "refnames": git_refnames, "full": git_full }
nilmtools/filter.py:

@@ -19,6 +19,10 @@ import re
 import argparse
 import numpy as np
 import cStringIO
+import functools
+
+class ArgumentError(Exception):
+    pass
 
 class MissingDestination(Exception):
     def __init__(self, args, src, dest):
@@ -65,6 +69,70 @@ def get_stream_info(client, path):
         return None
     return StreamInfo(client.geturl(), streams[0])
 
+# Filter processing for a single interval of data.
+def process_numpy_interval(interval, extractor, inserter, warn_rows,
+                           function, args = None):
+    """For the given 'interval' of data, extract data, process it
+    through 'function', and insert the result.
+
+    'extractor' should be a function like NumpyClient.stream_extract_numpy
+    but with the interval 'start' and 'end' as the only parameters, e.g.:
+      extractor = functools.partial(NumpyClient.stream_extract_numpy,
+                                    src_path, layout = l, maxrows = m)
+
+    'inserter' should be a function like NumpyClient.stream_insert_context
+    but with the interval 'start' and 'end' as the only parameters, e.g.:
+      inserter = functools.partial(NumpyClient.stream_insert_context,
+                                   dest_path)
+
+    If 'warn_rows' is not None, print a warning to stdout when the
+    number of unprocessed rows exceeds this amount.
+
+    See process_numpy for details on 'function' and 'args'.
+    """
+    if args is None:
+        args = []
+
+    with inserter(interval.start, interval.end) as insert_ctx:
+        insert_func = insert_ctx.insert
+        old_array = np.array([])
+        for new_array in extractor(interval.start, interval.end):
+            # If we still had old data left, combine it
+            if old_array.shape[0] != 0:
+                array = np.vstack((old_array, new_array))
+            else:
+                array = new_array
+
+            # Pass the data to the user provided function
+            processed = function(array, interval, args, insert_func, False)
+
+            # Send any pending data that the user function inserted
+            insert_ctx.send()
+
+            # Save the unprocessed parts
+            if processed >= 0:
+                old_array = array[processed:]
+            else:
+                raise Exception(
+                    sprintf("%s return value %s must be >= 0",
+                            str(function), str(processed)))
+
+            # Warn if there's too much data remaining
+            if warn_rows is not None and old_array.shape[0] > warn_rows:
+                printf("warning: %d unprocessed rows in buffer\n",
+                       old_array.shape[0])
+
+        # Last call for this contiguous interval
+        if old_array.shape[0] != 0:
+            processed = function(old_array, interval, args,
+                                 insert_func, True)
+            if processed != old_array.shape[0]:
+                # Truncate the interval we're inserting at the first
+                # unprocessed data point.  This ensures that
+                # we'll not miss any data when we run again later.
+                insert_ctx.update_end(old_array[processed][0])
+
 class Filter(object):
 
     def __init__(self, parser_description = None):
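A sketch of how a caller is expected to wire up process_numpy_interval, following the docstring above; the URL, paths, and layout are placeholders, and the loop is left commented since it needs real intervals:

    import functools
    import nilmdb.client.numpyclient

    client = nilmdb.client.numpyclient.NumpyClient("http://localhost/nilmdb/")
    extractor = functools.partial(client.stream_extract_numpy, "/test/raw",
                                  layout = "float32_8", maxrows = 100000)
    inserter = functools.partial(client.stream_insert_numpy_context,
                                 "/test/prep")

    def passthrough(data, interval, args, insert_func, final):
        insert_func(data)          # copy every row through unchanged
        return data.shape[0]       # report all rows as consumed

    # for interval in some_interval_list:
    #     process_numpy_interval(interval, extractor, inserter,
    #                            300000, passthrough)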
@@ -134,63 +202,52 @@ class Filter(object):
         self._parser = parser
         return parser
 
-    def interval_string(self, interval):
-        return sprintf("[ %s -> %s ]",
-                       timestamp_to_human(interval.start),
-                       timestamp_to_human(interval.end))
-
-    def parse_args(self, argv = None):
-        args = self._parser.parse_args(argv)
-
-        if args.dest_url is None:
-            args.dest_url = args.url
-        if args.url != args.dest_url:
+    def set_args(self, url, dest_url, srcpath, destpath, start, end,
+                 parsed_args = None, quiet = True):
+        """Set arguments directly from parameters"""
+        if dest_url is None:
+            dest_url = url
+        if url != dest_url:
             self.interhost = True
 
-        self._client_src = Client(args.url)
-        self._client_dest = Client(args.dest_url)
+        self._client_src = Client(url)
+        self._client_dest = Client(dest_url)
 
-        if (not self.interhost) and (args.srcpath == args.destpath):
-            self._parser.error("source and destination path must be different")
+        if (not self.interhost) and (srcpath == destpath):
+            raise ArgumentError("source and destination path must be different")
 
-        # Open and print info about the streams
-        self.src = get_stream_info(self._client_src, args.srcpath)
+        # Open the streams
+        self.src = get_stream_info(self._client_src, srcpath)
         if not self.src:
-            self._parser.error("source path " + args.srcpath + " not found")
+            raise ArgumentError("source path " + srcpath + " not found")
 
-        self.dest = get_stream_info(self._client_dest, args.destpath)
+        self.dest = get_stream_info(self._client_dest, destpath)
         if not self.dest:
-            raise MissingDestination(args, self.src,
-                                     StreamInfo(args.dest_url, [args.destpath]))
+            raise MissingDestination(parsed_args, self.src,
+                                     StreamInfo(dest_url, [destpath]))
 
-        print "Source:", self.src.string(self.interhost)
-        print "  Dest:", self.dest.string(self.interhost)
+        self.start = start
+        self.end = end
 
-        if args.dry_run:
-            for interval in self.intervals():
-                print self.interval_string(interval)
-            raise SystemExit(0)
+        # Print info
+        if not quiet:
+            print "Source:", self.src.string(self.interhost)
+            print "  Dest:", self.dest.string(self.interhost)
+
+    def parse_args(self, argv = None):
+        """Parse arguments from a command line"""
+        args = self._parser.parse_args(argv)
+
+        self.set_args(args.url, args.dest_url, args.srcpath, args.destpath,
+                      args.start, args.end, quiet = False, parsed_args = args)
 
         self.force_metadata = args.force_metadata
-
-        self.start = args.start
-        self.end = args.end
-
+        if args.dry_run:
+            for interval in self.intervals():
+                print interval.human_string()
+            raise SystemExit(0)
         return args
 
-    def _optimize_int(self, it):
-        """Join and yield adjacent intervals from the iterator 'it'"""
-        saved_int = None
-        for interval in it:
-            if saved_int is not None:
-                if saved_int.end == interval.start:
-                    interval.start = saved_int.start
-                else:
-                    yield saved_int
-            saved_int = interval
-        if saved_int is not None:
-            yield saved_int
-
     def intervals(self):
         """Generate all the intervals that this filter should process"""
         self._using_client = True
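A rough sketch of driving the new set_args() programmatically instead of via parse_args(); the URL and paths are placeholders, both streams are assumed to exist, and start/end of None is assumed here to mean "no limit", matching the CLI defaults:

    import nilmtools.filter

    f = nilmtools.filter.Filter()
    try:
        # dest_url of None means "same server as the source"
        f.set_args("http://localhost/nilmdb/", None,
                   "/test/raw", "/test/copy", None, None)
    except nilmtools.filter.ArgumentError as e:
        print e
    else:
        for interval in f.intervals():
            print interval.human_string()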
@@ -217,12 +274,13 @@ class Filter(object):
                     self.src.path, diffpath = self.dest.path,
                     start = self.start, end = self.end) )
         # Optimize intervals: join intervals that are adjacent
-        for interval in self._optimize_int(intervals):
+        for interval in nilmdb.utils.interval.optimize(intervals):
             yield interval
         self._using_client = False
 
     # Misc helpers
-    def arg_time(self, toparse):
+    @staticmethod
+    def arg_time(toparse):
         """Parse a time string argument"""
         try:
             return nilmdb.utils.time.parse_time(toparse)
@@ -236,8 +294,14 @@ class Filter(object):
         metadata = self._client_dest.stream_get_metadata(self.dest.path)
         if not self.force_metadata:
             for key in data:
-                wanted = str(data[key])
+                wanted = data[key]
+                if not isinstance(wanted, basestring):
+                    wanted = str(wanted)
                 val = metadata.get(key, wanted)
+                # Force UTF-8 encoding for comparison and display
+                wanted = wanted.encode('utf-8')
+                val = val.encode('utf-8')
+                key = key.encode('utf-8')
                 if val != wanted and self.dest.rows > 0:
                     m = "Metadata in destination stream:\n"
                     m += "  %s = %s\n" % (key, val)
@@ -252,14 +316,21 @@ class Filter(object):
             self._client_dest.stream_update_metadata(self.dest.path, data)
 
     # The main filter processing method.
-    def process_numpy(self, function, args = None, rows = 100000):
-        """For all intervals that exist in self.src but don't exist in
-        self.dest, call 'function' with a Numpy array corresponding to
-        the data.  The data is converted to a Numpy array in chunks of
-        'rows' rows at a time.
+    def process_numpy(self, function, args = None, rows = 100000,
+                      intervals = None):
+        """Calls process_numpy_interval for each interval that currently
+        exists in self.src, but doesn't exist in self.dest.  It will
+        process the data in chunks as follows:
+
+        For each chunk of data, call 'function' with a Numpy array
+        corresponding to the data.  The data is converted to a Numpy
+        array in chunks of 'rows' rows at a time.
+
+        If 'intervals' is not None, process those intervals instead of
+        the default list.
 
         'function' should be defined as:
-        def function(data, interval, args, insert_func, final)
+        # def function(data, interval, args, insert_func, final)
 
         'data': array of data to process -- may be empty
 

@@ -283,56 +354,18 @@ class Filter(object):
         being inserted will be ended at the timestamp of the first
         unprocessed data point.
         """
-        if args is None:
-            args = []
         extractor = NumpyClient(self.src.url).stream_extract_numpy
         inserter = NumpyClient(self.dest.url).stream_insert_numpy_context
 
-        for interval in self.intervals():
-            print "Processing", self.interval_string(interval)
-            with inserter(self.dest.path,
-                          interval.start, interval.end) as insert_ctx:
-                insert_function = insert_ctx.insert
-                old_array = np.array([])
-                for new_array in extractor(self.src.path,
-                                           interval.start, interval.end,
-                                           layout = self.src.layout,
-                                           maxrows = rows):
-                    # If we still had old data left, combine it
-                    if old_array.shape[0] != 0:
-                        array = np.vstack((old_array, new_array))
-                    else:
-                        array = new_array
-
-                    # Pass it to the process function
-                    processed = function(array, interval, args,
-                                         insert_function, False)
-
-                    # Send any pending data
-                    insert_ctx.send()
-
-                    # Save the unprocessed parts
-                    if processed >= 0:
-                        old_array = array[processed:]
-                    else:
-                        raise Exception(
-                            sprintf("%s return value %s must be >= 0",
-                                    str(function), str(processed)))
-
-                    # Warn if there's too much data remaining
-                    if old_array.shape[0] > 3 * rows:
-                        printf("warning: %d unprocessed rows in buffer\n",
-                               old_array.shape[0])
-
-                # Last call for this contiguous interval
-                if old_array.shape[0] != 0:
-                    processed = function(old_array, interval, args,
-                                         insert_function, True)
-                    if processed != old_array.shape[0]:
-                        # Truncate the interval we're inserting at the first
-                        # unprocessed data point.  This ensures that
-                        # we'll not miss any data when we run again later.
-                        insert_ctx.update_end(old_array[processed][0])
+        extractor_func = functools.partial(extractor, self.src.path,
+                                           layout = self.src.layout,
+                                           maxrows = rows)
+        inserter_func = functools.partial(inserter, self.dest.path)
+
+        for interval in (intervals or self.intervals()):
+            print "Processing", interval.human_string()
+            process_numpy_interval(interval, extractor_func, inserter_func,
                                    rows * 3, function, args)
 
 def main(argv = None):
     # This is just a dummy function; actual filters can use the other
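For illustration, a hypothetical 'function' (not part of this commit) that honors the buffered-rows contract described in the docstring: it needs one row of lookahead, so on non-final calls it reports one row short of the chunk and that row is re-presented with the next chunk:

    def first_difference(data, interval, args, insert_func, final):
        n = data.shape[0]
        if n < 2:
            return n if final else 0
        out = data[:-1].copy()
        out[:, 1:] = data[1:, 1:] - data[:-1, 1:]   # difference the values
        insert_func(out)            # timestamps come from the older rows
        # On the final call the last row has no successor, so claim it
        # as processed and drop it; otherwise leave it buffered.
        return n if final else n - 1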
@@ -341,7 +374,7 @@ def main(argv = None):
     parser = f.setup_parser()
     args = f.parse_args(argv)
     for i in f.intervals():
-        print "Generic filter: need to handle", f.interval_string(i)
+        print "Generic filter: need to handle", i.human_string()
 
 if __name__ == "__main__":
     main()
nilmtools/insert.py:

@@ -53,7 +53,8 @@ def parse_args(argv = None):
     is stepped forward to match 'clock'.
 
     - If 'data' is running ahead, there is overlap in the data, and an
-      error is raised.
+      error is raised.  If '--ignore' is specified, the current file
+      is skipped instead of raising an error.
     """))
     parser.add_argument("-u", "--url", action="store",
                         default="http://localhost/nilmdb/",

@@ -61,6 +62,8 @@ def parse_args(argv = None):
     group = parser.add_argument_group("Misc options")
     group.add_argument("-D", "--dry-run", action="store_true",
                        help="Parse files, but don't insert any data")
+    group.add_argument("-s", "--skip", action="store_true",
+                       help="Skip files if the data would overlap")
     group.add_argument("-m", "--max-gap", action="store", default=10.0,
                        metavar="SEC", type=float,
                        help="Max discrepency between clock and data "

@@ -235,6 +238,10 @@ def main(argv = None):
                              "is %s but clock time is only %s",
                              timestamp_to_human(data_ts),
                              timestamp_to_human(clock_ts))
+                if args.skip:
+                    printf("%s\n", err)
+                    printf("Skipping the remainder of this file\n")
+                    break
                 raise ParseError(filename, err)
 
         if (data_ts + max_gap) < clock_ts:
nilmtools/median.py (new executable file, 43 lines):

#!/usr/bin/python
import nilmtools.filter, scipy.signal

def main(argv = None):
    f = nilmtools.filter.Filter()
    parser = f.setup_parser("Median Filter")
    group = parser.add_argument_group("Median filter options")
    group.add_argument("-z", "--size", action="store", type=int, default=25,
                       help = "median filter size (default %(default)s)")
    group.add_argument("-d", "--difference", action="store_true",
                       help = "store difference rather than filtered values")

    try:
        args = f.parse_args(argv)
    except nilmtools.filter.MissingDestination as e:
        print "Source is %s (%s)" % (e.src.path, e.src.layout)
        print "Destination %s doesn't exist" % (e.dest.path)
        print "You could make it with a command like:"
        print "  nilmtool -u %s create %s %s" % (e.dest.url,
                                                 e.dest.path, e.src.layout)
        raise SystemExit(1)

    meta = f.client_src.stream_get_metadata(f.src.path)
    f.check_dest_metadata({ "median_filter_source": f.src.path,
                            "median_filter_size": args.size,
                            "median_filter_difference": repr(args.difference) })

    f.process_numpy(median_filter, args = (args.size, args.difference))

def median_filter(data, interval, args, insert, final):
    (size, diff) = args
    (rows, cols) = data.shape
    for i in range(cols - 1):
        filtered = scipy.signal.medfilt(data[:, i+1], size)
        if diff:
            data[:, i+1] -= filtered
        else:
            data[:, i+1] = filtered
    insert(data)
    return rows

if __name__ == "__main__":
    main()
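A quick standalone check of the filter/difference logic above on synthetic data, with no NilmDB involved (illustrative only):

    import numpy as np
    import scipy.signal

    x = 10.0 + np.random.randn(100)
    x[::20] += 50                               # inject outlier spikes
    filtered = scipy.signal.medfilt(x, 25)      # like --size 25
    residual = x - filtered                     # what -d/--difference stores
    print residual.max()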
nilmtools/pipewatch.py (new executable file, 168 lines):

#!/usr/bin/python

import nilmdb.client
from nilmdb.utils.printf import *
import nilmdb.utils.lock
import nilmtools

import time
import sys
import os
import argparse
import subprocess
import tempfile
import threading
import select
import signal
import Queue
import daemon

def parse_args(argv = None):
    parser = argparse.ArgumentParser(
        formatter_class = argparse.ArgumentDefaultsHelpFormatter,
        version = nilmtools.__version__,
        description = """\
    Pipe data from 'generator' to 'consumer'.  This is intended to be
    executed frequently from cron, and will exit if another copy is
    already running.  If 'generator' or 'consumer' returns an error,
    or if 'generator' stops sending data for a while, it will exit.

    Intended for use with ethstream (generator) and nilm-insert
    (consumer).  Commands are executed through the shell.
    """)
    parser.add_argument("-d", "--daemon", action="store_true",
                        help="Run in background")
    parser.add_argument("-l", "--lock", metavar="FILENAME", action="store",
                        default=tempfile.gettempdir() +
                        "/nilm-pipewatch.lock",
                        help="Lock file for detecting running instance")
    parser.add_argument("-t", "--timeout", metavar="SECONDS", action="store",
                        type=float, default=30,
                        help="Restart if no output from " +
                        "generator for this long")
    group = parser.add_argument_group("commands to execute")
    group.add_argument("generator", action="store",
                       help="Data generator (e.g. \"ethstream -r 8000\")")
    group.add_argument("consumer", action="store",
                       help="Data consumer (e.g. \"nilm-insert /foo/bar\")")
    args = parser.parse_args(argv)

    return args

def reader_thread(queue, fd):
    # Read from a file descriptor, write to queue.
    try:
        while True:
            (r, w, x) = select.select([fd], [], [fd], 0.25)
            if x:
                raise Exception # generator died?
            if not r:
                # short timeout -- just try again.  This is to catch the
                # fd being closed elsewhere, which is only detected
                # when select restarts.
                continue
            data = os.read(fd, 65536)
            if data == "": # generator EOF
                raise Exception
            queue.put(data)
    except Exception:
        queue.put(None)

def watcher_thread(queue, procs):
    # Put None in the queue if either process dies
    while True:
        for p in procs:
            if p.poll() is not None:
                queue.put(None)
                return
        time.sleep(0.25)

def pipewatch(args):
    # Run the processes, etc
    with open(os.devnull, "r") as devnull:
        generator = subprocess.Popen(args.generator, shell = True,
                                     bufsize = -1, close_fds = True,
                                     stdin = devnull,
                                     stdout = subprocess.PIPE,
                                     stderr = None)
        consumer = subprocess.Popen(args.consumer, shell = True,
                                    bufsize = -11, close_fds = True,
                                    stdin = subprocess.PIPE,
                                    stdout = None, stderr = None)

        queue = Queue.Queue(maxsize = 32)
        reader = threading.Thread(target = reader_thread,
                                  args = (queue, generator.stdout.fileno()))
        reader.start()
        watcher = threading.Thread(target = watcher_thread,
                                   args = (queue, [generator, consumer]))
        watcher.start()
        try:
            while True:
                try:
                    data = queue.get(True, args.timeout)
                    if data is None:
                        break
                    consumer.stdin.write(data)
                except Queue.Empty:
                    # Timeout: kill the generator
                    fprintf(sys.stderr, "pipewatch: timeout\n")
                    generator.terminate()
                    break

            generator.stdout.close()
            consumer.stdin.close()
        except IOError:
            fprintf(sys.stderr, "pipewatch: I/O error\n")

        def kill(proc):
            # Wait for a process to end, or kill it
            def poll_timeout(proc, timeout):
                for x in range(1+int(timeout / 0.1)):
                    if proc.poll() is not None:
                        break
                    time.sleep(0.1)
                return proc.poll()
            try:
                if poll_timeout(proc, 0.5) is None:
                    proc.terminate()
                    if poll_timeout(proc, 0.5) is None:
                        proc.kill()
            except OSError:
                pass
            return poll_timeout(proc, 0.5)

        # Wait for them to die, or kill them
        gret = kill(generator)
        cret = kill(consumer)

        fprintf(sys.stderr, "pipewatch: generator returned %d, " +
                "consumer returned %d\n", gret, cret)
        if gret == 0 and cret == 0:
            sys.exit(0)
        sys.exit(1)

def main(argv = None):
    args = parse_args(argv)

    lockfile = open(args.lock, "w")
    if not nilmdb.utils.lock.exclusive_lock(lockfile):
        printf("pipewatch process already running (according to %s)\n",
               args.lock)
        sys.exit(0)
    try:
        # Run as a daemon if requested, otherwise run directly.
        if args.daemon:
            with daemon.DaemonContext(files_preserve = [ lockfile ]):
                pipewatch(args)
        else:
            pipewatch(args)
    finally:
        # Clean up lockfile
        try:
            os.unlink(args.lock)
        except OSError:
            pass

if __name__ == "__main__":
    main()
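The Makefile's test_pipewatch target exercises this with toy shell commands; the equivalent programmatic call would be roughly as follows (note that main() terminates via sys.exit):

    import nilmtools.pipewatch

    # Same toy generator/consumer as the Makefile target
    nilmtools.pipewatch.main(["-t", "3", "seq 10 20", "seq 20 30"])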
nilmtools/prep.py:

@@ -12,6 +12,7 @@ import scipy.fftpack
 import scipy.signal
 #from matplotlib import pyplot as p
 import bisect
+from nilmdb.utils.interval import Interval
 
 def main(argv = None):
     # Set up argument parser

@@ -80,11 +81,22 @@ def main(argv = None):
     f.check_dest_metadata({ "prep_raw_source": f.src.path,
                             "prep_sinefit_source": sinefit.path,
                             "prep_column": args.column,
-                            "prep_rotation": rotation })
+                            "prep_rotation": repr(rotation) })
 
-    # Run the processing function on all data
+    # Find the intersection of the usual set of intervals we'd filter,
+    # and the intervals actually present in sinefit data.  This is
+    # what we will process.
+    filter_int = f.intervals()
+    sinefit_int = ( Interval(start, end) for (start, end) in
+                    client_sinefit.stream_intervals(
+                        args.sinepath, start = f.start, end = f.end) )
+    intervals = nilmdb.utils.interval.intersection(filter_int, sinefit_int)
+
+    # Run the process (using the helper in the filter module)
     f.process_numpy(process, args = (client_sinefit, sinefit.path, args.column,
-                                     args.nharm, rotation, args.nshift))
+                                     args.nharm, rotation, args.nshift),
+                    intervals = intervals)
 
 def process(data, interval, args, insert_function, final):
     (client, sinefit_path, column, nharm, rotation, nshift) = args
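A toy illustration of the interval intersection used above, assuming the Interval semantics from nilmdb.utils.interval (the timestamps here are arbitrary small numbers):

    import nilmdb.utils.interval
    from nilmdb.utils.interval import Interval

    a = [ Interval(0, 100), Interval(200, 300) ]
    b = [ Interval(50, 250) ]
    for i in nilmdb.utils.interval.intersection(iter(a), iter(b)):
        print i.start, i.end   # expect 50 100, then 200 250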
nilmtools/sinefit.py:

@@ -1,13 +1,18 @@
 #!/usr/bin/python
 
-# Sine wave fitting.  This runs about 5x faster than realtime on raw data.
+# Sine wave fitting.
+from nilmdb.utils.printf import *
 import nilmtools.filter
 import nilmdb.client
+from nilmdb.utils.time import (timestamp_to_human,
+                               timestamp_to_seconds,
+                               seconds_to_timestamp)
 
 from numpy import *
 from scipy import *
 #import pylab as p
 import operator
+import sys
 
 def main(argv = None):
     f = nilmtools.filter.Filter()

@@ -59,12 +64,40 @@ def main(argv = None):
     f.process_numpy(process, args = (args.column, args.frequency, args.min_amp,
                                      args.min_freq, args.max_freq))
 
+class SuppressibleWarning(object):
+    def __init__(self, maxcount = 10, maxsuppress = 100):
+        self.maxcount = maxcount
+        self.maxsuppress = maxsuppress
+        self.count = 0
+        self.last_msg = ""
+
+    def _write(self, sec, msg):
+        if sec:
+            now = timestamp_to_human(seconds_to_timestamp(sec)) + ": "
+        else:
+            now = ""
+        sys.stderr.write(now + msg)
+
+    def warn(self, msg, seconds = None):
+        self.count += 1
+        if self.count <= self.maxcount:
+            self._write(seconds, msg)
+        if (self.count - self.maxcount) >= self.maxsuppress:
+            self.reset(seconds)
+
+    def reset(self, seconds = None):
+        if self.count > self.maxcount:
+            self._write(seconds, sprintf("(%d warnings suppressed)\n",
+                                         self.count - self.maxcount))
+        self.count = 0
+
 def process(data, interval, args, insert_function, final):
     (column, f_expected, a_min, f_min, f_max) = args
     rows = data.shape[0]
 
     # Estimate sampling frequency from timestamps
-    fs = 1e6 * (rows-1) / (data[-1][0] - data[0][0])
+    fs = (rows-1) / (timestamp_to_seconds(data[-1][0]) -
+                     timestamp_to_seconds(data[0][0]))
 
     # Pull out about 3.5 periods of data at once;
     # we'll expect to match 3 zero crossings in each window

@@ -74,26 +107,31 @@ def process(data, interval, args, insert_function, final):
     if rows < N:
         return 0
 
+    warn = SuppressibleWarning(3, 1000)
+
     # Process overlapping windows
     start = 0
     num_zc = 0
+    last_inserted_timestamp = None
     while start < (rows - N):
         this = data[start:start+N, column]
-        t_min = data[start, 0]/1e6
-        t_max = data[start+N-1, 0]/1e6
+        t_min = timestamp_to_seconds(data[start, 0])
+        t_max = timestamp_to_seconds(data[start+N-1, 0])
 
         # Do 4-parameter sine wave fit
         (A, f0, phi, C) = sfit4(this, fs)
 
         # Check bounds.  If frequency is too crazy, ignore this window
         if f0 < f_min or f0 > f_max:
-            print "frequency", f0, "outside valid range", f_min, "-", f_max
+            warn.warn(sprintf("frequency %s outside valid range %s - %s\n",
+                              str(f0), str(f_min), str(f_max)), t_min)
             start += N
             continue
 
         # If amplitude is too low, results are probably just noise
         if A < a_min:
-            print "amplitude", A, "below minimum threshold", a_min
+            warn.warn(sprintf("amplitude %s below minimum threshold %s\n",
                              str(A), str(a_min)), t_min)
             start += N
             continue
 

@@ -116,7 +154,13 @@ def process(data, interval, args, insert_function, final):
         while zc_n < (N - period_n/2):
             #p.plot(zc_n, C, 'ro')
             t = t_min + zc_n / fs
-            insert_function([[t * 1e6, f0, A, C]])
+            if (last_inserted_timestamp is None or
+                t > last_inserted_timestamp):
+                insert_function([[seconds_to_timestamp(t), f0, A, C]])
+                last_inserted_timestamp = t
+                warn.reset(t)
+            else:
+                warn.warn("timestamp overlap\n", t)
             num_zc += 1
             last_zc = zc_n
             zc_n += period_n

@@ -134,7 +178,13 @@ def process(data, interval, args, insert_function, final):
     start = int(round(start + advance))
 
     # Return the number of rows we've processed
-    print "Marked", num_zc, "zero-crossings in", start, "rows"
+    warn.reset(last_inserted_timestamp)
+    if last_inserted_timestamp:
+        now = timestamp_to_human(seconds_to_timestamp(
+            last_inserted_timestamp)) + ": "
+    else:
+        now = ""
+    printf("%sMarked %d zero-crossings in %d rows\n", now, num_zc, start)
     return start
 
 def sfit4(data, fs):
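The sfit4 body itself is unchanged by this diff and not shown here. Judging by how the caller uses its return values, the four fitted parameters are read as data[n] ≈ A·sin(2π·f0·n/fs + phi) + C; a synthetic signal for checking that interpretation (assumption, not taken from the commit):

    import numpy as np

    fs = 8000.0
    n = np.arange(480)                   # ~3.5 periods of 60 Hz at 8 kHz
    y = 2.0 * np.sin(2*np.pi*60.0*n/fs + 0.5) + 1.0
    # (A, f0, phi, C) = sfit4(y, fs)    # expected: ~(2.0, 60.0, 0.5, 1.0)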
nilmtools/trainola.py (new executable file, 304 lines):

#!/usr/bin/python

from nilmdb.utils.printf import *
import nilmdb.client
import nilmtools.filter
from nilmdb.utils.time import (timestamp_to_human,
                               timestamp_to_seconds,
                               seconds_to_timestamp)
from nilmdb.utils import datetime_tz
from nilmdb.utils.interval import Interval

import numpy as np
import scipy
import scipy.signal
from numpy.core.umath_tests import inner1d
import nilmrun
from collections import OrderedDict
import sys
import time
import functools
import collections

class DataError(ValueError):
    pass

def build_column_mapping(colinfo, streaminfo):
    """Given the 'columns' list from the JSON data, verify and
    pull out a dictionary mapping for the column names/numbers."""
    columns = OrderedDict()
    for c in colinfo:
        col_num = c['index'] + 1 # skip timestamp
        if (c['name'] in columns.keys() or col_num in columns.values()):
            raise DataError("duplicated columns")
        if (c['index'] < 0 or c['index'] >= streaminfo.layout_count):
            raise DataError("bad column number")
        columns[c['name']] = col_num
    if not len(columns):
        raise DataError("no columns")
    return columns

class Exemplar(object):
    def __init__(self, exinfo, min_rows = 10, max_rows = 100000):
        """Given a dictionary entry from the 'exemplars' input JSON,
        verify the stream, columns, etc.  Then, fetch all the data
        into self.data."""

        self.name = exinfo['name']
        self.url = exinfo['url']
        self.stream = exinfo['stream']
        self.start = exinfo['start']
        self.end = exinfo['end']
        self.dest_column = exinfo['dest_column']

        # Get stream info
        self.client = nilmdb.client.numpyclient.NumpyClient(self.url)
        self.info = nilmtools.filter.get_stream_info(self.client, self.stream)
        if not self.info:
            raise DataError(sprintf("exemplar stream '%s' does not exist " +
                                    "on server '%s'", self.stream, self.url))

        # Build up name => index mapping for the columns
        self.columns = build_column_mapping(exinfo['columns'], self.info)

        # Count points
        self.count = self.client.stream_count(self.stream, self.start, self.end)

        # Verify count
        if self.count == 0:
            raise DataError("No data in this exemplar!")
        if self.count < min_rows:
            raise DataError("Too few data points: " + str(self.count))
        if self.count > max_rows:
            raise DataError("Too many data points: " + str(self.count))

        # Extract the data
        datagen = self.client.stream_extract_numpy(self.stream,
                                                   self.start, self.end,
                                                   self.info.layout,
                                                   maxrows = self.count)
        self.data = list(datagen)[0]

        # Extract just the columns that were specified in self.columns,
        # skipping the timestamp.
        extract_columns = [ value for (key, value) in self.columns.items() ]
        self.data = self.data[:,extract_columns]

        # Fix the column indices in e.columns, since we removed/reordered
        # columns in self.data
        for n, k in enumerate(self.columns):
            self.columns[k] = n

        # Subtract the means from each column
        self.data = self.data - self.data.mean(axis=0)

        # Get scale factors for each column by computing dot product
        # of each column with itself.
        self.scale = inner1d(self.data.T, self.data.T)

        # Ensure a minimum (nonzero) scale and convert to list
        self.scale = np.maximum(self.scale, [1e-9]).tolist()

    def __str__(self):
        return sprintf("\"%s\" %s [%s] %s rows",
                       self.name, self.stream, ",".join(self.columns.keys()),
                       self.count)

def peak_detect(data, delta):
    """Simple min/max peak detection algorithm, taken from my code
    in the disagg.m from the 10-8-5 paper"""
    mins = [];
    maxs = [];
    cur_min = (None, np.inf)
    cur_max = (None, -np.inf)
    lookformax = False
    for (n, p) in enumerate(data):
        if p > cur_max[1]:
            cur_max = (n, p)
        if p < cur_min[1]:
            cur_min = (n, p)
        if lookformax:
            if p < (cur_max[1] - delta):
                maxs.append(cur_max)
                cur_min = (n, p)
                lookformax = False
        else:
            if p > (cur_min[1] + delta):
                mins.append(cur_min)
                cur_max = (n, p)
                lookformax = True
    return (mins, maxs)

def timestamp_to_short_human(timestamp):
    dt = datetime_tz.datetime_tz.fromtimestamp(timestamp_to_seconds(timestamp))
    return dt.strftime("%H:%M:%S")

def trainola_matcher(data, interval, args, insert_func, final_chunk):
    """Perform cross-correlation match"""
    ( src_columns, dest_count, exemplars ) = args
    nrows = data.shape[0]

    # We want at least 10% more points than the widest exemplar.
    widest = max([ x.count for x in exemplars ])
    if (widest * 1.1) > nrows:
        return 0

    # This is how many points we'll consider valid in the
    # cross-correlation.
    valid = nrows + 1 - widest
    matches = collections.defaultdict(list)

    # Try matching against each of the exemplars
    for e in exemplars:
        corrs = []

        # Compute cross-correlation for each column
        for col_name in e.columns:
            a = data[:, src_columns[col_name]]
            b = e.data[:, e.columns[col_name]]
            corr = scipy.signal.fftconvolve(a, np.flipud(b), 'valid')[0:valid]

            # Scale by the norm of the exemplar
            corr = corr / e.scale[e.columns[col_name]]
            corrs.append(corr)

        # Find the peaks using the column with the largest amplitude
        biggest = e.scale.index(max(e.scale))
        peaks_minmax = peak_detect(corrs[biggest], 0.1)
        peaks = [ p[0] for p in peaks_minmax[1] ]

        # Now look at every peak
        for row in peaks:
            # Correlation for each column must be close enough to 1.
            for (corr, scale) in zip(corrs, e.scale):
                # The accepted distance from 1 is based on the relative
                # amplitude of the column.  Use a linear mapping:
                # scale 1.0 -> distance 0.1
                # scale 0.0 -> distance 1.0
                distance = 1 - 0.9 * (scale / e.scale[biggest])
                if abs(corr[row] - 1) > distance:
                    # No match
                    break
            else:
                # Successful match
                matches[row].append(e)

    # Insert matches into destination stream.
    matched_rows = sorted(matches.keys())
    out = np.zeros((len(matched_rows), dest_count + 1))

    for n, row in enumerate(matched_rows):
        # Fill timestamp
        out[n][0] = data[row, 0]

        # Mark matched exemplars
        for exemplar in matches[row]:
            out[n, exemplar.dest_column + 1] = 1.0

    # Insert it
    insert_func(out)

    # Return how many rows we processed
    valid = max(valid, 0)
    printf("  [%s] matched %d exemplars in %d rows\n",
           timestamp_to_short_human(data[0][0]), np.sum(out[:,1:]), valid)
    return valid

def trainola(conf):
    print "Trainola", nilmtools.__version__

    # Load main stream data
    url = conf['url']
    src_path = conf['stream']
    dest_path = conf['dest_stream']
    start = conf['start']
    end = conf['end']

    # Get info for the src and dest streams
    src_client = nilmdb.client.numpyclient.NumpyClient(url)
    src = nilmtools.filter.get_stream_info(src_client, src_path)
    if not src:
        raise DataError("source path '" + src_path + "' does not exist")
    src_columns = build_column_mapping(conf['columns'], src)

    dest_client = nilmdb.client.numpyclient.NumpyClient(url)
    dest = nilmtools.filter.get_stream_info(dest_client, dest_path)
    if not dest:
        raise DataError("destination path '" + dest_path + "' does not exist")

    printf("Source:\n")
    printf("  %s [%s]\n", src.path, ",".join(src_columns.keys()))
    printf("Destination:\n")
    printf("  %s (%s columns)\n", dest.path, dest.layout_count)

    # Pull in the exemplar data
    exemplars = []
    for n, exinfo in enumerate(conf['exemplars']):
        printf("Loading exemplar %d:\n", n)
        e = Exemplar(exinfo)
        col = e.dest_column
        if col < 0 or col >= dest.layout_count:
            raise DataError(sprintf("bad destination column number %d\n" +
                                    "dest stream only has 0 through %d",
                                    col, dest.layout_count - 1))
        printf("  %s, output column %d\n", str(e), col)
        exemplars.append(e)
    if len(exemplars) == 0:
        raise DataError("missing exemplars")

    # Verify that the exemplar columns are all represented in the main data
    for n, ex in enumerate(exemplars):
        for col in ex.columns:
            if col not in src_columns:
                raise DataError(sprintf("Exemplar %d column %s is not "
                                        "available in source data", n, col))

    # Figure out which intervals we should process
    intervals = ( Interval(s, e) for (s, e) in
                  src_client.stream_intervals(src_path,
                                              diffpath = dest_path,
                                              start = start, end = end) )
    intervals = nilmdb.utils.interval.optimize(intervals)

    # Do the processing
    rows = 100000
    extractor = functools.partial(src_client.stream_extract_numpy,
                                  src.path, layout = src.layout, maxrows = rows)
    inserter = functools.partial(dest_client.stream_insert_numpy_context,
                                 dest.path)
    start = time.time()
    processed_time = 0
    printf("Processing intervals:\n")
    for interval in intervals:
        printf("%s\n", interval.human_string())
        nilmtools.filter.process_numpy_interval(
            interval, extractor, inserter, rows * 3,
            trainola_matcher, (src_columns, dest.layout_count, exemplars))
        processed_time += (timestamp_to_seconds(interval.end) -
                           timestamp_to_seconds(interval.start))
    elapsed = max(time.time() - start, 1e-3)

    printf("Done.  Processed %.2f seconds per second.\n",
           processed_time / elapsed)

def main(argv = None):
    import simplejson as json
    import sys

    if argv is None:
        argv = sys.argv[1:]
    if len(argv) != 1:
        raise DataError("need one argument, either a dictionary or JSON string")

    try:
        # Passed in a JSON string (e.g. on the command line)
        conf = json.loads(argv[0])
    except TypeError as e:
        # Passed in the config dictionary (e.g. from NilmRun)
        conf = argv[0]

    return trainola(conf)

if __name__ == "__main__":
    main()
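A toy check of the normalized cross-correlation scoring used in trainola_matcher: a mean-subtracted pattern correlated against itself scores about 1.0 at the alignment point (illustrative only, not part of the commit):

    import numpy as np
    import scipy.signal

    b = np.array([0., 1., 2., 1., 0.])
    b = b - b.mean()                     # exemplars are mean-subtracted
    scale = np.dot(b, b)                 # the per-column scale factor
    a = np.concatenate((np.zeros(10), b, np.zeros(10)))
    corr = scipy.signal.fftconvolve(a, np.flipud(b), 'valid') / scale
    print np.argmax(corr), round(corr.max(), 3)   # -> 10 1.0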
setup.py (11 changed lines):

@@ -30,7 +30,7 @@ except ImportError:
 # Versioneer manages version numbers from git tags.
 # https://github.com/warner/python-versioneer
 import versioneer
-versioneer.versionfile_source = 'src/_version.py'
+versioneer.versionfile_source = 'nilmtools/_version.py'
 versioneer.versionfile_build = 'nilmtools/_version.py'
 versioneer.tag_prefix = 'nilmtools-'
 versioneer.parentdir_prefix = 'nilmtools-'

@@ -61,14 +61,14 @@ setup(name='nilmtools',
       long_description = "NILM Database Tools",
       license = "Proprietary",
       author_email = 'jim@jtan.com',
-      install_requires = [ 'nilmdb >= 1.6.0',
+      install_requires = [ 'nilmdb >= 1.8.5',
                            'numpy',
                            'scipy',
-                           'matplotlib',
+                           'python-daemon >= 1.5',
+                           #'matplotlib',
                            ],
       packages = [ 'nilmtools',
                    ],
-      package_dir = { 'nilmtools': 'src' },
       entry_points = {
           'console_scripts': [
               'nilm-decimate = nilmtools.decimate:main',

@@ -79,6 +79,9 @@ setup(name='nilmtools',
               'nilm-copy-wildcard = nilmtools.copy_wildcard:main',
               'nilm-sinefit = nilmtools.sinefit:main',
               'nilm-cleanup = nilmtools.cleanup:main',
+              'nilm-median = nilmtools.median:main',
+              'nilm-trainola = nilmtools.trainola:main',
+              'nilm-pipewatch = nilmtools.pipewatch:main',
           ],
       },
       zip_safe = False,