Compare commits

..

102 Commits

Author SHA1 Message Date
e99027db46 Freeze requirements 2020-08-06 18:14:24 -04:00
cfc66b6847 Fix flake8 errors throughout code
This found a small number of real bugs too, for example,
this one that looked weird because of a 2to3 conversion,
but was wrong both before and after:
-        except IndexError as TypeError:
+        except (IndexError, TypeError):
2020-08-06 17:58:41 -04:00
4f3b1d3c60 Cleanup Makefile 2020-08-06 17:32:06 -04:00
9d49d39ff9 Flesh out remaining tests and Python 3 fixes 2020-08-06 17:29:30 -04:00
fe87c3fab4 Fix error detection in nilm-sinefit, and improve test coverage for math.py 2020-08-06 14:53:42 -04:00
8fd511b5df Add nilm-pipewatch test and update for Python 3 2020-08-06 14:28:57 -04:00
8c7713076b Finish trainola testing and porting to Python 3 2020-08-06 13:36:28 -04:00
2c892a9df7 Start test for trainola 2020-08-05 17:08:48 -04:00
0d952584ea Update versioneer 2020-08-05 17:04:58 -04:00
78dc640444 Fix #! at top of shell scripts for py3 and venvs 2020-08-05 17:04:28 -04:00
752d326b77 Add nilm-prep test and updates for Python 3 2020-08-05 16:29:16 -04:00
9f49875969 Add nilm-cleanup test, update for Python 3
Also fix a bug when reporting bad units on "keep" configuration key
2020-08-05 15:32:04 -04:00
05be240564 Add nilm-median test, and update for Python 3 2020-08-05 15:30:28 -04:00
7bed742957 Add nilm-sinefit test, and update for Python 3 2020-08-05 14:26:20 -04:00
3a8c04e04a Add nilm-insert test, and update for Python 3 2020-08-05 14:25:16 -04:00
0fa46c09a9 Add nilm-decimate-auto test 2020-08-05 14:25:16 -04:00
aae4d79eaf Add --max argument for nilm-decimate-auto
This is instead of hardcoding 500
2020-08-05 14:25:16 -04:00
bef2054357 Fix nilm-decimate-auto for Python 3 2020-08-05 14:25:16 -04:00
d8df408ce1 Add nilm-decimate test, and fix support for int64 types 2020-08-05 14:25:16 -04:00
453c015f5e No need to manually encode output for Python 3 2020-08-05 14:25:16 -04:00
feb12ecfc1 Add nilm-copy-wildcard test 2020-08-05 14:25:16 -04:00
060bd8fee3 Fix test order 2020-08-05 14:25:16 -04:00
b1f2a3c5d5 Replace self.raise() with self.fail() + self.contain() in tests 2020-08-04 23:37:33 -04:00
c15b53924d Use multiprocessing to start multiple servers in tests
Cherrypy uses a bunch of globals, so use multiprocessing to let us
spawn a few that are totally independent.
2020-08-04 23:37:24 -04:00
0f9f0ded71 Add test framework, and initial test for nilm-copy 2020-08-04 13:58:07 -04:00
f507e793a2 Run 2to3 over all code 2020-08-04 10:44:02 -04:00
0c1a3449cb Start Python 3 conversion 2020-08-04 10:42:00 -04:00
0cf2db6c5e Fix divide by zero in sinefit 2014-02-14 15:56:52 -05:00
f530edd8a0 sfit4: if interpolated DFT fails, use peak 2013-08-16 15:36:39 -04:00
4d946bee79 Set shell and path in sample cron script 2013-08-16 15:36:20 -04:00
13ceb91999 Add test_sinefit makefile target 2013-08-16 15:36:11 -04:00
dab9625296 Run fsck at startup 2013-08-09 16:03:14 -04:00
3e7527ab57 Support -h and -v in nilm-trainola 2013-08-08 16:30:08 -04:00
31b6d82dfc Make 'make test' do nothing from command line 2013-08-07 20:19:39 -04:00
077010ba3a Store nshift in prep metadata 2013-08-07 20:19:28 -04:00
77751a8529 Fix typo in help text 2013-08-07 18:39:19 -04:00
9c711300a2 Add short form of --force-metadata, -F 2013-08-06 23:07:36 -04:00
74cf34e2ad Update sharon cleanup.cfg 2013-08-06 22:48:06 -04:00
120bf58b85 Support --nometa option for copy_one and copy_wildcard 2013-08-06 22:47:16 -04:00
c26daa9a3b Update crontab 2013-08-03 11:23:43 -04:00
6993f5c886 Fix process termination in nilm-pipewatch 2013-08-03 11:13:30 -04:00
Sharon NILM
dd69f3e51d Update process.sh 2013-08-02 23:19:14 -04:00
dc26e32b6e Make interhost, force_metadata private to Filter 2013-08-02 23:14:19 -04:00
981f23ff14 Better documentation for callback function 2013-08-02 23:14:19 -04:00
492445a469 Split off useful math functions to math.py 2013-08-02 17:27:39 -04:00
33c3586bea trainola: suppress peaks if larger ones are nearby
Might fix the problem Mark noticed where turn-off transients
are erroneously matching the drop that follows startup transients.
2013-07-31 19:12:16 -04:00
c1e0f8ffbc Fix bug in copy_one 2013-07-31 14:47:16 -04:00
d2853bdb0e Add test case for bad trainola detections 2013-07-30 20:35:54 -04:00
a4d4bc22fc Add --skip option to nilm-insert 2013-07-30 18:25:47 -04:00
6090dd6112 prep: only process intervals present in both raw & sinefit 2013-07-30 14:55:06 -04:00
Sharon NILM
9c0d9ad324 Sample scripts from Sharon 2013-07-29 18:37:55 -04:00
Sharon NILM
8b9c5d4898 Fix daemon dependency 2013-07-29 17:40:51 -04:00
cf2c28b0fb Add --daemon flag 2013-07-29 17:16:18 -04:00
87a26c907b Watch for process termination too 2013-07-29 15:08:49 -04:00
def465b57c Improve pipewatch; add nilm-pipewatch script 2013-07-29 14:58:15 -04:00
0589b8d316 start of pipewatch util 2013-07-29 14:10:56 -04:00
9c5f07106d Don't need python-pip 2013-07-20 16:15:29 -04:00
62e11a11c0 Fix issue with column ordering in the exemplars
If the max scale in the exemplar was a column we weren't using, it
would bail out when looking for that correlation later.  Change things
around so exemplars in RAM only keep around the columns we care about.
2013-07-18 22:51:27 -04:00
2bdcee2c36 More helpful error if exemplar stream doesn't exist 2013-07-15 15:19:52 -04:00
6dce8c5296 More output 2013-07-11 18:56:53 -04:00
25c35a56f6 Trainola inserts into the destination stream now 2013-07-10 12:59:39 -04:00
d610deaef0 More trainola work 2013-07-10 11:38:32 -04:00
d7d5ccc9a7 More filter cleanup 2013-07-09 19:27:20 -04:00
f28753ff5c Move process_numpy_interval outside the class 2013-07-09 18:40:49 -04:00
c9c2e0d5a8 Improve split between process_numpy and process_numpy_interval 2013-07-09 18:09:05 -04:00
5a2a32bec5 WIP on trainola improvements 2013-07-09 17:56:26 -04:00
706c3933f9 Add trainola from nilmrun 2013-07-09 17:55:57 -04:00
cfd1719152 Use nilmdb.utils.interval.optimize; bump nilmdb min version 2013-07-09 17:53:04 -04:00
c62fb45980 Makefile cleanup; add nilm-trainola binary 2013-07-09 16:53:47 -04:00
57d856f2fa Split filter.py internals up a little more
This makes it easier to use the filter stuff from other code, but it's
also turning it into more of a spaghetti nightmare.  Might not be
worth continuing down this path.
2013-07-09 16:52:00 -04:00
5d83d93019 Rename src/ directory to nilmtools/ 2013-07-08 11:54:13 -04:00
5f847a0513 Split process_numpy innards process_numpy_interval 2013-07-03 12:07:22 -04:00
29cd7eb6c7 Improve test_prep target in Makefile 2013-07-03 12:06:50 -04:00
62c8af41ea Cleanup comments 2013-06-06 15:34:23 -04:00
4f6bc48619 sinefit: include timestamps on marking output too 2013-05-11 11:00:31 -04:00
cf9eb0ed48 Improve sinefit resiliancy 2013-05-10 14:19:55 -04:00
32066fc260 Remove hard matplotlib dependency 2013-05-09 13:17:36 -04:00
739da3f973 Add median filter 2013-05-08 23:36:50 -04:00
83ad18ebf6 Fix non-string arguments to metadata_check 2013-05-08 12:49:38 -04:00
c76d527f95 Fix unicode handling in filter metadata match 2013-05-07 12:40:53 -04:00
b8a73278e7 Always store metadata rotation as a string 2013-04-29 14:25:11 -04:00
ce0691d6c4 sineefit: Change sfit4 to fit to \sin instead of \cos
And adjust the period locator accordingly.
Fitting \sin is the same mathematically, it's just conceptually more
straightforward since we're locating zero crossings anyway.
2013-04-27 18:12:20 -04:00
4da658e960 sinefit: move initial estimate into the main iteration loop
Just a little less code.  Same results.
2013-04-27 17:50:23 -04:00
8ab31eafc2 Allow shorthand method for creating an option-less parser.
This is mostly just intended to make a simple filter example shorter.
2013-04-21 16:53:28 -04:00
979ab13bff Force fs to be a float in sfit4 2013-04-17 17:58:15 -04:00
f4fda837ae Bump required nilmdb version to 1.6.0 2013-04-11 11:55:11 -04:00
5547d266d0 filter: Don't include trailing unprocessed data in the inserted intervals 2013-04-11 11:53:17 -04:00
372e977e4a Reverse cleanup order to handle interruptions better 2013-04-10 18:38:41 -04:00
640a680704 Increase default min amplitude in sinefit 2013-04-10 17:09:52 -04:00
2e74e6cd63 Skip over data if we aren't able to process any. Change output format 2013-04-10 17:01:07 -04:00
de2a794e00 Support wildcards in nilm-decimate-auto 2013-04-10 16:05:16 -04:00
065a40f265 sinefit: add minimum amplitude check 2013-04-10 15:33:51 -04:00
65fa43aff1 sinefit: catch all errors in sfit4 2013-04-10 14:36:50 -04:00
57c23c3792 sinefit: allow user to override min/max frequency detection 2013-04-10 14:36:40 -04:00
d4c8e4acb4 Include rotation in metadata 2013-04-10 14:36:05 -04:00
fd1b33401f Require a --yes argument before actually cleaning data 2013-04-09 20:13:38 -04:00
4c748ec00c Fix minor bugs 2013-04-09 20:08:25 -04:00
b72d6b6908 Warn if column count is wrong for this nharm value 2013-04-09 19:59:59 -04:00
80d642e52e Change nilm-cleanup config file format, tweak output 2013-04-09 19:43:41 -04:00
001b89b1d2 Support multiple shifted FFTs per period in nilm-prep.
New option --nshift controls how many shifted FFT windows to perform
per period.  "nilm-prep -N 2" is similar to old prep behavior.  Note
that this is redundant information and takes up extra storage space,
though.
2013-04-09 18:53:27 -04:00
f978823505 Fix prep scaling and fix comments 2013-04-09 17:44:13 -04:00
ffd6675979 Remove outdated code 2013-04-08 19:46:16 -04:00
73 changed files with 48938 additions and 1615 deletions

12
.coveragerc Normal file
View File

@@ -0,0 +1,12 @@
# -*- conf -*-
[run]
branch = True
[report]
exclude_lines =
pragma: no cover
if 0:
if __name__ == "__main__":
omit = nilmtools/_version.py
show_missing = True

4
.gitignore vendored
View File

@@ -1,3 +1,4 @@
.coverage
oldprep
newprep
*.dat
@@ -5,5 +6,6 @@ build/
*.pyc
dist/
*.egg-info/
MANIFEST.in
.eggs/
tests/testdb*
MANIFEST

8
MANIFEST.in Normal file
View File

@@ -0,0 +1,8 @@
# Root
include README.md
include setup.py
include versioneer.py
include Makefile
# Version
include nilmtools/_version.py

View File

@@ -1,59 +1,45 @@
#URL="http://bucket.mit.edu:8080/nilmdb"
URL="http://localhost/nilmdb"
all:
ifeq ($(INSIDE_EMACS), t)
@make test
else
@echo "Try 'make install'"
endif
test:
src/decimate.py
test_insert:
@make install >/dev/null
src/insert.py --file --dry-run /test/foo </dev/null
test_copy:
@make install >/dev/null
src/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*
test_prep:
@make install >/dev/null
src/prep.py -c 3 \
/lees-compressor/no-leak/raw \
/lees-compressor/no-leak/sinefit \
/lees-compressor/no-leak/prep \
-s '2013-02-19 18:00:00' \
-r 0
test_decimate:
-@nilmtool destroy /lees-compressor/no-leak/raw/4 || true
-@nilmtool destroy /lees-compressor/no-leak/raw/16 || true
-@nilmtool create /lees-compressor/no-leak/raw/4 float32_18 || true
-@nilmtool create /lees-compressor/no-leak/raw/16 float32_18 || true
time python src/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/1 /lees-compressor/no-leak/raw/4
python src/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/4 /lees-compressor/no-leak/raw/16
# By default, run the tests.
all: test
version:
python setup.py version
python3 setup.py version
build:
python3 setup.py build_ext --inplace
dist: sdist
sdist:
python setup.py sdist
python3 setup.py sdist
install:
python setup.py install
python3 setup.py install
develop:
python setup.py develop
python3 setup.py develop
ctrl: flake
flake:
flake8 nilmtools
lint:
pylint3 --rcfile=setup.cfg nilmtools
test:
ifneq ($(INSIDE_EMACS),)
# Use the slightly more flexible script
python3 setup.py build_ext --inplace
python3 tests/runtests.py
else
# Let setup.py check dependencies, build stuff, and run the test
python3 setup.py nosetests
endif
clean::
find . -name '*pyc' | xargs rm -f
rm -rf nilmtools.egg-info/ build/ MANIFEST.in
find . -name '*.pyc' -o -name '__pycache__' -print0 | xargs -0 rm -rf
rm -f .coverage
rm -rf nilmtools.egg-info/ build/ .eggs
gitclean::
git clean -dXf
.PHONY: all version dist sdist install clean gitclean
.PHONY: all version dist sdist install test
.PHONY: ctrl lint flake clean gitclean

42
README.md Normal file
View File

@@ -0,0 +1,42 @@
# nilmtools: Tools and utilities for NilmDB
Tools and utilities for interacting with the NILM Database, or writing
programs that interact with the NILM database.
by Jim Paris <jim@jtan.com>
## Prerequisites:
# Runtime and build environments
sudo apt-get install python3
# Create a new Python virtual environment to isolate deps.
python3 -m venv ../venv
source ../venv/bin/activate # run "deactivate" to leave
# Install all Python dependencies
pip3 install -r requirements.txt
## Install:
Install it into the virtual environment
python3 setup.py install
If you want to instead install it system-wide, you will also need to
install the requirements system-wide:
sudo pip3 install -r requirements.txt
sudo python3 setup.py install
## Building new tools:
The tools in this package are meant to be installed with `python3
setup.py install`. If you want to make a new one, an easier way to
develop would be to first install this package, and then copy a
specific script like `src/sinefit.py` to a new location, and modify it
as desired.
To add a tool to the package, place it in `src/` and add the
appropriate configuration to `setup.py`.

View File

@@ -1,27 +0,0 @@
nilmtools: Tools and utilities for interacting with the NILM Database,
or writing programs that interact with the NILM database.
by Jim Paris <jim@jtan.com>
Prerequisites:
# Runtime and build environments
sudo apt-get install python2.7 python2.7-dev python-setuptools
sudo apt-get install python-numpy python-scipy python-matplotlib
nilmdb (1.5.0+)
Install:
python setup.py install
Building new tools:
The tools in this package are meant to be installed with
"python setup.py install". If you want to make a new one,
an easier way to develop would be to first install this package,
and then copy a specific script like "src/sinefit.py" to a new
location, and modify it as desired.
To add a tool to the package, place it in "src/" and add the
appropriate configuration to "setup.py".

22
extras/cleanup.cfg Normal file
View File

@@ -0,0 +1,22 @@
[/lees-compressor/no-leak/prep]
keep = 2d
rate = 60
[*/raw]
keep = 2d
[*/something]
rate = 10
[*/sinefit]
keep = 1w
decimated = False
[/test/raw]
keep = 0.01d
[/test/sinefit]
keep = 0.01d
[/test/prep]
keep = 0.01d

View File

@@ -0,0 +1,10 @@
#!/bin/bash
# Start the ethstream capture using nilm-pipewatch
# Bail out on errors
set -e
nilm-pipewatch --daemon --lock "/tmp/nilmdb-capture.lock" --timeout 30 \
"ethstream -a 192.168.1.209 -n 9 -r 8000 -N" \
"nilm-insert -m 10 -r 8000 --live /sharon/raw"

View File

@@ -0,0 +1,9 @@
[/sharon/prep-*]
keep = 1y
[/sharon/raw]
keep = 2w
[/sharon/sinefit]
keep = 1y
decimated = false

View File

@@ -0,0 +1,15 @@
# Install this by running "crontab crontab" (will replace existing crontab)
SHELL=/bin/bash
PATH=/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin
# m h dom mon dow cmd
# Run NilmDB processing every 5 minutes
*/5 * * * * chronic /home/nilm/data/process.sh
# Try frequently restarting the capture process in case it died
*/5 * * * * chronic /home/nilm/data/capture.sh
# Run fsck at startup
@reboot chronic nilmdb-fsck --fix --no-data /home/nilm/data/db/

View File

@@ -0,0 +1,32 @@
#!/bin/bash
# Run all necessary processing on NilmDB data.
# Bail out on errors
set -e
# Ensure only one copy of this code runs at a time:
LOCKFILE="/tmp/nilmdb-process.lock"
exec 99>"$LOCKFILE"
if ! flock -n -x 99 ; then
echo "NilmDB processing already running, giving up..."
exit 0
fi
trap 'rm -f "$LOCKFILE"' 0
# redirect stdout/stderr to log, but keep it on the console too
exec > >(tee /home/nilm/data/process.log)
exec 2> >(tee -a /home/nilm/data/process.log >&2)
echo "sinefit on phase A voltage"
nilm-sinefit -c 5 /sharon/raw /sharon/sinefit
echo "prep on A, B, C with appropriate rotations"
nilm-prep -c 1 -r 0 /sharon/raw /sharon/sinefit /sharon/prep-a
nilm-prep -c 2 -r 120 /sharon/raw /sharon/sinefit /sharon/prep-b
nilm-prep -c 3 -r 240 /sharon/raw /sharon/sinefit /sharon/prep-c
echo "decimate raw and prep data"
nilm-decimate-auto /sharon/raw /sharon/prep*
echo "run cleanup"
nilm-cleanup --yes /home/nilm/data/cleanup.cfg

View File

@@ -0,0 +1,29 @@
{ "columns" : [ { "index" : 0, "name" : "P1" },
{ "index" : 1, "name" : "Q1" },
{ "index" : 2, "name" : "P3" } ],
"stream" : "/sharon/prep-a",
"url" : "http://bucket.mit.edu/nilmdb",
"dest_stream" : "/sharon/prep-a-matches",
"start" : 1365153062643133.5,
"end" : 1365168814443575.5,
"exemplars" : [ { "columns" : [ { "index" : 0,
"name" : "P1"
} ],
"dest_column" : 0,
"end" : 1365073657682000,
"name" : "Turn ON",
"start" : 1365073654321000,
"stream" : "/sharon/prep-a",
"url" : "http://bucket.mit.edu/nilmdb"
},
{ "columns" : [ { "index" : 2, "name" : "P3" },
{ "index" : 0, "name" : "P1" } ],
"dest_column" : 1,
"end" : 1365176528818000,
"name" : "Type 2 turn ON",
"start" : 1365176520030000,
"stream" : "/sharon/prep-a",
"url" : "http://bucket.mit.edu/nilmdb"
}
]
}

View File

@@ -0,0 +1,40 @@
{
"url": "http://bucket.mit.edu/nilmdb",
"stream": "/sharon/prep-a",
"dest_stream": "/test/jim",
"start": 1364184839901599,
"end": 1364184942407610.2,
"columns": [ { "index": 0, "name": "P1" } ],
"exemplars": [
{
"name": "A - True DBL Freezer ON",
"dest_column": 0,
"url": "http://bucket.mit.edu/nilmdb",
"stream": "/sharon/prep-a",
"columns": [ { "index": 0, "name": "P1" } ],
"start": 1365277707649000,
"end": 1365277710705000
},
{
"name": "A - Boiler 1 Fan OFF",
"dest_column": 1,
"url": "http://bucket.mit.edu/nilmdb",
"stream": "/sharon/prep-a",
"columns": [ { "index": 0, "name": "P1" } ],
"start": 1364188370735000,
"end": 1364188373819000
},
{
"name": "A - True DBL Freezer OFF",
"dest_column": 2,
"url": "http://bucket.mit.edu/nilmdb",
"stream": "/sharon/prep-a",
"columns": [ { "index": 0, "name": "P1" } ],
"start": 1365278087982000,
"end": 1365278089340000
}
]
}

View File

@@ -0,0 +1,31 @@
{ "url": "http://bucket.mit.edu/nilmdb",
"dest_stream": "/sharon/prep-a-matches",
"stream": "/sharon/prep-a",
"start": 1366111383280463,
"end": 1366126163457797,
"columns": [ { "name": "P1", "index": 0 },
{ "name": "Q1", "index": 1 },
{ "name": "P3", "index": 2 } ],
"exemplars": [
{ "name": "Boiler Pump ON",
"url": "http://bucket.mit.edu/nilmdb",
"stream": "/sharon/prep-a",
"start": 1366260494269078,
"end": 1366260608185031,
"dest_column": 0,
"columns": [ { "name": "P1", "index": 0 },
{ "name": "Q1", "index": 1 }
]
},
{ "name": "Boiler Pump OFF",
"url": "http://bucket.mit.edu/nilmdb",
"stream": "/sharon/prep-a",
"start": 1366260864215764,
"end": 1366260870882998,
"dest_column": 1,
"columns": [ { "name": "P1", "index": 0 },
{ "name": "Q1", "index": 1 }
]
}
]
}

View File

520
nilmtools/_version.py Normal file
View File

@@ -0,0 +1,520 @@
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.
# This file is released into the public domain. Generated by
# versioneer-0.18 (https://github.com/warner/python-versioneer)
"""Git implementation of _version.py."""
import errno
import os
import re
import subprocess
import sys
def get_keywords():
"""Get the keywords needed to look up the version information."""
# these strings will be replaced by git during git-archive.
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
git_refnames = "$Format:%d$"
git_full = "$Format:%H$"
git_date = "$Format:%ci$"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords
class VersioneerConfig:
"""Container for Versioneer configuration parameters."""
def get_config():
"""Create, populate and return the VersioneerConfig() object."""
# these strings are filled in when 'setup.py versioneer' creates
# _version.py
cfg = VersioneerConfig()
cfg.VCS = "git"
cfg.style = "pep440"
cfg.tag_prefix = "nilmtools-"
cfg.parentdir_prefix = "nilmtools-"
cfg.versionfile_source = "nilmtools/_version.py"
cfg.verbose = False
return cfg
class NotThisMethod(Exception):
"""Exception raised if a method is not valid for the current scenario."""
LONG_VERSION_PY = {}
HANDLERS = {}
def register_vcs_handler(vcs, method): # decorator
"""Decorator to mark a method as the handler for a particular VCS."""
def decorate(f):
"""Store f in HANDLERS[vcs][method]."""
if vcs not in HANDLERS:
HANDLERS[vcs] = {}
HANDLERS[vcs][method] = f
return f
return decorate
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
env=None):
"""Call the given command(s)."""
assert isinstance(commands, list)
p = None
for c in commands:
try:
dispcmd = str([c] + args)
# remember shell=False, so use git.cmd on windows, not just git
p = subprocess.Popen([c] + args, cwd=cwd, env=env,
stdout=subprocess.PIPE,
stderr=(subprocess.PIPE if hide_stderr
else None))
break
except EnvironmentError:
e = sys.exc_info()[1]
if e.errno == errno.ENOENT:
continue
if verbose:
print("unable to run %s" % dispcmd)
print(e)
return None, None
else:
if verbose:
print("unable to find command, tried %s" % (commands,))
return None, None
stdout = p.communicate()[0].strip()
if sys.version_info[0] >= 3:
stdout = stdout.decode()
if p.returncode != 0:
if verbose:
print("unable to run %s (error)" % dispcmd)
print("stdout was %s" % stdout)
return None, p.returncode
return stdout, p.returncode
def versions_from_parentdir(parentdir_prefix, root, verbose):
"""Try to determine the version from the parent directory name.
Source tarballs conventionally unpack into a directory that includes both
the project name and a version string. We will also support searching up
two directory levels for an appropriately named parent directory
"""
rootdirs = []
for i in range(3):
dirname = os.path.basename(root)
if dirname.startswith(parentdir_prefix):
return {"version": dirname[len(parentdir_prefix):],
"full-revisionid": None,
"dirty": False, "error": None, "date": None}
else:
rootdirs.append(root)
root = os.path.dirname(root) # up a level
if verbose:
print("Tried directories %s but none started with prefix %s" %
(str(rootdirs), parentdir_prefix))
raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
"""Extract version information from the given file."""
# the code embedded in _version.py can just fetch the value of these
# keywords. When used from setup.py, we don't want to import _version.py,
# so we do it with a regexp instead. This function is not used from
# _version.py.
keywords = {}
try:
f = open(versionfile_abs, "r")
for line in f.readlines():
if line.strip().startswith("git_refnames ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["refnames"] = mo.group(1)
if line.strip().startswith("git_full ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["full"] = mo.group(1)
if line.strip().startswith("git_date ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["date"] = mo.group(1)
f.close()
except EnvironmentError:
pass
return keywords
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
"""Get version information from git keywords."""
if not keywords:
raise NotThisMethod("no keywords at all, weird")
date = keywords.get("date")
if date is not None:
# git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
# datestamp. However we prefer "%ci" (which expands to an "ISO-8601
# -like" string, which we must then edit to make compliant), because
# it's been around since git-1.5.3, and it's too difficult to
# discover which version we're using, or to work around using an
# older one.
date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
refnames = keywords["refnames"].strip()
if refnames.startswith("$Format"):
if verbose:
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = set([r.strip() for r in refnames.strip("()").split(",")])
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
# expansion behaves like git log --decorate=short and strips out the
# refs/heads/ and refs/tags/ prefixes that would let us distinguish
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
tags = set([r for r in refs if re.search(r'\d', r)])
if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags))
if verbose:
print("likely tags: %s" % ",".join(sorted(tags)))
for ref in sorted(tags):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
r = ref[len(tag_prefix):]
if verbose:
print("picking %s" % r)
return {"version": r,
"full-revisionid": keywords["full"].strip(),
"dirty": False, "error": None,
"date": date}
# no suitable tags, so version is "0+unknown", but full hex is still there
if verbose:
print("no suitable tags, using unknown + full revision id")
return {"version": "0+unknown",
"full-revisionid": keywords["full"].strip(),
"dirty": False, "error": "no suitable tags", "date": None}
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
"""Get version from 'git describe' in the root of the source tree.
This only gets called if the git-archive 'subst' keywords were *not*
expanded, and _version.py hasn't already been rewritten with a short
version string, meaning we're inside a checked out source tree.
"""
GITS = ["git"]
if sys.platform == "win32":
GITS = ["git.cmd", "git.exe"]
out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
hide_stderr=True)
if rc != 0:
if verbose:
print("Directory %s not under git control" % root)
raise NotThisMethod("'git rev-parse --git-dir' returned error")
# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM)
describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
"--always", "--long",
"--match", "%s*" % tag_prefix],
cwd=root)
# --long was added in git-1.5.5
if describe_out is None:
raise NotThisMethod("'git describe' failed")
describe_out = describe_out.strip()
full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
if full_out is None:
raise NotThisMethod("'git rev-parse' failed")
full_out = full_out.strip()
pieces = {}
pieces["long"] = full_out
pieces["short"] = full_out[:7] # maybe improved later
pieces["error"] = None
# parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
# TAG might have hyphens.
git_describe = describe_out
# look for -dirty suffix
dirty = git_describe.endswith("-dirty")
pieces["dirty"] = dirty
if dirty:
git_describe = git_describe[:git_describe.rindex("-dirty")]
# now we have TAG-NUM-gHEX or HEX
if "-" in git_describe:
# TAG-NUM-gHEX
mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
if not mo:
# unparseable. Maybe git-describe is misbehaving?
pieces["error"] = ("unable to parse git-describe output: '%s'"
% describe_out)
return pieces
# tag
full_tag = mo.group(1)
if not full_tag.startswith(tag_prefix):
if verbose:
fmt = "tag '%s' doesn't start with prefix '%s'"
print(fmt % (full_tag, tag_prefix))
pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
% (full_tag, tag_prefix))
return pieces
pieces["closest-tag"] = full_tag[len(tag_prefix):]
# distance: number of commits since tag
pieces["distance"] = int(mo.group(2))
# commit: short hex revision ID
pieces["short"] = mo.group(3)
else:
# HEX: no tags
pieces["closest-tag"] = None
count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
cwd=root)
pieces["distance"] = int(count_out) # total number of commits
# commit date: see ISO-8601 comment in git_versions_from_keywords()
date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
cwd=root)[0].strip()
pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
return pieces
def plus_or_dot(pieces):
"""Return a + if we don't already have one, else return a ."""
if "+" in pieces.get("closest-tag", ""):
return "."
return "+"
def render_pep440(pieces):
"""Build up version string, with post-release "local version identifier".
Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
Exceptions:
1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += plus_or_dot(pieces)
rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
rendered = "0+untagged.%d.g%s" % (pieces["distance"],
pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered
def render_pep440_pre(pieces):
"""TAG[.post.devDISTANCE] -- No -dirty.
Exceptions:
1: no tags. 0.post.devDISTANCE
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"]:
rendered += ".post.dev%d" % pieces["distance"]
else:
# exception #1
rendered = "0.post.dev%d" % pieces["distance"]
return rendered
def render_pep440_post(pieces):
"""TAG[.postDISTANCE[.dev0]+gHEX] .
The ".dev0" means dirty. Note that .dev0 sorts backwards
(a dirty tree will appear "older" than the corresponding clean one),
but you shouldn't be releasing software with -dirty anyways.
Exceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += ".post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
rendered += plus_or_dot(pieces)
rendered += "g%s" % pieces["short"]
else:
# exception #1
rendered = "0.post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
rendered += "+g%s" % pieces["short"]
return rendered
def render_pep440_old(pieces):
"""TAG[.postDISTANCE[.dev0]] .
The ".dev0" means dirty.
Eexceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += ".post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
else:
# exception #1
rendered = "0.post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
return rendered
def render_git_describe(pieces):
"""TAG[-DISTANCE-gHEX][-dirty].
Like 'git describe --tags --dirty --always'.
Exceptions:
1: no tags. HEX[-dirty] (note: no 'g' prefix)
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"]:
rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
else:
# exception #1
rendered = pieces["short"]
if pieces["dirty"]:
rendered += "-dirty"
return rendered
def render_git_describe_long(pieces):
"""TAG-DISTANCE-gHEX[-dirty].
Like 'git describe --tags --dirty --always -long'.
The distance/hash is unconditional.
Exceptions:
1: no tags. HEX[-dirty] (note: no 'g' prefix)
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
else:
# exception #1
rendered = pieces["short"]
if pieces["dirty"]:
rendered += "-dirty"
return rendered
def render(pieces, style):
"""Render the given version pieces into the requested style."""
if pieces["error"]:
return {"version": "unknown",
"full-revisionid": pieces.get("long"),
"dirty": None,
"error": pieces["error"],
"date": None}
if not style or style == "default":
style = "pep440" # the default
if style == "pep440":
rendered = render_pep440(pieces)
elif style == "pep440-pre":
rendered = render_pep440_pre(pieces)
elif style == "pep440-post":
rendered = render_pep440_post(pieces)
elif style == "pep440-old":
rendered = render_pep440_old(pieces)
elif style == "git-describe":
rendered = render_git_describe(pieces)
elif style == "git-describe-long":
rendered = render_git_describe_long(pieces)
else:
raise ValueError("unknown style '%s'" % style)
return {"version": rendered, "full-revisionid": pieces["long"],
"dirty": pieces["dirty"], "error": None,
"date": pieces.get("date")}
def get_versions():
"""Get version information or return default if unable to do so."""
# I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
# __file__, we can work backwards from there to the root. Some
# py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
# case we can only use expanded keywords.
cfg = get_config()
verbose = cfg.verbose
try:
return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
verbose)
except NotThisMethod:
pass
try:
root = os.path.realpath(__file__)
# versionfile_source is the relative path from the top of the source
# tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__.
for i in cfg.versionfile_source.split('/'):
root = os.path.dirname(root)
except NameError:
return {"version": "0+unknown", "full-revisionid": None,
"dirty": None,
"error": "unable to find root of source tree",
"date": None}
try:
pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
return render(pieces, cfg.style)
except NotThisMethod:
pass
try:
if cfg.parentdir_prefix:
return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
except NotThisMethod:
pass
return {"version": "0+unknown", "full-revisionid": None,
"dirty": None,
"error": "unable to compute version", "date": None}

267
nilmtools/cleanup.py Executable file
View File

@@ -0,0 +1,267 @@
#!/usr/bin/env python3
from nilmdb.utils.printf import printf, fprintf, sprintf
from nilmdb.utils.time import (timestamp_to_human,
timestamp_to_seconds, seconds_to_timestamp)
from nilmdb.utils.diskusage import human_size
from nilmdb.utils.interval import Interval
import nilmdb.client
import nilmdb.client.numpyclient
import nilmtools
import argparse
import configparser
import sys
import collections
import fnmatch
import re
import os
def warn(msg, *args):
fprintf(sys.stderr, "warning: " + msg + "\n", *args)
class TimePeriod(object):
_units = {'h': ('hour', 60*60),
'd': ('day', 60*60*24),
'w': ('week', 60*60*24*7),
'm': ('month', 60*60*24*30),
'y': ('year', 60*60*24*365)}
def __init__(self, val):
for u in self._units:
if val.endswith(u):
self.unit = self._units[u][0]
self.scale = self._units[u][1]
self.count = float(val[:-len(u)])
break
else:
raise ValueError("unknown units: " + val)
def seconds(self):
return self.count * self.scale
def describe_seconds(self, seconds):
count = seconds / self.scale
units = self.unit if count == 1 else (self.unit + "s")
if count == int(count):
return sprintf("%d %s", count, units)
else:
return sprintf("%.2f %s", count, units)
def __str__(self):
return self.describe_seconds(self.seconds())
class StreamCleanupConfig(object):
def __init__(self, info):
self.path = info[0]
self.layout = info[1]
if info[4] != 0 and info[5] != 0:
self.rate = info[4] / timestamp_to_seconds(info[5])
else:
self.rate = None
self.keep = None
self.clean_decimated = True
self.decimated_from = None
self.also_clean_paths = []
def main(argv=None):
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description="""\
Clean up old data from streams using a configuration file to specify
which data to remove.
The format of the config file is as follows:
[/stream/path]
keep = 3w # keep up to 3 weeks of data
rate = 8000 # optional, used for the --estimate option
decimated = false # whether to delete decimated data too (default true)
[*/prep]
keep = 3.5m # or 2520h or 105d or 15w or 0.29y
The suffix for 'keep' is 'h' for hours, 'd' for days, 'w' for weeks,
'm' for months, or 'y' for years.
Streams paths may include wildcards. If a path is matched by more than
one config section, data from the last config section counts.
Decimated streams (paths containing '~decim-') are treated specially:
- They don't match wildcards
- When deleting data from a parent stream, data is also deleted
from its decimated streams, unless decimated=false
Rate is optional and is only used for the --estimate option.
""")
parser.add_argument("-v", "--version", action="version",
version=nilmtools.__version__)
def_url = os.environ.get("NILMDB_URL", "http://localhost/nilmdb/")
parser.add_argument("-u", "--url", action="store", default=def_url,
help="NilmDB server URL (default: %(default)s)")
parser.add_argument("-y", "--yes", action="store_true",
default=False,
help="Actually remove the data (default: no)")
parser.add_argument("-e", "--estimate", action="store_true",
default=False,
help="Estimate how much disk space will be used")
parser.add_argument("configfile", type=argparse.FileType('r'),
help="Configuration file")
args = parser.parse_args(argv)
# Parse config file
config = configparser.RawConfigParser()
config.readfp(args.configfile)
# List all streams
client = nilmdb.client.Client(args.url)
streamlist = client.stream_list(extended=True)
# Create config objects
streams = collections.OrderedDict()
for s in streamlist:
streams[s[0]] = StreamCleanupConfig(s)
m = re.search(r"^(.*)~decim-[0-9]+$", s[0])
if m:
streams[s[0]].decimated_from = m.group(1)
# Build up configuration
for section in config.sections():
matched = False
for path in streams.keys():
# Decimated streams only allow exact matches
if streams[path].decimated_from and path != section:
continue
if not fnmatch.fnmatch(path, section):
continue
matched = True
options = config.options(section)
# Keep period (days, weeks, months, years)
if 'keep' in options:
streams[path].keep = TimePeriod(config.get(section, 'keep'))
options.remove('keep')
# Rate
if 'rate' in options:
streams[path].rate = config.getfloat(section, 'rate')
options.remove('rate')
# Decimated
if 'decimated' in options:
val = config.getboolean(section, 'decimated')
streams[path].clean_decimated = val
options.remove('decimated')
for leftover in options:
warn("option '%s' for '%s' is unknown", leftover, section)
if not matched:
warn("config for '%s' did not match any existing streams", section)
# List all decimated streams in the parent stream's info
for path in list(streams.keys()):
src = streams[path].decimated_from
if src and src in streams:
if streams[src].clean_decimated:
streams[src].also_clean_paths.append(path)
del streams[path]
# Warn about streams that aren't getting cleaned up
for path in list(streams.keys()):
if streams[path].keep is None or streams[path].keep.seconds() < 0:
warn("no config for existing stream '%s'", path)
del streams[path]
if args.estimate:
# Estimate disk usage
total = 0
for path in list(streams.keys()):
rate = streams[path].rate
if not rate or rate < 0:
warn("unable to estimate disk usage for stream '%s' because "
"the data rate is unknown", path)
continue
printf("%s:\n", path)
layout = streams[path].layout
dtype = nilmdb.client.numpyclient.layout_to_dtype(layout)
per_row = dtype.itemsize
per_sec = per_row * rate
printf("%17s: %s per row, %s rows per second\n",
"base rate",
human_size(per_row),
round(rate, 1))
printf("%17s: %s per hour, %s per day\n",
"base size",
human_size(per_sec * 3600),
human_size(per_sec * 3600 * 24))
# If we'll be cleaning up decimated data, add an
# estimation for how much room decimated data takes up.
if streams[path].clean_decimated:
d_layout = "float32_" + str(3*(int(layout.split('_')[1])))
d_dtype = nilmdb.client.numpyclient.layout_to_dtype(d_layout)
# Assume the decimations will be a factor of 4
# sum_{k=0..inf} (rate / (n^k)) * d_dtype.itemsize
d_per_row = d_dtype.itemsize
factor = 4.0
d_per_sec = (d_per_row *
(rate / factor) *
(1 / (1 - (1/factor))))
per_sec += d_per_sec
printf("%17s: %s per hour, %s per day\n",
"with decimation",
human_size(per_sec * 3600),
human_size(per_sec * 3600 * 24))
keep = per_sec * streams[path].keep.seconds()
printf("%17s: %s\n\n",
"keep " + str(streams[path].keep), human_size(keep))
total += keep
printf("Total estimated disk usage for these streams:\n")
printf(" %s\n", human_size(total))
raise SystemExit(0)
# Do the cleanup
for path in streams:
printf("%s: keep %s\n", path, streams[path].keep)
# Figure out the earliest timestamp we should keep.
intervals = [Interval(start, end) for (start, end) in
reversed(list(client.stream_intervals(path)))]
total = 0
keep = seconds_to_timestamp(streams[path].keep.seconds())
for i in intervals:
total += i.end - i.start
if total <= keep:
continue
remove_before = i.start + (total - keep)
break
else:
printf(" nothing to do (only %s of data present)\n",
streams[path].keep.describe_seconds(
timestamp_to_seconds(total)))
continue
printf(" removing data before %s\n",
timestamp_to_human(remove_before))
# Clean in reverse order. Since we only use the primary stream and not
# the decimated streams to figure out which data to remove, removing
# the primary stream last means that we might recover more nicely if
# we are interrupted and restarted.
clean_paths = list(reversed(streams[path].also_clean_paths)) + [path]
for p in clean_paths:
printf(" removing from %s\n", p)
if args.yes:
client.stream_remove(p, None, remove_before)
# All done
if not args.yes:
printf("Note: specify --yes to actually perform removals\n")
return
if __name__ == "__main__":
main()

View File

@@ -1,30 +1,31 @@
#!/usr/bin/python
#!/usr/bin/env python3
# This is called copy_one instead of copy to avoid name conflicts with
# the Python standard library.
import nilmtools.filter
import nilmdb.client
from nilmdb.client.numpyclient import NumpyClient
import numpy as np
import sys
def main(argv = None):
def main(argv=None):
f = nilmtools.filter.Filter()
parser = f.setup_parser("Copy a stream")
parser.add_argument('-n', '--nometa', action='store_true',
help="Don't copy or check metadata")
# Parse arguments
try:
args = f.parse_args(argv)
except nilmtools.filter.MissingDestination as e:
print "Source is %s (%s)" % (e.src.path, e.src.layout)
print "Destination %s doesn't exist" % (e.dest.path)
print "You could make it with a command like:"
print " nilmtool -u %s create %s %s" % (e.dest.url,
e.dest.path, e.src.layout)
print("Source is %s (%s)" % (e.src.path, e.src.layout))
print("Destination %s doesn't exist" % (e.dest.path))
print("You could make it with a command like:")
print(" nilmtool -u %s create %s %s" % (e.dest.url,
e.dest.path, e.src.layout))
raise SystemExit(1)
# Copy metadata
if not args.nometa:
meta = f.client_src.stream_get_metadata(f.src.path)
f.check_dest_metadata(meta)
@@ -32,10 +33,11 @@ def main(argv = None):
extractor = NumpyClient(f.src.url).stream_extract_numpy
inserter = NumpyClient(f.dest.url).stream_insert_numpy_context
for i in f.intervals():
print "Processing", f.interval_string(i)
print("Processing", i.human_string())
with inserter(f.dest.path, i.start, i.end) as insert_ctx:
for data in extractor(f.src.path, i.start, i.end):
insert_ctx.insert(data)
if __name__ == "__main__":
main()

View File

@@ -1,21 +1,23 @@
#!/usr/bin/python
#!/usr/bin/env python3
# Copy streams between NilmDB servers with wildcards
import nilmtools.filter
import nilmtools.copy_one
import nilmdb.client
import argparse
import fnmatch
def main(argv = None):
def main(argv=None):
f = nilmtools.filter.Filter()
# Reuse filter's parser, since it handles most options we need.
parser = f.setup_parser(description = """\
parser = f.setup_parser(description="""\
Copy all streams matching the given wildcard from one host to another.
Example: %(prog)s -u http://host1/nilmdb -U http://host2/nilmdb /sharon/*
""", skip_paths = True)
""", skip_paths=True)
parser.add_argument('-n', '--nometa', action='store_true',
help="Don't copy or check metadata")
parser.add_argument("path", action="store", nargs="+",
help='Wildcard paths to copy')
args = parser.parse_args(argv)
@@ -27,13 +29,13 @@ def main(argv = None):
client_dest = nilmdb.client.Client(args.dest_url)
if client_src.geturl() == client_dest.geturl():
parser.error("source and destination URL must be different")
print "Source URL:", client_src.geturl()
print " Dest URL:", client_dest.geturl()
print("Source URL:", client_src.geturl())
print(" Dest URL:", client_dest.geturl())
# Find matching streams
matched = []
for path in args.path:
matched.extend([s for s in client_src.stream_list(extended = True)
matched.extend([s for s in client_src.stream_list(extended=True)
if fnmatch.fnmatch(s[0], path)
and s not in matched])
@@ -42,20 +44,22 @@ def main(argv = None):
src = nilmtools.filter.StreamInfo(client_src.geturl(), stream)
dest = nilmtools.filter.get_stream_info(client_dest, src.path)
if not dest:
print "Creating destination stream", src.path
print("Creating destination stream", src.path)
client_dest.stream_create(src.path, src.layout)
# Copy them all by running the "copy" tool as if it were
# invoked from the command line.
for stream in matched:
new_argv = ["--url", client_src.geturl(),
"--dest-url", client_dest.geturl() ]
"--dest-url", client_dest.geturl()]
if args.start:
new_argv.extend(["--start", "@" + repr(args.start)])
if args.end:
new_argv.extend(["--end", "@" + repr(args.end)])
if args.dry_run:
new_argv.extend(["--dry-run"])
if args.nometa:
new_argv.extend(["--nometa"])
if args.force_metadata:
new_argv.extend(["--force-metadata"])
new_argv.extend([stream[0], stream[0]])
@@ -63,8 +67,9 @@ def main(argv = None):
nilmtools.copy_one.main(new_argv)
except SystemExit as e:
# Ignore SystemExit which could be raised on --dry-run
if e.code != 0:
if e.code != 0: # pragma: no cover (shouldn't happen)
raise
if __name__ == "__main__":
main()

View File

@@ -1,11 +1,10 @@
#!/usr/bin/python
#!/usr/bin/env python3
import nilmtools.filter
import nilmdb.client
import numpy as np
import operator
def main(argv = None):
def main(argv=None):
f = nilmtools.filter.Filter()
parser = f.setup_parser("Decimate a stream")
group = parser.add_argument_group("Decimate options")
@@ -20,31 +19,34 @@ def main(argv = None):
# a recommended layout.
src = e.src
dest = e.dest
print "Source is %s (%s)" % (src.path, src.layout)
print "Destination %s doesn't exist" % (dest.path)
print("Source is %s (%s)" % (src.path, src.layout))
print("Destination %s doesn't exist" % (dest.path))
if "decimate_source" in f.client_src.stream_get_metadata(src.path):
rec = src.layout
elif 'int32' in src.layout_type or 'float64' in src.layout_type:
elif ('int32' in src.layout_type or
'int64' in src.layout_type or
'float64' in src.layout_type):
rec = 'float64_' + str(src.layout_count * 3)
else:
rec = 'float32_' + str(src.layout_count * 3)
print "You could make it with a command like:"
print " nilmtool -u %s create %s %s" % (e.dest.url,
e.dest.path, rec)
print("You could make it with a command like:")
print(" nilmtool -u %s create %s %s" % (e.dest.url,
e.dest.path, rec))
raise SystemExit(1)
if not (args.factor >= 2):
raise Exception("factor needs to be 2 or more")
f.check_dest_metadata({ "decimate_source": f.src.path,
"decimate_factor": args.factor })
f.check_dest_metadata({"decimate_source": f.src.path,
"decimate_factor": args.factor})
# If source is decimated, we have to decimate a bit differently
if "decimate_source" in f.client_src.stream_get_metadata(args.srcpath):
again = True
else:
again = False
f.process_numpy(decimate, args = (args.factor, again))
f.process_numpy(decimate, args=(args.factor, again))
def decimate(data, interval, args, insert_function, final):
"""Decimate data"""
@@ -68,18 +70,19 @@ def decimate(data, interval, args, insert_function, final):
# Discard extra rows that aren't a multiple of factor
n = n // factor * factor
data = data[:n,:]
data = data[:n, :]
# Reshape it into 3D so we can process 'factor' rows at a time
data = data.reshape(n // factor, factor, m)
# Fill the result
out = np.c_[ np.mean(data[:,:,mean_col], axis=1),
np.min(data[:,:,min_col], axis=1),
np.max(data[:,:,max_col], axis=1) ]
out = np.c_[np.mean(data[:, :, mean_col], axis=1),
np.min(data[:, :, min_col], axis=1),
np.max(data[:, :, max_col], axis=1)]
insert_function(out)
return n
if __name__ == "__main__":
main()

112
nilmtools/decimate_auto.py Executable file
View File

@@ -0,0 +1,112 @@
#!/usr/bin/env python3
import os
import nilmtools.filter
import nilmtools.decimate
import nilmdb.client
import argparse
import fnmatch
def main(argv=None):
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description="""\
Automatically create multiple decimations from a single source
stream, continuing until the last decimated level contains fewer
than 500 points total.
Wildcards and multiple paths are accepted. Decimated paths are
ignored when matching wildcards.
""")
def_url = os.environ.get("NILMDB_URL", "http://localhost/nilmdb/")
parser.add_argument("-u", "--url", action="store", default=def_url,
help="NilmDB server URL (default: %(default)s)")
parser.add_argument("-f", "--factor", action="store", default=4, type=int,
help='Decimation factor (default: %(default)s)')
parser.add_argument("-m", "--max", action="store", default=500, type=int,
help='Maximum number of points in last level ' +
'(default: %(default)s)')
parser.add_argument("-F", "--force-metadata", action="store_true",
default=False,
help="Force metadata changes if the dest "
"doesn't match")
parser.add_argument("-v", "--version", action="version",
version=nilmtools.__version__)
parser.add_argument("path", action="store", nargs='+',
help='Path of base stream')
args = parser.parse_args(argv)
if args.max < 0:
print("error: bad max, must be nonnegative")
raise SystemExit(1)
# Pull out info about the base stream
client = nilmdb.client.Client(args.url)
# Find list of paths to process
streams = [str(s[0]) for s in client.stream_list()]
streams = [s for s in streams if "~decim-" not in s]
paths = []
for path in args.path:
new = fnmatch.filter(streams, str(path))
if not new:
print("error: no stream matched path:", path)
raise SystemExit(1)
paths.extend(new)
for path in paths:
do_decimation(client, args, path)
def do_decimation(client, args, path):
print("Decimating", path)
info = nilmtools.filter.get_stream_info(client, path)
if not info: # pragma: no cover (only good paths passed above)
raise Exception("path " + path + " not found")
meta = client.stream_get_metadata(path)
if "decimate_source" in meta:
print("Stream", path, "was decimated from", meta["decimate_source"])
print("You need to pass the base stream instead")
raise SystemExit(1)
# Figure out the type we should use for decimated streams
if ('int32' in info.layout_type or
'int64' in info.layout_type or
'float64' in info.layout_type):
decimated_type = 'float64_' + str(info.layout_count * 3)
else:
decimated_type = 'float32_' + str(info.layout_count * 3)
# Now do the decimations until we have few enough points
factor = 1
while True:
print("Level", factor, "decimation has", info.rows, "rows")
if info.rows <= args.max:
break
factor *= args.factor
new_path = "%s~decim-%d" % (path, factor)
# Create the stream if needed
new_info = nilmtools.filter.get_stream_info(client, new_path)
if not new_info:
print("Creating stream", new_path)
client.stream_create(new_path, decimated_type)
# Run the decimation as if it were run from the commandline
new_argv = ["-u", args.url,
"-f", str(args.factor)]
if args.force_metadata:
new_argv.extend(["--force-metadata"])
new_argv.extend([info.path, new_path])
nilmtools.decimate.main(new_argv)
# Update info using the newly decimated stream
info = nilmtools.filter.get_stream_info(client, new_path)
return
if __name__ == "__main__":
main()

393
nilmtools/filter.py Normal file
View File

@@ -0,0 +1,393 @@
#!/usr/bin/env python3
import nilmdb.client
from nilmdb.client import Client
from nilmdb.client.numpyclient import NumpyClient
from nilmdb.utils.printf import printf, sprintf
from nilmdb.utils.interval import Interval
import nilmtools
import os
import argparse
import numpy as np
import functools
class ArgumentError(Exception):
pass
class MissingDestination(Exception):
def __init__(self, args, src, dest):
self.parsed_args = args
self.src = src
self.dest = dest
Exception.__init__(self, f"destination path {dest.path} not found")
class StreamInfo(object):
def __init__(self, url, info):
self.url = url
self.info = info
try:
self.path = info[0]
self.layout = info[1]
self.layout_type = self.layout.split('_')[0]
self.layout_count = int(self.layout.split('_')[1])
self.total_count = self.layout_count + 1
self.timestamp_min = info[2]
self.timestamp_max = info[3]
self.rows = info[4]
self.seconds = nilmdb.utils.time.timestamp_to_seconds(info[5])
except (IndexError, TypeError):
pass
def string(self, interhost):
"""Return stream info as a string. If interhost is true,
include the host URL."""
if interhost:
return sprintf("[%s] ", self.url) + str(self)
return str(self)
def __str__(self):
"""Return stream info as a string."""
return sprintf("%s (%s), %.2fM rows, %.2f hours",
self.path, self.layout, self.rows / 1e6,
self.seconds / 3600.0)
def get_stream_info(client, path):
"""Return a StreamInfo object about the given path, or None if it
doesn't exist"""
streams = client.stream_list(path, extended=True)
if len(streams) != 1:
return None
return StreamInfo(client.geturl(), streams[0])
# Filter processing for a single interval of data.
def process_numpy_interval(interval, extractor, inserter, warn_rows,
function, args=None):
"""For the given 'interval' of data, extract data, process it
through 'function', and insert the result.
'extractor' should be a function like NumpyClient.stream_extract_numpy
but with the the interval 'start' and 'end' as the only parameters,
e.g.:
extractor = functools.partial(NumpyClient.stream_extract_numpy,
src_path, layout = l, maxrows = m)
'inserter' should be a function like NumpyClient.stream_insert_context
but with the interval 'start' and 'end' as the only parameters, e.g.:
inserter = functools.partial(NumpyClient.stream_insert_context,
dest_path)
If 'warn_rows' is not None, print a warning to stdout when the
number of unprocessed rows exceeds this amount.
See process_numpy for details on 'function' and 'args'.
"""
if args is None:
args = []
with inserter(interval.start, interval.end) as insert_ctx:
insert_func = insert_ctx.insert
old_array = np.array([])
for new_array in extractor(interval.start, interval.end):
# If we still had old data left, combine it
if old_array.shape[0] != 0:
array = np.vstack((old_array, new_array))
else:
array = new_array
# Pass the data to the user provided function
processed = function(array, interval, args, insert_func, False)
# Send any pending data that the user function inserted
insert_ctx.send()
# Save the unprocessed parts
if processed >= 0:
old_array = array[processed:]
else:
raise Exception(
sprintf("%s return value %s must be >= 0",
str(function), str(processed)))
# Warn if there's too much data remaining
if warn_rows is not None and old_array.shape[0] > warn_rows:
printf("warning: %d unprocessed rows in buffer\n",
old_array.shape[0])
# Last call for this contiguous interval
if old_array.shape[0] != 0:
processed = function(old_array, interval, args,
insert_func, True)
if processed != old_array.shape[0]:
# Truncate the interval we're inserting at the first
# unprocessed data point. This ensures that
# we'll not miss any data when we run again later.
insert_ctx.update_end(old_array[processed][0])
def example_callback_function(data, interval, args, insert_func, final):
"""Example of the signature for the function that gets passed
to process_numpy_interval.
'data': array of data to process -- may be empty
'interval': overall interval we're processing (but not necessarily
the interval of this particular chunk of data)
'args': opaque arguments passed to process_numpy
'insert_func': function to call in order to insert array of data.
Should be passed a 2-dimensional array of data to insert.
Data timestamps must be within the provided interval.
'final': True if this is the last bit of data for this
contiguous interval, False otherwise.
Return value of 'function' is the number of data rows processed.
Unprocessed data will be provided again in a subsequent call
(unless 'final' is True).
If unprocessed data remains after 'final' is True, the interval
being inserted will be ended at the timestamp of the first
unprocessed data point.
"""
raise NotImplementedError("example_callback_function does nothing")
class Filter(object):
def __init__(self, parser_description=None):
self._parser = None
self._client_src = None
self._client_dest = None
self._using_client = False
self.src = None
self.dest = None
self.start = None
self.end = None
self._interhost = False
self._force_metadata = False
self.def_url = os.environ.get("NILMDB_URL", "http://localhost/nilmdb/")
if parser_description is not None:
self.setup_parser(parser_description)
self.parse_args()
@property
def client_src(self):
if self._using_client:
raise Exception("Filter src client is in use; make another")
return self._client_src
@property
def client_dest(self):
if self._using_client:
raise Exception("Filter dest client is in use; make another")
return self._client_dest
def setup_parser(self, description="Filter data", skip_paths=False):
parser = argparse.ArgumentParser(
formatter_class=argparse.RawDescriptionHelpFormatter,
description=description)
group = parser.add_argument_group("General filter arguments")
group.add_argument("-u", "--url", action="store",
default=self.def_url,
help="Server URL (default: %(default)s)")
group.add_argument("-U", "--dest-url", action="store",
help="Destination server URL "
"(default: same as source)")
group.add_argument("-D", "--dry-run", action="store_true",
default=False,
help="Just print intervals that would be "
"processed")
group.add_argument("-q", "--quiet", action="store_true",
default=False,
help="Don't print source and dest stream info")
group.add_argument("-F", "--force-metadata", action="store_true",
default=False,
help="Force metadata changes if the dest "
"doesn't match")
group.add_argument("-s", "--start",
metavar="TIME", type=self.arg_time,
help="Starting timestamp for intervals "
"(free-form, inclusive)")
group.add_argument("-e", "--end",
metavar="TIME", type=self.arg_time,
help="Ending timestamp for intervals "
"(free-form, noninclusive)")
group.add_argument("-v", "--version", action="version",
version=nilmtools.__version__)
if not skip_paths:
# Individual filter scripts might want to add these arguments
# themselves, to include multiple sources in a different order
# (for example). "srcpath" and "destpath" arguments must exist,
# though.
group.add_argument("srcpath", action="store",
help="Path of source stream, eg. /foo/bar")
group.add_argument("destpath", action="store",
help="Path of destination stream, eg. /foo/bar")
self._parser = parser
return parser
def set_args(self, url, dest_url, srcpath, destpath, start, end,
parsed_args=None, quiet=True):
"""Set arguments directly from parameters"""
if dest_url is None:
dest_url = url
if url != dest_url:
self._interhost = True
self._client_src = Client(url)
self._client_dest = Client(dest_url)
if (not self._interhost) and (srcpath == destpath):
raise ArgumentError(
"source and destination path must be different")
# Open the streams
self.src = get_stream_info(self._client_src, srcpath)
if not self.src:
raise ArgumentError("source path " + srcpath + " not found")
self.dest = get_stream_info(self._client_dest, destpath)
if not self.dest:
raise MissingDestination(parsed_args, self.src,
StreamInfo(dest_url, [destpath]))
self.start = start
self.end = end
# Print info
if not quiet:
print("Source:", self.src.string(self._interhost))
print(" Dest:", self.dest.string(self._interhost))
def parse_args(self, argv=None):
"""Parse arguments from a command line"""
args = self._parser.parse_args(argv)
self.set_args(args.url, args.dest_url, args.srcpath, args.destpath,
args.start, args.end, quiet=args.quiet, parsed_args=args)
self._force_metadata = args.force_metadata
if args.dry_run:
for interval in self.intervals():
print(interval.human_string())
raise SystemExit(0)
return args
def intervals(self):
"""Generate all the intervals that this filter should process"""
self._using_client = True
if self._interhost:
# Do the difference ourselves
s_intervals = (Interval(start, end)
for (start, end) in
self._client_src.stream_intervals(
self.src.path,
start=self.start, end=self.end))
d_intervals = (Interval(start, end)
for (start, end) in
self._client_dest.stream_intervals(
self.dest.path,
start=self.start, end=self.end))
intervals = nilmdb.utils.interval.set_difference(s_intervals,
d_intervals)
else:
# Let the server do the difference for us
intervals = (Interval(start, end)
for (start, end) in
self._client_src.stream_intervals(
self.src.path, diffpath=self.dest.path,
start=self.start, end=self.end))
# Optimize intervals: join intervals that are adjacent
for interval in nilmdb.utils.interval.optimize(intervals):
yield interval
self._using_client = False
# Misc helpers
@staticmethod
def arg_time(toparse):
"""Parse a time string argument"""
try:
return nilmdb.utils.time.parse_time(toparse)
except ValueError as e:
raise argparse.ArgumentTypeError(sprintf("%s \"%s\"",
str(e), toparse))
def check_dest_metadata(self, data):
"""See if the metadata jives, and complain if it doesn't. For
each key in data, if the stream contains the key, it must match
values. If the stream does not contain the key, it is created."""
metadata = self._client_dest.stream_get_metadata(self.dest.path)
if not self._force_metadata:
for key in data:
wanted = data[key]
if not isinstance(wanted, str):
wanted = str(wanted)
val = metadata.get(key, wanted)
if val != wanted and self.dest.rows > 0:
m = "Metadata in destination stream:\n"
m += " %s = %s\n" % (key, val)
m += "doesn't match desired data:\n"
m += " %s = %s\n" % (key, wanted)
m += "Refusing to change it. To prevent this error, "
m += "change or delete the metadata with nilmtool,\n"
m += "remove existing data from the stream, or "
m += "retry with --force-metadata."
raise Exception(m)
# All good -- write the metadata in case it's not already there
self._client_dest.stream_update_metadata(self.dest.path, data)
# The main filter processing method.
def process_numpy(self, function, args=None, rows=100000,
intervals=None):
"""Calls process_numpy_interval for each interval that currently
exists in self.src, but doesn't exist in self.dest. It will
process the data in chunks as follows:
For each chunk of data, call 'function' with a Numpy array
corresponding to the data. The data is converted to a Numpy
array in chunks of 'rows' rows at a time.
If 'intervals' is not None, process those intervals instead of
the default list.
'function' should be defined with the same interface as
nilmtools.filter.example_callback_function. See the
documentation of that for details. 'args' are passed to
'function'.
"""
extractor = NumpyClient(self.src.url).stream_extract_numpy
inserter = NumpyClient(self.dest.url).stream_insert_numpy_context
extractor_func = functools.partial(extractor, self.src.path,
layout=self.src.layout,
maxrows=rows)
inserter_func = functools.partial(inserter, self.dest.path)
for interval in (intervals or self.intervals()):
print("Processing", interval.human_string())
process_numpy_interval(interval, extractor_func, inserter_func,
rows * 3, function, args)
def main(argv=None):
# This is just a dummy function; actual filters can use the other
# functions to prepare stuff, and then do something with the data.
f = Filter()
parser = f.setup_parser() # noqa: F841
args = f.parse_args(argv) # noqa: F841
for i in f.intervals():
print("Generic filter: need to handle", i.human_string())
if __name__ == "__main__":
main()

View File

@@ -1,29 +1,29 @@
#!/usr/bin/python
#!/usr/bin/env python3
import nilmdb.client
from nilmdb.utils.printf import *
from nilmdb.utils.printf import printf, sprintf
from nilmdb.utils.time import (parse_time, timestamp_to_human,
timestamp_to_seconds, seconds_to_timestamp,
rate_to_period, now as time_now)
import os
import nilmtools
import time
import sys
import re
import argparse
import subprocess
import textwrap
class ParseError(Exception):
def __init__(self, filename, error):
msg = filename + ": " + error
super(ParseError, self).__init__(msg)
def parse_args(argv = None):
def parse_args(argv=None):
parser = argparse.ArgumentParser(
formatter_class = argparse.RawDescriptionHelpFormatter,
version = nilmtools.__version__,
description = textwrap.dedent("""\
formatter_class=argparse.RawDescriptionHelpFormatter,
description=textwrap.dedent("""\
Insert large amount of data from an external source like ethstream.
This code tracks two timestamps:
@@ -53,14 +53,20 @@ def parse_args(argv = None):
is stepped forward to match 'clock'.
- If 'data' is running ahead, there is overlap in the data, and an
error is raised.
error is raised. If '--skip' is specified, the current file
is skipped instead of raising an error.
"""))
parser.add_argument("-u", "--url", action="store",
default="http://localhost/nilmdb/",
def_url = os.environ.get("NILMDB_URL", "http://localhost/nilmdb/")
parser.add_argument("-u", "--url", action="store", default=def_url,
help="NilmDB server URL (default: %(default)s)")
parser.add_argument("-v", "--version", action="version",
version=nilmtools.__version__)
group = parser.add_argument_group("Misc options")
group.add_argument("-D", "--dry-run", action="store_true",
help="Parse files, but don't insert any data")
group.add_argument("-s", "--skip", action="store_true",
help="Skip files if the data would overlap")
group.add_argument("-m", "--max-gap", action="store", default=10.0,
metavar="SEC", type=float,
help="Max discrepency between clock and data "
@@ -95,7 +101,7 @@ def parse_args(argv = None):
help="Path of stream, e.g. /foo/bar")
group = parser.add_argument_group("Input files")
group.add_argument("infile", type=argparse.FileType('r'), nargs='*',
group.add_argument("infile", type=argparse.FileType('rb'), nargs='*',
default=[sys.stdin],
help="Input files (default: stdin)")
@@ -123,7 +129,8 @@ def parse_args(argv = None):
return args
def main(argv = None):
def main(argv=None):
args = parse_args(argv)
client = nilmdb.client.Client(args.url)
@@ -133,6 +140,7 @@ def main(argv = None):
data_ts_inc = 0
data_ts_rate = args.rate
data_ts_delta = 0
def get_data_ts():
if args.delta:
return data_ts_base + data_ts_delta
@@ -168,7 +176,7 @@ def main(argv = None):
# decompress.
if filename.endswith(".gz"):
p = subprocess.Popen(["gzip", "-dc"],
stdin = f, stdout = subprocess.PIPE)
stdin=f, stdout=subprocess.PIPE)
f = p.stdout
# Try to get a real timestamp from the filename
@@ -181,17 +189,11 @@ def main(argv = None):
except ValueError:
pass
truncated_lines = 0
# Read each line
for line in f:
# Once in a while a line might be truncated, if we're
# at the end of a file. Ignore it, but if we ignore
# too many, bail out.
if line[-1] != '\n':
truncated_lines += 1
if truncated_lines > 3:
raise ParseError(filename, "too many short lines")
# The last line in the file may be truncated.
# Ignore it; we shouldn't ever see more than one at the end.
if line[-1] != b'\n'[0]:
printf("Ignoring short line in %s\n", filename)
continue
@@ -200,13 +202,16 @@ def main(argv = None):
continue
# If line starts with a comment, look for a timestamp
if line[0] == '#':
if line[0] == b'#'[0]:
try:
clock_ts = parse_time(line[1:]) + offset_comment
comment = line[1:].decode('utf-8', errors='ignore')
clock_ts = parse_time(comment) + offset_comment
print_clock_updated()
except ValueError:
pass
continue
# for some reason the following line doesn't show up as
# being covered, even though it definitely runs
continue # pragma: no cover
# If --delta mode, increment data_ts_delta by the
# delta from the file.
@@ -235,6 +240,10 @@ def main(argv = None):
"is %s but clock time is only %s",
timestamp_to_human(data_ts),
timestamp_to_human(clock_ts))
if args.skip:
printf("%s\n", err)
printf("Skipping the remainder of this file\n")
break
raise ParseError(filename, err)
if (data_ts + max_gap) < clock_ts:
@@ -261,8 +270,9 @@ def main(argv = None):
# Insert it
if not args.dry_run:
stream.insert("%d %s" % (data_ts, line))
print "Done"
stream.insert(b"%d %s" % (data_ts, line))
print("Done")
if __name__ == "__main__":
main()

130
nilmtools/math.py Normal file
View File

@@ -0,0 +1,130 @@
#!/usr/bin/env python3
# Miscellaenous useful mathematical functions
from numpy import *
import scipy
def numpy_raise_errors(func):
def wrap(*args, **kwargs):
old = seterr('raise')
try:
return func(*args, **kwargs)
finally:
seterr(**old)
return wrap
@numpy_raise_errors
def sfit4(data, fs):
"""(A, f0, phi, C) = sfit4(data, fs)
Compute 4-parameter (unknown-frequency) least-squares fit to
sine-wave data, according to IEEE Std 1241-2010 Annex B
Input:
data vector of input samples
fs sampling rate (Hz)
Output:
Parameters [A, f0, phi, C] to fit the equation
x[n] = A * sin(f0/fs * 2 * pi * n + phi) + C
where n is sample number. Or, as a function of time:
x(t) = A * sin(f0 * 2 * pi * t + phi) + C
by Jim Paris
(Verified to match sfit4.m)
"""
N = len(data)
if N < 2:
raise ValueError("bad data")
t = linspace(0, (N-1) / float(fs), N)
#
# Estimate frequency using FFT (step b)
#
Fc = scipy.fft.fft(data)
F = abs(Fc)
F[0] = 0 # eliminate DC
# Find pair of spectral lines with largest amplitude:
# resulting values are in F(i) and F(i+1)
i = argmax(F[0:int(N/2)] + F[1:int(N/2+1)])
# Interpolate FFT to get a better result (from Markus [B37])
try:
U1 = real(Fc[i])
U2 = real(Fc[i+1])
V1 = imag(Fc[i])
V2 = imag(Fc[i+1])
n = 2 * pi / N
ni1 = n * i
ni2 = n * (i+1)
K = ((V2-V1)*sin(ni1) + (U2-U1)*cos(ni1)) / (U2-U1)
Z1 = V1 * (K - cos(ni1)) / sin(ni1) + U1
Z2 = V2 * (K - cos(ni2)) / sin(ni2) + U2
i = arccos((Z2*cos(ni2) - Z1*cos(ni1)) / (Z2-Z1)) / n
except Exception:
# Just go with the biggest FFT peak
i = argmax(F[0:int(N/2)])
# Convert to Hz
f0 = i * float(fs) / N
# Fit it. We'll catch exceptions here and just returns zeros
# if something fails with the least squares fit, etc.
try:
# first guess for A0, B0 using 3-parameter fit (step c)
s = zeros(3)
w = 2*pi*f0
# Now iterate 7 times (step b, plus 6 iterations of step i)
for idx in range(7):
D = c_[cos(w*t), sin(w*t), ones(N),
-s[0] * t * sin(w*t) + s[1] * t * cos(w*t)] # eqn B.16
s = linalg.lstsq(D, data, rcond=None)[0] # eqn B.18
w = w + s[3] # update frequency estimate
#
# Extract results
#
A = sqrt(s[0]*s[0] + s[1]*s[1]) # eqn B.21
f0 = w / (2*pi)
phi = arctan2(s[0], s[1]) # eqn B.22 (flipped for sin instead of cos)
C = s[2]
return (A, f0, phi, C)
except Exception: # pragma: no cover (not sure if we can hit this?)
# something broke down; just return zeros
return (0, 0, 0, 0)
def peak_detect(data, delta=0.1):
"""Simple min/max peak detection algorithm, taken from my code
in the disagg.m from the 10-8-5 paper.
Returns an array of peaks: each peak is a tuple
(n, p, is_max)
where n is the row number in 'data', and p is 'data[n]',
and is_max is True if this is a maximum, False if it's a minimum,
"""
peaks = []
cur_min = (None, inf)
cur_max = (None, -inf)
lookformax = False
for (n, p) in enumerate(data):
if p > cur_max[1]:
cur_max = (n, p)
if p < cur_min[1]:
cur_min = (n, p)
if lookformax:
if p < (cur_max[1] - delta):
peaks.append((cur_max[0], cur_max[1], True))
cur_min = (n, p)
lookformax = False
else:
if p > (cur_min[1] + delta):
peaks.append((cur_min[0], cur_min[1], False))
cur_max = (n, p)
lookformax = True
return peaks

46
nilmtools/median.py Executable file
View File

@@ -0,0 +1,46 @@
#!/usr/bin/env python3
import nilmtools.filter
import scipy.signal
def main(argv=None):
f = nilmtools.filter.Filter()
parser = f.setup_parser("Median Filter")
group = parser.add_argument_group("Median filter options")
group.add_argument("-z", "--size", action="store", type=int, default=25,
help="median filter size (default %(default)s)")
group.add_argument("-d", "--difference", action="store_true",
help="store difference rather than filtered values")
try:
args = f.parse_args(argv)
except nilmtools.filter.MissingDestination as e:
print("Source is %s (%s)" % (e.src.path, e.src.layout))
print("Destination %s doesn't exist" % (e.dest.path))
print("You could make it with a command like:")
print(" nilmtool -u %s create %s %s" % (e.dest.url,
e.dest.path, e.src.layout))
raise SystemExit(1)
f.check_dest_metadata({"median_filter_source": f.src.path,
"median_filter_size": args.size,
"median_filter_difference": repr(args.difference)})
f.process_numpy(median_filter, args=(args.size, args.difference))
def median_filter(data, interval, args, insert, final):
(size, diff) = args
(rows, cols) = data.shape
for i in range(cols - 1):
filtered = scipy.signal.medfilt(data[:, i+1], size)
if diff:
data[:, i+1] -= filtered
else:
data[:, i+1] = filtered
insert(data)
return rows
if __name__ == "__main__":
main()

186
nilmtools/pipewatch.py Executable file
View File

@@ -0,0 +1,186 @@
#!/usr/bin/env python3
import nilmdb.client
from nilmdb.utils.printf import printf, fprintf
import nilmdb.utils.lock
import nilmtools
import time
import sys
import os
import argparse
import subprocess
import tempfile
import threading
import select
import signal
import queue
import daemon
def parse_args(argv=None):
parser = argparse.ArgumentParser(
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="""\
Pipe data from 'generator' to 'consumer'. This is intended to be
executed frequently from cron, and will exit if another copy is
already running. If 'generator' or 'consumer' returns an error,
or if 'generator' stops sending data for a while, it will exit.
Intended for use with ethstream (generator) and nilm-insert
(consumer). Commands are executed through the shell.
""")
parser.add_argument("-v", "--version", action="version",
version=nilmtools.__version__)
parser.add_argument("-d", "--daemon", action="store_true",
help="Run in background")
parser.add_argument("-l", "--lock", metavar="FILENAME", action="store",
default=tempfile.gettempdir() +
"/nilm-pipewatch.lock",
help="Lock file for detecting running instance")
parser.add_argument("-t", "--timeout", metavar="SECONDS", action="store",
type=float, default=30,
help="Exit if no output from " +
"generator for this long")
group = parser.add_argument_group("commands to execute")
group.add_argument("generator", action="store",
help="Data generator (e.g. \"ethstream -r 8000\")")
group.add_argument("consumer", action="store",
help="Data consumer (e.g. \"nilm-insert /foo/bar\")")
args = parser.parse_args(argv)
return args
def reader_thread(q, fd):
# Read from a file descriptor, write to queue.
try:
while True:
(r, w, x) = select.select([fd], [], [fd], 0.25)
if x: # pragma: no cover -- never expect this to happen
# Very few things are "exceptional conditions";
# just TCP OOB data, some TTY state changes, etc.
raise Exception
if not r:
# short timeout -- just try again. This is to catch the
# fd being closed elsewhere, which is only detected
# when select restarts.
continue
data = os.read(fd, 65536)
if data == b"": # generator EOF
raise Exception
q.put(data)
except Exception:
q.put(None)
def watcher_thread(q, procs):
# Put None in the queue if either process dies
while True:
for p in procs:
if p.poll() is not None:
q.put(None)
return
time.sleep(0.25)
def pipewatch(args):
# Run the processes, etc
with open(os.devnull, "r") as devnull:
generator = subprocess.Popen(args.generator, shell=True,
bufsize=-1, close_fds=True,
stdin=devnull,
stdout=subprocess.PIPE,
stderr=None,
preexec_fn=os.setpgrp)
consumer = subprocess.Popen(args.consumer, shell=True,
bufsize=-11, close_fds=True,
stdin=subprocess.PIPE,
stdout=None,
stderr=None,
preexec_fn=os.setpgrp)
q = queue.Queue(maxsize=4)
reader = threading.Thread(target=reader_thread,
args=(q, generator.stdout.fileno()))
reader.start()
watcher = threading.Thread(target=watcher_thread,
args=(q, [generator, consumer]))
watcher.start()
try:
while True:
try:
data = q.get(True, args.timeout)
if data is None:
break
consumer.stdin.write(data)
except queue.Empty:
# Timeout: kill the generator
fprintf(sys.stderr, "pipewatch: timeout\n")
generator.terminate()
break
generator.stdout.close()
consumer.stdin.close()
except IOError:
fprintf(sys.stderr, "pipewatch: I/O error\n")
def kill(proc):
# Wait for a process to end, or kill it
def poll_timeout(proc, timeout):
for x in range(1+int(timeout / 0.1)):
if proc.poll() is not None:
break
time.sleep(0.1)
return proc.poll()
try:
if poll_timeout(proc, 0.5) is None:
os.killpg(proc.pid, signal.SIGTERM)
if poll_timeout(proc, 0.5) is None:
os.killpg(proc.pid, signal.SIGKILL)
except OSError: # pragma: no cover
# (hard to trigger race condition in os.killpg)
pass
return poll_timeout(proc, 0.5)
# Wait for them to die, or kill them
cret = kill(consumer)
gret = kill(generator)
# Consume all remaining data in the queue until the reader
# and watcher threads are done
while reader.is_alive() or watcher.is_alive():
q.get(True, 0.1)
fprintf(sys.stderr, "pipewatch: generator returned %d, " +
"consumer returned %d\n", gret, cret)
if gret == 0 and cret == 0:
sys.exit(0)
sys.exit(1)
def main(argv=None):
args = parse_args(argv)
lockfile = open(args.lock, "w")
if not nilmdb.utils.lock.exclusive_lock(lockfile):
printf("pipewatch process already running (according to %s)\n",
args.lock)
sys.exit(0)
try:
# Run as a daemon if requested, otherwise run directly.
if args.daemon: # pragma: no cover (hard to do from inside test suite)
with daemon.DaemonContext(files_preserve=[lockfile]):
pipewatch(args)
else:
pipewatch(args)
finally:
# Clean up lockfile
try:
os.unlink(args.lock)
except OSError:
pass
if __name__ == "__main__":
main()

197
nilmtools/prep.py Executable file
View File

@@ -0,0 +1,197 @@
#!/usr/bin/env python3
# Spectral envelope preprocessor.
# Requires two streams as input: the original raw data, and sinefit data.
from nilmdb.utils.printf import printf
from nilmdb.utils.time import timestamp_to_human
import nilmtools.filter
import nilmdb.client
from numpy import pi, zeros, r_, e, real, imag
import scipy.fftpack
import scipy.signal
import bisect
from nilmdb.utils.interval import Interval
def main(argv=None):
# Set up argument parser
f = nilmtools.filter.Filter()
parser = f.setup_parser("Spectral Envelope Preprocessor", skip_paths=True)
group = parser.add_argument_group("Prep options")
group.add_argument("-c", "--column", action="store", type=int,
help="Column number (first data column is 1)")
group.add_argument("-n", "--nharm", action="store", type=int, default=4,
help="number of odd harmonics to compute (default 4)")
group.add_argument("-N", "--nshift", action="store", type=int, default=1,
help="number of shifted FFTs per period (default 1)")
exc = group.add_mutually_exclusive_group()
exc.add_argument("-r", "--rotate", action="store", type=float,
help="rotate FFT output by this many degrees (default 0)")
exc.add_argument("-R", "--rotate-rad", action="store", type=float,
help="rotate FFT output by this many radians (default 0)")
group.add_argument("srcpath", action="store",
help="Path of raw input, e.g. /foo/raw")
group.add_argument("sinepath", action="store",
help="Path of sinefit input, e.g. /foo/sinefit")
group.add_argument("destpath", action="store",
help="Path of prep output, e.g. /foo/prep")
# Parse arguments
try:
args = f.parse_args(argv)
except nilmtools.filter.MissingDestination as e:
rec = "float32_%d" % (e.parsed_args.nharm * 2)
print("Source is %s (%s)" % (e.src.path, e.src.layout))
print("Destination %s doesn't exist" % (e.dest.path))
print("You could make it with a command like:")
print(" nilmtool -u %s create %s %s" % (e.dest.url, e.dest.path, rec))
raise SystemExit(1)
# Check arguments
if args.column is None or args.column < 1:
parser.error("need a column number >= 1")
if args.nharm < 1 or args.nharm > 32:
parser.error("number of odd harmonics must be 1-32")
if args.nshift < 1:
parser.error("number of shifted FFTs must be >= 1")
if args.rotate is not None:
rotation = args.rotate * 2.0 * pi / 360.0
else:
rotation = args.rotate_rad or 0.0
if f.dest.layout_count != args.nharm * 2:
print("error: need", args.nharm*2, "columns in destination stream")
raise SystemExit(1)
# Check the sine fit stream
client_sinefit = nilmdb.client.Client(args.url)
sinefit = nilmtools.filter.get_stream_info(client_sinefit, args.sinepath)
if not sinefit:
raise Exception("sinefit data not found")
if sinefit.layout != "float32_3":
raise Exception("sinefit data type is " + sinefit.layout
+ "; expected float32_3")
# Check and set metadata in prep stream
f.check_dest_metadata({"prep_raw_source": f.src.path,
"prep_sinefit_source": sinefit.path,
"prep_column": args.column,
"prep_rotation": repr(rotation),
"prep_nshift": args.nshift})
# Find the intersection of the usual set of intervals we'd filter,
# and the intervals actually present in sinefit data. This is
# what we will process.
filter_int = f.intervals()
sinefit_int = (Interval(start, end) for (start, end) in
client_sinefit.stream_intervals(
args.sinepath, start=f.start, end=f.end))
intervals = nilmdb.utils.interval.intersection(filter_int, sinefit_int)
# Run the process (using the helper in the filter module)
f.process_numpy(process, args=(client_sinefit, sinefit.path, args.column,
args.nharm, rotation, args.nshift),
intervals=intervals)
def process(data, interval, args, insert_function, final):
(client, sinefit_path, column, nharm, rotation, nshift) = args
rows = data.shape[0]
data_timestamps = data[:, 0]
if rows < 2:
return 0
last_inserted = [nilmdb.utils.time.min_timestamp]
def insert_if_nonoverlapping(data):
"""Call insert_function to insert data, but only if this
data doesn't overlap with other data that we inserted."""
if data[0][0] <= last_inserted[0]: # pragma: no cover
# Getting coverage here is hard -- not sure exactly when
# it gets triggered or why this was added; probably some
# unlikely edge condition with timestamp rounding or something.
return
last_inserted[0] = data[-1][0]
insert_function(data)
processed = 0
out = zeros((1, nharm * 2 + 1))
# Pull out sinefit data for the entire time range of this block
for sinefit_line in client.stream_extract(sinefit_path,
data[0, 0], data[rows-1, 0]):
def prep_period(t_min, t_max, rot):
"""
Compute prep coefficients from time t_min to t_max, which
are the timestamps of the start and end of one period.
Results are rotated by an additional extra_rot before
being inserted into the database. Returns the maximum
index processed, or None if the period couldn't be
processed.
"""
# Find the indices of data that correspond to (t_min, t_max)
idx_min = bisect.bisect_left(data_timestamps, t_min)
idx_max = bisect.bisect_left(data_timestamps, t_max)
if idx_min >= idx_max or idx_max >= len(data_timestamps):
return None
# Perform FFT over those indices
N = idx_max - idx_min
d = data[idx_min:idx_max, column]
F = scipy.fftpack.fft(d) * 2.0 / N
# If we wanted more harmonics than the FFT gave us, pad with zeros
if N < (nharm * 2):
F = r_[F, zeros(nharm * 2 - N)]
# Fill output data.
out[0, 0] = round(t_min)
for k in range(nharm):
Fk = F[2 * k + 1] * e**(rot * 1j * (k+1))
out[0, 2 * k + 1] = -imag(Fk) # Pk
out[0, 2 * k + 2] = real(Fk) # Qk
insert_if_nonoverlapping(out)
return idx_max
# Extract sinefit data to get zero crossing timestamps.
# t_min = beginning of period
# t_max = end of period
(t_min, f0, A, C) = [float(x) for x in sinefit_line.split()]
t_max = t_min + 1e6 / f0
# Compute prep over shifted windows of the period
# (nshift is typically 1)
for n in range(nshift):
# Compute timestamps and rotations for shifted window
time_shift = n * (t_max - t_min) / nshift
shifted_min = t_min + time_shift
shifted_max = t_max + time_shift
angle_shift = n * 2 * pi / nshift
shifted_rot = rotation - angle_shift
# Run prep computation
idx_max = prep_period(shifted_min, shifted_max, shifted_rot)
if not idx_max:
break
processed = idx_max
# If we processed no data but there's lots in here, pretend we
# processed half of it.
if processed == 0 and rows > 10000:
processed = rows // 2
printf("%s: warning: no periods found; skipping %d rows\n",
timestamp_to_human(data[0][0]), processed)
else:
printf("%s: processed %d of %d rows\n",
timestamp_to_human(data[0][0]), processed, rows)
return processed
if __name__ == "__main__":
main()

198
nilmtools/sinefit.py Executable file
View File

@@ -0,0 +1,198 @@
#!/usr/bin/env python3
# Sine wave fitting.
from nilmdb.utils.printf import printf, sprintf
import nilmtools.filter
import nilmtools.math
from nilmdb.utils.time import (timestamp_to_human,
timestamp_to_seconds,
seconds_to_timestamp)
import numpy
import sys
# import pylab as p
def main(argv=None):
f = nilmtools.filter.Filter()
parser = f.setup_parser("Sine wave fitting")
group = parser.add_argument_group("Sine fit options")
group.add_argument('-c', '--column', action='store', type=int,
help='Column number (first data column is 1)')
group.add_argument('-f', '--frequency', action='store', type=float,
default=60.0,
help='Approximate frequency (default: %(default)s)')
group.add_argument('-m', '--min-freq', action='store', type=float,
help='Minimum valid frequency '
'(default: approximate frequency / 2))')
group.add_argument('-M', '--max-freq', action='store', type=float,
help='Maximum valid frequency '
'(default: approximate frequency * 2))')
group.add_argument('-a', '--min-amp', action='store', type=float,
default=20.0,
help='Minimum signal amplitude (default: %(default)s)')
# Parse arguments
try:
args = f.parse_args(argv)
except nilmtools.filter.MissingDestination as e:
rec = "float32_3"
print("Source is %s (%s)" % (e.src.path, e.src.layout))
print("Destination %s doesn't exist" % (e.dest.path))
print("You could make it with a command like:")
print(" nilmtool -u %s create %s %s" % (e.dest.url, e.dest.path, rec))
raise SystemExit(1)
if args.column is None or args.column < 1:
parser.error("need a column number >= 1")
if args.frequency < 0.1:
parser.error("frequency must be >= 0.1")
if args.min_freq is None:
args.min_freq = args.frequency / 2
if args.max_freq is None:
args.max_freq = args.frequency * 2
if (args.min_freq > args.max_freq or
args.min_freq > args.frequency or
args.max_freq < args.frequency):
parser.error("invalid min or max frequency")
if args.min_amp < 0:
parser.error("min amplitude must be >= 0")
f.check_dest_metadata({"sinefit_source": f.src.path,
"sinefit_column": args.column})
f.process_numpy(process, args=(args.column, args.frequency, args.min_amp,
args.min_freq, args.max_freq))
class SuppressibleWarning(object):
def __init__(self, maxcount=10, maxsuppress=100):
self.maxcount = maxcount
self.maxsuppress = maxsuppress
self.count = 0
self.last_msg = ""
def _write(self, sec, msg):
if sec:
now = timestamp_to_human(seconds_to_timestamp(sec)) + ": "
else:
now = ""
sys.stderr.write(now + msg)
def warn(self, msg, seconds=None):
self.count += 1
if self.count <= self.maxcount:
self._write(seconds, msg)
if (self.count - self.maxcount) >= self.maxsuppress:
self.reset()
def reset(self, seconds=None):
if self.count > self.maxcount:
self._write(seconds, sprintf("(%d warnings suppressed)\n",
self.count - self.maxcount))
self.count = 0
def process(data, interval, args, insert_function, final):
(column, f_expected, a_min, f_min, f_max) = args
rows = data.shape[0]
# Estimate sampling frequency from timestamps
ts_min = timestamp_to_seconds(data[0][0])
ts_max = timestamp_to_seconds(data[-1][0])
if ts_min >= ts_max: # pragma: no cover; process_numpy shouldn't send this
return 0
fs = (rows-1) / (ts_max - ts_min)
# Pull out about 3.5 periods of data at once;
# we'll expect to match 3 zero crossings in each window
N = max(int(3.5 * fs / f_expected), 10)
# If we don't have enough data, don't bother processing it
if rows < N:
return 0
warn = SuppressibleWarning(3, 1000)
# Process overlapping windows
start = 0
num_zc = 0
last_inserted_timestamp = None
while start < (rows - N):
this = data[start:start+N, column]
t_min = timestamp_to_seconds(data[start, 0])
# t_max = timestamp_to_seconds(data[start+N-1, 0])
# Do 4-parameter sine wave fit
(A, f0, phi, C) = nilmtools.math.sfit4(this, fs)
# Check bounds. If frequency is too crazy, ignore this window
if f0 < f_min or f0 > f_max:
warn.warn(sprintf("frequency %s outside valid range %s - %s\n",
str(f0), str(f_min), str(f_max)), t_min)
start += N
continue
# If amplitude is too low, results are probably just noise
if A < a_min:
warn.warn(sprintf("amplitude %s below minimum threshold %s\n",
str(A), str(a_min)), t_min)
start += N
continue
# p.plot(arange(N), this)
# p.plot(arange(N), A * sin(f0/fs * 2 * pi * arange(N) + phi) + C, 'g')
# Period starts when the argument of sine is 0 degrees,
# so we're looking for sample number:
# n = (0 - phi) / (f0/fs * 2 * pi)
zc_n = (0 - phi) / (f0 / fs * 2 * numpy.pi)
period_n = fs/f0
# Add periods to make N positive
while zc_n < 0:
zc_n += period_n
last_zc = None
# Mark the zero crossings until we're a half period away
# from the end of the window
while zc_n < (N - period_n/2):
# p.plot(zc_n, C, 'ro')
t = t_min + zc_n / fs
if (last_inserted_timestamp is None or
t > last_inserted_timestamp):
insert_function([[seconds_to_timestamp(t), f0, A, C]])
last_inserted_timestamp = t
warn.reset(t)
else: # pragma: no cover -- this is hard to trigger,
# if it's even possible at all; I think it would require
# some jitter in how the waves fit, across a window boundary.
warn.warn("timestamp overlap\n", t)
num_zc += 1
last_zc = zc_n
zc_n += period_n
# Advance the window one quarter period past the last marked
# zero crossing, or advance the window by half its size if we
# didn't mark any.
if last_zc is not None:
advance = min(last_zc + period_n/4, N)
else:
advance = N/2
# p.plot(advance, C, 'go')
# p.show()
start = int(round(start + advance))
# Return the number of rows we've processed
warn.reset(last_inserted_timestamp)
if last_inserted_timestamp:
now = timestamp_to_human(seconds_to_timestamp(
last_inserted_timestamp)) + ": "
else:
now = ""
printf("%sMarked %d zero-crossings in %d rows\n", now, num_zc, start)
return start
if __name__ == "__main__":
main()

325
nilmtools/trainola.py Executable file
View File

@@ -0,0 +1,325 @@
#!/usr/bin/env python3
from nilmdb.utils.printf import printf, sprintf
import nilmdb.client
import nilmtools.filter
import nilmtools.math
from nilmdb.utils.time import timestamp_to_seconds
import datetime_tz
from nilmdb.utils.interval import Interval
import numpy as np
import scipy
import scipy.signal
from numpy.core.umath_tests import inner1d
from collections import OrderedDict
import time
import functools
import collections
class DataError(ValueError):
pass
def build_column_mapping(colinfo, streaminfo):
"""Given the 'columns' list from the JSON data, verify and
pull out a dictionary mapping for the column names/numbers."""
columns = OrderedDict()
for c in colinfo:
col_num = c['index'] + 1 # skip timestamp
if (c['name'] in list(columns.keys()) or
col_num in list(columns.values())):
raise DataError("duplicated columns")
if (c['index'] < 0 or c['index'] >= streaminfo.layout_count):
raise DataError("bad column number")
columns[c['name']] = col_num
if not len(columns):
raise DataError("no columns")
return columns
class Exemplar(object):
def __init__(self, exinfo, min_rows=10, max_rows=100000):
"""Given a dictionary entry from the 'exemplars' input JSON,
verify the stream, columns, etc. Then, fetch all the data
into self.data."""
self.name = exinfo['name']
self.url = exinfo['url']
self.stream = exinfo['stream']
self.start = exinfo['start']
self.end = exinfo['end']
self.dest_column = exinfo['dest_column']
# Get stream info
self.client = nilmdb.client.numpyclient.NumpyClient(self.url)
self.info = nilmtools.filter.get_stream_info(self.client, self.stream)
if not self.info:
raise DataError(sprintf("exemplar stream '%s' does not exist " +
"on server '%s'", self.stream, self.url))
# Build up name => index mapping for the columns
self.columns = build_column_mapping(exinfo['columns'], self.info)
# Count points
self.count = self.client.stream_count(self.stream,
self.start, self.end)
# Verify count
if self.count == 0:
raise DataError("No data in this exemplar!")
if self.count < min_rows:
raise DataError("Too few data points: " + str(self.count))
if self.count > max_rows:
raise DataError("Too many data points: " + str(self.count))
# Extract the data
datagen = self.client.stream_extract_numpy(self.stream,
self.start, self.end,
self.info.layout,
maxrows=self.count)
self.data = list(datagen)[0]
# Extract just the columns that were specified in self.columns,
# skipping the timestamp.
extract_cols = [value for (key, value) in list(self.columns.items())]
self.data = self.data[:, extract_cols]
# Fix the column indices in e.columns, since we removed/reordered
# columns in self.data
for n, k in enumerate(self.columns):
self.columns[k] = n
# Subtract the means from each column
self.data = self.data - self.data.mean(axis=0)
# Get scale factors for each column by computing dot product
# of each column with itself.
self.scale = inner1d(self.data.T, self.data.T)
# Ensure a minimum (nonzero) scale and convert to list
self.scale = np.maximum(self.scale, [1e-9]).tolist()
def __str__(self):
return sprintf("\"%s\" %s [%s] %s rows",
self.name, self.stream,
",".join(list(self.columns.keys())),
self.count)
def timestamp_to_short_human(timestamp):
dt = datetime_tz.datetime_tz.fromtimestamp(timestamp_to_seconds(timestamp))
return dt.strftime("%H:%M:%S")
def trainola_matcher(data, interval, args, insert_func, final_chunk):
"""Perform cross-correlation match"""
(src_columns, dest_count, exemplars) = args
nrows = data.shape[0]
# We want at least 10% more points than the widest exemplar.
widest = max([x.count for x in exemplars])
if (widest * 1.1) > nrows:
return 0
# This is how many points we'll consider valid in the
# cross-correlation.
valid = nrows + 1 - widest
matches = collections.defaultdict(list)
# Try matching against each of the exemplars
for e in exemplars:
corrs = []
# Compute cross-correlation for each column
for col_name in e.columns:
a = data[:, src_columns[col_name]]
b = e.data[:, e.columns[col_name]]
corr = scipy.signal.fftconvolve(a, np.flipud(b), 'valid')[0:valid]
# Scale by the norm of the exemplar
corr = corr / e.scale[e.columns[col_name]]
corrs.append(corr)
# Find the peaks using the column with the largest amplitude
biggest = e.scale.index(max(e.scale))
peaks = nilmtools.math.peak_detect(corrs[biggest], 0.1)
# To try to reduce false positives, discard peaks where
# there's a higher-magnitude peak (either min or max) within
# one exemplar width nearby.
good_peak_locations = []
for (i, (n, p, is_max)) in enumerate(peaks):
if not is_max:
continue
ok = True
# check up to 'e.count' rows before this one
j = i-1
while ok and j >= 0 and peaks[j][0] > (n - e.count):
if abs(peaks[j][1]) > abs(p):
ok = False
j -= 1
# check up to 'e.count' rows after this one
j = i+1
while ok and j < len(peaks) and peaks[j][0] < (n + e.count):
if abs(peaks[j][1]) > abs(p):
ok = False
j += 1
if ok:
good_peak_locations.append(n)
# Now look at all good peaks
for row in good_peak_locations:
# Correlation for each column must be close enough to 1.
for (corr, scale) in zip(corrs, e.scale):
# The accepted distance from 1 is based on the relative
# amplitude of the column. Use a linear mapping:
# scale 1.0 -> distance 0.1
# scale 0.0 -> distance 1.0
distance = 1 - 0.9 * (scale / e.scale[biggest])
if abs(corr[row] - 1) > distance:
# No match
break
else:
# Successful match
matches[row].append(e)
# Insert matches into destination stream.
matched_rows = sorted(matches.keys())
out = np.zeros((len(matched_rows), dest_count + 1))
for n, row in enumerate(matched_rows):
# Fill timestamp
out[n][0] = data[row, 0]
# Mark matched exemplars
for exemplar in matches[row]:
out[n, exemplar.dest_column + 1] = 1.0
# Insert it
insert_func(out)
# Return how many rows we processed
valid = max(valid, 0)
printf(" [%s] matched %d exemplars in %d rows\n",
timestamp_to_short_human(data[0][0]), np.sum(out[:, 1:]), valid)
return valid
def trainola(conf):
print("Trainola", nilmtools.__version__)
# Load main stream data
url = conf['url']
src_path = conf['stream']
dest_path = conf['dest_stream']
start = conf['start']
end = conf['end']
# Get info for the src and dest streams
src_client = nilmdb.client.numpyclient.NumpyClient(url)
src = nilmtools.filter.get_stream_info(src_client, src_path)
if not src:
raise DataError("source path '" + src_path + "' does not exist")
src_columns = build_column_mapping(conf['columns'], src)
dest_client = nilmdb.client.numpyclient.NumpyClient(url)
dest = nilmtools.filter.get_stream_info(dest_client, dest_path)
if not dest:
raise DataError("destination path '" + dest_path + "' does not exist")
printf("Source:\n")
printf(" %s [%s]\n", src.path, ",".join(list(src_columns.keys())))
printf("Destination:\n")
printf(" %s (%s columns)\n", dest.path, dest.layout_count)
# Pull in the exemplar data
exemplars = []
if 'exemplars' not in conf:
raise DataError("missing exemplars")
for n, exinfo in enumerate(conf['exemplars']):
printf("Loading exemplar %d:\n", n)
e = Exemplar(exinfo)
col = e.dest_column
if col < 0 or col >= dest.layout_count:
raise DataError(sprintf("bad destination column number %d\n" +
"dest stream only has 0 through %d",
col, dest.layout_count - 1))
printf(" %s, output column %d\n", str(e), col)
exemplars.append(e)
if len(exemplars) == 0:
raise DataError("missing exemplars")
# Verify that the exemplar columns are all represented in the main data
for n, ex in enumerate(exemplars):
for col in ex.columns:
if col not in src_columns:
raise DataError(sprintf("Exemplar %d column %s is not "
"available in source data", n, col))
# Figure out which intervals we should process
intervals = (Interval(s, e) for (s, e) in
src_client.stream_intervals(src_path,
diffpath=dest_path,
start=start, end=end))
intervals = nilmdb.utils.interval.optimize(intervals)
# Do the processing
rows = 100000
extractor = functools.partial(src_client.stream_extract_numpy,
src.path, layout=src.layout, maxrows=rows)
inserter = functools.partial(dest_client.stream_insert_numpy_context,
dest.path)
start = time.time()
processed_time = 0
printf("Processing intervals:\n")
for interval in intervals:
printf("%s\n", interval.human_string())
nilmtools.filter.process_numpy_interval(
interval, extractor, inserter, rows * 3,
trainola_matcher, (src_columns, dest.layout_count, exemplars))
processed_time += (timestamp_to_seconds(interval.end) -
timestamp_to_seconds(interval.start))
elapsed = max(time.time() - start, 1e-3)
printf("Done. Processed %.2f seconds per second.\n",
processed_time / elapsed)
def main(argv=None):
import json
import sys
if argv is None:
argv = sys.argv[1:]
if len(argv) != 1 or argv[0] == '-h' or argv[0] == '--help':
printf("usage: %s [-h] [-v] <json-config-dictionary>\n\n", sys.argv[0])
printf(" Where <json-config-dictionary> is a JSON-encoded " +
"dictionary string\n")
printf(" with exemplar and stream data.\n\n")
printf(" See extras/trainola-test-param*.js in the nilmtools " +
"repository\n")
printf(" for examples.\n")
if len(argv) != 1:
raise SystemExit(1)
raise SystemExit(0)
if argv[0] == '-v' or argv[0] == '--version':
printf("%s\n", nilmtools.__version__)
raise SystemExit(0)
try:
# Passed in a JSON string (e.g. on the command line)
conf = json.loads(argv[0])
except TypeError:
# Passed in the config dictionary (e.g. from NilmRun)
conf = argv[0]
return trainola(conf)
if __name__ == "__main__":
main()

7
requirements.txt Normal file
View File

@@ -0,0 +1,7 @@
nilmdb>=2.0.3
numpy==1.19.1
scipy==1.5.2
python-daemon==2.2.4
docutils==0.16
lockfile==0.12.2
psutil==5.7.2

39
setup.cfg Normal file
View File

@@ -0,0 +1,39 @@
[aliases]
test = nosetests
[nosetests]
# Note: values must be set to 1, and have no comments on the same line,
# for "python setup.py nosetests" to work correctly.
nocapture=1
# Comment this out to see CherryPy logs on failure:
nologcapture=1
with-coverage=1
cover-inclusive=1
cover-package=nilmtools
cover-erase=1
# this works, puts html output in cover/ dir:
# cover-html=1
# need nose 1.1.3 for this:
# cover-branches=1
#debug=nose
#debug-log=nose.log
stop=1
verbosity=2
tests=tests
[versioneer]
VCS=git
style=pep440
versionfile_source=nilmtools/_version.py
versionfile_build=nilmtools/_version.py
tag_prefix=nilmtools-
parentdir_prefix=nilmtools-
[flake8]
exclude=_version.py
extend-ignore=E731
per-file-ignores=math.py:F403,F405
[pylint]
ignore=_version.py
disable=C0103,C0111,R0913,R0914

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/env python3
# To release a new version, tag it:
# git tag -a nilmtools-1.1 -m "Version 1.1"
@@ -6,69 +6,33 @@
# Then just package it up:
# python setup.py sdist
# This is supposed to be using Distribute:
#
# distutils provides a "setup" method.
# setuptools is a set of monkeypatches on top of that.
# distribute is a particular version/implementation of setuptools.
#
# So we don't really know if this is using the old setuptools or the
# Distribute-provided version of setuptools.
import traceback
import sys
import os
try:
from setuptools import setup, find_packages
import distutils.version
except ImportError:
traceback.print_exc()
print "Please install the prerequisites listed in README.txt"
sys.exit(1)
from setuptools import setup
# Versioneer manages version numbers from git tags.
# https://github.com/warner/python-versioneer
import versioneer
versioneer.versionfile_source = 'src/_version.py'
versioneer.versionfile_build = 'nilmtools/_version.py'
versioneer.tag_prefix = 'nilmtools-'
versioneer.parentdir_prefix = 'nilmtools-'
# Hack to workaround logging/multiprocessing issue:
# https://groups.google.com/d/msg/nose-users/fnJ-kAUbYHQ/_UsLN786ygcJ
try: import multiprocessing
except: pass
# We need a MANIFEST.in. Generate it here rather than polluting the
# repository with yet another setup-related file.
with open("MANIFEST.in", "w") as m:
m.write("""
# Root
include README.txt
include setup.py
include versioneer.py
include Makefile
""")
# Get list of requirements to use in `install_requires` below. Note
# that we don't make a distinction between things that are actually
# required for end-users vs developers (or use `test_requires` or
# anything else) -- just install everything for simplicity.
install_requires = open('requirements.txt').readlines()
# Run setup
setup(name='nilmtools',
version = versioneer.get_version(),
cmdclass = versioneer.get_cmdclass(),
url = 'https://git.jim.sh/jim/lees/nilmtools.git',
url = 'https://git.jim.sh/nilm/nilmtools.git',
author = 'Jim Paris',
description = "NILM Database Tools",
long_description = "NILM Database Tools",
license = "Proprietary",
author_email = 'jim@jtan.com',
install_requires = [ 'nilmdb >= 1.5.0',
'numpy',
'scipy',
'matplotlib',
],
install_requires = install_requires,
packages = [ 'nilmtools',
],
package_dir = { 'nilmtools': 'src' },
entry_points = {
'console_scripts': [
'nilm-decimate = nilmtools.decimate:main',
@@ -78,6 +42,10 @@ setup(name='nilmtools',
'nilm-prep = nilmtools.prep:main',
'nilm-copy-wildcard = nilmtools.copy_wildcard:main',
'nilm-sinefit = nilmtools.sinefit:main',
'nilm-cleanup = nilmtools.cleanup:main',
'nilm-median = nilmtools.median:main',
'nilm-trainola = nilmtools.trainola:main',
'nilm-pipewatch = nilmtools.pipewatch:main',
],
},
zip_safe = False,

View File

@@ -1,197 +0,0 @@
IN_LONG_VERSION_PY = True
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (build by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.
# This file is released into the public domain. Generated by
# versioneer-0.7+ (https://github.com/warner/python-versioneer)
# these strings will be replaced by git during git-archive
git_refnames = "$Format:%d$"
git_full = "$Format:%H$"
import subprocess
import sys
def run_command(args, cwd=None, verbose=False):
try:
# remember shell=False, so use git.cmd on windows, not just git
p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd)
except EnvironmentError:
e = sys.exc_info()[1]
if verbose:
print("unable to run %s" % args[0])
print(e)
return None
stdout = p.communicate()[0].strip()
if sys.version >= '3':
stdout = stdout.decode()
if p.returncode != 0:
if verbose:
print("unable to run %s (error)" % args[0])
return None
return stdout
import sys
import re
import os.path
def get_expanded_variables(versionfile_source):
# the code embedded in _version.py can just fetch the value of these
# variables. When used from setup.py, we don't want to import
# _version.py, so we do it with a regexp instead. This function is not
# used from _version.py.
variables = {}
try:
for line in open(versionfile_source,"r").readlines():
if line.strip().startswith("git_refnames ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
variables["refnames"] = mo.group(1)
if line.strip().startswith("git_full ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
variables["full"] = mo.group(1)
except EnvironmentError:
pass
return variables
def versions_from_expanded_variables(variables, tag_prefix, verbose=False):
refnames = variables["refnames"].strip()
if refnames.startswith("$Format"):
if verbose:
print("variables are unexpanded, not using")
return {} # unexpanded, so not in an unpacked git-archive tarball
refs = set([r.strip() for r in refnames.strip("()").split(",")])
for ref in list(refs):
if not re.search(r'\d', ref):
if verbose:
print("discarding '%s', no digits" % ref)
refs.discard(ref)
# Assume all version tags have a digit. git's %d expansion
# behaves like git log --decorate=short and strips out the
# refs/heads/ and refs/tags/ prefixes that would let us
# distinguish between branches and tags. By ignoring refnames
# without digits, we filter out many common branch names like
# "release" and "stabilization", as well as "HEAD" and "master".
if verbose:
print("remaining refs: %s" % ",".join(sorted(refs)))
for ref in sorted(refs):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
r = ref[len(tag_prefix):]
if verbose:
print("picking %s" % r)
return { "version": r,
"full": variables["full"].strip() }
# no suitable tags, so we use the full revision id
if verbose:
print("no suitable tags, using full revision id")
return { "version": variables["full"].strip(),
"full": variables["full"].strip() }
def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):
# this runs 'git' from the root of the source tree. That either means
# someone ran a setup.py command (and this code is in versioneer.py, so
# IN_LONG_VERSION_PY=False, thus the containing directory is the root of
# the source tree), or someone ran a project-specific entry point (and
# this code is in _version.py, so IN_LONG_VERSION_PY=True, thus the
# containing directory is somewhere deeper in the source tree). This only
# gets called if the git-archive 'subst' variables were *not* expanded,
# and _version.py hasn't already been rewritten with a short version
# string, meaning we're inside a checked out source tree.
try:
here = os.path.abspath(__file__)
except NameError:
# some py2exe/bbfreeze/non-CPython implementations don't do __file__
return {} # not always correct
# versionfile_source is the relative path from the top of the source tree
# (where the .git directory might live) to this file. Invert this to find
# the root from __file__.
root = here
if IN_LONG_VERSION_PY:
for i in range(len(versionfile_source.split("/"))):
root = os.path.dirname(root)
else:
root = os.path.dirname(here)
if not os.path.exists(os.path.join(root, ".git")):
if verbose:
print("no .git in %s" % root)
return {}
GIT = "git"
if sys.platform == "win32":
GIT = "git.cmd"
stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"],
cwd=root)
if stdout is None:
return {}
if not stdout.startswith(tag_prefix):
if verbose:
print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix))
return {}
tag = stdout[len(tag_prefix):]
stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=root)
if stdout is None:
return {}
full = stdout.strip()
if tag.endswith("-dirty"):
full += "-dirty"
return {"version": tag, "full": full}
def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False):
if IN_LONG_VERSION_PY:
# We're running from _version.py. If it's from a source tree
# (execute-in-place), we can work upwards to find the root of the
# tree, and then check the parent directory for a version string. If
# it's in an installed application, there's no hope.
try:
here = os.path.abspath(__file__)
except NameError:
# py2exe/bbfreeze/non-CPython don't have __file__
return {} # without __file__, we have no hope
# versionfile_source is the relative path from the top of the source
# tree to _version.py. Invert this to find the root from __file__.
root = here
for i in range(len(versionfile_source.split("/"))):
root = os.path.dirname(root)
else:
# we're running from versioneer.py, which means we're running from
# the setup.py in a source tree. sys.argv[0] is setup.py in the root.
here = os.path.abspath(sys.argv[0])
root = os.path.dirname(here)
# Source tarballs conventionally unpack into a directory that includes
# both the project name and a version string.
dirname = os.path.basename(root)
if not dirname.startswith(parentdir_prefix):
if verbose:
print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" %
(root, dirname, parentdir_prefix))
return None
return {"version": dirname[len(parentdir_prefix):], "full": ""}
tag_prefix = "nilmtools-"
parentdir_prefix = "nilmtools-"
versionfile_source = "src/_version.py"
def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
variables = { "refnames": git_refnames, "full": git_full }
ver = versions_from_expanded_variables(variables, tag_prefix, verbose)
if not ver:
ver = versions_from_vcs(tag_prefix, versionfile_source, verbose)
if not ver:
ver = versions_from_parentdir(parentdir_prefix, versionfile_source,
verbose)
if not ver:
ver = default
return ver

View File

@@ -1,76 +0,0 @@
#!/usr/bin/python
import nilmtools.filter
import nilmtools.decimate
import nilmdb.client
import argparse
def main(argv = None):
parser = argparse.ArgumentParser(
formatter_class = argparse.RawDescriptionHelpFormatter,
version = "1.0",
description = """\
Automatically create multiple decimations from a single source
stream, continuing until the last decimated level contains fewer
than 500 points total.
""")
parser.add_argument("-u", "--url", action="store",
default="http://localhost/nilmdb/",
help="NilmDB server URL (default: %(default)s)")
parser.add_argument('-f', '--factor', action='store', default=4, type=int,
help='Decimation factor (default: %(default)s)')
parser.add_argument("--force-metadata", action="store_true",
default = False,
help="Force metadata changes if the dest "
"doesn't match")
parser.add_argument("path", action="store",
help='Path of base stream')
args = parser.parse_args(argv)
# Pull out info about the base stream
client = nilmdb.client.Client(args.url)
info = nilmtools.filter.get_stream_info(client, args.path)
if not info:
raise Exception("path " + args.path + " not found")
meta = client.stream_get_metadata(args.path)
if "decimate_source" in meta:
print "Stream", args.path, "was decimated from", meta["decimate_source"]
print "You need to pass the base stream instead"
raise SystemExit(1)
# Figure out the type we should use for decimated streams
if 'int32' in info.layout_type or 'float64' in info.layout_type:
decimated_type = 'float64_' + str(info.layout_count * 3)
else:
decimated_type = 'float32_' + str(info.layout_count * 3)
# Now do the decimations until we have few enough points
factor = 1
while True:
print "Level", factor, "decimation has", info.rows, "rows"
if info.rows <= 500:
break
factor *= args.factor
new_path = "%s~decim-%d" % (args.path, factor)
# Create the stream if needed
new_info = nilmtools.filter.get_stream_info(client, new_path)
if not new_info:
print "Creating stream", new_path
client.stream_create(new_path, decimated_type)
# Run the decimation as if it were run from the commandline
new_argv = [ "-u", args.url,
"-f", str(args.factor) ]
if args.force_metadata:
new_argv.extend([ "--force-metadata" ])
new_argv.extend([info.path, new_path])
nilmtools.decimate.main(new_argv)
# Update info using the newly decimated stream
info = nilmtools.filter.get_stream_info(client, new_path)
if __name__ == "__main__":
main()

View File

@@ -1,342 +0,0 @@
#!/usr/bin/python
from __future__ import absolute_import
import nilmdb.client
from nilmdb.client import Client
from nilmdb.client.numpyclient import NumpyClient
from nilmdb.utils.printf import *
from nilmdb.utils.time import (parse_time, timestamp_to_human,
timestamp_to_seconds)
from nilmdb.utils.interval import Interval
import nilmtools
import itertools
import time
import sys
import re
import argparse
import numpy as np
import cStringIO
class MissingDestination(Exception):
def __init__(self, args, src, dest):
self.parsed_args = args
self.src = src
self.dest = dest
Exception.__init__(self, "destination path " + dest.path + " not found")
class StreamInfo(object):
def __init__(self, url, info):
self.url = url
self.info = info
try:
self.path = info[0]
self.layout = info[1]
self.layout_type = self.layout.split('_')[0]
self.layout_count = int(self.layout.split('_')[1])
self.total_count = self.layout_count + 1
self.timestamp_min = info[2]
self.timestamp_max = info[3]
self.rows = info[4]
self.seconds = nilmdb.utils.time.timestamp_to_seconds(info[5])
except IndexError, TypeError:
pass
def string(self, interhost):
"""Return stream info as a string. If interhost is true,
include the host URL."""
if interhost:
return sprintf("[%s] ", self.url) + str(self)
return str(self)
def __str__(self):
"""Return stream info as a string."""
return sprintf("%s (%s), %.2fM rows, %.2f hours",
self.path, self.layout, self.rows / 1e6,
self.seconds / 3600.0)
def get_stream_info(client, path):
"""Return a StreamInfo object about the given path, or None if it
doesn't exist"""
streams = client.stream_list(path, extended = True)
if len(streams) != 1:
return None
return StreamInfo(client.geturl(), streams[0])
class Filter(object):
def __init__(self):
self._parser = None
self._client_src = None
self._client_dest = None
self._using_client = False
self.src = None
self.dest = None
self.start = None
self.end = None
self.interhost = False
self.force_metadata = False
@property
def client_src(self):
if self._using_client:
raise Exception("Filter client is in use; make another")
return self._client_src
@property
def client_dest(self):
if self._using_client:
raise Exception("Filter client is in use; make another")
return self._client_dest
def setup_parser(self, description = "Filter data", skip_paths = False):
parser = argparse.ArgumentParser(
formatter_class = argparse.RawDescriptionHelpFormatter,
version = nilmtools.__version__,
description = description)
group = parser.add_argument_group("General filter arguments")
group.add_argument("-u", "--url", action="store",
default="http://localhost/nilmdb/",
help="Server URL (default: %(default)s)")
group.add_argument("-U", "--dest-url", action="store",
help="Destination server URL "
"(default: same as source)")
group.add_argument("-D", "--dry-run", action="store_true",
default = False,
help="Just print intervals that would be "
"processed")
group.add_argument("--force-metadata", action="store_true",
default = False,
help="Force metadata changes if the dest "
"doesn't match")
group.add_argument("-s", "--start",
metavar="TIME", type=self.arg_time,
help="Starting timestamp for intervals "
"(free-form, inclusive)")
group.add_argument("-e", "--end",
metavar="TIME", type=self.arg_time,
help="Ending timestamp for intervals "
"(free-form, noninclusive)")
if not skip_paths:
# Individual filter scripts might want to add these arguments
# themselves, to include multiple sources in a different order
# (for example). "srcpath" and "destpath" arguments must exist,
# though.
group.add_argument("srcpath", action="store",
help="Path of source stream, e.g. /foo/bar")
group.add_argument("destpath", action="store",
help="Path of destination stream, e.g. /foo/bar")
self._parser = parser
return parser
def interval_string(self, interval):
return sprintf("[ %s -> %s ]",
timestamp_to_human(interval.start),
timestamp_to_human(interval.end))
def parse_args(self, argv = None):
args = self._parser.parse_args(argv)
if args.dest_url is None:
args.dest_url = args.url
if args.url != args.dest_url:
self.interhost = True
self._client_src = Client(args.url)
self._client_dest = Client(args.dest_url)
if (not self.interhost) and (args.srcpath == args.destpath):
self._parser.error("source and destination path must be different")
# Open and print info about the streams
self.src = get_stream_info(self._client_src, args.srcpath)
if not self.src:
self._parser.error("source path " + args.srcpath + " not found")
self.dest = get_stream_info(self._client_dest, args.destpath)
if not self.dest:
raise MissingDestination(args, self.src,
StreamInfo(args.dest_url, [args.destpath]))
print "Source:", self.src.string(self.interhost)
print " Dest:", self.dest.string(self.interhost)
if args.dry_run:
for interval in self.intervals():
print self.interval_string(interval)
raise SystemExit(0)
self.force_metadata = args.force_metadata
self.start = args.start
self.end = args.end
return args
def _optimize_int(self, it):
"""Join and yield adjacent intervals from the iterator 'it'"""
saved_int = None
for interval in it:
if saved_int is not None:
if saved_int.end == interval.start:
interval.start = saved_int.start
else:
yield saved_int
saved_int = interval
if saved_int is not None:
yield saved_int
def intervals(self):
"""Generate all the intervals that this filter should process"""
self._using_client = True
if self.interhost:
# Do the difference ourselves
s_intervals = ( Interval(start, end)
for (start, end) in
self._client_src.stream_intervals(
self.src.path,
start = self.start, end = self.end) )
d_intervals = ( Interval(start, end)
for (start, end) in
self._client_dest.stream_intervals(
self.dest.path,
start = self.start, end = self.end) )
intervals = nilmdb.utils.interval.set_difference(s_intervals,
d_intervals)
else:
# Let the server do the difference for us
intervals = ( Interval(start, end)
for (start, end) in
self._client_src.stream_intervals(
self.src.path, diffpath = self.dest.path,
start = self.start, end = self.end) )
# Optimize intervals: join intervals that are adjacent
for interval in self._optimize_int(intervals):
yield interval
self._using_client = False
# Misc helpers
def arg_time(self, toparse):
"""Parse a time string argument"""
try:
return nilmdb.utils.time.parse_time(toparse)
except ValueError as e:
raise argparse.ArgumentTypeError(sprintf("%s \"%s\"",
str(e), toparse))
def check_dest_metadata(self, data):
"""See if the metadata jives, and complain if it doesn't. If
there's no conflict, update the metadata to match 'data'."""
metadata = self._client_dest.stream_get_metadata(self.dest.path)
if not self.force_metadata:
for key in data:
wanted = str(data[key])
val = metadata.get(key, wanted)
if val != wanted and self.dest.rows > 0:
m = "Metadata in destination stream:\n"
m += " %s = %s\n" % (key, val)
m += "doesn't match desired data:\n"
m += " %s = %s\n" % (key, wanted)
m += "Refusing to change it. To prevent this error, "
m += "change or delete the metadata with nilmtool,\n"
m += "remove existing data from the stream, or "
m += "retry with --force-metadata."
raise Exception(m)
# All good -- write the metadata in case it's not already there
self._client_dest.stream_update_metadata(self.dest.path, data)
# The main filter processing method.
def process_numpy(self, function, args = None, rows = 100000):
"""For all intervals that exist in self.src but don't exist in
self.dest, call 'function' with a Numpy array corresponding to
the data. The data is converted to a Numpy array in chunks of
'rows' rows at a time.
'function' should be defined as:
def function(data, interval, args, insert_func, final)
'data': array of data to process -- may be empty
'interval': overall interval we're processing (but not necessarily
the interval of this particular chunk of data)
'args': opaque arguments passed to process_numpy
'insert_func': function to call in order to insert array of data.
Should be passed a 2-dimensional array of data to insert.
Data timestamps must be within the provided interval.
'final': True if this is the last bit of data for this
contiguous interval, False otherwise.
Return value of 'function' is the number of data rows processed.
Unprocessed data will be provided again in a subsequent call
(unless 'final' is True).
"""
if args is None:
args = []
extractor = NumpyClient(self.src.url).stream_extract_numpy
inserter = NumpyClient(self.dest.url).stream_insert_numpy_context
# Format output data.
formatter = lambda row: " ".join([repr(x) for x in row]) + "\n"
def batch(iterable, size):
c = itertools.count()
for k, g in itertools.groupby(iterable, lambda x: c.next() // size):
yield g
for interval in self.intervals():
print "Processing", self.interval_string(interval)
with inserter(self.dest.path,
interval.start, interval.end) as insert_ctx:
insert_function = insert_ctx.insert
old_array = np.array([])
for new_array in extractor(self.src.path,
interval.start, interval.end,
layout = self.src.layout,
maxrows = rows):
# If we still had old data left, combine it
if old_array.shape[0] != 0:
array = np.vstack((old_array, new_array))
else:
array = new_array
# Pass it to the process function
processed = function(array, interval, args,
insert_function, False)
# Send any pending data
insert_ctx.send()
# Save the unprocessed parts
if processed >= 0:
old_array = array[processed:]
else:
raise Exception(
sprintf("%s return value %s must be >= 0",
str(function), str(processed)))
# Warn if there's too much data remaining
if old_array.shape[0] > 3 * rows:
printf("warning: %d unprocessed rows in buffer\n",
old_array.shape[0])
# Last call for this contiguous interval
if old_array.shape[0] != 0:
function(old_array, interval, args, insert_function, True)
def main(argv = None):
# This is just a dummy function; actual filters can use the other
# functions to prepare stuff, and then do something with the data.
f = Filter()
parser = f.setup_parser()
args = f.parse_args(argv)
for i in f.intervals():
print "Generic filter: need to handle", f.interval_string(i)
if __name__ == "__main__":
main()

View File

@@ -1,126 +0,0 @@
#!/usr/bin/python
# Spectral envelope preprocessor.
# Requires two streams as input: the original raw data, and sinefit data.
import nilmtools.filter
import nilmdb.client
from numpy import *
import scipy.fftpack
import scipy.signal
#from matplotlib import pyplot as p
import bisect
def main(argv = None):
# Set up argument parser
f = nilmtools.filter.Filter()
parser = f.setup_parser("Spectral Envelope Preprocessor", skip_paths = True)
group = parser.add_argument_group("Prep options")
group.add_argument("-c", "--column", action="store", type=int,
help="Column number (first data column is 1)")
group.add_argument("-n", "--nharm", action="store", type=int, default=4,
help="number of odd harmonics to compute")
exc = group.add_mutually_exclusive_group()
exc.add_argument("-r", "--rotate", action="store", type=float,
help="rotate FFT output by this many degrees")
exc.add_argument("-R", "--rotate-rad", action="store", type=float,
help="rotate FFT output by this many radians")
group.add_argument("srcpath", action="store",
help="Path of raw input, e.g. /foo/raw")
group.add_argument("sinepath", action="store",
help="Path of sinefit input, e.g. /foo/sinefit")
group.add_argument("destpath", action="store",
help="Path of prep output, e.g. /foo/prep")
# Parse arguments
try:
args = f.parse_args(argv)
except nilmtools.filter.MissingDestination as e:
rec = "float32_%d" % (e.parsed_args.nharm * 2)
print "Source is %s (%s)" % (e.src.path, e.src.layout)
print "Destination %s doesn't exist" % (e.dest.path)
print "You could make it with a command like:"
print " nilmtool -u %s create %s %s" % (e.dest.url, e.dest.path, rec)
raise SystemExit(1)
# Check arguments
if args.column is None or args.column < 1:
parser.error("need a column number >= 1")
if args.nharm < 1 or args.nharm > 32:
parser.error("number of odd harmonics must be 1-32")
if args.rotate is not None:
rotation = args.rotate * 2.0 * pi / 360.0
else:
rotation = args.rotate_rad or 0.0
# Check the sine fit stream
client_sinefit = nilmdb.client.Client(args.url)
sinefit = nilmtools.filter.get_stream_info(client_sinefit, args.sinepath)
if not sinefit:
raise Exception("sinefit data not found")
if sinefit.layout != "float32_3":
raise Exception("sinefit data type is " + sinefit.layout
+ "; expected float32_3")
# Check and set metadata in prep stream
f.check_dest_metadata({ "prep_raw_source": f.src.path,
"prep_sinefit_source": sinefit.path,
"prep_column": args.column })
# Run the processing function on all data
f.process_numpy(process, args = (client_sinefit, sinefit.path, args.column,
args.nharm, rotation))
def process(data, interval, args, insert_function, final):
(client, sinefit_path, column, nharm, rotation) = args
rows = data.shape[0]
data_timestamps = data[:,0]
processed = 0
out = zeros((1, nharm * 2 + 1))
# Pull out sinefit data for the entire time range of this block
for sinefit_line in client.stream_extract(sinefit_path,
data[0, 0], data[rows-1, 0]):
# Extract sinefit data to get zero crossing timestamps
(t_min, f0, A, C) = [ float(x) for x in sinefit_line.split() ]
t_max = t_min + 1e6 / f0
# Find the indices of data that correspond to (t_min, t_max)
idx_min = bisect.bisect_left(data_timestamps, t_min)
idx_max = bisect.bisect_left(data_timestamps, t_max)
if idx_min >= idx_max:
# something's wonky; ignore this period
continue
if idx_max >= len(data_timestamps):
# max is likely past the end of our chunk, so stop
# processing this chunk now.
break
# Perform FFT over those indices
N = idx_max - idx_min
d = data[idx_min:idx_max, column]
F = scipy.fftpack.fft(d) / N
# If we wanted more harmonics than we have, pad with zeros
if N < (nharm * 2):
F = r_[F, zeros(nharm * 2 - N)]
# Fill output data
out[0, 0] = t_min
for k in range(nharm):
Fk = F[2 * k + 1] * e**(rotation * 1j * (k+1))
out[0, 2 * k + 1] = -imag(Fk) # Pk
out[0, 2 * k + 2] = real(Fk) # Qk
# Insert it and continue
insert_function(out)
processed = idx_max
print "Processed", processed, "of", rows, "rows"
return processed
if __name__ == "__main__":
main()

View File

@@ -1,187 +0,0 @@
#!/usr/bin/python
# Sine wave fitting. This runs about 5x faster than realtime on raw data.
import nilmtools.filter
import nilmdb.client
from numpy import *
from scipy import *
#import pylab as p
import operator
def main(argv = None):
f = nilmtools.filter.Filter()
parser = f.setup_parser("Sine wave fitting")
group = parser.add_argument_group("Sine fit options")
group.add_argument('-c', '--column', action='store', type=int,
help='Column number (first data column is 1)')
group.add_argument('-f', '--frequency', action='store', type=float,
default=60.0,
help='Approximate frequency (default: %(default)s)')
# Parse arguments
try:
args = f.parse_args(argv)
except nilmtools.filter.MissingDestination as e:
rec = "float32_3"
print "Source is %s (%s)" % (e.src.path, e.src.layout)
print "Destination %s doesn't exist" % (e.dest.path)
print "You could make it with a command like:"
print " nilmtool -u %s create %s %s" % (e.dest.url, e.dest.path, rec)
raise SystemExit(1)
if args.column is None or args.column < 1:
parser.error("need a column number >= 1")
if args.frequency < 0.1:
parser.error("frequency must be >= 0.1")
f.check_dest_metadata({ "sinefit_source": f.src.path,
"sinefit_column": args.column })
f.process_numpy(process, args = (args.column, args.frequency))
def process(data, interval, args, insert_function, final):
(column, f_expected) = args
rows = data.shape[0]
# Estimate sampling frequency from timestamps
fs = 1e6 * (rows-1) / (data[-1][0] - data[0][0])
# Pull out about 3.5 periods of data at once;
# we'll expect to match 3 zero crossings in each window
N = max(int(3.5 * fs / f_expected), 10)
# If we don't have enough data, don't bother processing it
if rows < N:
return 0
# Process overlapping windows
start = 0
num_zc = 0
while start < (rows - N):
this = data[start:start+N, column]
t_min = data[start, 0]/1e6
t_max = data[start+N-1, 0]/1e6
# Do 4-parameter sine wave fit
(A, f0, phi, C) = sfit4(this, fs)
# Check bounds. If frequency is too crazy, ignore this window
if f0 < (f_expected/2) or f0 > (f_expected*2):
print "frequency", f0, "too far from expected value", f_expected
start += N
continue
#p.plot(arange(N), this)
#p.plot(arange(N), A * cos(f0/fs * 2 * pi * arange(N) + phi) + C, 'g')
# Period starts when the argument of cosine is 3*pi/2 degrees,
# so we're looking for sample number:
# n = (3 * pi / 2 - phi) / (f0/fs * 2 * pi)
zc_n = (3 * pi / 2 - phi) / (f0 / fs * 2 * pi)
period_n = fs/f0
# Add periods to make N positive
while zc_n < 0:
zc_n += period_n
last_zc = None
# Mark the zero crossings until we're a half period away
# from the end of the window
while zc_n < (N - period_n/2):
#p.plot(zc_n, C, 'ro')
t = t_min + zc_n / fs
insert_function([[t * 1e6, f0, A, C]])
num_zc += 1
last_zc = zc_n
zc_n += period_n
# Advance the window one quarter period past the last marked
# zero crossing, or advance the window by half its size if we
# didn't mark any.
if last_zc is not None:
advance = min(last_zc + period_n/4, N)
else:
advance = N/2
#p.plot(advance, C, 'go')
#p.show()
start = int(round(start + advance))
# Return the number of rows we've processed
print "Marked", num_zc, "zero-crossings in", start, "rows"
return start
def sfit4(data, fs):
"""(A, f0, phi, C) = sfit4(data, fs)
Compute 4-parameter (unknown-frequency) least-squares fit to
sine-wave data, according to IEEE Std 1241-2010 Annex B
Input:
data vector of input samples
fs sampling rate (Hz)
Output:
Parameters [A, f0, phi, C] to fit the equation
x[n] = A * cos(f0/fs * 2 * pi * n + phi) + C
where n is sample number. Or, as a function of time:
x(t) = A * cos(f0 * 2 * pi * t + phi) + C
by Jim Paris
(Verified to match sfit4.m)
"""
N = len(data)
t = linspace(0, (N-1) / fs, N)
## Estimate frequency using FFT (step b)
Fc = fft(data)
F = abs(Fc)
F[0] = 0 # eliminate DC
# Find pair of spectral lines with largest amplitude:
# resulting values are in F(i) and F(i+1)
i = argmax(F[0:int(N/2)] + F[1:int(N/2+1)])
# Interpolate FFT to get a better result (from Markus [B37])
U1 = real(Fc[i])
U2 = real(Fc[i+1])
V1 = imag(Fc[i])
V2 = imag(Fc[i+1])
n = 2 * pi / N
ni1 = n * i
ni2 = n * (i+1)
K = ((V2-V1)*sin(ni1) + (U2-U1)*cos(ni1)) / (U2-U1)
Z1 = V1 * (K - cos(ni1)) / sin(ni1) + U1
Z2 = V2 * (K - cos(ni2)) / sin(ni2) + U2
i = arccos((Z2*cos(ni2) - Z1*cos(ni1)) / (Z2-Z1)) / n
# Convert to Hz
f0 = i * fs / N
## Fit it
# first guess for A0, B0 using 3-parameter fit (step c)
w = 2*pi*f0
D = c_[cos(w*t), sin(w*t), ones(N)]
s = linalg.lstsq(D, data)[0]
# Now iterate 6 times (step i)
for idx in range(6):
D = c_[cos(w*t), sin(w*t), ones(N),
-s[0] * t * sin(w*t) + s[1] * t * cos(w*t) ] # eqn B.16
s = linalg.lstsq(D, data)[0] # eqn B.18
w = w + s[3] # update frequency estimate
## Extract results
A = sqrt(s[0]*s[0] + s[1]*s[1]) # eqn B.21
f0 = w / (2*pi)
try:
phi = -arctan2(s[1], s[0]) # eqn B.22
except TypeError:
# something broke down, just return zeros
return (0, 0, 0, 0)
C = s[2]
return (A, f0, phi, C)
if __name__ == "__main__":
main()

BIN
tests/data/bpnilm-raw-1.gz Normal file
View File

Binary file not shown.

BIN
tests/data/bpnilm-raw-2.gz Normal file
View File

Binary file not shown.

View File

@@ -0,0 +1,2 @@
[/newton/*]
keep = 3

View File

@@ -0,0 +1,4 @@
[/newton/*]
keep = 3w
rate = 8000
decimated = false

13
tests/data/cleanup.cfg Normal file
View File

@@ -0,0 +1,13 @@
[/newton/*]
keep = 3w
rate = 8000
decimated = true
[/sf/*]
keep = 0.01h
dummy = xxx
[/empty/foo]
[/nonexistent/bar]

14401
tests/data/prep-20120323T1000 Normal file
View File

File diff suppressed because it is too large Load Diff

14400
tests/data/prep-20120323T1002 Normal file
View File

File diff suppressed because it is too large Load Diff

14400
tests/data/prep-20120323T1004 Normal file
View File

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,12 @@
# deltas are in microseconds
1000000 2.61246e+05 2.22735e+05 4.60340e+03 2.58221e+03 8.42804e+03 3.41890e+03 9.57898e+02 4.00585e+03
1000000 2.56439e+05 2.24775e+05 2.92897e+03 4.66646e+03 7.58491e+03 3.57351e+03 -4.34171e+02 2.98819e+03
1000000 2.51903e+05 2.23202e+05 4.23696e+03 3.49363e+03 8.53493e+03 4.29416e+03 8.49573e+02 2.38189e+03
1000000 2.57625e+05 2.20247e+05 5.47017e+03 1.35872e+03 9.18903e+03 4.56136e+03 2.65599e+03 2.60912e+03
1000000 2.63375e+05 2.20706e+05 4.51842e+03 1.80758e+03 8.17208e+03 4.17463e+03 2.57884e+03 3.32848e+03
1000000 2.59221e+05 2.22346e+05 2.98879e+03 3.66264e+03 6.87274e+03 3.94223e+03 1.25928e+03 3.51786e+03
50000000 2.51918e+05 2.22281e+05 4.22677e+03 2.84764e+03 7.78323e+03 3.81659e+03 8.04944e+02 3.46314e+03
1000000 2.54478e+05 2.21701e+05 5.61366e+03 1.02262e+03 9.26581e+03 3.50152e+03 1.29331e+03 3.07271e+03
1000000 2.59568e+05 2.22945e+05 4.97190e+03 1.28250e+03 8.62081e+03 4.06316e+03 1.85717e+03 2.61990e+03
1000000 2.57269e+05 2.23697e+05 3.60527e+03 3.05749e+03 7.22363e+03 4.90330e+03 1.93736e+03 2.35357e+03
1000000 2.52274e+05 2.21438e+05 5.01228e+03 2.86309e+03 7.87115e+03 4.80448e+03 2.18291e+03 2.93397e+03

View File

@@ -0,0 +1,12 @@
# deltas are in microseconds
1000000 2.61246e+05 2.22735e+05 4.60340e+03 2.58221e+03 8.42804e+03 3.41890e+03 9.57898e+02 4.00585e+03
1000000 2.56439e+05 2.24775e+05 2.92897e+03 4.66646e+03 7.58491e+03 3.57351e+03 -4.34171e+02 2.98819e+03
1000000 2.51903e+05 2.23202e+05 4.23696e+03 3.49363e+03 8.53493e+03 4.29416e+03 8.49573e+02 2.38189e+03
1000000 2.57625e+05 2.20247e+05 5.47017e+03 1.35872e+03 9.18903e+03 4.56136e+03 2.65599e+03 2.60912e+03
1000000 2.63375e+05 2.20706e+05 4.51842e+03 1.80758e+03 8.17208e+03 4.17463e+03 2.57884e+03 3.32848e+03
1000000 2.59221e+05 2.22346e+05 2.98879e+03 3.66264e+03 6.87274e+03 3.94223e+03 1.25928e+03 3.51786e+03
1000000 2.51918e+05 2.22281e+05 4.22677e+03 2.84764e+03 7.78323e+03 3.81659e+03 8.04944e+02 3.46314e+03
1000000 2.54478e+05 2.21701e+05 5.61366e+03 1.02262e+03 9.26581e+03 3.50152e+03 1.29331e+03 3.07271e+03
1000000 2.59568e+05 2.22945e+05 4.97190e+03 1.28250e+03 8.62081e+03 4.06316e+03 1.85717e+03 2.61990e+03
1000000 2.57269e+05 2.23697e+05 3.60527e+03 3.05749e+03 7.22363e+03 4.90330e+03 1.93736e+03 2.35357e+03
1000000 2.52274e+05 2.21438e+05 5.01228e+03 2.86309e+03 7.87115e+03 4.80448e+03 2.18291e+03 2.93397e+03

View File

@@ -0,0 +1,3 @@
# deltas are in microseconds
1000000A 2.61246e+05 2.22735e+05 4.60340e+03 2.58221e+03 8.42804e+03 3.41890e+03 9.57898e+02 4.00585e+03
BAD_DELTA 2.56439e+05 2.24775e+05 2.92897e+03 4.66646e+03 7.58491e+03 3.57351e+03 -4.34171e+02 2.98819e+03

3
tests/data/prep-notime Normal file
View File

@@ -0,0 +1,3 @@
# comments are cool? what if they contain →UNICODE← or invalid utf-8 like <20>(
2.66568e+05 2.24029e+05 5.16140e+03 2.52517e+03 8.35084e+03 3.72470e+03 1.35534e+03 2.03900e+03
2.57914e+05 2.27183e+05 4.30368e+03 4.13080e+03 7.25535e+03 4.89047e+03 1.63859e+03 1.93496e+03

View File

@@ -0,0 +1,7 @@
{ "url": "http://localhost:32182/",
"dest_stream": "/train/matches",
"stream": "/train/data",
"start": 0,
"end": 100000000,
"columns": [ ]
}

View File

@@ -0,0 +1,17 @@
{ "url": "http://localhost:32182/",
"dest_stream": "/train/matches",
"stream": "/train/data",
"start": 0,
"end": 100000000,
"columns": [ { "name": "P1", "index": 0 } ],
"exemplars": [
{ "name": "Big ON",
"url": "http://localhost:32182/",
"stream": "/train/data",
"start": 34000000,
"end": 34000001,
"dest_column": 0,
"columns": [ { "name": "P1", "index": 0 } ]
}
]
}

View File

@@ -0,0 +1,17 @@
{ "url": "http://localhost:32182/",
"dest_stream": "/train/matches",
"stream": "/train/data",
"start": 0,
"end": 100000000,
"columns": [ { "name": "P1", "index": 0 } ],
"exemplars": [
{ "name": "Big ON",
"url": "http://localhost:32182/",
"stream": "/train/big",
"start": 0,
"end": 110000,
"dest_column": 0,
"columns": [ { "name": "P1", "index": 0 } ]
}
]
}

View File

@@ -0,0 +1,17 @@
{ "url": "http://localhost:32182/",
"dest_stream": "/train/matches",
"stream": "/train/data",
"start": 0,
"end": 100000000,
"columns": [ { "name": "P1", "index": 0 } ],
"exemplars": [
{ "name": "Big ON",
"url": "http://localhost:32182/",
"stream": "/train/data",
"start": 34000000,
"end": 36000000,
"dest_column": 0,
"columns": [ { "name": "FOO", "index": 0 } ]
}
]
}

View File

@@ -0,0 +1,8 @@
{ "url": "http://localhost:32182/",
"dest_stream": "/train/matches",
"stream": "/train/data",
"start": 0,
"end": 100000000,
"columns": [ { "name": "P1", "index": 0 },
{ "name": "P1", "index": 1 } ]
}

View File

@@ -0,0 +1,7 @@
{ "url": "http://localhost:32182/",
"dest_stream": "/train/matches",
"stream": "/train/data",
"start": 0,
"end": 100000000,
"columns": [ { "name": "P1", "index": 10 } ]
}

View File

@@ -0,0 +1,7 @@
{ "url": "http://localhost:32182/",
"dest_stream": "/a/b",
"stream": "/c/d",
"start": 0,
"end": 100000000,
"columns": [ { "name": "P1", "index": 0 } ]
}

View File

@@ -0,0 +1,7 @@
{ "url": "http://localhost:32182/",
"dest_stream": "/a/b",
"stream": "/train/data",
"start": 0,
"end": 100000000,
"columns": [ { "name": "P1", "index": 0 } ]
}

View File

@@ -0,0 +1,7 @@
{ "url": "http://localhost:32182/",
"dest_stream": "/train/matches",
"stream": "/train/data",
"start": 0,
"end": 100000000,
"columns": [ { "name": "P1", "index": 0 } ]
}

View File

@@ -0,0 +1,8 @@
{ "url": "http://localhost:32182/",
"dest_stream": "/train/matches",
"stream": "/train/data",
"start": 0,
"end": 100000000,
"columns": [ { "name": "P1", "index": 0 } ],
"exemplars": [ ]
}

View File

@@ -0,0 +1,17 @@
{ "url": "http://localhost:32182/",
"dest_stream": "/train/matches",
"stream": "/train/data",
"start": 0,
"end": 100000000,
"columns": [ { "name": "P1", "index": 0 } ],
"exemplars": [
{ "name": "Big ON",
"url": "http://localhost:32182/",
"stream": "/e/f",
"start": 34000000,
"end": 36000000,
"dest_column": 0,
"columns": [ { "name": "P1", "index": 0 } ]
}
]
}

View File

@@ -0,0 +1,17 @@
{ "url": "http://localhost:32182/",
"dest_stream": "/train/matches",
"stream": "/train/data",
"start": 0,
"end": 100000000,
"columns": [ { "name": "P1", "index": 0 } ],
"exemplars": [
{ "name": "Big ON",
"url": "http://localhost:32182/",
"stream": "/train/data",
"start": 10034000000,
"end": 10035000000,
"dest_column": 0,
"columns": [ { "name": "P1", "index": 0 } ]
}
]
}

25
tests/data/trainola1.js Normal file
View File

@@ -0,0 +1,25 @@
{ "url": "http://localhost:32182/",
"dest_stream": "/train/matches",
"stream": "/train/data",
"start": 0,
"end": 100000000,
"columns": [ { "name": "P1", "index": 0 } ],
"exemplars": [
{ "name": "Big ON",
"url": "http://localhost:32182/",
"stream": "/train/data",
"start": 34000000,
"end": 36000000,
"dest_column": 0,
"columns": [ { "name": "P1", "index": 0 } ]
},
{ "name": "Big OFF",
"url": "http://localhost:32182/",
"stream": "/train/data",
"start": 36000000,
"end": 38000000,
"dest_column": 1,
"columns": [ { "name": "P1", "index": 0 } ]
}
]
}

17
tests/data/trainola2.js Normal file
View File

@@ -0,0 +1,17 @@
{ "url": "http://localhost:32182/",
"dest_stream": "/train/matches2",
"stream": "/train/data",
"start": 0,
"end": 100000000,
"columns": [ { "name": "P1", "index": 0 } ],
"exemplars": [
{ "name": "a",
"url": "http://localhost:32182/",
"stream": "/train/data",
"start": 1000000,
"end": 2000000,
"dest_column": 0,
"columns": [ { "name": "P1", "index": 0 } ]
}
]
}

1
tests/data/trunc1 Normal file
View File

@@ -0,0 +1 @@
1

1
tests/data/trunc2 Normal file
View File

@@ -0,0 +1 @@
1 2

5
tests/data/trunc3 Normal file
View File

@@ -0,0 +1,5 @@
# this has blank lines
# and timestamps: 2000-01-01 12:00:00
# and then the last is truncated
1 2 3

1
tests/data/trunc4 Normal file
View File

@@ -0,0 +1 @@
1 2 3 4

50
tests/runtests.py Executable file
View File

@@ -0,0 +1,50 @@
#!/usr/bin/env python3
import nose
import os
import sys
import glob
from collections import OrderedDict
# Change into parent dir
os.chdir(os.path.dirname(os.path.realpath(__file__)) + "/..")
class JimOrderPlugin(nose.plugins.Plugin):
"""When searching for tests and encountering a directory that
contains a 'test.order' file, run tests listed in that file, in the
order that they're listed. Globs are OK in that file and duplicates
are removed."""
name = 'jimorder'
score = 10000
def prepareTestLoader(self, loader):
def wrap(func):
def wrapper(name, *args, **kwargs):
addr = nose.selector.TestAddress(
name, workingDir=loader.workingDir)
try:
order = os.path.join(addr.filename, "test.order")
except Exception:
order = None
if order and os.path.exists(order):
files = []
for line in open(order):
line = line.split('#')[0].strip()
if not line:
continue
fn = os.path.join(addr.filename, line.strip())
files.extend(sorted(glob.glob(fn)) or [fn])
files = list(OrderedDict.fromkeys(files))
tests = [ wrapper(fn, *args, **kwargs) for fn in files ]
return loader.suiteClass(tests)
return func(name, *args, **kwargs)
return wrapper
loader.loadTestsFromName = wrap(loader.loadTestsFromName)
return loader
# Use setup.cfg for most of the test configuration. Adding
# --with-jimorder here means that a normal "nosetests" run will
# still work, it just won't support test.order.
if __name__ == "__main__":
nose.main(addplugins = [ JimOrderPlugin() ],
argv = sys.argv + ["--with-jimorder"])

3
tests/test.order Normal file
View File

@@ -0,0 +1,3 @@
test.py
test*.py

911
tests/test.py Normal file
View File

@@ -0,0 +1,911 @@
# -*- coding: utf-8 -*-
import nilmtools.copy_one
import nilmtools.cleanup
import nilmtools.copy_one
import nilmtools.copy_wildcard
import nilmtools.decimate_auto
import nilmtools.decimate
import nilmtools.insert
import nilmtools.median
import nilmtools.pipewatch
import nilmtools.prep
import nilmtools.sinefit
import nilmtools.trainola
from nilmdb.utils.interval import Interval
from nose.tools import assert_raises
import unittest
import numpy
import math
import json
import random
from testutil.helpers import *
import subprocess
import traceback
import os
import atexit
import signal
import functools
from urllib.request import urlopen
from nilmtools.filter import ArgumentError
def run_cherrypy_server(path, port, event):
db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(path)
server = nilmdb.server.Server(db, host="127.0.0.1",
port=port, stoppable=True)
server.start(blocking = True, event = event)
db.close()
class CommandTester():
url = "http://localhost:32182/"
url2 = "http://localhost:32183/"
@classmethod
def setup_class(cls):
# We need two servers running for "copy_multiple", but
# cherrypy uses globals and can only run once per process.
# Using multiprocessing with "spawn" method should work in
# theory, but is hard to get working when the test suite is
# spawned directly by nosetests (rather than ./run-tests.py).
# Instead, just run the real nilmdb-server that got installed
# along with our nilmdb dependency.
def terminate_servers():
for p in cls.servers:
p.terminate()
atexit.register(terminate_servers)
cls.servers = []
for (path, port) in (("tests/testdb1", 32182),
("tests/testdb2", 32183)):
def listening():
try:
urlopen(f"http://127.0.0.1:{port}/", timeout=0.1)
return True
except Exception as e:
return False
if listening():
raise Exception(f"another server already running on {port}")
recursive_unlink(path)
p = subprocess.Popen(["nilmdb-server",
"--address", "127.0.0.1",
"--database", path,
"--port", str(port),
"--quiet",
"--traceback"],
stdin=subprocess.DEVNULL,
stdout=subprocess.DEVNULL)
for i in range(50):
if listening():
break
time.sleep(0.1)
else:
raise Exception(f"server didn't start on port {port}")
@classmethod
def teardown_class(cls):
for p in cls.servers:
p.terminate()
def run(self, arg_string, infile=None, outfile=None):
"""Run a cmdline client with the specified argument string,
passing the given input. Save the output and exit code."""
os.environ['NILMDB_URL'] = self.url
self.last_args = arg_string
class stdio_wrapper:
def __init__(self, stdin, stdout, stderr):
self.io = (stdin, stdout, stderr)
def __enter__(self):
self.saved = ( sys.stdin, sys.stdout, sys.stderr )
( sys.stdin, sys.stdout, sys.stderr ) = self.io
def __exit__(self, type, value, traceback):
( sys.stdin, sys.stdout, sys.stderr ) = self.saved
# Empty input if none provided
if infile is None:
infile = io.TextIOWrapper(io.BytesIO(b""))
# Capture stderr
errfile = io.TextIOWrapper(io.BytesIO())
if outfile is None:
# If no output file, capture stdout with stderr
outfile = errfile
with stdio_wrapper(infile, outfile, errfile) as s:
try:
args = shlex.split(arg_string)
sys.argv[0] = "test_runner"
self.main(args)
sys.exit(0)
except SystemExit as e:
exitcode = e.code
except Exception as e:
traceback.print_exc()
exitcode = 1
# Capture raw binary output, and also try to decode a Unicode
# string copy.
self.captured_binary = outfile.buffer.getvalue()
try:
outfile.seek(0)
self.captured = outfile.read()
except UnicodeDecodeError:
self.captured = None
self.exitcode = exitcode
def ok(self, arg_string, infile = None):
self.run(arg_string, infile)
if self.exitcode != 0:
self.dump()
eq_(self.exitcode, 0)
def fail(self, arg_string, infile=None, exitcode=None):
self.run(arg_string, infile)
if exitcode is not None and self.exitcode != exitcode:
# Wrong exit code
self.dump()
eq_(self.exitcode, exitcode)
if self.exitcode == 0:
# Success, when we wanted failure
self.dump()
ne_(self.exitcode, 0)
def contain(self, checkstring, contain=True):
if contain:
in_(checkstring, self.captured)
else:
nin_(checkstring, self.captured)
def match(self, checkstring):
eq_(checkstring, self.captured)
def matchfile(self, file):
# Captured data should match file contents exactly
with open(file) as f:
contents = f.read()
if contents != self.captured:
print("--- reference file (first 1000 bytes):\n")
print(contents[0:1000] + "\n")
print("--- captured data (first 1000 bytes):\n")
print(self.captured[0:1000] + "\n")
zipped = itertools.zip_longest(contents, self.captured)
for (n, (a, b)) in enumerate(zipped):
if a != b:
print("--- first difference is at offset", n)
print("--- reference:", repr(a))
print("--- captured:", repr(b))
break
raise AssertionError("captured data doesn't match " + file)
def matchfilecount(self, file):
# Last line of captured data should match the number of
# non-commented lines in file
count = 0
with open(file) as f:
for line in f:
if line[0] != '#':
count += 1
eq_(self.captured.splitlines()[-1], sprintf("%d", count))
def dump(self):
printf("\n===args start===\n%s\n===args end===\n", self.last_args)
printf("===dump start===\n%s===dump end===\n", self.captured)
class TestAllCommands(CommandTester):
def test_00_load_data(self):
client = nilmdb.client.Client(url=self.url)
client.stream_create("/newton/prep", "float32_8")
client.stream_set_metadata("/newton/prep",
{ "description": "newton" })
for ts in ("20120323T1000", "20120323T1002", "20120323T1004"):
start = nilmdb.utils.time.parse_time(ts)
fn = f"tests/data/prep-{ts}"
data = nilmdb.utils.timestamper.TimestamperRate(fn, start, 120)
client.stream_insert("/newton/prep", data);
def test_01_copy(self):
self.main = nilmtools.copy_one.main
client = nilmdb.client.Client(url=self.url)
# basic arguments
self.fail(f"")
self.fail(f"no-such-src no-such-dest")
self.contain("source path no-such-src not found")
self.fail(f"-u {self.url} no-such-src no-such-dest")
# nonexistent dest
self.fail(f"/newton/prep /newton/prep-copy")
self.contain("Destination /newton/prep-copy doesn't exist")
# wrong type
client.stream_create("/newton/prep-copy-wrongtype", "uint16_6")
self.fail(f"/newton/prep /newton/prep-copy-wrongtype")
self.contain("wrong number of fields")
# copy with metadata, and compare
client.stream_create("/newton/prep-copy", "float32_8")
self.ok(f"/newton/prep /newton/prep-copy")
a = list(client.stream_extract("/newton/prep"))
b = list(client.stream_extract("/newton/prep-copy"))
eq_(a, b)
a = client.stream_get_metadata("/newton/prep")
b = client.stream_get_metadata("/newton/prep-copy")
eq_(a, b)
# copy with no metadata
client.stream_create("/newton/prep-copy-nometa", "float32_8")
self.ok(f"--nometa /newton/prep /newton/prep-copy-nometa")
a = list(client.stream_extract("/newton/prep"))
b = list(client.stream_extract("/newton/prep-copy-nometa"))
eq_(a, b)
a = client.stream_get_metadata("/newton/prep")
b = client.stream_get_metadata("/newton/prep-copy-nometa")
ne_(a, b)
def test_02_copy_wildcard(self):
self.main = nilmtools.copy_wildcard.main
client1 = nilmdb.client.Client(url=self.url)
client2 = nilmdb.client.Client(url=self.url2)
# basic arguments
self.fail(f"")
self.fail(f"/newton")
self.fail(f"-u {self.url} -U {self.url} /newton")
self.contain("URL must be different")
# no matches; silent
self.ok(f"-u {self.url} -U {self.url2} /newton")
self.ok(f"-u {self.url} -U {self.url2} /asdf*")
self.ok(f"-u {self.url2} -U {self.url} /newton*")
eq_(client2.stream_list(), [])
# this won't actually copy, but will still create streams
self.ok(f"-u {self.url} -U {self.url2} --dry-run /newton*")
self.contain("Creating destination stream /newton/prep-copy")
eq_(len(list(client2.stream_extract("/newton/prep"))), 0)
# this should copy a bunch
self.ok(f"-u {self.url} -U {self.url2} /*")
self.contain("Creating destination stream /newton/prep-copy", False)
eq_(client1.stream_list(), client2.stream_list())
eq_(list(client1.stream_extract("/newton/prep")),
list(client2.stream_extract("/newton/prep")))
eq_(client1.stream_get_metadata("/newton/prep"),
client2.stream_get_metadata("/newton/prep"))
# repeating it is OK; it just won't recreate streams.
# Let's try with --nometa too
client2.stream_remove("/newton/prep")
client2.stream_destroy("/newton/prep")
self.ok(f"-u {self.url} -U {self.url2} --nometa /newton*")
self.contain("Creating destination stream /newton/prep-copy", False)
self.contain("Creating destination stream /newton/prep", True)
eq_(client1.stream_list(), client2.stream_list())
eq_(list(client1.stream_extract("/newton/prep")),
list(client2.stream_extract("/newton/prep")))
eq_(client2.stream_get_metadata("/newton/prep"), {})
# fill in test cases
self.ok(f"-u {self.url} -U {self.url2} -s 2010 -e 2020 -F /newton*")
def test_03_decimate(self):
self.main = nilmtools.decimate.main
client = nilmdb.client.Client(url=self.url)
# basic arguments
self.fail(f"")
# no dest
self.fail(f"/newton/prep /newton/prep-decimated-1")
self.contain("doesn't exist")
# wrong dest shape
client.stream_create("/newton/prep-decimated-bad", "float32_8")
self.fail(f"/newton/prep /newton/prep-decimated-bad")
self.contain("wrong number of fields")
# bad factor
self.fail(f"/newton/prep -f 1 /newton/prep-decimated-bad")
self.contain("needs to be 2 or more")
# ok, default factor 4
client.stream_create("/newton/prep-decimated-4", "float32_24")
self.ok(f"/newton/prep /newton/prep-decimated-4")
a = client.stream_count("/newton/prep")
b = client.stream_count("/newton/prep-decimated-4")
eq_(a // 4, b)
# factor 10
client.stream_create("/newton/prep-decimated-10", "float32_24")
self.ok(f"/newton/prep -f 10 /newton/prep-decimated-10")
self.contain("Processing")
a = client.stream_count("/newton/prep")
b = client.stream_count("/newton/prep-decimated-10")
eq_(a // 10, b)
# different factor, same target
self.fail(f"/newton/prep -f 16 /newton/prep-decimated-10")
self.contain("Metadata in destination stream")
self.contain("decimate_factor = 10")
self.contain("doesn't match desired data")
self.contain("decimate_factor = 16")
# unless we force it
self.ok(f"/newton/prep -f 16 -F /newton/prep-decimated-10")
a = client.stream_count("/newton/prep")
b = client.stream_count("/newton/prep-decimated-10")
# but all data was already converted, so no more
eq_(a // 10, b)
# if we try to decimate an already-decimated stream, the suggested
# shape is different
self.fail(f"/newton/prep-decimated-4 -f 4 /newton/prep-decimated-16")
self.contain("create /newton/prep-decimated-16 float32_24")
# decimate again
client.stream_create("/newton/prep-decimated-16", "float32_24")
self.ok(f"/newton/prep-decimated-4 -f 4 /newton/prep-decimated-16")
self.contain("Processing")
# check shape suggestion for different input types
for (shape, expected) in (("int32_1", "float64_3"),
("uint32_1", "float64_3"),
("int64_1", "float64_3"),
("uint64_1", "float64_3"),
("float32_1", "float32_3"),
("float64_1", "float64_3")):
client.stream_create(f"/test/{shape}", shape)
self.fail(f"/test/{shape} /test/{shape}-decim")
self.contain(f"create /test/{shape}-decim {expected}")
def test_04_decimate_auto(self):
self.main = nilmtools.decimate_auto.main
client = nilmdb.client.Client(url=self.url)
self.fail(f"")
self.fail(f"--max -1 asdf")
self.contain("bad max")
self.fail(f"/no/such/stream")
self.contain("no stream matched path")
# normal run
self.ok(f"/newton/prep")
# can't auto decimate a decimated stream
self.fail(f"/newton/prep-decimated-16")
self.contain("need to pass the base stream instead")
# decimate prep again, this time much more; also use -F
self.ok(f"-m 10 --force-metadata /newton/pr??")
self.contain("Level 4096 decimation has 9 rows")
# decimate the different shapes
self.ok(f"/test/*")
self.contain("Level 1 decimation has 0 rows")
def test_05_insert(self):
self.main = nilmtools.insert.main
client = nilmdb.client.Client(url=self.url)
self.fail(f"")
self.ok(f"--help")
# mutually exclusive arguments
self.fail(f"--delta --rate 123 /foo bar")
self.fail(f"--live --filename /foo bar")
# Insert from file
client.stream_create("/insert/prep", "float32_8")
t0 = "tests/data/prep-20120323T1000"
t2 = "tests/data/prep-20120323T1002"
t4 = "tests/data/prep-20120323T1004"
self.ok(f"--file --dry-run --rate 120 /insert/prep {t0} {t2} {t4}")
self.contain("Dry run")
# wrong rate
self.fail(f"--file --dry-run --rate 10 /insert/prep {t0} {t2} {t4}")
self.contain("Data is coming in too fast")
# skip forward in time
self.ok(f"--file --dry-run --rate 120 /insert/prep {t0} {t4}")
self.contain("data timestamp behind by 120")
self.contain("Skipping data timestamp forward")
# skip backwards in time
self.fail(f"--file --dry-run --rate 120 /insert/prep {t0} {t2} {t0}")
self.contain("data timestamp ahead by 240")
# skip backwards in time is OK if --skip provided
self.ok(f"--skip -f -D -r 120 insert/prep {t0} {t2} {t0} {t4}")
self.contain("Skipping the remainder of this file")
# Now insert for real
self.ok(f"--skip --file --rate 120 /insert/prep {t0} {t2} {t4}")
self.contain("Done")
# Overlap
self.fail(f"--skip --file --rate 120 /insert/prep {t0}")
self.contain("new data overlaps existing data")
# Not overlap if we change file offset
self.ok(f"--skip --file --rate 120 -o 0 /insert/prep {t0}")
# Data with no timestamp
self.fail(f"-f -r 120 /insert/prep tests/data/prep-notime")
self.contain("No idea what timestamp to use")
# Check intervals so far
eq_(list(client.stream_intervals("/insert/prep")),
[[1332507600000000, 1332507959991668],
[1332511200000000, 1332511319991668]])
# Delta supplied by file
self.ok(f"--file --delta -o 0 /insert/prep {t4}-delta")
eq_(list(client.stream_intervals("/insert/prep")),
[[1332507600000000, 1332507959991668],
[1332511200000000, 1332511319991668],
[1332511440000000, 1332511499000001]])
# Now fake live timestamps by using the delta file, and a
# fake clock that increments one second per call.
def fake_time_now():
nonlocal fake_time_base
ret = fake_time_base
fake_time_base += 1000000
return ret
real_time_now = nilmtools.insert.time_now
nilmtools.insert.time_now = fake_time_now
# Delta supplied by file. This data is too fast because delta
# contains a 50 sec jump
fake_time_base = 1332511560000000
self.fail(f"--live --delta -o 0 /insert/prep {t4}-delta")
self.contain("Data is coming in too fast")
self.contain("data time is Fri, 23 Mar 2012 10:06:55")
self.contain("clock time is only Fri, 23 Mar 2012 10:06:06")
# This data is OK, no jump
fake_time_base = 1332511560000000
self.ok(f"--live --delta -o 0 /insert/prep {t4}-delta2")
# This has unparseable delta
fake_time_base = 1332511560000000
self.fail(f"--live --delta -o 0 /insert/prep {t4}-delta3")
self.contain("can't parse delta")
# Insert some gzipped data, with no timestamp in name
bp1 = "tests/data/bpnilm-raw-1.gz"
bp2 = "tests/data/bpnilm-raw-2.gz"
client.stream_create("/insert/raw", "uint16_6")
self.ok(f"--file /insert/raw {bp1} {bp2}")
# Try truncated data
tr = "tests/data/trunc"
self.ok(f"--file /insert/raw {tr}1 {tr}2 {tr}3 {tr}4")
nilmtools.insert.time_now = real_time_now
def generate_sine_data(self, client, path, data_sec, fs, freq):
# generate raw data
client.stream_create(path, "uint16_2")
with client.stream_insert_context(path) as ctx:
for n in range(fs * data_sec):
t = n / fs
v = math.sin(t * 2 * math.pi * freq)
i = 0.3 * math.sin(3*t) + math.sin(t)
line = b"%d %d %d\n" % (
(t + 1234567890) * 1e6,
v * 32767 + 32768,
i * 32768 + 32768)
ctx.insert(line)
if 0:
for (s, e) in client.stream_intervals(path):
print(Interval(s,e).human_string())
def test_06_sinefit(self):
self.main = nilmtools.sinefit.main
client = nilmdb.client.Client(url=self.url)
self.fail(f"")
self.ok(f"--help")
self.generate_sine_data(client, "/sf/raw", 50, 8000, 60)
client.stream_create("/sf/out-bad", "float32_4")
self.fail(f"--column 1 /sf/raw /sf/out-bad")
self.contain("wrong number of fields")
self.fail(f"--column 1 /sf/raw /sf/out")
self.contain("/sf/out doesn't exist")
# basic run
client.stream_create("/sf/out", "float32_3")
self.ok(f"--column 1 /sf/raw /sf/out")
eq_(client.stream_count("/sf/out"), 3000)
# parameter errors
self.fail(f"--column 0 /sf/raw /sf/out")
self.contain("need a column number")
self.fail(f"/sf/raw /sf/out")
self.contain("need a column number")
self.fail(f"-c 1 --frequency 0 /sf/raw /sf/out")
self.contain("frequency must be")
self.fail(f"-c 1 --min-freq 100 /sf/raw /sf/out")
self.contain("invalid min or max frequency")
self.fail(f"-c 1 --max-freq 5 /sf/raw /sf/out")
self.contain("invalid min or max frequency")
self.fail(f"-c 1 --min-amp -1 /sf/raw /sf/out")
self.contain("min amplitude must be")
# trigger some warnings
client.stream_create("/sf/out2", "float32_3")
self.ok(f"-c 1 -f 500 -e @1234567897000000 /sf/raw /sf/out2")
self.contain("outside valid range")
self.contain("1000 warnings suppressed")
eq_(client.stream_count("/sf/out2"), 0)
self.ok(f"-c 1 -a 40000 -e @1234567898000000 /sf/raw /sf/out2")
self.contain("below minimum threshold")
# get coverage for "advance = N/2" line near end of sinefit,
# where we found a fit but it was after the end of the window,
# so we didn't actually mark anything in this window.
self.ok(f"-c 1 -f 240 -m 50 -e @1234567898010000 /sf/raw /sf/out2")
def test_07_median(self):
self.main = nilmtools.median.main
client = nilmdb.client.Client(url=self.url)
self.fail(f"")
self.ok(f"--help")
client.stream_create("/median/1", "float32_8")
client.stream_create("/median/2", "float32_8")
self.fail("/newton/prep /median/0")
self.contain("doesn't exist")
self.ok("/newton/prep /median/1")
self.ok("--difference /newton/prep /median/2")
def test_08_prep(self):
self.main = nilmtools.prep.main
client = nilmdb.client.Client(url=self.url)
self.fail(f"")
self.ok(f"--help")
self.fail(f"-c 2 /sf/raw /sf/out /prep/out")
self.contain("/prep/out doesn't exist")
# basic usage
client.stream_create("/prep/out", "float32_8")
self.ok(f"-c 2 /sf/raw /sf/out /prep/out")
self.contain("processed 100000")
# test arguments
self.fail(f"/sf/raw /sf/out /prep/out")
self.contain("need a column number")
self.fail(f"-c 0 /sf/raw /sf/out /prep/out")
self.contain("need a column number")
self.fail(f"-c 2 -n 3 /sf/raw /sf/out /prep/out")
self.contain("need 6 columns")
self.fail(f"-c 2 -n 0 /sf/raw /sf/out /prep/out")
self.contain("number of odd harmonics must be")
self.fail(f"-c 2 -N 0 /sf/raw /sf/out /prep/out")
self.contain("number of shifted FFTs must be")
self.ok(f"-c 2 -r 0 /sf/raw /sf/out /prep/out")
self.ok(f"-c 2 -R 0 /sf/raw /sf/out /prep/out")
self.fail(f"-c 2 -r 0 -R 0 /sf/raw /sf/out /prep/out")
self.fail(f"-c 2 /sf/raw /sf/no-sinefit-data /prep/out")
self.contain("sinefit data not found")
self.fail(f"-c 2 /sf/raw /prep/out /prep/out")
self.contain("sinefit data type is float32_8; expected float32_3")
# Limit time so only one row gets passed in
client.stream_create("/prep/tmp", "float32_8")
s = 1234567890000000
e = 1234567890000125
self.ok(f"-c 2 -s {s} -e {e} /sf/raw /sf/out /prep/tmp")
# Lower sampling rate on everything, so that the FFT doesn't
# return all the harmonics, and prep has to fill with zeros.
# Tests the "if N < (nharm * 2):" condition in prep
self.generate_sine_data(client, "/sf/raw-low", 5, 100, 60)
self.main = nilmtools.sinefit.main
client.stream_create("/sf/out-low", "float32_3")
self.ok(f"--column 1 /sf/raw-low /sf/out-low")
self.main = nilmtools.prep.main
client.stream_create("/prep/out-low", "float32_8")
self.ok(f"-c 2 /sf/raw-low /sf/out-low /prep/out-low")
# Test prep with empty sinefit data
client.stream_create("/sf/out-empty", "float32_3")
with client.stream_insert_context("/sf/out-empty",
1034567890123456,
2034567890123456):
pass
client.stream_create("/prep/out-empty", "float32_8")
self.ok(f"-c 2 /sf/raw /sf/out-empty /prep/out-empty")
self.contain("warning: no periods found; skipping")
def generate_trainola_data(self):
# Build some fake data for trainola, which is just pulses of varying
# length.
client = nilmdb.client.Client(url=self.url)
total_sec = 100
fs = 100
rg = numpy.random.Generator(numpy.random.MT19937(1234567))
path = "/train/data"
# Just build up some random pulses. This uses seeded random numbers,
# so any changes here will affect the success/failures of tests later.
client.stream_create(path, "float32_1")
with client.stream_insert_context(path) as ctx:
remaining = 0
for n in range(fs * total_sec):
t = n / fs
data = rg.normal(100) / 100 - 1
if remaining > 0:
remaining -= 1
data += 1
else:
if rg.integers(fs * 10 * total_sec) < fs:
if rg.integers(3) < 2:
remaining = fs*2
else:
remaining = fs/2
line = b"%d %f\n" % (t * 1e6, data)
ctx.insert(line)
# To view what was made, try:
if 0:
subprocess.call(f"nilmtool -u {self.url} extract -s min -e max " +
f"{path} > /tmp/data", shell=True)
# then in Octave: a=load("/tmp/data"); plot(a(:,2));
if 0:
for (s, e) in client.stream_intervals(path):
print(Interval(s,e).human_string())
# Also generate something with more than 100k data points
client.stream_create("/train/big", "uint8_1")
with client.stream_insert_context("/train/big") as ctx:
for n in range(110000):
ctx.insert(b"%d 0\n" % n)
def test_09_trainola(self):
self.main = nilmtools.trainola.main
client = nilmdb.client.numpyclient.NumpyClient(url=self.url)
self.fail(f"")
self.ok(f"--help")
self.ok(f"--version")
self.generate_trainola_data()
def get_json(path):
with open(path) as f:
js = f.read().replace('\n', ' ')
return f"'{js}'"
# pass a dict as argv[0]
with assert_raises(KeyError):
saved_stdout = sys.stdout
try:
with open(os.devnull, 'w') as sys.stdout:
nilmtools.trainola.main([{ "url": self.url }])
finally:
sys.stdout = saved_stdout
# pass no args and they come from sys.argv
saved_argv = sys.argv
try:
sys.argv = [ "prog", "bad-json," ]
with assert_raises(json.decoder.JSONDecodeError):
nilmtools.trainola.main()
finally:
sys.argv = saved_argv
# catch a bunch of errors based on different json input
client.stream_create("/train/matches", "uint8_1")
for (num, error) in [ (1, "no columns"),
(2, "duplicated columns"),
(3, "bad column number"),
(4, "source path '/c/d' does not exist"),
(5, "destination path '/a/b' does not exist"),
(6, "missing exemplars"),
(7, "missing exemplars"),
(8, "exemplar stream '/e/f' does not exist"),
(9, "No data in this exemplar"),
(10, "Too few data points"),
(11, "Too many data points"),
(12, "column FOO is not available in source") ]:
self.fail(get_json(f"tests/data/trainola-bad{num}.js"))
self.contain(error)
# not enough columns in dest
self.fail(get_json("tests/data/trainola1.js"))
self.contain("bad destination column number")
# run normally
client.stream_destroy("/train/matches")
client.stream_create("/train/matches", "uint8_2")
self.ok(get_json("tests/data/trainola1.js"))
self.contain("matched 10 exemplars")
# check actual matches, since we made up the data
matches = list(client.stream_extract_numpy("/train/matches"))
eq_(matches[0].tolist(), [[34000000, 1, 0],
[36000000, 0, 1],
[40800000, 1, 0],
[42800000, 0, 1],
[60310000, 1, 0],
[62310000, 0, 1],
[69290000, 1, 0],
[71290000, 0, 1],
[91210000, 1, 0],
[93210000, 0, 1]])
# another run using random noise as an exemplar, to get better coverage
client.stream_create("/train/matches2", "uint8_1")
self.ok(get_json("tests/data/trainola2.js"))
def test_10_pipewatch(self):
self.main = nilmtools.pipewatch.main
self.fail(f"")
self.ok(f"--help")
lock = "tests/pipewatch.lock"
lk = f"--lock {lock}"
try:
os.unlink(lock)
except OSError:
pass
# try locking so pipewatch will exit (with code 0)
lockfile = open(lock, "w")
nilmdb.utils.lock.exclusive_lock(lockfile)
self.ok(f"{lk} true true")
self.contain("pipewatch process already running")
os.unlink(lock)
# have pipewatch remove its own lock to trigger error later
self.ok(f"{lk} 'rm {lock}' true")
# various cases to get coverage
self.ok(f"{lk} true 'cat >/dev/null'")
self.contain("generator returned 0, consumer returned 0")
self.fail(f"{lk} false true")
self.contain("generator returned 1, consumer returned 0")
self.fail(f"{lk} false false")
self.contain("generator returned 1, consumer returned 1")
self.fail(f"{lk} true false")
self.contain("generator returned 0, consumer returned 1")
self.fail(f"{lk} 'kill -15 $$' true")
self.ok(f"{lk} 'sleep 1 ; echo hi' 'cat >/dev/null'")
self.ok(f"{lk} 'echo hi' 'cat >/dev/null'")
self.fail(f"{lk} --timeout 0.5 'sleep 10 ; echo hi' 'cat >/dev/null'")
self.fail(f"{lk} 'yes' 'head -1 >/dev/null'")
self.fail(f"{lk} false 'exec 2>&-; trap \"sleep 10\" 0 15 ; sleep 10'")
def test_11_cleanup(self):
self.main = nilmtools.cleanup.main
client = nilmdb.client.Client(url=self.url)
# This mostly just gets coverage, doesn't carefully verify behavior
self.fail(f"")
self.ok(f"--help")
self.fail(f"tests/data/cleanup-bad.cfg")
self.contain("unknown units")
client.stream_create("/empty/foo", "uint16_1")
self.ok(f"tests/data/cleanup.cfg")
self.contain("'/nonexistent/bar' did not match any existing streams")
self.contain("no config for existing stream '/empty/foo'")
self.contain("nothing to do (only 0.00 weeks of data present)")
self.contain("specify --yes to actually perform")
self.ok(f"--yes tests/data/cleanup.cfg")
self.contain("removing data before")
self.contain("removing from /sf/raw")
self.ok(f"--estimate tests/data/cleanup.cfg")
self.contain("Total estimated disk usage")
self.contain("MiB")
self.contain("GiB")
self.ok(f"--yes tests/data/cleanup-nodecim.cfg")
self.ok(f"--estimate tests/data/cleanup-nodecim.cfg")
def test_12_misc(self):
# Fill in test cases that were missed by earlier code:
# math.py
with assert_raises(ValueError):
nilmtools.math.sfit4([1], 5)
nilmtools.math.sfit4([1,2], 5)
# filter.py
client = nilmdb.client.numpyclient.NumpyClient(self.url)
client.stream_create("/misc/a", "uint8_1")
client.stream_create("/misc/b", "uint8_1")
with client.stream_insert_context("/misc/a") as ctx:
for n in range(10000):
ctx.insert(b"%d 0\n" % n)
pni = nilmtools.filter.process_numpy_interval
src = nilmtools.filter.get_stream_info(client, "/misc/a")
extractor = functools.partial(
client.stream_extract_numpy, "/misc/a",
layout=src.layout, maxrows=1000)
inserter = functools.partial(
client.stream_insert_numpy_context, "/misc/b")
def func1(*args):
return 0
def func2(*args):
return -1
def func3(array, interval, args, insert_func, last):
if last:
return array.shape[0]
return 0
saved = (sys.stdout, sys.stderr)
try:
with open(os.devnull, 'w') as sys.stdout:
with open(os.devnull, 'w') as sys.stderr:
pni(Interval(0, 10000), extractor, inserter, 100, func1)
with assert_raises(SystemExit):
f = nilmtools.filter.Filter("hello world")
finally:
(sys.stdout, sys.stderr) = saved
with assert_raises(Exception):
pni(Interval(0, 10000), extractor, inserter, 100, func2)
pni(Interval(0, 10000), extractor, inserter, 100000, func3)
with assert_raises(NotImplementedError):
pni(Interval(0, 10000), extractor, inserter, 100000,
nilmtools.filter.example_callback_function)
self.main = nilmtools.filter.main
self.fail(f"")
self.ok(f"--help")
self.fail(f"/misc/a /misc/a")
self.contain("must be different")
self.fail(f"--start HELLOWORLD /misc/a /misc/a")
self.contain("not enough digits for a timestamp")
client.stream_create("/misc/c", "uint8_1")
self.ok(f"--quiet /misc/a /misc/c")
self.contain("Source: /misc/a", False)
self.contain("Generic filter: need to handle")
f = nilmtools.filter.Filter()
parser = f.setup_parser()
args = f.parse_args(["--quiet", "/misc/a", "/misc/c"])
x = f.client_src
x = f.client_dest
for i in f.intervals():
with assert_raises(Exception) as e:
x = f.client_src
in_("client is in use", str(e.exception))
with assert_raises(Exception) as e:
x = f.client_dest
in_("client is in use", str(e.exception))

View File

@@ -0,0 +1 @@
# empty

64
tests/testutil/helpers.py Normal file
View File

@@ -0,0 +1,64 @@
# Just some helpers for test functions
import io
import os
import re
import sys
import time
import shlex
import shutil
import nilmdb.server
import nilmdb.utils
import nilmdb.utils.timestamper
from nilmdb.utils.printf import printf
def myrepr(x):
if isinstance(x, str):
return '"' + x + '"'
else:
return repr(x)
def eq_(a, b):
if not a == b:
raise AssertionError("%s != %s" % (myrepr(a), myrepr(b)))
def lt_(a, b):
if not a < b:
raise AssertionError("%s is not less than %s" % (myrepr(a), myrepr(b)))
def in_(a, b):
if a not in b:
raise AssertionError("%s not in %s" % (myrepr(a), myrepr(b)))
def nin_(a, b):
if a in b:
raise AssertionError("unexpected %s in %s" % (myrepr(a), myrepr(b)))
def in2_(a1, a2, b):
if a1 not in b and a2 not in b:
raise AssertionError("(%s or %s) not in %s" % (myrepr(a1), myrepr(a2),
myrepr(b)))
def ne_(a, b):
if not a != b:
raise AssertionError("unexpected %s == %s" % (myrepr(a), myrepr(b)))
def lines_(a, n):
l = a.count('\n')
if not l == n:
if len(a) > 5000:
a = a[0:5000] + " ... truncated"
raise AssertionError("wanted %d lines, got %d in output: '%s'"
% (n, l, a))
def recursive_unlink(path):
try:
shutil.rmtree(path)
except OSError:
pass
try:
os.unlink(path)
except OSError:
pass

View File

File diff suppressed because it is too large Load Diff