Compare commits
42 Commits
nilmtools-
...
nilmtools-
Author | SHA1 | Date | |
---|---|---|---|
eb6d7a8809 | |||
c8be6755ae | |||
9e321d9e41 | |||
f2bebea5d0 | |||
d919a73387 | |||
17fa79a5dc | |||
ca970fa1fd | |||
805d8fb24f | |||
05da75e34a | |||
56e778df71 | |||
87178e9599 | |||
f8b1a001c3 | |||
7e88da3c26 | |||
b637f17887 | |||
9a7a1df537 | |||
101b701882 | |||
457c518809 | |||
3eff3d81fe | |||
a56dc22030 | |||
9b770cd28f | |||
348c435d1e | |||
7f1c1a6c32 | |||
bdfc29887b | |||
4e5907f381 | |||
9078a014ae | |||
533892e624 | |||
e0f973b449 | |||
698cb6ef26 | |||
1db38cc5da | |||
a984e54f23 | |||
974c9a3050 | |||
320c32cfdc | |||
0f1e442cd4 | |||
3e78da12dc | |||
ef9277cbff | |||
de68956f76 | |||
e73dd313d5 | |||
d23fa9ee78 | |||
2b9ecc6697 | |||
54f8c34f8e | |||
9d38d6c21b | |||
4243301434 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,3 +1,6 @@
|
||||
oldprep
|
||||
newprep
|
||||
*.dat
|
||||
build/
|
||||
*.pyc
|
||||
dist/
|
||||
|
28
Makefile
28
Makefile
@@ -1,5 +1,33 @@
|
||||
#URL="http://bucket.mit.edu:8080/nilmdb"
|
||||
URL="http://localhost/nilmdb"
|
||||
|
||||
all:
|
||||
ifeq ($(INSIDE_EMACS), t)
|
||||
@make test
|
||||
else
|
||||
@echo "Try 'make install'"
|
||||
endif
|
||||
|
||||
test:
|
||||
@make install >/dev/null
|
||||
src/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*
|
||||
|
||||
test_prep:
|
||||
@make install >/dev/null
|
||||
src/prep.py -c 3 \
|
||||
/lees-compressor/no-leak/raw \
|
||||
/lees-compressor/no-leak/sinefit \
|
||||
/lees-compressor/no-leak/prep \
|
||||
-s '2013-02-19 18:00:00' \
|
||||
-r 0
|
||||
|
||||
test_decimate:
|
||||
-@nilmtool destroy /lees-compressor/no-leak/raw/4 || true
|
||||
-@nilmtool destroy /lees-compressor/no-leak/raw/16 || true
|
||||
-@nilmtool create /lees-compressor/no-leak/raw/4 float32_18 || true
|
||||
-@nilmtool create /lees-compressor/no-leak/raw/16 float32_18 || true
|
||||
time python src/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/1 /lees-compressor/no-leak/raw/4
|
||||
python src/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/4 /lees-compressor/no-leak/raw/16
|
||||
|
||||
version:
|
||||
python setup.py version
|
||||
|
13
README.txt
13
README.txt
@@ -6,9 +6,22 @@ Prerequisites:
|
||||
|
||||
# Runtime and build environments
|
||||
sudo apt-get install python2.7 python2.7-dev python-setuptools
|
||||
sudo apt-get install python-numpy python-scipy python-matplotlib
|
||||
|
||||
nilmdb (1.3.1+)
|
||||
|
||||
Install:
|
||||
|
||||
python setup.py install
|
||||
|
||||
Building new tools:
|
||||
|
||||
The tools in this package are meant to be installed with
|
||||
"python setup.py install". If you want to make a new one,
|
||||
an easier way to develop would be to first install this package,
|
||||
and then copy a specific script like "src/sinefit.py" to a new
|
||||
location, and modify it as desired.
|
||||
|
||||
To add a tool to the package, place it in "src/" and add the
|
||||
appropriate configuration to "setup.py".
|
||||
|
||||
|
@@ -1,66 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import nilmdb.client
|
||||
from nilmdb.utils.printf import *
|
||||
from nilmdb.utils.time import parse_time, format_time
|
||||
|
||||
import time
|
||||
import sys
|
||||
import re
|
||||
import argparse
|
||||
|
||||
class Filter(object):
    """Base helper for stream filters.

    Parses the command line, connects to the server named by --url,
    prints a summary of the source and destination streams, and can
    enumerate the intervals that still need to be processed.
    """

    def __init__(self, description = "Filter data"):
        """Parse the command line and connect to the server.

        description: text shown at the top of the --help output.
        """
        self.args = None
        self._client = None
        self.parse_args(description)

    def parse_args(self, description):
        """Parse sys.argv, open the client and describe both streams.

        The parsed namespace is stored in self.args and the client in
        self._client.

        Raises Exception if the source and destination paths are equal,
        or if either path does not match exactly one stream.
        """
        parser = argparse.ArgumentParser(
            description = description,
            formatter_class = argparse.RawDescriptionHelpFormatter)
        parser.add_argument("-u", "--url", action="store",
                            default="http://localhost:12380/",
                            help="Server URL (default: %(default)s)")
        parser.add_argument("srcpath", action="store",
                            help="Path of source stream, e.g. /foo/bar")
        parser.add_argument("destpath", action="store",
                            help="Path of destination stream, e.g. /foo/bar")

        # Bind a local alias as well as the attribute: the code below
        # used a bare "args" name that was never assigned, which raised
        # NameError as soon as the arguments had been parsed.
        args = self.args = parser.parse_args()

        self._client = nilmdb.client.Client(args.url)

        if args.srcpath == args.destpath:
            raise Exception("source and destination path must be different")

        # Open and print info about the streams
        def stream_info_string(info):
            return sprintf("%s (%s), %.2fM rows, %.2f hours\n",
                           info[0], info[1], info[4] / 1e6, info[5] / 3600)

        src = self._client.stream_list(args.srcpath, extended = True)
        if len(src) != 1:
            raise Exception("source path " + args.srcpath + " not found")
        print("Source: " + stream_info_string(src[0]))

        dest = self._client.stream_list(args.destpath, extended = True)
        if len(dest) != 1:
            raise Exception("destination path " + args.destpath + " not found")
        print(" Dest: " + stream_info_string(dest[0]))

    def intervals(self):
        """Generate all the intervals that this filter should process.

        Yields whatever the client's stream_intervals() returns for the
        source path with the destination path passed as 'diffpath'.
        """
        for i in self._client.stream_intervals(
                self.args.srcpath, diffpath = self.args.destpath):
            yield i
|
||||
|
||||
def main():
    """Placeholder entry point for the generic filter.

    Real filters build on Filter's helpers and then do something with
    the data; this one only reports each interval that would need work.
    """
    generic = Filter()
    for pending in generic.intervals():
        print("Generic filter: need to handle interval: %s" % (pending,))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
12
setup.py
12
setup.py
@@ -30,7 +30,7 @@ except ImportError:
|
||||
# Versioneer manages version numbers from git tags.
|
||||
# https://github.com/warner/python-versioneer
|
||||
import versioneer
|
||||
versioneer.versionfile_source = 'nilmtools/_version.py'
|
||||
versioneer.versionfile_source = 'src/_version.py'
|
||||
versioneer.versionfile_build = 'nilmtools/_version.py'
|
||||
versioneer.tag_prefix = 'nilmtools-'
|
||||
versioneer.parentdir_prefix = 'nilmtools-'
|
||||
@@ -61,14 +61,22 @@ setup(name='nilmtools',
|
||||
long_description = "NILM Database Tools",
|
||||
license = "Proprietary",
|
||||
author_email = 'jim@jtan.com',
|
||||
install_requires = [ 'nilmdb >= 1.3.0',
|
||||
install_requires = [ 'nilmdb >= 1.4.6',
|
||||
'numpy',
|
||||
'scipy',
|
||||
'matplotlib',
|
||||
],
|
||||
packages = [ 'nilmtools',
|
||||
],
|
||||
package_dir = { 'nilmtools': 'src' },
|
||||
entry_points = {
|
||||
'console_scripts': [
|
||||
'nilm-decimate = nilmtools.decimate:main',
|
||||
'nilm-decimate-auto = nilmtools.decimate_auto:main',
|
||||
'nilm-insert = nilmtools.insert:main',
|
||||
'nilm-copy = nilmtools.copy_one:main',
|
||||
'nilm-copy-wildcard = nilmtools.copy_wildcard:main',
|
||||
'nilm-sinefit = nilmtools.sinefit:main',
|
||||
],
|
||||
},
|
||||
zip_safe = False,
|
||||
|
@@ -181,7 +181,7 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False)
|
||||
|
||||
tag_prefix = "nilmtools-"
|
||||
parentdir_prefix = "nilmtools-"
|
||||
versionfile_source = "nilmtools/_version.py"
|
||||
versionfile_source = "src/_version.py"
|
||||
|
||||
def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
|
||||
variables = { "refnames": git_refnames, "full": git_full }
|
40
src/copy_one.py
Executable file
40
src/copy_one.py
Executable file
@@ -0,0 +1,40 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# This is called copy_one instead of copy to avoid name conflicts with
|
||||
# the Python standard library.
|
||||
|
||||
import nilmtools.filter
|
||||
import nilmdb.client
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
def main(argv = None):
    """Copy one stream's metadata and rows into another stream.

    argv: argument list handed to Filter.parse_args (None is passed
          through unchanged).

    Exits with status 1, after printing a suggested "nilmtool create"
    command, when the destination stream does not exist.
    """
    filt = nilmtools.filter.Filter()
    filt.setup_parser("Copy a stream")

    # Parse arguments
    try:
        filt.parse_args(argv)
    except nilmtools.filter.MissingDestination as missing:
        source, target = missing.src, missing.dest
        report = [
            "Source is %s (%s)" % (source.path, source.layout),
            "Destination %s doesn't exist" % (target.path),
            "You could make it with a command like:",
            " nilmtool -u %s create %s %s" % (target.url,
                                              target.path, source.layout),
        ]
        print("\n".join(report))
        raise SystemExit(1)

    # Copy metadata
    filt.check_dest_metadata(
        filt.client_src.stream_get_metadata(filt.src.path))

    # Copy all rows of data as ASCII strings, using dedicated clients
    # for reading and writing.
    reader = nilmdb.client.Client(filt.src.url)
    writer = nilmdb.client.Client(filt.dest.url)
    for span in filt.intervals():
        print("Processing " + filt.interval_string(span))
        with writer.stream_insert_context(filt.dest.path,
                                          span.start, span.end) as ctx:
            for line in reader.stream_extract(filt.src.path,
                                              span.start, span.end):
                ctx.insert(line + "\n")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
70
src/copy_wildcard.py
Executable file
70
src/copy_wildcard.py
Executable file
@@ -0,0 +1,70 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Copy streams between NilmDB servers with wildcards
|
||||
|
||||
import nilmtools.filter
|
||||
import nilmtools.copy_one
|
||||
import nilmdb.client
|
||||
import argparse
|
||||
import fnmatch
|
||||
|
||||
def main(argv = None):
    """Copy every stream matching the given wildcards between servers.

    argv: argument list to parse; None is handed to argparse unchanged.

    Missing destination streams are created with the source layout, and
    each stream is then copied by invoking nilmtools.copy_one.main with
    an equivalent argument list.  A SystemExit with a non-zero code from
    that tool is re-raised.
    """
    f = nilmtools.filter.Filter()
    # Reuse filter's parser, since it handles most options we need.
    parser = f.setup_parser(description = """\
Copy all streams matching the given wildcard from one host to another.

Example: %(prog)s -u http://host1/nilmdb -U http://host2/nilmdb /sharon/*
""", skip_paths = True)
    parser.add_argument("path", action="store", nargs="+",
                        help='Wildcard paths to copy')
    args = parser.parse_args(argv)

    # Verify arguments
    if args.dest_url is None:
        parser.error("must provide both source and destination URL")
    client_src = nilmdb.client.Client(args.url)
    client_dest = nilmdb.client.Client(args.dest_url)
    if client_src.geturl() == client_dest.geturl():
        parser.error("source and destination URL must be different")
    print("Source URL: " + client_src.geturl())
    print(" Dest URL: " + client_dest.geturl())

    # Find matching streams.  The stream list does not depend on the
    # pattern, so fetch it once instead of once per wildcard.
    all_streams = client_src.stream_list(extended = True)
    matched = []
    for path in args.path:
        matched.extend([s for s in all_streams
                        if fnmatch.fnmatch(s[0], path)
                        and s not in matched])

    # Create destination streams if they don't exist
    for stream in matched:
        src = nilmtools.filter.StreamInfo(client_src.geturl(), stream)
        dest = nilmtools.filter.get_stream_info(client_dest, src.path)
        if not dest:
            print("Creating destination stream " + src.path)
            client_dest.stream_create(src.path, src.layout)

    # Copy them all by running the "copy" tool as if it were
    # invoked from the command line.
    for stream in matched:
        new_argv = ["--url", client_src.geturl(),
                    "--dest-url", client_dest.geturl() ]
        # Compare against None: a timestamp of 0 is falsy but is still
        # a bound the user asked for.
        if args.start is not None:
            new_argv.extend(["--start", "@" + repr(args.start)])
        if args.end is not None:
            new_argv.extend(["--end", "@" + repr(args.end)])
        if args.dry_run:
            new_argv.extend(["--dry-run"])
        if args.force_metadata:
            new_argv.extend(["--force-metadata"])
        new_argv.extend([stream[0], stream[0]])
        try:
            nilmtools.copy_one.main(new_argv)
        except SystemExit as e:
            # Ignore SystemExit which could be raised on --dry-run
            if e.code != 0:
                raise
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
81
src/decimate.py
Executable file
81
src/decimate.py
Executable file
@@ -0,0 +1,81 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import nilmtools.filter
|
||||
import nilmdb.client
|
||||
import numpy as np
|
||||
import operator
|
||||
|
||||
def main(argv = None):
    """Decimate a stream by a fixed factor.

    argv: argument list handed to Filter.parse_args (None is passed
          through unchanged).

    Exits with status 1, after suggesting a "nilmtool create" command
    with a recommended layout, when the destination does not exist.
    Raises Exception when the factor is below 2.
    """
    filt = nilmtools.filter.Filter()
    opts = filt.setup_parser("Decimate a stream")
    decimate_opts = opts.add_argument_group("Decimate options")
    decimate_opts.add_argument(
        '-f', '--factor', action='store', default=4, type=int,
        help='Decimation factor (default: %(default)s)')

    # Parse arguments
    try:
        args = filt.parse_args(argv)
    except nilmtools.filter.MissingDestination as missing:
        # If no destination, suggest how to create it by figuring out
        # a recommended layout.
        source, target = missing.src, missing.dest
        print("Source is %s (%s)" % (source.path, source.layout))
        print("Destination %s doesn't exist" % (target.path))
        source_meta = filt.client_src.stream_get_metadata(source.path)
        if "decimate_source" in source_meta:
            # Already decimated: the layout carries over unchanged.
            suggested = source.layout
        else:
            wide = ('int32' in source.layout_type
                    or 'float64' in source.layout_type)
            prefix = 'float64_' if wide else 'float32_'
            suggested = prefix + str(source.layout_count * 3)
        print("You could make it with a command like:")
        print(" nilmtool -u %s create %s %s" % (target.url,
                                                target.path, suggested))
        raise SystemExit(1)

    if args.factor < 2:
        raise Exception("factor needs to be 2 or more")

    filt.check_dest_metadata({ "decimate_source": filt.src.path,
                               "decimate_factor": args.factor })

    # If source is decimated, we have to decimate a bit differently
    if "decimate_source" in filt.client_src.stream_get_metadata(args.srcpath):
        worker, columns = decimate_again, filt.src.layout_count // 3
    else:
        worker, columns = decimate_first, filt.src.layout_count
    filt.process_python(function = worker, rows = args.factor,
                        args = (columns,))
|
||||
|
||||
def decimate_first(data, n):
    """Decimate original data -- result has 3 times as many columns.

    data: list of rows, each laid out as [timestamp, v1, ..., vN].
    n:    number of value columns.  It is not needed here and is only
          accepted so the signature matches decimate_again.

    Returns a one-row list
        [[mean(timestamp), mean(v1..vN), min(v1..vN), max(v1..vN)]]
    or an empty list when 'data' is empty.
    """
    count = len(data)
    if count == 0:
        # Nothing to summarize; previously this leaked StopIteration.
        return []
    # Transpose rows into columns so each statistic is one pass per
    # column.  For this simple calculation, plain Python is still
    # cheaper than converting to a Numpy array.
    columns = list(zip(*data))
    # Divide by a float so the mean is not truncated when the rows hold
    # integers (Python 2 "/" floors for int operands).
    divisor = float(count)
    r_mean = [ sum(col) / divisor for col in columns ]
    r_min = [ min(col) for col in columns[1:] ]
    r_max = [ max(col) for col in columns[1:] ]
    return [ r_mean + r_min + r_max ]
|
||||
|
||||
def decimate_again(data, n):
    """Decimate already-decimated data -- result has the same number
    of columns.

    data: list of rows, each laid out as
          [timestamp, mean1..meanN, min1..minN, max1..maxN].
    n:    number of original value columns (N above).

    Returns a one-row list holding the mean of the timestamp and mean
    columns, the minimum of each min column and the maximum of each max
    column, or an empty list when 'data' is empty.
    """
    count = len(data)
    if count == 0:
        # Nothing to summarize; previously this leaked StopIteration.
        return []
    columns = list(zip(*data))
    # Divide by a float so the mean is not truncated when the rows hold
    # integers (Python 2 "/" floors for int operands).
    divisor = float(count)
    r_mean = [ sum(col) / divisor for col in columns[0:(n+1)] ]
    r_min = [ min(col) for col in columns[(n+1):(2*n+1)] ]
    r_max = [ max(col) for col in columns[(2*n+1):(3*n+1)] ]
    return [ r_mean + r_min + r_max ]
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
76
src/decimate_auto.py
Executable file
76
src/decimate_auto.py
Executable file
@@ -0,0 +1,76 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
import nilmtools.filter
|
||||
import nilmtools.decimate
|
||||
import nilmdb.client
|
||||
import argparse
|
||||
|
||||
def main(argv = None):
    """Build a chain of decimated streams from one base stream.

    argv: argument list to parse; None is handed to argparse unchanged.

    Each level is written to "<path>~decim-<factor>" by running
    nilmtools.decimate.main, and levels are added until the newest one
    has 500 rows or fewer.  Raises Exception when the base path is not
    found; exits with status 1 when the path is itself a decimation.
    """
    cli = argparse.ArgumentParser(
        formatter_class = argparse.RawDescriptionHelpFormatter,
        version = "1.0",
        description = """\
Automatically create multiple decimations from a single source
stream, continuing until the last decimated level contains fewer
than 500 points total.
""")
    cli.add_argument("-u", "--url", action="store",
                     default="http://localhost/nilmdb/",
                     help="NilmDB server URL (default: %(default)s)")
    cli.add_argument('-f', '--factor', action='store', default=4, type=int,
                     help='Decimation factor (default: %(default)s)')
    cli.add_argument("--force-metadata", action="store_true",
                     default = False,
                     help="Force metadata changes if the dest "
                     "doesn't match")
    cli.add_argument("path", action="store",
                     help='Path of base stream')
    args = cli.parse_args(argv)

    # Pull out info about the base stream
    client = nilmdb.client.Client(args.url)

    info = nilmtools.filter.get_stream_info(client, args.path)
    if not info:
        raise Exception("path " + args.path + " not found")

    meta = client.stream_get_metadata(args.path)
    if "decimate_source" in meta:
        print("Stream %s was decimated from %s"
              % (args.path, meta["decimate_source"]))
        print("You need to pass the base stream instead")
        raise SystemExit(1)

    # Figure out the type we should use for decimated streams
    wide = 'int32' in info.layout_type or 'float64' in info.layout_type
    decimated_type = (('float64_' if wide else 'float32_')
                      + str(info.layout_count * 3))

    # Now do the decimations until we have few enough points
    level = 1
    print("Level %s decimation has %s rows" % (level, info.rows))
    while not (info.rows <= 500):
        level *= args.factor
        target = "%s~decim-%d" % (args.path, level)

        # Create the stream if needed
        if not nilmtools.filter.get_stream_info(client, target):
            print("Creating stream " + target)
            client.stream_create(target, decimated_type)

        # Run the decimation as if it were run from the commandline
        sub_argv = [ "-u", args.url,
                     "-f", str(args.factor) ]
        if args.force_metadata:
            sub_argv.append("--force-metadata")
        sub_argv += [info.path, target]
        nilmtools.decimate.main(sub_argv)

        # Update info using the newly decimated stream
        info = nilmtools.filter.get_stream_info(client, target)
        print("Level %s decimation has %s rows" % (level, info.rows))
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
413
src/filter.py
Normal file
413
src/filter.py
Normal file
@@ -0,0 +1,413 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
from __future__ import absolute_import
|
||||
|
||||
import nilmdb.client
|
||||
from nilmdb.client import Client
|
||||
from nilmdb.utils.printf import *
|
||||
from nilmdb.utils.time import (parse_time, timestamp_to_human,
|
||||
timestamp_to_seconds)
|
||||
from nilmdb.utils.interval import Interval
|
||||
|
||||
import nilmtools
|
||||
|
||||
import itertools
|
||||
import time
|
||||
import sys
|
||||
import re
|
||||
import argparse
|
||||
import numpy as np
|
||||
import cStringIO
|
||||
|
||||
class MissingDestination(Exception):
    """Signals that the destination stream was not found.

    Carries the parsed arguments plus the source and destination stream
    descriptions so the caller can suggest how to create the stream.
    """

    def __init__(self, args, src, dest):
        message = "".join(["destination path ", dest.path, " not found"])
        super(MissingDestination, self).__init__(message)
        self.parsed_args = args
        self.src = src
        self.dest = dest
|
||||
|
||||
class StreamInfo(object):
    """Description of one stream on one server.

    url:  server URL the stream lives on.
    info: sequence laid out as [path, layout, timestamp_min,
          timestamp_max, rows, duration]; it may be shorter (or None),
          in which case the fields that cannot be filled stay None.
    """

    def __init__(self, url, info):
        self.url = url
        self.info = info
        # Start every derived field at None so a partially populated
        # object (e.g. a destination known only by path) still has all
        # of its attributes.
        self.path = None
        self.layout = None
        self.layout_type = None
        self.layout_count = None
        self.total_count = None
        self.timestamp_min = None
        self.timestamp_max = None
        self.rows = None
        self.seconds = None
        try:
            self.path = info[0]
            self.layout = info[1]
            self.layout_type = self.layout.split('_')[0]
            self.layout_count = int(self.layout.split('_')[1])
            # One extra column for the timestamp
            self.total_count = self.layout_count + 1
            self.timestamp_min = info[2]
            self.timestamp_max = info[3]
            self.rows = info[4]
            self.seconds = nilmdb.utils.time.timestamp_to_seconds(info[5])
        except (IndexError, TypeError):
            # Short or missing info: keep whatever was filled in.  The
            # tuple is required; "except IndexError, TypeError" caught
            # only IndexError and bound it to the name TypeError.
            pass

    def string(self, interhost):
        """Return stream info as a string.  If interhost is true,
        include the host URL."""
        if interhost:
            return sprintf("[%s] ", self.url) + str(self)
        return str(self)

    def __str__(self):
        """Return stream info as a string."""
        return sprintf("%s (%s), %.2fM rows, %.2f hours",
                       self.path, self.layout, self.rows / 1e6,
                       self.seconds / 3600.0)
|
||||
|
||||
def get_stream_info(client, path):
    """Look up 'path' on 'client' and describe it.

    Returns a StreamInfo built from the single matching entry of the
    extended stream list, or None unless exactly one stream matches.
    """
    matches = client.stream_list(path, extended = True)
    if len(matches) == 1:
        return StreamInfo(client.geturl(), matches[0])
    return None
|
||||
|
||||
class Filter(object):
    """Common scaffolding for stream filters.

    Typical use: call setup_parser() (optionally adding arguments to the
    returned parser), then parse_args(), then one of process_python() /
    process_numpy(), or iterate intervals() directly.
    """

    def __init__(self):
        self._parser = None
        self._client_src = None
        self._client_dest = None
        # True while intervals() is being iterated; the client_src and
        # client_dest properties refuse access during that time.
        self._using_client = False
        self.src = None
        self.dest = None
        self.start = None
        self.end = None
        self.interhost = False
        self.force_metadata = False

    @property
    def client_src(self):
        """Source client; raises Exception while intervals() is active."""
        if self._using_client:
            raise Exception("Filter client is in use; make another")
        return self._client_src

    @property
    def client_dest(self):
        """Destination client; raises Exception while intervals() is
        active."""
        if self._using_client:
            raise Exception("Filter client is in use; make another")
        return self._client_dest

    def setup_parser(self, description = "Filter data", skip_paths = False):
        """Create, remember and return the argument parser.

        description: text shown at the top of the --help output.
        skip_paths:  if true, do not add the positional "srcpath" and
                     "destpath" arguments; the caller adds its own.
        """
        parser = argparse.ArgumentParser(
            formatter_class = argparse.RawDescriptionHelpFormatter,
            version = nilmtools.__version__,
            description = description)
        group = parser.add_argument_group("General filter arguments")
        group.add_argument("-u", "--url", action="store",
                           default="http://localhost/nilmdb/",
                           help="Server URL (default: %(default)s)")
        group.add_argument("-U", "--dest-url", action="store",
                           help="Destination server URL "
                           "(default: same as source)")
        group.add_argument("-D", "--dry-run", action="store_true",
                           default = False,
                           help="Just print intervals that would be "
                           "processed")
        group.add_argument("--force-metadata", action="store_true",
                           default = False,
                           help="Force metadata changes if the dest "
                           "doesn't match")
        group.add_argument("-s", "--start",
                           metavar="TIME", type=self.arg_time,
                           help="Starting timestamp for intervals "
                           "(free-form, inclusive)")
        group.add_argument("-e", "--end",
                           metavar="TIME", type=self.arg_time,
                           help="Ending timestamp for intervals "
                           "(free-form, noninclusive)")
        if not skip_paths:
            # Individual filter scripts might want to add these arguments
            # themselves, to include multiple sources in a different order
            # (for example).  "srcpath" and "destpath" arguments must exist,
            # though.
            group.add_argument("srcpath", action="store",
                               help="Path of source stream, e.g. /foo/bar")
            group.add_argument("destpath", action="store",
                               help="Path of destination stream, e.g. /foo/bar")
        self._parser = parser
        return parser

    def interval_string(self, interval):
        """Return "[ start -> end ]" with both ends in human-readable
        form."""
        return sprintf("[ %s -> %s ]",
                       timestamp_to_human(interval.start),
                       timestamp_to_human(interval.end))

    def parse_args(self, argv = None):
        """Parse arguments, connect to the servers and look up both
        streams.

        argv: argument list; None is handed to argparse unchanged.

        Returns the parsed namespace.  Raises MissingDestination when
        the destination stream is not found, and SystemExit(0) after
        listing the pending intervals when --dry-run was given.
        """
        args = self._parser.parse_args(argv)

        if args.dest_url is None:
            args.dest_url = args.url
        if args.url != args.dest_url:
            self.interhost = True

        self._client_src = Client(args.url)
        self._client_dest = Client(args.dest_url)

        if (not self.interhost) and (args.srcpath == args.destpath):
            self._parser.error("source and destination path must be different")

        # Open and print info about the streams
        self.src = get_stream_info(self._client_src, args.srcpath)
        if not self.src:
            self._parser.error("source path " + args.srcpath + " not found")

        self.dest = get_stream_info(self._client_dest, args.destpath)
        if not self.dest:
            raise MissingDestination(args, self.src,
                                     StreamInfo(args.dest_url, [args.destpath]))

        print("Source: " + self.src.string(self.interhost))
        print(" Dest: " + self.dest.string(self.interhost))

        # Store the options before the dry run: intervals() reads
        # self.start and self.end, so setting them afterwards made
        # --dry-run ignore --start/--end.
        self.force_metadata = args.force_metadata
        self.start = args.start
        self.end = args.end

        if args.dry_run:
            for interval in self.intervals():
                print(self.interval_string(interval))
            raise SystemExit(0)

        return args

    def _optimize_int(self, it):
        """Join and yield adjacent intervals from the iterator 'it'"""
        saved_int = None
        for interval in it:
            if saved_int is not None:
                if saved_int.end == interval.start:
                    # Touching intervals: grow the new one backwards
                    interval.start = saved_int.start
                else:
                    yield saved_int
            saved_int = interval
        if saved_int is not None:
            yield saved_int

    def intervals(self):
        """Generate all the intervals that this filter should process"""
        self._using_client = True
        try:
            if self.interhost:
                # Do the difference ourselves
                s_intervals = ( Interval(start, end)
                                for (start, end) in
                                self._client_src.stream_intervals(
                                    self.src.path,
                                    start = self.start, end = self.end) )
                d_intervals = ( Interval(start, end)
                                for (start, end) in
                                self._client_dest.stream_intervals(
                                    self.dest.path,
                                    start = self.start, end = self.end) )
                intervals = nilmdb.utils.interval.set_difference(s_intervals,
                                                                 d_intervals)
            else:
                # Let the server do the difference for us
                intervals = ( Interval(start, end)
                              for (start, end) in
                              self._client_src.stream_intervals(
                                  self.src.path, diffpath = self.dest.path,
                                  start = self.start, end = self.end) )
            # Optimize intervals: join intervals that are adjacent
            for interval in self._optimize_int(intervals):
                yield interval
        finally:
            # Release the clients even if iteration raised or the
            # generator was closed early; otherwise the client
            # properties would stay locked.
            self._using_client = False

    # Misc helpers
    def arg_time(self, toparse):
        """Parse a time string argument"""
        try:
            return nilmdb.utils.time.parse_time(toparse)
        except ValueError as e:
            raise argparse.ArgumentTypeError(sprintf("%s \"%s\"",
                                                     str(e), toparse))

    def check_dest_metadata(self, data):
        """See if the metadata agrees, and complain if it doesn't.  If
        there's no conflict, update the metadata to match 'data'.

        Raises Exception when a key already stored in the destination
        has a different value, the destination has rows, and
        --force-metadata was not given.
        """
        metadata = self._client_dest.stream_get_metadata(self.dest.path)
        if not self.force_metadata:
            for key in data:
                wanted = str(data[key])
                # A key that is absent from the destination counts as a
                # match.
                val = metadata.get(key, wanted)
                if val != wanted and self.dest.rows > 0:
                    m = "Metadata in destination stream:\n"
                    m += " %s = %s\n" % (key, val)
                    m += "doesn't match desired data:\n"
                    m += " %s = %s\n" % (key, wanted)
                    m += "Refusing to change it. To prevent this error, "
                    m += "change or delete the metadata with nilmtool,\n"
                    m += "remove existing data from the stream, or "
                    m += "retry with --force-metadata."
                    raise Exception(m)
        # All good -- write the metadata in case it's not already there
        self._client_dest.stream_update_metadata(self.dest.path, data)

    # Main processing helper
    def process_python(self, function, rows, args = None, partial = False):
        """Process data in chunks of 'rows' data at a time.

        This provides data as nested Python lists and expects the same
        back.

        function: function to process the data
        rows: maximum number of rows to pass to 'function' at once
        args: tuple containing extra arguments to pass to 'function'
        partial: if true, less than 'rows' may be passed to 'function'.
                 if false, partial data at the end of an interval will
                 be dropped.

        'function' should be defined like:
            function(data, *args)
        It will be passed a list containing up to 'rows' rows of
        data from the source stream, and any arguments passed in
        'args'.  It should transform the data as desired, and return a
        new list of data, which will be inserted into the destination
        stream.

        Every input value, timestamp included, is parsed as a float.
        """
        if args is None:
            args = []
        extractor = Client(self.src.url).stream_extract
        inserter = Client(self.dest.url).stream_insert_context

        # Format output data.
        formatter = lambda row: " ".join([repr(x) for x in row]) + "\n"

        def run_chunk(chunk, insert_ctx):
            # Pass through filter function and write result to destination
            dest_array = function(chunk, *args)
            out = [ formatter(row) for row in dest_array ]
            insert_ctx.insert("".join(out))

        for interval in self.intervals():
            print("Processing " + self.interval_string(interval))
            with inserter(self.dest.path,
                          interval.start, interval.end) as insert_ctx:
                src_array = []
                for line in extractor(self.src.path,
                                      interval.start, interval.end):
                    # Read in data
                    src_array.append([ float(x) for x in line.split() ])

                    if len(src_array) == rows:
                        run_chunk(src_array, insert_ctx)
                        # Clear source array
                        src_array = []

                # Take care of partial chunk
                if len(src_array) and partial:
                    run_chunk(src_array, insert_ctx)

    # Like process_python, but provides Numpy arrays and allows for
    # partial processing.
    def process_numpy(self, function, args = None, rows = 100000):
        """For all intervals that exist in self.src but don't exist in
        self.dest, call 'function' with a Numpy array corresponding to
        the data.  The data is converted to a Numpy array in chunks of
        'rows' rows at a time.

        'function' should be defined as:
            def function(data, interval, args, insert_func, final)

        'data': 2-dimensional array of data to process

        'interval': overall interval we're processing (but not necessarily
        the interval of this particular chunk of data)

        'args': opaque arguments passed to process_numpy

        'insert_func': function to call in order to insert array of data.
        Should be passed a 2-dimensional array of data to insert.
        Data timestamps must be within the provided interval.

        'final': True if this is the last bit of data for this
        contiguous interval, False otherwise.

        Return value of 'function' is the number of data rows processed.
        Unprocessed data will be provided again in a subsequent call
        (unless 'final' is True).  A negative return value raises
        Exception.
        """
        if args is None:
            args = []
        extractor = Client(self.src.url).stream_extract
        inserter = Client(self.dest.url).stream_insert_context

        def batch(iterable, size):
            # Group consecutive items into runs of at most 'size' by
            # numbering them and keying on the integer quotient.
            c = itertools.count()
            for k, g in itertools.groupby(iterable, lambda x: next(c) // size):
                yield g

        for interval in self.intervals():
            print("Processing " + self.interval_string(interval))
            with inserter(self.dest.path,
                          interval.start, interval.end) as insert_ctx:
                def insert_function(array):
                    s = cStringIO.StringIO()
                    if len(np.shape(array)) != 2:
                        raise Exception("array must be 2-dimensional")
                    np.savetxt(s, array)
                    insert_ctx.insert(s.getvalue())

                extract = extractor(self.src.path, interval.start, interval.end)
                old_array = np.array([])
                for batched in batch(extract, rows):
                    # Read in this batch of data.  ndmin=2 keeps a
                    # single-row batch 2-dimensional instead of letting
                    # it collapse to a 1-dimensional array.
                    new_array = np.loadtxt(batched, ndmin = 2)

                    # If we still had old data left, combine it
                    if old_array.shape[0] != 0:
                        array = np.vstack((old_array, new_array))
                    else:
                        array = new_array

                    # Pass it to the process function
                    processed = function(array, interval, args,
                                         insert_function, False)

                    # Send any pending data
                    insert_ctx.send()

                    # Save the unprocessed parts
                    if processed >= 0:
                        old_array = array[processed:]
                    else:
                        raise Exception(
                            sprintf("%s return value %s must be >= 0",
                                    str(function), str(processed)))

                    # Warn if there's too much data remaining
                    if old_array.shape[0] > 3 * rows:
                        printf("warning: %d unprocessed rows in buffer\n",
                               old_array.shape[0])

                # Last call for this contiguous interval
                if old_array.shape[0] != 0:
                    function(old_array, interval, args, insert_function, True)
|
||||
|
||||
def main(argv = None):
    """Placeholder entry point for the generic filter.

    Creates a Filter, lets it set up its parser and parse `argv`, and
    then prints one line for every interval the filter reports.  No data
    is processed here; concrete filters are expected to use the other
    Filter helpers and do their own work with the data.
    """
    generic = Filter()
    generic.setup_parser()
    generic.parse_args(argv)
    for pending in generic.intervals():
        label = generic.interval_string(pending)
        print("Generic filter: need to handle %s" % (label,))
|
||||
|
||||
# Run the generic filter only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
@@ -2,8 +2,11 @@
|
||||
|
||||
import nilmdb.client
|
||||
from nilmdb.utils.printf import *
|
||||
from nilmdb.utils.time import parse_time, format_time
|
||||
from nilmdb.utils.time import (parse_time, timestamp_to_human,
|
||||
timestamp_to_seconds, seconds_to_timestamp,
|
||||
rate_to_period, now as time_now)
|
||||
|
||||
import nilmtools
|
||||
import time
|
||||
import sys
|
||||
import re
|
||||
@@ -15,8 +18,11 @@ class ParseError(Exception):
|
||||
msg = filename + ": " + error
|
||||
super(ParseError, self).__init__(msg)
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser(description = """\
|
||||
def parse_args(argv = None):
|
||||
parser = argparse.ArgumentParser(
|
||||
formatter_class = argparse.RawDescriptionHelpFormatter,
|
||||
version = nilmtools.__version__,
|
||||
description = """\
|
||||
Insert data from ethstream, either live (using the system time as a
|
||||
reference) or prerecorded (using comments in the file as a reference).
|
||||
|
||||
@@ -24,28 +30,29 @@ def parse_args():
|
||||
Small discrepencies between the accumulated timestamps and the
|
||||
reference time are ignored; larger discrepencies cause gaps to be
|
||||
created in the stream. Overlapping data returns an error.
|
||||
""", formatter_class = argparse.RawDescriptionHelpFormatter)
|
||||
""")
|
||||
parser.add_argument("-u", "--url", action="store",
|
||||
default="http://localhost:12380/",
|
||||
default="http://localhost/nilmdb/",
|
||||
help="NilmDB server URL (default: %(default)s)")
|
||||
parser.add_argument("-r", "--rate", action="store", default=8000, type=float,
|
||||
parser.add_argument("-r", "--rate", action="store", default=8000,
|
||||
type=float,
|
||||
help="Data rate in Hz (default: %(default)s)")
|
||||
parser.add_argument("-l", "--live", action="store_true",
|
||||
help="Live capture; use system time to verify rate")
|
||||
parser.add_argument("path", action="store",
|
||||
help="Path of stream, e.g. /foo/bar")
|
||||
parser.add_argument("infile", type=argparse.FileType('r'), nargs='*',
|
||||
default=[sys.stdin], help="Input files (default: stdin)")
|
||||
args = parser.parse_args()
|
||||
default=[sys.stdin],
|
||||
help="Input files (default: stdin)")
|
||||
args = parser.parse_args(argv)
|
||||
|
||||
printf("Stream path: %s\n", args.path)
|
||||
printf(" Data rate: %s Hz\n", repr(args.rate))
|
||||
|
||||
return args
|
||||
|
||||
def main(args = None):
|
||||
if args is None:
|
||||
args = parse_args()
|
||||
def main(argv = None):
|
||||
args = parse_args(argv)
|
||||
|
||||
client = nilmdb.client.Client(args.url)
|
||||
|
||||
@@ -55,20 +62,22 @@ def main(args = None):
|
||||
# data_ts is the timestamp that we'll use for the current line
|
||||
data_ts_base = 0
|
||||
data_ts_inc = 0
|
||||
data_ts_step = 1.0 / args.rate
|
||||
data_ts_rate = args.rate
|
||||
|
||||
# clock_ts is the imprecise "real" timestamp (from the filename,
|
||||
# comments, or system clock)
|
||||
clock_ts = None
|
||||
|
||||
def print_clock_updated():
|
||||
printf("Clock time updated to %s\n", format_time(clock_ts))
|
||||
printf("Clock time updated to %s\n", timestamp_to_human(clock_ts))
|
||||
if data_ts_base != 0:
|
||||
diff = data_ts - clock_ts
|
||||
if diff >= 0:
|
||||
printf(" (data timestamp ahead by %.6f sec)\n", diff)
|
||||
printf(" (data timestamp ahead by %.6f sec)\n",
|
||||
timestamp_to_seconds(diff))
|
||||
else:
|
||||
printf(" (data timestamp behind by %.6f sec)\n", -diff)
|
||||
printf(" (data timestamp behind by %.6f sec)\n",
|
||||
timestamp_to_seconds(-diff))
|
||||
|
||||
with client.stream_insert_context(args.path) as stream:
|
||||
for f in args.infile:
|
||||
@@ -86,7 +95,7 @@ def main(args = None):
|
||||
# Subtract 1 hour because files are created at the end
|
||||
# of the hour. Hopefully, we'll be able to use
|
||||
# internal comments and this value won't matter anyway.
|
||||
clock_ts = parse_time(filename).totimestamp() - 3600
|
||||
clock_ts = parse_time(filename) - seconds_to_timestamp(3600)
|
||||
print_clock_updated()
|
||||
except ValueError:
|
||||
pass
|
||||
@@ -95,7 +104,8 @@ def main(args = None):
|
||||
|
||||
# Read each line
|
||||
for line in f:
|
||||
data_ts = data_ts_base + data_ts_inc * data_ts_step
|
||||
data_ts = data_ts_base + rate_to_period(data_ts_rate,
|
||||
data_ts_inc)
|
||||
|
||||
# If no content other than the newline, skip it
|
||||
if len(line) <= 1:
|
||||
@@ -104,7 +114,7 @@ def main(args = None):
|
||||
# If line starts with a comment, look for a timestamp
|
||||
if line[0] == '#':
|
||||
try:
|
||||
clock_ts = parse_time(line[1:]).totimestamp()
|
||||
clock_ts = parse_time(line[1:])
|
||||
print_clock_updated()
|
||||
except ValueError:
|
||||
pass
|
||||
@@ -112,30 +122,30 @@ def main(args = None):
|
||||
|
||||
# If inserting live, use clock timestamp
|
||||
if live:
|
||||
clock_ts = time.time()
|
||||
clock_ts = time_now()
|
||||
|
||||
# If we have a real timestamp, compare it to the data
|
||||
# timestamp, and make sure things match up.
|
||||
if clock_ts is not None:
|
||||
if (data_ts - 10) > clock_ts:
|
||||
if (data_ts - seconds_to_timestamp(10)) > clock_ts:
|
||||
# Accumulated line timestamps are in the future.
|
||||
# If we were to set data_ts=clock_ts, we'd create
|
||||
# an overlap, so we have to just bail out here.
|
||||
err = sprintf("Data is coming in too fast: data time "
|
||||
"is %s but clock time is only %s",
|
||||
format_time(data_ts),
|
||||
format_time(clock_ts))
|
||||
timestamp_to_human(data_ts),
|
||||
timestamp_to_human(clock_ts))
|
||||
raise ParseError(filename, err)
|
||||
|
||||
if (data_ts + 10) < clock_ts:
|
||||
if (data_ts + seconds_to_timestamp(10)) < clock_ts:
|
||||
# Accumulated line timestamps are in the past. We
|
||||
# can just skip some time and leave a gap in the
|
||||
# data.
|
||||
if data_ts_base != 0:
|
||||
printf("Skipping data timestamp forward from "
|
||||
"%s to %s to match clock time\n",
|
||||
format_time(data_ts),
|
||||
format_time(clock_ts))
|
||||
timestamp_to_human(data_ts),
|
||||
timestamp_to_human(clock_ts))
|
||||
stream.finalize()
|
||||
data_ts_base = data_ts = clock_ts
|
||||
data_ts_inc = 0
|
||||
@@ -160,7 +170,7 @@ def main(args = None):
|
||||
continue
|
||||
|
||||
# Insert it
|
||||
stream.insert("%.6f %s" % (data_ts, line))
|
||||
stream.insert("%d %s" % (data_ts, line))
|
||||
print "Done"
|
||||
|
||||
if __name__ == "__main__":
|
126
src/prep.py
Executable file
126
src/prep.py
Executable file
@@ -0,0 +1,126 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Spectral envelope preprocessor.
|
||||
# Requires two streams as input: the original raw data, and sinefit data.
|
||||
|
||||
import nilmtools.filter
|
||||
import nilmdb.client
|
||||
from numpy import *
|
||||
import scipy.fftpack
|
||||
import scipy.signal
|
||||
from matplotlib import pyplot as p
|
||||
import bisect
|
||||
|
||||
def main(argv = None):
    """Command-line entry point for the spectral envelope preprocessor.

    Adds the "Prep options" group (-c/--column, -n/--nharm, and the
    mutually exclusive -r/--rotate in degrees or -R/--rotate-rad in
    radians, plus the srcpath, sinepath and destpath positionals) to the
    parser provided by nilmtools.filter.Filter, and parses `argv`.

    If parsing raises MissingDestination, a suggested "nilmtool create"
    command using layout float32_<2 * nharm> is printed and the program
    exits with status 1.  Otherwise the options are validated, the
    sinefit stream is required to exist with layout float32_3, the
    source paths and column are handed to check_dest_metadata(), and
    process() is run over the data through process_numpy().
    """
    # Build the parser: generic filter options plus our own group
    flt = nilmtools.filter.Filter()
    parser = flt.setup_parser("Spectral Envelope Preprocessor", skip_paths = True)
    prep_opts = parser.add_argument_group("Prep options")
    prep_opts.add_argument("-c", "--column", action="store", type=int,
                           help="Column number (first data column is 1)")
    prep_opts.add_argument("-n", "--nharm", action="store", type=int, default=4,
                           help="number of odd harmonics to compute")
    rot_opts = prep_opts.add_mutually_exclusive_group()
    rot_opts.add_argument("-r", "--rotate", action="store", type=float,
                          help="rotate FFT output by this many degrees")
    rot_opts.add_argument("-R", "--rotate-rad", action="store", type=float,
                          help="rotate FFT output by this many radians")

    # The three stream paths are declared here, in this order
    positionals = (("srcpath", "Path of raw input, e.g. /foo/raw"),
                   ("sinepath", "Path of sinefit input, e.g. /foo/sinefit"),
                   ("destpath", "Path of prep output, e.g. /foo/prep"))
    for (name, text) in positionals:
        prep_opts.add_argument(name, action="store", help=text)

    # Parse; a missing destination gets a hint on how to create it
    try:
        args = flt.parse_args(argv)
    except nilmtools.filter.MissingDestination as missing:
        rec = "float32_%d" % (missing.parsed_args.nharm * 2)
        print("Source is %s (%s)" % (missing.src.path, missing.src.layout))
        print("Destination %s doesn't exist" % (missing.dest.path))
        print("You could make it with a command like:")
        print("  nilmtool -u %s create %s %s" % (missing.dest.url,
                                                  missing.dest.path, rec))
        raise SystemExit(1)

    # Validate the prep-specific options
    if args.column is None or args.column < 1:
        parser.error("need a column number >= 1")
    if not (1 <= args.nharm <= 32):
        parser.error("number of odd harmonics must be 1-32")

    # Rotation is kept in radians; degrees win if they were given
    if args.rotate is None:
        rotation = args.rotate_rad or 0.0
    else:
        rotation = args.rotate * 2.0 * pi / 360.0

    # The sinefit stream must exist and have the expected layout
    client_sinefit = nilmdb.client.Client(args.url)
    sinefit = nilmtools.filter.get_stream_info(client_sinefit, args.sinepath)
    if not sinefit:
        raise Exception("sinefit data not found")
    if sinefit.layout != "float32_3":
        raise Exception("sinefit data type is " + sinefit.layout
                        + "; expected float32_3")

    # Check and set metadata in prep stream
    metadata = { "prep_raw_source": flt.src.path,
                 "prep_sinefit_source": sinefit.path,
                 "prep_column": args.column }
    flt.check_dest_metadata(metadata)

    # Run the processing function on all data
    process_args = (client_sinefit, sinefit.path, args.column,
                    args.nharm, rotation)
    flt.process_numpy(process, args = process_args)
|
||||
|
||||
def process(data, interval, args, insert_function, final):
    """Compute spectral envelope rows for one chunk of raw data.

    This is the callback that main() hands to process_numpy().

    Parameters:
      data             2-D array; column 0 holds the timestamps and
                       column `column` (from args) holds the samples
      interval         not used by this function
      args             tuple (client, sinefit_path, column, nharm, rotation)
      insert_function  called once per processed period with a
                       (1, 2 * nharm + 1) array:
                       [t_min, P0, Q0, P1, Q1, ...]
      final            not used by this function

    For each row (t_min, f0, A, C) that client.stream_extract() yields
    for sinefit_path over the time range of this chunk, the samples with
    timestamps in [t_min, t_min + 1e6 / f0) are transformed with an FFT
    (normalized by the number of samples), and the odd-numbered bins
    1, 3, 5, ... are written out as (-imag, real) pairs after being
    multiplied by e**(1j * rotation * k) for k = 0, 1, 2, ...
    (presumably timestamps are in microseconds and f0 in Hz, given the
    1e6 factor -- confirm against the sinefit filter).

    Rows with a non-positive f0 are skipped, since no period length can
    be derived from them.

    Returns the number of leading rows of `data` that were consumed,
    i.e. the end index of the last period that was inserted (0 if none).
    """
    (client, sinefit_path, column, nharm, rotation) = args
    rows = data.shape[0]
    if rows == 0:
        # No time range to query; data[0, 0] below would raise IndexError
        return 0
    data_timestamps = data[:, 0]

    # The rotation factor of each output harmonic does not depend on the
    # data, so compute it once per chunk rather than once per period.
    rot = [ e**(rotation * 1j * k) for k in range(nharm) ]

    processed = 0
    # Pull out sinefit data for the entire time range of this block
    for sinefit_line in client.stream_extract(sinefit_path,
                                              data[0, 0], data[rows-1, 0]):
        # Extract sinefit data to get zero crossing timestamps
        (t_min, f0, _, _) = [ float(x) for x in sinefit_line.split() ]
        if not f0 > 0:
            # 1e6 / f0 would divide by zero (or give an empty or
            # backwards period); ignore this row
            continue
        t_max = t_min + 1e6 / f0

        # Find the indices of data that correspond to (t_min, t_max)
        idx_min = int(searchsorted(data_timestamps, t_min, side='left'))
        idx_max = int(searchsorted(data_timestamps, t_max, side='left'))
        if idx_min >= idx_max:
            # something's wonky; ignore this period
            continue
        if idx_max >= rows:
            # max is likely past the end of our chunk, so stop
            # processing this chunk now.
            break

        # Perform FFT over those indices
        N = idx_max - idx_min
        F = scipy.fftpack.fft(data[idx_min:idx_max, column]) / N

        # If we wanted more harmonics than we have, pad with zeros
        if N < (nharm * 2):
            F = r_[F, zeros(nharm * 2 - N)]

        # Fill output data.  A fresh array is used for every period so
        # that an insert_function which keeps a reference to its
        # argument never sees it overwritten by the next period.
        out = zeros((1, nharm * 2 + 1))
        out[0, 0] = t_min
        for k in range(nharm):
            Fk = F[2 * k + 1] * rot[k]
            out[0, 2 * k + 1] = -imag(Fk)   # Pk
            out[0, 2 * k + 2] = real(Fk)    # Qk

        # Insert it and continue
        insert_function(out)
        processed = idx_max

    print("Processed %s of %s rows" % (processed, rows))
    return processed
|
||||
|
||||
# Run the preprocessor only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
187
src/sinefit.py
Executable file
187
src/sinefit.py
Executable file
@@ -0,0 +1,187 @@
|
||||
#!/usr/bin/python
|
||||
|
||||
# Sine wave fitting. This runs about 5x faster than realtime on raw data.
|
||||
|
||||
import nilmtools.filter
|
||||
import nilmdb.client
|
||||
from numpy import *
|
||||
from scipy import *
|
||||
#import pylab as p
|
||||
import operator
|
||||
|
||||
def main(argv = None):
    """Command-line entry point for the sine wave fitting filter.

    Adds the "Sine fit options" group (-c/--column and -f/--frequency,
    default 60.0) to the parser provided by nilmtools.filter.Filter and
    parses `argv`.

    If parsing raises MissingDestination, a suggested "nilmtool create"
    command using layout float32_3 is printed and the program exits with
    status 1.  Otherwise the options are validated, the source path and
    column are handed to check_dest_metadata(), and process() is run
    over the data through process_numpy().
    """
    flt = nilmtools.filter.Filter()
    parser = flt.setup_parser("Sine wave fitting")
    fit_opts = parser.add_argument_group("Sine fit options")
    fit_opts.add_argument('-c', '--column', action='store', type=int,
                          help='Column number (first data column is 1)')
    fit_opts.add_argument('-f', '--frequency', action='store', type=float,
                          default=60.0,
                          help='Approximate frequency (default: %(default)s)')

    # Parse; a missing destination gets a hint on how to create it
    try:
        args = flt.parse_args(argv)
    except nilmtools.filter.MissingDestination as missing:
        rec = "float32_3"
        print("Source is %s (%s)" % (missing.src.path, missing.src.layout))
        print("Destination %s doesn't exist" % (missing.dest.path))
        print("You could make it with a command like:")
        print("  nilmtool -u %s create %s %s" % (missing.dest.url,
                                                  missing.dest.path, rec))
        raise SystemExit(1)

    # Validate our own options
    column = args.column
    if column is None or column < 1:
        parser.error("need a column number >= 1")
    if args.frequency < 0.1:
        parser.error("frequency must be >= 0.1")

    metadata = { "sinefit_source": flt.src.path,
                 "sinefit_column": column }
    flt.check_dest_metadata(metadata)
    flt.process_numpy(process, args = (column, args.frequency))
|
||||
|
||||
def process(data, interval, args, insert_function, final):
    """Mark the zero crossings of a sine wave in one chunk of raw data.

    This is the callback that main() hands to process_numpy().

    Parameters:
      data             2-D array; column 0 holds the timestamps (the
                       1e6 factors below treat them as microseconds)
                       and column `column` (from args) holds the samples
      interval         not used by this function
      args             tuple (column, f_expected), f_expected in Hz
      insert_function  called with [[t * 1e6, f0, A, C]] for every
                       marked zero crossing
      final            not used by this function

    The sampling rate is estimated from the first and last timestamps.
    Overlapping windows of about 3.5 expected periods are fitted with
    sfit4(); windows whose fitted frequency is outside
    [f_expected / 2, f_expected * 2] are skipped.

    Returns the number of leading rows of `data` that were consumed.
    0 is returned when there is not enough data to work with: fewer
    than two rows, timestamps that do not advance, or fewer rows than
    one window.
    """
    (column, f_expected) = args
    rows = data.shape[0]

    # Estimating the sampling frequency needs at least two rows that
    # span a positive amount of time; otherwise the division below
    # yields NaN or infinity and int() raises.
    if rows < 2:
        return 0
    span = data[-1][0] - data[0][0]
    if not span > 0:
        return 0

    # Estimate sampling frequency from timestamps
    fs = 1e6 * (rows - 1) / span

    # Pull out about 3.5 periods of data at once;
    # we'll expect to match 3 zero crossings in each window
    N = max(int(3.5 * fs / f_expected), 10)

    # If we don't have enough data, don't bother processing it
    if rows < N:
        return 0

    # Process overlapping windows
    start = 0
    num_zc = 0
    while start < (rows - N):
        this = data[start:start+N, column]
        t_min = data[start, 0] / 1e6

        # Do 4-parameter sine wave fit
        (A, f0, phi, C) = sfit4(this, fs)

        # Check bounds.  If frequency is too crazy, ignore this window
        if f0 < (f_expected / 2) or f0 > (f_expected * 2):
            print("frequency %s too far from expected value %s"
                  % (f0, f_expected))
            start += N
            continue

        # Period starts when the argument of cosine is 3*pi/2 radians,
        # so we're looking for sample number:
        #     n = (3 * pi / 2 - phi) / (f0/fs * 2 * pi)
        zc_n = (3 * pi / 2 - phi) / (f0 / fs * 2 * pi)
        period_n = fs / f0

        # Add periods to make the sample number non-negative
        while zc_n < 0:
            zc_n += period_n

        last_zc = None
        # Mark the zero crossings until we're a half period away
        # from the end of the window
        while zc_n < (N - period_n / 2):
            t = t_min + zc_n / fs
            insert_function([[t * 1e6, f0, A, C]])
            num_zc += 1
            last_zc = zc_n
            zc_n += period_n

        # Advance the window one quarter period past the last marked
        # zero crossing, or advance the window by half its size if we
        # didn't mark any.
        if last_zc is not None:
            advance = min(last_zc + period_n / 4, N)
        else:
            # Floor division keeps this an integer on Python 2 and 3
            advance = N // 2

        start = int(round(start + advance))

    # Return the number of rows we've processed
    print("Marked %s zero-crossings in %s rows" % (num_zc, start))
    return start
|
||||
|
||||
def sfit4(data, fs):
    """(A, f0, phi, C) = sfit4(data, fs)

    Compute 4-parameter (unknown-frequency) least-squares fit to
    sine-wave data, according to IEEE Std 1241-2010 Annex B

    Input:
      data   vector of input samples
      fs     sampling rate (Hz)

    Output:
      Parameters [A, f0, phi, C] to fit the equation
        x[n] = A * cos(f0/fs * 2 * pi * n + phi) + C
      where n is sample number.  Or, as a function of time:
        x(t) = A * cos(f0 * 2 * pi * t + phi) + C

      If the fit cannot be computed (fewer than 4 samples, a
      non-positive sampling rate, a non-finite intermediate or final
      value, or a failure inside the least-squares solver),
      (0, 0, 0, 0) is returned instead.

    by Jim Paris
    (Verified to match sfit4.m)
    """
    # Imported locally so that this function does not depend on which
    # names the file-level star imports happen to provide.
    import numpy as np

    failed = (0, 0, 0, 0)

    data = np.asarray(data, dtype=float)
    N = len(data)
    if N < 4 or not fs > 0:
        # Four unknowns need at least four samples
        return failed
    # float() avoids integer division on Python 2 if fs is an int
    t = np.linspace(0, (N - 1) / float(fs), N)

    ## Estimate frequency using FFT (step b)
    Fc = np.fft.fft(data)
    F = np.abs(Fc)
    F[0] = 0   # eliminate DC

    # Find pair of spectral lines with largest amplitude:
    # resulting values are in F(i) and F(i+1)
    half = N // 2
    i = np.argmax(F[0:half] + F[1:half + 1])

    # Interpolate FFT to get a better result (from Markus [B37]).
    # Degenerate spectra (e.g. constant input) divide by zero here;
    # that is detected through the finiteness check below, so the
    # floating-point warnings are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        U1 = np.real(Fc[i])
        U2 = np.real(Fc[i+1])
        V1 = np.imag(Fc[i])
        V2 = np.imag(Fc[i+1])
        n = 2 * np.pi / N
        ni1 = n * i
        ni2 = n * (i+1)
        K = ((V2-V1)*np.sin(ni1) + (U2-U1)*np.cos(ni1)) / (U2-U1)
        Z1 = V1 * (K - np.cos(ni1)) / np.sin(ni1) + U1
        Z2 = V2 * (K - np.cos(ni2)) / np.sin(ni2) + U2
        bin_est = np.arccos((Z2*np.cos(ni2) - Z1*np.cos(ni1)) / (Z2-Z1)) / n

    # Convert to Hz
    f0 = bin_est * fs / N
    if not np.isfinite(f0):
        return failed

    ## Fit it
    try:
        # first guess for A0, B0 using 3-parameter fit (step c)
        w = 2 * np.pi * f0
        D = np.c_[np.cos(w*t), np.sin(w*t), np.ones(N)]
        # rcond=-1 selects the machine-precision cutoff and is accepted
        # by old and new NumPy versions alike
        s = np.linalg.lstsq(D, data, rcond=-1)[0]

        # Now iterate 6 times (step i)
        for _ in range(6):
            D = np.c_[np.cos(w*t), np.sin(w*t), np.ones(N),
                      -s[0] * t * np.sin(w*t)
                      + s[1] * t * np.cos(w*t)]       # eqn B.16
            s = np.linalg.lstsq(D, data, rcond=-1)[0]  # eqn B.18
            w = w + s[3]                   # update frequency estimate

        ## Extract results
        A = np.sqrt(s[0]*s[0] + s[1]*s[1])  # eqn B.21
        f0 = w / (2 * np.pi)
        phi = -np.arctan2(s[1], s[0])       # eqn B.22
        C = s[2]
    except (np.linalg.LinAlgError, TypeError):
        # something broke down, just return zeros
        return failed

    if not np.all(np.isfinite([A, f0, phi, C])):
        return failed

    return (A, f0, phi, C)
|
||||
|
||||
# Run the sine fit filter only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
Reference in New Issue
Block a user