
Fix flake8 errors throughout code

This found a small number of real bugs too, for example,
this one that looked weird because of a 2to3 conversion,
but was wrong both before and after:
-        except IndexError as TypeError:
+        except (IndexError, TypeError):
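
An aside for readers of this commit (an illustrative sketch, not part of the
change itself): "except A as B:" catches only exception type A and binds the
caught exception object to the name B. So the old line caught only IndexError
and silently rebound the builtin name TypeError inside the handler, while the
tuple form catches both types:

    try:
        [][0]                      # raises IndexError
    except IndexError as TypeError:
        pass                       # catches ONLY IndexError, and binds it to
                                   # the name "TypeError" within the handler

    try:
        int(None)                  # raises TypeError
    except (IndexError, TypeError):
        pass                       # tuple form: catches either type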
tags/nilmtools-2.0.0^0
Jim Paris, 3 years ago
commit cfc66b6847
14 changed files with 265 additions and 220 deletions
  1. nilmtools/cleanup.py        (+27, -19)
  2. nilmtools/copy_one.py       (+3, -4)
  3. nilmtools/copy_wildcard.py  (+7, -6)
  4. nilmtools/decimate.py       (+11, -10)
  5. nilmtools/decimate_auto.py  (+13, -10)
  6. nilmtools/filter.py         (+51, -48)
  7. nilmtools/insert.py         (+10, -7)
  8. nilmtools/math.py           (+19, -12)
  9. nilmtools/median.py         (+12, -9)
  10. nilmtools/pipewatch.py     (+32, -26)
  11. nilmtools/prep.py          (+23, -21)
  12. nilmtools/sinefit.py       (+26, -24)
  13. nilmtools/trainola.py      (+30, -24)
  14. setup.cfg                  (+1, -0)

nilmtools/cleanup.py (+27, -19)

@@ -1,7 +1,7 @@
 #!/usr/bin/env python3

-from nilmdb.utils.printf import *
-from nilmdb.utils.time import (parse_time, timestamp_to_human,
+from nilmdb.utils.printf import printf, fprintf, sprintf
+from nilmdb.utils.time import (timestamp_to_human,
                                timestamp_to_seconds, seconds_to_timestamp)
 from nilmdb.utils.diskusage import human_size
 from nilmdb.utils.interval import Interval
@@ -16,15 +16,17 @@ import fnmatch
 import re
 import os

+
 def warn(msg, *args):
     fprintf(sys.stderr, "warning: " + msg + "\n", *args)

+
 class TimePeriod(object):
-    _units = { 'h': ('hour', 60*60),
-               'd': ('day', 60*60*24),
-               'w': ('week', 60*60*24*7),
-               'm': ('month', 60*60*24*30),
-               'y': ('year', 60*60*24*365) }
+    _units = {'h': ('hour', 60*60),
+              'd': ('day', 60*60*24),
+              'w': ('week', 60*60*24*7),
+              'm': ('month', 60*60*24*30),
+              'y': ('year', 60*60*24*365)}

     def __init__(self, val):
         for u in self._units:
@@ -50,6 +52,7 @@ class TimePeriod(object):
     def __str__(self):
         return self.describe_seconds(self.seconds())

+
 class StreamCleanupConfig(object):
     def __init__(self, info):
         self.path = info[0]
@@ -63,10 +66,11 @@ class StreamCleanupConfig(object):
         self.decimated_from = None
         self.also_clean_paths = []

-def main(argv = None):
+
+def main(argv=None):
     parser = argparse.ArgumentParser(
-        formatter_class = argparse.RawDescriptionHelpFormatter,
-        description = """\
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description="""\
 Clean up old data from streams using a configuration file to specify
 which data to remove.

@@ -99,10 +103,10 @@ def main(argv = None):
     parser.add_argument("-u", "--url", action="store", default=def_url,
                         help="NilmDB server URL (default: %(default)s)")
     parser.add_argument("-y", "--yes", action="store_true",
-                        default = False,
+                        default=False,
                         help="Actually remove the data (default: no)")
     parser.add_argument("-e", "--estimate", action="store_true",
-                        default = False,
+                        default=False,
                         help="Estimate how much disk space will be used")
     parser.add_argument("configfile", type=argparse.FileType('r'),
                         help="Configuration file")
@@ -114,7 +118,7 @@ def main(argv = None):

     # List all streams
     client = nilmdb.client.Client(args.url)
-    streamlist = client.stream_list(extended = True)
+    streamlist = client.stream_list(extended=True)

     # Create config objects
     streams = collections.OrderedDict()
@@ -189,7 +193,7 @@ def main(argv = None):
         printf("%17s: %s per row, %s rows per second\n",
                "base rate",
                human_size(per_row),
-               round(rate,1))
+               round(rate, 1))
         printf("%17s: %s per hour, %s per day\n",
                "base size",
                human_size(per_sec * 3600),
@@ -204,7 +208,9 @@ def main(argv = None):
             # sum_{k=0..inf} (rate / (n^k)) * d_dtype.itemsize
             d_per_row = d_dtype.itemsize
             factor = 4.0
-            d_per_sec = d_per_row * (rate / factor) * (1 / (1 - (1/factor)))
+            d_per_sec = (d_per_row *
+                         (rate / factor) *
+                         (1 / (1 - (1/factor))))
             per_sec += d_per_sec
         printf("%17s: %s per hour, %s per day\n",
                "with decimation",
@@ -224,8 +230,8 @@ def main(argv = None):
             printf("%s: keep %s\n", path, streams[path].keep)

         # Figure out the earliest timestamp we should keep.
-        intervals = [ Interval(start, end) for (start, end) in
-                      reversed(list(client.stream_intervals(path))) ]
+        intervals = [Interval(start, end) for (start, end) in
+                     reversed(list(client.stream_intervals(path)))]
         total = 0
         keep = seconds_to_timestamp(streams[path].keep.seconds())
         for i in intervals:
@@ -239,12 +245,13 @@ def main(argv = None):
                    streams[path].keep.describe_seconds(
                        timestamp_to_seconds(total)))
             continue
-        printf("  removing data before %s\n", timestamp_to_human(remove_before))
+        printf("  removing data before %s\n",
+               timestamp_to_human(remove_before))
         # Clean in reverse order.  Since we only use the primary stream and not
         # the decimated streams to figure out which data to remove, removing
         # the primary stream last means that we might recover more nicely if
         # we are interrupted and restarted.
-        clean_paths = list(reversed(streams[path].also_clean_paths)) + [ path ]
+        clean_paths = list(reversed(streams[path].also_clean_paths)) + [path]
         for p in clean_paths:
             printf("  removing from %s\n", p)
             if args.yes:
@@ -255,5 +262,6 @@ def main(argv = None):
         printf("Note: specify --yes to actually perform removals\n")
     return

+
 if __name__ == "__main__":
     main()

nilmtools/copy_one.py (+3, -4)

@@ -4,12 +4,10 @@
 # the Python standard library.

 import nilmtools.filter
-import nilmdb.client
 from nilmdb.client.numpyclient import NumpyClient
-import numpy as np
-import sys

-def main(argv = None):
+
+def main(argv=None):
     f = nilmtools.filter.Filter()
     parser = f.setup_parser("Copy a stream")
     parser.add_argument('-n', '--nometa', action='store_true',
@@ -40,5 +38,6 @@ def main(argv = None):
         for data in extractor(f.src.path, i.start, i.end):
             insert_ctx.insert(data)

+
 if __name__ == "__main__":
     main()

nilmtools/copy_wildcard.py (+7, -6)

@@ -5,17 +5,17 @@
 import nilmtools.filter
 import nilmtools.copy_one
 import nilmdb.client
-import argparse
 import fnmatch

-def main(argv = None):
+
+def main(argv=None):
     f = nilmtools.filter.Filter()
     # Reuse filter's parser, since it handles most options we need.
-    parser = f.setup_parser(description = """\
+    parser = f.setup_parser(description="""\
 Copy all streams matching the given wildcard from one host to another.

 Example: %(prog)s -u http://host1/nilmdb -U http://host2/nilmdb /sharon/*
-""", skip_paths = True)
+""", skip_paths=True)
     parser.add_argument('-n', '--nometa', action='store_true',
                         help="Don't copy or check metadata")
     parser.add_argument("path", action="store", nargs="+",
@@ -35,7 +35,7 @@ def main(argv = None):
     # Find matching streams
     matched = []
     for path in args.path:
-        matched.extend([s for s in client_src.stream_list(extended = True)
+        matched.extend([s for s in client_src.stream_list(extended=True)
                         if fnmatch.fnmatch(s[0], path)
                         and s not in matched])

@@ -51,7 +51,7 @@ def main(argv = None):
     # invoked from the command line.
     for stream in matched:
         new_argv = ["--url", client_src.geturl(),
-                    "--dest-url", client_dest.geturl() ]
+                    "--dest-url", client_dest.geturl()]
         if args.start:
             new_argv.extend(["--start", "@" + repr(args.start)])
         if args.end:
@@ -70,5 +70,6 @@ def main(argv = None):
         if e.code != 0:  # pragma: no cover (shouldn't happen)
             raise

+
 if __name__ == "__main__":
     main()

nilmtools/decimate.py (+11, -10)

@@ -1,11 +1,10 @@
 #!/usr/bin/env python3

 import nilmtools.filter
-import nilmdb.client
 import numpy as np
-import operator

-def main(argv = None):
+
+def main(argv=None):
     f = nilmtools.filter.Filter()
     parser = f.setup_parser("Decimate a stream")
     group = parser.add_argument_group("Decimate options")
@@ -38,15 +37,16 @@ def main(argv = None):
     if not (args.factor >= 2):
         raise Exception("factor needs to be 2 or more")

-    f.check_dest_metadata({ "decimate_source": f.src.path,
-                            "decimate_factor": args.factor })
+    f.check_dest_metadata({"decimate_source": f.src.path,
+                           "decimate_factor": args.factor})

     # If source is decimated, we have to decimate a bit differently
     if "decimate_source" in f.client_src.stream_get_metadata(args.srcpath):
         again = True
     else:
         again = False
-    f.process_numpy(decimate, args = (args.factor, again))
+    f.process_numpy(decimate, args=(args.factor, again))

+
 def decimate(data, interval, args, insert_function, final):
     """Decimate data"""
@@ -70,18 +70,19 @@ def decimate(data, interval, args, insert_function, final):

     # Discard extra rows that aren't a multiple of factor
     n = n // factor * factor
-    data = data[:n,:]
+    data = data[:n, :]

     # Reshape it into 3D so we can process 'factor' rows at a time
     data = data.reshape(n // factor, factor, m)

     # Fill the result
-    out = np.c_[ np.mean(data[:,:,mean_col], axis=1),
-                 np.min(data[:,:,min_col], axis=1),
-                 np.max(data[:,:,max_col], axis=1) ]
+    out = np.c_[np.mean(data[:, :, mean_col], axis=1),
+                np.min(data[:, :, min_col], axis=1),
+                np.max(data[:, :, max_col], axis=1)]

     insert_function(out)
     return n

+
 if __name__ == "__main__":
     main()

nilmtools/decimate_auto.py (+13, -10)

@@ -7,10 +7,11 @@ import nilmdb.client
 import argparse
 import fnmatch

-def main(argv = None):
+
+def main(argv=None):
     parser = argparse.ArgumentParser(
-        formatter_class = argparse.RawDescriptionHelpFormatter,
-        description = """\
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description="""\
 Automatically create multiple decimations from a single source
 stream, continuing until the last decimated level contains fewer
 than 500 points total.
@@ -44,8 +45,8 @@ def main(argv = None):
     client = nilmdb.client.Client(args.url)

     # Find list of paths to process
-    streams = [ str(s[0]) for s in client.stream_list() ]
-    streams = [ s for s in streams if "~decim-" not in s ]
+    streams = [str(s[0]) for s in client.stream_list()]
+    streams = [s for s in streams if "~decim-" not in s]
     paths = []
     for path in args.path:
         new = fnmatch.filter(streams, str(path))
@@ -57,6 +58,7 @@ def main(argv = None):
     for path in paths:
         do_decimation(client, args, path)

+
 def do_decimation(client, args, path):
     print("Decimating", path)
     info = nilmtools.filter.get_stream_info(client, path)
@@ -71,8 +73,8 @@ def do_decimation(client, args, path):

     # Figure out the type we should use for decimated streams
     if ('int32' in info.layout_type or
-        'int64' in info.layout_type or
-        'float64' in info.layout_type):
+            'int64' in info.layout_type or
+            'float64' in info.layout_type):
         decimated_type = 'float64_' + str(info.layout_count * 3)
     else:
         decimated_type = 'float32_' + str(info.layout_count * 3)
@@ -93,10 +95,10 @@ def do_decimation(client, args, path):
         client.stream_create(new_path, decimated_type)

     # Run the decimation as if it were run from the commandline
-    new_argv = [ "-u", args.url,
-                 "-f", str(args.factor) ]
+    new_argv = ["-u", args.url,
+                "-f", str(args.factor)]
     if args.force_metadata:
-        new_argv.extend([ "--force-metadata" ])
+        new_argv.extend(["--force-metadata"])
     new_argv.extend([info.path, new_path])
     nilmtools.decimate.main(new_argv)


@@ -105,5 +107,6 @@ def do_decimation(client, args, path):

     return

+
 if __name__ == "__main__":
     main()

nilmtools/filter.py (+51, -48)

@@ -3,32 +3,28 @@
 import nilmdb.client
 from nilmdb.client import Client
 from nilmdb.client.numpyclient import NumpyClient
-from nilmdb.utils.printf import *
-from nilmdb.utils.time import (parse_time, timestamp_to_human,
-                               timestamp_to_seconds)
+from nilmdb.utils.printf import printf, sprintf
 from nilmdb.utils.interval import Interval

 import nilmtools

-import itertools
-import time
-import sys
 import os
-import re
 import argparse
 import numpy as np
-import io
 import functools

+
 class ArgumentError(Exception):
     pass

+
 class MissingDestination(Exception):
     def __init__(self, args, src, dest):
         self.parsed_args = args
         self.src = src
         self.dest = dest
-        Exception.__init__(self, "destination path " + dest.path + " not found")
+        Exception.__init__(self, f"destination path {dest.path} not found")

+
 class StreamInfo(object):
     def __init__(self, url, info):
@@ -44,7 +40,7 @@ class StreamInfo(object):
                 self.timestamp_max = info[3]
                 self.rows = info[4]
                 self.seconds = nilmdb.utils.time.timestamp_to_seconds(info[5])
-            except IndexError as TypeError:
+            except (IndexError, TypeError):
                 pass

     def string(self, interhost):
@@ -60,17 +56,19 @@ class StreamInfo(object):
                        self.path, self.layout, self.rows / 1e6,
                        self.seconds / 3600.0)

+
 def get_stream_info(client, path):
     """Return a StreamInfo object about the given path, or None if it
     doesn't exist"""
-    streams = client.stream_list(path, extended = True)
+    streams = client.stream_list(path, extended=True)
     if len(streams) != 1:
         return None
     return StreamInfo(client.geturl(), streams[0])

+
 # Filter processing for a single interval of data.
 def process_numpy_interval(interval, extractor, inserter, warn_rows,
-                           function, args = None):
+                           function, args=None):
     """For the given 'interval' of data, extract data, process it
     through 'function', and insert the result.

@@ -132,6 +130,7 @@ def process_numpy_interval(interval, extractor, inserter, warn_rows,
             # we'll not miss any data when we run again later.
             insert_ctx.update_end(old_array[processed][0])

+
 def example_callback_function(data, interval, args, insert_func, final):
     """Example of the signature for the function that gets passed
     to process_numpy_interval.
@@ -160,9 +159,10 @@ def example_callback_function(data, interval, args, insert_func, final):
     """
     raise NotImplementedError("example_callback_function does nothing")

+
 class Filter(object):

-    def __init__(self, parser_description = None):
+    def __init__(self, parser_description=None):
         self._parser = None
         self._client_src = None
         self._client_dest = None
@@ -190,10 +190,10 @@ class Filter(object):
             raise Exception("Filter dest client is in use; make another")
         return self._client_dest

-    def setup_parser(self, description = "Filter data", skip_paths = False):
+    def setup_parser(self, description="Filter data", skip_paths=False):
         parser = argparse.ArgumentParser(
-            formatter_class = argparse.RawDescriptionHelpFormatter,
-            description = description)
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+            description=description)
         group = parser.add_argument_group("General filter arguments")
         group.add_argument("-u", "--url", action="store",
                            default=self.def_url,
@@ -202,14 +202,14 @@ class Filter(object):
                            help="Destination server URL "
                            "(default: same as source)")
         group.add_argument("-D", "--dry-run", action="store_true",
-                           default = False,
+                           default=False,
                            help="Just print intervals that would be "
                            "processed")
         group.add_argument("-q", "--quiet", action="store_true",
-                           default = False,
+                           default=False,
                            help="Don't print source and dest stream info")
         group.add_argument("-F", "--force-metadata", action="store_true",
-                           default = False,
+                           default=False,
                            help="Force metadata changes if the dest "
                            "doesn't match")
         group.add_argument("-s", "--start",
@@ -221,7 +221,7 @@ class Filter(object):
                            help="Ending timestamp for intervals "
                            "(free-form, noninclusive)")
         group.add_argument("-v", "--version", action="version",
-                           version = nilmtools.__version__)
+                           version=nilmtools.__version__)

         if not skip_paths:
             # Individual filter scripts might want to add these arguments
@@ -229,14 +229,14 @@ class Filter(object):
             # (for example).  "srcpath" and "destpath" arguments must exist,
             # though.
             group.add_argument("srcpath", action="store",
-                               help="Path of source stream, e.g. /foo/bar")
+                               help="Path of source stream, eg. /foo/bar")
             group.add_argument("destpath", action="store",
-                               help="Path of destination stream, e.g. /foo/bar")
+                               help="Path of destination stream, eg. /foo/bar")
         self._parser = parser
         return parser

     def set_args(self, url, dest_url, srcpath, destpath, start, end,
-                 parsed_args = None, quiet = True):
+                 parsed_args=None, quiet=True):
         """Set arguments directly from parameters"""
         if dest_url is None:
             dest_url = url
@@ -247,7 +247,8 @@ class Filter(object):
         self._client_dest = Client(dest_url)

         if (not self._interhost) and (srcpath == destpath):
-            raise ArgumentError("source and destination path must be different")
+            raise ArgumentError(
+                "source and destination path must be different")

         # Open the streams
         self.src = get_stream_info(self._client_src, srcpath)
@@ -267,7 +268,7 @@ class Filter(object):
             print("Source:", self.src.string(self._interhost))
             print("  Dest:", self.dest.string(self._interhost))

-    def parse_args(self, argv = None):
+    def parse_args(self, argv=None):
         """Parse arguments from a command line"""
         args = self._parser.parse_args(argv)

@@ -287,25 +288,25 @@ class Filter(object):

         if self._interhost:
             # Do the difference ourselves
-            s_intervals = ( Interval(start, end)
-                            for (start, end) in
-                            self._client_src.stream_intervals(
-                                self.src.path,
-                                start = self.start, end = self.end) )
-            d_intervals = ( Interval(start, end)
-                            for (start, end) in
-                            self._client_dest.stream_intervals(
-                                self.dest.path,
-                                start = self.start, end = self.end) )
+            s_intervals = (Interval(start, end)
+                           for (start, end) in
+                           self._client_src.stream_intervals(
+                               self.src.path,
+                               start=self.start, end=self.end))
+            d_intervals = (Interval(start, end)
+                           for (start, end) in
+                           self._client_dest.stream_intervals(
+                               self.dest.path,
+                               start=self.start, end=self.end))
             intervals = nilmdb.utils.interval.set_difference(s_intervals,
                                                              d_intervals)
         else:
             # Let the server do the difference for us
-            intervals = ( Interval(start, end)
-                          for (start, end) in
-                          self._client_src.stream_intervals(
-                              self.src.path, diffpath = self.dest.path,
-                              start = self.start, end = self.end) )
+            intervals = (Interval(start, end)
+                         for (start, end) in
+                         self._client_src.stream_intervals(
+                             self.src.path, diffpath=self.dest.path,
+                             start=self.start, end=self.end))
         # Optimize intervals: join intervals that are adjacent
         for interval in nilmdb.utils.interval.optimize(intervals):
             yield interval
@@ -333,7 +334,7 @@ class Filter(object):
                 wanted = str(wanted)
             val = metadata.get(key, wanted)
             if val != wanted and self.dest.rows > 0:
-                m  = "Metadata in destination stream:\n"
+                m = "Metadata in destination stream:\n"
                 m += "  %s = %s\n" % (key, val)
                 m += "doesn't match desired data:\n"
                 m += "  %s = %s\n" % (key, wanted)
@@ -346,8 +347,8 @@ class Filter(object):
             self._client_dest.stream_update_metadata(self.dest.path, data)

     # The main filter processing method.
-    def process_numpy(self, function, args = None, rows = 100000,
-                      intervals = None):
+    def process_numpy(self, function, args=None, rows=100000,
+                      intervals=None):
         """Calls process_numpy_interval for each interval that currently
         exists in self.src, but doesn't exist in self.dest.  It will
         process the data in chunks as follows:
@@ -368,8 +369,8 @@ class Filter(object):
         inserter = NumpyClient(self.dest.url).stream_insert_numpy_context

         extractor_func = functools.partial(extractor, self.src.path,
-                                           layout = self.src.layout,
-                                           maxrows = rows)
+                                           layout=self.src.layout,
+                                           maxrows=rows)
         inserter_func = functools.partial(inserter, self.dest.path)

         for interval in (intervals or self.intervals()):
@@ -377,14 +378,16 @@ class Filter(object):
             process_numpy_interval(interval, extractor_func, inserter_func,
                                    rows * 3, function, args)

-def main(argv = None):
+
+def main(argv=None):
     # This is just a dummy function; actual filters can use the other
     # functions to prepare stuff, and then do something with the data.
     f = Filter()
-    parser = f.setup_parser()
-    args = f.parse_args(argv)
+    parser = f.setup_parser()  # noqa: F841
+    args = f.parse_args(argv)  # noqa: F841
     for i in f.intervals():
         print("Generic filter: need to handle", i.human_string())

+
 if __name__ == "__main__":
     main()

nilmtools/insert.py (+10, -7)

@@ -1,29 +1,29 @@
 #!/usr/bin/env python3

 import nilmdb.client
-from nilmdb.utils.printf import *
+from nilmdb.utils.printf import printf, sprintf
 from nilmdb.utils.time import (parse_time, timestamp_to_human,
                                timestamp_to_seconds, seconds_to_timestamp,
                                rate_to_period, now as time_now)

 import os
 import nilmtools
-import time
 import sys
-import re
 import argparse
 import subprocess
 import textwrap

+
 class ParseError(Exception):
     def __init__(self, filename, error):
         msg = filename + ": " + error
         super(ParseError, self).__init__(msg)

-def parse_args(argv = None):
+
+def parse_args(argv=None):
     parser = argparse.ArgumentParser(
-        formatter_class = argparse.RawDescriptionHelpFormatter,
-        description = textwrap.dedent("""\
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=textwrap.dedent("""\
     Insert large amount of data from an external source like ethstream.

     This code tracks two timestamps:
@@ -129,7 +129,8 @@ def parse_args(argv = None):

     return args

-def main(argv = None):
+
+def main(argv=None):
     args = parse_args(argv)

     client = nilmdb.client.Client(args.url)
@@ -139,6 +140,7 @@ def main(argv = None):
     data_ts_inc = 0
     data_ts_rate = args.rate
     data_ts_delta = 0
+
     def get_data_ts():
         if args.delta:
             return data_ts_base + data_ts_delta
@@ -271,5 +273,6 @@ def main(argv = None):
             stream.insert(b"%d %s" % (data_ts, line))
     print("Done")

+
 if __name__ == "__main__":
     main()

nilmtools/math.py (+19, -12)

@@ -1,10 +1,11 @@
 #!/usr/bin/env python3

 # Miscellaenous useful mathematical functions
-from nilmdb.utils.printf import *
 from numpy import *
 import scipy

+
 def numpy_raise_errors(func):
     def wrap(*args, **kwargs):
         old = seterr('raise')
@@ -14,6 +15,7 @@ def numpy_raise_errors(func):
             seterr(**old)
     return wrap

+
 @numpy_raise_errors
 def sfit4(data, fs):
     """(A, f0, phi, C) = sfit4(data, fs)
@@ -39,7 +41,9 @@ def sfit4(data, fs):
         raise ValueError("bad data")
     t = linspace(0, (N-1) / float(fs), N)

-    ## Estimate frequency using FFT (step b)
+    #
+    # Estimate frequency using FFT (step b)
+    #
     Fc = scipy.fft.fft(data)
     F = abs(Fc)
     F[0] = 0   # eliminate DC
@@ -78,21 +82,24 @@ def sfit4(data, fs):
         # Now iterate 7 times (step b, plus 6 iterations of step i)
        for idx in range(7):
             D = c_[cos(w*t), sin(w*t), ones(N),
-                   -s[0] * t * sin(w*t) + s[1] * t * cos(w*t) ] # eqn B.16
-            s = linalg.lstsq(D, data, rcond=None)[0]            # eqn B.18
-            w = w + s[3]                   # update frequency estimate
-
-        ## Extract results
-        A = sqrt(s[0]*s[0] + s[1]*s[1])    # eqn B.21
+                   -s[0] * t * sin(w*t) + s[1] * t * cos(w*t)]  # eqn B.16
+            s = linalg.lstsq(D, data, rcond=None)[0]  # eqn B.18
+            w = w + s[3]  # update frequency estimate
+
+        #
+        # Extract results
+        #
+        A = sqrt(s[0]*s[0] + s[1]*s[1])  # eqn B.21
         f0 = w / (2*pi)
-        phi = arctan2(s[0], s[1]) # eqn B.22 (flipped for sin instead of cos)
+        phi = arctan2(s[0], s[1])  # eqn B.22 (flipped for sin instead of cos)
         C = s[2]
         return (A, f0, phi, C)
-    except Exception as e: # pragma: no cover (not sure if we can hit this?)
+    except Exception:  # pragma: no cover (not sure if we can hit this?)
         # something broke down; just return zeros
         return (0, 0, 0, 0)

-def peak_detect(data, delta = 0.1):
+
+def peak_detect(data, delta=0.1):
     """Simple min/max peak detection algorithm, taken from my code
     in the disagg.m from the 10-8-5 paper.

@@ -101,7 +108,7 @@ def peak_detect(data, delta = 0.1):
       where n is the row number in 'data', and p is 'data[n]',
       and is_max is True if this is a maximum, False if it's a minimum,
     """
-    peaks = [];
+    peaks = []
    cur_min = (None, inf)
    cur_max = (None, -inf)
    lookformax = False


nilmtools/median.py (+12, -9)

@@ -1,14 +1,16 @@
 #!/usr/bin/env python3
-import nilmtools.filter, scipy.signal
+import nilmtools.filter
+import scipy.signal

-def main(argv = None):
+
+def main(argv=None):
     f = nilmtools.filter.Filter()
     parser = f.setup_parser("Median Filter")
     group = parser.add_argument_group("Median filter options")
     group.add_argument("-z", "--size", action="store", type=int, default=25,
-                       help = "median filter size (default %(default)s)")
+                       help="median filter size (default %(default)s)")
     group.add_argument("-d", "--difference", action="store_true",
-                       help = "store difference rather than filtered values")
+                       help="store difference rather than filtered values")

     try:
         args = f.parse_args(argv)
@@ -20,12 +22,12 @@ def main(argv = None):
                 e.dest.path, e.src.layout))
         raise SystemExit(1)

-    meta = f.client_src.stream_get_metadata(f.src.path)
-    f.check_dest_metadata({ "median_filter_source": f.src.path,
-                            "median_filter_size": args.size,
-                            "median_filter_difference": repr(args.difference) })
+    f.check_dest_metadata({"median_filter_source": f.src.path,
+                           "median_filter_size": args.size,
+                           "median_filter_difference": repr(args.difference)})

-    f.process_numpy(median_filter, args = (args.size, args.difference))
+    f.process_numpy(median_filter, args=(args.size, args.difference))

+
 def median_filter(data, interval, args, insert, final):
     (size, diff) = args
@@ -39,5 +41,6 @@ def median_filter(data, interval, args, insert, final):
         insert(data)
     return rows

+
 if __name__ == "__main__":
     main()

nilmtools/pipewatch.py (+32, -26)

@@ -1,7 +1,7 @@
 #!/usr/bin/env python3

 import nilmdb.client
-from nilmdb.utils.printf import *
+from nilmdb.utils.printf import printf, fprintf
 import nilmdb.utils.lock
 import nilmtools

@@ -17,10 +17,11 @@ import signal
 import queue
 import daemon

-def parse_args(argv = None):
+
+def parse_args(argv=None):
     parser = argparse.ArgumentParser(
-        formatter_class = argparse.ArgumentDefaultsHelpFormatter,
-        description = """\
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+        description="""\
 Pipe data from 'generator' to 'consumer'.  This is intended to be
 executed frequently from cron, and will exit if another copy is
 already running.  If 'generator' or 'consumer' returns an error,
@@ -50,12 +51,13 @@ def parse_args(argv = None):

     return args

+
 def reader_thread(q, fd):
     # Read from a file descriptor, write to queue.
     try:
         while True:
             (r, w, x) = select.select([fd], [], [fd], 0.25)
-            if x: # pragma: no cover -- never expect this to happen
+            if x:  # pragma: no cover -- never expect this to happen
                 # Very few things are "exceptional conditions";
                 # just TCP OOB data, some TTY state changes, etc.
                 raise Exception
@@ -65,12 +67,13 @@ def reader_thread(q, fd):
                 # when select restarts.
                 continue
             data = os.read(fd, 65536)
-            if data == b"": # generator EOF
+            if data == b"":  # generator EOF
                 raise Exception
             q.put(data)
     except Exception:
         q.put(None)

+
 def watcher_thread(q, procs):
     # Put None in the queue if either process dies
     while True:
@@ -80,28 +83,29 @@ def watcher_thread(q, procs):
             return
         time.sleep(0.25)

+
 def pipewatch(args):
     # Run the processes, etc
     with open(os.devnull, "r") as devnull:
-        generator = subprocess.Popen(args.generator, shell = True,
-                                     bufsize = -1, close_fds = True,
-                                     stdin = devnull,
-                                     stdout = subprocess.PIPE,
-                                     stderr = None,
-                                     preexec_fn = os.setpgrp)
-        consumer = subprocess.Popen(args.consumer, shell = True,
-                                    bufsize = -11, close_fds = True,
-                                    stdin = subprocess.PIPE,
-                                    stdout = None,
-                                    stderr = None,
-                                    preexec_fn = os.setpgrp)
-        q = queue.Queue(maxsize = 4)
-        reader = threading.Thread(target = reader_thread,
-                                  args = (q, generator.stdout.fileno()))
+        generator = subprocess.Popen(args.generator, shell=True,
+                                     bufsize=-1, close_fds=True,
+                                     stdin=devnull,
+                                     stdout=subprocess.PIPE,
+                                     stderr=None,
+                                     preexec_fn=os.setpgrp)
+        consumer = subprocess.Popen(args.consumer, shell=True,
+                                    bufsize=-11, close_fds=True,
+                                    stdin=subprocess.PIPE,
+                                    stdout=None,
+                                    stderr=None,
+                                    preexec_fn=os.setpgrp)
+        q = queue.Queue(maxsize=4)
+        reader = threading.Thread(target=reader_thread,
+                                  args=(q, generator.stdout.fileno()))
         reader.start()
-        watcher = threading.Thread(target = watcher_thread,
-                                   args = (q, [generator, consumer]))
+        watcher = threading.Thread(target=watcher_thread,
+                                   args=(q, [generator, consumer]))
         watcher.start()
         try:
             while True:
@@ -154,7 +158,8 @@ def pipewatch(args):
             sys.exit(0)
     sys.exit(1)

-def main(argv = None):
+
+def main(argv=None):
     args = parse_args(argv)

     lockfile = open(args.lock, "w")
@@ -165,7 +170,7 @@ def main(argv = None):
     try:
         # Run as a daemon if requested, otherwise run directly.
         if args.daemon:  # pragma: no cover (hard to do from inside test suite)
-            with daemon.DaemonContext(files_preserve = [ lockfile ]):
+            with daemon.DaemonContext(files_preserve=[lockfile]):
                 pipewatch(args)
         else:
             pipewatch(args)
@@ -176,5 +181,6 @@ def main(argv = None):
     except OSError:
         pass

+
 if __name__ == "__main__":
     main()

nilmtools/prep.py (+23, -21)

@@ -3,21 +3,21 @@
 # Spectral envelope preprocessor.
 # Requires two streams as input: the original raw data, and sinefit data.

-from nilmdb.utils.printf import *
+from nilmdb.utils.printf import printf
 from nilmdb.utils.time import timestamp_to_human
 import nilmtools.filter
 import nilmdb.client
-from numpy import *
+from numpy import pi, zeros, r_, e, real, imag
 import scipy.fftpack
 import scipy.signal
-#from matplotlib import pyplot as p
 import bisect
 from nilmdb.utils.interval import Interval

-def main(argv = None):
+
+def main(argv=None):
     # Set up argument parser
     f = nilmtools.filter.Filter()
-    parser = f.setup_parser("Spectral Envelope Preprocessor", skip_paths = True)
+    parser = f.setup_parser("Spectral Envelope Preprocessor", skip_paths=True)
     group = parser.add_argument_group("Prep options")
     group.add_argument("-c", "--column", action="store", type=int,
                        help="Column number (first data column is 1)")
@@ -78,40 +78,41 @@ def main(argv = None):
                          + "; expected float32_3")

     # Check and set metadata in prep stream
-    f.check_dest_metadata({ "prep_raw_source": f.src.path,
-                            "prep_sinefit_source": sinefit.path,
-                            "prep_column": args.column,
-                            "prep_rotation": repr(rotation),
-                            "prep_nshift": args.nshift })
+    f.check_dest_metadata({"prep_raw_source": f.src.path,
+                           "prep_sinefit_source": sinefit.path,
+                           "prep_column": args.column,
+                           "prep_rotation": repr(rotation),
+                           "prep_nshift": args.nshift})

     # Find the intersection of the usual set of intervals we'd filter,
     # and the intervals actually present in sinefit data.  This is
     # what we will process.
     filter_int = f.intervals()
-    sinefit_int = ( Interval(start, end) for (start, end) in
-                    client_sinefit.stream_intervals(
-                        args.sinepath, start = f.start, end = f.end) )
+    sinefit_int = (Interval(start, end) for (start, end) in
+                   client_sinefit.stream_intervals(
+                       args.sinepath, start=f.start, end=f.end))
     intervals = nilmdb.utils.interval.intersection(filter_int, sinefit_int)

     # Run the process (using the helper in the filter module)
-    f.process_numpy(process, args = (client_sinefit, sinefit.path, args.column,
-                                     args.nharm, rotation, args.nshift),
-                    intervals = intervals)
+    f.process_numpy(process, args=(client_sinefit, sinefit.path, args.column,
+                                   args.nharm, rotation, args.nshift),
+                    intervals=intervals)

+
 def process(data, interval, args, insert_function, final):
     (client, sinefit_path, column, nharm, rotation, nshift) = args
     rows = data.shape[0]
-    data_timestamps = data[:,0]
+    data_timestamps = data[:, 0]

     if rows < 2:
         return 0

     last_inserted = [nilmdb.utils.time.min_timestamp]

     def insert_if_nonoverlapping(data):
         """Call insert_function to insert data, but only if this
         data doesn't overlap with other data that we inserted."""
-        if data[0][0] <= last_inserted[0]: # pragma: no cover
+        if data[0][0] <= last_inserted[0]:  # pragma: no cover
             # Getting coverage here is hard -- not sure exactly when
             # it gets triggered or why this was added; probably some
             # unlikely edge condition with timestamp rounding or something.
@@ -152,8 +153,8 @@ def process(data, interval, args, insert_function, final):
         out[0, 0] = round(t_min)
         for k in range(nharm):
             Fk = F[2 * k + 1] * e**(rot * 1j * (k+1))
-            out[0, 2 * k + 1] = -imag(Fk) # Pk
-            out[0, 2 * k + 2] = real(Fk) # Qk
+            out[0, 2 * k + 1] = -imag(Fk)   # Pk
+            out[0, 2 * k + 2] = real(Fk)    # Qk

         insert_if_nonoverlapping(out)
         return idx_max
@@ -161,7 +162,7 @@ def process(data, interval, args, insert_function, final):
     # Extract sinefit data to get zero crossing timestamps.
     # t_min = beginning of period
     # t_max = end of period
-    (t_min, f0, A, C) = [ float(x) for x in sinefit_line.split() ]
+    (t_min, f0, A, C) = [float(x) for x in sinefit_line.split()]
     t_max = t_min + 1e6 / f0

     # Compute prep over shifted windows of the period
@@ -191,5 +192,6 @@ def process(data, interval, args, insert_function, final):
            timestamp_to_human(data[0][0]), processed, rows)
     return processed

+
 if __name__ == "__main__":
     main()

nilmtools/sinefit.py (+26, -24)

@@ -1,20 +1,19 @@
 #!/usr/bin/env python3

 # Sine wave fitting.
-from nilmdb.utils.printf import *
+from nilmdb.utils.printf import printf, sprintf
 import nilmtools.filter
 import nilmtools.math
-import nilmdb.client
 from nilmdb.utils.time import (timestamp_to_human,
                                timestamp_to_seconds,
                                seconds_to_timestamp)

-from numpy import *
-from scipy import *
-#import pylab as p
+import numpy
 import sys
+# import pylab as p

-def main(argv = None):
+
+def main(argv=None):
     f = nilmtools.filter.Filter()
     parser = f.setup_parser("Sine wave fitting")
     group = parser.add_argument_group("Sine fit options")
@@ -53,19 +52,20 @@ def main(argv = None):
     if args.max_freq is None:
         args.max_freq = args.frequency * 2
     if (args.min_freq > args.max_freq or
-        args.min_freq > args.frequency or
-        args.max_freq < args.frequency):
+            args.min_freq > args.frequency or
+            args.max_freq < args.frequency):
         parser.error("invalid min or max frequency")
     if args.min_amp < 0:
         parser.error("min amplitude must be >= 0")

-    f.check_dest_metadata({ "sinefit_source": f.src.path,
-                            "sinefit_column": args.column })
-    f.process_numpy(process, args = (args.column, args.frequency, args.min_amp,
-                                     args.min_freq, args.max_freq))
+    f.check_dest_metadata({"sinefit_source": f.src.path,
+                           "sinefit_column": args.column})
+    f.process_numpy(process, args=(args.column, args.frequency, args.min_amp,
+                                   args.min_freq, args.max_freq))

+
 class SuppressibleWarning(object):
-    def __init__(self, maxcount = 10, maxsuppress = 100):
+    def __init__(self, maxcount=10, maxsuppress=100):
         self.maxcount = maxcount
         self.maxsuppress = maxsuppress
         self.count = 0
@@ -78,19 +78,20 @@ class SuppressibleWarning(object):
             now = ""
         sys.stderr.write(now + msg)

-    def warn(self, msg, seconds = None):
+    def warn(self, msg, seconds=None):
         self.count += 1
         if self.count <= self.maxcount:
             self._write(seconds, msg)
         if (self.count - self.maxcount) >= self.maxsuppress:
             self.reset()

-    def reset(self, seconds = None):
+    def reset(self, seconds=None):
         if self.count > self.maxcount:
             self._write(seconds, sprintf("(%d warnings suppressed)\n",
                                          self.count - self.maxcount))
         self.count = 0

+
 def process(data, interval, args, insert_function, final):
     (column, f_expected, a_min, f_min, f_max) = args
     rows = data.shape[0]
@@ -119,7 +120,7 @@ def process(data, interval, args, insert_function, final):
     while start < (rows - N):
         this = data[start:start+N, column]
         t_min = timestamp_to_seconds(data[start, 0])
-        t_max = timestamp_to_seconds(data[start+N-1, 0])
+        # t_max = timestamp_to_seconds(data[start+N-1, 0])

         # Do 4-parameter sine wave fit
         (A, f0, phi, C) = nilmtools.math.sfit4(this, fs)
@@ -138,13 +139,13 @@ def process(data, interval, args, insert_function, final):
             start += N
             continue

-        #p.plot(arange(N), this)
-        #p.plot(arange(N), A * sin(f0/fs * 2 * pi * arange(N) + phi) + C, 'g')
+        # p.plot(arange(N), this)
+        # p.plot(arange(N), A * sin(f0/fs * 2 * pi * arange(N) + phi) + C, 'g')

         # Period starts when the argument of sine is 0 degrees,
         # so we're looking for sample number:
         #   n = (0 - phi) / (f0/fs * 2 * pi)
-        zc_n = (0 - phi) / (f0 / fs * 2 * pi)
+        zc_n = (0 - phi) / (f0 / fs * 2 * numpy.pi)
         period_n = fs/f0

         # Add periods to make N positive
@@ -155,14 +156,14 @@ def process(data, interval, args, insert_function, final):
         # Mark the zero crossings until we're a half period away
         # from the end of the window
         while zc_n < (N - period_n/2):
-            #p.plot(zc_n, C, 'ro')
+            # p.plot(zc_n, C, 'ro')
             t = t_min + zc_n / fs
             if (last_inserted_timestamp is None or
-                t > last_inserted_timestamp):
+                    t > last_inserted_timestamp):
                 insert_function([[seconds_to_timestamp(t), f0, A, C]])
                 last_inserted_timestamp = t
                 warn.reset(t)
-            else: # pragma: no cover -- this is hard to trigger,
+            else:  # pragma: no cover -- this is hard to trigger,
                 # if it's even possible at all; I think it would require
                 # some jitter in how the waves fit, across a window boundary.
                 warn.warn("timestamp overlap\n", t)
@@ -177,8 +178,8 @@ def process(data, interval, args, insert_function, final):
             advance = min(last_zc + period_n/4, N)
         else:
             advance = N/2
-        #p.plot(advance, C, 'go')
-        #p.show()
+        # p.plot(advance, C, 'go')
+        # p.show()

         start = int(round(start + advance))

@@ -192,5 +193,6 @@ def process(data, interval, args, insert_function, final):
     printf("%sMarked %d zero-crossings in %d rows\n", now, num_zc, start)
     return start

+
 if __name__ == "__main__":
     main()

nilmtools/trainola.py (+30, -24)

@@ -1,12 +1,10 @@
 #!/usr/bin/env python3

-from nilmdb.utils.printf import *
+from nilmdb.utils.printf import printf, sprintf
 import nilmdb.client
 import nilmtools.filter
 import nilmtools.math
-from nilmdb.utils.time import (timestamp_to_human,
-                               timestamp_to_seconds,
-                               seconds_to_timestamp)
+from nilmdb.utils.time import timestamp_to_seconds
 import datetime_tz
 from nilmdb.utils.interval import Interval


@@ -14,23 +12,24 @@ import numpy as np
 import scipy
 import scipy.signal
 from numpy.core.umath_tests import inner1d
-import nilmrun
 from collections import OrderedDict
-import sys
 import time
 import functools
 import collections

+
 class DataError(ValueError):
     pass

+
 def build_column_mapping(colinfo, streaminfo):
     """Given the 'columns' list from the JSON data, verify and
     pull out a dictionary mapping for the column names/numbers."""
     columns = OrderedDict()
     for c in colinfo:
         col_num = c['index'] + 1   # skip timestamp
-        if (c['name'] in list(columns.keys()) or col_num in list(columns.values())):
+        if (c['name'] in list(columns.keys()) or
+                col_num in list(columns.values())):
             raise DataError("duplicated columns")
         if (c['index'] < 0 or c['index'] >= streaminfo.layout_count):
             raise DataError("bad column number")
@@ -39,8 +38,9 @@ def build_column_mapping(colinfo, streaminfo):
         raise DataError("no columns")
     return columns

+
 class Exemplar(object):
-    def __init__(self, exinfo, min_rows = 10, max_rows = 100000):
+    def __init__(self, exinfo, min_rows=10, max_rows=100000):
         """Given a dictionary entry from the 'exemplars' input JSON,
         verify the stream, columns, etc.  Then, fetch all the data
         into self.data."""
@@ -63,7 +63,8 @@ class Exemplar(object):
         self.columns = build_column_mapping(exinfo['columns'], self.info)

         # Count points
-        self.count = self.client.stream_count(self.stream, self.start, self.end)
+        self.count = self.client.stream_count(self.stream,
+                                              self.start, self.end)

         # Verify count
         if self.count == 0:
@@ -77,13 +78,13 @@ class Exemplar(object):
         datagen = self.client.stream_extract_numpy(self.stream,
                                                    self.start, self.end,
                                                    self.info.layout,
-                                                   maxrows = self.count)
+                                                   maxrows=self.count)
         self.data = list(datagen)[0]

         # Extract just the columns that were specified in self.columns,
         # skipping the timestamp.
-        extract_columns = [ value for (key, value) in list(self.columns.items()) ]
-        self.data = self.data[:,extract_columns]
+        extract_cols = [value for (key, value) in list(self.columns.items())]
+        self.data = self.data[:, extract_cols]

         # Fix the column indices in e.columns, since we removed/reordered
         # columns in self.data
@@ -102,20 +103,23 @@ class Exemplar(object):

     def __str__(self):
         return sprintf("\"%s\" %s [%s] %s rows",
-                       self.name, self.stream, ",".join(list(self.columns.keys())),
+                       self.name, self.stream,
+                       ",".join(list(self.columns.keys())),
                        self.count)

+
 def timestamp_to_short_human(timestamp):
     dt = datetime_tz.datetime_tz.fromtimestamp(timestamp_to_seconds(timestamp))
     return dt.strftime("%H:%M:%S")

+
 def trainola_matcher(data, interval, args, insert_func, final_chunk):
     """Perform cross-correlation match"""
-    ( src_columns, dest_count, exemplars ) = args
+    (src_columns, dest_count, exemplars) = args
     nrows = data.shape[0]

     # We want at least 10% more points than the widest exemplar.
-    widest = max([ x.count for x in exemplars ])
+    widest = max([x.count for x in exemplars])
     if (widest * 1.1) > nrows:
         return 0

@@ -201,9 +205,10 @@ def trainola_matcher(data, interval, args, insert_func, final_chunk):
     # Return how many rows we processed
     valid = max(valid, 0)
     printf("    [%s] matched %d exemplars in %d rows\n",
-           timestamp_to_short_human(data[0][0]), np.sum(out[:,1:]), valid)
+           timestamp_to_short_human(data[0][0]), np.sum(out[:, 1:]), valid)
     return valid

+
 def trainola(conf):
     print("Trainola", nilmtools.__version__)


@@ -256,16 +261,16 @@ def trainola(conf):
                                "available in source data", n, col))

     # Figure out which intervals we should process
-    intervals = ( Interval(s, e) for (s, e) in
-                  src_client.stream_intervals(src_path,
-                                              diffpath = dest_path,
-                                              start = start, end = end) )
+    intervals = (Interval(s, e) for (s, e) in
+                 src_client.stream_intervals(src_path,
+                                             diffpath=dest_path,
+                                             start=start, end=end))
     intervals = nilmdb.utils.interval.optimize(intervals)

     # Do the processing
     rows = 100000
     extractor = functools.partial(src_client.stream_extract_numpy,
-                                  src.path, layout = src.layout, maxrows = rows)
+                                  src.path, layout=src.layout, maxrows=rows)
     inserter = functools.partial(dest_client.stream_insert_numpy_context,
                                  dest.path)
     start = time.time()
@@ -283,7 +288,8 @@ def trainola(conf):
     printf("Done.  Processed %.2f seconds per second.\n",
            processed_time / elapsed)

-def main(argv = None):
+
+def main(argv=None):
     import json
     import sys

@@ -308,12 +314,12 @@ def main(argv = None):
     try:
         # Passed in a JSON string (e.g. on the command line)
         conf = json.loads(argv[0])
-    except TypeError as e:
+    except TypeError:
         # Passed in the config dictionary (e.g. from NilmRun)
         conf = argv[0]

     return trainola(conf)

+
 if __name__ == "__main__":
     main()


setup.cfg (+1, -0)

@@ -32,6 +32,7 @@ parentdir_prefix=nilmtools-
 [flake8]
 exclude=_version.py
 extend-ignore=E731
+per-file-ignores=math.py:F403,F405

 [pylint]
 ignore=_version.py
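
For context (an illustrative note, not part of the commit): F403 flags a
"from module import *" statement, and F405 flags names that may be undefined
or may come from a star import. nilmtools/math.py deliberately keeps
"from numpy import *", so these two checks are silenced for that file only.
A hypothetical two-line file shows what each code would otherwise flag:

    from numpy import *          # F403: star import used
    y = sin(linspace(0, 1, 5))   # F405: 'sin' and 'linspace' may be
                                 # undefined, or defined from star imports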

