Browse Source

Fix flake8 errors throughout code

This found a small number of real bugs too. For example,
this one looked weird because of a 2to3 conversion,
but was wrong both before and after:
-        except IndexError as TypeError:
+        except (IndexError, TypeError):
tags/nilmtools-2.0.0^0
Jim Paris 1 year ago
parent
commit
cfc66b6847
14 changed files with 265 additions and 220 deletions
  1. +27
    -19
      nilmtools/cleanup.py
  2. +3
    -4
      nilmtools/copy_one.py
  3. +7
    -6
      nilmtools/copy_wildcard.py
  4. +11
    -10
      nilmtools/decimate.py
  5. +13
    -10
      nilmtools/decimate_auto.py
  6. +51
    -48
      nilmtools/filter.py
  7. +10
    -7
      nilmtools/insert.py
  8. +19
    -12
      nilmtools/math.py
  9. +12
    -9
      nilmtools/median.py
  10. +32
    -26
      nilmtools/pipewatch.py
  11. +23
    -21
      nilmtools/prep.py
  12. +26
    -24
      nilmtools/sinefit.py
  13. +30
    -24
      nilmtools/trainola.py
  14. +1
    -0
      setup.cfg

+ 27
- 19
nilmtools/cleanup.py View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3

from nilmdb.utils.printf import *
from nilmdb.utils.time import (parse_time, timestamp_to_human,
from nilmdb.utils.printf import printf, fprintf, sprintf
from nilmdb.utils.time import (timestamp_to_human,
timestamp_to_seconds, seconds_to_timestamp)
from nilmdb.utils.diskusage import human_size
from nilmdb.utils.interval import Interval
@@ -16,15 +16,17 @@ import fnmatch
import re
import os


def warn(msg, *args):
fprintf(sys.stderr, "warning: " + msg + "\n", *args)


class TimePeriod(object):
_units = { 'h': ('hour', 60*60),
'd': ('day', 60*60*24),
'w': ('week', 60*60*24*7),
'm': ('month', 60*60*24*30),
'y': ('year', 60*60*24*365) }
_units = {'h': ('hour', 60*60),
'd': ('day', 60*60*24),
'w': ('week', 60*60*24*7),
'm': ('month', 60*60*24*30),
'y': ('year', 60*60*24*365)}

def __init__(self, val):
for u in self._units:
@@ -50,6 +52,7 @@ class TimePeriod(object):
def __str__(self):
return self.describe_seconds(self.seconds())


class StreamCleanupConfig(object):
def __init__(self, info):
self.path = info[0]
@@ -63,10 +66,11 @@ class StreamCleanupConfig(object):
self.decimated_from = None
self.also_clean_paths = []

def main(argv = None):

def main(argv=None):
parser = argparse.ArgumentParser(
formatter_class = argparse.RawDescriptionHelpFormatter,
description = """\
formatter_class=argparse.RawDescriptionHelpFormatter,
description="""\
Clean up old data from streams using a configuration file to specify
which data to remove.

@@ -99,10 +103,10 @@ def main(argv = None):
parser.add_argument("-u", "--url", action="store", default=def_url,
help="NilmDB server URL (default: %(default)s)")
parser.add_argument("-y", "--yes", action="store_true",
default = False,
default=False,
help="Actually remove the data (default: no)")
parser.add_argument("-e", "--estimate", action="store_true",
default = False,
default=False,
help="Estimate how much disk space will be used")
parser.add_argument("configfile", type=argparse.FileType('r'),
help="Configuration file")
@@ -114,7 +118,7 @@ def main(argv = None):

# List all streams
client = nilmdb.client.Client(args.url)
streamlist = client.stream_list(extended = True)
streamlist = client.stream_list(extended=True)

# Create config objects
streams = collections.OrderedDict()
@@ -189,7 +193,7 @@ def main(argv = None):
printf("%17s: %s per row, %s rows per second\n",
"base rate",
human_size(per_row),
round(rate,1))
round(rate, 1))
printf("%17s: %s per hour, %s per day\n",
"base size",
human_size(per_sec * 3600),
@@ -204,7 +208,9 @@ def main(argv = None):
# sum_{k=0..inf} (rate / (n^k)) * d_dtype.itemsize
d_per_row = d_dtype.itemsize
factor = 4.0
d_per_sec = d_per_row * (rate / factor) * (1 / (1 - (1/factor)))
d_per_sec = (d_per_row *
(rate / factor) *
(1 / (1 - (1/factor))))
per_sec += d_per_sec
printf("%17s: %s per hour, %s per day\n",
"with decimation",
@@ -224,8 +230,8 @@ def main(argv = None):
printf("%s: keep %s\n", path, streams[path].keep)

# Figure out the earliest timestamp we should keep.
intervals = [ Interval(start, end) for (start, end) in
reversed(list(client.stream_intervals(path))) ]
intervals = [Interval(start, end) for (start, end) in
reversed(list(client.stream_intervals(path)))]
total = 0
keep = seconds_to_timestamp(streams[path].keep.seconds())
for i in intervals:
@@ -239,12 +245,13 @@ def main(argv = None):
streams[path].keep.describe_seconds(
timestamp_to_seconds(total)))
continue
printf(" removing data before %s\n", timestamp_to_human(remove_before))
printf(" removing data before %s\n",
timestamp_to_human(remove_before))
# Clean in reverse order. Since we only use the primary stream and not
# the decimated streams to figure out which data to remove, removing
# the primary stream last means that we might recover more nicely if
# we are interrupted and restarted.
clean_paths = list(reversed(streams[path].also_clean_paths)) + [ path ]
clean_paths = list(reversed(streams[path].also_clean_paths)) + [path]
for p in clean_paths:
printf(" removing from %s\n", p)
if args.yes:
@@ -255,5 +262,6 @@ def main(argv = None):
printf("Note: specify --yes to actually perform removals\n")
return


if __name__ == "__main__":
main()

+ 3
- 4
nilmtools/copy_one.py View File

@@ -4,12 +4,10 @@
# the Python standard library.

import nilmtools.filter
import nilmdb.client
from nilmdb.client.numpyclient import NumpyClient
import numpy as np
import sys

def main(argv = None):

def main(argv=None):
f = nilmtools.filter.Filter()
parser = f.setup_parser("Copy a stream")
parser.add_argument('-n', '--nometa', action='store_true',
@@ -40,5 +38,6 @@ def main(argv = None):
for data in extractor(f.src.path, i.start, i.end):
insert_ctx.insert(data)


if __name__ == "__main__":
main()

+ 7
- 6
nilmtools/copy_wildcard.py View File

@@ -5,17 +5,17 @@
import nilmtools.filter
import nilmtools.copy_one
import nilmdb.client
import argparse
import fnmatch

def main(argv = None):

def main(argv=None):
f = nilmtools.filter.Filter()
# Reuse filter's parser, since it handles most options we need.
parser = f.setup_parser(description = """\
parser = f.setup_parser(description="""\
Copy all streams matching the given wildcard from one host to another.

Example: %(prog)s -u http://host1/nilmdb -U http://host2/nilmdb /sharon/*
""", skip_paths = True)
""", skip_paths=True)
parser.add_argument('-n', '--nometa', action='store_true',
help="Don't copy or check metadata")
parser.add_argument("path", action="store", nargs="+",
@@ -35,7 +35,7 @@ def main(argv = None):
# Find matching streams
matched = []
for path in args.path:
matched.extend([s for s in client_src.stream_list(extended = True)
matched.extend([s for s in client_src.stream_list(extended=True)
if fnmatch.fnmatch(s[0], path)
and s not in matched])

@@ -51,7 +51,7 @@ def main(argv = None):
# invoked from the command line.
for stream in matched:
new_argv = ["--url", client_src.geturl(),
"--dest-url", client_dest.geturl() ]
"--dest-url", client_dest.geturl()]
if args.start:
new_argv.extend(["--start", "@" + repr(args.start)])
if args.end:
@@ -70,5 +70,6 @@ def main(argv = None):
if e.code != 0: # pragma: no cover (shouldn't happen)
raise


if __name__ == "__main__":
main()

+ 11
- 10
nilmtools/decimate.py View File

@@ -1,11 +1,10 @@
#!/usr/bin/env python3

import nilmtools.filter
import nilmdb.client
import numpy as np
import operator

def main(argv = None):

def main(argv=None):
f = nilmtools.filter.Filter()
parser = f.setup_parser("Decimate a stream")
group = parser.add_argument_group("Decimate options")
@@ -38,15 +37,16 @@ def main(argv = None):
if not (args.factor >= 2):
raise Exception("factor needs to be 2 or more")

f.check_dest_metadata({ "decimate_source": f.src.path,
"decimate_factor": args.factor })
f.check_dest_metadata({"decimate_source": f.src.path,
"decimate_factor": args.factor})

# If source is decimated, we have to decimate a bit differently
if "decimate_source" in f.client_src.stream_get_metadata(args.srcpath):
again = True
else:
again = False
f.process_numpy(decimate, args = (args.factor, again))
f.process_numpy(decimate, args=(args.factor, again))


def decimate(data, interval, args, insert_function, final):
"""Decimate data"""
@@ -70,18 +70,19 @@ def decimate(data, interval, args, insert_function, final):

# Discard extra rows that aren't a multiple of factor
n = n // factor * factor
data = data[:n,:]
data = data[:n, :]

# Reshape it into 3D so we can process 'factor' rows at a time
data = data.reshape(n // factor, factor, m)

# Fill the result
out = np.c_[ np.mean(data[:,:,mean_col], axis=1),
np.min(data[:,:,min_col], axis=1),
np.max(data[:,:,max_col], axis=1) ]
out = np.c_[np.mean(data[:, :, mean_col], axis=1),
np.min(data[:, :, min_col], axis=1),
np.max(data[:, :, max_col], axis=1)]

insert_function(out)
return n


if __name__ == "__main__":
main()

+ 13
- 10
nilmtools/decimate_auto.py View File

@@ -7,10 +7,11 @@ import nilmdb.client
import argparse
import fnmatch

def main(argv = None):

def main(argv=None):
parser = argparse.ArgumentParser(
formatter_class = argparse.RawDescriptionHelpFormatter,
description = """\
formatter_class=argparse.RawDescriptionHelpFormatter,
description="""\
Automatically create multiple decimations from a single source
stream, continuing until the last decimated level contains fewer
than 500 points total.
@@ -44,8 +45,8 @@ def main(argv = None):
client = nilmdb.client.Client(args.url)

# Find list of paths to process
streams = [ str(s[0]) for s in client.stream_list() ]
streams = [ s for s in streams if "~decim-" not in s ]
streams = [str(s[0]) for s in client.stream_list()]
streams = [s for s in streams if "~decim-" not in s]
paths = []
for path in args.path:
new = fnmatch.filter(streams, str(path))
@@ -57,6 +58,7 @@ def main(argv = None):
for path in paths:
do_decimation(client, args, path)


def do_decimation(client, args, path):
print("Decimating", path)
info = nilmtools.filter.get_stream_info(client, path)
@@ -71,8 +73,8 @@ def do_decimation(client, args, path):

# Figure out the type we should use for decimated streams
if ('int32' in info.layout_type or
'int64' in info.layout_type or
'float64' in info.layout_type):
'int64' in info.layout_type or
'float64' in info.layout_type):
decimated_type = 'float64_' + str(info.layout_count * 3)
else:
decimated_type = 'float32_' + str(info.layout_count * 3)
@@ -93,10 +95,10 @@ def do_decimation(client, args, path):
client.stream_create(new_path, decimated_type)

# Run the decimation as if it were run from the commandline
new_argv = [ "-u", args.url,
"-f", str(args.factor) ]
new_argv = ["-u", args.url,
"-f", str(args.factor)]
if args.force_metadata:
new_argv.extend([ "--force-metadata" ])
new_argv.extend(["--force-metadata"])
new_argv.extend([info.path, new_path])
nilmtools.decimate.main(new_argv)

@@ -105,5 +107,6 @@ def do_decimation(client, args, path):

return


if __name__ == "__main__":
main()

+ 51
- 48
nilmtools/filter.py View File

@@ -3,32 +3,28 @@
import nilmdb.client
from nilmdb.client import Client
from nilmdb.client.numpyclient import NumpyClient
from nilmdb.utils.printf import *
from nilmdb.utils.time import (parse_time, timestamp_to_human,
timestamp_to_seconds)
from nilmdb.utils.printf import printf, sprintf
from nilmdb.utils.interval import Interval

import nilmtools

import itertools
import time
import sys
import os
import re
import argparse
import numpy as np
import io
import functools


class ArgumentError(Exception):
pass


class MissingDestination(Exception):
def __init__(self, args, src, dest):
self.parsed_args = args
self.src = src
self.dest = dest
Exception.__init__(self, "destination path " + dest.path + " not found")
Exception.__init__(self, f"destination path {dest.path} not found")


class StreamInfo(object):
def __init__(self, url, info):
@@ -44,7 +40,7 @@ class StreamInfo(object):
self.timestamp_max = info[3]
self.rows = info[4]
self.seconds = nilmdb.utils.time.timestamp_to_seconds(info[5])
except IndexError as TypeError:
except (IndexError, TypeError):
pass

def string(self, interhost):
@@ -60,17 +56,19 @@ class StreamInfo(object):
self.path, self.layout, self.rows / 1e6,
self.seconds / 3600.0)


def get_stream_info(client, path):
"""Return a StreamInfo object about the given path, or None if it
doesn't exist"""
streams = client.stream_list(path, extended = True)
streams = client.stream_list(path, extended=True)
if len(streams) != 1:
return None
return StreamInfo(client.geturl(), streams[0])


# Filter processing for a single interval of data.
def process_numpy_interval(interval, extractor, inserter, warn_rows,
function, args = None):
function, args=None):
"""For the given 'interval' of data, extract data, process it
through 'function', and insert the result.

@@ -132,6 +130,7 @@ def process_numpy_interval(interval, extractor, inserter, warn_rows,
# we'll not miss any data when we run again later.
insert_ctx.update_end(old_array[processed][0])


def example_callback_function(data, interval, args, insert_func, final):
"""Example of the signature for the function that gets passed
to process_numpy_interval.
@@ -160,9 +159,10 @@ def example_callback_function(data, interval, args, insert_func, final):
"""
raise NotImplementedError("example_callback_function does nothing")


class Filter(object):

def __init__(self, parser_description = None):
def __init__(self, parser_description=None):
self._parser = None
self._client_src = None
self._client_dest = None
@@ -190,10 +190,10 @@ class Filter(object):
raise Exception("Filter dest client is in use; make another")
return self._client_dest

def setup_parser(self, description = "Filter data", skip_paths = False):
def setup_parser(self, description="Filter data", skip_paths=False):
parser = argparse.ArgumentParser(
formatter_class = argparse.RawDescriptionHelpFormatter,
description = description)
formatter_class=argparse.RawDescriptionHelpFormatter,
description=description)
group = parser.add_argument_group("General filter arguments")
group.add_argument("-u", "--url", action="store",
default=self.def_url,
@@ -202,14 +202,14 @@ class Filter(object):
help="Destination server URL "
"(default: same as source)")
group.add_argument("-D", "--dry-run", action="store_true",
default = False,
default=False,
help="Just print intervals that would be "
"processed")
group.add_argument("-q", "--quiet", action="store_true",
default = False,
default=False,
help="Don't print source and dest stream info")
group.add_argument("-F", "--force-metadata", action="store_true",
default = False,
default=False,
help="Force metadata changes if the dest "
"doesn't match")
group.add_argument("-s", "--start",
@@ -221,7 +221,7 @@ class Filter(object):
help="Ending timestamp for intervals "
"(free-form, noninclusive)")
group.add_argument("-v", "--version", action="version",
version = nilmtools.__version__)
version=nilmtools.__version__)

if not skip_paths:
# Individual filter scripts might want to add these arguments
@@ -229,14 +229,14 @@ class Filter(object):
# (for example). "srcpath" and "destpath" arguments must exist,
# though.
group.add_argument("srcpath", action="store",
help="Path of source stream, e.g. /foo/bar")
help="Path of source stream, eg. /foo/bar")
group.add_argument("destpath", action="store",
help="Path of destination stream, e.g. /foo/bar")
help="Path of destination stream, eg. /foo/bar")
self._parser = parser
return parser

def set_args(self, url, dest_url, srcpath, destpath, start, end,
parsed_args = None, quiet = True):
parsed_args=None, quiet=True):
"""Set arguments directly from parameters"""
if dest_url is None:
dest_url = url
@@ -247,7 +247,8 @@ class Filter(object):
self._client_dest = Client(dest_url)

if (not self._interhost) and (srcpath == destpath):
raise ArgumentError("source and destination path must be different")
raise ArgumentError(
"source and destination path must be different")

# Open the streams
self.src = get_stream_info(self._client_src, srcpath)
@@ -267,7 +268,7 @@ class Filter(object):
print("Source:", self.src.string(self._interhost))
print(" Dest:", self.dest.string(self._interhost))

def parse_args(self, argv = None):
def parse_args(self, argv=None):
"""Parse arguments from a command line"""
args = self._parser.parse_args(argv)

@@ -287,25 +288,25 @@ class Filter(object):

if self._interhost:
# Do the difference ourselves
s_intervals = ( Interval(start, end)
for (start, end) in
self._client_src.stream_intervals(
self.src.path,
start = self.start, end = self.end) )
d_intervals = ( Interval(start, end)
for (start, end) in
self._client_dest.stream_intervals(
self.dest.path,
start = self.start, end = self.end) )
s_intervals = (Interval(start, end)
for (start, end) in
self._client_src.stream_intervals(
self.src.path,
start=self.start, end=self.end))
d_intervals = (Interval(start, end)
for (start, end) in
self._client_dest.stream_intervals(
self.dest.path,
start=self.start, end=self.end))
intervals = nilmdb.utils.interval.set_difference(s_intervals,
d_intervals)
else:
# Let the server do the difference for us
intervals = ( Interval(start, end)
for (start, end) in
self._client_src.stream_intervals(
self.src.path, diffpath = self.dest.path,
start = self.start, end = self.end) )
intervals = (Interval(start, end)
for (start, end) in
self._client_src.stream_intervals(
self.src.path, diffpath=self.dest.path,
start=self.start, end=self.end))
# Optimize intervals: join intervals that are adjacent
for interval in nilmdb.utils.interval.optimize(intervals):
yield interval
@@ -333,7 +334,7 @@ class Filter(object):
wanted = str(wanted)
val = metadata.get(key, wanted)
if val != wanted and self.dest.rows > 0:
m = "Metadata in destination stream:\n"
m = "Metadata in destination stream:\n"
m += " %s = %s\n" % (key, val)
m += "doesn't match desired data:\n"
m += " %s = %s\n" % (key, wanted)
@@ -346,8 +347,8 @@ class Filter(object):
self._client_dest.stream_update_metadata(self.dest.path, data)

# The main filter processing method.
def process_numpy(self, function, args = None, rows = 100000,
intervals = None):
def process_numpy(self, function, args=None, rows=100000,
intervals=None):
"""Calls process_numpy_interval for each interval that currently
exists in self.src, but doesn't exist in self.dest. It will
process the data in chunks as follows:
@@ -368,8 +369,8 @@ class Filter(object):
inserter = NumpyClient(self.dest.url).stream_insert_numpy_context

extractor_func = functools.partial(extractor, self.src.path,
layout = self.src.layout,
maxrows = rows)
layout=self.src.layout,
maxrows=rows)
inserter_func = functools.partial(inserter, self.dest.path)

for interval in (intervals or self.intervals()):
@@ -377,14 +378,16 @@ class Filter(object):
process_numpy_interval(interval, extractor_func, inserter_func,
rows * 3, function, args)

def main(argv = None):

def main(argv=None):
# This is just a dummy function; actual filters can use the other
# functions to prepare stuff, and then do something with the data.
f = Filter()
parser = f.setup_parser()
args = f.parse_args(argv)
parser = f.setup_parser() # noqa: F841
args = f.parse_args(argv) # noqa: F841
for i in f.intervals():
print("Generic filter: need to handle", i.human_string())


if __name__ == "__main__":
main()

+ 10
- 7
nilmtools/insert.py View File

@@ -1,29 +1,29 @@
#!/usr/bin/env python3

import nilmdb.client
from nilmdb.utils.printf import *
from nilmdb.utils.printf import printf, sprintf
from nilmdb.utils.time import (parse_time, timestamp_to_human,
timestamp_to_seconds, seconds_to_timestamp,
rate_to_period, now as time_now)

import os
import nilmtools
import time
import sys
import re
import argparse
import subprocess
import textwrap


class ParseError(Exception):
def __init__(self, filename, error):
msg = filename + ": " + error
super(ParseError, self).__init__(msg)

def parse_args(argv = None):

def parse_args(argv=None):
parser = argparse.ArgumentParser(
formatter_class = argparse.RawDescriptionHelpFormatter,
description = textwrap.dedent("""\
formatter_class=argparse.RawDescriptionHelpFormatter,
description=textwrap.dedent("""\
Insert large amount of data from an external source like ethstream.

This code tracks two timestamps:
@@ -129,7 +129,8 @@ def parse_args(argv = None):

return args

def main(argv = None):

def main(argv=None):
args = parse_args(argv)

client = nilmdb.client.Client(args.url)
@@ -139,6 +140,7 @@ def main(argv = None):
data_ts_inc = 0
data_ts_rate = args.rate
data_ts_delta = 0

def get_data_ts():
if args.delta:
return data_ts_base + data_ts_delta
@@ -271,5 +273,6 @@ def main(argv = None):
stream.insert(b"%d %s" % (data_ts, line))
print("Done")


if __name__ == "__main__":
main()

+ 19
- 12
nilmtools/math.py View File

@@ -1,10 +1,11 @@
#!/usr/bin/env python3

# Miscellaneous useful mathematical functions
from nilmdb.utils.printf import *
from numpy import *
import scipy


def numpy_raise_errors(func):
def wrap(*args, **kwargs):
old = seterr('raise')
@@ -14,6 +15,7 @@ def numpy_raise_errors(func):
seterr(**old)
return wrap


@numpy_raise_errors
def sfit4(data, fs):
"""(A, f0, phi, C) = sfit4(data, fs)
@@ -39,7 +41,9 @@ def sfit4(data, fs):
raise ValueError("bad data")
t = linspace(0, (N-1) / float(fs), N)

## Estimate frequency using FFT (step b)
#
# Estimate frequency using FFT (step b)
#
Fc = scipy.fft.fft(data)
F = abs(Fc)
F[0] = 0 # eliminate DC
@@ -78,21 +82,24 @@ def sfit4(data, fs):
# Now iterate 7 times (step b, plus 6 iterations of step i)
for idx in range(7):
D = c_[cos(w*t), sin(w*t), ones(N),
-s[0] * t * sin(w*t) + s[1] * t * cos(w*t) ] # eqn B.16
s = linalg.lstsq(D, data, rcond=None)[0] # eqn B.18
w = w + s[3] # update frequency estimate

## Extract results
A = sqrt(s[0]*s[0] + s[1]*s[1]) # eqn B.21
-s[0] * t * sin(w*t) + s[1] * t * cos(w*t)] # eqn B.16
s = linalg.lstsq(D, data, rcond=None)[0] # eqn B.18
w = w + s[3] # update frequency estimate

#
# Extract results
#
A = sqrt(s[0]*s[0] + s[1]*s[1]) # eqn B.21
f0 = w / (2*pi)
phi = arctan2(s[0], s[1]) # eqn B.22 (flipped for sin instead of cos)
phi = arctan2(s[0], s[1]) # eqn B.22 (flipped for sin instead of cos)
C = s[2]
return (A, f0, phi, C)
except Exception as e: # pragma: no cover (not sure if we can hit this?)
except Exception: # pragma: no cover (not sure if we can hit this?)
# something broke down; just return zeros
return (0, 0, 0, 0)

def peak_detect(data, delta = 0.1):

def peak_detect(data, delta=0.1):
"""Simple min/max peak detection algorithm, taken from my code
in the disagg.m from the 10-8-5 paper.

@@ -101,7 +108,7 @@ def peak_detect(data, delta = 0.1):
where n is the row number in 'data', and p is 'data[n]',
and is_max is True if this is a maximum, False if it's a minimum,
"""
peaks = [];
peaks = []
cur_min = (None, inf)
cur_max = (None, -inf)
lookformax = False


+ 12
- 9
nilmtools/median.py View File

@@ -1,14 +1,16 @@
#!/usr/bin/env python3
import nilmtools.filter, scipy.signal
import nilmtools.filter
import scipy.signal

def main(argv = None):

def main(argv=None):
f = nilmtools.filter.Filter()
parser = f.setup_parser("Median Filter")
group = parser.add_argument_group("Median filter options")
group.add_argument("-z", "--size", action="store", type=int, default=25,
help = "median filter size (default %(default)s)")
help="median filter size (default %(default)s)")
group.add_argument("-d", "--difference", action="store_true",
help = "store difference rather than filtered values")
help="store difference rather than filtered values")

try:
args = f.parse_args(argv)
@@ -20,12 +22,12 @@ def main(argv = None):
e.dest.path, e.src.layout))
raise SystemExit(1)

meta = f.client_src.stream_get_metadata(f.src.path)
f.check_dest_metadata({ "median_filter_source": f.src.path,
"median_filter_size": args.size,
"median_filter_difference": repr(args.difference) })
f.check_dest_metadata({"median_filter_source": f.src.path,
"median_filter_size": args.size,
"median_filter_difference": repr(args.difference)})

f.process_numpy(median_filter, args=(args.size, args.difference))

f.process_numpy(median_filter, args = (args.size, args.difference))

def median_filter(data, interval, args, insert, final):
(size, diff) = args
@@ -39,5 +41,6 @@ def median_filter(data, interval, args, insert, final):
insert(data)
return rows


if __name__ == "__main__":
main()

+ 32
- 26
nilmtools/pipewatch.py View File

@@ -1,7 +1,7 @@
#!/usr/bin/env python3

import nilmdb.client
from nilmdb.utils.printf import *
from nilmdb.utils.printf import printf, fprintf
import nilmdb.utils.lock
import nilmtools

@@ -17,10 +17,11 @@ import signal
import queue
import daemon

def parse_args(argv = None):

def parse_args(argv=None):
parser = argparse.ArgumentParser(
formatter_class = argparse.ArgumentDefaultsHelpFormatter,
description = """\
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description="""\
Pipe data from 'generator' to 'consumer'. This is intended to be
executed frequently from cron, and will exit if another copy is
already running. If 'generator' or 'consumer' returns an error,
@@ -50,12 +51,13 @@ def parse_args(argv = None):

return args


def reader_thread(q, fd):
# Read from a file descriptor, write to queue.
try:
while True:
(r, w, x) = select.select([fd], [], [fd], 0.25)
if x: # pragma: no cover -- never expect this to happen
if x: # pragma: no cover -- never expect this to happen
# Very few things are "exceptional conditions";
# just TCP OOB data, some TTY state changes, etc.
raise Exception
@@ -65,12 +67,13 @@ def reader_thread(q, fd):
# when select restarts.
continue
data = os.read(fd, 65536)
if data == b"": # generator EOF
if data == b"": # generator EOF
raise Exception
q.put(data)
except Exception:
q.put(None)


def watcher_thread(q, procs):
# Put None in the queue if either process dies
while True:
@@ -80,28 +83,29 @@ def watcher_thread(q, procs):
return
time.sleep(0.25)


def pipewatch(args):
# Run the processes, etc
with open(os.devnull, "r") as devnull:
generator = subprocess.Popen(args.generator, shell = True,
bufsize = -1, close_fds = True,
stdin = devnull,
stdout = subprocess.PIPE,
stderr = None,
preexec_fn = os.setpgrp)
consumer = subprocess.Popen(args.consumer, shell = True,
bufsize = -11, close_fds = True,
stdin = subprocess.PIPE,
stdout = None,
stderr = None,
preexec_fn = os.setpgrp)
q = queue.Queue(maxsize = 4)
reader = threading.Thread(target = reader_thread,
args = (q, generator.stdout.fileno()))
generator = subprocess.Popen(args.generator, shell=True,
bufsize=-1, close_fds=True,
stdin=devnull,
stdout=subprocess.PIPE,
stderr=None,
preexec_fn=os.setpgrp)
consumer = subprocess.Popen(args.consumer, shell=True,
bufsize=-11, close_fds=True,
stdin=subprocess.PIPE,
stdout=None,
stderr=None,
preexec_fn=os.setpgrp)
q = queue.Queue(maxsize=4)
reader = threading.Thread(target=reader_thread,
args=(q, generator.stdout.fileno()))
reader.start()
watcher = threading.Thread(target = watcher_thread,
args = (q, [generator, consumer]))
watcher = threading.Thread(target=watcher_thread,
args=(q, [generator, consumer]))
watcher.start()
try:
while True:
@@ -154,7 +158,8 @@ def pipewatch(args):
sys.exit(0)
sys.exit(1)

def main(argv = None):

def main(argv=None):
args = parse_args(argv)

lockfile = open(args.lock, "w")
@@ -165,7 +170,7 @@ def main(argv = None):
try:
# Run as a daemon if requested, otherwise run directly.
if args.daemon: # pragma: no cover (hard to do from inside test suite)
with daemon.DaemonContext(files_preserve = [ lockfile ]):
with daemon.DaemonContext(files_preserve=[lockfile]):
pipewatch(args)
else:
pipewatch(args)
@@ -176,5 +181,6 @@ def main(argv = None):
except OSError:
pass


if __name__ == "__main__":
main()

+ 23
- 21
nilmtools/prep.py View File

@@ -3,21 +3,21 @@
# Spectral envelope preprocessor.
# Requires two streams as input: the original raw data, and sinefit data.

from nilmdb.utils.printf import *
from nilmdb.utils.printf import printf
from nilmdb.utils.time import timestamp_to_human
import nilmtools.filter
import nilmdb.client
from numpy import *
from numpy import pi, zeros, r_, e, real, imag
import scipy.fftpack
import scipy.signal
#from matplotlib import pyplot as p
import bisect
from nilmdb.utils.interval import Interval

def main(argv = None):

def main(argv=None):
# Set up argument parser
f = nilmtools.filter.Filter()
parser = f.setup_parser("Spectral Envelope Preprocessor", skip_paths = True)
parser = f.setup_parser("Spectral Envelope Preprocessor", skip_paths=True)
group = parser.add_argument_group("Prep options")
group.add_argument("-c", "--column", action="store", type=int,
help="Column number (first data column is 1)")
@@ -78,40 +78,41 @@ def main(argv = None):
+ "; expected float32_3")

# Check and set metadata in prep stream
f.check_dest_metadata({ "prep_raw_source": f.src.path,
"prep_sinefit_source": sinefit.path,
"prep_column": args.column,
"prep_rotation": repr(rotation),
"prep_nshift": args.nshift })
f.check_dest_metadata({"prep_raw_source": f.src.path,
"prep_sinefit_source": sinefit.path,
"prep_column": args.column,
"prep_rotation": repr(rotation),
"prep_nshift": args.nshift})

# Find the intersection of the usual set of intervals we'd filter,
# and the intervals actually present in sinefit data. This is
# what we will process.
filter_int = f.intervals()
sinefit_int = ( Interval(start, end) for (start, end) in
client_sinefit.stream_intervals(
args.sinepath, start = f.start, end = f.end) )
sinefit_int = (Interval(start, end) for (start, end) in
client_sinefit.stream_intervals(
args.sinepath, start=f.start, end=f.end))
intervals = nilmdb.utils.interval.intersection(filter_int, sinefit_int)

# Run the process (using the helper in the filter module)
f.process_numpy(process, args = (client_sinefit, sinefit.path, args.column,
args.nharm, rotation, args.nshift),
intervals = intervals)
f.process_numpy(process, args=(client_sinefit, sinefit.path, args.column,
args.nharm, rotation, args.nshift),
intervals=intervals)


def process(data, interval, args, insert_function, final):
(client, sinefit_path, column, nharm, rotation, nshift) = args
rows = data.shape[0]
data_timestamps = data[:,0]
data_timestamps = data[:, 0]

if rows < 2:
return 0

last_inserted = [nilmdb.utils.time.min_timestamp]

def insert_if_nonoverlapping(data):
"""Call insert_function to insert data, but only if this
data doesn't overlap with other data that we inserted."""
if data[0][0] <= last_inserted[0]: # pragma: no cover
if data[0][0] <= last_inserted[0]: # pragma: no cover
# Getting coverage here is hard -- not sure exactly when
# it gets triggered or why this was added; probably some
# unlikely edge condition with timestamp rounding or something.
@@ -152,8 +153,8 @@ def process(data, interval, args, insert_function, final):
out[0, 0] = round(t_min)
for k in range(nharm):
Fk = F[2 * k + 1] * e**(rot * 1j * (k+1))
out[0, 2 * k + 1] = -imag(Fk) # Pk
out[0, 2 * k + 2] = real(Fk) # Qk
out[0, 2 * k + 1] = -imag(Fk) # Pk
out[0, 2 * k + 2] = real(Fk) # Qk

insert_if_nonoverlapping(out)
return idx_max
@@ -161,7 +162,7 @@ def process(data, interval, args, insert_function, final):
# Extract sinefit data to get zero crossing timestamps.
# t_min = beginning of period
# t_max = end of period
(t_min, f0, A, C) = [ float(x) for x in sinefit_line.split() ]
(t_min, f0, A, C) = [float(x) for x in sinefit_line.split()]
t_max = t_min + 1e6 / f0

# Compute prep over shifted windows of the period
@@ -191,5 +192,6 @@ def process(data, interval, args, insert_function, final):
timestamp_to_human(data[0][0]), processed, rows)
return processed


if __name__ == "__main__":
main()

+ 26
- 24
nilmtools/sinefit.py View File

@@ -1,20 +1,19 @@
#!/usr/bin/env python3

# Sine wave fitting.
from nilmdb.utils.printf import *
from nilmdb.utils.printf import printf, sprintf
import nilmtools.filter
import nilmtools.math
import nilmdb.client
from nilmdb.utils.time import (timestamp_to_human,
timestamp_to_seconds,
seconds_to_timestamp)

from numpy import *
from scipy import *
#import pylab as p
import numpy
import sys
# import pylab as p

def main(argv = None):

def main(argv=None):
f = nilmtools.filter.Filter()
parser = f.setup_parser("Sine wave fitting")
group = parser.add_argument_group("Sine fit options")
@@ -53,19 +52,20 @@ def main(argv = None):
if args.max_freq is None:
args.max_freq = args.frequency * 2
if (args.min_freq > args.max_freq or
args.min_freq > args.frequency or
args.max_freq < args.frequency):
args.min_freq > args.frequency or
args.max_freq < args.frequency):
parser.error("invalid min or max frequency")
if args.min_amp < 0:
parser.error("min amplitude must be >= 0")

f.check_dest_metadata({ "sinefit_source": f.src.path,
"sinefit_column": args.column })
f.process_numpy(process, args = (args.column, args.frequency, args.min_amp,
args.min_freq, args.max_freq))
f.check_dest_metadata({"sinefit_source": f.src.path,
"sinefit_column": args.column})
f.process_numpy(process, args=(args.column, args.frequency, args.min_amp,
args.min_freq, args.max_freq))


class SuppressibleWarning(object):
def __init__(self, maxcount = 10, maxsuppress = 100):
def __init__(self, maxcount=10, maxsuppress=100):
self.maxcount = maxcount
self.maxsuppress = maxsuppress
self.count = 0
@@ -78,19 +78,20 @@ class SuppressibleWarning(object):
now = ""
sys.stderr.write(now + msg)

def warn(self, msg, seconds=None):
    """Count a warning and write it unless the limit has been reached.

    Every call increments the counter.  Only the first ``maxcount``
    messages are actually written; later ones are silently counted.
    Once ``maxsuppress`` messages have been swallowed, the counter is
    flushed through reset().

    msg: text handed to _write()
    seconds: optional time value handed to _write() together with msg
    """
    self.count += 1
    if self.count <= self.maxcount:
        self._write(seconds, msg)
    if (self.count - self.maxcount) >= self.maxsuppress:
        # Forward the time so the flush is stamped the same way as the
        # warnings themselves (reset() accepts it; None still works).
        self.reset(seconds)

def reset(self, seconds=None):
    """Zero the warning counter, first reporting any suppressed warnings.

    When more than ``maxcount`` warnings were counted, a one-line summary
    of the excess is written through _write() using ``seconds``.
    """
    suppressed = self.count - self.maxcount
    if suppressed > 0:
        summary = sprintf("(%d warnings suppressed)\n", suppressed)
        self._write(seconds, summary)
    self.count = 0


def process(data, interval, args, insert_function, final):
(column, f_expected, a_min, f_min, f_max) = args
rows = data.shape[0]
@@ -119,7 +120,7 @@ def process(data, interval, args, insert_function, final):
while start < (rows - N):
this = data[start:start+N, column]
t_min = timestamp_to_seconds(data[start, 0])
t_max = timestamp_to_seconds(data[start+N-1, 0])
# t_max = timestamp_to_seconds(data[start+N-1, 0])

# Do 4-parameter sine wave fit
(A, f0, phi, C) = nilmtools.math.sfit4(this, fs)
@@ -138,13 +139,13 @@ def process(data, interval, args, insert_function, final):
start += N
continue

#p.plot(arange(N), this)
#p.plot(arange(N), A * sin(f0/fs * 2 * pi * arange(N) + phi) + C, 'g')
# p.plot(arange(N), this)
# p.plot(arange(N), A * sin(f0/fs * 2 * pi * arange(N) + phi) + C, 'g')

# Period starts when the argument of sine is 0 degrees,
# so we're looking for sample number:
# n = (0 - phi) / (f0/fs * 2 * pi)
zc_n = (0 - phi) / (f0 / fs * 2 * pi)
zc_n = (0 - phi) / (f0 / fs * 2 * numpy.pi)
period_n = fs/f0

# Add periods to make N positive
@@ -155,14 +156,14 @@ def process(data, interval, args, insert_function, final):
# Mark the zero crossings until we're a half period away
# from the end of the window
while zc_n < (N - period_n/2):
#p.plot(zc_n, C, 'ro')
# p.plot(zc_n, C, 'ro')
t = t_min + zc_n / fs
if (last_inserted_timestamp is None or
t > last_inserted_timestamp):
t > last_inserted_timestamp):
insert_function([[seconds_to_timestamp(t), f0, A, C]])
last_inserted_timestamp = t
warn.reset(t)
else: # pragma: no cover -- this is hard to trigger,
else: # pragma: no cover -- this is hard to trigger,
# if it's even possible at all; I think it would require
# some jitter in how the waves fit, across a window boundary.
warn.warn("timestamp overlap\n", t)
@@ -177,8 +178,8 @@ def process(data, interval, args, insert_function, final):
advance = min(last_zc + period_n/4, N)
else:
advance = N/2
#p.plot(advance, C, 'go')
#p.show()
# p.plot(advance, C, 'go')
# p.show()

start = int(round(start + advance))

@@ -192,5 +193,6 @@ def process(data, interval, args, insert_function, final):
printf("%sMarked %d zero-crossings in %d rows\n", now, num_zc, start)
return start


if __name__ == "__main__":
main()

+ 30
- 24
nilmtools/trainola.py View File

@@ -1,12 +1,10 @@
#!/usr/bin/env python3

from nilmdb.utils.printf import *
from nilmdb.utils.printf import printf, sprintf
import nilmdb.client
import nilmtools.filter
import nilmtools.math
from nilmdb.utils.time import (timestamp_to_human,
timestamp_to_seconds,
seconds_to_timestamp)
from nilmdb.utils.time import timestamp_to_seconds
import datetime_tz
from nilmdb.utils.interval import Interval

@@ -14,23 +12,24 @@ import numpy as np
import scipy
import scipy.signal
from numpy.core.umath_tests import inner1d
import nilmrun
from collections import OrderedDict
import sys
import time
import functools
import collections


class DataError(ValueError):
    """Invalid input data, e.g. duplicated, out-of-range or missing columns."""


def build_column_mapping(colinfo, streaminfo):
"""Given the 'columns' list from the JSON data, verify and
pull out a dictionary mapping for the column names/numbers."""
columns = OrderedDict()
for c in colinfo:
col_num = c['index'] + 1 # skip timestamp
if (c['name'] in list(columns.keys()) or col_num in list(columns.values())):
if (c['name'] in list(columns.keys()) or
col_num in list(columns.values())):
raise DataError("duplicated columns")
if (c['index'] < 0 or c['index'] >= streaminfo.layout_count):
raise DataError("bad column number")
@@ -39,8 +38,9 @@ def build_column_mapping(colinfo, streaminfo):
raise DataError("no columns")
return columns


class Exemplar(object):
def __init__(self, exinfo, min_rows = 10, max_rows = 100000):
def __init__(self, exinfo, min_rows=10, max_rows=100000):
"""Given a dictionary entry from the 'exemplars' input JSON,
verify the stream, columns, etc. Then, fetch all the data
into self.data."""
@@ -63,7 +63,8 @@ class Exemplar(object):
self.columns = build_column_mapping(exinfo['columns'], self.info)

# Count points
self.count = self.client.stream_count(self.stream, self.start, self.end)
self.count = self.client.stream_count(self.stream,
self.start, self.end)

# Verify count
if self.count == 0:
@@ -77,13 +78,13 @@ class Exemplar(object):
datagen = self.client.stream_extract_numpy(self.stream,
self.start, self.end,
self.info.layout,
maxrows = self.count)
maxrows=self.count)
self.data = list(datagen)[0]

# Extract just the columns that were specified in self.columns,
# skipping the timestamp.
extract_columns = [ value for (key, value) in list(self.columns.items()) ]
self.data = self.data[:,extract_columns]
extract_cols = [value for (key, value) in list(self.columns.items())]
self.data = self.data[:, extract_cols]

# Fix the column indices in e.columns, since we removed/reordered
# columns in self.data
@@ -102,20 +103,23 @@ class Exemplar(object):

def __str__(self):
return sprintf("\"%s\" %s [%s] %s rows",
self.name, self.stream, ",".join(list(self.columns.keys())),
self.name, self.stream,
",".join(list(self.columns.keys())),
self.count)


def timestamp_to_short_human(timestamp):
    """Return *timestamp* rendered as an "%H:%M:%S" string.

    The value is first converted with timestamp_to_seconds() and then
    turned into a datetime_tz object via fromtimestamp().
    """
    seconds = timestamp_to_seconds(timestamp)
    moment = datetime_tz.datetime_tz.fromtimestamp(seconds)
    return moment.strftime("%H:%M:%S")


def trainola_matcher(data, interval, args, insert_func, final_chunk):
"""Perform cross-correlation match"""
( src_columns, dest_count, exemplars ) = args
(src_columns, dest_count, exemplars) = args
nrows = data.shape[0]

# We want at least 10% more points than the widest exemplar.
widest = max([ x.count for x in exemplars ])
widest = max([x.count for x in exemplars])
if (widest * 1.1) > nrows:
return 0

@@ -201,9 +205,10 @@ def trainola_matcher(data, interval, args, insert_func, final_chunk):
# Return how many rows we processed
valid = max(valid, 0)
printf(" [%s] matched %d exemplars in %d rows\n",
timestamp_to_short_human(data[0][0]), np.sum(out[:,1:]), valid)
timestamp_to_short_human(data[0][0]), np.sum(out[:, 1:]), valid)
return valid


def trainola(conf):
print("Trainola", nilmtools.__version__)

@@ -256,16 +261,16 @@ def trainola(conf):
"available in source data", n, col))

# Figure out which intervals we should process
intervals = ( Interval(s, e) for (s, e) in
src_client.stream_intervals(src_path,
diffpath = dest_path,
start = start, end = end) )
intervals = (Interval(s, e) for (s, e) in
src_client.stream_intervals(src_path,
diffpath=dest_path,
start=start, end=end))
intervals = nilmdb.utils.interval.optimize(intervals)

# Do the processing
rows = 100000
extractor = functools.partial(src_client.stream_extract_numpy,
src.path, layout = src.layout, maxrows = rows)
src.path, layout=src.layout, maxrows=rows)
inserter = functools.partial(dest_client.stream_insert_numpy_context,
dest.path)
start = time.time()
@@ -283,7 +288,8 @@ def trainola(conf):
printf("Done. Processed %.2f seconds per second.\n",
processed_time / elapsed)

def main(argv = None):

def main(argv=None):
import json
import sys

@@ -308,12 +314,12 @@ def main(argv = None):
try:
# Passed in a JSON string (e.g. on the command line)
conf = json.loads(argv[0])
except TypeError as e:
except TypeError:
# Passed in the config dictionary (e.g. from NilmRun)
conf = argv[0]

return trainola(conf)


if __name__ == "__main__":
main()


+ 1
- 0
setup.cfg View File

@@ -32,6 +32,7 @@ parentdir_prefix=nilmtools-
[flake8]
exclude=_version.py
extend-ignore=E731
per-file-ignores=math.py:F403,F405

[pylint]
ignore=_version.py


Loading…
Cancel
Save