Compare commits

...

16 Commits

492445a469  Split off useful math functions to math.py  (2013-08-02 17:27:39 -04:00)
33c3586bea  trainola: suppress peaks if larger ones are nearby  (2013-07-31 19:12:16 -04:00)
            Might fix the problem Mark noticed where turn-off transients
            are erroneously matching the drop that follows startup transients.
c1e0f8ffbc  Fix bug in copy_one  (2013-07-31 14:47:16 -04:00)
d2853bdb0e  Add test case for bad trainola detections  (2013-07-30 20:35:54 -04:00)
a4d4bc22fc  Add --skip option to nilm-insert  (2013-07-30 18:25:47 -04:00)
6090dd6112  prep: only process intervals present in both raw & sinefit  (2013-07-30 14:55:06 -04:00)
9c0d9ad324  Sample scripts from Sharon  (Sharon NILM, 2013-07-29 18:37:55 -04:00)
8b9c5d4898  Fix daemon dependency  (Sharon NILM, 2013-07-29 17:40:51 -04:00)
cf2c28b0fb  Add --daemon flag  (2013-07-29 17:16:18 -04:00)
87a26c907b  Watch for process termination too  (2013-07-29 15:08:49 -04:00)
def465b57c  Improve pipewatch; add nilm-pipewatch script  (2013-07-29 14:58:15 -04:00)
0589b8d316  start of pipewatch util  (2013-07-29 14:10:56 -04:00)
9c5f07106d  Don't need python-pip  (2013-07-20 16:15:29 -04:00)
62e11a11c0  Fix issue with column ordering in the exemplars  (2013-07-18 22:51:27 -04:00)
            If the max scale in the exemplar was a column we weren't using, it
            would bail out when looking for that correlation later.  Change things
            around so exemplars in RAM only keep around the columns we care about.
2bdcee2c36  More helpful error if exemplar stream doesn't exist  (2013-07-15 15:19:52 -04:00)
6dce8c5296  More output  (2013-07-11 18:56:53 -04:00)
17 changed files with 518 additions and 125 deletions

Makefile

@@ -8,19 +8,33 @@ else
@echo "Try 'make install'"
endif
test: test_trainola
test: test_trainola3
test_pipewatch:
nilmtools/pipewatch.py -t 3 "seq 10 20" "seq 20 30"
test_trainola:
-nilmtool -u http://bucket/nilmdb remove -s min -e max \
/sharon/prep-a-matches
nilmtools/trainola.py "$$(cat extras/trainola-test-param.js)"
test_trainola2:
-nilmtool -u http://bucket/nilmdb remove -s min -e max \
/sharon/prep-a-matches
nilmtools/trainola.py "$$(cat extras/trainola-test-param-2.js)"
test_trainola3:
-nilmtool -u "http://bucket/nilmdb" destroy -R /test/jim
nilmtool -u "http://bucket/nilmdb" create /test/jim uint8_3
nilmtools/trainola.py "$$(cat extras/trainola-test-param-3.js)"
nilmtool -u "http://bucket/nilmdb" extract /test/jim -s min -e max
test_cleanup:
nilmtools/cleanup.py -e extras/cleanup.cfg
nilmtools/cleanup.py extras/cleanup.cfg
test_insert:
nilmtools/insert.py --file --dry-run /test/foo </dev/null
nilmtools/insert.py --skip --file --dry-run /foo/bar ~/data/20130311T2100.prep1.gz ~/data/20130311T2100.prep1.gz ~/data/20130311T2200.prep1.gz
test_copy:
nilmtools/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*
@@ -39,7 +53,8 @@ test_prep: /tmp/raw.dat
nilmtool create /test/sinefit float32_3
nilmtool create /test/prep float32_8
nilmtool insert -s '@0' -t -r 8000 /test/raw /tmp/raw.dat
nilmtools/sinefit.py -a 0.5 -c 1 /test/raw /test/sinefit
nilmtools/sinefit.py -a 0.5 -c 1 -s '@0' -e '@5000000' /test/raw /test/sinefit
nilmtools/prep.py -c 2 /test/raw /test/sinefit /test/prep
nilmtool extract -s min -e max /test/prep | head -20

README

@@ -5,10 +5,10 @@ by Jim Paris <jim@jtan.com>
Prerequisites:
# Runtime and build environments
sudo apt-get install python2.7 python2.7-dev python-setuptools python-pip
sudo apt-get install python-numpy python-scipy
sudo apt-get install python2.7 python2.7-dev python-setuptools
sudo apt-get install python-numpy python-scipy python-daemon
nilmdb (1.8.1+)
nilmdb (1.8.5+)
Install:

capture.sh

@@ -0,0 +1,10 @@
#!/bin/bash
# Start the ethstream capture using nilm-pipewatch
# Bail out on errors
set -e
nilm-pipewatch --daemon --lock "/tmp/nilmdb-capture.lock" --timeout 30 \
"ethstream -a 192.168.1.209 -n 9 -r 8000 -N" \
"nilm-insert -m 10 -r 8000 --live /sharon/raw"

cleanup.cfg

@@ -0,0 +1,8 @@
[/sharon/prep-*]
keep = 1y
[/sharon/raw]
keep = 2w
[/sharon/sinefit]
keep = 1y

crontab

@@ -0,0 +1,9 @@
# Install this by running "crontab crontab" (will replace existing crontab)
# m h dom mon dow cmd
# Run NilmDB processing every 5 minutes
*/5 * * * * chronic /home/nilm/data/process.sh
# Check the capture process every minute
*/1 * * * * chronic /home/nilm/data/capture.sh

process.sh

@@ -0,0 +1,28 @@
#!/bin/bash
# Run all necessary processing on NilmDB data.
# Bail out on errors
set -e
# Ensure only one copy of this code runs at a time:
LOCKFILE="/tmp/nilmdb-process.lock"
exec 99>"$LOCKFILE"
if ! flock -n -x 99 ; then
echo "NilmDB processing already running, giving up..."
exit 0
fi
trap 'rm -f "$LOCKFILE"' 0
# sinefit on phase A voltage
nilm-sinefit -c 5 /sharon/raw /sharon/sinefit
# prep on A, B, C with appropriate rotations
nilm-prep -c 1 -r 0 /sharon/raw /sharon/sinefit /sharon/prep-a
nilm-prep -c 2 -r 120 /sharon/raw /sharon/sinefit /sharon/prep-b
nilm-prep -c 3 -r 240 /sharon/raw /sharon/sinefit /sharon/prep-c
# decimate raw and prep data
nilm-decimate-auto /sharon/raw /sharon/prep*
# run cleanup
nilm-cleanup --yes /home/nilm/data/cleanup.cfg

extras/trainola-test-param-2.js

@@ -0,0 +1,29 @@
{ "columns" : [ { "index" : 0, "name" : "P1" },
{ "index" : 1, "name" : "Q1" },
{ "index" : 2, "name" : "P3" } ],
"stream" : "/sharon/prep-a",
"url" : "http://bucket.mit.edu/nilmdb",
"dest_stream" : "/sharon/prep-a-matches",
"start" : 1365153062643133.5,
"end" : 1365168814443575.5,
"exemplars" : [ { "columns" : [ { "index" : 0,
"name" : "P1"
} ],
"dest_column" : 0,
"end" : 1365073657682000,
"name" : "Turn ON",
"start" : 1365073654321000,
"stream" : "/sharon/prep-a",
"url" : "http://bucket.mit.edu/nilmdb"
},
{ "columns" : [ { "index" : 2, "name" : "P3" },
{ "index" : 0, "name" : "P1" } ],
"dest_column" : 1,
"end" : 1365176528818000,
"name" : "Type 2 turn ON",
"start" : 1365176520030000,
"stream" : "/sharon/prep-a",
"url" : "http://bucket.mit.edu/nilmdb"
}
]
}

extras/trainola-test-param-3.js

@@ -0,0 +1,40 @@
{
"url": "http://bucket/nilmdb",
"stream": "/sharon/prep-a",
"dest_stream": "/test/jim",
"start": 1364184839901599,
"end": 1364184942407610.2,
"columns": [ { "index": 0, "name": "P1" } ],
"exemplars": [
{
"name": "A - True DBL Freezer ON",
"dest_column": 0,
"url": "http://bucket/nilmdb",
"stream": "/sharon/prep-a",
"columns": [ { "index": 0, "name": "P1" } ],
"start": 1365277707649000,
"end": 1365277710705000
},
{
"name": "A - Boiler 1 Fan OFF",
"dest_column": 1,
"url": "http://bucket/nilmdb",
"stream": "/sharon/prep-a",
"columns": [ { "index": 0, "name": "P1" } ],
"start": 1364188370735000,
"end": 1364188373819000
},
{
"name": "A - True DBL Freezer OFF",
"dest_column": 2,
"url": "http://bucket/nilmdb",
"stream": "/sharon/prep-a",
"columns": [ { "index": 0, "name": "P1" } ],
"start": 1365278087982000,
"end": 1365278089340000
}
]
}
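Either parameter file above can be handed straight to trainola, which is how the Makefile's test targets drive it (they pass the file contents as a single JSON argument). A minimal Python driver, as a sketch of the same thing:

import simplejson as json      # trainola's main() also uses simplejson
import nilmtools.trainola

with open("extras/trainola-test-param-3.js") as f:
    conf = json.load(f)
# trainola() runs the matcher described by 'conf' and returns when done
nilmtools.trainola.trainola(conf)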

nilmtools/copy_one.py

@@ -32,7 +32,7 @@ def main(argv = None):
extractor = NumpyClient(f.src.url).stream_extract_numpy
inserter = NumpyClient(f.dest.url).stream_insert_numpy_context
for i in f.intervals():
print "Processing", f.interval_string(i)
print "Processing", i.human_string()
with inserter(f.dest.path, i.start, i.end) as insert_ctx:
for data in extractor(f.src.path, i.start, i.end):
insert_ctx.insert(data)

nilmtools/filter.py

@@ -316,7 +316,8 @@ class Filter(object):
self._client_dest.stream_update_metadata(self.dest.path, data)
# The main filter processing method.
def process_numpy(self, function, args = None, rows = 100000):
def process_numpy(self, function, args = None, rows = 100000,
intervals = None):
"""Calls process_numpy_interval for each interval that currently
exists in self.src, but doesn't exist in self.dest. It will
process the data in chunks as follows:
@@ -325,6 +326,9 @@ class Filter(object):
corresponding to the data. The data is converted to a Numpy
array in chunks of 'rows' rows at a time.
If 'intervals' is not None, process those intervals instead of
the default list.
'function' should be defined as:
# def function(data, interval, args, insert_func, final)
@@ -358,7 +362,7 @@ class Filter(object):
maxrows = rows)
inserter_func = functools.partial(inserter, self.dest.path)
for interval in self.intervals():
for interval in (intervals or self.intervals()):
print "Processing", interval.human_string()
process_numpy_interval(interval, extractor_func, inserter_func,
rows * 3, function, args)
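For callers, the new 'intervals' argument means a filter can restrict processing to a precomputed interval list instead of the usual src-minus-dest set. A minimal sketch (the pass-through 'bypass' function and the single-interval list are illustrative, and the setup_parser/parse_args flow is assumed to match the other nilmtools filters):

import nilmtools.filter
from nilmdb.utils.interval import Interval

def bypass(data, interval, args, insert_func, final):
    # copy every row through unchanged and report them all consumed
    insert_func(data)
    return data.shape[0]

f = nilmtools.filter.Filter()
parser = f.setup_parser("Example pass-through filter")
args = f.parse_args()
# one explicit interval covering the whole requested range
only = [ Interval(f.start, f.end) ]
f.process_numpy(bypass, args = None, intervals = only)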

nilmtools/insert.py

@@ -53,7 +53,8 @@ def parse_args(argv = None):
is stepped forward to match 'clock'.
- If 'data' is running ahead, there is overlap in the data, and an
error is raised.
error is raised.  If '--skip' is specified, the current file
is skipped instead of raising an error.
"""))
parser.add_argument("-u", "--url", action="store",
default="http://localhost/nilmdb/",
@@ -61,6 +62,8 @@ def parse_args(argv = None):
group = parser.add_argument_group("Misc options")
group.add_argument("-D", "--dry-run", action="store_true",
help="Parse files, but don't insert any data")
group.add_argument("-s", "--skip", action="store_true",
help="Skip files if the data would overlap")
group.add_argument("-m", "--max-gap", action="store", default=10.0,
metavar="SEC", type=float,
help="Max discrepency between clock and data "
@@ -235,6 +238,10 @@ def main(argv = None):
"is %s but clock time is only %s",
timestamp_to_human(data_ts),
timestamp_to_human(clock_ts))
if args.skip:
printf("%s\n", err)
printf("Skipping the remainder of this file\n")
break
raise ParseError(filename, err)
if (data_ts + max_gap) < clock_ts:
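Reduced to a toy function, the decision the new flag controls looks like this (names are local to this sketch; the real code compares NilmDB timestamps and breaks out of the per-file loop):

def check_file_position(data_ts, clock_ts, skip):
    # Data timestamps running ahead of the clock mean the file would
    # overlap data that has already been inserted.
    if data_ts > clock_ts:
        if skip:
            print "data time ahead of clock; skipping rest of file"
            return "skip"
        raise ValueError("data overlaps existing data")
    return "ok"

print check_file_position(1000.0, 900.0, skip = True)    # -> skip
print check_file_position(900.0, 1000.0, skip = False)   # -> ok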

nilmtools/math.py  (new file, 107 lines)

@@ -0,0 +1,107 @@
#!/usr/bin/python
# Miscellaneous useful mathematical functions
from nilmdb.utils.printf import *
from numpy import *
from scipy import *

def sfit4(data, fs):
    """(A, f0, phi, C) = sfit4(data, fs)

    Compute 4-parameter (unknown-frequency) least-squares fit to
    sine-wave data, according to IEEE Std 1241-2010 Annex B

    Input:
      data   vector of input samples
      fs     sampling rate (Hz)

    Output:
      Parameters [A, f0, phi, C] to fit the equation
        x[n] = A * sin(f0/fs * 2 * pi * n + phi) + C
      where n is sample number.  Or, as a function of time:
        x(t) = A * sin(f0 * 2 * pi * t + phi) + C

    by Jim Paris
    (Verified to match sfit4.m)
    """
    N = len(data)
    t = linspace(0, (N-1) / float(fs), N)

    ## Estimate frequency using FFT (step b)
    Fc = fft(data)
    F = abs(Fc)
    F[0] = 0   # eliminate DC

    # Find pair of spectral lines with largest amplitude:
    # resulting values are in F(i) and F(i+1)
    i = argmax(F[0:int(N/2)] + F[1:int(N/2+1)])

    # Interpolate FFT to get a better result (from Markus [B37])
    U1 = real(Fc[i])
    U2 = real(Fc[i+1])
    V1 = imag(Fc[i])
    V2 = imag(Fc[i+1])
    n = 2 * pi / N
    ni1 = n * i
    ni2 = n * (i+1)
    K = ((V2-V1)*sin(ni1) + (U2-U1)*cos(ni1)) / (U2-U1)
    Z1 = V1 * (K - cos(ni1)) / sin(ni1) + U1
    Z2 = V2 * (K - cos(ni2)) / sin(ni2) + U2
    i = arccos((Z2*cos(ni2) - Z1*cos(ni1)) / (Z2-Z1)) / n

    # Convert to Hz
    f0 = i * float(fs) / N

    # Fit it.  We'll catch exceptions here and just return zeros
    # if something fails with the least squares fit, etc.
    try:
        # first guess for A0, B0 using 3-parameter fit (step c)
        s = zeros(3)
        w = 2*pi*f0

        # Now iterate 7 times (step b, plus 6 iterations of step i)
        for idx in range(7):
            D = c_[cos(w*t), sin(w*t), ones(N),
                   -s[0] * t * sin(w*t) + s[1] * t * cos(w*t)]  # eqn B.16
            s = linalg.lstsq(D, data)[0]   # eqn B.18
            w = w + s[3]                   # update frequency estimate

        ## Extract results
        A = sqrt(s[0]*s[0] + s[1]*s[1])    # eqn B.21
        f0 = w / (2*pi)
        phi = arctan2(s[0], s[1])  # eqn B.22 (flipped for sin instead of cos)
        C = s[2]
        return (A, f0, phi, C)
    except Exception as e:
        # something broke down; just return zeros
        return (0, 0, 0, 0)

def peak_detect(data, delta = 0.1):
    """Simple min/max peak detection algorithm, taken from my code
    in the disagg.m from the 10-8-5 paper.

    Returns an array of peaks: each peak is a tuple
      (n, p, is_max)
    where n is the row number in 'data', p is 'data[n]', and is_max
    is True if this is a maximum, False if it's a minimum.
    """
    peaks = []
    cur_min = (None, inf)
    cur_max = (None, -inf)
    lookformax = False
    for (n, p) in enumerate(data):
        if p > cur_max[1]:
            cur_max = (n, p)
        if p < cur_min[1]:
            cur_min = (n, p)
        if lookformax:
            if p < (cur_max[1] - delta):
                peaks.append((cur_max[0], cur_max[1], True))
                cur_min = (n, p)
                lookformax = False
        else:
            if p > (cur_min[1] + delta):
                peaks.append((cur_min[0], cur_min[1], False))
                cur_max = (n, p)
                lookformax = True
    return peaks
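A quick sanity check of both helpers (a sketch; the signal parameters are invented):

from numpy import *
import nilmtools.math

# Fit one second of a clean 60 Hz sine sampled at 8 kHz.
fs = 8000.0
n = arange(8000)
x = 2.5 * sin(60.0 / fs * 2 * pi * n + 0.3) + 1.0
(A, f0, phi, C) = nilmtools.math.sfit4(x, fs)
print A, f0, phi, C   # expect roughly 2.5, 60.0, 0.3, 1.0

# Find the turning points of a small triangle wave.
tri = concatenate([ linspace(0, 1, 10), linspace(1, 0, 10) ] * 3)
for (row, value, is_max) in nilmtools.math.peak_detect(tri, 0.1):
    print row, value, is_max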

nilmtools/pipewatch.py  (new executable file, 168 lines)

@@ -0,0 +1,168 @@
#!/usr/bin/python

import nilmdb.client
from nilmdb.utils.printf import *
import nilmdb.utils.lock
import nilmtools

import time
import sys
import os
import argparse
import subprocess
import tempfile
import threading
import select
import signal
import Queue
import daemon

def parse_args(argv = None):
    parser = argparse.ArgumentParser(
        formatter_class = argparse.ArgumentDefaultsHelpFormatter,
        version = nilmtools.__version__,
        description = """\
    Pipe data from 'generator' to 'consumer'.  This is intended to be
    executed frequently from cron, and will exit if another copy is
    already running.  If 'generator' or 'consumer' returns an error,
    or if 'generator' stops sending data for a while, it will exit.

    Intended for use with ethstream (generator) and nilm-insert
    (consumer).  Commands are executed through the shell.
    """)
    parser.add_argument("-d", "--daemon", action="store_true",
                        help="Run in background")
    parser.add_argument("-l", "--lock", metavar="FILENAME", action="store",
                        default=tempfile.gettempdir() +
                        "/nilm-pipewatch.lock",
                        help="Lock file for detecting running instance")
    parser.add_argument("-t", "--timeout", metavar="SECONDS", action="store",
                        type=float, default=30,
                        help="Restart if no output from " +
                        "generator for this long")
    group = parser.add_argument_group("commands to execute")
    group.add_argument("generator", action="store",
                       help="Data generator (e.g. \"ethstream -r 8000\")")
    group.add_argument("consumer", action="store",
                       help="Data consumer (e.g. \"nilm-insert /foo/bar\")")
    args = parser.parse_args(argv)
    return args

def reader_thread(queue, fd):
    # Read from a file descriptor, write to queue.
    try:
        while True:
            (r, w, x) = select.select([fd], [], [fd], 0.25)
            if x:
                raise Exception   # generator died?
            if not r:
                # short timeout -- just try again.  This is to catch the
                # fd being closed elsewhere, which is only detected
                # when select restarts.
                continue
            data = os.read(fd, 65536)
            if data == "":   # generator EOF
                raise Exception
            queue.put(data)
    except Exception:
        queue.put(None)

def watcher_thread(queue, procs):
    # Put None in the queue if either process dies
    while True:
        for p in procs:
            if p.poll() is not None:
                queue.put(None)
                return
        time.sleep(0.25)

def pipewatch(args):
    # Run the processes, etc
    with open(os.devnull, "r") as devnull:
        generator = subprocess.Popen(args.generator, shell = True,
                                     bufsize = -1, close_fds = True,
                                     stdin = devnull,
                                     stdout = subprocess.PIPE,
                                     stderr = None)
        consumer = subprocess.Popen(args.consumer, shell = True,
                                    bufsize = -1, close_fds = True,
                                    stdin = subprocess.PIPE,
                                    stdout = None, stderr = None)

        queue = Queue.Queue(maxsize = 32)
        reader = threading.Thread(target = reader_thread,
                                  args = (queue, generator.stdout.fileno()))
        reader.start()
        watcher = threading.Thread(target = watcher_thread,
                                   args = (queue, [generator, consumer]))
        watcher.start()
        try:
            while True:
                try:
                    data = queue.get(True, args.timeout)
                    if data is None:
                        break
                    consumer.stdin.write(data)
                except Queue.Empty:
                    # Timeout: kill the generator
                    fprintf(sys.stderr, "pipewatch: timeout\n")
                    generator.terminate()
                    break
            generator.stdout.close()
            consumer.stdin.close()
        except IOError:
            fprintf(sys.stderr, "pipewatch: I/O error\n")

        def kill(proc):
            # Wait for a process to end, or kill it
            def poll_timeout(proc, timeout):
                for x in range(1 + int(timeout / 0.1)):
                    if proc.poll() is not None:
                        break
                    time.sleep(0.1)
                return proc.poll()
            try:
                if poll_timeout(proc, 0.5) is None:
                    proc.terminate()
                    if poll_timeout(proc, 0.5) is None:
                        proc.kill()
            except OSError:
                pass
            return poll_timeout(proc, 0.5)

        # Wait for them to die, or kill them
        gret = kill(generator)
        cret = kill(consumer)
        fprintf(sys.stderr, "pipewatch: generator returned %d, " +
                "consumer returned %d\n", gret, cret)
        if gret == 0 and cret == 0:
            sys.exit(0)
        sys.exit(1)

def main(argv = None):
    args = parse_args(argv)

    lockfile = open(args.lock, "w")
    if not nilmdb.utils.lock.exclusive_lock(lockfile):
        printf("pipewatch process already running (according to %s)\n",
               args.lock)
        sys.exit(0)
    try:
        # Run as a daemon if requested, otherwise run directly.
        if args.daemon:
            with daemon.DaemonContext(files_preserve = [ lockfile ]):
                pipewatch(args)
        else:
            pipewatch(args)
    finally:
        # Clean up lockfile
        try:
            os.unlink(args.lock)
        except OSError:
            pass

if __name__ == "__main__":
    main()
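Since the entry point takes its arguments as a list, the Makefile's test_pipewatch target can be reproduced in-process (a sketch; note that main() finishes via sys.exit):

import nilmtools.pipewatch

# Same invocation as the Makefile's test_pipewatch target above:
# pipe "seq 10 20" into "seq 20 30" with a 3-second timeout.
nilmtools.pipewatch.main(["-t", "3", "seq 10 20", "seq 20 30"])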

nilmtools/prep.py

@@ -12,6 +12,7 @@ import scipy.fftpack
import scipy.signal
#from matplotlib import pyplot as p
import bisect
from nilmdb.utils.interval import Interval
def main(argv = None):
# Set up argument parser
@@ -82,9 +83,20 @@ def main(argv = None):
"prep_column": args.column,
"prep_rotation": repr(rotation) })
# Run the processing function on all data
# Find the intersection of the usual set of intervals we'd filter,
# and the intervals actually present in sinefit data. This is
# what we will process.
filter_int = f.intervals()
sinefit_int = ( Interval(start, end) for (start, end) in
client_sinefit.stream_intervals(
args.sinepath, start = f.start, end = f.end) )
intervals = nilmdb.utils.interval.intersection(filter_int, sinefit_int)
# Run the process (using the helper in the filter module)
f.process_numpy(process, args = (client_sinefit, sinefit.path, args.column,
args.nharm, rotation, args.nshift))
args.nharm, rotation, args.nshift),
intervals = intervals)
def process(data, interval, args, insert_function, final):
(client, sinefit_path, column, nharm, rotation, nshift) = args
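The intersection above is what guarantees prep only touches regions where sinefit data actually exists. With made-up timestamps (a sketch using the same nilmdb helpers the diff imports):

import nilmdb.utils.interval
from nilmdb.utils.interval import Interval

raw_int = [ Interval(0, 100), Interval(200, 300) ]   # raw-side intervals
sine_int = [ Interval(50, 250) ]                     # sinefit-side interval
for i in nilmdb.utils.interval.intersection(raw_int, sine_int):
    print i.start, i.end
# -> 50 100, then 200 250: only the overlapping regions get processed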

nilmtools/sinefit.py

@@ -3,6 +3,7 @@
# Sine wave fitting.
from nilmdb.utils.printf import *
import nilmtools.filter
import nilmtools.math
import nilmdb.client
from nilmdb.utils.time import (timestamp_to_human,
timestamp_to_seconds,
@@ -11,7 +12,6 @@ from nilmdb.utils.time import (timestamp_to_human,
from numpy import *
from scipy import *
#import pylab as p
import operator
import sys
def main(argv = None):
@@ -119,7 +119,7 @@ def process(data, interval, args, insert_function, final):
t_max = timestamp_to_seconds(data[start+N-1, 0])
# Do 4-parameter sine wave fit
(A, f0, phi, C) = sfit4(this, fs)
(A, f0, phi, C) = nilmtools.math.sfit4(this, fs)
# Check bounds. If frequency is too crazy, ignore this window
if f0 < f_min or f0 > f_max:
@@ -187,76 +187,5 @@ def process(data, interval, args, insert_function, final):
printf("%sMarked %d zero-crossings in %d rows\n", now, num_zc, start)
return start
def sfit4(data, fs):
"""(A, f0, phi, C) = sfit4(data, fs)
Compute 4-parameter (unknown-frequency) least-squares fit to
sine-wave data, according to IEEE Std 1241-2010 Annex B
Input:
data vector of input samples
fs sampling rate (Hz)
Output:
Parameters [A, f0, phi, C] to fit the equation
x[n] = A * sin(f0/fs * 2 * pi * n + phi) + C
where n is sample number. Or, as a function of time:
x(t) = A * sin(f0 * 2 * pi * t + phi) + C
by Jim Paris
(Verified to match sfit4.m)
"""
N = len(data)
t = linspace(0, (N-1) / float(fs), N)
## Estimate frequency using FFT (step b)
Fc = fft(data)
F = abs(Fc)
F[0] = 0 # eliminate DC
# Find pair of spectral lines with largest amplitude:
# resulting values are in F(i) and F(i+1)
i = argmax(F[0:int(N/2)] + F[1:int(N/2+1)])
# Interpolate FFT to get a better result (from Markus [B37])
U1 = real(Fc[i])
U2 = real(Fc[i+1])
V1 = imag(Fc[i])
V2 = imag(Fc[i+1])
n = 2 * pi / N
ni1 = n * i
ni2 = n * (i+1)
K = ((V2-V1)*sin(ni1) + (U2-U1)*cos(ni1)) / (U2-U1)
Z1 = V1 * (K - cos(ni1)) / sin(ni1) + U1
Z2 = V2 * (K - cos(ni2)) / sin(ni2) + U2
i = arccos((Z2*cos(ni2) - Z1*cos(ni1)) / (Z2-Z1)) / n
# Convert to Hz
f0 = i * float(fs) / N
# Fit it. We'll catch exceptions here and just returns zeros
# if something fails with the least squares fit, etc.
try:
# first guess for A0, B0 using 3-parameter fit (step c)
s = zeros(3)
w = 2*pi*f0
# Now iterate 7 times (step b, plus 6 iterations of step i)
for idx in range(7):
D = c_[cos(w*t), sin(w*t), ones(N),
-s[0] * t * sin(w*t) + s[1] * t * cos(w*t) ] # eqn B.16
s = linalg.lstsq(D, data)[0] # eqn B.18
w = w + s[3] # update frequency estimate
## Extract results
A = sqrt(s[0]*s[0] + s[1]*s[1]) # eqn B.21
f0 = w / (2*pi)
phi = arctan2(s[0], s[1]) # eqn B.22 (flipped for sin instead of cos)
C = s[2]
return (A, f0, phi, C)
except Exception as e:
# something broke down, just return zeros
return (0, 0, 0, 0)
if __name__ == "__main__":
main()

nilmtools/trainola.py

@@ -3,9 +3,11 @@
from nilmdb.utils.printf import *
import nilmdb.client
import nilmtools.filter
import nilmtools.math
from nilmdb.utils.time import (timestamp_to_human,
timestamp_to_seconds,
seconds_to_timestamp)
from nilmdb.utils import datetime_tz
from nilmdb.utils.interval import Interval
import numpy as np
@@ -15,6 +17,7 @@ from numpy.core.umath_tests import inner1d
import nilmrun
from collections import OrderedDict
import sys
import time
import functools
import collections
@@ -26,12 +29,12 @@ def build_column_mapping(colinfo, streaminfo):
pull out a dictionary mapping for the column names/numbers."""
columns = OrderedDict()
for c in colinfo:
if (c['name'] in columns.keys() or
c['index'] in columns.values()):
col_num = c['index'] + 1 # skip timestamp
if (c['name'] in columns.keys() or col_num in columns.values()):
raise DataError("duplicated columns")
if (c['index'] < 0 or c['index'] >= streaminfo.layout_count):
raise DataError("bad column number")
columns[c['name']] = c['index']
columns[c['name']] = col_num
if not len(columns):
raise DataError("no columns")
return columns
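The corrected mapping is easiest to see with a toy call (a sketch; FakeInfo is a stand-in for the real stream info object, of which only layout_count is consulted here):

import collections
import nilmtools.trainola

FakeInfo = collections.namedtuple("FakeInfo", "layout_count")
colinfo = [ { "index": 0, "name": "P1" },
            { "index": 2, "name": "P3" } ]
print nilmtools.trainola.build_column_mapping(colinfo, FakeInfo(8))
# -> OrderedDict([('P1', 1), ('P3', 3)]): data column 0 lives at array
#    column 1, since array column 0 holds the timestamp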
@@ -52,6 +55,9 @@ class Exemplar(object):
# Get stream info
self.client = nilmdb.client.numpyclient.NumpyClient(self.url)
self.info = nilmtools.filter.get_stream_info(self.client, self.stream)
if not self.info:
raise DataError(sprintf("exemplar stream '%s' does not exist " +
"on server '%s'", self.stream, self.url))
# Build up name => index mapping for the columns
self.columns = build_column_mapping(exinfo['columns'], self.info)
@@ -74,10 +80,17 @@ class Exemplar(object):
maxrows = self.count)
self.data = list(datagen)[0]
# Discard timestamp
self.data = self.data[:,1:]
# Extract just the columns that were specified in self.columns,
# skipping the timestamp.
extract_columns = [ value for (key, value) in self.columns.items() ]
self.data = self.data[:,extract_columns]
# Subtract the mean from each column
# Fix the column indices in e.columns, since we removed/reordered
# columns in self.data
for n, k in enumerate(self.columns):
self.columns[k] = n
# Subtract the means from each column
self.data = self.data - self.data.mean(axis=0)
# Get scale factors for each column by computing dot product
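A toy array makes the new extraction steps concrete (values invented; array column 0 is the timestamp):

from collections import OrderedDict
import numpy as np

data = np.array([ [ 100., 1., 2., 3. ],
                  [ 101., 3., 4., 5. ] ])
columns = OrderedDict([ ("P1", 1), ("P3", 3) ])   # name -> array column
extract_columns = [ v for (k, v) in columns.items() ]
data = data[:, extract_columns]   # timestamp and unused columns dropped
for n, k in enumerate(columns):
    columns[k] = n                # renumber: "P1" -> 0, "P3" -> 1
data = data - data.mean(axis=0)   # zero-mean each remaining column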
@@ -92,30 +105,9 @@ class Exemplar(object):
self.name, self.stream, ",".join(self.columns.keys()),
self.count)
def peak_detect(data, delta):
"""Simple min/max peak detection algorithm, taken from my code
in the disagg.m from the 10-8-5 paper"""
mins = [];
maxs = [];
cur_min = (None, np.inf)
cur_max = (None, -np.inf)
lookformax = False
for (n, p) in enumerate(data):
if p > cur_max[1]:
cur_max = (n, p)
if p < cur_min[1]:
cur_min = (n, p)
if lookformax:
if p < (cur_max[1] - delta):
maxs.append(cur_max)
cur_min = (n, p)
lookformax = False
else:
if p > (cur_min[1] + delta):
mins.append(cur_min)
cur_max = (n, p)
lookformax = True
return (mins, maxs)
def timestamp_to_short_human(timestamp):
dt = datetime_tz.datetime_tz.fromtimestamp(timestamp_to_seconds(timestamp))
return dt.strftime("%H:%M:%S")
def trainola_matcher(data, interval, args, insert_func, final_chunk):
"""Perform cross-correlation match"""
@@ -138,7 +130,7 @@ def trainola_matcher(data, interval, args, insert_func, final_chunk):
# Compute cross-correlation for each column
for col_name in e.columns:
a = data[:, src_columns[col_name] + 1]
a = data[:, src_columns[col_name]]
b = e.data[:, e.columns[col_name]]
corr = scipy.signal.fftconvolve(a, np.flipud(b), 'valid')[0:valid]
@@ -148,11 +140,35 @@ def trainola_matcher(data, interval, args, insert_func, final_chunk):
# Find the peaks using the column with the largest amplitude
biggest = e.scale.index(max(e.scale))
peaks_minmax = peak_detect(corrs[biggest], 0.1)
peaks = [ p[0] for p in peaks_minmax[1] ]
peaks = nilmtools.math.peak_detect(corrs[biggest], 0.1)
# Now look at every peak
for row in peaks:
# To try to reduce false positives, discard peaks where
# there's a higher-magnitude peak (either min or max) within
# one exemplar width nearby.
good_peak_locations = []
for (i, (n, p, is_max)) in enumerate(peaks):
if not is_max:
continue
ok = True
# check up to 'e.count' rows before this one
j = i-1
while ok and j >= 0 and peaks[j][0] > (n - e.count):
if abs(peaks[j][1]) > abs(p):
ok = False
j -= 1
# check up to 'e.count' rows after this one
j = i+1
while ok and j < len(peaks) and peaks[j][0] < (n + e.count):
if abs(peaks[j][1]) > abs(p):
ok = False
j += 1
if ok:
good_peak_locations.append(n)
# Now look at all good peaks
for row in good_peak_locations:
# Correlation for each column must be close enough to 1.
for (corr, scale) in zip(corrs, e.scale):
# The accepted distance from 1 is based on the relative
@@ -183,7 +199,10 @@ def trainola_matcher(data, interval, args, insert_func, final_chunk):
insert_func(out)
# Return how many rows we processed
return max(valid, 0)
valid = max(valid, 0)
printf(" [%s] matched %d exemplars in %d rows\n",
timestamp_to_short_human(data[0][0]), np.sum(out[:,1:]), valid)
return valid
def trainola(conf):
print "Trainola", nilmtools.__version__
@@ -247,14 +266,20 @@ def trainola(conf):
src.path, layout = src.layout, maxrows = rows)
inserter = functools.partial(dest_client.stream_insert_numpy_context,
dest.path)
start = time.time()
processed_time = 0
printf("Processing intervals:\n")
for interval in intervals:
printf("Processing interval:\n")
printf(" %s\n", interval.human_string())
printf("%s\n", interval.human_string())
nilmtools.filter.process_numpy_interval(
interval, extractor, inserter, rows * 3,
trainola_matcher, (src_columns, dest.layout_count, exemplars))
processed_time += (timestamp_to_seconds(interval.end) -
timestamp_to_seconds(interval.start))
elapsed = max(time.time() - start, 1e-3)
printf("Done. Processed %.2f seconds per second.\n",
processed_time / elapsed)
return "done"
def main(argv = None):
import simplejson as json
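The suppression pass added to trainola_matcher above can be exercised on its own. A toy run with invented peaks, taking the exemplar width e.count as 10 rows:

peaks = [ (50, 0.9, True),    # kept: nothing bigger within 10 rows
          (55, 0.4, True),    # dropped: rows 50 and 57 both beat it
          (57, -0.6, False),
          (80, 0.7, True) ]   # kept: row 57 is more than 10 rows away
count = 10
good_peak_locations = []
for (i, (n, p, is_max)) in enumerate(peaks):
    if not is_max:
        continue
    ok = True
    # check up to 'count' rows before this one
    j = i - 1
    while ok and j >= 0 and peaks[j][0] > (n - count):
        if abs(peaks[j][1]) > abs(p):
            ok = False
        j -= 1
    # check up to 'count' rows after this one
    j = i + 1
    while ok and j < len(peaks) and peaks[j][0] < (n + count):
        if abs(peaks[j][1]) > abs(p):
            ok = False
        j += 1
    if ok:
        good_peak_locations.append(n)
print good_peak_locations   # -> [50, 80]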

setup.py

@@ -61,9 +61,10 @@ setup(name='nilmtools',
long_description = "NILM Database Tools",
license = "Proprietary",
author_email = 'jim@jtan.com',
install_requires = [ 'nilmdb >= 1.8.1',
install_requires = [ 'nilmdb >= 1.8.5',
'numpy',
'scipy',
'python-daemon >= 1.5',
#'matplotlib',
],
packages = [ 'nilmtools',
@@ -80,6 +81,7 @@ setup(name='nilmtools',
'nilm-cleanup = nilmtools.cleanup:main',
'nilm-median = nilmtools.median:main',
'nilm-trainola = nilmtools.trainola:main',
'nilm-pipewatch = nilmtools.pipewatch:main',
],
},
zip_safe = False,