Compare commits


6 Commits

33c3586bea  trainola: suppress peaks if larger ones are nearby  (2013-07-31 19:12:16 -04:00)
            Might fix the problem Mark noticed where turn-off transients are
            erroneously matching the drop that follows startup transients.
c1e0f8ffbc  Fix bug in copy_one  (2013-07-31 14:47:16 -04:00)
d2853bdb0e  Add test case for bad trainola detections  (2013-07-30 20:35:54 -04:00)
a4d4bc22fc  Add --skip option to nilm-insert  (2013-07-30 18:25:47 -04:00)
6090dd6112  prep: only process intervals present in both raw & sinefit  (2013-07-30 14:55:06 -04:00)
9c0d9ad324  Sample scripts from Sharon  (2013-07-29 18:37:55 -04:00; author: Sharon NILM)
13 changed files with 179 additions and 24 deletions

File: Makefile

@@ -8,26 +8,33 @@ else
 	@echo "Try 'make install'"
 endif
-test: test_pipewatch
+test: test_trainola3
 test_pipewatch:
 	nilmtools/pipewatch.py -t 3 "seq 10 20" "seq 20 30"
 test_trainola:
-	-nilmtool -u http://bucket/nilmdb remove -s min -e max \
-		/sharon/prep-a-matches
-	nilmtools/trainola.py "$$(cat extras/trainola-test-param-2.js)"
+	-nilmtool -u http://bucket/nilmdb remove -s min -e max \
+		/sharon/prep-a-matches
+	nilmtools/trainola.py "$$(cat extras/trainola-test-param.js)"
+test_trainola2:
+	-nilmtool -u http://bucket/nilmdb remove -s min -e max \
+		/sharon/prep-a-matches
+	nilmtools/trainola.py "$$(cat extras/trainola-test-param-2.js)"
+test_trainola3:
+	-nilmtool -u "http://bucket/nilmdb" destroy -R /test/jim
+	nilmtool -u "http://bucket/nilmdb" create /test/jim uint8_3
+	nilmtools/trainola.py "$$(cat extras/trainola-test-param-3.js)"
+	nilmtool -u "http://bucket/nilmdb" extract /test/jim -s min -e max
 test_cleanup:
 	nilmtools/cleanup.py -e extras/cleanup.cfg
 	nilmtools/cleanup.py extras/cleanup.cfg
 test_insert:
 	nilmtools/insert.py --file --dry-run /test/foo </dev/null
+	nilmtools/insert.py --skip --file --dry-run /foo/bar ~/data/20130311T2100.prep1.gz ~/data/20130311T2100.prep1.gz ~/data/20130311T2200.prep1.gz
 test_copy:
 	nilmtools/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*
@@ -46,7 +53,8 @@ test_prep: /tmp/raw.dat
 	nilmtool create /test/sinefit float32_3
 	nilmtool create /test/prep float32_8
 	nilmtool insert -s '@0' -t -r 8000 /test/raw /tmp/raw.dat
-	nilmtools/sinefit.py -a 0.5 -c 1 /test/raw /test/sinefit
+	nilmtools/sinefit.py -a 0.5 -c 1 -s '@0' -e '@5000000' /test/raw /test/sinefit
 	nilmtools/prep.py -c 2 /test/raw /test/sinefit /test/prep
+	nilmtools/prep.py -c 2 /test/raw /test/sinefit /test/prep
 	nilmtool extract -s min -e max /test/prep | head -20

File: README

@@ -8,7 +8,7 @@ Prerequisites:
 	sudo apt-get install python2.7 python2.7-dev python-setuptools
 	sudo apt-get install python-numpy python-scipy python-daemon
-	nilmdb (1.8.1+)
+	nilmdb (1.8.5+)
 Install:

File: capture.sh

@@ -0,0 +1,10 @@
#!/bin/bash
# Start the ethstream capture using nilm-pipewatch
# Bail out on errors
set -e
nilm-pipewatch --daemon --lock "/tmp/nilmdb-capture.lock" --timeout 30 \
	"ethstream -a 192.168.1.209 -n 9 -r 8000 -N" \
	"nilm-insert -m 10 -r 8000 --live /sharon/raw"

File: cleanup.cfg

@@ -0,0 +1,8 @@
[/sharon/prep-*]
keep = 1y
[/sharon/raw]
keep = 2w
[/sharon/sinefit]
keep = 1y
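
Note: cleanup.cfg is ordinary INI syntax: one section per stream wildcard,
each with a "keep" duration.  A minimal sketch of parsing it with Python 2's
ConfigParser follows; the duration decoding is only an assumption for
illustration, since nilm-cleanup interprets these values itself.

# Hypothetical cleanup.cfg reader -- illustration only, not nilm-cleanup's code.
import ConfigParser

config = ConfigParser.RawConfigParser()
config.read("cleanup.cfg")
units = { "w": 7 * 86400, "y": 365 * 86400 }    # assumed meaning of 'w'/'y'
for pattern in config.sections():
    keep = config.get(pattern, "keep")          # e.g. "2w" or "1y"
    seconds = int(keep[:-1]) * units[keep[-1]]
    print "%s: keep %d seconds of data" % (pattern, seconds)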

File: crontab

@@ -0,0 +1,9 @@
# Install this by running "crontab crontab" (will replace existing crontab)
# m h dom mon dow cmd
# Run NilmDB processing every 5 minutes
*/5 * * * * chronic /home/nilm/data/process.sh
# Check the capture process every minute
*/1 * * * * chronic /home/nilm/data/capture.sh

File: process.sh

@@ -0,0 +1,28 @@
#!/bin/bash
# Run all necessary processing on NilmDB data.
# Bail out on errors
set -e
# Ensure only one copy of this code runs at a time:
LOCKFILE="/tmp/nilmdb-process.lock"
exec 99>"$LOCKFILE"
if ! flock -n -x 99 ; then
	echo "NilmDB processing already running, giving up..."
	exit 0
fi
trap 'rm -f "$LOCKFILE"' 0
# sinefit on phase A voltage
nilm-sinefit -c 5 /sharon/raw /sharon/sinefit
# prep on A, B, C with appropriate rotations
nilm-prep -c 1 -r 0 /sharon/raw /sharon/sinefit /sharon/prep-a
nilm-prep -c 2 -r 120 /sharon/raw /sharon/sinefit /sharon/prep-b
nilm-prep -c 3 -r 240 /sharon/raw /sharon/sinefit /sharon/prep-c
# decimate raw and prep data
nilm-decimate-auto /sharon/raw /sharon/prep*
# run cleanup
nilm-cleanup --yes /home/nilm/data/cleanup.cfg
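
The flock dance above is a standard single-instance guard: file descriptor 99
stays open for the life of the script, so the lock is released automatically
when the script exits.  A rough Python 2 equivalent, as a sketch only (the
pipeline itself uses the shell version above):

# Single-instance guard using fcntl.flock, mirroring process.sh
import fcntl
import sys

lockfile = open("/tmp/nilmdb-process.lock", "w")
try:
    # Non-blocking exclusive lock, like "flock -n -x 99" in the script
    fcntl.flock(lockfile, fcntl.LOCK_EX | fcntl.LOCK_NB)
except IOError:
    print "NilmDB processing already running, giving up..."
    sys.exit(0)
# ... run the sinefit/prep/decimate/cleanup steps here ...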

File: extras/trainola-test-param-3.js

@@ -0,0 +1,40 @@
{
    "url": "http://bucket/nilmdb",
    "stream": "/sharon/prep-a",
    "dest_stream": "/test/jim",
    "start": 1364184839901599,
    "end": 1364184942407610.2,
    "columns": [ { "index": 0, "name": "P1" } ],
    "exemplars": [
        {
            "name": "A - True DBL Freezer ON",
            "dest_column": 0,
            "url": "http://bucket/nilmdb",
            "stream": "/sharon/prep-a",
            "columns": [ { "index": 0, "name": "P1" } ],
            "start": 1365277707649000,
            "end": 1365277710705000
        },
        {
            "name": "A - Boiler 1 Fan OFF",
            "dest_column": 1,
            "url": "http://bucket/nilmdb",
            "stream": "/sharon/prep-a",
            "columns": [ { "index": 0, "name": "P1" } ],
            "start": 1364188370735000,
            "end": 1364188373819000
        },
        {
            "name": "A - True DBL Freezer OFF",
            "dest_column": 2,
            "url": "http://bucket/nilmdb",
            "stream": "/sharon/prep-a",
            "columns": [ { "index": 0, "name": "P1" } ],
            "start": 1365278087982000,
            "end": 1365278089340000
        }
    ]
}
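
This parameter file is what the Makefile's test_trainola3 target passes to
trainola as a single JSON argument via "$$(cat ...)".  An equivalent
invocation from Python 2, as a sketch (it assumes trainola's main() accepts
an argv list, like the other nilmtools entry points shown in this diff):

# Pass the parameter file to trainola as one JSON string argument.
import json
import nilmtools.trainola

with open("extras/trainola-test-param-3.js") as f:
    params = f.read()
json.loads(params)              # sanity-check that the file is valid JSON
nilmtools.trainola.main([params])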

File: nilmtools/copy_one.py

@@ -32,7 +32,7 @@ def main(argv = None):
     extractor = NumpyClient(f.src.url).stream_extract_numpy
     inserter = NumpyClient(f.dest.url).stream_insert_numpy_context
     for i in f.intervals():
-        print "Processing", f.interval_string(i)
+        print "Processing", i.human_string()
         with inserter(f.dest.path, i.start, i.end) as insert_ctx:
             for data in extractor(f.src.path, i.start, i.end):
                 insert_ctx.insert(data)

File: nilmtools/filter.py

@@ -316,7 +316,8 @@ class Filter(object):
         self._client_dest.stream_update_metadata(self.dest.path, data)

     # The main filter processing method.
-    def process_numpy(self, function, args = None, rows = 100000):
+    def process_numpy(self, function, args = None, rows = 100000,
+                      intervals = None):
         """Calls process_numpy_interval for each interval that currently
         exists in self.src, but doesn't exist in self.dest.  It will
         process the data in chunks as follows:
@@ -325,6 +326,9 @@ class Filter(object):
         corresponding to the data.  The data is converted to a Numpy
         array in chunks of 'rows' rows at a time.

+        If 'intervals' is not None, process those intervals instead of
+        the default list.
+
         'function' should be defined as:
         # def function(data, interval, args, insert_func, final)
@@ -358,7 +362,7 @@ class Filter(object):
                                         maxrows = rows)
         inserter_func = functools.partial(inserter, self.dest.path)

-        for interval in self.intervals():
+        for interval in (intervals or self.intervals()):
             print "Processing", interval.human_string()
             process_numpy_interval(interval, extractor_func, inserter_func,
                                    rows * 3, function, args)

File: nilmtools/insert.py

@@ -53,7 +53,8 @@ def parse_args(argv = None):
       is stepped forward to match 'clock'.

     - If 'data' is running ahead, there is overlap in the data, and an
-      error is raised.
+      error is raised.  If '--skip' is specified, the current file
+      is skipped instead of raising an error.
     """))
     parser.add_argument("-u", "--url", action="store",
                         default="http://localhost/nilmdb/",
@@ -61,6 +62,8 @@ def parse_args(argv = None):
     group = parser.add_argument_group("Misc options")
     group.add_argument("-D", "--dry-run", action="store_true",
                        help="Parse files, but don't insert any data")
+    group.add_argument("-s", "--skip", action="store_true",
+                       help="Skip files if the data would overlap")
     group.add_argument("-m", "--max-gap", action="store", default=10.0,
                        metavar="SEC", type=float,
                        help="Max discrepancy between clock and data "
@@ -235,6 +238,10 @@ def main(argv = None):
                              "is %s but clock time is only %s",
                              timestamp_to_human(data_ts),
                              timestamp_to_human(clock_ts))
+                if args.skip:
+                    printf("%s\n", err)
+                    printf("Skipping the remainder of this file\n")
+                    break
                 raise ParseError(filename, err)
             if (data_ts + max_gap) < clock_ts:
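
A toy illustration of the rule in the docstring: 'data' timestamps must
never run ahead of 'clock', and --skip turns that error into skipping the
rest of the current file (hypothetical helper, not insert.py's actual code):

# Toy model of the overlap check in nilm-insert.
def check_file(data_ts, clock_ts, skip):
    if data_ts > clock_ts:              # data running ahead: overlap
        if skip:
            return "skipping the remainder of this file"
        raise ValueError("data time is ahead of clock time")
    return "ok"

print check_file(1000, 2000, skip = False)   # -> ok
print check_file(3000, 2000, skip = True)    # -> skipping the remainder ...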

File: nilmtools/prep.py

@@ -12,6 +12,7 @@ import scipy.fftpack
 import scipy.signal
 #from matplotlib import pyplot as p
 import bisect
+from nilmdb.utils.interval import Interval

 def main(argv = None):
     # Set up argument parser
@@ -82,9 +83,20 @@ def main(argv = None):
                                 "prep_column": args.column,
                                 "prep_rotation": repr(rotation) })

-    # Run the processing function on all data
+    # Find the intersection of the usual set of intervals we'd filter,
+    # and the intervals actually present in sinefit data.  This is
+    # what we will process.
+    filter_int = f.intervals()
+    sinefit_int = ( Interval(start, end) for (start, end) in
+                    client_sinefit.stream_intervals(
+                        args.sinepath, start = f.start, end = f.end) )
+    intervals = nilmdb.utils.interval.intersection(filter_int, sinefit_int)
+
+    # Run the process (using the helper in the filter module)
     f.process_numpy(process, args = (client_sinefit, sinefit.path, args.column,
-                                     args.nharm, rotation, args.nshift))
+                                     args.nharm, rotation, args.nshift),
+                    intervals = intervals)

 def process(data, interval, args, insert_function, final):
     (client, sinefit_path, column, nharm, rotation, nshift) = args
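
The intersection helper used above comes from nilmdb.utils.interval; per the
comment in the diff, it yields the sub-intervals present in both inputs.  A
small demonstration with made-up endpoints:

# Intersecting two interval lists, as prep.py now does for raw & sinefit.
import nilmdb.utils.interval
from nilmdb.utils.interval import Interval

raw = [ Interval(0, 100), Interval(200, 300) ]
sinefit = [ Interval(50, 250) ]
for i in nilmdb.utils.interval.intersection(raw, sinefit):
    print i.start, i.end        # prints: 50 100, then 200 250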

File: nilmtools/trainola.py

@@ -106,9 +106,14 @@ class Exemplar(object):
 def peak_detect(data, delta):
     """Simple min/max peak detection algorithm, taken from my code
-    in the disagg.m from the 10-8-5 paper"""
-    mins = [];
-    maxs = [];
+    in the disagg.m from the 10-8-5 paper.
+
+    Returns an array of peaks: each peak is a tuple
+        (n, p, is_max)
+    where n is the row number in 'data', and p is 'data[n]',
+    and is_max is True if this is a maximum, False if it's a minimum.
+    """
+    peaks = [];
     cur_min = (None, np.inf)
     cur_max = (None, -np.inf)
     lookformax = False
@@ -119,15 +124,15 @@ def peak_detect(data, delta):
             cur_min = (n, p)
         if lookformax:
             if p < (cur_max[1] - delta):
-                maxs.append(cur_max)
+                peaks.append((cur_max[0], cur_max[1], True))
                 cur_min = (n, p)
                 lookformax = False
         else:
             if p > (cur_min[1] + delta):
-                mins.append(cur_min)
+                peaks.append((cur_min[0], cur_min[1], False))
                 cur_max = (n, p)
                 lookformax = True
-    return (mins, maxs)
+    return peaks

 def timestamp_to_short_human(timestamp):
     dt = datetime_tz.datetime_tz.fromtimestamp(timestamp_to_seconds(timestamp))
@@ -164,11 +169,35 @@ def trainola_matcher(data, interval, args, insert_func, final_chunk):
     # Find the peaks using the column with the largest amplitude
     biggest = e.scale.index(max(e.scale))
-    peaks_minmax = peak_detect(corrs[biggest], 0.1)
-    peaks = [ p[0] for p in peaks_minmax[1] ]
+    peaks = peak_detect(corrs[biggest], 0.1)

-    # Now look at every peak
-    for row in peaks:
+    # To try to reduce false positives, discard peaks where
+    # there's a higher-magnitude peak (either min or max) within
+    # one exemplar width nearby.
+    good_peak_locations = []
+    for (i, (n, p, is_max)) in enumerate(peaks):
+        if not is_max:
+            continue
+        ok = True
+        # check up to 'e.count' rows before this one
+        j = i - 1
+        while ok and j >= 0 and peaks[j][0] > (n - e.count):
+            if abs(peaks[j][1]) > abs(p):
+                ok = False
+            j -= 1
+        # check up to 'e.count' rows after this one
+        j = i + 1
+        while ok and j < len(peaks) and peaks[j][0] < (n + e.count):
+            if abs(peaks[j][1]) > abs(p):
+                ok = False
+            j += 1
+        if ok:
+            good_peak_locations.append(n)
+
+    # Now look at all good peaks
+    for row in good_peak_locations:
         # Correlation for each column must be close enough to 1.
         for (corr, scale) in zip(corrs, e.scale):
             # The accepted distance from 1 is based on the relative
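
Since peak_detect now returns one flat, ordered list of (row, value, is_max)
tuples, the suppression pass above can scan a maximum's neighbors in both
directions within a single list.  A quick check of the new return format on
synthetic data:

# peak_detect() demo: for this input it finds a min at row 0, a max at
# row 1, a min at row 3, and a max at row 5.
import numpy as np
from nilmtools.trainola import peak_detect

data = np.array([ 0.0, 1.0, 0.0, -1.0, 0.0, 0.5, 0.0 ])
for (n, p, is_max) in peak_detect(data, 0.1):
    print "row %d: value %g (%s)" % (n, p, "max" if is_max else "min")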

File: setup.py

@@ -61,7 +61,7 @@ setup(name='nilmtools',
       long_description = "NILM Database Tools",
       license = "Proprietary",
       author_email = 'jim@jtan.com',
-      install_requires = [ 'nilmdb >= 1.8.1',
+      install_requires = [ 'nilmdb >= 1.8.5',
                            'numpy',
                            'scipy',
                            'python-daemon >= 1.5',