trainola: suppress peaks if larger ones are nearby

Might fix the problem Mark noticed where turn-off transients are erroneously matching the drop that follows startup transients.
Fix bug in copy_one
2013-07-31 19:12:16 -04:00 · 2013-07-31 14:47:16 -04:00 · 2013-07-30 20:35:54 -04:00
4 changed files with 91 additions and 15 deletions
--- a/15
+++ b/15
@@ -8,19 +8,26 @@ else
 	@echo "Try 'make install'"
 endif

-test: test_insert
+test: test_trainola3

 test_pipewatch:
 	nilmtools/pipewatch.py -t 3 "seq 10 20" "seq 20 30"

 test_trainola:
-	-nilmtool -u http://bucket/nilmdb remove -s min -e max \
-		/sharon/prep-a-matches
-	nilmtools/trainola.py "$$(cat extras/trainola-test-param-2.js)"
 	-nilmtool -u http://bucket/nilmdb remove -s min -e max \
 		/sharon/prep-a-matches
 	nilmtools/trainola.py "$$(cat extras/trainola-test-param.js)"

+test_trainola2:
+	-nilmtool -u http://bucket/nilmdb remove -s min -e max \
+		/sharon/prep-a-matches
+	nilmtools/trainola.py "$$(cat extras/trainola-test-param-2.js)"
+
+test_trainola3:
+	-nilmtool -u "http://bucket/nilmdb" destroy -R /test/jim
+	nilmtool -u "http://bucket/nilmdb" create /test/jim uint8_3
+	nilmtools/trainola.py "$$(cat extras/trainola-test-param-3.js)"
+	nilmtool -u "http://bucket/nilmdb" extract /test/jim -s min -e max

 test_cleanup:
 	nilmtools/cleanup.py -e extras/cleanup.cfg
--- a/extras/trainola-test-param-3.js
+++ b/extras/trainola-test-param-3.js
@@ -0,0 +1,40 @@
+{
+    "url": "http://bucket/nilmdb",
+    "stream": "/sharon/prep-a",
+    "dest_stream": "/test/jim",
+    "start": 1364184839901599,
+    "end": 1364184942407610.2,
+
+    "columns": [ { "index": 0, "name": "P1" } ],
+
+    "exemplars": [
+        {
+            "name": "A - True DBL Freezer ON",
+            "dest_column": 0,
+            "url": "http://bucket/nilmdb",
+            "stream": "/sharon/prep-a",
+            "columns": [ { "index": 0, "name": "P1" } ],
+            "start": 1365277707649000,
+            "end": 1365277710705000
+        },
+        {
+            "name": "A - Boiler 1 Fan OFF",
+            "dest_column": 1,
+            "url": "http://bucket/nilmdb",
+            "stream": "/sharon/prep-a",
+            "columns": [ { "index": 0, "name": "P1" } ],
+            "start": 1364188370735000,
+            "end": 1364188373819000
+        },
+        {
+            "name": "A - True DBL Freezer OFF",
+            "dest_column": 2,
+            "url": "http://bucket/nilmdb",
+            "stream": "/sharon/prep-a",
+            "columns": [ { "index": 0, "name": "P1" } ],
+            "start": 1365278087982000,
+            "end": 1365278089340000
+        }
+   ]
+}
+
--- a/nilmtools/copy_one.py
+++ b/nilmtools/copy_one.py
@@ -32,7 +32,7 @@ def main(argv = None):
    extractor = NumpyClient(f.src.url).stream_extract_numpy
    inserter = NumpyClient(f.dest.url).stream_insert_numpy_context
    for i in f.intervals():
-        print "Processing", f.interval_string(i)
+        print "Processing", i.human_string()
        with inserter(f.dest.path, i.start, i.end) as insert_ctx:
            for data in extractor(f.src.path, i.start, i.end):
                insert_ctx.insert(data)
--- a/nilmtools/trainola.py
+++ b/nilmtools/trainola.py
@@ -106,9 +106,14 @@ class Exemplar(object):

 def peak_detect(data, delta):
    """Simple min/max peak detection algorithm, taken from my code
-    in the disagg.m from the 10-8-5 paper"""
-    mins = [];
-    maxs = [];
+    in the disagg.m from the 10-8-5 paper.
+
+    Returns an array of peaks: each peak is a tuple
+      (n, p, is_max)
+    where n is the row number in 'data', and p is 'data[n]',
+    and is_max is True if this is a maximum, False if it's a minimum,
+    """
+    peaks = [];
    cur_min = (None, np.inf)
    cur_max = (None, -np.inf)
    lookformax = False
@@ -119,15 +124,15 @@ def peak_detect(data, delta):
            cur_min = (n, p)
        if lookformax:
            if p < (cur_max[1] - delta):
-                maxs.append(cur_max)
+                peaks.append((cur_max[0], cur_max[1], True))
                cur_min = (n, p)
                lookformax = False
        else:
            if p > (cur_min[1] + delta):
-                mins.append(cur_min)
+                peaks.append((cur_min[0], cur_min[1], False))
                cur_max = (n, p)
                lookformax = True
-    return (mins, maxs)
+    return peaks

 def timestamp_to_short_human(timestamp):
    dt = datetime_tz.datetime_tz.fromtimestamp(timestamp_to_seconds(timestamp))
@@ -164,11 +169,35 @@ def trainola_matcher(data, interval, args, insert_func, final_chunk):

        # Find the peaks using the column with the largest amplitude
        biggest = e.scale.index(max(e.scale))
-        peaks_minmax = peak_detect(corrs[biggest], 0.1)
-        peaks = [ p[0] for p in peaks_minmax[1] ]
+        peaks = peak_detect(corrs[biggest], 0.1)

-        # Now look at every peak
-        for row in peaks:
+        # To try to reduce false positives, discard peaks where
+        # there's a higher-magnitude peak (either min or max) within
+        # one exemplar width nearby.
+        good_peak_locations = []
+        for (i, (n, p, is_max)) in enumerate(peaks):
+            if not is_max:
+                continue
+            ok = True
+            # check up to 'e.count' rows before this one
+            j = i-1
+            while ok and j >= 0 and peaks[j][0] > (n - e.count):
+                if abs(peaks[j][1]) > abs(p):
+                    ok = False
+                j -= 1
+
+            # check up to 'e.count' rows after this one
+            j = i+1
+            while ok and j < len(peaks) and peaks[j][0] < (n + e.count):
+                if abs(peaks[j][1]) > abs(p):
+                    ok = False
+                j += 1
+
+            if ok:
+                good_peak_locations.append(n)
+
+        # Now look at all good peaks
+        for row in good_peak_locations:
            # Correlation for each column must be close enough to 1.
            for (corr, scale) in zip(corrs, e.scale):
                # The accepted distance from 1 is based on the relative
Author	SHA1	Message	Date
Jim Paris	33c3586bea	trainola: suppress peaks if larger ones are nearby Might fix the problem Mark noticed where turn-off transients are erroneously matching the drop that follows startup transients.	2013-07-31 19:12:16 -04:00
Jim Paris	c1e0f8ffbc	Fix bug in copy_one	2013-07-31 14:47:16 -04:00
Jim Paris	d2853bdb0e	Add test case for bad trainola detections	2013-07-30 20:35:54 -04:00