Compare commits

..

6 Commits

Author SHA1 Message Date
33c3586bea trainola: suppress peaks if larger ones are nearby 2013-07-31 19:12:16 -04:00
           Might fix the problem Mark noticed where turn-off transients
           are erroneously matching the drop that follows startup transients.
c1e0f8ffbc Fix bug in copy_one 2013-07-31 14:47:16 -04:00
d2853bdb0e Add test case for bad trainola detections 2013-07-30 20:35:54 -04:00
a4d4bc22fc Add --skip option to nilm-insert 2013-07-30 18:25:47 -04:00
6090dd6112 prep: only process intervals present in both raw & sinefit 2013-07-30 14:55:06 -04:00
Sharon NILM 9c0d9ad324 Sample scripts from Sharon 2013-07-29 18:37:55 -04:00
13 changed files with 179 additions and 24 deletions

View File

@@ -8,26 +8,33 @@ else
         @echo "Try 'make install'"
 endif
-test: test_pipewatch
+test: test_trainola3
 test_pipewatch:
         nilmtools/pipewatch.py -t 3 "seq 10 20" "seq 20 30"
 test_trainola:
-        -nilmtool -u http://bucket/nilmdb remove -s min -e max \
-           /sharon/prep-a-matches
-        nilmtools/trainola.py "$$(cat extras/trainola-test-param-2.js)"
         -nilmtool -u http://bucket/nilmdb remove -s min -e max \
            /sharon/prep-a-matches
         nilmtools/trainola.py "$$(cat extras/trainola-test-param.js)"
+test_trainola2:
+        -nilmtool -u http://bucket/nilmdb remove -s min -e max \
+           /sharon/prep-a-matches
+        nilmtools/trainola.py "$$(cat extras/trainola-test-param-2.js)"
+test_trainola3:
+        -nilmtool -u "http://bucket/nilmdb" destroy -R /test/jim
+        nilmtool -u "http://bucket/nilmdb" create /test/jim uint8_3
+        nilmtools/trainola.py "$$(cat extras/trainola-test-param-3.js)"
+        nilmtool -u "http://bucket/nilmdb" extract /test/jim -s min -e max
 test_cleanup:
         nilmtools/cleanup.py -e extras/cleanup.cfg
         nilmtools/cleanup.py extras/cleanup.cfg
 test_insert:
-        nilmtools/insert.py --file --dry-run /test/foo </dev/null
+        nilmtools/insert.py --skip --file --dry-run /foo/bar ~/data/20130311T2100.prep1.gz ~/data/20130311T2100.prep1.gz ~/data/20130311T2200.prep1.gz
 test_copy:
         nilmtools/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*
@@ -46,7 +53,8 @@ test_prep: /tmp/raw.dat
         nilmtool create /test/sinefit float32_3
         nilmtool create /test/prep float32_8
         nilmtool insert -s '@0' -t -r 8000 /test/raw /tmp/raw.dat
-        nilmtools/sinefit.py -a 0.5 -c 1 /test/raw /test/sinefit
+        nilmtools/sinefit.py -a 0.5 -c 1 -s '@0' -e '@5000000' /test/raw /test/sinefit
+        nilmtools/prep.py -c 2 /test/raw /test/sinefit /test/prep
         nilmtools/prep.py -c 2 /test/raw /test/sinefit /test/prep
         nilmtool extract -s min -e max /test/prep | head -20

View File

@@ -8,7 +8,7 @@ Prerequisites:
     sudo apt-get install python2.7 python2.7-dev python-setuptools
     sudo apt-get install python-numpy python-scipy python-daemon
-    nilmdb (1.8.1+)
+    nilmdb (1.8.5+)
 Install:

View File

@@ -0,0 +1,10 @@
#!/bin/bash
# Start the ethstream capture using nilm-pipewatch
# Bail out on errors
set -e
nilm-pipewatch --daemon --lock "/tmp/nilmdb-capture.lock" --timeout 30 \
"ethstream -a 192.168.1.209 -n 9 -r 8000 -N" \
"nilm-insert -m 10 -r 8000 --live /sharon/raw"

View File

@@ -0,0 +1,8 @@
[/sharon/prep-*]
keep = 1y
[/sharon/raw]
keep = 2w
[/sharon/sinefit]
keep = 1y

View File

@@ -0,0 +1,9 @@
# Install this by running "crontab crontab" (will replace existing crontab)
# m h dom mon dow cmd
# Run NilmDB processing every 5 minutes
*/5 * * * * chronic /home/nilm/data/process.sh
# Check the capture process every minute
*/1 * * * * chronic /home/nilm/data/capture.sh

View File

@@ -0,0 +1,28 @@
#!/bin/bash
# Run all necessary processing on NilmDB data.
# Bail out on errors
set -e
# Ensure only one copy of this code runs at a time:
LOCKFILE="/tmp/nilmdb-process.lock"
exec 99>"$LOCKFILE"
if ! flock -n -x 99 ; then
echo "NilmDB processing already running, giving up..."
exit 0
fi
trap 'rm -f "$LOCKFILE"' 0
# sinefit on phase A voltage
nilm-sinefit -c 5 /sharon/raw /sharon/sinefit
# prep on A, B, C with appropriate rotations
nilm-prep -c 1 -r 0 /sharon/raw /sharon/sinefit /sharon/prep-a
nilm-prep -c 2 -r 120 /sharon/raw /sharon/sinefit /sharon/prep-b
nilm-prep -c 3 -r 240 /sharon/raw /sharon/sinefit /sharon/prep-c
# decimate raw and prep data
nilm-decimate-auto /sharon/raw /sharon/prep*
# run cleanup
nilm-cleanup --yes /home/nilm/data/cleanup.cfg

View File

@@ -0,0 +1,40 @@
{
    "url": "http://bucket/nilmdb",
    "stream": "/sharon/prep-a",
    "dest_stream": "/test/jim",
    "start": 1364184839901599,
    "end": 1364184942407610.2,
    "columns": [ { "index": 0, "name": "P1" } ],
    "exemplars": [
        {
            "name": "A - True DBL Freezer ON",
            "dest_column": 0,
            "url": "http://bucket/nilmdb",
            "stream": "/sharon/prep-a",
            "columns": [ { "index": 0, "name": "P1" } ],
            "start": 1365277707649000,
            "end": 1365277710705000
        },
        {
            "name": "A - Boiler 1 Fan OFF",
            "dest_column": 1,
            "url": "http://bucket/nilmdb",
            "stream": "/sharon/prep-a",
            "columns": [ { "index": 0, "name": "P1" } ],
            "start": 1364188370735000,
            "end": 1364188373819000
        },
        {
            "name": "A - True DBL Freezer OFF",
            "dest_column": 2,
            "url": "http://bucket/nilmdb",
            "stream": "/sharon/prep-a",
            "columns": [ { "index": 0, "name": "P1" } ],
            "start": 1365278087982000,
            "end": 1365278089340000
        }
    ]
}

View File

@@ -32,7 +32,7 @@ def main(argv = None):
     extractor = NumpyClient(f.src.url).stream_extract_numpy
     inserter = NumpyClient(f.dest.url).stream_insert_numpy_context
     for i in f.intervals():
-        print "Processing", f.interval_string(i)
+        print "Processing", i.human_string()
         with inserter(f.dest.path, i.start, i.end) as insert_ctx:
             for data in extractor(f.src.path, i.start, i.end):
                 insert_ctx.insert(data)

View File

@@ -316,7 +316,8 @@ class Filter(object):
         self._client_dest.stream_update_metadata(self.dest.path, data)
     # The main filter processing method.
-    def process_numpy(self, function, args = None, rows = 100000):
+    def process_numpy(self, function, args = None, rows = 100000,
+                      intervals = None):
         """Calls process_numpy_interval for each interval that currently
            exists in self.src, but doesn't exist in self.dest.  It will
            process the data in chunks as follows:
@@ -325,6 +326,9 @@ class Filter(object):
            corresponding to the data.  The data is converted to a Numpy
            array in chunks of 'rows' rows at a time.
+           If 'intervals' is not None, process those intervals instead of
+           the default list.
            'function' should be defined as:
            # def function(data, interval, args, insert_func, final)
@@ -358,7 +362,7 @@ class Filter(object):
                                              maxrows = rows)
         inserter_func = functools.partial(inserter, self.dest.path)
-        for interval in self.intervals():
+        for interval in (intervals or self.intervals()):
             print "Processing", interval.human_string()
             process_numpy_interval(interval, extractor_func, inserter_func,
                                    rows * 3, function, args)
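
A rough usage sketch of the new 'intervals' argument (not part of this changeset): 'f' is assumed to be an already-configured nilmtools.filter.Filter, and the pass-through function and timestamps below are made up for illustration.

    # Hypothetical sketch: restrict processing to one explicit interval
    # instead of the default "source minus destination" interval list.
    from nilmdb.utils.interval import Interval

    def passthrough(data, interval, args, insert_func, final):
        # Trivial example function: insert the data unchanged and
        # report that every row was consumed.
        insert_func(data)
        return data.shape[0]

    # One hour starting at 2013-01-01 00:00 UTC, in microseconds.
    hour = Interval(1356998400000000, 1357002000000000)
    f.process_numpy(passthrough, args = None, intervals = [ hour ])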

View File

@@ -53,7 +53,8 @@ def parse_args(argv = None):
       is stepped forward to match 'clock'.
     - If 'data' is running ahead, there is overlap in the data, and an
-      error is raised.
+      error is raised.  If '--skip' is specified, the current file
+      is skipped instead of raising an error.
     """))
     parser.add_argument("-u", "--url", action="store",
                         default="http://localhost/nilmdb/",
@@ -61,6 +62,8 @@ def parse_args(argv = None):
     group = parser.add_argument_group("Misc options")
     group.add_argument("-D", "--dry-run", action="store_true",
                        help="Parse files, but don't insert any data")
+    group.add_argument("-s", "--skip", action="store_true",
+                       help="Skip files if the data would overlap")
     group.add_argument("-m", "--max-gap", action="store", default=10.0,
                        metavar="SEC", type=float,
                        help="Max discrepency between clock and data "
@@ -235,6 +238,10 @@ def main(argv = None):
                                  "is %s but clock time is only %s",
                                  timestamp_to_human(data_ts),
                                  timestamp_to_human(clock_ts))
+                if args.skip:
+                    printf("%s\n", err)
+                    printf("Skipping the remainder of this file\n")
+                    break
                 raise ParseError(filename, err)
             if (data_ts + max_gap) < clock_ts:

View File

@@ -12,6 +12,7 @@ import scipy.fftpack
 import scipy.signal
 #from matplotlib import pyplot as p
 import bisect
+from nilmdb.utils.interval import Interval
 
 def main(argv = None):
     # Set up argument parser
@@ -82,9 +83,20 @@ def main(argv = None):
                                     "prep_column": args.column,
                                     "prep_rotation": repr(rotation) })
-    # Run the processing function on all data
+    # Find the intersection of the usual set of intervals we'd filter,
+    # and the intervals actually present in sinefit data.  This is
+    # what we will process.
+    filter_int = f.intervals()
+    sinefit_int = ( Interval(start, end) for (start, end) in
+                    client_sinefit.stream_intervals(
+                        args.sinepath, start = f.start, end = f.end) )
+    intervals = nilmdb.utils.interval.intersection(filter_int, sinefit_int)
+    # Run the process (using the helper in the filter module)
     f.process_numpy(process, args = (client_sinefit, sinefit.path, args.column,
-                                     args.nharm, rotation, args.nshift))
+                                     args.nharm, rotation, args.nshift),
+                    intervals = intervals)
 def process(data, interval, args, insert_function, final):
     (client, sinefit_path, column, nharm, rotation, nshift) = args

View File

@@ -106,9 +106,14 @@ class Exemplar(object):
 def peak_detect(data, delta):
     """Simple min/max peak detection algorithm, taken from my code
-       in the disagg.m from the 10-8-5 paper"""
-    mins = [];
-    maxs = [];
+       in the disagg.m from the 10-8-5 paper.
+
+       Returns an array of peaks: each peak is a tuple
+          (n, p, is_max)
+       where n is the row number in 'data', and p is 'data[n]',
+       and is_max is True if this is a maximum, False if it's a minimum,
+       """
+    peaks = [];
     cur_min = (None, np.inf)
     cur_max = (None, -np.inf)
     lookformax = False
@@ -119,15 +124,15 @@ def peak_detect(data, delta):
             cur_min = (n, p)
         if lookformax:
             if p < (cur_max[1] - delta):
-                maxs.append(cur_max)
+                peaks.append((cur_max[0], cur_max[1], True))
                 cur_min = (n, p)
                 lookformax = False
         else:
             if p > (cur_min[1] + delta):
-                mins.append(cur_min)
+                peaks.append((cur_min[0], cur_min[1], False))
                 cur_max = (n, p)
                 lookformax = True
-    return (mins, maxs)
+    return peaks
 
 def timestamp_to_short_human(timestamp):
     dt = datetime_tz.datetime_tz.fromtimestamp(timestamp_to_seconds(timestamp))
@@ -164,11 +169,35 @@ def trainola_matcher(data, interval, args, insert_func, final_chunk):
     # Find the peaks using the column with the largest amplitude
     biggest = e.scale.index(max(e.scale))
-    peaks_minmax = peak_detect(corrs[biggest], 0.1)
-    peaks = [ p[0] for p in peaks_minmax[1] ]
+    peaks = peak_detect(corrs[biggest], 0.1)
 
-    # Now look at every peak
-    for row in peaks:
+    # To try to reduce false positives, discard peaks where
+    # there's a higher-magnitude peak (either min or max) within
+    # one exemplar width nearby.
+    good_peak_locations = []
+    for (i, (n, p, is_max)) in enumerate(peaks):
+        if not is_max:
+            continue
+        ok = True
+        # check up to 'e.count' rows before this one
+        j = i-1
+        while ok and j >= 0 and peaks[j][0] > (n - e.count):
+            if abs(peaks[j][1]) > abs(p):
+                ok = False
+            j -= 1
+        # check up to 'e.count' rows after this one
+        j = i+1
+        while ok and j < len(peaks) and peaks[j][0] < (n + e.count):
+            if abs(peaks[j][1]) > abs(p):
+                ok = False
+            j += 1
+        if ok:
+            good_peak_locations.append(n)
+
+    # Now look at all good peaks
+    for row in good_peak_locations:
         # Correlation for each column must be close enough to 1.
         for (corr, scale) in zip(corrs, e.scale):
             # The accepted distance from 1 is based on the relative
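
A small standalone sketch (not from this changeset) showing how the new peak_detect() return value can be consumed; the toy input array is made up, and the import assumes nilmtools is installed so that nilmtools/trainola.py is importable as nilmtools.trainola.

    # Each entry is now a (row, value, is_max) tuple instead of the old
    # separate (mins, maxs) lists.
    import numpy as np
    from nilmtools.trainola import peak_detect

    data = np.array([0.0, 1.0, 0.1, -0.9, 0.0, 1.5, 0.2])
    for (n, p, is_max) in peak_detect(data, 0.1):
        kind = "max" if is_max else "min"
        print "row %d: %s peak with value %g" % (n, kind, p)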

View File

@@ -61,7 +61,7 @@ setup(name='nilmtools',
       long_description = "NILM Database Tools",
       license = "Proprietary",
       author_email = 'jim@jtan.com',
-      install_requires = [ 'nilmdb >= 1.8.1',
+      install_requires = [ 'nilmdb >= 1.8.5',
                            'numpy',
                            'scipy',
                            'python-daemon >= 1.5',