Bump required nilmdb version to 1.6.0

filter: Don't include trailing unprocessed data in the inserted intervals
Reverse cleanup order to handle interruptions better
2013-04-11 11:55:11 -04:00 · 2013-04-11 11:53:17 -04:00 · 2013-04-10 18:38:41 -04:00 · 2013-04-10 17:09:52 -04:00 · 2013-04-10 17:01:07 -04:00 · 2013-04-10 16:05:16 -04:00
6 changed files with 113 additions and 42 deletions
--- a/setup.py
+++ b/setup.py
@@ -61,7 +61,7 @@ setup(name='nilmtools',
      long_description = "NILM Database Tools",
      license = "Proprietary",
      author_email = 'jim@jtan.com',
-      install_requires = [ 'nilmdb >= 1.5.0',
+      install_requires = [ 'nilmdb >= 1.6.0',
                           'numpy',
                           'scipy',
                           'matplotlib',
--- a/src/cleanup.py
+++ b/src/cleanup.py
@@ -238,12 +238,15 @@ def main(argv = None):
                       timestamp_to_seconds(total)))
            continue
        printf("  removing data before %s\n", timestamp_to_human(remove_before))
-        if args.yes:
-            client.stream_remove(path, None, remove_before)
-        for ap in streams[path].also_clean_paths:
-            printf("  also removing from %s\n", ap)
+        # Clean in reverse order.  Since we only use the primary stream and not
+        # the decimated streams to figure out which data to remove, removing
+        # the primary stream last means that we might recover more nicely if
+        # we are interrupted and restarted.
+        clean_paths = list(reversed(streams[path].also_clean_paths)) + [ path ]
+        for p in clean_paths:
+            printf("  removing from %s\n", p)
            if args.yes:
-                client.stream_remove(ap, None, remove_before)
+                client.stream_remove(p, None, remove_before)

    # All done
    if not args.yes:
--- a/src/decimate_auto.py
+++ b/src/decimate_auto.py
@@ -4,15 +4,19 @@ import nilmtools.filter
 import nilmtools.decimate
 import nilmdb.client
 import argparse
+import fnmatch

 def main(argv = None):
    parser = argparse.ArgumentParser(
        formatter_class = argparse.RawDescriptionHelpFormatter,
-        version = "1.0",
+        version = nilmtools.__version__,
        description = """\
    Automatically create multiple decimations from a single source
    stream, continuing until the last decimated level contains fewer
    than 500 points total.
+
+    Wildcards and multiple paths are accepted.  Decimated paths are
+    ignored when matching wildcards.
    """)
    parser.add_argument("-u", "--url", action="store",
                        default="http://localhost/nilmdb/",
@@ -23,20 +27,36 @@ def main(argv = None):
                        default = False,
                        help="Force metadata changes if the dest "
                        "doesn't match")
-    parser.add_argument("path", action="store",
+    parser.add_argument("path", action="store", nargs='+',
                        help='Path of base stream')
    args = parser.parse_args(argv)

    # Pull out info about the base stream
    client = nilmdb.client.Client(args.url)

-    info = nilmtools.filter.get_stream_info(client, args.path)
-    if not info:
-        raise Exception("path " + args.path + " not found")
+    # Find list of paths to process
+    streams = [ unicode(s[0]) for s in client.stream_list() ]
+    streams = [ s for s in streams if "~decim-" not in s ]
+    paths = []
+    for path in args.path:
+        new = fnmatch.filter(streams, unicode(path))
+        if not new:
+            print "error: no stream matched path:", path
+            raise SystemExit(1)
+        paths.extend(new)

-    meta = client.stream_get_metadata(args.path)
+    for path in paths:
+        do_decimation(client, args, path)
+
+def do_decimation(client, args, path):
+    print "Decimating", path
+    info = nilmtools.filter.get_stream_info(client, path)
+    if not info:
+        raise Exception("path " + path + " not found")
+
+    meta = client.stream_get_metadata(path)
    if "decimate_source" in meta:
-        print "Stream", args.path, "was decimated from", meta["decimate_source"]
+        print "Stream", path, "was decimated from", meta["decimate_source"]
        print "You need to pass the base stream instead"
        raise SystemExit(1)

@@ -53,7 +73,7 @@ def main(argv = None):
        if info.rows <= 500:
            break
        factor *= args.factor
-        new_path = "%s~decim-%d" % (args.path, factor)
+        new_path = "%s~decim-%d" % (path, factor)

        # Create the stream if needed
        new_info = nilmtools.filter.get_stream_info(client, new_path)
@@ -72,5 +92,7 @@ def main(argv = None):
        # Update info using the newly decimated stream
        info = nilmtools.filter.get_stream_info(client, new_path)

+    return
+
 if __name__ == "__main__":
    main()
--- a/src/filter.py
+++ b/src/filter.py
@@ -275,6 +275,10 @@ class Filter(object):
        Return value of 'function' is the number of data rows processed.
        Unprocessed data will be provided again in a subsequent call
        (unless 'final' is True).
+
+        If unprocessed data remains after the call made with 'final'
+        set to True, the interval being inserted will be ended at the
+        timestamp of the first unprocessed data point.
        """
        if args is None:
            args = []
@@ -319,7 +323,13 @@ class Filter(object):

                # Last call for this contiguous interval
                if old_array.shape[0] != 0:
-                    function(old_array, interval, args, insert_function, True)
+                    processed = function(old_array, interval, args,
+                                         insert_function, True)
+                    if processed != old_array.shape[0]:
+                        # Truncate the interval we're inserting at the first
+                        # unprocessed data point.  This ensures that
+                        # we'll not miss any data when we run again later.
+                        insert_ctx.update_end(old_array[processed][0])

 def main(argv = None):
    # This is just a dummy function; actual filters can use the other
--- a/src/prep.py
+++ b/src/prep.py
@@ -3,6 +3,8 @@
 # Spectral envelope preprocessor.
 # Requires two streams as input: the original raw data, and sinefit data.

+from nilmdb.utils.printf import *
+from nilmdb.utils.time import timestamp_to_human
 import nilmtools.filter
 import nilmdb.client
 from numpy import *
@@ -77,7 +79,8 @@ def main(argv = None):
    # Check and set metadata in prep stream
    f.check_dest_metadata({ "prep_raw_source": f.src.path,
                            "prep_sinefit_source": sinefit.path,
-                            "prep_column": args.column })
+                            "prep_column": args.column,
+                            "prep_rotation": rotation })

    # Run the processing function on all data
    f.process_numpy(process, args = (client_sinefit, sinefit.path, args.column,
@@ -105,7 +108,6 @@ def process(data, interval, args, insert_function, final):
    # Pull out sinefit data for the entire time range of this block
    for sinefit_line in client.stream_extract(sinefit_path,
                                              data[0, 0], data[rows-1, 0]):
-
        def prep_period(t_min, t_max, rot):
            """
            Compute prep coefficients from time t_min to t_max, which
@@ -162,7 +164,15 @@ def process(data, interval, args, insert_function, final):
                break
            processed = idx_max

-    print "Processed", processed, "of", rows, "rows"
+    # If we processed no data but there's lots in here, pretend we
+    # processed half of it.
+    if processed == 0 and rows > 10000:
+        processed = rows / 2
+        printf("%s: warning: no periods found; skipping %d rows\n",
+               timestamp_to_human(data[0][0]), processed)
+    else:
+        printf("%s: processed %d of %d rows\n",
+               timestamp_to_human(data[0][0]), processed, rows)
    return processed

 if __name__ == "__main__":
--- a/src/sinefit.py
+++ b/src/sinefit.py
@@ -18,6 +18,15 @@ def main(argv = None):
    group.add_argument('-f', '--frequency', action='store', type=float,
                       default=60.0,
                       help='Approximate frequency (default: %(default)s)')
+    group.add_argument('-m', '--min-freq', action='store', type=float,
+                       help='Minimum valid frequency '
+                       '(default: approximate frequency / 2))')
+    group.add_argument('-M', '--max-freq', action='store', type=float,
+                       help='Maximum valid frequency '
+                       '(default: approximate frequency * 2))')
+    group.add_argument('-a', '--min-amp', action='store', type=float,
+                       default=20.0,
+                       help='Minimum signal amplitude (default: %(default)s)')

    # Parse arguments
    try:
@@ -34,13 +43,24 @@ def main(argv = None):
        parser.error("need a column number >= 1")
    if args.frequency < 0.1:
        parser.error("frequency must be >= 0.1")
+    if args.min_freq is None:
+        args.min_freq = args.frequency / 2
+    if args.max_freq is None:
+        args.max_freq = args.frequency * 2
+    if (args.min_freq > args.max_freq or
+        args.min_freq > args.frequency or
+        args.max_freq < args.frequency):
+        parser.error("invalid min or max frequency")
+    if args.min_amp < 0:
+        parser.error("min amplitude must be >= 0")

    f.check_dest_metadata({ "sinefit_source": f.src.path,
                            "sinefit_column": args.column })
-    f.process_numpy(process, args = (args.column, args.frequency))
+    f.process_numpy(process, args = (args.column, args.frequency, args.min_amp,
+                                     args.min_freq, args.max_freq))

 def process(data, interval, args, insert_function, final):
-    (column, f_expected) = args
+    (column, f_expected, a_min, f_min, f_max) = args
    rows = data.shape[0]

    # Estimate sampling frequency from timestamps
@@ -66,8 +86,14 @@ def process(data, interval, args, insert_function, final):
        (A, f0, phi, C) = sfit4(this, fs)

        # Check bounds.  If frequency is too crazy, ignore this window
-        if f0 < (f_expected/2) or f0 > (f_expected*2):
-            print "frequency", f0, "too far from expected value", f_expected
+        if f0 < f_min or f0 > f_max:
+            print "frequency", f0, "outside valid range", f_min, "-", f_max
+            start += N
+            continue
+
+        # If amplitude is too low, results are probably just noise
+        if A < a_min:
+            print "amplitude", A, "below minimum threshold", a_min
            start += N
            continue

@@ -158,30 +184,30 @@ def sfit4(data, fs):
    # Convert to Hz
    f0 = i * fs / N

-    ## Fit it
-    # first guess for A0, B0 using 3-parameter fit (step c)
-    w = 2*pi*f0
-    D = c_[cos(w*t), sin(w*t), ones(N)]
-    s = linalg.lstsq(D, data)[0]
-
-    # Now iterate 6 times (step i)
-    for idx in range(6):
-        D = c_[cos(w*t), sin(w*t), ones(N),
-              -s[0] * t * sin(w*t) + s[1] * t * cos(w*t) ] # eqn B.16
-        s = linalg.lstsq(D, data)[0] # eqn B.18
-        w = w + s[3]	# update frequency estimate
-
-    ## Extract results
-    A = sqrt(s[0]*s[0] + s[1]*s[1]) # eqn B.21
-    f0 = w / (2*pi)
+    # Fit it.  We'll catch exceptions here and just return zeros
+    # if something fails with the least squares fit, etc.
    try:
+        # first guess for A0, B0 using 3-parameter fit (step c)
+        w = 2*pi*f0
+        D = c_[cos(w*t), sin(w*t), ones(N)]
+        s = linalg.lstsq(D, data)[0]
+
+        # Now iterate 6 times (step i)
+        for idx in range(6):
+            D = c_[cos(w*t), sin(w*t), ones(N),
+                  -s[0] * t * sin(w*t) + s[1] * t * cos(w*t) ] # eqn B.16
+            s = linalg.lstsq(D, data)[0] # eqn B.18
+            w = w + s[3]	# update frequency estimate
+
+        ## Extract results
+        A = sqrt(s[0]*s[0] + s[1]*s[1]) # eqn B.21
+        f0 = w / (2*pi)
        phi = -arctan2(s[1], s[0]) # eqn B.22
-    except TypeError:
+        C = s[2]
+        return (A, f0, phi, C)
+    except Exception as e:
        # something broke down, just return zeros
        return (0, 0, 0, 0)
-    C = s[2]
-
-    return (A, f0, phi, C)

 if __name__ == "__main__":
    main()
Author	SHA1	Message	Date
Jim Paris	f4fda837ae	Bump required nilmdb version to 1.6.0	2013-04-11 11:55:11 -04:00
Jim Paris	5547d266d0	filter: Don't include trailing unprocessed data in the inserted intervals	2013-04-11 11:53:17 -04:00
Jim Paris	372e977e4a	Reverse cleanup order to handle interruptions better	2013-04-10 18:38:41 -04:00
Jim Paris	640a680704	Increase default min amplitude in sinefit	2013-04-10 17:09:52 -04:00
Jim Paris	2e74e6cd63	Skip over data if we aren't able to process any. Change output format	2013-04-10 17:01:07 -04:00
Jim Paris	de2a794e00	Support wildcards in nilm-decimate-auto	2013-04-10 16:05:16 -04:00
Jim Paris	065a40f265	sinefit: add minimum amplitude check	2013-04-10 15:33:51 -04:00
Jim Paris	65fa43aff1	sinefit: catch all errors in sfit4	2013-04-10 14:36:50 -04:00
Jim Paris	57c23c3792	sinefit: allow user to override min/max frequency detection	2013-04-10 14:36:40 -04:00
Jim Paris	d4c8e4acb4	Include rotation in metadata	2013-04-10 14:36:05 -04:00