Compare commits

...

35 Commits

Author SHA1 Message Date
a4d4bc22fc Add --skip option to nilm-insert 2013-07-30 18:25:47 -04:00
6090dd6112 prep: only process intervals present in both raw & sinefit 2013-07-30 14:55:06 -04:00
Sharon NILM 9c0d9ad324 Sample scripts from Sharon 2013-07-29 18:37:55 -04:00
Sharon NILM 8b9c5d4898 Fix daemon dependency 2013-07-29 17:40:51 -04:00
cf2c28b0fb Add --daemon flag 2013-07-29 17:16:18 -04:00
87a26c907b Watch for process termination too 2013-07-29 15:08:49 -04:00
def465b57c Improve pipewatch; add nilm-pipewatch script 2013-07-29 14:58:15 -04:00
0589b8d316 start of pipewatch util 2013-07-29 14:10:56 -04:00
9c5f07106d Don't need python-pip 2013-07-20 16:15:29 -04:00
62e11a11c0 Fix issue with column ordering in the exemplars
If the max scale in the exemplar was a column we weren't using, it
would bail out when looking for that correlation later.  Change things
around so exemplars in RAM only keep around the columns we care about.
2013-07-18 22:51:27 -04:00
2bdcee2c36 More helpful error if exemplar stream doesn't exist 2013-07-15 15:19:52 -04:00
6dce8c5296 More output 2013-07-11 18:56:53 -04:00
25c35a56f6 Trainola inserts into the destination stream now 2013-07-10 12:59:39 -04:00
d610deaef0 More trainola work 2013-07-10 11:38:32 -04:00
d7d5ccc9a7 More filter cleanup 2013-07-09 19:27:20 -04:00
f28753ff5c Move process_numpy_interval outside the class 2013-07-09 18:40:49 -04:00
c9c2e0d5a8 Improve split between process_numpy and process_numpy_interval 2013-07-09 18:09:05 -04:00
5a2a32bec5 WIP on trainola improvements 2013-07-09 17:56:26 -04:00
706c3933f9 Add trainola from nilmrun 2013-07-09 17:55:57 -04:00
cfd1719152 Use nilmdb.utils.interval.optimize; bump nilmdb min version 2013-07-09 17:53:04 -04:00
c62fb45980 Makefile cleanup; add nilm-trainola binary 2013-07-09 16:53:47 -04:00
57d856f2fa Split filter.py internals up a little more
This makes it easier to use the filter stuff from other code, but it's
also turning it into more of a spaghetti nightmare.  Might not be
worth continuing down this path.
2013-07-09 16:52:00 -04:00
5d83d93019 Rename src/ directory to nilmtools/ 2013-07-08 11:54:13 -04:00
5f847a0513 Split process_numpy innards process_numpy_interval 2013-07-03 12:07:22 -04:00
29cd7eb6c7 Improve test_prep target in Makefile 2013-07-03 12:06:50 -04:00
62c8af41ea Cleanup comments 2013-06-06 15:34:23 -04:00
4f6bc48619 sinefit: include timestamps on marking output too 2013-05-11 11:00:31 -04:00
cf9eb0ed48 Improve sinefit resiliency 2013-05-10 14:19:55 -04:00
32066fc260 Remove hard matplotlib dependency 2013-05-09 13:17:36 -04:00
739da3f973 Add median filter 2013-05-08 23:36:50 -04:00
83ad18ebf6 Fix non-string arguments to metadata_check 2013-05-08 12:49:38 -04:00
c76d527f95 Fix unicode handling in filter metadata match 2013-05-07 12:40:53 -04:00
b8a73278e7 Always store metadata rotation as a string 2013-04-29 14:25:11 -04:00
ce0691d6c4 sinefit: Change sfit4 to fit to \sin instead of \cos
And adjust the period locator accordingly.
Fitting \sin is the same mathematically, it's just conceptually more
straightforward since we're locating zero crossings anyway.
2013-04-27 18:12:20 -04:00
4da658e960 sinefit: move initial estimate into the main iteration loop
Just a little less code.  Same results.
2013-04-27 17:50:23 -04:00
23 changed files with 894 additions and 144 deletions


@@ -8,22 +8,37 @@ else
@echo "Try 'make install'"
endif
test: test_cleanup
test: test_insert
test_pipewatch:
nilmtools/pipewatch.py -t 3 "seq 10 20" "seq 20 30"
test_trainola:
-nilmtool -u http://bucket/nilmdb remove -s min -e max \
/sharon/prep-a-matches
nilmtools/trainola.py "$$(cat extras/trainola-test-param-2.js)"
-nilmtool -u http://bucket/nilmdb remove -s min -e max \
/sharon/prep-a-matches
nilmtools/trainola.py "$$(cat extras/trainola-test-param.js)"
test_cleanup:
src/cleanup.py -e extras/cleanup.cfg
src/cleanup.py extras/cleanup.cfg
nilmtools/cleanup.py -e extras/cleanup.cfg
nilmtools/cleanup.py extras/cleanup.cfg
test_insert:
@make install >/dev/null
src/insert.py --file --dry-run /test/foo </dev/null
nilmtools/insert.py --skip --file --dry-run /foo/bar ~/data/20130311T2100.prep1.gz ~/data/20130311T2100.prep1.gz ~/data/20130311T2200.prep1.gz
test_copy:
@make install >/dev/null
src/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*
nilmtools/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*
test_prep:
@make install >/dev/null
/tmp/raw.dat:
octave --eval 'fs = 8000;' \
--eval 't = (0:fs*10)*2*pi*60/fs;' \
--eval 'raw = transpose([sin(t); 0.3*sin(3*t)+sin(t)]);' \
--eval 'save("-ascii","/tmp/raw.dat","raw");'
test_prep: /tmp/raw.dat
-nilmtool destroy -R /test/raw
-nilmtool destroy -R /test/sinefit
-nilmtool destroy -R /test/prep
@@ -31,8 +46,9 @@ test_prep:
nilmtool create /test/sinefit float32_3
nilmtool create /test/prep float32_8
nilmtool insert -s '@0' -t -r 8000 /test/raw /tmp/raw.dat
src/sinefit.py -c 1 /test/raw /test/sinefit
src/prep.py -c 2 /test/raw /test/sinefit /test/prep
nilmtools/sinefit.py -a 0.5 -c 1 -s '@0' -e '@5000000' /test/raw /test/sinefit
nilmtools/prep.py -c 2 /test/raw /test/sinefit /test/prep
nilmtools/prep.py -c 2 /test/raw /test/sinefit /test/prep
nilmtool extract -s min -e max /test/prep | head -20
test_decimate:
@@ -40,8 +56,8 @@ test_decimate:
-@nilmtool destroy /lees-compressor/no-leak/raw/16 || true
-@nilmtool create /lees-compressor/no-leak/raw/4 float32_18 || true
-@nilmtool create /lees-compressor/no-leak/raw/16 float32_18 || true
time python src/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/1 /lees-compressor/no-leak/raw/4
python src/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/4 /lees-compressor/no-leak/raw/16
time python nilmtools/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/1 /lees-compressor/no-leak/raw/4
python nilmtools/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/4 /lees-compressor/no-leak/raw/16
version:
python setup.py version
@@ -63,4 +79,4 @@ clean::
gitclean::
git clean -dXf
.PHONY: all version dist sdist install clean gitclean
.PHONY: all version dist sdist install clean gitclean test


@@ -6,9 +6,9 @@ Prerequisites:
# Runtime and build environments
sudo apt-get install python2.7 python2.7-dev python-setuptools
sudo apt-get install python-numpy python-scipy python-matplotlib
sudo apt-get install python-numpy python-scipy python-daemon
nilmdb (1.5.0+)
nilmdb (1.8.5+)
Install:


@@ -0,0 +1,10 @@
#!/bin/bash
# Start the ethstream capture using nilm-pipewatch
# Bail out on errors
set -e
nilm-pipewatch --daemon --lock "/tmp/nilmdb-capture.lock" --timeout 30 \
"ethstream -a 192.168.1.209 -n 9 -r 8000 -N" \
"nilm-insert -m 10 -r 8000 --live /sharon/raw"


@@ -0,0 +1,8 @@
[/sharon/prep-*]
keep = 1y
[/sharon/raw]
keep = 2w
[/sharon/sinefit]
keep = 1y


@@ -0,0 +1,9 @@
# Install this by running "crontab crontab" (will replace existing crontab)
# m h dom mon dow cmd
# Run NilmDB processing every 5 minutes
*/5 * * * * chronic /home/nilm/data/process.sh
# Check the capture process every minute
*/1 * * * * chronic /home/nilm/data/capture.sh


@@ -0,0 +1,28 @@
#!/bin/bash
# Run all necessary processing on NilmDB data.
# Bail out on errors
set -e
# Ensure only one copy of this code runs at a time:
LOCKFILE="/tmp/nilmdb-process.lock"
exec 99>"$LOCKFILE"
if ! flock -n -x 99 ; then
echo "NilmDB processing already running, giving up..."
exit 0
fi
trap 'rm -f "$LOCKFILE"' 0
# sinefit on phase A voltage
nilm-sinefit -c 5 /sharon/raw /sharon/sinefit
# prep on A, B, C with appropriate rotations
nilm-prep -c 1 -r 0 /sharon/raw /sharon/sinefit /sharon/prep-a
nilm-prep -c 2 -r 120 /sharon/raw /sharon/sinefit /sharon/prep-b
nilm-prep -c 3 -r 240 /sharon/raw /sharon/sinefit /sharon/prep-c
# decimate raw and prep data
nilm-decimate-auto /sharon/raw /sharon/prep*
# run cleanup
nilm-cleanup --yes /home/nilm/data/cleanup.cfg


@@ -0,0 +1,29 @@
{ "columns" : [ { "index" : 0, "name" : "P1" },
{ "index" : 1, "name" : "Q1" },
{ "index" : 2, "name" : "P3" } ],
"stream" : "/sharon/prep-a",
"url" : "http://bucket.mit.edu/nilmdb",
"dest_stream" : "/sharon/prep-a-matches",
"start" : 1365153062643133.5,
"end" : 1365168814443575.5,
"exemplars" : [ { "columns" : [ { "index" : 0,
"name" : "P1"
} ],
"dest_column" : 0,
"end" : 1365073657682000,
"name" : "Turn ON",
"start" : 1365073654321000,
"stream" : "/sharon/prep-a",
"url" : "http://bucket.mit.edu/nilmdb"
},
{ "columns" : [ { "index" : 2, "name" : "P3" },
{ "index" : 0, "name" : "P1" } ],
"dest_column" : 1,
"end" : 1365176528818000,
"name" : "Type 2 turn ON",
"start" : 1365176520030000,
"stream" : "/sharon/prep-a",
"url" : "http://bucket.mit.edu/nilmdb"
}
]
}


@@ -0,0 +1,31 @@
{ "url": "http://bucket.mit.edu/nilmdb",
"dest_stream": "/sharon/prep-a-matches",
"stream": "/sharon/prep-a",
"start": 1366111383280463,
"end": 1366126163457797,
"columns": [ { "name": "P1", "index": 0 },
{ "name": "Q1", "index": 1 },
{ "name": "P3", "index": 2 } ],
"exemplars": [
{ "name": "Boiler Pump ON",
"url": "http://bucket.mit.edu/nilmdb",
"stream": "/sharon/prep-a",
"start": 1366260494269078,
"end": 1366260608185031,
"dest_column": 0,
"columns": [ { "name": "P1", "index": 0 },
{ "name": "Q1", "index": 1 }
]
},
{ "name": "Boiler Pump OFF",
"url": "http://bucket.mit.edu/nilmdb",
"stream": "/sharon/prep-a",
"start": 1366260864215764,
"end": 1366260870882998,
"dest_column": 1,
"columns": [ { "name": "P1", "index": 0 },
{ "name": "Q1", "index": 1 }
]
}
]
}
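The two JSON files above are the parameter sets that the Makefile's test_trainola target feeds to nilmtools/trainola.py on the command line. A minimal sketch of driving the same entry point from Python instead, assuming the file is available at extras/trainola-test-param.js as in the Makefile:

import json
import nilmtools.trainola

# Load one of the example parameter files and hand the resulting dict
# directly to trainola(), bypassing the command-line wrapper in main().
with open("extras/trainola-test-param.js") as f:
    conf = json.load(f)
nilmtools.trainola.trainola(conf)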


@@ -181,7 +181,7 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False)
tag_prefix = "nilmtools-"
parentdir_prefix = "nilmtools-"
versionfile_source = "src/_version.py"
versionfile_source = "nilmtools/_version.py"
def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
variables = { "refnames": git_refnames, "full": git_full }


@@ -19,6 +19,10 @@ import re
import argparse
import numpy as np
import cStringIO
import functools
class ArgumentError(Exception):
pass
class MissingDestination(Exception):
def __init__(self, args, src, dest):
@@ -65,6 +69,70 @@ def get_stream_info(client, path):
return None
return StreamInfo(client.geturl(), streams[0])
# Filter processing for a single interval of data.
def process_numpy_interval(interval, extractor, inserter, warn_rows,
function, args = None):
"""For the given 'interval' of data, extract data, process it
through 'function', and insert the result.
'extractor' should be a function like NumpyClient.stream_extract_numpy
but with the interval 'start' and 'end' as the only parameters,
e.g.:
extractor = functools.partial(NumpyClient.stream_extract_numpy,
src_path, layout = l, maxrows = m)
'inserter' should be a function like NumpyClient.stream_insert_context
but with the interval 'start' and 'end' as the only parameters, e.g.:
inserter = functools.partial(NumpyClient.stream_insert_context,
dest_path)
If 'warn_rows' is not None, print a warning to stdout when the
number of unprocessed rows exceeds this amount.
See process_numpy for details on 'function' and 'args'.
"""
if args is None:
args = []
with inserter(interval.start, interval.end) as insert_ctx:
insert_func = insert_ctx.insert
old_array = np.array([])
for new_array in extractor(interval.start, interval.end):
# If we still had old data left, combine it
if old_array.shape[0] != 0:
array = np.vstack((old_array, new_array))
else:
array = new_array
# Pass the data to the user provided function
processed = function(array, interval, args, insert_func, False)
# Send any pending data that the user function inserted
insert_ctx.send()
# Save the unprocessed parts
if processed >= 0:
old_array = array[processed:]
else:
raise Exception(
sprintf("%s return value %s must be >= 0",
str(function), str(processed)))
# Warn if there's too much data remaining
if warn_rows is not None and old_array.shape[0] > warn_rows:
printf("warning: %d unprocessed rows in buffer\n",
old_array.shape[0])
# Last call for this contiguous interval
if old_array.shape[0] != 0:
processed = function(old_array, interval, args,
insert_func, True)
if processed != old_array.shape[0]:
# Truncate the interval we're inserting at the first
# unprocessed data point. This ensures that
# we'll not miss any data when we run again later.
insert_ctx.update_end(old_array[processed][0])
class Filter(object):
def __init__(self, parser_description = None):
@@ -134,63 +202,52 @@ class Filter(object):
self._parser = parser
return parser
def interval_string(self, interval):
return sprintf("[ %s -> %s ]",
timestamp_to_human(interval.start),
timestamp_to_human(interval.end))
def parse_args(self, argv = None):
args = self._parser.parse_args(argv)
if args.dest_url is None:
args.dest_url = args.url
if args.url != args.dest_url:
def set_args(self, url, dest_url, srcpath, destpath, start, end,
parsed_args = None, quiet = True):
"""Set arguments directly from parameters"""
if dest_url is None:
dest_url = url
if url != dest_url:
self.interhost = True
self._client_src = Client(args.url)
self._client_dest = Client(args.dest_url)
self._client_src = Client(url)
self._client_dest = Client(dest_url)
if (not self.interhost) and (args.srcpath == args.destpath):
self._parser.error("source and destination path must be different")
if (not self.interhost) and (srcpath == destpath):
raise ArgumentError("source and destination path must be different")
# Open and print info about the streams
self.src = get_stream_info(self._client_src, args.srcpath)
# Open the streams
self.src = get_stream_info(self._client_src, srcpath)
if not self.src:
self._parser.error("source path " + args.srcpath + " not found")
raise ArgumentError("source path " + srcpath + " not found")
self.dest = get_stream_info(self._client_dest, args.destpath)
self.dest = get_stream_info(self._client_dest, destpath)
if not self.dest:
raise MissingDestination(args, self.src,
StreamInfo(args.dest_url, [args.destpath]))
raise MissingDestination(parsed_args, self.src,
StreamInfo(dest_url, [destpath]))
print "Source:", self.src.string(self.interhost)
print " Dest:", self.dest.string(self.interhost)
self.start = start
self.end = end
if args.dry_run:
for interval in self.intervals():
print self.interval_string(interval)
raise SystemExit(0)
# Print info
if not quiet:
print "Source:", self.src.string(self.interhost)
print " Dest:", self.dest.string(self.interhost)
def parse_args(self, argv = None):
"""Parse arguments from a command line"""
args = self._parser.parse_args(argv)
self.set_args(args.url, args.dest_url, args.srcpath, args.destpath,
args.start, args.end, quiet = False, parsed_args = args)
self.force_metadata = args.force_metadata
self.start = args.start
self.end = args.end
if args.dry_run:
for interval in self.intervals():
print interval.human_string()
raise SystemExit(0)
return args
def _optimize_int(self, it):
"""Join and yield adjacent intervals from the iterator 'it'"""
saved_int = None
for interval in it:
if saved_int is not None:
if saved_int.end == interval.start:
interval.start = saved_int.start
else:
yield saved_int
saved_int = interval
if saved_int is not None:
yield saved_int
def intervals(self):
"""Generate all the intervals that this filter should process"""
self._using_client = True
@@ -217,12 +274,13 @@ class Filter(object):
self.src.path, diffpath = self.dest.path,
start = self.start, end = self.end) )
# Optimize intervals: join intervals that are adjacent
for interval in self._optimize_int(intervals):
for interval in nilmdb.utils.interval.optimize(intervals):
yield interval
self._using_client = False
# Misc helpers
def arg_time(self, toparse):
@staticmethod
def arg_time(toparse):
"""Parse a time string argument"""
try:
return nilmdb.utils.time.parse_time(toparse)
@@ -236,8 +294,14 @@ class Filter(object):
metadata = self._client_dest.stream_get_metadata(self.dest.path)
if not self.force_metadata:
for key in data:
wanted = str(data[key])
wanted = data[key]
if not isinstance(wanted, basestring):
wanted = str(wanted)
val = metadata.get(key, wanted)
# Force UTF-8 encoding for comparison and display
wanted = wanted.encode('utf-8')
val = val.encode('utf-8')
key = key.encode('utf-8')
if val != wanted and self.dest.rows > 0:
m = "Metadata in destination stream:\n"
m += " %s = %s\n" % (key, val)
@@ -252,14 +316,21 @@ class Filter(object):
self._client_dest.stream_update_metadata(self.dest.path, data)
# The main filter processing method.
def process_numpy(self, function, args = None, rows = 100000):
"""For all intervals that exist in self.src but don't exist in
self.dest, call 'function' with a Numpy array corresponding to
the data. The data is converted to a Numpy array in chunks of
'rows' rows at a time.
def process_numpy(self, function, args = None, rows = 100000,
intervals = None):
"""Calls process_numpy_interval for each interval that currently
exists in self.src, but doesn't exist in self.dest. It will
process the data in chunks as follows:
For each chunk of data, call 'function' with a Numpy array
corresponding to the data. The data is converted to a Numpy
array in chunks of 'rows' rows at a time.
If 'intervals' is not None, process those intervals instead of
the default list.
'function' should be defined as:
def function(data, interval, args, insert_func, final)
# def function(data, interval, args, insert_func, final)
'data': array of data to process -- may be empty
@@ -283,56 +354,18 @@ class Filter(object):
being inserted will be ended at the timestamp of the first
unprocessed data point.
"""
if args is None:
args = []
extractor = NumpyClient(self.src.url).stream_extract_numpy
inserter = NumpyClient(self.dest.url).stream_insert_numpy_context
for interval in self.intervals():
print "Processing", self.interval_string(interval)
with inserter(self.dest.path,
interval.start, interval.end) as insert_ctx:
insert_function = insert_ctx.insert
old_array = np.array([])
for new_array in extractor(self.src.path,
interval.start, interval.end,
extractor_func = functools.partial(extractor, self.src.path,
layout = self.src.layout,
maxrows = rows):
# If we still had old data left, combine it
if old_array.shape[0] != 0:
array = np.vstack((old_array, new_array))
else:
array = new_array
maxrows = rows)
inserter_func = functools.partial(inserter, self.dest.path)
# Pass it to the process function
processed = function(array, interval, args,
insert_function, False)
# Send any pending data
insert_ctx.send()
# Save the unprocessed parts
if processed >= 0:
old_array = array[processed:]
else:
raise Exception(
sprintf("%s return value %s must be >= 0",
str(function), str(processed)))
# Warn if there's too much data remaining
if old_array.shape[0] > 3 * rows:
printf("warning: %d unprocessed rows in buffer\n",
old_array.shape[0])
# Last call for this contiguous interval
if old_array.shape[0] != 0:
processed = function(old_array, interval, args,
insert_function, True)
if processed != old_array.shape[0]:
# Truncate the interval we're inserting at the first
# unprocessed data point. This ensures that
# we'll not miss any data when we run again later.
insert_ctx.update_end(old_array[processed][0])
for interval in (intervals or self.intervals()):
print "Processing", interval.human_string()
process_numpy_interval(interval, extractor_func, inserter_func,
rows * 3, function, args)
def main(argv = None):
# This is just a dummy function; actual filters can use the other
@@ -341,7 +374,7 @@ def main(argv = None):
parser = f.setup_parser()
args = f.parse_args(argv)
for i in f.intervals():
print "Generic filter: need to handle", f.interval_string(i)
print "Generic filter: need to handle", i.human_string()
if __name__ == "__main__":
main()
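The refactored filter code is meant to be driven the way the process_numpy_interval docstring describes: wrap the NumpyClient extract and insert calls with functools.partial so that only the interval start/end remain as parameters, then hand each interval to process_numpy_interval. A condensed sketch under assumed values (the URL, stream paths, layout, timestamps, and the pass-through function are all illustrative):

import functools
import nilmdb.client.numpyclient
import nilmtools.filter
from nilmdb.utils.interval import Interval

client = nilmdb.client.numpyclient.NumpyClient("http://localhost/nilmdb/")
extractor = functools.partial(client.stream_extract_numpy,
                              "/test/raw", layout="float32_8", maxrows=100000)
inserter = functools.partial(client.stream_insert_numpy_context, "/test/copy")

def passthrough(data, interval, args, insert_func, final):
    # Insert every row unchanged and report all of them as processed.
    insert_func(data)
    return data.shape[0]

# Timestamps here are placeholders; real callers would get intervals
# from Filter.intervals() or stream_intervals().
for interval in [Interval(0, 5000000)]:
    nilmtools.filter.process_numpy_interval(interval, extractor, inserter,
                                            300000, passthrough)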


@@ -53,7 +53,8 @@ def parse_args(argv = None):
is stepped forward to match 'clock'.
- If 'data' is running ahead, there is overlap in the data, and an
error is raised.
error is raised. If '--ignore' is specified, the current file
is skipped instead of raising an error.
"""))
parser.add_argument("-u", "--url", action="store",
default="http://localhost/nilmdb/",
@@ -61,6 +62,8 @@ def parse_args(argv = None):
group = parser.add_argument_group("Misc options")
group.add_argument("-D", "--dry-run", action="store_true",
help="Parse files, but don't insert any data")
group.add_argument("-s", "--skip", action="store_true",
help="Skip files if the data would overlap")
group.add_argument("-m", "--max-gap", action="store", default=10.0,
metavar="SEC", type=float,
help="Max discrepency between clock and data "
@@ -235,6 +238,10 @@ def main(argv = None):
"is %s but clock time is only %s",
timestamp_to_human(data_ts),
timestamp_to_human(clock_ts))
if args.skip:
printf("%s\n", err)
printf("Skipping the remainder of this file\n")
break
raise ParseError(filename, err)
if (data_ts + max_gap) < clock_ts:

nilmtools/median.py (new executable file, +43 lines)

@@ -0,0 +1,43 @@
#!/usr/bin/python
import nilmtools.filter, scipy.signal
def main(argv = None):
f = nilmtools.filter.Filter()
parser = f.setup_parser("Median Filter")
group = parser.add_argument_group("Median filter options")
group.add_argument("-z", "--size", action="store", type=int, default=25,
help = "median filter size (default %(default)s)")
group.add_argument("-d", "--difference", action="store_true",
help = "store difference rather than filtered values")
try:
args = f.parse_args(argv)
except nilmtools.filter.MissingDestination as e:
print "Source is %s (%s)" % (e.src.path, e.src.layout)
print "Destination %s doesn't exist" % (e.dest.path)
print "You could make it with a command like:"
print " nilmtool -u %s create %s %s" % (e.dest.url,
e.dest.path, e.src.layout)
raise SystemExit(1)
meta = f.client_src.stream_get_metadata(f.src.path)
f.check_dest_metadata({ "median_filter_source": f.src.path,
"median_filter_size": args.size,
"median_filter_difference": repr(args.difference) })
f.process_numpy(median_filter, args = (args.size, args.difference))
def median_filter(data, interval, args, insert, final):
(size, diff) = args
(rows, cols) = data.shape
for i in range(cols - 1):
filtered = scipy.signal.medfilt(data[:, i+1], size)
if diff:
data[:, i+1] -= filtered
else:
data[:, i+1] = filtered
insert(data)
return rows
if __name__ == "__main__":
main()

nilmtools/pipewatch.py (new executable file, +168 lines)

@@ -0,0 +1,168 @@
#!/usr/bin/python
import nilmdb.client
from nilmdb.utils.printf import *
import nilmdb.utils.lock
import nilmtools
import time
import sys
import os
import argparse
import subprocess
import tempfile
import threading
import select
import signal
import Queue
import daemon
def parse_args(argv = None):
parser = argparse.ArgumentParser(
formatter_class = argparse.ArgumentDefaultsHelpFormatter,
version = nilmtools.__version__,
description = """\
Pipe data from 'generator' to 'consumer'. This is intended to be
executed frequently from cron, and will exit if another copy is
already running. If 'generator' or 'consumer' returns an error,
or if 'generator' stops sending data for a while, it will exit.
Intended for use with ethstream (generator) and nilm-insert
(consumer). Commands are executed through the shell.
""")
parser.add_argument("-d", "--daemon", action="store_true",
help="Run in background")
parser.add_argument("-l", "--lock", metavar="FILENAME", action="store",
default=tempfile.gettempdir() +
"/nilm-pipewatch.lock",
help="Lock file for detecting running instance")
parser.add_argument("-t", "--timeout", metavar="SECONDS", action="store",
type=float, default=30,
help="Restart if no output from " +
"generator for this long")
group = parser.add_argument_group("commands to execute")
group.add_argument("generator", action="store",
help="Data generator (e.g. \"ethstream -r 8000\")")
group.add_argument("consumer", action="store",
help="Data consumer (e.g. \"nilm-insert /foo/bar\")")
args = parser.parse_args(argv)
return args
def reader_thread(queue, fd):
# Read from a file descriptor, write to queue.
try:
while True:
(r, w, x) = select.select([fd], [], [fd], 0.25)
if x:
raise Exception # generator died?
if not r:
# short timeout -- just try again. This is to catch the
# fd being closed elsewhere, which is only detected
# when select restarts.
continue
data = os.read(fd, 65536)
if data == "": # generator EOF
raise Exception
queue.put(data)
except Exception:
queue.put(None)
def watcher_thread(queue, procs):
# Put None in the queue if either process dies
while True:
for p in procs:
if p.poll() is not None:
queue.put(None)
return
time.sleep(0.25)
def pipewatch(args):
# Run the processes, etc
with open(os.devnull, "r") as devnull:
generator = subprocess.Popen(args.generator, shell = True,
bufsize = -1, close_fds = True,
stdin = devnull,
stdout = subprocess.PIPE,
stderr = None)
consumer = subprocess.Popen(args.consumer, shell = True,
bufsize = -11, close_fds = True,
stdin = subprocess.PIPE,
stdout = None, stderr = None)
queue = Queue.Queue(maxsize = 32)
reader = threading.Thread(target = reader_thread,
args = (queue, generator.stdout.fileno()))
reader.start()
watcher = threading.Thread(target = watcher_thread,
args = (queue, [generator, consumer]))
watcher.start()
try:
while True:
try:
data = queue.get(True, args.timeout)
if data is None:
break
consumer.stdin.write(data)
except Queue.Empty:
# Timeout: kill the generator
fprintf(sys.stderr, "pipewatch: timeout\n")
generator.terminate()
break
generator.stdout.close()
consumer.stdin.close()
except IOError:
fprintf(sys.stderr, "pipewatch: I/O error\n")
def kill(proc):
# Wait for a process to end, or kill it
def poll_timeout(proc, timeout):
for x in range(1+int(timeout / 0.1)):
if proc.poll() is not None:
break
time.sleep(0.1)
return proc.poll()
try:
if poll_timeout(proc, 0.5) is None:
proc.terminate()
if poll_timeout(proc, 0.5) is None:
proc.kill()
except OSError:
pass
return poll_timeout(proc, 0.5)
# Wait for them to die, or kill them
gret = kill(generator)
cret = kill(consumer)
fprintf(sys.stderr, "pipewatch: generator returned %d, " +
"consumer returned %d\n", gret, cret)
if gret == 0 and cret == 0:
sys.exit(0)
sys.exit(1)
def main(argv = None):
args = parse_args(argv)
lockfile = open(args.lock, "w")
if not nilmdb.utils.lock.exclusive_lock(lockfile):
printf("pipewatch process already running (according to %s)\n",
args.lock)
sys.exit(0)
try:
# Run as a daemon if requested, otherwise run directly.
if args.daemon:
with daemon.DaemonContext(files_preserve = [ lockfile ]):
pipewatch(args)
else:
pipewatch(args)
finally:
# Clean up lockfile
try:
os.unlink(args.lock)
except OSError:
pass
if __name__ == "__main__":
main()


@@ -12,6 +12,7 @@ import scipy.fftpack
import scipy.signal
#from matplotlib import pyplot as p
import bisect
from nilmdb.utils.interval import Interval
def main(argv = None):
# Set up argument parser
@@ -80,11 +81,22 @@ def main(argv = None):
f.check_dest_metadata({ "prep_raw_source": f.src.path,
"prep_sinefit_source": sinefit.path,
"prep_column": args.column,
"prep_rotation": rotation })
"prep_rotation": repr(rotation) })
# Run the processing function on all data
# Find the intersection of the usual set of intervals we'd filter,
# and the intervals actually present in sinefit data. This is
# what we will process.
filter_int = f.intervals()
sinefit_int = ( Interval(start, end) for (start, end) in
client_sinefit.stream_intervals(
args.sinepath, start = f.start, end = f.end) )
intervals = nilmdb.utils.interval.intersection(filter_int, sinefit_int)
# Run the process (using the helper in the filter module)
f.process_numpy(process, args = (client_sinefit, sinefit.path, args.column,
args.nharm, rotation, args.nshift))
args.nharm, rotation, args.nshift),
intervals = intervals)
def process(data, interval, args, insert_function, final):
(client, sinefit_path, column, nharm, rotation, nshift) = args


@@ -1,13 +1,18 @@
#!/usr/bin/python
# Sine wave fitting. This runs about 5x faster than realtime on raw data.
# Sine wave fitting.
from nilmdb.utils.printf import *
import nilmtools.filter
import nilmdb.client
from nilmdb.utils.time import (timestamp_to_human,
timestamp_to_seconds,
seconds_to_timestamp)
from numpy import *
from scipy import *
#import pylab as p
import operator
import sys
def main(argv = None):
f = nilmtools.filter.Filter()
@@ -59,12 +64,40 @@ def main(argv = None):
f.process_numpy(process, args = (args.column, args.frequency, args.min_amp,
args.min_freq, args.max_freq))
class SuppressibleWarning(object):
def __init__(self, maxcount = 10, maxsuppress = 100):
self.maxcount = maxcount
self.maxsuppress = maxsuppress
self.count = 0
self.last_msg = ""
def _write(self, sec, msg):
if sec:
now = timestamp_to_human(seconds_to_timestamp(sec)) + ": "
else:
now = ""
sys.stderr.write(now + msg)
def warn(self, msg, seconds = None):
self.count += 1
if self.count <= self.maxcount:
self._write(seconds, msg)
if (self.count - self.maxcount) >= self.maxsuppress:
self.reset(seconds)
def reset(self, seconds = None):
if self.count > self.maxcount:
self._write(seconds, sprintf("(%d warnings suppressed)\n",
self.count - self.maxcount))
self.count = 0
def process(data, interval, args, insert_function, final):
(column, f_expected, a_min, f_min, f_max) = args
rows = data.shape[0]
# Estimate sampling frequency from timestamps
fs = 1e6 * (rows-1) / (data[-1][0] - data[0][0])
fs = (rows-1) / (timestamp_to_seconds(data[-1][0]) -
timestamp_to_seconds(data[0][0]))
# Pull out about 3.5 periods of data at once;
# we'll expect to match 3 zero crossings in each window
@@ -74,36 +107,41 @@ def process(data, interval, args, insert_function, final):
if rows < N:
return 0
warn = SuppressibleWarning(3, 1000)
# Process overlapping windows
start = 0
num_zc = 0
last_inserted_timestamp = None
while start < (rows - N):
this = data[start:start+N, column]
t_min = data[start, 0]/1e6
t_max = data[start+N-1, 0]/1e6
t_min = timestamp_to_seconds(data[start, 0])
t_max = timestamp_to_seconds(data[start+N-1, 0])
# Do 4-parameter sine wave fit
(A, f0, phi, C) = sfit4(this, fs)
# Check bounds. If frequency is too crazy, ignore this window
if f0 < f_min or f0 > f_max:
print "frequency", f0, "outside valid range", f_min, "-", f_max
warn.warn(sprintf("frequency %s outside valid range %s - %s\n",
str(f0), str(f_min), str(f_max)), t_min)
start += N
continue
# If amplitude is too low, results are probably just noise
if A < a_min:
print "amplitude", A, "below minimum threshold", a_min
warn.warn(sprintf("amplitude %s below minimum threshold %s\n",
str(A), str(a_min)), t_min)
start += N
continue
#p.plot(arange(N), this)
#p.plot(arange(N), A * cos(f0/fs * 2 * pi * arange(N) + phi) + C, 'g')
#p.plot(arange(N), A * sin(f0/fs * 2 * pi * arange(N) + phi) + C, 'g')
# Period starts when the argument of cosine is 3*pi/2 degrees,
# Period starts when the argument of sine is 0 degrees,
# so we're looking for sample number:
# n = (3 * pi / 2 - phi) / (f0/fs * 2 * pi)
zc_n = (3 * pi / 2 - phi) / (f0 / fs * 2 * pi)
# n = (0 - phi) / (f0/fs * 2 * pi)
zc_n = (0 - phi) / (f0 / fs * 2 * pi)
period_n = fs/f0
# Add periods to make N positive
@@ -116,7 +154,13 @@ def process(data, interval, args, insert_function, final):
while zc_n < (N - period_n/2):
#p.plot(zc_n, C, 'ro')
t = t_min + zc_n / fs
insert_function([[t * 1e6, f0, A, C]])
if (last_inserted_timestamp is None or
t > last_inserted_timestamp):
insert_function([[seconds_to_timestamp(t), f0, A, C]])
last_inserted_timestamp = t
warn.reset(t)
else:
warn.warn("timestamp overlap\n", t)
num_zc += 1
last_zc = zc_n
zc_n += period_n
@@ -134,7 +178,13 @@ def process(data, interval, args, insert_function, final):
start = int(round(start + advance))
# Return the number of rows we've processed
print "Marked", num_zc, "zero-crossings in", start, "rows"
warn.reset(last_inserted_timestamp)
if last_inserted_timestamp:
now = timestamp_to_human(seconds_to_timestamp(
last_inserted_timestamp)) + ": "
else:
now = ""
printf("%sMarked %d zero-crossings in %d rows\n", now, num_zc, start)
return start
def sfit4(data, fs):
@@ -149,9 +199,9 @@ def sfit4(data, fs):
Output:
Parameters [A, f0, phi, C] to fit the equation
x[n] = A * cos(f0/fs * 2 * pi * n + phi) + C
x[n] = A * sin(f0/fs * 2 * pi * n + phi) + C
where n is sample number. Or, as a function of time:
x(t) = A * cos(f0 * 2 * pi * t + phi) + C
x(t) = A * sin(f0 * 2 * pi * t + phi) + C
by Jim Paris
(Verified to match sfit4.m)
@@ -188,12 +238,11 @@ def sfit4(data, fs):
# if something fails with the least squares fit, etc.
try:
# first guess for A0, B0 using 3-parameter fit (step c)
s = zeros(3)
w = 2*pi*f0
D = c_[cos(w*t), sin(w*t), ones(N)]
s = linalg.lstsq(D, data)[0]
# Now iterate 6 times (step i)
for idx in range(6):
# Now iterate 7 times (step b, plus 6 iterations of step i)
for idx in range(7):
D = c_[cos(w*t), sin(w*t), ones(N),
-s[0] * t * sin(w*t) + s[1] * t * cos(w*t) ] # eqn B.16
s = linalg.lstsq(D, data)[0] # eqn B.18
@@ -202,7 +251,7 @@ def sfit4(data, fs):
## Extract results
A = sqrt(s[0]*s[0] + s[1]*s[1]) # eqn B.21
f0 = w / (2*pi)
phi = -arctan2(s[1], s[0]) # eqn B.22
phi = arctan2(s[0], s[1]) # eqn B.22 (flipped for sin instead of cos)
C = s[2]
return (A, f0, phi, C)
except Exception as e:
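Switching sfit4 from a cosine to a sine model only changes how the phase is pulled out of the least-squares coefficients; the fitted waveform itself is identical. A quick standalone check of that identity (the coefficients and sampling parameters below are arbitrary, for illustration only):

import numpy as np

# x[n] = s0*cos(w*n) + s1*sin(w*n) + s2 can be expressed either way:
#   cosine form: A*cos(w*n + phi_c) + C  with  phi_c = -arctan2(s1, s0)
#   sine form:   A*sin(w*n + phi_s) + C  with  phi_s =  arctan2(s0, s1)
s0, s1, s2 = 1.3, -0.7, 0.2
w = 2 * np.pi * 60 / 8000.0      # 60 Hz line frequency at 8 kHz sampling
n = np.arange(1000)
x = s0 * np.cos(w * n) + s1 * np.sin(w * n) + s2
A = np.hypot(s0, s1)
phi_c = -np.arctan2(s1, s0)
phi_s = np.arctan2(s0, s1)
assert np.allclose(x, A * np.cos(w * n + phi_c) + s2)
assert np.allclose(x, A * np.sin(w * n + phi_s) + s2)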

nilmtools/trainola.py (new executable file, +304 lines)

@@ -0,0 +1,304 @@
#!/usr/bin/python
from nilmdb.utils.printf import *
import nilmdb.client
import nilmtools.filter
from nilmdb.utils.time import (timestamp_to_human,
timestamp_to_seconds,
seconds_to_timestamp)
from nilmdb.utils import datetime_tz
from nilmdb.utils.interval import Interval
import numpy as np
import scipy
import scipy.signal
from numpy.core.umath_tests import inner1d
import nilmrun
from collections import OrderedDict
import sys
import time
import functools
import collections
class DataError(ValueError):
pass
def build_column_mapping(colinfo, streaminfo):
"""Given the 'columns' list from the JSON data, verify and
pull out a dictionary mapping for the column names/numbers."""
columns = OrderedDict()
for c in colinfo:
col_num = c['index'] + 1 # skip timestamp
if (c['name'] in columns.keys() or col_num in columns.values()):
raise DataError("duplicated columns")
if (c['index'] < 0 or c['index'] >= streaminfo.layout_count):
raise DataError("bad column number")
columns[c['name']] = col_num
if not len(columns):
raise DataError("no columns")
return columns
class Exemplar(object):
def __init__(self, exinfo, min_rows = 10, max_rows = 100000):
"""Given a dictionary entry from the 'exemplars' input JSON,
verify the stream, columns, etc. Then, fetch all the data
into self.data."""
self.name = exinfo['name']
self.url = exinfo['url']
self.stream = exinfo['stream']
self.start = exinfo['start']
self.end = exinfo['end']
self.dest_column = exinfo['dest_column']
# Get stream info
self.client = nilmdb.client.numpyclient.NumpyClient(self.url)
self.info = nilmtools.filter.get_stream_info(self.client, self.stream)
if not self.info:
raise DataError(sprintf("exemplar stream '%s' does not exist " +
"on server '%s'", self.stream, self.url))
# Build up name => index mapping for the columns
self.columns = build_column_mapping(exinfo['columns'], self.info)
# Count points
self.count = self.client.stream_count(self.stream, self.start, self.end)
# Verify count
if self.count == 0:
raise DataError("No data in this exemplar!")
if self.count < min_rows:
raise DataError("Too few data points: " + str(self.count))
if self.count > max_rows:
raise DataError("Too many data points: " + str(self.count))
# Extract the data
datagen = self.client.stream_extract_numpy(self.stream,
self.start, self.end,
self.info.layout,
maxrows = self.count)
self.data = list(datagen)[0]
# Extract just the columns that were specified in self.columns,
# skipping the timestamp.
extract_columns = [ value for (key, value) in self.columns.items() ]
self.data = self.data[:,extract_columns]
# Fix the column indices in e.columns, since we removed/reordered
# columns in self.data
for n, k in enumerate(self.columns):
self.columns[k] = n
# Subtract the means from each column
self.data = self.data - self.data.mean(axis=0)
# Get scale factors for each column by computing dot product
# of each column with itself.
self.scale = inner1d(self.data.T, self.data.T)
# Ensure a minimum (nonzero) scale and convert to list
self.scale = np.maximum(self.scale, [1e-9]).tolist()
def __str__(self):
return sprintf("\"%s\" %s [%s] %s rows",
self.name, self.stream, ",".join(self.columns.keys()),
self.count)
def peak_detect(data, delta):
"""Simple min/max peak detection algorithm, taken from my code
in the disagg.m from the 10-8-5 paper"""
mins = [];
maxs = [];
cur_min = (None, np.inf)
cur_max = (None, -np.inf)
lookformax = False
for (n, p) in enumerate(data):
if p > cur_max[1]:
cur_max = (n, p)
if p < cur_min[1]:
cur_min = (n, p)
if lookformax:
if p < (cur_max[1] - delta):
maxs.append(cur_max)
cur_min = (n, p)
lookformax = False
else:
if p > (cur_min[1] + delta):
mins.append(cur_min)
cur_max = (n, p)
lookformax = True
return (mins, maxs)
def timestamp_to_short_human(timestamp):
dt = datetime_tz.datetime_tz.fromtimestamp(timestamp_to_seconds(timestamp))
return dt.strftime("%H:%M:%S")
def trainola_matcher(data, interval, args, insert_func, final_chunk):
"""Perform cross-correlation match"""
( src_columns, dest_count, exemplars ) = args
nrows = data.shape[0]
# We want at least 10% more points than the widest exemplar.
widest = max([ x.count for x in exemplars ])
if (widest * 1.1) > nrows:
return 0
# This is how many points we'll consider valid in the
# cross-correlation.
valid = nrows + 1 - widest
matches = collections.defaultdict(list)
# Try matching against each of the exemplars
for e in exemplars:
corrs = []
# Compute cross-correlation for each column
for col_name in e.columns:
a = data[:, src_columns[col_name]]
b = e.data[:, e.columns[col_name]]
corr = scipy.signal.fftconvolve(a, np.flipud(b), 'valid')[0:valid]
# Scale by the norm of the exemplar
corr = corr / e.scale[e.columns[col_name]]
corrs.append(corr)
# Find the peaks using the column with the largest amplitude
biggest = e.scale.index(max(e.scale))
peaks_minmax = peak_detect(corrs[biggest], 0.1)
peaks = [ p[0] for p in peaks_minmax[1] ]
# Now look at every peak
for row in peaks:
# Correlation for each column must be close enough to 1.
for (corr, scale) in zip(corrs, e.scale):
# The accepted distance from 1 is based on the relative
# amplitude of the column. Use a linear mapping:
# scale 1.0 -> distance 0.1
# scale 0.0 -> distance 1.0
distance = 1 - 0.9 * (scale / e.scale[biggest])
if abs(corr[row] - 1) > distance:
# No match
break
else:
# Successful match
matches[row].append(e)
# Insert matches into destination stream.
matched_rows = sorted(matches.keys())
out = np.zeros((len(matched_rows), dest_count + 1))
for n, row in enumerate(matched_rows):
# Fill timestamp
out[n][0] = data[row, 0]
# Mark matched exemplars
for exemplar in matches[row]:
out[n, exemplar.dest_column + 1] = 1.0
# Insert it
insert_func(out)
# Return how many rows we processed
valid = max(valid, 0)
printf(" [%s] matched %d exemplars in %d rows\n",
timestamp_to_short_human(data[0][0]), np.sum(out[:,1:]), valid)
return valid
def trainola(conf):
print "Trainola", nilmtools.__version__
# Load main stream data
url = conf['url']
src_path = conf['stream']
dest_path = conf['dest_stream']
start = conf['start']
end = conf['end']
# Get info for the src and dest streams
src_client = nilmdb.client.numpyclient.NumpyClient(url)
src = nilmtools.filter.get_stream_info(src_client, src_path)
if not src:
raise DataError("source path '" + src_path + "' does not exist")
src_columns = build_column_mapping(conf['columns'], src)
dest_client = nilmdb.client.numpyclient.NumpyClient(url)
dest = nilmtools.filter.get_stream_info(dest_client, dest_path)
if not dest:
raise DataError("destination path '" + dest_path + "' does not exist")
printf("Source:\n")
printf(" %s [%s]\n", src.path, ",".join(src_columns.keys()))
printf("Destination:\n")
printf(" %s (%s columns)\n", dest.path, dest.layout_count)
# Pull in the exemplar data
exemplars = []
for n, exinfo in enumerate(conf['exemplars']):
printf("Loading exemplar %d:\n", n)
e = Exemplar(exinfo)
col = e.dest_column
if col < 0 or col >= dest.layout_count:
raise DataError(sprintf("bad destination column number %d\n" +
"dest stream only has 0 through %d",
col, dest.layout_count - 1))
printf(" %s, output column %d\n", str(e), col)
exemplars.append(e)
if len(exemplars) == 0:
raise DataError("missing exemplars")
# Verify that the exemplar columns are all represented in the main data
for n, ex in enumerate(exemplars):
for col in ex.columns:
if col not in src_columns:
raise DataError(sprintf("Exemplar %d column %s is not "
"available in source data", n, col))
# Figure out which intervals we should process
intervals = ( Interval(s, e) for (s, e) in
src_client.stream_intervals(src_path,
diffpath = dest_path,
start = start, end = end) )
intervals = nilmdb.utils.interval.optimize(intervals)
# Do the processing
rows = 100000
extractor = functools.partial(src_client.stream_extract_numpy,
src.path, layout = src.layout, maxrows = rows)
inserter = functools.partial(dest_client.stream_insert_numpy_context,
dest.path)
start = time.time()
processed_time = 0
printf("Processing intervals:\n")
for interval in intervals:
printf("%s\n", interval.human_string())
nilmtools.filter.process_numpy_interval(
interval, extractor, inserter, rows * 3,
trainola_matcher, (src_columns, dest.layout_count, exemplars))
processed_time += (timestamp_to_seconds(interval.end) -
timestamp_to_seconds(interval.start))
elapsed = max(time.time() - start, 1e-3)
printf("Done. Processed %.2f seconds per second.\n",
processed_time / elapsed)
def main(argv = None):
import simplejson as json
import sys
if argv is None:
argv = sys.argv[1:]
if len(argv) != 1:
raise DataError("need one argument, either a dictionary or JSON string")
try:
# Passed in a JSON string (e.g. on the command line)
conf = json.loads(argv[0])
except TypeError as e:
# Passed in the config dictionary (e.g. from NilmRun)
conf = argv[0]
return trainola(conf)
if __name__ == "__main__":
main()


@@ -30,7 +30,7 @@ except ImportError:
# Versioneer manages version numbers from git tags.
# https://github.com/warner/python-versioneer
import versioneer
versioneer.versionfile_source = 'src/_version.py'
versioneer.versionfile_source = 'nilmtools/_version.py'
versioneer.versionfile_build = 'nilmtools/_version.py'
versioneer.tag_prefix = 'nilmtools-'
versioneer.parentdir_prefix = 'nilmtools-'
@@ -61,14 +61,14 @@ setup(name='nilmtools',
long_description = "NILM Database Tools",
license = "Proprietary",
author_email = 'jim@jtan.com',
install_requires = [ 'nilmdb >= 1.6.0',
install_requires = [ 'nilmdb >= 1.8.5',
'numpy',
'scipy',
'matplotlib',
'python-daemon >= 1.5',
#'matplotlib',
],
packages = [ 'nilmtools',
],
package_dir = { 'nilmtools': 'src' },
entry_points = {
'console_scripts': [
'nilm-decimate = nilmtools.decimate:main',
@@ -79,6 +79,9 @@ setup(name='nilmtools',
'nilm-copy-wildcard = nilmtools.copy_wildcard:main',
'nilm-sinefit = nilmtools.sinefit:main',
'nilm-cleanup = nilmtools.cleanup:main',
'nilm-median = nilmtools.median:main',
'nilm-trainola = nilmtools.trainola:main',
'nilm-pipewatch = nilmtools.pipewatch:main',
],
},
zip_safe = False,