Allow shorthand method for creating an option-less parser.

This is mostly just intended to make a simple filter example shorter.
Force fs to be a float in sfit4
2013-04-21 16:53:28 -04:00 · 2013-04-17 17:58:15 -04:00 · 2013-04-11 11:55:11 -04:00 · 2013-04-11 11:53:17 -04:00 · 2013-04-10 18:38:41 -04:00 · 2013-04-10 17:09:52 -04:00
12 changed files with 681 additions and 255 deletions
--- a/28
+++ b/28
@@ -8,18 +8,32 @@ else
 	@echo "Try 'make install'"
 endif

-test:
+test: test_cleanup
+
+test_cleanup:
+	src/cleanup.py -e extras/cleanup.cfg
+	src/cleanup.py extras/cleanup.cfg
+
+test_insert:
+	@make install >/dev/null
+	src/insert.py --file --dry-run  /test/foo </dev/null
+
+test_copy:
 	@make install >/dev/null
 	src/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*

 test_prep:
 	@make install >/dev/null
-	src/prep.py -c 3 \
-		/lees-compressor/no-leak/raw \
-		/lees-compressor/no-leak/sinefit \
-		/lees-compressor/no-leak/prep \
-	-s '2013-02-19 18:00:00' \
-	-r 0
+	-nilmtool destroy -R /test/raw
+	-nilmtool destroy -R /test/sinefit
+	-nilmtool destroy -R /test/prep
+	nilmtool create /test/raw float32_2
+	nilmtool create /test/sinefit float32_3
+	nilmtool create /test/prep float32_8
+	nilmtool insert -s '@0' -t -r 8000 /test/raw /tmp/raw.dat
+	src/sinefit.py -c 1 /test/raw /test/sinefit
+	src/prep.py -c 2 /test/raw /test/sinefit /test/prep
+	nilmtool extract -s min -e max /test/prep | head -20

 test_decimate:
 	-@nilmtool destroy /lees-compressor/no-leak/raw/4 || true
--- a/README.txt
+++ b/README.txt
@@ -8,7 +8,7 @@ Prerequisites:
  sudo apt-get install python2.7 python2.7-dev python-setuptools
  sudo apt-get install python-numpy python-scipy python-matplotlib

-  nilmdb (1.3.1+)
+  nilmdb (1.5.0+)

 Install:

--- a/extras/cleanup.cfg
+++ b/extras/cleanup.cfg
@@ -0,0 +1,22 @@
+[/lees-compressor/no-leak/prep]
+keep = 2d
+rate = 60
+
+[*/raw]
+keep = 2d
+
+[*/something]
+rate = 10
+
+[*/sinefit]
+keep = 1w
+decimated = False
+
+[/test/raw]
+keep = 0.01d
+
+[/test/sinefit]
+keep = 0.01d
+
+[/test/prep]
+keep = 0.01d
--- a/setup.py
+++ b/setup.py
@@ -61,7 +61,7 @@ setup(name='nilmtools',
      long_description = "NILM Database Tools",
      license = "Proprietary",
      author_email = 'jim@jtan.com',
-      install_requires = [ 'nilmdb >= 1.4.6',
+      install_requires = [ 'nilmdb >= 1.6.0',
                           'numpy',
                           'scipy',
                           'matplotlib',
@@ -75,8 +75,10 @@ setup(name='nilmtools',
              'nilm-decimate-auto = nilmtools.decimate_auto:main',
              'nilm-insert = nilmtools.insert:main',
              'nilm-copy = nilmtools.copy_one:main',
+              'nilm-prep = nilmtools.prep:main',
              'nilm-copy-wildcard = nilmtools.copy_wildcard:main',
              'nilm-sinefit = nilmtools.sinefit:main',
+              'nilm-cleanup = nilmtools.cleanup:main',
              ],
          },
      zip_safe = False,
--- a/src/cleanup.py
+++ b/src/cleanup.py
@@ -0,0 +1,257 @@
+#!/usr/bin/python
+
+from nilmdb.utils.printf import *
+from nilmdb.utils.time import (parse_time, timestamp_to_human,
+                               timestamp_to_seconds, seconds_to_timestamp)
+from nilmdb.utils.diskusage import human_size
+from nilmdb.utils.interval import Interval
+import nilmdb.client
+import nilmdb.client.numpyclient
+import nilmtools
+import argparse
+import ConfigParser
+import sys
+import collections
+import fnmatch
+import re
+
+def warn(msg, *args):
+    fprintf(sys.stderr, "warning: " + msg + "\n", *args)
+
+class TimePeriod(object):
+    _units = { 'h': ('hour',  60*60),
+               'd': ('day',   60*60*24),
+               'w': ('week',  60*60*24*7),
+               'm': ('month', 60*60*24*30),
+               'y': ('year',  60*60*24*365) }
+
+    def __init__(self, val):
+        for u in self._units:
+            if val.endswith(u):
+                self.unit = self._units[u][0]
+                self.scale = self._units[u][1]
+                self.count = float(val[:-len(u)])
+                break
+        else:
+            raise ValueError("unknown units: " + units)
+
+    def seconds(self):
+        return self.count * self.scale
+
+    def describe_seconds(self, seconds):
+        count = seconds / self.scale
+        units = self.unit if count == 1 else (self.unit + "s")
+        if count == int(count):
+            return sprintf("%d %s", count, units)
+        else:
+            return sprintf("%.2f %s", count, units)
+
+    def __str__(self):
+        return self.describe_seconds(self.seconds())
+
+class StreamCleanupConfig(object):
+    def __init__(self, info):
+        self.path = info[0]
+        self.layout = info[1]
+        if info[4] != 0 and info[5] != 0:
+            self.rate = info[4] / timestamp_to_seconds(info[5])
+        else:
+            self.rate = None
+        self.keep = None
+        self.clean_decimated = True
+        self.decimated_from = None
+        self.also_clean_paths = []
+
+def main(argv = None):
+    parser = argparse.ArgumentParser(
+        formatter_class = argparse.RawDescriptionHelpFormatter,
+        version = nilmtools.__version__,
+        description = """\
+    Clean up old data from streams using a configuration file to specify
+    which data to remove.
+
+    The format of the config file is as follows:
+
+      [/stream/path]
+      keep = 3w         # keep up to 3 weeks of data
+      rate = 8000       # optional, used for the --estimate option
+      decimated = false # whether to delete decimated data too (default true)
+
+      [*/prep]
+      keep = 3.5m       # or 2520h or 105d or 15w or 0.29y
+
+    The suffix for 'keep' is 'h' for hours, 'd' for days, 'w' for weeks,
+    'm' for months, or 'y' for years.
+
+    Streams paths may include wildcards.  If a path is matched by more than
+    one config section, data from the last config section counts.
+
+    Decimated streams (paths containing '~decim-') are treated specially:
+      - They don't match wildcards
+      - When deleting data from a parent stream, data is also deleted
+        from its decimated streams, unless decimated=false
+
+    Rate is optional and is only used for the --estimate option.
+    """)
+    parser.add_argument("-u", "--url", action="store",
+                        default="http://localhost/nilmdb/",
+                        help="NilmDB server URL (default: %(default)s)")
+    parser.add_argument("-y", "--yes", action="store_true",
+                        default = False,
+                        help="Actually remove the data (default: no)")
+    parser.add_argument("-e", "--estimate", action="store_true",
+                        default = False,
+                        help="Estimate how much disk space will be used")
+    parser.add_argument("configfile", type=argparse.FileType('r'),
+                        help="Configuration file")
+    args = parser.parse_args(argv)
+
+    # Parse config file
+    config = ConfigParser.RawConfigParser()
+    config.readfp(args.configfile)
+
+    # List all streams
+    client = nilmdb.client.Client(args.url)
+    streamlist = client.stream_list(extended = True)
+
+    # Create config objects
+    streams = collections.OrderedDict()
+    for s in streamlist:
+        streams[s[0]] = StreamCleanupConfig(s)
+        m = re.search(r"^(.*)~decim-[0-9]+$", s[0])
+        if m:
+            streams[s[0]].decimated_from = m.group(1)
+
+    # Build up configuration
+    for section in config.sections():
+        matched = False
+        for path in streams.iterkeys():
+            # Decimated streams only allow exact matches
+            if streams[path].decimated_from and path != section:
+                continue
+            if not fnmatch.fnmatch(path, section):
+                continue
+            matched = True
+            options = config.options(section)
+
+            # Keep period (days, weeks, months, years)
+            if 'keep' in options:
+                streams[path].keep = TimePeriod(config.get(section, 'keep'))
+                options.remove('keep')
+
+            # Rate
+            if 'rate' in options:
+                streams[path].rate = config.getfloat(section, 'rate')
+                options.remove('rate')
+
+            # Decimated
+            if 'decimated' in options:
+                val = config.getboolean(section, 'decimated')
+                streams[path].clean_decimated = val
+                options.remove('decimated')
+
+            for leftover in options:
+                warn("option '%s' for '%s' is unknown", leftover, section)
+
+        if not matched:
+            warn("config for '%s' did not match any existing streams", section)
+
+    # List all decimated streams in the parent stream's info
+    for path in streams.keys():
+        src = streams[path].decimated_from
+        if src and src in streams:
+            if streams[src].clean_decimated:
+                streams[src].also_clean_paths.append(path)
+                del streams[path]
+
+    # Warn about streams that aren't getting cleaned up
+    for path in streams.keys():
+        if streams[path].keep is None or streams[path].keep.seconds() < 0:
+            warn("no config for existing stream '%s'", path)
+            del streams[path]
+
+    if args.estimate:
+        # Estimate disk usage
+        total = 0
+        for path in streams.keys():
+            rate = streams[path].rate
+            if not rate or rate < 0:
+                warn("unable to estimate disk usage for stream '%s' because "
+                     "the data rate is unknown", path)
+                continue
+            printf("%s:\n", path)
+            layout = streams[path].layout
+            dtype = nilmdb.client.numpyclient.layout_to_dtype(layout)
+            per_row = dtype.itemsize
+            per_sec = per_row * rate
+            printf("%17s: %s per row, %s rows per second\n",
+                   "base rate",
+                   human_size(per_row),
+                   round(rate,1))
+            printf("%17s: %s per hour, %s per day\n",
+                   "base size",
+                   human_size(per_sec * 3600),
+                   human_size(per_sec * 3600 * 24))
+
+            # If we'll be cleaning up decimated data, add an
+            # estimation for how much room decimated data takes up.
+            if streams[path].clean_decimated:
+                d_layout = "float32_" + str(3*(int(layout.split('_')[1])))
+                d_dtype = nilmdb.client.numpyclient.layout_to_dtype(d_layout)
+                # Assume the decimations will be a factor of 4
+                # sum_{k=0..inf} (rate / (n^k)) * d_dtype.itemsize
+                d_per_row = d_dtype.itemsize
+                factor = 4.0
+                d_per_sec = d_per_row * (rate / factor) * (1 / (1 - (1/factor)))
+                per_sec += d_per_sec
+                printf("%17s: %s per hour, %s per day\n",
+                       "with decimation",
+                       human_size(per_sec * 3600),
+                       human_size(per_sec * 3600 * 24))
+
+            keep = per_sec * streams[path].keep.seconds()
+            printf("%17s: %s\n\n",
+                   "keep " + str(streams[path].keep), human_size(keep))
+            total += keep
+        printf("Total estimated disk usage for these streams:\n")
+        printf("  %s\n", human_size(total))
+        raise SystemExit(0)
+
+    # Do the cleanup
+    for path in streams:
+        printf("%s: keep %s\n", path, streams[path].keep)
+
+        # Figure out the earliest timestamp we should keep.
+        intervals = [ Interval(start, end) for (start, end) in
+                      reversed(list(client.stream_intervals(path))) ]
+        total = 0
+        keep = seconds_to_timestamp(streams[path].keep.seconds())
+        for i in intervals:
+            total += i.end - i.start
+            if total <= keep:
+                continue
+            remove_before = i.start + (total - keep)
+            break
+        else:
+            printf("  nothing to do (only %s of data present)\n",
+                   streams[path].keep.describe_seconds(
+                       timestamp_to_seconds(total)))
+            continue
+        printf("  removing data before %s\n", timestamp_to_human(remove_before))
+        # Clean in reverse order.  Since we only use the primary stream and not
+        # the decimated streams to figure out which data to remove, removing
+        # the primary stream last means that we might recover more nicely if
+        # we are interrupted and restarted.
+        clean_paths = list(reversed(streams[path].also_clean_paths)) + [ path ]
+        for p in clean_paths:
+            printf("  removing from %s\n", p)
+            if args.yes:
+                client.stream_remove(p, None, remove_before)
+
+    # All done
+    if not args.yes:
+        printf("Note: specify --yes to actually perform removals\n")
+    return
+
+if __name__ == "__main__":
+    main()
--- a/src/copy_one.py
+++ b/src/copy_one.py
@@ -5,6 +5,7 @@

 import nilmtools.filter
 import nilmdb.client
+from nilmdb.client.numpyclient import NumpyClient
 import numpy as np
 import sys

@@ -27,14 +28,14 @@ def main(argv = None):
    meta = f.client_src.stream_get_metadata(f.src.path)
    f.check_dest_metadata(meta)

-    # Copy all rows of data as ASCII strings
-    extractor = nilmdb.client.Client(f.src.url).stream_extract
-    inserter = nilmdb.client.Client(f.dest.url).stream_insert_context
+    # Copy all rows of data using the faster Numpy interfaces
+    extractor = NumpyClient(f.src.url).stream_extract_numpy
+    inserter = NumpyClient(f.dest.url).stream_insert_numpy_context
    for i in f.intervals():
        print "Processing", f.interval_string(i)
        with inserter(f.dest.path, i.start, i.end) as insert_ctx:
-            for row in extractor(f.src.path, i.start, i.end):
-                insert_ctx.insert(row + "\n")
+            for data in extractor(f.src.path, i.start, i.end):
+                insert_ctx.insert(data)

 if __name__ == "__main__":
    main()
--- a/src/decimate.py
+++ b/src/decimate.py
@@ -41,41 +41,45 @@ def main(argv = None):

    # If source is decimated, we have to decimate a bit differently
    if "decimate_source" in f.client_src.stream_get_metadata(args.srcpath):
-        n = f.src.layout_count // 3
-        f.process_python(function = decimate_again, rows = args.factor,
-                         args = (n,))
+        again = True
    else:
-        n = f.src.layout_count
-        f.process_python(function = decimate_first, rows = args.factor,
-                         args = (n,))
+        again = False
+    f.process_numpy(decimate, args = (args.factor, again))

-def decimate_first(data, n):
-    """Decimate original data -- result has 3 times as many columns"""
-    # For this simple calculation, converting to a Numpy array
-    # and doing the math is slower than just doing it directly.
-    rows = iter(data)
-    r_sum = r_min = r_max = rows.next()
-    for row in rows:
-        r_sum = map(operator.add, r_sum, row)
-        r_min = map(min, r_min, row)
-        r_max = map(max, r_max, row)
-    r_mean = [ x / len(data) for x in r_sum ]
-    return [ [ r_mean[0] ] + r_mean[1:] + r_min[1:] + r_max[1:] ]
+def decimate(data, interval, args, insert_function, final):
+    """Decimate data"""
+    (factor, again) = args
+    (n, m) = data.shape

-def decimate_again(data, n):
-    """Decimate already-decimated data -- result has the same number
-    of columns"""
-    rows = iter(data)
-    r = rows.next()
-    r_sum = r[0:(n+1)]
-    r_min = r[(n+1):(2*n+1)]
-    r_max = r[(2*n+1):(3*n+1)]
-    for r in rows:
-        r_sum = map(operator.add, r_sum, r[0:(n+1)])
-        r_min = map(min, r_min, r[(n+1):(2*n+1)])
-        r_max = map(max, r_max, r[(2*n+1):(3*n+1)])
-    r_mean = [ x / len(data) for x in r_sum ]
-    return [ r_mean + r_min + r_max ]
+    # Figure out which columns to use as the source for mean, min, and max,
+    # depending on whether this is the first decimation or we're decimating
+    # again.  Note that we include the timestamp in the means.
+    if again:
+        c = (m - 1) // 3
+        # e.g. c = 3
+        # ts mean1 mean2 mean3 min1 min2 min3 max1 max2 max3
+        mean_col = slice(0, c + 1)
+        min_col = slice(c + 1, 2 * c + 1)
+        max_col = slice(2 * c + 1, 3 * c + 1)
+    else:
+        mean_col = slice(0, m)
+        min_col = slice(1, m)
+        max_col = slice(1, m)
+
+    # Discard extra rows that aren't a multiple of factor
+    n = n // factor * factor
+    data = data[:n,:]
+
+    # Reshape it into 3D so we can process 'factor' rows at a time
+    data = data.reshape(n // factor, factor, m)
+
+    # Fill the result
+    out = np.c_[ np.mean(data[:,:,mean_col], axis=1),
+                 np.min(data[:,:,min_col], axis=1),
+                 np.max(data[:,:,max_col], axis=1) ]
+
+    insert_function(out)
+    return n

 if __name__ == "__main__":
    main()
--- a/src/decimate_auto.py
+++ b/src/decimate_auto.py
@@ -4,15 +4,19 @@ import nilmtools.filter
 import nilmtools.decimate
 import nilmdb.client
 import argparse
+import fnmatch

 def main(argv = None):
    parser = argparse.ArgumentParser(
        formatter_class = argparse.RawDescriptionHelpFormatter,
-        version = "1.0",
+        version = nilmtools.__version__,
        description = """\
    Automatically create multiple decimations from a single source
    stream, continuing until the last decimated level contains fewer
    than 500 points total.
+
+    Wildcards and multiple paths are accepted.  Decimated paths are
+    ignored when matching wildcards.
    """)
    parser.add_argument("-u", "--url", action="store",
                        default="http://localhost/nilmdb/",
@@ -23,20 +27,36 @@ def main(argv = None):
                        default = False,
                        help="Force metadata changes if the dest "
                        "doesn't match")
-    parser.add_argument("path", action="store",
+    parser.add_argument("path", action="store", nargs='+',
                        help='Path of base stream')
    args = parser.parse_args(argv)

    # Pull out info about the base stream
    client = nilmdb.client.Client(args.url)

-    info = nilmtools.filter.get_stream_info(client, args.path)
-    if not info:
-        raise Exception("path " + args.path + " not found")
+    # Find list of paths to process
+    streams = [ unicode(s[0]) for s in client.stream_list() ]
+    streams = [ s for s in streams if "~decim-" not in s ]
+    paths = []
+    for path in args.path:
+        new = fnmatch.filter(streams, unicode(path))
+        if not new:
+            print "error: no stream matched path:", path
+            raise SystemExit(1)
+        paths.extend(new)

-    meta = client.stream_get_metadata(args.path)
+    for path in paths:
+        do_decimation(client, args, path)
+
+def do_decimation(client, args, path):
+    print "Decimating", path
+    info = nilmtools.filter.get_stream_info(client, path)
+    if not info:
+        raise Exception("path " + path + " not found")
+
+    meta = client.stream_get_metadata(path)
    if "decimate_source" in meta:
-        print "Stream", args.path, "was decimated from", meta["decimate_source"]
+        print "Stream", path, "was decimated from", meta["decimate_source"]
        print "You need to pass the base stream instead"
        raise SystemExit(1)

@@ -53,7 +73,7 @@ def main(argv = None):
        if info.rows <= 500:
            break
        factor *= args.factor
-        new_path = "%s~decim-%d" % (args.path, factor)
+        new_path = "%s~decim-%d" % (path, factor)

        # Create the stream if needed
        new_info = nilmtools.filter.get_stream_info(client, new_path)
@@ -72,5 +92,7 @@ def main(argv = None):
        # Update info using the newly decimated stream
        info = nilmtools.filter.get_stream_info(client, new_path)

+    return
+
 if __name__ == "__main__":
    main()
--- a/src/filter.py
+++ b/src/filter.py
@@ -4,6 +4,7 @@ from __future__ import absolute_import

 import nilmdb.client
 from nilmdb.client import Client
+from nilmdb.client.numpyclient import NumpyClient
 from nilmdb.utils.printf import *
 from nilmdb.utils.time import (parse_time, timestamp_to_human,
                               timestamp_to_seconds)
@@ -66,7 +67,7 @@ def get_stream_info(client, path):

 class Filter(object):

-    def __init__(self):
+    def __init__(self, parser_description = None):
        self._parser = None
        self._client_src = None
        self._client_dest = None
@@ -77,6 +78,9 @@ class Filter(object):
        self.end = None
        self.interhost = False
        self.force_metadata = False
+        if parser_description is not None:
+            self.setup_parser(parser_description)
+            self.parse_args()

    @property
    def client_src(self):
@@ -247,72 +251,7 @@ class Filter(object):
        # All good -- write the metadata in case it's not already there
        self._client_dest.stream_update_metadata(self.dest.path, data)

-    # Main processing helper
-    def process_python(self, function, rows, args = None, partial = False):
-        """Process data in chunks of 'rows' data at a time.
-
-        This provides data as nested Python lists and expects the same
-        back.
-
-        function: function to process the data
-        rows: maximum number of rows to pass to 'function' at once
-        args: tuple containing extra arguments to pass to 'function'
-        partial: if true, less than 'rows' may be passed to 'function'.
-                 if false, partial data at the end of an interval will
-                 be dropped.
-
-        'function' should be defined like:
-            function(data, *args)
-        It will be passed a list containing up to 'rows' rows of
-        data from the source stream, and any arguments passed in
-        'args'.  It should transform the data as desired, and return a
-        new list of rdata, which will be inserted into the destination
-        stream.
-        """
-        if args is None:
-            args = []
-        extractor = Client(self.src.url).stream_extract
-        inserter = Client(self.dest.url).stream_insert_context
-
-        # Parse input data.  We use homogenous types for now, which
-        # means the timestamp type will be either float or int.
-        if "int" in self.src.layout_type:
-            parser = lambda line: [ int(x) for x in line.split() ]
-        else:
-            parser = lambda line: [ float(x) for x in line.split() ]
-
-        # Format output data.
-        formatter = lambda row: " ".join([repr(x) for x in row]) + "\n"
-
-        for interval in self.intervals():
-            print "Processing", self.interval_string(interval)
-            with inserter(self.dest.path,
-                          interval.start, interval.end) as insert_ctx:
-                src_array = []
-                for line in extractor(self.src.path,
-                                      interval.start, interval.end):
-                    # Read in data
-                    src_array.append([ float(x) for x in line.split() ])
-
-                    if len(src_array) == rows:
-                        # Pass through filter function
-                        dest_array = function(src_array, *args)
-
-                        # Write result to destination
-                        out = [ formatter(row) for row in dest_array ]
-                        insert_ctx.insert("".join(out))
-
-                        # Clear source array
-                        src_array = []
-
-                # Take care of partial chunk
-                if len(src_array) and partial:
-                    dest_array = function(src_array, *args)
-                    out = [ formatter(row) for row in dest_array ]
-                    insert_ctx.insert("".join(out))
-
-    # Like process_python, but provides Numpy arrays and allows for
-    # partial processing.
+    # The main filter processing method.
    def process_numpy(self, function, args = None, rows = 100000):
        """For all intervals that exist in self.src but don't exist in
        self.dest, call 'function' with a Numpy array corresponding to
@@ -339,37 +278,26 @@ class Filter(object):
        Return value of 'function' is the number of data rows processed.
        Unprocessed data will be provided again in a subsequent call
        (unless 'final' is True).
+
+        If unprocessed data remains after 'final' is True, the interval
+        being inserted will be ended at the timestamp of the first
+        unprocessed data point.
        """
        if args is None:
            args = []
-        extractor = Client(self.src.url).stream_extract
-        inserter = Client(self.dest.url).stream_insert_context
-
-        # Format output data.
-        formatter = lambda row: " ".join([repr(x) for x in row]) + "\n"
-
-        def batch(iterable, size):
-            c = itertools.count()
-            for k, g in itertools.groupby(iterable, lambda x: c.next() // size):
-                yield g
+        extractor = NumpyClient(self.src.url).stream_extract_numpy
+        inserter = NumpyClient(self.dest.url).stream_insert_numpy_context

        for interval in self.intervals():
            print "Processing", self.interval_string(interval)
            with inserter(self.dest.path,
                          interval.start, interval.end) as insert_ctx:
-                def insert_function(array):
-                    s = cStringIO.StringIO()
-                    if len(np.shape(array)) != 2:
-                        raise Exception("array must be 2-dimensional")
-                    np.savetxt(s, array)
-                    insert_ctx.insert(s.getvalue())
-
-                extract = extractor(self.src.path, interval.start, interval.end)
+                insert_function = insert_ctx.insert
                old_array = np.array([])
-                for batched in batch(extract, rows):
-                    # Read in this batch of data
-                    new_array = np.loadtxt(batched)
-
+                for new_array in extractor(self.src.path,
+                                           interval.start, interval.end,
+                                           layout = self.src.layout,
+                                           maxrows = rows):
                    # If we still had old data left, combine it
                    if old_array.shape[0] != 0:
                        array = np.vstack((old_array, new_array))
@@ -398,7 +326,13 @@ class Filter(object):

                # Last call for this contiguous interval
                if old_array.shape[0] != 0:
-                    function(old_array, interval, args, insert_function, True)
+                    processed = function(old_array, interval, args,
+                                         insert_function, True)
+                    if processed != old_array.shape[0]:
+                        # Truncate the interval we're inserting at the first
+                        # unprocessed data point.  This ensures that
+                        # we'll not miss any data when we run again later.
+                        insert_ctx.update_end(old_array[processed][0])

 def main(argv = None):
    # This is just a dummy function; actual filters can use the other
--- a/src/insert.py
+++ b/src/insert.py
@@ -12,6 +12,7 @@ import sys
 import re
 import argparse
 import subprocess
+import textwrap

 class ParseError(Exception):
    def __init__(self, filename, error):
@@ -22,32 +23,103 @@ def parse_args(argv = None):
    parser = argparse.ArgumentParser(
        formatter_class = argparse.RawDescriptionHelpFormatter,
        version = nilmtools.__version__,
-        description = """\
-    Insert data from ethstream, either live (using the system time as a
-    reference) or prerecorded (using comments in the file as a reference).
+        description = textwrap.dedent("""\
+    Insert large amount of data from an external source like ethstream.

-    The data is assumed to have been recorded at the specified rate.
-    Small discrepencies between the accumulated timestamps and the
-    reference time are ignored; larger discrepencies cause gaps to be
-    created in the stream.  Overlapping data returns an error.
-    """)
+    This code tracks two timestamps:
+
+    (1) The 'data' timestamp is the precise timestamp corresponding to
+        a particular row of data, and is the timestamp that gets
+        inserted into the database.  It increases by 'data_delta' for
+        every row of input.
+
+        'data_delta' can come from one of two sources.  If '--delta'
+        is specified, it is pulled from the first column of data.  If
+        '--rate' is specified, 'data_delta' is set to a fixed value of
+        (1 / rate).
+
+    (2) The 'clock' timestamp is the less precise timestamp that gives
+        the absolute time.  It can come from two sources.  If '--live'
+        is specified, it is pulled directly from the system clock.  If
+        '--file' is specified, it is extracted from the input filename
+        every time a new file is opened for read, and from comments
+        that appear in the file.
+
+    Small discrepencies between 'data' and 'clock' are ignored.  If
+    the 'data' timestamp ever differs from the 'clock' timestamp by
+    more than 'max_gap' seconds:
+
+    - If 'data' is running behind, there is a gap in the data, so it
+      is stepped forward to match 'clock'.
+
+    - If 'data' is running ahead, there is overlap in the data, and an
+      error is raised.
+    """))
    parser.add_argument("-u", "--url", action="store",
                        default="http://localhost/nilmdb/",
                        help="NilmDB server URL (default: %(default)s)")
-    parser.add_argument("-r", "--rate", action="store", default=8000,
-                        type=float,
-                        help="Data rate in Hz (default: %(default)s)")
-    parser.add_argument("-l", "--live", action="store_true",
-                        help="Live capture; use system time to verify rate")
-    parser.add_argument("path", action="store",
-                        help="Path of stream, e.g. /foo/bar")
-    parser.add_argument("infile", type=argparse.FileType('r'), nargs='*',
-                        default=[sys.stdin],
-                        help="Input files (default: stdin)")
+    group = parser.add_argument_group("Misc options")
+    group.add_argument("-D", "--dry-run", action="store_true",
+                       help="Parse files, but don't insert any data")
+    group.add_argument("-m", "--max-gap", action="store", default=10.0,
+                       metavar="SEC", type=float,
+                       help="Max discrepency between clock and data "
+                       "timestamps (default: %(default)s)")
+
+    group = parser.add_argument_group("Data timestamp delta")
+    exc = group.add_mutually_exclusive_group()
+    exc.add_argument("-r", "--rate", action="store", default=8000.0,
+                     type=float,
+                     help="Data_delta is constant 1/RATE "
+                     "(default: %(default)s Hz)")
+    exc.add_argument("-d", "--delta", action="store_true",
+                     help="Data_delta is the first number in each line")
+
+    group = parser.add_argument_group("Clock timestamp source")
+    exc = group.add_mutually_exclusive_group()
+    exc.add_argument("-l", "--live", action="store_true",
+                     help="Use live system time for clock timestamp")
+    exc.add_argument("-f", "--file", action="store_true", default=True,
+                     help="Use filename or comments for clock timestamp")
+    group.add_argument("-o", "--offset-filename", metavar="SEC",
+                       action="store", default=-3600.0, type=float,
+                       help="Offset to add to filename timestamps "
+                       "(default: %(default)s)")
+    group.add_argument("-O", "--offset-comment", metavar="SEC",
+                       action="store", default=0.0, type=float,
+                       help="Offset to add to comment timestamps "
+                       "(default: %(default)s)")
+
+    group = parser.add_argument_group("Database path")
+    group.add_argument("path", action="store",
+                       help="Path of stream, e.g. /foo/bar")
+
+    group = parser.add_argument_group("Input files")
+    group.add_argument("infile", type=argparse.FileType('r'), nargs='*',
+                       default=[sys.stdin],
+                       help="Input files (default: stdin)")
+
    args = parser.parse_args(argv)

-    printf("Stream path: %s\n", args.path)
-    printf("  Data rate: %s Hz\n", repr(args.rate))
+    printf("     Stream path: %s\n", args.path)
+
+    printf("  Data timestamp: ")
+    if args.delta:
+        printf("delta on each input line\n")
+    else:
+        printf("fixed rate %s Hz\n", repr(args.rate))
+
+    printf(" Clock timestamp: ")
+    if args.live:
+        printf("live system clock\n")
+    else:
+        printf("from filenames and comments\n")
+        printf(" Filename offset: %s seconds\n", repr(args.offset_filename))
+        printf("  Comment offset: %s seconds\n", repr(args.offset_comment))
+
+    printf("         Max gap: %s seconds\n", repr(args.max_gap))
+    if args.dry_run:
+        printf("Dry run (no data will be inserted)\n")

    return args

@@ -56,22 +128,26 @@ def main(argv = None):

    client = nilmdb.client.Client(args.url)

-    # Local copies to save dictionary lookups
-    live = args.live
-
    # data_ts is the timestamp that we'll use for the current line
    data_ts_base = 0
    data_ts_inc = 0
    data_ts_rate = args.rate
+    data_ts_delta = 0
+    def get_data_ts():
+        if args.delta:
+            return data_ts_base + data_ts_delta
+        else:
+            return data_ts_base + rate_to_period(data_ts_rate,
+                                                 data_ts_inc)

    # clock_ts is the imprecise "real" timestamp (from the filename,
-    # comments, or or system clock)
+    # comments, or system clock)
    clock_ts = None

    def print_clock_updated():
-        printf("Clock time updated to %s\n", timestamp_to_human(clock_ts))
+        printf("Clock timestamp updated to %s\n", timestamp_to_human(clock_ts))
        if data_ts_base != 0:
-            diff = data_ts - clock_ts
+            diff = get_data_ts() - clock_ts
            if diff >= 0:
                printf("  (data timestamp ahead by %.6f sec)\n",
                       timestamp_to_seconds(diff))
@@ -79,12 +155,17 @@ def main(argv = None):
                printf("  (data timestamp behind by %.6f sec)\n",
                       timestamp_to_seconds(-diff))

+    offset_filename = seconds_to_timestamp(args.offset_filename)
+    offset_comment = seconds_to_timestamp(args.offset_comment)
+    max_gap = seconds_to_timestamp(args.max_gap)
+
    with client.stream_insert_context(args.path) as stream:
        for f in args.infile:
            filename = f.name
            printf("Processing %s\n", filename)

-            # If the filename ends in .gz, open it with gzcat instead.
+            # If the filename ends in .gz, re-open it with gzip to
+            # decompress.
            if filename.endswith(".gz"):
                p = subprocess.Popen(["gzip", "-dc"],
                                     stdin = f, stdout = subprocess.PIPE)
@@ -95,7 +176,7 @@ def main(argv = None):
                # Subtract 1 hour because files are created at the end
                # of the hour.  Hopefully, we'll be able to use
                # internal comments and this value won't matter anyway.
-                clock_ts = parse_time(filename) - seconds_to_timestamp(3600)
+                clock_ts = parse_time(filename) + offset_filename
                print_clock_updated()
            except ValueError:
                pass
@@ -104,8 +185,15 @@ def main(argv = None):

            # Read each line
            for line in f:
-                data_ts = data_ts_base + rate_to_period(data_ts_rate,
-                                                        data_ts_inc)
+                # Once in a while a line might be truncated, if we're
+                # at the end of a file.  Ignore it, but if we ignore
+                # too many, bail out.
+                if line[-1] != '\n':
+                    truncated_lines += 1
+                    if truncated_lines > 3:
+                        raise ParseError(filename, "too many short lines")
+                    printf("Ignoring short line in %s\n", filename)
+                    continue

                # If no content other than the newline, skip it
                if len(line) <= 1:
@@ -114,20 +202,32 @@ def main(argv = None):
                # If line starts with a comment, look for a timestamp
                if line[0] == '#':
                    try:
-                        clock_ts = parse_time(line[1:])
+                        clock_ts = parse_time(line[1:]) + offset_comment
                        print_clock_updated()
                    except ValueError:
                        pass
                    continue

+                # If --delta mode, increment data_ts_delta by the
+                # delta from the file.
+                if args.delta:
+                    try:
+                        (delta, line) = line.split(None, 1)
+                        data_ts_delta += float(delta)
+                    except ValueError:
+                        raise ParseError(filename, "can't parse delta")
+
+                # Calculate data_ts for this row
+                data_ts = get_data_ts()
+
                # If inserting live, use clock timestamp
-                if live:
+                if args.live:
                    clock_ts = time_now()

                # If we have a real timestamp, compare it to the data
                # timestamp, and make sure things match up.
                if clock_ts is not None:
-                    if (data_ts - seconds_to_timestamp(10)) > clock_ts:
+                    if (data_ts - max_gap) > clock_ts:
                        # Accumulated line timestamps are in the future.
                        # If we were to set data_ts=clock_ts, we'd create
                        # an overlap, so we have to just bail out here.
@@ -137,7 +237,7 @@ def main(argv = None):
                                      timestamp_to_human(clock_ts))
                        raise ParseError(filename, err)

-                    if (data_ts + seconds_to_timestamp(10)) < clock_ts:
+                    if (data_ts + max_gap) < clock_ts:
                        # Accumulated line timetamps are in the past.  We
                        # can just skip some time and leave a gap in the
                        # data.
@@ -148,7 +248,7 @@ def main(argv = None):
                                   timestamp_to_human(clock_ts))
                        stream.finalize()
                        data_ts_base = data_ts = clock_ts
-                        data_ts_inc = 0
+                        data_ts_inc = data_ts_delta = 0

                    # Don't use this clock time anymore until we update it
                    clock_ts = None
@@ -156,21 +256,12 @@ def main(argv = None):
                if data_ts_base == 0:
                    raise ParseError(filename, "No idea what timestamp to use")

-                # This line is legit, so increment timestamp
+                # This line is legit, so increment timestamp (for --rate)
                data_ts_inc += 1

-                # Once in a while a line might be truncated, if we're at
-                # the end of a file.  Ignore it, but if we ignore too many,
-                # bail out.
-                if line[-1] != '\n':
-                    truncated_lines += 1
-                    if truncated_lines > 3:
-                        raise ParseError(filename, "too many short lines")
-                    printf("Ignoring short line in %s\n", filename)
-                    continue
-
                # Insert it
-                stream.insert("%d %s" % (data_ts, line))
+                if not args.dry_run:
+                    stream.insert("%d %s" % (data_ts, line))
    print "Done"

 if __name__ == "__main__":
--- a/src/prep.py
+++ b/src/prep.py
@@ -3,12 +3,14 @@
 # Spectral envelope preprocessor.
 # Requires two streams as input: the original raw data, and sinefit data.

+from nilmdb.utils.printf import *
+from nilmdb.utils.time import timestamp_to_human
 import nilmtools.filter
 import nilmdb.client
 from numpy import *
 import scipy.fftpack
 import scipy.signal
-from matplotlib import pyplot as p
+#from matplotlib import pyplot as p
 import bisect

 def main(argv = None):
@@ -19,12 +21,14 @@ def main(argv = None):
    group.add_argument("-c", "--column", action="store", type=int,
                       help="Column number (first data column is 1)")
    group.add_argument("-n", "--nharm", action="store", type=int, default=4,
-                       help="number of odd harmonics to compute")
+                       help="number of odd harmonics to compute (default 4)")
+    group.add_argument("-N", "--nshift", action="store", type=int, default=1,
+                       help="number of shifted FFTs per period (default 1)")
    exc = group.add_mutually_exclusive_group()
    exc.add_argument("-r", "--rotate", action="store", type=float,
-                     help="rotate FFT output by this many degrees")
+                     help="rotate FFT output by this many degrees (default 0)")
    exc.add_argument("-R", "--rotate-rad", action="store", type=float,
-                     help="rotate FFT output by this many radians")
+                     help="rotate FFT output by this many radians (default 0)")

    group.add_argument("srcpath", action="store",
                       help="Path of raw input, e.g. /foo/raw")
@@ -44,6 +48,10 @@ def main(argv = None):
        print "  nilmtool -u %s create %s %s" % (e.dest.url, e.dest.path, rec)
        raise SystemExit(1)

+    if f.dest.layout_count != args.nharm * 2:
+        print "error: need", args.nharm*2, "columns in destination stream"
+        raise SystemExit(1)
+
    # Check arguments
    if args.column is None or args.column < 1:
        parser.error("need a column number >= 1")
@@ -51,6 +59,9 @@ def main(argv = None):
    if args.nharm < 1 or args.nharm > 32:
        parser.error("number of odd harmonics must be 1-32")

+    if args.nshift < 1:
+        parser.error("number of shifted FFTs must be >= 1")
+
    if args.rotate is not None:
        rotation = args.rotate * 2.0 * pi / 360.0
    else:
@@ -68,58 +79,100 @@ def main(argv = None):
    # Check and set metadata in prep stream
    f.check_dest_metadata({ "prep_raw_source": f.src.path,
                            "prep_sinefit_source": sinefit.path,
-                            "prep_column": args.column })
+                            "prep_column": args.column,
+                            "prep_rotation": rotation })

    # Run the processing function on all data
    f.process_numpy(process, args = (client_sinefit, sinefit.path, args.column,
-                                     args.nharm, rotation))
+                                     args.nharm, rotation, args.nshift))

 def process(data, interval, args, insert_function, final):
-    (client, sinefit_path, column, nharm, rotation) = args
+    (client, sinefit_path, column, nharm, rotation, nshift) = args
    rows = data.shape[0]
    data_timestamps = data[:,0]

+    if rows < 2:
+        return 0
+
+    last_inserted = [nilmdb.utils.time.min_timestamp]
+    def insert_if_nonoverlapping(data):
+        """Call insert_function to insert data, but only if this
+        data doesn't overlap with other data that we inserted."""
+        if data[0][0] <= last_inserted[0]:
+            return
+        last_inserted[0] = data[-1][0]
+        insert_function(data)
+
    processed = 0
    out = zeros((1, nharm * 2 + 1))
    # Pull out sinefit data for the entire time range of this block
    for sinefit_line in client.stream_extract(sinefit_path,
                                              data[0, 0], data[rows-1, 0]):
-        # Extract sinefit data to get zero crossing timestamps
+        def prep_period(t_min, t_max, rot):
+            """
+            Compute prep coefficients from time t_min to t_max, which
+            are the timestamps of the start and end of one period.
+            Results are rotated by an additional extra_rot before
+            being inserted into the database.  Returns the maximum
+            index processed, or None if the period couldn't be
+            processed.
+            """
+            # Find the indices of data that correspond to (t_min, t_max)
+            idx_min = bisect.bisect_left(data_timestamps, t_min)
+            idx_max = bisect.bisect_left(data_timestamps, t_max)
+            if idx_min >= idx_max or idx_max >= len(data_timestamps):
+                return None
+
+            # Perform FFT over those indices
+            N = idx_max - idx_min
+            d = data[idx_min:idx_max, column]
+            F = scipy.fftpack.fft(d) * 2.0 / N
+
+            # If we wanted more harmonics than the FFT gave us, pad with zeros
+            if N < (nharm * 2):
+                F = r_[F, zeros(nharm * 2 - N)]
+
+            # Fill output data.
+            out[0, 0] = round(t_min)
+            for k in range(nharm):
+                Fk = F[2 * k + 1] * e**(rot * 1j * (k+1))
+                out[0, 2 * k + 1] = -imag(Fk) # Pk
+                out[0, 2 * k + 2] = real(Fk)  # Qk
+
+            insert_if_nonoverlapping(out)
+            return idx_max
+
+        # Extract sinefit data to get zero crossing timestamps.
+        # t_min = beginning of period
+        # t_max = end of period
        (t_min, f0, A, C) = [ float(x) for x in sinefit_line.split() ]
        t_max = t_min + 1e6 / f0

-        # Find the indices of data that correspond to (t_min, t_max)
-        idx_min = bisect.bisect_left(data_timestamps, t_min)
-        idx_max = bisect.bisect_left(data_timestamps, t_max)
-        if idx_min >= idx_max:
-            # something's wonky; ignore this period
-            continue
-        if idx_max >= len(data_timestamps):
-            # max is likely past the end of our chunk, so stop
-            # processing this chunk now.
-            break
+        # Compute prep over shifted windows of the period
+        # (nshift is typically 1)
+        for n in range(nshift):
+            # Compute timestamps and rotations for shifted window
+            time_shift = n * (t_max - t_min) / nshift
+            shifted_min = t_min + time_shift
+            shifted_max = t_max + time_shift
+            angle_shift = n * 2 * pi / nshift
+            shifted_rot = rotation - angle_shift

-        # Perform FFT over those indices
-        N = idx_max - idx_min
-        d = data[idx_min:idx_max, column]
-        F = scipy.fftpack.fft(d) / N
+            # Run prep computation
+            idx_max = prep_period(shifted_min, shifted_max, shifted_rot)
+            if not idx_max:
+                break
+            processed = idx_max

-        # If we wanted more harmonics than we have, pad with zeros
-        if N < (nharm * 2):
-            F = r_[F, zeros(nharm * 2 - N)]
-
-        # Fill output data
-        out[0, 0] = t_min
-        for k in range(nharm):
-            Fk = F[2 * k + 1] * e**(rotation * 1j * k)
-            out[0, 2 * k + 1] = -imag(Fk) # Pk
-            out[0, 2 * k + 2] = real(Fk)  # Qk
-
-        # Insert it and continue
-        insert_function(out)
-        processed = idx_max
-
-    print "Processed", processed, "of", rows, "rows"
+    # If we processed no data but there's lots in here, pretend we
+    # processed half of it.
+    if processed == 0 and rows > 10000:
+        processed = rows / 2
+        printf("%s: warning: no periods found; skipping %d rows\n",
+               timestamp_to_human(data[0][0]), processed)
+    else:
+        printf("%s: processed %d of %d rows\n",
+               timestamp_to_human(data[0][0]), processed, rows)
    return processed

 if __name__ == "__main__":
--- a/src/sinefit.py
+++ b/src/sinefit.py
@@ -18,6 +18,15 @@ def main(argv = None):
    group.add_argument('-f', '--frequency', action='store', type=float,
                       default=60.0,
                       help='Approximate frequency (default: %(default)s)')
+    group.add_argument('-m', '--min-freq', action='store', type=float,
+                       help='Minimum valid frequency '
+                       '(default: approximate frequency / 2))')
+    group.add_argument('-M', '--max-freq', action='store', type=float,
+                       help='Maximum valid frequency '
+                       '(default: approximate frequency * 2))')
+    group.add_argument('-a', '--min-amp', action='store', type=float,
+                       default=20.0,
+                       help='Minimum signal amplitude (default: %(default)s)')

    # Parse arguments
    try:
@@ -34,13 +43,24 @@ def main(argv = None):
        parser.error("need a column number >= 1")
    if args.frequency < 0.1:
        parser.error("frequency must be >= 0.1")
+    if args.min_freq is None:
+        args.min_freq = args.frequency / 2
+    if args.max_freq is None:
+        args.max_freq = args.frequency * 2
+    if (args.min_freq > args.max_freq or
+        args.min_freq > args.frequency or
+        args.max_freq < args.frequency):
+        parser.error("invalid min or max frequency")
+    if args.min_amp < 0:
+        parser.error("min amplitude must be >= 0")

    f.check_dest_metadata({ "sinefit_source": f.src.path,
                            "sinefit_column": args.column })
-    f.process_numpy(process, args = (args.column, args.frequency))
+    f.process_numpy(process, args = (args.column, args.frequency, args.min_amp,
+                                     args.min_freq, args.max_freq))

 def process(data, interval, args, insert_function, final):
-    (column, f_expected) = args
+    (column, f_expected, a_min, f_min, f_max) = args
    rows = data.shape[0]

    # Estimate sampling frequency from timestamps
@@ -66,8 +86,14 @@ def process(data, interval, args, insert_function, final):
        (A, f0, phi, C) = sfit4(this, fs)

        # Check bounds.  If frequency is too crazy, ignore this window
-        if f0 < (f_expected/2) or f0 > (f_expected*2):
-            print "frequency", f0, "too far from expected value", f_expected
+        if f0 < f_min or f0 > f_max:
+            print "frequency", f0, "outside valid range", f_min, "-", f_max
+            start += N
+            continue
+
+        # If amplitude is too low, results are probably just noise
+        if A < a_min:
+            print "amplitude", A, "below minimum threshold", a_min
            start += N
            continue

@@ -131,7 +157,7 @@ def sfit4(data, fs):
    (Verified to match sfit4.m)
    """
    N = len(data)
-    t = linspace(0, (N-1) / fs, N)
+    t = linspace(0, (N-1) / float(fs), N)

    ## Estimate frequency using FFT (step b)
    Fc = fft(data)
@@ -156,32 +182,32 @@ def sfit4(data, fs):
    i = arccos((Z2*cos(ni2) - Z1*cos(ni1)) / (Z2-Z1)) / n

    # Convert to Hz
-    f0 = i * fs / N
+    f0 = i * float(fs) / N

-    ## Fit it
-    # first guess for A0, B0 using 3-parameter fit (step c)
-    w = 2*pi*f0
-    D = c_[cos(w*t), sin(w*t), ones(N)]
-    s = linalg.lstsq(D, data)[0]
-
-    # Now iterate 6 times (step i)
-    for idx in range(6):
-        D = c_[cos(w*t), sin(w*t), ones(N),
-              -s[0] * t * sin(w*t) + s[1] * t * cos(w*t) ] # eqn B.16
-        s = linalg.lstsq(D, data)[0] # eqn B.18
-        w = w + s[3]	# update frequency estimate
-
-    ## Extract results
-    A = sqrt(s[0]*s[0] + s[1]*s[1]) # eqn B.21
-    f0 = w / (2*pi)
+    # Fit it.  We'll catch exceptions here and just returns zeros
+    # if something fails with the least squares fit, etc.
    try:
+        # first guess for A0, B0 using 3-parameter fit (step c)
+        w = 2*pi*f0
+        D = c_[cos(w*t), sin(w*t), ones(N)]
+        s = linalg.lstsq(D, data)[0]
+
+        # Now iterate 6 times (step i)
+        for idx in range(6):
+            D = c_[cos(w*t), sin(w*t), ones(N),
+                  -s[0] * t * sin(w*t) + s[1] * t * cos(w*t) ] # eqn B.16
+            s = linalg.lstsq(D, data)[0] # eqn B.18
+            w = w + s[3]	# update frequency estimate
+
+        ## Extract results
+        A = sqrt(s[0]*s[0] + s[1]*s[1]) # eqn B.21
+        f0 = w / (2*pi)
        phi = -arctan2(s[1], s[0]) # eqn B.22
-    except TypeError:
+        C = s[2]
+        return (A, f0, phi, C)
+    except Exception as e:
        # something broke down, just return zeros
        return (0, 0, 0, 0)
-    C = s[2]
-
-    return (A, f0, phi, C)

 if __name__ == "__main__":
    main()
Author	SHA1	Message	Date
Jim Paris	8ab31eafc2	Allow shorthand method for creating an option-less parser. This is mostly just intended to make a simple filter example shorter.	2013-04-21 16:53:28 -04:00
Jim Paris	979ab13bff	Force fs to be a float in sfit4	2013-04-17 17:58:15 -04:00
Jim Paris	f4fda837ae	Bump required nilmdb version to 1.6.0	2013-04-11 11:55:11 -04:00
Jim Paris	5547d266d0	filter: Don't include trailing unprocessed data in the inserted intervals	2013-04-11 11:53:17 -04:00
Jim Paris	372e977e4a	Reverse cleanup order to handle interruptions better	2013-04-10 18:38:41 -04:00
Jim Paris	640a680704	Increase default min amplitude in sinefit	2013-04-10 17:09:52 -04:00
Jim Paris	2e74e6cd63	Skip over data if we aren't able to process any. Change output format	2013-04-10 17:01:07 -04:00
Jim Paris	de2a794e00	Support wildcards in nilm-decimate-auto	2013-04-10 16:05:16 -04:00
Jim Paris	065a40f265	sinefit: add minimum amplitude check	2013-04-10 15:33:51 -04:00
Jim Paris	65fa43aff1	sinefit: catch all errors in sfit4	2013-04-10 14:36:50 -04:00
Jim Paris	57c23c3792	sinefit: allow user to override min/max frequency detection	2013-04-10 14:36:40 -04:00
Jim Paris	d4c8e4acb4	Include rotation in metadata	2013-04-10 14:36:05 -04:00
Jim Paris	fd1b33401f	Require a --yes argument before actually cleaning data	2013-04-09 20:13:38 -04:00
Jim Paris	4c748ec00c	Fix minor bugs	2013-04-09 20:08:25 -04:00
Jim Paris	b72d6b6908	Warn if column count is wrong for this nharm value	2013-04-09 19:59:59 -04:00
Jim Paris	80d642e52e	Change nilm-cleanup config file format, tweak output	2013-04-09 19:43:41 -04:00
Jim Paris	001b89b1d2	Support multiple shifted FFTs per period in nilm-prep. New option --nshift controls how many shifted FFT windows to perform per period. "nilm-prep -N 2" is similar to old prep behavior. Note that this is redundant information and takes up extra storage space, though.	2013-04-09 18:53:27 -04:00
Jim Paris	f978823505	Fix prep scaling and fix comments	2013-04-09 17:44:13 -04:00
Jim Paris	ffd6675979	Remove outdated code	2013-04-08 19:46:16 -04:00
Jim Paris	5b67b68fd2	Don't import matplotlib if we don't need it	2013-04-08 18:59:23 -04:00
Jim Paris	97503b73b9	Fix dependencies	2013-04-08 18:50:27 -04:00
Jim Paris	4e64c804bf	Merge branch 'binary'	2013-04-08 18:45:21 -04:00
Jim Paris	189fb9df3a	Use binary interface for copy_one too	2013-04-08 18:45:16 -04:00
Jim Paris	3323c997a7	Use the new stream_insert_numpy_context function	2013-04-08 18:39:14 -04:00
Jim Paris	e09153e34b	Use the new NumpyClient for extracting data in filter	2013-04-07 18:14:35 -04:00
Jim Paris	5c56e9d075	Remove ounused process_python function	2013-04-06 16:39:39 -04:00
Jim Paris	60f09427cf	Update decimate to use process_numpy	2013-04-06 15:56:36 -04:00
Jim Paris	d6d31190eb	Fix fromstring usage	2013-04-06 13:40:09 -04:00
Jim Paris	2ec574c59d	Use np.fromstring instead of np.loadtxt	2013-04-06 13:32:16 -04:00
Jim Paris	1988955671	Accumulate delta separately from data timestamp	2013-04-05 17:41:48 -04:00
Jim Paris	36e5af4be1	Fix data_ts when clock is updated	2013-04-05 16:40:04 -04:00
Jim Paris	ca175bd9dd	Improve nilm-insert to support deltas, etc, for accelerometer data	2013-04-05 16:13:56 -04:00
Jim Paris	aa9656bc10	Fix off-by-one error in prep rotation	2013-04-04 19:23:12 -04:00
Jim Paris	10ab2cc2de	Build nilm-prep tool	2013-04-04 19:07:18 -04:00