Compare commits

...

15 Commits

SHA1 Message Date
5d83d93019 Rename src/ directory to nilmtools/ 2013-07-08 11:54:13 -04:00
5f847a0513 Split process_numpy innards into process_numpy_interval 2013-07-03 12:07:22 -04:00
29cd7eb6c7 Improve test_prep target in Makefile 2013-07-03 12:06:50 -04:00
62c8af41ea Cleanup comments 2013-06-06 15:34:23 -04:00
4f6bc48619 sinefit: include timestamps on marking output too 2013-05-11 11:00:31 -04:00
cf9eb0ed48 Improve sinefit resiliency 2013-05-10 14:19:55 -04:00
32066fc260 Remove hard matplotlib dependency 2013-05-09 13:17:36 -04:00
739da3f973 Add median filter 2013-05-08 23:36:50 -04:00
83ad18ebf6 Fix non-string arguments to metadata_check 2013-05-08 12:49:38 -04:00
c76d527f95 Fix unicode handling in filter metadata match 2013-05-07 12:40:53 -04:00
b8a73278e7 Always store metadata rotation as a string 2013-04-29 14:25:11 -04:00
ce0691d6c4 sinefit: Change sfit4 to fit to \sin instead of \cos
And adjust the period locator accordingly.
Fitting \sin is the same mathematically, it's just conceptually more
straightforward since we're locating zero crossings anyway.
2013-04-27 18:12:20 -04:00
4da658e960 sinefit: move initial estimate into the main iteration loop
Just a little less code.  Same results.
2013-04-27 17:50:23 -04:00
8ab31eafc2 Allow shorthand method for creating an option-less parser.
This is mostly just intended to make a simple filter example shorter.
2013-04-21 16:53:28 -04:00
979ab13bff Force fs to be a float in sfit4 2013-04-17 17:58:15 -04:00
15 changed files with 216 additions and 90 deletions

Makefile

@@ -11,18 +11,24 @@ endif
 test: test_cleanup

 test_cleanup:
-	src/cleanup.py -e extras/cleanup.cfg
-	src/cleanup.py extras/cleanup.cfg
+	nilmtools/cleanup.py -e extras/cleanup.cfg
+	nilmtools/cleanup.py extras/cleanup.cfg

 test_insert:
 	@make install >/dev/null
-	src/insert.py --file --dry-run /test/foo </dev/null
+	nilmtools/insert.py --file --dry-run /test/foo </dev/null

 test_copy:
 	@make install >/dev/null
-	src/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*
+	nilmtools/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*

-test_prep:
+/tmp/raw.dat:
+	octave --eval 'fs = 8000;' \
+	       --eval 't = (0:fs*10)*2*pi*60/fs;' \
+	       --eval 'raw = transpose([sin(t); 0.3*sin(3*t)+sin(t)]);' \
+	       --eval 'save("-ascii","/tmp/raw.dat","raw");'
+
+test_prep: /tmp/raw.dat
 	@make install >/dev/null
 	-nilmtool destroy -R /test/raw
 	-nilmtool destroy -R /test/sinefit
@@ -31,8 +37,8 @@ test_prep:
 	nilmtool create /test/sinefit float32_3
 	nilmtool create /test/prep float32_8
 	nilmtool insert -s '@0' -t -r 8000 /test/raw /tmp/raw.dat
-	src/sinefit.py -c 1 /test/raw /test/sinefit
-	src/prep.py -c 2 /test/raw /test/sinefit /test/prep
+	nilmtools/sinefit.py -a 0.5 -c 1 /test/raw /test/sinefit
+	nilmtools/prep.py -c 2 /test/raw /test/sinefit /test/prep
 	nilmtool extract -s min -e max /test/prep | head -20

 test_decimate:
@@ -40,8 +46,8 @@ test_decimate:
 	-@nilmtool destroy /lees-compressor/no-leak/raw/16 || true
 	-@nilmtool create /lees-compressor/no-leak/raw/4 float32_18 || true
 	-@nilmtool create /lees-compressor/no-leak/raw/16 float32_18 || true
-	time python src/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/1 /lees-compressor/no-leak/raw/4
-	python src/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/4 /lees-compressor/no-leak/raw/16
+	time python nilmtools/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/1 /lees-compressor/no-leak/raw/4
+	python nilmtools/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/4 /lees-compressor/no-leak/raw/16

 version:
 	python setup.py version
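
For readers without octave, a rough numpy equivalent of the one-liner that builds the test input above (same data: 10 s at 8 kHz, a 60 Hz sine in column 1, the same sine plus a 30% third harmonic in column 2; the exact text format of octave's -ascii output may differ):

    import numpy as np
    fs = 8000
    t = np.arange(fs*10 + 1) * 2*np.pi*60/fs       # (0:fs*10)*2*pi*60/fs
    raw = np.transpose([np.sin(t), 0.3*np.sin(3*t) + np.sin(t)])
    np.savetxt("/tmp/raw.dat", raw)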

README

@@ -5,10 +5,10 @@ by Jim Paris <jim@jtan.com>
 Prerequisites:

 	# Runtime and build environments
-	sudo apt-get install python2.7 python2.7-dev python-setuptools
-	sudo apt-get install python-numpy python-scipy python-matplotlib
+	sudo apt-get install python2.7 python2.7-dev python-setuptools python-pip
+	sudo apt-get install python-numpy python-scipy

-	nilmdb (1.5.0+)
+	nilmdb (1.6.3+)

 Install:

nilmtools/_version.py

@@ -181,7 +181,7 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False)
 tag_prefix = "nilmtools-"
 parentdir_prefix = "nilmtools-"
-versionfile_source = "src/_version.py"
+versionfile_source = "nilmtools/_version.py"

 def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
     variables = { "refnames": git_refnames, "full": git_full }

nilmtools/filter.py

@@ -67,7 +67,7 @@ def get_stream_info(client, path):
 class Filter(object):

-    def __init__(self):
+    def __init__(self, parser_description = None):
         self._parser = None
         self._client_src = None
         self._client_dest = None
@@ -78,6 +78,9 @@ class Filter(object):
         self.end = None
         self.interhost = False
         self.force_metadata = False
+        if parser_description is not None:
+            self.setup_parser(parser_description)
+            self.parse_args()

     @property
     def client_src(self):
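
The effect of the new parser_description shorthand, sketched (the description string here is made up): a filter that needs no extra options can now be built in one line, since __init__ calls setup_parser() and parse_args() itself:

    import nilmtools.filter

    # One-liner enabled by this change; the command line is parsed
    # immediately, so f.src and f.dest are ready to use afterwards.
    f = nilmtools.filter.Filter("Example filter with no extra options")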
@@ -233,8 +236,14 @@ class Filter(object):
         metadata = self._client_dest.stream_get_metadata(self.dest.path)
         if not self.force_metadata:
             for key in data:
-                wanted = str(data[key])
+                wanted = data[key]
+                if not isinstance(wanted, basestring):
+                    wanted = str(wanted)
                 val = metadata.get(key, wanted)
+                # Force UTF-8 encoding for comparison and display
+                wanted = wanted.encode('utf-8')
+                val = val.encode('utf-8')
+                key = key.encode('utf-8')
                 if val != wanted and self.dest.rows > 0:
                     m = "Metadata in destination stream:\n"
                     m += "  %s = %s\n" % (key, val)
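
A sketch of what the two metadata fixes allow (keys and values here are made up): non-string values are stringified before comparison, and unicode no longer trips up the check — which is exactly what nilmtools/median.py below relies on when it passes an integer size:

    # Integers are converted with str(); unicode is encoded to
    # UTF-8 for both the comparison and the error display.
    f.check_dest_metadata({ "median_filter_size": 25,
                            "median_filter_source": u"/test/ra\u00eb" })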
@@ -248,15 +257,75 @@ class Filter(object):
         # All good -- write the metadata in case it's not already there
         self._client_dest.stream_update_metadata(self.dest.path, data)

+    # Filter processing for a single interval of data.
+    def process_numpy_interval(self, interval, extractor, insert_ctx,
+                               function, args = None, rows = 100000):
+        """For the given 'interval' of data, extract data, process it
+        through 'function', and insert the result.
+
+        'extractor' should be a function like NumpyClient.stream_extract_numpy
+
+        'insert_ctx' should be a class like StreamInserterNumpy, with member
+        functions 'insert', 'send', and 'update_end'.
+
+        See process_numpy for details on 'function', 'args', and 'rows'.
+        """
+        if args is None:
+            args = []
+
+        insert_function = insert_ctx.insert
+        old_array = np.array([])
+        for new_array in extractor(self.src.path,
+                                   interval.start, interval.end,
+                                   layout = self.src.layout,
+                                   maxrows = rows):
+            # If we still had old data left, combine it
+            if old_array.shape[0] != 0:
+                array = np.vstack((old_array, new_array))
+            else:
+                array = new_array
+
+            # Pass it to the process function
+            processed = function(array, interval, args,
+                                 insert_function, False)
+
+            # Send any pending data
+            insert_ctx.send()
+
+            # Save the unprocessed parts
+            if processed >= 0:
+                old_array = array[processed:]
+            else:
+                raise Exception(
+                    sprintf("%s return value %s must be >= 0",
+                            str(function), str(processed)))
+
+            # Warn if there's too much data remaining
+            if old_array.shape[0] > 3 * rows:
+                printf("warning: %d unprocessed rows in buffer\n",
+                       old_array.shape[0])
+
+        # Last call for this contiguous interval
+        if old_array.shape[0] != 0:
+            processed = function(old_array, interval, args,
+                                 insert_function, True)
+            if processed != old_array.shape[0]:
+                # Truncate the interval we're inserting at the first
+                # unprocessed data point.  This ensures that
+                # we'll not miss any data when we run again later.
+                insert_ctx.update_end(old_array[processed][0])

+    # The main filter processing method.
     def process_numpy(self, function, args = None, rows = 100000):
-        """For all intervals that exist in self.src but don't exist in
-        self.dest, call 'function' with a Numpy array corresponding to
-        the data.  The data is converted to a Numpy array in chunks of
-        'rows' rows at a time.
+        """Calls process_numpy_interval for each interval that currently
+        exists in self.src, but doesn't exist in self.dest.  It will
+        process the data in chunks as follows:
+
+        For each chunk of data, call 'function' with a Numpy array
+        corresponding to the data.  The data is converted to a Numpy
+        array in chunks of 'rows' rows at a time.

         'function' should be defined as:
-        def function(data, interval, args, insert_func, final)
+        # def function(data, interval, args, insert_func, final)

         'data': array of data to process -- may be empty
@@ -280,8 +349,6 @@
           being inserted will be ended at the timestamp of the first
           unprocessed data point.
         """
-        if args is None:
-            args = []
         extractor = NumpyClient(self.src.url).stream_extract_numpy
         inserter = NumpyClient(self.dest.url).stream_insert_numpy_context
@@ -289,47 +356,8 @@
             print "Processing", self.interval_string(interval)
             with inserter(self.dest.path,
                           interval.start, interval.end) as insert_ctx:
-                insert_function = insert_ctx.insert
-                old_array = np.array([])
-                for new_array in extractor(self.src.path,
-                                           interval.start, interval.end,
-                                           layout = self.src.layout,
-                                           maxrows = rows):
-                    # If we still had old data left, combine it
-                    if old_array.shape[0] != 0:
-                        array = np.vstack((old_array, new_array))
-                    else:
-                        array = new_array
-
-                    # Pass it to the process function
-                    processed = function(array, interval, args,
-                                         insert_function, False)
-
-                    # Send any pending data
-                    insert_ctx.send()
-
-                    # Save the unprocessed parts
-                    if processed >= 0:
-                        old_array = array[processed:]
-                    else:
-                        raise Exception(
-                            sprintf("%s return value %s must be >= 0",
-                                    str(function), str(processed)))
-
-                    # Warn if there's too much data remaining
-                    if old_array.shape[0] > 3 * rows:
-                        printf("warning: %d unprocessed rows in buffer\n",
-                               old_array.shape[0])
-
-                # Last call for this contiguous interval
-                if old_array.shape[0] != 0:
-                    processed = function(old_array, interval, args,
-                                         insert_function, True)
-                    if processed != old_array.shape[0]:
-                        # Truncate the interval we're inserting at the first
-                        # unprocessed data point.  This ensures that
-                        # we'll not miss any data when we run again later.
-                        insert_ctx.update_end(old_array[processed][0])
+                self.process_numpy_interval(interval, extractor, insert_ctx,
+                                            function, args, rows)

 def main(argv = None):
     # This is just a dummy function; actual filters can use the other
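
The refactor leaves the processing contract unchanged. A minimal processing function (hypothetical, following the docstring above and the median filter below) consumes a chunk, inserts its output, and returns the number of rows it handled; returning fewer rows carries the remainder into the next chunk:

    def scale(data, interval, args, insert_func, final):
        # data[:, 0] holds timestamps; the remaining columns hold values.
        (factor,) = args
        data[:, 1:] *= factor
        insert_func(data)       # hand the processed rows to the inserter
        return data.shape[0]    # all rows consumed, nothing carried over

    # f.process_numpy(scale, args = (2.0,))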

nilmtools/median.py (new executable file)

@@ -0,0 +1,43 @@
+#!/usr/bin/python
+import nilmtools.filter, scipy.signal
+
+def main(argv = None):
+    f = nilmtools.filter.Filter()
+    parser = f.setup_parser("Median Filter")
+    group = parser.add_argument_group("Median filter options")
+    group.add_argument("-z", "--size", action="store", type=int, default=25,
+                       help = "median filter size (default %(default)s)")
+    group.add_argument("-d", "--difference", action="store_true",
+                       help = "store difference rather than filtered values")
+
+    try:
+        args = f.parse_args(argv)
+    except nilmtools.filter.MissingDestination as e:
+        print "Source is %s (%s)" % (e.src.path, e.src.layout)
+        print "Destination %s doesn't exist" % (e.dest.path)
+        print "You could make it with a command like:"
+        print "  nilmtool -u %s create %s %s" % (e.dest.url,
+                                                 e.dest.path, e.src.layout)
+        raise SystemExit(1)
+
+    meta = f.client_src.stream_get_metadata(f.src.path)
+    f.check_dest_metadata({ "median_filter_source": f.src.path,
+                            "median_filter_size": args.size,
+                            "median_filter_difference": repr(args.difference) })
+
+    f.process_numpy(median_filter, args = (args.size, args.difference))
+
+def median_filter(data, interval, args, insert, final):
+    (size, diff) = args
+    (rows, cols) = data.shape
+    for i in range(cols - 1):
+        filtered = scipy.signal.medfilt(data[:, i+1], size)
+        if diff:
+            data[:, i+1] -= filtered
+        else:
+            data[:, i+1] = filtered
+    insert(data)
+    return rows
+
+if __name__ == "__main__":
+    main()
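
A hypothetical invocation of the new filter (stream paths made up, and assuming the usual source/destination positional arguments the other filters take); nilm-median is the console script registered in setup.py below, and main() also accepts an argv list directly:

    import nilmtools.median
    # Equivalent to: nilm-median -z 9 /test/raw /test/raw-median
    nilmtools.median.main(["-z", "9", "/test/raw", "/test/raw-median"])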

nilmtools/prep.py

@@ -80,7 +80,7 @@ def main(argv = None):
     f.check_dest_metadata({ "prep_raw_source": f.src.path,
                             "prep_sinefit_source": sinefit.path,
                             "prep_column": args.column,
-                            "prep_rotation": rotation })
+                            "prep_rotation": repr(rotation) })

     # Run the processing function on all data
     f.process_numpy(process, args = (client_sinefit, sinefit.path, args.column,

nilmtools/sinefit.py

@@ -1,13 +1,18 @@
 #!/usr/bin/python

-# Sine wave fitting.  This runs about 5x faster than realtime on raw data.
+# Sine wave fitting.

 from nilmdb.utils.printf import *
 import nilmtools.filter
 import nilmdb.client
+from nilmdb.utils.time import (timestamp_to_human,
+                               timestamp_to_seconds,
+                               seconds_to_timestamp)

 from numpy import *
 from scipy import *
 #import pylab as p
 import operator
+import sys

 def main(argv = None):
     f = nilmtools.filter.Filter()
@@ -59,12 +64,40 @@ def main(argv = None):
     f.process_numpy(process, args = (args.column, args.frequency, args.min_amp,
                                      args.min_freq, args.max_freq))

+class SuppressibleWarning(object):
+    def __init__(self, maxcount = 10, maxsuppress = 100):
+        self.maxcount = maxcount
+        self.maxsuppress = maxsuppress
+        self.count = 0
+        self.last_msg = ""
+
+    def _write(self, sec, msg):
+        if sec:
+            now = timestamp_to_human(seconds_to_timestamp(sec)) + ": "
+        else:
+            now = ""
+        sys.stderr.write(now + msg)
+
+    def warn(self, msg, seconds = None):
+        self.count += 1
+        if self.count <= self.maxcount:
+            self._write(seconds, msg)
+        if (self.count - self.maxcount) >= self.maxsuppress:
+            self.reset(seconds)
+
+    def reset(self, seconds = None):
+        if self.count > self.maxcount:
+            self._write(seconds, sprintf("(%d warnings suppressed)\n",
+                                         self.count - self.maxcount))
+        self.count = 0
+
 def process(data, interval, args, insert_function, final):
     (column, f_expected, a_min, f_min, f_max) = args
     rows = data.shape[0]

     # Estimate sampling frequency from timestamps
-    fs = 1e6 * (rows-1) / (data[-1][0] - data[0][0])
+    fs = (rows-1) / (timestamp_to_seconds(data[-1][0]) -
+                     timestamp_to_seconds(data[0][0]))

     # Pull out about 3.5 periods of data at once;
     # we'll expect to match 3 zero crossings in each window
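
How SuppressibleWarning behaves, as a sketch (counts made up): the first maxcount messages are written to stderr; once maxsuppress more have been swallowed, a summary line is emitted and the counter starts over:

    warn = SuppressibleWarning(maxcount = 3, maxsuppress = 1000)
    for i in range(1200):
        warn.warn("frequency out of range\n")
    # stderr so far: the first 3 warnings, then "(1000 warnings suppressed)",
    # then 3 more warnings as the counter restarts.
    warn.reset()   # flushes a final "(194 warnings suppressed)" summary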
@@ -74,36 +107,41 @@ def process(data, interval, args, insert_function, final):
     if rows < N:
         return 0

+    warn = SuppressibleWarning(3, 1000)
+
     # Process overlapping windows
     start = 0
     num_zc = 0
+    last_inserted_timestamp = None

     while start < (rows - N):
         this = data[start:start+N, column]
-        t_min = data[start, 0]/1e6
-        t_max = data[start+N-1, 0]/1e6
+        t_min = timestamp_to_seconds(data[start, 0])
+        t_max = timestamp_to_seconds(data[start+N-1, 0])

         # Do 4-parameter sine wave fit
         (A, f0, phi, C) = sfit4(this, fs)

         # Check bounds.  If frequency is too crazy, ignore this window
         if f0 < f_min or f0 > f_max:
-            print "frequency", f0, "outside valid range", f_min, "-", f_max
+            warn.warn(sprintf("frequency %s outside valid range %s - %s\n",
+                              str(f0), str(f_min), str(f_max)), t_min)
             start += N
             continue

         # If amplitude is too low, results are probably just noise
         if A < a_min:
-            print "amplitude", A, "below minimum threshold", a_min
+            warn.warn(sprintf("amplitude %s below minimum threshold %s\n",
+                              str(A), str(a_min)), t_min)
             start += N
             continue

         #p.plot(arange(N), this)
-        #p.plot(arange(N), A * cos(f0/fs * 2 * pi * arange(N) + phi) + C, 'g')
+        #p.plot(arange(N), A * sin(f0/fs * 2 * pi * arange(N) + phi) + C, 'g')

-        # Period starts when the argument of cosine is 3*pi/2 degrees,
+        # Period starts when the argument of sine is 0 degrees,
         # so we're looking for sample number:
-        #   n = (3 * pi / 2 - phi) / (f0/fs * 2 * pi)
-        zc_n = (3 * pi / 2 - phi) / (f0 / fs * 2 * pi)
+        #   n = (0 - phi) / (f0/fs * 2 * pi)
+        zc_n = (0 - phi) / (f0 / fs * 2 * pi)
         period_n = fs/f0

         # Add periods to make N positive
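
(The arithmetic behind the new locator: the fitted model is A * sin(f0/fs * 2*pi*n + phi) + C, and a period starts wherever the sine's argument is a multiple of 2*pi. Solving f0/fs * 2*pi * n + phi = 0 gives the zc_n above; since one period spans fs/f0 samples, whole periods are added until the crossing lands inside the window.)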
@@ -116,7 +154,13 @@ def process(data, interval, args, insert_function, final):
         while zc_n < (N - period_n/2):
             #p.plot(zc_n, C, 'ro')
             t = t_min + zc_n / fs
-            insert_function([[t * 1e6, f0, A, C]])
+            if (last_inserted_timestamp is None or
+                t > last_inserted_timestamp):
+                insert_function([[seconds_to_timestamp(t), f0, A, C]])
+                last_inserted_timestamp = t
+                warn.reset(t)
+            else:
+                warn.warn("timestamp overlap\n", t)
             num_zc += 1
             last_zc = zc_n
             zc_n += period_n
@@ -134,7 +178,13 @@ def process(data, interval, args, insert_function, final):
         start = int(round(start + advance))

     # Return the number of rows we've processed
-    print "Marked", num_zc, "zero-crossings in", start, "rows"
+    warn.reset(last_inserted_timestamp)
+    if last_inserted_timestamp:
+        now = timestamp_to_human(seconds_to_timestamp(
+            last_inserted_timestamp)) + ": "
+    else:
+        now = ""
+    printf("%sMarked %d zero-crossings in %d rows\n", now, num_zc, start)
     return start

 def sfit4(data, fs):
@@ -149,15 +199,15 @@ def sfit4(data, fs):
     Output:
       Parameters [A, f0, phi, C] to fit the equation
-        x[n] = A * cos(f0/fs * 2 * pi * n + phi) + C
+        x[n] = A * sin(f0/fs * 2 * pi * n + phi) + C
       where n is sample number.  Or, as a function of time:
-        x(t) = A * cos(f0 * 2 * pi * t + phi) + C
+        x(t) = A * sin(f0 * 2 * pi * t + phi) + C

       by Jim Paris
       (Verified to match sfit4.m)
     """
     N = len(data)
-    t = linspace(0, (N-1) / fs, N)
+    t = linspace(0, (N-1) / float(fs), N)

     ## Estimate frequency using FFT (step b)
     Fc = fft(data)
@@ -182,18 +232,17 @@ def sfit4(data, fs):
     i = arccos((Z2*cos(ni2) - Z1*cos(ni1)) / (Z2-Z1)) / n

     # Convert to Hz
-    f0 = i * fs / N
+    f0 = i * float(fs) / N

     # Fit it.  We'll catch exceptions here and just returns zeros
     # if something fails with the least squares fit, etc.
     try:
         # first guess for A0, B0 using 3-parameter fit (step c)
         s = zeros(3)
         w = 2*pi*f0
-        D = c_[cos(w*t), sin(w*t), ones(N)]
-        s = linalg.lstsq(D, data)[0]

-        # Now iterate 6 times (step i)
-        for idx in range(6):
+        # Now iterate 7 times (step b, plus 6 iterations of step i)
+        for idx in range(7):
             D = c_[cos(w*t), sin(w*t), ones(N),
                    -s[0] * t * sin(w*t) + s[1] * t * cos(w*t) ] # eqn B.16
             s = linalg.lstsq(D, data)[0] # eqn B.18
@@ -202,7 +251,7 @@ def sfit4(data, fs):
         ## Extract results
         A = sqrt(s[0]*s[0] + s[1]*s[1]) # eqn B.21
         f0 = w / (2*pi)
-        phi = -arctan2(s[1], s[0]) # eqn B.22
+        phi = arctan2(s[0], s[1]) # eqn B.22 (flipped for sin instead of cos)
         C = s[2]
         return (A, f0, phi, C)
     except Exception as e:
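
The flipped phase extraction follows from A * sin(w*t + phi) = A*sin(phi) * cos(w*t) + A*cos(phi) * sin(w*t): with basis columns [cos, sin, 1], s[0] = A*sin(phi) and s[1] = A*cos(phi), hence phi = arctan2(s[0], s[1]). A quick self-check of the sin-based fit on synthetic data (parameters made up; assumes sfit4 is importable from nilmtools.sinefit):

    import numpy as np
    from nilmtools.sinefit import sfit4

    fs = 8000.0
    n = np.arange(8000)        # one second of samples
    A, f0, phi, C = 1.5, 60.0, 0.3, 2.0
    x = A * np.sin(f0/fs * 2*np.pi*n + phi) + C
    print sfit4(x, fs)         # expect roughly (1.5, 60.0, 0.3, 2.0)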

setup.py

@@ -30,7 +30,7 @@ except ImportError:
 # Versioneer manages version numbers from git tags.
 # https://github.com/warner/python-versioneer
 import versioneer
-versioneer.versionfile_source = 'src/_version.py'
+versioneer.versionfile_source = 'nilmtools/_version.py'
 versioneer.versionfile_build = 'nilmtools/_version.py'
 versioneer.tag_prefix = 'nilmtools-'
 versioneer.parentdir_prefix = 'nilmtools-'
@@ -61,14 +61,13 @@ setup(name='nilmtools',
       long_description = "NILM Database Tools",
       license = "Proprietary",
       author_email = 'jim@jtan.com',
-      install_requires = [ 'nilmdb >= 1.6.0',
+      install_requires = [ 'nilmdb >= 1.6.3',
                            'numpy',
                            'scipy',
-                           'matplotlib',
+                           #'matplotlib',
                          ],
       packages = [ 'nilmtools',
                  ],
-      package_dir = { 'nilmtools': 'src' },
       entry_points = {
           'console_scripts': [
               'nilm-decimate = nilmtools.decimate:main',
@@ -79,6 +78,7 @@ setup(name='nilmtools',
               'nilm-copy-wildcard = nilmtools.copy_wildcard:main',
               'nilm-sinefit = nilmtools.sinefit:main',
               'nilm-cleanup = nilmtools.cleanup:main',
+              'nilm-median = nilmtools.median:main',
           ],
       },
       zip_safe = False,