Rename src/ directory to nilmtools/

Split process_numpy innards into process_numpy_interval
Improve test_prep target in Makefile
2013-07-08 11:54:13 -04:00 · 2013-07-03 12:07:22 -04:00 · 2013-07-03 12:06:50 -04:00 · 2013-06-06 15:34:23 -04:00 · 2013-05-11 11:00:31 -04:00 · 2013-05-10 14:19:55 -04:00
15 changed files with 200 additions and 76 deletions
--- a/24
+++ b/24
@@ -11,18 +11,24 @@ endif
 test: test_cleanup
 test_cleanup:
-	src/cleanup.py -e extras/cleanup.cfg
+	nilmtools/cleanup.py -e extras/cleanup.cfg
-	src/cleanup.py extras/cleanup.cfg
+	nilmtools/cleanup.py extras/cleanup.cfg
 test_insert:
 	@make install >/dev/null
-	src/insert.py --file --dry-run  /test/foo </dev/null
+	nilmtools/insert.py --file --dry-run  /test/foo </dev/null
 test_copy:
 	@make install >/dev/null
-	src/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*
+	nilmtools/copy_wildcard.py -U "http://nilmdb.com/bucket/" -D /lees*
-test_prep:
+/tmp/raw.dat:
 	octave --eval 'fs = 8000;' \
 	--eval 't = (0:fs*10)*2*pi*60/fs;' \
 	--eval 'raw = transpose([sin(t); 0.3*sin(3*t)+sin(t)]);' \
 	--eval 'save("-ascii","/tmp/raw.dat","raw");'
 test_prep: /tmp/raw.dat
 	@make install >/dev/null
 	-nilmtool destroy -R /test/raw
 	-nilmtool destroy -R /test/sinefit
@@ -31,8 +37,8 @@ test_prep:
 	nilmtool create /test/sinefit float32_3
 	nilmtool create /test/prep float32_8
 	nilmtool insert -s '@0' -t -r 8000 /test/raw /tmp/raw.dat
-	src/sinefit.py -c 1 /test/raw /test/sinefit
+	nilmtools/sinefit.py -a 0.5 -c 1 /test/raw /test/sinefit
-	src/prep.py -c 2 /test/raw /test/sinefit /test/prep
+	nilmtools/prep.py -c 2 /test/raw /test/sinefit /test/prep
 	nilmtool extract -s min -e max /test/prep | head -20
 test_decimate:
@@ -40,8 +46,8 @@ test_decimate:
 	-@nilmtool destroy /lees-compressor/no-leak/raw/16 || true
 	-@nilmtool create /lees-compressor/no-leak/raw/4 float32_18 || true
 	-@nilmtool create /lees-compressor/no-leak/raw/16 float32_18 || true
-	time python src/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/1 /lees-compressor/no-leak/raw/4
+	time python nilmtools/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/1 /lees-compressor/no-leak/raw/4
-	python src/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/4 /lees-compressor/no-leak/raw/16
+	python nilmtools/decimate.py -s '2013-02-04 18:10:00' -e '2013-02-04 18:11:00' /lees-compressor/no-leak/raw/4 /lees-compressor/no-leak/raw/16
 version:
 	python setup.py version
--- a/README.txt
+++ b/README.txt
@@ -5,10 +5,10 @@ by Jim Paris <jim@jtan.com>
 Prerequisites:
  # Runtime and build environments
-  sudo apt-get install python2.7 python2.7-dev python-setuptools
+  sudo apt-get install python2.7 python2.7-dev python-setuptools python-pip
-  sudo apt-get install python-numpy python-scipy python-matplotlib
+  sudo apt-get install python-numpy python-scipy
-  nilmdb (1.5.0+)
+  nilmdb (1.6.3+)
 Install:
--- a/nilmtools/init.py
+++ b/nilmtools/init.py
--- a/nilmtools/_version.py
+++ b/nilmtools/_version.py
@@ -181,7 +181,7 @@ def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False)
 tag_prefix = "nilmtools-"
 parentdir_prefix = "nilmtools-"
-versionfile_source = "src/_version.py"
+versionfile_source = "nilmtools/_version.py"
 def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
    variables = { "refnames": git_refnames, "full": git_full }
--- a/nilmtools/cleanup.py
+++ b/nilmtools/cleanup.py
--- a/nilmtools/copy_one.py
+++ b/nilmtools/copy_one.py
--- a/nilmtools/copy_wildcard.py
+++ b/nilmtools/copy_wildcard.py
--- a/nilmtools/decimate.py
+++ b/nilmtools/decimate.py
--- a/nilmtools/decimate_auto.py
+++ b/nilmtools/decimate_auto.py
--- a/nilmtools/filter.py
+++ b/nilmtools/filter.py
@@ -236,8 +236,14 @@ class Filter(object):
        metadata = self._client_dest.stream_get_metadata(self.dest.path)
        if not self.force_metadata:
            for key in data:
-                wanted = str(data[key])
+                wanted = data[key]
                if not isinstance(wanted, basestring):
                    wanted = str(wanted)
                val = metadata.get(key, wanted)
                # Force UTF-8 encoding for comparison and display
                wanted = wanted.encode('utf-8')
                val = val.encode('utf-8')
                key = key.encode('utf-8')
                if val != wanted and self.dest.rows > 0:
                    m =  "Metadata in destination stream:\n"
                    m += "  %s = %s\n" % (key, val)
@@ -251,15 +257,75 @@ class Filter(object):
        # All good -- write the metadata in case it's not already there
        self._client_dest.stream_update_metadata(self.dest.path, data)
    # Filter processing for a single interval of data.
    def process_numpy_interval(self, interval, extractor, insert_ctx,
                               function, args = None, rows = 100000):
        """For the given 'interval' of data, extract data, process it
        through 'function', and insert the result.

        'interval' must provide 'start' and 'end' attributes; they are
        handed to 'extractor', and the interval itself is handed to
        'function'.

        'extractor' should be a function like NumpyClient.stream_extract_numpy
        It is called with self.src.path, interval.start, interval.end,
        layout = self.src.layout and maxrows = rows, and is iterated
        to obtain successive arrays.

        'insert_ctx' should be a class like StreamInserterNumpy, with member
        functions 'insert', 'send', and 'update_end'.

        See process_numpy for details on 'function', 'args', and 'rows'.

        Raises Exception if 'function' returns a negative row count,
        whether from an intermediate call or from the final call.
        """
        if args is None:
            args = []

        def check_count(count):
            # A negative count would silently index the buffer from its
            # end, so reject it the same way for every call.
            if count < 0:
                raise Exception("%s return value %s must be >= 0"
                                % (str(function), str(count)))
            return count

        insert_function = insert_ctx.insert
        old_array = np.array([])
        for new_array in extractor(self.src.path,
                                   interval.start, interval.end,
                                   layout = self.src.layout,
                                   maxrows = rows):
            # If we still had old data left, combine it
            if old_array.shape[0] != 0:
                array = np.vstack((old_array, new_array))
            else:
                array = new_array

            # Pass it to the process function
            processed = function(array, interval, args,
                                 insert_function, False)

            # Send any pending data (done before validating the return
            # value, so data inserted by 'function' is still flushed)
            insert_ctx.send()

            # Save the unprocessed parts
            old_array = array[check_count(processed):]

            # Warn if there's too much data remaining
            if old_array.shape[0] > 3 * rows:
                printf("warning: %d unprocessed rows in buffer\n",
                       old_array.shape[0])

        # Last call for this contiguous interval
        if old_array.shape[0] != 0:
            processed = check_count(function(old_array, interval, args,
                                             insert_function, True))
            if processed != old_array.shape[0]:
                # Truncate the interval we're inserting at the first
                # unprocessed data point.  This ensures that
                # we'll not miss any data when we run again later.
                insert_ctx.update_end(old_array[processed][0])
    # The main filter processing method.
    def process_numpy(self, function, args = None, rows = 100000):
-        """For all intervals that exist in self.src but don't exist in
+        """Calls process_numpy_interval for each interval that currently
-        self.dest, call 'function' with a Numpy array corresponding to
+        exists in self.src, but doesn't exist in self.dest.  It will
-        the data.  The data is converted to a Numpy array in chunks of
+        process the data in chunks as follows:
-        'rows' rows at a time.
+
        For each chunk of data, call 'function' with a Numpy array
        corresponding to the data.  The data is converted to a Numpy
        array in chunks of 'rows' rows at a time.
        'function' should be defined as:
-           def function(data, interval, args, insert_func, final)
+        # def function(data, interval, args, insert_func, final)
        'data': array of data to process -- may be empty
@@ -283,8 +349,6 @@ class Filter(object):
        being inserted will be ended at the timestamp of the first
        unprocessed data point.
        """
        if args is None:
            args = []
        extractor = NumpyClient(self.src.url).stream_extract_numpy
        inserter = NumpyClient(self.dest.url).stream_insert_numpy_context
@@ -292,47 +356,8 @@ class Filter(object):
            print "Processing", self.interval_string(interval)
            with inserter(self.dest.path,
                          interval.start, interval.end) as insert_ctx:
-                insert_function = insert_ctx.insert
+                self.process_numpy_interval(interval, extractor, insert_ctx,
-                old_array = np.array([])
+                                            function, args, rows)
                for new_array in extractor(self.src.path,
                                           interval.start, interval.end,
                                           layout = self.src.layout,
                                           maxrows = rows):
                    # If we still had old data left, combine it
                    if old_array.shape[0] != 0:
                        array = np.vstack((old_array, new_array))
                    else:
                        array = new_array
                    # Pass it to the process function
                    processed = function(array, interval, args,
                                         insert_function, False)
                    # Send any pending data
                    insert_ctx.send()
                    # Save the unprocessed parts
                    if processed >= 0:
                        old_array = array[processed:]
                    else:
                        raise Exception(
                            sprintf("%s return value %s must be >= 0",
                                    str(function), str(processed)))
                    # Warn if there's too much data remaining
                    if old_array.shape[0] > 3 * rows:
                        printf("warning: %d unprocessed rows in buffer\n",
                               old_array.shape[0])
                # Last call for this contiguous interval
                if old_array.shape[0] != 0:
                    processed = function(old_array, interval, args,
                                         insert_function, True)
                    if processed != old_array.shape[0]:
                        # Truncate the interval we're inserting at the first
                        # unprocessed data point.  This ensures that
                        # we'll not miss any data when we run again later.
                        insert_ctx.update_end(old_array[processed][0])
 def main(argv = None):
    # This is just a dummy function; actual filters can use the other
--- a/nilmtools/insert.py
+++ b/nilmtools/insert.py
--- a/nilmtools/median.py
+++ b/nilmtools/median.py
@@ -0,0 +1,43 @@
 #!/usr/bin/python
 import nilmtools.filter, scipy.signal
 def main(argv = None):
    f = nilmtools.filter.Filter()
    parser = f.setup_parser("Median Filter")
    group = parser.add_argument_group("Median filter options")
    group.add_argument("-z", "--size", action="store", type=int, default=25,
                       help = "median filter size (default %(default)s)")
    group.add_argument("-d", "--difference", action="store_true",
                       help = "store difference rather than filtered values")
    try:
        args = f.parse_args(argv)
    except nilmtools.filter.MissingDestination as e:
        print "Source is %s (%s)" % (e.src.path, e.src.layout)
        print "Destination %s doesn't exist" % (e.dest.path)
        print "You could make it with a command like:"
        print "  nilmtool -u %s create %s %s" % (e.dest.url,
                                                 e.dest.path, e.src.layout)
        raise SystemExit(1)
    meta = f.client_src.stream_get_metadata(f.src.path)
    f.check_dest_metadata({ "median_filter_source": f.src.path,
                            "median_filter_size": args.size,
                            "median_filter_difference": repr(args.difference) })
    f.process_numpy(median_filter, args = (args.size, args.difference))
 def median_filter(data, interval, args, insert, final):
    """Median-filter every column of 'data' except column 0, in place.

    'args' is a (size, diff) pair: 'size' is the kernel size passed to
    scipy.signal.medfilt, and when 'diff' is true the filtered values
    are subtracted from the column instead of replacing it.

    The modified array is passed to 'insert', and the number of rows
    in 'data' is returned, so every row is reported as processed.
    'interval' and 'final' are not used.
    """
    size, diff = args
    nrows, ncols = data.shape

    # Column 0 is left untouched (presumably the timestamps -- confirm
    # against the caller)
    for col in range(1, ncols):
        smoothed = scipy.signal.medfilt(data[:, col], size)
        if not diff:
            data[:, col] = smoothed
        else:
            data[:, col] -= smoothed

    insert(data)
    return nrows
 # Run main() only when this file is executed directly as a script.
 if __name__ == "__main__":
    main()
--- a/nilmtools/prep.py
+++ b/nilmtools/prep.py
@@ -80,7 +80,7 @@ def main(argv = None):
    f.check_dest_metadata({ "prep_raw_source": f.src.path,
                            "prep_sinefit_source": sinefit.path,
                            "prep_column": args.column,
-                            "prep_rotation": rotation })
+                            "prep_rotation": repr(rotation) })
    # Run the processing function on all data
    f.process_numpy(process, args = (client_sinefit, sinefit.path, args.column,
--- a/nilmtools/sinefit.py
+++ b/nilmtools/sinefit.py
@@ -1,13 +1,18 @@
 #!/usr/bin/python
-# Sine wave fitting.  This runs about 5x faster than realtime on raw data.
+# Sine wave fitting.
-
+from nilmdb.utils.printf import *
 import nilmtools.filter
 import nilmdb.client
 from nilmdb.utils.time import (timestamp_to_human,
                               timestamp_to_seconds,
                               seconds_to_timestamp)
 from numpy import *
 from scipy import *
 #import pylab as p
 import operator
 import sys
 def main(argv = None):
    f = nilmtools.filter.Filter()
@@ -59,12 +64,40 @@ def main(argv = None):
    f.process_numpy(process, args = (args.column, args.frequency, args.min_amp,
                                     args.min_freq, args.max_freq))
 class SuppressibleWarning(object):
    """Warning printer that stops printing after too many warnings.

    The first 'maxcount' warnings since the last reset are written to
    stderr.  Later ones are only counted; once 'maxsuppress' of them
    have been swallowed, a "(N warnings suppressed)" summary is written
    and the counter starts over.
    """
    def __init__(self, maxcount = 10, maxsuppress = 100):
        self.maxcount = maxcount
        self.maxsuppress = maxsuppress
        self.count = 0
        self.last_msg = ""

    def _write(self, sec, msg):
        """Write 'msg' to stderr, prefixed with the human-readable form
        of 'sec' (a time in seconds) unless 'sec' is None."""
        # Compare against None explicitly: a timestamp of 0 seconds is
        # valid and must still get its prefix.
        if sec is not None:
            now = timestamp_to_human(seconds_to_timestamp(sec)) + ": "
        else:
            now = ""
        sys.stderr.write(now + msg)

    def warn(self, msg, seconds = None):
        """Count one warning; print it if the limit isn't exceeded yet."""
        self.count += 1
        if self.count <= self.maxcount:
            self._write(seconds, msg)
        # Flush a summary once enough warnings have been swallowed
        if (self.count - self.maxcount) >= self.maxsuppress:
            self.reset(seconds)

    def reset(self, seconds = None):
        """Report how many warnings were suppressed since the last
        reset (if any), then restart the count."""
        if self.count > self.maxcount:
            self._write(seconds, "(%d warnings suppressed)\n"
                        % (self.count - self.maxcount))
        self.count = 0
 def process(data, interval, args, insert_function, final):
    (column, f_expected, a_min, f_min, f_max) = args
    rows = data.shape[0]
    # Estimate sampling frequency from timestamps
-    fs = 1e6 * (rows-1) / (data[-1][0] - data[0][0])
+    fs = (rows-1) / (timestamp_to_seconds(data[-1][0]) -
                     timestamp_to_seconds(data[0][0]))
    # Pull out about 3.5 periods of data at once;
    # we'll expect to match 3 zero crossings in each window
@@ -74,26 +107,31 @@ def process(data, interval, args, insert_function, final):
    if rows < N:
        return 0
    warn = SuppressibleWarning(3, 1000)
    # Process overlapping windows
    start = 0
    num_zc = 0
    last_inserted_timestamp = None
    while start < (rows - N):
        this = data[start:start+N, column]
-        t_min = data[start, 0]/1e6
+        t_min = timestamp_to_seconds(data[start, 0])
-        t_max = data[start+N-1, 0]/1e6
+        t_max = timestamp_to_seconds(data[start+N-1, 0])
        # Do 4-parameter sine wave fit
        (A, f0, phi, C) = sfit4(this, fs)
        # Check bounds.  If frequency is too crazy, ignore this window
        if f0 < f_min or f0 > f_max:
-            print "frequency", f0, "outside valid range", f_min, "-", f_max
+            warn.warn(sprintf("frequency %s outside valid range %s - %s\n",
                              str(f0), str(f_min), str(f_max)), t_min)
            start += N
            continue
        # If amplitude is too low, results are probably just noise
        if A < a_min:
-            print "amplitude", A, "below minimum threshold", a_min
+            warn.warn(sprintf("amplitude %s below minimum threshold %s\n",
                              str(A), str(a_min)), t_min)
            start += N
            continue
@@ -116,7 +154,13 @@ def process(data, interval, args, insert_function, final):
        while zc_n < (N - period_n/2):
            #p.plot(zc_n, C, 'ro')
            t = t_min + zc_n / fs
-            insert_function([[t * 1e6, f0, A, C]])
+            if (last_inserted_timestamp is None or
                t > last_inserted_timestamp):
                insert_function([[seconds_to_timestamp(t), f0, A, C]])
                last_inserted_timestamp = t
                warn.reset(t)
            else:
                warn.warn("timestamp overlap\n", t)
            num_zc += 1
            last_zc = zc_n
            zc_n += period_n
@@ -134,7 +178,13 @@ def process(data, interval, args, insert_function, final):
        start = int(round(start + advance))
    # Return the number of rows we've processed
-    print "Marked", num_zc, "zero-crossings in", start, "rows"
+    warn.reset(last_inserted_timestamp)
    if last_inserted_timestamp:
        now = timestamp_to_human(seconds_to_timestamp(
            last_inserted_timestamp)) + ": "
    else:
        now = ""
    printf("%sMarked %d zero-crossings in %d rows\n", now, num_zc, start)
    return start
 def sfit4(data, fs):
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,7 @@ except ImportError:
 # Versioneer manages version numbers from git tags.
 # https://github.com/warner/python-versioneer
 import versioneer
-versioneer.versionfile_source = 'src/_version.py'
+versioneer.versionfile_source = 'nilmtools/_version.py'
 versioneer.versionfile_build = 'nilmtools/_version.py'
 versioneer.tag_prefix = 'nilmtools-'
 versioneer.parentdir_prefix = 'nilmtools-'
@@ -61,14 +61,13 @@ setup(name='nilmtools',
      long_description = "NILM Database Tools",
      license = "Proprietary",
      author_email = 'jim@jtan.com',
-      install_requires = [ 'nilmdb >= 1.6.0',
+      install_requires = [ 'nilmdb >= 1.6.3',
                           'numpy',
                           'scipy',
-                           'matplotlib',
+                           #'matplotlib',
                           ],
      packages = [ 'nilmtools',
                   ],
      package_dir = { 'nilmtools': 'src' },
      entry_points = {
          'console_scripts': [
              'nilm-decimate = nilmtools.decimate:main',
@@ -79,6 +78,7 @@ setup(name='nilmtools',
              'nilm-copy-wildcard = nilmtools.copy_wildcard:main',
              'nilm-sinefit = nilmtools.sinefit:main',
              'nilm-cleanup = nilmtools.cleanup:main',
              'nilm-median = nilmtools.median:main',
              ],
          },
      zip_safe = False,
Author	SHA1	Message	Date
Jim Paris	5d83d93019	Rename src/ directory to nilmtools/	2013-07-08 11:54:13 -04:00
Jim Paris	5f847a0513	Split process_numpy innards into process_numpy_interval	2013-07-03 12:07:22 -04:00
Jim Paris	29cd7eb6c7	Improve test_prep target in Makefile	2013-07-03 12:06:50 -04:00
Jim Paris	62c8af41ea	Cleanup comments	2013-06-06 15:34:23 -04:00
Jim Paris	4f6bc48619	sinefit: include timestamps on marking output too	2013-05-11 11:00:31 -04:00
Jim Paris	cf9eb0ed48	Improve sinefit resiliency	2013-05-10 14:19:55 -04:00
Jim Paris	32066fc260	Remove hard matplotlib dependency	2013-05-09 13:17:36 -04:00
Jim Paris	739da3f973	Add median filter	2013-05-08 23:36:50 -04:00
Jim Paris	83ad18ebf6	Fix non-string arguments to metadata_check	2013-05-08 12:49:38 -04:00
Jim Paris	c76d527f95	Fix unicode handling in filter metadata match	2013-05-07 12:40:53 -04:00
Jim Paris	b8a73278e7	Always store metadata rotation as a string	2013-04-29 14:25:11 -04:00