Compare commits

...

7 Commits

6 changed files with 19 additions and 92 deletions

View File

@@ -8,7 +8,7 @@ Prerequisites:
sudo apt-get install python2.7 python2.7-dev python-setuptools
sudo apt-get install python-numpy python-scipy python-matplotlib
nilmdb (1.3.1+)
nilmdb (1.5.0+)
Install:

View File

@@ -61,7 +61,7 @@ setup(name='nilmtools',
long_description = "NILM Database Tools",
license = "Proprietary",
author_email = 'jim@jtan.com',
install_requires = [ 'nilmdb >= 1.4.6',
install_requires = [ 'nilmdb >= 1.5.0',
'numpy',
'scipy',
'matplotlib',

View File

@@ -5,6 +5,7 @@
import nilmtools.filter
import nilmdb.client
from nilmdb.client.numpyclient import NumpyClient
import numpy as np
import sys
@@ -27,14 +28,14 @@ def main(argv = None):
meta = f.client_src.stream_get_metadata(f.src.path)
f.check_dest_metadata(meta)
# Copy all rows of data as ASCII strings
extractor = nilmdb.client.Client(f.src.url).stream_extract
inserter = nilmdb.client.Client(f.dest.url).stream_insert_context
# Copy all rows of data using the faster Numpy interfaces
extractor = NumpyClient(f.src.url).stream_extract_numpy
inserter = NumpyClient(f.dest.url).stream_insert_numpy_context
for i in f.intervals():
print "Processing", f.interval_string(i)
with inserter(f.dest.path, i.start, i.end) as insert_ctx:
for row in extractor(f.src.path, i.start, i.end):
insert_ctx.insert(row + "\n")
for data in extractor(f.src.path, i.start, i.end):
insert_ctx.insert(data)
if __name__ == "__main__":
main()

View File

@@ -71,7 +71,7 @@ def decimate(data, interval, args, insert_function, final):
data = data[:n,:]
# Reshape it into 3D so we can process 'factor' rows at a time
data.shape = (n // factor, factor, m)
data = data.reshape(n // factor, factor, m)
# Fill the result
out = np.c_[ np.mean(data[:,:,mean_col], axis=1),

View File

@@ -4,6 +4,7 @@ from __future__ import absolute_import
import nilmdb.client
from nilmdb.client import Client
from nilmdb.client.numpyclient import NumpyClient
from nilmdb.utils.printf import *
from nilmdb.utils.time import (parse_time, timestamp_to_human,
timestamp_to_seconds)
@@ -247,72 +248,7 @@ class Filter(object):
# All good -- write the metadata in case it's not already there
self._client_dest.stream_update_metadata(self.dest.path, data)
# Main processing helper
def process_python(self, function, rows, args = None, partial = False):
"""Process data in chunks of 'rows' data at a time.
This provides data as nested Python lists and expects the same
back.
function: function to process the data
rows: maximum number of rows to pass to 'function' at once
args: tuple containing extra arguments to pass to 'function'
partial: if true, less than 'rows' may be passed to 'function'.
if false, partial data at the end of an interval will
be dropped.
'function' should be defined like:
function(data, *args)
It will be passed a list containing up to 'rows' rows of
data from the source stream, and any arguments passed in
'args'. It should transform the data as desired, and return a
new list of data, which will be inserted into the destination
stream.
"""
if args is None:
args = []
extractor = Client(self.src.url).stream_extract
inserter = Client(self.dest.url).stream_insert_context
# Parse input data.  We use homogeneous types for now, which
# means the timestamp type will be either float or int.
if "int" in self.src.layout_type:
parser = lambda line: [ int(x) for x in line.split() ]
else:
parser = lambda line: [ float(x) for x in line.split() ]
# Format output data.
formatter = lambda row: " ".join([repr(x) for x in row]) + "\n"
for interval in self.intervals():
print "Processing", self.interval_string(interval)
with inserter(self.dest.path,
interval.start, interval.end) as insert_ctx:
src_array = []
for line in extractor(self.src.path,
interval.start, interval.end):
# Read in data
src_array.append([ float(x) for x in line.split() ])
if len(src_array) == rows:
# Pass through filter function
dest_array = function(src_array, *args)
# Write result to destination
out = [ formatter(row) for row in dest_array ]
insert_ctx.insert("".join(out))
# Clear source array
src_array = []
# Take care of partial chunk
if len(src_array) and partial:
dest_array = function(src_array, *args)
out = [ formatter(row) for row in dest_array ]
insert_ctx.insert("".join(out))
# Like process_python, but provides Numpy arrays and allows for
# partial processing.
# The main filter processing method.
def process_numpy(self, function, args = None, rows = 100000):
"""For all intervals that exist in self.src but don't exist in
self.dest, call 'function' with a Numpy array corresponding to
@@ -342,8 +278,8 @@ class Filter(object):
"""
if args is None:
args = []
extractor = Client(self.src.url).stream_extract
inserter = Client(self.dest.url).stream_insert_context
extractor = NumpyClient(self.src.url).stream_extract_numpy
inserter = NumpyClient(self.dest.url).stream_insert_numpy_context
# Format output data.
formatter = lambda row: " ".join([repr(x) for x in row]) + "\n"
@@ -357,22 +293,12 @@ class Filter(object):
print "Processing", self.interval_string(interval)
with inserter(self.dest.path,
interval.start, interval.end) as insert_ctx:
def insert_function(array):
s = cStringIO.StringIO()
if len(np.shape(array)) != 2:
raise Exception("array must be 2-dimensional")
np.savetxt(s, array)
insert_ctx.insert(s.getvalue())
extract = extractor(self.src.path, interval.start, interval.end)
insert_function = insert_ctx.insert
old_array = np.array([])
for batched in batch(extract, rows):
# Read in this batch of data. This turns out to
# be a very fast way to read and convert it (order
# of magnitude faster than numpy.loadtxt)
new_array = np.fromstring("\n".join(batched), sep=' ')
new_array = new_array.reshape(-1, self.src.total_count)
for new_array in extractor(self.src.path,
interval.start, interval.end,
layout = self.src.layout,
maxrows = rows):
# If we still had old data left, combine it
if old_array.shape[0] != 0:
array = np.vstack((old_array, new_array))

View File

@@ -8,7 +8,7 @@ import nilmdb.client
from numpy import *
import scipy.fftpack
import scipy.signal
from matplotlib import pyplot as p
#from matplotlib import pyplot as p
import bisect
def main(argv = None):