Improve trainola matcher

2013-07-05 15:55:28 -04:00 · 2013-07-05 15:55:28 -04:00 · afd21bfef2
commit afd21bfef2
parent b228c3e35f
1 changed file with 20 additions and 17 deletions
--- a/src/trainola.py
+++ b/src/trainola.py
@@ -83,13 +83,12 @@ class Data(object):
        # Ensure a minimum (nonzero) scale and convert to list
        self.scale = np.maximum(self.scale, [1e-9]).tolist()

-def process(main, function, args = None, rows = 100000):
+def process(main, function, args = None, rows = 200000):
    """Process through the data; similar to nilmtools.Filter.process_numpy"""
    if args is None:
        args = []

    extractor = main.client.stream_extract_numpy
-    rows = 100000
    old_array = np.array([])
    for new_array in extractor(main.stream, main.start, main.end,
                               layout = main.info.layout, maxrows = rows):
@@ -156,10 +155,10 @@ def match(data, args):
    # This is how many points we'll consider valid in the
    # cross-correlation.
    valid = nrows + 1 - widest
-    processed = valid
+    matches = []

    # Try matching against each of the exemplars
-    for e in exemplars:
+    for e_num, e in enumerate(exemplars):
        corrs = []

        # Compute cross-correlation for each column
@@ -187,23 +186,27 @@ def match(data, args):
                # scale 0.0 -> distance 1.0
                distance = 1 - 0.9 * (scale / e.scale[biggest])
                if abs(corr[p] - 1) > distance:
+                    # No match
                    break
            else:
-                # Matched!
-                print e.name, "at", p
+                # Successful match
+                matches.append((p, e_num))

-        from matplotlib import pyplot as p
-        #p.subplot(3,1,1)
-        #p.plot(a[0:valid])
-        #p.subplot(3,1,2)
-        #p.plot(corr, 'r')
-        #p.subplot(3,1,3)
-        #p.plot(b)
-        #p.plot(data[:,1:3])
-        #p.show()
-        #raise Exception()
+    # Print matches
+    for (point, e_num) in sorted(matches):
+        # Ignore matches that showed up at the very tail of the window,
+        # and shorten the window accordingly.  This is an attempt to avoid
+        # problems at chunk boundaries.
+        if point > (valid - 50):
+            valid -= 50
+            break
+        print "matched", data[point,0], "exemplar", exemplars[e_num].name

-    return valid
+    #from matplotlib import pyplot as p
+    #p.plot(data[:,1:3])
+    #p.show()
+
+    return max(valid, 0)

 def trainola(conf):
    # Load main stream data