Fix issue with column ordering in the exemplars

If the maximum scale in an exemplar belonged to a column we weren't using, the code would bail out later when looking up that column's correlation. Rework things so that exemplars held in RAM keep only the columns we care about.
10 years ago · 62e11a11c0
--- a/Makefile
+++ b/Makefile
@@ -11,10 +11,14 @@ endif
 test: test_trainola

 test_trainola:
 	-nilmtool -u http://bucket/nilmdb remove -s min -e max \
 		/sharon/prep-a-matches
 	nilmtools/trainola.py "$$(cat extras/trainola-test-param-2.js)"
 	-nilmtool -u http://bucket/nilmdb remove -s min -e max \
 		/sharon/prep-a-matches
 	nilmtools/trainola.py "$$(cat extras/trainola-test-param.js)"


 test_cleanup:
 	nilmtools/cleanup.py -e extras/cleanup.cfg
 	nilmtools/cleanup.py extras/cleanup.cfg
--- a/extras/trainola-test-param-2.js
+++ b/extras/trainola-test-param-2.js
@@ -0,0 +1,29 @@
 { "columns" : [ { "index" : 0, "name" : "P1" },
                { "index" : 1, "name" : "Q1" },
                { "index" : 2, "name" : "P3" } ],
  "stream" : "/sharon/prep-a",
  "url" : "http://bucket.mit.edu/nilmdb",
  "dest_stream" : "/sharon/prep-a-matches",
  "start" : 1365153062643133.5,
  "end"   : 1365168814443575.5,
  "exemplars" : [ { "columns" : [ { "index" : 0,
                                    "name" : "P1"
                                  } ],
                    "dest_column" : 0,
                    "end" : 1365073657682000,
                    "name" : "Turn ON",
                    "start" : 1365073654321000,
                    "stream" : "/sharon/prep-a",
                    "url" : "http://bucket.mit.edu/nilmdb"
                  },
                  { "columns" : [ { "index" : 2, "name" : "P3" },
                                  { "index" : 0, "name" : "P1" } ],
                    "dest_column" : 1,
                    "end" : 1365176528818000,
                    "name" : "Type 2 turn ON",
                    "start" : 1365176520030000,
                    "stream" : "/sharon/prep-a",
                    "url" : "http://bucket.mit.edu/nilmdb"
                  }
                ]
 }
--- a/nilmtools/trainola.py
+++ b/nilmtools/trainola.py
@@ -28,12 +28,12 @@ def build_column_mapping(colinfo, streaminfo):
    pull out a dictionary mapping for the column names/numbers."""
    columns = OrderedDict()
    for c in colinfo:
        if (c['name'] in columns.keys() or
            c['index'] in columns.values()):
        col_num = c['index'] + 1  # skip timestamp
        if (c['name'] in columns.keys() or col_num in columns.values()):
            raise DataError("duplicated columns")
        if (c['index'] < 0 or c['index'] >= streaminfo.layout_count):
            raise DataError("bad column number")
        columns[c['name']] = c['index']
        columns[c['name']] = col_num
    if not len(columns):
        raise DataError("no columns")
    return columns
@@ -79,10 +79,17 @@ class Exemplar(object):
                                                   maxrows = self.count)
        self.data = list(datagen)[0]

        # Discard timestamp
        self.data = self.data[:,1:]
        # Extract just the columns that were specified in self.columns,
        # skipping the timestamp.
        extract_columns = [ value for (key, value) in self.columns.items() ]
        self.data = self.data[:,extract_columns]

        # Subtract the mean from each column
        # Fix the column indices in e.columns, since we removed/reordered
        # columns in self.data
        for n, k in enumerate(self.columns):
            self.columns[k] = n

        # Subtract the means from each column
        self.data = self.data - self.data.mean(axis=0)

        # Get scale factors for each column by computing dot product
@@ -147,7 +154,7 @@ def trainola_matcher(data, interval, args, insert_func, final_chunk):

        # Compute cross-correlation for each column
        for col_name in e.columns:
            a = data[:, src_columns[col_name] + 1]
            a = data[:, src_columns[col_name]]
            b = e.data[:, e.columns[col_name]]
            corr = scipy.signal.fftconvolve(a, np.flipud(b), 'valid')[0:valid]