Compare commits

...

4 Commits

SHA1        Message                                              Date
9c5f07106d  Don't need python-pip                                2013-07-20 16:15:29 -04:00
62e11a11c0  Fix issue with column ordering in the exemplars      2013-07-18 22:51:27 -04:00
            If the max scale in the exemplar was a column we weren't using, it
            would bail out when looking for that correlation later.  Change things
            around so exemplars in RAM only keep around the columns we care about.
2bdcee2c36  More helpful error if exemplar stream doesn't exist  2013-07-15 15:19:52 -04:00
6dce8c5296  More output                                          2013-07-11 18:56:53 -04:00
4 changed files with 70 additions and 12 deletions
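Commit 62e11a11c0 is the substantive change here: if the exemplar's max-scale column was one the match wasn't using, the later correlation lookup failed. A minimal sketch of the indexing problem, assuming numpy and a toy layout of timestamp plus P1/Q1/P3 (not the repository's code):

    import numpy as np

    # Raw exemplar rows: timestamp plus columns P1, Q1, P3 (toy values).
    data = np.arange(8.0).reshape(2, 4)

    # Old behavior: drop only the timestamp, keep every column.
    old = data[:, 1:]
    print(old[:, 2])       # P3 still lives at its original index 2

    # New behavior: keep just the columns the exemplar uses (here, P3 only),
    # so original indices no longer apply and must be remapped (see the
    # trainola.py hunks below).
    new = data[:, [3]]
    print(new[:, 0])       # P3 is now at index 0; new[:, 2] would raise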

Makefile

@@ -11,10 +11,14 @@ endif
 test: test_trainola

 test_trainola:
+	-nilmtool -u http://bucket/nilmdb remove -s min -e max \
+		/sharon/prep-a-matches
+	nilmtools/trainola.py "$$(cat extras/trainola-test-param-2.js)"
 	-nilmtool -u http://bucket/nilmdb remove -s min -e max \
 		/sharon/prep-a-matches
 	nilmtools/trainola.py "$$(cat extras/trainola-test-param.js)"

 test_cleanup:
 	nilmtools/cleanup.py -e extras/cleanup.cfg
+	nilmtools/cleanup.py extras/cleanup.cfg

README

@@ -5,7 +5,7 @@ by Jim Paris <jim@jtan.com>
 Prerequisites:

 	# Runtime and build environments
-	sudo apt-get install python2.7 python2.7-dev python-setuptools python-pip
+	sudo apt-get install python2.7 python2.7-dev python-setuptools
 	sudo apt-get install python-numpy python-scipy

 	nilmdb (1.8.1+)

extras/trainola-test-param-2.js (new file)

@@ -0,0 +1,29 @@
+{ "columns" : [ { "index" : 0, "name" : "P1" },
+                { "index" : 1, "name" : "Q1" },
+                { "index" : 2, "name" : "P3" } ],
+  "stream" : "/sharon/prep-a",
+  "url" : "http://bucket.mit.edu/nilmdb",
+  "dest_stream" : "/sharon/prep-a-matches",
+  "start" : 1365153062643133.5,
+  "end" : 1365168814443575.5,
+  "exemplars" : [ { "columns" : [ { "index" : 0,
+                                    "name" : "P1"
+                                  } ],
+                    "dest_column" : 0,
+                    "end" : 1365073657682000,
+                    "name" : "Turn ON",
+                    "start" : 1365073654321000,
+                    "stream" : "/sharon/prep-a",
+                    "url" : "http://bucket.mit.edu/nilmdb"
+                  },
+                  { "columns" : [ { "index" : 2, "name" : "P3" },
+                                  { "index" : 0, "name" : "P1" } ],
+                    "dest_column" : 1,
+                    "end" : 1365176528818000,
+                    "name" : "Type 2 turn ON",
+                    "start" : 1365176520030000,
+                    "stream" : "/sharon/prep-a",
+                    "url" : "http://bucket.mit.edu/nilmdb"
+                  }
+                ]
+}
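The new file above is the second test parameter set; its second exemplar deliberately lists P3 before P1, exercising the reordered, partial column handling fixed in 62e11a11c0. As a hedged sketch of how such a file reaches the matcher, assuming the trainola(conf) entry point shown at the bottom of this diff and the path used by the Makefile's test target:

    import simplejson as json
    import nilmtools.trainola

    # Parse the JSON parameter file and hand it to trainola, much as
    # main() does from the command line.
    with open("extras/trainola-test-param-2.js") as f:
        conf = json.loads(f.read())
    print(nilmtools.trainola.trainola(conf))   # "done" on success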

nilmtools/trainola.py

@@ -6,6 +6,7 @@ import nilmtools.filter
 from nilmdb.utils.time import (timestamp_to_human,
                                timestamp_to_seconds,
                                seconds_to_timestamp)
+from nilmdb.utils import datetime_tz
 from nilmdb.utils.interval import Interval
 import numpy as np
@@ -15,6 +16,7 @@ from numpy.core.umath_tests import inner1d
 import nilmrun
 from collections import OrderedDict
 import sys
+import time
 import functools
 import collections
@@ -26,12 +28,12 @@ def build_column_mapping(colinfo, streaminfo):
     pull out a dictionary mapping for the column names/numbers."""
     columns = OrderedDict()
     for c in colinfo:
-        if (c['name'] in columns.keys() or
-                c['index'] in columns.values()):
+        col_num = c['index'] + 1 # skip timestamp
+        if (c['name'] in columns.keys() or col_num in columns.values()):
             raise DataError("duplicated columns")
         if (c['index'] < 0 or c['index'] >= streaminfo.layout_count):
             raise DataError("bad column number")
-        columns[c['name']] = c['index']
+        columns[c['name']] = col_num
     if not len(columns):
         raise DataError("no columns")
     return columns
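A standalone sketch of the new mapping (DataError and streaminfo replaced by stand-ins; not the repository's code). Column 0 of the raw data is the timestamp, so every requested index shifts by one, and the OrderedDict keeps the order the exemplar listed its columns in:

    from collections import OrderedDict

    def build_column_mapping(colinfo, layout_count):
        columns = OrderedDict()
        for c in colinfo:
            col_num = c['index'] + 1   # skip timestamp in column 0
            if c['name'] in columns or col_num in columns.values():
                raise ValueError("duplicated columns")
            if c['index'] < 0 or c['index'] >= layout_count:
                raise ValueError("bad column number")
            columns[c['name']] = col_num
        if not len(columns):
            raise ValueError("no columns")
        return columns

    # Columns listed as P3 then P1, as in the second test exemplar:
    print(build_column_mapping(
        [{"index": 2, "name": "P3"}, {"index": 0, "name": "P1"}], 3))
    # OrderedDict([('P3', 3), ('P1', 1)])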
@@ -52,6 +54,9 @@ class Exemplar(object):
         # Get stream info
         self.client = nilmdb.client.numpyclient.NumpyClient(self.url)
         self.info = nilmtools.filter.get_stream_info(self.client, self.stream)
+        if not self.info:
+            raise DataError(sprintf("exemplar stream '%s' does not exist " +
+                                    "on server '%s'", self.stream, self.url))

         # Build up name => index mapping for the columns
         self.columns = build_column_mapping(exinfo['columns'], self.info)
@@ -74,10 +79,17 @@
                                               maxrows = self.count)
         self.data = list(datagen)[0]

-        # Discard timestamp
-        self.data = self.data[:,1:]
+        # Extract just the columns that were specified in self.columns,
+        # skipping the timestamp.
+        extract_columns = [ value for (key, value) in self.columns.items() ]
+        self.data = self.data[:,extract_columns]

-        # Subtract the mean from each column
+        # Fix the column indices in e.columns, since we removed/reordered
+        # columns in self.data
+        for n, k in enumerate(self.columns):
+            self.columns[k] = n
+
+        # Subtract the means from each column
         self.data = self.data - self.data.mean(axis=0)

         # Get scale factors for each column by computing dot product
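The same extraction step on toy data (a sketch with assumed values, not the original code) shows both halves of the fix: keep only the named columns, then remap the name-to-index table into the reduced array:

    import numpy as np
    from collections import OrderedDict

    data = np.array([[10., 1., 2., 3.],     # column 0 is the timestamp
                     [20., 4., 5., 6.]])
    columns = OrderedDict([("P3", 3), ("P1", 1)])   # name -> raw column

    data = data[:, list(columns.values())]  # keep P3 and P1, drop the rest
    for n, k in enumerate(columns):         # re-index into the reduced array
        columns[k] = n
    print(columns)                   # OrderedDict([('P3', 0), ('P1', 1)])
    print(data - data.mean(axis=0))  # zero-mean, as the next line does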
@@ -117,6 +129,10 @@
             lookformax = True
     return (mins, maxs)

+def timestamp_to_short_human(timestamp):
+    dt = datetime_tz.datetime_tz.fromtimestamp(timestamp_to_seconds(timestamp))
+    return dt.strftime("%H:%M:%S")
+
 def trainola_matcher(data, interval, args, insert_func, final_chunk):
     """Perform cross-correlation match"""
     ( src_columns, dest_count, exemplars ) = args
@@ -138,7 +154,7 @@
     # Compute cross-correlation for each column
     for col_name in e.columns:
-        a = data[:, src_columns[col_name] + 1]
+        a = data[:, src_columns[col_name]]
         b = e.data[:, e.columns[col_name]]
         corr = scipy.signal.fftconvolve(a, np.flipud(b), 'valid')[0:valid]
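The change above works because src_columns values now already carry the +1 timestamp offset from build_column_mapping. The correlation core itself is unchanged; as a toy illustration with made-up signals (assumed names, not the repository's code):

    import numpy as np
    import scipy.signal

    a = np.sin(np.linspace(0, 20, 200))   # stand-in source column
    b = a[50:60] - a[50:60].mean()        # zero-mean exemplar snippet
    valid = len(a) - len(b) + 1
    # Flipping b turns FFT convolution into cross-correlation.
    corr = scipy.signal.fftconvolve(a, np.flipud(b), 'valid')[0:valid]
    print(corr.argmax())   # ~50: where the exemplar best aligns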
@@ -183,7 +199,10 @@
         insert_func(out)

     # Return how many rows we processed
-    return max(valid, 0)
+    valid = max(valid, 0)
+    printf("  [%s] matched %d exemplars in %d rows\n",
+           timestamp_to_short_human(data[0][0]), np.sum(out[:,1:]), valid)
+    return valid

 def trainola(conf):
     print "Trainola", nilmtools.__version__
@@ -247,14 +266,20 @@
         src.path, layout = src.layout, maxrows = rows)
     inserter = functools.partial(dest_client.stream_insert_numpy_context,
                                  dest.path)
+    start = time.time()
+    processed_time = 0
+    printf("Processing intervals:\n")
     for interval in intervals:
-        printf("Processing interval:\n")
-        printf("  %s\n", interval.human_string())
+        printf("%s\n", interval.human_string())
         nilmtools.filter.process_numpy_interval(
             interval, extractor, inserter, rows * 3,
             trainola_matcher, (src_columns, dest.layout_count, exemplars))
+        processed_time += (timestamp_to_seconds(interval.end) -
+                           timestamp_to_seconds(interval.start))
+    elapsed = max(time.time() - start, 1e-3)

+    printf("Done.  Processed %.2f seconds per second.\n",
+           processed_time / elapsed)
     return "done"

 def main(argv = None):
     import simplejson as json
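The "More output" commit's timing summary reports seconds of data processed per wall-clock second. The arithmetic in isolation, with toy intervals (not the original code):

    import time

    start = time.time()
    processed_time = 0.0
    for (ival_start, ival_end) in [(0.0, 120.0), (120.0, 300.0)]:
        processed_time += ival_end - ival_start   # seconds of data covered
    elapsed = max(time.time() - start, 1e-3)      # avoid divide-by-zero
    print("Done.  Processed %.2f seconds per second."
          % (processed_time / elapsed))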