|
@@ -5,6 +5,8 @@ from nilmdb.utils.printf import * |
|
|
from nilmdb.utils.time import parse_time, format_time |
|
|
from nilmdb.utils.time import parse_time, format_time |
|
|
|
|
|
|
|
|
import nilmtools |
|
|
import nilmtools |
|
|
|
|
|
|
|
|
|
|
|
import itertools |
|
|
import time |
|
|
import time |
|
|
import sys |
|
|
import sys |
|
|
import re |
|
|
import re |
|
@@ -48,6 +50,14 @@ class Filter(object): |
|
|
default = False, |
|
|
default = False, |
|
|
help="Just print intervals that would be " |
|
|
help="Just print intervals that would be " |
|
|
"processed") |
|
|
"processed") |
|
|
|
|
|
group.add_argument("-s", "--start", |
|
|
|
|
|
metavar="TIME", type=self.arg_time, |
|
|
|
|
|
help="Starting timestamp for intervals " |
|
|
|
|
|
"(free-form, inclusive)") |
|
|
|
|
|
group.add_argument("-e", "--end", |
|
|
|
|
|
metavar="TIME", type=self.arg_time, |
|
|
|
|
|
help="Ending timestamp for intervals " |
|
|
|
|
|
"(free-form, noninclusive)") |
|
|
group.add_argument("srcpath", action="store", |
|
|
group.add_argument("srcpath", action="store", |
|
|
help="Path of source stream, e.g. /foo/bar") |
|
|
help="Path of source stream, e.g. /foo/bar") |
|
|
group.add_argument("destpath", action="store", |
|
|
group.add_argument("destpath", action="store", |
|
@@ -89,11 +99,20 @@ class Filter(object): |
|
|
"""Generate all the intervals that this filter should process""" |
|
|
"""Generate all the intervals that this filter should process""" |
|
|
self._using_client = True |
|
|
self._using_client = True |
|
|
for i in self._client.stream_intervals( |
|
|
for i in self._client.stream_intervals( |
|
|
self._args.srcpath, diffpath = self._args.destpath): |
|
|
|
|
|
|
|
|
self._args.srcpath, diffpath = self._args.destpath, |
|
|
|
|
|
start = self._args.start, end = self._args.end): |
|
|
yield i |
|
|
yield i |
|
|
self._using_client = False |
|
|
self._using_client = False |
|
|
|
|
|
|
|
|
# Misc helpers |
|
|
# Misc helpers |
|
|
|
|
|
def arg_time(self, toparse): |
|
|
|
|
|
"""Parse a time string argument""" |
|
|
|
|
|
try: |
|
|
|
|
|
return nilmdb.utils.time.parse_time(toparse).totimestamp() |
|
|
|
|
|
except ValueError as e: |
|
|
|
|
|
raise argparse.ArgumentTypeError(sprintf("%s \"%s\"", |
|
|
|
|
|
str(e), toparse)) |
|
|
|
|
|
|
|
|
def stream_info_string(self, info): |
|
|
def stream_info_string(self, info): |
|
|
"""Print stream info as a string""" |
|
|
"""Print stream info as a string""" |
|
|
return sprintf("%s (%s), %.2fM rows, %.2f hours", |
|
|
return sprintf("%s (%s), %.2fM rows, %.2f hours", |
|
@@ -104,27 +123,89 @@ class Filter(object): |
|
|
return sprintf("[ %s -> %s ]", format_time(interval[0]), |
|
|
return sprintf("[ %s -> %s ]", format_time(interval[0]), |
|
|
format_time(interval[1])) |
|
|
format_time(interval[1])) |
|
|
|
|
|
|
|
|
|
|
|
def check_dest_metadata(self, data): |
|
|
|
|
|
"""See if the metadata jives, and complain if it doesn't. If |
|
|
|
|
|
there's no conflict, update the metadata to match 'data'.""" |
|
|
|
|
|
metadata = self._client.stream_get_metadata(self._args.destpath) |
|
|
|
|
|
rows = self.destinfo[4] |
|
|
|
|
|
for key in data: |
|
|
|
|
|
wanted = str(data[key]) |
|
|
|
|
|
val = metadata.get(key, wanted) |
|
|
|
|
|
if val != wanted and rows > 0: |
|
|
|
|
|
m = "Metadata in destination stream:\n" |
|
|
|
|
|
m += " %s = %s\n" % (key, val) |
|
|
|
|
|
m += "doesn't match desired data:\n" |
|
|
|
|
|
m += " %s = %s\n" % (key, wanted) |
|
|
|
|
|
m += "Refusing to change it. You can change the stream's " |
|
|
|
|
|
m += "metadata manually, or\n" |
|
|
|
|
|
m += "remove existing data from the stream, to prevent " |
|
|
|
|
|
m += "this error.\n" |
|
|
|
|
|
raise Exception(m) |
|
|
|
|
|
# All good -- write the metadata in case it's not already there |
|
|
|
|
|
self._client.stream_update_metadata(self._args.destpath, data) |
|
|
|
|
|
|
|
|
# Main processing helper |
|
|
# Main processing helper |
|
|
def process(self, function, maxlen, args): |
|
|
|
|
|
"""Process data in chunks. |
|
|
|
|
|
|
|
|
def process(self, function, rows, partial = True, args = None): |
|
|
|
|
|
"""Process data in chunks of 'rows' data at a time. |
|
|
|
|
|
|
|
|
function: function to process the data |
|
|
function: function to process the data |
|
|
maxlen: maximum length of data to pass to function, in seconds |
|
|
|
|
|
args: tuple containing extra arguments to pass to function |
|
|
|
|
|
|
|
|
rows: maximum number of rows to pass to 'function' at once |
|
|
|
|
|
args: tuple containing extra arguments to pass to 'function' |
|
|
|
|
|
partial: if true, less than 'rows' may be passed to 'function'. |
|
|
|
|
|
if false, partial data at the end of an interval will |
|
|
|
|
|
be dropped. |
|
|
|
|
|
|
|
|
'function' should be defined like: |
|
|
'function' should be defined like: |
|
|
function(data, start, end, *args) |
|
|
|
|
|
It will be passed a block of data from the source stream, |
|
|
|
|
|
the start and end times of that block, and any arguments |
|
|
|
|
|
that were passed to process in 'args'. The total |
|
|
|
|
|
length of the interval will be at most 'maxlen' seconds. |
|
|
|
|
|
|
|
|
|
|
|
'function' should transform the data as desired, and return |
|
|
|
|
|
a new list of data, which will be inserted into the |
|
|
|
|
|
destination stream.""" |
|
|
|
|
|
|
|
|
function(data, *args) |
|
|
|
|
|
It will be passed an array containing up to 'rows' rows of |
|
|
|
|
|
data from the source stream, and any arguments passed in |
|
|
|
|
|
'args'. It should transform the data as desired, and return a |
|
|
|
|
|
new array of data, which will be inserted into the destination |
|
|
|
|
|
stream. |
|
|
|
|
|
""" |
|
|
|
|
|
if args is None: |
|
|
|
|
|
args = [] |
|
|
|
|
|
extractor = nilmdb.client.Client(self._args.url).stream_extract |
|
|
|
|
|
inserter = nilmdb.client.Client(self._args.url).stream_insert_context |
|
|
|
|
|
src = self._args.srcpath |
|
|
|
|
|
dest = self._args.destpath |
|
|
|
|
|
islice = itertools.islice |
|
|
|
|
|
|
|
|
|
|
|
# Figure out how to format output data |
|
|
|
|
|
dest_layout = self.destinfo[1].split('_')[1] |
|
|
|
|
|
def int_formatter(row): |
|
|
|
|
|
return ("%.6f " % row[0]) + " ".join(str(int(x)) for x in row[1:]) |
|
|
|
|
|
def float_formatter(row): |
|
|
|
|
|
return ("%.6f " % row[0]) + " ".join(repr(x) for x in row[1:]) |
|
|
|
|
|
if "int" in dest_layout: |
|
|
|
|
|
formatter = int_formatter |
|
|
|
|
|
else: |
|
|
|
|
|
formatter = float_formatter |
|
|
|
|
|
|
|
|
for (start, end) in self.intervals(): |
|
|
for (start, end) in self.intervals(): |
|
|
if (end - start) |
|
|
|
|
|
return |
|
|
|
|
|
|
|
|
print "Processing", self.interval_string((start, end)) |
|
|
|
|
|
with inserter(dest, start, end) as insert_ctx: |
|
|
|
|
|
src_array = [] |
|
|
|
|
|
for line in extractor(src, start, end): |
|
|
|
|
|
# Read in data |
|
|
|
|
|
src_array.append([ float(x) for x in line.split() ]) |
|
|
|
|
|
|
|
|
|
|
|
if len(src_array) == rows: |
|
|
|
|
|
# Pass through filter function |
|
|
|
|
|
dest_array = function(src_array, *args) |
|
|
|
|
|
|
|
|
|
|
|
# Write result to destination |
|
|
|
|
|
out = [ formatter(row) for row in dest_array ] |
|
|
|
|
|
insert_ctx.insert("\n".join(out) + "\n") |
|
|
|
|
|
|
|
|
|
|
|
# Clear source array |
|
|
|
|
|
src_array = [] |
|
|
|
|
|
|
|
|
|
|
|
# Take care of partial chunk |
|
|
|
|
|
if len(src_array) and partial: |
|
|
|
|
|
dest_array = function(src_array, *args) |
|
|
|
|
|
out = [ formatter(row) for row in dest_array ] |
|
|
|
|
|
insert_ctx.insert("\n".join(out) + "\n") |
|
|
|
|
|
|
|
|
def main(): |
|
|
def main(): |
|
|
# This is just a dummy function; actual filters can use the other |
|
|
# This is just a dummy function; actual filters can use the other |
|
|