|
- #!/usr/bin/python
-
- import nilmdb.client
- from nilmdb.utils.printf import *
- from nilmdb.utils.time import (parse_time, timestamp_to_human,
- timestamp_to_seconds)
- from nilmdb.utils.interval import Interval
-
- import nilmtools
-
- import itertools
- import time
- import sys
- import re
- import argparse
-
class MissingDestination(Exception):
    """Raised when the destination stream doesn't exist.

    Carries the source StreamInfo and a (partial) destination
    StreamInfo, so the caller can create the destination itself
    if desired."""
    def __init__(self, src, dest):
        self.src = src
        self.dest = dest
        message = "destination path " + dest.path + " not found"
        Exception.__init__(self, message)
-
class StreamInfo(object):
    """Parsed form of the extended stream info list returned by
    Client.stream_list(extended=True).

    If the info list is short or not a list at all (e.g. the bare
    [path] used when building a MissingDestination), parsing stops
    at the first missing field and the remaining attributes are
    simply left unset."""
    def __init__(self, info):
        # Keep the raw info around for callers that want it.
        self.info = info
        try:
            self.path = info[0]
            self.layout = info[1]
            self.layout_type = self.layout.split('_')[0]
            self.layout_count = int(self.layout.split('_')[1])
            # Total columns including the timestamp column.
            self.total_count = self.layout_count + 1
            self.timestamp_min = info[2]
            self.timestamp_max = info[3]
            self.rows = info[4]
            self.seconds = nilmdb.utils.time.timestamp_to_seconds(info[5])
        except (IndexError, TypeError):
            # Partial info is fine; leave remaining fields unset.
            # (Bug fix: "except IndexError, TypeError:" only caught
            # IndexError, binding it to the name TypeError, so a
            # TypeError from e.g. info=None escaped uncaught.)
            pass

    def __str__(self):
        """Return a one-line human-readable summary of the stream."""
        return sprintf("%s (%s), %.2fM rows, %.2f hours",
                       self.path, self.layout, self.rows / 1e6,
                       self.seconds / 3600.0)
-
- class Filter(object):
-
- def __init__(self):
- self._parser = None
- self._args = None
- self._client = None
- self._using_client = False
- self.src = None
- self.dest = None
-
- @property
- def client(self):
- if self._using_client:
- raise Exception("Filter client is in use; make another")
- return self._client
-
- def setup_parser(self, description = "Filter data"):
- parser = argparse.ArgumentParser(
- formatter_class = argparse.RawDescriptionHelpFormatter,
- version = nilmtools.__version__,
- description = description)
- group = parser.add_argument_group("General filter arguments")
- group.add_argument("-u", "--url", action="store",
- default="http://localhost:12380/",
- help="Server URL (default: %(default)s)")
- group.add_argument("-D", "--dry-run", action="store_true",
- default = False,
- help="Just print intervals that would be "
- "processed")
- group.add_argument("-s", "--start",
- metavar="TIME", type=self.arg_time,
- help="Starting timestamp for intervals "
- "(free-form, inclusive)")
- group.add_argument("-e", "--end",
- metavar="TIME", type=self.arg_time,
- help="Ending timestamp for intervals "
- "(free-form, noninclusive)")
- group.add_argument("srcpath", action="store",
- help="Path of source stream, e.g. /foo/bar")
- group.add_argument("destpath", action="store",
- help="Path of destination stream, e.g. /foo/bar")
- self._parser = parser
- return parser
-
- def interval_string(self, interval):
- return sprintf("[ %s -> %s ]",
- timestamp_to_human(interval.start),
- timestamp_to_human(interval.end))
-
- def parse_args(self):
- args = self._parser.parse_args()
- self._args = args
- self._client = nilmdb.client.Client(args.url)
-
- if args.srcpath == args.destpath:
- raise Exception("source and destination path must be different")
-
- # Open and print info about the streams
- src = self._client.stream_list(args.srcpath, extended = True)
- if len(src) != 1:
- raise Exception("source path " + args.srcpath + " not found")
- self.src = StreamInfo(src[0])
-
- dest = self._client.stream_list(args.destpath, extended = True)
- if len(dest) != 1:
- raise MissingDestination(self.src, StreamInfo([args.destpath]))
- self.dest = StreamInfo(dest[0])
-
- print "Source:", self.src
- print " Dest:", self.dest
-
- if args.dry_run:
- for interval in self.intervals():
- print self.interval_string(interval)
- raise SystemExit(0)
-
- return args
-
- def intervals(self):
- """Generate all the intervals that this filter should process"""
- self._using_client = True
- saved_int = None
- for (start, end) in self._client.stream_intervals(
- self._args.srcpath, diffpath = self._args.destpath,
- start = self._args.start, end = self._args.end):
-
- # Join adjacent intervals
- if saved_int is not None:
- if saved_int.end == start:
- start = saved_int.start
- else:
- yield saved_int
- saved_int = Interval(start, end)
- if saved_int is not None:
- yield saved_int
- self._using_client = False
-
- # Misc helpers
- def arg_time(self, toparse):
- """Parse a time string argument"""
- try:
- return nilmdb.utils.time.parse_time(toparse)
- except ValueError as e:
- raise argparse.ArgumentTypeError(sprintf("%s \"%s\"",
- str(e), toparse))
-
- def check_dest_metadata(self, data):
- """See if the metadata jives, and complain if it doesn't. If
- there's no conflict, update the metadata to match 'data'."""
- metadata = self._client.stream_get_metadata(self._args.destpath)
- for key in data:
- wanted = str(data[key])
- val = metadata.get(key, wanted)
- if val != wanted and self.dest.rows > 0:
- m = "Metadata in destination stream:\n"
- m += " %s = %s\n" % (key, val)
- m += "doesn't match desired data:\n"
- m += " %s = %s\n" % (key, wanted)
- m += "Refusing to change it. You can change the stream's "
- m += "metadata manually, or\n"
- m += "remove existing data from the stream, to prevent "
- m += "this error.\n"
- raise Exception(m)
- # All good -- write the metadata in case it's not already there
- self._client.stream_update_metadata(self._args.destpath, data)
-
- # Main processing helper
- def process_python(self, function, rows, args = None, partial = False):
- """Process data in chunks of 'rows' data at a time.
-
- This provides data as nested Python lists and expects the same
- back.
-
- function: function to process the data
- rows: maximum number of rows to pass to 'function' at once
- args: tuple containing extra arguments to pass to 'function'
- partial: if true, less than 'rows' may be passed to 'function'.
- if false, partial data at the end of an interval will
- be dropped.
-
- 'function' should be defined like:
- function(data, *args)
- It will be passed a list containing up to 'rows' rows of
- data from the source stream, and any arguments passed in
- 'args'. It should transform the data as desired, and return a
- new list of rdata, which will be inserted into the destination
- stream.
- """
- if args is None:
- args = []
- extractor = nilmdb.client.Client(self._args.url).stream_extract
- inserter = nilmdb.client.Client(self._args.url).stream_insert_context
- src = self._args.srcpath
- dest = self._args.destpath
-
- # Parse input data. We use homogenous types for now, which
- # means the timestamp type will be either float or int.
- if "int" in self.src.layout_type:
- parser = lambda line: [ int(x) for x in line.split() ]
- else:
- parser = lambda line: [ float(x) for x in line.split() ]
-
- # Format output data.
- formatter = lambda row: " ".join([repr(x) for x in row]) + "\n"
-
- for interval in self.intervals():
- print "Processing", self.interval_string(interval)
- with inserter(dest, interval.start, interval.end) as insert_ctx:
- src_array = []
- for line in extractor(src, interval.start, interval.end):
- # Read in data
- src_array.append([ float(x) for x in line.split() ])
-
- if len(src_array) == rows:
- # Pass through filter function
- dest_array = function(src_array, *args)
-
- # Write result to destination
- out = [ formatter(row) for row in dest_array ]
- insert_ctx.insert("".join(out))
-
- # Clear source array
- src_array = []
-
- # Take care of partial chunk
- if len(src_array) and partial:
- dest_array = function(src_array, *args)
- out = [ formatter(row) for row in dest_array ]
- insert_ctx.insert("".join(out))
-
- def main():
- # This is just a dummy function; actual filters can use the other
- # functions to prepare stuff, and then do something with the data.
- f = Filter()
- parser = f.setup_parser()
- args = f.parse_args()
- for i in f.intervals():
- print "Generic filter: need to handle", f.interval_string(i)
-
- if __name__ == "__main__":
- main()
|