|
- #!/usr/bin/python
-
- import nilmdb.client
- from nilmdb.utils.printf import *
- from nilmdb.utils.time import (parse_time, timestamp_to_human,
- timestamp_to_seconds, seconds_to_timestamp,
- rate_to_period, now as time_now)
-
- import nilmtools
- import time
- import sys
- import re
- import argparse
- import subprocess
-
class ParseError(Exception):
    """Error raised when an input file cannot be processed.

    The exception message is the file name and the error description
    joined by ": ".
    """

    def __init__(self, filename, error):
        # Both pieces must already be strings, exactly as with plain
        # string concatenation.
        super(ParseError, self).__init__(": ".join((filename, error)))
-
def parse_args():
    """Parse the command line and return the resulting namespace.

    Recognized arguments:
      -v / --version  print nilmtools.__version__ and exit
      -u / --url      NilmDB server URL
      -r / --rate     data rate in Hz (float, default 8000)
      -l / --live     live capture flag
      path            stream path
      infile          zero or more input files, opened for reading
                      (defaults to a list containing sys.stdin)

    As a side effect, the chosen stream path and data rate are printed
    before the namespace is returned.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="""\
Insert data from ethstream, either live (using the system time as a
reference) or prerecorded (using comments in the file as a reference).

The data is assumed to have been recorded at the specified rate.
Small discrepencies between the accumulated timestamps and the
reference time are ignored; larger discrepencies cause gaps to be
created in the stream. Overlapping data returns an error.
""")
    # Register the version flag explicitly.  The ``version=`` keyword of
    # the ArgumentParser constructor is deprecated in Python 2.7 and is
    # not accepted at all by Python 3's argparse; an explicit "version"
    # action provides the same -v/--version flags on both.
    parser.add_argument("-v", "--version", action="version",
                        version=nilmtools.__version__)
    parser.add_argument("-u", "--url", action="store",
                        default="http://localhost:12380/",
                        help="NilmDB server URL (default: %(default)s)")
    parser.add_argument("-r", "--rate", action="store", default=8000,
                        type=float,
                        help="Data rate in Hz (default: %(default)s)")
    parser.add_argument("-l", "--live", action="store_true",
                        help="Live capture; use system time to verify rate")
    parser.add_argument("path", action="store",
                        help="Path of stream, e.g. /foo/bar")
    parser.add_argument("infile", type=argparse.FileType('r'), nargs='*',
                        default=[sys.stdin],
                        help="Input files (default: stdin)")
    args = parser.parse_args()

    printf("Stream path: %s\n", args.path)
    printf(" Data rate: %s Hz\n", repr(args.rate))

    return args
-
def main(args=None):
    """Read ethstream data lines and insert them into a NilmDB stream.

    args is an argparse-style namespace providing ``url``, ``path``,
    ``rate``, ``live`` and ``infile`` (an iterable of open file objects
    with a ``name`` attribute).  When it is None, parse_args() is called
    to build it from the command line.

    Every data line is prefixed with a timestamp computed from a base
    timestamp plus an offset derived from the line count and the data
    rate.  A "clock" timestamp (parsed from the file name, from
    '#' comment lines, or taken from the system clock in live mode) is
    compared against it: if the data timestamp is more than 10 seconds
    ahead of the clock, ParseError is raised; if it is more than 10
    seconds behind, the stream is finalized and the data timestamp jumps
    forward to the clock, leaving a gap.

    Raises:
        ParseError: data arrives too fast relative to the clock, no
            timestamp could be determined before the first data line,
            or more than 3 lines in one file lack a trailing newline.
    """
    if args is None:
        args = parse_args()

    client = nilmdb.client.Client(args.url)

    # Local copy to save attribute lookups in the per-line loop
    live = args.live

    # data_ts is the timestamp that we'll use for the current line.  It is
    # data_ts_base plus rate_to_period(data_ts_rate, data_ts_inc);
    # data_ts_base == 0 means that no base timestamp has been chosen yet.
    data_ts_base = 0
    data_ts_inc = 0
    data_ts_rate = args.rate

    # clock_ts is the imprecise "real" timestamp (from the filename,
    # comments, or system clock)
    clock_ts = None

    def print_clock_updated():
        # Reads clock_ts, data_ts and data_ts_base from the enclosing
        # scope.  data_ts is only consulted once a base timestamp has
        # been set, at which point it has been assigned as well.
        printf("Clock time updated to %s\n", timestamp_to_human(clock_ts))
        if data_ts_base != 0:
            diff = data_ts - clock_ts
            if diff >= 0:
                printf(" (data timestamp ahead by %.6f sec)\n",
                       timestamp_to_seconds(diff))
            else:
                printf(" (data timestamp behind by %.6f sec)\n",
                       timestamp_to_seconds(-diff))

    with client.stream_insert_context(args.path) as stream:
        for f in args.infile:
            filename = f.name
            printf("Processing %s\n", filename)

            # If the filename ends in .gz, read it through "gzip -dc"
            # instead of reading the file directly.
            gzip_proc = None
            if filename.endswith(".gz"):
                gzip_proc = subprocess.Popen(["gzip", "-dc"],
                                             stdin=f,
                                             stdout=subprocess.PIPE)
                f = gzip_proc.stdout

            try:
                # Try to get a real timestamp from the filename
                try:
                    # Subtract 1 hour because files are created at the end
                    # of the hour.  Hopefully, we'll be able to use
                    # internal comments and this value won't matter anyway.
                    clock_ts = (parse_time(filename)
                                - seconds_to_timestamp(3600))
                    print_clock_updated()
                except ValueError:
                    pass

                truncated_lines = 0

                # Read each line
                for line in f:
                    data_ts = data_ts_base + rate_to_period(data_ts_rate,
                                                            data_ts_inc)

                    # If no content other than the newline, skip it
                    if len(line) <= 1:
                        continue

                    # If line starts with a comment, look for a timestamp
                    if line[0] == '#':
                        try:
                            clock_ts = parse_time(line[1:])
                            print_clock_updated()
                        except ValueError:
                            pass
                        continue

                    # If inserting live, use clock timestamp
                    if live:
                        clock_ts = time_now()

                    # If we have a real timestamp, compare it to the data
                    # timestamp, and make sure things match up.
                    if clock_ts is not None:
                        if (data_ts - seconds_to_timestamp(10)) > clock_ts:
                            # Accumulated line timestamps are in the
                            # future.  If we were to set data_ts=clock_ts,
                            # we'd create an overlap, so we have to just
                            # bail out here.
                            err = sprintf("Data is coming in too fast: "
                                          "data time "
                                          "is %s but clock time is only %s",
                                          timestamp_to_human(data_ts),
                                          timestamp_to_human(clock_ts))
                            raise ParseError(filename, err)

                        if (data_ts + seconds_to_timestamp(10)) < clock_ts:
                            # Accumulated line timestamps are in the past.
                            # We can just skip some time and leave a gap
                            # in the data.
                            if data_ts_base != 0:
                                printf("Skipping data timestamp forward from "
                                       "%s to %s to match clock time\n",
                                       timestamp_to_human(data_ts),
                                       timestamp_to_human(clock_ts))
                            stream.finalize()
                            data_ts_base = data_ts = clock_ts
                            data_ts_inc = 0

                        # Don't use this clock time anymore until we
                        # update it
                        clock_ts = None

                    if data_ts_base == 0:
                        raise ParseError(filename,
                                         "No idea what timestamp to use")

                    # This line is legit, so increment timestamp
                    data_ts_inc += 1

                    # Once in a while a line might be truncated, if we're
                    # at the end of a file.  Ignore it, but if we ignore
                    # too many, bail out.
                    if line[-1] != '\n':
                        truncated_lines += 1
                        if truncated_lines > 3:
                            raise ParseError(filename,
                                             "too many short lines")
                        printf("Ignoring short line in %s\n", filename)
                        continue

                    # Insert it
                    stream.insert("%d %s" % (data_ts, line))
            finally:
                # Reap the decompressor so that it does not linger as a
                # zombie and its pipe is released, whether the file was
                # read to the end or an error interrupted the loop.
                if gzip_proc is not None:
                    gzip_proc.stdout.close()
                    gzip_proc.wait()
    # Use printf like the rest of this function; the bare print statement
    # is a syntax error on Python 3.
    printf("Done\n")
-
# Run the inserter only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
|