#!/usr/bin/python
# nilm / nilmtools

import nilmdb.client
from nilmdb.utils.printf import *
from nilmdb.utils.time import (parse_time, timestamp_to_human,
                               timestamp_to_seconds, seconds_to_timestamp,
                               rate_to_period, now as time_now)

import nilmtools
import time
import sys
import re
import argparse
import subprocess

class ParseError(Exception):
    """Error raised when an input file cannot be processed.

    The exception message is the offending filename, followed by ": "
    and the error description.
    """
    def __init__(self, filename, error):
        # Prefix the description with the file it applies to.
        text = "".join((filename, ": ", error))
        super(ParseError, self).__init__(text)

def parse_args(argv = None):
    """Parse the command line for the ethstream inserter.

    argv: list of argument strings, NOT including the program name.
        If None or empty, sys.argv[1:] is used.

    Prints the selected stream path and data rate, then returns the
    argparse namespace with attributes url, rate, live, path, and
    infile (a list of open file objects, defaulting to [sys.stdin]).
    """
    parser = argparse.ArgumentParser(
        formatter_class = argparse.RawDescriptionHelpFormatter,
        description = """\
    Insert data from ethstream, either live (using the system time as a
    reference) or prerecorded (using comments in the file as a reference).

    The data is assumed to have been recorded at the specified rate.
    Small discrepencies between the accumulated timestamps and the
    reference time are ignored; larger discrepencies cause gaps to be
    created in the stream.  Overlapping data returns an error.
    """)
    # Explicit version action instead of the deprecated "version"
    # constructor keyword; it provides the same -v / --version options.
    parser.add_argument("-v", "--version", action="version",
                        version = nilmtools.__version__)
    parser.add_argument("-u", "--url", action="store",
                        default="http://localhost:12380/",
                        help="NilmDB server URL (default: %(default)s)")
    parser.add_argument("-r", "--rate", action="store", default=8000,
                        type=float,
                        help="Data rate in Hz (default: %(default)s)")
    parser.add_argument("-l", "--live", action="store_true",
                        help="Live capture; use system time to verify rate")
    parser.add_argument("path", action="store",
                        help="Path of stream, e.g. /foo/bar")
    parser.add_argument("infile", type=argparse.FileType('r'), nargs='*',
                        default=[sys.stdin],
                        help="Input files (default: stdin)")

    # argparse expects the argument list without the program name, so
    # fall back to sys.argv[1:] rather than sys.argv; otherwise the
    # script name would be consumed as the "path" positional.
    if not argv:
        argv = sys.argv[1:]
    args = parser.parse_args(argv)

    printf("Stream path: %s\n", args.path)
    printf("  Data rate: %s Hz\n", repr(args.rate))

    return args

def main(argv = None):
    """Insert ethstream data from the input files into a NilmDB stream.

    argv: optional argument list, passed through to parse_args().

    Each non-comment, non-empty input line is inserted with a timestamp
    computed from a base timestamp plus the number of lines seen since
    that base, at the configured data rate.  A reference clock time is
    taken from the filename, from "#" comment lines, or (with --live)
    from the system clock, and is compared against the computed
    timestamp with a tolerance of 10 seconds in either direction.

    Raises ParseError if the data timestamps run more than 10 seconds
    ahead of the clock time, if no timestamp has been established when
    the first data line is reached, or if a single file contains more
    than 3 lines lacking a trailing newline.
    """
    args = parse_args(argv)

    client = nilmdb.client.Client(args.url)

    # Local copies to save dictionary lookups
    live = args.live

    # data_ts is the timestamp that we'll use for the current line.
    # A data_ts_base of 0 means no base timestamp has been set yet.
    data_ts_base = 0
    data_ts_inc = 0
    data_ts_rate = args.rate

    # clock_ts is the imprecise "real" timestamp (from the filename,
    # comments, or system clock)
    clock_ts = None

    def print_clock_updated():
        # Report the new clock time and, once a base timestamp exists,
        # how far the data timestamp is from it.  data_ts is only read
        # when data_ts_base is nonzero, by which point it has been set.
        printf("Clock time updated to %s\n", timestamp_to_human(clock_ts))
        if data_ts_base != 0:
            diff = data_ts - clock_ts
            if diff >= 0:
                printf("  (data timestamp ahead by %.6f sec)\n",
                       timestamp_to_seconds(diff))
            else:
                printf("  (data timestamp behind by %.6f sec)\n",
                       timestamp_to_seconds(-diff))

    with client.stream_insert_context(args.path) as stream:
        for f in args.infile:
            filename = f.name
            printf("Processing %s\n", filename)

            # If the filename ends in .gz, read it through "gzip -dc"
            # instead.
            gzip_proc = None
            if filename.endswith(".gz"):
                gzip_proc = subprocess.Popen(["gzip", "-dc"],
                                             stdin = f,
                                             stdout = subprocess.PIPE)
                f = gzip_proc.stdout

            try:
                # Try to get a real timestamp from the filename
                try:
                    # Subtract 1 hour because files are created at the
                    # end of the hour.  Hopefully, we'll be able to use
                    # internal comments and this value won't matter
                    # anyway.
                    clock_ts = (parse_time(filename)
                                - seconds_to_timestamp(3600))
                    print_clock_updated()
                except ValueError:
                    pass

                truncated_lines = 0

                # Read each line
                for line in f:
                    data_ts = data_ts_base + rate_to_period(data_ts_rate,
                                                            data_ts_inc)

                    # If no content other than the newline, skip it
                    if len(line) <= 1:
                        continue

                    # If line starts with a comment, look for a timestamp
                    if line[0] == '#':
                        try:
                            clock_ts = parse_time(line[1:])
                            print_clock_updated()
                        except ValueError:
                            pass
                        continue

                    # If inserting live, use clock timestamp
                    if live:
                        clock_ts = time_now()

                    # If we have a real timestamp, compare it to the data
                    # timestamp, and make sure things match up.
                    if clock_ts is not None:
                        if (data_ts - seconds_to_timestamp(10)) > clock_ts:
                            # Accumulated line timestamps are in the
                            # future.  If we were to set
                            # data_ts=clock_ts, we'd create an overlap,
                            # so we have to just bail out here.
                            err = sprintf("Data is coming in too fast: "
                                          "data time "
                                          "is %s but clock time is only %s",
                                          timestamp_to_human(data_ts),
                                          timestamp_to_human(clock_ts))
                            raise ParseError(filename, err)

                        if (data_ts + seconds_to_timestamp(10)) < clock_ts:
                            # Accumulated line timestamps are in the
                            # past.  We can just skip some time and
                            # leave a gap in the data.
                            if data_ts_base != 0:
                                printf("Skipping data timestamp forward "
                                       "from "
                                       "%s to %s to match clock time\n",
                                       timestamp_to_human(data_ts),
                                       timestamp_to_human(clock_ts))
                            # NOTE(review): presumably ends the current
                            # interval so the jump shows up as a gap --
                            # confirm against the nilmdb client API.
                            stream.finalize()
                            data_ts_base = data_ts = clock_ts
                            data_ts_inc = 0

                        # Don't use this clock time anymore until we
                        # update it
                        clock_ts = None

                    if data_ts_base == 0:
                        raise ParseError(filename,
                                         "No idea what timestamp to use")

                    # This line is legit, so increment timestamp
                    data_ts_inc += 1

                    # Once in a while a line might be truncated, if we're
                    # at the end of a file.  Ignore it, but if we ignore
                    # too many, bail out.
                    if line[-1] != '\n':
                        truncated_lines += 1
                        if truncated_lines > 3:
                            raise ParseError(filename,
                                             "too many short lines")
                        printf("Ignoring short line in %s\n", filename)
                        continue

                    # Insert it
                    stream.insert("%d %s" % (data_ts, line))
            finally:
                if gzip_proc is not None:
                    # Close our end of the pipe before waiting, so that
                    # gzip cannot block on a full pipe if we stopped
                    # reading early, then reap the child so it does not
                    # linger as a zombie.
                    gzip_proc.stdout.close()
                    gzip_proc.wait()

            # Only reached when the file was read without an exception.
            if gzip_proc is not None and gzip_proc.returncode != 0:
                printf("Warning: gzip exited with status %d while "
                       "reading %s\n", gzip_proc.returncode, filename)
    print("Done")

# Run the inserter when this file is executed as a script.
if __name__ == "__main__":
    main()