diff --git a/insert.py b/insert.py index 86ee6c7..3d6e867 100755 --- a/insert.py +++ b/insert.py @@ -8,11 +8,12 @@ from nilmdb.utils.time import parse_time, format_time import sys import re import argparse +import subprocess class ParseError(Exception): def __init__(self, filename, linenum, error): - s = filename + ":" + str(linenum) + ": " + error - super(ParseError, self).__init__(error) + msg = filename + ":" + str(linenum+1) + ": " + error + super(ParseError, self).__init__(msg) parser = argparse.ArgumentParser(description = """\ Insert data from ethstream, either live (using the system time as a @@ -49,13 +50,23 @@ data_ts = None # comments, or or system clock) clock_ts = None +lines_ok = 0 + with client.stream_insert_context(args.path) as stream: for f in args.infile: - printf("Processing %s\n", f.name) + filename = f.name + printf("Processing %s\n", filename) + + # If the filename ends in .gz, open it with gzcat just to be + # nice. + if filename.endswith(".gz"): + p = subprocess.Popen(["gzip", "-dc"], + stdin = f, stdout = subprocess.PIPE) + f = p.stdout # Try to get a real timestamp from the filename try: - clock_ts = parse_time(f.name).totimestamp() + clock_ts = parse_time(filename).totimestamp() printf("Clock time updated to %s\n", format_time(clock_ts)) except ValueError: pass @@ -64,7 +75,7 @@ with client.stream_insert_context(args.path) as stream: for (linenum, line) in enumerate(f): # Any comments? - if line.contains('#'): + if line.find('#') >= 0: (line, comment) = line.split('#', 1) line += '\n' else: @@ -87,9 +98,9 @@ with client.stream_insert_context(args.path) as stream: # Make a timestamp for this line if data_ts is None: if clock_ts is None: - err = "No idea what timestamp to use for the first line" - raise ParseError(f.name, linenum, err) - clock_ts = data_ts + err = "No idea what timestamp to use" + raise ParseError(filename, linenum, err) + data_ts = clock_ts else: data_ts += 1.0 / args.rate @@ -103,7 +114,7 @@ with client.stream_insert_context(args.path) as stream: err = sprintf("Data is coming in too fast: data time is %s " "but clock time is only %s", format_time(data_ts), format_time(clock_ts)) - raise ParserError(f.name, linenum, err) + raise ParserError(filename, linenum, err) if (data_ts + 10) < clock_ts: # Accumulated line timetamps are in the past. We @@ -118,4 +129,17 @@ with client.stream_insert_context(args.path) as stream: # Don't use this clock time anymore until we update it clock_ts = None - stream.insert_line(line) + # Once in a while a line might be truncated, if we're at + # the end of a file. Accept it, but only if we've been + # getting good lines too + if line[-1] != '\n': + if lines_ok > 10: + printf("Ignoring short line at %s:%d\n", filename, linenum) + continue + else: + raise ParserError(filename, linenum, "short line") + lines_ok = 0 + + stream.insert_line(repr(data_ts) + " " + line) + lines_ok += 1 +print "Done"