Browse Source

Can now insert data using command line tool. Time to benchmark etc.

git-svn-id: https://bucket.mit.edu/svn/nilm/nilmdb@10709 ddd99763-3ecb-0310-9145-efcb8ce7c51f
tags/bxinterval-last
Jim Paris 12 years ago
parent
commit
7dfa288270
7 changed files with 116 additions and 105 deletions
  1. +6
    -6
      nilmdb/client.py
  2. +47
    -38
      nilmdb/cmdline.py
  3. +1
    -1
      nilmdb/nilmdb.py
  4. +11
    -0
      nilmdb/timestamper.py
  5. +1
    -1
      setup.cfg
  6. +39
    -51
      tests/test_cmdline.py
  7. +11
    -8
      tests/test_timestamper.py

+ 6
- 6
nilmdb/client.py View File

@@ -15,7 +15,7 @@ import cStringIO

version = "1.0"

class NilmCommError(Exception):
class Error(Exception):
"""Base exception for both ClientError and ServerError responses"""
def __init__(self,
status = "Unspecified error",
@@ -36,9 +36,9 @@ class NilmCommError(Exception):
if self.traceback: # pragma: no cover
s += sprintf("\nServer traceback:\n%s", self.traceback)
return s
class ClientError(NilmCommError):
class ClientError(Error):
pass
class ServerError(NilmCommError):
class ServerError(Error):
pass

class MyCurl(object):
@@ -87,8 +87,8 @@ class MyCurl(object):
if code >= 500 and code <= 599:
raise ServerError(**args)
else:
raise NilmCommError(**args)
raise Error(**args)
def _reqjson(self, url, params):
"""GET or POST that returns JSON string"""
self._setup_url(url, params)
@@ -124,7 +124,7 @@ class Client(object):
"""Main client interface to the Nilm database."""

client_version = version
def __init__(self, url):
self.curl = MyCurl(url)



+ 47
- 38
nilmdb/cmdline.py View File

@@ -16,7 +16,6 @@ import os
import urlparse
import argparse
import fnmatch
import subprocess

from argparse import ArgumentDefaultsHelpFormatter as def_form

@@ -138,6 +137,11 @@ class Cmdline(object):
try to deduce timestamps from the file.
""")

group.add_argument("-u", "--utc", action="store_true",
help="""
Assume UTC timestamps if not otherwise
specified (default: localtime)
""")
group.add_argument("-r", "--rate", type=float,
help="""
If needed, rate in Hz (default: based on
@@ -158,10 +162,10 @@ class Cmdline(object):
group.add_argument("path",
help="Path of stream, e.g. /foo/bar")
group.add_argument("file", nargs="*", default=['-'],
help="File(s) to insert (default: stdin)")
help="File(s) to insert (default: - (stdin))")

def die(self, formatstr, *args):
fprintf(sys.stderr, formatstr, *args)
fprintf(sys.stderr, formatstr + "\n", *args)
self.client.close()
sys.exit(-1)

@@ -175,8 +179,8 @@ class Cmdline(object):
# Make a test connection to make sure things work
try:
server_version = self.client.version()
except nilmdb.client.NilmCommError as e:
self.die("Error connecting to server: %s\n", str(e))
except nilmdb.client.Error as e:
self.die("Error connecting to server: %s", str(e))

# Now dispatch client request to appropriate function. Parser
# should have ensured that we don't have any unknown commands
@@ -206,7 +210,7 @@ class Cmdline(object):
try:
self.client.stream_create(self.args.path, self.args.layout)
except nilmdb.client.ClientError as e:
self.die("Error creating stream: %s\n", str(e))
self.die("Error creating stream: %s", str(e))

def cmd_metadata(self):
"""Manipulate metadata"""
@@ -224,21 +228,21 @@ class Cmdline(object):
for keyval in keyvals:
kv = keyval.split('=')
if len(kv) != 2 or kv[0] == "":
self.die("Error parsing key=value argument '%s'\n", keyval)
self.die("Error parsing key=value argument '%s'", keyval)
data[kv[0]] = kv[1]

# Make the call
try:
handler(self.args.path, data)
except nilmdb.client.ClientError as e:
self.die("Error setting/updating metadata: %s\n", str(e))
self.die("Error setting/updating metadata: %s", str(e))
else:
# Get (or unspecified)
keys = self.args.get or None
try:
data = self.client.stream_get_metadata(self.args.path, keys)
except nilmdb.client.ClientError as e:
self.die("Error getting metadata: %s\n", str(e))
self.die("Error getting metadata: %s", str(e))
for key, value in sorted(data.items()):
# Omit nonexistent keys
if value is None:
@@ -249,7 +253,7 @@ class Cmdline(object):
# Find requested stream
streams = self.client.stream_list(self.args.path)
if len(streams) != 1:
self.die("Error getting stream info for path %s\n", self.args.path)
self.die("Error getting stream info for path %s", self.args.path)

layout = streams[0][1]

@@ -258,23 +262,12 @@ class Cmdline(object):

for filename in self.args.file:
if filename == '-':
process = None
infile = sys.stdin
else:
if not os.path.exists(filename):
self.die("Error opening input file %s\n", filename)
try:
# zcat is much faster than python's gzopen. We've
# checked that the file existed, so this isn't too
# likely to fail
process = subprocess.Popen(["zcat", "-f", filename],
bufsize = -1,
stdin = open(os.devnull),
stderr = None,
stdout = PIPE)
infile = process.stdout
except OSError: # pragma: no cover
self.die("Error spawning zcat process\n")
infile = open(filename, "r")
except IOError:
self.die("Error opening input file %s", filename)

# Build a timestamper for this file
if self.args.none:
@@ -284,33 +277,49 @@ class Cmdline(object):
if not self.args.rate:
try:
self.args.rate = nilmdb.layout.named[layout].rate_hz
except KeyError:
self.die("Need to specify --rate\n")
except KeyError: # pragma: no cover
self.die("Need to specify --rate")
rate = self.args.rate

if self.args.start:
try:
start = self.parse_time(self.args.start)
start = self.parse_time(self.args.start, self.args.utc)
except ValueError:
self.die("Error parsing start time '%s'\n",
self.die("Error parsing start time '%s'",
self.args.start)
else:
try:
start = self.parse_time(filename)
start = self.parse_time(filename, self.args.utc)
except ValueError:
self.die("Error extracting time from filename '%s'\n",
self.die("Error extracting time from filename '%s'",
filename)

ts = nilmdb.timestamper.TimestamperRate(infile, start, rate)

print "Input file:", filename
print "Timestamper:", ts
print "Start:", start
print "Rate:", rate
# Print info
if not self.args.quiet:
printf("Input file: %s\n", filename)
printf("Timestamper: %s\n", str(ts))

self.die("not implemented")
# Insert the data
try:
result = self.client.stream_insert(self.args.path, ts)
except nilmdb.client.Error as e:
# TODO: It would be nice to be able to offer better errors
# here, particularly in the case of overlap, which just shows
# ugly bracketed ranges of 16-digit numbers and a mangled URL.
# Need to consider adding something like e.prettyprint()
# that is smarter about the contents of the error.
self.die("Error inserting data: %s", str(e))

return

def parse_time(self, toparse, assume_utc = False):
if assume_utc:
tz = datetime_tz.pytz.utc
else:
tz = datetime_tz.localtz()

def parse_time(self, toparse):
# If string doesn't contain at least 6 digits, consider it
# invalid. smartparse might otherwise accept empty strings
# and strings with just separators.
@@ -319,7 +328,7 @@ class Cmdline(object):

# Try to just parse the time as given
try:
return datetime_tz.datetime_tz.smartparse(toparse)
return datetime_tz.datetime_tz.smartparse(toparse, tz)
except ValueError:
pass

@@ -341,7 +350,7 @@ class Cmdline(object):
r")", toparse)
if res is not None:
try:
return datetime_tz.datetime_tz.smartparse(res.group(2))
return datetime_tz.datetime_tz.smartparse(res.group(2), tz)
except ValueError:
pass



+ 1
- 1
nilmdb/nilmdb.py View File

@@ -303,7 +303,7 @@ class NilmDB(object):
if (not parser.min_timestamp or not parser.max_timestamp or
not len(parser.data)):
raise StreamError("no data provided")
# First check for basic overlap using timestamp info from the parser.
stream_id = self._stream_id(path)
iset = self._get_intervals(stream_id)


+ 11
- 0
nilmdb/timestamper.py View File

@@ -78,6 +78,13 @@ class TimestamperRate(Timestamper):
if "totimestamp" in dir(start):
start = start.totimestamp()
Timestamper.__init__(self, file, iterator(start, rate, end))
self.start = start
self.rate = rate
def __str__(self):
start = datetime_tz.datetime_tz.fromtimestamp(self.start)
start = start.strftime("%a, %d %b %Y %H:%M:%S %Z")
return sprintf("TimestamperRate(..., start=\"%s\", rate=%g)",
str(start), self.rate)

class TimestamperNow(Timestamper):
"""Timestamper that uses current time"""
@@ -87,6 +94,8 @@ class TimestamperNow(Timestamper):
now = datetime_tz.datetime_tz.utcnow().totimestamp()
yield sprintf("%.6f ", now)
Timestamper.__init__(self, file, iterator())
def __str__(self):
return "TimestamperNow(...)"

class TimestamperNull(Timestamper):
"""Timestamper that adds nothing to each line"""
@@ -95,3 +104,5 @@ class TimestamperNull(Timestamper):
while True:
yield ""
Timestamper.__init__(self, file, iterator())
def __str__(self):
return "TimestamperNull(...)"

+ 1
- 1
setup.cfg View File

@@ -10,7 +10,7 @@ cover-erase=
##cover-branches= # need nose 1.1.3 for this
stop=
verbosity=2
tests=tests/test_cmdline.py
#tests=tests/test_cmdline.py
#tests=tests/test_layout.py
#tests=tests/test_interval.py
#tests=tests/test_client.py


+ 39
- 51
tests/test_cmdline.py View File

@@ -239,6 +239,8 @@ class TestCmdline(object):
eq_(cmd.parse_time(str(test)), test)
test = datetime_tz.datetime_tz.smartparse("20120405 1400-0400")
eq_(cmd.parse_time("hi there 20120405 1400-0400 testing! 123"), test)
eq_(cmd.parse_time("20120405 1800", True), test)
eq_(cmd.parse_time("20120405 1400-0400", True), test)
with assert_raises(ValueError):
print cmd.parse_time("20120405 1400-9999")
with assert_raises(ValueError):
@@ -249,6 +251,8 @@ class TestCmdline(object):
print cmd.parse_time("")
with assert_raises(ValueError):
print cmd.parse_time("14:00")
eq_(cmd.parse_time("snapshot-20120405-140000.raw.gz"), test)
eq_(cmd.parse_time("prep-20120405T1400"), test)

def test_cmdline_6_insert(self):
self.ok("insert --help")
@@ -259,55 +263,39 @@ class TestCmdline(object):
self.fail("insert /newton/prep baz qwer")
self.match("Error opening input file baz\n")

self.ok("insert /newton/prep")

#self.fail("insert /newton/nosuchpath")
#self.contain("No stream at path /newton/nosuchpath")

# def test_client_3_insert(self):
# client = nilmdb.Client(url = "http://localhost:12380/")

# datetime_tz.localtz_set("America/New_York")

# testfile = "tests/data/prep-20120323T1000"
# start = datetime_tz.datetime_tz.smartparse("20120323T1000")
# rate = 120

# # First try a nonexistent path
# data = nilmdb.timestamper.TimestamperRate(testfile, start, 120)
# with assert_raises(ClientError) as e:
# result = client.stream_insert("/newton/no-such-path", data)
# in_("404 Not Found", str(e.exception))

# # Now try reversed timestamps
# data = nilmdb.timestamper.TimestamperRate(testfile, start, 120)
# data = reversed(list(data))
# with assert_raises(ClientError) as e:
# result = client.stream_insert("/newton/prep", data)
# in_("400 Bad Request", str(e.exception))
# in_("timestamp is not monotonically increasing", str(e.exception))

# # Now try empty data (no server request made)
# empty = cStringIO.StringIO("")
# data = nilmdb.timestamper.TimestamperRate(empty, start, 120)
# result = client.stream_insert("/newton/prep", data)
# eq_(result, None)

# # Try forcing a server request with empty data
# with assert_raises(ClientError) as e:
# client.curl.putjson("stream/insert", "", { "path": "/newton/prep" })
# in_("400 Bad Request", str(e.exception))
# in_("no data provided", str(e.exception))

# # Now do the real load
# data = nilmdb.timestamper.TimestamperRate(testfile, start, 120)
# result = client.stream_insert("/newton/prep", data)
# eq_(result, "ok")

# # Try some overlapping data -- just insert it again
# data = nilmdb.timestamper.TimestamperRate(testfile, start, 120)
# with assert_raises(ClientError) as e:
# result = client.stream_insert("/newton/prep", data)
# in_("400 Bad Request", str(e.exception))
# in_("OverlapError", str(e.exception))
self.fail("insert /newton/prep")
self.contain("Error extracting time")

self.fail("insert --start 1234 /newton/prep 1 2 3 4")
self.contain("--start can only be used with one input file")

# insert pre-timestamped data, from stdin
with open("tests/data/prep-20120323T1004-timestamped") as input:
self.ok("insert --none /newton/prep", input)

# insert data with normal timestamper from filename
self.ok("insert -u /newton/prep "
"tests/data/prep-20120323T1000 "
"tests/data/prep-20120323T1002")

# overlap
self.fail("insert --utc /newton/prep "
"tests/data/prep-20120323T1004")
self.contain("overlap")

# still reported as an overlap even if we specify a different start
self.fail("insert --start '03/23/2012 06:05:00' /newton/prep "
"tests/data/prep-20120323T1004")
self.contain("overlap")

# wrong format
self.fail("insert --utc /newton/raw "
"tests/data/prep-20120323T1004")
self.contain("Error parsing input data")

# empty data does nothing
self.ok("insert --start '03/23/2012 06:05:00' /newton/prep "
"/dev/null")

# bad start time
self.fail("insert --start 'whatever' /newton/prep /dev/null")

+ 11
- 8
tests/test_timestamper.py View File

@@ -9,18 +9,12 @@ import os
import sys
import cStringIO

def eq_(a, b):
if not a == b:
raise AssertionError("%r != %r" % (a, b))

def ne_(a, b):
if not a != b:
raise AssertionError("unexpected %r == %r" % (a, b))
from test_helpers import *

class TestTimestamper(object):

# Not a very comprehensive test, but it's good enough.

def test_timestamper(self):
def join(list):
return "\n".join(list) + "\n"
@@ -36,6 +30,7 @@ class TestTimestamper(object):
ts = nilmdb.timestamper.TimestamperRate(input, start, 8000)
foo = ts.readlines()
eq_(foo, join(lines_out))
in_("TimestamperRate(..., start=", str(ts))

# first 30 or so bytes means the first 2 lines
input = cStringIO.StringIO(join(lines_in))
@@ -80,9 +75,17 @@ class TestTimestamper(object):
foo = ts.readlines()
ne_(foo, join(lines_out))
eq_(len(foo), len(join(lines_out)))
eq_(str(ts), "TimestamperNow(...)")

# Test passing a file (should be empty)
ts = nilmdb.timestamper.TimestamperNow("/dev/null")
for line in ts:
raise AssertionError
ts.close()

# Test the null timestamper
input = cStringIO.StringIO(join(lines_out)) # note: lines_out
ts = nilmdb.timestamper.TimestamperNull(input)
foo = ts.readlines()
eq_(foo, join(lines_out))
eq_(str(ts), "TimestamperNull(...)")

Loading…
Cancel
Save