Compare commits

...

6 Commits

Author SHA1 Message Date
c083d63c96 Tests for Unicode compliance 2013-01-03 17:03:52 -05:00
0221e3ea21 Update commandline test helpers to better handle Unicode
We replace cStringIO with a StringIO subclass that forces UTF-8
encoding, and explicitly convert command lines to UTF-8 before passing
them to shlex.  These changes only affect tests, not normal command-line
operation.
2013-01-03 17:03:52 -05:00
f5fd2b064e Replace urllib.urlencode() with a version that encodes Unicode as UTF-8 instead of ASCII 2013-01-03 17:02:38 -05:00
06e91a6a98 Always use function version of print() 2013-01-03 17:02:38 -05:00
41b3f3c018 Always use UTF-8 for filenames in nilmdb.bulkdata 2013-01-03 17:02:38 -05:00
842076fef4 Cleanup server error handling with decorator 2013-01-03 17:02:38 -05:00
9 changed files with 195 additions and 47 deletions

View File

@@ -29,9 +29,18 @@ class BulkData(object):
def close(self): def close(self):
self.getnode.cache_remove_all() self.getnode.cache_remove_all()
def create(self, path, layout_name): def _encode_filename(self, path):
# Encode all paths to UTF-8, regardless of sys.getfilesystemencoding(),
# because we want to be able to represent all code points and the user
# will never be directly exposed to filenames. We can then do path
# manipulations on the UTF-8 directly.
if isinstance(path, unicode):
return path.encode('utf-8')
return path
def create(self, unicodepath, layout_name):
""" """
path: path to the data (e.g. '/newton/prep'). unicodepath: path to the data (e.g. u'/newton/prep').
Paths must contain at least two elements, e.g.: Paths must contain at least two elements, e.g.:
/newton/prep /newton/prep
/newton/raw /newton/raw
@@ -40,6 +49,8 @@ class BulkData(object):
layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8' layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8'
""" """
path = self._encode_filename(unicodepath)
if path[0] != '/': if path[0] != '/':
raise ValueError("paths must start with /") raise ValueError("paths must start with /")
[ group, node ] = path.rsplit("/", 1) [ group, node ] = path.rsplit("/", 1)
@@ -92,14 +103,15 @@ class BulkData(object):
raise ValueError("error creating table at that path: " + e.strerror) raise ValueError("error creating table at that path: " + e.strerror)
# Open and cache it # Open and cache it
self.getnode(path) self.getnode(unicodepath)
# Success # Success
return return
def destroy(self, path): def destroy(self, unicodepath):
"""Fully remove all data at a particular path. No way to undo """Fully remove all data at a particular path. No way to undo
it! The group/path structure is removed, too.""" it! The group/path structure is removed, too."""
path = self._encode_filename(unicodepath)
# Get OS path # Get OS path
elements = path.lstrip('/').split('/') elements = path.lstrip('/').split('/')
@@ -125,9 +137,10 @@ class BulkData(object):
# Cache open tables # Cache open tables
@nilmdb.utils.lru_cache(size = table_cache_size, @nilmdb.utils.lru_cache(size = table_cache_size,
onremove = lambda x: x.close()) onremove = lambda x: x.close())
def getnode(self, path): def getnode(self, unicodepath):
"""Return a Table object corresponding to the given database """Return a Table object corresponding to the given database
path, which must exist.""" path, which must exist."""
path = self._encode_filename(unicodepath)
elements = path.lstrip('/').split('/') elements = path.lstrip('/').split('/')
ospath = os.path.join(self.root, *elements) ospath = os.path.join(self.root, *elements)
return Table(ospath) return Table(ospath)

View File

@@ -1,4 +1,5 @@
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import print_function
from nilmdb.utils.printf import * from nilmdb.utils.printf import *
import nilmdb.client import nilmdb.client
import sys import sys
@@ -50,7 +51,7 @@ def cmd_extract(self):
# Strip timestamp (first element). Doesn't make sense # Strip timestamp (first element). Doesn't make sense
# if we are only returning a count. # if we are only returning a count.
dataline = ' '.join(dataline.split(' ')[1:]) dataline = ' '.join(dataline.split(' ')[1:])
print dataline print(dataline)
printed = True printed = True
if not printed: if not printed:
if self.args.annotate: if self.args.annotate:

View File

@@ -10,7 +10,6 @@ import re
import os import os
import simplejson as json import simplejson as json
import urlparse import urlparse
import urllib
import pycurl import pycurl
import cStringIO import cStringIO
@@ -59,7 +58,8 @@ class HTTPClient(object):
def _setup_url(self, url = "", params = ""): def _setup_url(self, url = "", params = ""):
url = urlparse.urljoin(self.baseurl, url) url = urlparse.urljoin(self.baseurl, url)
if params: if params:
url = urlparse.urljoin(url, "?" + urllib.urlencode(params, True)) url = urlparse.urljoin(
url, "?" + nilmdb.utils.urllib.urlencode(params, True))
self.curl.setopt(pycurl.URL, url) self.curl.setopt(pycurl.URL, url)
self.url = url self.url = url

View File

@@ -11,6 +11,7 @@ import sys
import time import time
import os import os
import simplejson as json import simplejson as json
import functools
try: try:
import cherrypy import cherrypy
@@ -39,7 +40,6 @@ def workaround_cp_bug_1200(func): # pragma: no cover (just a workaround)
# Even if chunked responses are disabled, you may still miss miss # Even if chunked responses are disabled, you may still miss miss
# LookupError, or UnicodeError exceptions due to CherryPy bug # LookupError, or UnicodeError exceptions due to CherryPy bug
# #1200. This throws them as generic Exceptions insteads. # #1200. This throws them as generic Exceptions insteads.
import functools
import traceback import traceback
@functools.wraps(func) @functools.wraps(func)
def wrapper(*args, **kwargs): def wrapper(*args, **kwargs):
@@ -51,6 +51,20 @@ def workaround_cp_bug_1200(func): # pragma: no cover (just a workaround)
traceback.format_exc()) traceback.format_exc())
return wrapper return wrapper
def exception_to_httperror(response = "400 Bad Request"):
    """Build a decorator that reports any Exception as a cherrypy.HTTPError.

    response: HTTP status line handed to cherrypy.HTTPError.

    The decorated function's return value is passed through untouched.
    If it raises an Exception, a cherrypy.HTTPError is raised in its
    place, whose message is "<ExceptionClassName>: <str(exception)>".
    """
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                result = func(*args, **kwargs)
            except Exception as err:
                # Describe the failure by exception class name and text
                description = sprintf("%s: %s", type(err).__name__, str(err))
                raise cherrypy.HTTPError(response, description)
            else:
                return result
        return wrapper
    return decorator
# CherryPy apps # CherryPy apps
class Root(NilmApp): class Root(NilmApp):
"""Root application for NILM database""" """Root application for NILM database"""
@@ -104,26 +118,20 @@ class Stream(NilmApp):
# /stream/create?path=/newton/prep&layout=PrepData # /stream/create?path=/newton/prep&layout=PrepData
@cherrypy.expose @cherrypy.expose
@cherrypy.tools.json_out() @cherrypy.tools.json_out()
@exception_to_httperror()
def create(self, path, layout): def create(self, path, layout):
"""Create a new stream in the database. Provide path """Create a new stream in the database. Provide path
and one of the nilmdb.layout.layouts keys. and one of the nilmdb.layout.layouts keys.
""" """
try: return self.db.stream_create(path, layout)
return self.db.stream_create(path, layout)
except Exception as e:
message = sprintf("%s: %s", type(e).__name__, e.message)
raise cherrypy.HTTPError("400 Bad Request", message)
# /stream/destroy?path=/newton/prep # /stream/destroy?path=/newton/prep
@cherrypy.expose @cherrypy.expose
@cherrypy.tools.json_out() @cherrypy.tools.json_out()
@exception_to_httperror()
def destroy(self, path): def destroy(self, path):
"""Delete a stream and its associated data.""" """Delete a stream and its associated data."""
try: return self.db.stream_destroy(path)
return self.db.stream_destroy(path)
except Exception as e:
message = sprintf("%s: %s", type(e).__name__, e.message)
raise cherrypy.HTTPError("400 Bad Request", message)
# /stream/get_metadata?path=/newton/prep # /stream/get_metadata?path=/newton/prep
# /stream/get_metadata?path=/newton/prep&key=foo&key=bar # /stream/get_metadata?path=/newton/prep&key=foo&key=bar
@@ -152,30 +160,24 @@ class Stream(NilmApp):
# /stream/set_metadata?path=/newton/prep&data=<json> # /stream/set_metadata?path=/newton/prep&data=<json>
@cherrypy.expose @cherrypy.expose
@cherrypy.tools.json_out() @cherrypy.tools.json_out()
@exception_to_httperror()
def set_metadata(self, path, data): def set_metadata(self, path, data):
"""Set metadata for the named stream, replacing any """Set metadata for the named stream, replacing any
existing metadata. Data should be a json-encoded existing metadata. Data should be a json-encoded
dictionary""" dictionary"""
try: data_dict = json.loads(data)
data_dict = json.loads(data) self.db.stream_set_metadata(path, data_dict)
self.db.stream_set_metadata(path, data_dict)
except Exception as e:
message = sprintf("%s: %s", type(e).__name__, e.message)
raise cherrypy.HTTPError("400 Bad Request", message)
return "ok" return "ok"
# /stream/update_metadata?path=/newton/prep&data=<json> # /stream/update_metadata?path=/newton/prep&data=<json>
@cherrypy.expose @cherrypy.expose
@cherrypy.tools.json_out() @cherrypy.tools.json_out()
@exception_to_httperror()
def update_metadata(self, path, data): def update_metadata(self, path, data):
"""Update metadata for the named stream. Data """Update metadata for the named stream. Data
should be a json-encoded dictionary""" should be a json-encoded dictionary"""
try: data_dict = json.loads(data)
data_dict = json.loads(data) self.db.stream_update_metadata(path, data_dict)
self.db.stream_update_metadata(path, data_dict)
except Exception as e:
message = sprintf("%s: %s", type(e).__name__, e.message)
raise cherrypy.HTTPError("400 Bad Request", message)
return "ok" return "ok"
# /stream/insert?path=/newton/prep # /stream/insert?path=/newton/prep

View File

@@ -6,3 +6,4 @@ from .serializer import Serializer
from .lrucache import lru_cache from .lrucache import lru_cache
from .diskusage import du from .diskusage import du
from .mustclose import must_close from .mustclose import must_close
from .urllib import urlencode

View File

@@ -5,6 +5,7 @@
# with nilmdb.Timer("flush"): # with nilmdb.Timer("flush"):
# foo.flush() # foo.flush()
from __future__ import print_function
import contextlib import contextlib
import time import time
@@ -18,4 +19,4 @@ def Timer(name = None, tosyslog = False):
import syslog import syslog
syslog.syslog(msg) syslog.syslog(msg)
else: else:
print msg print(msg)

68
nilmdb/utils/urllib.py Normal file
View File

@@ -0,0 +1,68 @@
from __future__ import absolute_import
from urllib import quote_plus, _is_unicode
# urllib.urlencode insists on encoding Unicode as ASCII.  This is a
# copy of that function, except that Unicode keys, values and sequence
# elements are encoded as UTF-8 instead.

def _utf8_str(value):
    """Return `value` as a string that quote_plus() can safely consume.

    Unicode text is encoded as UTF-8, byte strings are returned as-is,
    and anything else is converted with str(), as urllib.urlencode does.
    """
    # type(u"") is `unicode` on Python 2; spelling it this way avoids
    # depending on the private urllib._is_unicode helper.
    if isinstance(value, type(u"")):
        return value.encode("utf-8", "strict")
    if isinstance(value, bytes):
        return value
    return str(value)

def urlencode(query, doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Unicode keys, values and sequence elements are encoded as UTF-8
    before being quoted.

    Raises TypeError if query is neither a mapping nor a (possibly
    empty) sequence whose first element is a tuple.
    """
    if hasattr(query, "items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            # Re-raise with a descriptive message.  (The previous version
            # used sys.exc_info() here without ever importing sys.)
            raise TypeError("not a valid non-string sequence "
                            "or mapping object")

    pairs = []
    for k, v in query:
        k = quote_plus(_utf8_str(k))
        if not doseq or isinstance(v, (bytes, type(u""))):
            # Without doseq every value is a single parameter, and
            # strings are never expanded as sequences.
            pairs.append(k + '=' + quote_plus(_utf8_str(v)))
            continue
        try:
            # is this a sufficient test for sequence-ness?
            len(v)
        except TypeError:
            # not a sequence
            pairs.append(k + '=' + quote_plus(_utf8_str(v)))
        else:
            # one parameter per element of the sequence
            for elt in v:
                pairs.append(k + '=' + quote_plus(_utf8_str(elt)))
    return '&'.join(pairs)

View File

@@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
import nilmdb import nilmdb
from nilmdb.utils.printf import * from nilmdb.utils.printf import *
from nilmdb.client import ClientError, ServerError from nilmdb.client import ClientError, ServerError
@@ -82,6 +84,8 @@ class TestClient(object):
# Bad layout type # Bad layout type
with assert_raises(ClientError): with assert_raises(ClientError):
client.stream_create("/newton/prep", "NoSuchLayout") client.stream_create("/newton/prep", "NoSuchLayout")
# Create three streams
client.stream_create("/newton/prep", "PrepData") client.stream_create("/newton/prep", "PrepData")
client.stream_create("/newton/raw", "RawData") client.stream_create("/newton/raw", "RawData")
client.stream_create("/newton/zzz/rawnotch", "RawNotchedData") client.stream_create("/newton/zzz/rawnotch", "RawNotchedData")
@@ -277,3 +281,40 @@ class TestClient(object):
"end": "123" }, retjson=False) "end": "123" }, retjson=False)
if "transfer-encoding: chunked" not in client.http._headers.lower(): if "transfer-encoding: chunked" not in client.http._headers.lower():
warnings.warn("Non-chunked HTTP response for /stream/extract") warnings.warn("Non-chunked HTTP response for /stream/extract")
def test_client_7_unicode(self):
    """Exercise stream paths and metadata that contain non-ASCII text."""
    client = nilmdb.Client(url = "http://localhost:12380/")

    # Start from an empty database: destroy every stream currently listed
    for existing in client.stream_list():
        client.stream_destroy(existing[0])
    eq_(client.stream_list(), [])

    # Each stream is a [ path, layout ] pair, which is the form the
    # stream_list() results are compared against below
    layout = u"uint16_6"
    raw = [ u"/düsseldorf/raw", layout ]
    prep = [ u"/düsseldorf/prep", layout ]
    raw_path = raw[0]
    prep_path = prep[0]

    # Create the Unicode streams and match them by layout and by path
    client.stream_create(*raw)
    eq_(client.stream_list(), [raw])
    eq_(client.stream_list(layout=layout), [raw])
    eq_(client.stream_list(path=raw_path), [raw])
    client.stream_create(*prep)
    eq_(client.stream_list(), [prep, raw])

    # Metadata starts out empty on both streams
    for path in (raw_path, prep_path):
        eq_(client.stream_get_metadata(path), {})

    # Unicode appears both as a metadata value and as a metadata key
    alpha_only = { u"alpha": u"α" }
    beta_only = { u"β": u"beta" }
    both = { u"alpha": u"α",
             u"β": u"beta" }

    # prep gets everything in one set (plus an empty update);
    # raw is built up from two separate updates
    client.stream_set_metadata(prep_path, both)
    client.stream_update_metadata(prep_path, {})
    client.stream_update_metadata(raw_path, alpha_only)
    client.stream_update_metadata(raw_path, beta_only)

    # Both streams must now report the same metadata
    eq_(client.stream_get_metadata(prep_path), both)
    eq_(client.stream_get_metadata(raw_path), both)
    eq_(client.stream_get_metadata(raw_path, [ "alpha" ]), alpha_only)
    eq_(client.stream_get_metadata(raw_path, [ "alpha", "β" ]), both)

View File

@@ -1,3 +1,5 @@
# -*- coding: utf-8 -*-
import nilmdb import nilmdb
from nilmdb.utils.printf import * from nilmdb.utils.printf import *
import nilmdb.cmdline import nilmdb.cmdline
@@ -13,7 +15,7 @@ import threading
import urllib2 import urllib2
from urllib2 import urlopen, HTTPError from urllib2 import urlopen, HTTPError
import Queue import Queue
import cStringIO import StringIO
import shlex import shlex
from test_helpers import * from test_helpers import *
@@ -45,13 +47,18 @@ def setup_module():
def teardown_module(): def teardown_module():
server_stop() server_stop()
# Add an encoding property to StringIO so Python will convert Unicode
# properly when writing or reading.
class UTF8StringIO(StringIO.StringIO):
    """StringIO.StringIO subclass that advertises a UTF-8 `encoding`.

    The only difference from the base class is the class attribute
    below; no methods are overridden.
    """
    # NOTE(review): presumably consulted by print / the code under test
    # when it is handed unicode objects -- confirm against callers.
    encoding = 'utf-8'
class TestCmdline(object): class TestCmdline(object):
def run(self, arg_string, infile=None, outfile=None): def run(self, arg_string, infile=None, outfile=None):
"""Run a cmdline client with the specified argument string, """Run a cmdline client with the specified argument string,
passing the given input. Returns a tuple with the output and passing the given input. Returns a tuple with the output and
exit code""" exit code"""
#print "TZ=UTC ./nilmtool.py " + arg_string # printf("TZ=UTC ./nilmtool.py %s\n", arg_string)
class stdio_wrapper: class stdio_wrapper:
def __init__(self, stdin, stdout, stderr): def __init__(self, stdin, stdout, stderr):
self.io = (stdin, stdout, stderr) self.io = (stdin, stdout, stderr)
@@ -62,15 +69,18 @@ class TestCmdline(object):
( sys.stdin, sys.stdout, sys.stderr ) = self.saved ( sys.stdin, sys.stdout, sys.stderr ) = self.saved
# Empty input if none provided # Empty input if none provided
if infile is None: if infile is None:
infile = cStringIO.StringIO("") infile = UTF8StringIO("")
# Capture stderr # Capture stderr
errfile = cStringIO.StringIO() errfile = UTF8StringIO()
if outfile is None: if outfile is None:
# If no output file, capture stdout with stderr # If no output file, capture stdout with stderr
outfile = errfile outfile = errfile
with stdio_wrapper(infile, outfile, errfile) as s: with stdio_wrapper(infile, outfile, errfile) as s:
try: try:
nilmdb.cmdline.Cmdline(shlex.split(arg_string)).run() # shlex doesn't support Unicode very well. Encode the
# string as UTF-8 explicitly before splitting.
args = shlex.split(arg_string.encode('utf-8'))
nilmdb.cmdline.Cmdline(args).run()
sys.exit(0) sys.exit(0)
except SystemExit as e: except SystemExit as e:
exitcode = e.code exitcode = e.code
@@ -298,16 +308,9 @@ class TestCmdline(object):
eq_(cmd.parse_time("hi there 20120405 1400-0400 testing! 123"), test) eq_(cmd.parse_time("hi there 20120405 1400-0400 testing! 123"), test)
eq_(cmd.parse_time("20120405 1800 UTC"), test) eq_(cmd.parse_time("20120405 1800 UTC"), test)
eq_(cmd.parse_time("20120405 1400-0400 UTC"), test) eq_(cmd.parse_time("20120405 1400-0400 UTC"), test)
with assert_raises(ValueError): for badtime in [ "20120405 1400-9999", "hello", "-", "", "14:00" ]:
print cmd.parse_time("20120405 1400-9999") with assert_raises(ValueError):
with assert_raises(ValueError): x = cmd.parse_time(badtime)
print cmd.parse_time("hello")
with assert_raises(ValueError):
print cmd.parse_time("-")
with assert_raises(ValueError):
print cmd.parse_time("")
with assert_raises(ValueError):
print cmd.parse_time("14:00")
eq_(cmd.parse_time("snapshot-20120405-140000.raw.gz"), test) eq_(cmd.parse_time("snapshot-20120405-140000.raw.gz"), test)
eq_(cmd.parse_time("prep-20120405T1400"), test) eq_(cmd.parse_time("prep-20120405T1400"), test)
@@ -519,3 +522,21 @@ class TestCmdline(object):
# Make sure it was created empty # Make sure it was created empty
self.ok("list --detail --path " + path) self.ok("list --detail --path " + path)
self.contain("(no intervals)") self.contain("(no intervals)")
def test_cmdline_11_unicode(self):
    """Run the command-line tool with non-ASCII stream paths and metadata."""
    # Unicode paths.
    # First destroy the /newton streams (presumably left over from
    # earlier tests in this class -- verify against test ordering).
    for leftover in ("/newton/asdf/qwer", "/newton/prep",
                     "/newton/raw", "/newton/zzz"):
        self.ok("destroy " + leftover)

    # Create a stream with a non-ASCII path and check that it is listed
    self.ok(u"create /düsseldorf/raw uint16_6")
    self.ok("list --detail")
    self.contain(u"/düsseldorf/raw uint16_6")
    self.contain("(no intervals)")

    # Unicode metadata
    metadata_cmd = u"metadata /düsseldorf/raw"
    self.ok(metadata_cmd + u" --set α=beta 'γ'")
    self.ok(metadata_cmd + u" --update 'α=β ε τ α'")
    self.ok(metadata_cmd)
    self.match(u"α=β ε τ α\nγ\n")