Tests for Unicode compliance

Update commandline test helpers to better handle Unicode
We replace cStringIO with a StringIO subclass that forces UTF-8 encoding, and explicitly convert commandlines to UTF-8 before shlex. These changes will only affect tests, not normal commandline operation.
2013-01-03 17:03:52 -05:00 · 2013-01-03 17:03:52 -05:00 · 2013-01-03 17:02:38 -05:00 · 2013-01-03 17:02:38 -05:00 · 2013-01-03 17:02:38 -05:00 · 2013-01-03 17:02:38 -05:00
9 changed files with 195 additions and 47 deletions
--- a/nilmdb/bulkdata.py
+++ b/nilmdb/bulkdata.py
@@ -29,9 +29,18 @@ class BulkData(object):
    def close(self):
        self.getnode.cache_remove_all()
-    def create(self, path, layout_name):
+    def _encode_filename(self, path):
        # Encode all paths to UTF-8, regardless of sys.getfilesystemencoding(),
        # because we want to be able to represent all code points and the user
        # will never be directly exposed to filenames.  We can then do path
        # manipulations on the UTF-8 directly.
        if isinstance(path, unicode):
            return path.encode('utf-8')
        return path
    def create(self, unicodepath, layout_name):
        """
-        path: path to the data (e.g. '/newton/prep').
+        unicodepath: path to the data (e.g. u'/newton/prep').
        Paths must contain at least two elements, e.g.:
           /newton/prep
           /newton/raw
@@ -40,6 +49,8 @@ class BulkData(object):
        layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8'
        """
        path = self._encode_filename(unicodepath)
        if path[0] != '/':
            raise ValueError("paths must start with /")
        [ group, node ] = path.rsplit("/", 1)
@@ -92,14 +103,15 @@ class BulkData(object):
            raise ValueError("error creating table at that path: " + e.strerror)
        # Open and cache it
-        self.getnode(path)
+        self.getnode(unicodepath)
        # Success
        return
-    def destroy(self, path):
+    def destroy(self, unicodepath):
        """Fully remove all data at a particular path.  No way to undo
        it!  The group/path structure is removed, too."""
        path = self._encode_filename(unicodepath)
        # Get OS path
        elements = path.lstrip('/').split('/')
@@ -125,9 +137,10 @@ class BulkData(object):
    # Cache open tables
    @nilmdb.utils.lru_cache(size = table_cache_size,
                            onremove = lambda x: x.close())
-    def getnode(self, path):
+    def getnode(self, unicodepath):
        """Return a Table object corresponding to the given database
        path, which must exist."""
        path = self._encode_filename(unicodepath)
        elements = path.lstrip('/').split('/')
        ospath = os.path.join(self.root, *elements)
        return Table(ospath)
--- a/nilmdb/cmdline/extract.py
+++ b/nilmdb/cmdline/extract.py
@@ -1,4 +1,5 @@
 from __future__ import absolute_import
 from __future__ import print_function
 from nilmdb.utils.printf import *
 import nilmdb.client
 import sys
@@ -50,7 +51,7 @@ def cmd_extract(self):
            # Strip timestamp (first element).  Doesn't make sense
            # if we are only returning a count.
            dataline = ' '.join(dataline.split(' ')[1:])
-        print dataline
+        print(dataline)
        printed = True
    if not printed:
        if self.args.annotate:
--- a/nilmdb/httpclient.py
+++ b/nilmdb/httpclient.py
@@ -10,7 +10,6 @@ import re
 import os
 import simplejson as json
 import urlparse
 import urllib
 import pycurl
 import cStringIO
@@ -59,7 +58,8 @@ class HTTPClient(object):
    def _setup_url(self, url = "", params = ""):
        url = urlparse.urljoin(self.baseurl, url)
        if params:
-            url = urlparse.urljoin(url, "?" + urllib.urlencode(params, True))
+            url = urlparse.urljoin(
                url, "?" + nilmdb.utils.urllib.urlencode(params, True))
        self.curl.setopt(pycurl.URL, url)
        self.url = url
--- a/nilmdb/server.py
+++ b/nilmdb/server.py
@@ -11,6 +11,7 @@ import sys
 import time
 import os
 import simplejson as json
 import functools
 try:
    import cherrypy
@@ -39,7 +40,6 @@ def workaround_cp_bug_1200(func): # pragma: no cover (just a workaround)
    # Even if chunked responses are disabled, you may still miss
    # LookupError or UnicodeError exceptions due to CherryPy bug
    # #1200.  This throws them as generic Exceptions instead.
    import functools
    import traceback
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
@@ -51,6 +51,20 @@ def workaround_cp_bug_1200(func): # pragma: no cover (just a workaround)
                            traceback.format_exc())
    return wrapper
 def exception_to_httperror(response = "400 Bad Request"):
    """Build a decorator that reports any Exception raised by the
    wrapped callable as a cherrypy.HTTPError instead.

    response: status string handed to cherrypy.HTTPError as its first
    argument.  The accompanying message has the form
    "<exception class name>: <str(exception)>".
    """
    def decorator(func):
        @functools.wraps(func)
        def guarded(*args, **kwargs):
            try:
                result = func(*args, **kwargs)
            except Exception as exc:
                # Describe the failure by its class name and text
                detail = sprintf("%s: %s", type(exc).__name__, str(exc))
                raise cherrypy.HTTPError(response, detail)
            else:
                # Normal case: pass the wrapped function's result through
                return result
        return guarded
    return decorator
 # CherryPy apps
 class Root(NilmApp):
    """Root application for NILM database"""
@@ -104,26 +118,20 @@ class Stream(NilmApp):
    # /stream/create?path=/newton/prep&layout=PrepData
    @cherrypy.expose
    @cherrypy.tools.json_out()
    @exception_to_httperror()
    def create(self, path, layout):
        """Create a new stream in the database.  Provide path
        and one of the nilmdb.layout.layouts keys.
        """
-        try:
+        return self.db.stream_create(path, layout)
            return self.db.stream_create(path, layout)
        except Exception as e:
            message = sprintf("%s: %s", type(e).__name__, e.message)
            raise cherrypy.HTTPError("400 Bad Request", message)
    # /stream/destroy?path=/newton/prep
    @cherrypy.expose
    @cherrypy.tools.json_out()
    @exception_to_httperror()
    def destroy(self, path):
        """Delete a stream and its associated data."""
-        try:
+        return self.db.stream_destroy(path)
            return self.db.stream_destroy(path)
        except Exception as e:
            message = sprintf("%s: %s", type(e).__name__, e.message)
            raise cherrypy.HTTPError("400 Bad Request", message)
    # /stream/get_metadata?path=/newton/prep
    # /stream/get_metadata?path=/newton/prep&key=foo&key=bar
@@ -152,30 +160,24 @@ class Stream(NilmApp):
    # /stream/set_metadata?path=/newton/prep&data=<json>
    @cherrypy.expose
    @cherrypy.tools.json_out()
    @exception_to_httperror()
    def set_metadata(self, path, data):
        """Set metadata for the named stream, replacing any
        existing metadata.  Data should be a json-encoded
        dictionary"""
-        try:
+        data_dict = json.loads(data)
-            data_dict = json.loads(data)
+        self.db.stream_set_metadata(path, data_dict)
            self.db.stream_set_metadata(path, data_dict)
        except Exception as e:
            message = sprintf("%s: %s", type(e).__name__, e.message)
            raise cherrypy.HTTPError("400 Bad Request", message)
        return "ok"
    # /stream/update_metadata?path=/newton/prep&data=<json>
    @cherrypy.expose
    @cherrypy.tools.json_out()
    @exception_to_httperror()
    def update_metadata(self, path, data):
        """Update metadata for the named stream.  Data
        should be a json-encoded dictionary"""
-        try:
+        data_dict = json.loads(data)
-            data_dict = json.loads(data)
+        self.db.stream_update_metadata(path, data_dict)
            self.db.stream_update_metadata(path, data_dict)
        except Exception as e:
            message = sprintf("%s: %s", type(e).__name__, e.message)
            raise cherrypy.HTTPError("400 Bad Request", message)
        return "ok"
    # /stream/insert?path=/newton/prep
--- a/nilmdb/utils/init.py
+++ b/nilmdb/utils/init.py
@@ -6,3 +6,4 @@ from .serializer import Serializer
 from .lrucache import lru_cache
 from .diskusage import du
 from .mustclose import must_close
 from .urllib import urlencode
--- a/nilmdb/utils/timer.py
+++ b/nilmdb/utils/timer.py
@@ -5,6 +5,7 @@
 #   with nilmdb.Timer("flush"):
 #       foo.flush()
 from __future__ import print_function
 import contextlib
 import time
@@ -18,4 +19,4 @@ def Timer(name = None, tosyslog = False):
        import syslog
        syslog.syslog(msg)
    else:
-        print msg
+        print(msg)
--- a/nilmdb/utils/urllib.py
+++ b/nilmdb/utils/urllib.py
@@ -0,0 +1,68 @@
 from __future__ import absolute_import
 from urllib import quote_plus, _is_unicode
 # urllib.urlencode insists on encoding Unicode as ASCII.  This is an
 # exact copy of that function, except we encode it as UTF-8 instead.
 def urlencode(query, doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.
    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.
    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.
    """
    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb
    l = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("utf-8","strict"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
--- a/tests/test_client.py
+++ b/tests/test_client.py
@@ -1,3 +1,5 @@
 # -*- coding: utf-8 -*-
 import nilmdb
 from nilmdb.utils.printf import *
 from nilmdb.client import ClientError, ServerError
@@ -82,6 +84,8 @@ class TestClient(object):
        # Bad layout type
        with assert_raises(ClientError):
            client.stream_create("/newton/prep", "NoSuchLayout")
        # Create three streams
        client.stream_create("/newton/prep", "PrepData")
        client.stream_create("/newton/raw", "RawData")
        client.stream_create("/newton/zzz/rawnotch", "RawNotchedData")
@@ -277,3 +281,40 @@ class TestClient(object):
                              "end": "123" }, retjson=False)
        if "transfer-encoding: chunked" not in client.http._headers.lower():
            warnings.warn("Non-chunked HTTP response for /stream/extract")
    def test_client_7_unicode(self):
        # Basic Unicode tests
        #
        # Exercises stream creation, listing, and metadata calls of
        # nilmdb.Client with non-ASCII stream paths and metadata
        # keys/values.
        # NOTE(review): needs a server answering on localhost:12380;
        # presumably started by this module's setup -- confirm.
        client = nilmdb.Client(url = "http://localhost:12380/")

        # Delete streams that exist
        # (the first element of each stream_list() entry is passed to
        # stream_destroy as the path)
        for stream in client.stream_list():
            client.stream_destroy(stream[0])

        # Database is empty
        eq_(client.stream_list(), [])

        # Create Unicode stream, match it
        # Each list is [ path, layout ]: it is unpacked into
        # stream_create() and compared against stream_list() entries.
        raw = [ u"/düsseldorf/raw", u"uint16_6" ]
        prep = [ u"/düsseldorf/prep", u"uint16_6" ]
        client.stream_create(*raw)
        eq_(client.stream_list(), [raw])
        # Filtering by layout or by path should still find the stream
        eq_(client.stream_list(layout=raw[1]), [raw])
        eq_(client.stream_list(path=raw[0]), [raw])
        client.stream_create(*prep)
        # Expected order is prep before raw, although raw was created
        # first (presumably the listing is sorted by path -- confirm)
        eq_(client.stream_list(), [prep, raw])

        # Set / get metadata with Unicode keys and values
        eq_(client.stream_get_metadata(raw[0]), {})
        eq_(client.stream_get_metadata(prep[0]), {})
        # meta2 and meta3 together contain exactly the entries of meta1
        meta1 = { u"alpha": u"α",
                  u"β": u"beta" }
        meta2 = { u"alpha": u"α" }
        meta3 = { u"β": u"beta" }
        # prep: set everything at once; the empty update that follows
        # is expected to leave it unchanged (checked below)
        client.stream_set_metadata(prep[0], meta1)
        client.stream_update_metadata(prep[0], {})
        # raw: build up the same metadata through two separate updates
        client.stream_update_metadata(raw[0], meta2)
        client.stream_update_metadata(raw[0], meta3)
        eq_(client.stream_get_metadata(prep[0]), meta1)
        eq_(client.stream_get_metadata(raw[0]), meta1)
        # Fetch selected keys only.  The keys here are plain (non-u"")
        # string literals, so "β" is passed as a byte string in the
        # source file's encoding rather than as a unicode object.
        eq_(client.stream_get_metadata(raw[0], [ "alpha" ]), meta2)
        eq_(client.stream_get_metadata(raw[0], [ "alpha", "β" ]), meta1)
--- a/tests/test_cmdline.py
+++ b/tests/test_cmdline.py
@@ -1,3 +1,5 @@
 # -*- coding: utf-8 -*-
 import nilmdb
 from nilmdb.utils.printf import *
 import nilmdb.cmdline
@@ -13,7 +15,7 @@ import threading
 import urllib2
 from urllib2 import urlopen, HTTPError
 import Queue
-import cStringIO
+import StringIO
 import shlex
 from test_helpers import *
@@ -45,13 +47,18 @@ def setup_module():
 def teardown_module():
    """Module-level teardown hook; it only calls server_stop().

    NOTE(review): server_stop is defined outside this view; presumably
    it shuts down the server that the tests in this module talk to.
    """
    server_stop()
 # Add an encoding property to StringIO so Python will convert Unicode
    # properly when writing or reading.
    # The subclass changes nothing else: it only adds a class-level
    # 'encoding' attribute.  TestCmdline.run uses instances of it as the
    # default input file and as the stderr capture buffer in place of
    # cStringIO objects.
    class UTF8StringIO(StringIO.StringIO):
    encoding = 'utf-8'
 class TestCmdline(object):
    def run(self, arg_string, infile=None, outfile=None):
        """Run a cmdline client with the specified argument string,
        passing the given input.  Returns a tuple with the output and
        exit code"""
-        #print "TZ=UTC ./nilmtool.py " + arg_string
+        # printf("TZ=UTC ./nilmtool.py %s\n", arg_string)
        class stdio_wrapper:
            def __init__(self, stdin, stdout, stderr):
                self.io = (stdin, stdout, stderr)
@@ -62,15 +69,18 @@ class TestCmdline(object):
                ( sys.stdin, sys.stdout, sys.stderr ) = self.saved
        # Empty input if none provided
        if infile is None:
-            infile = cStringIO.StringIO("")
+            infile = UTF8StringIO("")
        # Capture stderr
-        errfile = cStringIO.StringIO()
+        errfile = UTF8StringIO()
        if outfile is None:
            # If no output file, capture stdout with stderr
            outfile = errfile
        with stdio_wrapper(infile, outfile, errfile) as s:
            try:
-                nilmdb.cmdline.Cmdline(shlex.split(arg_string)).run()
+                # shlex doesn't support Unicode very well.  Encode the
                # string as UTF-8 explicitly before splitting.
                args = shlex.split(arg_string.encode('utf-8'))
                nilmdb.cmdline.Cmdline(args).run()
                sys.exit(0)
            except SystemExit as e:
                exitcode = e.code
@@ -298,16 +308,9 @@ class TestCmdline(object):
        eq_(cmd.parse_time("hi there 20120405 1400-0400 testing! 123"), test)
        eq_(cmd.parse_time("20120405 1800 UTC"), test)
        eq_(cmd.parse_time("20120405 1400-0400 UTC"), test)
-        with assert_raises(ValueError):
+        for badtime in [ "20120405 1400-9999", "hello", "-", "", "14:00" ]:
-            print cmd.parse_time("20120405 1400-9999")
+            with assert_raises(ValueError):
-        with assert_raises(ValueError):
+                x = cmd.parse_time(badtime)
            print cmd.parse_time("hello")
        with assert_raises(ValueError):
            print cmd.parse_time("-")
        with assert_raises(ValueError):
            print cmd.parse_time("")
        with assert_raises(ValueError):
            print cmd.parse_time("14:00")
        eq_(cmd.parse_time("snapshot-20120405-140000.raw.gz"), test)
        eq_(cmd.parse_time("prep-20120405T1400"), test)
@@ -519,3 +522,21 @@ class TestCmdline(object):
            # Make sure it was created empty
            self.ok("list --detail --path " + path)
            self.contain("(no intervals)")
    def test_cmdline_11_unicode(self):
        # Unicode paths.
        #
        # Runs the command-line tool with non-ASCII stream paths and
        # metadata and checks the captured output.
        # NOTE(review): self.ok / self.contain / self.match are helpers
        # defined outside this view; presumably ok() runs a command and
        # expects success, while contain() / match() inspect the output
        # of the most recent command -- confirm.

        # Remove existing streams first (presumably left over from
        # earlier tests in this class -- confirm)
        self.ok("destroy /newton/asdf/qwer")
        self.ok("destroy /newton/prep")
        self.ok("destroy /newton/raw")
        self.ok("destroy /newton/zzz")

        # Create a stream whose path contains a non-ASCII character and
        # check that it is listed, with no data intervals yet
        self.ok(u"create /düsseldorf/raw uint16_6")
        self.ok("list --detail")
        self.contain(u"/düsseldorf/raw uint16_6")
        self.contain("(no intervals)")

        # Unicode metadata
        # --set stores two keys, one unquoted and one quoted
        self.ok(u"metadata /düsseldorf/raw --set α=beta 'γ=δ'")
        # --update gives α a new value; the quotes keep the value's
        # embedded spaces inside a single argument
        self.ok(u"metadata /düsseldorf/raw --update 'α=β ε τ α'")
        # Listing should show the updated α and the untouched γ
        self.ok(u"metadata /düsseldorf/raw")
        self.match(u"α=β ε τ α\nγ=δ\n")
Author	SHA1	Message	Date
Jim Paris	c083d63c96	Tests for Unicode compliance	2013-01-03 17:03:52 -05:00
Jim Paris	0221e3ea21	Update commandline test helpers to better handle Unicode We replace cStringIO with a StringIO subclass that forces UTF-8 encoding, and explicitly convert commandlines to UTF-8 before shlex. These changes will only affect tests, not normal commandline operation.	2013-01-03 17:03:52 -05:00
Jim Paris	f5fd2b064e	Replace urllib.urlencode() with a version that encodes Unicode as UTF-8 instead	2013-01-03 17:02:38 -05:00
Jim Paris	06e91a6a98	Always use function version of print()	2013-01-03 17:02:38 -05:00
Jim Paris	41b3f3c018	Always use UTF-8 for filenames in nilmdb.bulkdata	2013-01-03 17:02:38 -05:00
Jim Paris	842076fef4	Cleanup server error handling with decorator	2013-01-03 17:02:38 -05:00