Browse Source

Merge branch 'py3'

tags/nilmdb-2.0.0
Jim Paris 3 years ago
parent
commit
6821b2a97b
70 changed files with 3419 additions and 2475 deletions
  1. +3
    -2
      .coveragerc
  2. +1
    -4
      .gitignore
  3. +29
    -0
      MANIFEST.in
  4. +12
    -12
      Makefile
  5. +10
    -12
      README.txt
  6. +30
    -1
      docs/design.md
  7. +2
    -2
      extras/fix-oversize-files.py
  8. +4
    -0
      nilmdb/__init__.py
  9. +472
    -149
      nilmdb/_version.py
  10. +11
    -11
      nilmdb/client/client.py
  11. +4
    -4
      nilmdb/client/errors.py
  12. +14
    -16
      nilmdb/client/httpclient.py
  13. +11
    -7
      nilmdb/client/numpyclient.py
  14. +9
    -24
      nilmdb/cmdline/cmdline.py
  15. +6
    -7
      nilmdb/cmdline/extract.py
  16. +2
    -2
      nilmdb/cmdline/insert.py
  17. +5
    -7
      nilmdb/cmdline/metadata.py
  18. +1
    -1
      nilmdb/fsck/__init__.py
  19. +8
    -6
      nilmdb/fsck/fsck.py
  20. +3
    -2
      nilmdb/scripts/nilmdb_fsck.py
  21. +35
    -29
      nilmdb/scripts/nilmdb_server.py
  22. +3
    -15
      nilmdb/server/__init__.py
  23. +63
    -59
      nilmdb/server/bulkdata.py
  24. +2
    -2
      nilmdb/server/errors.py
  25. +15
    -3
      nilmdb/server/interval.pyx
  26. +18
    -26
      nilmdb/server/nilmdb.py
  27. +2
    -0
      nilmdb/server/rbtree.pxd
  28. +1
    -0
      nilmdb/server/rbtree.pyx
  29. +30
    -20
      nilmdb/server/rocket.c
  30. +30
    -37
      nilmdb/server/server.py
  31. +45
    -60
      nilmdb/server/serverutil.py
  32. +1
    -2
      nilmdb/utils/__init__.py
  33. +3
    -11
      nilmdb/utils/atomic.py
  34. +0
    -710
      nilmdb/utils/datetime_tz/__init__.py
  35. +0
    -230
      nilmdb/utils/datetime_tz/pytz_abbr.py
  36. +4
    -5
      nilmdb/utils/diskusage.py
  37. +7
    -37
      nilmdb/utils/fallocate.py
  38. +14
    -4
      nilmdb/utils/interval.py
  39. +1
    -1
      nilmdb/utils/iterator.py
  40. +16
    -29
      nilmdb/utils/lock.py
  41. +4
    -2
      nilmdb/utils/lrucache.py
  42. +15
    -10
      nilmdb/utils/mustclose.py
  43. +1
    -1
      nilmdb/utils/printf.py
  44. +28
    -20
      nilmdb/utils/serializer.py
  45. +12
    -24
      nilmdb/utils/threadsafety.py
  46. +6
    -3
      nilmdb/utils/time.py
  47. +3
    -3
      nilmdb/utils/timer.py
  48. +13
    -13
      nilmdb/utils/timestamper.py
  49. +0
    -29
      nilmdb/utils/unicode.py
  50. +15
    -0
      requirements.txt
  51. +8
    -2
      setup.cfg
  52. +16
    -84
      setup.py
  53. +1
    -1
      tests/data/prep-20120323T1000
  54. +2
    -0
      tests/test.order
  55. +59
    -20
      tests/test_bulkdata.py
  56. +140
    -84
      tests/test_client.py
  57. +128
    -31
      tests/test_cmdline.py
  58. +22
    -9
      tests/test_interval.py
  59. +139
    -0
      tests/test_misc.py
  60. +10
    -3
      tests/test_mustclose.py
  61. +94
    -35
      tests/test_nilmdb.py
  62. +11
    -8
      tests/test_numpyclient.py
  63. +1
    -1
      tests/test_printf.py
  64. +5
    -5
      tests/test_rbtree.py
  65. +20
    -2
      tests/test_serializer.py
  66. +8
    -8
      tests/test_threadsafety.py
  67. +17
    -17
      tests/test_timestamper.py
  68. +36
    -0
      tests/test_wsgi.py
  69. +1
    -1
      tests/testutil/helpers.py
  70. +1677
    -510
      versioneer.py

+ 3
- 2
.coveragerc View File

@@ -1,10 +1,11 @@
# -*- conf -*-

[run]
# branch = True
branch = True

[report]
exclude_lines =
pragma: no cover
if 0:
omit = nilmdb/utils/datetime_tz*,nilmdb/scripts,nilmdb/_version.py,nilmdb/fsck
omit = nilmdb/scripts,nilmdb/_version.py,nilmdb/fsck
show_missing = True

+ 1
- 4
.gitignore View File

@@ -15,10 +15,7 @@ nilmdb/server/rbtree.c
# Setup junk
dist/
nilmdb.egg-info/

# This gets generated as needed by setup.py
MANIFEST.in
MANIFEST
venv/

# Misc
timeit*out


+ 29
- 0
MANIFEST.in View File

@@ -0,0 +1,29 @@
# Root
include README.txt
include setup.cfg
include setup.py
include versioneer.py
include Makefile
include .coveragerc
include .pylintrc
include requirements.txt

# Cython files -- include .pyx source, but not the generated .c files
# (Downstream systems must have cython installed in order to build)
recursive-include nilmdb/server *.pyx *.pyxdep *.pxd
exclude nilmdb/server/interval.c
exclude nilmdb/server/rbtree.c

# Version
include nilmdb/_version.py

# Tests
recursive-include tests *.py
recursive-include tests/data *
include tests/test.order

# Docs
recursive-include docs Makefile *.md

# Extras
recursive-include extras *

+ 12
- 12
Makefile View File

@@ -2,42 +2,42 @@
all: test

version:
python setup.py version
python3 setup.py version

build:
python setup.py build_ext --inplace
python3 setup.py build_ext --inplace

dist: sdist
sdist:
python setup.py sdist
python3 setup.py sdist

install:
python setup.py install
python3 setup.py install

develop:
python setup.py develop
python3 setup.py develop

docs:
make -C docs

lint:
pylint --rcfile=.pylintrc nilmdb
pylint3 --rcfile=.pylintrc nilmdb

test:
ifeq ($(INSIDE_EMACS), t)
ifneq ($(INSIDE_EMACS),)
# Use the slightly more flexible script
python setup.py build_ext --inplace
python tests/runtests.py
python3 setup.py build_ext --inplace
python3 tests/runtests.py
else
# Let setup.py check dependencies, build stuff, and run the test
python setup.py nosetests
python3 setup.py nosetests
endif

clean::
find . -name '*pyc' | xargs rm -f
find . -name '*.pyc' -o -name '__pycache__' -print0 | xargs -0 rm -rf
rm -f .coverage
rm -rf tests/*testdb*
rm -rf nilmdb.egg-info/ build/ nilmdb/server/*.so MANIFEST.in
rm -rf nilmdb.egg-info/ build/ nilmdb/server/*.so
make -C docs clean

gitclean::


+ 10
- 12
README.txt View File

@@ -4,25 +4,23 @@ by Jim Paris <jim@jtan.com>
Prerequisites:

# Runtime and build environments
sudo apt-get install python2.7 python2.7-dev python-setuptools cython
sudo apt install python3.7 python3.7-dev python3.7-venv python3-pip

# Base NilmDB dependencies
sudo apt-get install python-cherrypy3 python-decorator python-simplejson
sudo apt-get install python-requests python-dateutil python-tz
sudo apt-get install python-progressbar python-psutil
# Optional: create a new Python virtual environment to isolate
# dependencies. To leave the virtual environment, run "deactivate"
python -m venv venv
source venv/bin/activate

# Other dependencies (required by some modules)
sudo apt-get install python-numpy

# Tools for running tests
sudo apt-get install python-nose python-coverage
# Install all Python dependencies from PyPI.
pip3 install -r requirements.txt

Test:
python setup.py nosetests

python3 setup.py nosetests

Install:

python setup.py install
sudo python3 setup.py install

Usage:



+ 30
- 1
docs/design.md View File

@@ -430,7 +430,7 @@ mod_wsgi requires "WSGIChunkedRequest On" to handle
"Transfer-encoding: Chunked" requests. However, `/stream/insert`
doesn't handle this correctly right now, because:

- The `cherrpy.request.body.read()` call needs to be fixed for chunked requests
- The `cherrypy.request.body.read()` call needs to be fixed for chunked requests

- We don't want to just buffer endlessly in the server, and it will
require some thought on how to handle data in chunks (what to do about
@@ -438,3 +438,32 @@ doesn't handle this correctly right now, because:

It is probably better to just keep the endpoint management on the client
side, so leave "WSGIChunkedRequest off" for now.


Unicode & character encoding
----------------------------

Stream data is passed back and forth as raw `bytes` objects in most
places, including the `nilmdb.client` and command-line interfaces.
This is done partially for performance reasons, and partially to
support the binary insert/extract options, where character-set encoding
would not apply.

For the HTTP server, the raw bytes transferred over HTTP are interpreted
as follows:
- For `/stream/insert`, the client-provided `Content-Type` is ignored,
and the data is read as if it were `application/octet-stream`.
- For `/stream/extract`, the returned data is `application/octet-stream`.
- All other endpoints communicate via JSON, which is specified to always
be encoded as UTF-8. This includes:
- `/version`
- `/dbinfo`
- `/stream/list`
- `/stream/create`
- `/stream/destroy`
- `/stream/rename`
- `/stream/get_metadata`
- `/stream/set_metadata`
- `/stream/update_metadata`
- `/stream/remove`
- `/stream/intervals`

+ 2
- 2
extras/fix-oversize-files.py View File

@@ -2,7 +2,7 @@

import os
import sys
import cPickle as pickle
import pickle
import argparse
import fcntl
import re
@@ -44,7 +44,7 @@ with open(lock, "w") as f:
maxsize = fix[fixpath]
if size > maxsize:
diff = size - maxsize
print diff, "too big:", fn
print(diff, "too big:", fn)
if args.yes:
with open(fn, "a+") as dbfile:
dbfile.truncate(maxsize)

+ 4
- 0
nilmdb/__init__.py View File

@@ -8,3 +8,7 @@
from nilmdb._version import get_versions
__version__ = get_versions()['version']
del get_versions

from ._version import get_versions
__version__ = get_versions()['version']
del get_versions

+ 472
- 149
nilmdb/_version.py View File

@@ -1,197 +1,520 @@

IN_LONG_VERSION_PY = True
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (build by setup.py sdist) and build
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.

# This file is released into the public domain. Generated by
# versioneer-0.7+ (https://github.com/warner/python-versioneer)

# these strings will be replaced by git during git-archive
git_refnames = "$Format:%d$"
git_full = "$Format:%H$"
# versioneer-0.18 (https://github.com/warner/python-versioneer)

"""Git implementation of _version.py."""

import errno
import os
import re
import subprocess
import sys

def run_command(args, cwd=None, verbose=False):
try:
# remember shell=False, so use git.cmd on windows, not just git
p = subprocess.Popen(args, stdout=subprocess.PIPE, cwd=cwd)
except EnvironmentError:
e = sys.exc_info()[1]

def get_keywords():
"""Get the keywords needed to look up the version information."""
# these strings will be replaced by git during git-archive.
# setup.py/versioneer.py will grep for the variable names, so they must
# each be defined on a line of their own. _version.py will just call
# get_keywords().
git_refnames = "$Format:%d$"
git_full = "$Format:%H$"
git_date = "$Format:%ci$"
keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
return keywords


class VersioneerConfig:
"""Container for Versioneer configuration parameters."""


def get_config():
"""Create, populate and return the VersioneerConfig() object."""
# these strings are filled in when 'setup.py versioneer' creates
# _version.py
cfg = VersioneerConfig()
cfg.VCS = "git"
cfg.style = "pep440"
cfg.tag_prefix = "nilmdb-"
cfg.parentdir_prefix = "nilmdb-"
cfg.versionfile_source = "nilmdb/_version.py"
cfg.verbose = False
return cfg


class NotThisMethod(Exception):
"""Exception raised if a method is not valid for the current scenario."""


LONG_VERSION_PY = {}
HANDLERS = {}


def register_vcs_handler(vcs, method): # decorator
"""Decorator to mark a method as the handler for a particular VCS."""
def decorate(f):
"""Store f in HANDLERS[vcs][method]."""
if vcs not in HANDLERS:
HANDLERS[vcs] = {}
HANDLERS[vcs][method] = f
return f
return decorate


def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
env=None):
"""Call the given command(s)."""
assert isinstance(commands, list)
p = None
for c in commands:
try:
dispcmd = str([c] + args)
# remember shell=False, so use git.cmd on windows, not just git
p = subprocess.Popen([c] + args, cwd=cwd, env=env,
stdout=subprocess.PIPE,
stderr=(subprocess.PIPE if hide_stderr
else None))
break
except EnvironmentError:
e = sys.exc_info()[1]
if e.errno == errno.ENOENT:
continue
if verbose:
print("unable to run %s" % dispcmd)
print(e)
return None, None
else:
if verbose:
print("unable to run %s" % args[0])
print(e)
return None
print("unable to find command, tried %s" % (commands,))
return None, None
stdout = p.communicate()[0].strip()
if sys.version >= '3':
if sys.version_info[0] >= 3:
stdout = stdout.decode()
if p.returncode != 0:
if verbose:
print("unable to run %s (error)" % args[0])
return None
return stdout
print("unable to run %s (error)" % dispcmd)
print("stdout was %s" % stdout)
return None, p.returncode
return stdout, p.returncode


import sys
import re
import os.path
def versions_from_parentdir(parentdir_prefix, root, verbose):
"""Try to determine the version from the parent directory name.

Source tarballs conventionally unpack into a directory that includes both
the project name and a version string. We will also support searching up
two directory levels for an appropriately named parent directory
"""
rootdirs = []

for i in range(3):
dirname = os.path.basename(root)
if dirname.startswith(parentdir_prefix):
return {"version": dirname[len(parentdir_prefix):],
"full-revisionid": None,
"dirty": False, "error": None, "date": None}
else:
rootdirs.append(root)
root = os.path.dirname(root) # up a level

if verbose:
print("Tried directories %s but none started with prefix %s" %
(str(rootdirs), parentdir_prefix))
raise NotThisMethod("rootdir doesn't start with parentdir_prefix")


def get_expanded_variables(versionfile_source):
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
"""Extract version information from the given file."""
# the code embedded in _version.py can just fetch the value of these
# variables. When used from setup.py, we don't want to import
# _version.py, so we do it with a regexp instead. This function is not
# used from _version.py.
variables = {}
# keywords. When used from setup.py, we don't want to import _version.py,
# so we do it with a regexp instead. This function is not used from
# _version.py.
keywords = {}
try:
for line in open(versionfile_source,"r").readlines():
f = open(versionfile_abs, "r")
for line in f.readlines():
if line.strip().startswith("git_refnames ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
variables["refnames"] = mo.group(1)
keywords["refnames"] = mo.group(1)
if line.strip().startswith("git_full ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
variables["full"] = mo.group(1)
keywords["full"] = mo.group(1)
if line.strip().startswith("git_date ="):
mo = re.search(r'=\s*"(.*)"', line)
if mo:
keywords["date"] = mo.group(1)
f.close()
except EnvironmentError:
pass
return variables
return keywords

def versions_from_expanded_variables(variables, tag_prefix, verbose=False):
refnames = variables["refnames"].strip()

@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
"""Get version information from git keywords."""
if not keywords:
raise NotThisMethod("no keywords at all, weird")
date = keywords.get("date")
if date is not None:
# git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
# datestamp. However we prefer "%ci" (which expands to an "ISO-8601
# -like" string, which we must then edit to make compliant), because
# it's been around since git-1.5.3, and it's too difficult to
# discover which version we're using, or to work around using an
# older one.
date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
refnames = keywords["refnames"].strip()
if refnames.startswith("$Format"):
if verbose:
print("variables are unexpanded, not using")
return {} # unexpanded, so not in an unpacked git-archive tarball
print("keywords are unexpanded, not using")
raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
refs = set([r.strip() for r in refnames.strip("()").split(",")])
for ref in list(refs):
if not re.search(r'\d', ref):
if verbose:
print("discarding '%s', no digits" % ref)
refs.discard(ref)
# Assume all version tags have a digit. git's %d expansion
# behaves like git log --decorate=short and strips out the
# refs/heads/ and refs/tags/ prefixes that would let us
# distinguish between branches and tags. By ignoring refnames
# without digits, we filter out many common branch names like
# "release" and "stabilization", as well as "HEAD" and "master".
# starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
# just "foo-1.0". If we see a "tag: " prefix, prefer those.
TAG = "tag: "
tags = set([r[len(TAG):] for r in refs if r.startswith(TAG)])
if not tags:
# Either we're using git < 1.8.3, or there really are no tags. We use
# a heuristic: assume all version tags have a digit. The old git %d
# expansion behaves like git log --decorate=short and strips out the
# refs/heads/ and refs/tags/ prefixes that would let us distinguish
# between branches and tags. By ignoring refnames without digits, we
# filter out many common branch names like "release" and
# "stabilization", as well as "HEAD" and "master".
tags = set([r for r in refs if re.search(r'\d', r)])
if verbose:
print("discarding '%s', no digits" % ",".join(refs - tags))
if verbose:
print("remaining refs: %s" % ",".join(sorted(refs)))
for ref in sorted(refs):
print("likely tags: %s" % ",".join(sorted(tags)))
for ref in sorted(tags):
# sorting will prefer e.g. "2.0" over "2.0rc1"
if ref.startswith(tag_prefix):
r = ref[len(tag_prefix):]
if verbose:
print("picking %s" % r)
return { "version": r,
"full": variables["full"].strip() }
# no suitable tags, so we use the full revision id
return {"version": r,
"full-revisionid": keywords["full"].strip(),
"dirty": False, "error": None,
"date": date}
# no suitable tags, so version is "0+unknown", but full hex is still there
if verbose:
print("no suitable tags, using full revision id")
return { "version": variables["full"].strip(),
"full": variables["full"].strip() }

def versions_from_vcs(tag_prefix, versionfile_source, verbose=False):
# this runs 'git' from the root of the source tree. That either means
# someone ran a setup.py command (and this code is in versioneer.py, so
# IN_LONG_VERSION_PY=False, thus the containing directory is the root of
# the source tree), or someone ran a project-specific entry point (and
# this code is in _version.py, so IN_LONG_VERSION_PY=True, thus the
# containing directory is somewhere deeper in the source tree). This only
# gets called if the git-archive 'subst' variables were *not* expanded,
# and _version.py hasn't already been rewritten with a short version
# string, meaning we're inside a checked out source tree.
print("no suitable tags, using unknown + full revision id")
return {"version": "0+unknown",
"full-revisionid": keywords["full"].strip(),
"dirty": False, "error": "no suitable tags", "date": None}

try:
here = os.path.abspath(__file__)
except NameError:
# some py2exe/bbfreeze/non-CPython implementations don't do __file__
return {} # not always correct

# versionfile_source is the relative path from the top of the source tree
# (where the .git directory might live) to this file. Invert this to find
# the root from __file__.
root = here
if IN_LONG_VERSION_PY:
for i in range(len(versionfile_source.split("/"))):
root = os.path.dirname(root)
else:
root = os.path.dirname(here)
if not os.path.exists(os.path.join(root, ".git")):
if verbose:
print("no .git in %s" % root)
return {}

GIT = "git"
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, run_command=run_command):
"""Get version from 'git describe' in the root of the source tree.

This only gets called if the git-archive 'subst' keywords were *not*
expanded, and _version.py hasn't already been rewritten with a short
version string, meaning we're inside a checked out source tree.
"""
GITS = ["git"]
if sys.platform == "win32":
GIT = "git.cmd"
stdout = run_command([GIT, "describe", "--tags", "--dirty", "--always"],
cwd=root)
if stdout is None:
return {}
if not stdout.startswith(tag_prefix):
GITS = ["git.cmd", "git.exe"]

out, rc = run_command(GITS, ["rev-parse", "--git-dir"], cwd=root,
hide_stderr=True)
if rc != 0:
if verbose:
print("tag '%s' doesn't start with prefix '%s'" % (stdout, tag_prefix))
return {}
tag = stdout[len(tag_prefix):]
stdout = run_command([GIT, "rev-parse", "HEAD"], cwd=root)
if stdout is None:
return {}
full = stdout.strip()
if tag.endswith("-dirty"):
full += "-dirty"
return {"version": tag, "full": full}


def versions_from_parentdir(parentdir_prefix, versionfile_source, verbose=False):
if IN_LONG_VERSION_PY:
# We're running from _version.py. If it's from a source tree
# (execute-in-place), we can work upwards to find the root of the
# tree, and then check the parent directory for a version string. If
# it's in an installed application, there's no hope.
try:
here = os.path.abspath(__file__)
except NameError:
# py2exe/bbfreeze/non-CPython don't have __file__
return {} # without __file__, we have no hope
print("Directory %s not under git control" % root)
raise NotThisMethod("'git rev-parse --git-dir' returned error")

# if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
# if there isn't one, this yields HEX[-dirty] (no NUM)
describe_out, rc = run_command(GITS, ["describe", "--tags", "--dirty",
"--always", "--long",
"--match", "%s*" % tag_prefix],
cwd=root)
# --long was added in git-1.5.5
if describe_out is None:
raise NotThisMethod("'git describe' failed")
describe_out = describe_out.strip()
full_out, rc = run_command(GITS, ["rev-parse", "HEAD"], cwd=root)
if full_out is None:
raise NotThisMethod("'git rev-parse' failed")
full_out = full_out.strip()

pieces = {}
pieces["long"] = full_out
pieces["short"] = full_out[:7] # maybe improved later
pieces["error"] = None

# parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
# TAG might have hyphens.
git_describe = describe_out

# look for -dirty suffix
dirty = git_describe.endswith("-dirty")
pieces["dirty"] = dirty
if dirty:
git_describe = git_describe[:git_describe.rindex("-dirty")]

# now we have TAG-NUM-gHEX or HEX

if "-" in git_describe:
# TAG-NUM-gHEX
mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
if not mo:
# unparseable. Maybe git-describe is misbehaving?
pieces["error"] = ("unable to parse git-describe output: '%s'"
% describe_out)
return pieces

# tag
full_tag = mo.group(1)
if not full_tag.startswith(tag_prefix):
if verbose:
fmt = "tag '%s' doesn't start with prefix '%s'"
print(fmt % (full_tag, tag_prefix))
pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
% (full_tag, tag_prefix))
return pieces
pieces["closest-tag"] = full_tag[len(tag_prefix):]

# distance: number of commits since tag
pieces["distance"] = int(mo.group(2))

# commit: short hex revision ID
pieces["short"] = mo.group(3)

else:
# HEX: no tags
pieces["closest-tag"] = None
count_out, rc = run_command(GITS, ["rev-list", "HEAD", "--count"],
cwd=root)
pieces["distance"] = int(count_out) # total number of commits

# commit date: see ISO-8601 comment in git_versions_from_keywords()
date = run_command(GITS, ["show", "-s", "--format=%ci", "HEAD"],
cwd=root)[0].strip()
pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

return pieces


def plus_or_dot(pieces):
"""Return a + if we don't already have one, else return a ."""
if "+" in pieces.get("closest-tag", ""):
return "."
return "+"


def render_pep440(pieces):
"""Build up version string, with post-release "local version identifier".

Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty

Exceptions:
1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += plus_or_dot(pieces)
rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
else:
# exception #1
rendered = "0+untagged.%d.g%s" % (pieces["distance"],
pieces["short"])
if pieces["dirty"]:
rendered += ".dirty"
return rendered


def render_pep440_pre(pieces):
"""TAG[.post.devDISTANCE] -- No -dirty.

Exceptions:
1: no tags. 0.post.devDISTANCE
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"]:
rendered += ".post.dev%d" % pieces["distance"]
else:
# exception #1
rendered = "0.post.dev%d" % pieces["distance"]
return rendered


def render_pep440_post(pieces):
"""TAG[.postDISTANCE[.dev0]+gHEX] .

The ".dev0" means dirty. Note that .dev0 sorts backwards
(a dirty tree will appear "older" than the corresponding clean one),
but you shouldn't be releasing software with -dirty anyways.

Exceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += ".post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
rendered += plus_or_dot(pieces)
rendered += "g%s" % pieces["short"]
else:
# exception #1
rendered = "0.post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
rendered += "+g%s" % pieces["short"]
return rendered


def render_pep440_old(pieces):
"""TAG[.postDISTANCE[.dev0]] .

The ".dev0" means dirty.

Eexceptions:
1: no tags. 0.postDISTANCE[.dev0]
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"] or pieces["dirty"]:
rendered += ".post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
else:
# exception #1
rendered = "0.post%d" % pieces["distance"]
if pieces["dirty"]:
rendered += ".dev0"
return rendered


def render_git_describe(pieces):
"""TAG[-DISTANCE-gHEX][-dirty].

Like 'git describe --tags --dirty --always'.

Exceptions:
1: no tags. HEX[-dirty] (note: no 'g' prefix)
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
if pieces["distance"]:
rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
else:
# exception #1
rendered = pieces["short"]
if pieces["dirty"]:
rendered += "-dirty"
return rendered


def render_git_describe_long(pieces):
"""TAG-DISTANCE-gHEX[-dirty].

Like 'git describe --tags --dirty --always -long'.
The distance/hash is unconditional.

Exceptions:
1: no tags. HEX[-dirty] (note: no 'g' prefix)
"""
if pieces["closest-tag"]:
rendered = pieces["closest-tag"]
rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
else:
# exception #1
rendered = pieces["short"]
if pieces["dirty"]:
rendered += "-dirty"
return rendered


def render(pieces, style):
"""Render the given version pieces into the requested style."""
if pieces["error"]:
return {"version": "unknown",
"full-revisionid": pieces.get("long"),
"dirty": None,
"error": pieces["error"],
"date": None}

if not style or style == "default":
style = "pep440" # the default

if style == "pep440":
rendered = render_pep440(pieces)
elif style == "pep440-pre":
rendered = render_pep440_pre(pieces)
elif style == "pep440-post":
rendered = render_pep440_post(pieces)
elif style == "pep440-old":
rendered = render_pep440_old(pieces)
elif style == "git-describe":
rendered = render_git_describe(pieces)
elif style == "git-describe-long":
rendered = render_git_describe_long(pieces)
else:
raise ValueError("unknown style '%s'" % style)

return {"version": rendered, "full-revisionid": pieces["long"],
"dirty": pieces["dirty"], "error": None,
"date": pieces.get("date")}


def get_versions():
"""Get version information or return default if unable to do so."""
# I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
# __file__, we can work backwards from there to the root. Some
# py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
# case we can only use expanded keywords.

cfg = get_config()
verbose = cfg.verbose

try:
return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
verbose)
except NotThisMethod:
pass

try:
root = os.path.realpath(__file__)
# versionfile_source is the relative path from the top of the source
# tree to _version.py. Invert this to find the root from __file__.
root = here
for i in range(len(versionfile_source.split("/"))):
# tree (where the .git directory might live) to this file. Invert
# this to find the root from __file__.
for i in cfg.versionfile_source.split('/'):
root = os.path.dirname(root)
else:
# we're running from versioneer.py, which means we're running from
# the setup.py in a source tree. sys.argv[0] is setup.py in the root.
here = os.path.abspath(sys.argv[0])
root = os.path.dirname(here)

# Source tarballs conventionally unpack into a directory that includes
# both the project name and a version string.
dirname = os.path.basename(root)
if not dirname.startswith(parentdir_prefix):
if verbose:
print("guessing rootdir is '%s', but '%s' doesn't start with prefix '%s'" %
(root, dirname, parentdir_prefix))
return None
return {"version": dirname[len(parentdir_prefix):], "full": ""}

tag_prefix = "nilmdb-"
parentdir_prefix = "nilmdb-"
versionfile_source = "nilmdb/_version.py"

def get_versions(default={"version": "unknown", "full": ""}, verbose=False):
variables = { "refnames": git_refnames, "full": git_full }
ver = versions_from_expanded_variables(variables, tag_prefix, verbose)
if not ver:
ver = versions_from_vcs(tag_prefix, versionfile_source, verbose)
if not ver:
ver = versions_from_parentdir(parentdir_prefix, versionfile_source,
verbose)
if not ver:
ver = default
return ver
except NameError:
return {"version": "0+unknown", "full-revisionid": None,
"dirty": None,
"error": "unable to find root of source tree",
"date": None}

try:
pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
return render(pieces, cfg.style)
except NotThisMethod:
pass

try:
if cfg.parentdir_prefix:
return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
except NotThisMethod:
pass

return {"version": "0+unknown", "full-revisionid": None,
"dirty": None,
"error": "unable to compute version", "date": None}

+ 11
- 11
nilmdb/client/client.py View File

@@ -7,7 +7,7 @@ import nilmdb.client.httpclient
from nilmdb.client.errors import ClientError

import time
import simplejson as json
import json
import contextlib

from nilmdb.utils.time import timestamp_to_string, string_to_timestamp
@@ -159,7 +159,7 @@ class Client(object):
so it will be broken into reasonably-sized chunks and
start/end will be deduced if missing."""
with self.stream_insert_context(path, start, end) as ctx:
if isinstance(data, basestring):
if isinstance(data, bytes):
ctx.insert(data)
else:
for chunk in data:
@@ -181,7 +181,7 @@ class Client(object):
}
if binary:
params["binary"] = 1
return self.http.put("stream/insert", data, params, binary = binary)
return self.http.put("stream/insert", data, params)

def stream_intervals(self, path, start = None, end = None, diffpath = None):
"""
@@ -331,7 +331,7 @@ class StreamInserter(object):
# Send the block once we have enough data
if self._block_len >= maxdata:
self._send_block(final = False)
if self._block_len >= self._max_data_after_send: # pragma: no cover
if self._block_len >= self._max_data_after_send:
raise ValueError("too much data left over after trying"
" to send intermediate block; is it"
" missing newlines or malformed?")
@@ -370,10 +370,10 @@ class StreamInserter(object):
there isn't one."""
start = 0
while True:
end = block.find('\n', start)
end = block.find(b'\n', start)
if end < 0:
raise IndexError
if block[start] != '#':
if block[start] != b'#'[0]:
return (start, (end + 1))
start = end + 1

@@ -381,12 +381,12 @@ class StreamInserter(object):
"""Return the (start, end) indices of the last full line in
block[:length] that isn't a comment, or raise IndexError if
there isn't one."""
end = block.rfind('\n')
end = block.rfind(b'\n')
if end <= 0:
raise IndexError
while True:
start = block.rfind('\n', 0, end)
if block[start + 1] != '#':
start = block.rfind(b'\n', 0, end)
if block[start + 1] != b'#'[0]:
return ((start + 1), end)
if start == -1:
raise IndexError
@@ -396,7 +396,7 @@ class StreamInserter(object):
"""Send data currently in the block. The data sent will
consist of full lines only, so some might be left over."""
# Build the full string to send
block = "".join(self._block_data)
block = b"".join(self._block_data)

start_ts = self._interval_start
if start_ts is None:
@@ -413,7 +413,7 @@ class StreamInserter(object):
# or the timestamp of the last line plus epsilon.
end_ts = self._interval_end
try:
if block[-1] != '\n':
if block[-1] != b'\n'[0]:
raise ValueError("final block didn't end with a newline")
if end_ts is None:
(spos, epos) = self._get_last_noncomment(block)


+ 4
- 4
nilmdb/client/errors.py View File

@@ -9,7 +9,7 @@ class Error(Exception):
message = None,
url = None,
traceback = None):
Exception.__init__(self, status)
super().__init__(status)
self.status = status # e.g. "400 Bad Request"
self.message = message # textual message from the server
self.url = url # URL we were requesting
@@ -18,14 +18,14 @@ class Error(Exception):
s = sprintf("[%s]", self.status)
if self.message:
s += sprintf(" %s", self.message)
if show_url and self.url: # pragma: no cover
if show_url and self.url:
s += sprintf(" (%s)", self.url)
if self.traceback: # pragma: no cover
if self.traceback:
s += sprintf("\nServer traceback:\n%s", self.traceback)
return s
def __str__(self):
return self._format_error(show_url = False)
def __repr__(self): # pragma: no cover
def __repr__(self):
return self._format_error(show_url = True)
class ClientError(Error):
pass


+ 14
- 16
nilmdb/client/httpclient.py View File

@@ -3,8 +3,8 @@
import nilmdb.utils
from nilmdb.client.errors import ClientError, ServerError, Error

import simplejson as json
import urlparse
import json
import urllib.parse
import requests

class HTTPClient(object):
@@ -13,9 +13,9 @@ class HTTPClient(object):
"""If baseurl is supplied, all other functions that take
a URL can be given a relative URL instead."""
# Verify / clean up URL
reparsed = urlparse.urlparse(baseurl).geturl()
reparsed = urllib.parse.urlparse(baseurl).geturl()
if '://' not in reparsed:
reparsed = urlparse.urlparse("http://" + baseurl).geturl()
reparsed = urllib.parse.urlparse("http://" + baseurl).geturl()
self.baseurl = reparsed.rstrip('/') + '/'

# Note whether we want SSL verification
@@ -42,11 +42,11 @@ class HTTPClient(object):
args["status"] = jsonerror["status"]
args["message"] = jsonerror["message"]
args["traceback"] = jsonerror["traceback"]
except Exception: # pragma: no cover
except Exception:
pass
if code >= 400 and code <= 499:
raise ClientError(**args)
else: # pragma: no cover
else:
if code >= 500 and code <= 599:
if args["message"] is None:
args["message"] = ("(no message; try disabling " +
@@ -60,7 +60,7 @@ class HTTPClient(object):
pass

def _do_req(self, method, url, query_data, body_data, stream, headers):
url = urlparse.urljoin(self.baseurl, url)
url = urllib.parse.urljoin(self.baseurl, url)
try:
# Create a new session, ensure we send "Connection: close",
# and explicitly close connection after the transfer.
@@ -87,7 +87,7 @@ class HTTPClient(object):
session.close()
except requests.RequestException as e:
raise ServerError(status = "502 Error", url = url,
message = str(e.message))
message = str(e))
if response.status_code != 200:
self._handle_error(url, response.status_code, response.content)
self._last_response = response
@@ -122,12 +122,10 @@ class HTTPClient(object):
else:
return self._req("POST", url, None, params)

def put(self, url, data, params = None, binary = False):
def put(self, url, data, params = None,
content_type = "application/octet-stream"):
"""Simple PUT (parameters in URL, data in body)"""
if binary:
h = { 'Content-type': 'application/octet-stream' }
else:
h = { 'Content-type': 'text/plain; charset=utf-8' }
h = { 'Content-type': content_type }
return self._req("PUT", url, query = params, body = data, headers = h)

# Generator versions that return data one line at a time.
@@ -156,7 +154,7 @@ class HTTPClient(object):
pending = tmp[-1]
for line in lines:
yield line
if pending is not None: # pragma: no cover (missing newline)
if pending is not None:
yield pending

# Yield the chunks or lines as requested
@@ -165,11 +163,11 @@ class HTTPClient(object):
yield chunk
elif isjson:
for line in lines(response.iter_content(chunk_size = 1),
ending = '\r\n'):
ending = b'\r\n'):
yield json.loads(line)
else:
for line in lines(response.iter_content(chunk_size = 65536),
ending = '\n'):
ending = b'\n'):
yield line

def get_gen(self, url, params = None, binary = False):


+ 11
- 7
nilmdb/client/numpyclient.py View File

@@ -12,20 +12,24 @@ import contextlib
from nilmdb.utils.time import timestamp_to_string, string_to_timestamp

import numpy
import cStringIO
import io

def layout_to_dtype(layout):
ltype = layout.split('_')[0]
lcount = int(layout.split('_')[1])
if ltype.startswith('int'):
atype = '<i' + str(int(ltype[3:]) / 8)
atype = '<i' + str(int(ltype[3:]) // 8)
elif ltype.startswith('uint'):
atype = '<u' + str(int(ltype[4:]) / 8)
atype = '<u' + str(int(ltype[4:]) // 8)
elif ltype.startswith('float'):
atype = '<f' + str(int(ltype[5:]) / 8)
atype = '<f' + str(int(ltype[5:]) // 8)
else:
raise ValueError("bad layout")
return numpy.dtype([('timestamp', '<i8'), ('data', atype, lcount)])
if lcount == 1:
dtype = [('timestamp', '<i8'), ('data', atype)]
else:
dtype = [('timestamp', '<i8'), ('data', atype, lcount)]
return numpy.dtype(dtype)

class NumpyClient(nilmdb.client.client.Client):
"""Subclass of nilmdb.client.Client that adds additional methods for
@@ -70,14 +74,14 @@ class NumpyClient(nilmdb.client.client.Client):

# See if we have enough to make the requested Numpy array
while total_len >= maxsize:
assembled = "".join(chunks)
assembled = b"".join(chunks)
total_len -= maxsize
chunks = [ assembled[maxsize:] ]
block = assembled[:maxsize]
yield to_numpy(block)

if total_len:
yield to_numpy("".join(chunks))
yield to_numpy(b"".join(chunks))

@contextlib.contextmanager
def stream_insert_numpy_context(self, path, start = None, end = None,


+ 9
- 24
nilmdb/cmdline/cmdline.py View File

@@ -3,7 +3,7 @@
import nilmdb.client

from nilmdb.utils.printf import *
from nilmdb.utils import datetime_tz
import datetime_tz
import nilmdb.utils.time

import sys
@@ -12,10 +12,7 @@ import argparse
from argparse import ArgumentDefaultsHelpFormatter as def_form
import signal

try: # pragma: no cover
import argcomplete
except ImportError: # pragma: no cover
argcomplete = None
import argcomplete

# Valid subcommands. Defined in separate files just to break
# things up -- they're still called with Cmdline as self.
@@ -40,7 +37,7 @@ class JimArgumentParser(argparse.ArgumentParser):
self.print_usage(sys.stderr)
self.exit(2, sprintf("error: %s\n", message))

class Complete(object): # pragma: no cover
class Complete(object):
# Completion helpers, for using argcomplete (see
# extras/nilmtool-bash-completion.sh)
def escape(self, s):
@@ -80,11 +77,7 @@ class Complete(object): # pragma: no cover
if not path:
return []
results = []
# prefix comes in as UTF-8, but results need to be Unicode,
# weird. Still doesn't work in all cases, but that's bugs in
# argcomplete.
prefix = nilmdb.utils.unicode.decode(prefix)
for (k,v) in client.stream_get_metadata(path).iteritems():
for (k,v) in client.stream_get_metadata(path).items():
kv = self.escape(k + '=' + v)
if kv.startswith(prefix):
results.append(kv)
@@ -94,16 +87,11 @@ class Cmdline(object):

def __init__(self, argv = None):
self.argv = argv or sys.argv[1:]
try:
# Assume command line arguments are encoded with stdin's encoding,
# and reverse it. Won't be needed in Python 3, but for now..
self.argv = [ x.decode(sys.stdin.encoding) for x in self.argv ]
except Exception: # pragma: no cover
pass
self.client = None
self.def_url = os.environ.get("NILMDB_URL", "http://localhost/nilmdb/")
self.subcmd = {}
self.complete = Complete()
self.complete_output_stream = None # overridden by test suite

def arg_time(self, toparse):
"""Parse a time string argument"""
@@ -131,7 +119,7 @@ class Cmdline(object):
).completer = self.complete.url

sub = self.parser.add_subparsers(
title="Commands", dest="command",
title="Commands", dest="command", required=True,
description="Use 'help command' or 'command --help' for more "
"details on a particular command.")

@@ -148,10 +136,7 @@ class Cmdline(object):
def run(self):
# Set SIGPIPE to its default handler -- we don't need Python
# to catch it for us.
try:
signal.signal(signal.SIGPIPE, signal.SIG_DFL)
except ValueError: # pragma: no cover
pass
signal.signal(signal.SIGPIPE, signal.SIG_DFL)

# Clear cached timezone, so that we can pick up timezone changes
# while running this from the test suite.
@@ -159,8 +144,8 @@ class Cmdline(object):

# Run parser
self.parser_setup()
if argcomplete: # pragma: no cover
argcomplete.autocomplete(self.parser)
argcomplete.autocomplete(self.parser, exit_method=sys.exit,
output_stream=self.complete_output_stream)
self.args = self.parser.parse_args(self.argv)

# Run arg verify handler if there is one


+ 6
- 7
nilmdb/cmdline/extract.py View File

@@ -1,4 +1,4 @@
from __future__ import print_function
from nilmdb.utils.printf import *
import nilmdb.client
import sys
@@ -41,9 +41,8 @@ def setup(self, sub):
return cmd

def cmd_extract_verify(self):
if self.args.start is not None and self.args.end is not None:
if self.args.start > self.args.end:
self.parser.error("start is after end")
if self.args.start > self.args.end:
self.parser.error("start is after end")

if self.args.binary:
if (self.args.bare or self.args.annotate or self.args.markup or
@@ -69,9 +68,9 @@ def cmd_extract(self):

printed = False
if self.args.binary:
printer = sys.stdout.write
printer = sys.stdout.buffer.write
else:
printer = print
printer = lambda x: print(x.decode('utf-8'))
bare = self.args.bare
count = self.args.count
for dataline in self.client.stream_extract(self.args.path,
@@ -83,7 +82,7 @@ def cmd_extract(self):
if bare and not count:
# Strip timestamp (first element). Doesn't make sense
# if we are only returning a count.
dataline = ' '.join(dataline.split(' ')[1:])
dataline = b' '.join(dataline.split(b' ')[1:])
printer(dataline)
printed = True
if not printed:


+ 2
- 2
nilmdb/cmdline/insert.py View File

@@ -87,7 +87,7 @@ def cmd_insert(self):
try:
filename = arg.file
if filename == '-':
infile = sys.stdin
infile = sys.stdin.buffer
else:
try:
infile = open(filename, "rb")
@@ -104,7 +104,7 @@ def cmd_insert(self):
if arg.timestamp:
data = timestamper.TimestamperRate(infile, arg.start, arg.rate)
else:
data = iter(lambda: infile.read(1048576), '')
data = iter(lambda: infile.read(1048576), b'')

# Print info
if not arg.quiet:


+ 5
- 7
nilmdb/cmdline/metadata.py View File

@@ -41,10 +41,10 @@ def cmd_metadata(self):
if self.args.set is not None or self.args.update is not None:
# Either set, or update
if self.args.set is not None:
keyvals = map(nilmdb.utils.unicode.decode, self.args.set)
keyvals = self.args.set
handler = self.client.stream_set_metadata
else:
keyvals = map(nilmdb.utils.unicode.decode, self.args.update)
keyvals = self.args.update
handler = self.client.stream_update_metadata

# Extract key=value pairs
@@ -64,7 +64,7 @@ def cmd_metadata(self):
# Delete (by setting values to empty strings)
keys = None
if self.args.delete:
keys = map(nilmdb.utils.unicode.decode, self.args.delete)
keys = list(self.args.delete)
try:
data = self.client.stream_get_metadata(self.args.path, keys)
for key in data:
@@ -76,7 +76,7 @@ def cmd_metadata(self):
# Get (or unspecified)
keys = None
if self.args.get:
keys = map(nilmdb.utils.unicode.decode, self.args.get)
keys = list(self.args.get)
try:
data = self.client.stream_get_metadata(self.args.path, keys)
except nilmdb.client.ClientError as e:
@@ -85,6 +85,4 @@ def cmd_metadata(self):
# Print nonexistant keys as having empty value
if value is None:
value = ""
printf("%s=%s\n",
nilmdb.utils.unicode.encode(key),
nilmdb.utils.unicode.encode(value))
printf("%s=%s\n", key, value)

+ 1
- 1
nilmdb/fsck/__init__.py View File

@@ -1,5 +1,5 @@
"""nilmdb.fsck"""

from __future__ import absolute_import

from nilmdb.fsck.fsck import Fsck

+ 8
- 6
nilmdb/fsck/fsck.py View File

@@ -1,5 +1,7 @@
# -*- coding: utf-8 -*-

raise Exception("todo: fix path bytes issues")

"""Check database consistency, with some ability to fix problems.
This should be able to fix cases where a database gets corrupted due
to unexpected system shutdown, and detect other cases that may cause
@@ -21,7 +23,7 @@ import progressbar
import re
import time
import shutil
import cPickle as pickle
import pickle
import numpy

class FsckError(Exception):
@@ -179,7 +181,7 @@ class Fsck(object):
### Check streams and basic interval overlap

def check_streams(self):
ids = self.stream_path.keys()
ids = list(self.stream_path.keys())
log("checking %s streams\n", "{:,d}".format(len(ids)))
with Progress(len(ids)) as pbar:
for i, sid in enumerate(ids):
@@ -187,7 +189,7 @@ class Fsck(object):
path = self.stream_path[sid]

# unique path, valid layout
if self.stream_path.values().count(path) != 1:
if list(self.stream_path.values()).count(path) != 1:
raise FsckError("duplicated path %s", path)
layout = self.stream_layout[sid].split('_')[0]
if layout not in ('int8', 'int16', 'int32', 'int64',
@@ -269,7 +271,7 @@ class Fsck(object):
for subdir in subdirs:
# Find all files in that dir
subpath = os.path.join(bulk, subdir)
files = filter(regex.search, os.listdir(subpath))
files = list(filter(regex.search, os.listdir(subpath)))
if not files:
self.fix_empty_subdir(subpath)
raise RetryFsck
@@ -315,7 +317,7 @@ class Fsck(object):
### Check interval endpoints

def check_intervals(self):
total_ints = sum(len(x) for x in self.stream_interval.values())
total_ints = sum(len(x) for x in list(self.stream_interval.values()))
log("checking %s intervals\n", "{:,d}".format(total_ints))
done = 0
with Progress(total_ints) as pbar:
@@ -398,7 +400,7 @@ class Fsck(object):

def check_data(self):
total_rows = sum(sum((y[3] - y[2]) for y in x)
for x in self.stream_interval.values())
for x in list(self.stream_interval.values()))
log("checking %s rows of data\n", "{:,d}".format(total_rows))
done = 0
with Progress(total_rows) as pbar:


+ 3
- 2
nilmdb/scripts/nilmdb_fsck.py View File

@@ -10,8 +10,9 @@ def main():

parser = argparse.ArgumentParser(
description = 'Check database consistency',
formatter_class = argparse.ArgumentDefaultsHelpFormatter,
version = nilmdb.__version__)
formatter_class = argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument("-v", "--version", action="version",
version = nilmdb.__version__)
parser.add_argument("-f", "--fix", action="store_true",
default=False, help = 'Fix errors when possible '
'(which may involve removing data)')


+ 35
- 29
nilmdb/scripts/nilmdb_server.py View File

@@ -4,14 +4,18 @@ import nilmdb.server
import argparse
import os
import socket
import cherrypy
import sys

def main():
"""Main entry point for the 'nilmdb-server' command line script"""

parser = argparse.ArgumentParser(
description = 'Run the NilmDB server',
formatter_class = argparse.ArgumentDefaultsHelpFormatter,
version = nilmdb.__version__)
formatter_class = argparse.ArgumentDefaultsHelpFormatter)

parser.add_argument("-v", "--version", action="version",
version = nilmdb.__version__)

group = parser.add_argument_group("Standard options")
group.add_argument('-a', '--address',
@@ -39,47 +43,49 @@ def main():
db = nilmdb.utils.serializer_proxy(nilmdb.server.NilmDB)(args.database)

# Configure the server
if args.quiet:
embedded = True
else:
embedded = False
if not args.quiet:
cherrypy._cpconfig.environments['embedded']['log.screen'] = True
server = nilmdb.server.Server(db,
host = args.address,
port = args.port,
embedded = embedded,
force_traceback = args.traceback)

# Print info
if not args.quiet:
print "Version: %s" % nilmdb.__version__
print "Database: %s" % (os.path.realpath(args.database))
print("Version: %s" % nilmdb.__version__)
print("Database: %s" % (os.path.realpath(args.database)))
if args.address == '0.0.0.0' or args.address == '::':
host = socket.getfqdn()
else:
host = args.address
print "Server URL: http://%s:%d/" % ( host, args.port)
print "----"
print("Server URL: http://%s:%d/" % ( host, args.port))
print("----")

# Run it
if args.yappi:
print "Running in yappi"
try:
import yappi
yappi.start()
try:
if args.yappi:
print("Running in yappi")
try:
import yappi
yappi.start()
server.start(blocking = True)
finally:
yappi.stop()
stats = yappi.get_func_stats()
stats.sort("ttot")
stats.print_all()
from IPython import embed
embed(header = "Use the `yappi` or `stats` object to explore "
"further, quit to exit")
else:
server.start(blocking = True)
finally:
yappi.stop()
yappi.print_stats(sort_type = yappi.SORTTYPE_TTOT, limit = 50)
from IPython import embed
embed(header = "Use the yappi object to explore further, "
"quit to exit")
else:
server.start(blocking = True)

# Clean up
if not args.quiet:
print "Closing database"
db.close()
except nilmdb.server.serverutil.CherryPyExit as e:
print("Exiting due to CherryPy error", file=sys.stderr)
raise
finally:
if not args.quiet:
print("Closing database")
db.close()

if __name__ == "__main__":
main()

+ 3
- 15
nilmdb/server/__init__.py View File

@@ -1,20 +1,8 @@
"""nilmdb.server"""

from __future__ import absolute_import

# Try to set up pyximport to automatically rebuild Cython modules. If
# this doesn't work, it's OK, as long as the modules were built externally.
# (e.g. python setup.py build_ext --inplace)
try: # pragma: no cover
import Cython
import distutils.version
if (distutils.version.LooseVersion(Cython.__version__) <
distutils.version.LooseVersion("0.17")): # pragma: no cover
raise ImportError("Cython version too old")
import pyximport
pyximport.install(inplace = True, build_in_temp = False)
except (ImportError, TypeError): # pragma: no cover
pass
# Set up pyximport to automatically rebuild Cython modules if needed.
import pyximport
pyximport.install(inplace = True, build_in_temp = False)

from nilmdb.server.nilmdb import NilmDB
from nilmdb.server.server import Server, wsgi_application


+ 63
- 59
nilmdb/server/bulkdata.py View File

@@ -2,14 +2,14 @@

# Need absolute_import so that "import nilmdb" won't pull in
# nilmdb.py, but will pull the parent nilmdb module instead.
from __future__ import absolute_import
from __future__ import division
from nilmdb.utils.printf import *
from nilmdb.utils.time import timestamp_to_string as timestamp_to_string
import nilmdb.utils

import os
import cPickle as pickle
import pickle
import re
import sys
import tempfile
@@ -25,25 +25,28 @@ fd_cache_size = 8
@nilmdb.utils.must_close(wrap_verify = False)
class BulkData(object):
def __init__(self, basepath, **kwargs):
self.basepath = basepath
self.root = os.path.join(self.basepath, "data")
self.lock = self.root + ".lock"
if isinstance(basepath, str):
self.basepath = self._encode_filename(basepath)
else:
self.basepath = basepath
self.root = os.path.join(self.basepath, b"data")
self.lock = self.root + b".lock"
self.lockfile = None

# Tuneables
if "file_size" in kwargs:
if "file_size" in kwargs and kwargs["file_size"] is not None:
self.file_size = kwargs["file_size"]
else:
# Default to approximately 128 MiB per file
self.file_size = 128 * 1024 * 1024

if "files_per_dir" in kwargs:
if "files_per_dir" in kwargs and kwargs["files_per_dir"] is not None:
self.files_per_dir = kwargs["files_per_dir"]
else:
# 32768 files per dir should work even on FAT32
self.files_per_dir = 32768

if "initial_nrows" in kwargs:
if "initial_nrows" in kwargs and kwargs["initial_nrows"] is not None:
self.initial_nrows = kwargs["initial_nrows"]
else:
# First row is 0
@@ -56,7 +59,8 @@ class BulkData(object):
# Create the lock
self.lockfile = open(self.lock, "w")
if not nilmdb.utils.lock.exclusive_lock(self.lockfile):
raise IOError('database at "' + self.basepath +
raise IOError('database at "' +
self._decode_filename(self.basepath) +
'" is already locked by another process')

def close(self):
@@ -66,21 +70,21 @@ class BulkData(object):
self.lockfile.close()
try:
os.unlink(self.lock)
except OSError: # pragma: no cover
except OSError:
pass
self.lockfile = None

def _encode_filename(self, path):
# Encode all paths to UTF-8, regardless of sys.getfilesystemencoding(),
# because we want to be able to represent all code points and the user
# will never be directly exposed to filenames. We can then do path
# manipulations on the UTF-8 directly.
if isinstance(path, unicode):
return path.encode('utf-8')
return path
# Translate unicode strings to raw bytes, if needed. We
# always manipulate paths internally as bytes.
return path.encode('utf-8')
def _decode_filename(self, path):
# Translate raw bytes to unicode strings, escaping if needed
return path.decode('utf-8', errors='backslashreplace')

def _create_check_ospath(self, ospath):
if ospath[-1] == '/':
if ospath[-1:] == b'/':
raise ValueError("invalid path; should not end with a /")
if Table.exists(ospath):
raise ValueError("stream already exists at this path")
@@ -97,13 +101,13 @@ class BulkData(object):
don't exist. Returns a list of elements that got created."""
path = self._encode_filename(unicodepath)

if path[0] != '/':
raise ValueError("paths must start with /")
[ group, node ] = path.rsplit("/", 1)
if group == '':
if path[0:1] != b'/':
raise ValueError("paths must start with / ")
[ group, node ] = path.rsplit(b"/", 1)
if group == b'':
raise ValueError("invalid path; path must contain at least one "
"folder")
if node == '':
if node == b'':
raise ValueError("invalid path; should not end with a /")
if not Table.valid_path(path):
raise ValueError("path name is invalid or contains reserved words")
@@ -114,7 +118,7 @@ class BulkData(object):
# os.path.join)

# Make directories leading up to this one
elements = path.lstrip('/').split('/')
elements = path.lstrip(b'/').split(b'/')
made_dirs = []
try:
# Make parent elements
@@ -126,14 +130,10 @@ class BulkData(object):
os.mkdir(ospath)
made_dirs.append(ospath)
except Exception as e:
# Try to remove paths that we created; ignore errors
exc_info = sys.exc_info()
for ospath in reversed(made_dirs): # pragma: no cover (hard to hit)
try:
os.rmdir(ospath)
except OSError:
pass
raise exc_info[1], None, exc_info[2]
# Remove paths that we created
for ospath in reversed(made_dirs):
os.rmdir(ospath)
raise

return elements

@@ -168,7 +168,7 @@ class BulkData(object):
os.rmdir(ospath)
except OSError:
pass
raise exc_info[1], None, exc_info[2]
raise exc_info[1].with_traceback(exc_info[2])

# Success
return
@@ -176,8 +176,8 @@ class BulkData(object):
def _remove_leaves(self, unicodepath):
"""Remove empty directories starting at the leaves of unicodepath"""
path = self._encode_filename(unicodepath)
elements = path.lstrip('/').split('/')
for i in reversed(range(len(elements))):
elements = path.lstrip(b'/').split(b'/')
for i in reversed(list(range(len(elements)))):
ospath = os.path.join(self.root, *elements[0:i+1])
try:
os.rmdir(ospath)
@@ -191,9 +191,9 @@ class BulkData(object):
newpath = self._encode_filename(newunicodepath)

# Get OS paths
oldelements = oldpath.lstrip('/').split('/')
oldelements = oldpath.lstrip(b'/').split(b'/')
oldospath = os.path.join(self.root, *oldelements)
newelements = newpath.lstrip('/').split('/')
newelements = newpath.lstrip(b'/').split(b'/')
newospath = os.path.join(self.root, *newelements)

# Basic checks
@@ -204,8 +204,8 @@ class BulkData(object):
self.getnode.cache_remove(self, oldunicodepath)

# Move the table to a temporary location
tmpdir = tempfile.mkdtemp(prefix = "rename-", dir = self.root)
tmppath = os.path.join(tmpdir, "table")
tmpdir = tempfile.mkdtemp(prefix = b"rename-", dir = self.root)
tmppath = os.path.join(tmpdir, b"table")
os.rename(oldospath, tmppath)

try:
@@ -233,7 +233,7 @@ class BulkData(object):
path = self._encode_filename(unicodepath)

# Get OS path
elements = path.lstrip('/').split('/')
elements = path.lstrip(b'/').split(b'/')
ospath = os.path.join(self.root, *elements)

# Remove Table object from cache
@@ -258,7 +258,7 @@ class BulkData(object):
"""Return a Table object corresponding to the given database
path, which must exist."""
path = self._encode_filename(unicodepath)
elements = path.lstrip('/').split('/')
elements = path.lstrip(b'/').split(b'/')
ospath = os.path.join(self.root, *elements)
return Table(ospath, self.initial_nrows)

@@ -271,12 +271,12 @@ class Table(object):
@classmethod
def valid_path(cls, root):
"""Return True if a root path is a valid name"""
return "_format" not in root.split("/")
return b"_format" not in root.split(b"/")

@classmethod
def exists(cls, root):
"""Return True if a table appears to exist at this OS path"""
return os.path.isfile(os.path.join(root, "_format"))
return os.path.isfile(os.path.join(root, b"_format"))

@classmethod
def create(cls, root, layout, file_size, files_per_dir):
@@ -293,7 +293,7 @@ class Table(object):
"files_per_dir": files_per_dir,
"layout": layout,
"version": 3 }
with open(os.path.join(root, "_format"), "wb") as f:
with open(os.path.join(root, b"_format"), "wb") as f:
pickle.dump(fmt, f, 2)

# Normal methods
@@ -303,10 +303,10 @@ class Table(object):
self.initial_nrows = initial_nrows

# Load the format
with open(os.path.join(self.root, "_format"), "rb") as f:
with open(os.path.join(self.root, b"_format"), "rb") as f:
fmt = pickle.load(f)

if fmt["version"] != 3: # pragma: no cover
if fmt["version"] != 3:
# Old versions used floating point timestamps, which aren't
# valid anymore.
raise NotImplementedError("old version " + str(fmt["version"]) +
@@ -336,7 +336,7 @@ class Table(object):
# greater than the row number of any piece of data that
# currently exists, not necessarily all data that _ever_
# existed.
regex = re.compile("^[0-9a-f]{4,}$")
regex = re.compile(b"^[0-9a-f]{4,}$")

# Find the last directory. We sort and loop through all of them,
# starting with the numerically greatest, because the dirs could be
@@ -348,8 +348,8 @@ class Table(object):
for subdir in subdirs:
# Now find the last file in that dir
path = os.path.join(self.root, subdir)
files = filter(regex.search, os.listdir(path))
if not files: # pragma: no cover (shouldn't occur)
files = list(filter(regex.search, os.listdir(path)))
if not files:
# Empty dir: try the next one
continue

@@ -380,8 +380,8 @@ class Table(object):
filenum = row // self.rows_per_file
# It's OK if these format specifiers are too short; the filenames
# will just get longer but will still sort correctly.
dirname = sprintf("%04x", filenum // self.files_per_dir)
filename = sprintf("%04x", filenum % self.files_per_dir)
dirname = sprintf(b"%04x", filenum // self.files_per_dir)
filename = sprintf(b"%04x", filenum % self.files_per_dir)
offset = (row % self.rows_per_file) * self.row_size
count = self.rows_per_file - (row % self.rows_per_file)
return (dirname, filename, offset, count)
@@ -389,7 +389,7 @@ class Table(object):
def _row_from_offset(self, subdir, filename, offset):
"""Return the row number that corresponds to the given
'subdir/filename' and byte-offset within that file."""
if (offset % self.row_size) != 0: # pragma: no cover
if (offset % self.row_size) != 0:
# this shouldn't occur, unless there is some corruption somewhere
raise ValueError("file offset is not a multiple of data size")
filenum = int(subdir, 16) * self.files_per_dir + int(filename, 16)
@@ -436,6 +436,8 @@ class Table(object):
are non-monotonic, or don't fall between 'start' and 'end',
a ValueError is raised.

Note that data is always of 'bytes' type.

If 'binary' is True, the data should be in raw binary format
instead: little-endian, matching the current table's layout,
including the int64 timestamp.
@@ -476,9 +478,9 @@ class Table(object):
if binary:
raise IndexError
bad = data.splitlines()[linenum-1]
bad += '\n' + ' ' * (colnum - 1) + '^'
bad += b'\n' + b' ' * (colnum - 1) + b'^'
except IndexError:
bad = ""
bad = b""
if errtype == rocket.ERR_NON_MONOTONIC:
err = "timestamp is not monotonically increasing"
elif errtype == rocket.ERR_OUT_OF_INTERVAL:
@@ -492,8 +494,9 @@ class Table(object):
timestamp_to_string(end))
else:
err = str(obj)
bad_str = bad.decode('utf-8', errors='backslashreplace')
raise ValueError("error parsing input data: " +
where + err + "\n" + bad)
where + err + "\n" + bad_str)
tot_rows += added_rows
except Exception:
# Some failure, so try to roll things back by truncating or
@@ -556,7 +559,7 @@ class Table(object):
# file. Only when the list covers the entire extent of the
# file will that file be removed.
datafile = os.path.join(self.root, subdir, filename)
cachefile = datafile + ".removed"
cachefile = datafile + b".removed"
try:
with open(cachefile, "rb") as f:
ranges = pickle.load(f)
@@ -583,8 +586,9 @@ class Table(object):
# Not connected; append previous and start again
merged.append(prev)
prev = new
if prev is not None:
merged.append(prev)
# Last range we were looking at goes into the file. We know
# there was at least one (the one we just removed).