Browse Source

Fix flake8-reported issues with fsck

tags/nilmdb-2.1.0
Jim Paris 3 years ago
parent
commit
4cdaef51c1
4 changed files with 62 additions and 41 deletions
  1. +45
    -27
      nilmdb/fsck/fsck.py
  2. +9
    -9
      nilmdb/scripts/nilmdb_fsck.py
  3. +2
    -2
      nilmdb/scripts/nilmdb_server.py
  4. +6
    -3
      setup.cfg

+ 45
- 27
nilmdb/fsck/fsck.py View File

@@ -1,7 +1,5 @@
# -*- coding: utf-8 -*-

raise Exception("todo: fix path bytes issues")

"""Check database consistency, with some ability to fix problems.
This should be able to fix cases where a database gets corrupted due
to unexpected system shutdown, and detect other cases that may cause
@@ -13,7 +11,6 @@ import nilmdb.client.numpyclient
from nilmdb.utils.interval import IntervalError
from nilmdb.server.interval import Interval, IntervalSet
from nilmdb.utils.printf import printf, fprintf, sprintf
from nilmdb.utils.time import timestamp_to_string

from collections import defaultdict
import sqlite3
@@ -21,70 +18,81 @@ import os
import sys
import progressbar
import re
import time
import shutil
import pickle
import numpy


class FsckError(Exception):
    """Base error raised by fsck, with optional printf-style arguments."""

    def __init__(self, msg="", *args):
        # Formatting is applied only when arguments were supplied; a bare
        # message is stored exactly as given.
        text = sprintf(msg, *args) if args else msg
        super().__init__(text)


class FixableFsckError(FsckError):
    """FsckError whose message ends with a hint to re-run with --fix."""

    def __init__(self, msg="", *args):
        # Expand printf-style arguments first so the hint is appended to
        # the finished message.
        formatted = sprintf(msg, *args) if args else msg
        hinted = f'{formatted}\nThis may be fixable with "--fix".'
        FsckError.__init__(self, hinted)


class RetryFsck(FsckError):
    """FsckError subclass with no behavior of its own."""


def log(format, *args):
    """Forward a printf-style format string and its arguments to printf."""
    # NOTE(review): printf is imported from nilmdb.utils.printf; presumably
    # it writes to stdout -- confirm there.
    printf(format, *args)


def err(format, *args):
    """Forward a printf-style format string and its arguments to stderr."""
    stream = sys.stderr
    fprintf(stream, format, *args)


# Decorator that retries a function if it raises a specific exception
def retry_if_raised(exc, message=None, max_retries=100):
    """Return a decorator that re-runs the wrapped function when it raises.

    exc: exception class (or tuple of classes) that triggers another
        attempt; any other exception propagates immediately.
    message: optional text logged each time `exc` is caught.
    max_retries: maximum number of attempts before giving up.

    The wrapper returns the wrapped function's result from the first
    attempt that does not raise `exc`.  It raises Exception when all
    `max_retries` attempts raised `exc`.
    """
    import functools

    def f1(func):
        # Keep the wrapped function's name and docstring on the wrapper.
        @functools.wraps(func)
        def f2(*args, **kwargs):
            for _ in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except exc:
                    if message:
                        log("%s\n\n", message)
            # Interpolate the limit so the message shows the real number
            # instead of a literal "%d".
            raise Exception(
                "Max number of retries (%d) exceeded; giving up"
                % max_retries)
        return f2
    return f1


class Progress(object):
    """Context manager around a progressbar.ProgressBar.

    The bar shows a percentage, a bar and an ETA.  Entering the context
    starts the bar; leaving it without an exception finishes the bar,
    and leaving it with an exception just prints a newline.
    """

    def __init__(self, maxval):
        # A maximum of 0 is replaced by 1 (presumably so the bar never
        # has to work with an empty range -- confirm against progressbar).
        effective_max = 1 if maxval == 0 else maxval
        widgets = [
            progressbar.Percentage(), ' ',
            progressbar.Bar(), ' ',
            progressbar.ETA(),
        ]
        self.bar = progressbar.ProgressBar(maxval=effective_max,
                                           widgets=widgets)
        # Fall back to a fixed width when the bar reports a width of 0.
        if self.bar.term_width == 0:
            self.bar.term_width = 75

    def __enter__(self):
        self.bar.start()
        self.last_update = 0
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is not None:
            # Leave the partially drawn bar and move to a new line.
            printf("\n")
            return
        self.bar.finish()

    def update(self, val):
        """Advance the underlying bar to `val`."""
        self.bar.update(val)

class Fsck(object):

def __init__(self, path, fix = False):
class Fsck(object):
def __init__(self, path, fix=False):
self.basepath = path
self.sqlpath = os.path.join(path, "data.sql")
self.bulkpath = os.path.join(path, "data")
@@ -94,7 +102,7 @@ class Fsck(object):
### Main checks

@retry_if_raised(RetryFsck, "Something was fixed: restarting fsck")
def check(self, skip_data = False):
def check(self, skip_data=False):
self.bulk = None
self.sql = None
try:
@@ -164,7 +172,7 @@ class Fsck(object):
"ORDER BY start_time")
for r in result:
if r[0] not in self.stream_path:
raise FsckError("interval ID %d not in streams", k)
raise FsckError("interval ID %d not in streams", r[0])
self.stream_interval[r[0]].append((r[1], r[2], r[3], r[4]))

log(" loading metadata\n")
@@ -172,10 +180,11 @@ class Fsck(object):
result = cur.execute("SELECT stream_id, key, value FROM metadata")
for r in result:
if r[0] not in self.stream_path:
raise FsckError("metadata ID %d not in streams", k)
raise FsckError("metadata ID %d not in streams", r[0])
if r[1] in self.stream_meta[r[0]]:
raise FsckError("duplicate metadata key '%s' for stream %d",
r[1], r[0])
raise FsckError(
"duplicate metadata key '%s' for stream %d",
r[1], r[0])
self.stream_meta[r[0]][r[1]] = r[2]

### Check streams and basic interval overlap
@@ -253,7 +262,11 @@ class Fsck(object):
raise FsckError("%s: bad or unsupported bulkdata version %d",
path, fmt["version"])
row_per_file = int(fmt["rows_per_file"])
if row_per_file < 1:
raise FsckError(f"{path}: bad row_per_file {row_per_file}")
files_per_dir = int(fmt["files_per_dir"])
if files_per_dir < 1:
raise FsckError(f"{path}: bad files_per_dir {files_per_dir}")
layout = fmt["layout"]
if layout != self.stream_layout[sid]:
raise FsckError("%s: layout mismatch %s != %s", path,
@@ -267,7 +280,7 @@ class Fsck(object):
# Find all directories
regex = re.compile("^[0-9a-f]{4,}$")
subdirs = sorted(filter(regex.search, os.listdir(bulk)),
key = lambda x: int(x, 16), reverse = True)
key=lambda x: int(x, 16), reverse=True)
for subdir in subdirs:
# Find all files in that dir
subpath = os.path.join(bulk, subdir)
@@ -325,8 +338,10 @@ class Fsck(object):
try:
bulk = self.bulkpath + self.stream_path[sid]
tab = nilmdb.server.bulkdata.Table(bulk)

def update(x):
pbar.update(done + x)

ints = self.stream_interval[sid]
done += self.check_table_intervals(sid, ints, tab, update)
finally:
@@ -335,7 +350,7 @@ class Fsck(object):
def check_table_intervals(self, sid, ints, tab, update):
# look in the table to make sure we can pick out the interval's
# endpoints
path = self.stream_path[sid]
path = self.stream_path[sid] # noqa: F841 unused
tab.file_open.cache_remove_all()
for (i, intv) in enumerate(ints):
update(i)
@@ -343,8 +358,8 @@ class Fsck(object):
if spos == epos and spos >= 0 and spos <= tab.nrows:
continue
try:
srow = tab[spos]
erow = tab[epos-1]
srow = tab[spos] # noqa: F841 unused
erow = tab[epos-1] # noqa: F841 unused
except Exception as e:
self.fix_bad_interval(sid, intv, tab, str(e))
raise RetryFsck
@@ -408,8 +423,10 @@ class Fsck(object):
try:
bulk = self.bulkpath + self.stream_path[sid]
tab = nilmdb.server.bulkdata.Table(bulk)

def update(x):
pbar.update(done + x)

ints = self.stream_interval[sid]
done += self.check_table_data(sid, ints, tab, update)
finally:
@@ -438,11 +455,12 @@ class Fsck(object):

# Get raw data, convert to NumPy array
try:
raw = tab.get_data(start, stop, binary = True)
raw = tab.get_data(start, stop, binary=True)
data = numpy.fromstring(raw, dtype)
except Exception as e:
raise FsckError("%s: failed to grab rows %d through %d: %s",
path, start, stop, repr(e))
raise FsckError(
"%s: failed to grab rows %d through %d: %s",
path, start, stop, repr(e))

# Verify that timestamps are monotonic
if (numpy.diff(data['timestamp']) <= 0).any():


+ 9
- 9
nilmdb/scripts/nilmdb_fsck.py View File

@@ -2,26 +2,26 @@

import nilmdb.fsck
import argparse
import os
import sys


def main():
    """Main entry point for the 'nilmdb-fsck' command line script"""

    parser = argparse.ArgumentParser(
        description='Check database consistency',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Options are registered in the same order as before so the generated
    # help text is unchanged.
    parser.add_argument(
        "-v", "--version",
        action="version",
        version=nilmdb.__version__)
    parser.add_argument(
        "-f", "--fix",
        action="store_true",
        default=False,
        help='Fix errors when possible '
             '(which may involve removing data)')
    parser.add_argument(
        "-n", "--no-data",
        action="store_true",
        default=False,
        help='Skip the slow full-data check')
    parser.add_argument('database', help='Database directory')
    opts = parser.parse_args()

    # Build the checker, then run it; --no-data skips the full-data pass.
    checker = nilmdb.fsck.Fsck(opts.database, opts.fix)
    checker.check(skip_data=opts.no_data)


if __name__ == "__main__":
main()

+ 2
- 2
nilmdb/scripts/nilmdb_server.py View File

@@ -80,8 +80,8 @@ def main():
stats.print_all()
try:
from IPython import embed
embed(header="Use the `yappi` or `stats` object to explore "
"further, quit to exit")
embed(header="Use the `yappi` or `stats` object to "
"explore further, `quit` to exit")
except ModuleNotFoundError:
print("\nInstall ipython to explore further")
else:


+ 6
- 3
setup.cfg View File

@@ -47,10 +47,13 @@ tag_prefix=nilmdb-
parentdir_prefix=nilmdb-

[flake8]
exclude=_version.py,fsck.py,nilmdb_fsck.py
exclude=_version.py
extend-ignore=E731
per-file-ignores=__init__.py:F401,E402 serializer.py:E722 mustclose.py:E722
per-file-ignores=__init__.py:F401,E402 \
serializer.py:E722 \
mustclose.py:E722 \
fsck.py:E266

[pylint]
ignore=_version.py,fsck.py,nilmdb_fsck.py
ignore=_version.py
disable=C0103,C0111,R0913,R0914

Loading…
Cancel
Save