diff --git a/nilmdb/fsck/fsck.py b/nilmdb/fsck/fsck.py index b6ba8ca..a40c327 100644 --- a/nilmdb/fsck/fsck.py +++ b/nilmdb/fsck/fsck.py @@ -1,7 +1,5 @@ # -*- coding: utf-8 -*- -raise Exception("todo: fix path bytes issues") - """Check database consistency, with some ability to fix problems. This should be able to fix cases where a database gets corrupted due to unexpected system shutdown, and detect other cases that may cause @@ -13,7 +11,6 @@ import nilmdb.client.numpyclient from nilmdb.utils.interval import IntervalError from nilmdb.server.interval import Interval, IntervalSet from nilmdb.utils.printf import printf, fprintf, sprintf -from nilmdb.utils.time import timestamp_to_string from collections import defaultdict import sqlite3 @@ -21,70 +18,81 @@ import os import sys import progressbar import re -import time import shutil import pickle import numpy + class FsckError(Exception): - def __init__(self, msg = "", *args): + def __init__(self, msg="", *args): if args: msg = sprintf(msg, *args) Exception.__init__(self, msg) + + class FixableFsckError(FsckError): - def __init__(self, msg = "", *args): + def __init__(self, msg="", *args): if args: msg = sprintf(msg, *args) - FsckError.__init__(self, "%s\nThis may be fixable with \"--fix\".", msg) + FsckError.__init__(self, f'{msg}\nThis may be fixable with "--fix".') + + class RetryFsck(FsckError): pass + def log(format, *args): printf(format, *args) + def err(format, *args): fprintf(sys.stderr, format, *args) + # Decorator that retries a function if it returns a specific value -def retry_if_raised(exc, message = None, max_retries = 100): +def retry_if_raised(exc, message=None, max_retries=100): def f1(func): def f2(*args, **kwargs): for n in range(max_retries): try: return func(*args, **kwargs) - except exc as e: + except exc: if message: log("%s\n\n", message) raise Exception("Max number of retries (%d) exceeded; giving up") return f2 return f1 + class Progress(object): def __init__(self, maxval): if maxval == 0: maxval = 1 self.bar = progressbar.ProgressBar( - maxval = maxval, - widgets = [ progressbar.Percentage(), ' ', - progressbar.Bar(), ' ', - progressbar.ETA() ]) + maxval=maxval, + widgets=[progressbar.Percentage(), ' ', + progressbar.Bar(), ' ', + progressbar.ETA()]) if self.bar.term_width == 0: self.bar.term_width = 75 + def __enter__(self): self.bar.start() self.last_update = 0 return self + def __exit__(self, exc_type, exc_value, traceback): if exc_type is None: self.bar.finish() else: printf("\n") + def update(self, val): self.bar.update(val) -class Fsck(object): - def __init__(self, path, fix = False): +class Fsck(object): + def __init__(self, path, fix=False): self.basepath = path self.sqlpath = os.path.join(path, "data.sql") self.bulkpath = os.path.join(path, "data") @@ -94,7 +102,7 @@ class Fsck(object): ### Main checks @retry_if_raised(RetryFsck, "Something was fixed: restarting fsck") - def check(self, skip_data = False): + def check(self, skip_data=False): self.bulk = None self.sql = None try: @@ -164,7 +172,7 @@ class Fsck(object): "ORDER BY start_time") for r in result: if r[0] not in self.stream_path: - raise FsckError("interval ID %d not in streams", k) + raise FsckError("interval ID %d not in streams", r[0]) self.stream_interval[r[0]].append((r[1], r[2], r[3], r[4])) log(" loading metadata\n") @@ -172,10 +180,11 @@ class Fsck(object): result = cur.execute("SELECT stream_id, key, value FROM metadata") for r in result: if r[0] not in self.stream_path: - raise FsckError("metadata ID %d not in streams", k) + raise FsckError("metadata ID %d not in streams", r[0]) if r[1] in self.stream_meta[r[0]]: - raise FsckError("duplicate metadata key '%s' for stream %d", - r[1], r[0]) + raise FsckError( + "duplicate metadata key '%s' for stream %d", + r[1], r[0]) self.stream_meta[r[0]][r[1]] = r[2] ### Check streams and basic interval overlap @@ -253,7 +262,11 @@ class Fsck(object): raise FsckError("%s: bad or unsupported bulkdata version %d", path, fmt["version"]) row_per_file = int(fmt["rows_per_file"]) + if row_per_file < 1: + raise FsckError(f"{path}: bad row_per_file {row_per_file}") files_per_dir = int(fmt["files_per_dir"]) + if files_per_dir < 1: + raise FsckError(f"{path}: bad files_per_dir {files_per_dir}") layout = fmt["layout"] if layout != self.stream_layout[sid]: raise FsckError("%s: layout mismatch %s != %s", path, @@ -267,7 +280,7 @@ class Fsck(object): # Find all directories regex = re.compile("^[0-9a-f]{4,}$") subdirs = sorted(filter(regex.search, os.listdir(bulk)), - key = lambda x: int(x, 16), reverse = True) + key=lambda x: int(x, 16), reverse=True) for subdir in subdirs: # Find all files in that dir subpath = os.path.join(bulk, subdir) @@ -325,8 +338,10 @@ class Fsck(object): try: bulk = self.bulkpath + self.stream_path[sid] tab = nilmdb.server.bulkdata.Table(bulk) + def update(x): pbar.update(done + x) + ints = self.stream_interval[sid] done += self.check_table_intervals(sid, ints, tab, update) finally: @@ -335,7 +350,7 @@ class Fsck(object): def check_table_intervals(self, sid, ints, tab, update): # look in the table to make sure we can pick out the interval's # endpoints - path = self.stream_path[sid] + path = self.stream_path[sid] # noqa: F841 unused tab.file_open.cache_remove_all() for (i, intv) in enumerate(ints): update(i) @@ -343,8 +358,8 @@ class Fsck(object): if spos == epos and spos >= 0 and spos <= tab.nrows: continue try: - srow = tab[spos] - erow = tab[epos-1] + srow = tab[spos] # noqa: F841 unused + erow = tab[epos-1] # noqa: F841 unused except Exception as e: self.fix_bad_interval(sid, intv, tab, str(e)) raise RetryFsck @@ -408,8 +423,10 @@ class Fsck(object): try: bulk = self.bulkpath + self.stream_path[sid] tab = nilmdb.server.bulkdata.Table(bulk) + def update(x): pbar.update(done + x) + ints = self.stream_interval[sid] done += self.check_table_data(sid, ints, tab, update) finally: @@ -438,11 +455,12 @@ class Fsck(object): # Get raw data, convert to NumPy arary try: - raw = tab.get_data(start, stop, binary = True) + raw = tab.get_data(start, stop, binary=True) data = numpy.fromstring(raw, dtype) except Exception as e: - raise FsckError("%s: failed to grab rows %d through %d: %s", - path, start, stop, repr(e)) + raise FsckError( + "%s: failed to grab rows %d through %d: %s", + path, start, stop, repr(e)) # Verify that timestamps are monotonic if (numpy.diff(data['timestamp']) <= 0).any(): diff --git a/nilmdb/scripts/nilmdb_fsck.py b/nilmdb/scripts/nilmdb_fsck.py index 37c841d..e7683fc 100755 --- a/nilmdb/scripts/nilmdb_fsck.py +++ b/nilmdb/scripts/nilmdb_fsck.py @@ -2,26 +2,26 @@ import nilmdb.fsck import argparse -import os -import sys + def main(): """Main entry point for the 'nilmdb-fsck' command line script""" parser = argparse.ArgumentParser( - description = 'Check database consistency', - formatter_class = argparse.ArgumentDefaultsHelpFormatter) + description='Check database consistency', + formatter_class=argparse.ArgumentDefaultsHelpFormatter) parser.add_argument("-v", "--version", action="version", - version = nilmdb.__version__) + version=nilmdb.__version__) parser.add_argument("-f", "--fix", action="store_true", - default=False, help = 'Fix errors when possible ' + default=False, help='Fix errors when possible ' '(which may involve removing data)') parser.add_argument("-n", "--no-data", action="store_true", - default=False, help = 'Skip the slow full-data check') - parser.add_argument('database', help = 'Database directory') + default=False, help='Skip the slow full-data check') + parser.add_argument('database', help='Database directory') args = parser.parse_args() - nilmdb.fsck.Fsck(args.database, args.fix).check(skip_data = args.no_data) + nilmdb.fsck.Fsck(args.database, args.fix).check(skip_data=args.no_data) + if __name__ == "__main__": main() diff --git a/nilmdb/scripts/nilmdb_server.py b/nilmdb/scripts/nilmdb_server.py index 1419219..9cb7046 100755 --- a/nilmdb/scripts/nilmdb_server.py +++ b/nilmdb/scripts/nilmdb_server.py @@ -80,8 +80,8 @@ def main(): stats.print_all() try: from IPython import embed - embed(header="Use the `yappi` or `stats` object to explore " - "further, quit to exit") + embed(header="Use the `yappi` or `stats` object to " + "explore further, `quit` to exit") except ModuleNotFoundError: print("\nInstall ipython to explore further") else: diff --git a/setup.cfg b/setup.cfg index e1af4df..fbcc81c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -47,10 +47,13 @@ tag_prefix=nilmdb- parentdir_prefix=nilmdb- [flake8] -exclude=_version.py,fsck.py,nilmdb_fsck.py +exclude=_version.py extend-ignore=E731 -per-file-ignores=__init__.py:F401,E402 serializer.py:E722 mustclose.py:E722 +per-file-ignores=__init__.py:F401,E402 \ + serializer.py:E722 \ + mustclose.py:E722 \ + fsck.py:E266 [pylint] -ignore=_version.py,fsck.py,nilmdb_fsck.py +ignore=_version.py disable=C0103,C0111,R0913,R0914