|
|
@@ -1,7 +1,5 @@ |
|
|
|
# -*- coding: utf-8 -*- |
|
|
|
|
|
|
|
raise Exception("todo: fix path bytes issues") |
|
|
|
|
|
|
|
"""Check database consistency, with some ability to fix problems. |
|
|
|
This should be able to fix cases where a database gets corrupted due |
|
|
|
to unexpected system shutdown, and detect other cases that may cause |
|
|
@@ -13,7 +11,6 @@ import nilmdb.client.numpyclient |
|
|
|
from nilmdb.utils.interval import IntervalError |
|
|
|
from nilmdb.server.interval import Interval, IntervalSet |
|
|
|
from nilmdb.utils.printf import printf, fprintf, sprintf |
|
|
|
from nilmdb.utils.time import timestamp_to_string |
|
|
|
|
|
|
|
from collections import defaultdict |
|
|
|
import sqlite3 |
|
|
@@ -21,70 +18,81 @@ import os |
|
|
|
import sys |
|
|
|
import progressbar |
|
|
|
import re |
|
|
|
import time |
|
|
|
import shutil |
|
|
|
import pickle |
|
|
|
import numpy |
|
|
|
|
|
|
|
|
|
|
|
class FsckError(Exception):
    """Error raised when fsck finds a problem with the database.

    The message may be given printf-style: when extra positional
    arguments are supplied, `msg` is treated as a format string and
    expanded with them through `sprintf`.  With no extra arguments the
    message is used verbatim, so a literal '%' in it is left untouched.
    """

    def __init__(self, msg="", *args):
        # Only run the formatter when there is something to substitute;
        # otherwise a stray '%' in a plain message would be misread.
        if args:
            msg = sprintf(msg, *args)
        super().__init__(msg)
|
|
|
|
|
|
|
|
|
|
|
class FixableFsckError(FsckError):
    """An FsckError for problems that the "--fix" option may repair.

    Accepts the same printf-style `msg` / `*args` as FsckError and
    appends a hint about "--fix" to the resulting message.
    """

    def __init__(self, msg="", *args):
        # Expand the caller's format string first, so the hint below is
        # appended to the final text.
        if args:
            msg = sprintf(msg, *args)
        # Pass a single, fully built string (no extra args) so the base
        # class does not attempt a second round of '%' substitution.
        FsckError.__init__(self, f'{msg}\nThis may be fixable with "--fix".')
|
|
|
|
|
|
|
|
|
|
|
class RetryFsck(FsckError):
    """Raised to request that the fsck run be started over.

    Used together with the `retry_if_raised` decorator on the main
    check, after a problem has been fixed.
    """
|
|
|
|
|
|
|
|
|
|
|
def log(format, *args):
    """Emit a progress/status message.

    `format` and `args` are handed unchanged to `printf`
    (presumably printf-style formatting to standard output -- confirm
    against nilmdb.utils.printf).
    """
    printf(format, *args)
|
|
|
|
|
|
|
|
|
|
|
def err(format, *args):
    """Emit an error message on standard error.

    `format` and `args` are handed unchanged to `fprintf`, with
    `sys.stderr` as the destination stream.
    """
    fprintf(sys.stderr, format, *args)
|
|
|
|
|
|
|
|
|
|
|
# Decorator that retries a function if it raises a specific exception
def retry_if_raised(exc, message=None, max_retries=100):
    """Build a decorator that re-runs a function while it raises `exc`.

    Parameters:
      exc          exception class (or tuple of classes) that triggers
                   another attempt; any other exception propagates
                   immediately.
      message      optional text logged (via `log`) after each failed
                   attempt; nothing is logged when it is None or empty.
      max_retries  maximum number of attempts before giving up.

    The decorated function returns whatever the wrapped function
    returns on its first successful attempt.  If every attempt raises
    `exc`, a plain Exception is raised that reports the attempt limit.
    """
    # Imported locally so this helper does not depend on the module's
    # import block.
    import functools

    def f1(func):
        @functools.wraps(func)
        def f2(*args, **kwargs):
            for _ in range(max_retries):
                try:
                    return func(*args, **kwargs)
                except exc:
                    if message:
                        log("%s\n\n", message)
            # Every attempt raised `exc`; report the limit that was hit.
            raise Exception(
                f"Max number of retries ({max_retries}) exceeded; giving up")
        return f2
    return f1
|
|
|
|
|
|
|
|
|
|
|
class Progress(object):
    """Context manager around a `progressbar.ProgressBar`.

    Typical use:

        with Progress(total) as pbar:
            pbar.update(n)

    The bar is started on entry.  On a clean exit it is finished; if the
    body raised, only a newline is printed and the exception propagates.
    """

    def __init__(self, maxval):
        """Create the bar for `maxval` steps (a value of 0 is bumped to 1)."""
        # NOTE(review): presumably the bar cannot work with a zero maximum;
        # confirm against the progressbar library.
        if maxval == 0:
            maxval = 1
        self.bar = progressbar.ProgressBar(
            maxval=maxval,
            widgets=[progressbar.Percentage(), ' ',
                     progressbar.Bar(), ' ',
                     progressbar.ETA()])
        # Fall back to a fixed width when none was detected.
        if self.bar.term_width == 0:
            self.bar.term_width = 75

    def __enter__(self):
        """Start the bar and return this object."""
        self.bar.start()
        self.last_update = 0
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Finish the bar on success; on error just end the current line."""
        if exc_type is None:
            self.bar.finish()
        else:
            printf("\n")

    def update(self, val):
        """Move the bar to position `val`."""
        self.bar.update(val)
|
|
|
|
|
|
|
class Fsck(object): |
|
|
|
|
|
|
|
def __init__(self, path, fix = False): |
|
|
|
class Fsck(object): |
|
|
|
def __init__(self, path, fix=False): |
|
|
|
self.basepath = path |
|
|
|
self.sqlpath = os.path.join(path, "data.sql") |
|
|
|
self.bulkpath = os.path.join(path, "data") |
|
|
@@ -94,7 +102,7 @@ class Fsck(object): |
|
|
|
### Main checks |
|
|
|
|
|
|
|
@retry_if_raised(RetryFsck, "Something was fixed: restarting fsck") |
|
|
|
def check(self, skip_data = False): |
|
|
|
def check(self, skip_data=False): |
|
|
|
self.bulk = None |
|
|
|
self.sql = None |
|
|
|
try: |
|
|
@@ -164,7 +172,7 @@ class Fsck(object): |
|
|
|
"ORDER BY start_time") |
|
|
|
for r in result: |
|
|
|
if r[0] not in self.stream_path: |
|
|
|
raise FsckError("interval ID %d not in streams", k) |
|
|
|
raise FsckError("interval ID %d not in streams", r[0]) |
|
|
|
self.stream_interval[r[0]].append((r[1], r[2], r[3], r[4])) |
|
|
|
|
|
|
|
log(" loading metadata\n") |
|
|
@@ -172,10 +180,11 @@ class Fsck(object): |
|
|
|
result = cur.execute("SELECT stream_id, key, value FROM metadata") |
|
|
|
for r in result: |
|
|
|
if r[0] not in self.stream_path: |
|
|
|
raise FsckError("metadata ID %d not in streams", k) |
|
|
|
raise FsckError("metadata ID %d not in streams", r[0]) |
|
|
|
if r[1] in self.stream_meta[r[0]]: |
|
|
|
raise FsckError("duplicate metadata key '%s' for stream %d", |
|
|
|
r[1], r[0]) |
|
|
|
raise FsckError( |
|
|
|
"duplicate metadata key '%s' for stream %d", |
|
|
|
r[1], r[0]) |
|
|
|
self.stream_meta[r[0]][r[1]] = r[2] |
|
|
|
|
|
|
|
### Check streams and basic interval overlap |
|
|
@@ -253,7 +262,11 @@ class Fsck(object): |
|
|
|
raise FsckError("%s: bad or unsupported bulkdata version %d", |
|
|
|
path, fmt["version"]) |
|
|
|
row_per_file = int(fmt["rows_per_file"]) |
|
|
|
if row_per_file < 1: |
|
|
|
raise FsckError(f"{path}: bad row_per_file {row_per_file}") |
|
|
|
files_per_dir = int(fmt["files_per_dir"]) |
|
|
|
if files_per_dir < 1: |
|
|
|
raise FsckError(f"{path}: bad files_per_dir {files_per_dir}") |
|
|
|
layout = fmt["layout"] |
|
|
|
if layout != self.stream_layout[sid]: |
|
|
|
raise FsckError("%s: layout mismatch %s != %s", path, |
|
|
@@ -267,7 +280,7 @@ class Fsck(object): |
|
|
|
# Find all directories |
|
|
|
regex = re.compile("^[0-9a-f]{4,}$") |
|
|
|
subdirs = sorted(filter(regex.search, os.listdir(bulk)), |
|
|
|
key = lambda x: int(x, 16), reverse = True) |
|
|
|
key=lambda x: int(x, 16), reverse=True) |
|
|
|
for subdir in subdirs: |
|
|
|
# Find all files in that dir |
|
|
|
subpath = os.path.join(bulk, subdir) |
|
|
@@ -325,8 +338,10 @@ class Fsck(object): |
|
|
|
try: |
|
|
|
bulk = self.bulkpath + self.stream_path[sid] |
|
|
|
tab = nilmdb.server.bulkdata.Table(bulk) |
|
|
|
|
|
|
|
def update(x): |
|
|
|
pbar.update(done + x) |
|
|
|
|
|
|
|
ints = self.stream_interval[sid] |
|
|
|
done += self.check_table_intervals(sid, ints, tab, update) |
|
|
|
finally: |
|
|
@@ -335,7 +350,7 @@ class Fsck(object): |
|
|
|
def check_table_intervals(self, sid, ints, tab, update): |
|
|
|
# look in the table to make sure we can pick out the interval's |
|
|
|
# endpoints |
|
|
|
path = self.stream_path[sid] |
|
|
|
path = self.stream_path[sid] # noqa: F841 unused |
|
|
|
tab.file_open.cache_remove_all() |
|
|
|
for (i, intv) in enumerate(ints): |
|
|
|
update(i) |
|
|
@@ -343,8 +358,8 @@ class Fsck(object): |
|
|
|
if spos == epos and spos >= 0 and spos <= tab.nrows: |
|
|
|
continue |
|
|
|
try: |
|
|
|
srow = tab[spos] |
|
|
|
erow = tab[epos-1] |
|
|
|
srow = tab[spos] # noqa: F841 unused |
|
|
|
erow = tab[epos-1] # noqa: F841 unused |
|
|
|
except Exception as e: |
|
|
|
self.fix_bad_interval(sid, intv, tab, str(e)) |
|
|
|
raise RetryFsck |
|
|
@@ -408,8 +423,10 @@ class Fsck(object): |
|
|
|
try: |
|
|
|
bulk = self.bulkpath + self.stream_path[sid] |
|
|
|
tab = nilmdb.server.bulkdata.Table(bulk) |
|
|
|
|
|
|
|
def update(x): |
|
|
|
pbar.update(done + x) |
|
|
|
|
|
|
|
ints = self.stream_interval[sid] |
|
|
|
done += self.check_table_data(sid, ints, tab, update) |
|
|
|
finally: |
|
|
@@ -438,11 +455,12 @@ class Fsck(object): |
|
|
|
|
|
|
|
# Get raw data, convert to NumPy array |
|
|
|
try: |
|
|
|
raw = tab.get_data(start, stop, binary = True) |
|
|
|
raw = tab.get_data(start, stop, binary=True) |
|
|
|
data = numpy.fromstring(raw, dtype) |
|
|
|
except Exception as e: |
|
|
|
raise FsckError("%s: failed to grab rows %d through %d: %s", |
|
|
|
path, start, stop, repr(e)) |
|
|
|
raise FsckError( |
|
|
|
"%s: failed to grab rows %d through %d: %s", |
|
|
|
path, start, stop, repr(e)) |
|
|
|
|
|
|
|
# Verify that timestamps are monotonic |
|
|
|
if (numpy.diff(data['timestamp']) <= 0).any(): |
|
|
|