More fsck tools, including fixes
This commit is contained in:
parent
71cd7ed9b7
commit
0f745b3047
4
Makefile
4
Makefile
|
@ -1,5 +1,5 @@
|
|||
# By default, run the tests.
|
||||
all: test
|
||||
all: fscktest
|
||||
|
||||
version:
|
||||
python setup.py version
|
||||
|
@ -25,7 +25,7 @@ lint:
|
|||
|
||||
fscktest:
|
||||
# python -c "import nilmdb.fsck; nilmdb.fsck.Fsck('/home/jim/wsgi/db').check()"
|
||||
python -c "import nilmdb.fsck; nilmdb.fsck.Fsck('/home/jim/mnt/bucket/mnt/sharon/data/db').check()"
|
||||
python -c "import nilmdb.fsck; nilmdb.fsck.Fsck('/home/jim/mnt/bucket/mnt/sharon/data/db', True).check()"
|
||||
|
||||
test:
|
||||
ifeq ($(INSIDE_EMACS), t)
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
jim@pilot.lees.18066:1373305995
|
|
@ -12,13 +12,25 @@ from nilmdb.utils.time import timestamp_to_string
|
|||
from collections import defaultdict
|
||||
import sqlite3
|
||||
import os
|
||||
import sys
|
||||
import progressbar
|
||||
import re
|
||||
import time
|
||||
import shutil
|
||||
import cPickle as pickle
|
||||
|
||||
class FsckError(Exception):
|
||||
def __init__(self, format, *args):
|
||||
Exception.__init__(self, sprintf(format, *args))
|
||||
def __init__(self, msg = "", *args):
|
||||
if args:
|
||||
msg = sprintf(msg, *args)
|
||||
Exception.__init__(self, msg)
|
||||
class FixableFsckError(FsckError):
    """Fsck error whose message also hints that "-y" may repair it.

    msg: message text, or a format string when extra args are given.
    args: optional values formatted into msg with sprintf.
    """

    def __init__(self, msg = "", *args):
        # Only run the message through sprintf when there is something to
        # substitute; a bare message is used verbatim.
        text = sprintf(msg, *args) if args else msg
        FsckError.__init__(self, "%s\nThis may be fixable with \"-y\".", text)
|
||||
class RetryFsck(FsckError):
    """Raised after a problem has been fixed, so the check can be rerun."""
|
||||
|
||||
def log(format, *args):
|
||||
printf(format, *args)
|
||||
|
@ -26,6 +38,19 @@ def log(format, *args):
|
|||
def err(format, *args):
|
||||
fprintf(sys.stderr, format, *args)
|
||||
|
||||
def retry_if_raised(exc, message = None):
    """Build a decorator that re-invokes a function while it raises `exc`.

    exc: exception class (or tuple of classes) that triggers a retry;
        any other exception propagates to the caller unchanged.
    message: optional text logged before each retry; nothing is logged
        when it is empty or None.

    The wrapped function is called again with the same arguments until it
    returns without raising `exc`, and that return value is passed
    through.  There is no limit on the number of retries.
    """
    import functools

    def decorator(func):
        # Preserve the wrapped function's name and docstring so that
        # tracebacks and introspection still point at the real function.
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            while True:
                try:
                    return func(*args, **kwargs)
                except exc:
                    if message:
                        log("%s\n\n", message)
        return wrapper
    return decorator
|
||||
|
||||
class Progress(object):
|
||||
def __init__(self, maxval):
|
||||
self.bar = progressbar.ProgressBar(maxval = maxval)
|
||||
|
@ -42,31 +67,30 @@ class Progress(object):
|
|||
printf("\n")
|
||||
def update(self, val):
|
||||
self.bar.update(val)
|
||||
#now = time.time()
|
||||
#if now - self.last_update < 0.005:
|
||||
# time.sleep(0.005)
|
||||
#self.last_update = now
|
||||
|
||||
class Fsck(object):
|
||||
|
||||
def __init__(self, path):
|
||||
def __init__(self, path, fix = False):
|
||||
self.basepath = path
|
||||
self.sqlpath = os.path.join(path, "data.sql")
|
||||
self.bulkpath = os.path.join(path, "data")
|
||||
self.bulklock = os.path.join(path, "data.lock")
|
||||
self.fix = fix
|
||||
|
||||
@retry_if_raised(RetryFsck, "Something was fixed: restarting fsck")
|
||||
def check(self):
|
||||
self.check_paths()
|
||||
self.check_sql()
|
||||
self.check_streams()
|
||||
self.check_intervals()
|
||||
log("ok\n")
|
||||
|
||||
def check_paths(self):
|
||||
log("checking paths\n")
|
||||
if not os.path.isfile(self.sqlpath):
|
||||
raise FsckError("SQL database missing")
|
||||
raise FsckError("SQL database missing (%s)", self.sqlpath)
|
||||
if not os.path.isdir(self.bulkpath):
|
||||
raise FsckError("Bulk data directory missing")
|
||||
raise FsckError("Bulk data directory missing (%s)", self.bulkpath)
|
||||
with open(self.bulklock, "w") as lockfile:
|
||||
if not nilmdb.utils.lock.exclusive_lock(lockfile):
|
||||
raise FsckError('database already locked by another process')
|
||||
|
@ -116,8 +140,8 @@ class Fsck(object):
|
|||
self.stream_meta[r[0]][r[1]] = r[2]
|
||||
|
||||
def check_streams(self):
|
||||
log("checking streams\n")
|
||||
ids = self.stream_path.keys()
|
||||
log("checking %d streams\n", len(ids))
|
||||
with Progress(len(ids)) as pbar:
|
||||
for i, sid in enumerate(ids):
|
||||
pbar.update(i)
|
||||
|
@ -152,7 +176,7 @@ class Fsck(object):
|
|||
timeiset += new
|
||||
except IntervalError:
|
||||
raise FsckError("%s: overlap in intervals:\n"
|
||||
"set: %s\nnew: %s\n",
|
||||
"set: %s\nnew: %s",
|
||||
path, str(timeiset), str(new))
|
||||
if spos != epos:
|
||||
new = Interval(spos, epos)
|
||||
|
@ -160,14 +184,13 @@ class Fsck(object):
|
|||
posiset += new
|
||||
except IntervalError:
|
||||
raise FsckError("%s: overlap in file offsets:\n"
|
||||
"set: %s\nnew: %s\n",
|
||||
"set: %s\nnew: %s",
|
||||
path, str(posiset), str(new))
|
||||
|
||||
# check bulkdata
|
||||
self.check_bulkdata(sid, path, bulk)
|
||||
|
||||
continue
|
||||
# verify we can can open it with bulkdata
|
||||
# Check that we can open bulkdata
|
||||
try:
|
||||
tab = None
|
||||
try:
|
||||
|
@ -175,11 +198,43 @@ class Fsck(object):
|
|||
except Exception as e:
|
||||
raise FsckError("%s: can't open bulkdata: %s",
|
||||
path, str(e))
|
||||
self.check_bulkdata(path, tab)
|
||||
finally:
|
||||
if tab:
|
||||
tab.close()
|
||||
|
||||
def fix_empty_subdir(self, subpath):
    """Report, or repair by deletion, a bulkdata subdirectory with no data.

    subpath: path of the subdirectory that is missing data files.

    Raises FixableFsckError when fixing is disabled (self.fix is false).
    Raises FsckError when the directory holds anything other than
    ".removed" files, since that cannot be cleaned up automatically.
    Otherwise removes the directory tree and raises RetryFsck.
    """
    description = sprintf("bulkdata path %s is missing data files", subpath)
    if not self.fix:
        raise FixableFsckError(description)

    err("\n%s\n", description)

    # Deleting is only safe when nothing but ".removed" files remain;
    # report the first entry that is something else.
    unexpected = [entry for entry in os.listdir(subpath)
                  if not entry.endswith(".removed")]
    if unexpected:
        raise FsckError("can't fix automatically: please manually "
                        "remove the file %s and try again",
                        os.path.join(subpath, unexpected[0]))

    # Nothing of value is left, so drop the whole directory.
    err("Removing empty subpath\n")
    shutil.rmtree(subpath)
    raise RetryFsck
|
||||
|
||||
def fix_bad_filesize(self, path, filepath, offset, row_size):
    """Report, or repair by truncation, a file that ends in a partial row.

    path: stream path, used only in the message.
    filepath: file whose size is not a multiple of row_size.
    offset: current size of that file in bytes.
    row_size: size of one row in bytes.

    Raises FixableFsckError when fixing is disabled (self.fix is false).
    Otherwise truncates the file down to a whole number of rows and
    raises RetryFsck.
    """
    leftover = offset % row_size
    description = sprintf(
        "%s: size of file %s (%d) is not a multiple"
        " of row size (%d): %d extra bytes present",
        path, filepath, offset, row_size, leftover)
    if not self.fix:
        raise FixableFsckError(description)

    # Fix mode: cut off the trailing partial row.
    err("\n%s\n", description)
    truncated_size = offset - leftover
    err("Truncating file to %d bytes and retrying\n", truncated_size)
    with open(filepath, "r+b") as datafile:
        datafile.truncate(truncated_size)
    raise RetryFsck
|
||||
|
||||
@retry_if_raised(RetryFsck)
|
||||
def check_bulkdata(self, sid, path, bulk):
|
||||
with open(os.path.join(bulk, "_format"), "rb") as f:
|
||||
fmt = pickle.load(f)
|
||||
|
@ -192,3 +247,61 @@ class Fsck(object):
|
|||
if layout != self.stream_layout[sid]:
|
||||
raise FsckError("%s: layout mismatch %s != %s", path,
|
||||
layout, self.stream_layout[sid])
|
||||
|
||||
# Every file should have a size that's the multiple of the row size
|
||||
rkt = nilmdb.server.rocket.Rocket(layout, None)
|
||||
row_size = rkt.binary_size
|
||||
rkt.close()
|
||||
|
||||
# Find all directories
|
||||
regex = re.compile("^[0-9a-f]{4,}$")
|
||||
subdirs = sorted(filter(regex.search, os.listdir(bulk)),
|
||||
key = lambda x: int(x, 16), reverse = True)
|
||||
for subdir in subdirs:
|
||||
# Find all files in that dir
|
||||
subpath = os.path.join(bulk, subdir)
|
||||
files = filter(regex.search, os.listdir(subpath))
|
||||
if not files:
|
||||
self.fix_empty_subdir(subpath)
|
||||
raise RetryFsck
|
||||
# Verify that their size is a multiple of the row size
|
||||
for filename in files:
|
||||
filepath = os.path.join(subpath, filename)
|
||||
offset = os.path.getsize(filepath)
|
||||
if offset % row_size:
|
||||
self.fix_bad_filesize(path, filepath, offset, row_size)
|
||||
|
||||
def check_intervals(self):
    """Check every stream's intervals against its bulk data table.

    For each stream id in self.stream_interval, opens the table found at
    self.bulkpath + self.stream_path[sid] and passes it, together with
    the stream's intervals, to check_table_intervals.  Progress is
    reported as the number of intervals handled so far, across all
    streams.

    Errors raised while opening or checking a table propagate to the
    caller; a table that was opened is always closed.
    """
    total_ints = sum(len(x) for x in self.stream_interval.values())
    log("checking %d intervals\n", total_ints)
    checked = 0
    with Progress(total_ints) as pbar:
        for sid in self.stream_interval:
            # Reset for each stream so that a failure before the table
            # is opened neither hits an unbound name in the cleanup
            # below nor re-closes the previous stream's table.
            tab = None
            try:
                path = self.stream_path[sid]
                bulk = self.bulkpath + path
                tab = nilmdb.server.bulkdata.Table(bulk)

                def update(x):
                    # x is the index within this stream; offset it by
                    # the intervals already finished in earlier streams.
                    pbar.update(checked + x)

                ints = self.stream_interval[sid]
                self.check_table_intervals(path, ints, tab, update)
                checked += len(ints)
            finally:
                if tab is not None:
                    tab.close()
|
||||
|
||||
def check_table_intervals(self, path, ints, tab, update):
    """Verify that the rows at each interval's endpoints can be read.

    path: stream path, used only in error messages.
    ints: sequence of (stime, etime, spos, epos) tuples.
    tab: table object supporting tab[row] lookups.
    update: callable given the index of the interval being checked.

    Raises FsckError if reading row spos or row epos-1 fails for any
    interval where spos differs from epos.
    """
    tab.file_open.cache_remove_all()
    for index, interval in enumerate(ints):
        _stime, _etime, spos, epos = interval
        update(index)
        if spos != epos:
            # Only intervals that cover at least one row have rows to read.
            try:
                first_row = tab[spos]
                last_row = tab[epos - 1]
            except Exception as e:
                raise FsckError(
                    sprintf("%s: interval %s error accessing rows: %s",
                            path, str(interval), str(e)))
|
||||
|
|
|
@ -13,11 +13,12 @@ def main():
|
|||
formatter_class = argparse.ArgumentDefaultsHelpFormatter)
|
||||
parser.add_argument("-V", "--version", action="version",
|
||||
version = nilmdb.__version__)
|
||||
parser.add_argument('-d', '--database', help = 'Database directory',
|
||||
default = "./db")
|
||||
parser.add_argument('-y', dest="fix", action="store_true",
|
||||
default=False, help = 'Fix errors')
|
||||
parser.add_argument('database', help = 'Database directory')
|
||||
args = parser.parse_args()
|
||||
|
||||
nilmdb.fsck.Fsck(args.database).check()
|
||||
nilmdb.fsck.Fsck(args.database).check(args.fix)
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
Loading…
Reference in New Issue
Block a user