|
|
@@ -12,13 +12,25 @@ from nilmdb.utils.time import timestamp_to_string |
|
|
|
from collections import defaultdict |
|
|
|
import sqlite3 |
|
|
|
import os |
|
|
|
import sys |
|
|
|
import progressbar |
|
|
|
import re |
|
|
|
import time |
|
|
|
import shutil |
|
|
|
import cPickle as pickle |
|
|
|
|
|
|
|
class FsckError(Exception):
    """Error raised when a database check fails.

    msg  -- the message text, or a printf-style format string when
            extra positional arguments are supplied.
    args -- optional values substituted into msg via sprintf().

    The message is only run through sprintf() when arguments are
    given, so a plain message containing a literal "%" is stored
    unchanged instead of triggering a formatting error.
    """
    def __init__(self, msg="", *args):
        if args:
            msg = sprintf(msg, *args)
        Exception.__init__(self, msg)
|
|
|
class FixableFsckError(FsckError):
    """FsckError whose message is followed by a hint about "-y".

    msg and args are handled as in FsckError: msg is formatted with
    sprintf() only when extra arguments are present.
    """
    def __init__(self, msg="", *args):
        text = sprintf(msg, *args) if args else msg
        # Hand the already-formatted text to the base class as the
        # single %s argument so the hint is appended on its own line.
        FsckError.__init__(self, "%s\nThis may be fixable with \"-y\".", text)
|
|
|
class RetryFsck(FsckError):
    """Raised by the fix_* helpers after a repair so the check is rerun."""
|
|
|
|
|
|
|
def log(format, *args):
    """Emit a printf-style message by forwarding to printf()."""
    printf(format, *args)
|
|
@@ -26,6 +38,19 @@ def log(format, *args): |
|
|
|
def err(format, *args):
    """Emit a printf-style message on sys.stderr via fprintf()."""
    fprintf(sys.stderr, format, *args)
|
|
|
|
|
|
|
# Decorator that retries a function for as long as it raises a
# specific exception
def retry_if_raised(exc, message=None):
    """Build a decorator that re-invokes a function while it raises exc.

    exc     -- exception class (or tuple of classes) that triggers a retry.
    message -- optional text passed to log() before each retry.

    The wrapped function is called again with the same arguments until
    it returns normally; that return value is passed through.  Any
    exception other than exc propagates to the caller.  There is no
    retry limit.
    """
    # Local import so this block does not depend on the module header.
    import functools

    def f1(func):
        @functools.wraps(func)  # keep func's name/docstring on the wrapper
        def f2(*args, **kwargs):
            while True:
                try:
                    return func(*args, **kwargs)
                except exc:
                    if message:
                        log("%s\n\n", message)
        return f2
    return f1
|
|
|
|
|
|
|
class Progress(object): |
|
|
|
def __init__(self, maxval): |
|
|
|
self.bar = progressbar.ProgressBar(maxval = maxval) |
|
|
@@ -42,31 +67,30 @@ class Progress(object): |
|
|
|
printf("\n") |
|
|
|
def update(self, val):
    """Forward the new position val to the wrapped progress bar."""
    self.bar.update(val)
|
|
|
|
|
|
|
class Fsck(object): |
|
|
|
|
|
|
|
def __init__(self, path): |
|
|
|
def __init__(self, path, fix = False): |
|
|
|
self.basepath = path |
|
|
|
self.sqlpath = os.path.join(path, "data.sql") |
|
|
|
self.bulkpath = os.path.join(path, "data") |
|
|
|
self.bulklock = os.path.join(path, "data.lock") |
|
|
|
self.fix = fix |
|
|
|
|
|
|
|
@retry_if_raised(RetryFsck, "Something was fixed: restarting fsck")
def check(self):
    """Run every check stage in order and log "ok" when all pass.

    If a stage raises RetryFsck, the decorator logs a notice and
    restarts the whole sequence from the first stage.
    """
    stages = ("check_paths", "check_sql",
              "check_streams", "check_intervals")
    for stage in stages:
        # Look each stage up right before calling it, as a plain
        # sequence of self.<stage>() calls would.
        getattr(self, stage)()
    log("ok\n")
|
|
|
|
|
|
|
def check_paths(self): |
|
|
|
log("checking paths\n") |
|
|
|
if not os.path.isfile(self.sqlpath): |
|
|
|
raise FsckError("SQL database missing") |
|
|
|
raise FsckError("SQL database missing (%s)", self.sqlpath) |
|
|
|
if not os.path.isdir(self.bulkpath): |
|
|
|
raise FsckError("Bulk data directory missing") |
|
|
|
raise FsckError("Bulk data directory missing (%s)", self.bulkpath) |
|
|
|
with open(self.bulklock, "w") as lockfile: |
|
|
|
if not nilmdb.utils.lock.exclusive_lock(lockfile): |
|
|
|
raise FsckError('database already locked by another process') |
|
|
@@ -116,8 +140,8 @@ class Fsck(object): |
|
|
|
self.stream_meta[r[0]][r[1]] = r[2] |
|
|
|
|
|
|
|
def check_streams(self): |
|
|
|
log("checking streams\n") |
|
|
|
ids = self.stream_path.keys() |
|
|
|
log("checking %d streams\n", len(ids)) |
|
|
|
with Progress(len(ids)) as pbar: |
|
|
|
for i, sid in enumerate(ids): |
|
|
|
pbar.update(i) |
|
|
@@ -152,7 +176,7 @@ class Fsck(object): |
|
|
|
timeiset += new |
|
|
|
except IntervalError: |
|
|
|
raise FsckError("%s: overlap in intervals:\n" |
|
|
|
"set: %s\nnew: %s\n", |
|
|
|
"set: %s\nnew: %s", |
|
|
|
path, str(timeiset), str(new)) |
|
|
|
if spos != epos: |
|
|
|
new = Interval(spos, epos) |
|
|
@@ -160,14 +184,13 @@ class Fsck(object): |
|
|
|
posiset += new |
|
|
|
except IntervalError: |
|
|
|
raise FsckError("%s: overlap in file offsets:\n" |
|
|
|
"set: %s\nnew: %s\n", |
|
|
|
"set: %s\nnew: %s", |
|
|
|
path, str(posiset), str(new)) |
|
|
|
|
|
|
|
# check bulkdata |
|
|
|
self.check_bulkdata(sid, path, bulk) |
|
|
|
|
|
|
|
continue |
|
|
|
# verify we can open it with bulkdata |
|
|
|
# Check that we can open bulkdata |
|
|
|
try: |
|
|
|
tab = None |
|
|
|
try: |
|
|
@@ -175,11 +198,43 @@ class Fsck(object): |
|
|
|
except Exception as e: |
|
|
|
raise FsckError("%s: can't open bulkdata: %s", |
|
|
|
path, str(e)) |
|
|
|
self.check_bulkdata(path, tab) |
|
|
|
finally: |
|
|
|
if tab: |
|
|
|
tab.close() |
|
|
|
|
|
|
|
def fix_empty_subdir(self, subpath):
    """Handle a bulkdata subdirectory that contains no data files.

    subpath -- path of the offending subdirectory.

    Raises FixableFsckError when self.fix is false.  Otherwise raises
    FsckError if the directory holds anything other than ".removed"
    files; if not, deletes the whole directory and raises RetryFsck.
    This method never returns normally.
    """
    problem = sprintf("bulkdata path %s is missing data files", subpath)
    if not self.fix:
        raise FixableFsckError(problem)

    # Try to fix it by just deleting whatever is present,
    # as long as it's only ".removed" files.
    err("\n%s\n", problem)
    unexpected = [name for name in os.listdir(subpath)
                  if not name.endswith(".removed")]
    if unexpected:
        # Report the first unexpected entry, in listing order.
        raise FsckError("can't fix automatically: please manually "
                        "remove the file %s and try again",
                        os.path.join(subpath, unexpected[0]))

    # Remove the whole thing
    err("Removing empty subpath\n")
    shutil.rmtree(subpath)
    raise RetryFsck
|
|
|
|
|
|
|
def fix_bad_filesize(self, path, filepath, offset, row_size):
    """Handle a data file whose size is not a multiple of the row size.

    path     -- stream path, used only in the message.
    filepath -- file to truncate.
    offset   -- current size of the file in bytes.
    row_size -- size of one row in bytes.

    Raises FixableFsckError when self.fix is false.  Otherwise
    truncates the file down to the nearest multiple of row_size and
    raises RetryFsck.  This method never returns normally.
    """
    leftover = offset % row_size
    problem = sprintf("%s: size of file %s (%d) is not a multiple"
                      " of row size (%d): %d extra bytes present",
                      path, filepath, offset, row_size, leftover)
    if not self.fix:
        raise FixableFsckError(problem)

    # Try to fix it by just truncating the file
    err("\n%s\n", problem)
    target = offset - leftover
    err("Truncating file to %d bytes and retrying\n", target)
    with open(filepath, "r+b") as handle:
        handle.truncate(target)
    raise RetryFsck
|
|
|
|
|
|
|
@retry_if_raised(RetryFsck) |
|
|
|
def check_bulkdata(self, sid, path, bulk): |
|
|
|
with open(os.path.join(bulk, "_format"), "rb") as f: |
|
|
|
fmt = pickle.load(f) |
|
|
@@ -192,3 +247,61 @@ class Fsck(object): |
|
|
|
if layout != self.stream_layout[sid]: |
|
|
|
raise FsckError("%s: layout mismatch %s != %s", path, |
|
|
|
layout, self.stream_layout[sid]) |
|
|
|
|
|
|
|
# Every file should have a size that's the multiple of the row size |
|
|
|
rkt = nilmdb.server.rocket.Rocket(layout, None) |
|
|
|
row_size = rkt.binary_size |
|
|
|
rkt.close() |
|
|
|
|
|
|
|
# Find all directories |
|
|
|
regex = re.compile("^[0-9a-f]{4,}$") |
|
|
|
subdirs = sorted(filter(regex.search, os.listdir(bulk)), |
|
|
|
key = lambda x: int(x, 16), reverse = True) |
|
|
|
for subdir in subdirs: |
|
|
|
# Find all files in that dir |
|
|
|
subpath = os.path.join(bulk, subdir) |
|
|
|
files = filter(regex.search, os.listdir(subpath)) |
|
|
|
if not files: |
|
|
|
self.fix_empty_subdir(subpath) |
|
|
|
raise RetryFsck |
|
|
|
# Verify that their size is a multiple of the row size |
|
|
|
for filename in files: |
|
|
|
filepath = os.path.join(subpath, filename) |
|
|
|
offset = os.path.getsize(filepath) |
|
|
|
if offset % row_size: |
|
|
|
self.fix_bad_filesize(path, filepath, offset, row_size) |
|
|
|
|
|
|
|
def check_intervals(self):
    """Check the intervals of every stream against its bulkdata table.

    For each stream id in self.stream_interval, opens the stream's
    table, runs check_table_intervals() on its intervals, and closes
    the table again.  Progress is reported as the number of intervals
    handled so far across all streams.
    """
    total_ints = sum(len(x) for x in self.stream_interval.values())
    log("checking %d intervals\n", total_ints)
    checked = 0
    with Progress(total_ints) as pbar:
        for sid in self.stream_interval:
            path = self.stream_path[sid]
            ints = self.stream_interval[sid]
            # Open the table *before* the try block: if opening fails
            # there is nothing to close, and the finally clause must
            # not touch an unbound (or previous iteration's) table.
            tab = nilmdb.server.bulkdata.Table(self.bulkpath + path)
            try:
                def update(x):
                    # x is the index within this stream; offset it by
                    # the intervals already finished in earlier streams.
                    pbar.update(checked + x)
                self.check_table_intervals(path, ints, tab, update)
                checked += len(ints)
            finally:
                tab.close()
|
|
|
|
|
|
|
def check_table_intervals(self, path, ints, tab, update):
    """Verify that each interval's first and last rows can be read.

    path   -- stream path, used only in error messages.
    ints   -- sequence of (stime, etime, spos, epos) tuples.
    tab    -- table object supporting tab[row] indexing.
    update -- callback invoked with the index of each interval as it
              is processed (progress reporting).

    Raises FsckError if reading row spos or row epos-1 fails for any
    interval with spos != epos.
    """
    # look in the table to make sure we can pick out the interval's
    # endpoints
    # NOTE(review): presumably clears cached open files so rows are
    # read afresh -- confirm against the bulkdata Table implementation.
    tab.file_open.cache_remove_all()
    for (i, intv) in enumerate(ints):
        # Only the row positions are needed; the unpack still insists
        # on a 4-element interval.
        (_stime, _etime, spos, epos) = intv
        update(i)
        if spos == epos:
            # Empty row range: nothing to look up
            continue
        try:
            # Rows are fetched only to prove they are accessible;
            # the values themselves are not inspected.
            tab[spos]
            tab[epos - 1]
        except Exception as e:
            raise FsckError("%s: interval %s error accessing rows: %s",
                            path, str(intv), str(e))