Compare commits
5 Commits
b6bba16505
...
7538c6201b
Author | SHA1 | Date | |
---|---|---|---|
7538c6201b | |||
4d9a106ca1 | |||
e90a79ddad | |||
7056c5b4ec | |||
df4e7f0967 |
|
@ -52,7 +52,7 @@ def err(format, *args):
|
||||||
|
|
||||||
|
|
||||||
# Decorator that retries a function if it raises a specific exception
|
# Decorator that retries a function if it raises a specific exception
|
||||||
def retry_if_raised(exc, message=None, max_retries=100):
|
def retry_if_raised(exc, message=None, max_retries=1000):
|
||||||
def f1(func):
|
def f1(func):
|
||||||
def f2(*args, **kwargs):
|
def f2(*args, **kwargs):
|
||||||
for n in range(max_retries):
|
for n in range(max_retries):
|
||||||
|
@ -60,7 +60,7 @@ def retry_if_raised(exc, message=None, max_retries=100):
|
||||||
return func(*args, **kwargs)
|
return func(*args, **kwargs)
|
||||||
except exc:
|
except exc:
|
||||||
if message:
|
if message:
|
||||||
log("%s\n\n", message)
|
log(f"{message} ({n+1})\n\n")
|
||||||
raise Exception("Max number of retries (%d) exceeded; giving up" %
|
raise Exception("Max number of retries (%d) exceeded; giving up" %
|
||||||
max_retries)
|
max_retries)
|
||||||
return f2
|
return f2
|
||||||
|
@ -238,9 +238,7 @@ class Fsck(object):
|
||||||
try:
|
try:
|
||||||
posiset += new
|
posiset += new
|
||||||
except IntervalError:
|
except IntervalError:
|
||||||
raise FsckError("%s: overlap in file offsets:\n"
|
self.fix_row_overlap(sid, path, posiset, new)
|
||||||
"set: %s\nnew: %s",
|
|
||||||
path, str(posiset), str(new))
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Check bulkdata
|
# Check bulkdata
|
||||||
|
@ -268,6 +266,48 @@ class Fsck(object):
|
||||||
path, str(e))
|
path, str(e))
|
||||||
tab.close()
|
tab.close()
|
||||||
|
|
||||||
|
|
||||||
|
def fix_row_overlap(self, sid, path, existing, new):
|
||||||
|
# If the file rows (spos, epos) overlap in the interval table,
|
||||||
|
# and the overlapping ranges look like this:
|
||||||
|
# A --------- C
|
||||||
|
#       B -------- D
|
||||||
|
# Then we can try changing the first interval to go from
|
||||||
|
# A to B instead.
|
||||||
|
msg = (f"{path}: overlap in file offsets:\n"
|
||||||
|
f"existing ranges: {existing}\n"
|
||||||
|
f"overlapping interval: {new}")
|
||||||
|
if not self.fix:
|
||||||
|
raise FixableFsckError(msg)
|
||||||
|
err(f"\n{msg}\nSeeing if we can truncate one of them...\n")
|
||||||
|
|
||||||
|
# See if there's exactly one interval that overlaps the
|
||||||
|
# conflicting one in the right way
|
||||||
|
match = None
|
||||||
|
for intv in self.stream_interval[sid]:
|
||||||
|
(stime, etime, spos, epos) = intv
|
||||||
|
if spos < new.start and epos > new.start:
|
||||||
|
if match:
|
||||||
|
err(f"no, more than one interval matched:\n"
|
||||||
|
f"{intv}\n{match}\n")
|
||||||
|
raise FsckError(f"{path}: unfixable overlap")
|
||||||
|
match = intv
|
||||||
|
if match is None:
|
||||||
|
err(f"no intervals overlapped in the right way\n")
|
||||||
|
raise FsckError(f"{path}: unfixable overlap")
|
||||||
|
|
||||||
|
# Truncate the file position
|
||||||
|
err(f"truncating {match}\n")
|
||||||
|
with self.sql:
|
||||||
|
cur = self.sql.cursor()
|
||||||
|
cur.execute("UPDATE ranges SET end_pos=? "
|
||||||
|
"WHERE stream_id=? AND start_time=? AND "
|
||||||
|
"end_time=? AND start_pos=? AND end_pos=?",
|
||||||
|
(new.start, sid, *match))
|
||||||
|
if cur.rowcount != 1: # pragma: no cover (shouldn't fail)
|
||||||
|
raise FsckError("failed to fix SQL database")
|
||||||
|
raise RetryFsck
|
||||||
|
|
||||||
### Check that bulkdata is good enough to be opened
|
### Check that bulkdata is good enough to be opened
|
||||||
|
|
||||||
@retry_if_raised(RetryFsck)
|
@retry_if_raised(RetryFsck)
|
||||||
|
@ -438,8 +478,8 @@ class Fsck(object):
|
||||||
|
|
||||||
# Otherwise, the only hope is to delete the interval entirely.
|
# Otherwise, the only hope is to delete the interval entirely.
|
||||||
err("*** Deleting the entire interval from SQL.\n")
|
err("*** Deleting the entire interval from SQL.\n")
|
||||||
err("This may leave stale data on disk. To fix that, copy all\n")
|
err("This may leave stale data on disk. To fix that, copy all "
|
||||||
err("data from this stream to a new stream using nilm-copy, then\n")
|
"data from this stream to a new stream using nilm-copy, then\n")
|
||||||
err("remove all data from and destroy %s.\n", path)
|
err("remove all data from and destroy %s.\n", path)
|
||||||
with self.sql:
|
with self.sql:
|
||||||
cur = self.sql.cursor()
|
cur = self.sql.cursor()
|
||||||
|
@ -512,19 +552,33 @@ class Fsck(object):
|
||||||
match = (ts < stime) | (ts >= etime)
|
match = (ts < stime) | (ts >= etime)
|
||||||
if match.any():
|
if match.any():
|
||||||
row = numpy.argmax(match)
|
row = numpy.argmax(match)
|
||||||
|
if ts[row] != 0:
|
||||||
raise FsckError("%s: data timestamp %d at row %d "
|
raise FsckError("%s: data timestamp %d at row %d "
|
||||||
"outside interval range [%d,%d)",
|
"outside interval range [%d,%d)",
|
||||||
path, ts[row], row + start,
|
path, ts[row], row + start,
|
||||||
stime, etime)
|
stime, etime)
|
||||||
|
|
||||||
|
# Timestamp is zero and out of the expected range;
|
||||||
|
# assume file ends with zeroed data and just truncate it.
|
||||||
|
self.fix_table_by_truncating(
|
||||||
|
path, tab, row + start,
|
||||||
|
"data timestamp is out of range, and zero")
|
||||||
|
|
||||||
# Verify that timestamps are monotonic
|
# Verify that timestamps are monotonic
|
||||||
match = numpy.diff(ts) <= 0
|
match = numpy.diff(ts) <= 0
|
||||||
if match.any():
|
if match.any():
|
||||||
row = numpy.argmax(match)
|
row = numpy.argmax(match)
|
||||||
|
if ts[row+1] != 0:
|
||||||
raise FsckError("%s: non-monotonic timestamp (%d -> %d)"
|
raise FsckError("%s: non-monotonic timestamp (%d -> %d)"
|
||||||
" at row %d", path, ts[row], ts[row+1],
|
" at row %d", path, ts[row], ts[row+1],
|
||||||
row + start)
|
row + start)
|
||||||
|
|
||||||
|
# Timestamp is zero and non-monotonic;
|
||||||
|
# assume file ends with zeroed data and just truncate it.
|
||||||
|
self.fix_table_by_truncating(
|
||||||
|
path, tab, row + start + 1,
|
||||||
|
"data timestamp is non-monotonic, and zero")
|
||||||
|
|
||||||
first_ts = ts[0]
|
first_ts = ts[0]
|
||||||
if last_ts is not None and first_ts <= last_ts:
|
if last_ts is not None and first_ts <= last_ts:
|
||||||
raise FsckError("%s: first interval timestamp %d is not "
|
raise FsckError("%s: first interval timestamp %d is not "
|
||||||
|
@ -542,3 +596,15 @@ class Fsck(object):
|
||||||
done += count
|
done += count
|
||||||
update(done)
|
update(done)
|
||||||
return done
|
return done
|
||||||
|
|
||||||
|
def fix_table_by_truncating(self, path, tab, row, reason):
|
||||||
|
# Simple fix for bad data: truncate the table at the given row.
|
||||||
|
# On retry, fix_bad_interval will correct the database and timestamps
|
||||||
|
# to account for this truncation.
|
||||||
|
msg = f"{path}: bad data in table, starting at row {row}: {reason}"
|
||||||
|
if not self.fix:
|
||||||
|
raise FixableFsckError(msg)
|
||||||
|
err(f"\n{msg}\nWill try truncating table\n")
|
||||||
|
(subdir, fname, offs, count) = tab._offset_from_row(row)
|
||||||
|
tab._remove_or_truncate_file(subdir, fname, offs)
|
||||||
|
raise RetryFsck
|
||||||
|
|
BIN
tests/fsck-data/test2w1/data.sql
Normal file
BIN
tests/fsck-data/test2w1/data.sql
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2w1/data/a/b/0000/0000
Normal file
BIN
tests/fsck-data/test2w1/data/a/b/0000/0000
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2w1/data/a/b/_format
Normal file
BIN
tests/fsck-data/test2w1/data/a/b/_format
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2w2/data.sql
Normal file
BIN
tests/fsck-data/test2w2/data.sql
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2w2/data/a/b/0000/0000
Normal file
BIN
tests/fsck-data/test2w2/data/a/b/0000/0000
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2w2/data/a/b/_format
Normal file
BIN
tests/fsck-data/test2w2/data/a/b/_format
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x1/data.sql
Normal file
BIN
tests/fsck-data/test2x1/data.sql
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x1/data.sql-shm
Normal file
BIN
tests/fsck-data/test2x1/data.sql-shm
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x1/data.sql-wal
Normal file
BIN
tests/fsck-data/test2x1/data.sql-wal
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x1/data/a/b/0000/0000
Normal file
BIN
tests/fsck-data/test2x1/data/a/b/0000/0000
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x1/data/a/b/_format
Normal file
BIN
tests/fsck-data/test2x1/data/a/b/_format
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x2/data.sql
Normal file
BIN
tests/fsck-data/test2x2/data.sql
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x2/data.sql-shm
Normal file
BIN
tests/fsck-data/test2x2/data.sql-shm
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x2/data.sql-wal
Normal file
BIN
tests/fsck-data/test2x2/data.sql-wal
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x2/data/a/b/0000/0000
Normal file
BIN
tests/fsck-data/test2x2/data/a/b/0000/0000
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x2/data/a/b/_format
Normal file
BIN
tests/fsck-data/test2x2/data/a/b/_format
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x3/data.sql
Normal file
BIN
tests/fsck-data/test2x3/data.sql
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x3/data.sql-shm
Normal file
BIN
tests/fsck-data/test2x3/data.sql-shm
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x3/data.sql-wal
Normal file
BIN
tests/fsck-data/test2x3/data.sql-wal
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x3/data/a/b/0000/0000
Normal file
BIN
tests/fsck-data/test2x3/data/a/b/0000/0000
Normal file
Binary file not shown.
BIN
tests/fsck-data/test2x3/data/a/b/_format
Normal file
BIN
tests/fsck-data/test2x3/data/a/b/_format
Normal file
Binary file not shown.
|
@ -123,7 +123,7 @@ class TestFsck(object):
|
||||||
self.failmsg("test2h", "missing bulkdata dir")
|
self.failmsg("test2h", "missing bulkdata dir")
|
||||||
self.failmsg("test2i", "bad bulkdata table")
|
self.failmsg("test2i", "bad bulkdata table")
|
||||||
self.failmsg("test2j", "overlap in intervals")
|
self.failmsg("test2j", "overlap in intervals")
|
||||||
self.failmsg("test2k", "overlap in file offsets")
|
self.failmsg("test2k", "overlap in file offsets", fix=False)
|
||||||
self.ok("test2k1")
|
self.ok("test2k1")
|
||||||
self.failmsg("test2l", "unsupported bulkdata version")
|
self.failmsg("test2l", "unsupported bulkdata version")
|
||||||
self.failmsg("test2m", "bad rows_per_file")
|
self.failmsg("test2m", "bad rows_per_file")
|
||||||
|
@ -168,3 +168,17 @@ class TestFsck(object):
|
||||||
self.failmsg("test2v1", "bad bulkdata table")
|
self.failmsg("test2v1", "bad bulkdata table")
|
||||||
self.failmsg("test2v2", "empty, with corrupted format file", fix=False)
|
self.failmsg("test2v2", "empty, with corrupted format file", fix=False)
|
||||||
self.okmsg("test2v2", "empty, with corrupted format file")
|
self.okmsg("test2v2", "empty, with corrupted format file")
|
||||||
|
|
||||||
|
self.failmsg("test2w1", "out of range, and zero", fix=False)
|
||||||
|
self.okmsg("test2w1", "Will try truncating table")
|
||||||
|
self.contain("Deleting the entire interval")
|
||||||
|
|
||||||
|
self.failmsg("test2w2", "non-monotonic, and zero", fix=False)
|
||||||
|
self.okmsg("test2w2", "Will try truncating table")
|
||||||
|
self.contain("new end: time 237000001, pos 238")
|
||||||
|
|
||||||
|
self.failmsg("test2x1", "overlap in file offsets", fix=False)
|
||||||
|
self.okmsg("test2x1", "truncating")
|
||||||
|
|
||||||
|
self.failmsg("test2x2", "unfixable overlap")
|
||||||
|
self.failmsg("test2x3", "unfixable overlap")
|
||||||
|
|
Loading…
Reference in New Issue
Block a user