Browse Source

Add fsck scan for any data timestamps outside interval range

tags/nilmdb-2.1.0
Jim Paris 2 years ago
parent
commit
83daeb148a
11 changed files with 26 additions and 11 deletions
  1. +22
    -10
      nilmdb/fsck/fsck.py
  2. BIN
      tests/fsck-data/test2r/data.sql
  3. BIN
      tests/fsck-data/test2r1/data.sql
  4. BIN
      tests/fsck-data/test2s/data/a/b/0000/0000
  5. BIN
      tests/fsck-data/test2u/data.sql
  6. BIN
      tests/fsck-data/test2u/data/a/b/0000/0000
  7. BIN
      tests/fsck-data/test2u/data/a/b/_format
  8. BIN
      tests/fsck-data/test2u1/data.sql
  9. BIN
      tests/fsck-data/test2u1/data/a/b/0000/0000
  10. BIN
      tests/fsck-data/test2u1/data/a/b/_format
  11. +4
    -1
      tests/test_fsck.py

+ 22
- 10
nilmdb/fsck/fsck.py View File

@@ -468,23 +468,35 @@ class Fsck(object):
"%s: failed to grab rows %d through %d: %s",
path, start, stop, repr(e))

ts = data['timestamp']

# Verify that all timestamps fall within the interval range [stime, etime).
match = (ts < stime) | (ts >= etime)
if match.any():
row = start + numpy.argmax(match)
raise FsckError("%s: data timestamp %d at row %d "
"outside interval range [%d,%d)",
path, data['timestamp'][row], row,
stime, etime)

# Verify that timestamps are strictly increasing (no repeats or decreases)
if (numpy.diff(data['timestamp']) <= 0).any():
raise FsckError("%s: non-monotonic timestamp(s) in rows "
"%d through %d", path, start, stop)
first_ts = data['timestamp'][0]
print("first_ts", first_ts, "last_ts", last_ts)
match = numpy.diff(ts) <= 0
if match.any():
row = start + numpy.argmax(match)
raise FsckError("%s: non-monotonic timestamp (%d -> %d) "
"at row %d", path, ts[row], ts[row+1], row)
first_ts = ts[0]
if last_ts is not None and first_ts <= last_ts:
raise FsckError("%s: first interval timestamp %d is not "
"greater than the previous last interval "
"timestamp %d, at row %d",
path, first_ts, last_ts, start)
last_ts = data['timestamp'][-1]
print("last_ts", last_ts)
last_ts = ts[-1]

# These are probably fixable, by removing the offending
# intervals. But I'm not going to bother implementing
# that yet.
# The previous errors are fixable by removing the
# offending intervals or by changing the data
# timestamps. But such errors are unlikely, so it's
# not worth implementing those fixes yet.

# Done
done += count


BIN
tests/fsck-data/test2r/data.sql View File


BIN
tests/fsck-data/test2r1/data.sql View File


BIN
tests/fsck-data/test2s/data/a/b/0000/0000 View File


BIN
tests/fsck-data/test2u/data.sql View File


BIN
tests/fsck-data/test2u/data/a/b/0000/0000 View File


BIN
tests/fsck-data/test2u/data/a/b/_format View File


BIN
tests/fsck-data/test2u1/data.sql View File


BIN
tests/fsck-data/test2u1/data/a/b/0000/0000 View File


BIN
tests/fsck-data/test2u1/data/a/b/_format View File


+ 4
- 1
tests/test_fsck.py View File

@@ -143,7 +143,7 @@ class TestFsck(object):
self.contain("restarting fsck")
self.ok("test2r2")

self.failmsg("test2s", "non-monotonic timestamp(s)")
self.failmsg("test2s", "non-monotonic timestamp (1000000 -> 12345)")

def check_small_maxrows(f):
f.maxrows_override = 1
@@ -153,6 +153,9 @@ class TestFsck(object):

self.ok("test2t", skip=True)

self.failmsg("test2u", "data timestamp 1234567890 at row 28 outside")
self.failmsg("test2u1", "data timestamp 7 at row 0 outside")

@nilmdb.fsck.fsck.retry_if_raised(Exception, max_retries=3)
def foo():
raise Exception("hi")


Loading…
Cancel
Save