
fix-oversize-files.py 1.7 KiB

#!/usr/bin/env python3
import os
import sys
import pickle
import argparse
import fcntl
import re

from nilmdb.client.numpyclient import layout_to_dtype

parser = argparse.ArgumentParser(
    description="""
    Fix database corruption where binary writes caused too much data to be
    written to the file.  Truncates files to the correct length.  This was
    fixed by b98ff1331a515ad47fd3203615e835b529b039f9.
    """)
parser.add_argument("path", action="store", help='Database root path')
parser.add_argument("-y", "--yes", action="store_true", help='Fix them')
args = parser.parse_args()

# Take an exclusive, non-blocking lock on the database so nothing else
# writes to it while we scan and truncate.
lock = os.path.join(args.path, "data.lock")
with open(lock, "w") as f:
    fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)

    # Pass 1: find every stream directory (marked by a "_format" file) and
    # compute the maximum allowed size of its data files, in bytes.
    fix = {}
    for (path, dirs, files) in os.walk(args.path):
        if "_format" in files:
            # The _format file is a pickle; open it in binary mode.
            with open(os.path.join(path, "_format"), "rb") as format_file:
                fmt = pickle.load(format_file)
            rowsize = layout_to_dtype(fmt["layout"]).itemsize
            maxsize = rowsize * fmt["rows_per_file"]
            fix[path] = maxsize
            if maxsize < 128000000:  # sanity check
                raise Exception("bad maxsize " + str(maxsize))

    # Pass 2: truncate any data file that grew past its maximum size.
    for fixpath in fix:
        for (path, dirs, files) in os.walk(fixpath):
            for fn in files:
                # Data files have hexadecimal names; skip everything else.
                if not re.match("^[0-9a-f]{4,}$", fn):
                    continue
                fn = os.path.join(path, fn)
                size = os.path.getsize(fn)
                maxsize = fix[fixpath]
                if size > maxsize:
                    diff = size - maxsize
                    print(diff, "too big:", fn)
                    if args.yes:
                        with open(fn, "a+") as dbfile:
                            dbfile.truncate(maxsize)
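
For reference, a minimal sketch of the arithmetic behind maxsize: each data file may hold at most rows_per_file rows of the stream's layout, so its size limit is the row size in bytes times rows_per_file. The layout and rows_per_file values below are hypothetical, chosen only for illustration; the real values come from each stream's _format file and nilmdb's layout_to_dtype.

import numpy as np

# Hypothetical layout: one 64-bit timestamp plus eight float32 data columns.
row_dtype = np.dtype([("timestamp", "<i8"), ("data", "<f4", 8)])
rows_per_file = 4194304            # hypothetical value from a _format file

maxsize = row_dtype.itemsize * rows_per_file
print(row_dtype.itemsize, "bytes per row ->", maxsize, "bytes per data file")

Running the script with only the database root path reports oversized files; adding -y (or --yes) actually truncates them back to maxsize.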