My backup scripts and tools
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

181 lines
6.2 KiB

  1. #!.venv/bin/python
  2. import os
  3. import sys
  4. import stat
  5. from typing import Optional, Tuple
  6. import humanfriendly # type: ignore
  7. import wcmatch.glob # type: ignore
  8. import re
  9. import dataclasses
  10. import enum
  11. class MatchResult(enum.Enum):
  12. INCLUDE_IF_SIZE_OK = 0
  13. INCLUDE_ALWAYS = 1
  14. EXCLUDE_ALWAYS = 2
  15. @dataclasses.dataclass
  16. class PatternRule:
  17. re_inc: list[re.Pattern]
  18. re_exc: list[re.Pattern]
  19. def match(self, path: str) -> Tuple[bool, bool]:
  20. if "big" in path:
  21. print(self, file=sys.stderr)
  22. for inc in self.re_inc:
  23. if inc.match(path):
  24. break
  25. else:
  26. return
  27. for exc in self.re_exc:
  28. if exc.match(path):
  29. return False
  30. return True
  31. class Lister:
  32. def __init__(self, one_file_system: bool, max_size: bool):
  33. self.one_file_system = one_file_system
  34. self.max_size = max_size
  35. if max_size is None:
  36. max_size = float('inf')
  37. self.stdout = os.fdopen(sys.stdout.fileno(), "wb", closefd=False)
  38. # Remember files we've skipped because they were too big, so that
  39. # we can warn again at the end.
  40. self.skipped_size: set[bytes] = set()
  41. # Remember errors
  42. self.skipped_error: set[bytes] = set()
  43. def __del__(self):
  44. self.stdout.close()
  45. def out(self, path: bytes):
  46. # Use '\0\n' as a separator, so that we can both separate it
  47. # cleanly in Borg, and also view it on stdout.
  48. self.stdout.write(path + b'\0\n')
  49. def log(self, letter: str, msg: str):
  50. colors = { 'E': 31, 'W': 33, 'I': 36 };
  51. if letter in colors:
  52. c = colors[letter]
  53. else:
  54. c = 0
  55. sys.stderr.write(f"\033[1;{c}m{letter}:\033[22m {msg}\033[0m\n")
  56. def scan(self, path: bytes,
  57. parent_st: os.stat_result=None,
  58. rules: list[PatternRule]=[]):
  59. """If the given path should be backed up, print it. If it's
  60. a directory and its contents should be included, recurse."""
  61. # Copy the path in string form, for logging and pathspec
  62. # parsing. Otherwise, we use bytes directly.
  63. pathstr = path.decode(errors='backslashreplace')
  64. try:
  65. # See if we match any rules
  66. for r in rules:
  67. if r.match(pathstr):
  68. self.log('I', f"ignore {pathstr}")
  69. return
  70. # Stat the path
  71. st = os.lstat(path)
  72. is_dir = stat.S_ISDIR(st.st_mode)
  73. if is_dir:
  74. # Skip if it crosses a mount point
  75. if self.one_file_system:
  76. if parent_st is not None and st.st_dev != parent_st.st_dev:
  77. self.log('I', f"skipping {pathstr}: "
  78. "on different filesystem")
  79. return
  80. # Add contents of any .nobackup file to our
  81. # parser rules
  82. child_rules = rules
  83. try:
  84. def prepend_base(regex):
  85. if regex[0] != '^':
  86. raise Exception(f'bad regex: {regex}')
  87. return '^' + os.path.join(pathstr, '') + regex[1:]
  88. with open(os.path.join(path, b".nobackup")) as f:
  89. rule = PatternRule([], [])
  90. for line in f:
  91. if line[0] == '#':
  92. continue
  93. (inc, exc) = wcmatch.glob.translate(
  94. [ line.rstrip('\r\n') ],
  95. flags=(wcmatch.glob.NEGATE |
  96. wcmatch.glob.GLOBSTAR |
  97. wcmatch.glob.DOTGLOB |
  98. wcmatch.glob.EXTGLOB |
  99. wcmatch.glob.BRACE))
  100. for x in inc:
  101. rule.re_inc.append(re.compile(prepend_base(x)))
  102. for x in exc:
  103. rule.re_exc.append(re.compile(prepend_base(x)))
  104. child_rules.append(rule)
  105. except FileNotFoundError:
  106. pass
  107. # Recurse and process each entry
  108. with os.scandir(path) as it:
  109. for entry in it:
  110. self.scan(entry.path, st, child_rules)
  111. else:
  112. # For regular files, ensure they're not too big
  113. if stat.S_ISREG(st.st_mode) and st.st_size > self.max_size:
  114. def format_size(n):
  115. return humanfriendly.format_size(
  116. n, keep_width=True, binary=True)
  117. a = format_size(st.st_size)
  118. b = format_size(self.max_size)
  119. self.log('W', f"skipping {pathstr}: "
  120. + f"file size {a} exceeds limit {b}")
  121. self.skipped_size.add(path)
  122. return
  123. # Every other filename gets printed; devices, symlinks, etc
  124. # will get handled by Borg
  125. self.out(path)
  126. except PermissionError as e:
  127. self.log('E', f"can't read {pathstr}")
  128. self.skipped_error.add(path)
  129. return
  130. def main(argv):
  131. import argparse
  132. def humansize(string):
  133. return humanfriendly.parse_size(string)
  134. parser = argparse.ArgumentParser(
  135. prog=argv[0],
  136. description="Build up a directory and file list for backups")
  137. parser.add_argument('-s', '--max-size', type=humansize,
  138. help="Ignore files bigger than this, by default")
  139. parser.add_argument('-x', '--one-file-system', action='store_true',
  140. help="Don't cross mount points when recursing")
  141. parser.add_argument('dirs', metavar='DIR', nargs='+',
  142. help="Root directories to scan recursively")
  143. args = parser.parse_args()
  144. lister = Lister(one_file_system=args.one_file_system,
  145. max_size=args.max_size)
  146. for p in args.dirs:
  147. lister.scan(os.fsencode(p))
  148. if __name__ == "__main__":
  149. import sys
  150. main(sys.argv)