My backup scripts and tools
#!.venv/bin/python
# Scan filesystem to generate a list of files to back up, based on a
# configuration file. Pass this list to borg to actually create the
# backup. Execute a notification script on the remote server to
# report the backup status.
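#
# An illustrative config.yaml sketch (the values below are assumptions made
# for the example; the keys are the ones this script actually reads):
#
#   one-file-system: true
#   exclude-caches: true
#   max-file-size: 512MiB
#   notify-email: user@example.com
#   roots: |
#     /etc
#     /home
#   exclude: |
#     *.cache
#     /var/tmp
#   force-include: |
#     /home/me/keep.cache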

import os
import re
import sys
import stat
import time
import pathlib
import subprocess
import typing

import yaml
import wcmatch.glob  # type: ignore
import humanfriendly  # type: ignore
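
# yaml, wcmatch, and humanfriendly are third-party packages (PyYAML, wcmatch,
# and humanfriendly on PyPI), assumed here to be installed in the .venv that
# the shebang points at; everything else is standard library.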


def pstr(path: bytes) -> str:
    return path.decode(errors='backslashreplace')


class Config:
    roots: list[bytes]
    max_file_size: typing.Optional[int]
    one_file_system: bool
    exclude_caches: bool
    exclude: list[bytes]
    force_include: list[bytes]
    notify_email: typing.Optional[str]

    def __init__(self, configfile: str):
        # Read config
        with open(configfile, 'r') as f:
            config = yaml.safe_load(f)

        self.one_file_system = config.get('one-file-system', False)
        self.exclude_caches = config.get('exclude-caches', False)

        if 'max-file-size' in config:
            self.max_file_size = humanfriendly.parse_size(
                config['max-file-size'])
        else:
            self.max_file_size = None

        raw = config.get('roots', '').encode().split(b'\n')
        self.roots = []
        for x in raw:
            if not len(x):
                continue
            self.roots.append(x)
        self.roots.sort(key=len)

        def process_match_list(config_name):
            raw = config.get(config_name, '').encode().split(b'\n')
            pats = []
            # Prepend '**/' to any relative patterns
            for x in raw:
                if not len(x):
                    continue
                if x.startswith(b'/'):
                    pats.append(x)
                else:
                    pats.append(b'**/' + x)
            return pats
        self.exclude = process_match_list('exclude')
        self.force_include = process_match_list('force-include')

        self.notify_email = config.get('notify-email', None)

        # Compile patterns
        flags = (wcmatch.glob.GLOBSTAR |
                 wcmatch.glob.DOTGLOB |
                 wcmatch.glob.NODOTDIR |
                 wcmatch.glob.EXTGLOB |
                 wcmatch.glob.BRACE)

        # Path matches if it matches at least one regex in "a" and no
        # regex in "b"
        (a, b) = wcmatch.glob.translate(self.exclude, flags=flags)
        self.exclude_re = ([ re.compile(x) for x in a ],
                           [ re.compile(x) for x in b ])

        (a, b) = wcmatch.glob.translate(self.force_include, flags=flags)
        self.force_include_re = ([ re.compile(x) for x in a ],
                                 [ re.compile(x) for x in b ])

    def match_re(self, re: tuple[list[typing.Pattern],
                                 list[typing.Pattern]], path: bytes):
        # Path matches if it matches at least one regex in
        # re[0] and no regex in re[1].
        for a in re[0]:
            if a.match(path):
                for b in re[1]:
                    if b.match(path):
                        return False
                return True
        return False
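
# Worked example of the matching rules above (the paths and patterns here are
# hypothetical, not from any real config): with "*.cache" in exclude and
# "/home/me/keep.cache" in force-include, the relative pattern is rewritten to
# "**/*.cache" and so matches anywhere in the tree; the scan below would then
# skip /home/me/stale.cache but still emit /home/me/keep.cache, because a
# force-include match overrides an exclude match.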


class Backup:
    def __init__(self, config: Config, dry_run: bool):
        self.config = config
        self.dry_run = dry_run
        self.root_seen: dict[bytes, bool] = {}

        # All logged messages, with severity
        self.logs: list[tuple[str, str]] = []

    def out(self, path: bytes):
        # Dry runs emit newline-delimited paths for readability; real runs
        # emit NUL-delimited paths for borg.
        self.outfile.write(path + (b'\n' if self.dry_run else b'\0'))

    def log(self, letter: str, msg: str):
        colors = { 'E': 31, 'W': 33, 'I': 36 }
        if letter in colors:
            c = colors[letter]
        else:
            c = 0
        sys.stderr.write(f"\033[1;{c}m{letter}:\033[22m {msg}\033[0m\n")
        self.logs.append((letter, msg))

    def run(self, outfile: typing.IO[bytes]):
        self.outfile = outfile
        for root in self.config.roots:
            if root in self.root_seen:
                self.log('I', f"ignoring root, already seen: {pstr(root)}")
                continue
            try:
                st = os.lstat(root)
                if not stat.S_ISDIR(st.st_mode):
                    raise NotADirectoryError
            except FileNotFoundError:
                self.log('W', f"ignoring root, does not exist: {pstr(root)}")
                continue
            except NotADirectoryError:
                self.log('W', f"ignoring root, not a directory: {pstr(root)}")
                continue
            self.log('I', f"processing root {pstr(root)}")
            self.scan(root)

    def scan(self, path: bytes,
             parent_st: typing.Optional[os.stat_result] = None):
        """If the given path should be backed up, print it. If it's
        a directory and its contents should be included, recurse.
        """
        try:
            st = os.lstat(path)
            is_dir = stat.S_ISDIR(st.st_mode)
            is_reg = stat.S_ISREG(st.st_mode)
            size = st.st_blocks * 512

            # Decorated path ends with a '/' if it's a directory.
            decorated_path = path
            if is_dir and not decorated_path.endswith(b'/'):
                decorated_path += b'/'

            # See if there's a reason to exclude it
            exclude_reason = None

            if self.config.match_re(self.config.exclude_re, decorated_path):
                # Config file says to exclude
                exclude_reason = ('I', "skipping, excluded by config file")

            elif (self.config.one_file_system
                  and parent_st is not None
                  and is_dir
                  and st.st_dev != parent_st.st_dev):
                # Crosses a mount point
                exclude_reason = ('I', "skipping, on different filesystem")

            elif (is_reg
                  and self.config.max_file_size
                  and size > self.config.max_file_size):
                # Too big
                def format_size(n):
                    return humanfriendly.format_size(
                        n, keep_width=True, binary=True)
                a = format_size(size)
                b = format_size(self.config.max_file_size)
                exclude_reason = ('W', f"file size {a} exceeds limit {b}")

            # If we have a reason to exclude it, stop now unless it's
            # force-included
            force = self.config.match_re(self.config.force_include_re,
                                         decorated_path)
            if exclude_reason and not force:
                self.log(exclude_reason[0],
                         f"{exclude_reason[1]}: {pstr(path)}")
                return

            # Print path for Borg
            self.out(path)

            # Process directories
            if is_dir:
                if path in self.config.roots:
                    self.root_seen[path] = True
                if decorated_path in self.config.roots:
                    self.root_seen[decorated_path] = True

                # Skip if it contains CACHEDIR.TAG
                # (mirroring the --exclude-caches borg option)
                if self.config.exclude_caches:
                    try:
                        tag = b'Signature: 8a477f597d28d172789f06886806bc55'
                        with open(path + b'/CACHEDIR.TAG', 'rb') as f:
                            if f.read(len(tag)) == tag:
                                self.log(
                                    'I', f"skipping, cache dir: {pstr(path)}")
                                return
                    except OSError:
                        pass

                # Recurse
                with os.scandir(path) as it:
                    for entry in it:
                        self.scan(path=entry.path, parent_st=st)

        except PermissionError:
            self.log('E', f"can't read {pstr(path)}")
            return
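
# A rough sketch of what the dry-run log output looks like (paths, sizes, and
# the specific lines shown are illustrative only; the message texts come from
# the log() calls above, and colors are omitted):
#
#   I: processing root /home
#   I: skipping, excluded by config file: /home/me/.cache
#   W: file size 1.07 GiB exceeds limit 512.00 MiB: /home/me/big.iso
#   E: can't read /home/other/private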


def main(argv: list[str]):
    import argparse

    def humansize(string):
        return humanfriendly.parse_size(string)

    parser = argparse.ArgumentParser(
        prog=argv[0],
        description="Back up the local system using borg",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    base = pathlib.Path(__file__).parent
    parser.add_argument('-c', '--config',
                        help="Config file", default=str(base / "config.yaml"))
    parser.add_argument('-b', '--borg',
                        help="Borg command", default=str(base / "borg.sh"))
    parser.add_argument('-n', '--dry-run', action="store_true",
                        help="Just print log output, don't run borg")
    parser.add_argument('-d', '--debug', action="store_true",
                        help="Print filenames for --dry-run")

    args = parser.parse_args()

    config = Config(args.config)
    backup = Backup(config, args.dry_run)

    if args.dry_run:
        if args.debug:
            backup.run(sys.stdout.buffer)
        else:
            with open(os.devnull, "wb") as out:
                backup.run(out)
    else:
        borg = subprocess.Popen([args.borg,
                                 "create",
                                 "--verbose",
                                 "--list",
                                 "--filter", "E",
                                 "--stats",
                                 "--checkpoint-interval", "900",
                                 "--compression", "zstd,3",
                                 "--paths-from-stdin",
                                 "--paths-delimiter", "\\0",
                                 "::'{hostname}-{now:%Y%m%d-%H%M%S}'"],
                                stdin=subprocess.PIPE)
        if borg.stdin is None:
            raise Exception("no pipe")
        try:
            # Give borg some time to start, just to clean up stdout
            time.sleep(2)
            backup.run(borg.stdin)
        except BrokenPipeError:
            sys.stderr.write("broken pipe\n")
        finally:
            try:
                borg.stdin.close()
            except BrokenPipeError:
                pass
        borg.wait()
        ret = borg.returncode
        if ret < 0:
            sys.stderr.write(f"error: process exited with signal {-ret}\n")
            return 1
        elif ret != 0:
            sys.stderr.write(f"error: process exited with return code {ret}\n")
            return ret

    return 0


if __name__ == "__main__":
    raise SystemExit(main(sys.argv))
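
# Example invocations (the script filename here is an assumption; borg.sh is
# the wrapper referenced by the --borg default and is not shown here):
#
#   ./backup.py --dry-run            # log what would be included/excluded
#   ./backup.py --dry-run --debug    # also print the would-be file list to stdout
#   ./backup.py                      # feed the NUL-delimited list to borg create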