My backup scripts and tools
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

514 lines
19 KiB

  1. #!.venv/bin/python
  2. # Scan filesystem to generate a list of files to back up, based on a
  3. # configuration file. Pass this list to borg to actually create the
  4. # backup. Execute a notification script on the remote server to
  5. # report the backup status.
  6. import os
  7. import re
  8. import sys
  9. import json
  10. import stat
  11. import time
  12. import select
  13. import pathlib
  14. import threading
  15. import subprocess
  16. import _thread # for interrupt_main
  17. import typing
  18. import yaml
  19. import wcmatch.glob # type: ignore
  20. import humanfriendly # type: ignore
  21. def b2s(raw: bytes) -> str:
  22. return raw.decode(errors='backslashreplace')
  23. def format_size(n: int) -> str:
  24. return humanfriendly.format_size(n, keep_width=True, binary=True)
  25. # Type corresponding to patterns that are generated by
  26. # wcmatch.translate: two lists of compiled REs (a,b). A path matches
  27. # if it matches at least one regex in "a" and none in "b".
  28. MatchPatterns = typing.Tuple[typing.List[re.Pattern], typing.List[re.Pattern]]
  29. class Config:
  30. roots: typing.List[bytes]
  31. one_file_system: bool
  32. exclude_caches: bool
  33. exclude: MatchPatterns
  34. unexclude: MatchPatterns
  35. max_size_rules: typing.List[typing.Tuple[int, MatchPatterns]]
  36. notify_email: typing.Optional[str]
  37. def __init__(self, configfile: str):
  38. # Helper to process lists of patterns into regexes
  39. def process_match_list(config_entry):
  40. raw = config_entry.encode().split(b'\n')
  41. pats = []
  42. # Prepend '**/' to any relative patterns
  43. for x in raw:
  44. if not len(x):
  45. continue
  46. if x.startswith(b'/'):
  47. pats.append(x)
  48. else:
  49. pats.append(b'**/' + x)
  50. # Compile patterns.
  51. (a, b) = wcmatch.glob.translate(
  52. pats, flags=(wcmatch.glob.GLOBSTAR |
  53. wcmatch.glob.DOTGLOB |
  54. wcmatch.glob.NODOTDIR |
  55. wcmatch.glob.EXTGLOB |
  56. wcmatch.glob.BRACE))
  57. return ([ re.compile(x) for x in a ],
  58. [ re.compile(x) for x in b ])
  59. # Read config
  60. with open(configfile, 'r') as f:
  61. config = yaml.safe_load(f)
  62. self.one_file_system = config.get('one-file-system', False)
  63. self.exclude_caches = config.get('exclude-caches', False)
  64. raw = config.get('roots', '').encode().split(b'\n')
  65. self.roots = []
  66. for x in raw:
  67. if not len(x):
  68. continue
  69. self.roots.append(x)
  70. self.roots.sort(key=len)
  71. self.exclude = process_match_list(config.get('exclude', ''))
  72. self.unexclude = process_match_list(config.get('unexclude', ''))
  73. self.max_size_rules = []
  74. rules = { humanfriendly.parse_size(k): v
  75. for k, v in config.get('max-size-rules', {}).items() }
  76. for size in reversed(sorted(rules)):
  77. self.max_size_rules.append(
  78. (size, process_match_list(rules[size])))
  79. self.notify_email = config.get('notify-email', None)
  80. def match_re(self, r: MatchPatterns, path: bytes):
  81. # Path matches if it matches at least one regex in
  82. # r[0] and no regex in r[1].
  83. for a in r[0]:
  84. if a.match(path):
  85. for b in r[1]:
  86. if b.match(path):
  87. return False
  88. return True
  89. return False
class Backup:
    """Scan the filesystem for paths to back up and drive borg.

    run() walks the configured roots and writes the selected paths to
    an output stream; run_borg() launches a borg subprocess, parses its
    JSON log output, and interprets its exit status.
    """

    def __init__(self, config: Config, dry_run: bool):
        self.config = config
        self.dry_run = dry_run
        # Roots already visited during this run, so a root that is
        # nested inside (or listed twice in) another root is skipped.
        self.root_seen: typing.Dict[bytes, bool] = {}
        # Saved log messages (which includes borg output)
        self.logs: typing.List[typing.Tuple[str, str]] = []

    def out(self, path: bytes) -> None:
        # Emit one selected path: newline-separated for readable dry-run
        # output, NUL-separated otherwise (matching the
        # --paths-delimiter "\0" borg option used in main()).
        self.outfile.write(path + (b'\n' if self.dry_run else b'\0'))

    def log(self, letter: str, msg: str, bold: bool=False) -> None:
        """Print a colorized, letter-tagged message to stdout and save
        it in self.logs for the final summary / notification email."""
        colors = {
            'E': 31, # red: error
            'W': 33, # yellow: warning
            'N': 34, # blue: notice, a weaker warning (no email generated)
            'I': 36, # cyan: info, backup.py script output
            'O': 37, # white: regular output from borg
        };
        # Unknown letters fall back to the default color (0).
        c = colors[letter] if letter in colors else 0
        b = "" if bold else "\033[22m"
        sys.stdout.write(f"\033[1;{c}m{letter}:{b} {msg}\033[0m\n")
        sys.stdout.flush()
        self.logs.append((letter, msg))

    def run(self, outfile: typing.IO[bytes]) -> None:
        """Scan every configured root, writing selected paths to outfile."""
        self.outfile = outfile
        for root in self.config.roots:
            if root in self.root_seen:
                self.log('I', f"ignoring root, already seen: {b2s(root)}")
                continue
            try:
                st = os.lstat(root)
                if not stat.S_ISDIR(st.st_mode):
                    # Funnel non-directory roots into the handler below
                    raise NotADirectoryError
            except FileNotFoundError:
                self.log('E', f"root does not exist: {b2s(root)}")
                continue
            except NotADirectoryError:
                self.log('E', f"root is not a directory: {b2s(root)}")
                continue
            self.log('I', f"processing root {b2s(root)}")
            self.scan(root)

    def scan(self, path: bytes,
             parent_st: typing.Optional[os.stat_result] = None) -> None:
        """If the given path should be backed up, print it. If it's
        a directory and its contents should be included, recurse.
        """
        try:
            st = os.lstat(path)
            is_dir = stat.S_ISDIR(st.st_mode)
            is_reg = stat.S_ISREG(st.st_mode)
            # On-disk allocation (st_blocks is in 512-byte units), not
            # the apparent file size.
            size = st.st_blocks * 512

            # Decorated path ends with a '/' if it's a directory.
            decorated_path = path
            if is_dir and not decorated_path.endswith(b'/'):
                decorated_path += b'/'

            # See if there's a reason to exclude it
            exclude_reason = None
            if self.config.match_re(self.config.exclude, decorated_path):
                # Config file says to exclude
                exclude_reason = ('I', f"skipping, excluded by config file")
            elif (self.config.one_file_system
                  and parent_st is not None
                  and is_dir
                  and st.st_dev != parent_st.st_dev):
                # Crosses a mount point
                exclude_reason = ('I', "skipping, on different filesystem")
            elif (is_reg
                  and len(self.config.max_size_rules)
                  and size > self.config.max_size_rules[-1][0]):
                # Check file sizes against our list.
                # Only need to check if the size is bigger than the smallest
                # entry on the list; then, we need to check it against all rules
                # to see which one applies.
                for (max_size, patterns) in self.config.max_size_rules:
                    if self.config.match_re(patterns, decorated_path):
                        if size > max_size:
                            a = format_size(size)
                            b = format_size(max_size)
                            exclude_reason = (
                                'W', f"file size {a} exceeds limit {b}")
                        break

            # If we have a reason to exclude it, stop now unless it's
            # force-included
            force = self.config.match_re(self.config.unexclude, decorated_path)
            if exclude_reason and not force:
                self.log(exclude_reason[0],
                         f"{exclude_reason[1]}: {b2s(path)}")
                return

            # Print path for Borg
            self.out(path)

            # Process directories
            if is_dir:
                # Roots may be written with or without a trailing
                # slash; record whichever spelling matches as seen.
                if path in self.config.roots:
                    self.root_seen[path] = True
                if decorated_path in self.config.roots:
                    self.root_seen[decorated_path] = True

                # Skip if it contains CACHEDIR.TAG
                # (mirroring the --exclude-caches borg option)
                if self.config.exclude_caches:
                    try:
                        tag = b'Signature: 8a477f597d28d172789f06886806bc55'
                        with open(path + b'/CACHEDIR.TAG', 'rb') as f:
                            if f.read(len(tag)) == tag:
                                self.log(
                                    'I', f"skipping, cache dir: {b2s(path)}")
                                return
                    except:
                        # Tag file missing or unreadable: treat as a
                        # normal directory.
                        pass

                # Recurse
                with os.scandir(path) as it:
                    for entry in it:
                        self.scan(path=entry.path, parent_st=st)
        except (FileNotFoundError,
                IsADirectoryError,
                NotADirectoryError,
                PermissionError) as e:
            # Entry vanished mid-scan or is unreadable: log and move on.
            self.log('E', f"can't read {b2s(path)}: {str(e)}")
            return

    def run_borg(self, argv: typing.List[str],
                 stdin_writer: typing.Optional[
                     typing.Callable[[typing.IO[bytes]],
                                     typing.Any]] = None) -> bool:
        """Run a borg command, capturing and displaying output, while feeding
        input using stdin_writer. Returns True on Borg success, False on error.
        """
        # stderr is folded into stdout so one reader sees both the JSON
        # log stream and any stray output.
        borg = subprocess.Popen(argv,
                                stdin=subprocess.PIPE,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        if borg.stdin is None:
            raise Exception("no pipe")

        # Count warnings and errors from Borg, so we can interpret its
        # error codes correctly (e.g. ignoring exit codes if warnings
        # were all harmless).
        borg_saw_warnings = 0
        borg_saw_errors = 0

        # Use a thread to capture output
        def reader_thread(fh):
            nonlocal borg_saw_warnings
            nonlocal borg_saw_errors
            # Timestamp of the last progress line we displayed
            last_progress = 0
            for line in fh:
                try:
                    # With --log-json, each line is one JSON object.
                    data = json.loads(line)
                    if data['type'] == 'log_message':
                        changed_msg = "file changed while we backed it up"
                        if data['levelname'] == 'WARNING':
                            if changed_msg in data['message']:
                                # harmless; don't count as a Borg warning
                                outlevel = 'N'
                            else:
                                borg_saw_warnings += 1
                                outlevel = 'W'
                            output = "warning: "
                        elif data['levelname'] not in ('DEBUG', 'INFO'):
                            borg_saw_errors += 1
                            outlevel = 'E'
                            output = "error: "
                        else:
                            outlevel = 'O'
                            output = ""
                        output += data['message']
                    elif (data['type'] == 'progress_message'
                          and 'message' in data):
                        outlevel = 'O'
                        output = data['message']
                    elif data['type'] == 'archive_progress':
                        # Rate-limit progress output to one line per
                        # 10 seconds.
                        now = time.time()
                        if now - last_progress > 10:
                            last_progress = now
                            def size(short: str, full: str) -> str:
                                return f" {short}={format_size(data[full])}"
                            outlevel = 'O'
                            output = (f"progress:" +
                                      f" files={data['nfiles']}" +
                                      size('orig', 'original_size') +
                                      size('comp', 'compressed_size') +
                                      size('dedup', 'deduplicated_size'))
                        else:
                            continue
                    else:
                        # ignore unknown progress line
                        continue
                except Exception as e:
                    # on error, print raw line with exception
                    outlevel = 'E'
                    output = f"[exception: {str(e)}] " + b2s(line).rstrip()
                self.log(outlevel, output)
            fh.close()

        def _reader_thread(fh):
            try:
                return reader_thread(fh)
            except BrokenPipeError:
                pass
            except Exception:
                # An unexpected reader failure interrupts the main
                # thread (raises KeyboardInterrupt there).
                _thread.interrupt_main()

        reader = threading.Thread(target=_reader_thread, args=(borg.stdout,))
        reader.daemon = True
        reader.start()

        try:
            if stdin_writer:
                # Give borg some time to start, just to clean up stdout
                time.sleep(1)
                stdin_writer(borg.stdin)
        except BrokenPipeError:
            self.log('E', "<broken pipe>")
        finally:
            # Close stdin so borg sees EOF and can finish.
            try:
                borg.stdin.close()
            except BrokenPipeError:
                pass
        borg.wait()
        reader.join()

        # Interpret borg's exit status together with the message counts
        # gathered above.
        ret = borg.returncode
        if ret < 0:
            self.log('E', f"borg exited with signal {-ret}")
        elif ret == 2 or borg_saw_errors:
            self.log('E', f"borg exited with errors (ret={ret})")
        elif ret == 1:
            if borg_saw_warnings:
                self.log('W', f"borg exited with warnings (ret={ret})")
            else:
                # Exit code 1 but every warning was one we deliberately
                # ignored; treat as success.
                return True
        elif ret != 0:
            self.log('E', f"borg exited with unknown error code {ret}")
        else:
            return True
        return False
  315. def main(argv: typing.List[str]):
  316. import argparse
  317. def humansize(string):
  318. return humanfriendly.parse_size(string)
  319. # Parse args
  320. parser = argparse.ArgumentParser(
  321. prog=argv[0],
  322. description="Back up the local system using borg",
  323. formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  324. base = pathlib.Path(__file__).parent
  325. parser.add_argument('-c', '--config',
  326. help="Config file", default=str(base / "config.yaml"))
  327. parser.add_argument('-v', '--vars',
  328. help="Variables file", default=str(base / "vars.sh"))
  329. parser.add_argument('-n', '--dry-run', action="store_true",
  330. help="Just print log output, don't run borg")
  331. parser.add_argument('-d', '--debug', action="store_true",
  332. help="Print filenames for --dry-run")
  333. args = parser.parse_args()
  334. config = Config(args.config)
  335. backup = Backup(config, args.dry_run)
  336. # Parse variables from vars.sh
  337. hostname = os.uname().nodename
  338. borg_sh = str(base / "borg.sh")
  339. notify_sh = str(base / "notify.sh")
  340. try:
  341. with open(args.vars) as f:
  342. for line in f:
  343. m = re.match(r"\s*export\s*([A-Z_]+)=(.*)", line)
  344. if not m:
  345. continue
  346. var = m.group(1)
  347. value = m.group(2)
  348. if var == "HOSTNAME":
  349. hostname = value
  350. if var == "BORG":
  351. borg_sh = value
  352. if var == "BORG_DIR":
  353. notify_sh = str(pathlib.Path(value) / "notify.sh")
  354. except Exception as e:
  355. backup.log('W', f"failed to parse variables from {args.vars}: {str(e)}")
  356. # Run backup
  357. if args.dry_run:
  358. if args.debug:
  359. backup.run(sys.stdout.buffer)
  360. else:
  361. with open(os.devnull, "wb") as out:
  362. backup.run(out)
  363. sys.stdout.flush()
  364. else:
  365. if backup.run_borg([borg_sh,
  366. "create",
  367. "--verbose",
  368. "--progress",
  369. "--log-json",
  370. "--list",
  371. "--filter", "E",
  372. "--stats",
  373. "--checkpoint-interval", "900",
  374. "--compression", "zstd,3",
  375. "--paths-from-stdin",
  376. "--paths-delimiter", "\\0",
  377. "::" + hostname + "-{now:%Y%m%d-%H%M%S}"],
  378. stdin_writer=backup.run):
  379. # backup success; run prune. Note that this won't actually free
  380. # space until a "./borg.sh --rw compact", because we're in
  381. # append-only mode.
  382. backup.log('I', f"pruning archives", bold=True)
  383. backup.run_borg([borg_sh,
  384. "prune",
  385. "--verbose",
  386. "--list",
  387. "--progress",
  388. "--log-json",
  389. "--stats",
  390. "--keep-within=7d",
  391. "--keep-daily=14",
  392. "--keep-weekly=8",
  393. "--keep-monthly=-1",
  394. "--glob-archives", hostname + "-????????-??????"])
  395. # See if we had any errors
  396. warnings = sum(1 for (letter, msg) in backup.logs if letter == 'W')
  397. errors = sum(1 for (letter, msg) in backup.logs if letter == 'E')
  398. def plural(num: int, word: str) -> str:
  399. suffix = "" if num == 1 else "s"
  400. return f"{num} {word}{suffix}"
  401. warnmsg = plural(warnings, "warning") if warnings else None
  402. errmsg = plural(errors, "error") if errors else None
  403. if not warnings and not errors:
  404. backup.log('I', f"backup successful", bold=True)
  405. else:
  406. if warnmsg:
  407. backup.log('W', f"reported {warnmsg}", bold=True)
  408. if errors:
  409. backup.log('E', f"reported {errmsg}", bold=True)
  410. # Send a notification of errors
  411. email = backup.config.notify_email
  412. if email and not args.dry_run:
  413. backup.log('I', f"sending error notification to {email}")
  414. def write_logs(title, only_include=None):
  415. body = [ title ]
  416. for (letter, msg) in backup.logs:
  417. if only_include and letter not in only_include:
  418. continue
  419. # Use a ":" prefix for warnings/errors/notices so that
  420. # the mail reader highlights them.
  421. if letter in "EWN":
  422. prefix = ":"
  423. else:
  424. prefix = " "
  425. body.append(f"{prefix}{letter}: {msg}")
  426. return "\n".join(body).encode()
  427. body_text = write_logs("Logged errors and warnings:", "EWN")
  428. body_text += b"\n\n"
  429. body_text += write_logs("All log messages:")
  430. # Subject summary
  431. if errmsg and warnmsg:
  432. summary = f"{errmsg}, {warnmsg}"
  433. elif errors:
  434. summary = errmsg or ""
  435. else:
  436. summary = warnmsg or ""
  437. # Call notify.sh
  438. res = subprocess.run([notify_sh, summary, email], input=body_text)
  439. if res.returncode != 0:
  440. backup.log('E', f"failed to send notification")
  441. errors += 1
  442. # Exit with an error code if we had any errors
  443. if errors:
  444. return 1
  445. return 0
  446. if __name__ == "__main__":
  447. import sys
  448. raise SystemExit(main(sys.argv))