My backup scripts and tools
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

446 lines
16 KiB

  1. #!.venv/bin/python
  2. # Scan filesystem to generate a list of files to back up, based on a
  3. # configuration file. Pass this list to borg to actually create the
  4. # backup. Execute a notification script on the remote server to
  5. # report the backup status.
  6. import os
  7. import re
  8. import sys
  9. import json
  10. import stat
  11. import time
  12. import select
  13. import pathlib
  14. import threading
  15. import subprocess
  16. import typing
  17. import yaml
  18. import wcmatch.glob # type: ignore
  19. import humanfriendly # type: ignore
  20. def pstr(path: bytes) -> str:
  21. return path.decode(errors='backslashreplace')
  22. def format_size(n: int) -> str:
  23. return humanfriendly.format_size(n, keep_width=True, binary=True)
  24. class Config:
  25. roots: list[bytes]
  26. max_file_size: typing.Optional[int]
  27. one_file_system: bool
  28. exclude_caches: bool
  29. exclude: list[bytes]
  30. force_include: list[bytes]
  31. notify_email: typing.Optional[str]
  32. def __init__(self, configfile: str):
  33. # Read config
  34. with open(configfile, 'r') as f:
  35. config = yaml.safe_load(f)
  36. self.one_file_system = config.get('one-file-system', False)
  37. self.exclude_caches = config.get('exclude-caches', False)
  38. if 'max-file-size' in config:
  39. self.max_file_size = humanfriendly.parse_size(
  40. config['max-file-size'])
  41. else:
  42. self.max_file_size = None
  43. raw = config.get('roots', '').encode().split(b'\n')
  44. self.roots = []
  45. for x in raw:
  46. if not len(x):
  47. continue
  48. self.roots.append(x)
  49. self.roots.sort(key=len)
  50. def process_match_list(config_name):
  51. raw = config.get(config_name, '').encode().split(b'\n')
  52. pats = []
  53. # Prepend '**/' to any relative patterns
  54. for x in raw:
  55. if not len(x):
  56. continue
  57. if x.startswith(b'/'):
  58. pats.append(x)
  59. else:
  60. pats.append(b'**/' + x)
  61. return pats
  62. self.exclude = process_match_list('exclude')
  63. self.force_include = process_match_list('force-include')
  64. self.notify_email = config.get('notify-email', None)
  65. # Compile patterns
  66. flags = (wcmatch.glob.GLOBSTAR |
  67. wcmatch.glob.DOTGLOB |
  68. wcmatch.glob.NODOTDIR |
  69. wcmatch.glob.EXTGLOB |
  70. wcmatch.glob.BRACE)
  71. # Path matches if it matches at least one regex in "a" and no
  72. # regex in "b"
  73. (a, b) = wcmatch.glob.translate(self.exclude, flags=flags)
  74. self.exclude_re = ([ re.compile(x) for x in a ],
  75. [ re.compile(x) for x in b ])
  76. (a, b) = wcmatch.glob.translate(self.force_include, flags=flags)
  77. self.force_include_re = ([ re.compile(x) for x in a ],
  78. [ re.compile(x) for x in b ])
  79. def match_re(self, re: tuple[list[typing.Pattern],
  80. list[typing.Pattern]], path: bytes):
  81. # Path matches if it matches at least one regex in
  82. # re[0] and no regex in re[1].
  83. for a in re[0]:
  84. if a.match(path):
  85. for b in re[1]:
  86. if b.match(path):
  87. return False
  88. return True
  89. return False
  90. class Backup:
  91. def __init__(self, config: Config, dry_run: bool):
  92. self.config = config
  93. self.dry_run = dry_run
  94. self.root_seen: dict[bytes, bool] = {}
  95. # Saved log messages
  96. self.logs: list[tuple[str, str]] = []
  97. def out(self, path: bytes):
  98. self.outfile.write(path + (b'\n' if self.dry_run else b'\0'))
  99. def log(self, letter: str, msg: str, bold: bool=False):
  100. colors = { 'E': 31, 'W': 33, 'I': 36 };
  101. c = colors[letter] if letter in colors else 0
  102. b = "" if bold else "\033[22m"
  103. sys.stderr.write(f"\033[1;{c}m{letter}:{b} {msg}\033[0m\n")
  104. self.logs.append((letter, msg))
  105. def run(self, outfile: typing.IO[bytes]):
  106. self.outfile = outfile
  107. for root in self.config.roots:
  108. if root in self.root_seen:
  109. self.log('I', f"ignoring root, already seen: {pstr(root)}")
  110. continue
  111. try:
  112. st = os.lstat(root)
  113. if not stat.S_ISDIR(st.st_mode):
  114. raise NotADirectoryError
  115. except FileNotFoundError:
  116. self.log('E', f"root does not exist: {pstr(root)}")
  117. continue
  118. except NotADirectoryError:
  119. self.log('E', f"root is not a directory: {pstr(root)}")
  120. continue
  121. self.log('I', f"processing root {pstr(root)}")
  122. self.scan(root)
  123. def scan(self, path: bytes, parent_st: os.stat_result=None):
  124. """If the given path should be backed up, print it. If it's
  125. a directory and its contents should be included, recurse.
  126. """
  127. try:
  128. st = os.lstat(path)
  129. is_dir = stat.S_ISDIR(st.st_mode)
  130. is_reg = stat.S_ISREG(st.st_mode)
  131. size = st.st_blocks * 512
  132. # Decorated path ends with a '/' if it's a directory.
  133. decorated_path = path
  134. if is_dir and not decorated_path.endswith(b'/'):
  135. decorated_path += b'/'
  136. # See if there's a reason to exclude it
  137. exclude_reason = None
  138. if self.config.match_re(self.config.exclude_re, decorated_path):
  139. # Config file says to exclude
  140. exclude_reason = ('I', f"skipping, excluded by config file")
  141. elif (self.config.one_file_system
  142. and parent_st is not None
  143. and is_dir
  144. and st.st_dev != parent_st.st_dev):
  145. # Crosses a mount point
  146. exclude_reason = ('I', "skipping, on different filesystem")
  147. elif (is_reg
  148. and self.config.max_file_size
  149. and size > self.config.max_file_size):
  150. # Too big
  151. a = format_size(size)
  152. b = format_size(self.config.max_file_size)
  153. exclude_reason = ('W', f"file size {a} exceeds limit {b}")
  154. # If we have a reason to exclude it, stop now unless it's
  155. # force-included
  156. force = self.config.match_re(self.config.force_include_re,
  157. decorated_path)
  158. if exclude_reason and not force:
  159. self.log(exclude_reason[0],
  160. f"{exclude_reason[1]}: {pstr(path)}")
  161. return
  162. # Print path for Borg
  163. self.out(path)
  164. # Process directories
  165. if is_dir:
  166. if path in self.config.roots:
  167. self.root_seen[path] = True
  168. if decorated_path in self.config.roots:
  169. self.root_seen[decorated_path] = True
  170. # Skip if it contains CACHEDIR.TAG
  171. # (mirroring the --exclude-caches borg option)
  172. if self.config.exclude_caches:
  173. try:
  174. tag = b'Signature: 8a477f597d28d172789f06886806bc55'
  175. with open(path + b'/CACHEDIR.TAG', 'rb') as f:
  176. if f.read(len(tag)) == tag:
  177. self.log(
  178. 'I', f"skipping, cache dir: {pstr(path)}")
  179. return
  180. except:
  181. pass
  182. # Recurse
  183. with os.scandir(path) as it:
  184. for entry in it:
  185. self.scan(path=entry.path, parent_st=st)
  186. except (FileNotFoundError,
  187. IsADirectoryError,
  188. NotADirectoryError,
  189. PermissionError) as e:
  190. self.log('E', f"can't read {pstr(path)}: {str(e)}")
  191. return
  192. def main(argv: list[str]):
  193. import argparse
  194. def humansize(string):
  195. return humanfriendly.parse_size(string)
  196. # Parse args
  197. parser = argparse.ArgumentParser(
  198. prog=argv[0],
  199. description="Back up the local system using borg",
  200. formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  201. base = pathlib.Path(__file__).parent
  202. parser.add_argument('-c', '--config',
  203. help="Config file", default=str(base / "config.yaml"))
  204. parser.add_argument('-v', '--vars',
  205. help="Variables file", default=str(base / "vars.sh"))
  206. parser.add_argument('-n', '--dry-run', action="store_true",
  207. help="Just print log output, don't run borg")
  208. parser.add_argument('-d', '--debug', action="store_true",
  209. help="Print filenames for --dry-run")
  210. args = parser.parse_args()
  211. config = Config(args.config)
  212. backup = Backup(config, args.dry_run)
  213. # Parse variables from vars.sh
  214. hostname = os.uname().nodename
  215. borg_sh = str(base / "borg.sh")
  216. notify_sh = str(base / "notify.sh")
  217. try:
  218. with open(args.vars) as f:
  219. for line in f:
  220. m = re.match(r"\s*export\s*([A-Z_]+)=(.*)", line)
  221. if not m:
  222. continue
  223. var = m.group(1)
  224. value = m.group(2)
  225. if var == "HOSTNAME":
  226. hostname = value
  227. if var == "BORG":
  228. borg_sh = value
  229. if var == "BORG_DIR":
  230. notify_sh = str(pathlib.Path(value) / "notify.sh")
  231. except Exception as e:
  232. backup.log('W', f"failed to parse variables from {args.vars}: {str(e)}")
  233. # Run backup
  234. captured_output: list[bytes] = []
  235. if args.dry_run:
  236. if args.debug:
  237. backup.run(sys.stdout.buffer)
  238. else:
  239. with open(os.devnull, "wb") as out:
  240. backup.run(out)
  241. sys.stdout.flush()
  242. else:
  243. borg = subprocess.Popen([borg_sh,
  244. "create",
  245. "--verbose",
  246. "--progress",
  247. "--log-json",
  248. "--list",
  249. "--filter", "E",
  250. "--stats",
  251. "--checkpoint-interval", "900",
  252. "--compression", "zstd,3",
  253. "--paths-from-stdin",
  254. "--paths-delimiter", "\\0",
  255. "::" + hostname + "-{now:%Y%m%d-%H%M%S}"],
  256. stdin=subprocess.PIPE,
  257. stdout=subprocess.PIPE,
  258. stderr=subprocess.STDOUT)
  259. if borg.stdin is None:
  260. raise Exception("no pipe")
  261. borg_saw_warnings = 0
  262. borg_saw_errors = 0
  263. # Use a thread to capture output
  264. def reader_thread(fh):
  265. last_progress = 0
  266. for line in fh:
  267. try:
  268. data = json.loads(line)
  269. if ((data['type'] == 'log_message' or
  270. data['type'] == 'progress_message')
  271. and 'message' in data):
  272. # Count warnings and errors, but ignore some.
  273. changed_msg = "file changed while we backed it up"
  274. if data['levelname'] == 'WARNING':
  275. prefix = "warning: "
  276. if changed_msg not in data['message']:
  277. borg_saw_warnings += 1
  278. elif data['levelname'] not in ('DEBUG', 'INFO'):
  279. prefix = "error: "
  280. borg_saw_errors += 1
  281. else:
  282. prefix = ""
  283. line = (prefix + data['message'] + '\n').encode()
  284. elif data['type'] == 'archive_progress':
  285. now = time.time()
  286. if now - last_progress > 10:
  287. last_progress = now
  288. def size(short: str, full: str) -> str:
  289. return f" {short}={format_size(data[full])}"
  290. line = (f"progress:" +
  291. f" files={data['nfiles']}" +
  292. size('orig', 'original_size') +
  293. size('comp', 'compressed_size') +
  294. size('dedup', 'deduplicated_size') +
  295. f" path={data['path']}" +
  296. "\n").encode()
  297. else:
  298. continue
  299. else:
  300. # ignore unknown progress line
  301. continue
  302. except Exception as e:
  303. # on error, print raw line
  304. pass
  305. sys.stdout.buffer.write(line)
  306. sys.stdout.flush()
  307. captured_output.append(line)
  308. fh.close()
  309. reader = threading.Thread(target=reader_thread, args=(borg.stdout,))
  310. reader.daemon = True
  311. reader.start()
  312. try:
  313. # Give borg some time to start, just to clean up stdout
  314. time.sleep(1)
  315. backup.run(borg.stdin)
  316. except BrokenPipeError:
  317. sys.stderr.write(f"broken pipe\n")
  318. finally:
  319. try:
  320. borg.stdin.close()
  321. except BrokenPipeError:
  322. pass
  323. borg.wait()
  324. reader.join()
  325. ret = borg.returncode
  326. if ret < 0:
  327. backup.log('E', f"borg exited with signal {-ret}")
  328. elif ret == 2 or borg_saw_errors:
  329. backup.log('E', f"borg exited with errors (ret={ret})")
  330. elif ret == 1 and borg_saw_warnings:
  331. backup.log('W', f"borg exited with warnings (ret={ret})")
  332. elif ret != 0:
  333. backup.log('E', f"borg exited with unknown error code {ret}")
  334. # See if we had any errors
  335. warnings = sum(1 for (letter, msg) in backup.logs if letter == 'W')
  336. errors = sum(1 for (letter, msg) in backup.logs if letter == 'E')
  337. def plural(num: int, word: str) -> str:
  338. suffix = "" if num == 1 else "s"
  339. return f"{num} {word}{suffix}"
  340. warnmsg = plural(warnings, "warning") if warnings else None
  341. errmsg = plural(errors, "error") if errors else None
  342. if not warnings and not errors:
  343. backup.log('I', f"backup successful", bold=True)
  344. else:
  345. if warnmsg:
  346. backup.log('W', f"reported {warnmsg}", bold=True)
  347. if errors:
  348. backup.log('E', f"reported {errmsg}", bold=True)
  349. # Send a notification of errors
  350. email = backup.config.notify_email
  351. if email and not args.dry_run:
  352. backup.log('I', f"sending error notification to {email}")
  353. # Show all of our warnings and errors. Use a ">" prefix
  354. # so warnings and errors get highlighted by the mail reader.
  355. body = [ "Logs from backup.py:" ]
  356. for (letter, msg) in backup.logs:
  357. if letter == "E" or letter == "W":
  358. prefix = ">"
  359. else:
  360. prefix = " "
  361. body.append(f"{prefix}{letter}: {msg}")
  362. body_text = "\n".join(body).encode()
  363. # Followed by borg output
  364. body_text += b"\n\nBorg output:\n" + b"".join(captured_output)
  365. # Subject summary
  366. if errmsg and warnmsg:
  367. summary = f"{errmsg}, {warnmsg}"
  368. elif errors:
  369. summary = errmsg or ""
  370. else:
  371. summary = warnmsg or ""
  372. # Call notify.sh
  373. res = subprocess.run([notify_sh, summary, email], input=body_text)
  374. if res.returncode != 0:
  375. backup.log('E', f"failed to send notification")
  376. errors += 1
  377. # Exit with an error code if we had any errors
  378. if errors:
  379. return 1
  380. return 0
  381. if __name__ == "__main__":
  382. import sys
  383. raise SystemExit(main(sys.argv))