My backup scripts and tools
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

backup.py 9.9 KiB

10 months ago
10 months ago
10 months ago
10 months ago
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. #!.venv/bin/python
  2. # Scan filesystem to generate a list of files to back up, based on a
  3. # configuration file. Pass this list to borg to actually create the
  4. # backup. Execute a notification script on the remote server to
  5. # report the backup status.
  6. import os
  7. import re
  8. import sys
  9. import stat
  10. import time
  11. import pathlib
  12. import subprocess
  13. import typing
  14. import yaml
  15. import wcmatch.glob # type: ignore
  16. import humanfriendly # type: ignore
  17. def pstr(path: bytes) -> str:
  18. return path.decode(errors='backslashreplace')
  19. class Config:
  20. root: bytes
  21. max_file_size: typing.Optional[int]
  22. one_file_system: bool
  23. exclude_caches: bool
  24. exclude: list[bytes]
  25. force_include: list[bytes]
  26. notify_email: typing.Optional[str]
  27. def __init__(self, configfile: str):
  28. # Read config
  29. with open(configfile, 'r') as f:
  30. config = yaml.safe_load(f)
  31. self.root = config['root'].encode()
  32. self.one_file_system = config.get('one-file-system', False)
  33. self.exclude_caches = config.get('exclude-caches', False)
  34. if 'max-file-size' in config:
  35. self.max_file_size = humanfriendly.parse_size(
  36. config['max-file-size'])
  37. else:
  38. self.max_file_size = None
  39. def process_match_list(config_name):
  40. raw = config.get(config_name, '').encode().split(b'\n')
  41. pats = []
  42. # Prepend '**/' to any relative patterns
  43. for x in raw:
  44. if not len(x):
  45. continue
  46. if x.startswith(b'/'):
  47. pats.append(x)
  48. else:
  49. pats.append(b'**/' + x)
  50. return pats
  51. self.exclude = process_match_list('exclude')
  52. self.force_include = process_match_list('force-include')
  53. self.notify_email = config.get('notify-email', None)
  54. # Compile patterns
  55. flags = (wcmatch.glob.GLOBSTAR |
  56. wcmatch.glob.DOTGLOB |
  57. wcmatch.glob.NODOTDIR |
  58. wcmatch.glob.EXTGLOB |
  59. wcmatch.glob.BRACE)
  60. # Path matches if it matches at least one regex in "a" and no
  61. # regex in "b"
  62. (a, b) = wcmatch.glob.translate(self.exclude, flags=flags)
  63. self.exclude_re = ([ re.compile(x) for x in a ],
  64. [ re.compile(x) for x in b ])
  65. (a, b) = wcmatch.glob.translate(self.force_include, flags=flags)
  66. self.force_include_re = ([ re.compile(x) for x in a ],
  67. [ re.compile(x) for x in b ])
  68. def match_re(self, re: tuple[list[typing.Pattern],
  69. list[typing.Pattern]], path: bytes):
  70. # Path matches if it matches at least one regex in
  71. # re[0] and no regex in re[1].
  72. for a in re[0]:
  73. if a.match(path):
  74. for b in re[1]:
  75. if b.match(path):
  76. return False
  77. return True
  78. return False
  79. class Backup:
  80. def __init__(self, config: Config, dry_run: bool):
  81. self.config = config
  82. self.dry_run = dry_run
  83. # All logged messages, with severity
  84. self.logs: list[tuple[str, str]] = []
  85. def out(self, path: bytes):
  86. self.outfile.write(path + (b'\n' if self.dry_run else b'\0'))
  87. def log(self, letter: str, msg: str):
  88. colors = { 'E': 31, 'W': 33, 'I': 36 };
  89. if letter in colors:
  90. c = colors[letter]
  91. else:
  92. c = 0
  93. sys.stderr.write(f"\033[1;{c}m{letter}:\033[22m {msg}\033[0m\n")
  94. self.logs.append((letter, msg))
  95. def run(self, outfile: typing.IO[bytes]):
  96. self.outfile = outfile
  97. # Base should not end with a slash, but full path should
  98. if self.config.root.endswith(b'/'):
  99. base = self.config.root[:-1]
  100. path = self.config.root
  101. else:
  102. base = self.config.root
  103. path = self.config.root + b'/'
  104. self.scan(base, path)
  105. def scan(self, base: bytes, path: bytes,
  106. parent_st: os.stat_result=None):
  107. """If the given path should be backed up, print it. If it's
  108. a directory and its contents should be included, recurse.
  109. """
  110. if base.endswith(b'/'):
  111. raise Exception("base must not end with /")
  112. relpath = path[len(base):]
  113. if not relpath.startswith(b'/'):
  114. raise Exception(f"relative path (from {repr(base)}, {repr(path)})"
  115. + f" must start with /")
  116. try:
  117. st = os.lstat(path)
  118. is_dir = stat.S_ISDIR(st.st_mode)
  119. is_reg = stat.S_ISREG(st.st_mode)
  120. size = st.st_blocks * 512
  121. # Decorated path ends with a '/' if it's a directory.
  122. decorated_path = path
  123. if is_dir and not decorated_path.endswith(b'/'):
  124. decorated_path += b'/'
  125. # See if there's a reason to exclude it
  126. exclude_reason = None
  127. if self.config.match_re(self.config.exclude_re, decorated_path):
  128. # Config file says to exclude
  129. exclude_reason = ('I', f"skipping, excluded by config file")
  130. elif (self.config.one_file_system
  131. and parent_st is not None
  132. and is_dir
  133. and st.st_dev != parent_st.st_dev):
  134. # Crosses a mount point
  135. exclude_reason = ('I', "skipping, on different filesystem")
  136. elif (is_reg
  137. and self.config.max_file_size
  138. and size > self.config.max_file_size):
  139. # Too big
  140. def format_size(n):
  141. return humanfriendly.format_size(
  142. n, keep_width=True, binary=True)
  143. a = format_size(size)
  144. b = format_size(self.config.max_file_size)
  145. exclude_reason = ('W', f"file size {a} exceeds limit {b}")
  146. # If we have a reason to exclude it, stop now unless it's
  147. # force-included
  148. force = self.config.match_re(self.config.force_include_re,
  149. decorated_path)
  150. if exclude_reason and not force:
  151. self.log(exclude_reason[0],
  152. f"{exclude_reason[1]}: {pstr(path)}")
  153. return
  154. # Print path for Borg
  155. self.out(path)
  156. # Process directories
  157. if is_dir:
  158. # Skip if it contains CACHEDIR.TAG
  159. # (mirroring the --exclude-caches borg option)
  160. if self.config.exclude_caches:
  161. try:
  162. tag = b'Signature: 8a477f597d28d172789f06886806bc55'
  163. with open(path + b'/CACHEDIR.TAG', 'rb') as f:
  164. if f.read(len(tag)) == tag:
  165. self.log(
  166. 'I', f"skipping, cache dir: {pstr(path)}")
  167. return
  168. except:
  169. pass
  170. # Recurse
  171. with os.scandir(path) as it:
  172. for entry in it:
  173. self.scan(base=base, path=entry.path,
  174. parent_st=st)
  175. except PermissionError as e:
  176. self.log('E', f"can't read {pstr(path)}")
  177. return
def main(argv: list[str]):
    """Parse arguments, scan the filesystem per the config file, and feed
    the resulting path list to borg (or just log it with --dry-run).

    Returns a process exit code: nonzero when borg fails or is killed.
    """
    import argparse

    def humansize(string):
        # Parse a human-readable size like "100MB" into bytes.
        # NOTE(review): not referenced below; presumably a leftover
        # argparse type= helper — confirm before removing.
        return humanfriendly.parse_size(string)

    parser = argparse.ArgumentParser(
        prog=argv[0],
        description="Back up the local system using borg",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    # Default config and borg wrapper live next to this script
    base = pathlib.Path(__file__).parent
    parser.add_argument('-c', '--config',
                        help="Config file", default=str(base / "config.yaml"))
    parser.add_argument('-b', '--borg',
                        help="Borg command", default=str(base / "borg.sh"))
    parser.add_argument('-n', '--dry-run', action="store_true",
                        help="Just print log output, don't run borg")
    parser.add_argument('-d', '--debug', action="store_true",
                        help="Print filenames for --dry-run")
    args = parser.parse_args()

    config = Config(args.config)
    backup = Backup(config, args.dry_run)

    if args.dry_run:
        if args.debug:
            # Dry run with --debug: write the path list to stdout
            backup.run(sys.stdout.buffer)
        else:
            # Dry run: show log output only, discard the path list
            with open(os.devnull, "wb") as out:
                backup.run(out)
    else:
        # Real run: stream the NUL-separated path list into borg's stdin
        borg = subprocess.Popen([args.borg,
                                 "create",
                                 "--verbose",
                                 "--list",
                                 "--filter", "E",
                                 "--stats",
                                 "--checkpoint-interval", "900",
                                 "--compression", "zstd,3",
                                 "--paths-from-stdin",
                                 "--paths-delimiter", "\\0",
                                 "::'{hostname}-{now:%Y%m%d-%H%M%S}'"],
                                stdin=subprocess.PIPE)
        if borg.stdin is None:
            raise Exception("no pipe")
        try:
            # Give borg some time to start, just to clean up stdout
            time.sleep(2)
            backup.run(borg.stdin)
        except BrokenPipeError:
            sys.stderr.write(f"broken pipe\n")
        finally:
            # Close stdin so borg sees EOF, then wait for it to exit;
            # borg may already have closed its end on error.
            try:
                borg.stdin.close()
            except BrokenPipeError:
                pass
            borg.wait()

        # Negative returncode means borg died from a signal
        ret = borg.returncode
        if ret < 0:
            sys.stderr.write(f"error: process exited with signal {-ret}\n")
            return 1
        elif ret != 0:
            sys.stderr.write(f"error: process exited with return code {ret}\n")
            return ret
    return 0
  239. if __name__ == "__main__":
  240. import sys
  241. raise SystemExit(main(sys.argv))