My backup scripts and tools
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

backup.py 8.9 KiB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. #!.venv/bin/python
  2. # Scan filesystem to generate a list of files to back up, based on a
  3. # configuration file. Pass this list to borg to actually create the
  4. # backup. Execute a notification script on the remote server to
  5. # report the backup status.
  6. import os
  7. import re
  8. import sys
  9. import stat
  10. import pathlib
  11. import subprocess
  12. import typing
  13. import yaml
  14. import wcmatch.glob # type: ignore
  15. import humanfriendly # type: ignore
  16. class Config:
  17. root: bytes
  18. max_file_size: typing.Optional[int]
  19. one_file_system: bool
  20. exclude_caches: bool
  21. exclude: list[bytes]
  22. force_include: list[bytes]
  23. notify_email: typing.Optional[str]
  24. def __init__(self, configfile: str):
  25. # Read config
  26. with open(configfile, 'r') as f:
  27. config = yaml.safe_load(f)
  28. self.root = config['root'].encode()
  29. self.one_file_system = config.get('one-file-system', False)
  30. self.exclude_caches = config.get('exclude-caches', False)
  31. if 'max-file-size' in config:
  32. self.max_file_size = humanfriendly.parse_size(
  33. config['max-file-size'])
  34. else:
  35. self.max_file_size = None
  36. def process_match_list(config_name):
  37. raw = config.get(config_name, '').encode().split(b'\n')
  38. pats = []
  39. # Prepend '**/' to any relative patterns
  40. for x in raw:
  41. if not len(x):
  42. continue
  43. if x.startswith(b'/'):
  44. pats.append(x)
  45. else:
  46. pats.append(b'**/' + x)
  47. return pats
  48. self.exclude = process_match_list('exclude')
  49. self.force_include = process_match_list('force-include')
  50. self.notify_email = config.get('notify-email', None)
  51. # Compile patterns
  52. flags = (wcmatch.glob.GLOBSTAR |
  53. wcmatch.glob.DOTGLOB |
  54. wcmatch.glob.NODOTDIR |
  55. wcmatch.glob.EXTGLOB |
  56. wcmatch.glob.BRACE)
  57. # Path matches if it matches at least one regex in "a" and no
  58. # regex in "b"
  59. (a, b) = wcmatch.glob.translate(self.exclude, flags=flags)
  60. self.exclude_re = ([ re.compile(x) for x in a ],
  61. [ re.compile(x) for x in b ])
  62. (a, b) = wcmatch.glob.translate(self.force_include, flags=flags)
  63. self.force_include_re = ([ re.compile(x) for x in a ],
  64. [ re.compile(x) for x in b ])
  65. def match_re(self, re: tuple[list[typing.Pattern],
  66. list[typing.Pattern]],
  67. path: bytes, is_dir: bool):
  68. if b'data.ext4.win000' in path:
  69. print(path)
  70. print(re)
  71. # If it's a directory, try matching against a trailing slash
  72. # first.
  73. if is_dir and self.match_re(re, path + b'/', False):
  74. return True
  75. # Path matches if it matches at least one regex in
  76. # re[0] and no regex in re[1].
  77. for a in re[0]:
  78. if a.match(path):
  79. for b in re[1]:
  80. if b.match(path):
  81. return False
  82. return True
  83. return False
  84. class Backup:
  85. def __init__(self, config: Config, dry_run: bool):
  86. self.config = config
  87. self.dry_run = dry_run
  88. # All logged messages, with severity
  89. self.logs: list[tuple[str, str]] = []
  90. def out(self, path: bytes):
  91. self.outfile.write(path + (b'\n' if self.dry_run else b'\0'))
  92. def log(self, letter: str, msg: str):
  93. colors = { 'E': 31, 'W': 33, 'I': 36 };
  94. if letter in colors:
  95. c = colors[letter]
  96. else:
  97. c = 0
  98. sys.stderr.write(f"\033[1;{c}m{letter}:\033[22m {msg}\033[0m\n")
  99. self.logs.append((letter, msg))
  100. def run(self, outfile: typing.BinaryIO):
  101. self.outfile = outfile
  102. # Base should not end with a slash, but full path should
  103. if self.config.root.endswith(b'/'):
  104. base = self.config.root[:-1]
  105. path = self.config.root
  106. else:
  107. base = self.config.root
  108. path = self.config.root + b'/'
  109. self.scan(base, path)
  110. def scan(self, base: bytes, path: bytes,
  111. parent_st: os.stat_result=None):
  112. """If the given path should be backed up, print it. If it's
  113. a directory and its contents should be included, recurse.
  114. """
  115. if base.endswith(b'/'):
  116. raise Exception("base must not end with /")
  117. relpath = path[len(base):]
  118. if not relpath.startswith(b'/'):
  119. raise Exception(f"relative path (from {repr(base)}, {repr(path)})"
  120. + f" must start with /")
  121. # Copy the path in string form, for logging. Otherwise, we use
  122. # bytes directly.
  123. pathstr = path.decode(errors='backslashreplace')
  124. try:
  125. st = os.lstat(path)
  126. is_dir = stat.S_ISDIR(st.st_mode)
  127. is_reg = stat.S_ISREG(st.st_mode)
  128. # See if there's a reason to exclude it
  129. exclude_reason = None
  130. if self.config.match_re(self.config.exclude_re, relpath, is_dir):
  131. # Config file says to exclude
  132. exclude_reason = ('I', f"skipping, excluded by config file")
  133. elif (self.config.one_file_system
  134. and parent_st is not None
  135. and is_dir
  136. and st.st_dev != parent_st.st_dev):
  137. # Crosses a mount point
  138. exclude_reason = ('I', "skipping, on different filesystem")
  139. elif (self.config.max_file_size
  140. and is_reg
  141. and (st.st_blocks * 512) > self.config.max_file_size):
  142. # Too big
  143. def format_size(n):
  144. return humanfriendly.format_size(
  145. n, keep_width=True, binary=True)
  146. a = format_size(st.st_blocks * 512)
  147. b = format_size(self.config.max_file_size)
  148. exclude_reason = ('W', f"file size {a} exceeds limit {b}")
  149. # If we have a reason to exclude it, stop now unless it's
  150. # force-included
  151. force = self.config.match_re(
  152. self.config.force_include_re, relpath, is_dir)
  153. if exclude_reason and not force:
  154. self.log(exclude_reason[0], f"{exclude_reason[1]}: {pathstr}")
  155. return
  156. # Print path for Borg
  157. self.out(path)
  158. # Process directories
  159. if is_dir:
  160. # Skip if it contains CACHEDIR.TAG
  161. # (mirroring the --exclude-caches borg option)
  162. if self.config.exclude_caches:
  163. try:
  164. tag = b'Signature: 8a477f597d28d172789f06886806bc55'
  165. with open(path + b'/CACHEDIR.TAG', 'rb') as f:
  166. if f.read(len(tag)) == tag:
  167. self.log(
  168. 'I', f"skipping, cache dir: {pathstr}")
  169. return
  170. except:
  171. pass
  172. # Recurse
  173. with os.scandir(path) as it:
  174. for entry in it:
  175. self.scan(base=base, path=entry.path,
  176. parent_st=st)
  177. except PermissionError as e:
  178. self.log('E', f"can't read {pathstr}")
  179. return
  180. def main(argv: list[str]):
  181. import argparse
  182. def humansize(string):
  183. return humanfriendly.parse_size(string)
  184. parser = argparse.ArgumentParser(
  185. prog=argv[0],
  186. description="Back up the local system using borg",
  187. formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  188. base = pathlib.Path(__file__).parent
  189. parser.add_argument('-c', '--config',
  190. help="Config file", default=str(base / "config.yaml"))
  191. parser.add_argument('-b', '--borg',
  192. help="Borg command", default=str(base / "borg.sh"))
  193. parser.add_argument('-n', '--dry-run', action="store_true",
  194. help="Just print filenames, don't run borg")
  195. args = parser.parse_args()
  196. config = Config(args.config)
  197. backup = Backup(config, args.dry_run)
  198. if args.dry_run:
  199. with open(os.devnull, "wb") as out:
  200. backup.run(out)
  201. else:
  202. # subprocess.Popen([args.borg,
  203. # "create",
  204. # "--verbose",
  205. # "--list",
  206. # "--filter", "E",
  207. # "--stats",
  208. # "--exclude-caches"])
  209. # --stats \
  210. # --exclude-caches \
  211. # --one-file-system \
  212. # --checkpoint-interval 900 \
  213. # --compression zstd,3 \
  214. # ::'{hostname}-{now:%Y%m%d-%H%M%S}' \
  215. # $BORG_BACKUP_DIRS
  216. backup.run(sys.stdout.buffer)
  217. if __name__ == "__main__":
  218. import sys
  219. main(sys.argv)