My backup scripts and tools
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

208 lines
7.1 KiB

  1. #!.venv/bin/python
  2. # Scan filesystem to generate a list of files to back up, based on a
  3. # configuration file. Pass this list to borg to actually create the
  4. # backup. Execute a notification script on the remote server to
  5. # report the backup status.
  6. import os
  7. import re
  8. import sys
  9. import stat
  10. import pathlib
  11. import typing
  12. import yaml
  13. import wcmatch.glob # type: ignore
  14. import humanfriendly # type: ignore
  15. class Config:
  16. root: str
  17. max_file_size: typing.Optional[int]
  18. one_file_system: bool
  19. exclude: list[bytes]
  20. force_include: list[bytes]
  21. notify_email: typing.Optional[str]
  22. def __init__(self, configfile: str):
  23. # Read config
  24. with open(configfile, 'r') as f:
  25. config = yaml.safe_load(f)
  26. self.root = config['root'].encode()
  27. self.one_file_system = config.get('one-file-system', False)
  28. if 'max-file-size' in config:
  29. self.max_file_size = humanfriendly.parse_size(
  30. config['max-file-size'])
  31. else:
  32. self.max_file_size = None
  33. utf = config.get('exclude', '').encode()
  34. self.exclude = list(filter(len, utf.split(b'\n')))
  35. utf = config.get('force-include', '').encode()
  36. self.force_include = list(filter(len, utf.split(b'\n')))
  37. self.notify_email = config.get('notify-email', None)
  38. # Compile patterns
  39. flags = (wcmatch.glob.GLOBSTAR |
  40. wcmatch.glob.DOTGLOB |
  41. wcmatch.glob.NODOTDIR |
  42. wcmatch.glob.EXTGLOB |
  43. wcmatch.glob.BRACE)
  44. # Path matches if it matches at least one regex in "a" and no
  45. # regex in "b"
  46. (a, b) = wcmatch.glob.translate(self.exclude, flags=flags)
  47. self.exclude_re = ([ re.compile(x) for x in a ],
  48. [ re.compile(x) for x in b ])
  49. (a, b) = wcmatch.glob.translate(self.force_include, flags=flags)
  50. self.force_include_re = ([ re.compile(x) for x in a ],
  51. [ re.compile(x) for x in b ])
  52. def match_compiled(self, re: tuple[list[typing.Pattern],
  53. list[typing.Pattern]],
  54. path: bytes):
  55. # Path matches if it matches at least one regex in
  56. # re[0] and no regex in re[1]
  57. for a in re[0]:
  58. if a.match(path):
  59. for b in re[1]:
  60. if b.match(path):
  61. return False
  62. return True
  63. return False
  64. def __str__(self):
  65. d = { 'root': self.root }
  66. if self.max_file_size:
  67. d['max-file-size'] = self.max_file_size
  68. if self.exclude:
  69. utf = b'\n'.join(self.exclude)
  70. d['exclude'] = utf.decode(errors='backslashreplace')
  71. if self.force_include:
  72. utf = b'\n'.join(self.force_include)
  73. d['force-include'] = utf.decode(errors='backslashreplace')
  74. if self.notify_email:
  75. d['notify-email'] = self.notify_email
  76. return yaml.dump(d, default_flow_style=False)
  77. class Backup:
  78. def __init__(self, config: Config, dry_run: bool, out: typing.BinaryIO):
  79. self.config = config
  80. self.outfile = out
  81. self.dry_run = dry_run
  82. # All logged messages, with severity
  83. self.logs: list[tuple[str, str]] = []
  84. def out(self, path: bytes):
  85. self.outfile.write(path + (b'\n' if self.dry_run else b'\0'))
  86. def log(self, letter: str, msg: str):
  87. colors = { 'E': 31, 'W': 33, 'I': 36 };
  88. if letter in colors:
  89. c = colors[letter]
  90. else:
  91. c = 0
  92. sys.stderr.write(f"\033[1;{c}m{letter}:\033[22m {msg}\033[0m\n")
  93. self.logs.append((letter, msg))
  94. def run(self):
  95. self.scan(self.config.root)
  96. def scan(self, path: bytes, parent_st: os.stat_result=None):
  97. """If the given path should be backed up, print it. If it's
  98. a directory and its contents should be included, recurse."""
  99. # Copy the path in string form, for logging. Otherwise, we use
  100. # bytes directly.
  101. pathstr = path.decode(errors='backslashreplace')
  102. try:
  103. # See if this path should be excluded or force-included
  104. # Only stat the file when we need it
  105. cached_st = None
  106. def st():
  107. nonlocal cached_st
  108. if not cached_st:
  109. cached_st = os.lstat(path)
  110. return cached_st
  111. # See if there's a reason to exclude it
  112. exclude_reason = None
  113. if self.config.match_compiled(self.config.exclude_re, path):
  114. # Config file says to exclude
  115. exclude_reason = ('I', f"skipping, excluded by config file")
  116. elif (stat.S_ISDIR(st().st_mode)
  117. and self.config.one_file_system
  118. and parent_st is not None
  119. and st().st_dev != parent_st.st_dev):
  120. # Crosses a mount point
  121. exclude_reason = ('I', "skipping, on different filesystem")
  122. elif (stat.S_ISREG(st().st_mode)
  123. and self.config.max_file_size
  124. and st().st_size > self.config.max_file_size):
  125. # Too big
  126. def format_size(n):
  127. return humanfriendly.format_size(
  128. n, keep_width=True, binary=True)
  129. a = format_size(st().st_size)
  130. b = format_size(self.config.max_file_size)
  131. exclude_reason = ('W', f"file size {a} exceeds limit {b}")
  132. # If we have a reason to exclude it, stop now unless it's
  133. # force-included
  134. if (exclude_reason
  135. and not self.config.match_compiled(
  136. self.config.force_include_re, path)):
  137. self.log(exclude_reason[0], f"{exclude_reason[1]}: {pathstr}")
  138. return
  139. # Print name of this path
  140. self.out(path)
  141. # If it's a directory, recurse
  142. if stat.S_ISDIR(st().st_mode):
  143. with os.scandir(path) as it:
  144. for entry in it:
  145. self.scan(path=entry.path, parent_st=st())
  146. except PermissionError as e:
  147. self.log('E', f"can't read {pathstr}")
  148. return
  149. def main(argv: list[str]):
  150. import argparse
  151. def humansize(string):
  152. return humanfriendly.parse_size(string)
  153. parser = argparse.ArgumentParser(
  154. prog=argv[0],
  155. description="Back up the local system using borg",
  156. formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  157. default_config = str(pathlib.Path(__file__).parent / "config.yaml")
  158. parser.add_argument('-c', '--config',
  159. help="Config file", default=default_config)
  160. parser.add_argument('-n', '--dry-run', action="store_true",
  161. help="Just print filenames, don't run borg")
  162. args = parser.parse_args()
  163. config = Config(args.config)
  164. backup = Backup(config, args.dry_run, sys.stdout.buffer)
  165. backup.run()
  166. if __name__ == "__main__":
  167. import sys
  168. main(sys.argv)