Compare commits

..

7 Commits

Author SHA1 Message Date
43ceb39120 backup: support multiple roots; remove "relative absolute path" nonsense
Support multiple roots in config file, not just one.
Previously, exclusion/inclusion patterns were matched against paths taken
relative to the root dir, but that doesn't make sense when we have
multiple roots, and it added needless complexity.
2021-10-14 12:33:07 -04:00
35c72e7ce6 backup: calculate size only once
We need to calculate size so we get an idea of the disk space actually used
(which is closer to the maximum space the file will take up in the backup,
in case files have huge holes).  Calculate it once to avoid errors.
2021-10-14 12:33:07 -04:00
27213033a2 backup: use decorated paths for matching patterns
By ensuring that directory names end in '/', the behavior of
"match only directories if the pattern ends with /" comes for
free based on how wcmatch.glob works, so we don't need to run
the regex match twice.
2021-10-14 12:33:07 -04:00
5152a316c6 backup: use helper to format binary paths as strings 2021-10-14 12:33:07 -04:00
46195daaaa Improve borg process spawning and result checking 2021-10-14 12:33:07 -04:00
ffe13a45e6 Add --debug option 2021-10-14 12:33:07 -04:00
34817890b2 Update README cheat sheet 2021-10-14 12:33:07 -04:00
3 changed files with 94 additions and 52 deletions

View File

@ -29,9 +29,13 @@ See when next backup is scheduled:
systemctl list-timers borg-backup.timer systemctl list-timers borg-backup.timer
See progress of most recent backup: See status of most recent backup:
systemctl status -l -n 99999 borg-backup systemctl status --full --lines 999999 --no-pager --all borg-backup
Watch log:
journalctl --all --follow --unit borg-backup
Start backup now: Start backup now:

126
backup.py
View File

@ -9,6 +9,7 @@ import os
import re import re
import sys import sys
import stat import stat
import time
import pathlib import pathlib
import subprocess import subprocess
@ -18,8 +19,11 @@ import yaml
import wcmatch.glob # type: ignore import wcmatch.glob # type: ignore
import humanfriendly # type: ignore import humanfriendly # type: ignore
def pstr(path: bytes) -> str:
    """Return *path* as text suitable for log messages.

    Paths are handled as raw bytes elsewhere; this converts one to a
    string for display only.  Bytes that cannot be decoded with the
    default codec (UTF-8) are rendered as backslash escapes instead of
    raising, so any filename can be logged.
    """
    return path.decode(errors='backslashreplace')
class Config: class Config:
root: bytes roots: list[bytes]
max_file_size: typing.Optional[int] max_file_size: typing.Optional[int]
one_file_system: bool one_file_system: bool
exclude_caches: bool exclude_caches: bool
@ -31,7 +35,6 @@ class Config:
# Read config # Read config
with open(configfile, 'r') as f: with open(configfile, 'r') as f:
config = yaml.safe_load(f) config = yaml.safe_load(f)
self.root = config['root'].encode()
self.one_file_system = config.get('one-file-system', False) self.one_file_system = config.get('one-file-system', False)
self.exclude_caches = config.get('exclude-caches', False) self.exclude_caches = config.get('exclude-caches', False)
@ -41,6 +44,14 @@ class Config:
else: else:
self.max_file_size = None self.max_file_size = None
raw = config.get('roots', '').encode().split(b'\n')
self.roots = []
for x in raw:
if not len(x):
continue
self.roots.append(x)
self.roots.sort(key=len)
def process_match_list(config_name): def process_match_list(config_name):
raw = config.get(config_name, '').encode().split(b'\n') raw = config.get(config_name, '').encode().split(b'\n')
pats = [] pats = []
@ -77,12 +88,7 @@ class Config:
[ re.compile(x) for x in b ]) [ re.compile(x) for x in b ])
def match_re(self, re: tuple[list[typing.Pattern], def match_re(self, re: tuple[list[typing.Pattern],
list[typing.Pattern]], list[typing.Pattern]], path: bytes):
path: bytes, is_dir: bool):
# If it's a directory, try matching against a trailing slash
# first.
if is_dir and self.match_re(re, path + b'/', False):
return True
# Path matches if it matches at least one regex in # Path matches if it matches at least one regex in
# re[0] and no regex in re[1]. # re[0] and no regex in re[1].
for a in re[0]: for a in re[0]:
@ -97,6 +103,7 @@ class Backup:
def __init__(self, config: Config, dry_run: bool): def __init__(self, config: Config, dry_run: bool):
self.config = config self.config = config
self.dry_run = dry_run self.dry_run = dry_run
self.root_seen: dict[bytes, bool] = {}
# All logged messages, with severity # All logged messages, with severity
self.logs: list[tuple[str, str]] = [] self.logs: list[tuple[str, str]] = []
@ -115,41 +122,44 @@ class Backup:
def run(self, outfile: typing.IO[bytes]): def run(self, outfile: typing.IO[bytes]):
self.outfile = outfile self.outfile = outfile
# Base should not end with a slash, but full path should for root in self.config.roots:
if self.config.root.endswith(b'/'): if root in self.root_seen:
base = self.config.root[:-1] self.log('I', f"ignoring root, already seen: {pstr(root)}")
path = self.config.root continue
else:
base = self.config.root
path = self.config.root + b'/'
self.scan(base, path)
def scan(self, base: bytes, path: bytes, try:
parent_st: os.stat_result=None): st = os.lstat(root)
if not stat.S_ISDIR(st.st_mode):
raise NotADirectoryError
except FileNotFoundError:
self.log('W', f"ignoring root, does not exist: {pstr(root)}")
continue
except NotADirectoryError:
self.log('W', f"ignoring root, not a directory: {pstr(root)}")
continue
self.log('I', f"processing root {pstr(root)}")
self.scan(root)
def scan(self, path: bytes, parent_st: os.stat_result=None):
"""If the given path should be backed up, print it. If it's """If the given path should be backed up, print it. If it's
a directory and its contents should be included, recurse. a directory and its contents should be included, recurse.
""" """
if base.endswith(b'/'):
raise Exception("base must not end with /")
relpath = path[len(base):]
if not relpath.startswith(b'/'):
raise Exception(f"relative path (from {repr(base)}, {repr(path)})"
+ f" must start with /")
# Copy the path in string form, for logging. Otherwise, we use
# bytes directly.
pathstr = path.decode(errors='backslashreplace')
try: try:
st = os.lstat(path) st = os.lstat(path)
is_dir = stat.S_ISDIR(st.st_mode) is_dir = stat.S_ISDIR(st.st_mode)
is_reg = stat.S_ISREG(st.st_mode) is_reg = stat.S_ISREG(st.st_mode)
size = st.st_blocks * 512
# Decorated path ends with a '/' if it's a directory.
decorated_path = path
if is_dir and not decorated_path.endswith(b'/'):
decorated_path += b'/'
# See if there's a reason to exclude it # See if there's a reason to exclude it
exclude_reason = None exclude_reason = None
if self.config.match_re(self.config.exclude_re, relpath, is_dir): if self.config.match_re(self.config.exclude_re, decorated_path):
# Config file says to exclude # Config file says to exclude
exclude_reason = ('I', f"skipping, excluded by config file") exclude_reason = ('I', f"skipping, excluded by config file")
@ -160,23 +170,24 @@ class Backup:
# Crosses a mount point # Crosses a mount point
exclude_reason = ('I', "skipping, on different filesystem") exclude_reason = ('I', "skipping, on different filesystem")
elif (self.config.max_file_size elif (is_reg
and is_reg and self.config.max_file_size
and (st.st_blocks * 512) > self.config.max_file_size): and size > self.config.max_file_size):
# Too big # Too big
def format_size(n): def format_size(n):
return humanfriendly.format_size( return humanfriendly.format_size(
n, keep_width=True, binary=True) n, keep_width=True, binary=True)
a = format_size(st.st_blocks * 512) a = format_size(size)
b = format_size(self.config.max_file_size) b = format_size(self.config.max_file_size)
exclude_reason = ('W', f"file size {a} exceeds limit {b}") exclude_reason = ('W', f"file size {a} exceeds limit {b}")
# If we have a reason to exclude it, stop now unless it's # If we have a reason to exclude it, stop now unless it's
# force-included # force-included
force = self.config.match_re( force = self.config.match_re(self.config.force_include_re,
self.config.force_include_re, relpath, is_dir) decorated_path)
if exclude_reason and not force: if exclude_reason and not force:
self.log(exclude_reason[0], f"{exclude_reason[1]}: {pathstr}") self.log(exclude_reason[0],
f"{exclude_reason[1]}: {pstr(path)}")
return return
# Print path for Borg # Print path for Borg
@ -185,6 +196,11 @@ class Backup:
# Process directories # Process directories
if is_dir: if is_dir:
if path in self.config.roots:
self.root_seen[path] = True
if decorated_path in self.config.roots:
self.root_seen[decorated_path] = True
# Skip if it contains CACHEDIR.TAG # Skip if it contains CACHEDIR.TAG
# (mirroring the --exclude-caches borg option) # (mirroring the --exclude-caches borg option)
if self.config.exclude_caches: if self.config.exclude_caches:
@ -193,7 +209,7 @@ class Backup:
with open(path + b'/CACHEDIR.TAG', 'rb') as f: with open(path + b'/CACHEDIR.TAG', 'rb') as f:
if f.read(len(tag)) == tag: if f.read(len(tag)) == tag:
self.log( self.log(
'I', f"skipping, cache dir: {pathstr}") 'I', f"skipping, cache dir: {pstr(path)}")
return return
except: except:
pass pass
@ -201,11 +217,10 @@ class Backup:
# Recurse # Recurse
with os.scandir(path) as it: with os.scandir(path) as it:
for entry in it: for entry in it:
self.scan(base=base, path=entry.path, self.scan(path=entry.path, parent_st=st)
parent_st=st)
except PermissionError as e: except PermissionError as e:
self.log('E', f"can't read {pathstr}") self.log('E', f"can't read {pstr(path)}")
return return
def main(argv: list[str]): def main(argv: list[str]):
@ -225,15 +240,20 @@ def main(argv: list[str]):
parser.add_argument('-b', '--borg', parser.add_argument('-b', '--borg',
help="Borg command", default=str(base / "borg.sh")) help="Borg command", default=str(base / "borg.sh"))
parser.add_argument('-n', '--dry-run', action="store_true", parser.add_argument('-n', '--dry-run', action="store_true",
help="Just print filenames, don't run borg") help="Just print log output, don't run borg")
parser.add_argument('-d', '--debug', action="store_true",
help="Print filenames for --dry-run")
args = parser.parse_args() args = parser.parse_args()
config = Config(args.config) config = Config(args.config)
backup = Backup(config, args.dry_run) backup = Backup(config, args.dry_run)
if args.dry_run: if args.dry_run:
with open(os.devnull, "wb") as out: if args.debug:
backup.run(out) backup.run(sys.stdout.buffer)
else:
with open(os.devnull, "wb") as out:
backup.run(out)
else: else:
borg = subprocess.Popen([args.borg, borg = subprocess.Popen([args.borg,
"create", "create",
@ -249,9 +269,19 @@ def main(argv: list[str]):
stdin=subprocess.PIPE) stdin=subprocess.PIPE)
if borg.stdin is None: if borg.stdin is None:
raise Exception("no pipe") raise Exception("no pipe")
backup.run(borg.stdin) try:
borg.stdin.close() # Give borg some time to start, just to clean up stdout
ret = borg.wait() time.sleep(2)
backup.run(borg.stdin)
except BrokenPipeError:
sys.stderr.write(f"broken pipe\n")
finally:
try:
borg.stdin.close()
except BrokenPipeError:
pass
borg.wait()
ret = borg.returncode
if ret < 0: if ret < 0:
sys.stderr.write(f"error: process exited with signal {-ret}\n") sys.stderr.write(f"error: process exited with signal {-ret}\n")
return 1 return 1

View File

@ -1,4 +1,13 @@
root: "/" # List multiple roots, in case they come from different file systems.
# Any root that was already scanned as part of an earlier root is skipped,
# so it's OK if these paths actually live on the same filesystem.
roots: |
/
/boot
/efi
/usr
/var
one-file-system: true one-file-system: true
exclude-caches: true exclude-caches: true
@ -9,7 +18,6 @@ exclude-caches: true
max-file-size: 500MiB max-file-size: 500MiB
# Files/dirs to exclude from backup. # Files/dirs to exclude from backup.
# Absolute paths here start at the root directory.
# Relative paths are treated as if starting with **/ # Relative paths are treated as if starting with **/
# Paths ending in / will only match directories. # Paths ending in / will only match directories.
exclude: | exclude: |