Compare commits

...

7 Commits

Author SHA1 Message Date
43ceb39120 backup: support multiple roots; remove "relative absolute path" nonsense
Support multiple roots in config file, not just one.
Previously, exclusion/inclusion patterns were matched against paths
relative to the root dir, but that doesn't make sense when we
have multiple roots, and it added needless complexity.
2021-10-14 12:33:07 -04:00
35c72e7ce6 backup: calculate size only once
We need to calculate size from allocated blocks so we get an idea of actual
used disk space (which is closer to the maximum space a file will use in the
backup, in case it has huge holes).  Calculate it once to avoid errors.
2021-10-14 12:33:07 -04:00
27213033a2 backup: use decorated paths for matching patterns
By ensuring that directory names end in '/', the behavior of
"match only directories if the pattern ends with /" comes for
free based on how wcmatch.glob works, so we don't need to run
the regex match twice.
2021-10-14 12:33:07 -04:00
5152a316c6 backup: use helper to format binary paths as strings 2021-10-14 12:33:07 -04:00
46195daaaa Improve borg process spawning and result checking 2021-10-14 12:33:07 -04:00
ffe13a45e6 Add --debug option 2021-10-14 12:33:07 -04:00
34817890b2 Update README cheat sheet 2021-10-14 12:33:07 -04:00
3 changed files with 94 additions and 52 deletions

View File

@ -29,9 +29,13 @@ See when next backup is scheduled:
systemctl list-timers borg-backup.timer
See progress of most recent backup:
See status of most recent backup:
systemctl status -l -n 99999 borg-backup
systemctl status --full --lines 999999 --no-pager --all borg-backup
Watch log:
journalctl --all --follow --unit borg-backup
Start backup now:

126
backup.py
View File

@ -9,6 +9,7 @@ import os
import re
import sys
import stat
import time
import pathlib
import subprocess
@ -18,8 +19,11 @@ import yaml
import wcmatch.glob # type: ignore
import humanfriendly # type: ignore
def pstr(path: bytes) -> str:
return path.decode(errors='backslashreplace')
class Config:
root: bytes
roots: list[bytes]
max_file_size: typing.Optional[int]
one_file_system: bool
exclude_caches: bool
@ -31,7 +35,6 @@ class Config:
# Read config
with open(configfile, 'r') as f:
config = yaml.safe_load(f)
self.root = config['root'].encode()
self.one_file_system = config.get('one-file-system', False)
self.exclude_caches = config.get('exclude-caches', False)
@ -41,6 +44,14 @@ class Config:
else:
self.max_file_size = None
raw = config.get('roots', '').encode().split(b'\n')
self.roots = []
for x in raw:
if not len(x):
continue
self.roots.append(x)
self.roots.sort(key=len)
def process_match_list(config_name):
raw = config.get(config_name, '').encode().split(b'\n')
pats = []
@ -77,12 +88,7 @@ class Config:
[ re.compile(x) for x in b ])
def match_re(self, re: tuple[list[typing.Pattern],
list[typing.Pattern]],
path: bytes, is_dir: bool):
# If it's a directory, try matching against a trailing slash
# first.
if is_dir and self.match_re(re, path + b'/', False):
return True
list[typing.Pattern]], path: bytes):
# Path matches if it matches at least one regex in
# re[0] and no regex in re[1].
for a in re[0]:
@ -97,6 +103,7 @@ class Backup:
def __init__(self, config: Config, dry_run: bool):
self.config = config
self.dry_run = dry_run
self.root_seen: dict[bytes, bool] = {}
# All logged messages, with severity
self.logs: list[tuple[str, str]] = []
@ -115,41 +122,44 @@ class Backup:
def run(self, outfile: typing.IO[bytes]):
self.outfile = outfile
# Base should not end with a slash, but full path should
if self.config.root.endswith(b'/'):
base = self.config.root[:-1]
path = self.config.root
else:
base = self.config.root
path = self.config.root + b'/'
self.scan(base, path)
for root in self.config.roots:
if root in self.root_seen:
self.log('I', f"ignoring root, already seen: {pstr(root)}")
continue
def scan(self, base: bytes, path: bytes,
parent_st: os.stat_result=None):
try:
st = os.lstat(root)
if not stat.S_ISDIR(st.st_mode):
raise NotADirectoryError
except FileNotFoundError:
self.log('W', f"ignoring root, does not exist: {pstr(root)}")
continue
except NotADirectoryError:
self.log('W', f"ignoring root, not a directory: {pstr(root)}")
continue
self.log('I', f"processing root {pstr(root)}")
self.scan(root)
def scan(self, path: bytes, parent_st: os.stat_result=None):
"""If the given path should be backed up, print it. If it's
a directory and its contents should be included, recurse.
"""
if base.endswith(b'/'):
raise Exception("base must not end with /")
relpath = path[len(base):]
if not relpath.startswith(b'/'):
raise Exception(f"relative path (from {repr(base)}, {repr(path)})"
+ f" must start with /")
# Copy the path in string form, for logging. Otherwise, we use
# bytes directly.
pathstr = path.decode(errors='backslashreplace')
try:
st = os.lstat(path)
is_dir = stat.S_ISDIR(st.st_mode)
is_reg = stat.S_ISREG(st.st_mode)
size = st.st_blocks * 512
# Decorated path ends with a '/' if it's a directory.
decorated_path = path
if is_dir and not decorated_path.endswith(b'/'):
decorated_path += b'/'
# See if there's a reason to exclude it
exclude_reason = None
if self.config.match_re(self.config.exclude_re, relpath, is_dir):
if self.config.match_re(self.config.exclude_re, decorated_path):
# Config file says to exclude
exclude_reason = ('I', f"skipping, excluded by config file")
@ -160,23 +170,24 @@ class Backup:
# Crosses a mount point
exclude_reason = ('I', "skipping, on different filesystem")
elif (self.config.max_file_size
and is_reg
and (st.st_blocks * 512) > self.config.max_file_size):
elif (is_reg
and self.config.max_file_size
and size > self.config.max_file_size):
# Too big
def format_size(n):
return humanfriendly.format_size(
n, keep_width=True, binary=True)
a = format_size(st.st_blocks * 512)
a = format_size(size)
b = format_size(self.config.max_file_size)
exclude_reason = ('W', f"file size {a} exceeds limit {b}")
# If we have a reason to exclude it, stop now unless it's
# force-included
force = self.config.match_re(
self.config.force_include_re, relpath, is_dir)
force = self.config.match_re(self.config.force_include_re,
decorated_path)
if exclude_reason and not force:
self.log(exclude_reason[0], f"{exclude_reason[1]}: {pathstr}")
self.log(exclude_reason[0],
f"{exclude_reason[1]}: {pstr(path)}")
return
# Print path for Borg
@ -185,6 +196,11 @@ class Backup:
# Process directories
if is_dir:
if path in self.config.roots:
self.root_seen[path] = True
if decorated_path in self.config.roots:
self.root_seen[decorated_path] = True
# Skip if it contains CACHEDIR.TAG
# (mirroring the --exclude-caches borg option)
if self.config.exclude_caches:
@ -193,7 +209,7 @@ class Backup:
with open(path + b'/CACHEDIR.TAG', 'rb') as f:
if f.read(len(tag)) == tag:
self.log(
'I', f"skipping, cache dir: {pathstr}")
'I', f"skipping, cache dir: {pstr(path)}")
return
except:
pass
@ -201,11 +217,10 @@ class Backup:
# Recurse
with os.scandir(path) as it:
for entry in it:
self.scan(base=base, path=entry.path,
parent_st=st)
self.scan(path=entry.path, parent_st=st)
except PermissionError as e:
self.log('E', f"can't read {pathstr}")
self.log('E', f"can't read {pstr(path)}")
return
def main(argv: list[str]):
@ -225,15 +240,20 @@ def main(argv: list[str]):
parser.add_argument('-b', '--borg',
help="Borg command", default=str(base / "borg.sh"))
parser.add_argument('-n', '--dry-run', action="store_true",
help="Just print filenames, don't run borg")
help="Just print log output, don't run borg")
parser.add_argument('-d', '--debug', action="store_true",
help="Print filenames for --dry-run")
args = parser.parse_args()
config = Config(args.config)
backup = Backup(config, args.dry_run)
if args.dry_run:
with open(os.devnull, "wb") as out:
backup.run(out)
if args.debug:
backup.run(sys.stdout.buffer)
else:
with open(os.devnull, "wb") as out:
backup.run(out)
else:
borg = subprocess.Popen([args.borg,
"create",
@ -249,9 +269,19 @@ def main(argv: list[str]):
stdin=subprocess.PIPE)
if borg.stdin is None:
raise Exception("no pipe")
backup.run(borg.stdin)
borg.stdin.close()
ret = borg.wait()
try:
# Give borg some time to start, just to clean up stdout
time.sleep(2)
backup.run(borg.stdin)
except BrokenPipeError:
sys.stderr.write(f"broken pipe\n")
finally:
try:
borg.stdin.close()
except BrokenPipeError:
pass
borg.wait()
ret = borg.returncode
if ret < 0:
sys.stderr.write(f"error: process exited with signal {-ret}\n")
return 1

View File

@ -1,4 +1,13 @@
root: "/"
# List multiple roots, in case they are on different file systems.
# A root that was already covered while scanning another root is skipped,
# so it's OK if these paths actually live on the same filesystem.
roots: |
/
/boot
/efi
/usr
/var
one-file-system: true
exclude-caches: true
@ -9,7 +18,6 @@ exclude-caches: true
max-file-size: 500MiB
# Files/dirs to exclude from backup.
# Absolute paths here start at the root directory.
# Relative paths are treated as if starting with **/
# Paths ending in / will only match directories.
exclude: |