Compare commits

..

No commits in common. "43ceb3912012e64d91def9fe1435feb7b7a7a9e4" and "0af42b82178748f21e7f1fbb438d866318e17669" have entirely different histories.

3 changed files with 52 additions and 94 deletions

View File

@ -29,13 +29,9 @@ See when next backup is scheduled:
systemctl list-timers borg-backup.timer
See status of most recent backup:
See progress of most recent backup:
systemctl status --full --lines 999999 --no-pager --all borg-backup
Watch log:
journalctl --all --follow --unit borg-backup
systemctl status -l -n 99999 borg-backup
Start backup now:

118
backup.py
View File

@ -9,7 +9,6 @@ import os
import re
import sys
import stat
import time
import pathlib
import subprocess
@ -19,11 +18,8 @@ import yaml
import wcmatch.glob # type: ignore
import humanfriendly # type: ignore
def pstr(path: bytes) -> str:
return path.decode(errors='backslashreplace')
class Config:
roots: list[bytes]
root: bytes
max_file_size: typing.Optional[int]
one_file_system: bool
exclude_caches: bool
@ -35,6 +31,7 @@ class Config:
# Read config
with open(configfile, 'r') as f:
config = yaml.safe_load(f)
self.root = config['root'].encode()
self.one_file_system = config.get('one-file-system', False)
self.exclude_caches = config.get('exclude-caches', False)
@ -44,14 +41,6 @@ class Config:
else:
self.max_file_size = None
raw = config.get('roots', '').encode().split(b'\n')
self.roots = []
for x in raw:
if not len(x):
continue
self.roots.append(x)
self.roots.sort(key=len)
def process_match_list(config_name):
raw = config.get(config_name, '').encode().split(b'\n')
pats = []
@ -88,7 +77,12 @@ class Config:
[ re.compile(x) for x in b ])
def match_re(self, re: tuple[list[typing.Pattern],
list[typing.Pattern]], path: bytes):
list[typing.Pattern]],
path: bytes, is_dir: bool):
# If it's a directory, try matching against a trailing slash
# first.
if is_dir and self.match_re(re, path + b'/', False):
return True
# Path matches if it matches at least one regex in
# re[0] and no regex in re[1].
for a in re[0]:
@ -103,7 +97,6 @@ class Backup:
def __init__(self, config: Config, dry_run: bool):
self.config = config
self.dry_run = dry_run
self.root_seen: dict[bytes, bool] = {}
# All logged messages, with severity
self.logs: list[tuple[str, str]] = []
@ -122,44 +115,41 @@ class Backup:
def run(self, outfile: typing.IO[bytes]):
self.outfile = outfile
for root in self.config.roots:
if root in self.root_seen:
self.log('I', f"ignoring root, already seen: {pstr(root)}")
continue
# Base should not end with a slash, but full path should
if self.config.root.endswith(b'/'):
base = self.config.root[:-1]
path = self.config.root
else:
base = self.config.root
path = self.config.root + b'/'
self.scan(base, path)
try:
st = os.lstat(root)
if not stat.S_ISDIR(st.st_mode):
raise NotADirectoryError
except FileNotFoundError:
self.log('W', f"ignoring root, does not exist: {pstr(root)}")
continue
except NotADirectoryError:
self.log('W', f"ignoring root, not a directory: {pstr(root)}")
continue
self.log('I', f"processing root {pstr(root)}")
self.scan(root)
def scan(self, path: bytes, parent_st: os.stat_result=None):
def scan(self, base: bytes, path: bytes,
parent_st: os.stat_result=None):
"""If the given path should be backed up, print it. If it's
a directory and its contents should be included, recurse.
"""
if base.endswith(b'/'):
raise Exception("base must not end with /")
relpath = path[len(base):]
if not relpath.startswith(b'/'):
raise Exception(f"relative path (from {repr(base)}, {repr(path)})"
+ f" must start with /")
# Copy the path in string form, for logging. Otherwise, we use
# bytes directly.
pathstr = path.decode(errors='backslashreplace')
try:
st = os.lstat(path)
is_dir = stat.S_ISDIR(st.st_mode)
is_reg = stat.S_ISREG(st.st_mode)
size = st.st_blocks * 512
# Decorated path ends with a '/' if it's a directory.
decorated_path = path
if is_dir and not decorated_path.endswith(b'/'):
decorated_path += b'/'
# See if there's a reason to exclude it
exclude_reason = None
if self.config.match_re(self.config.exclude_re, decorated_path):
if self.config.match_re(self.config.exclude_re, relpath, is_dir):
# Config file says to exclude
exclude_reason = ('I', f"skipping, excluded by config file")
@ -170,24 +160,23 @@ class Backup:
# Crosses a mount point
exclude_reason = ('I', "skipping, on different filesystem")
elif (is_reg
and self.config.max_file_size
and size > self.config.max_file_size):
elif (self.config.max_file_size
and is_reg
and (st.st_blocks * 512) > self.config.max_file_size):
# Too big
def format_size(n):
return humanfriendly.format_size(
n, keep_width=True, binary=True)
a = format_size(size)
a = format_size(st.st_blocks * 512)
b = format_size(self.config.max_file_size)
exclude_reason = ('W', f"file size {a} exceeds limit {b}")
# If we have a reason to exclude it, stop now unless it's
# force-included
force = self.config.match_re(self.config.force_include_re,
decorated_path)
force = self.config.match_re(
self.config.force_include_re, relpath, is_dir)
if exclude_reason and not force:
self.log(exclude_reason[0],
f"{exclude_reason[1]}: {pstr(path)}")
self.log(exclude_reason[0], f"{exclude_reason[1]}: {pathstr}")
return
# Print path for Borg
@ -196,11 +185,6 @@ class Backup:
# Process directories
if is_dir:
if path in self.config.roots:
self.root_seen[path] = True
if decorated_path in self.config.roots:
self.root_seen[decorated_path] = True
# Skip if it contains CACHEDIR.TAG
# (mirroring the --exclude-caches borg option)
if self.config.exclude_caches:
@ -209,7 +193,7 @@ class Backup:
with open(path + b'/CACHEDIR.TAG', 'rb') as f:
if f.read(len(tag)) == tag:
self.log(
'I', f"skipping, cache dir: {pstr(path)}")
'I', f"skipping, cache dir: {pathstr}")
return
except:
pass
@ -217,10 +201,11 @@ class Backup:
# Recurse
with os.scandir(path) as it:
for entry in it:
self.scan(path=entry.path, parent_st=st)
self.scan(base=base, path=entry.path,
parent_st=st)
except PermissionError as e:
self.log('E', f"can't read {pstr(path)}")
self.log('E', f"can't read {pathstr}")
return
def main(argv: list[str]):
@ -240,18 +225,13 @@ def main(argv: list[str]):
parser.add_argument('-b', '--borg',
help="Borg command", default=str(base / "borg.sh"))
parser.add_argument('-n', '--dry-run', action="store_true",
help="Just print log output, don't run borg")
parser.add_argument('-d', '--debug', action="store_true",
help="Print filenames for --dry-run")
help="Just print filenames, don't run borg")
args = parser.parse_args()
config = Config(args.config)
backup = Backup(config, args.dry_run)
if args.dry_run:
if args.debug:
backup.run(sys.stdout.buffer)
else:
with open(os.devnull, "wb") as out:
backup.run(out)
else:
@ -269,19 +249,9 @@ def main(argv: list[str]):
stdin=subprocess.PIPE)
if borg.stdin is None:
raise Exception("no pipe")
try:
# Give borg some time to start, just to clean up stdout
time.sleep(2)
backup.run(borg.stdin)
except BrokenPipeError:
sys.stderr.write(f"broken pipe\n")
finally:
try:
borg.stdin.close()
except BrokenPipeError:
pass
borg.wait()
ret = borg.returncode
ret = borg.wait()
if ret < 0:
sys.stderr.write(f"error: process exited with signal {-ret}\n")
return 1

View File

@ -1,13 +1,4 @@
# List multiple roots, in case they live on different filesystems.
# Any root that was already reached while scanning another root is
# skipped, so it's OK if some of these actually share a filesystem.
roots: |
/
/boot
/efi
/usr
/var
root: "/"
one-file-system: true
exclude-caches: true
@ -18,6 +9,7 @@ exclude-caches: true
max-file-size: 500MiB
# Files/dirs to exclude from backup.
# Absolute paths here are matched relative to the configured root directory.
# Relative paths are treated as if starting with **/
# Paths ending in / will only match directories.
exclude: |