Compare commits

...

10 Commits

8 changed files with 345 additions and 146 deletions

8
.gitignore vendored
View File

@ -1,3 +1,7 @@
*.html
Borg.bin
cache/
config/
key.txt
passphrase
ssh/

View File

@ -10,7 +10,7 @@ all:
@echo
.PHONY: ctrl
ctrl: test-setup
ctrl: test-backup
.venv:
mkdir .venv
@ -19,15 +19,16 @@ ctrl: test-setup
.PHONY: test-backup
test-backup: .venv
.venv/bin/mypy backup.py
./backup.py --max-size 1GiB --one-file-system /tmp | grep -a 'bigf'
./backup.py | tr '\0' '\n' #-n
.PHONY: test-setup
test-setup:
shellcheck -f gcc initial-setup.sh
rm -rf /tmp/test-borg
mkdir /tmp/test-borg
: "normally this would be a git clone, but we want the working tree..."
git ls-files -z | tar --null -T - -cf - | tar -C /tmp/test-borg -xvf -
git clone . /tmp/test-borg
#: "normally this would be a git clone, but we want the working tree..."
#git ls-files -z | tar --null -T - -cf - | tar -C /tmp/test-borg -xvf -
/tmp/test-borg/initial-setup.sh
.PHONY: clean

View File

@ -5,9 +5,12 @@ name = "pypi"
[packages]
humanfriendly = "*"
wcmatch = "*"
pyyaml = "*"
[dev-packages]
mypy = "*"
types-pyyaml = "*"
[requires]
python_version = "3"

60
Pipfile.lock generated
View File

@ -1,7 +1,7 @@
{
"_meta": {
"hash": {
"sha256": "4f504c785e3ed5b203a82a5f40516507f80a01b8d1d0ad5a905f139cafc41a51"
"sha256": "902260ee06bc3bac3fe1ea87c09d4fc28e5aceef95635b3c72b43b6905050278"
},
"pipfile-spec": 6,
"requires": {
@ -16,6 +16,13 @@
]
},
"default": {
"bracex": {
"hashes": [
"sha256:01f715cd0ed7a622ec8b32322e715813f7574de531f09b70f6f3b2c10f682425",
"sha256:64e2a6d14de9c8e022cf40539ac8468ba7c4b99550a2b05fc87fd20e392e568f"
],
"version": "==2.1.1"
},
"humanfriendly": {
"hashes": [
"sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477",
@ -23,6 +30,49 @@
],
"index": "pypi",
"version": "==10.0"
},
"pyyaml": {
"hashes": [
"sha256:08682f6b72c722394747bddaf0aa62277e02557c0fd1c42cb853016a38f8dedf",
"sha256:0f5f5786c0e09baddcd8b4b45f20a7b5d61a7e7e99846e3c799b05c7c53fa696",
"sha256:129def1b7c1bf22faffd67b8f3724645203b79d8f4cc81f674654d9902cb4393",
"sha256:294db365efa064d00b8d1ef65d8ea2c3426ac366c0c4368d930bf1c5fb497f77",
"sha256:3b2b1824fe7112845700f815ff6a489360226a5609b96ec2190a45e62a9fc922",
"sha256:3bd0e463264cf257d1ffd2e40223b197271046d09dadf73a0fe82b9c1fc385a5",
"sha256:4465124ef1b18d9ace298060f4eccc64b0850899ac4ac53294547536533800c8",
"sha256:49d4cdd9065b9b6e206d0595fee27a96b5dd22618e7520c33204a4a3239d5b10",
"sha256:4e0583d24c881e14342eaf4ec5fbc97f934b999a6828693a99157fde912540cc",
"sha256:5accb17103e43963b80e6f837831f38d314a0495500067cb25afab2e8d7a4018",
"sha256:607774cbba28732bfa802b54baa7484215f530991055bb562efbed5b2f20a45e",
"sha256:6c78645d400265a062508ae399b60b8c167bf003db364ecb26dcab2bda048253",
"sha256:72a01f726a9c7851ca9bfad6fd09ca4e090a023c00945ea05ba1638c09dc3347",
"sha256:74c1485f7707cf707a7aef42ef6322b8f97921bd89be2ab6317fd782c2d53183",
"sha256:895f61ef02e8fed38159bb70f7e100e00f471eae2bc838cd0f4ebb21e28f8541",
"sha256:8c1be557ee92a20f184922c7b6424e8ab6691788e6d86137c5d93c1a6ec1b8fb",
"sha256:bb4191dfc9306777bc594117aee052446b3fa88737cd13b7188d0e7aa8162185",
"sha256:bfb51918d4ff3d77c1c856a9699f8492c612cde32fd3bcd344af9be34999bfdc",
"sha256:c20cfa2d49991c8b4147af39859b167664f2ad4561704ee74c1de03318e898db",
"sha256:cb333c16912324fd5f769fff6bc5de372e9e7a202247b48870bc251ed40239aa",
"sha256:d2d9808ea7b4af864f35ea216be506ecec180628aced0704e34aca0b040ffe46",
"sha256:d483ad4e639292c90170eb6f7783ad19490e7a8defb3e46f97dfe4bacae89122",
"sha256:dd5de0646207f053eb0d6c74ae45ba98c3395a571a2891858e87df7c9b9bd51b",
"sha256:e1d4970ea66be07ae37a3c2e48b5ec63f7ba6804bdddfdbd3cfd954d25a82e63",
"sha256:e4fac90784481d221a8e4b1162afa7c47ed953be40d31ab4629ae917510051df",
"sha256:fa5ae20527d8e831e8230cbffd9f8fe952815b2b7dae6ffec25318803a7528fc",
"sha256:fd7f6999a8070df521b6384004ef42833b9bd62cfee11a09bda1079b4b704247",
"sha256:fdc842473cd33f45ff6bce46aea678a54e3d21f1b61a7750ce3c498eedfe25d6",
"sha256:fe69978f3f768926cfa37b867e3843918e012cf83f680806599ddce33c2c68b0"
],
"index": "pypi",
"version": "==5.4.1"
},
"wcmatch": {
"hashes": [
"sha256:4d54ddb506c90b5a5bba3a96a1cfb0bb07127909e19046a71d689ddfb18c3617",
"sha256:9146b1ab9354e0797ef6ef69bc89cb32cb9f46d1b9eeef69c559aeec8f3bffb6"
],
"index": "pypi",
"version": "==8.2"
}
},
"develop": {
@ -69,6 +119,14 @@
],
"version": "==0.10.2"
},
"types-pyyaml": {
"hashes": [
"sha256:1d9e431e9f1f78a65ea957c558535a3b15ad67ea4912bce48a6c1b613dcf81ad",
"sha256:f1d1357168988e45fa20c65aecb3911462246a84809015dd889ebf8b1db74124"
],
"index": "pypi",
"version": "==5.4.10"
},
"typing-extensions": {
"hashes": [
"sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e",

View File

@ -6,15 +6,15 @@ Run on client:
sudo git clone https://git.jim.sh/jim/borg-setup.git /opt/borg
sudo /opt/borg/initial-setup.sh
Customize `/opt/borg/backup.yaml` as desired.
Customize `/opt/borg/config.yaml` as desired.
Cheat sheet
===========
*The copy of this file left on the client will have the variables
in this section filled in automatically*
*After setup, the copy of this file on the client will have the
variables in this section filled in automatically*
### Configuration
@ -98,6 +98,6 @@ Design
/etc/systemd/system/borg-backup.timer -> /opt/borg/borg-backup.timer
- Backup script `/opt/borg/backup.py` uses configuration in
`/opt/borg/backup.yaml` to generate our own list of files, excluding
`/opt/borg/config.yaml` to generate our own list of files, excluding
anything that's too large by default. This requires borg 1.2.0b1
or newer, which is why the setup scripts download a specific version.

332
backup.py
View File

@ -1,62 +1,108 @@
#!.venv/bin/python
# Scan filesystem to generate a list of files to back up, based on a
# configuration file. Pass this list to borg to actually create the
# backup. Execute a notification script on the remote server to
# report the backup status.
import os
import re
import sys
import stat
from typing import Optional, Tuple
import humanfriendly # type: ignore
import pathlib
import subprocess
import typing
import yaml
import wcmatch.glob # type: ignore
import re
import dataclasses
import enum
import humanfriendly # type: ignore
class MatchResult(enum.Enum):
INCLUDE_IF_SIZE_OK = 0
INCLUDE_ALWAYS = 1
EXCLUDE_ALWAYS = 2
class Config:
root: bytes
max_file_size: typing.Optional[int]
one_file_system: bool
exclude_caches: bool
exclude: list[bytes]
force_include: list[bytes]
notify_email: typing.Optional[str]
@dataclasses.dataclass
class PatternRule:
re_inc: list[re.Pattern]
re_exc: list[re.Pattern]
def __init__(self, configfile: str):
# Read config
with open(configfile, 'r') as f:
config = yaml.safe_load(f)
self.root = config['root'].encode()
self.one_file_system = config.get('one-file-system', False)
self.exclude_caches = config.get('exclude-caches', False)
def match(self, path: str) -> Tuple[bool, bool]:
if "big" in path:
print(self, file=sys.stderr)
if 'max-file-size' in config:
self.max_file_size = humanfriendly.parse_size(
config['max-file-size'])
else:
self.max_file_size = None
for inc in self.re_inc:
if inc.match(path):
break
else:
return
def process_match_list(config_name):
raw = config.get(config_name, '').encode().split(b'\n')
pats = []
# Prepend '**/' to any relative patterns
for x in raw:
if not len(x):
continue
if x.startswith(b'/'):
pats.append(x)
else:
pats.append(b'**/' + x)
return pats
for exc in self.re_exc:
if exc.match(path):
return False
self.exclude = process_match_list('exclude')
self.force_include = process_match_list('force-include')
self.notify_email = config.get('notify-email', None)
# Compile patterns
flags = (wcmatch.glob.GLOBSTAR |
wcmatch.glob.DOTGLOB |
wcmatch.glob.NODOTDIR |
wcmatch.glob.EXTGLOB |
wcmatch.glob.BRACE)
# Path matches if it matches at least one regex in "a" and no
# regex in "b"
(a, b) = wcmatch.glob.translate(self.exclude, flags=flags)
self.exclude_re = ([ re.compile(x) for x in a ],
[ re.compile(x) for x in b ])
(a, b) = wcmatch.glob.translate(self.force_include, flags=flags)
self.force_include_re = ([ re.compile(x) for x in a ],
[ re.compile(x) for x in b ])
def match_re(self, re: tuple[list[typing.Pattern],
list[typing.Pattern]],
path: bytes, is_dir: bool):
# If it's a directory, try matching against a trailing slash
# first.
if is_dir and self.match_re(re, path + b'/', False):
return True
# Path matches if it matches at least one regex in
# re[0] and no regex in re[1].
for a in re[0]:
if a.match(path):
for b in re[1]:
if b.match(path):
return False
return True
return False
class Lister:
def __init__(self, one_file_system: bool, max_size: bool):
self.one_file_system = one_file_system
self.max_size = max_size
if max_size is None:
max_size = float('inf')
self.stdout = os.fdopen(sys.stdout.fileno(), "wb", closefd=False)
class Backup:
def __init__(self, config: Config, dry_run: bool):
self.config = config
self.dry_run = dry_run
# Remember files we've skipped because they were too big, so that
# we can warn again at the end.
self.skipped_size: set[bytes] = set()
# Remember errors
self.skipped_error: set[bytes] = set()
def __del__(self):
self.stdout.close()
# All logged messages, with severity
self.logs: list[tuple[str, str]] = []
def out(self, path: bytes):
# Use '\0\n' as a separator, so that we can both separate it
# cleanly in Borg, and also view it on stdout.
self.stdout.write(path + b'\0\n')
self.outfile.write(path + (b'\n' if self.dry_run else b'\0'))
def log(self, letter: str, msg: str):
colors = { 'E': 31, 'W': 33, 'I': 36 };
@ -65,93 +111,104 @@ class Lister:
else:
c = 0
sys.stderr.write(f"\033[1;{c}m{letter}:\033[22m {msg}\033[0m\n")
self.logs.append((letter, msg))
def scan(self, path: bytes,
parent_st: os.stat_result=None,
rules: list[PatternRule]=[]):
def run(self, outfile: typing.IO[bytes]):
self.outfile = outfile
# Base should not end with a slash, but full path should
if self.config.root.endswith(b'/'):
base = self.config.root[:-1]
path = self.config.root
else:
base = self.config.root
path = self.config.root + b'/'
self.scan(base, path)
def scan(self, base: bytes, path: bytes,
parent_st: os.stat_result=None):
"""If the given path should be backed up, print it. If it's
a directory and its contents should be included, recurse."""
a directory and its contents should be included, recurse.
"""
# Copy the path in string form, for logging and pathspec
# parsing. Otherwise, we use bytes directly.
if base.endswith(b'/'):
raise Exception("base must not end with /")
relpath = path[len(base):]
if not relpath.startswith(b'/'):
raise Exception(f"relative path (from {repr(base)}, {repr(path)})"
+ f" must start with /")
# Copy the path in string form, for logging. Otherwise, we use
# bytes directly.
pathstr = path.decode(errors='backslashreplace')
try:
# See if we match any rules
for r in rules:
if r.match(pathstr):
self.log('I', f"ignore {pathstr}")
return
# Stat the path
st = os.lstat(path)
is_dir = stat.S_ISDIR(st.st_mode)
is_reg = stat.S_ISREG(st.st_mode)
# See if there's a reason to exclude it
exclude_reason = None
if self.config.match_re(self.config.exclude_re, relpath, is_dir):
# Config file says to exclude
exclude_reason = ('I', f"skipping, excluded by config file")
elif (self.config.one_file_system
and parent_st is not None
and is_dir
and st.st_dev != parent_st.st_dev):
# Crosses a mount point
exclude_reason = ('I', "skipping, on different filesystem")
elif (self.config.max_file_size
and is_reg
and (st.st_blocks * 512) > self.config.max_file_size):
# Too big
def format_size(n):
return humanfriendly.format_size(
n, keep_width=True, binary=True)
a = format_size(st.st_blocks * 512)
b = format_size(self.config.max_file_size)
exclude_reason = ('W', f"file size {a} exceeds limit {b}")
# If we have a reason to exclude it, stop now unless it's
# force-included
force = self.config.match_re(
self.config.force_include_re, relpath, is_dir)
if exclude_reason and not force:
self.log(exclude_reason[0], f"{exclude_reason[1]}: {pathstr}")
return
# Print path for Borg
self.out(path)
# Process directories
if is_dir:
# Skip if it crosses a mount point
if self.one_file_system:
if parent_st is not None and st.st_dev != parent_st.st_dev:
self.log('I', f"skipping {pathstr}: "
"on different filesystem")
return
# Add contents of any .nobackup file to our
# parser rules
child_rules = rules
# Skip if it contains CACHEDIR.TAG
# (mirroring the --exclude-caches borg option)
if self.config.exclude_caches:
try:
tag = b'Signature: 8a477f597d28d172789f06886806bc55'
with open(path + b'/CACHEDIR.TAG', 'rb') as f:
if f.read(len(tag)) == tag:
self.log(
'I', f"skipping, cache dir: {pathstr}")
return
except:
pass
try:
def prepend_base(regex):
if regex[0] != '^':
raise Exception(f'bad regex: {regex}')
return '^' + os.path.join(pathstr, '') + regex[1:]
with open(os.path.join(path, b".nobackup")) as f:
rule = PatternRule([], [])
for line in f:
if line[0] == '#':
continue
(inc, exc) = wcmatch.glob.translate(
[ line.rstrip('\r\n') ],
flags=(wcmatch.glob.NEGATE |
wcmatch.glob.GLOBSTAR |
wcmatch.glob.DOTGLOB |
wcmatch.glob.EXTGLOB |
wcmatch.glob.BRACE))
for x in inc:
rule.re_inc.append(re.compile(prepend_base(x)))
for x in exc:
rule.re_exc.append(re.compile(prepend_base(x)))
child_rules.append(rule)
except FileNotFoundError:
pass
# Recurse and process each entry
# Recurse
with os.scandir(path) as it:
for entry in it:
self.scan(entry.path, st, child_rules)
else:
# For regular files, ensure they're not too big
if stat.S_ISREG(st.st_mode) and st.st_size > self.max_size:
def format_size(n):
return humanfriendly.format_size(
n, keep_width=True, binary=True)
a = format_size(st.st_size)
b = format_size(self.max_size)
self.log('W', f"skipping {pathstr}: "
+ f"file size {a} exceeds limit {b}")
self.skipped_size.add(path)
return
# Every other filename gets printed; devices, symlinks, etc
# will get handled by Borg
self.out(path)
self.scan(base=base, path=entry.path,
parent_st=st)
except PermissionError as e:
self.log('E', f"can't read {pathstr}")
self.skipped_error.add(path)
return
def main(argv):
def main(argv: list[str]):
import argparse
def humansize(string):
@ -159,22 +216,51 @@ def main(argv):
parser = argparse.ArgumentParser(
prog=argv[0],
description="Build up a directory and file list for backups")
description="Back up the local system using borg",
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('-s', '--max-size', type=humansize,
help="Ignore files bigger than this, by default")
parser.add_argument('-x', '--one-file-system', action='store_true',
help="Don't cross mount points when recursing")
parser.add_argument('dirs', metavar='DIR', nargs='+',
help="Root directories to scan recursively")
base = pathlib.Path(__file__).parent
parser.add_argument('-c', '--config',
help="Config file", default=str(base / "config.yaml"))
parser.add_argument('-b', '--borg',
help="Borg command", default=str(base / "borg.sh"))
parser.add_argument('-n', '--dry-run', action="store_true",
help="Just print filenames, don't run borg")
args = parser.parse_args()
config = Config(args.config)
lister = Lister(one_file_system=args.one_file_system,
max_size=args.max_size)
for p in args.dirs:
lister.scan(os.fsencode(p))
backup = Backup(config, args.dry_run)
if args.dry_run:
with open(os.devnull, "wb") as out:
backup.run(out)
else:
borg = subprocess.Popen([args.borg,
"create",
"--verbose",
"--list",
"--filter", "E",
"--stats",
"--checkpoint-interval", "900",
"--compression", "zstd,3",
"--paths-from-stdin",
"--paths-delimiter", "\\0",
"::'{hostname}-{now:%Y%m%d-%H%M%S}'"],
stdin=subprocess.PIPE)
if borg.stdin is None:
raise Exception("no pipe")
backup.run(borg.stdin)
borg.stdin.close()
ret = borg.wait()
if ret < 0:
sys.stderr.write(f"error: process exited with signal {-ret}\n")
return 1
elif ret != 0:
sys.stderr.write(f"error: process exited with return code {ret}\n")
return ret
return 0
if __name__ == "__main__":
import sys
main(sys.argv)
raise SystemExit(main(sys.argv))

30
config.yaml Normal file
View File

@ -0,0 +1,30 @@
root: "/"
one-file-system: true
exclude-caches: true
# Files larger than this are excluded. If a large file isn't
# explicitly mentioned in "excludes" below, it also generates a
# warning. Note that this counts used blocks, so files with large
# holes will still be considered small (since they'll compress easily)
max-file-size: 500MiB
# Files/dirs to exclude from backup.
# Absolute paths here start at the root directory.
# Relative paths are treated as if starting with **/
# Paths ending in / will only match directories.
exclude: |
/var/tmp/
/tmp/
/var/cache/apt/archives/
Steam/steamapps/
Steam/ubuntu*/
.cache/
# Files that are always included, even if they would have been
# excluded due to file size or the "exclude" list.
# Matching rules are the same as above.
force-include: |
.git/objects/pack/*.pack
# Email address for notification at end of backup
notify-email: jim@jim.sh

View File

@ -11,6 +11,7 @@ BORG_SHA256=8dd6c2769d9bf3ca7a65ebf6781302029fc3b15105aff63d33195c007f897360
# Main dir is where this repo was checked out
BORG_DIR="$(realpath "$(dirname "$0")")"
cd "${BORG_DIR}"
# This is named with uppercase so that it doesn't tab-complete for
# "./b<tab>", which should give us "./borg.sh"
@ -29,14 +30,14 @@ trap 'error_handler ${BASH_SOURCE} ${LINENO} $?' ERR
set -o errexit
set -o errtrace
if [ -e "$BORG_DIR/.setup-complete" ]; then
if [ -e ".setup-complete" ]; then
echo "Error: BORG_DIR $BORG_DIR was already set up; giving up."
echo "Use \"git clean\" to return it to original state if desired"
exit 1
fi
# Make a temp dir to work in
TMP=$(mktemp -d --tmpdir="$BORG_DIR")
TMP=$(mktemp -d)
# Install some cleanup handlers
cleanup()
@ -71,7 +72,8 @@ error() { msg 31 "Error:" "$@" ; exit 1 ; }
# Create pip environment
setup_venv()
{
( cd "${BORG_DIR}" && mkdir .venv && pipenv install )
mkdir .venv
pipenv install
}
# Install borg
@ -127,8 +129,8 @@ generate_keys()
{
PASS_SSH=$(print_random_key)
PASS_REPOKEY=$(print_random_key)
echo "$PASS_REPOKEY" > "${BORG_DIR}/passphrase"
chmod 600 "${BORG_DIR}/passphrase"
echo "$PASS_REPOKEY" > passphrase
chmod 600 passphrase
}
# Run a command on the remote host over an existing SSH tunnel
@ -192,6 +194,7 @@ EOF
run_ssh_command "if cmp -s $backup $keys; then rm $backup ; fi"
run_ssh_command "cat >> .ssh/authorized_keys" <<EOF
command="$cmd --append-only",restrict $(cat "$SSH/id_ecdsa_appendonly.pub")
command="borg/notify.sh",restrict $(cat "$SSH/id_ecdsa_appendonly.pub")
command="$cmd",restrict $(cat "$SSH/id_ecdsa.pub")
EOF
@ -216,9 +219,9 @@ create_repo()
export_keys()
{
log "Exporting keys"
$BORG key export --paper '' "${BORG_DIR}/key.txt"
chmod 600 "${BORG_DIR}/key.txt"
cat >>"${BORG_DIR}/key.txt" <<EOF
$BORG key export --paper '' key.txt
chmod 600 key.txt
cat >>key.txt <<EOF
Repository: ${BORG_REPO}
Passphrase: ${PASS_REPOKEY}
@ -291,7 +294,19 @@ update_readme()
-e "s!\${BACKUP_USER}!${BACKUP_USER}!g" \
-e "s!\${BACKUP_HOST}!${BACKUP_HOST}!g" \
-e "s!\${BACKUP_REPO}!${BACKUP_REPO}!g" \
"${BORG_DIR}/README.md"
README.md
}
git_setup()
{
if ! git checkout -b "setup-$(hostname)" ; then
warn "Git setup failed; ignoring"
return
fi
log "Committing local changes to git"
git add README.md borg-backup.service borg-backup.timer borg.sh
git commit -a -m "autocommit after initial setup on $(hostname)"
}
log "Configuration:"
@ -308,6 +323,7 @@ create_repo
export_keys
configure_systemd
update_readme
git_setup
echo
notice "Add these two passwords to Bitwarden:"
@ -319,10 +335,11 @@ notice ""
notice " Name: borg $(hostname)"
notice " Username: repo key"
notice " Password: $PASS_REPOKEY"
notice " Notes: (paste the following key)"
sed -ne '/BORG/,/^$/{/./p}' "${BORG_DIR}/key.txt"
notice ""
notice ""
notice "Test the backup file list with"
notice " sudo ${BORG_DIR}/backup.py --dry-run"
notice "and make any necessary adjustments to:"
notice " ${BORG_DIR}/config.yaml"
echo