Implement filesystem scanning with configurable filters
This commit is contained in:
parent
6978cfc012
commit
0039ca1ee0
4
Makefile
4
Makefile
|
@ -10,7 +10,7 @@ all:
|
|||
@echo
|
||||
|
||||
.PHONY: ctrl
|
||||
ctrl: test-setup
|
||||
ctrl: test-backup
|
||||
|
||||
.venv:
|
||||
mkdir .venv
|
||||
|
@ -19,7 +19,7 @@ ctrl: test-setup
|
|||
.PHONY: test-backup
|
||||
test-backup: .venv
|
||||
.venv/bin/mypy backup.py
|
||||
./backup.py --max-size 1GiB --one-file-system /tmp | grep -a 'bigf'
|
||||
./backup.py -n >/dev/null
|
||||
|
||||
.PHONY: test-setup
|
||||
test-setup:
|
||||
|
|
3
Pipfile
3
Pipfile
|
@ -5,9 +5,12 @@ name = "pypi"
|
|||
|
||||
[packages]
|
||||
humanfriendly = "*"
|
||||
wcmatch = "*"
|
||||
pyyaml = "*"
|
||||
|
||||
[dev-packages]
|
||||
mypy = "*"
|
||||
types-pyyaml = "*"
|
||||
|
||||
[requires]
|
||||
python_version = "3"
|
||||
|
|
60
Pipfile.lock
generated
60
Pipfile.lock
generated
|
@ -1,7 +1,7 @@
|
|||
{
|
||||
"_meta": {
|
||||
"hash": {
|
||||
"sha256": "4f504c785e3ed5b203a82a5f40516507f80a01b8d1d0ad5a905f139cafc41a51"
|
||||
"sha256": "902260ee06bc3bac3fe1ea87c09d4fc28e5aceef95635b3c72b43b6905050278"
|
||||
},
|
||||
"pipfile-spec": 6,
|
||||
"requires": {
|
||||
|
@ -16,6 +16,13 @@
|
|||
]
|
||||
},
|
||||
"default": {
|
||||
"bracex": {
|
||||
"hashes": [
|
||||
"sha256:01f715cd0ed7a622ec8b32322e715813f7574de531f09b70f6f3b2c10f682425",
|
||||
"sha256:64e2a6d14de9c8e022cf40539ac8468ba7c4b99550a2b05fc87fd20e392e568f"
|
||||
],
|
||||
"version": "==2.1.1"
|
||||
},
|
||||
"humanfriendly": {
|
||||
"hashes": [
|
||||
"sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477",
|
||||
|
@ -23,6 +30,49 @@
|
|||
],
|
||||
"index": "pypi",
|
||||
"version": "==10.0"
|
||||
},
|
||||
"pyyaml": {
|
||||
"hashes": [
|
||||
"sha256:08682f6b72c722394747bddaf0aa62277e02557c0fd1c42cb853016a38f8dedf",
|
||||
"sha256:0f5f5786c0e09baddcd8b4b45f20a7b5d61a7e7e99846e3c799b05c7c53fa696",
|
||||
"sha256:129def1b7c1bf22faffd67b8f3724645203b79d8f4cc81f674654d9902cb4393",
|
||||
"sha256:294db365efa064d00b8d1ef65d8ea2c3426ac366c0c4368d930bf1c5fb497f77",
|
||||
"sha256:3b2b1824fe7112845700f815ff6a489360226a5609b96ec2190a45e62a9fc922",
|
||||
"sha256:3bd0e463264cf257d1ffd2e40223b197271046d09dadf73a0fe82b9c1fc385a5",
|
||||
"sha256:4465124ef1b18d9ace298060f4eccc64b0850899ac4ac53294547536533800c8",
|
||||
"sha256:49d4cdd9065b9b6e206d0595fee27a96b5dd22618e7520c33204a4a3239d5b10",
|
||||
"sha256:4e0583d24c881e14342eaf4ec5fbc97f934b999a6828693a99157fde912540cc",
|
||||
"sha256:5accb17103e43963b80e6f837831f38d314a0495500067cb25afab2e8d7a4018",
|
||||
"sha256:607774cbba28732bfa802b54baa7484215f530991055bb562efbed5b2f20a45e",
|
||||
"sha256:6c78645d400265a062508ae399b60b8c167bf003db364ecb26dcab2bda048253",
|
||||
"sha256:72a01f726a9c7851ca9bfad6fd09ca4e090a023c00945ea05ba1638c09dc3347",
|
||||
"sha256:74c1485f7707cf707a7aef42ef6322b8f97921bd89be2ab6317fd782c2d53183",
|
||||
"sha256:895f61ef02e8fed38159bb70f7e100e00f471eae2bc838cd0f4ebb21e28f8541",
|
||||
"sha256:8c1be557ee92a20f184922c7b6424e8ab6691788e6d86137c5d93c1a6ec1b8fb",
|
||||
"sha256:bb4191dfc9306777bc594117aee052446b3fa88737cd13b7188d0e7aa8162185",
|
||||
"sha256:bfb51918d4ff3d77c1c856a9699f8492c612cde32fd3bcd344af9be34999bfdc",
|
||||
"sha256:c20cfa2d49991c8b4147af39859b167664f2ad4561704ee74c1de03318e898db",
|
||||
"sha256:cb333c16912324fd5f769fff6bc5de372e9e7a202247b48870bc251ed40239aa",
|
||||
"sha256:d2d9808ea7b4af864f35ea216be506ecec180628aced0704e34aca0b040ffe46",
|
||||
"sha256:d483ad4e639292c90170eb6f7783ad19490e7a8defb3e46f97dfe4bacae89122",
|
||||
"sha256:dd5de0646207f053eb0d6c74ae45ba98c3395a571a2891858e87df7c9b9bd51b",
|
||||
"sha256:e1d4970ea66be07ae37a3c2e48b5ec63f7ba6804bdddfdbd3cfd954d25a82e63",
|
||||
"sha256:e4fac90784481d221a8e4b1162afa7c47ed953be40d31ab4629ae917510051df",
|
||||
"sha256:fa5ae20527d8e831e8230cbffd9f8fe952815b2b7dae6ffec25318803a7528fc",
|
||||
"sha256:fd7f6999a8070df521b6384004ef42833b9bd62cfee11a09bda1079b4b704247",
|
||||
"sha256:fdc842473cd33f45ff6bce46aea678a54e3d21f1b61a7750ce3c498eedfe25d6",
|
||||
"sha256:fe69978f3f768926cfa37b867e3843918e012cf83f680806599ddce33c2c68b0"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==5.4.1"
|
||||
},
|
||||
"wcmatch": {
|
||||
"hashes": [
|
||||
"sha256:4d54ddb506c90b5a5bba3a96a1cfb0bb07127909e19046a71d689ddfb18c3617",
|
||||
"sha256:9146b1ab9354e0797ef6ef69bc89cb32cb9f46d1b9eeef69c559aeec8f3bffb6"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==8.2"
|
||||
}
|
||||
},
|
||||
"develop": {
|
||||
|
@ -69,6 +119,14 @@
|
|||
],
|
||||
"version": "==0.10.2"
|
||||
},
|
||||
"types-pyyaml": {
|
||||
"hashes": [
|
||||
"sha256:1d9e431e9f1f78a65ea957c558535a3b15ad67ea4912bce48a6c1b613dcf81ad",
|
||||
"sha256:f1d1357168988e45fa20c65aecb3911462246a84809015dd889ebf8b1db74124"
|
||||
],
|
||||
"index": "pypi",
|
||||
"version": "==5.4.10"
|
||||
},
|
||||
"typing-extensions": {
|
||||
"hashes": [
|
||||
"sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e",
|
||||
|
|
|
@ -6,7 +6,7 @@ Run on client:
|
|||
sudo git clone https://git.jim.sh/jim/borg-setup.git /opt/borg
|
||||
sudo /opt/borg/initial-setup.sh
|
||||
|
||||
Customize `/opt/borg/backup.yaml` as desired.
|
||||
Customize `/opt/borg/config.yaml` as desired.
|
||||
|
||||
|
||||
|
||||
|
@ -98,6 +98,6 @@ Design
|
|||
/etc/systemd/system/borg-backup.timer -> /opt/borg/borg-backup.timer
|
||||
|
||||
- Backup script `/opt/borg/backup.py` uses configuration in
|
||||
`/opt/borg/backup.yaml` to generate our own list of files, excluding
|
||||
`/opt/borg/config.yaml` to generate our own list of files, excluding
|
||||
anything that's too large by default. This requires borg 1.2.0b1
|
||||
or newer, which is why the setup scripts download a specific version.
|
||||
|
|
273
backup.py
273
backup.py
|
@ -1,62 +1,106 @@
|
|||
#!.venv/bin/python
|
||||
|
||||
# Scan filesystem to generate a list of files to back up, based on a
|
||||
# configuration file. Pass this list to borg to actually create the
|
||||
# backup. Execute a notification script on the remote server to
|
||||
# report the backup status.
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import stat
|
||||
from typing import Optional, Tuple
|
||||
import humanfriendly # type: ignore
|
||||
import pathlib
|
||||
|
||||
import typing
|
||||
|
||||
import yaml
|
||||
import wcmatch.glob # type: ignore
|
||||
import re
|
||||
import dataclasses
|
||||
import enum
|
||||
import humanfriendly # type: ignore
|
||||
|
||||
class MatchResult(enum.Enum):
|
||||
INCLUDE_IF_SIZE_OK = 0
|
||||
INCLUDE_ALWAYS = 1
|
||||
EXCLUDE_ALWAYS = 2
|
||||
class Config:
|
||||
root: str
|
||||
max_file_size: typing.Optional[int]
|
||||
one_file_system: bool
|
||||
exclude: list[bytes]
|
||||
force_include: list[bytes]
|
||||
notify_email: typing.Optional[str]
|
||||
|
||||
@dataclasses.dataclass
|
||||
class PatternRule:
|
||||
re_inc: list[re.Pattern]
|
||||
re_exc: list[re.Pattern]
|
||||
def __init__(self, configfile: str):
|
||||
# Read config
|
||||
with open(configfile, 'r') as f:
|
||||
config = yaml.safe_load(f)
|
||||
self.root = config['root'].encode()
|
||||
self.one_file_system = config.get('one-file-system', False)
|
||||
|
||||
def match(self, path: str) -> Tuple[bool, bool]:
|
||||
if "big" in path:
|
||||
print(self, file=sys.stderr)
|
||||
if 'max-file-size' in config:
|
||||
self.max_file_size = humanfriendly.parse_size(
|
||||
config['max-file-size'])
|
||||
else:
|
||||
self.max_file_size = None
|
||||
|
||||
for inc in self.re_inc:
|
||||
if inc.match(path):
|
||||
break
|
||||
else:
|
||||
return
|
||||
utf = config.get('exclude', '').encode()
|
||||
self.exclude = list(filter(len, utf.split(b'\n')))
|
||||
|
||||
for exc in self.re_exc:
|
||||
if exc.match(path):
|
||||
return False
|
||||
return True
|
||||
utf = config.get('force-include', '').encode()
|
||||
self.force_include = list(filter(len, utf.split(b'\n')))
|
||||
|
||||
class Lister:
|
||||
def __init__(self, one_file_system: bool, max_size: bool):
|
||||
self.one_file_system = one_file_system
|
||||
self.max_size = max_size
|
||||
if max_size is None:
|
||||
max_size = float('inf')
|
||||
self.stdout = os.fdopen(sys.stdout.fileno(), "wb", closefd=False)
|
||||
self.notify_email = config.get('notify-email', None)
|
||||
|
||||
# Remember files we've skipped because they were too big, so that
|
||||
# we can warn again at the end.
|
||||
self.skipped_size: set[bytes] = set()
|
||||
# Compile patterns
|
||||
flags = (wcmatch.glob.GLOBSTAR |
|
||||
wcmatch.glob.DOTGLOB |
|
||||
wcmatch.glob.NODOTDIR |
|
||||
wcmatch.glob.EXTGLOB |
|
||||
wcmatch.glob.BRACE)
|
||||
|
||||
# Remember errors
|
||||
self.skipped_error: set[bytes] = set()
|
||||
# Path matches if it matches at least one regex in "a" and no
|
||||
# regex in "b"
|
||||
(a, b) = wcmatch.glob.translate(self.exclude, flags=flags)
|
||||
self.exclude_re = ([ re.compile(x) for x in a ],
|
||||
[ re.compile(x) for x in b ])
|
||||
|
||||
def __del__(self):
|
||||
self.stdout.close()
|
||||
(a, b) = wcmatch.glob.translate(self.force_include, flags=flags)
|
||||
self.force_include_re = ([ re.compile(x) for x in a ],
|
||||
[ re.compile(x) for x in b ])
|
||||
|
||||
def match_compiled(self, re: tuple[list[typing.Pattern],
|
||||
list[typing.Pattern]],
|
||||
path: bytes):
|
||||
# Path matches if it matches at least one regex in
|
||||
# re[0] and no regex in re[1]
|
||||
for a in re[0]:
|
||||
if a.match(path):
|
||||
for b in re[1]:
|
||||
if b.match(path):
|
||||
return False
|
||||
return True
|
||||
return False
|
||||
|
||||
def __str__(self):
|
||||
d = { 'root': self.root }
|
||||
if self.max_file_size:
|
||||
d['max-file-size'] = self.max_file_size
|
||||
if self.exclude:
|
||||
utf = b'\n'.join(self.exclude)
|
||||
d['exclude'] = utf.decode(errors='backslashreplace')
|
||||
if self.force_include:
|
||||
utf = b'\n'.join(self.force_include)
|
||||
d['force-include'] = utf.decode(errors='backslashreplace')
|
||||
if self.notify_email:
|
||||
d['notify-email'] = self.notify_email
|
||||
return yaml.dump(d, default_flow_style=False)
|
||||
|
||||
class Backup:
|
||||
def __init__(self, config: Config, dry_run: bool, out: typing.BinaryIO):
|
||||
self.config = config
|
||||
self.outfile = out
|
||||
self.dry_run = dry_run
|
||||
|
||||
# All logged messages, with severity
|
||||
self.logs: list[tuple[str, str]] = []
|
||||
|
||||
def out(self, path: bytes):
|
||||
# Use '\0\n' as a separator, so that we can both separate it
|
||||
# cleanly in Borg, and also view it on stdout.
|
||||
self.stdout.write(path + b'\0\n')
|
||||
self.outfile.write(path + (b'\n' if self.dry_run else b'\0'))
|
||||
|
||||
def log(self, letter: str, msg: str):
|
||||
colors = { 'E': 31, 'W': 33, 'I': 36 };
|
||||
|
@ -65,93 +109,78 @@ class Lister:
|
|||
else:
|
||||
c = 0
|
||||
sys.stderr.write(f"\033[1;{c}m{letter}:\033[22m {msg}\033[0m\n")
|
||||
self.logs.append((letter, msg))
|
||||
|
||||
def scan(self, path: bytes,
|
||||
parent_st: os.stat_result=None,
|
||||
rules: list[PatternRule]=[]):
|
||||
def run(self):
|
||||
self.scan(self.config.root)
|
||||
|
||||
def scan(self, path: bytes, parent_st: os.stat_result=None):
|
||||
"""If the given path should be backed up, print it. If it's
|
||||
a directory and its contents should be included, recurse."""
|
||||
a directory and its contents should be included, recurse."""
|
||||
|
||||
# Copy the path in string form, for logging and pathspec
|
||||
# parsing. Otherwise, we use bytes directly.
|
||||
# Copy the path in string form, for logging. Otherwise, we use
|
||||
# bytes directly.
|
||||
pathstr = path.decode(errors='backslashreplace')
|
||||
|
||||
try:
|
||||
# See if we match any rules
|
||||
for r in rules:
|
||||
if r.match(pathstr):
|
||||
self.log('I', f"ignore {pathstr}")
|
||||
return
|
||||
# See if this path should be excluded or force-included
|
||||
|
||||
# Stat the path
|
||||
st = os.lstat(path)
|
||||
is_dir = stat.S_ISDIR(st.st_mode)
|
||||
# Only stat the file when we need it
|
||||
cached_st = None
|
||||
def st():
|
||||
nonlocal cached_st
|
||||
if not cached_st:
|
||||
cached_st = os.lstat(path)
|
||||
return cached_st
|
||||
|
||||
if is_dir:
|
||||
# Skip if it crosses a mount point
|
||||
if self.one_file_system:
|
||||
if parent_st is not None and st.st_dev != parent_st.st_dev:
|
||||
self.log('I', f"skipping {pathstr}: "
|
||||
"on different filesystem")
|
||||
return
|
||||
# See if there's a reason to exclude it
|
||||
exclude_reason = None
|
||||
|
||||
# Add contents of any .nobackup file to our
|
||||
# parser rules
|
||||
child_rules = rules
|
||||
if self.config.match_compiled(self.config.exclude_re, path):
|
||||
# Config file says to exclude
|
||||
exclude_reason = ('I', f"skipping, excluded by config file")
|
||||
|
||||
try:
|
||||
def prepend_base(regex):
|
||||
if regex[0] != '^':
|
||||
raise Exception(f'bad regex: {regex}')
|
||||
return '^' + os.path.join(pathstr, '') + regex[1:]
|
||||
with open(os.path.join(path, b".nobackup")) as f:
|
||||
rule = PatternRule([], [])
|
||||
for line in f:
|
||||
if line[0] == '#':
|
||||
continue
|
||||
(inc, exc) = wcmatch.glob.translate(
|
||||
[ line.rstrip('\r\n') ],
|
||||
flags=(wcmatch.glob.NEGATE |
|
||||
wcmatch.glob.GLOBSTAR |
|
||||
wcmatch.glob.DOTGLOB |
|
||||
wcmatch.glob.EXTGLOB |
|
||||
wcmatch.glob.BRACE))
|
||||
for x in inc:
|
||||
rule.re_inc.append(re.compile(prepend_base(x)))
|
||||
for x in exc:
|
||||
rule.re_exc.append(re.compile(prepend_base(x)))
|
||||
child_rules.append(rule)
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
elif (stat.S_ISDIR(st().st_mode)
|
||||
and self.config.one_file_system
|
||||
and parent_st is not None
|
||||
and st().st_dev != parent_st.st_dev):
|
||||
# Crosses a mount point
|
||||
exclude_reason = ('I', "skipping, on different filesystem")
|
||||
|
||||
# Recurse and process each entry
|
||||
elif (stat.S_ISREG(st().st_mode)
|
||||
and self.config.max_file_size
|
||||
and st().st_size > self.config.max_file_size):
|
||||
# Too big
|
||||
def format_size(n):
|
||||
return humanfriendly.format_size(
|
||||
n, keep_width=True, binary=True)
|
||||
a = format_size(st().st_size)
|
||||
b = format_size(self.config.max_file_size)
|
||||
exclude_reason = ('W', f"file size {a} exceeds limit {b}")
|
||||
|
||||
# If we have a reason to exclude it, stop now unless it's
|
||||
# force-included
|
||||
if (exclude_reason
|
||||
and not self.config.match_compiled(
|
||||
self.config.force_include_re, path)):
|
||||
|
||||
self.log(exclude_reason[0], f"{exclude_reason[1]}: {pathstr}")
|
||||
return
|
||||
|
||||
# Print name of this path
|
||||
self.out(path)
|
||||
|
||||
# If it's a directory, recurse
|
||||
if stat.S_ISDIR(st().st_mode):
|
||||
with os.scandir(path) as it:
|
||||
for entry in it:
|
||||
self.scan(entry.path, st, child_rules)
|
||||
|
||||
else:
|
||||
# For regular files, ensure they're not too big
|
||||
if stat.S_ISREG(st.st_mode) and st.st_size > self.max_size:
|
||||
def format_size(n):
|
||||
return humanfriendly.format_size(
|
||||
n, keep_width=True, binary=True)
|
||||
a = format_size(st.st_size)
|
||||
b = format_size(self.max_size)
|
||||
self.log('W', f"skipping {pathstr}: "
|
||||
+ f"file size {a} exceeds limit {b}")
|
||||
self.skipped_size.add(path)
|
||||
return
|
||||
|
||||
# Every other filename gets printed; devices, symlinks, etc
|
||||
# will get handled by Borg
|
||||
self.out(path)
|
||||
self.scan(path=entry.path, parent_st=st())
|
||||
|
||||
except PermissionError as e:
|
||||
self.log('E', f"can't read {pathstr}")
|
||||
self.skipped_error.add(path)
|
||||
return
|
||||
|
||||
def main(argv):
|
||||
def main(argv: list[str]):
|
||||
import argparse
|
||||
|
||||
def humansize(string):
|
||||
|
@ -159,21 +188,19 @@ def main(argv):
|
|||
|
||||
parser = argparse.ArgumentParser(
|
||||
prog=argv[0],
|
||||
description="Build up a directory and file list for backups")
|
||||
description="Back up the local system using borg",
|
||||
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
|
||||
|
||||
parser.add_argument('-s', '--max-size', type=humansize,
|
||||
help="Ignore files bigger than this, by default")
|
||||
parser.add_argument('-x', '--one-file-system', action='store_true',
|
||||
help="Don't cross mount points when recursing")
|
||||
parser.add_argument('dirs', metavar='DIR', nargs='+',
|
||||
help="Root directories to scan recursively")
|
||||
default_config = str(pathlib.Path(__file__).parent / "config.yaml")
|
||||
parser.add_argument('-c', '--config',
|
||||
help="Config file", default=default_config)
|
||||
parser.add_argument('-n', '--dry-run', action="store_true",
|
||||
help="Just print filenames, don't run borg")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
lister = Lister(one_file_system=args.one_file_system,
|
||||
max_size=args.max_size)
|
||||
for p in args.dirs:
|
||||
lister.scan(os.fsencode(p))
|
||||
config = Config(args.config)
|
||||
backup = Backup(config, args.dry_run, sys.stdout.buffer)
|
||||
backup.run()
|
||||
|
||||
if __name__ == "__main__":
|
||||
import sys
|
||||
|
|
23
config.yaml
Normal file
23
config.yaml
Normal file
|
@ -0,0 +1,23 @@
|
|||
root: "/tmp"
|
||||
one-file-system: true
|
||||
|
||||
# Files larger than this are excluded. If a large file isn't
|
||||
# explicitly mentioned in "exclude" below, it also generates a
|
||||
# warning.
|
||||
max-file-size: 500MiB
|
||||
|
||||
# Files/dirs to exclude from backup.
|
||||
# Paths should be absolute, or start with **/
|
||||
exclude: |
|
||||
**/Steam/steamapps
|
||||
**/Steam/ubuntu*
|
||||
/tmp/bigfile
|
||||
/tmp/out.ps
|
||||
|
||||
# Files that are always included, even if they would have been
|
||||
# excluded due to file size or the "exclude" list.
|
||||
# Paths should be absolute, or start with **/
|
||||
force-include: |
|
||||
|
||||
# Email address for notification at end of backup
|
||||
notify-email: jim@jim.sh
|
|
@ -192,6 +192,7 @@ EOF
|
|||
run_ssh_command "if cmp -s $backup $keys; then rm $backup ; fi"
|
||||
run_ssh_command "cat >> .ssh/authorized_keys" <<EOF
|
||||
command="$cmd --append-only",restrict $(cat "$SSH/id_ecdsa_appendonly.pub")
|
||||
command="borg/notify.sh",restrict $(cat "$SSH/id_ecdsa_appendonly.pub")
|
||||
command="$cmd",restrict $(cat "$SSH/id_ecdsa.pub")
|
||||
EOF
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user