From 883f984aef7fd4c6513a72f36a7f8eeeacb634b3 Mon Sep 17 00:00:00 2001 From: Jim Paris Date: Fri, 8 Oct 2021 16:08:03 -0400 Subject: [PATCH] Restructure things; we will clone this repo directly on each client --- .gitignore | 3 +- Makefile | 35 ++++++------ Pipfile | 13 +++++ Pipfile.lock | 80 +++++++++++++++++++++++++++ README.md | 44 +++++++++------ borg-setup.sh => initial-setup.sh | 0 lister.py | 89 ++++++++++++++++++++++--------- requirements.txt | 3 -- 8 files changed, 209 insertions(+), 58 deletions(-) create mode 100644 Pipfile create mode 100644 Pipfile.lock rename borg-setup.sh => initial-setup.sh (100%) mode change 100755 => 100644 delete mode 100644 requirements.txt diff --git a/.gitignore b/.gitignore index 5ceb386..2927db0 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -venv +README.html + diff --git a/Makefile b/Makefile index a39fa4e..214b55b 100644 --- a/Makefile +++ b/Makefile @@ -1,25 +1,30 @@ .PHONY: all -all: test-list +all: + @echo + @echo "For initial setup, run" + @echo " sudo ./initial-setup.sh" + @echo + @echo "Or run borg commands with e.g.:" + @echo " ./borg.sh info" + @echo " ./borg.sh list" + @echo -venv: requirements.txt - python3 -m venv venv - venv/bin/pip3 install -r requirements.txt +.PHONY: ctrl +ctrl: test -.PHONY: test-list -test-list: venv +.PHONY: test-lister +test-lister: .venv venv/bin/mypy lister.py - venv/bin/python lister.py --max-size 1GiB --one-file-system /tmp >/dev/null + venv/bin/python lister.py --max-size 1GiB --one-file-system /tmp | grep -a 'bigf' .PHONY: check check: - shellcheck -f gcc borg-setup.sh + shellcheck -f gcc initial-setup.sh .PHONY: test -test: +test: check rm -rf /tmp/test-borg - BORG_DIR=/tmp/test-borg ./borg-setup.sh - ls -al /tmp/test-borg - -.PHONY: deploy -deploy: - scp borg-setup.sh psy:/www/psy + mkdir /tmp/test-borg + : "normally this would be a git clone, but we want the working tree..." 
+ git ls-files -z | tar --null -T - -cf - | tar -C /tmp/test-borg -xvf - + /tmp/test-borg/initial-setup.sh diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..cf118cf --- /dev/null +++ b/Pipfile @@ -0,0 +1,13 @@ +[[source]] +url = "https://pypi.python.org/simple" +verify_ssl = true +name = "pypi" + +[packages] +humanfriendly = "*" +mypy = "*" + +[dev-packages] + +[requires] +python_version = "3.9" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..b5b2a44 --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,80 @@ +{ + "_meta": { + "hash": { + "sha256": "775048a9d9eea3ab29a1e53636271f45f9fe40ec250225818155d3eced6034e7" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.9" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.python.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "humanfriendly": { + "hashes": [ + "sha256:1697e1a8a8f550fd43c2865cd84542fc175a61dcb779b6fee18cf6b6ccba1477", + "sha256:6b0b831ce8f15f7300721aa49829fc4e83921a9a301cc7f606be6686a2288ddc" + ], + "index": "pypi", + "version": "==10.0" + }, + "mypy": { + "hashes": [ + "sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9", + "sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a", + "sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9", + "sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e", + "sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2", + "sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212", + "sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b", + "sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885", + "sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150", + "sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703", + "sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072", + 
"sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457", + "sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e", + "sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0", + "sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb", + "sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97", + "sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8", + "sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811", + "sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6", + "sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de", + "sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504", + "sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921", + "sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d" + ], + "index": "pypi", + "version": "==0.910" + }, + "mypy-extensions": { + "hashes": [ + "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d", + "sha256:2d82818f5bb3e369420cb3c4060a7970edba416647068eb4c5343488a6c604a8" + ], + "version": "==0.4.3" + }, + "toml": { + "hashes": [ + "sha256:806143ae5bfb6a3c6e736a764057db0e6a0e05e338b5630894a5f779cabb4f9b", + "sha256:b3bda1d108d5dd99f4a20d24d9c348e91c4db7ab1b749200bded2f839ccbe68f" + ], + "version": "==0.10.2" + }, + "typing-extensions": { + "hashes": [ + "sha256:49f75d16ff11f1cd258e1b988ccff82a3ca5570217d7ad8c5f48205dd99a677e", + "sha256:d8226d10bc02a29bcc81df19a26e56a9647f8b0a6d4a83924139f4a8b01f17b7", + "sha256:f1d25edafde516b146ecd0613dabcc61409817af4766fbbcfb8d1ad4ec441a34" + ], + "version": "==3.10.0.2" + } + }, + "develop": {} +} diff --git a/README.md b/README.md index 39d142f..22c0ea9 100644 --- a/README.md +++ b/README.md @@ -1,28 +1,42 @@ # Design -- On bucket, we have a separate user account "jim-backups". Password - for this account is in bitwarden. 
+- On server, we have a separate user account "jim-backups". Password + for this account is in bitwarden in the "Backups" folder, under `ssh + backup.jim.sh`. -- Repository keys are repokeys, with passphrases saved on clients - and in bitwarden. +- Repository keys are repokeys, which get stored on the server, inside + the repo. Passphrases are stored: + - on clients (in `/opt/borg/passphrase`, for making backups) + - in bitwarden (under `borg [hostname]`, user `repo key`) -- Each client has two SSH keys: one for append-only operation (no - pass) and one for read-write (password in bitwarden) +- Each client has two SSH keys for connecting to the server: + - `/opt/borg/ssh/id_ecdsa_appendonly` + - configured on server for append-only operation + - used for making backups + - no password + - `/opt/borg/ssh/id_ecdsa` + - configured on server for read-write operation + - used for manual recovery, management, pruning + - password in bitwarden (under `borg [hostname]`, user `read-write ssh key`) -- Pruning requires the password and is a manual operation (run `sudo - /opt/borg/prune.sh`) +- Pruning requires the password and is a manual operation, and should only + be run when the client has not been compromised. -- Systemd timers start daily backups + sudo /opt/borg/prune.sh -# Setup +- Systemd timers start daily backups: - python3 -m venv venv - venv/bin/pip3 install -r requirements.txt - venv/bin/python3 lister.py + /etc/systemd/system/borg-backup.service -> /opt/borg/borg-backup.service + /etc/systemd/system/borg-backup.timer -> /opt/borg/borg-backup.timer + +- Backup script `/opt/borg/backup.py` uses configuration in + `/opt/borg/backup.yaml` to generate our own list of files, excluding + anything that's too large by default. This requires borg 1.2.0b1 + or newer, which is why the setup scripts download a specific version. 
# Usage Run on client: - wget https://psy.jim.sh/borg-setup.sh - sudo ./borg-setup.sh + sudo git clone https://git.jim.sh/jim/borg-setup.git /opt/borg + sudo /opt/borg/initial-setup.sh diff --git a/borg-setup.sh b/initial-setup.sh old mode 100755 new mode 100644 similarity index 100% rename from borg-setup.sh rename to initial-setup.sh diff --git a/lister.py b/lister.py index 1310ae8..f0c9af5 100755 --- a/lister.py +++ b/lister.py @@ -5,10 +5,37 @@ import sys import stat from typing import Optional import humanfriendly # type: ignore -import igittigitt +import wcmatch.glob # type: ignore +import re +import dataclasses +import enum + +class MatchResult(Enum): + INCLUDE_IF_SIZE_OK = 0 + INCLUDE_ALWAYS = 1 + EXCLUDE_ALWAYS = 2 + +@dataclasses.dataclass +class PatternRule: + re_inc: list[re.Pattern] + re_exc: list[re.Pattern] + + def match(self, path: str) -> (bool, bool): + if "big" in path: + print(self, file=sys.stderr) + + for inc in self.re_inc: + if inc.match(path): + break + else: + return -class Lister: + for exc in self.re_exc: + if exc.match(path): + return False + return True +class Lister: def __init__(self, one_file_system: bool, max_size: bool): self.one_file_system = one_file_system self.max_size = max_size @@ -23,9 +50,6 @@ class Lister: # Remember errors self.skipped_error: set[bytes] = set() - # Parse gitignore-style rules to exclude files from backup - self.parser = igittigitt.IgnoreParser() - def __del__(self): self.stdout.close() @@ -42,30 +66,27 @@ class Lister: c = 0 sys.stderr.write(f"\033[1;{c}m{letter}:\033[22m {msg}\033[0m\n") - def path_string(self, path: bytes) -> str: - return path.decode(errors='backslashreplace') - - def scan(self, path: bytes, parent_st: os.stat_result=None): + def scan(self, path: bytes, + parent_st: os.stat_result=None, + rules: list[PatternRule]=[]): """If the given path should be backed up, print it. 
If it's a directory and its contents should be included, recurse.""" - # Need the path in string form, for igittigitt parser - pathstr = self.path_string(path) + # Copy the path in string form, for logging and pathspec + # parsing. Otherwise, we use bytes directly. + pathstr = path.decode(errors='backslashreplace') try: + # See if we match any rules + for r in rules: + if r.match(pathstr): + self.log('I', f"ignore {pathstr}") + return + # Stat the path st = os.lstat(path) - is_dir = stat.S_ISDIR(st.st_mode) - match = self.parser._match_rules(pathstr, not is_dir) - if match: - match = self.parser._match_negation_rules(pathstr) - - if match: - self.log('I', f"ignored {pathstr}") - return - if is_dir: # Skip if it crosses a mount point if self.one_file_system: @@ -76,17 +97,37 @@ class Lister: # Add contents of any .nobackup file to our # parser rules + child_rules = rules + try: + def prepend_base(regex): + if regex[0] != '^': + raise Exception(f'bad regex: {regex}') + return '^' + os.path.join(pathstr, '') + regex[1:] with open(os.path.join(path, b".nobackup")) as f: + rule = PatternRule([], []) for line in f: - self.parser.add_rule(line, base_path=pathstr) + if line[0] == '#': + continue + (inc, exc) = wcmatch.glob.translate( + [ line.rstrip('\r\n') ], + flags=(wcmatch.glob.NEGATE | + wcmatch.glob.GLOBSTAR | + wcmatch.glob.DOTGLOB | + wcmatch.glob.EXTGLOB | + wcmatch.glob.BRACE)) + for x in inc: + rule.re_inc.append(re.compile(prepend_base(x))) + for x in exc: + rule.re_exc.append(re.compile(prepend_base(x))) + child_rules.append(rule) except FileNotFoundError: pass # Recurse and process each entry with os.scandir(path) as it: for entry in it: - self.scan(entry.path, st) + self.scan(entry.path, st, child_rules) else: # For regular files, ensure they're not too big @@ -96,7 +137,7 @@ class Lister: n, keep_width=True, binary=True) a = format_size(st.st_size) b = format_size(self.max_size) - self.log('W', f"skipping {self.path_string(path)}: " + self.log('W', 
f"skipping {pathstr}: " + f"file size {a} exceeds limit {b}") self.skipped_size.add(path) return @@ -106,7 +147,7 @@ class Lister: self.out(path) except PermissionError as e: - self.log('E', f"can't read {self.path_string(path)}") + self.log('E', f"can't read {pathstr}") self.skipped_error.add(path) return diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 45ec677..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -humanfriendly>=9.2 -igittigitt>=2.0.4 -mypy