Compare commits

..

No commits in common. "4a30b82e39b7bf8e2422931bc8d63924dfe53dd0" and "97b90603446d2fdf755b0a0e93006393a924117b" have entirely different histories.

2 changed files with 64 additions and 73 deletions

116
backup.py
View File

@ -29,51 +29,28 @@ def pstr(path: bytes) -> str:
def format_size(n: int) -> str:
    """Return the size ``n`` formatted as a human-readable string.

    Thin wrapper around ``humanfriendly.format_size`` that always
    passes ``keep_width=True`` and ``binary=True``.
    """
    options = {'keep_width': True, 'binary': True}
    return humanfriendly.format_size(n, **options)
# Type of a compiled pattern pair built from the output of
# wcmatch.glob.translate: two lists of compiled REs (a, b). A path
# matches if it matches at least one regex in "a" and none in "b".
MatchPatterns = typing.Tuple[typing.List[re.Pattern], typing.List[re.Pattern]]
class Config:
roots: typing.List[bytes]
max_file_size: typing.Optional[int]
one_file_system: bool
exclude_caches: bool
exclude: MatchPatterns
unexclude: MatchPatterns
max_size_rules: typing.List[typing.Tuple[int, MatchPatterns]]
exclude: typing.List[bytes]
force_include: typing.List[bytes]
notify_email: typing.Optional[str]
def __init__(self, configfile: str):
# Helper to process lists of patterns into regexes
def process_match_list(config_entry):
    """Compile a newline-separated pattern string into regex lists.

    ``config_entry`` is encoded to bytes and split on newlines.
    Blank lines are dropped; patterns starting with '/' are kept as
    written and all others are prefixed with '**/'.  The resulting
    globs are passed to ``wcmatch.glob.translate`` and both returned
    lists are compiled with ``re.compile``.
    """
    globs = [
        line if line.startswith(b'/') else b'**/' + line
        for line in config_entry.encode().split(b'\n')
        if len(line)
    ]
    glob_flags = (wcmatch.glob.GLOBSTAR |
                  wcmatch.glob.DOTGLOB |
                  wcmatch.glob.NODOTDIR |
                  wcmatch.glob.EXTGLOB |
                  wcmatch.glob.BRACE)
    translated = wcmatch.glob.translate(globs, flags=glob_flags)
    (positive, negative) = translated
    compiled_positive = [re.compile(p) for p in positive]
    compiled_negative = [re.compile(p) for p in negative]
    return (compiled_positive, compiled_negative)
# Read config
with open(configfile, 'r') as f:
config = yaml.safe_load(f)
self.one_file_system = config.get('one-file-system', False)
self.exclude_caches = config.get('exclude-caches', False)
if 'max-file-size' in config:
self.max_file_size = humanfriendly.parse_size(
config['max-file-size'])
else:
self.max_file_size = None
raw = config.get('roots', '').encode().split(b'\n')
self.roots = []
for x in raw:
@ -82,24 +59,50 @@ class Config:
self.roots.append(x)
self.roots.sort(key=len)
self.exclude = process_match_list(config.get('exclude', ''))
self.unexclude = process_match_list(config.get('unexclude', ''))
def process_match_list(config_name):
    """Return the list of glob patterns stored under ``config_name``.

    The value is looked up in ``config`` (a missing key counts as an
    empty string), encoded to bytes and split on newlines.  Blank
    lines are dropped; patterns starting with '/' are kept as written
    and all others are prefixed with '**/'.
    """
    lines = config.get(config_name, '').encode().split(b'\n')
    return [
        entry if entry.startswith(b'/') else b'**/' + entry
        for entry in lines
        if len(entry)
    ]
self.max_size_rules = []
rules = { humanfriendly.parse_size(k): v
for k, v in config.get('max-size-rules', {}).items() }
for size in reversed(sorted(rules)):
self.max_size_rules.append(
(size, process_match_list(rules[size])))
self.exclude = process_match_list('exclude')
self.force_include = process_match_list('force-include')
self.notify_email = config.get('notify-email', None)
def match_re(self, r: MatchPatterns, path: bytes):
# Compile patterns
flags = (wcmatch.glob.GLOBSTAR |
wcmatch.glob.DOTGLOB |
wcmatch.glob.NODOTDIR |
wcmatch.glob.EXTGLOB |
wcmatch.glob.BRACE)
# Path matches if it matches at least one regex in "a" and no
# regex in "b"
(a, b) = wcmatch.glob.translate(self.exclude, flags=flags)
self.exclude_re = ([ re.compile(x) for x in a ],
[ re.compile(x) for x in b ])
(a, b) = wcmatch.glob.translate(self.force_include, flags=flags)
self.force_include_re = ([ re.compile(x) for x in a ],
[ re.compile(x) for x in b ])
def match_re(self,
re: typing.Tuple[typing.List[typing.Pattern],
typing.List[typing.Pattern]],
path: bytes):
# Path matches if it matches at least one regex in
# r[0] and no regex in r[1].
for a in r[0]:
# re[0] and no regex in re[1].
for a in re[0]:
if a.match(path):
for b in r[1]:
for b in re[1]:
if b.match(path):
return False
return True
@ -163,7 +166,7 @@ class Backup:
# See if there's a reason to exclude it
exclude_reason = None
if self.config.match_re(self.config.exclude, decorated_path):
if self.config.match_re(self.config.exclude_re, decorated_path):
# Config file says to exclude
exclude_reason = ('I', f"skipping, excluded by config file")
@ -175,24 +178,17 @@ class Backup:
exclude_reason = ('I', "skipping, on different filesystem")
elif (is_reg
and len(self.config.max_size_rules)
and size > self.config.max_size_rules[-1][0]):
# Check file sizes against our list.
# Only need to check if the size is bigger than the smallest
# entry on the list; then, we need to check it against all rules
# to see which one applies.
for (max_size, patterns) in self.config.max_size_rules:
if self.config.match_re(patterns, decorated_path):
if size > max_size:
a = format_size(size)
b = format_size(max_size)
exclude_reason = (
'W', f"file size {a} exceeds limit {b}")
break
and self.config.max_file_size
and size > self.config.max_file_size):
# Too big
a = format_size(size)
b = format_size(self.config.max_file_size)
exclude_reason = ('W', f"file size {a} exceeds limit {b}")
# If we have a reason to exclude it, stop now unless it's
# force-included
force = self.config.match_re(self.config.unexclude, decorated_path)
force = self.config.match_re(self.config.force_include_re,
decorated_path)
if exclude_reason and not force:
self.log(exclude_reason[0],
f"{exclude_reason[1]}: {pstr(path)}")

View File

@ -10,6 +10,12 @@ roots: |
one-file-system: true
exclude-caches: true
# Files larger than this are excluded. If a large file isn't
# explicitly mentioned in "exclude" below, it also generates a
# warning. Note that this counts used blocks, so files with large
# holes will still be considered small (since they'll compress easily).
max-file-size: 500MiB
# Files/dirs to exclude from backup.
# Relative paths are treated as if starting with **/
# Paths ending in / will only match directories.
@ -21,21 +27,10 @@ exclude: |
Steam/ubuntu*/
.cache/
# Rules to exclude files based on file size.
# This is a dict of sizes, each with a list of rules.
# For a given path, the largest size with a matching rule applies.
# Matching follows the same behavior as the "exclude" list.
# Size is calculated as used blocks (think "du", not "du --apparent-size").
max-size-rules:
500 MiB: |
*
# 1.0 GiB: |
# *.mp4
# Files that are always included, even if they would have been
# excluded due to file size or the "exclude" list.
# Matching follows the same behavior as the "exclude" list.
unexclude: |
# Matching rules are the same as above.
force-include: |
.git/objects/pack/*.pack
# Email address for notification at end of backup