From ac12b42cadaadd52db014f3f93a8ac1904067b8d Mon Sep 17 00:00:00 2001 From: Jim Paris Date: Mon, 18 Oct 2021 16:25:23 -0400 Subject: [PATCH] backup: rename force-include to unexclude Force-include is a misnomer because it won't include files that weren't considered at all (like files in an excluded subdir). Instead, call it "unexclude" to make it slightly clearer that this will just override the exclusions. --- backup.py | 81 +++++++++++++++++++++++++---------------------------- config.yaml | 2 +- 2 files changed, 39 insertions(+), 44 deletions(-) diff --git a/backup.py b/backup.py index 6570cce..d4aa9d3 100755 --- a/backup.py +++ b/backup.py @@ -29,16 +29,45 @@ def pstr(path: bytes) -> str: def format_size(n: int) -> str: return humanfriendly.format_size(n, keep_width=True, binary=True) +# Type corresponding to patterns that are generated by +# wcmatch.translate: two lists of compiled REs (a,b). A path matches +# if it matches at least one regex in "a" and none in "b". +MatchPatterns = typing.Tuple[typing.List[re.Pattern], typing.List[re.Pattern]] + class Config: roots: typing.List[bytes] max_file_size: typing.Optional[int] one_file_system: bool exclude_caches: bool - exclude: typing.List[bytes] - force_include: typing.List[bytes] + exclude: MatchPatterns + unexclude: MatchPatterns notify_email: typing.Optional[str] def __init__(self, configfile: str): + + # Helper to process lists of patterns into regexes + def process_match_list(config_name): + raw = config.get(config_name, '').encode().split(b'\n') + pats = [] + # Prepend '**/' to any relative patterns + for x in raw: + if not len(x): + continue + if x.startswith(b'/'): + pats.append(x) + else: + pats.append(b'**/' + x) + + # Compile patterns. + (a, b) = wcmatch.glob.translate( + pats, flags=(wcmatch.glob.GLOBSTAR | + wcmatch.glob.DOTGLOB | + wcmatch.glob.NODOTDIR | + wcmatch.glob.EXTGLOB | + wcmatch.glob.BRACE)) + return ([ re.compile(x) for x in a ], + [ re.compile(x) for x in b ]) + # Read config with open(configfile, 'r') as f: config = yaml.safe_load(f) @@ -59,50 +88,17 @@ class Config: self.roots.append(x) self.roots.sort(key=len) - def process_match_list(config_name): - raw = config.get(config_name, '').encode().split(b'\n') - pats = [] - # Prepend '**/' to any relative patterns - for x in raw: - if not len(x): - continue - if x.startswith(b'/'): - pats.append(x) - else: - pats.append(b'**/' + x) - return pats - self.exclude = process_match_list('exclude') - self.force_include = process_match_list('force-include') + self.unexclude = process_match_list('unexclude') self.notify_email = config.get('notify-email', None) - # Compile patterns - flags = (wcmatch.glob.GLOBSTAR | - wcmatch.glob.DOTGLOB | - wcmatch.glob.NODOTDIR | - wcmatch.glob.EXTGLOB | - wcmatch.glob.BRACE) - - # Path matches if it matches at least one regex in "a" and no - # regex in "b" - (a, b) = wcmatch.glob.translate(self.exclude, flags=flags) - self.exclude_re = ([ re.compile(x) for x in a ], - [ re.compile(x) for x in b ]) - - (a, b) = wcmatch.glob.translate(self.force_include, flags=flags) - self.force_include_re = ([ re.compile(x) for x in a ], - [ re.compile(x) for x in b ]) - - def match_re(self, - re: typing.Tuple[typing.List[typing.Pattern], - typing.List[typing.Pattern]], - path: bytes): + def match_re(self, r: MatchPatterns, path: bytes): # Path matches if it matches at least one regex in - # re[0] and no regex in re[1]. - for a in re[0]: + # r[0] and no regex in r[1]. + for a in r[0]: if a.match(path): - for b in re[1]: + for b in r[1]: if b.match(path): return False return True @@ -166,7 +162,7 @@ class Backup: # See if there's a reason to exclude it exclude_reason = None - if self.config.match_re(self.config.exclude_re, decorated_path): + if self.config.match_re(self.config.exclude, decorated_path): # Config file says to exclude exclude_reason = ('I', f"skipping, excluded by config file") @@ -187,8 +183,7 @@ class Backup: # If we have a reason to exclude it, stop now unless it's # force-included - force = self.config.match_re(self.config.force_include_re, - decorated_path) + force = self.config.match_re(self.config.unexclude, decorated_path) if exclude_reason and not force: self.log(exclude_reason[0], f"{exclude_reason[1]}: {pstr(path)}") diff --git a/config.yaml b/config.yaml index 7639197..20c9f39 100644 --- a/config.yaml +++ b/config.yaml @@ -30,7 +30,7 @@ exclude: | # Files that are always included, even if they would have been # excluded due to file size or the "exclude" list. # Matching rules are the same as above. -force-include: | +unexclude: | .git/objects/pack/*.pack # Email address for notification at end of backup