Compare commits
No commits in common. "4a30b82e39b7bf8e2422931bc8d63924dfe53dd0" and "97b90603446d2fdf755b0a0e93006393a924117b" have entirely different histories.
4a30b82e39
...
97b9060344
116
backup.py
116
backup.py
|
@ -29,51 +29,28 @@ def pstr(path: bytes) -> str:
|
||||||
def format_size(n: int) -> str:
|
def format_size(n: int) -> str:
|
||||||
return humanfriendly.format_size(n, keep_width=True, binary=True)
|
return humanfriendly.format_size(n, keep_width=True, binary=True)
|
||||||
|
|
||||||
# Type corresponding to patterns that are generated by
|
|
||||||
# wcmatch.translate: two lists of compiled REs (a,b). A path matches
|
|
||||||
# if it matches at least one regex in "a" and none in "b".
|
|
||||||
MatchPatterns = typing.Tuple[typing.List[re.Pattern], typing.List[re.Pattern]]
|
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
roots: typing.List[bytes]
|
roots: typing.List[bytes]
|
||||||
|
max_file_size: typing.Optional[int]
|
||||||
one_file_system: bool
|
one_file_system: bool
|
||||||
exclude_caches: bool
|
exclude_caches: bool
|
||||||
exclude: MatchPatterns
|
exclude: typing.List[bytes]
|
||||||
unexclude: MatchPatterns
|
force_include: typing.List[bytes]
|
||||||
max_size_rules: typing.List[typing.Tuple[int, MatchPatterns]]
|
|
||||||
notify_email: typing.Optional[str]
|
notify_email: typing.Optional[str]
|
||||||
|
|
||||||
def __init__(self, configfile: str):
|
def __init__(self, configfile: str):
|
||||||
|
|
||||||
# Helper to process lists of patterns into regexes
|
|
||||||
def process_match_list(config_entry):
|
|
||||||
raw = config_entry.encode().split(b'\n')
|
|
||||||
pats = []
|
|
||||||
# Prepend '**/' to any relative patterns
|
|
||||||
for x in raw:
|
|
||||||
if not len(x):
|
|
||||||
continue
|
|
||||||
if x.startswith(b'/'):
|
|
||||||
pats.append(x)
|
|
||||||
else:
|
|
||||||
pats.append(b'**/' + x)
|
|
||||||
|
|
||||||
# Compile patterns.
|
|
||||||
(a, b) = wcmatch.glob.translate(
|
|
||||||
pats, flags=(wcmatch.glob.GLOBSTAR |
|
|
||||||
wcmatch.glob.DOTGLOB |
|
|
||||||
wcmatch.glob.NODOTDIR |
|
|
||||||
wcmatch.glob.EXTGLOB |
|
|
||||||
wcmatch.glob.BRACE))
|
|
||||||
return ([ re.compile(x) for x in a ],
|
|
||||||
[ re.compile(x) for x in b ])
|
|
||||||
|
|
||||||
# Read config
|
# Read config
|
||||||
with open(configfile, 'r') as f:
|
with open(configfile, 'r') as f:
|
||||||
config = yaml.safe_load(f)
|
config = yaml.safe_load(f)
|
||||||
self.one_file_system = config.get('one-file-system', False)
|
self.one_file_system = config.get('one-file-system', False)
|
||||||
self.exclude_caches = config.get('exclude-caches', False)
|
self.exclude_caches = config.get('exclude-caches', False)
|
||||||
|
|
||||||
|
if 'max-file-size' in config:
|
||||||
|
self.max_file_size = humanfriendly.parse_size(
|
||||||
|
config['max-file-size'])
|
||||||
|
else:
|
||||||
|
self.max_file_size = None
|
||||||
|
|
||||||
raw = config.get('roots', '').encode().split(b'\n')
|
raw = config.get('roots', '').encode().split(b'\n')
|
||||||
self.roots = []
|
self.roots = []
|
||||||
for x in raw:
|
for x in raw:
|
||||||
|
@ -82,24 +59,50 @@ class Config:
|
||||||
self.roots.append(x)
|
self.roots.append(x)
|
||||||
self.roots.sort(key=len)
|
self.roots.sort(key=len)
|
||||||
|
|
||||||
self.exclude = process_match_list(config.get('exclude', ''))
|
def process_match_list(config_name):
|
||||||
self.unexclude = process_match_list(config.get('unexclude', ''))
|
raw = config.get(config_name, '').encode().split(b'\n')
|
||||||
|
pats = []
|
||||||
|
# Prepend '**/' to any relative patterns
|
||||||
|
for x in raw:
|
||||||
|
if not len(x):
|
||||||
|
continue
|
||||||
|
if x.startswith(b'/'):
|
||||||
|
pats.append(x)
|
||||||
|
else:
|
||||||
|
pats.append(b'**/' + x)
|
||||||
|
return pats
|
||||||
|
|
||||||
self.max_size_rules = []
|
self.exclude = process_match_list('exclude')
|
||||||
rules = { humanfriendly.parse_size(k): v
|
self.force_include = process_match_list('force-include')
|
||||||
for k, v in config.get('max-size-rules', {}).items() }
|
|
||||||
for size in reversed(sorted(rules)):
|
|
||||||
self.max_size_rules.append(
|
|
||||||
(size, process_match_list(rules[size])))
|
|
||||||
|
|
||||||
self.notify_email = config.get('notify-email', None)
|
self.notify_email = config.get('notify-email', None)
|
||||||
|
|
||||||
def match_re(self, r: MatchPatterns, path: bytes):
|
# Compile patterns
|
||||||
|
flags = (wcmatch.glob.GLOBSTAR |
|
||||||
|
wcmatch.glob.DOTGLOB |
|
||||||
|
wcmatch.glob.NODOTDIR |
|
||||||
|
wcmatch.glob.EXTGLOB |
|
||||||
|
wcmatch.glob.BRACE)
|
||||||
|
|
||||||
|
# Path matches if it matches at least one regex in "a" and no
|
||||||
|
# regex in "b"
|
||||||
|
(a, b) = wcmatch.glob.translate(self.exclude, flags=flags)
|
||||||
|
self.exclude_re = ([ re.compile(x) for x in a ],
|
||||||
|
[ re.compile(x) for x in b ])
|
||||||
|
|
||||||
|
(a, b) = wcmatch.glob.translate(self.force_include, flags=flags)
|
||||||
|
self.force_include_re = ([ re.compile(x) for x in a ],
|
||||||
|
[ re.compile(x) for x in b ])
|
||||||
|
|
||||||
|
def match_re(self,
|
||||||
|
re: typing.Tuple[typing.List[typing.Pattern],
|
||||||
|
typing.List[typing.Pattern]],
|
||||||
|
path: bytes):
|
||||||
# Path matches if it matches at least one regex in
|
# Path matches if it matches at least one regex in
|
||||||
# r[0] and no regex in r[1].
|
# re[0] and no regex in re[1].
|
||||||
for a in r[0]:
|
for a in re[0]:
|
||||||
if a.match(path):
|
if a.match(path):
|
||||||
for b in r[1]:
|
for b in re[1]:
|
||||||
if b.match(path):
|
if b.match(path):
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
@ -163,7 +166,7 @@ class Backup:
|
||||||
# See if there's a reason to exclude it
|
# See if there's a reason to exclude it
|
||||||
exclude_reason = None
|
exclude_reason = None
|
||||||
|
|
||||||
if self.config.match_re(self.config.exclude, decorated_path):
|
if self.config.match_re(self.config.exclude_re, decorated_path):
|
||||||
# Config file says to exclude
|
# Config file says to exclude
|
||||||
exclude_reason = ('I', f"skipping, excluded by config file")
|
exclude_reason = ('I', f"skipping, excluded by config file")
|
||||||
|
|
||||||
|
@ -175,24 +178,17 @@ class Backup:
|
||||||
exclude_reason = ('I', "skipping, on different filesystem")
|
exclude_reason = ('I', "skipping, on different filesystem")
|
||||||
|
|
||||||
elif (is_reg
|
elif (is_reg
|
||||||
and len(self.config.max_size_rules)
|
and self.config.max_file_size
|
||||||
and size > self.config.max_size_rules[-1][0]):
|
and size > self.config.max_file_size):
|
||||||
# Check file sizes against our list.
|
# Too big
|
||||||
# Only need to check if the size is bigger than the smallest
|
a = format_size(size)
|
||||||
# entry on the list; then, we need to check it against all rules
|
b = format_size(self.config.max_file_size)
|
||||||
# to see which one applies.
|
exclude_reason = ('W', f"file size {a} exceeds limit {b}")
|
||||||
for (max_size, patterns) in self.config.max_size_rules:
|
|
||||||
if self.config.match_re(patterns, decorated_path):
|
|
||||||
if size > max_size:
|
|
||||||
a = format_size(size)
|
|
||||||
b = format_size(max_size)
|
|
||||||
exclude_reason = (
|
|
||||||
'W', f"file size {a} exceeds limit {b}")
|
|
||||||
break
|
|
||||||
|
|
||||||
# If we have a reason to exclude it, stop now unless it's
|
# If we have a reason to exclude it, stop now unless it's
|
||||||
# force-included
|
# force-included
|
||||||
force = self.config.match_re(self.config.unexclude, decorated_path)
|
force = self.config.match_re(self.config.force_include_re,
|
||||||
|
decorated_path)
|
||||||
if exclude_reason and not force:
|
if exclude_reason and not force:
|
||||||
self.log(exclude_reason[0],
|
self.log(exclude_reason[0],
|
||||||
f"{exclude_reason[1]}: {pstr(path)}")
|
f"{exclude_reason[1]}: {pstr(path)}")
|
||||||
|
|
21
config.yaml
21
config.yaml
|
@ -10,6 +10,12 @@ roots: |
|
||||||
one-file-system: true
|
one-file-system: true
|
||||||
exclude-caches: true
|
exclude-caches: true
|
||||||
|
|
||||||
|
# Files larger than this are excluded. If a large file isn't
|
||||||
|
# explicitly mentioned in "excludes" below, it also generates a
|
||||||
|
# warning. Note that this counts used blocks, so files with large
|
||||||
|
# holes will still be considered small (since they'll compress easily)
|
||||||
|
max-file-size: 500MiB
|
||||||
|
|
||||||
# Files/dirs to exclude from backup.
|
# Files/dirs to exclude from backup.
|
||||||
# Relative paths are treated as if starting with **/
|
# Relative paths are treated as if starting with **/
|
||||||
# Paths ending in / will only match directories.
|
# Paths ending in / will only match directories.
|
||||||
|
@ -21,21 +27,10 @@ exclude: |
|
||||||
Steam/ubuntu*/
|
Steam/ubuntu*/
|
||||||
.cache/
|
.cache/
|
||||||
|
|
||||||
# Rules to exclude files based on file size.
|
|
||||||
# This is a dict of sizes, each with a list of rules.
|
|
||||||
# For a given path, the largest size with a matching rule applies.
|
|
||||||
# Matching follows the same behavior as the "exclude" list.
|
|
||||||
# Size is calculated as used blocks (think "du", not "du --apparent-size").
|
|
||||||
max-size-rules:
|
|
||||||
500 MiB: |
|
|
||||||
*
|
|
||||||
# 1.0 GiB: |
|
|
||||||
# *.mp4
|
|
||||||
|
|
||||||
# Files that are always included, even if they would have been
|
# Files that are always included, even if they would have been
|
||||||
# excluded due to file size or the "exclude" list.
|
# excluded due to file size or the "exclude" list.
|
||||||
# Matching follows the same behavior as the "exclude" list.
|
# Matching rules are the same as above.
|
||||||
unexclude: |
|
force-include: |
|
||||||
.git/objects/pack/*.pack
|
.git/objects/pack/*.pack
|
||||||
|
|
||||||
# Email address for notification at end of backup
|
# Email address for notification at end of backup
|
||||||
|
|
Loading…
Reference in New Issue
Block a user