Compare commits
2 Commits
97b9060344
...
4a30b82e39
Author | SHA1 | Date | |
---|---|---|---|
4a30b82e39 | |||
ac12b42cad |
116
backup.py
116
backup.py
|
@ -29,38 +29,25 @@ def pstr(path: bytes) -> str:
|
||||||
def format_size(n: int) -> str:
|
def format_size(n: int) -> str:
|
||||||
return humanfriendly.format_size(n, keep_width=True, binary=True)
|
return humanfriendly.format_size(n, keep_width=True, binary=True)
|
||||||
|
|
||||||
|
# Type corresponding to patterns that are generated by
|
||||||
|
# wcmatch.translate: two lists of compiled REs (a,b). A path matches
|
||||||
|
# if it matches at least one regex in "a" and none in "b".
|
||||||
|
MatchPatterns = typing.Tuple[typing.List[re.Pattern], typing.List[re.Pattern]]
|
||||||
|
|
||||||
class Config:
|
class Config:
|
||||||
roots: typing.List[bytes]
|
roots: typing.List[bytes]
|
||||||
max_file_size: typing.Optional[int]
|
|
||||||
one_file_system: bool
|
one_file_system: bool
|
||||||
exclude_caches: bool
|
exclude_caches: bool
|
||||||
exclude: typing.List[bytes]
|
exclude: MatchPatterns
|
||||||
force_include: typing.List[bytes]
|
unexclude: MatchPatterns
|
||||||
|
max_size_rules: typing.List[typing.Tuple[int, MatchPatterns]]
|
||||||
notify_email: typing.Optional[str]
|
notify_email: typing.Optional[str]
|
||||||
|
|
||||||
def __init__(self, configfile: str):
|
def __init__(self, configfile: str):
|
||||||
# Read config
|
|
||||||
with open(configfile, 'r') as f:
|
|
||||||
config = yaml.safe_load(f)
|
|
||||||
self.one_file_system = config.get('one-file-system', False)
|
|
||||||
self.exclude_caches = config.get('exclude-caches', False)
|
|
||||||
|
|
||||||
if 'max-file-size' in config:
|
# Helper to process lists of patterns into regexes
|
||||||
self.max_file_size = humanfriendly.parse_size(
|
def process_match_list(config_entry):
|
||||||
config['max-file-size'])
|
raw = config_entry.encode().split(b'\n')
|
||||||
else:
|
|
||||||
self.max_file_size = None
|
|
||||||
|
|
||||||
raw = config.get('roots', '').encode().split(b'\n')
|
|
||||||
self.roots = []
|
|
||||||
for x in raw:
|
|
||||||
if not len(x):
|
|
||||||
continue
|
|
||||||
self.roots.append(x)
|
|
||||||
self.roots.sort(key=len)
|
|
||||||
|
|
||||||
def process_match_list(config_name):
|
|
||||||
raw = config.get(config_name, '').encode().split(b'\n')
|
|
||||||
pats = []
|
pats = []
|
||||||
# Prepend '**/' to any relative patterns
|
# Prepend '**/' to any relative patterns
|
||||||
for x in raw:
|
for x in raw:
|
||||||
|
@ -70,39 +57,49 @@ class Config:
|
||||||
pats.append(x)
|
pats.append(x)
|
||||||
else:
|
else:
|
||||||
pats.append(b'**/' + x)
|
pats.append(b'**/' + x)
|
||||||
return pats
|
|
||||||
|
|
||||||
self.exclude = process_match_list('exclude')
|
# Compile patterns.
|
||||||
self.force_include = process_match_list('force-include')
|
(a, b) = wcmatch.glob.translate(
|
||||||
|
pats, flags=(wcmatch.glob.GLOBSTAR |
|
||||||
self.notify_email = config.get('notify-email', None)
|
|
||||||
|
|
||||||
# Compile patterns
|
|
||||||
flags = (wcmatch.glob.GLOBSTAR |
|
|
||||||
wcmatch.glob.DOTGLOB |
|
wcmatch.glob.DOTGLOB |
|
||||||
wcmatch.glob.NODOTDIR |
|
wcmatch.glob.NODOTDIR |
|
||||||
wcmatch.glob.EXTGLOB |
|
wcmatch.glob.EXTGLOB |
|
||||||
wcmatch.glob.BRACE)
|
wcmatch.glob.BRACE))
|
||||||
|
return ([ re.compile(x) for x in a ],
|
||||||
# Path matches if it matches at least one regex in "a" and no
|
|
||||||
# regex in "b"
|
|
||||||
(a, b) = wcmatch.glob.translate(self.exclude, flags=flags)
|
|
||||||
self.exclude_re = ([ re.compile(x) for x in a ],
|
|
||||||
[ re.compile(x) for x in b ])
|
[ re.compile(x) for x in b ])
|
||||||
|
|
||||||
(a, b) = wcmatch.glob.translate(self.force_include, flags=flags)
|
# Read config
|
||||||
self.force_include_re = ([ re.compile(x) for x in a ],
|
with open(configfile, 'r') as f:
|
||||||
[ re.compile(x) for x in b ])
|
config = yaml.safe_load(f)
|
||||||
|
self.one_file_system = config.get('one-file-system', False)
|
||||||
|
self.exclude_caches = config.get('exclude-caches', False)
|
||||||
|
|
||||||
def match_re(self,
|
raw = config.get('roots', '').encode().split(b'\n')
|
||||||
re: typing.Tuple[typing.List[typing.Pattern],
|
self.roots = []
|
||||||
typing.List[typing.Pattern]],
|
for x in raw:
|
||||||
path: bytes):
|
if not len(x):
|
||||||
|
continue
|
||||||
|
self.roots.append(x)
|
||||||
|
self.roots.sort(key=len)
|
||||||
|
|
||||||
|
self.exclude = process_match_list(config.get('exclude', ''))
|
||||||
|
self.unexclude = process_match_list(config.get('unexclude', ''))
|
||||||
|
|
||||||
|
self.max_size_rules = []
|
||||||
|
rules = { humanfriendly.parse_size(k): v
|
||||||
|
for k, v in config.get('max-size-rules', {}).items() }
|
||||||
|
for size in reversed(sorted(rules)):
|
||||||
|
self.max_size_rules.append(
|
||||||
|
(size, process_match_list(rules[size])))
|
||||||
|
|
||||||
|
self.notify_email = config.get('notify-email', None)
|
||||||
|
|
||||||
|
def match_re(self, r: MatchPatterns, path: bytes):
|
||||||
# Path matches if it matches at least one regex in
|
# Path matches if it matches at least one regex in
|
||||||
# re[0] and no regex in re[1].
|
# r[0] and no regex in r[1].
|
||||||
for a in re[0]:
|
for a in r[0]:
|
||||||
if a.match(path):
|
if a.match(path):
|
||||||
for b in re[1]:
|
for b in r[1]:
|
||||||
if b.match(path):
|
if b.match(path):
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
@ -166,7 +163,7 @@ class Backup:
|
||||||
# See if there's a reason to exclude it
|
# See if there's a reason to exclude it
|
||||||
exclude_reason = None
|
exclude_reason = None
|
||||||
|
|
||||||
if self.config.match_re(self.config.exclude_re, decorated_path):
|
if self.config.match_re(self.config.exclude, decorated_path):
|
||||||
# Config file says to exclude
|
# Config file says to exclude
|
||||||
exclude_reason = ('I', f"skipping, excluded by config file")
|
exclude_reason = ('I', f"skipping, excluded by config file")
|
||||||
|
|
||||||
|
@ -178,17 +175,24 @@ class Backup:
|
||||||
exclude_reason = ('I', "skipping, on different filesystem")
|
exclude_reason = ('I', "skipping, on different filesystem")
|
||||||
|
|
||||||
elif (is_reg
|
elif (is_reg
|
||||||
and self.config.max_file_size
|
and len(self.config.max_size_rules)
|
||||||
and size > self.config.max_file_size):
|
and size > self.config.max_size_rules[-1][0]):
|
||||||
# Too big
|
# Check file sizes against our list.
|
||||||
|
# Only need to check if the size is bigger than the smallest
|
||||||
|
# entry on the list; then, we need to check it against all rules
|
||||||
|
# to see which one applies.
|
||||||
|
for (max_size, patterns) in self.config.max_size_rules:
|
||||||
|
if self.config.match_re(patterns, decorated_path):
|
||||||
|
if size > max_size:
|
||||||
a = format_size(size)
|
a = format_size(size)
|
||||||
b = format_size(self.config.max_file_size)
|
b = format_size(max_size)
|
||||||
exclude_reason = ('W', f"file size {a} exceeds limit {b}")
|
exclude_reason = (
|
||||||
|
'W', f"file size {a} exceeds limit {b}")
|
||||||
|
break
|
||||||
|
|
||||||
# If we have a reason to exclude it, stop now unless it's
|
# If we have a reason to exclude it, stop now unless it's
|
||||||
# force-included
|
# matched by the "unexclude" list
|
||||||
force = self.config.match_re(self.config.force_include_re,
|
force = self.config.match_re(self.config.unexclude, decorated_path)
|
||||||
decorated_path)
|
|
||||||
if exclude_reason and not force:
|
if exclude_reason and not force:
|
||||||
self.log(exclude_reason[0],
|
self.log(exclude_reason[0],
|
||||||
f"{exclude_reason[1]}: {pstr(path)}")
|
f"{exclude_reason[1]}: {pstr(path)}")
|
||||||
|
|
21
config.yaml
21
config.yaml
|
@ -10,12 +10,6 @@ roots: |
|
||||||
one-file-system: true
|
one-file-system: true
|
||||||
exclude-caches: true
|
exclude-caches: true
|
||||||
|
|
||||||
# Files larger than this are excluded. If a large file isn't
|
|
||||||
# explicitly mentioned in "excludes" below, it also generates a
|
|
||||||
# warning. Note that this counts used blocks, so files with large
|
|
||||||
# holes will still be considered small (since they'll compress easily)
|
|
||||||
max-file-size: 500MiB
|
|
||||||
|
|
||||||
# Files/dirs to exclude from backup.
|
# Files/dirs to exclude from backup.
|
||||||
# Relative paths are treated as if starting with **/
|
# Relative paths are treated as if starting with **/
|
||||||
# Paths ending in / will only match directories.
|
# Paths ending in / will only match directories.
|
||||||
|
@ -27,10 +21,21 @@ exclude: |
|
||||||
Steam/ubuntu*/
|
Steam/ubuntu*/
|
||||||
.cache/
|
.cache/
|
||||||
|
|
||||||
|
# Rules to exclude files based on file size.
|
||||||
|
# This is a dict of sizes, each with a list of rules.
|
||||||
|
# For a given path, the largest size with a matching rule applies.
|
||||||
|
# Matching follows the same behavior as the "exclude" list.
|
||||||
|
# Size is calculated as used blocks (think "du", not "du --apparent-size").
|
||||||
|
max-size-rules:
|
||||||
|
500 MiB: |
|
||||||
|
*
|
||||||
|
# 1.0 GiB: |
|
||||||
|
# *.mp4
|
||||||
|
|
||||||
# Files that are always included, even if they would have been
|
# Files that are always included, even if they would have been
|
||||||
# excluded due to file size or the "exclude" list.
|
# excluded due to file size or the "exclude" list.
|
||||||
# Matching rules are the same as above.
|
# Matching follows the same behavior as the "exclude" list.
|
||||||
force-include: |
|
unexclude: |
|
||||||
.git/objects/pack/*.pack
|
.git/objects/pack/*.pack
|
||||||
|
|
||||||
# Email address for notification at end of backup
|
# Email address for notification at end of backup
|
||||||
|
|
Loading…
Reference in New Issue
Block a user