Compare commits
2 Commits
97b9060344 ... 4a30b82e39

| Author | SHA1 | Date |
|---|---|---|
| | 4a30b82e39 | |
| | ac12b42cad | |
backup.py (124)

@@ -29,38 +29,25 @@ def pstr(path: bytes) -> str:
 def format_size(n: int) -> str:
     return humanfriendly.format_size(n, keep_width=True, binary=True)
 
+# Type corresponding to patterns that are generated by
+# wcmatch.translate: two lists of compiled REs (a,b). A path matches
+# if it matches at least one regex in "a" and none in "b".
+MatchPatterns = typing.Tuple[typing.List[re.Pattern], typing.List[re.Pattern]]
+
+
 class Config:
     roots: typing.List[bytes]
-    max_file_size: typing.Optional[int]
     one_file_system: bool
     exclude_caches: bool
-    exclude: typing.List[bytes]
-    force_include: typing.List[bytes]
+    exclude: MatchPatterns
+    unexclude: MatchPatterns
+    max_size_rules: typing.List[typing.Tuple[int, MatchPatterns]]
     notify_email: typing.Optional[str]
 
     def __init__(self, configfile: str):
-        # Read config
-        with open(configfile, 'r') as f:
-            config = yaml.safe_load(f)
-        self.one_file_system = config.get('one-file-system', False)
-        self.exclude_caches = config.get('exclude-caches', False)
-
-        if 'max-file-size' in config:
-            self.max_file_size = humanfriendly.parse_size(
-                config['max-file-size'])
-        else:
-            self.max_file_size = None
-
-        raw = config.get('roots', '').encode().split(b'\n')
-        self.roots = []
-        for x in raw:
-            if not len(x):
-                continue
-            self.roots.append(x)
-        self.roots.sort(key=len)
-
-        def process_match_list(config_name):
-            raw = config.get(config_name, '').encode().split(b'\n')
+        # Helper to process lists of patterns into regexes
+        def process_match_list(config_entry):
+            raw = config_entry.encode().split(b'\n')
             pats = []
             # Prepend '**/' to any relative patterns
             for x in raw:
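To make the new MatchPatterns type concrete, here is a minimal standalone sketch, not part of the patch: the helper names compile_patterns and matches are invented for illustration, but the flags and the translate/compile steps mirror process_match_list above, and the match rule is the "at least one regex in a, none in b" rule from the comment.

```python
import re
import typing

import wcmatch.glob

MatchPatterns = typing.Tuple[typing.List[re.Pattern], typing.List[re.Pattern]]

# Same flag set as the patch uses for wcmatch.glob.translate
FLAGS = (wcmatch.glob.GLOBSTAR | wcmatch.glob.DOTGLOB |
         wcmatch.glob.NODOTDIR | wcmatch.glob.EXTGLOB | wcmatch.glob.BRACE)

def compile_patterns(pats: typing.List[bytes]) -> MatchPatterns:
    # translate() returns two lists of regex strings: inclusions ("a")
    # and exclusions ("b"); compile both.
    (a, b) = wcmatch.glob.translate(pats, flags=FLAGS)
    return ([re.compile(x) for x in a], [re.compile(x) for x in b])

def matches(r: MatchPatterns, path: bytes) -> bool:
    # A path matches if it matches at least one regex in r[0] and none in r[1]
    return (any(a.match(path) for a in r[0])
            and not any(b.match(path) for b in r[1]))

# A relative pattern like '*.mp4' would get '**/' prepended by the helper
# in the patch, so it matches at any depth:
pats = compile_patterns([b'**/*.mp4'])
print(matches(pats, b'/home/user/videos/movie.mp4'))  # expected: True
print(matches(pats, b'/home/user/notes.txt'))         # expected: False
```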
@@ -70,39 +57,49 @@ class Config:
                     pats.append(x)
                 else:
                     pats.append(b'**/' + x)
-            return pats
-
-        self.exclude = process_match_list('exclude')
-        self.force_include = process_match_list('force-include')
+            # Compile patterns.
+            (a, b) = wcmatch.glob.translate(
+                pats, flags=(wcmatch.glob.GLOBSTAR |
+                             wcmatch.glob.DOTGLOB |
+                             wcmatch.glob.NODOTDIR |
+                             wcmatch.glob.EXTGLOB |
+                             wcmatch.glob.BRACE))
+            return ([ re.compile(x) for x in a ],
+                    [ re.compile(x) for x in b ])
+
+        # Read config
+        with open(configfile, 'r') as f:
+            config = yaml.safe_load(f)
+        self.one_file_system = config.get('one-file-system', False)
+        self.exclude_caches = config.get('exclude-caches', False)
+
+        raw = config.get('roots', '').encode().split(b'\n')
+        self.roots = []
+        for x in raw:
+            if not len(x):
+                continue
+            self.roots.append(x)
+        self.roots.sort(key=len)
+
+        self.exclude = process_match_list(config.get('exclude', ''))
+        self.unexclude = process_match_list(config.get('unexclude', ''))
+
+        self.max_size_rules = []
+        rules = { humanfriendly.parse_size(k): v
+                  for k, v in config.get('max-size-rules', {}).items() }
+        for size in reversed(sorted(rules)):
+            self.max_size_rules.append(
+                (size, process_match_list(rules[size])))
+
         self.notify_email = config.get('notify-email', None)
 
-        # Compile patterns
-        flags = (wcmatch.glob.GLOBSTAR |
-                 wcmatch.glob.DOTGLOB |
-                 wcmatch.glob.NODOTDIR |
-                 wcmatch.glob.EXTGLOB |
-                 wcmatch.glob.BRACE)
-
-        # Path matches if it matches at least one regex in "a" and no
-        # regex in "b"
-        (a, b) = wcmatch.glob.translate(self.exclude, flags=flags)
-        self.exclude_re = ([ re.compile(x) for x in a ],
-                           [ re.compile(x) for x in b ])
-
-        (a, b) = wcmatch.glob.translate(self.force_include, flags=flags)
-        self.force_include_re = ([ re.compile(x) for x in a ],
-                                 [ re.compile(x) for x in b ])
 
-    def match_re(self,
-                 re: typing.Tuple[typing.List[typing.Pattern],
-                                  typing.List[typing.Pattern]],
-                 path: bytes):
+    def match_re(self, r: MatchPatterns, path: bytes):
         # Path matches if it matches at least one regex in
-        # re[0] and no regex in re[1].
-        for a in re[0]:
+        # r[0] and no regex in r[1].
+        for a in r[0]:
             if a.match(path):
-                for b in re[1]:
+                for b in r[1]:
                     if b.match(path):
                         return False
                 return True
 
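The max-size-rules handling above relies on the rules being sorted largest-first, so that the first rule whose patterns match a path carries the largest applicable size, and max_size_rules[-1][0] is the smallest configured limit (which is what the elif guard in Backup checks before walking the rules). A small standalone sketch with hypothetical rule values, not taken from the real config, and with the raw pattern strings kept in place of the compiled MatchPatterns:

```python
import humanfriendly

# Hypothetical size-keyed rules, shaped like the output of yaml.safe_load()
raw_rules = {'500 MiB': '*\n', '1.0 GiB': '*.mp4\n'}

# Mirrors Config.__init__ above: parse the size keys, then iterate sizes
# in descending order so the largest matching rule wins
rules = {humanfriendly.parse_size(k): v for k, v in raw_rules.items()}
max_size_rules = [(size, rules[size]) for size in reversed(sorted(rules))]

print(max_size_rules)
# expected: [(1073741824, '*.mp4\n'), (524288000, '*\n')]
print(max_size_rules[-1][0])
# expected: 524288000, i.e. the smallest configured limit (500 MiB)
```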
@@ -166,7 +163,7 @@ class Backup:
         # See if there's a reason to exclude it
         exclude_reason = None
 
-        if self.config.match_re(self.config.exclude_re, decorated_path):
+        if self.config.match_re(self.config.exclude, decorated_path):
             # Config file says to exclude
             exclude_reason = ('I', f"skipping, excluded by config file")
 
@@ -178,17 +175,24 @@
             exclude_reason = ('I', "skipping, on different filesystem")
 
         elif (is_reg
-              and self.config.max_file_size
-              and size > self.config.max_file_size):
-            # Too big
-            a = format_size(size)
-            b = format_size(self.config.max_file_size)
-            exclude_reason = ('W', f"file size {a} exceeds limit {b}")
+              and len(self.config.max_size_rules)
+              and size > self.config.max_size_rules[-1][0]):
+            # Check file sizes against our list.
+            # Only need to check if the size is bigger than the smallest
+            # entry on the list; then, we need to check it against all rules
+            # to see which one applies.
+            for (max_size, patterns) in self.config.max_size_rules:
+                if self.config.match_re(patterns, decorated_path):
+                    if size > max_size:
+                        a = format_size(size)
+                        b = format_size(max_size)
+                        exclude_reason = (
+                            'W', f"file size {a} exceeds limit {b}")
+                    break
 
         # If we have a reason to exclude it, stop now unless it's
         # force-included
-        force = self.config.match_re(self.config.force_include_re,
-                                     decorated_path)
+        force = self.config.match_re(self.config.unexclude, decorated_path)
         if exclude_reason and not force:
             self.log(exclude_reason[0],
                      f"{exclude_reason[1]}: {pstr(path)}")
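Putting the two pieces together, here is a condensed sketch of the size check plus the unexclude override. The function and pattern names are illustrative, and the patch actually spreads this logic across the exclude_reason and force checks shown above rather than collapsing it into one function.

```python
import re
import typing

MatchPatterns = typing.Tuple[typing.List[re.Pattern], typing.List[re.Pattern]]

def match_re(r: MatchPatterns, path: bytes) -> bool:
    # Same rule as Config.match_re above, with an explicit False fall-through
    for a in r[0]:
        if a.match(path):
            return not any(b.match(path) for b in r[1])
    return False

# Hand-written stand-ins for what process_match_list() would produce
match_all: MatchPatterns = ([re.compile(rb'.*')], [])
unexclude: MatchPatterns = ([re.compile(rb'.*\.git/objects/pack/[^/]*\.pack$')], [])

# Sorted largest-first, as Config.__init__ builds them (here just one rule)
max_size_rules = [(500 * 1024 * 1024, match_all)]

def too_big(path: bytes, size: int) -> bool:
    # Only worth checking if the size exceeds the smallest configured limit
    if not max_size_rules or size <= max_size_rules[-1][0]:
        return False
    for (max_size, patterns) in max_size_rules:
        if match_re(patterns, path):
            if size > max_size:
                # Over the limit that applies, unless unexcluded
                return not match_re(unexclude, path)
            break
    return False

print(too_big(b'/home/user/big.iso', 600 * 1024 * 1024))
# expected: True
print(too_big(b'/home/user/repo/.git/objects/pack/abc.pack', 600 * 1024 * 1024))
# expected: False, rescued by the unexclude pattern
```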

config.yaml (21)

@@ -10,12 +10,6 @@ roots: |
 one-file-system: true
 exclude-caches: true
 
-# Files larger than this are excluded. If a large file isn't
-# explicitly mentioned in "excludes" below, it also generates a
-# warning. Note that this counts used blocks, so files with large
-# holes will still be considered small (since they'll compress easily)
-max-file-size: 500MiB
-
 # Files/dirs to exclude from backup.
 # Relative paths are treated as if starting with **/
 # Paths ending in / will only match directories.
@@ -27,10 +21,21 @@ exclude: |
   Steam/ubuntu*/
   .cache/
 
+# Rules to exclude files based on file size.
+# This is a dict of sizes, each with a list of rules.
+# For a given path, the largest size with a matching rule applies.
+# Matching follows the same behavior as the "exclude" list.
+# Size is calculated as used blocks (think "du", not "du --apparent-size").
+max-size-rules:
+  500 MiB: |
+    *
+#  1.0 GiB: |
+#    *.mp4
+
 # Files that are always included, even if they would have been
 # excluded due to file size or the "exclude" list.
-# Matching rules are the same as above.
-force-include: |
+# Matching follows the same behavior as the "exclude" list.
+unexclude: |
   .git/objects/pack/*.pack
 
 # Email address for notification at end of backup
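For reference, a short sketch of how this shape is consumed by the code above; the inline YAML is an abbreviated stand-in for the real config file. yaml.safe_load turns max-size-rules into a dict mapping size strings to newline-separated pattern blocks and unexclude into a single literal block, which is what the max-size-rules loop and process_match_list (which splits these blocks on newlines) in backup.py expect.

```python
import yaml
import humanfriendly

# Abbreviated stand-in for config.yaml (only the keys relevant here)
snippet = """
max-size-rules:
  500 MiB: |
    *
unexclude: |
  .git/objects/pack/*.pack
"""

config = yaml.safe_load(snippet)
print(config)
# expected: {'max-size-rules': {'500 MiB': '*\n'},
#            'unexclude': '.git/objects/pack/*.pack\n'}
# The '|' literal blocks keep '*' and friends as plain text instead of
# letting YAML interpret them.

# Size keys are parsed the same way Config.__init__ does it
print({humanfriendly.parse_size(k): v
       for k, v in config['max-size-rules'].items()})
# expected: {524288000: '*\n'}
```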