Browse Source

backup: replace simple max size with rule-based system

Now individual files or patterns can have their own maximum sizes.
master
Jim Paris 1 year ago
parent
commit
4a30b82e39
2 changed files with 38 additions and 24 deletions
  1. +26
    -17
      backup.py
  2. +12
    -7
      config.yaml

+ 26
- 17
backup.py View File

@@ -36,18 +36,18 @@ MatchPatterns = typing.Tuple[typing.List[re.Pattern], typing.List[re.Pattern]]

class Config:
roots: typing.List[bytes]
max_file_size: typing.Optional[int]
one_file_system: bool
exclude_caches: bool
exclude: MatchPatterns
unexclude: MatchPatterns
max_size_rules: typing.List[typing.Tuple[int, MatchPatterns]]
notify_email: typing.Optional[str]

def __init__(self, configfile: str):

# Helper to process lists of patterns into regexes
def process_match_list(config_name):
raw = config.get(config_name, '').encode().split(b'\n')
def process_match_list(config_entry):
raw = config_entry.encode().split(b'\n')
pats = []
# Prepend '**/' to any relative patterns
for x in raw:
@@ -74,12 +74,6 @@ class Config:
self.one_file_system = config.get('one-file-system', False)
self.exclude_caches = config.get('exclude-caches', False)

if 'max-file-size' in config:
self.max_file_size = humanfriendly.parse_size(
config['max-file-size'])
else:
self.max_file_size = None

raw = config.get('roots', '').encode().split(b'\n')
self.roots = []
for x in raw:
@@ -88,8 +82,15 @@ class Config:
self.roots.append(x)
self.roots.sort(key=len)

self.exclude = process_match_list('exclude')
self.unexclude = process_match_list('unexclude')
self.exclude = process_match_list(config.get('exclude', ''))
self.unexclude = process_match_list(config.get('unexclude', ''))

self.max_size_rules = []
rules = { humanfriendly.parse_size(k): v
for k, v in config.get('max-size-rules', {}).items() }
for size in reversed(sorted(rules)):
self.max_size_rules.append(
(size, process_match_list(rules[size])))

self.notify_email = config.get('notify-email', None)

@@ -174,12 +175,20 @@ class Backup:
exclude_reason = ('I', "skipping, on different filesystem")

elif (is_reg
and self.config.max_file_size
and size > self.config.max_file_size):
# Too big
a = format_size(size)
b = format_size(self.config.max_file_size)
exclude_reason = ('W', f"file size {a} exceeds limit {b}")
and len(self.config.max_size_rules)
and size > self.config.max_size_rules[-1][0]):
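# Check file sizes against our list of size rules.
# The rules are sorted from largest limit to smallest, so the last
# entry holds the smallest limit: a file no bigger than that cannot
# exceed any limit and the rules can be skipped entirely. Otherwise,
# check the path against the rules in order to see which one applies.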
for (max_size, patterns) in self.config.max_size_rules:
if self.config.match_re(patterns, decorated_path):
if size > max_size:
a = format_size(size)
b = format_size(max_size)
exclude_reason = (
'W', f"file size {a} exceeds limit {b}")
break

# If we have a reason to exclude it, stop now unless it's
# force-included


+ 12
- 7
config.yaml View File

@@ -10,12 +10,6 @@ roots: |
one-file-system: true
exclude-caches: true

# Files larger than this are excluded. If a large file isn't
# explicitly mentioned in "exclude" below, it also generates a
# warning. Note that this counts used blocks, so files with large
# holes will still be considered small (since they'll compress easily)
max-file-size: 500MiB

# Files/dirs to exclude from backup.
# Relative paths are treated as if starting with **/
# Paths ending in / will only match directories.
@@ -27,9 +21,20 @@ exclude: |
Steam/ubuntu*/
.cache/

# Rules to exclude files based on file size.
# This is a dict mapping each size limit to a list of patterns.
# For a given path, the largest size with a matching pattern applies.
# Matching follows the same behavior as the "exclude" list.
# Size is calculated as used blocks (think "du", not "du --apparent-size").
max-size-rules:
500 MiB: |
*
# 1.0 GiB: |
# *.mp4

# Files that are always included, even if they would have been
# excluded due to file size or the "exclude" list.
# Matching rules are the same as above.
# Matching follows the same behavior as the "exclude" list.
unexclude: |
.git/objects/pack/*.pack



Loading…
Cancel
Save