|
|
@@ -21,6 +21,7 @@ class Config: |
|
|
|
root: bytes |
|
|
|
max_file_size: typing.Optional[int] |
|
|
|
one_file_system: bool |
|
|
|
exclude_caches: bool |
|
|
|
exclude: list[bytes] |
|
|
|
force_include: list[bytes] |
|
|
|
notify_email: typing.Optional[str] |
|
|
@@ -31,6 +32,7 @@ class Config: |
|
|
|
config = yaml.safe_load(f) |
|
|
|
self.root = config['root'].encode() |
|
|
|
self.one_file_system = config.get('one-file-system', False) |
|
|
|
self.exclude_caches = config.get('exclude-caches', False) |
|
|
|
|
|
|
|
if 'max-file-size' in config: |
|
|
|
self.max_file_size = humanfriendly.parse_size( |
|
|
@@ -38,11 +40,21 @@ class Config: |
|
|
|
else: |
|
|
|
self.max_file_size = None |
|
|
|
|
|
|
|
utf = config.get('exclude', '').encode() |
|
|
|
self.exclude = list(filter(len, utf.split(b'\n'))) |
|
|
|
|
|
|
|
utf = config.get('force-include', '').encode() |
|
|
|
self.force_include = list(filter(len, utf.split(b'\n'))) |
|
|
|
def process_match_list(config_name): |
|
|
|
raw = config.get(config_name, '').encode().split(b'\n') |
|
|
|
pats = [] |
|
|
|
# Prepend '**/' to any relative patterns |
|
|
|
for x in raw: |
|
|
|
if not len(x): |
|
|
|
continue |
|
|
|
if x.startswith(b'/'): |
|
|
|
pats.append(x) |
|
|
|
else: |
|
|
|
pats.append(b'**/' + x) |
|
|
|
return pats |
|
|
|
|
|
|
|
self.exclude = process_match_list('exclude') |
|
|
|
self.force_include = process_match_list('force-include') |
|
|
|
|
|
|
|
self.notify_email = config.get('notify-email', None) |
|
|
|
|
|
|
@@ -63,11 +75,15 @@ class Config: |
|
|
|
self.force_include_re = ([ re.compile(x) for x in a ], |
|
|
|
[ re.compile(x) for x in b ]) |
|
|
|
|
|
|
|
def match_compiled(self, re: tuple[list[typing.Pattern], |
|
|
|
list[typing.Pattern]], |
|
|
|
path: bytes): |
|
|
|
def match_re(self, re: tuple[list[typing.Pattern], |
|
|
|
list[typing.Pattern]], |
|
|
|
path: bytes, is_dir: bool): |
|
|
|
# If it's a directory, try matching against a trailing slash |
|
|
|
# first. |
|
|
|
if is_dir and self.match_re(re, path + b'/', False): |
|
|
|
return True |
|
|
|
# Path matches if it matches at least one regex in |
|
|
|
# re[0] and no regex in re[1] |
|
|
|
# re[0] and no regex in re[1]. |
|
|
|
for a in re[0]: |
|
|
|
if a.match(path): |
|
|
|
for b in re[1]: |
|
|
@@ -76,20 +92,6 @@ class Config: |
|
|
|
return True |
|
|
|
return False |
|
|
|
|
|
|
|
def __str__(self): |
|
|
|
d = { 'root': self.root } |
|
|
|
if self.max_file_size: |
|
|
|
d['max-file-size'] = self.max_file_size |
|
|
|
if self.exclude: |
|
|
|
utf = b'\n'.join(self.exclude) |
|
|
|
d['exclude'] = utf.decode(errors='backslashreplace') |
|
|
|
if self.force_include: |
|
|
|
utf = b'\n'.join(self.force_include) |
|
|
|
d['force-include'] = utf.decode(errors='backslashreplace') |
|
|
|
if self.notify_email: |
|
|
|
d['notify-email'] = self.notify_email |
|
|
|
return yaml.dump(d, default_flow_style=False) |
|
|
|
|
|
|
|
class Backup: |
|
|
|
def __init__(self, config: Config, dry_run: bool): |
|
|
|
self.config = config |
|
|
@@ -112,69 +114,94 @@ class Backup: |
|
|
|
|
|
|
|
def run(self, outfile: typing.BinaryIO): |
|
|
|
self.outfile = outfile |
|
|
|
self.scan(self.config.root) |
|
|
|
# Base should not end with a slash, but full path should |
|
|
|
if self.config.root.endswith(b'/'): |
|
|
|
base = self.config.root[:-1] |
|
|
|
path = self.config.root |
|
|
|
else: |
|
|
|
base = self.config.root |
|
|
|
path = self.config.root + b'/' |
|
|
|
self.scan(base, path) |
|
|
|
|
|
|
|
def scan(self, path: bytes, parent_st: os.stat_result=None): |
|
|
|
def scan(self, base: bytes, path: bytes, |
|
|
|
parent_st: os.stat_result=None): |
|
|
|
"""If the given path should be backed up, print it. If it's |
|
|
|
a directory and its contents should be included, recurse.""" |
|
|
|
a directory and its contents should be included, recurse. |
|
|
|
""" |
|
|
|
|
|
|
|
if base.endswith(b'/'): |
|
|
|
raise Exception("base must not end with /") |
|
|
|
relpath = path[len(base):] |
|
|
|
if not relpath.startswith(b'/'): |
|
|
|
raise Exception(f"relative path (from {repr(base)}, {repr(path)})" |
|
|
|
+ f" must start with /") |
|
|
|
|
|
|
|
# Copy the path in string form, for logging. Otherwise, we use |
|
|
|
# bytes directly. |
|
|
|
pathstr = path.decode(errors='backslashreplace') |
|
|
|
|
|
|
|
try: |
|
|
|
# See if this path should be excluded or force-included |
|
|
|
|
|
|
|
# Only stat the file when we need it |
|
|
|
cached_st = None |
|
|
|
def st(): |
|
|
|
nonlocal cached_st |
|
|
|
if not cached_st: |
|
|
|
cached_st = os.lstat(path) |
|
|
|
return cached_st |
|
|
|
st = os.lstat(path) |
|
|
|
is_dir = stat.S_ISDIR(st.st_mode) |
|
|
|
is_reg = stat.S_ISREG(st.st_mode) |
|
|
|
|
|
|
|
# See if there's a reason to exclude it |
|
|
|
exclude_reason = None |
|
|
|
|
|
|
|
if self.config.match_compiled(self.config.exclude_re, path): |
|
|
|
if self.config.match_re(self.config.exclude_re, relpath, is_dir): |
|
|
|
# Config file says to exclude |
|
|
|
exclude_reason = ('I', f"skipping, excluded by config file") |
|
|
|
|
|
|
|
elif (self.config.one_file_system |
|
|
|
and parent_st is not None |
|
|
|
and stat.S_ISDIR(st().st_mode) |
|
|
|
and st().st_dev != parent_st.st_dev): |
|
|
|
and is_dir |
|
|
|
and st.st_dev != parent_st.st_dev): |
|
|
|
# Crosses a mount point |
|
|
|
exclude_reason = ('I', "skipping, on different filesystem") |
|
|
|
|
|
|
|
elif (self.config.max_file_size |
|
|
|
and stat.S_ISREG(st().st_mode) |
|
|
|
and (st().st_blocks * 512) > self.config.max_file_size): |
|
|
|
and is_reg |
|
|
|
and (st.st_blocks * 512) > self.config.max_file_size): |
|
|
|
# Too big |
|
|
|
def format_size(n): |
|
|
|
return humanfriendly.format_size( |
|
|
|
n, keep_width=True, binary=True) |
|
|
|
a = format_size(st().st_size) |
|
|
|
a = format_size(st.st_size) |
|
|
|
b = format_size(self.config.max_file_size) |
|
|
|
exclude_reason = ('W', f"file size {a} exceeds limit {b}") |
|
|
|
|
|
|
|
# If we have a reason to exclude it, stop now unless it's |
|
|
|
# force-included |
|
|
|
if (exclude_reason |
|
|
|
and not self.config.match_compiled( |
|
|
|
self.config.force_include_re, path)): |
|
|
|
|
|
|
|
force = self.config.match_re( |
|
|
|
self.config.force_include_re, relpath, is_dir) |
|
|
|
if exclude_reason and not force: |
|
|
|
self.log(exclude_reason[0], f"{exclude_reason[1]}: {pathstr}") |
|
|
|
return |
|
|
|
|
|
|
|
# Print name of this path |
|
|
|
# Print path for Borg |
|
|
|
self.out(path) |
|
|
|
|
|
|
|
# If it's a directory, recurse |
|
|
|
if stat.S_ISDIR(st().st_mode): |
|
|
|
# Process directories |
|
|
|
if is_dir: |
|
|
|
|
|
|
|
# Skip if it contains CACHEDIR.TAG |
|
|
|
# (mirroring the --exclude-caches borg option) |
|
|
|
if self.config.exclude_caches: |
|
|
|
try: |
|
|
|
tag = b'Signature: 8a477f597d28d172789f06886806bc55' |
|
|
|
with open(path + b'/CACHEDIR.TAG', 'rb') as f: |
|
|
|
if f.read(len(tag)) == tag: |
|
|
|
self.log( |
|
|
|
'I', f"skipping cache dir: {pathstr}") |
|
|
|
return |
|
|
|
except: |
|
|
|
pass |
|
|
|
|
|
|
|
# Recurse |
|
|
|
with os.scandir(path) as it: |
|
|
|
for entry in it: |
|
|
|
self.scan(path=entry.path, parent_st=st()) |
|
|
|
self.scan(base=base, path=entry.path, |
|
|
|
parent_st=st) |
|
|
|
|
|
|
|
except PermissionError as e: |
|
|
|
self.log('E', f"can't read {pathstr}") |
|
|
|