Browse Source

When removing data from files, try to punch a hole.

Requires fallocate(2) support with FALLOC_FL_PUNCH_HOLE, as
well as a filesystem that supports it (as of Linux 3.7:
tmpfs, btrfs, xfs, or ext4).
tags/nilmdb-1.2.4
Jim Paris 11 years ago
parent
commit
c5f079f61f
3 changed files with 68 additions and 2 deletions
  1. +18
    -2
      nilmdb/server/bulkdata.py
  2. +1
    -0
      nilmdb/utils/__init__.py
  3. +49
    -0
      nilmdb/utils/fallocate.py

+ 18
- 2
nilmdb/server/bulkdata.py View File

@@ -410,8 +410,16 @@ class Table(object):

def _remove_rows(self, subdir, filename, start, stop):
"""Helper to mark specific rows as being removed from a
file, and potentially removing or truncating the file itself."""
# Import an existing list of deleted rows for this file
file, and potentially remove or truncate the file itself."""
# Close potentially open file in file_open LRU cache
self.file_open.cache_remove(self, subdir, filename)

# We keep a file like 0000.removed that contains a list of
# which rows have been "removed". Note that we never have to
# remove entries from this list, because we never decrease
# self.nrows, and so we will never overwrite those locations in the
# file. Only when the list covers the entire extent of the
# file will that file be removed.
datafile = os.path.join(self.root, subdir, filename)
cachefile = datafile + ".removed"
try:
@@ -465,6 +473,14 @@ class Table(object):
except:
pass
else:
# File needs to stick around. This means we can get
# degenerate cases where we have large files containing as
# little as one row. Try to punch a hole in the file,
# so that this region doesn't take up filesystem space.
offset = start * self.packer.size
count = (stop - start) * self.packer.size
nilmdb.utils.fallocate.punch_hole(datafile, offset, count)

# Update cache. Try to do it atomically.
nilmdb.utils.atomic.replace_file(cachefile,
pickle.dumps(merged, 2))


+ 1
- 0
nilmdb/utils/__init__.py View File

@@ -8,3 +8,4 @@ from nilmdb.utils.diskusage import du, human_size
from nilmdb.utils.mustclose import must_close
from nilmdb.utils import atomic
import nilmdb.utils.threadsafety
import nilmdb.utils.fallocate

+ 49
- 0
nilmdb/utils/fallocate.py View File

@@ -0,0 +1,49 @@
# Implementation of hole punching via fallocate, if the OS
# and filesystem support it.

try:
    import os
    import ctypes
    import ctypes.util

    def make_fallocate():
        """Build and return a Python wrapper around the C library's
        fallocate(2).

        The returned function has the signature
        fallocate(fd, mode, offset, len_), returns None on success,
        and raises IOError(errno, message) when the C call fails.
        """
        libc_name = ctypes.util.find_library('c')
        libc = ctypes.CDLL(libc_name, use_errno=True)

        # The argument types below declare 64-bit offsets.  On 32-bit
        # platforms the plain "fallocate" symbol may take a 32-bit
        # off_t, which would misalign the arguments, so prefer the
        # explicit 64-bit entry point when the C library exports it.
        try:
            _fallocate = libc.fallocate64
        except AttributeError:
            _fallocate = libc.fallocate
        _fallocate.restype = ctypes.c_int
        _fallocate.argtypes = [ctypes.c_int, ctypes.c_int,
                               ctypes.c_int64, ctypes.c_int64]

        # Only the bound function pointer is needed from here on.
        del libc
        del libc_name

        def fallocate(fd, mode, offset, len_):
            """Call fallocate(2) on descriptor `fd`; raise IOError
            carrying the C errno if the call reports failure."""
            res = _fallocate(fd, mode, offset, len_)
            if res != 0:  # pragma: no cover
                err = ctypes.get_errno()
                raise IOError(err, os.strerror(err))
        return fallocate

    fallocate = make_fallocate()
    del make_fallocate
except Exception:  # pragma: no cover
    # No usable libc or no fallocate symbol: callers must check for None.
    fallocate = None

# Mode flags for fallocate(2); values match <linux/falloc.h>.
FALLOC_FL_KEEP_SIZE = 0x01
FALLOC_FL_PUNCH_HOLE = 0x02


def punch_hole(filename, offset, length, ignore_errors=True):
    """Punch a hole in the file.  This isn't well supported, so errors
    are ignored by default.

    filename: path of an existing file, opened for update.
    offset: byte offset at which the hole starts.
    length: size of the hole in bytes.
    ignore_errors: if true (the default), any IOError -- including
        fallocate being unavailable -- is silently swallowed and the
        file is left as it was; if false, the IOError propagates.

    The file size is not changed (FALLOC_FL_KEEP_SIZE is always set).
    """
    try:
        if fallocate is None:  # pragma: no cover
            raise IOError("fallocate not available")
        # Binary mode: only the descriptor is used, so there is no
        # reason to involve any text decoding layer.
        with open(filename, "r+b") as f:
            fallocate(f.fileno(),
                      FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
                      offset, length)
    except IOError:  # pragma: no cover
        if ignore_errors:
            return
        raise

Loading…
Cancel
Save