When removing data from files, try to punch a hole.

Requires fallocate(2) support with FALLOC_FL_PUNCH_HOLE, as well as a filesystem that supports it (as of Linux 3.7: tmpfs, btrfs, xfs, or ext4).
11 years ago · c5f079f61f
--- a/nilmdb/server/bulkdata.py
+++ b/nilmdb/server/bulkdata.py
@@ -410,8 +410,16 @@ class Table(object):

    def _remove_rows(self, subdir, filename, start, stop):
        """Helper to mark specific rows as being removed from a
        file, and potentially removing or truncating the file itself."""
        # Import an existing list of deleted rows for this file
        file, and potentially remove or truncate the file itself."""
        # Close potentially open file in file_open LRU cache
        self.file_open.cache_remove(self, subdir, filename)

        # We keep a file like 0000.removed that contains a list of
        # which rows have been "removed".  Note that we never have to
        # remove entries from this list, because we never decrease
        # self.nrows, and so we will never overwrite those locations in the
        # file.  Only when the list covers the entire extent of the
        # file will that file be removed.
        datafile = os.path.join(self.root, subdir, filename)
        cachefile = datafile + ".removed"
        try:
@@ -465,6 +473,14 @@ class Table(object):
            except:
                pass
        else:
            # File needs to stick around.  This means we can get
            # degenerate cases where we have large files containing as
            # little as one row.  Try to punch a hole in the file,
            # so that this region doesn't take up filesystem space.
            offset = start * self.packer.size
            count = (stop - start) * self.packer.size
            nilmdb.utils.fallocate.punch_hole(datafile, offset, count)

            # Update cache.  Try to do it atomically.
            nilmdb.utils.atomic.replace_file(cachefile,
                                             pickle.dumps(merged, 2))
--- a/nilmdb/utils/init.py
+++ b/nilmdb/utils/init.py
@@ -8,3 +8,4 @@ from nilmdb.utils.diskusage import du, human_size
 from nilmdb.utils.mustclose import must_close
 from nilmdb.utils import atomic
 import nilmdb.utils.threadsafety
 import nilmdb.utils.fallocate
--- a/nilmdb/utils/fallocate.py
+++ b/nilmdb/utils/fallocate.py
@@ -0,0 +1,49 @@
 # Implementation of hole punching via fallocate, if the OS
 # and filesystem support it.

 try:
     import os
     import ctypes
     import ctypes.util

     def make_fallocate():
         """Build and return a Python wrapper around libc's fallocate.

         Raises whatever ctypes raises if libc cannot be loaded or
         exports neither fallocate64 nor fallocate; the enclosing
         try/except turns that into ``fallocate = None``.
         """
         libc = ctypes.CDLL(ctypes.util.find_library('c'), use_errno=True)

         # Plain fallocate() takes off_t, which is only 32 bits wide on
         # 32-bit glibc builds without large-file support; the c_int64
         # argtypes below would then mis-pass offset and length.
         # fallocate64() always takes 64-bit offsets, so prefer it
         # whenever the C library exports it.
         try:
             _fallocate = libc.fallocate64
         except AttributeError:
             _fallocate = libc.fallocate
         _fallocate.restype = ctypes.c_int
         _fallocate.argtypes = [ctypes.c_int, ctypes.c_int,
                                ctypes.c_int64, ctypes.c_int64]

         def fallocate(fd, mode, offset, len_):
             """Call fallocate(fd, mode, offset, len_) in libc.

             Returns None on success.  Raises IOError carrying the C
             errno and its message if the call returns nonzero.
             """
             res = _fallocate(fd, mode, offset, len_)
             if res != 0: # pragma: no cover
                 errno = ctypes.get_errno()
                 raise IOError(errno, os.strerror(errno))
         return fallocate

     fallocate = make_fallocate()
     del make_fallocate
 except Exception: # pragma: no cover
     # No usable libc fallocate (e.g. non-Linux platform); callers
     # must check for None before calling.
     fallocate = None

 # Mode flag bits for the fallocate() call; punch_hole() ORs the two
 # together.
 # NOTE(review): values should match <linux/falloc.h> -- confirm if porting.
 FALLOC_FL_KEEP_SIZE = 0x01
 FALLOC_FL_PUNCH_HOLE = 0x02

 def punch_hole(filename, offset, length, ignore_errors=True):
     """Try to punch a hole of `length` bytes at byte `offset` in the
     file named `filename`.

     The file is opened read/write and fallocate is called with
     FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE.  Hole punching is not
     widely supported, so by default any IOError (including fallocate
     being unavailable) is silently swallowed; pass ignore_errors=False
     to have it re-raised instead.
     """
     flags = FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE
     try:
         if fallocate is None: # pragma: no cover
             raise IOError("fallocate not available")
         with open(filename, "r+") as handle:
             fallocate(handle.fileno(), flags, offset, length)
     except IOError: # pragma: no cover
         # Best effort only: propagate the failure just when asked to.
         if not ignore_errors:
             raise
     return None