
Inserting bulk data is essentially done, not tested

tags/replace-pytables
Jim Paris, 9 years ago
commit f41ff0a6e8
2 changed files with 39 additions and 16 deletions:
  1. nilmdb/bulkdata.py  (+38, -15)
  2. setup.cfg  (+1, -1)

nilmdb/bulkdata.py  (+38, -15)

@@ -10,6 +10,7 @@ import sys
 import cPickle as pickle
 import struct
 import fnmatch
+import mmap
 
 # Up to 256 open file descriptors at any given time
 table_cache_size = 16
@@ -204,27 +205,49 @@ class Table(object):
 
     # Cache open files
     @nilmdb.utils.lru_cache(size = fd_cache_size,
-                            onremove = lambda x: self.mmap_close(x))
-    def mmap_open(self, file):
-        """Open and map a given filename (relative to self.root)"""
+                            onremove = lambda x: x.close())
+    def mmap_open(self, file, newsize = None):
+        """Open and map a given filename (relative to self.root).
+        Will be automatically closed when evicted from the cache.
+
+        If 'newsize' is provided, the file is truncated to the given
+        size before the mapping is returned.  (Note that the LRU cache
+        on this function means the truncate will only happen if the
+        object isn't already cached; mmap.resize should be used too.)"""
         f = open(os.path.join(self.root, file), "a+", 0)
-        f.seek(0, 2)
-        mm = mmap.mmap(f.fileno(), f.tell())
+        if newsize is not None:
+            # mmap can't map a zero-length file, so this allows the
+            # caller to set the file size between file creation and
+            # mmap.
+            f.truncate(newsize)
+        mm = mmap.mmap(f.fileno(), 0)
         return mm
 
-    def mmap_close(self, mm):
-        """Close a mmap object"""
-        mm.close()
-
     def append(self, data):
         """Append the data and flush it to disk.
         data is a nested Python list [[row],[row],[...]]"""
-        (filename, offset, count) = self._fnoffset_from_row(self.nrows)
-        ### TODO: resize mmap, actually write the data
-        return 0
-        raise NotImplementedError()
-        self.table.append(data)
-        self.table.flush()
+        remaining = len(data)
+        dataiter = iter(data)
+        while remaining:
+            # See how many rows we can fit into the current file, and open it
+            (filename, offset, count) = self._fnoffset_from_row(self.nrows)
+            if count > remaining:
+                count = remaining
+            newsize = offset + count * self.packer.size
+            mm = self.mmap_open(filename, newsize)
+            mm.seek(offset)
+
+            # Extend the file to the target length.  We specified
+            # newsize when opening, but that may have been ignored if
+            # mmap_open returned a cached object.
+            mm.resize(newsize)
+
+            # Write the data
+            for i in range(count):
+                row = dataiter.next()
+                mm.write(self.packer.pack(*row))
+            remaining -= count
+            self.nrows += count
 
     def __getitem__(self, val):
         """Needs to support simple indexing (table[n]) and

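A note on the truncate-before-map logic in mmap_open above: mmap.mmap(fd, 0) means "map the whole file", but CPython raises ValueError when the file is empty, so a brand-new table file must be grown before it can be mapped. A minimal standalone sketch of that behavior (the path and two-double row format below are made up for illustration, not taken from nilmdb):

    import mmap
    import os
    import struct
    import tempfile

    packer = struct.Struct("<dd")      # hypothetical two-double row format
    path = os.path.join(tempfile.mkdtemp(), "0000")

    f = open(path, "a+b")
    # The file is zero-length here; mmap.mmap(f.fileno(), 0) would raise
    # ValueError, so grow it to its target size first.
    f.truncate(4 * packer.size)
    mm = mmap.mmap(f.fileno(), 0)      # length 0 = map the entire file

    mm.seek(0)
    for i in range(4):
        mm.write(packer.pack(float(i), 2.0 * i))
    mm.close()
    f.close()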

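The append() loop relies on _fnoffset_from_row() to turn a global row number into a (filename, byte offset, rows remaining in file) triple, spilling into the next file when the current one fills up. Here is a hedged sketch of that bookkeeping; ROW_SIZE, ROWS_PER_FILE, and the hex filename scheme are assumptions for this example, not necessarily what the real Table method does:

    ROW_SIZE = 16        # bytes per packed row, e.g. struct.Struct("<dd").size
    ROWS_PER_FILE = 4    # kept tiny so the spill across files is visible

    def fnoffset_from_row(row):
        """Map a global row number to (filename, byte offset, rows left)."""
        filenum, rownum = divmod(row, ROWS_PER_FILE)
        return ("%04x" % filenum, rownum * ROW_SIZE, ROWS_PER_FILE - rownum)

    # Appending 6 rows starting at global row 2 touches two files:
    nrows, remaining = 2, 6
    while remaining:
        (filename, offset, count) = fnoffset_from_row(nrows)
        if count > remaining:
            count = remaining
        print((filename, offset, count))  # ('0000', 32, 2), then ('0001', 0, 4)
        nrows += count
        remaining -= count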
setup.cfg  (+1, -1)

@@ -12,7 +12,7 @@ stop=
 verbosity=2
 #tests=tests/test_mustclose.py
 #tests=tests/test_lrucache.py
-#tests=tests/test_cmdline.py
+tests=tests/test_cmdline.py
 #tests=tests/test_layout.py
 #tests=tests/test_rbtree.py
 #tests=tests/test_interval.py

