# Fixed record size bulk data storage

from __future__ import absolute_import
from __future__ import division
import nilmdb
from nilmdb.utils.printf import *

import os
import sys
import cPickle as pickle
import struct
import fnmatch
import mmap

# Up to 256 open file descriptors at any given time
table_cache_size = 16
fd_cache_size = 16

@nilmdb.utils.must_close()
class BulkData(object):
    # Filesystem-backed storage of fixed-size binary records, rooted at
    # <basepath>/data.

    def __init__(self, basepath):
        self.basepath = basepath
        self.root = os.path.join(self.basepath, "data")

        # Make root path
        if not os.path.isdir(self.root):
            os.mkdir(self.root)

    def close(self):
        # getnode is presumably an LRU-cached accessor (its definition was in
        # the span lost to corruption -- see create() below); dropping its
        # cache releases the cached node/table objects.  TODO confirm once
        # the lost text is restored.
        self.getnode.cache_remove_all()

    def create(self, path, layout_name):
        """
        path: path to the data (e.g. '/newton/prep').
        Paths must contain at least two elements, e.g.:
          /newton/prep
          /newton/raw
          /newton/upstairs/prep
          /newton/upstairs/raw

        layout_name: string for nilmdb.layout.get_named(), e.g. 'float32_8'
        """
        if path[0] != '/':
            raise ValueError("paths must start with /")
        [ group, node ] = path.rsplit("/", 1)
        if group == '':
            raise ValueError("invalid path")

        # Get layout, and build format string for struct module
        try:
            layout = nilmdb.layout.get_named(layout_name)
            # ----------------------------------------------------------------
            # NOTE(review): SOURCE CORRUPTION.  A markup stripper deleted
            # everything between the '<' that began the struct format string
            # here and the '>' of "if count > remaining:" inside
            # Table.append() below.  The lost span contained the remainder of
            # this method, the cached getnode() accessor that close() relies
            # on, and the first half of the Table class (__init__, the
            # packer/nrows setup, mmap_open, _fnoffset_from_row, and the head
            # of append()).  Nothing from that span is reconstructed here
            # beyond the minimal, clearly-marked stubs needed to keep this
            # module parseable; restore the real text from version control.
            # ----------------------------------------------------------------
            struct_fmt = '<'  # little-endian; the field codes were lost -- TODO restore
            raise NotImplementedError(
                "BulkData.create: method body lost to source corruption")
        finally:
            # The original exception handling for this try block was lost
            # with the span above.
            pass


class Table(object):
    # NOTE(review): the real class header and the first half of this class
    # were lost to the corruption described in BulkData.create().  The
    # surviving methods demonstrate that instances carry:
    #   self.packer                   -- a struct.Struct for one packed row
    #   self.nrows                    -- current number of rows in the table
    #   self.mmap_open(fn[, size])    -- mmap of a data file (may return a
    #                                    cached object; see append())
    #   self._fnoffset_from_row(row)  -- (filename, offset, count) locating a
    #                                    row and the rows after it in a file
    # Those definitions must be restored from version control.

    def append(self, data):
        """Append the rows in 'data' to the end of the table.

        NOTE(review): the head of this method was lost to the corruption
        described above.  The setup and loop head below are inferred from
        the surviving tail and from the structurally identical loop in
        __getitem__; confirm against version control."""
        remaining = len(data)
        dataiter = iter(data)
        while remaining:
            (filename, offset, count) = self._fnoffset_from_row(self.nrows)
            if count > remaining:
                count = remaining
            newsize = offset + count * self.packer.size
            mm = self.mmap_open(filename, newsize)
            mm.seek(offset)
            # Extend the file to the target length.  We specified
            # newsize when opening, but that may have been ignored if
            # the mmap_open returned a cached object.
            mm.resize(newsize)
            # Write the data
            for i in xrange(count):
                row = dataiter.next()
                mm.write(self.packer.pack(*row))
            remaining -= count
            self.nrows += count

    def __getitem__(self, key):
        """Extract data and return it.  Supports simple indexing
        (table[n]) and range slices (table[n:m]).  Returns a nested
        Python list [[row],[row],[...]]"""
        # Handle simple slices
        if isinstance(key, slice):
            # Fall back to brute force if the slice isn't simple
            if ((key.step is not None and key.step != 1) or
                key.start is None or
                key.stop is None or
                key.start >= key.stop or
                key.start < 0 or
                key.stop > self.nrows):
                return [ self[x] for x in xrange(*key.indices(self.nrows)) ]
            ret = []
            row = key.start
            remaining = key.stop - key.start
            while remaining:
                (filename, offset, count) = self._fnoffset_from_row(row)
                if count > remaining:
                    count = remaining
                mm = self.mmap_open(filename)
                for i in xrange(count):
                    ret.append(list(self.packer.unpack_from(mm, offset)))
                    offset += self.packer.size
                remaining -= count
                row += count
            return ret

        # Handle single points
        if key < 0 or key >= self.nrows:
            raise IndexError("Index out of range")
        (filename, offset, count) = self._fnoffset_from_row(key)
        mm = self.mmap_open(filename)
        # unpack_from ignores the mmap object's current seek position
        return self.packer.unpack_from(mm, offset)


class TimestampOnlyTable(object):
    """Helper that lets us pass a Tables object into bisect, by
    returning only the timestamp when a particular row is
    requested."""
    def __init__(self, table):
        self.table = table

    def __getitem__(self, index):
        # Timestamp is the first field of every row.
        return self.table[index][0]