A two-level index. More...
#include <ibin.h>
Public Member Functions | |
| virtual void | adjustLength (uint32_t nrows) |
| Fill the bitmaps to the specified size. More... | |
| virtual long | append (const char *dt, const char *df, uint32_t nnew) |
| Create index for the data in df and append the result to the index in dt. | |
| long | append (const ibis::zone &tail) |
| virtual void | binBoundaries (std::vector< double > &) const |
| The function binBoundaries and binWeights return bin boundaries and counts of each bin respectively. More... | |
| virtual void | binWeights (std::vector< uint32_t > &) const |
| virtual void | estimate (const ibis::qContinuousRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const |
| Provide an estimation based on the current index. More... | |
| virtual long | evaluate (const ibis::qContinuousRange &expr, ibis::bitvector &hits) const |
| To evaluate the exact hits. More... | |
| virtual long | evaluate (const ibis::qDiscreteRange &expr, ibis::bitvector &hits) const |
| To evaluate the exact hits. More... | |
| virtual const char * | name () const |
Returns the name of the index, similar to the function type, but returns a string instead. More... | |
| virtual uint32_t | numBins () const |
| virtual void | print (std::ostream &out) const |
| Prints human readable information. More... | |
| virtual int | read (const char *idxfile) |
| Read the metadata about an index from the specified location. More... | |
| virtual int | read (ibis::fileManager::storage *st) |
| Read the metadata of an index from a storage object. | |
| virtual void | speedTest (std::ostream &out) const |
| Time some logical operations and print out their speed. | |
| virtual INDEX_TYPE | type () const |
| Returns an index type identifier. | |
| virtual float | undecidable (const ibis::qContinuousRange &expr, ibis::bitvector &iffy) const |
| Mark the position of the rows that can not be decided with this index. More... | |
| virtual int | write (const char *dt) const |
| Write the index to the specified location. More... | |
| zone (const ibis::column *c, ibis::fileManager::storage *st, size_t offset=8) | |
| Reconstruct zone from content of a storage object. More... | |
| zone (const ibis::bin &rhs) | |
| Copy constructor. More... | |
Public Member Functions inherited from ibis::bin | |
| long | append (const ibis::bin &tail) |
Append the tail to this index. | |
| long | append (const array_t< uint32_t > &ind) |
| Append a list of integers representing bin numbers. | |
| bin (const ibis::bin &rhs) | |
| Copy constructor. It performs a deep copy. | |
| bin (const ibis::column *c=0, const char *f=0) | |
| Constructor. Construct a bitmap index from current data. | |
| bin (const ibis::column *c, ibis::fileManager::storage *st, size_t offset=8) | |
| bin (const ibis::column *c, const char *f, const array_t< double > &bd) | |
| Constructor. Construct an index with the given bin boundaries. | |
| bin (const ibis::column *c, const char *f, const std::vector< double > &bd) | |
| Constructor. Construct an index with the given bin boundaries. | |
| long | checkBin (const ibis::qRange &cmp, uint32_t jbin, ibis::bitvector &res) const |
| Candidate check using the binned values. More... | |
| long | checkBin (const ibis::qRange &cmp, uint32_t jbin, const ibis::bitvector &mask, ibis::bitvector &res) const |
| Candidate check using the binned values. More... | |
| void | construct (const char *) |
| Construct a binned bitmap index. More... | |
| template<typename E > | |
| void | construct (const array_t< E > &varr) |
| Construction function for in-memory data. More... | |
| virtual int | contractRange (ibis::qContinuousRange &rng) const |
| virtual uint32_t | estimate (const ibis::qContinuousRange &expr) const |
| Compute an upper bound on the number of hits. | |
| virtual void | estimate (const ibis::deprecatedJoin &expr, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
| Estimate the hits for symmetric joins. More... | |
| virtual void | estimate (const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
| virtual void | estimate (const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
| Evaluating a join condition with one (likely composite) index. | |
| virtual int64_t | estimate (const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2) const |
| virtual void | estimate (const ibis::bin &idx2, const ibis::deprecatedJoin &expr, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
| Estimate the number of hits for nonsymmetric joins. | |
| virtual void | estimate (const ibis::bin &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
| virtual void | estimate (const ibis::bin &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
| virtual int64_t | estimate (const ibis::bin &idx2, const ibis::deprecatedJoin &expr) const |
| virtual int64_t | estimate (const ibis::bin &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask) const |
| virtual int64_t | estimate (const ibis::bin &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2) const |
| virtual double | estimateCost (const ibis::qContinuousRange &expr) const |
| Estimate the cost of evaluating a range condition. | |
| virtual double | estimateCost (const ibis::qDiscreteRange &expr) const |
| Estimate the cost of evaluating a range condition. | |
| virtual int | expandRange (ibis::qContinuousRange &rng) const |
| The functions expandRange and contractRange expand or contract the boundaries of a range condition so that the new range will have exact answers using the function estimate. More... | |
| virtual long | getCumulativeDistribution (std::vector< double > &bds, std::vector< uint32_t > &cts) const |
| Compute the cumulative distribution from the binned index. | |
| virtual long | getDistribution (std::vector< double > &bbs, std::vector< uint32_t > &cts) const |
| Compute a histogram from the binned index. | |
| virtual double | getMax () const |
| Compute the actual maximum value from the binned index. | |
| virtual double | getMin () const |
| Compute the actual minimum value from the binned index. | |
| virtual double | getSum () const |
| Compute the approximate value of the sum from the binned index. | |
| array_t< uint32_t > * | indices (const ibis::bitvector &mask) const |
| int | read (int fdes, size_t offset, const char *fname, const char *header) |
| Read an ibis::bin embedded inside a file. More... | |
| virtual long | select (const ibis::qContinuousRange &, void *) const |
| Select the rows that satisfy the range condition. More... | |
| virtual long | select (const ibis::qContinuousRange &, void *, ibis::bitvector &) const |
| Select the rows that satisfy the range condition. More... | |
Public Member Functions inherited from ibis::index | |
| void | addBins (uint32_t ib, uint32_t ie, ibis::bitvector &res) const |
Add the sum of bits[ib] through bits[ie-1] to res. More... | |
| void | addBins (uint32_t ib, uint32_t ie, ibis::bitvector &res, const ibis::bitvector &tot) const |
Compute the sum of bit vectors [ib, ie). More... | |
| virtual void | estimate (const ibis::qDiscreteRange &expr, ibis::bitvector &lower, ibis::bitvector &upper) const |
| Estimate the hits for discrete ranges, i.e., those translated from 'a IN (x, y, ..)'. More... | |
| virtual uint32_t | estimate (const ibis::qDiscreteRange &expr) const |
| virtual void | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
| Estimate the pairs for the range join operator. | |
| virtual void | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
| Estimate the pairs for the range join operator. More... | |
| virtual void | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2, ibis::bitvector64 &lower, ibis::bitvector64 &upper) const |
| virtual int64_t | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr) const |
| Estimate an upper bound for the number of pairs. | |
| virtual int64_t | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask) const |
| Estimate an upper bound for the number of pairs produced from marked records. More... | |
| virtual int64_t | estimate (const ibis::index &idx2, const ibis::deprecatedJoin &expr, const ibis::bitvector &mask, const ibis::qRange *const range1, const ibis::qRange *const range2) const |
| virtual const ibis::bitvector * | getBitvector (uint32_t i) const |
| Return a pointer to the ith bitvector used in the index (may be 0). | |
| uint32_t | getNRows () const |
| Return the number of rows represented by this object. | |
| template<typename E > | |
| void | mapValues (const array_t< E > &val, VMap &bmap) |
| template<typename E > | |
| void | mapValues (const array_t< E > &val, histogram &hist, uint32_t count) |
| template<typename E > | |
| void | mapValues (const array_t< E > &val, array_t< E > &bounds, std::vector< uint32_t > &cnts) |
| template<typename E1 , typename E2 > | |
| void | mapValues (const array_t< E1 > &val1, const array_t< E2 > &val2, array_t< E1 > &bnd1, array_t< E2 > &bnd2, std::vector< uint32_t > &cnts) |
| Compute a two-dimensional histogram. More... | |
| virtual uint32_t | numBitvectors () const |
| Returns the number of bit vectors used by the index. | |
| float | sizeInBytes () const |
| Estimate the size of this index object measured in bytes. More... | |
| void | sumBins (uint32_t ib, uint32_t ie, ibis::bitvector &res) const |
Sum up bits[ib:ie-1] and place the result in res. More... | |
| void | sumBins (uint32_t ib, uint32_t ie, ibis::bitvector &res, uint32_t ib0, uint32_t ie0) const |
| Compute a new sum for bit vectors [ib, ie) by taking advantage of the old sum for bitvectors [ib0, ie0). More... | |
| void | sumBins (const ibis::array_t< uint32_t > &, ibis::bitvector &) const |
| Sum up the bits in the specified bins. | |
| virtual float | undecidable (const ibis::qDiscreteRange &expr, ibis::bitvector &iffy) const |
| virtual | ~index () |
| The destructor. | |
Protected Member Functions | |
| virtual void | clear () |
| Clear the existing content. More... | |
| virtual size_t | getSerialSize () const throw () |
| Get an estimate of the size of index on disk. More... | |
Protected Member Functions inherited from ibis::bin | |
| void | addBounds (double lbd, double rbd, uint32_t nbins, uint32_t eqw) |
| The function used by setBoundaries() to actually generate the bounds. | |
| bin (const ibis::column *c, const uint32_t nbits, ibis::fileManager::storage *st, size_t offset=8) | |
| A constructor to accommodate multicomponent encodings. More... | |
| void | binning (const char *f, const std::vector< double > &bd) |
| Generate bins according to the specified boundaries. More... | |
| void | binning (const char *f, const array_t< double > &bd) |
| void | binning (const char *f) |
| Read the data file and partition the values into bins according to the specified bin boundary. More... | |
| template<typename E > | |
| void | binning (const array_t< E > &varr) |
| template<typename E > | |
| void | binning (const array_t< E > &varr, const array_t< double > &bd) |
| template<typename E > | |
| void | binningT (const char *fname) |
| Read the data file, partition the values, and write out the bin ordered data with .bin suffix. More... | |
| long | binOrder (const char *fname) const |
| Write bin-ordered values. | |
| template<typename E > | |
| long | binOrderT (const char *fname) const |
| Write bin-ordered values. | |
| template<typename E > | |
| long | checkBin0 (const ibis::qRange &cmp, uint32_t jbin, ibis::bitvector &res) const |
| template<typename E > | |
| long | checkBin1 (const ibis::qRange &cmp, uint32_t jbin, const ibis::bitvector &mask, ibis::bitvector &res) const |
| virtual double | computeSum () const |
| Compute the sum of values from the information in the index. More... | |
| void | convertGranules (granuleMap &gmap) |
| Convert the granule map into binned index. More... | |
| void | divideBitmaps (const array_t< bitvector * > &bms, std::vector< unsigned > &parts) const |
| Partition the bitmaps into groups that take about the same amount of storage. More... | |
| virtual uint32_t | locate (const double &val) const |
| Find the bin containing val. More... | |
| virtual void | locate (const ibis::qContinuousRange &expr, uint32_t &cand0, uint32_t &cand1) const |
| Find the outer boundaries of the range expression. More... | |
| virtual void | locate (const ibis::qContinuousRange &expr, uint32_t &cand0, uint32_t &cand1, uint32_t &hit0, uint32_t &hit1) const |
| Find the bins related to the range expression. More... | |
| template<typename E > | |
| void | mapGranules (const array_t< E > &, granuleMap &gmap) const |
| template<typename T > | |
| long | mergeValues (const ibis::qContinuousRange &, ibis::array_t< T > &) const |
| Extract values only. More... | |
| template<typename T > | |
| long | mergeValues (const ibis::qContinuousRange &, ibis::array_t< T > &, ibis::bitvector &) const |
| Extract values and record the positions. More... | |
| void | printGranules (std::ostream &out, const granuleMap &gmap) const |
| void | readBinBoundaries (const char *name, uint32_t nb) |
| Read a file containing a list of floating-point numbers. | |
| template<typename E > | |
| void | scanAndPartition (const array_t< E > &, unsigned) |
| void | scanAndPartition (const char *, unsigned, uint32_t nbins=0) |
| Partition the range based on the (approximate) histogram of the data. More... | |
| void | setBoundaries (const char *f) |
| Set bin boundaries. More... | |
| void | setBoundaries (array_t< double > &bnds, const ibis::bin &bin0) const |
| void | setBoundaries (array_t< double > &bnds, const ibis::bin &idx1, const array_t< uint32_t > cnt1, const array_t< uint32_t > cnt0) const |
| template<typename E > | |
| void | setBoundaries (const array_t< E > &varr) |
| void | swap (bin &rhs) |
| Swap the content of the index. | |
| int | write32 (int fptr) const |
| Write the content to a file already open. | |
| int | write64 (int fptr) const |
| Write the content to a file already open. | |
Protected Member Functions inherited from ibis::index | |
| virtual void | activate () const |
| Regenerate all bitvectors from the underlying storage. More... | |
| virtual void | activate (uint32_t i) const |
| Regenerate the ith bitvector from the underlying storage. | |
| virtual void | activate (uint32_t i, uint32_t j) const |
| Regenerate bitvectors i (inclusive) through j (exclusive) from the underlying storage. More... | |
| void | computeMinMax (const char *f, double &min, double &max) const |
| void | dataFileName (std::string &name, const char *f=0) const |
| Generate data file name from "f". More... | |
| index (const ibis::column *c=0) | |
| Default constructor. More... | |
| index (const ibis::column *c, ibis::fileManager::storage *s) | |
| Constructor with a storage object. More... | |
| void | indexFileName (std::string &name, const char *f=0) const |
| Generates index file name from "f". More... | |
| void | initBitmaps (int fdes) |
| Prepare the bitmaps. More... | |
| void | initBitmaps (ibis::fileManager::storage *st) |
| Prepare bitmaps from the given storage object. More... | |
| int | initOffsets (int fdes, const char offsize, size_t start, uint32_t nobs) |
| Read in the offset array. More... | |
| int | initOffsets (ibis::fileManager::storage *st, size_t start, uint32_t nobs) |
| Regenerate the offsets array from the given storage object. More... | |
| void | mapValues (const char *f, VMap &bmap) const |
| Map the positions of each individual value. More... | |
| void | mapValues (const char *f, histogram &hist, uint32_t count=0) const |
| Generate a histogram. More... | |
| void | optionalUnpack (array_t< ibis::bitvector * > &bits, const char *opt) |
| A function to decide whether to uncompress the bitvectors. More... | |
| void | printHeader (std::ostream &, const char *) const |
Additional Inherited Members | |
Public Types inherited from ibis::bin | |
|
typedef std::map< double, granule * > | granuleMap |
Static Public Member Functions inherited from ibis::bin | |
| static uint32_t | parseNbins (const ibis::column &) |
| Parse the index specs to determine eqw and nbins. More... | |
| static unsigned | parsePrec (const ibis::column &) |
| Parse the index spec to extract precision. | |
| static unsigned | parseScale (const ibis::column &) |
| Parse the specification about scaling. More... | |
| static unsigned | parseScale (const char *) |
Static Protected Member Functions inherited from ibis::index | |
| static void | indexFileName (std::string &name, const ibis::column *col1, const ibis::column *col2, const char *f=0) |
| Generate the index file name for the composite index formed on two columns. More... | |
Protected Attributes inherited from ibis::bin | |
| array_t< double > | bounds |
| The nominal boundaries. | |
| array_t< double > | maxval |
| The maximal values in each bin. | |
| array_t< double > | minval |
| The minimal values in each bin. | |
| uint32_t | nobs |
| Number of bitvectors. | |
A two-level index.
Both levels are not cumulative, i.e., both levels are equality encoded.
| ibis::zone::zone | ( | const ibis::column * | c, |
| ibis::fileManager::storage * | st, | ||
| size_t | start = 8 |
||
| ) |
Reconstruct zone from content of a storage object.
In addition to the common content for index::bin, the following are inserted after minval array:
After the bit vectors of this level are written, the next level ibis::zone are written without header.
References ibis::fileManager::storage::begin(), clear(), ibis::index::col, ibis::gVerbose, ibis::column::name(), ibis::part::name(), ibis::bin::nobs, ibis::index::nrows, print(), and ibis::array_t< T >::size().
|
explicit |
Copy constructor.
Generate an ibis::zone from an ibis::bin.
References ibis::index::bits, ibis::bin::bounds, clear(), ibis::index::col, ibis::array_t< T >::copy(), ibis::bitvector::copy(), ibis::bin::divideBitmaps(), ibis::gVerbose, ibis::column::indexSpec(), ibis::bin::maxval, ibis::bin::minval, ibis::column::name(), ibis::part::name(), ibis::bin::nobs, ibis::index::nrows, print(), and ibis::array_t< T >::resize().
|
virtual |
Fill the bitmaps to the specified size.
Fill the bitvectors with zeros so that they all contain nrows bits.
Truncate the bitvectors if they have more bits.
Reimplemented from ibis::bin.
References ibis::bin::adjustLength().
|
virtual |
The function binBoundaries and binWeights return bin boundaries and counts of each bin respectively.
Reimplemented from ibis::bin.
|
protectedvirtual |
Clear the existing content.
Free the objects pointed to by the pointers.
Reimplemented from ibis::bin.
References ibis::bin::clear().
Referenced by zone().
|
virtual |
Provide an estimation based on the current index.
Set bits in lower are hits for certain, set bits in upper are candidates. Set bits in (upper - lower) should be checked to verify which ones are actually hits. If the bitvector upper contains fewer bits than bitvector lower, the content of upper is assumed to be the same as lower.
Reimplemented from ibis::bin.
References ibis::bitvector::clear(), ibis::bitvector::copy(), ibis::util::eq2range(), ibis::gVerbose, ibis::util::incrDouble(), ibis::qContinuousRange::leftBound(), ibis::qContinuousRange::rightBound(), and ibis::bitvector::set().
|
virtual |
To evaluate the exact hits.
On success, return the number of hits, otherwise a negative value is returned.
Reimplemented from ibis::bin.
References ibis::bitvector::cnt(), and ibis::bitvector::size().
|
inlinevirtual |
To evaluate the exact hits.
On success, return the number of hits, otherwise a negative value is returned.
Reimplemented from ibis::bin.
References ibis::index::evaluate().
|
protectedvirtual |
Get an estimate of the size of index on disk.
This function is used to determine whether to use 64-bit offsets or 32-bit offsets. For the purpose of this estimation, we assume 64-bit offsets are needed. This function recursively calls itself to determine the size of sub-indexes.
Reimplemented from ibis::bin.
|
inlinevirtual |
Returns the name of the index, similar to the function type, but returns a string instead.
Reimplemented from ibis::bin.
|
virtual |
Prints human readable information.
Outputs information about the index as text to the specified output stream.
Reimplemented from ibis::bin.
References ibis::gVerbose.
Referenced by zone().
|
virtual |
Read the metadata about an index from the specified location.
The incoming argument can be a directory name or a file name. The actual index file name is determined by the function indexFileName. It returns 0 on successful completion and a negative number to indicate error.
Reimplemented from ibis::bin.
References ibis::util::clear(), ibis::gVerbose, ibis::fileManager::instance(), ibis::fileManager::recordPages(), ibis::array_t< T >::size(), ibis::util::strnewdup(), ibis::array_t< T >::swap(), UnixOpen, and ibis::index::ZONE.
|
virtual |
Mark the position of the rows that can not be decided with this index.
| expr | the range conditions to be evaluated. |
| iffy | the bitvector marking the positions of rows that can not be decided using the index. Return value is the expected fraction of undecided rows that might satisfy the range conditions. |
Reimplemented from ibis::bin.
References ibis::bitvector::cnt(), ibis::qContinuousRange::leftBound(), ibis::qContinuousRange::rightBound(), ibis::bitvector::set(), and ibis::bitvector::size().
|
virtual |
Write the index to the specified location.
The incoming argument can be either a directory name or a file name. The actual index file name is generated by the function indexFileName.
Reimplemented from ibis::bin.
References ibis::index::BINNING, ibis::fileManager::flushFile(), ibis::gVerbose, ibis::fileManager::instance(), UnixOpen, ibis::bin::write32(), ibis::bin::write64(), and ibis::index::ZONE.
|
| |