00001
00002
00003
00004 #ifndef IBIS_COLUMN_H
00005 #define IBIS_COLUMN_H
00013 #include "table.h" // ibis::TYPE_T
00014 #include "qExpr.h"
00015 #include "bitvector.h"
00016 #include <string>
00017
// Forward declarations of classes in namespace ibis.  Declaring the names
// here lets this header mention the types by pointer or reference without
// seeing their full definitions.
namespace ibis {

    // Typed col* classes (none of them is referenced again in this header).
    class colInts;
    class colUInts;
    class colLongs;
    class colULongs;
    class colFloats;
    class colDoubles;
    class colStrings;

    // String-oriented classes; dictionary appears below as a parameter of
    // column::string2int.
    class category;
    class text;
    class blob;
    class dictionary;

} // namespace ibis
00035
00046 class FASTBIT_CXX_DLLSPEC ibis::column {
00047 public:
00048
00049 virtual ~column();
00051 column(const part* tbl, FILE* file);
00053 column(const part* tbl, ibis::TYPE_T t, const char* name,
00054 const char* desc="", double low=DBL_MAX, double high=-DBL_MAX);
00055 column(const column& rhs);
00056
00057
00059 ibis::TYPE_T type() const {return m_type;}
00060 const char* name() const {return m_name.c_str();}
00061 const char* description() const {return m_desc.c_str();}
00062 const double& lowerBound() const {return lower;}
00063 const double& upperBound() const {return upper;}
00064 int elementSize() const;
00065 bool isFloat() const;
00066 bool isInteger() const;
00067 bool isSignedInteger() const;
00068 bool isUnsignedInteger() const;
00069 bool isNumeric() const;
00070 bool isSorted() const {return m_sorted;}
00071 void description(const char* d) {m_desc = d;}
00072 void lowerBound(double d) {lower = d;}
00073 void upperBound(double d) {upper = d;}
00074 const part* partition() const {return thePart;}
00075 void isSorted(bool);
00076
00077
00078 const char* indexSpec() const;
00079 uint32_t numBins() const;
00080
00081 void indexSpec(const char* spec) {m_bins=spec;}
00083 void preferredBounds(std::vector<double>&) const;
00085 void binWeights(std::vector<uint32_t>&) const;
00086
00090 virtual void computeMinMax();
00091 virtual void computeMinMax(const char *dir);
00095 virtual void computeMinMax(const char *dir,
00096 double& min, double &max) const;
00097
00098 virtual void loadIndex(const char* opt=0, int readall=0) const throw ();
00099 void unloadIndex() const;
00101 virtual long indexSize() const;
00104 void indexSpeedTest() const;
00106 void purgeIndexFile(const char *dir=0) const;
00107
00111 const char* dataFileName(std::string& fname, const char *dir=0) const;
00113 const char* nullMaskName(std::string& fname) const;
00114 void getNullMask(bitvector& mask) const;
00115
00118 virtual void getString(uint32_t i, std::string &val) const {};
00122 virtual const char* findString(const char* str) const
00123 {return static_cast<const char*>(0);}
00124
00127 array_t<int32_t>* getIntArray() const;
00129 array_t<float>* getFloatArray() const;
00131 array_t<double>* getDoubleArray() const;
00133 virtual int getValuesArray(void* vals) const;
00135 ibis::fileManager::storage* getRawData() const;
00136
00139 virtual array_t<char>* selectBytes(const bitvector& mask) const;
00141 virtual array_t<unsigned char>* selectUBytes(const bitvector& mask) const;
00143 virtual array_t<int16_t>* selectShorts(const bitvector& mask) const;
00145 virtual array_t<uint16_t>* selectUShorts(const bitvector& mask) const;
00147 virtual array_t<int32_t>* selectInts(const bitvector& mask) const;
00149 virtual array_t<uint32_t>* selectUInts(const bitvector& mask) const;
00151 virtual array_t<int64_t>* selectLongs(const bitvector& mask) const;
00153 virtual array_t<uint64_t>* selectULongs(const bitvector& mask) const;
00155 virtual array_t<float>* selectFloats(const bitvector& mask) const;
00157 virtual array_t<double>* selectDoubles(const bitvector& mask) const;
00158 virtual std::vector<std::string>*
00159 selectStrings(const bitvector& mask) const;
00160
00161 long selectValues(const bitvector& mask, void* vals) const;
00162 long selectValues(const bitvector& mask,
00163 void* vals, array_t<uint32_t>& inds) const;
00164
00166 virtual void write(FILE* file) const;
00168 virtual void print(std::ostream& out) const;
00170 void logMessage(const char* event, const char* fmt, ...) const;
00172 void logWarning(const char* event, const char* fmt, ...) const;
00173
00176 int expandRange(ibis::qContinuousRange& rng) const;
00179 int contractRange(ibis::qContinuousRange& rng) const;
00180
00190 virtual long estimateRange(const ibis::qContinuousRange& cmp,
00191 ibis::bitvector& low,
00192 ibis::bitvector& high) const;
00194 virtual long estimateRange(const ibis::qDiscreteRange& cmp,
00195 ibis::bitvector& low,
00196 ibis::bitvector& high) const;
00197
00200 virtual long evaluateRange(const ibis::qContinuousRange& cmp,
00201 const ibis::bitvector& mask,
00202 ibis::bitvector& res) const;
00204 virtual long evaluateRange(const ibis::qDiscreteRange& cmp,
00205 const ibis::bitvector& mask,
00206 ibis::bitvector& res) const;
00207
00211 virtual long estimateRange(const ibis::qContinuousRange& cmp) const;
00213 virtual long estimateRange(const ibis::qDiscreteRange& cmp) const;
00214
00216 virtual double estimateCost(const ibis::qContinuousRange& cmp) const;
00218 virtual double estimateCost(const ibis::qDiscreteRange& cmp) const;
00220 virtual double estimateCost(const ibis::qString& cmp) const {
00221 return 0;}
00223 virtual double estimateCost(const ibis::qMultiString& cmp) const {
00224 return 0;}
00225
00226 virtual float getUndecidable(const ibis::qContinuousRange& cmp,
00227 ibis::bitvector& iffy) const;
00229 virtual float getUndecidable(const ibis::qDiscreteRange& cmp,
00230 ibis::bitvector& iffy) const;
00231
00233 virtual long append(const char* dt, const char* df, const uint32_t nold,
00234 const uint32_t nnew, uint32_t nbuf, char* buf);
00235
00236 virtual long append(const void* vals, const ibis::bitvector& msk);
00237 virtual long writeData(const char* dir, uint32_t nold, uint32_t nnew,
00238 ibis::bitvector& mask, const void *va1,
00239 void *va2=0);
00240 template <typename T>
00241 long castAndWrite(const array_t<double>& vals, ibis::bitvector& mask,
00242 const T special);
00243 virtual long saveSelected(const ibis::bitvector& sel, const char *dest,
00244 char *buf, uint32_t nbuf);
00245 virtual long truncateData(const char* dir, uint32_t nent,
00246 ibis::bitvector& mask) const;
00247
00253 virtual double getActualMin() const;
00256 virtual double getActualMax() const;
00258 virtual double getSum() const;
00265 long getCumulativeDistribution(std::vector<double>& bounds,
00266 std::vector<uint32_t>& counts) const;
00278 long getDistribution(std::vector<double>& bbs,
00279 std::vector<uint32_t>& counts) const;
00281 class info;
00282 class indexLock;
00283 class mutexLock;
00284
00285 protected:
00286
00287 const part* thePart;
00288 ibis::bitvector mask_;
00289 ibis::TYPE_T m_type;
00290 std::string m_name;
00291 std::string m_desc;
00292 std::string m_bins;
00293 bool m_sorted;
00294 double lower;
00295 double upper;
00296
00297 mutable ibis::index* idx;
00299 mutable ibis::util::sharedInt32 idxcnt;
00300
00302 void logError(const char* event, const char* fmt, ...) const;
00305 long string2int(int fptr, dictionary& dic, uint32_t nbuf, char* buf,
00306 array_t<uint32_t>& out) const;
00308 double computeMin() const;
00310 double computeMax() const;
00312 double computeSum() const;
00315 void actualMinMax(const char *fname, const ibis::bitvector& mask,
00316 double &min, double &max) const;
00318 template <typename T>
00319 void actualMinMax(const array_t<T>& vals, const ibis::bitvector& mask,
00320 double& min, double& max) const;
00322 template <typename T>
00323 T computeMin(const array_t<T>& vals,
00324 const ibis::bitvector& mask) const;
00326 template <typename T>
00327 T computeMax(const array_t<T>& vals,
00328 const ibis::bitvector& mask) const;
00330 template <typename T>
00331 double computeSum(const array_t<T>& vals,
00332 const ibis::bitvector& mask) const;
00333
00335 virtual int searchSorted(const ibis::qContinuousRange&,
00336 ibis::bitvector&) const;
00338 virtual int searchSorted(const ibis::qDiscreteRange&,
00339 ibis::bitvector&) const;
00341 template <typename T> int
00342 searchSortedICC(const array_t<T>& vals,
00343 const ibis::qContinuousRange& rng,
00344 ibis::bitvector& hits) const;
00346 template <typename T> int
00347 searchSortedICD(const array_t<T>& vals,
00348 const ibis::qDiscreteRange& rng,
00349 ibis::bitvector& hits) const;
00351 template <typename T> int
00352 searchSortedOOCC(const char* fname,
00353 const ibis::qContinuousRange& rng,
00354 ibis::bitvector& hits) const;
00356 template <typename T> int
00357 searchSortedOOCD(const char* fname,
00358 const ibis::qDiscreteRange& rng,
00359 ibis::bitvector& hits) const;
00361 template <typename T> uint32_t
00362 findLower(int fdes, const uint32_t nr, const T tgt) const;
00364 template <typename T> uint32_t
00365 findUpper(int fdes, const uint32_t nr, const T tgt) const;
00366 template <typename T>
00367 long selectValuesT(const bitvector& mask, array_t<T>& vals) const;
00368 template <typename T>
00369 long selectValuesT(const bitvector& mask,
00370 array_t<T>& vals, array_t<uint32_t>& inds) const;
00372 template <typename T>
00373 long appendValues(const array_t<T>&, const ibis::bitvector&);
00375 long appendStrings(const std::vector<std::string>&, const ibis::bitvector&);
00376
00377 class readLock;
00378 class writeLock;
00379 class softWriteLock;
00380 friend class readLock;
00381 friend class writeLock;
00382 friend class indexLock;
00383 friend class mutexLock;
00384 friend class softWriteLock;
00385
00386 private:
00389 mutable pthread_rwlock_t rwlock;
00391 mutable pthread_mutex_t mutex;
00392
00393 column& operator=(const column&);
00394 };
00395
00398 class FASTBIT_CXX_DLLSPEC ibis::column::info {
00399 public:
00400 const char* name;
00401 const char* description;
00402 const double expectedMin;
00403 const double expectedMax;
00404 const ibis::TYPE_T type;
00405 info(const ibis::column& col)
00406 : name(col.name()), description(col.description()),
00407 expectedMin(col.lowerBound()),
00408 expectedMax(col.upperBound()), type(col.type()) {};
00409 };
00410
00414 class ibis::column::indexLock {
00415 public:
00416 ~indexLock();
00417 indexLock(const ibis::column* col, const char* m);
00418 const ibis::index* getIndex() const {return theColumn->idx;};
00419
00420 private:
00421 const ibis::column* theColumn;
00422 const char* mesg;
00423
00424 indexLock();
00425 indexLock(const indexLock&);
00426 indexLock& operator=(const indexLock&);
00427 };
00428
00430 class ibis::column::mutexLock {
00431 public:
00432 mutexLock(const ibis::column* col, const char* m)
00433 : theColumn(col), mesg(m) {
00434 if (ibis::gVerbose > 9)
00435 col->logMessage("gainExclusiveAccess",
00436 "pthread_mutex_lock for %s", m);
00437 int ierr = pthread_mutex_lock(&(col->mutex));
00438 if (0 != ierr)
00439 col->logWarning("gainExclusiveAccess", "pthread_mutex_lock for %s "
00440 "returned %d (%s)", m, ierr, strerror(ierr));
00441 }
00442 ~mutexLock() {
00443 if (ibis::gVerbose > 9)
00444 theColumn->logMessage("releaseExclusiveAccess",
00445 "pthread_mutex_unlock for %s", mesg);
00446 int ierr = pthread_mutex_unlock(&(theColumn->mutex));
00447 if (0 != ierr)
00448 theColumn->logWarning("releaseExclusiveAccess",
00449 "pthread_mutex_unlock for %s returned %d "
00450 "(%s)", mesg, ierr, strerror(ierr));
00451 }
00452
00453 private:
00454 const ibis::column* theColumn;
00455 const char* mesg;
00456
00457 mutexLock() {};
00458 mutexLock(const mutexLock&) {};
00459 mutexLock& operator=(const mutexLock&);
00460 };
00461
00463 class ibis::column::writeLock {
00464 public:
00465 writeLock(const ibis::column* col, const char* m)
00466 : theColumn(col), mesg(m) {
00467 #if defined(DEBUG) && DEBUG > 0
00468 ibis::util::logMessage("ibis::column::writeLock",
00469 "locking column %s for %s", col->name(),
00470 (m ? m : "?"));
00471 #endif
00472 int ierr = pthread_rwlock_wrlock(&(col->rwlock));
00473 if (0 != ierr)
00474 col->logWarning("gainWriteAccess", "pthread_rwlock_wrlock for %s "
00475 "returned %d (%s)", m, ierr, strerror(ierr));
00476 else if (ibis::gVerbose > 9)
00477 col->logMessage("gainWriteAccess",
00478 "pthread_rwlock_wrlock for %s", m);
00479 }
00480 ~writeLock() {
00481 #if defined(DEBUG) && DEBUG > 0
00482 ibis::util::logMessage("ibis::column::writeLock",
00483 "unlocking column %s (%s)", theColumn->name(),
00484 (mesg ? mesg : "?"));
00485 #endif
00486 int ierr = pthread_rwlock_unlock(&(theColumn->rwlock));
00487 if (0 != ierr)
00488 theColumn->logWarning("releaseWriteAccess",
00489 "pthread_rwlock_unlock() for %s returned %d "
00490 "(%s)", mesg, ierr, strerror(ierr));
00491 else if (ibis::gVerbose > 9)
00492 theColumn->logMessage("releaseWriteAccess",
00493 "pthread_rwlock_unlock for %s", mesg);
00494 }
00495
00496 private:
00497 const ibis::column* theColumn;
00498 const char* mesg;
00499
00500 writeLock();
00501 writeLock(const writeLock&);
00502 writeLock& operator=(const writeLock&);
00503 };
00504
00506 class ibis::column::softWriteLock {
00507 public:
00508 softWriteLock(const ibis::column* col, const char* m)
00509 : theColumn(col), mesg(m),
00510 locked(0 == pthread_rwlock_trywrlock(&(col->rwlock))) {
00511 #if defined(DEBUG) && DEBUG > 0
00512 ibis::util::logMessage("ibis::column::softWriteLock",
00513 "locking column %s for %s", col->name(),
00514 (m ? m : "?"));
00515 #endif
00516 if (ibis::gVerbose > 9 && locked)
00517 col->logMessage("gainWriteAccess",
00518 "pthread_rwlock_wrlock for %s", m);
00519 }
00520 ~softWriteLock() {
00521 #if defined(DEBUG) && DEBUG > 0
00522 ibis::util::logMessage("ibis::column::softWriteLock",
00523 "unlocking column %s (%s)", theColumn->name(),
00524 (mesg ? mesg : "?"));
00525 #endif
00526 if (locked) {
00527 int ierr = pthread_rwlock_unlock(&(theColumn->rwlock));
00528 if (0 != ierr)
00529 theColumn->logWarning("releaseWriteAccess",
00530 "pthread_rwlock_unlock for %s returned "
00531 "%d (%s)", mesg, ierr, strerror(ierr));
00532 else if (ibis::gVerbose > 9)
00533 theColumn->logMessage("releaseWriteAccess",
00534 "pthread_rwlock_unlock for %s", mesg);
00535 }
00536 }
00537 bool isLocked() const {return locked;}
00538
00539 private:
00540 const ibis::column* theColumn;
00541 const char* mesg;
00542 const bool locked;
00543
00544 softWriteLock();
00545 softWriteLock(const softWriteLock&);
00546 softWriteLock& operator=(const softWriteLock&);
00547 };
00548
00550 class ibis::column::readLock {
00551 public:
00552 readLock(const ibis::column* col, const char* m)
00553 : theColumn(col), mesg(m) {
00554 #if defined(DEBUG) && DEBUG > 0
00555 ibis::util::logMessage("ibis::column::readLock",
00556 "locking column %s for %s", col->name(),
00557 (m ? m : "?"));
00558 #endif
00559 int ierr = pthread_rwlock_rdlock(&(col->rwlock));
00560 if (0 != ierr)
00561 col->logWarning("gainReadAccess", "pthread_rwlock_rdlock for %s "
00562 "returned %d (%s)", m, ierr, strerror(ierr));
00563 else if (ibis::gVerbose > 9)
00564 col->logMessage("gainReadAccess",
00565 "pthread_rwlock_rdlock for %s", m);
00566 }
00567 ~readLock() {
00568 #if defined(DEBUG) && DEBUG > 0
00569 ibis::util::logMessage("ibis::column::readLock",
00570 "unlocking column %s (%s)", theColumn->name(),
00571 (mesg ? mesg : "?"));
00572 #endif
00573 int ierr = pthread_rwlock_unlock(&(theColumn->rwlock));
00574 if (0 != ierr)
00575 theColumn->logWarning("releaseReadAccess",
00576 "pthread_rwlock_unlock for %s returned %d "
00577 "(%s)", mesg, ierr, strerror(ierr));
00578 else if (ibis::gVerbose > 9)
00579 theColumn->logMessage("releaseReadAccess",
00580 "pthread_rwlock_unlock for %s", mesg);
00581 }
00582
00583 private:
00584 const ibis::column* theColumn;
00585 const char* mesg;
00586
00587 readLock();
00588 readLock(const readLock&);
00589 readLock& operator=(const readLock&);
00590 };
00591
00593 inline int ibis::column::elementSize() const {
00594 int sz = -1;
00595 switch (m_type) {
00596 case ibis::OID: sz = sizeof(rid_t); break;
00597 case ibis::INT: sz = sizeof(int32_t); break;
00598 case ibis::UINT: sz = sizeof(uint32_t); break;
00599 case ibis::LONG: sz = sizeof(int64_t); break;
00600 case ibis::ULONG: sz = sizeof(uint64_t); break;
00601 case ibis::FLOAT: sz = sizeof(float); break;
00602 case ibis::DOUBLE: sz = sizeof(double); break;
00603 case ibis::BYTE: sz = sizeof(char); break;
00604 case ibis::UBYTE: sz = sizeof(unsigned char); break;
00605 case ibis::SHORT: sz = sizeof(int16_t); break;
00606 case ibis::USHORT: sz = sizeof(uint16_t); break;
00607 case ibis::CATEGORY: sz = 0; break;
00608 case ibis::TEXT: sz = 0; break;
00609 default: sz = -1; break;
00610 }
00611 return sz;
00612 }
00613
00615 inline bool ibis::column::isFloat() const {
00616 return(m_type == ibis::FLOAT || m_type == ibis::DOUBLE);
00617 }
00618
00620 inline bool ibis::column::isInteger() const {
00621 return(m_type == ibis::BYTE || m_type == ibis::UBYTE ||
00622 m_type == ibis::SHORT || m_type == ibis::USHORT ||
00623 m_type == ibis::INT || m_type == ibis::UINT ||
00624 m_type == ibis::LONG || m_type == ibis::ULONG);
00625 }
00626
00628 inline bool ibis::column::isSignedInteger() const {
00629 return(m_type == ibis::BYTE || m_type == ibis::SHORT ||
00630 m_type == ibis::INT || m_type == ibis::LONG);
00631 }
00632
00634 inline bool ibis::column::isUnsignedInteger() const {
00635 return(m_type == ibis::UBYTE || m_type == ibis::USHORT ||
00636 m_type == ibis::UINT || m_type == ibis::ULONG);
00637 }
00638
00640 inline bool ibis::column::isNumeric() const {
00641 return(m_type == ibis::BYTE || m_type == ibis::UBYTE ||
00642 m_type == ibis::SHORT || m_type == ibis::USHORT ||
00643 m_type == ibis::INT || m_type == ibis::UINT ||
00644 m_type == ibis::LONG || m_type == ibis::ULONG ||
00645 m_type == ibis::FLOAT || m_type == ibis::DOUBLE);
00646 }
00647
00654 inline std::vector<std::string>*
00655 ibis::column::selectStrings(const bitvector& mask) const {
00656 return 0;
00657 }
00658
00659
00660 inline std::ostream& operator<<(std::ostream& out, const ibis::column& prop) {
00661 prop.print(out);
00662 return out;
00663 }
00664 #endif // IBIS_COLUMN_H