table.h
Go to the documentation of this file.
00001 // File: $Id$
00002 // Author: John Wu <John.Wu at ACM.org>
00003 // Copyright 2007-2012 the Regents of the University of California
00004 #ifndef IBIS_TABLE_H
00005 #define IBIS_TABLE_H
00006 
00016 #include <iostream>     // std::ostream
00017 #include <vector>       // std::vector
00018 #include <map>          // std::map
00019 #include <string>       // std::string
00020 #include "const.h"      // intxx_t, uintxx_t, ... from stdint.h
00021 
00022 namespace ibis {
00023 
    /// Column data types known to this header.  Only UNKNOWN_TYPE has an
    /// explicit value; the remaining enumerators are numbered implicitly
    /// (OID is 1, ... BLOB is 14), so inserting or reordering entries
    /// changes the numeric value of every enumerator that follows.
00025     enum TYPE_T {
        /// Placeholder for an unspecified type; explicitly zero.
00027         UNKNOWN_TYPE=0,
        /// NOTE(review): presumably an object-identifier type -- confirm.
00029         OID,
00030         BYTE,   ///< Held as signed char in ibis::table::row.
00031         UBYTE,  ///< Held as unsigned char in ibis::table::row.
00032         SHORT,  ///< Held as int16_t in ibis::table::row.
00033         USHORT, ///< Held as uint16_t in ibis::table::row.
00034         INT,    ///< Held as int32_t in ibis::table::row.
00035         UINT,   ///< Held as uint32_t in ibis::table::row.
00036         LONG,   ///< Held as int64_t in ibis::table::row.
00037         ULONG,  ///< Held as uint64_t in ibis::table::row.
00038         FLOAT,  ///< Held as float in ibis::table::row.
00039         DOUBLE, ///< Held as double in ibis::table::row.
00040 
00041 
        /// String-valued; held as std::string in ibis::table::row.
        /// NOTE(review): how CATEGORY differs from TEXT is not visible in
        /// this header -- confirm against the column implementations.
00042         CATEGORY,
        /// String-valued; held as std::string in ibis::table::row.
00046         TEXT,
        /// Held as std::string in ibis::table::row.  NOTE(review):
        /// presumably opaque binary data -- confirm.
00051         BLOB
00052     };
    /// Array of C strings defined in another translation unit.
    /// NOTE(review): presumably indexed by TYPE_T to give a printable type
    /// name -- confirm.
00054     FASTBIT_CXX_DLLSPEC extern const char** TYPESTRING;
    /// C string defined in another translation unit.  NOTE(review):
    /// presumably one character per TYPE_T value -- confirm.
00056     FASTBIT_CXX_DLLSPEC extern const char* TYPECODE;
00057 
    // Forward declarations; the classes are defined later in this file.
00058     class table;
00059     class tablex;
00060     class tableList;
00061 } // namespace ibis
00062 
/// Abstract base class for a named table made of typed columns.
///
/// The class stores only a name and a description.  Querying (estimate,
/// select, groupby), ordering, index management, bulk column extraction,
/// histograms and cursor creation are declared here, mostly as pure virtual
/// functions that a derived class must supply.  Objects cannot be copied:
/// the copy constructor and assignment operator are private and have no
/// definition in this header.
00072 class FASTBIT_CXX_DLLSPEC ibis::table {
00073 public:
    /// Static factory taking a single ibis::part.  NOTE(review): ownership
    /// of the returned pointer is presumably the caller's -- confirm.
00077     static ibis::table* create(ibis::part&);
    /// Static factory taking a list of partitions.
00081     static ibis::table* create(const ibis::partList&);
    /// Static factory taking a directory name.  NOTE(review): presumably
    /// the directory holds the data to load -- confirm.
00085     static ibis::table* create(const char* dir);
    /// Static factory taking two directory names.  NOTE(review): the
    /// relation between dir1 and dir2 is not visible here -- confirm.
00091     static ibis::table* create(const char* dir1, const char* dir2);
00092 
    /// Virtual so that a derived table can be destroyed through a pointer
    /// to this base class.
00094     virtual ~table() {};
00095 
    /// Name of the table.  The default implementation returns a pointer
    /// into name_, so it stays valid only while name_ is left unchanged
    /// and this object is alive.
00098     virtual const char* name() const {return name_.c_str();}
    /// Description of the table; the default returns a pointer into desc_.
00100     virtual const char* description() const {return desc_.c_str();}
    /// Number of rows, as a 64-bit count.
00102     virtual uint64_t nRows() const =0;
    /// Number of columns, as a 32-bit count.
00104     virtual uint32_t nColumns() const =0;
00105 
    /// A list of C strings, e.g. column names.  The element type is a bare
    /// const char*, so the list itself does not own the characters.
00109     typedef ibis::array_t<const char*> stringList;
    /// A list of column types.
00111     typedef ibis::array_t<ibis::TYPE_T> typeList;
    /// A list of untyped buffers; see allocateBuffer and freeBuffers.
00115     typedef ibis::array_t<void *> bufferList;
    /// Map from a column name to its type, ordered by ibis::lessi.
00117     typedef std::map<const char*, ibis::TYPE_T, ibis::lessi> namesTypes;
00118 
00119     virtual stringList columnNames() const =0; ///< Names of the columns.
00120     virtual typeList columnTypes() const =0; ///< Types of the columns.
00121 
    /// Write a description of the table to the given stream.
00123     virtual void describe(std::ostream&) const =0;
    /// Write the column names to out, using del as the delimiter string.
00125     virtual void dumpNames(std::ostream& out, const char* del=", ") const =0;
    /// Write table content to out, using del as the delimiter string.
    /// NOTE(review): the meaning of the int return value is not visible
    /// here -- presumably negative on error; confirm.
00129     virtual int dump(std::ostream& out, const char* del=", ") const =0;
    /// As above with a row count nr.  NOTE(review): presumably limits the
    /// output to the first nr rows -- confirm.
00131     virtual int dump(std::ostream& out, uint64_t nr,
00132                      const char* del=", ") const =0;
    /// As above with a starting offset and a row count.
00135     virtual int dump(std::ostream& out, uint64_t offset, uint64_t nr,
00136                      const char* del=", ") const =0;
    /// Back up the table to directory dir; tname and tdesc are optional
    /// (default null).  NOTE(review): presumably they override the stored
    /// name and description -- confirm.
00140     virtual int backup(const char* dir, const char* tname=0,
00141                        const char* tdesc=0) const =0;
00142 
    /// Estimate results for the condition string cond.  nmin and nmax are
    /// output parameters.  NOTE(review): presumably lower and upper bounds
    /// on the number of matching rows -- confirm.
00145     virtual void estimate(const char* cond,
00146                           uint64_t& nmin, uint64_t& nmax) const =0;
    /// Same as above with the condition given as a query expression.
00149     virtual void estimate(const ibis::qExpr* cond,
00150                           uint64_t& nmin, uint64_t& nmax) const =0;
    /// Produce a new table from a select clause and a condition string.
    /// NOTE(review): ownership of the result is presumably the caller's.
00153     virtual table* select(const char* sel, const char* cond) const =0;
    /// Overload taking the condition as a query expression.  Unlike the
    /// string version it is not pure virtual; its body is defined
    /// elsewhere.
00156     virtual table* select(const char* sel, const ibis::qExpr* cond) const;
00157 
    /// Static variant of select operating directly on a list of partitions.
00159     static table* select(const ibis::constPartList& parts,
00160                          const char* sel, const char* cond);
    /// Static variant taking the condition as a query expression.
00162     static table* select(const ibis::constPartList& parts,
00163                          const char* sel, const ibis::qExpr* cond);
    /// Static helper returning a signed 64-bit count for cond over parts.
    /// NOTE(review): presumably the number of hits, negative on error.
00165     static int64_t computeHits(const ibis::constPartList& parts,
00166                                const char* cond);
    /// Same as above with the condition given as a query expression.
00168     static int64_t computeHits(const ibis::constPartList& parts,
00169                                const ibis::qExpr* cond);
00170 
    /// Group-by on the listed names, producing a new table.
00177     virtual table* groupby(const stringList&) const =0;
    /// Group-by with the names given as one string; not pure virtual, the
    /// body is defined elsewhere.
00180     virtual table* groupby(const char*) const;
    /// Reorder the rows by the listed names.  Non-const: it acts on this
    /// table rather than returning a new one.
00188     virtual void orderby(const stringList&)=0;
    /// As above with one bool per name.  NOTE(review): presumably the sort
    /// direction of each key -- confirm which value means ascending.
00189     virtual void orderby(const stringList&, const std::vector<bool>&)=0;
    /// Order-by with the keys given as one string; body defined elsewhere.
00191     virtual void orderby(const char*);
    /// Reverse the order of the rows in this table.
00193     virtual void reverseRows()=0;
00194 
    /// Add a data partition named by a C string.  The default
    /// implementation does nothing and returns -1.
00207     virtual int addPartition(const char*) {return -1;}
    /// Retrieve the partitions of this table into the given list.  The
    /// default implementation leaves the list untouched and returns -1.
00209     virtual int getPartitions(ibis::constPartList&) const {
00210         return -1;}
00211 
    /// Build an index for column colname with an optional option string.
00226     virtual int buildIndex(const char* colname, const char* option=0) =0;
    /// Build indexes with an optional option string.  NOTE(review):
    /// presumably for all columns -- confirm.
00231     virtual int buildIndexes(const char* options=0) =0;
    /// Get the indexing option string; colname is optional (default null).
00234     virtual const char* indexSpec(const char* colname=0) const =0;
    /// Set the indexing option string; colname is optional (default null).
00237     virtual void indexSpec(const char* opt, const char* colname=0) =0;
    /// Combine the listed categorical columns.  The default implementation
    /// does nothing and returns 0.
00249     virtual int combineCategories(const stringList&) {return 0;}
00251 
    /// The getColumnAsXxx functions copy values of column cname into the
    /// caller-supplied destination vals and return a signed 64-bit value.
    /// NOTE(review): presumably the number of values copied or a negative
    /// error code, with the default begin=0,end=0 selecting all rows, and
    /// with raw-pointer destinations required to be large enough --
    /// confirm all three against the implementation.
00269     virtual int64_t
00270         getColumnAsBytes(const char* cname, char* vals,
00271                          uint64_t begin=0, uint64_t end=0) const =0;
00272     virtual int64_t
00273         getColumnAsUBytes(const char* cname, unsigned char* vals,
00274                           uint64_t begin=0, uint64_t end=0) const =0;
00275     virtual int64_t
00276         getColumnAsShorts(const char* cname, int16_t* vals,
00277                           uint64_t begin=0, uint64_t end=0) const =0;
00278     virtual int64_t
00279         getColumnAsUShorts(const char* cname, uint16_t* vals,
00280                            uint64_t begin=0, uint64_t end=0) const =0;
00281     virtual int64_t
00282         getColumnAsInts(const char* cname, int32_t* vals,
00283                         uint64_t begin=0, uint64_t end=0) const =0;
00284     virtual int64_t
00285         getColumnAsUInts(const char* cname, uint32_t* vals,
00286                          uint64_t begin=0, uint64_t end=0) const =0;
00287     virtual int64_t
00288         getColumnAsLongs(const char* cname, int64_t* vals,
00289                          uint64_t begin=0, uint64_t end=0) const =0;
00290     virtual int64_t
00291         getColumnAsULongs(const char* cname, uint64_t* vals,
00292                           uint64_t begin=0, uint64_t end=0) const =0;
00293     virtual int64_t
00294         getColumnAsFloats(const char* cname, float* vals,
00295                           uint64_t begin=0, uint64_t end=0) const =0;
00296     virtual int64_t
00297         getColumnAsDoubles(const char* cname, double* vals,
00298                            uint64_t begin=0, uint64_t end=0) const =0;
    /// Overload that receives the doubles in a std::vector instead of a
    /// raw buffer.
00299     virtual int64_t
00300         getColumnAsDoubles(const char* cname, std::vector<double>& vals,
00301                            uint64_t begin=0, uint64_t end=0) const =0;
    /// Receive the values of column cname as strings.
00305     virtual int64_t
00306         getColumnAsStrings(const char* cname, std::vector<std::string>& vals,
00307                            uint64_t begin=0, uint64_t end=0) const =0;
00308 
    /// Minimum value of column cname, reported as a double.
00314     virtual double getColumnMin(const char* cname) const =0;
    /// Maximum value of column cname, reported as a double.
00320     virtual double getColumnMax(const char* cname) const =0;
00322 
    /// One-dimensional histogram of column cname under the given
    /// constraints; counts is the output.  NOTE(review): presumably the
    /// bins are [begin + i*stride, begin + (i+1)*stride) up to end --
    /// confirm bin boundaries and the meaning of the return value.
00336     virtual long getHistogram(const char* constraints,
00337                               const char* cname,
00338                               double begin, double end, double stride,
00339                               std::vector<uint32_t>& counts) const =0;
    /// Two-dimensional histogram over cname1 and cname2; counts is a flat
    /// vector.  NOTE(review): the bin ordering inside counts is not
    /// visible here -- confirm.
00346     virtual long getHistogram2D(const char* constraints,
00347                                 const char* cname1,
00348                                 double begin1, double end1, double stride1,
00349                                 const char* cname2,
00350                                 double begin2, double end2, double stride2,
00351                                 std::vector<uint32_t>& counts) const =0;
    /// Three-dimensional histogram over cname1, cname2 and cname3.
00358     virtual long getHistogram3D(const char* constraints,
00359                                 const char* cname1,
00360                                 double begin1, double end1, double stride1,
00361                                 const char* cname2,
00362                                 double begin2, double end2, double stride2,
00363                                 const char* cname3,
00364                                 double begin3, double end3, double stride3,
00365                                 std::vector<uint32_t>& counts) const =0;
00367 
    /// One row of a table, grouped by element type.  For each type there
    /// is a list of names and a list of values.  NOTE(review): presumably
    /// xxxnames[i] labels xxxvalues[i] -- confirm.
00369     struct row {
00370         std::vector<std::string>   bytesnames; 
00371         std::vector<signed char>   bytesvalues;
00372         std::vector<std::string>   ubytesnames; 
00373         std::vector<unsigned char> ubytesvalues;
00374         std::vector<std::string>   shortsnames; 
00375         std::vector<int16_t>       shortsvalues;
00376         std::vector<std::string>   ushortsnames; 
00377         std::vector<uint16_t>      ushortsvalues;
00378         std::vector<std::string>   intsnames; 
00379         std::vector<int32_t>       intsvalues;
00380         std::vector<std::string>   uintsnames; 
00381         std::vector<uint32_t>      uintsvalues;
00382         std::vector<std::string>   longsnames; 
00383         std::vector<int64_t>       longsvalues;
00384         std::vector<std::string>   ulongsnames; 
00385         std::vector<uint64_t>      ulongsvalues;
00386         std::vector<std::string>   floatsnames; 
00387         std::vector<float>         floatsvalues;
00388         std::vector<std::string>   doublesnames; 
00389         std::vector<double>        doublesvalues;
00390         std::vector<std::string>   catsnames; 
00391         std::vector<std::string>   catsvalues;
00392         std::vector<std::string>   textsnames; 
00393         std::vector<std::string>   textsvalues;
00394         std::vector<std::string>   blobsnames; 
00395         std::vector<std::string>   blobsvalues;
00396 
        /// Empty every name list and every value list.
00398         void clear();
        /// Empty the value lists only; the name lists are kept.
00400         void clearValues();
        /// Total number of values held.  Only the value lists are counted
        /// (the name lists are ignored), and the size_t sum is implicitly
        /// converted to uint32_t.
00402         uint32_t nColumns() const {
00403             return bytesvalues.size() + ubytesvalues.size() +
00404                 shortsvalues.size() + ushortsvalues.size() +
00405                 intsvalues.size() + uintsvalues.size() +
00406                 longsvalues.size() + ulongsvalues.size() +
00407                 floatsvalues.size() + doublesvalues.size() +
00408                 catsvalues.size() + textsvalues.size() + blobsvalues.size();}
00409     }; // struct row
00410 
00411     // Cursor class for row-wise data accesses.
00412     class cursor;
    /// Create a cursor over this table.  NOTE(review): ownership of the
    /// returned pointer is presumably the caller's -- confirm.
00414     virtual cursor* createCursor() const =0;
00415 
    // Parse a string of names into out.  NOTE(review): the input is a
    // non-const char*, which suggests the buffer may be modified in place
    // and that out may point into it -- confirm before passing a literal
    // or a temporary.
00416     static void parseNames(char* in, stringList& out);
    // Parse an order-by string into names plus one bool per name.
00417     static void parseOrderby(char* in, stringList& out,
00418                              std::vector<bool>& direc);
00419 
    // Allocate an untyped buffer for the given element type; the size_t
    // argument is presumably an element count (TODO confirm).
00420     static void* allocateBuffer(ibis::TYPE_T, size_t);
    // Release a buffer; the type tells the function what the pointer
    // really refers to.
00421     static void freeBuffer(void* buffer, ibis::TYPE_T type);
    // Release a list of buffers together with their list of types.
00422     static void freeBuffers(bufferList&, typeList&);
00423 
00424 protected:
00425 
00426     std::string name_;  ///< Storage behind the default name().
00427     std::string desc_;  ///< Storage behind the default description().
00428 
    /// Default constructor; leaves name_ and desc_ empty.
00430     table() {};
    /// Construct from a name and a description, either of which may be
    /// null.  A null name becomes the empty string; a null description
    /// falls back to the name, or to the empty string when both are null.
00432     table(const char* na, const char* de)
00433         : name_(na?na:""), desc_(de?de:na?na:"") {};
00434 
00435 private:
00436     // Copying and assignment are prohibited: both are private and are
    // given no definition in this header.
00437     table(const table&);
00438     table& operator=(const table&);
00439 }; // class ibis::table
00440 
/// Abstract interface for building up a table: columns are declared with
/// addColumn, data is added through the append/appendRow/read functions,
/// and the content can be written out or converted with toTable.  Nearly
/// every member is pure virtual.  Objects cannot be copied: the copy
/// constructor and assignment operator are private and not defined here.
00453 class FASTBIT_CXX_DLLSPEC ibis::tablex {
00454 public:
    /// Static factory returning a new object.  NOTE(review): ownership of
    /// the returned pointer is presumably the caller's -- confirm.
00456     static ibis::tablex* create();
00457 //     /// Make the incoming table expandable.  Not yet implemented
00458 //     static ibis::tablex* makeExtensible(ibis::table* t);
00459 
00460     virtual ~tablex() {}; // nothing to do.
00461 
    /// Declare a column with name cname and type ctype.  cdesc and idx are
    /// optional (default null).  NOTE(review): presumably a description
    /// and an indexing option -- confirm.
00463     virtual int addColumn(const char* cname, ibis::TYPE_T ctype,
00464                           const char* cdesc=0, const char* idx=0) =0;
00465 
    /// Add values to column cname for the row range given by begin and
    /// end.  values is an untyped pointer.  NOTE(review): presumably it
    /// must point to elements of the column's declared type, and the
    /// range is presumably [begin, end) -- confirm both.
00486     virtual int append(const char* cname, uint64_t begin, uint64_t end,
00487                        void* values) =0;
00488 
    /// Add one row given as an ibis::table::row.
00518     virtual int appendRow(const ibis::table::row&) =0;
    /// Add one row given as a line of text, with optional delimiters
    /// (default null).
00524     virtual int appendRow(const char* line, const char* delimiters=0) = 0;
    /// Add several rows at once.
00532     virtual int appendRows(const std::vector<ibis::table::row>&) =0;
00533 
    /// Read a CSV file.  maxrows, outputdir and delimiters are optional.
    /// NOTE(review): the meaning of maxrows==0 and of a null outputdir is
    /// not visible in this header -- confirm.
00556     virtual int readCSV(const char* inputfile, int maxrows=0,
00557                         const char* outputdir=0, const char* delimiters=0) =0;
    /// Read an SQL dump file.  tname is a non-const reference, so the
    /// function can modify it.  NOTE(review): presumably it receives the
    /// table name found in the dump -- confirm.
00571     virtual int readSQLDump(const char* inputfile, std::string& tname,
00572                             int maxrows=0, const char* outputdir=0) =0;
00573 
    /// Read column names and types from a file.  Not pure virtual; the
    /// body is defined elsewhere.
00575     virtual int readNamesAndTypes(const char* filename);
    /// Parse column names and types from a string.  Not pure virtual; the
    /// body is defined elsewhere.
00577     virtual int parseNamesAndTypes(const char* txt);
00578 
    /// Write to directory dir.  tname, tdesc, idx and nvpairs are optional
    /// (default null).  Declared const, so it is callable on a const
    /// object.
00616     virtual int write(const char* dir, const char* tname=0,
00617                       const char* tdesc=0, const char* idx=0,
00618                       const char* nvpairs=0) const =0;
    /// Same parameters as write.  NOTE(review): presumably writes only
    /// the metadata and not the column data -- confirm.
00632     virtual int writeMetaData(const char* dir, const char* tname=0,
00633                               const char* tdesc=0, const char* idx=0,
00634                               const char* nvpairs=0) const =0;
00635 
    /// Discard the data held so far.  NOTE(review): presumably the column
    /// definitions are kept -- confirm.
00639     virtual void clearData() =0;
    /// Reserve space for a number of rows.  The default implementation
    /// does nothing and returns 0.
00651     virtual int32_t reserveSpace(uint32_t) {return 0;}
    /// Capacity reported by the object; the default implementation
    /// returns 0.
00661     virtual uint32_t capacity() const {return 0;}
00662 
    /// Number of rows.  NOTE(review): presumably the rows currently held
    /// in memory -- confirm.
00664     virtual uint32_t mRows() const =0;
    /// Number of columns.
00666     virtual uint32_t mColumns() const =0;
    /// Write a description of this object to the given stream.
00668     virtual void describe(std::ostream&) const =0;
00669 
    /// Produce an ibis::table; nm and de are optional (default null).
    /// Non-const, so it may alter this object.  NOTE(review): whether the
    /// data is moved or copied into the result is not visible here.
00675     virtual table* toTable(const char* nm=0, const char* de=0) =0;
00676 
00677 protected:
00678     tablex() {}; // Derived classes need this.
00679 
00680 private:
00681     tablex(const tablex&); // no copying
00682     tablex& operator=(const tablex&); // no assignment
00683 }; // class ibis::tablex
00684 
00688 class FASTBIT_CXX_DLLSPEC ibis::tableList {
00689 public:
00690     typedef std::map< const char*, ibis::table*, ibis::lessi > tableSet;
00691     typedef tableSet::const_iterator iterator;
00692 
00695     bool empty() const {return tables.empty();}
00697     uint32_t size() const {return tables.size();}
00699     iterator begin() const {return tables.begin();}
00703     iterator end() const {return tables.end();}
00704 
00707     const ibis::table* operator[](const char* tname) const {
00708         tableSet::const_iterator it = tables.find(tname);
00709         if (it != tables.end())
00710             return (*it).second;
00711         else
00712             return 0;
00713     }
00714 
00720     void add(ibis::table*& tb) {
00721         tableSet::iterator it = tables.find(tb->name());
00722         if (it == tables.end()) {
00723             tables[tb->name()] = tb;
00724             tb=0;
00725         }
00726         else {
00727             ibis::table* tmp = (*it).second;
00728             tables[tb->name()] = tb;
00729             tb = tmp;
00730         }
00731     }
00732 
00736     void remove(const char* tname) {
00737         tableSet::iterator it = tables.find(tname);
00738         if (it != tables.end()) {
00739             ibis::table* tmp = (*it).second;
00740             tables.erase(it);
00741             delete tmp;
00742         }
00743     }
00744 
00746     tableList() {};
00747 
00749     ~tableList() {
00750         while (! tables.empty()) {
00751             tableSet::iterator it = tables.begin();
00752             ibis::table* tmp = (*it).second;
00753             tables.erase(it);
00754             delete tmp;
00755         }
00756     }
00757 
00758 private:
00760     tableSet tables;
00761 
00762     // Can not copy or assign.
00763     tableList(const tableList&);
00764     tableList& operator=(const tableList&);
00765 }; // ibis::tableList
00766 
/// Abstract interface for row-wise access to a table.  A cursor is moved
/// with the fetch functions and the values of the current row are read
/// with the getColumnAsXxx functions, addressing a column either by name
/// or by number.  Every member function is pure virtual.  The constructors
/// are protected, so only derived classes can create a cursor.
00772 class FASTBIT_CXX_DLLSPEC ibis::table::cursor {
00773 public:
    /// Virtual so that a derived cursor can be destroyed through a pointer
    /// to this base class.
00774     virtual ~cursor() {};
00775     virtual uint64_t nRows() const =0;
00776     virtual uint32_t nColumns() const =0;
00777     virtual ibis::table::typeList columnTypes() const =0;
00778     virtual ibis::table::stringList columnNames() const =0;
    /// Move the cursor.  NOTE(review): presumably advances to the next
    /// row and returns 0 on success -- confirm the return convention.
00781     virtual int fetch() =0;
    /// Move the cursor to the row with the given number.
00785     virtual int fetch(uint64_t rownum) =0;
    /// Row number the cursor currently refers to.  NOTE(review): the
    /// value reported before the first fetch is not visible here.
00790     virtual uint64_t getCurrentRowNumber() const =0;
00791 
    /// Move the cursor and receive the row content in the given row
    /// object.
00794     virtual int fetch(ibis::table::row&) =0;
    /// Move to row rownum and receive its content in the given row object.
00797     virtual int fetch(uint64_t rownum, ibis::table::row&) =0;
00798 
    /// Write the current row to out, using del as the delimiter string.
00800     virtual int dump(std::ostream& out, const char* del=", ") const =0;
00801 
    /// Read one value from the column named cname into the reference
    /// argument.  NOTE(review): presumably a negative return signals an
    /// unknown column or a failed type conversion -- confirm.
00805     virtual int getColumnAsByte(const char* cname, char&) const =0;
00806     virtual int getColumnAsUByte(const char* cname, unsigned char&) const =0;
00807     virtual int getColumnAsShort(const char* cname, int16_t&) const =0;
00808     virtual int getColumnAsUShort(const char* cname, uint16_t&) const =0;
00809     virtual int getColumnAsInt(const char* cname, int32_t&) const =0;
00810     virtual int getColumnAsUInt(const char* cname, uint32_t&) const =0;
00811     virtual int getColumnAsLong(const char* cname, int64_t&) const =0;
00812     virtual int getColumnAsULong(const char* cname, uint64_t&) const =0;
00813     virtual int getColumnAsFloat(const char* cname, float&) const =0;
00814     virtual int getColumnAsDouble(const char* cname, double&) const =0;
00815     virtual int getColumnAsString(const char* cname, std::string&) const =0;
00816 
    /// Same as the group above, with the column addressed by number.
    /// NOTE(review): presumably cnum is a zero-based position in the
    /// order reported by columnNames() -- confirm.
00822     virtual int getColumnAsByte(uint32_t cnum, char& val) const =0;
00823     virtual int getColumnAsUByte(uint32_t cnum, unsigned char& val) const =0;
00824     virtual int getColumnAsShort(uint32_t cnum, int16_t& val) const =0;
00825     virtual int getColumnAsUShort(uint32_t cnum, uint16_t& val) const =0;
00826     virtual int getColumnAsInt(uint32_t cnum, int32_t& val) const =0;
00827     virtual int getColumnAsUInt(uint32_t cnum, uint32_t& val) const =0;
00828     virtual int getColumnAsLong(uint32_t cnum, int64_t& val) const =0;
00829     virtual int getColumnAsULong(uint32_t cnum, uint64_t& val) const =0;
00830     virtual int getColumnAsFloat(uint32_t cnum, float& val) const =0;
00831     virtual int getColumnAsDouble(uint32_t cnum, double& val) const =0;
00832     virtual int getColumnAsString(uint32_t cnum, std::string& val) const =0;
00833 
00834 protected:
    // Only derived classes may construct a cursor.
00835     cursor() {};
00836     cursor(const cursor&); // not implemented
00837     cursor& operator=(const cursor&) ; // not implemented
00838 }; // ibis::table::cursor
00839 
00840 inline void ibis::table::row::clear() {
00841     bytesnames.clear();
00842     bytesvalues.clear();
00843     ubytesnames.clear();
00844     ubytesvalues.clear();
00845     shortsnames.clear();
00846     shortsvalues.clear();
00847     ushortsnames.clear();
00848     ushortsvalues.clear();
00849     intsnames.clear();
00850     intsvalues.clear();
00851     uintsnames.clear();
00852     uintsvalues.clear();
00853     longsnames.clear();
00854     longsvalues.clear();
00855     ulongsnames.clear();
00856     ulongsvalues.clear();
00857     floatsnames.clear();
00858     floatsvalues.clear();
00859     doublesnames.clear();
00860     doublesvalues.clear();
00861     catsnames.clear();
00862     catsvalues.clear();
00863     textsnames.clear();
00864     textsvalues.clear();
00865     blobsnames.clear();
00866     blobsvalues.clear();
00867 } // ibis::table::row::clear
00868 
00869 inline void ibis::table::row::clearValues() {
00870     bytesvalues.clear();
00871     ubytesvalues.clear();
00872     shortsvalues.clear();
00873     ushortsvalues.clear();
00874     intsvalues.clear();
00875     uintsvalues.clear();
00876     longsvalues.clear();
00877     ulongsvalues.clear();
00878     floatsvalues.clear();
00879     doublesvalues.clear();
00880     catsvalues.clear();
00881     textsvalues.clear();
00882     blobsvalues.clear();
00883 } // ibis::table::row::clearValues
00884 #endif // IBIS_TABLE_H

Make It A Bit Faster
Contact us
Disclaimers
FastBit source code
FastBit mailing list archive