Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

HashTable.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 //
00014 // HashTable
00015 //
00016 // 9 January 2004 - tds
00017 //
00018 
00019 #ifndef LEMUR_HASHTABLE_HPP
00020 #define LEMUR_HASHTABLE_HPP
00021 
00022 #include <utility>
00023 #include "indri/RegionAllocator.hpp"
00024 namespace indri
00025 {
00026   namespace utility
00027   {
00028     
00029     //
00030     // GenericHash<_Key>
00031     //
00032 
00033     template<class _Key>
00034     class GenericHash { 
00035     public:
00036       size_t operator() ( const _Key& k ) const {
00037         return (size_t) k;
00038       }
00039     };
00040 
00041     //
00042     // GenericComparator<_Key>
00043     //
00044 
00045     template<class _Key>
00046     class GenericComparator {
00047     public:
00048       size_t operator() ( const _Key& one, const _Key& two ) const {
00049         return (size_t) (one - two);
00050       }
00051     };
00052 
00053     //
00054     // GenericHash<const char *>
00055     //
00056 
00057     template<>
00058     class GenericHash<const char*> {
00059     public:
00060       size_t operator() ( const char* const& kp ) const {
00061         // attributed to Dan Bernstein, comp.lang.c 
00062         size_t hash = 5381;
00063         const char* k = kp;
00064         char c;
00065 
00066         for( ; c = *k; k++ ){
00067           hash = ((hash << 5) + hash) + c;
00068         }
00069 
00070         return hash;
00071       }
00072     };
00073 
00074     //
00075     // GenericComparator<const char *>
00076     //
00077 
00078     template<>
00079     class GenericComparator<const char*> {
00080     public:
00081       size_t operator () ( const char* const& one, const char* const& two ) const {
00082         return strcmp( one, two );
00083       }
00084     };
00085 
00086     //
00087     // GenericHash<std::string>
00088     // (formerly StringHash from TaggedTextParser.hpp)
00089     //
00090 
00091     template<>
00092     class GenericHash<std::string> {
00093     public:
00094       size_t operator() ( const std::string& key ) const {
00095         GenericHash<const char*> charHash;
00096         return charHash( key.c_str() );
00097       }
00098     };
00099 
00100     //
00101     // GenericComparator<std::string>
00102     // (formerly StringComparator from TaggedTextParser.hpp)
00103     //
00104 
00105     template<>
00106     class GenericComparator<std::string> {
00107     public:
00108       size_t operator() ( const std::string& one, const std::string& two ) const {
00109         return one.compare(two);
00110       }
00111     };
00112 
00113     //
00114     // HashBucket<_Key, _Value>
00115     //
00116 
00117     template<class _Key, class _Value>
00118     struct HashBucket {
00119       _Key key;
00120       _Value value;
00121       HashBucket<_Key, _Value>* next;
00122 
00123       HashBucket() : next( 0 ) {};
00124       HashBucket( const _Key& k, HashBucket<_Key, _Value>* n ) : key(k), next(n) {};
00125       HashBucket( const _Key& k, const _Value& v, HashBucket<_Key, _Value>* n ) : key(k), value(v), next(n) {};
00126     };
00127 
00128     //
00129     // HashTableIterator<_Key, _Value, _Comparator>
00130     //
00131 
00132     template<class _Key, class _Value, class _Comparator>
00133     class HashTableIterator {
00134     private:
00135       typedef HashBucket<_Key, _Value> bucket_type;
00136       bucket_type** _table;
00137       bucket_type* _currentEntry;
00138       size_t _currentBucket;
00139       size_t _totalBuckets;
00140       std::pair<_Key*, _Value*> _pair;
00141 
00142       void next() {
00143         // already at end
00144         if( _currentBucket == (size_t)-1 )
00145           return;
00146 
00147         // in a chain with more entries left
00148         if( _currentEntry && _currentEntry->next != 0 ) {
00149           _currentEntry = _currentEntry->next;
00150           return;
00151         }
00152 
00153         if( _currentEntry )
00154           _currentBucket++;
00155 
00156         for( ; _currentBucket < _totalBuckets; _currentBucket++ ) {
00157           _currentEntry = _table[_currentBucket];
00158 
00159           if( _currentEntry ) {
00160             return;
00161           }
00162         }
00163 
00164         // none left
00165         _currentEntry = 0;
00166         _currentBucket = (size_t)-1;
00167       }
00168 
00169     public:
00170       HashTableIterator() {
00171         _currentBucket = (size_t)-1;
00172         _currentEntry = 0;
00173       }
00174 
00175       HashTableIterator( bucket_type** table, size_t totalBuckets ) {
00176         _table = table;
00177         _totalBuckets = totalBuckets;
00178 
00179         _currentBucket = 0;
00180         _currentEntry = 0;
00181         next();
00182       }
00183 
00184       bool operator == ( const HashTableIterator& other ) {
00185         if( other._currentEntry == _currentEntry &&
00186             other._currentBucket == _currentBucket )
00187           return true;
00188         return false;
00189       }
00190 
00191       bool operator != ( const HashTableIterator& other ) {
00192         if( other._currentEntry == _currentEntry &&
00193             other._currentBucket == _currentBucket )
00194           return false;
00195         return true;
00196       }
00197 
00198       void operator++ ( int ) {
00199         next();
00200       }
00201 
00202       std::pair<_Key*, _Value*>& operator* () {
00203         _pair.first = &_currentEntry->key;
00204         _pair.second = &_currentEntry->value;
00205         return _pair;
00206       }
00207 
00208       std::pair<_Key*, _Value*>* operator-> () {
00209         return &(*(*this));
00210       }
00211     };
00212 
00213     template<class _Key, class _Value, class _HashFunction = GenericHash<_Key>, class _Comparator = GenericComparator<_Key> >
00214     class HashTable {
00215     public:
00216       friend class HashTableIterator<_Key, _Value, _Comparator>;
00217 
00218       typedef HashBucket<_Key, _Value> bucket_type;
00219       typedef _Key key_type;
00220       typedef _Value value_type;
00221       typedef _HashFunction hash_type;
00222       typedef _Comparator compare_type;
00223       typedef class HashTableIterator<_Key, _Value, _Comparator> iterator;
00224 
00225     private:
00226       indri::utility::RegionAllocator* _allocator;
00227       bucket_type** _table;
00228       hash_type _hash;
00229       compare_type _compare;
00230       size_t _buckets;
00231       iterator _end;
00232       size_t _count;
00233 
00234       void _deleteBucket( bucket_type* b ) {
00235         if( !_allocator )
00236           delete b;
00237       }
00238 
00239       bucket_type* _newBucket( const _Key& k, bucket_type* p ) {
00240         bucket_type* b = 0;
00241 
00242         if( _allocator ) {
00243           b = (bucket_type*) _allocator->allocate( sizeof (bucket_type) );
00244           new(b) bucket_type( k, p );
00245         } else {
00246           b = new bucket_type( k, p );
00247         }
00248 
00249         return b;
00250       }
00251 
00252       bucket_type* _newBucket( const _Key& k, const _Value& v, bucket_type* p ) {
00253         bucket_type* b = 0;
00254 
00255         if( _allocator ) {
00256           b = (bucket_type*) _allocator->allocate( sizeof (bucket_type) );
00257           new(b) bucket_type( k, v, p );
00258         } else {
00259           b = new bucket_type( k, v, p );
00260         }
00261 
00262         return b;
00263       }
00264 
00265       bucket_type** _parentBucket( const _Key& k ) const {
00266         size_t index = _hash(k) % _buckets;
00267         return &_table[index];
00268       }
00269 
00270     public:
00271       HashTable( size_t size = 16384, indri::utility::RegionAllocator* allocator = 0 ) {
00272         _allocator = allocator;
00273         _buckets = size / sizeof(bucket_type*);
00274         _table = reinterpret_cast<bucket_type**>(new char[_buckets * sizeof(bucket_type*)]);
00275         _count = 0;
00276     
00277         memset( _table, 0, _buckets * sizeof(bucket_type*) );
00278       }
00279 
00280       ~HashTable() {
00281         clear();
00282         delete[] reinterpret_cast<char*>(_table);
00283       }
00284 
00285       _Value* find( const _Key& k ) const {
00286         bucket_type* bucket = *_parentBucket(k);
00287 
00288         for( ; bucket; bucket = bucket->next ) {
00289           if( _compare( k, bucket->key ) == 0 ) {
00290             return &bucket->value;
00291           }
00292         }
00293 
00294         return 0;
00295       }
00296 
00297       _Value* insert( const _Key& k ) {
00298         bucket_type** bucket = _parentBucket(k);
00299         _count++;
00300 
00301         // go to the end of the chain
00302         while( *bucket )
00303         bucket = &(*bucket)->next;
00304 
00305         // insert a new item
00306         bucket_type* newItem = _newBucket( k, 0 );
00307         *bucket = newItem;
00308 
00309         return &newItem->value;
00310       }
00311 
00312       _Value* insert( const _Key& k, const _Value& v ) {
00313         bucket_type** bucket = _parentBucket(k);
00314         _count++;
00315 
00316         // go to the end of the chain
00317         while( *bucket )
00318         bucket = &(*bucket)->next;
00319 
00320         // insert a new item
00321         bucket_type* newItem = _newBucket( k, v, 0 );
00322         *bucket = newItem;
00323 
00324         return &newItem->value;
00325       }
00326 
00327       void remove( const _Key& k ) {
00328         bucket_type** bucket = _parentBucket(k);
00329 
00330         while( *bucket ) {
00331           if( _compare( k, (*bucket)->key ) == 0 ) {
00332             bucket_type* nextItem = (*bucket)->next;
00333             bucket_type* thisItem = (*bucket);
00334 
00335             *bucket = nextItem;
00336             _deleteBucket( thisItem );
00337             _count--;
00338             break;
00339           }
00340 
00341           bucket = &(*bucket)->next;
00342         }
00343       }
00344 
00345       void clear() {
00346         if( _allocator ) {
00347           memset( _table, 0, sizeof(bucket_type*) * _buckets );
00348         } else {
00349           for( size_t i=0; i<_buckets; i++ ) {
00350             bucket_type* item = _table[i];
00351 
00352             while( item ) {
00353               bucket_type* nextItem = item->next;
00354               _deleteBucket( item );
00355               item = nextItem;
00356             }
00357 
00358             _table[i] = 0;
00359           }
00360         }
00361 
00362         _count = 0;
00363       }
00364 
00365       const iterator& end() {
00366         return _end;
00367       }
00368 
00369       iterator begin() {
00370         return iterator( _table, _buckets );
00371       }
00372 
00373       size_t size() {
00374         return _count;
00375       }
00376     };
00377   }
00378 }
00379 
00380 #endif // LEMUR_HASHTABLE_HPP

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4