Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

ISet.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 // -------------------------------------------------------------------
00014 // ISet.hpp (set with index)
00015 // ALB
00016 // Template set class derived from PSet.hpp
00017 //    . retrieve objects in the order in which they were inserted 
00018 //      via operator[int]     
00019 //   . growable
00020 // -------------------------------------------------------------------
00021 
00022 #ifndef _ISETH_
00023 #define _ISETH_
00024 
00025 #include "PSet.hpp"
00026 
00027 namespace lemur
00028 {
00029   namespace utility
00030   {
00031     
00032     template <class ObjType>
00033     class ISet : public PSet<ObjType>
00034     {
00035     public:
00036       ISet(): PSet<ObjType>(), index(0) {}
00037       ISet(const int maxSize_p): index(0) { open(maxSize_p); }
00038       ~ISet() { close(); }  
00039 
00040       void open(const int maxSize_p) {
00041         PSet<ObjType>::open(maxSize_p);
00042         index = new typename PSet<ObjType>::SET_NODE* [this->maxSize+1];
00043         memset(index, 0, (this->maxSize+1)*sizeof(typename PSet<ObjType>::SET_NODE*));
00044       }
00045   
00046       void close() {
00047         PSet<ObjType>::close();
00048         delete [] index; 
00049         index=0;
00050       }
00051 
00052       void clear() {
00053         if (this->maxSize==0) return;
00054         close();
00055         open(this->maxSize);
00056       }
00057 
00058       int size() const { return this->currentSize; }
00059 
00060       int add(const ObjType& u) {
00061         typename PSet<ObjType>::SET_NODE *sn = PSet<ObjType>::internalAdd(u);
00062         if (sn==0) return -1;
00063         index[sn->idx] = sn;
00064         int retval = sn->idx;
00065         if (++this->currentSize > this->maxSize) grow((int) (this->currentSize*GROW_FACTOR+1));
00066         return retval; // grow may void sn
00067       }
00068 
00069       int remove(const ObjType& u) { // remove u from set: returns 1 iff u was in set
00070         const int idx = internalRemove(u);
00071         if (idx==-1) return 0;                 // not a member
00072         this->currentSize--;
00073         return 1;                              // was a member (not anymore)
00074       }
00075 
00076       int operator+=(const ObjType& u)  // add an elt to set: returns 1 if added. 
00077       { return add(u); }
00078   
00079       int operator-=(const ObjType& u)// remove elt from set: returns 1 if removed.
00080       { return remove(u); }
00081 
00082       // NB: When user removes elts. from set, the set is re-indexed, so
00083       // what is the n'th elt. now may be the n-m'th elt. sometime later
00084       ObjType& operator[](const int idx) const {   // get n'th elt
00085         assert(idx<this->currentSize);
00086         return index[idx]->u; 
00087       }
00088   
00089       int operator[](const ObjType& u) const {    // get idx of u, -1 if not there
00090         int hashval = computeHash(u);    
00091         typename PSet<ObjType>::SET_NODE *p = this->hashTable[hashval];
00092         while(p!=0 && !(p->u==u)) p=p->next;
00093         return ((p==0)? -1: p->idx);
00094       }
00095   
00096       void grow(const int newSize) {
00097         this->maxSize = newSize;
00098         this->hashTableSize = this->smallestPrimeGreaterThan((int) (this->maxSize*SPARSENESS));
00099         typename PSet<ObjType>::SET_NODE **newIndex = new typename PSet<ObjType>::SET_NODE* [this->maxSize+1];
00100         typename PSet<ObjType>::SET_NODE **newHashTable = new typename PSet<ObjType>::SET_NODE* [this->hashTableSize];
00101         memset(newHashTable, 0, this->hashTableSize*sizeof(typename PSet<ObjType>::SET_NODE *));
00102         for (int i=0; i<this->currentSize; i++) {
00103           typename PSet<ObjType>::SET_NODE *sn = index[i];
00104           const int hashval = computeHash(sn->u);
00105           typename PSet<ObjType>::SET_NODE *snNew = createNode(sn->u);
00106           snNew->idx = i;
00107           snNew->next = newHashTable[hashval];
00108           newHashTable[hashval] = snNew;
00109           deleteNode(sn);
00110           newIndex[i] = snNew;
00111         }
00112         delete [] index;
00113         delete [] this->hashTable;
00114         index = newIndex;
00115         this->hashTable = newHashTable;
00116       }
00117  
00118     protected:
00119       int internalRemove(const ObjType &u) {
00120         // Return idx of removed node. For efficiency, we re-index the set
00121         // so that what once was the last member now is the idx'th member.
00122         // (rather than renumbering the entire set starting from idx)
00123         int idx=PSet<ObjType>::internalRemove(u);
00124         if (idx==-1) return -1;
00125         index[idx] = index[this->currentSize-1];
00126         index[idx]->idx = idx;
00127         index[this->currentSize-1] = 0;
00128         return idx;
00129       }
00130 
00131       int internalRemove(const ObjType &u, const int idx) {
00132         PSet<ObjType>::internalRemove(u);
00133         index[idx] = index[this->currentSize-1];
00134         if (index[idx]->idx != -2) index[idx]->idx=idx;
00135         index[this->currentSize-1] = 0;
00136         return idx;
00137       }
00138 
00139     protected:
00140       typename PSet<ObjType>::SET_NODE* (*index);              // goes from [dense idx] -> node
00141     };
00142   }
00143 }
00144 
00145 #endif

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4