Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

PSet.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 // -------------------------------------------------------------------
00014 // PSet.hpp (Plain set)
00015 // ALB
00016 //
00017 // Template set class (base class of family of set classes)
00018 //    . add objects using operator+() or add()
00019 //    . remove objects using operator-() or remove()
00020 //    . query for the presence of an object in set via operator[object]
00021 //    . if user specifies no removals will occur, uses (efficient) block memory
00022 //
00023 // Required member functions of template ObjType:
00024 //     hash(): returns a 32-bit hash of object
00025 //         ==: returns 1 if objects are equal
00026 //          =: assignment operator
00027 // 
00028 // Note also: ISet (set with index)
00029 // -------------------------------------------------------------------
00030 
00031 #ifndef _PSETH_
00032 #define _PSETH_
00033 
00034 #include "common_headers.hpp"
00035 #include <cmath>
00036 #include <memory.h>
00037 #include <cassert>
00038 namespace lemur 
00039 {
00040   namespace utility 
00041   {
00042     
00043     static const float SPARSENESS = 1.5;
00044     static const float GROW_FACTOR = 2.0;
00045 
00046 
00047     template <class ObjType>
00048     class PSet
00049     {
00050     protected:
00051       struct SET_NODE {
00052         ObjType          u;
00053         int              idx;           // used in derived classes, but not here
00054         struct SET_NODE *next;
00055       } ;
00056     
00057     public:
00058       PSet(): currentSize(0),maxSize(0),hashTable(0) {}
00059       PSet(const int maxSize_p) : currentSize(0),hashTable(0) { open(maxSize_p); }
00060       ~PSet() { close(); }
00061   
00062       const int size()    const { return currentSize; }
00063       const int maxsize() const { return maxSize;     }
00064 
00065       void open(const int maxSize_p) {
00066         assert(hashTable==0);   // cannot call open() twice!
00067         assert(maxSize_p>0); 
00068         maxSize = maxSize_p;
00069         if (maxSize<2) maxSize=2;
00070         hashTableSize = smallestPrimeGreaterThan((int) (maxSize*SPARSENESS));
00071         hashTable = new SET_NODE * [hashTableSize];
00072         memset(hashTable, 0, hashTableSize*sizeof(SET_NODE *));
00073         setNodeSize = sizeof(SET_NODE);
00074       }
00075   
00076       void close() {
00077         if (hashTable) {
00078           for (int i=0; i<hashTableSize; i++) {
00079             SET_NODE *node=hashTable[i]; 
00080             while (node) {
00081               SET_NODE *next = node->next; 
00082               delete node;
00083               node = next;
00084             }
00085           }
00086           delete [] hashTable; 
00087         }
00088         hashTable=0;
00089         currentSize=0;
00090       }
00091   
00092       void clear() {
00093         if (maxSize==0) return;
00094         close();
00095         open(maxSize);
00096       }
00097   
00098       int add(const ObjType& u) {
00099         SET_NODE *sn = internalAdd(u);
00100         if (sn==0) return -1;                       // already a member
00101         assert(currentSize++<=maxSize);   
00102         return sn->idx;                            // new member
00103       }
00104 
00105       int remove(const ObjType& u) { // remove u from set: returns 1 iff u was in set
00106         const int idx = internalRemove(u);
00107         if (idx==-1) return 0;                 // not a member
00108         currentSize--;
00109         return 1;                              // was a member (not anymore)
00110       }
00111   
00112       int operator+=(const ObjType& u)  // add an elt to set: returns 1 if added. 
00113       { return add(u); }
00114   
00115       int operator-=(const ObjType& u)// remove elt from set: returns 1 if removed.
00116       { return remove(u); }
00117   
00118       const ObjType &operator[](const int idx) const {
00119         int a;
00120         a=idx; // suppress compiler warnings about not using idx
00121         cerr <<"PSet: operator[] (int) not supported!"<<endl;
00122         abort();
00123       }
00124 
00125       int operator[] (const ObjType& u) const { // returns +1 if found in set, -1 o/w
00126         int hashval = computeHash(u);
00127         SET_NODE *p = hashTable[hashval];
00128         while(p!=0 && !(p->u==u)) p=p->next;
00129         return ((p==0)? -1: 1);
00130       }
00131   
00132 
00133     protected:
00134       SET_NODE *internalAdd(const ObjType &u) {
00135         int hashval = computeHash(u);
00136 
00137         // look for this object in the appropriate linked list
00138         for (SET_NODE* p=hashTable[hashval]; p!=0; p=p->next)
00139           if (p->u==u) return 0;                 // already a member
00140     
00141         // create new node and stick at head of linked list
00142         SET_NODE *sn = createNode(u);
00143         sn->idx  = currentSize;
00144         sn->next = hashTable[hashval];
00145         hashTable[hashval] = sn;    
00146         return sn;
00147       }
00148   
00149       int internalRemove(const ObjType &u) {
00150         // returns index of extracted object, or -1 if not there.
00151         int hashval = computeHash(u);
00152         SET_NODE* sn = hashTable[hashval];
00153         SET_NODE** pLast = &(hashTable[hashval]);
00154         while (sn) {
00155           if (sn->u == u) {
00156             int idx = sn->idx;
00157             *pLast = sn->next;
00158             deleteNode(sn);
00159             return idx;
00160           }
00161           pLast = &(sn->next);
00162           sn = sn->next;
00163         }
00164         return -1;
00165       }
00166   
00167       long smallestPrimeGreaterThan(const int n) {
00168         for (int i=n+1;; i++) {   
00169           // look for a j<sqrt(i) which divides evenly into i
00170           int s = (int) sqrt((float) i);
00171           int j;
00172           for (j=2; j<=s; j++)
00173             if ((i % j)==0) break; 
00174           if (j>s) return i;
00175         }
00176       }
00177   
00178     protected:
00179       int computeHash(const ObjType &u) const {
00180         int h = u.hash() % hashTableSize;
00181         if (h<0) h*=-1;
00182         return h;
00183       }
00184   
00185       // memory management of nodes
00186       SET_NODE *createNode(const ObjType &u) {
00187         SET_NODE *n = new SET_NODE;
00188         n->u = u;              
00189         return n;
00190       }
00191   
00192       void deleteNode(SET_NODE *node) { delete node; }
00193   
00194     protected:
00195       int currentSize;                    // # of elts currently in set 
00196       int maxSize;                        // # of elts allowable in set 
00197       int hashTableSize;              // size of hash table containing set  
00198       SET_NODE** hashTable;
00199       int setNodeSize;
00200     };
00201   }
00202 }
00203 
00204 #endif
00205 
00206 
00207 

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4