Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

InvDocList.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 #ifndef _INVDOCLIST_HPP
00014 #define _INVDOCLIST_HPP
00015 
00016 /*
00017  * NAME DATE - COMMENTS
00018  * tnt 03/2001 - created
00019  *
00020  *========================================================================*/
00021 #include <cmath>
00022 #include "InvFPTypes.hpp"
00023 #include "common_headers.hpp"
00024 #include "DocInfoList.hpp"
00025 #include "MemCache.hpp"
00026 #include "RVLCompress.hpp"
00027 
00028 extern "C" {
00029 #include <cstdio>
00030 }
00031 
// Defines the macro DEFAULT as 9. Nothing else in this listing references it,
// so the meaning of the value is not visible here.
// NOTE(review): this is a very generic name for a macro exported from a
// header; it will textually replace any identifier called DEFAULT in every
// file that includes this one -- confirm who depends on it before renaming.
00032 #define DEFAULT 9
00033 namespace lemur 
00034 {
00035   namespace index 
00036   {
00037     
00038     class InvDocList: public lemur::api::DocInfoList {
            // Document-info list for one term, derived from
            // lemur::api::DocInfoList. Per the member comments at the bottom of
            // the class, the data lives in a single LOC_T buffer delimited by
            // begin/end, with lastid and freq pointing at the most recently
            // added DocID and its frequency.
            // Only declarations and a few trivial inline accessors are visible
            // in this header; every other member function is defined elsewhere.
            // NOTE(review): the class declares a destructor and holds raw
            // pointers, but declares no copy constructor or copy assignment --
            // confirm that instances are never copied.
00039     public:
            // Default constructor (defined elsewhere).
00040       InvDocList();
00041 
            // Construct from a term id and a length. What len measures is not
            // visible here -- presumably the term's string length (compare
            // termLen()); TODO confirm against the definition.
00044       InvDocList(lemur::api::TERMID_T id, int len);
            // Same parameters plus a MemCache pointer -- presumably the cache
            // stored in the `cache` member; TODO confirm.
00046       InvDocList(lemur::utility::MemCache* mc, lemur::api::TERMID_T id, int len);  
            // Same parameters plus a document id and a location -- presumably
            // the first entry to record; TODO confirm.
00047       InvDocList(lemur::utility::MemCache* mc, lemur::api::TERMID_T id, int len, 
00048                  lemur::api::DOCID_T docid, lemur::api::LOC_T location);
            // Construct from an existing LOC_T array; takes the same parameter
            // list as setListSafe(). Whether the array is copied or adopted is
            // not visible here -- TODO confirm.
00050       InvDocList(lemur::api::TERMID_T id, int listlen, 
00051                  lemur::api::LOC_T* list, int fr, 
00052                  lemur::api::DOCID_T* ldocid, int len);
            // Destructor (defined elsewhere).
            // NOTE(review): not marked virtual here; it is only virtual if the
            // base class destructor is -- confirm in DocInfoList.hpp.
00053       ~InvDocList();
00054 
            // Re-point this object at a list described by (id, listlen, list,
            // fr). ldocid and len are optional and default to NULL and 0.
            // Ownership of `list` after the call is not visible here -- TODO
            // confirm against the definition.
00060       void setList(lemur::api::TERMID_T id, int listlen, 
00061                    lemur::api::LOC_T* list, int fr, 
00062                    lemur::api::DOCID_T* ldocid=NULL, int len=0);
00063 
            // Same parameters as setList() but with no defaults. How it differs
            // ("safe") is not visible here -- presumably it copies the data;
            // TODO confirm.
00067       void setListSafe(lemur::api::TERMID_T id, int listlen, 
00068                        lemur::api::LOC_T* list, int fr, 
00069                        lemur::api::DOCID_T* ldocid, int len);
00070 
            // Resets the list; exact effect on the buffer is defined elsewhere.
00074       void reset();
00075 
            // Variant of reset(); presumably also releases the buffer -- TODO
            // confirm.
00078       void resetFree();
00079 
            // Memory helpers returning bool; semantics of the return values are
            // not visible here.
00080       bool allocMem();
00081       bool hasNoMem();
00082 
            // Overridable. Takes a document id and returns bool -- presumably
            // records one occurrence of the term in docid and reports success;
            // TODO confirm.
00084       virtual bool addTerm(lemur::api::DOCID_T docid);
00085 
            // Overridable. Takes another InvDocList and returns bool --
            // presumably appends tail's contents to this list; TODO confirm.
00087       virtual bool append(InvDocList* tail);
00088 
            // Iteration interface. All four are const; the cursor `iter` and the
            // private `entry` member are declared mutable, which is what would
            // let a const iteration update them (presumed usage -- confirm).
00089       virtual void startIteration() const;
00090       virtual bool hasMore() const;
00091       virtual lemur::api::DocInfo* nextEntry() const;
00092       virtual void nextEntry(lemur::api::DocInfo* info) const;
00093 
            // Returns the value lastid points at, or -1 when lastid is NULL.
            // Note the two statements share one line: only `return -1` is
            // guarded by the if; `return *lastid` is the fall-through path.
00094       lemur::api::DOCID_T curDocID() const{ 
00095         if (lastid == NULL) return -1; return *lastid; 
00096       };
            // Returns df (document frequency for the current term).
00097       lemur::api::COUNT_T docFreq() const{ return df; };
            // Returns end - begin, i.e. the number of LOC_T elements between the
            // start of the list and the next free slot.
00098       int length() const{ return end-begin; };
            // Returns uid (the id of the corresponding string).
00099       lemur::api::TERMID_T termID() const{ return uid; };
            // Returns strlength (character length of the corresponding string).
00100       int termLen() const{ return strlength; };
            // Overridable; defined elsewhere. Presumably the term's collection
            // frequency -- TODO confirm.
00101       virtual lemur::api::COUNT_T termCTF() const;
            // Returns lastid - begin, the element offset of lastid from the
            // start of the list.
            // NOTE(review): unlike curDocID(), lastid is not checked for NULL.
00102       int curDocIDdiff() const{ return lastid-begin; };
            // Returns the element stored immediately after the one lastid
            // points at.
            // NOTE(review): unlike curDocID(), lastid is not checked for NULL,
            // so this dereferences an invalid pointer if lastid is NULL --
            // confirm callers only use it after an entry has been added.
00103       int curDocIDtf() const{ return *(lastid+1); };
            // Returns size.
00104       int memorySize() const{ return size; };
00105 
            // Binary I/O, defined elsewhere. The read variants return bool; the
            // write variants return nothing.
            // NOTE(review): ofstream/ifstream are unqualified, so this header
            // relies on an earlier include bringing them into scope --
            // presumably common_headers.hpp; confirm.
00107       void binWrite(ofstream& of);
00108 
00110       bool binRead(ifstream& inf);
00111 
            // "C" variants of the two functions above -- presumably compressed
            // (RVLCompress.hpp is included by this header); TODO confirm.
00113       void binWriteC(ofstream& of);
00114 
00116       bool binReadC(ifstream& inf);
00117 
00118     protected:
00119       // Helper functions for iterator, subclasses should override
            // getElement and nextPosition are defined elsewhere.
00121       virtual lemur::api::DocInfo* getElement(lemur::api::DocInfo* elem, 
00122                                               lemur::api::POS_T position) const;
            // Start position is always 0 (cast to POS_T).
00124       virtual lemur::api::POS_T beginPosition() const { return (lemur::api::POS_T) 0; }
            // End position is end - begin (cast to POS_T), the same quantity
            // length() returns.
00126       virtual lemur::api::POS_T endPosition() const { return (lemur::api::POS_T) (end - begin); }
00128       virtual lemur::api::POS_T nextPosition(lemur::api::POS_T position) const;
00129 
            // Memory helper returning bool; presumably grows the buffer (see the
            // comment on `size`) -- TODO confirm.
00133       bool getMoreMem();
            // Takes and returns int; presumably an integer base-2 logarithm --
            // TODO confirm rounding behaviour against the definition.
00134       int logb2(int num);
00135 
            // Overridable encode/decode pair, defined elsewhere. Presumably they
            // convert the stored values to and from delta form -- TODO confirm.
00138       virtual void deltaEncode();
00139 
00142       virtual void deltaDecode();
00143 
00144       // Use LOC_T* for TERMID/DOCID/COUNT/LOC.
00145       lemur::api::LOC_T* begin;         // pointer to the beginning of this list
00146       lemur::api::LOC_T* lastid;        // pointer to the most recent DocID added
00147       lemur::api::LOC_T* freq;          // pointer to the frequency of the last DocID
00148       lemur::api::LOC_T * end;            // pointer to the next free memory
00149       mutable lemur::api::LOC_T* iter;    // pointer tells us where we are in iteration
00150       int  size;                // how big are we, increment in powers of 2, start at 16K
00151       int  LOC_Tsize;   // sizeof(LOC_T) value
00152       int  strlength;       // the character length of our corresponding string
00153       lemur::api::TERMID_T  uid;                          // a unique ID for our string
00154       lemur::api::COUNT_T  df;                    // the document frequency for current term
00155       lemur::utility::MemCache* cache;      // the cache to get memory from
00156       bool hascache;        // remember if we have our own cache
00157 
00158       bool READ_ONLY;    // flag for whether this list can be added
00159     private:
            // Scratch DocInfo; mutable so const member functions can modify it.
            // Its users are not visible in this header.
00160       mutable lemur::api::DocInfo entry;
00161     };
00162   }
00163 }
00164 
00165 #endif

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4