Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

InvFPTermList.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 /*
00013   10/18/2002 -- dmf Add binReadC, binWriteC, deltaDecode, and deltaEncode
00014   for compression of TermInfoLists.
00015 */
00016 
00017 #ifndef _INVFPTERMLIST_HPP
00018 #define _INVFPTERMLIST_HPP
00019 
00020 #include "common_headers.hpp"
00021 #include "InvFPTerm.hpp"
00022 #include "InvFPTypes.hpp"
00023 #include "RVLCompress.hpp"
00024 
00025 namespace lemur 
00026 {
00027   namespace file 
00028   {
          // Forward declaration only. In this header lemur::file::File appears
          // solely as a reference parameter (the File-based binReadC/binWriteC
          // overloads), so the complete type is not needed here.
00029     class File;
00030   }
00031 }
00032 
00033 namespace lemur 
00034 {
00035   namespace index 
00036   {
          /// Per-document term list: a lemur::api::TermInfoList implementation
          /// that records, for one document, its id (uid), its length, and an
          /// array of located terms (list) with listlen entries.
          ///
          /// Only the trivial accessors and iterator helpers are defined inline;
          /// every other member is implemented outside this header.
          ///
          /// NOTE(review): `vector`, `ifstream` and `ofstream` are used
          /// unqualified, so this header relies on the std names being made
          /// visible by one of the includes above (presumably
          /// common_headers.hpp) -- confirm.
          ///
          /// NOTE(review): the class declares a destructor and holds raw
          /// pointers (list, listcounted, counts) but declares no copy
          /// constructor or copy assignment. If the destructor frees those
          /// pointers, copying an instance would be unsafe -- confirm against
          /// the implementation.
00042     class InvFPTermList : public lemur::api::TermInfoList {
00043     public:
            /// Default constructor (defined out of line).
00044       InvFPTermList();
            /// Construct from a document id, a length and a vector of located
            /// terms. Presumably these initialise uid, length and list/listlen
            /// respectively -- confirm in the implementation.
00045       InvFPTermList(lemur::api::DOCID_T did, int len, vector<LocatedTerm> &tls);
            /// Destructor (defined out of line).
00046       ~InvFPTermList();
00047 
            /// Iteration interface, part 1 of 3 (startIteration / hasMore /
            /// nextEntry). All three are const and defined out of line;
            /// presumably they drive the mutable `index` cursor declared
            /// below -- confirm.
00049       void startIteration() const;
00050 
            /// Iteration interface, part 2 of 3: reports whether another entry
            /// is available.
00052       bool hasMore() const;
00053 
            /// Iteration interface, part 3 of 3: returns the next entry.
            /// NOTE(review): ownership/lifetime of the returned pointer is not
            /// visible here; it may point at the mutable `entry` member --
            /// confirm before storing or deleting it.
00055       lemur::api::TermInfo *nextEntry() const;
00056 
            /// Number of entries (defined out of line).
            /// NOTE(review): unlike the other accessors in this class, this
            /// one is not declared const, so it cannot be called through a
            /// const reference.
00058       virtual int size();
00059 
            /// Indexed access to an entry (defined out of line).
            /// NOTE(review): bounds handling and ownership of the returned
            /// pointer are not visible in this header.
00062       virtual lemur::api::TermInfo* operator[](int index) const;
00063 
            /// Returns the stored document length (`length`).
00065       lemur::api::COUNT_T docLength() const{ return length; }
00066 
            /// Returns the number of items in the list (`listlen`).
00068       lemur::api::COUNT_T termCount()  const{ return listlen; }
00069 
            /// Returns this document's id (`uid`).
00071       lemur::api::DOCID_T docID()  const{ return uid; }
00072 
            /// Binary read from a stream; returns bool (presumably success).
00075       bool binRead(ifstream& infile);
            /// Stream-based read counterpart of binWriteC. Per the change log
            /// at the top of this file, the *C variants were added for
            /// compression of TermInfoLists.
00077       bool binReadC(ifstream& infile);
            /// Stream-based write counterpart of binReadC (see change log).
00079       void binWriteC(ofstream& ofile);
00080 
            /// Overloads of binReadC/binWriteC that operate on a
            /// lemur::file::File instead of a standard stream.
00081       bool binReadC( lemur::file::File& infile );
00082       void binWriteC( lemur::file::File& outfile );
00083 
            /// Delta decoding / encoding hooks added together with
            /// binReadC/binWriteC for compression (see change log). Virtual,
            /// so subclasses may replace them. Which member they transform is
            /// not visible in this header.
00086       virtual void deltaDecode();
00089       virtual void deltaEncode();
00090 
            /// Defined out of line. Presumably builds the counted form
            /// (listcounted / counts) from `list` -- confirm.
00092       void countTerms();
00093 
00094     protected:
00095       // Helper functions for iterator, subclasses should override
            /// Allocates a fresh InvFPTerm with `new` and returns it as a
            /// TermInfo*. The caller receives a heap object; who deletes it is
            /// not visible here.
00097       virtual lemur::api::TermInfo* newElement() const { return new InvFPTerm(); }
            /// Defined out of line. Takes an element and a position and
            /// returns a TermInfo*; presumably fills `elem` with the entry at
            /// `position` -- confirm.
00099       virtual lemur::api::TermInfo* getElement(lemur::api::TermInfo* elem, lemur::api::POS_T position) const;
            /// Copies *from into *to using InvFPTerm's assignment. Both
            /// pointers are static_cast (unchecked) to InvFPTerm*, so both
            /// must really point at InvFPTerm objects.
00101       virtual void assignElement(lemur::api::TermInfo* to, lemur::api::TermInfo* from) const {
00102         *static_cast<InvFPTerm*>(to) = *static_cast<InvFPTerm*>(from);
00103       }
            /// First iterator position: always 0.
00105       virtual lemur::api::POS_T beginPosition() const { return (lemur::api::POS_T) 0; }
            /// One-past-the-end iterator position: listlen.
00107       virtual lemur::api::POS_T endPosition() const { return (lemur::api::POS_T) listlen; }
            /// Defined out of line; maps a position to the following one.
00109       virtual lemur::api::POS_T nextPosition(lemur::api::POS_T position) const;
00110 
00111       lemur::api::DOCID_T uid; // this doc's id
00112       lemur::api::COUNT_T length;  // length of this document (terms + stopwords)
00113       LocatedTerm* list; // list of terms and locations
00114       LLTerm* listcounted; // list of terms and location lists
00115       lemur::api::COUNT_T listlen; // number of items we have in list  (same as number of terms)
00116       mutable int index;   // index for iterator; mutable so the const iteration methods can advance it
00117       lemur::api::LOC_T* counts; // keep track of counts of terms for bag of words
00118       mutable InvFPTerm entry; // mutable scratch term; presumably what the const accessors hand back -- confirm
00119       mutable vector<lemur::api::LOC_T> loclist; // list of locations to return
00120 
00121     };
00122   }
00123 }
00124 #endif

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4