Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

DocListIterator.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 //
00014 // DocListIterator
00015 //
00016 // 9 January 2004 - tds
00017 //
00018 
00019 #ifndef INDRI_DOCLISTITERATOR_HPP
00020 #define INDRI_DOCLISTITERATOR_HPP
00021 
00022 #include "indri/greedy_vector"
00023 #include "indri/TermData.hpp"
00024 #include "IndexTypes.hpp"
00025 
00026 namespace indri {
00027   namespace index {
          // Abstract interface (every operation below is pure virtual) for stepping
          // through the per-document entries of a single term's document list.
          // Usage, per the method comments below: call startIteration(), then read
          // currentEntry() and advance with nextEntry() until it returns false.
00028     class DocListIterator {
00029     public:
            // One entry of the list: a document id plus a vector of int positions.
            // NOTE(review): positions presumably holds the term's positions within
            // that document -- confirm against a concrete implementation.
00030       struct DocumentData {
00031         lemur::api::DOCID_T document;
00032         indri::utility::greedy_vector<int> positions;
00033       };
00034 
            // Record of (document id, count, length) returned by topDocuments(),
            // bundled with the comparators that can order such records.
            // NOTE(review): count and length look like the term's occurrence count
            // and the document's length -- confirm against the code that fills them.
00035       struct TopDocument {
              // Orders records by ascending count/length ratio, computed in double.
              // NOTE(review): length is not checked for zero here; a zero length
              // would make the ratio inf or NaN, and a NaN ratio would break the
              // strict weak ordering that sort/heap algorithms require -- confirm
              // that callers never build a TopDocument with length == 0.
00036         struct less {
00037           bool operator() ( const TopDocument& one, const TopDocument& two ) const {
00038             double oneFrac = double(one.count) / double(one.length);
00039             double twoFrac = double(two.count) / double(two.length);
00040             return (oneFrac < twoFrac);
00041           }
00042         };
00043 
              // Orders records by descending count/length ratio; same computation
              // as less with the comparison reversed, so the same zero-length
              // caveat applies.
00044         struct greater {
00045           bool operator() ( const TopDocument& one, const TopDocument& two ) const {
00046             double oneFrac = double(one.count) / double(one.length);
00047             double twoFrac = double(two.count) / double(two.length);
00048             return (oneFrac > twoFrac);
00049           }
00050         };
00051 
              // Orders records by ascending document id, ignoring count and length.
00052         struct docid_less {
00053           bool operator() ( const TopDocument& one, const TopDocument& two ) const {
00054             return one.document < two.document;
00055           }
00056         };
00057 
              // Stores the three arguments in the members of the same name; no
              // validation is performed.
00058         TopDocument( lemur::api::DOCID_T _document, int _count, int _length ) :
00059           document(_document),
00060           count(_count),
00061           length(_length)
00062         {
00063         }
00064 
00065         lemur::api::DOCID_T document;
00066         int count;
00067         int length;
00068       };
00069       
            // virtual so that a concrete iterator can be destroyed through a
            // DocListIterator pointer.
00070       virtual ~DocListIterator() {};
00071 
00072       // get the iterator ready to return data; call this before calling currentEntry or nextEntry
00073       virtual void startIteration() = 0;
00074 
00075       // get the termData structure associated with this term
            // NOTE(review): ownership and lifetime of the returned pointer are not
            // visible in this header -- confirm before storing or deleting it.
00076       virtual TermData* termData() = 0;
00077 
00078       // get a list of top documents for this iterator (must call startIteration() first)
            // Returned by const reference.
            // NOTE(review): how long the reference stays valid is not stated here
            // -- presumably while the iterator is alive; confirm.
00079       virtual const indri::utility::greedy_vector<TopDocument>& topDocuments() = 0;
00080 
00081       // return the current document entry if we're not finished, null otherwise.
            // NOTE(review): the pointee's ownership is not shown in this header --
            // presumably the iterator keeps it and may reuse it on the next
            // advance; confirm before holding on to the pointer.
00082       virtual DocumentData* currentEntry() = 0;
00083     
00084       // move to the next document in the list; return false if there are no more valid documents
00085       virtual bool nextEntry() = 0;
00086 
00087       // find the first document that contains this term that has an id >= documentID.
00088       // returns false if no such document exists.
            // NOTE(review): whether the iterator stays on the current entry when
            // that entry already has id >= documentID is not stated here -- check
            // the implementation before relying on either behaviour.
00089       virtual bool nextEntry( lemur::api::DOCID_T documentID ) = 0;
00090 
00091       // returns true if the iterator has no more entries
00092       virtual bool finished() = 0;
00093     };
00094   } // namespace index
00095 } // namespace indri
00096 
00097 #endif // INDRI_DOCLISTITERATOR_HPP
00098 
00099 

Generated on Tue Jun 15 11:02:53 2010 for Lemur by doxygen 1.3.4