Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

MemoryIndexDocListFileIterator.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 //
00013 // MemoryIndexDocListFileIterator
00014 //
00015 // 23 November 2004 -- tds
00016 //
00017 
00018 #ifndef INDRI_MEMORYINDEXDOCLISTFILEITERATOR_HPP
00019 #define INDRI_MEMORYINDEXDOCLISTFILEITERATOR_HPP
00020 
00021 #include "indri/Mutex.hpp"
00022 #include "indri/TermData.hpp"
00023 #include "indri/DocListFileIterator.hpp"
00024 #include "indri/DocListMemoryBuilder.hpp"
00025 #include <algorithm>
00026 #include <iostream> // DEBUG
00027 
00028 namespace indri {
00029   namespace index {
00030     // Iterates over the document lists of an in-memory index, term by term, in
00031     // the order given by MemoryIndex::term_entry::term_less.  Keeps a reference
00032     // to the caller's term vector, so that vector must outlive this iterator.
00033     class MemoryIndexDocListFileIterator : public DocListFileIterator {
00034     private:
00035       const std::vector<MemoryIndex::term_entry*>& _termData;
00036       std::vector<MemoryIndex::term_entry*> _alphabetical; // sorted copy of _termData
00037       std::vector<MemoryIndex::term_entry*>::iterator _currentTerm;
00038       DocListMemoryBuilderIterator _iterator;
00039       DocListData _data;
00040       bool _finished;
00041
00042       // Points _data and _iterator at the term referenced by _currentTerm,
00043       // which must not be _alphabetical.end().
00044       void _loadCurrentTerm() {
00045         MemoryIndex::term_entry* entry = *_currentTerm;
00046         _data.termData = entry->termData;
00047         _data.iterator = &_iterator;
00048         _iterator.reset( entry->list, _data.termData );
00049
00050         // the list's counts must match the term's corpus statistics
00051         assert( entry->list.documentFrequency() == _data.termData->corpus.documentCount );
00052         assert( entry->list.termFrequency() == _data.termData->corpus.totalCount );
00053       }
00054
00055     public:
00056       // Starts out finished (with no current entry) until startIteration() is
00057       // called, so the accessors never read uninitialized members.
00058       MemoryIndexDocListFileIterator( const std::vector<MemoryIndex::term_entry*>& termData ) :
00059         _termData(termData),
00060         _finished(true)
00061       {
00062         _data.termData = 0;
00063         _data.iterator = 0;
00064       }
00065
00066       // Takes a fresh sorted copy of the term vector and moves to the first
00067       // term; the iteration is finished at once if there are no terms.
00068       void startIteration() {
00069         _alphabetical.assign( _termData.begin(), _termData.end() );
00070         std::sort( _alphabetical.begin(), _alphabetical.end(), MemoryIndex::term_entry::term_less() );
00071
00072         _currentTerm = _alphabetical.begin();
00073         _data.termData = 0;
00074         _data.iterator = 0;
00075         _finished = ( _currentTerm == _alphabetical.end() );
00076
00077         if( !_finished )
00078           _loadCurrentTerm();
00079       }
00080
00081       // True once every term has been visited (or before startIteration()).
00082       bool finished() const {
00083         return _finished;
00084       }
00085
00086       // Returns the entry for the current term, or 0 when finished.
00087       DocListData* currentEntry() {
00088         return _finished ? 0 : &_data;
00089       }
00090
00091       const DocListData* currentEntry() const {
00092         return _finished ? 0 : &_data;
00093       }
00094
00095       // Advances to the next term; returns false if there was none.
00096       bool nextEntry() {
00097         if( _finished )
00098           return false;
00099
00100         ++_currentTerm;
00101         _finished = ( _currentTerm == _alphabetical.end() );
00102
00103         if( !_finished )
00104           _loadCurrentTerm();
00105         return !_finished;
00106       }
00107     };
00108   }
00109 }
00110 
00111 #endif // INDRI_MEMORYINDEXDOCLISTFILEITERATOR_HPP
00112 

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4