Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

MemoryIndexVocabularyIterator.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 //
00013 // MemoryIndexVocabularyIterator
00014 //
00015 // 23 November 2004 -- tds
00016 //
00017 
00018 #ifndef INDRI_MEMORYINDEXVOCABULARYITERATOR_HPP
00019 #define INDRI_MEMORYINDEXVOCABULARYITERATOR_HPP
00020 
#include <cassert>
#include <cstring>
#include <vector>

#include "indri/Mutex.hpp"
#include "indri/TermData.hpp"
#include "indri/DiskTermData.hpp"
00024 
00025 namespace indri {
00026   namespace index {
00027     class MemoryIndexVocabularyIterator : public VocabularyIterator {
00028     private:
00029       typedef std::vector<MemoryIndex::term_entry*> VTermEntry;
00030       VTermEntry& _termData;
00031       VTermEntry::iterator _iterator;
00032       DiskTermData _diskTermData;
00033 
00034       // this tells us if the last nextEntry() came from 
00035       // a start iteration or not - needed for nextEntry(const char*)
00036       // call
00037       bool _justStartedIteration;
00038       
00039     public:
00040       MemoryIndexVocabularyIterator( VTermEntry& termData ) :
00041         _termData(termData)
00042       {
00043       }
00044       
00045       void startIteration() {
00046         _iterator = _termData.begin();
00047 
00048         _diskTermData.length = 0;
00049         _diskTermData.startOffset = 0;
00050 
00051         if( _iterator != _termData.end() ) {
00052           _diskTermData.termData = (*_iterator)->termData;
00053           _diskTermData.termID = (*_iterator)->termID;
00054         }
00055 
00056         _justStartedIteration=true;
00057       }
00058       
00059       DiskTermData* currentEntry() { 
00060         if( _iterator == _termData.end() )
00061           return 0;
00062         
00063         return &_diskTermData;
00064       }
00065       
00066       bool nextEntry() {
00067         if( finished() )
00068           return false;
00069         
00070         _iterator++;
00071 
00072         if( finished() )
00073           return false;
00074 
00075         _diskTermData.termID++;
00076         _diskTermData.termData = (*_iterator)->termData;
00077         return true;
00078       }
00079 
00080       bool nextEntry(const char *skipTo) {
00081         assert(skipTo!=NULL);
00082 
00083         int termLength=strlen(skipTo);
00084         if (!termLength) {
00085           startIteration();
00086           return true;
00087         }
00088 
00089         if (!_justStartedIteration) {
00090           _iterator++;
00091         }
00092 
00093         _justStartedIteration=false;
00094 
00095         while (_iterator!=_termData.end()) {
00096 
00097           if (strstr((*_iterator)->term, skipTo)==(*_iterator)->term) {
00098             return true;
00099           }
00100           _iterator++;
00101         }
00102 
00103         // return false...
00104         return false;
00105       }
00106 
00107       bool finished() {
00108         return _iterator == _termData.end();
00109       }
00110     };
00111   }
00112 }
00113 
00114 #endif // INDRI_MEMORYINDEXVOCABULARYITERATOR_HPP
00115 

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4