Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

DocListMemoryBuilder.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 //
00014 // DocListMemoryBuilder.hpp
00015 //
00016 // tds - 17 December 2003
00017 //
00018 
00019 #ifndef LEMUR_KEYFILEDOCLISTMEMORYBUILDER_HPP
00020 #define LEMUR_KEYFILEDOCLISTMEMORYBUILDER_HPP
00021 
00022 #include "RVLCompress.hpp"
00023 #include <vector>
00024 #include <assert.h>
00025 #include "indri/greedy_vector"
00026 #include "indri/DocListIterator.hpp"
00027 #include "indri/RegionAllocator.hpp"
00028 
00029 namespace indri {
00030   namespace index {
00031     struct DocListMemoryBuilderSegment { // Plain record of three char* values; DocListMemoryBuilder keeps a greedy_vector of these in its _lists member.
00032       DocListMemoryBuilderSegment( char* b, char* d, char* c ) { // Copies the three pointers as given: no allocation, no validation.
00033         base = b;
00034         data = d;
00035         capacity = c;
00036       }
00037 
00038       char* base; // constructor argument b; presumably the first byte of the segment's buffer -- TODO confirm against the .cpp
00039       char* data; // constructor argument d; presumably one past the last byte written so far -- TODO confirm
00040       char* capacity; // constructor argument c; a pointer, not a byte count -- presumably the end of the buffer, TODO confirm
00041     };
00042 
00043     class DocListMemoryBuilderIterator : public DocListIterator { // DocListIterator subclass that holds const views of a vector of DocListMemoryBuilderSegment; only declarations appear in this header.
00044       const indri::utility::greedy_vector< DocListMemoryBuilderSegment, 4 >* _lists; // segment vector being viewed (const: not modified through this pointer); same vector type as the second reset() overload accepts
00045       indri::utility::greedy_vector< DocListMemoryBuilderSegment, 4 >::const_iterator _current; // position within a segment vector; presumably within *_lists -- TODO confirm
00046       indri::index::DocListIterator::DocumentData _data; // presumably the entry that currentEntry() points at -- TODO confirm
00047       indri::utility::greedy_vector<DocListIterator::TopDocument> _emptyTopDocuments; // presumably what topDocuments() returns; the name suggests it stays empty -- TODO confirm
00048       
00049       const char* _list; // read-only byte cursor; presumably into the current segment -- TODO confirm
00050       const char* _listEnd; // read-only end bound paired with _list -- TODO confirm exact meaning
00051       bool _finished; // presumably the value reported by finished() -- TODO confirm
00052 
00053       TermData* _termData; // same type as the termData argument of the constructor/reset(); presumably returned by termData()
00054 
00055     public:
00056       DocListMemoryBuilderIterator(); // default constructor; the state it establishes is defined outside this header
00057       DocListMemoryBuilderIterator( class DocListMemoryBuilder& builder, TermData* termData ); // "class DocListMemoryBuilder" is an elaborated type specifier: the builder class is only defined later in this file
00058 
00059       void reset( class DocListMemoryBuilder& builder, TermData* termData ); // re-targets the iterator at a builder (non-const reference)
00060       void reset( const indri::utility::greedy_vector< DocListMemoryBuilderSegment, 4 >& lists, TermData* termData ); // re-targets the iterator at a segment vector directly
00061 
00062       void startIteration(); // NOTE(review): this and the functions below look like the DocListIterator interface -- confirm which are virtual overrides
00063       bool finished();
00064       bool nextEntry( lemur::api::DOCID_T documentID ); // overload taking a document id; presumably skips forward relative to documentID -- verify against the definition
00065       bool nextEntry(); // overload without a target document
00066       TermData* termData();
00067       DocListIterator::DocumentData* currentEntry(); // returns a pointer, not a copy; lifetime of the pointee is not visible here
00068       indri::utility::greedy_vector<DocListIterator::TopDocument>& topDocuments(); // returns a non-const reference
00069     };
00070 
00071     class DocListMemoryBuilder { // Accumulates per-document location data into a vector of DocListMemoryBuilderSegment; member functions are defined outside this header.
00072     public:
00073       typedef DocListMemoryBuilderIterator iterator; // lets callers write DocListMemoryBuilder::iterator
00074       friend class DocListMemoryBuilderIterator; // grants the iterator access to the private members below
00075 
00076     private:
00077       int _documentFrequency; // presumably the value returned by documentFrequency() -- TODO confirm
00078       int _termFrequency; // presumably the value returned by termFrequency() -- TODO confirm
00079 
00080       indri::utility::greedy_vector< DocListMemoryBuilderSegment, 4 > _lists; // segment records; same vector type the iterator's reset() overload accepts
00081 
00082       char* _list; // NOTE(review): _list/_listBegin/_listEnd look like cursor/start/end of the segment being written -- confirm in the .cpp
00083       char* _listBegin;
00084       char* _listEnd;
00085 
00086       char* _documentPointer; // presumably where the current document's record starts in the buffer -- TODO confirm
00087       char* _locationCountPointer; // presumably where the current document's location count is stored -- TODO confirm
00088 
00089       int _lastLocation; // NOTE(review): the three _last* fields look like previous-value state for delta encoding (RVLCompress.hpp is included) -- confirm
00090       int _lastDocument;
00091       int _lastTermFrequency;
00092 
00093       indri::utility::RegionAllocator* _allocator; // raw pointer of the same type the constructor takes; ownership is not visible here
00094 
00095       inline void _safeAddLocation( int position ); // declared inline but no definition appears in this header -- it must be supplied wherever it is used
00096       size_t _roundUp( size_t amount );
00097       void _grow();
00098       void _terminateDocument();
00099 
00100     public:
00101       DocListMemoryBuilder( indri::utility::RegionAllocator* allocator ); // single-argument and not explicit, so a RegionAllocator* converts implicitly to a builder
00102       ~DocListMemoryBuilder();
00103       const DocListMemoryBuilder& operator=( DocListMemoryBuilder& other ); // NOTE(review): takes a NON-const source and returns a const reference -- confirm whether 'other' is modified by assignment
00104       
00105       void startDocument( int docID ); // presumably called once per document, before addLocation() -- TODO confirm
00106       void addLocation( int location );
00107       void endDocument(); // presumably closes the document opened by startDocument() -- TODO confirm
00108 
00109       void clear();
00110       void flush();
00111       bool empty(); // not const-qualified, unlike the three accessors below
00112 
00113       int documentFrequency() const;
00114       int termFrequency() const;
00115       size_t memorySize() const; // unit (bytes vs. entries) is not visible from the declaration
00116     };
00117   }
00118 }
00119 
00120 #endif // LEMUR_KEYFILEDOCLISTMEMORYBUILDER_HPP

Generated on Tue Jun 15 11:02:53 2010 for Lemur by doxygen 1.3.4