Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

TermList.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010 */
00011 
00012 
00013 //
00014 // TermList
00015 //
00016 // 23 November 2004 -- tds
00017 //
00018 
00019 #ifndef INDRI_TERMLIST_HPP
00020 #define INDRI_TERMLIST_HPP
00021 
00022 #include "indri/greedy_vector"
00023 #include "indri/FieldExtent.hpp"
00024 #include "RVLCompress.hpp"
00025 #include "indri/Buffer.hpp"
00026 #include "indri/RVLCompressStream.hpp"
00027 #include "indri/RVLDecompressStream.hpp"
00028 #include "IndexTypes.hpp"
00029 
00030 namespace indri {
00031   namespace index {
00032     class TermList {
00033     private:
00034       indri::utility::greedy_vector<lemur::api::TERMID_T> _terms;
00035       indri::utility::greedy_vector<FieldExtent> _fields;
00036 
00037     public:
00038       void clear() {
00039         _terms.clear();
00040         _fields.clear();
00041       }
00042       
00043       void addField( const indri::index::FieldExtent& field ) {
00044         _fields.push_back( field );
00045       }
00046       
00047       void addTerm( const lemur::api::TERMID_T termID ) {
00048         _terms.push_back( termID );
00049       }
00050       
00051       indri::utility::greedy_vector<lemur::api::TERMID_T>& terms() {
00052         return _terms;
00053       }
00054       
00055       const indri::utility::greedy_vector<lemur::api::TERMID_T>& terms() const {
00056         return _terms;
00057       }
00058       
00059       indri::utility::greedy_vector<indri::index::FieldExtent>& fields() {
00060         return _fields;
00061       }
00062       
00063       const indri::utility::greedy_vector<indri::index::FieldExtent>& fields() const {
00064         return _fields;
00065       }
00066       
00067       void read( const char* buffer, int size ) {
00068         clear();
00069         indri::utility::RVLDecompressStream stream( buffer, size );
00070         
00071         int termCount;
00072         int fieldCount;
00073         
00074         stream >> termCount
00075                >> fieldCount;
00076         
00077         for( int i=0; i<termCount; i++ ) {
00078           lemur::api::TERMID_T termID;
00079           stream >> termID;
00080 
00081           assert( termID >= 0 );
00082           _terms.push_back( termID ); 
00083         }
00084         
00085         for( int i=0; i<fieldCount; i++ ) {
00086           FieldExtent extent;
00087           
00088           stream >> extent.id
00089                  >> extent.parentOrdinal
00090                  >> extent.begin
00091                  >> extent.end
00092                  >> extent.number;
00093           
00094           assert( extent.id >= 0 );
00095           assert( extent.parentOrdinal >= 0 );
00096           assert( extent.begin >= 0 );
00097           assert( extent.end >= extent.begin );
00098           
00099           extent.ordinal = i + 1;
00100 
00101           _fields.push_back( extent );
00102         }
00103       }
00104       
00105       void write( indri::utility::Buffer& buffer ) {
00106         // format:
00107         //   term count
00108         //   field count
00109         //   termID * termCount (compressed)
00110         //   ( fieldID, begin, end, number ) * fieldCount
00111         
00112         indri::utility::RVLCompressStream out( buffer );
00113         
00114         // write count of terms and fields in the document first
00115         int termCount = (int)_terms.size();
00116         int fieldCount = (int)_fields.size();
00117 
00118         out << termCount
00119             << fieldCount;
00120         
00121         // write out terms
00122         for( size_t i=0; i<_terms.size(); i++ ) {
00123           assert( _terms[i] >= 0 );
00124           out << _terms[i];
00125         }
00126 
00127         // write out fields
00128         for( size_t i=0; i<_fields.size(); i++ ) {
00129 
00130           assert( _fields[i].id >= 0 );
00131           assert( _fields[i].ordinal == i + 1 );
00132 
00133           out << _fields[i].id
00134               << _fields[i].parentOrdinal
00135               << _fields[i].begin
00136               << _fields[i].end
00137               << _fields[i].number;
00138         }
00139       }
00140     };
00141   }
00142 }
00143 
00144 #endif // INDRI_TERMLIST_HPP

Generated on Tue Jun 15 11:02:56 2010 for Lemur by doxygen 1.3.4