Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

RelevanceModel.hpp

Go to the documentation of this file.
00001 
00002 /*==========================================================================
00003  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00004  *
00005  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00006  * is subject to the terms of the software license set forth in the LICENSE
00007  * file included with this software, and also available at
00008  * http://www.lemurproject.org/license.html
00009  *
00010  *==========================================================================
00011 */
00012 
00013 
00014 //
00015 // RelevanceModel
00016 //
00017 // 23 June 2005 -- tds
00018 //
00019 
00020 
00021 #include <string>
00022 #include <vector>
00023 #include "indri/TermFieldStatistics.hpp"
00024 #include "indri/TermScoreFunction.hpp"
00025 #include "indri/TermScoreFunctionFactory.hpp"
00026 #include "indri/HashTable.hpp"
00027 #include "indri/greedy_vector"
00028 #include "indri/QueryEnvironment.hpp"
00029 
00030 namespace indri {
00031   namespace query {
00032     /// Relevance model declaration.
00033     ///
00034     /// Only declarations appear in this header; every member function is
00035     /// defined elsewhere. The public surface is the constructor, the two
00036     /// generate() overloads, getQueryResults() and getGrams().
00037     ///
00038     /// NOTE(review): the class stores a reference and vectors of raw
00039     /// pointers and declares a destructor, but no copy operations. Whether
00040     /// copying an instance is safe depends on ownership rules that are not
00041     /// visible in this header -- confirm before copying.
00042     class RelevanceModel {
00043     public:
00044       /// A sequence of terms together with a weight.
00045       struct Gram {
00046         std::vector<std::string> terms;  ///< terms of the gram, in order
00047         double weight;                   ///< not initialized by Gram itself
00048 
00049         /// Hash functor keyed on the terms of a Gram; the weight is ignored.
00050         struct hash {
00051           /// Folds the per-term hashes left to right as
00052           /// accumulator = accumulator * 7 + h( term ).
00053           ///
00054           /// The fold runs on an unsigned accumulator so that wrap-around is
00055           /// well defined; with a signed accumulator the multiplication can
00056           /// overflow, which is undefined behavior.
00057           int operator() ( const Gram* one ) const {
00058             indri::utility::GenericHash<const char*> h;
00059             unsigned int accumulator = 0;
00060 
00061             for( size_t i=0; i<one->terms.size(); i++ ) {
00062               accumulator *= 7u;
00063               accumulator += static_cast<unsigned int>( h( one->terms[i].c_str() ) );
00064             }
00065 
00066             return static_cast<int>( accumulator );
00067           }
00068         };
00069 
00070         /// Predicate that is true when the first Gram has the strictly
00071         /// greater weight; used as a sort predicate it orders grams by
00072         /// descending weight.
00073         struct weight_greater {
00074           bool operator() ( const Gram* o, const Gram* t ) const {
00075             return t->weight < o->weight;
00076           }
00077         };
00078 
00079         /// Three-way comparison of two grams by their terms; the weight is
00080         /// ignored. Returns 0 only when both grams hold the same terms in
00081         /// the same order.
00082         struct string_comparator {
00083           int operator() ( const Gram* o, const Gram* t ) const {
00084             const std::vector<std::string>& one = o->terms;
00085             const std::vector<std::string>& two = t->terms;
00086 
00087             // Grams of different length are never equal. Note the sign: the
00088             // SHORTER gram compares as greater (1), the longer as less (-1).
00089             if( one.size() != two.size() )
00090               return ( one.size() < two.size() ) ? 1 : -1;
00091 
00092             // Same length: the first differing term decides, in ordinary
00093             // std::string order.
00094             for( size_t i=0; i<one.size(); i++ ) {
00095               const int order = one[i].compare( two[i] );
00096 
00097               if( order != 0 )
00098                 return ( order < 0 ) ? -1 : 1;
00099             }
00100 
00101             return 0;
00102           }
00103         };
00104       };
00105 
00106     private:
00107       /// A Gram together with a list of integer pairs.
00108       /// NOTE(review): the meaning of each pair is not visible in this
00109       /// header -- presumably (document index, occurrence count); confirm
00110       /// against the implementation.
00111       struct GramCounts {
00112         Gram gram;
00113         indri::utility::greedy_vector< std::pair< int, int > > counts;
00114       };
00115 
00116       // Held by reference: the QueryEnvironment must outlive this object.
00117       indri::api::QueryEnvironment& _environment;
00118       int _maxGrams;
00119       std::string _smoothing;
00120       int _documents;
00121 
00122       // Table from Gram* to GramCounts*. Its last two template arguments are
00123       // Gram::hash and Gram::string_comparator, both of which look only at
00124       // the gram's terms.
00125       typedef indri::utility::HashTable< Gram*, GramCounts*, Gram::hash, Gram::string_comparator > HGram;
00126       HGram _gramTable;
00127 
00128       std::vector<indri::api::ScoredExtentResult> _results;
00129       std::vector<lemur::api::DOCID_T> _documentIDs;
00130       std::vector<Gram*> _grams;
00131       std::vector<indri::api::DocumentVector*> _vectors;
00132 
00133       // Internal steps; defined outside this header.
00134       void _countGrams();
00135       void _scoreGrams();
00136       void _sortGrams();
00137       void _extractDocuments();
00138 
00139     public:
00140       /// @param environment  query environment (presumably bound to the
00141       ///                     _environment reference member)
00142       /// @param smoothing    smoothing specification string (format not
00143       ///                     visible in this header)
00144       /// @param maxGrams     presumably the largest gram length or count to
00145       ///                     produce -- confirm against the implementation
00146       /// @param documents    presumably the number of documents to use --
00147       ///                     confirm against the implementation
00148       RelevanceModel( indri::api::QueryEnvironment& environment,
00149                       const std::string& smoothing,
00150                       int maxGrams,
00151                       int documents );
00152       ~RelevanceModel();
00153 
00154       /// Generate from a query string.
00155       void generate( const std::string& query );
00156       /// Generate from a query string and an existing result set.
00157       void generate( const std::string &query , const std::vector<indri::api::ScoredExtentResult>& results );
00158       /// @return read-only view of a list of scored extent results
00159       ///         (presumably those of the last generate() call).
00160       const std::vector<indri::api::ScoredExtentResult>& getQueryResults() const;
00161       /// @return read-only view of a list of Gram pointers (presumably the
00162       ///         grams produced by the last generate() call).
00163       const std::vector<Gram*>& getGrams() const;
00164     };
00165   }
00166 }

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4