Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

QueryExpander.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 //
00013 // QueryExpander
00014 //
00015 // 18 Aug 2004 -- dam
00016 //
00017 
00018 #ifndef INDRI_QUERYEXPANDER_HPP
00019 #define INDRI_QUERYEXPANDER_HPP
00020 
00021 #include <string>
00022 #include <vector>
00023 #include <map>
00024 
00025 #include "indri/QueryEnvironment.hpp"
00026 #include "indri/Parameters.hpp"
00027 namespace indri
00028 {
00029   namespace query
00030   {
00031     
00032     struct QueryExpanderSort {
00033     public:
00034       bool operator() ( const std::pair<std::string, double>& one, const std::pair<std::string, double>& two ) const {
00035         return one.second > two.second;
00036       }
00037     };
00038 
00039     class QueryExpander {
00040     private:
00041       std::map<std::string, UINT64> _cf_cache;
00042   
00043     protected:
00044       indri::api::QueryEnvironment * _env;
00045       indri::api::Parameters _param;
00046 
00047       std::map< std::string, bool > _stopwords;
00048 
00049       std::vector<indri::api::DocumentVector*> getDocumentVectors( std::vector<indri::api::ScoredExtentResult>& results, int rmDocs );
00050       std::vector<std::string> * getVocabulary( std::vector<indri::api::ScoredExtentResult>& results, int rmDocs );
00051       std::vector<std::string> * getVocabulary( std::vector<indri::api::DocumentVector*>& docVectors );
00052       std::string buildQuery( const std::string& originalQuery, double originalWeight,
00053                               const std::vector< std::pair<std::string, double> >& expansionTerms,
00054                               int termCount );
00055       UINT64 getCF( const std::string& term );
00056   
00057     public:
00058       QueryExpander( indri::api::QueryEnvironment * env , indri::api::Parameters& param );
00059       virtual ~QueryExpander() {};
00060 
00061       // runs original query, expands query based on results ( via expand( .. ) ), then runs expanded query
00062       std::vector<indri::api::ScoredExtentResult> runExpandedQuery( std::string originalQuery , int resultsRequested , bool verbose = false );
00063   
00064       // creates expanded query from an original query and a ranked list of documents
00065       virtual std::string expand( std::string originalQuery , std::vector<indri::api::ScoredExtentResult>& results ) = 0;
00066     };
00067   }
00068 }
00069 
00070 
00071 #endif

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4