Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

QueryEnvironment.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 //
00014 // QueryEnvironment
00015 //
00016 // 9 March 2004 -- tds
00017 //
00018 
00019 #ifndef INDRI_QUERYENVIRONMENT_HPP
00020 #define INDRI_QUERYENVIRONMENT_HPP
00021 
00022 #include <map>
00023 #include "indri/ScoredExtentResult.hpp"
00024 #include "indri/QueryServer.hpp"
00025 #include "indri/NetworkStream.hpp"
00026 #include "indri/NetworkMessageStream.hpp"
00027 #include "indri/Parameters.hpp"
00028 #include "indri/ParsedDocument.hpp"
00029 #include "indri/Repository.hpp"
00030 #include "indri/QueryAnnotation.hpp"
00031 #include "IndexTypes.hpp"
00032 
00033 namespace indri 
00034 {
00035   namespace api 
00036   {
00043     typedef struct QueryRequest 
00044     {
            // Plain aggregate of the inputs taken by
            // QueryEnvironment::runQuery( QueryRequest& ), which returns a
            // QueryResults. No constructor or member initializers are declared,
            // so a default-initialized QueryRequest leaves resultsRequested,
            // startNum and options uninitialized until the caller assigns them.
            //
            // Values for the `options` member below.
            // NOTE(review): 1 and 2 look like independent bit flags, but
            // `options` is typed as the enum itself, so a combined value would
            // need a cast -- confirm the intended usage in the implementation.
00047       enum Options {
00049         HTMLSnippet = 1,
00051         TextSnippet = 2
00052       };
            // Query text; presumably in the Indri query language -- confirm.
00054       std::string query;
            // List of formulator names.
            // NOTE(review): their meaning is not visible in this header -- TODO confirm.
00056       std::vector<std::string> formulators;
            // Presumably the metadata field names to return per result -- confirm.
00058       std::vector<std::string>  metadata;
            // Document ids; presumably limits evaluation to this set, like the
            // runQuery overloads that take a documentSet -- confirm.
00060       std::vector<lemur::api::DOCID_T>  docSet;
            // Number of results requested.
00062       int resultsRequested;
            // Presumably the rank of the first result to return; whether it is
            // 0- or 1-based is not stated here -- TODO confirm.
00064       int startNum;
            // One of the Options enumerators declared above.
00066       enum Options options;
00067     } QueryRequest;
00068 
00071     typedef struct MetadataPair 
00072     {
            // One key/value pair of strings. QueryResult holds a vector of
            // these in its `metadata` member.
            //
            // Presumably the metadata field name -- confirm.
00074       std::string key;
            // Presumably that field's value for the document -- confirm.
00076       std::string value;
00077     } MetadataPair;
00078     
00085     typedef struct QueryResult
00086     {
            // One entry of QueryResults::results. Plain aggregate: no
            // constructor is declared, so docid, score, begin and end have no
            // default values of their own.
            //
            // Presumably filled only when a snippet option was requested in
            // QueryRequest::options -- confirm.
00088       std::string snippet;
            // Name of the document as a string.
00090       std::string documentName;
            // Document id, in the lemur::api::DOCID_T type.
00092       lemur::api::DOCID_T docid;
            // Score assigned to this result.
00094       double score;
            // Presumably the start and end of the matched extent; units and
            // inclusiveness are not stated in this header -- TODO confirm.
00096       int begin;
00098       int end;
            // Key/value metadata pairs attached to this result.
00100       std::vector<indri::api::MetadataPair> metadata;
00101     } QueryResult;
00102 
00108     typedef struct QueryResults 
00109     {
            // Return type of QueryEnvironment::runQuery( QueryRequest& ):
            // three timing figures, a match estimate and the result list.
            // Plain aggregate; no constructor is declared, so the numeric
            // members have no default values of their own.
            //
            // Timings for the parse, execute and document phases.
            // NOTE(review): the time unit is not stated in this header -- TODO confirm.
00111       float parseTime;
00113       float executeTime;
00115       float documentsTime;
            // Estimated number of matches; how the estimate is produced is not
            // visible here.
00117       int estimatedMatches;
            // The individual results, one QueryResult each.
00119       std::vector<QueryResult> results;
00120     } QueryResults;
00121     
00128     class QueryEnvironment {
            // Declares the query API: configuration setters, addServer/addIndex
            // registration, runQuery/runAnnotatedQuery, document and metadata
            // lookup, and collection statistics. Only getServers() is defined
            // inline; every other member function is defined outside this header.
00129     private:
00130       // Value pair: first is the entry in _servers, second is the entry in _streams;
00131       // the indexes to erase are derived from those two pointers.
            // The key is presumably the hostname given to addServer()/removeServer() -- confirm.
00132       std::map<std::string, std::pair<indri::server::QueryServer *, indri::net::NetworkStream *> > _serverNameMap;
00133       std::vector<indri::server::QueryServer*> _servers;
00134       // Value pair: first is the entry in _servers, second is the entry in _repositories;
00135       // the indexes to erase are derived from those two pointers.
            // The key is presumably the pathname given to addIndex()/removeIndex() -- confirm.
00136       std::map<std::string, std::pair<indri::server::QueryServer *, indri::collection::Repository *> > _repositoryNameMap;
00137       std::vector<indri::collection::Repository*> _repositories;
00138       std::vector<indri::net::NetworkStream*> _streams;
00139       std::vector<indri::net::NetworkMessageStream*> _messageStreams;
00140 
            // NOTE(review): _baseline is a bool while setBaseline() takes a
            // string; the mapping between the two is in the implementation.
00141       Parameters _parameters;
00142       bool _baseline;
00143       
            // Private helpers for running a query and combining the responses.
            // Their bodies are not in this header, so only the signatures are
            // known here: each works on an InferenceNetwork::MAllResults and/or
            // on vectors of query nodes and server responses.
00144       void _mergeQueryResults( indri::infnet::InferenceNetwork::MAllResults& results, std::vector<indri::server::QueryServerResponse*>& responses );
00145       void _copyStatistics( std::vector<indri::lang::RawScorerNode*>& scorerNodes, indri::infnet::InferenceNetwork::MAllResults& statisticsResults );
00146 
00147       std::vector<indri::server::QueryServerResponse*> _runServerQuery( std::vector<indri::lang::Node*>& roots, int resultsRequested );
00148       void _sumServerQuery( indri::infnet::InferenceNetwork::MAllResults& results, std::vector<indri::lang::Node*>& roots, int resultsRequested );
00149       void _mergeServerQuery( indri::infnet::InferenceNetwork::MAllResults& results, std::vector<indri::lang::Node*>& roots, int resultsRequested );
00150       void _annotateQuery( indri::infnet::InferenceNetwork::MAllResults& results, const std::vector<lemur::api::DOCID_T>& documentIDs, std::string& annotatorName, indri::lang::Node* queryRoot );
00151 
            // Shared worker with pointer parameters for the optional parts:
            // documentIDs and annotation. Presumably the public runQuery and
            // runAnnotatedQuery overloads forward here, passing null for the
            // parts they do not use -- confirm in the implementation.
00152       std::vector<indri::api::ScoredExtentResult> _runQuery( indri::infnet::InferenceNetwork::MAllResults& results,
00153                                                              const std::string& q,
00154                                                              int resultsRequested,
00155                                                              const std::vector<lemur::api::DOCID_T>* documentIDs,
00156                                                              QueryAnnotation** annotation,
00157                                                              const std::string &queryType = "indri" );
00158       void _scoredQuery( indri::infnet::InferenceNetwork::MAllResults& results, indri::lang::Node* queryRoot, std::string& accumulatorName, int resultsRequested, const std::vector<lemur::api::DOCID_T>* documentSet );
00159 
            // Copy constructor kept private so code outside the class cannot
            // copy-construct a QueryEnvironment. Its body is empty, so it copies
            // nothing from `other`.
            // NOTE(review): no copy-assignment operator is declared, so the
            // compiler-generated one (if Parameters is assignable) stays public
            // and would copy the raw-pointer vectors member-wise -- confirm
            // whether assignment should be blocked too.
00160       QueryEnvironment( QueryEnvironment& other ) {}
00161 
00162     public:
00163       QueryEnvironment();
00164       ~QueryEnvironment();
            // Configuration setters.
            // NOTE(review): the unit of `memory` (presumably bytes) and the
            // accepted rule/baseline string formats are not visible in this
            // header -- TODO confirm.
00167       void setMemory( UINT64 memory );
00170       void setBaseline(const std::string &baseline);
00171       void setSingleBackgroundModel( bool background );
00174       void setScoringRules( const std::vector<std::string>& rules );
00177       void setStopwords( const std::vector<std::string>& stopwords );
            // Attach a query source: a server by hostname, an index by pathname,
            // or an IndexEnvironment object.
00180       void addServer( const std::string& hostname );
00183       void addIndex( const std::string& pathname );
00187       void addIndex( class IndexEnvironment& environment );
            // Presumably releases everything that was attached -- confirm.
00189       void close();
            // Detach a source by the same kind of name used to add it.
00192       void removeServer( const std::string& hostname );
00195       void removeIndex( const std::string& pathname );
00196 
            // Request/response form: takes a QueryRequest and returns a
            // QueryResults by value. The request is taken by non-const
            // reference; whether it is modified is not visible here.
00200       QueryResults runQuery(QueryRequest &request);
00201 
            // Returns up to resultsRequested scored extents for `query`.
            // queryType defaults to "indri".
00206       std::vector<indri::api::ScoredExtentResult> runQuery( const std::string& query, int resultsRequested, const std::string &queryType = "indri" );
00207 
            // Same, with an explicit list of document ids; presumably only
            // those documents are considered -- confirm.
00213       std::vector<indri::api::ScoredExtentResult> runQuery( const std::string& query, const std::vector<lemur::api::DOCID_T>& documentSet, int resultsRequested, const std::string &queryType = "indri" );
00214 
            // Annotated variants of the two string-query overloads above.
            // NOTE(review): they return a raw QueryAnnotation*; ownership is
            // not stated in this header -- confirm who deletes it.
00219       QueryAnnotation* runAnnotatedQuery( const std::string& query, int resultsRequested, const std::string &queryType = "indri" );  
00220 
00226       QueryAnnotation* runAnnotatedQuery( const std::string& query, const std::vector<lemur::api::DOCID_T>& documentSet, int resultsRequested, const std::string &queryType = "indri" );
00227 
00228 
            // Document and metadata lookup, keyed by document ids or by the
            // scored results from runQuery.
            // NOTE(review): the ParsedDocument* elements are raw pointers;
            // ownership is not stated in this header -- confirm who deletes them.
00233       std::vector<indri::api::ParsedDocument*> documents( const std::vector<lemur::api::DOCID_T>& documentIDs );
00238       std::vector<indri::api::ParsedDocument*> documents( const std::vector<indri::api::ScoredExtentResult>& results );
00243       std::vector<std::string> documentMetadata( const std::vector<lemur::api::DOCID_T>& documentIDs, const std::string& attributeName );
00248       std::vector<std::string> documentMetadata( const std::vector<indri::api::ScoredExtentResult>& documentIDs, const std::string& attributeName );
00249 
00253       std::vector<std::string> pathNames( const std::vector<indri::api::ScoredExtentResult>& results );
00254 
00255 
            // Reverse lookup: from values of one metadata attribute to the
            // documents, or to their ids.
00260       std::vector<indri::api::ParsedDocument*> documentsFromMetadata( const std::string& attributeName, const std::vector<std::string>& attributeValues );
00261 
00266       std::vector<lemur::api::DOCID_T> documentIDsFromMetadata( const std::string& attributeName, const std::vector<std::string>& attributeValue );
00267 
            // Term-level counts. The stem* variants presumably expect an
            // already-stemmed term -- confirm.
            // NOTE(review): how counts combine across several attached
            // sources is not visible here.
00270       INT64 termCount();
00274       INT64 termCount( const std::string& term );
00278       INT64 stemCount( const std::string& term );
00284       INT64 termFieldCount( const std::string& term, const std::string& field );
00290       INT64 stemFieldCount( const std::string& term, const std::string& field );
            // Count and list forms for a query expression; queryType defaults
            // to "indri" in both.
00293       double expressionCount( const std::string& expression,
00294                               const std::string &queryType = "indri" );
00300       std::vector<ScoredExtentResult> expressionList( const std::string& expression, 
00301                                                       const std::string& queryType = "indri" );
            // Field names, as strings.
00304       std::vector<std::string> fieldList();
            // Document-level counts: overall, per term, and per stem.
00307       INT64 documentCount();
00311       INT64 documentCount( const std::string& term );
00312 
00316       INT64 documentStemCount( const std::string& stem );
00317 
            // Length of one document; presumably measured in terms -- confirm.
00321       int documentLength(lemur::api::DOCID_T documentID);
00322       
            // NOTE(review): the DocumentVector* elements are raw pointers;
            // ownership is not stated in this header -- confirm who deletes them.
00327       std::vector<DocumentVector*> documentVectors( const std::vector<lemur::api::DOCID_T>& documentIDs );
00328 
00332       void setMaxWildcardTerms(int maxTerms);
            // Read-only access to the internal server list. The vector is
            // returned by const reference, not copied; the QueryServer objects
            // it points to are not const.
00333       const std::vector<indri::server::QueryServer*>& getServers() const { return _servers;
00334       }
00335       
00336     };
00337   }
00338 }
00339 
00340 #endif // INDRI_QUERYENVIRONMENT_HPP
00341 

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4