Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

QryBasedSampler.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 #ifndef _QRYBASEDSAMPLER_HPP
00013 #define _QRYBASEDSAMPLER_HPP
00014 
00015 
00016 
00017 #include "FreqCounter.hpp"
00018 #include "DBManager.hpp"
00019 
00020 namespace lemur 
00021 {
00022   namespace distrib 
00023   {
00024     
00026     typedef set<docid_t, less<string> > docidset; /* set of document ids, ordered with less<string>; docid_t is declared outside this header */
00027 
00029 #define T_NDOCS 1 /* termMode value under which QryBasedSampler::numDocs is used */
00030 
00031 #define T_NWORDS 2 /* termMode value under which QryBasedSampler::numWords is used */
00032 
00033 #define T_NQRYS 4 /* termMode value under which QryBasedSampler::numQueries is used */
00034 
00038     class QryBasedSampler { /* holds the configuration and state of a query-based probe of a database: the DBManager to query, the FreqCounter that builds the description, an output prefix and the termination settings */
00039     public:
00040       QryBasedSampler();
00041       ~QryBasedSampler();
00042 
00044       bool probe(const char * initQuery); /* presumably runs the probe starting from initQuery; the meaning of the bool result is not visible in this header - TODO confirm */
00045   
00047       void setDBManager(const DBManager * database); /* presumably stores database in db; pointer ownership is not visible here - TODO confirm */
00048 
00050       const DBManager * getDBManager() const; /* presumably returns db */
00051 
00052 
00055       void setFreqCounter(FreqCounter * counter); /* presumably stores counter in freqCounter; pointer ownership is not visible here - TODO confirm */
00056 
00058       const FreqCounter * getFreqCounter() const; /* presumably returns freqCounter */
00059 
00060 
00064       void setOutputPrefix(const string &prefix); /* presumably sets outputPrefix, the prefix for output filenames */
00065   
00067       const string &getOutputPrefix() const; /* presumably returns outputPrefix */
00068 
00070       void setNumDocs(int n); /* presumably sets numDocs (only used if termMode == T_NDOCS) */
00071 
00073       int getNumDocs() const; /* presumably returns numDocs */
00074 
00075 
00077       void setNumWords(int n); /* presumably sets numWords (only used if termMode == T_NWORDS) */
00078 
00080       int getNumWords() const; /* presumably returns numWords */
00081 
00082 
00084       void setNumQueries(int n); /* presumably sets numQueries (only used if termMode == T_NQRYS) */
00085 
00087       int getNumQueries() const; /* presumably returns numQueries */
00088 
00089 
00096       void setTermMode(int m); /* presumably sets termMode; m is expected to be T_NDOCS, T_NWORDS or T_NQRYS */
00097 
00099       int getTermMode() const; /* presumably returns termMode */
00100   
00101 
00103       void setDocsPerQuery(int n); /* presumably sets docsPerQuery */
00104   
00106       int getDocsPerQuery() const; /* presumably returns docsPerQuery */
00107 
00108 
00109     private:
00110 
00111       /* for querying a db */
00112       const DBManager * db;
00113 
00114 
00115       /* for building a description of a db */
00116       FreqCounter * freqCounter;
00117 
00118 
00119       /* output prefix for filenames */
00120       string outputPrefix;
00121 
00122 
00123       /* termination mode of the probe -
00124        * one of T_NDOCS, T_NWORDS or T_NQRYS.
00124        * NOTE(review): the values 1/2/4 are distinct bits; whether modes may be combined is not visible here - confirm */
00125       int termMode;
00126   
00127       /* number of unique docs to retrieve - only used if
00128        * termMode == T_NDOCS */
00129       int numDocs;
00130 
00131       /* number of unique words to retrieve - only used if
00132        * termMode == T_NWORDS */
00133       int numWords;
00134 
00135       /* number of queries to run - only used if
00136        * termMode == T_NQRYS */
00137       int numQueries;
00138 
00139       /* documents per query to use */
00140       int docsPerQuery;
00141 
00142       /* stores the ids of the documents already retrieved
00143        * from the system.  used to prevent parsing
00144        * a document multiple times */
00145       docidset seenDocs;
00146     };
00147   }
00148 }
00149 
00150 #endif

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4