Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

StructQryDocRep.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 #ifndef _STRUCTQRYDOCREP_HPP
00013 #define _STRUCTQRYDOCREP_HPP
00014 #include "DocumentRep.hpp"
00015 #include <cmath>
00016 
00017 namespace lemur 
00018 {
00019   namespace retrieval
00020   {
00021     
00024     class StructQryDocRep : public lemur::api::DocumentRep {
00025     public:
00027       StructQryDocRep(lemur::api::DOCID_T docID, double *idfValue, int docLength, int docCount,
00028                       double docLengthAverage, double db) : 
00029         lemur::api::DocumentRep(docID), did(docID), idf(idfValue), end(docLength),
00030         docEnd(docLength), size(docLength), start(0),
00031         dla(docLengthAverage), defaultBelief(db) {
00032         oneMinusDB = 1 - defaultBelief;
00033         denom = log(docCount + 1.0);
00034         numer = docCount + 0.5;
00035       }
00037       virtual ~StructQryDocRep() {}
00039       virtual double termWeight(lemur::api::TERMID_T termID, const lemur::api::DocInfo *info) const { return 0;}
00041       virtual double termWeight(lemur::api::TERMID_T termID, double dtf, int df) const{
00042         if (idf)
00043           return beliefScore(dtf, idf[termID]);
00044         else
00045           return beliefScore(dtf, computeIdfScore(df));
00046       }
00048       virtual double scoreConstant() const { return 0;}
00049 
00051       void startPassageIteration(int sz) const {
00052         size = sz;
00053         increment = size/2;
00054         start = 0;
00055         end = size < docEnd ? size : docEnd;
00056       }
00058       bool hasMorePassage() const {
00059         // still some terms in the list.
00060         return(start < docEnd);
00061       }
00062 
00064       void nextPassage() const{
00065         if(start + increment < docEnd)
00066           start += increment;
00067         else
00068           start = docEnd;
00069         end = (start + size) < docEnd ? (start + size) : docEnd;
00070       }
00071 
00074       double computeIdfScore(double df) const {
00075         return log(numer/df)/denom;
00076       }
00077 
00079       double beliefScore(double df, double idf) const {
00080         return (defaultBelief + oneMinusDB
00081                 * (df / (df + 0.5 + 1.5* ((end - start)/dla))) * idf);
00082       }
00083 
00085       lemur::api::DOCID_T did;
00087       mutable int start; 
00089       mutable int end;
00090 
00091     private:
00093       double *idf;
00095       mutable int size; 
00097       mutable int increment; 
00099       int docEnd;  
00101       double dla;
00103       double numer, denom;
00105       double defaultBelief, oneMinusDB;
00106     };
00107   }
00108 }
00109 
00110 #endif

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4