Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

FrequencyListCopier.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 //
00013 // FrequencyListCopier
00014 //
00015 // 24 August 2004 -- tds
00016 //
00017 // Finds IndexTerm nodes that only need to return frequency information,
00018 // not positions, and inserts IndexFrequencyTerms instead.
00019 //
00020 
00021 #ifndef INDRI_FREQUENCYLISTCOPIER_HPP
00022 #define INDRI_FREQUENCYLISTCOPIER_HPP
00023 
00024 #include "ListCache.hpp"
00025 namespace indri
00026 {
00027   namespace lang
00028   {
00029     
00030     class FrequencyListCopier : public indri::lang::Copier {
00031     private:
00032       std::vector<indri::lang::Node*> _nodes;
00033       std::stack<indri::lang::Node*> _disqualifiers;
00034       indri::lang::IndexTerm* _lastTerm;
00035       bool _disqualifiedTree;
00036 
00037       ListCache* _listCache;
00038 
00039     public:
00040       FrequencyListCopier( ListCache* listCache ) : _listCache(listCache), _lastTerm(0), _disqualifiedTree(false) {}
00041 
00042       indri::lang::Node* defaultAfter( indri::lang::Node* oldNode, indri::lang::Node* newNode ) {
00043         if( _disqualifiers.size() && oldNode == _disqualifiers.top() )
00044           _disqualifiers.pop();
00045     
00046         _nodes.push_back( newNode );
00047         return newNode;
00048       }
00049 
00050       ~FrequencyListCopier() {
00051         indri::utility::delete_vector_contents<indri::lang::Node*>( _nodes );
00052       }
00053 
00054       void before( indri::lang::ExtentAnd* exAnd ) {
00055         _disqualifiers.push(exAnd);
00056       }
00057 
00058       void before( indri::lang::ExtentOr* exOr ) {
00059         _disqualifiedTree = true;
00060       }
00061 
00062       void before( indri::lang::ExtentInside* exInside ) {
00063         _disqualifiedTree = true;
00064       }
00065 
00066       void before( indri::lang::NestedExtentInside* nestExInside ) {
00067         _disqualifiedTree = true;
00068       }
00069 
00070       void before( indri::lang::ExtentRestriction* exRestrict ) {
00071         _disqualifiers.push(exRestrict);
00072       }
00073 
00074       void before( indri::lang::ExtentEnforcement* exEnforce ) {
00075         _disqualifiers.push(exEnforce);
00076       }
00077 
00078       void before( indri::lang::FixedPassage* fixedPassage ) {
00079         _disqualifiers.push(fixedPassage);
00080       }
00081 
00082       void before( indri::lang::ContextCounterNode* context ) {
00083         if( context->getContext() != NULL ) {
00084           _disqualifiedTree = true;
00085         }
00086       }
00087       
00088       void before( indri::lang::WeightedExtentOr* wExOr ) {
00089         _disqualifiedTree = true;
00090       }
00091 
00092       void before( indri::lang::ODNode* odNode ) {
00093         _disqualifiedTree = true;
00094       }
00095 
00096       void before( indri::lang::UWNode* uwNode ) {
00097         _disqualifiedTree = true;
00098       }
00099 
00100       void before( indri::lang::BAndNode* bandNode ) {
00101         _disqualifiedTree = true;
00102       }
00103 
00104       indri::lang::Node* after( indri::lang::IndexTerm* oldNode, indri::lang::IndexTerm* newNode ) {
00105         _lastTerm = newNode;
00106         return defaultAfter( oldNode, newNode );
00107       }
00108 
00109       void before( indri::lang::RawScorerNode* oldNode, indri::lang::RawScorerNode* newNode ) {
00110         _lastTerm = 0;
00111         _disqualifiedTree = false;
00112       }
00113 
00114 
00115       void before( indri::lang::NestedRawScorerNode* oldNode, indri::lang::NestedRawScorerNode* newNode ) {
00116         before( (indri::lang::RawScorerNode*) oldNode, (indri::lang::RawScorerNode*) newNode );
00117       }
00118 
00119       indri::lang::Node* after( indri::lang::RawScorerNode* oldNode, indri::lang::RawScorerNode* newNode ) {
00120         indri::lang::Node* result = 0;
00121 
00122         if( _lastTerm && !_disqualifiers.size() && !_disqualifiedTree && oldNode->getContext() == NULL ) {
00123           indri::lang::TermFrequencyScorerNode* scorerNode;
00124           // there's a term to score, and nothing to disqualify us from doing frequency scoring
00125           scorerNode = new indri::lang::TermFrequencyScorerNode( _lastTerm->getText(),
00126                                                                  _lastTerm->getStemmed() );
00127 
00128           scorerNode->setNodeName( oldNode->nodeName() );
00129           scorerNode->setSmoothing( oldNode->getSmoothing() );
00130           scorerNode->setStatistics( oldNode->getOccurrences(), oldNode->getContextSize(), oldNode->getDocumentOccurrences(), oldNode->getDocumentCount() );
00131 
00132           delete newNode;
00133           result = defaultAfter( oldNode, scorerNode );
00134         } else if( !_disqualifiers.size() ) {
00135           ListCache::CachedList* list = 0; 
00136 
00137           if( _listCache )
00138             list = _listCache->find( newNode->getRawExtent(), newNode->getContext() );
00139       
00140           if( list ) {
00141             indri::lang::CachedFrequencyScorerNode* cachedNode;
00142             cachedNode = new indri::lang::CachedFrequencyScorerNode( newNode->getRawExtent(), newNode->getContext() );
00143             cachedNode->setNodeName( newNode->nodeName() );
00144             cachedNode->setSmoothing( newNode->getSmoothing() );
00145             cachedNode->setList( list );
00146 
00147             delete newNode;
00148             result = defaultAfter( oldNode, cachedNode );
00149           } else {
00150             result = defaultAfter( oldNode, newNode );
00151           }
00152         } else {
00153           result = defaultAfter( oldNode, newNode );
00154         }
00155 
00156         _disqualifiedTree = false;
00157         return result; 
00158       }
00159 
00160       indri::lang::Node* after( indri::lang::NestedRawScorerNode* oldNode, indri::lang::NestedRawScorerNode* newNode ) {
00161         return after( (indri::lang::RawScorerNode*) oldNode, (indri::lang::RawScorerNode*) newNode );
00162       }
00163     };
00164   }
00165 }
00166 
00167 #endif // INDRI_FREQUENCYLISTCOPIER_HPP
00168 

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4