Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

WeightFoldingCopier.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 //
00013 // WeightFoldingCopier
00014 //
00015 // 17 September 2004 -- tds
00016 //
00017 
00018 #ifndef INDRI_WEIGHTFOLDINGCOPIER_HPP
00019 #define INDRI_WEIGHTFOLDINGCOPIER_HPP
00020 namespace indri
00021 {
00022   namespace lang 
00023   {
00024     
00025     class WeightFoldingCopier : public indri::lang::Copier {
00026     private:
00027       std::vector<indri::lang::Node*> _nodes;
00028 
00029     public:
00030       ~WeightFoldingCopier() {
00031         indri::utility::delete_vector_contents( _nodes );
00032       }
00033 
00034       indri::lang::Node* defaultAfter( indri::lang::Node* old, indri::lang::Node* newNode ) {
00035         _nodes.push_back( newNode );
00036         return newNode;
00037       }
00038 
00039       indri::lang::Node* after( indri::lang::WeightNode* oldWeightNode, indri::lang::WeightNode* newWeightNode ) {
00040         indri::lang::WeightNode* newerWeightNode = new indri::lang::WeightNode();
00041         const std::vector< std::pair<double, indri::lang::ScoredExtentNode*> >& children = newWeightNode->getChildren();
00042     
00043         for( size_t i=0; i<children.size(); i++ ) {
00044           // is this a weight node?
00045           indri::lang::WeightNode* childWeightNode = dynamic_cast<indri::lang::WeightNode*>( children[i].second );
00046 
00047           // is this a combine node?
00048           indri::lang::CombineNode* childCombineNode = dynamic_cast<indri::lang::CombineNode*>( children[i].second );
00049 
00050           if( !childWeightNode && !childCombineNode ) {
00051             // child is not a weight node, so just add it directly
00052             newerWeightNode->addChild( children[i].first, children[i].second );
00053           } else if( childCombineNode ) {
00054             const std::vector< indri::lang::ScoredExtentNode* >& grandkids = childCombineNode->getChildren();
00055             double kidWeight = children[i].first / double(grandkids.size());
00056         
00057             for( size_t j=0; j<grandkids.size(); j++ ) {
00058               newerWeightNode->addChild( kidWeight, grandkids[j] );
00059             }
00060           } else {
00061             // child _is_ a weight node, so we're going to fold all its children up to this level
00062             const std::vector< std::pair<double, indri::lang::ScoredExtentNode*> >& grandkids = childWeightNode->getChildren();
00063             double parentWeight = children[i].first;
00064             double normalizer = 0.0;
00065 
00066             // need to normalize all weights to sum to 1
00067             for( size_t j=0; j<grandkids.size(); j++ ) {
00068               normalizer += grandkids[j].first;
00069             }
00070     
00071             for( size_t j=0; j<grandkids.size(); j++ ) {
00072               // have to normalize the weight by including the parent weight as well
00073               newerWeightNode->addChild( parentWeight * grandkids[j].first / normalizer,
00074                                          grandkids[j].second );
00075             }
00076           }
00077         }
00078 
00079         newerWeightNode->setNodeName( newWeightNode->nodeName() );
00080         delete newWeightNode;
00081         _nodes.push_back( newerWeightNode );
00082 
00083         return newerWeightNode;
00084       }
00085     };
00086   }
00087 }
00088 
00089 #endif // INDRI_WEIGHTFOLDINGCOPIER_HPP
00090 

Generated on Tue Jun 15 11:02:56 2010 for Lemur by doxygen 1.3.4