Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

QuerySpec.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 #ifndef INDRI_QUERYSPEC_HPP
00014 #define INDRI_QUERYSPEC_HPP
00015 
00016 #include <vector>
00017 #include <string>
00018 #include <sstream>
00019 #include <indri/greedy_vector>
00020 #include <algorithm>
00021 #include "lemur-platform.h"
00022 
00023 #include "indri/Walker.hpp"
00024 #include "indri/Copier.hpp"
00025 #include "indri/Packer.hpp"
00026 #include "indri/Unpacker.hpp"
00027 
00028 #include "Exception.hpp"
00029 #include "indri/HashTable.hpp"
00030 #include "indri/ref_ptr.hpp"
00031 
// Ordered comparison of two vectors of pointer-like handles.
//
// Each element is dereferenced and the pointees are compared with
// operator==.  The vectors match only when they have the same length
// and every pair at the same index compares equal.
template<class T>
bool equal( const std::vector<T>& one, const std::vector<T>& two ) {
  if( one.size() != two.size() )
    return false;

  size_t index = 0;

  // advance while the pointees at the current position agree
  while( index < one.size() && *one[index] == *two[index] )
    index++;

  // getting all the way to the end means no pair differed
  return index == one.size();
}
00046 
// Order-insensitive comparison of two vectors of pointer-like handles.
//
// Each element is dereferenced and the pointees are compared with
// operator==.  The vectors match when they have the same length and
// every element of <two> can be paired with a distinct, equal element
// of <one> (duplicates are respected).  Neither argument is modified.
template<class T>
bool unordered_equal( const std::vector<T>& one, const std::vector<T>& two ) {
  if( one.size() != two.size() )
    return false;

  // working copy of the handles in <one>; an entry is dropped as soon
  // as it has been paired with an element of <two>
  std::vector<T> unmatched( one );

  // this algorithm is n^2 as opposed to n log n if
  // we sorted things, but windows tend to be short
  for( size_t i=0; i<two.size(); i++ ) {
    size_t j = 0;

    while( j < unmatched.size() && !( *unmatched[j] == *two[i] ) )
      j++;

    // two[i] has no remaining partner, so the vectors cannot match;
    // there is no point in scanning the rest
    if( j == unmatched.size() )
      return false;

    // we remove each match--if they all match, the array will be empty
    unmatched.erase( unmatched.begin() + j );
  }

  return unmatched.empty();
}
00071 
00072 namespace indri {
00073   namespace lang {
00074     /* abstract */ class Node {
00075     protected:
00076       std::string _name;
00077 
00078     public:
00079       Node() {
00080         std::stringstream s;
00081         s << PTR_TO_INT(this);
00082         _name = s.str();
00083       }
00084 
00085       virtual ~Node() {
00086       }
00087       
00088       void setNodeName( const std::string& name ) {
00089         _name = name;
00090       }
00091 
00092       const std::string& nodeName() const {
00093         return _name;
00094       }
00095 
00096       virtual std::string typeName() const {
00097         return "Node";
00098       }
00099 
00100       virtual std::string queryText() const = 0;
00101 
00102       virtual bool operator < ( Node& other ) {
00103         // TODO: make this faster
00104         if( typeName() != other.typeName() )
00105           return typeName() < other.typeName();
00106 
00107         return queryText() < other.queryText();
00108       }
00109      
00110       virtual bool operator== ( Node& other ) {
00111         return &other == this; 
00112       }
00113 
00114       virtual UINT64 hashCode() const = 0;
00115       virtual void pack( Packer& packer ) = 0;
00116       virtual void walk( Walker& walker ) = 0;
00117       virtual Node* copy( Copier& copier ) = 0;
00118     };
00119 
00120     /* abstract */ class RawExtentNode : public Node {};
00121     /* abstract */ class ScoredExtentNode : public Node {};
00122     /* abstract */ class AccumulatorNode : public Node {};
00123     
00124     class IndexTerm : public RawExtentNode {
00125     private:
00126       std::string _text;
00127       bool _stemmed;
00128 
00129     public:
00130       IndexTerm( const std::string& text, bool stemmed = false ) : _text(text), _stemmed(stemmed)
00131       {
00132       }
00133 
00134       IndexTerm( Unpacker& unpacker ) {
00135         _text = unpacker.getString( "termName" );
00136         _stemmed = unpacker.getBoolean( "stemmed" );
00137       }
00138 
00139       const std::string& getText() { return _text; }
00140 
00141       bool operator==( Node& node ) {
00142         IndexTerm* other = dynamic_cast<IndexTerm*>(&node);
00143 
00144         if( !other )
00145           return false;
00146 
00147         if( other == this )
00148           return true;
00149         
00150         return other->_text == _text;
00151       }
00152 
00153       std::string typeName() const {
00154         return "IndexTerm";
00155       }
00156 
00157       std::string queryText() const {
00158         std::stringstream qtext;
00159 
00160         if( _stemmed ) {
00161           qtext << '"' << _text << '"';
00162         } else {
00163           qtext << _text;
00164         }
00165 
00166         return qtext.str();
00167       }
00168 
00169       void setStemmed(bool stemmed) {
00170         _stemmed = stemmed;
00171       }
00172 
00173       bool getStemmed() const {
00174         return _stemmed;
00175       }
00176 
00177       UINT64 hashCode() const {
00178         int accumulator = 1;
00179 
00180         if( _stemmed )
00181           accumulator += 3;
00182 
00183         indri::utility::GenericHash<const char*> hash;
00184         return accumulator + hash( _text.c_str() );
00185       }
00186 
00187       void pack( Packer& packer ) {
00188         packer.before(this);
00189         packer.put( "termName", _text );
00190         packer.put( "stemmed", _stemmed );
00191         packer.after(this);
00192       }
00193 
00194       void walk( Walker& walker ) {
00195         walker.before(this);
00196         walker.after(this);
00197       }
00198 
00199       Node* copy( Copier& copier ) {
00200         copier.before(this);
00201         IndexTerm* termCopy = new IndexTerm(*this);
00202         return copier.after(this, termCopy);
00203       }
00204     };
00205 
00206     class Field : public RawExtentNode {
00207     private:
00208       std::string _fieldName;
00209 
00210     public:
00211       Field( const std::string& name ) : _fieldName(name)
00212       {
00213       }
00214 
00215       Field( Unpacker& unpacker ) {
00216         _fieldName = unpacker.getString( "fieldName" );
00217       }
00218 
00219       const std::string& getFieldName() const { return _fieldName; }
00220 
00221       std::string typeName() const {
00222         return "Field";
00223       }
00224 
00225       std::string queryText() const {
00226         return _fieldName;
00227       }
00228 
00229       UINT64 hashCode() const {
00230         indri::utility::GenericHash<const char*> hash;
00231         return 5 + hash( _fieldName.c_str() );
00232       }
00233 
00234       void pack( Packer& packer ) {
00235         packer.before(this);
00236         packer.put( "fieldName", _fieldName );
00237         packer.after(this);
00238       }
00239       
00240       void walk( Walker& walker ) {
00241         walker.before(this);
00242         walker.after(this);
00243       }
00244 
00245       Node* copy( Copier& copier ) {
00246         copier.before(this);
00247         Field* newField = new Field(*this);
00248         return copier.after(this, newField);
00249       }
00250 
00251       bool operator== ( Node& other ) {
00252         Field* otherField = dynamic_cast<Field*>(&other);
00253 
00254         if( !otherField )
00255           return false;
00256 
00257         return otherField->getFieldName() == getFieldName();
00258       }
00259     };
00260 
00261     class ExtentInside : public RawExtentNode {
00262     protected:
00263       RawExtentNode* _inner;
00264       RawExtentNode* _outer;
00265 
00266     public:
00267       ExtentInside( RawExtentNode* inner, RawExtentNode* outer ) :
00268         _inner(inner),
00269         _outer(outer)
00270       {
00271       }
00272 
00273       ExtentInside( Unpacker& unpacker ) {
00274         _inner = unpacker.getRawExtentNode( "inner" );
00275         _outer = unpacker.getRawExtentNode( "outer" );
00276       }
00277 
00278       virtual bool operator== ( Node& o ) {
00279         ExtentInside* other = dynamic_cast<ExtentInside*>(&o);
00280   
00281         return other &&
00282           *_inner == *other->_inner &&
00283           *_outer == *other->_outer;
00284       }
00285       
00286       virtual std::string typeName() const {
00287         return "ExtentInside";
00288       }
00289 
00290       virtual UINT64 hashCode() const {
00291         return 7 + _inner->hashCode() + (_inner->hashCode() * 7);
00292       }
00293 
00294       std::string queryText() const {
00295         std::stringstream qtext;
00296         qtext << _inner->queryText()
00297               << "."
00298               << _outer->queryText();
00299 
00300         return qtext.str();
00301       }
00302 
00303       void setInner( RawExtentNode * inner ) {
00304         _inner = inner;
00305       }
00306 
00307       void setOuter( RawExtentNode * outer ) {
00308         _outer = outer;
00309       }
00310 
00311       RawExtentNode* getInner() {
00312         return _inner;
00313       }
00314 
00315       RawExtentNode* getOuter() {
00316         return _outer;
00317       }
00318 
00319       void pack( Packer& packer ) {
00320         packer.before(this);
00321         packer.put( "inner", _inner );
00322         packer.put( "outer", _outer );
00323         packer.after(this);
00324       }
00325 
00326       void walk( Walker& walker ) {
00327         walker.before(this);
00328         _inner->walk(walker);
00329         _outer->walk(walker);
00330         walker.after(this);
00331       }
00332 
00333       virtual Node* copy( Copier& copier ) {
00334         copier.before(this);
00335         
00336         RawExtentNode* newInner = dynamic_cast<RawExtentNode*>(_inner->copy(copier));
00337         RawExtentNode* newOuter = dynamic_cast<RawExtentNode*>(_outer->copy(copier));
00338         ExtentInside* extentInsideCopy = new ExtentInside( newInner, newOuter );
00339         extentInsideCopy->setNodeName( nodeName() );
00340 
00341         return copier.after(this, extentInsideCopy);
00342       }
00343     };
00344 
00345     class WeightedExtentOr : public RawExtentNode {
00346     private:
00347       std::vector<RawExtentNode*> _children;
00348       std::vector<double> _weights;
00349 
00350     public:
00351       WeightedExtentOr() {}
00352       WeightedExtentOr( const std::vector<double>& weights, const std::vector<RawExtentNode*>& children ) :
00353         _children(children),
00354         _weights(weights)
00355       {
00356       }
00357 
00358       WeightedExtentOr( Unpacker& unpacker ) {
00359         _children = unpacker.getRawExtentVector( "children" );
00360         _weights = unpacker.getDoubleVector( "weights" );
00361       }
00362 
00363       std::string typeName() const {
00364         return "WeightedExtentOr";
00365       }
00366 
00367       std::string queryText() const {
00368         std::stringstream qtext;
00369 
00370         qtext << "#wsyn(";
00371 
00372         for( size_t i=0; i<_children.size(); i++ ) {
00373           qtext << " " << _children[i]->queryText();
00374         }
00375 
00376         qtext << " )";
00377         return qtext.str();
00378       }
00379 
00380       UINT64 hashCode() const {
00381         UINT64 hash = 11;
00382 
00383         for( size_t i=0; i<_children.size(); i++ ) {
00384           hash += (UINT64) (_weights[i] * 1000) + _children[i]->hashCode();
00385         }
00386         
00387         return hash;
00388       }
00389 
00390       void addChild( double weight, RawExtentNode* child ) {
00391         _children.push_back( child );
00392         _weights.push_back( weight );
00393       }
00394 
00395       std::vector<RawExtentNode*>& getChildren() {
00396         return _children;
00397       }
00398 
00399       std::vector<double>& getWeights() {
00400         return _weights;
00401       }
00402 
00403       bool operator == ( Node& node ) {
00404         WeightedExtentOr* other = dynamic_cast<WeightedExtentOr*>(&node);
00405 
00406         if( other == this )
00407           return true;
00408 
00409         // TODO: use better checking here to eliminate duplicate nodes
00410         return false;
00411       }
00412 
00413       void pack( Packer& packer ) {
00414         packer.before( this );
00415         packer.put( "weights", _weights );
00416         packer.put( "children", _children );
00417         packer.after( this );
00418       }
00419 
00420       void walk( Walker& walker ) {
00421         walker.before( this );
00422         for( size_t i=0; i<_children.size(); i++ ) {
00423           _children[i]->walk( walker );
00424         }
00425         walker.after( this );
00426       }
00427 
00428       Node* copy( Copier& copier ) {
00429         copier.before( this );
00430 
00431         WeightedExtentOr* duplicate = new WeightedExtentOr();
00432         for( size_t i=0; i<_children.size(); i++ ) {
00433           RawExtentNode* child = dynamic_cast<RawExtentNode*>(_children[i]->copy( copier ));
00434           duplicate->addChild( _weights[i], child );
00435         }
00436 
00437         return copier.after( this, duplicate );
00438       }
00439     };
00440 
00441     class ExtentOr : public RawExtentNode {
00442     private:
00443       std::vector<RawExtentNode*> _children;
00444 
00445     public:
00446       ExtentOr() {}
00447       ExtentOr( const std::vector<RawExtentNode*>& children ) :
00448         _children(children)
00449       {
00450       }
00451 
00452       ExtentOr( Unpacker& unpacker ) {
00453         _children = unpacker.getRawExtentVector( "children" );
00454       } 
00455 
00456       std::string typeName() const {
00457         return "ExtentOr";
00458       }
00459 
00460       std::string queryText() const {
00461         std::stringstream qtext;
00462 
00463         for( size_t i=0; i<_children.size(); i++ ) {
00464           if(i>0) qtext << " ";
00465           qtext << _children[i]->queryText();
00466         }
00467 
00468         return qtext.str();
00469       }
00470 
00471       UINT64 hashCode() const {
00472         UINT64 hash = 13;
00473 
00474         for( size_t i=0; i<_children.size(); i++ ) {
00475           hash += _children[i]->hashCode();
00476         }
00477         
00478         return hash;
00479       }
00480 
00481       void addChild( RawExtentNode* node ) {
00482         _children.push_back(node);
00483       }
00484 
00485       std::vector<RawExtentNode*>& getChildren() {
00486         return _children;
00487       }
00488 
00489       bool operator== ( Node& node ) {
00490         ExtentOr* other = dynamic_cast<ExtentOr*>(&node);
00491 
00492         if( other == this )
00493           return true;
00494 
00495         if( !other )
00496           return false;
00497 
00498         return unordered_equal( other->_children, _children );
00499       }
00500 
00501       void pack( Packer& packer ) {
00502         packer.before(this);
00503         packer.put( "children", _children );
00504         packer.after(this);
00505       }
00506 
00507       void walk( Walker& walker ) {
00508         walker.before(this);
00509         for( size_t i=0; i<_children.size(); i++ ) {
00510           _children[i]->walk(walker);
00511         }
00512         walker.after(this);
00513       }
00514 
00515       Node* copy( Copier& copier ) {
00516         copier.before(this);
00517         
00518         ExtentOr* duplicate = new ExtentOr();
00519         duplicate->setNodeName( nodeName() );
00520         for( size_t i=0; i<_children.size(); i++ ) {
00521           indri::lang::Node* childNode = _children[i]->copy(copier);
00522           duplicate->addChild( dynamic_cast<RawExtentNode*>(childNode) );
00523         }
00524 
00525         return copier.after(this, duplicate);
00526       }
00527     };
00528 
00529     class ExtentAnd : public RawExtentNode {
00530     private:
00531       std::vector<RawExtentNode*> _children;
00532 
00533     public:
00534       ExtentAnd() {}
00535       ExtentAnd( const std::vector<RawExtentNode*>& children ) :
00536         _children(children)
00537       {
00538       }
00539 
00540       ExtentAnd( Unpacker& unpacker ) {
00541         _children = unpacker.getRawExtentVector( "children" );
00542       }
00543 
00544       std::string typeName() const {
00545         return "ExtentAnd";
00546       }
00547 
00548       std::string queryText() const {
00549         std::stringstream qtext;
00550 
00551         for( size_t i=0; i<_children.size(); i++ ) {
00552           if(i>0) qtext << ",";
00553           qtext << _children[i]->queryText();
00554         }
00555 
00556         return qtext.str();
00557       }
00558 
00559       UINT64 hashCode() const {
00560         UINT64 hash = 15;
00561 
00562         for( size_t i=0; i<_children.size(); i++ ) {
00563           hash += _children[i]->hashCode();
00564         }
00565         
00566         return hash;
00567       }
00568 
00569       void addChild( RawExtentNode* node ) {
00570         _children.push_back(node);
00571       }
00572 
00573       std::vector<RawExtentNode*>& getChildren() {
00574         return _children;
00575       }
00576 
00577       bool operator== ( Node& node ) {
00578         ExtentAnd* other = dynamic_cast<ExtentAnd*>(&node);
00579 
00580         if( other == this )
00581           return true;
00582 
00583         if( !other )
00584           return false;
00585 
00586         return unordered_equal( other->_children, _children );
00587       }
00588 
00589       void pack( Packer& packer ) {
00590         packer.before(this);
00591         packer.put( "children", _children );
00592         packer.after(this);
00593       }
00594 
00595       void walk( Walker& walker ) {
00596         walker.before(this);
00597         for( size_t i=0; i<_children.size(); i++ ) {
00598           _children[i]->walk(walker);
00599         }
00600         walker.after(this);
00601       }
00602 
00603       Node* copy( Copier& copier ) {
00604         copier.before(this);
00605 
00606         ExtentAnd* duplicate = new ExtentAnd();
00607         duplicate->setNodeName( nodeName() );
00608         for( size_t i=0; i<_children.size(); i++ ) {
00609           Node* child = _children[i]->copy(copier);
00610           duplicate->addChild( dynamic_cast<RawExtentNode*>(child) );
00611         }
00612 
00613         return copier.after(this, duplicate);
00614       }
00615     };
00616 
00617     class BAndNode : public RawExtentNode {
00618     private:
00619       std::vector<RawExtentNode*> _children;
00620 
00621     public:
00622       BAndNode() {}
00623 
00624       BAndNode( Unpacker& unpacker ) {
00625         _children = unpacker.getRawExtentVector( "children" );
00626       }
00627 
00628       std::string typeName() const {
00629         return "BAndNode";
00630       }
00631 
00632       std::string queryText() const {
00633         std::stringstream qtext;
00634         qtext << "#band(";
00635         for( size_t i=0; i<_children.size(); i++ ) {
00636           qtext << _children[i]->queryText() << " ";
00637         }
00638         qtext << ")";
00639 
00640         return qtext.str();
00641       } 
00642 
00643       UINT64 hashCode() const {
00644         UINT64 hash = 17;
00645 
00646         for( size_t i=0; i<_children.size(); i++ ) {
00647           hash += _children[i]->hashCode();
00648         }
00649         
00650         return hash;
00651       }
00652 
00653       const std::vector<RawExtentNode*>& getChildren() const {
00654         return _children;
00655       }
00656 
00657       void addChild( RawExtentNode* node ) {
00658         _children.push_back( node );
00659       }
00660 
00661       void pack( Packer& packer ) {
00662         packer.before(this);
00663         packer.put( "children", _children );
00664         packer.after(this);
00665       }
00666 
00667       void walk( Walker& walker ) {
00668         walker.before(this);
00669         for( size_t i=0; i<_children.size(); i++ ) {
00670           _children[i]->walk(walker);
00671         }
00672         walker.after(this);
00673       }
00674       
00675       Node* copy( Copier& copier ) {
00676         copier.before(this);
00677         BAndNode* duplicate = new BAndNode();
00678 
00679         duplicate->setNodeName( nodeName() );
00680         for(size_t i=0; i<_children.size(); i++) {
00681           Node* child = _children[i]->copy(copier);
00682           duplicate->addChild( dynamic_cast<RawExtentNode*>(child) );
00683         }
00684 
00685         return copier.after(this, duplicate);
00686       }
00687     };
00688 
00689     class UWNode : public RawExtentNode {
00690     private:
00691       std::vector<RawExtentNode*> _children;
00692       int _windowSize;
00693 
00694     public:
00695       UWNode() :
00696         _windowSize(-1) // default is unlimited window size
00697       {
00698       }
00699 
00700       UWNode( int windowSize, std::vector<RawExtentNode*>& children ) :
00701         _windowSize(windowSize),
00702         _children(children)
00703       {
00704       }
00705 
00706       UWNode( Unpacker& unpacker ) {
00707         _windowSize = (int) unpacker.getInteger( "windowSize" );
00708         _children = unpacker.getRawExtentVector( "children" );
00709       }
00710 
00711       std::string typeName() const {
00712         return "UWNode";
00713       }
00714 
00715       std::string queryText() const {
00716         std::stringstream qtext;
00717         
00718         if( _windowSize >= 0 )
00719           qtext << "#uw" << _windowSize << "( ";
00720         else
00721           qtext << "#uw( ";
00722           
00723         for( size_t i=0; i<_children.size(); i++ ) {
00724           qtext << _children[i]->queryText() << " ";
00725         }
00726         qtext << ")";
00727         return qtext.str();
00728       }
00729 
00730       UINT64 hashCode() const {
00731         UINT64 hash = 19;
00732         hash += _windowSize;
00733 
00734         for( size_t i=0; i<_children.size(); i++ ) {
00735           hash += _children[i]->hashCode();
00736         }
00737         
00738         return hash;
00739       }
00740 
00741       void setWindowSize( int windowSize ) {
00742         _windowSize = windowSize;
00743       }
00744 
00745       void setWindowSize( const std::string& windowSize ) {
00746         setWindowSize( atoi( windowSize.c_str() ) );
00747       }
00748 
00749       int getWindowSize() const {
00750         return _windowSize;
00751       }
00752 
00753       const std::vector<RawExtentNode*>& getChildren() const {
00754         return _children;
00755       }
00756 
00757       void addChild( RawExtentNode* node ) {
00758         _children.push_back( node );
00759       }
00760 
00761       bool operator== ( Node& node ) {
00762         UWNode* other = dynamic_cast<UWNode*>(&node);
00763 
00764         if( !other )
00765           return false;
00766 
00767         if( other == this )
00768           return true;
00769 
00770         if( other->_windowSize != _windowSize ) {
00771           return false;
00772         }
00773 
00774         return unordered_equal( _children, other->_children );
00775       }
00776 
00777       void pack( Packer& packer ) {
00778         packer.before(this);
00779         packer.put( "windowSize", _windowSize );
00780         packer.put( "children", _children );
00781         packer.after(this);
00782       }
00783 
00784       void walk( Walker& walker ) {
00785         walker.before(this);
00786         for(size_t i=0; i<_children.size(); i++) {
00787           _children[i]->walk(walker);
00788         }
00789         walker.after(this);
00790       }
00791 
00792       Node* copy( Copier& copier ) {
00793         copier.before(this);
00794         
00795         UWNode* duplicate = new UWNode();
00796         duplicate->setNodeName( nodeName() );
00797         duplicate->setWindowSize( _windowSize );
00798         for(size_t i=0; i<_children.size(); i++) {
00799           Node* child = _children[i]->copy(copier);
00800           duplicate->addChild( dynamic_cast<RawExtentNode*>(child) );
00801         }
00802 
00803         return copier.after(this, duplicate);
00804       }
00805     };
00806 
00807     class ODNode : public RawExtentNode {
00808     private:
00809       int _windowSize;
00810       std::vector<RawExtentNode*> _children;
00811 
00812     public:
00813       ODNode( int windowSize, std::vector<RawExtentNode*>& children ) :
00814         _windowSize(windowSize),
00815         _children(children)
00816       {
00817       }
00818 
00819       ODNode() :
00820         _windowSize(-1) // default is unlimited window size
00821       {
00822       }
00823 
00824       ODNode( Unpacker& unpacker ) {
00825         _windowSize = (int) unpacker.getInteger( "windowSize" );
00826         _children = unpacker.getRawExtentVector( "children" );
00827       }
00828 
00829       std::string typeName() const {
00830         return "ODNode";
00831       }
00832 
00833       std::string queryText() const {
00834         std::stringstream qtext;
00835         if( _windowSize >= 0 )
00836           qtext << "#" << _windowSize << "( ";
00837         else
00838           qtext << "#od( ";
00839 
00840         for( size_t i=0; i<_children.size(); i++ ) {
00841           qtext << _children[i]->queryText() << " ";
00842         }
00843         qtext << ")";
00844         return qtext.str();
00845       }
00846 
00847       UINT64 hashCode() const {
00848         UINT64 hash = 23;
00849         hash += _windowSize;
00850 
00851         for( size_t i=0; i<_children.size(); i++ ) {
00852           hash *= 7;
00853           hash += _children[i]->hashCode();
00854         }
00855         
00856         return hash;
00857       }
00858 
00859       const std::vector<RawExtentNode*>& getChildren() const {
00860         return _children;
00861       }
00862 
00863       void setWindowSize( int windowSize ) {
00864         _windowSize = windowSize;
00865       }
00866 
00867       void setWindowSize( const std::string& windowSize ) {
00868         setWindowSize( atoi( windowSize.c_str() ) );
00869       }
00870 
00871       int getWindowSize() const {
00872         return _windowSize;
00873       }
00874 
00875       void addChild( RawExtentNode* node ) {
00876         _children.push_back( node );
00877       }
00878 
00879       bool operator== ( Node& node ) {
00880         ODNode* other = dynamic_cast<ODNode*>(&node);
00881 
00882         if( ! other )
00883           return false;
00884 
00885         if( other == this )
00886           return true;
00887 
00888         if( other->_windowSize != _windowSize )
00889           return false;
00890 
00891         if( _children.size() != other->_children.size() )
00892           return false;
00893 
00894         return equal( _children, other->_children );
00895       }
00896 
00897       void pack( Packer& packer ) {
00898         packer.before(this);
00899         packer.put( "windowSize", _windowSize );
00900         packer.put( "children", _children );
00901         packer.after(this);
00902       }
00903 
00904       void walk( Walker& walker ) {
00905         walker.before(this);
00906         for(size_t i=0; i<_children.size(); i++) {
00907           _children[i]->walk(walker);
00908         }
00909         walker.after(this);
00910       }
00911 
00912       Node* copy( Copier& copier ) {
00913         copier.before(this);
00914         
00915         ODNode* duplicate = new ODNode();
00916         duplicate->setNodeName( nodeName() );
00917         duplicate->setWindowSize( _windowSize );
00918         for(size_t i=0; i<_children.size(); i++) {
00919           Node* child = _children[i]->copy(copier);
00920           duplicate->addChild( dynamic_cast<RawExtentNode*>(child) );
00921         }
00922 
00923         return copier.after(this, duplicate);
00924       }
00925     };
00926 
00927     class FilReqNode : public ScoredExtentNode {
00928     private:
00929       RawExtentNode* _filter;
00930       ScoredExtentNode* _required;
00931 
00932     public:
00933       FilReqNode( RawExtentNode* filter, ScoredExtentNode* required ) {
00934         _filter = filter;
00935         _required = required;
00936       }
00937 
00938       FilReqNode( Unpacker& unpacker ) {
00939         _filter = unpacker.getRawExtentNode( "filter" );
00940         _required = unpacker.getScoredExtentNode( "required" );
00941       }
00942 
00943       std::string typeName() const {
00944         return "FilReqNode";
00945       }
00946 
00947       UINT64 hashCode() const {
00948         return 27 +
00949           _filter->hashCode() * 3 +
00950           _required->hashCode();
00951       }
00952 
00953       std::string queryText() const {
00954         std::stringstream qtext;
00955 
00956         qtext << "#filreq("
00957               << _filter->queryText()
00958               << " "
00959               << _required->queryText()
00960               << ")";
00961         return qtext.str();
00962       }
00963 
00964       RawExtentNode* getFilter() {
00965         return _filter;
00966       }
00967 
00968       ScoredExtentNode* getRequired() {
00969         return _required;
00970       }
00971 
00972       bool operator== ( Node& node ) {
00973         FilReqNode* other = dynamic_cast<FilReqNode*>(&node);
00974 
00975         if( !other )
00976           return false;
00977 
00978         return (*_filter) == (*other->getFilter()) &&
00979           (*_required) == (*other->getRequired());
00980       }
00981 
00982       void pack( Packer& packer ) {
00983         packer.before(this);
00984         packer.put("filter", _filter);
00985         packer.put("required", _required);
00986         packer.after(this);
00987       }
00988 
00989       void walk( Walker& walker ) {
00990         walker.before(this);
00991         _filter->walk(walker);
00992         _required->walk(walker);
00993         walker.after(this);
00994       }
00995 
00996       Node* copy( Copier& copier ) {
00997         copier.before(this);
00998         RawExtentNode* filterDuplicate = dynamic_cast<RawExtentNode*>(_filter->copy(copier));
00999         ScoredExtentNode* requiredDuplicate = dynamic_cast<ScoredExtentNode*>(_required->copy(copier));
01000         FilReqNode* duplicate = new FilReqNode( filterDuplicate, requiredDuplicate );
01001         return copier.after(this, duplicate);
01002       }
01003     };
01004 
01005     class FilRejNode : public ScoredExtentNode {
01006       // Scored node written "#filrej(<filter> <disallowed>)": one raw extent child and one scored child.
01007     private:
01008       RawExtentNode* _filter;
01009       ScoredExtentNode* _disallowed;
01010     public:
01011       // Stores the two child pointers exactly as given.
01012       FilRejNode( RawExtentNode* filter, ScoredExtentNode* disallowed ) :
01013         _filter(filter),
01014         _disallowed(disallowed) {
01015       }
01016 
01017       // Reads the children back from the "filter" and "disallowed" entries.
01018       FilRejNode( Unpacker& unpacker ) :
01019         _filter( unpacker.getRawExtentNode( "filter" ) ),
01020         _disallowed( unpacker.getScoredExtentNode( "disallowed" ) ) {
01021       }
01022 
01023       // Type tag for this node class.
01024       std::string typeName() const {
01025         return std::string( "FilRejNode" );
01026       }
01027 
01028       // Query text: "#filrej(" + filter text + " " + disallowed text + ")".
01029       std::string queryText() const {
01030         std::string text( "#filrej(" );
01031         text += _filter->queryText();
01032         text += " ";
01033         text += _disallowed->queryText();
01034         text += ")";
01035         return text;
01036       }
01037 
01038       // Combines 29, three times the filter's hash, and the disallowed hash.
01039       UINT64 hashCode() const {
01040         UINT64 code = 29;
01041         code += _filter->hashCode() * 3;
01042         code += _disallowed->hashCode();
01043         return code;
01044       }
01045 
01046       RawExtentNode* getFilter() { return _filter; }
01047       ScoredExtentNode* getDisallowed() { return _disallowed; }
01048 
01049       // True only for another FilRejNode whose children both compare equal.
01050       bool operator== ( Node& node ) {
01051         FilRejNode* that = dynamic_cast<FilRejNode*>(&node);
01052         if( !that )
01053           return false;
01054         if( !( (*_filter) == (*that->getFilter()) ) )
01055           return false;
01056         return (*_disallowed) == (*that->getDisallowed());
01057       }
01058 
01059       // Writes both children under the keys the Unpacker constructor reads.
01060       void pack( Packer& packer ) {
01061         packer.before( this );
01062         packer.put( "filter", _filter );
01063         packer.put( "disallowed", _disallowed );
01064         packer.after( this );
01065       }
01066 
01067       // Calls walker.before, walks the filter and disallowed children, then walker.after.
01068       void walk( Walker& walker ) {
01069         walker.before( this );
01070         _filter->walk( walker );
01071         _disallowed->walk( walker );
01072         walker.after( this );
01073       }
01074 
01075       // Copies both children through the copier into a new FilRejNode.
01076       Node* copy( Copier& copier ) {
01077         copier.before( this );
01078         RawExtentNode* filterClone = dynamic_cast<RawExtentNode*>( _filter->copy( copier ) );
01079         ScoredExtentNode* disallowedClone = dynamic_cast<ScoredExtentNode*>( _disallowed->copy( copier ) );
01080         return copier.after( this, new FilRejNode( filterClone, disallowedClone ) );
01081       }
01082     };
01083 
01084     class FieldLessNode : public RawExtentNode {
01085       // Raw extent node written "#less(<field> <constant>)": a field child plus an integer constant.
01086     private:
01087       RawExtentNode* _field;
01088       INT64 _constant;
01089     public:
01090       FieldLessNode( RawExtentNode* field, INT64 constant ) {
01091         _field = field;
01092         _constant = constant;
01093       }
01094 
01095       // Reads the "field" node and the "constant" integer from an Unpacker.
01096       FieldLessNode( Unpacker& unpacker ) :
01097         _field( unpacker.getRawExtentNode( "field" ) ),
01098         _constant( unpacker.getInteger( "constant" ) ) {
01099       }
01100 
01101       std::string typeName() const { return "FieldLessNode"; }
01102 
01103       // Query text: "#less(" + field text + " " + constant + ")".
01104       std::string queryText() const {
01105         std::stringstream out;
01106         out << "#less(" << _field->queryText();
01107         out << " " << _constant << ")";
01108         return out.str();
01109       }
01110 
01111       // Adds 31 and the constant, then the field's hash.
01112       UINT64 hashCode() const {
01113         const INT64 seeded = 31 + _constant;
01114         return seeded + _field->hashCode();
01115       }
01116 
01117       INT64 getConstant() const { return _constant; }
01118       RawExtentNode* getField() { return _field; }
01119 
01120       // True only for another FieldLessNode with the same constant and an equal field.
01121       bool operator== ( Node& node ) {
01122         FieldLessNode* that = dynamic_cast<FieldLessNode*>(&node);
01123         if( !that )
01124           return false;
01125         if( that->getConstant() != _constant )
01126           return false;
01127         return *that->getField() == *_field;
01128       }
01129 
01130       // Copies the field child through the copier into a new FieldLessNode.
01131       Node* copy( Copier& copier ) {
01132         copier.before( this );
01133         RawExtentNode* fieldClone = dynamic_cast<RawExtentNode*>( _field->copy( copier ) );
01134         return copier.after( this, new FieldLessNode( fieldClone, _constant ) );
01135       }
01136 
01137       // Calls walker.before, walks the field child, then walker.after.
01138       void walk( Walker& walker ) {
01139         walker.before( this );
01140         _field->walk( walker );
01141         walker.after( this );
01142       }
01143 
01144       // Writes the field and constant under the keys the Unpacker constructor reads.
01145       void pack( Packer& packer ) {
01146         packer.before( this );
01147         packer.put( "field", _field );
01148         packer.put( "constant", _constant );
01149         packer.after( this );
01150       }
01151     };
01152 
01153     class FieldGreaterNode : public RawExtentNode {
01154       // Raw extent node written "#greater(<field> <constant>)": a field child plus an integer constant.
01155     private:
01156       RawExtentNode* _field;
01157       INT64 _constant;
01158     public:
01159       FieldGreaterNode( RawExtentNode* field, INT64 constant ) {
01160         _field = field;
01161         _constant = constant;
01162       }
01163 
01164       // Reads the "field" node and the "constant" integer from an Unpacker.
01165       FieldGreaterNode( Unpacker& unpacker ) :
01166         _field( unpacker.getRawExtentNode( "field" ) ),
01167         _constant( unpacker.getInteger( "constant" ) ) {
01168       }
01169 
01170       std::string typeName() const { return "FieldGreaterNode"; }
01171 
01172       // Query text: "#greater(" + field text + " " + constant + ")".
01173       std::string queryText() const {
01174         std::stringstream out;
01175         out << "#greater(" << _field->queryText();
01176         out << " " << _constant << ")";
01177         return out.str();
01178       }
01179 
01180       // Adds 33 and the constant, then the field's hash.
01181       UINT64 hashCode() const {
01182         const INT64 seeded = 33 + _constant;
01183         return seeded + _field->hashCode();
01184       }
01185 
01186       INT64 getConstant() const { return _constant; }
01187       RawExtentNode* getField() { return _field; }
01188 
01189       // True only for another FieldGreaterNode with the same constant and an equal field.
01190       bool operator== ( Node& node ) {
01191         FieldGreaterNode* that = dynamic_cast<FieldGreaterNode*>(&node);
01192         if( !that )
01193           return false;
01194         if( that->getConstant() != _constant )
01195           return false;
01196         return *that->getField() == *_field;
01197       }
01198 
01199       // Copies the field child through the copier into a new FieldGreaterNode.
01200       Node* copy( Copier& copier ) {
01201         copier.before( this );
01202         RawExtentNode* fieldClone = dynamic_cast<RawExtentNode*>( _field->copy( copier ) );
01203         return copier.after( this, new FieldGreaterNode( fieldClone, _constant ) );
01204       }
01205 
01206       // Calls walker.before, walks the field child, then walker.after.
01207       void walk( Walker& walker ) {
01208         walker.before( this );
01209         _field->walk( walker );
01210         walker.after( this );
01211       }
01212 
01213       // Writes the field and constant under the keys the Unpacker constructor reads.
01214       void pack( Packer& packer ) {
01215         packer.before( this );
01216         packer.put( "field", _field );
01217         packer.put( "constant", _constant );
01218         packer.after( this );
01219       }
01220     };
01221 
01222     class FieldBetweenNode : public RawExtentNode {
01223       // Raw extent node written "#between(<field> <low> <high>)": a field child
01224       // plus the two integer bounds it is paired with.
01225     private:
01226       RawExtentNode* _field;
01227       INT64 _low;
01228       INT64 _high;
01229 
01230     public:
01231       FieldBetweenNode( RawExtentNode* field, INT64 low, INT64 high ) {
01232         _field = field;
01233         _low = low;
01234         _high = high;
01235       }
01236 
01237       // Reads the "field" node and the "low" and "high" integers from an Unpacker.
01238       FieldBetweenNode( Unpacker& unpacker ) :
01239         _field( unpacker.getRawExtentNode( "field" ) ),
01240         _low( unpacker.getInteger( "low" ) ),
01241         _high( unpacker.getInteger( "high" ) ) {
01242       }
01243 
01244       std::string typeName() const { return "FieldBetweenNode"; }
01245 
01246       // Query text: "#between(" + field text + " " + low + " " + high + ")".
01247       std::string queryText() const {
01248         std::stringstream out;
01249         out << "#between(" << _field->queryText();
01250         out << " " << _low;
01251         out << " " << _high << ")";
01252         return out.str();
01253       }
01254 
01255       // Adds 37, three times the low bound and the high bound, then the field's hash.
01256       UINT64 hashCode() const {
01257         const INT64 bounds = 37 + _low * 3 + _high;
01258         return bounds + _field->hashCode();
01259       }
01260 
01261       INT64 getLow() const { return _low; }
01262       INT64 getHigh() const { return _high; }
01263       RawExtentNode* getField() { return _field; }
01264 
01265       // True only for another FieldBetweenNode with the same bounds and an equal field.
01266       bool operator== ( Node& node ) {
01267         FieldBetweenNode* that = dynamic_cast<FieldBetweenNode*>(&node);
01268         if( !that )
01269           return false;
01270         if( that->getLow() != _low )
01271           return false;
01272         if( that->getHigh() != _high )
01273           return false;
01274         return *that->getField() == *_field;
01275       }
01276 
01277       // Copies the field child through the copier into a new FieldBetweenNode.
01278       Node* copy( Copier& copier ) {
01279         copier.before( this );
01280         RawExtentNode* fieldClone = dynamic_cast<RawExtentNode*>( _field->copy( copier ) );
01281         return copier.after( this, new FieldBetweenNode( fieldClone, _low, _high ) );
01282       }
01283 
01284       // Calls walker.before, walks the field child, then walker.after.
01285       void walk( Walker& walker ) {
01286         walker.before( this );
01287         _field->walk( walker );
01288         walker.after( this );
01289       }
01290 
01291       // Writes the field and both bounds under the keys the Unpacker constructor reads.
01292       void pack( Packer& packer ) {
01293         packer.before( this );
01294         packer.put( "field", _field );
01295         packer.put( "low", _low );
01296         packer.put( "high", _high );
01297         packer.after( this );
01298       }
01299     };
01300 
01301     class FieldEqualsNode : public RawExtentNode {
01302       // Raw extent node written "#equals(<field> <constant>)": a field child plus an integer constant.
01303     private:
01304       RawExtentNode* _field;
01305       INT64 _constant;
01306     public:
01307       FieldEqualsNode( RawExtentNode* field, INT64 constant ) {
01308         _field = field;
01309         _constant = constant;
01310       }
01311 
01312       // Reads the "field" node and the "constant" integer from an Unpacker.
01313       FieldEqualsNode( Unpacker& unpacker ) :
01314         _field( unpacker.getRawExtentNode( "field" ) ),
01315         _constant( unpacker.getInteger( "constant" ) ) {
01316       }
01317 
01318       std::string typeName() const { return "FieldEqualsNode"; }
01319 
01320       // Query text: "#equals(" + field text + " " + constant + ")".
01321       std::string queryText() const {
01322         std::stringstream out;
01323         out << "#equals(" << _field->queryText();
01324         out << " " << _constant << ")";
01325         return out.str();
01326       }
01327 
01328       // Adds 41 and the constant, then the field's hash.
01329       UINT64 hashCode() const {
01330         const INT64 seeded = 41 + _constant;
01331         return seeded + _field->hashCode();
01332       }
01333 
01334       INT64 getConstant() const { return _constant; }
01335       RawExtentNode* getField() { return _field; }
01336 
01337       // True only for another FieldEqualsNode with the same constant and an equal field.
01338       bool operator== ( Node& node ) {
01339         FieldEqualsNode* that = dynamic_cast<FieldEqualsNode*>(&node);
01340         if( !that )
01341           return false;
01342         if( that->getConstant() != _constant )
01343           return false;
01344         return *that->getField() == *_field;
01345       }
01346 
01347       // Copies the field child through the copier into a new FieldEqualsNode.
01348       Node* copy( Copier& copier ) {
01349         copier.before( this );
01350         RawExtentNode* fieldClone = dynamic_cast<RawExtentNode*>( _field->copy( copier ) );
01351         return copier.after( this, new FieldEqualsNode( fieldClone, _constant ) );
01352       }
01353 
01354       // Calls walker.before, walks the field child, then walker.after.
01355       void walk( Walker& walker ) {
01356         walker.before( this );
01357         _field->walk( walker );
01358         walker.after( this );
01359       }
01360 
01361       // Writes the field and constant under the keys the Unpacker constructor reads.
01362       void pack( Packer& packer ) {
01363         packer.before( this );
01364         packer.put( "field", _field );
01365         packer.put( "constant", _constant );
01366         packer.after( this );
01367       }
01368     };
01369 
01370     class RawScorerNode : public ScoredExtentNode {
01371       // Scored node over a raw extent child and an optional context child, carrying
01372       // occurrence statistics and a smoothing specification string. walk() and copy()
01373       // accept a null _raw or _context, and queryText() and hashCode() now do too.
01374     protected:
01375       double _occurrences; // number of occurrences within this context
01376       double _contextSize; // number of terms that occur within this context
01377       double _maximumContextFraction; // never packed; zeroed by both constructors
01378       int _documentOccurrences; // number of documents we occur in
01379       int _documentCount; // total number of documents
01380 
01381       RawExtentNode* _raw;
01382       RawExtentNode* _context;
01383       std::string _smoothing;
01384 
01385     public:
01386       // Wraps the given children with zeroed statistics; smoothing defaults to
01387       // the specification string given in the signature.
01388       RawScorerNode( RawExtentNode* raw, RawExtentNode* context, std::string smoothing = "method:dirichlet,mu:2500" ) {
01389         _raw = raw;
01390         _context = context;
01391 
01392         _occurrences = 0;
01393         _contextSize = 0;
01394         _maximumContextFraction = 0; // was left indeterminate and then duplicated by copy()
01395         _documentOccurrences = 0;
01396         _documentCount = 0;
01397         _smoothing = smoothing;
01398       }
01399 
01400       // Restores children, statistics and smoothing from an Unpacker.
01401       RawScorerNode( Unpacker& unpacker ) {
01402         _raw = unpacker.getRawExtentNode( "raw" );
01403         _context = unpacker.getRawExtentNode( "context" );
01404 
01405         _occurrences = unpacker.getDouble( "occurrences" );
01406         _contextSize = unpacker.getDouble( "contextSize" );
01407         _maximumContextFraction = 0; // pack() does not write this field
01408         _documentOccurrences = unpacker.getInteger( "documentOccurrences" );
01409         _documentCount = unpacker.getInteger( "documentCount" );
01410         _smoothing = unpacker.getString( "smoothing" );
01411       }
01412 
01413       virtual std::string typeName() const {
01414         return "RawScorerNode";
01415       }
01416 
01417       // Raw child's text, followed (when a context is set) by "(<context text>)";
01418       // a '.' is inserted first if the raw text does not already contain one.
01419       std::string queryText() const {
01420         std::string text;
01421 
01422         if( _raw )
01423           text = _raw->queryText();
01424 
01425         if( _context ) {
01426           // compare against npos rather than squeezing the position into an int
01427           if( text.find('.') == std::string::npos )
01428             text += '.';
01429 
01430           text += "(" + _context->queryText() + ")";
01431         }
01432 
01433         return text;
01434       }
01435 
01436       // 43 plus the hashes of whichever children are present and of the smoothing string.
01437       virtual UINT64 hashCode() const {
01438         UINT64 hash = 43;
01439 
01440         if( _raw )
01441           hash += _raw->hashCode();
01442 
01443         if( _context )
01444           hash += _context->hashCode();
01445 
01446         indri::utility::GenericHash<const char*> gh;
01447         hash += gh( _smoothing.c_str() );
01448 
01449         return hash;
01450       }
01451 
01452       // Statistics and smoothing accessors.
01453       double getOccurrences() const { return _occurrences; }
01454       double getContextSize() const { return _contextSize; }
01455       int getDocumentOccurrences() const { return _documentOccurrences; }
01456       int getDocumentCount() const { return _documentCount; }
01457       const std::string& getSmoothing() const { return _smoothing; }
01458 
01459       // Child accessors; either pointer may be null.
01460       RawExtentNode* getContext() { return _context; }
01461       RawExtentNode* getRawExtent() { return _raw; }
01462 
01463       // Replaces all four statistics at once.
01464       void setStatistics( double occurrences, double contextSize, int documentOccurrences, int documentCount ) {
01465         _occurrences = occurrences;
01466         _contextSize = contextSize;
01467         _documentOccurrences = documentOccurrences;
01468         _documentCount = documentCount;
01469       }
01470 
01471       // The setters below store their argument as-is.
01472       void setContext( RawExtentNode* context ) {
01473         _context = context;
01474       }
01475 
01476       void setRawExtent( RawExtentNode* rawExtent ) {
01477         _raw = rawExtent;
01478       }
01479 
01480       void setSmoothing( const std::string& smoothing ) {
01481         _smoothing = smoothing;
01482       }
01483 
01484       // Writes the children, the four statistics and the smoothing string.
01485       void pack( Packer& packer ) {
01486         packer.before(this);
01487         packer.put( "raw", _raw );
01488         packer.put( "context", _context );
01489 
01490         packer.put( "occurrences", _occurrences );
01491         packer.put( "contextSize", _contextSize );
01492         packer.put( "documentOccurrences", _documentOccurrences );
01493         packer.put( "documentCount", _documentCount );
01494         packer.put( "smoothing", _smoothing );
01495         packer.after(this);
01496       }
01497 
01498       // Calls walker.before, walks whichever children are present, then walker.after.
01499       void walk( Walker& walker ) {
01500         walker.before(this);
01501         if( _raw )
01502           _raw->walk(walker);
01503         if( _context )
01504           _context->walk(walker);
01505         walker.after(this);
01506       }
01507 
01508       // Copy-constructs a duplicate (keeping statistics and smoothing) and points
01509       // it at copies of the children; the context is copied before the raw extent.
01510       virtual Node* copy( Copier& copier ) {
01511         copier.before(this);
01512 
01513         RawExtentNode* duplicateContext = _context ? dynamic_cast<RawExtentNode*>(_context->copy(copier)) : 0;
01514         RawExtentNode* duplicateRaw = _raw ? dynamic_cast<RawExtentNode*>(_raw->copy(copier)) : 0;
01515         RawScorerNode* duplicate = new RawScorerNode(*this);
01516         duplicate->setRawExtent( duplicateRaw );
01517         duplicate->setContext( duplicateContext );
01518 
01519         return copier.after(this, duplicate);
01520       }
01521     };
01522 
01523     class TermFrequencyScorerNode : public ScoredExtentNode {
01524       // Scored leaf node for one term: it stores the term text, a stemmed flag
01525       // (stemmed text is quoted in queryText), a smoothing string and statistics.
01526     private:
01527       double _occurrences; // number of occurrences within this context
01528       double _contextSize; // number of terms that occur within this context
01529       int _documentOccurrences; // number of documents we occur in
01530       int _documentCount; // total number of documents
01531 
01532       std::string _text;
01533       std::string _smoothing;
01534       bool _stemmed;
01535 
01536     public:
01537       // Starts with zeroed statistics and an empty smoothing string.
01538       TermFrequencyScorerNode( const std::string& text, bool stemmed ) :
01539         _occurrences(0),
01540         _contextSize(0),
01541         _documentOccurrences(0),
01542         _documentCount(0),
01543         _text(text),
01544         _smoothing(""),
01545         _stemmed(stemmed) {
01546       }
01547 
01548       // Restores every field from an Unpacker; the reads stay in the body so
01549       // they are issued in exactly this order.
01550       TermFrequencyScorerNode( Unpacker& unpacker ) {
01551         _occurrences = unpacker.getDouble( "occurrences" );
01552         _contextSize = unpacker.getDouble( "contextSize" );
01553         _documentOccurrences = unpacker.getInteger( "documentOccurrences" );
01554         _documentCount = unpacker.getInteger( "documentCount" );
01555         _smoothing = unpacker.getString( "smoothing" );
01556         _text = unpacker.getString( "text" );
01557         _stemmed = unpacker.getBoolean( "stemmed" );
01558       }
01559 
01560       const std::string& getText() const {
01561         return _text;
01562       }
01563 
01564       bool getStemmed() const {
01565         return _stemmed;
01566       }
01567 
01568       std::string typeName() const { return "TermFrequencyScorerNode"; }
01569 
01570       // The bare term text, or the text wrapped in double quotes when _stemmed is set.
01571       std::string queryText() const {
01572         if( _stemmed )
01573           return "\"" + _text + "\"";
01574         return _text;
01575       }
01576 
01577       // 47 (50 when stemmed) plus seven times the text hash plus the smoothing hash.
01578       UINT64 hashCode() const {
01579         const int accumulator = _stemmed ? 50 : 47;
01580         indri::utility::GenericHash<const char*> hasher;
01581 
01582         return accumulator + hasher( _text.c_str() ) * 7 + hasher( _smoothing.c_str() );
01583       }
01584 
01585       double getOccurrences() const {
01586         return _occurrences;
01587       }
01588 
01589       double getContextSize() const {
01590         return _contextSize;
01591       }
01592 
01593       int getDocumentOccurrences() const {
01594         return _documentOccurrences;
01595       }
01596 
01597       int getDocumentCount() const {
01598         return _documentCount;
01599       }
01600 
01601       const std::string& getSmoothing() const {
01602         return _smoothing;
01603       }
01604 
01605       // Replaces all four statistics at once.
01606       void setStatistics( double occurrences, double contextSize, int documentOccurrences, int documentCount ) {
01607         _occurrences = occurrences;
01608         _contextSize = contextSize;
01609         _documentOccurrences = documentOccurrences;
01610         _documentCount = documentCount;
01611       }
01612 
01613       void setSmoothing( const std::string& smoothing ) {
01614         _smoothing = smoothing;
01615       }
01616 
01617       // Writes every field under the key the Unpacker constructor reads it from.
01618       void pack( Packer& packer ) {
01619         packer.before( this );
01620         packer.put( "occurrences", _occurrences );
01621         packer.put( "contextSize", _contextSize );
01622         packer.put( "documentOccurrences", _documentOccurrences );
01623         packer.put( "documentCount", _documentCount );
01624         packer.put( "text", _text );
01625         packer.put( "stemmed", _stemmed );
01626         packer.put( "smoothing", _smoothing );
01627         packer.after( this );
01628       }
01629 
01630       // A leaf node: only the before/after callbacks are invoked.
01631       void walk( Walker& walker ) {
01632         walker.before( this );
01633         walker.after( this );
01634       }
01635 
01636       // Duplicates the node with its copy constructor.
01637       Node* copy( Copier& copier ) {
01638         copier.before( this );
01639         return copier.after( this, new TermFrequencyScorerNode( *this ) );
01640       }
01641     };
01642 
01643     // The CachedFrequencyScorerNode should only be used on a local machine;
01644     // it should not be transferred across the network
01645     class CachedFrequencyScorerNode : public indri::lang::ScoredExtentNode {
01646       // Scorer node holding a raw child, an optional context child, a smoothing
01647       // string and an opaque list pointer that is handed on unchanged by copy().
01648     private:
01649       indri::lang::Node* _raw;
01650       indri::lang::Node* _context; // may be null
01651       std::string _smoothing;
01652       void* _list;
01653     
01654     public:
01655       CachedFrequencyScorerNode( indri::lang::Node* raw, indri::lang::Node* context )
01656         :
01657         _raw(raw),
01658         _context(context),
01659         _list(0)
01660       {
01661       }
01662 
01663       // Unpacking is refused via LEMUR_THROW. The pointers are nulled first so
01664       // no member is left indeterminate, whatever the macro expands to.
01665       CachedFrequencyScorerNode( Unpacker& unpacker )
01666         :
01667         _raw(0),
01668         _context(0),
01669         _list(0)
01670       {
01671         LEMUR_THROW( LEMUR_RUNTIME_ERROR, "CachedFrequencyScorerNode should not be used on the network" );
01672 
01673         _raw = unpacker.getRawExtentNode( "raw" );
01674         _context = unpacker.getRawExtentNode( "context" );
01675         _smoothing = unpacker.getString( "smoothing" );
01676       }
01677 
01678       void setList( void* list ) { _list = list; }
01679       void* getList() { return _list; }
01680       std::string typeName() const { return "CachedFrequencyScorerNode"; }
01681 
01682       // Raw child's text, followed (when a context is set) by "(<context text>)";
01683       // a '.' is inserted first if the raw text does not already contain one.
01684       std::string queryText() const {
01685         std::stringstream qtext;
01686         
01687         qtext << _raw->queryText();
01688         if( _context ) {
01689           // if we haven't added a period yet, put one in
01690           if( qtext.str().find('.') == std::string::npos )
01691             qtext << '.';
01692 
01693           qtext << "(" << _context->queryText() << ")";
01694         }
01695 
01696         return qtext.str();
01697       }
01698 
01699       // 53 plus seven times the raw hash, the context hash (when set) and the smoothing
01700       // hash. The seed used to be declared but never added; a null context was dereferenced.
01701       UINT64 hashCode() const {
01702         UINT64 accumulator = 53;
01703 
01704         indri::utility::GenericHash<const char*> hash;
01705         accumulator += _raw->hashCode() * 7;
01706         if( _context )
01707           accumulator += _context->hashCode();
01708         accumulator += hash( _smoothing.c_str() );
01709 
01710         return accumulator;
01711       }
01712 
01713       void setSmoothing( const std::string& smoothing ) { _smoothing = smoothing; }
01714       const std::string& getSmoothing() const { return _smoothing; }
01715       indri::lang::Node* getRaw() { return _raw; }
01716       indri::lang::Node* getContext() { return _context; }
01717 
01718       // Writes both children and the smoothing string; _list is not written.
01719       void pack( Packer& packer ) {
01720         packer.before(this);
01721         packer.put( "raw", _raw );
01722         packer.put( "context", _context );
01723         packer.put( "smoothing", _smoothing );
01724         packer.after(this);
01725       }
01726 
01727       // Calls walker.before, walks the raw child and any context, then walker.after.
01728       void walk( Walker& walker ) {
01729         walker.before(this);
01730         _raw->walk( walker );
01731         if( _context )
01732           _context->walk( walker );
01733         walker.after(this);
01734       }
01735 
01736       // Builds a new node around copies of the children and carries over the
01737       // node name, the smoothing string and the list pointer.
01738       indri::lang::Node* copy( Copier& copier ) {
01739         copier.before(this);
01740 
01741         indri::lang::RawExtentNode* duplicateRaw = dynamic_cast<indri::lang::RawExtentNode*>(_raw->copy(copier));
01742         indri::lang::RawExtentNode* duplicateContext = 0;
01743 
01744         if( _context ) 
01745           duplicateContext = dynamic_cast<indri::lang::RawExtentNode*>(_context->copy(copier));
01746 
01747         CachedFrequencyScorerNode* duplicate = new CachedFrequencyScorerNode( duplicateRaw,
01748                                                                               duplicateContext );
01749         duplicate->setNodeName( nodeName() );
01750         duplicate->setSmoothing( _smoothing );
01751         duplicate->setList( getList() );
01752 
01753         return copier.after( this, duplicate );
01754       }
01755     };
01756 
01757     class PriorNode : public ScoredExtentNode {
01758       // Scored node written "#prior(<name>)"; its only state is the prior's name.
01759     private:
01760       std::string _priorName;
01761 
01762     public:
01763       PriorNode( const std::string& priorName ) {
01764         _priorName = priorName;
01765       }
01766 
01767       // Reads the name from the "priorName" entry of an Unpacker.
01768       PriorNode( Unpacker& unpacker ) :
01769         _priorName( unpacker.getString( "priorName" ) )
01770       {
01771       }
01772 
01773       // Query text: "#prior(" + name + ")".
01774       std::string queryText() const {
01775         return "#prior(" + _priorName + ")";
01776       }
01777 
01778       std::string typeName() const { return "PriorNode"; }
01779       const std::string& getPriorName() const { return _priorName; }
01780 
01781       // Hash of the name plus 9.
01782       UINT64 hashCode() const {
01783         indri::utility::GenericHash<const char*> hasher;
01784         return hasher( _priorName.c_str() ) + 9;
01785       }
01786 
01787       // A leaf node: only the before/after callbacks are invoked.
01788       void walk( Walker& walker ) {
01789         walker.before( this );
01790         walker.after( this );
01791       }
01792 
01793       // Builds a new PriorNode with the same prior name and node name.
01794       indri::lang::Node* copy( Copier& copier ) {
01795         copier.before( this );
01796         PriorNode* clone = new PriorNode( _priorName );
01797         clone->setNodeName( nodeName() );
01798         return copier.after( this, clone );
01799       }
01800 
01801       // Writes the name under the key the Unpacker constructor reads.
01802       void pack( Packer& packer ) {
01803         packer.before( this );
01804         packer.put( "priorName", _priorName );
01805         packer.after( this );
01806       }
01807     };
01808 
01809     /* abstract */ class UnweightedCombinationNode : public ScoredExtentNode {
01810     protected:
01811       std::vector<ScoredExtentNode*> _children;
01812 
01813       void _unpack( Unpacker& unpacker ) {
01814         _children = unpacker.getScoredExtentVector( "children" );
01815       }
01816 
01817       UINT64 _hashCode() const {
01818         UINT64 accumulator = 0;
01819 
01820         for( size_t i=0; i<_children.size(); i++ ) {
01821           accumulator += _children[i]->hashCode();
01822         }
01823 
01824         return accumulator;
01825       }
01826 
01827       template<class _ThisType>
01828       void _walk( _ThisType* ptr, Walker& walker ) {
01829         walker.before(ptr);
01830 
01831         for( size_t i=0; i<_children.size(); i++ ) {
01832           _children[i]->walk(walker);
01833         }
01834         
01835         walker.after(ptr);
01836       }
01837 
01838       template<class _ThisType>
01839       Node* _copy( _ThisType* ptr, Copier& copier ) {
01840         copier.before(ptr);
01841         
01842         _ThisType* duplicate = new _ThisType();
01843         duplicate->setNodeName( nodeName() );
01844         for( size_t i=0; i<_children.size(); i++ ) {
01845           duplicate->addChild( dynamic_cast<ScoredExtentNode*>(_children[i]->copy(copier)) );
01846         } 
01847 
01848         return copier.after(ptr, duplicate);
01849       }
01850 
01851       void _childText( std::stringstream& qtext ) const {
01852         for( size_t i=0; i<_children.size(); i++ ) {
01853           if(i>0) qtext << " ";
01854           qtext << _children[i]->queryText();
01855         }
01856       }
01857 
01858     public:
01859       const std::vector<ScoredExtentNode*>& getChildren() {
01860         return _children;
01861       }
01862 
01863       void addChild( ScoredExtentNode* scoredNode ) {
01864         _children.push_back( scoredNode );
01865       }
01866 
01867       void pack( Packer& packer ) {
01868         packer.before(this);
01869         packer.put( "children", _children );
01870         packer.after(this);
01871       }
01872     };
01873 
01874     /* abstract */ class WeightedCombinationNode : public ScoredExtentNode {
01875       // Shared base for scored nodes that combine (weight, child) pairs.
01876     protected:
01877       std::vector< std::pair<double, ScoredExtentNode*> > _children;
01878 
01879       // Appends pairs built from the unpacked "weights" and "children" vectors. Only positions present
01880       // in both are paired; the loop used to index the node vector by the weight count alone.
01881       void _unpack( Unpacker& unpacker ) {
01882         std::vector<double> weights = unpacker.getDoubleVector( "weights" );
01883         std::vector<ScoredExtentNode*> nodes = unpacker.getScoredExtentVector( "children" );
01884         const size_t pairs = weights.size() < nodes.size() ? weights.size() : nodes.size();
01885         for( size_t i=0; i<pairs; i++ ) {
01886           _children.push_back( std::make_pair( weights[i], nodes[i] ) );
01887         }
01888       }
01889 
01890       // Sum over the children of the truncated (weight * 1000) plus the child's hash.
01891       UINT64 _hashCode() const {
01892         UINT64 accumulator = 0;
01893         for( size_t i=0; i<_children.size(); i++ ) {
01894           accumulator += (UINT64) (_children[i].first * 1000) + _children[i].second->hashCode();
01895         }
01896         return accumulator;
01897       }
01898 
01899       // Calls walker.before(ptr), walks the children in order, then walker.after(ptr).
01900       template<class _ThisType>
01901       void _walk( _ThisType* ptr, Walker& walker ) {
01902         walker.before(ptr);
01903         for( size_t i=0; i<_children.size(); i++ ) {
01904           _children[i].second->walk(walker);
01905         }
01906         walker.after(ptr);
01907       }
01908 
01909       // Builds a new _ThisType carrying this node's name and a copy of each child with its weight.
01910       template<class _ThisType>
01911       Node* _copy( _ThisType* ptr, Copier& copier ) {
01912         copier.before(ptr);
01913 
01914         _ThisType* duplicate = new _ThisType;
01915         duplicate->setNodeName( nodeName() );
01916         for( size_t i=0; i<_children.size(); i++ ) {
01917           double childWeight = _children[i].first;
01918           Node* childCopy = _children[i].second->copy( copier );
01919           duplicate->addChild( childWeight, dynamic_cast<ScoredExtentNode*>(childCopy) );
01920         }
01921         return copier.after(ptr, duplicate);
01922       }
01923 
01924       // Streams "<weight> <child text>" for each child, separated by single spaces.
01925       void _childText( std::stringstream& qtext ) const {
01926         for( size_t i=0; i<_children.size(); i++ ) {
01927           if(i>0) qtext << " ";
01928           qtext << _children[i].first
01929                 << " "
01930                 << _children[i].second->queryText();
01931         }
01932       }
01933 
01934     public:
01935       const std::vector< std::pair<double, ScoredExtentNode*> >& getChildren() { return _children; }
01936       void addChild( double weight, ScoredExtentNode* scoredNode ) { _children.push_back( std::make_pair( weight, scoredNode) ); }
01937 
01938       // Convenience overload: the weight text is converted with atof.
01939       void addChild( const std::string& weight, ScoredExtentNode* scoredNode ) {
01940         addChild( atof( weight.c_str() ), scoredNode );
01941       }
01942 
01943       // Writes the weights and the child nodes as two parallel vectors.
01944       void pack( Packer& packer ) {
01945         packer.before(this);
01946         std::vector<double> weights;
01947         std::vector<ScoredExtentNode*> nodes;
01948 
01949         for( size_t i=0; i<_children.size(); i++ ) {
01950           weights.push_back( _children[i].first );
01951           nodes.push_back( _children[i].second );
01952         }
01953 
01954         packer.put( "weights", weights );
01955         packer.put( "children", nodes );
01956         packer.after(this);
01957       }
01958     };
01959 
01960     class OrNode : public UnweightedCombinationNode {
01961       // Unweighted combination written "#or(<children>)"; hashing, walking and
01962       // copying are delegated to the UnweightedCombinationNode helpers.
01963     public:
01964       OrNode() {
01965       }
01966 
01967       // Loads the child list through the inherited _unpack helper.
01968       OrNode( Unpacker& unpacker ) {
01969         this->_unpack( unpacker );
01970       }
01971 
01972       std::string typeName() const { return "OrNode"; }
01973 
01974       // Query text: "#or(" + space-separated child texts + ")".
01975       std::string queryText() const {
01976         std::stringstream out;
01977 
01978         out << "#or(";
01979         this->_childText( out );
01980         out << ")";
01981         return out.str();
01982       }
01983 
01984       // 55 plus the inherited sum of the children's hashes.
01985       UINT64 hashCode() const {
01986         return 55 + this->_hashCode();
01987       }
01988 
01989       void walk( Walker& walker ) { this->_walk( this, walker ); }
01990       Node* copy( Copier& copier ) { return this->_copy( this, copier ); }
01991     };
01992 
01993     class NotNode : public ScoredExtentNode {
01994       // Scored node written "#not(<child>)". The default constructor leaves the child null until
01995       // setChild() is called, so members that follow the pointer check it before dereferencing.
01996     private:
01997       ScoredExtentNode* _child;
01998 
01999     public:
02000       NotNode() {
02001         _child = 0;
02002       }
02003 
02004       NotNode( ScoredExtentNode* child ) {
02005         _child = child;
02006       }
02007 
02008       NotNode( Unpacker& unpacker ) {
02009         _child = unpacker.getScoredExtentNode( "child" );
02010       }
02011 
02012       std::string typeName() const { return "NotNode"; }
02013       ScoredExtentNode* getChild() { return _child; }
02014       void setChild( ScoredExtentNode* child ) { _child = child; }
02015 
02016       // Query text: "#not(" + child text + ")"; a missing child contributes nothing.
02017       std::string queryText() const {
02018         std::stringstream qtext;
02019         qtext << "#not(";
02020         if( _child )
02021           qtext << _child->queryText();
02022         qtext << ")";
02023 
02024         return qtext.str();
02025       } 
02026 
02027       // 101 plus the child's hash when a child is set.
02028       UINT64 hashCode() const {
02029         return 101 + ( _child ? _child->hashCode() : 0 );
02030       }
02031 
02032       // Calls walker.before, walks the child if there is one, then walker.after.
02033       void walk( Walker& walker ) {
02034         walker.before(this);
02035         if( _child )
02036           _child->walk(walker);
02037         walker.after(this);
02038       }
02039 
02040       void pack( Packer& packer ) {
02041         packer.before(this);
02042         packer.put( "child", _child );
02043         packer.after(this);
02044       }
02045 
02046       // Builds a new NotNode around a copy of the child (null stays null), with this node's name.
02047       Node* copy( Copier& copier ) {
02048         copier.before( this );
02049         ScoredExtentNode* childDuplicate = _child ? dynamic_cast<ScoredExtentNode*>(_child->copy(copier)) : 0;
02050         NotNode* duplicate = new NotNode( childDuplicate );
02051         duplicate->setNodeName( nodeName() );
02052         return copier.after( this, duplicate );
02053       }
02054     };
02055 
02056     class MaxNode : public UnweightedCombinationNode {
02057     public:
02058       MaxNode() {}
02059       MaxNode( Unpacker& unpacker ) {
02060         _unpack( unpacker );
02061       }
02062 
02063       std::string typeName() const {
02064         return "MaxNode";
02065       }
02066 
02067       std::string queryText() const {
02068         std::stringstream qtext;
02069         qtext << "#max(";
02070         _childText(qtext);
02071         qtext << ")";
02072 
02073         return qtext.str();
02074       } 
02075 
02076       UINT64 hashCode() const {
02077         return 57 + _hashCode();
02078       }
02079 
02080       void walk( Walker& walker ) {
02081         _walk(this, walker);
02082       }
02083 
02084       Node* copy( Copier& copier ) {
02085         return _copy( this, copier );
02086       }
02087     };
02088 
02089     class CombineNode : public UnweightedCombinationNode {
02090     public:
02091       CombineNode() {}
02092       CombineNode( Unpacker& unpacker ) {
02093         _unpack( unpacker );
02094       }
02095 
02096       std::string typeName() const {
02097         return "CombineNode";
02098       }
02099 
02100       std::string queryText() const {
02101         std::stringstream qtext;
02102         qtext << "#combine(";
02103         _childText(qtext);
02104         qtext << ")";
02105 
02106         return qtext.str();
02107       } 
02108 
02109       UINT64 hashCode() const {
02110         return 59 + _hashCode();
02111       }
02112 
02113       void walk( Walker& walker ) {
02114         _walk( this, walker );
02115       }
02116       
02117       Node* copy( Copier& copier ) {
02118         return _copy( this, copier );
02119       }
02120     };
02121 
02122     class WAndNode : public WeightedCombinationNode {
02123     public:
02124       WAndNode() {}
02125       WAndNode( Unpacker& unpacker ) {
02126         _unpack( unpacker );
02127       }
02128 
02129       std::string typeName() const {
02130         return "WAndNode";
02131       }
02132 
02133       std::string queryText() const {
02134         std::stringstream qtext;
02135         qtext << "#wand(";
02136         _childText(qtext);
02137         qtext << ")";
02138 
02139         return qtext.str();
02140       } 
02141 
02142       UINT64 hashCode() const {
02143         return 61 + _hashCode();
02144       }
02145 
02146       void walk( Walker& walker ) {
02147         _walk( this, walker );
02148       }
02149 
02150       Node* copy( Copier& copier ) {
02151         return _copy( this, copier );
02152       }
02153     };
02154 
02155     class WSumNode : public WeightedCombinationNode {
02156     public:
02157       WSumNode() {}
02158       WSumNode( Unpacker& unpacker ) {
02159         _unpack( unpacker );
02160       }
02161 
02162       std::string typeName() const {
02163         return "WSumNode";
02164       }
02165 
02166       std::string queryText() const {
02167         std::stringstream qtext;
02168         qtext << "#wsum(";
02169         _childText(qtext);
02170         qtext << ")";
02171 
02172         return qtext.str();
02173       } 
02174 
02175       UINT64 hashCode() const {
02176         return 67 + _hashCode();
02177       }
02178 
02179       void walk( Walker& walker ) {
02180         _walk( this, walker );
02181       }
02182 
02183       Node* copy( Copier& copier ) {
02184         return _copy( this, copier );
02185       }
02186     };
02187 
02188     class WeightNode : public WeightedCombinationNode {
02189     public:
02190       WeightNode() {}
02191       WeightNode( Unpacker& unpacker ) {
02192         _unpack( unpacker );
02193       }
02194 
02195       std::string typeName() const {
02196         return "WeightNode";
02197       }
02198 
02199       std::string queryText() const {
02200         std::stringstream qtext;
02201         qtext << "#weight(";
02202         _childText(qtext);
02203         qtext << ")";
02204 
02205         return qtext.str();
02206       }
02207 
02208       UINT64 hashCode() const {
02209         return 71 + _hashCode();
02210       }
02211 
02212       void walk( Walker& walker ) {
02213         _walk( this, walker );
02214       }
02215 
02216       Node* copy( Copier& copier ) {
02217         return _copy( this, copier );
02218       }
02219     };
02220 
02221     class ExtentRestriction : public ScoredExtentNode {
02222     protected:
02223       ScoredExtentNode* _child;
02224       RawExtentNode* _field;
02225 
02226     public:
02227       ExtentRestriction( Unpacker& unpacker ) {
02228         _child = unpacker.getScoredExtentNode("child");
02229         _field = unpacker.getRawExtentNode("field");
02230       }
02231 
02232       ExtentRestriction( ScoredExtentNode* child, RawExtentNode* field ) :
02233         _child(child),
02234         _field(field)
02235       {
02236       }
02237 
02238       virtual std::string typeName() const {
02239         return "ExtentRestriction";
02240       }
02241 
02242       std::string queryText() const {
02243         std::stringstream qtext;
02244         // this extent restriction is almost certainly because of some #combine or #max operator
02245         // in the _child position.  We look for the first parenthesis (e.g. #combine(dog cat)) and
02246         // insert the brackets in.
02247         
02248         std::string childText = _child->queryText();
02249         std::string::size_type pos = childText.find( '(' );
02250 
02251         if( pos != std::string::npos ) {
02252           qtext << childText.substr(0,pos) 
02253                 << "["
02254                 << _field->queryText()
02255                 << "]"
02256                 << childText.substr(pos);
02257         } else {
02258           // couldn't find a parenthesis, so we'll tack the [field] on the front
02259           qtext << "["
02260                 << _field->queryText()
02261                 << "]"
02262                 << childText;
02263         }
02264 
02265         return qtext.str();
02266       }
02267 
02268       virtual UINT64 hashCode() const {
02269         return 79 + _child->hashCode() * 7 + _field->hashCode();
02270       }
02271 
02272       ScoredExtentNode* getChild() {
02273         return _child;
02274       }
02275 
02276       RawExtentNode* getField() {
02277         return _field;
02278       }
02279 
02280       void setChild( ScoredExtentNode* child ) {
02281         _child = child;
02282       }
02283 
02284       void setField( RawExtentNode* field ) {
02285         _field = field;
02286       }
02287       
02288       void pack( Packer& packer ) {
02289         packer.before(this);
02290         packer.put("child", _child);
02291         packer.put("field", _field);
02292         packer.after(this);
02293       }
02294 
02295       void walk( Walker& walker ) {
02296         walker.before(this);
02297         _child->walk(walker);
02298         _field->walk(walker);
02299         walker.after(this);
02300       }
02301 
02302       virtual Node* copy( Copier& copier ) {
02303         copier.before(this);
02304 
02305         ScoredExtentNode* duplicateChild = dynamic_cast<indri::lang::ScoredExtentNode*>(_child->copy(copier));
02306         RawExtentNode* duplicateField = dynamic_cast<indri::lang::RawExtentNode*>(_field->copy(copier));
02307         ExtentRestriction* duplicate = new ExtentRestriction( duplicateChild, duplicateField );
02308         duplicate->setNodeName( nodeName() );
02309         
02310         return copier.after(this, duplicate);
02311       }
02312     };
02313 
02314     class FixedPassage : public ScoredExtentNode {
02315     private:
02316       ScoredExtentNode* _child;
02317       int _windowSize;
02318       int _increment;
02319 
02320     public:
02321       FixedPassage( Unpacker& unpacker ) {
02322         _child = unpacker.getScoredExtentNode("child");
02323         _windowSize = (int)unpacker.getInteger("windowSize");
02324         _increment = (int)unpacker.getInteger("increment");
02325       }
02326 
02327       FixedPassage( ScoredExtentNode* child, int windowSize, int increment ) :
02328         _child(child),
02329         _windowSize(windowSize),
02330         _increment(increment)
02331       {
02332       }
02333 
02334       std::string typeName() const {
02335         return "FixedPassage";
02336       }
02337 
02338       std::string queryText() const {
02339         std::stringstream qtext;
02340         // this extent restriction is almost certainly because of some #combine or #max operator
02341         // in the _child position.  We look for the first parenthesis (e.g. #combine(dog cat)) and
02342         // insert the brackets in.
02343         
02344         std::string childText = _child->queryText();
02345         std::string::size_type pos = childText.find( '(' );
02346 
02347         if( pos != std::string::npos ) {
02348           qtext << childText.substr(0,pos) 
02349                 << "[passage"
02350                 << _windowSize
02351                 << ":"
02352                 << _increment
02353                 << "]"
02354                 << childText.substr(pos);
02355         } else {
02356           // couldn't find a parenthesis, so we'll tack the [field] on the front
02357           qtext << "[passage"
02358                 << _windowSize
02359                 << ":"
02360                 << _increment
02361                 << "]"
02362                 << childText;
02363         }
02364 
02365         return qtext.str();
02366       } 
02367 
02368       UINT64 hashCode() const {
02369         return 83 + _child->hashCode() + _windowSize * 3 + _increment;
02370       }
02371 
02372       ScoredExtentNode* getChild() {
02373         return _child;
02374       }
02375 
02376       int getWindowSize() {
02377         return _windowSize;
02378       }
02379 
02380       int getIncrement() {
02381         return _increment;
02382       }
02383 
02384       void setChild( ScoredExtentNode* child ) {
02385         _child = child;
02386       }
02387 
02388       void setWindowSize( int windowSize ) {
02389         _windowSize = windowSize;
02390       }
02391 
02392       void setIncrement( int increment ) {
02393         _increment = increment;
02394       }
02395 
02396       void pack( Packer& packer ) {
02397         packer.before(this);
02398         packer.put("child", _child);
02399         packer.put("increment", _increment);
02400         packer.put("windowSize", _windowSize);
02401         packer.after(this);
02402       }
02403 
02404       void walk( Walker& walker ) {
02405         walker.before(this);
02406         _child->walk(walker);
02407         walker.after(this);
02408       }
02409 
02410       Node* copy( Copier& copier ) {
02411         copier.before(this);
02412 
02413         ScoredExtentNode* duplicateChild = dynamic_cast<indri::lang::ScoredExtentNode*>(_child->copy(copier));
02414         FixedPassage* duplicate = new FixedPassage( duplicateChild,
02415                                                     _windowSize,
02416                                                     _increment );
02417         duplicate->setNodeName( nodeName() );
02418         
02419         return copier.after(this, duplicate);
02420       }
02421     };
02422 
02423     class FilterNode : public ScoredExtentNode {
02424     private:
02425       ScoredExtentNode* _child;
02426       std::vector<lemur::api::DOCID_T> _documents;
02427 
02428     public:
02429       FilterNode( ScoredExtentNode* child, std::vector<lemur::api::DOCID_T>& documents ) : 
02430         _child(child),
02431         _documents(documents)
02432       {
02433       }
02434 
02435       FilterNode( Unpacker& unpacker ) {
02436         _child = unpacker.getScoredExtentNode( "child" );
02437         _documents = unpacker.getDocIdVector( "documents" );
02438       }
02439 
02440       std::string typeName() const {
02441         return "FilterNode";
02442       }
02443 
02444       ScoredExtentNode* getChild() {
02445         return _child;
02446       }
02447 
02448       const std::vector<lemur::api::DOCID_T>& getDocuments() const {
02449         return _documents;
02450       }
02451 
02452       void pack( Packer& packer ) {
02453         packer.before(this);
02454         packer.put("child", _child);
02455         packer.put("documents", _documents);
02456         packer.after(this);
02457       }
02458 
02459       void walk( Walker& walker ) {
02460         walker.before(this);
02461         _child->walk(walker);
02462         walker.after(this);
02463       }
02464 
02465       std::string queryText() const {
02466         // for now, we'll let the filter be anonymous, since it can never
02467         // be typed by the user
02468         return _child->queryText();
02469       }
02470 
02471       UINT64 hashCode() const {
02472         UINT64 documentSum = 0;
02473 
02474         for( size_t i=0; i<_documents.size(); i++ ) {
02475           documentSum += _documents[i];
02476         }
02477 
02478         return 87 + _child->hashCode() + documentSum;
02479       }
02480 
02481       Node* copy( Copier& copier ) {
02482         copier.before(this);
02483 
02484         ScoredExtentNode* duplicateChild = dynamic_cast<ScoredExtentNode*>(_child->copy(copier));
02485         FilterNode* duplicate = new FilterNode( duplicateChild, _documents );
02486         duplicate->setNodeName( nodeName() );
02487 
02488         return copier.after(this, duplicate);
02489       }
02490     };
02491 
02492     class ListAccumulator : public AccumulatorNode {
02493     private:
02494       RawExtentNode* _raw;
02495      
02496     public:
02497       ListAccumulator( RawExtentNode* raw ) :
02498         _raw(raw)
02499       {
02500       }
02501 
02502       ListAccumulator( Unpacker& unpacker ) {
02503         _raw = unpacker.getRawExtentNode( "raw" );
02504       }
02505 
02506       std::string typeName() const {
02507         return "ListAccumulator";
02508       }
02509 
02510       std::string queryText() const {
02511         return _raw->queryText();
02512       }
02513 
02514       UINT64 hashCode() const {
02515         // we don't use hashCodes for accumulatorNodes
02516         return 0;
02517       }
02518 
02519       RawExtentNode* getRawExtent() {
02520         return _raw;
02521       }
02522 
02523       void pack( Packer& packer ) {
02524         packer.before(this);
02525         packer.put( "raw", _raw );
02526         packer.after(this);
02527       }
02528 
02529       void walk( Walker& walker ) {
02530         walker.before(this);
02531         _raw->walk( walker );
02532         walker.after(this);
02533       }
02534 
02535       Node* copy( Copier& copier ) {
02536         copier.before(this);
02537         RawExtentNode* rawCopy = dynamic_cast<RawExtentNode*>(_raw->copy( copier ));
02538         ListAccumulator* duplicate = new ListAccumulator( rawCopy );
02539         duplicate->setNodeName( nodeName() );
02540         return copier.after(this, duplicate);
02541       }
02542     };
02543 
02544     class ContextCounterNode : public AccumulatorNode {
02545     private:
02546       RawExtentNode* _raw;
02547       RawExtentNode* _context;
02548       bool _hasCounts;
02549       bool _hasContextSize;
02550       double _occurrences;
02551       double _contextSize;
02552       int _documentOccurrences; // number of documents we occur in
02553       int _documentCount; // total number of documents
02554 
02555     public:
02556       ContextCounterNode( RawExtentNode* raw, RawExtentNode* context ) :
02557         _hasCounts(false),
02558         _hasContextSize(false),
02559         _occurrences(0),
02560         _contextSize(0),
02561         _documentOccurrences(0),
02562         _documentCount(0)
02563       {
02564         _raw = raw;
02565         _context = context;
02566       }
02567 
02568       ContextCounterNode( Unpacker& unpacker ) {
02569         _raw = unpacker.getRawExtentNode( "raw" );
02570         _context = unpacker.getRawExtentNode( "context" );
02571         _occurrences = unpacker.getDouble( "occurrences" );
02572         _contextSize = unpacker.getDouble( "contextSize" );
02573         _documentOccurrences = unpacker.getInteger( "documentOccurrences" );
02574         _documentCount = unpacker.getInteger( "documentCount" );
02575 
02576         _hasCounts = unpacker.getBoolean( "hasCounts" );
02577         _hasContextSize = unpacker.getBoolean( "hasContextSize" );
02578       }
02579 
02580       std::string typeName() const {
02581         return "ContextCounterNode";
02582       }
02583 
02584       std::string queryText() const {
02585         std::stringstream qtext;
02586         
02587         if( _raw )
02588           qtext << _raw->queryText();
02589 
02590         if( _context ) {
02591           // if we haven't added a period yet, put one in
02592           int dot = (int)qtext.str().find('.');
02593           if( dot < 0 )
02594             qtext << '.';
02595 
02596           qtext << "(" << _context->queryText() << ")";
02597         }
02598 
02599         return qtext.str();
02600       }
02601 
02602       UINT64 hashCode() const {
02603         // we don't use hashCodes for accumulatorNodes
02604         return 0;
02605       }
02606 
02607       RawExtentNode* getContext() {
02608         return _context;
02609       }
02610 
02611       RawExtentNode* getRawExtent() {
02612         return _raw;
02613       }
02614 
02615       void setRawExtent( RawExtentNode* rawExtent ) {
02616         _raw = rawExtent;
02617       }
02618 
02619       void setContext( RawExtentNode* context ) {
02620         _context = context;
02621       }
02622 
02623       void pack( Packer& packer ) {
02624         packer.before(this);
02625         packer.put( "raw", _raw );
02626         packer.put( "context", _context );
02627         packer.put( "occurrences", _occurrences );
02628         packer.put( "contextSize", _contextSize );
02629         packer.put( "documentOccurrences", _documentOccurrences );
02630         packer.put( "documentCount", _documentCount );
02631 
02632         packer.put( "hasCounts", _hasCounts );
02633         packer.put( "hasContextSize", _hasContextSize );
02634         packer.after(this);
02635       }
02636 
02637       void walk( Walker& walker ) {
02638         walker.before(this);
02639         if( _raw ) _raw->walk(walker);
02640         if( _context ) _context->walk(walker);
02641         walker.after(this);
02642       }
02643 
02644       Node* copy( Copier& copier ) {
02645         copier.before(this);
02646         RawExtentNode* duplicateRaw = _raw ? dynamic_cast<RawExtentNode*>(_raw->copy(copier)) : 0;
02647         RawExtentNode* duplicateContext = _context ? dynamic_cast<RawExtentNode*>(_context->copy(copier)) : 0;
02648         ContextCounterNode* duplicate = new ContextCounterNode(*this);
02649         duplicate->setContext(duplicateContext);
02650         duplicate->setRawExtent(duplicateRaw);
02651         return copier.after(this, duplicate);
02652       }
02653 
02654       bool hasCounts() const {
02655         return _hasCounts;
02656       }
02657 
02658       bool hasContextSize() const {
02659         return _hasContextSize;
02660       }
02661 
02662       double getOccurrences() const {
02663         return _occurrences;
02664       }
02665 
02666       double getContextSize() const {
02667         return _contextSize;
02668       }
02669 
02670       int getDocumentOccurrences() const {
02671         return _documentOccurrences;
02672       }
02673 
02674       int getDocumentCount() const {
02675         return _documentCount;
02676       }
02677 
02678       void setContextSize( double contextSize ) {
02679         _contextSize = contextSize;
02680         _hasContextSize = true;
02681       }
02682 
02683       void setCounts( double occurrences,
02684                       double contextSize, int documentOccurrences, 
02685                       int documentCount ) {
02686         _hasCounts = true;
02687         _occurrences = occurrences;
02688         setContextSize( contextSize );
02689         _documentOccurrences = documentOccurrences;
02690         _documentCount = documentCount;
02691       }
02692     };
02693 
02694     class ContextSimpleCounterNode : public AccumulatorNode {
02695     private:
02696       std::vector<std::string> _terms;
02697       std::string _field;
02698       std::string _context;
02699 
02700       bool _hasCounts;
02701       bool _hasContextSize;
02702       double _occurrences;
02703       double _contextSize;
02704       int _documentOccurrences; // number of documents we occur in
02705       int _documentCount; // total number of documents
02706 
02707     public:
02708       ContextSimpleCounterNode( const std::vector<std::string>& terms, const std::string& field, const std::string& context ) :
02709         _hasCounts(false),
02710         _hasContextSize(false),
02711         _occurrences(0),
02712         _contextSize(0),
02713         _terms(terms),
02714         _field(field),
02715         _context(context),
02716         _documentOccurrences(0),
02717         _documentCount(0)
02718       {
02719       }
02720 
02721       ContextSimpleCounterNode( Unpacker& unpacker ) {
02722         _occurrences = unpacker.getDouble( "occurrences" );
02723         _contextSize = unpacker.getDouble( "contextSize" );
02724 
02725         _terms = unpacker.getStringVector( "terms" );
02726         _field = unpacker.getString( "field" );
02727         _context = unpacker.getString( "context" );
02728         _documentOccurrences = unpacker.getInteger( "documentOccurrences" );
02729         _documentCount = unpacker.getInteger( "documentCount" );
02730 
02731         _hasCounts = unpacker.getBoolean( "hasCounts" );
02732         _hasContextSize = unpacker.getBoolean( "hasContextSize" );
02733       }
02734 
02735       std::string typeName() const {
02736         return "ContextSimpleCounterNode";
02737       }
02738 
02739       std::string queryText() const {
02740         // nothing to see here -- this is an optimization node
02741         return std::string();
02742       }
02743 
02744       UINT64 hashCode() const {
02745         // we don't use hashCodes for accumulatorNodes
02746         return 0;
02747       }
02748 
02749       void pack( Packer& packer ) {
02750         packer.before(this);
02751         packer.put( "occurrences", _occurrences );
02752         packer.put( "contextSize", _contextSize );
02753         packer.put( "documentOccurrences", _documentOccurrences );
02754         packer.put( "documentCount", _documentCount );
02755 
02756         packer.put( "terms", _terms );
02757         packer.put( "field", _field );
02758         packer.put( "context", _context );
02759 
02760         packer.put( "hasCounts", _hasCounts );
02761         packer.put( "hasContextSize", _hasContextSize );
02762         packer.after(this);
02763       }
02764 
02765       void walk( Walker& walker ) {
02766         walker.before(this);
02767         walker.after(this);
02768       }
02769 
02770       Node* copy( Copier& copier ) {
02771         copier.before(this);
02772         ContextSimpleCounterNode* duplicate = new ContextSimpleCounterNode(*this);
02773         return copier.after(this, duplicate);
02774       }
02775 
02776       bool hasCounts() const {
02777         return _hasCounts;
02778       }
02779 
02780       bool hasContextSize() const {
02781         return _hasContextSize;
02782       }
02783 
02784       double getOccurrences() const {
02785         return _occurrences;
02786       }
02787 
02788       double getContextSize() const {
02789         return _contextSize;
02790       }
02791 
02792       int getDocumentOccurrences() const {
02793         return _documentOccurrences;
02794       }
02795 
02796       int getDocumentCount() const {
02797         return _documentCount;
02798       }
02799 
02800       const std::vector<std::string>& terms() const {
02801         return _terms;
02802       }
02803 
02804       const std::string& field() const {
02805         return _field;
02806       }
02807 
02808       const std::string& context() const {
02809         return _context;
02810       }
02811 
02812       void setContextSize( double contextSize ) {
02813         _contextSize = contextSize;
02814         _hasContextSize = true;
02815       }
02816 
02817       void setCounts( double occurrences,
02818                       double contextSize, int documentOccurrences, 
02819                       int documentCount ) {
02820         _hasCounts = true;
02821         _occurrences = occurrences;
02822         setContextSize( contextSize );
02823         _documentOccurrences = documentOccurrences;
02824         _documentCount = documentCount;
02825       }
02826     };
02827 
02828     class ScoreAccumulatorNode : public AccumulatorNode {
02829     private:
02830       ScoredExtentNode* _scoredNode;
02831 
02832     public:
02833       ScoreAccumulatorNode( ScoredExtentNode* scoredNode ) :
02834         _scoredNode(scoredNode)
02835       {
02836       }
02837 
02838       ScoreAccumulatorNode( Unpacker& unpacker ) {
02839         _scoredNode = unpacker.getScoredExtentNode( "scoredNode" );
02840       }
02841 
02842       std::string typeName() const {
02843         return "ScoreAccumulatorNode";
02844       }
02845 
02846       std::string queryText() const {
02847         // anonymous
02848         return _scoredNode->queryText();
02849       }
02850 
02851       UINT64 hashCode() const {
02852         // we don't use hashCodes for accumulatorNodes
02853         return 0;
02854       }
02855 
02856       ScoredExtentNode* getChild() {
02857         return _scoredNode;
02858       }
02859 
02860       void pack( Packer& packer ) {
02861         packer.before(this);
02862         packer.put( "scoredNode", _scoredNode );
02863         packer.after(this);
02864       }
02865 
02866       void walk( Walker& walker ) {
02867         walker.before(this);
02868         _scoredNode->walk(walker);
02869         walker.after(this);
02870       }
02871 
02872       Node* copy( Copier& copier ) {
02873         copier.before(this);
02874         ScoredExtentNode* duplicateChild = dynamic_cast<ScoredExtentNode*>(_scoredNode->copy(copier));
02875         ScoreAccumulatorNode* duplicate = new ScoreAccumulatorNode(duplicateChild);
02876         duplicate->setNodeName( nodeName() );
02877         return copier.after(this, duplicate);
02878       }
02879     };
02880 
02881     class AnnotatorNode : public AccumulatorNode {
02882     private:
02883       ScoredExtentNode* _scoredNode;
02884 
02885     public:
02886       AnnotatorNode( ScoredExtentNode* scoredNode ) :
02887         _scoredNode(scoredNode)
02888       {
02889       }
02890 
02891       AnnotatorNode( Unpacker& unpacker ) {
02892         _scoredNode = unpacker.getScoredExtentNode( "scoredNode" );
02893       }
02894 
02895       std::string typeName() const {
02896         return "AnnotatorNode";
02897       }
02898 
02899       std::string queryText() const {
02900         return _scoredNode->queryText();
02901       }
02902 
02903       UINT64 hashCode() const {
02904         // we don't use hashCodes for accumulatorNodes
02905         return 0;
02906       }
02907 
02908       ScoredExtentNode* getChild() {
02909         return _scoredNode;
02910       }
02911 
02912       void pack( Packer& packer ) {
02913         packer.before(this);
02914         packer.put( "scoredNode", _scoredNode );
02915         packer.after(this);
02916       }
02917 
02918       void walk( Walker& walker ) {
02919         walker.before(this);
02920         _scoredNode->walk(walker);
02921         walker.after(this);
02922       }
02923 
02924       Node* copy( Copier& copier ) {
02925         copier.before(this);
02926         ScoredExtentNode* duplicateChild = dynamic_cast<ScoredExtentNode*>(_scoredNode->copy(copier));
02927         AnnotatorNode* duplicate = new AnnotatorNode(duplicateChild);
02928         duplicate->setNodeName( nodeName() );
02929         return copier.after(this, duplicate);
02930       }
02931     };
02932 
02933     
02934     class FieldWildcard : public RawExtentNode {
02935     private:
02936 
02937     public:
02938       FieldWildcard(  )
02939       {
02940       }
02941 
02942       FieldWildcard( Unpacker& unpacker ) {
02943       }
02944 
02945       bool operator== ( Node& o ) {
02946         FieldWildcard* other = dynamic_cast<FieldWildcard*>(&o);
02947         return other != NULL; // all instances are ==
02948       }
02949 
02950       std::string typeName() const {
02951         return "FieldWildcard";
02952       }
02953 
02954       std::string queryText() const {
02955         std::stringstream qtext;
02956         qtext << "*";
02957 
02958         return qtext.str();
02959       }
02960 
02961 
02962       void pack( Packer& packer ) {
02963         packer.before(this);
02964         packer.after(this);
02965       }
02966 
02967       void walk( Walker& walker ) {
02968         walker.before(this);
02969         walker.after(this);
02970       }
02971 
02972       UINT64 hashCode() const {
02973         return 103;//???????????????
02974       }
02975 
02976       Node* copy( Copier& copier ) {
02977         copier.before(this);
02978 
02979         FieldWildcard* fieldWildcardCopy = new FieldWildcard;
02980         fieldWildcardCopy->setNodeName( nodeName() );
02981 
02982         return copier.after(this, fieldWildcardCopy);
02983       }
02984     };
02985 
02986     class NestedExtentInside : public ExtentInside {
02987 
02988     public:
02989       NestedExtentInside( RawExtentNode* inner, RawExtentNode* outer ) :
02990         ExtentInside( inner, outer )
02991       {
02992       }
02993 
02994       NestedExtentInside( Unpacker& unpacker ):
02995         ExtentInside( unpacker ) 
02996       { 
02997       }
02998 
02999       bool operator== ( Node& o ) {
03000         NestedExtentInside* other = dynamic_cast<NestedExtentInside*>(&o);
03001   
03002         return other &&
03003           *_inner == *other->_inner &&
03004           *_outer == *other->_outer;
03005       }
03006       
03007       std::string typeName() const {
03008         return "NestedExtentInside";
03009       }
03010 
03011       UINT64 hashCode() const {
03012         return 107 + _inner->hashCode() + (_inner->hashCode() * 7);//???????????????
03013       }
03014 
03015       Node* copy( Copier& copier ) {
03016         copier.before(this);
03017         
03018         RawExtentNode* newInner = dynamic_cast<RawExtentNode*>(_inner->copy(copier));
03019         RawExtentNode* newOuter = dynamic_cast<RawExtentNode*>(_outer->copy(copier));
03020         NestedExtentInside* extentInsideCopy = new NestedExtentInside( newInner, newOuter );
03021         extentInsideCopy->setNodeName( nodeName() );
03022 
03023         return copier.after(this, extentInsideCopy);
03024       }
03025     };
03026 
03027     class NestedRawScorerNode : public RawScorerNode {
03028 
03029     public:
03030       NestedRawScorerNode( RawExtentNode* raw, RawExtentNode* context, std::string smoothing = "method:dirichlet,mu:2500" ) :
03031         RawScorerNode( raw, context, smoothing )
03032       {
03033       }
03034 
03035       NestedRawScorerNode( Unpacker& unpacker ) :
03036         RawScorerNode( unpacker )
03037       {
03038       }
03039 
03040       std::string typeName() const {
03041         return "NestedRawScorerNode";
03042       }
03043 
03044       UINT64 hashCode() const {
03045         UINT64 hash = 0;
03046 
03047         hash += 105;
03048         hash += _raw->hashCode();
03049 
03050         if( _context ) {
03051           hash += _context->hashCode();
03052         }
03053 
03054         indri::utility::GenericHash<const char*> gh;
03055         hash += gh( _smoothing.c_str() );
03056 
03057         return hash;
03058       }
03059 
03060       Node* copy( Copier& copier ) {
03061         copier.before(this);
03062 
03063         RawExtentNode* duplicateContext = _context ? dynamic_cast<RawExtentNode*>(_context->copy(copier)) : 0;
03064         RawExtentNode* duplicateRaw = _raw ? dynamic_cast<RawExtentNode*>(_raw->copy(copier)) : 0;
03065         NestedRawScorerNode* duplicate = new NestedRawScorerNode(*this);
03066         duplicate->setRawExtent( duplicateRaw );
03067         duplicate->setContext( duplicateContext );
03068 
03069         return copier.after(this, duplicate);
03070       }
03071     };
03072    
03073 
03074     class ExtentEnforcement : public ExtentRestriction {
03075 
03076     public:
03077       ExtentEnforcement( Unpacker& unpacker ) :
03078         ExtentRestriction( unpacker ) {
03079       }
03080 
03081       ExtentEnforcement( ScoredExtentNode* child, RawExtentNode* field ) :
03082         ExtentRestriction( child, field )
03083       {
03084       }
03085 
03086       std::string typeName() const {
03087         return "ExtentEnforcement";
03088       }
03089 
03090       
03091       UINT64 hashCode() const {
03092         return 109 + _child->hashCode() * 7 + _field->hashCode();//??????????????
03093       }
03094 
03095 
03096       Node* copy( Copier& copier ) {
03097         copier.before(this);
03098 
03099         ScoredExtentNode* duplicateChild = dynamic_cast<indri::lang::ScoredExtentNode*>(_child->copy(copier));
03100         RawExtentNode* duplicateField = dynamic_cast<indri::lang::RawExtentNode*>(_field->copy(copier));
03101         ExtentEnforcement* duplicate = new ExtentEnforcement( duplicateChild, duplicateField );
03102         duplicate->setNodeName( nodeName() );
03103         
03104         return copier.after(this, duplicate);
03105       }
03106     };
03107 
03108     class ContextInclusionNode : public ScoredExtentNode {
03109     protected:
03110       std::vector<ScoredExtentNode*> _children;
03111       ScoredExtentNode* _preserveExtentsChild;
03112 
03113       void _unpack( Unpacker& unpacker ) {
03114         _children = unpacker.getScoredExtentVector( "children" );
03115         _preserveExtentsChild = unpacker.getScoredExtentNode( "preserveExtentsChild" );
03116       }
03117 
03118       UINT64 _hashCode() const {
03119         UINT64 accumulator = 0;
03120 
03121         for( size_t i=0; i<_children.size(); i++ ) {
03122           accumulator += _children[i]->hashCode();
03123         }
03124 
03125         return accumulator;
03126       }
03127 
03128       template<class _ThisType>
03129       void _walk( _ThisType* ptr, Walker& walker ) {
03130         walker.before(ptr);
03131 
03132         for( size_t i=0; i<_children.size(); i++ ) {
03133           _children[i]->walk(walker);
03134         }
03135         
03136         walker.after(ptr);
03137       }
03138 
03139       template<class _ThisType>
03140       Node* _copy( _ThisType* ptr, Copier& copier ) {
03141         copier.before(ptr);
03142         
03143         _ThisType* duplicate = new _ThisType();
03144         duplicate->setNodeName( nodeName() );
03145         for( size_t i=0; i<_children.size(); i++ ) {
03146           bool preserveExtents = false;
03147           if ( _preserveExtentsChild == _children[i] ) {
03148             preserveExtents = true;
03149           }
03150           duplicate->addChild( dynamic_cast<ScoredExtentNode*>(_children[i]->copy(copier)), preserveExtents );
03151         } 
03152 
03153         return copier.after(ptr, duplicate);
03154       }
03155 
03156       void _childText( std::stringstream& qtext ) const {
03157         if ( _preserveExtentsChild != 0 ) {
03158           qtext << _preserveExtentsChild->queryText() << " ";
03159         }
03160         for( size_t i=0; i<_children.size(); i++ ) {
03161           if ( _children[i] != _preserveExtentsChild ) {
03162             if(i>0) qtext << " ";
03163             qtext << _children[i]->queryText();
03164           }
03165         }
03166       }
03167 
03168     public:
03169       ContextInclusionNode( ) { }
03170       ContextInclusionNode( Unpacker & unpacker ) {
03171         _unpack( unpacker );
03172       }
03173 
03174       const std::vector<ScoredExtentNode*>& getChildren() {
03175         return _children;
03176       }
03177       
03178       ScoredExtentNode * getPreserveExtentsChild() {
03179         return _preserveExtentsChild;
03180       }
03181 
03182       void addChild( ScoredExtentNode* scoredNode, bool preserveExtents = false ) {
03183         if (preserveExtents == true) {
03184           _preserveExtentsChild = scoredNode;
03185         }       
03186         _children.push_back( scoredNode );
03187       }
03188 
03189       std::string typeName() const {
03190         return "ContextInclusionNode";
03191       }
03192 
03193       std::string queryText() const {
03194         std::stringstream qtext;
03195         qtext << "#context(";
03196         _childText(qtext);
03197         qtext << ")";
03198 
03199         return qtext.str();
03200       } 
03201 
03202       virtual UINT64 hashCode() const {
03203         return 111 + _hashCode();//?????????????
03204       }
03205 
03206       void pack( Packer& packer ) {
03207         packer.before(this);
03208         packer.put( "children", _children );
03209         packer.put( "preserveExtentsChild", _preserveExtentsChild);
03210         packer.after(this);
03211       }
03212 
03213       void walk( Walker& walker ) {
03214         _walk( this, walker );
03215       }
03216       
03217       Node* copy( Copier& copier ) {
03218         return _copy( this, copier );
03219       }
03220     };
03221 
03222     class LengthPrior : public ScoredExtentNode {
03223     private:
03224       double _exponent;
03225       ScoredExtentNode * _child;
03226 
03227     public:
03228       LengthPrior(ScoredExtentNode * child, double exponent) :
03229         _child(child), 
03230         _exponent(exponent)
03231       {
03232         
03233       }
03234 
03235       LengthPrior( Unpacker& unpacker ) {
03236         _exponent = unpacker.getDouble( "exponent" );
03237         _child = unpacker.getScoredExtentNode( "child" );
03238       }
03239 
03240       std::string queryText() const {
03241         std::stringstream qtext;
03242         // with the definition of priors somewhat in flux, it's
03243         // hard to know what would be good to put here.  It's also
03244         // a little hard when there realy isn't a way to 
03245         // specify this in either of the indri/nexi query languages.
03246         qtext <<  "#lengthprior(" << _exponent << ")";
03247         return qtext.str();
03248       }
03249 
03250       std::string nodeType() {
03251         return "LengthPrior";
03252       }
03253 
03254       void setExponent( double exponent ) {
03255         _exponent = exponent;
03256       }
03257 
03258       double getExponent() {
03259         return _exponent;
03260       }
03261 
03262       ScoredExtentNode* getChild() {
03263         return _child;
03264       }
03265       
03266       virtual UINT64 hashCode() const {
03267         return 115; //?????????
03268       }
03269 
03270       void walk( Walker& walker ) {
03271         walker.before(this);
03272         _child->walk(walker);
03273         walker.after(this);
03274       }
03275 
03276       indri::lang::Node* copy( Copier& copier ) {
03277         copier.before(this);
03278         ScoredExtentNode * childCopy = dynamic_cast<ScoredExtentNode*> (_child->copy( copier ) );
03279         LengthPrior* duplicate = new LengthPrior( childCopy, _exponent );
03280         return copier.after(this, duplicate);
03281       }
03282 
03283       void pack( Packer& packer ) {
03284         packer.before(this);
03285         packer.put( "exponent", _exponent );
03286         packer.put( "child", _child );
03287         packer.after(this);
03288       }
03289     };
03290 
03291     class DocumentStructureNode : public Node {
03292     public:
03293       
03294       DocumentStructureNode( ) {        
03295       }
03296 
03297       DocumentStructureNode( Unpacker& unpacker ) {     
03298       }
03299 
03300       UINT64 hashCode() const {
03301         return 117; //?????????
03302       }
03303 
03304       bool operator== ( Node& o ) {
03305         DocumentStructureNode* other = dynamic_cast<DocumentStructureNode*>(&o);
03306         return other != NULL; // all instances are ==
03307       }
03308 
03309       std::string typeName() const {
03310         return "DocumentStructure";
03311       }
03312       
03313       void pack( Packer& packer ) {
03314         packer.before(this);
03315         packer.after(this);
03316       }
03317 
03318       void walk( Walker& walker ) {
03319         walker.before(this);
03320         walker.after(this);
03321       }
03322 
03323       std::string queryText() const {
03324         return "";
03325       }
03326 
03327       Node* copy( Copier& copier ) {
03328         copier.before(this);
03329 
03330         DocumentStructureNode* documentStructureCopy = new DocumentStructureNode;
03331         documentStructureCopy->setNodeName( nodeName() );
03332 
03333         return copier.after(this, documentStructureCopy);
03334       }
03335     };
03336 
03337     class ShrinkageScorerNode : public RawScorerNode {
03338     private:
03339       DocumentStructureNode* _documentStructure;
03340       
03341       std::vector<std::string> _shrinkageRules;
03342 
03343     public:
03344       ShrinkageScorerNode( RawExtentNode* raw, 
03345                            DocumentStructureNode* documentStructure, 
03346                            std::string smoothing = "method:dirichlet,mu:2500" )
03347         : RawScorerNode( raw, 0, smoothing ),
03348           _documentStructure( documentStructure ), 
03349           _shrinkageRules( 0 )
03350       {
03351       }
03352 
03353       ShrinkageScorerNode( Unpacker& unpacker ) : RawScorerNode( 0, 0, "" ) {
03354         _raw = unpacker.getRawExtentNode( "raw" );
03355         _documentStructure = unpacker.getDocumentStructureNode( "documentStructureNode" );
03356         _occurrences = unpacker.getDouble( "occurrences" );
03357         _contextSize = unpacker.getDouble( "contextSize" );
03358         _smoothing = unpacker.getString( "smoothing" );
03359         _shrinkageRules = unpacker.getStringVector( "shrinkageRules" );
03360       }
03361 
03362       std::string typeName() const {
03363         return "ShrinkageScorerNode";
03364       }
03365 
03366       std::string queryText() const {
03367         std::stringstream qtext;
03368         
03369         qtext << _raw->queryText();
03370 
03371         return qtext.str();
03372       }
03373 
03374       void addShrinkageRule( std::string rule ) {
03375         _shrinkageRules.push_back( rule );
03376       }
03377       
03378       std::vector<std::string> getShrinkageRules() {
03379         return _shrinkageRules;
03380       }
03381 
03382       void setDocumentStructure( DocumentStructureNode* docStruct ) {
03383         _documentStructure = docStruct;
03384       }
03385 
03386       DocumentStructureNode* getDocumentStructure() {
03387         return _documentStructure;
03388       }
03389 
03390 
03391       UINT64 hashCode() const {
03392         UINT64 hash = 0;
03393 
03394         hash += 119;
03395         hash += _raw->hashCode();
03396 
03397         if( _context ) {
03398           hash += _context->hashCode();
03399         }
03400 
03401         indri::utility::GenericHash<const char*> gh;
03402         hash += gh( _smoothing.c_str() );
03403 
03404         return hash;
03405       }
03406 
03407       void pack( Packer& packer ) {
03408         packer.before(this);
03409         packer.put( "raw", _raw );
03410         packer.put( "documentStructure", _documentStructure );
03411         packer.put( "occurrences", _occurrences );
03412         packer.put( "contextSize", _contextSize );
03413         packer.put( "smoothing", _smoothing );
03414         packer.put( "shrinkageRules", _shrinkageRules );
03415         packer.after(this);
03416       }
03417 
03418       void walk( Walker& walker ) {
03419         walker.before(this);
03420         if( _raw )
03421           _raw->walk(walker);
03422         if( _documentStructure ) 
03423           _documentStructure->walk(walker);
03424         walker.after(this);
03425       }
03426 
03427       Node* copy( Copier& copier ) {
03428         copier.before(this);
03429 
03430         RawExtentNode* duplicateRaw = _raw ? dynamic_cast<RawExtentNode*>(_raw->copy(copier)) : 0;
03431         DocumentStructureNode* duplicateDocStruct = _documentStructure ? dynamic_cast<DocumentStructureNode*>(_documentStructure->copy(copier)) : 0;
03432         ShrinkageScorerNode* duplicate = new ShrinkageScorerNode(*this);
03433         duplicate->setRawExtent( duplicateRaw );
03434         duplicate->setDocumentStructure( duplicateDocStruct );
03435 
03436         std::vector<std::string>::iterator ruleIter = _shrinkageRules.begin();
03437         while( ruleIter != _shrinkageRules.end() ) {
03438           duplicate->addShrinkageRule( *ruleIter );
03439           ruleIter++;
03440         }
03441 
03442         return copier.after(this, duplicate);
03443       }
03444     };
03445 
03446 
03447 
03448     class ExtentDescendant : public ExtentInside {
03449     protected:
03450 
03451       DocumentStructureNode* _documentStructure;
03452       
03453     public:
03454       ExtentDescendant( RawExtentNode* inner, RawExtentNode* outer, DocumentStructureNode * docStruct ) :
03455         ExtentInside( inner, outer ),
03456         _documentStructure( docStruct)
03457       {
03458       }
03459 
03460       ExtentDescendant( Unpacker& unpacker ):
03461         ExtentInside( unpacker ) 
03462       { 
03463         _documentStructure = unpacker.getDocumentStructureNode( "documentStructureNode" );
03464       }
03465 
03466       bool operator== ( Node& o ) {
03467         ExtentDescendant* other = dynamic_cast<ExtentDescendant*>(&o);
03468   
03469         return other &&
03470           *_inner == *other->_inner &&
03471           *_outer == *other->_outer &&
03472           *_documentStructure == *other->_documentStructure;
03473       }
03474       
03475       void setDocumentStructure( DocumentStructureNode* docStruct ) {
03476         _documentStructure = docStruct;
03477       }
03478 
03479       DocumentStructureNode* getDocumentStructure() {
03480         return _documentStructure;
03481       }
03482 
03483       std::string typeName() const {
03484         return "ExtentDescendant";
03485       }
03486 
03487       UINT64 hashCode() const {
03488         return 125 + _inner->hashCode() + (_inner->hashCode() * 7);//???????????????
03489       }
03490 
03491 
03492       void walk( Walker& walker ) {
03493         walker.before(this);
03494         _inner->walk(walker);
03495         _outer->walk(walker);
03496         _documentStructure->walk(walker);
03497         walker.after(this);
03498       }
03499 
03500       Node* copy( Copier& copier ) {
03501         copier.before(this);
03502         
03503         RawExtentNode* newInner = dynamic_cast<RawExtentNode*>(_inner->copy(copier));
03504         RawExtentNode* newOuter = dynamic_cast<RawExtentNode*>(_outer->copy(copier));
03505         DocumentStructureNode * newDocStruct = dynamic_cast<DocumentStructureNode*>(_documentStructure->copy(copier));
03506         ExtentDescendant* extentInsideCopy = new ExtentDescendant( newInner, newOuter, newDocStruct );
03507         extentInsideCopy->setNodeName( nodeName() );
03508 
03509         return copier.after(this, extentInsideCopy);
03510       }
03511     };
03512 
03513     class ExtentChild : public ExtentInside {
03514     protected:
03515 
03516       DocumentStructureNode* _documentStructure;
03517       
03518     public:
03519       ExtentChild( RawExtentNode* inner, RawExtentNode* outer, DocumentStructureNode * docStruct ) :
03520         ExtentInside( inner, outer ),
03521         _documentStructure( docStruct)
03522       {
03523       }
03524 
03525       ExtentChild( Unpacker& unpacker ):
03526         ExtentInside( unpacker ) 
03527       { 
03528         _documentStructure = unpacker.getDocumentStructureNode( "documentStructureNode" );
03529       }
03530 
03531       bool operator== ( Node& o ) {
03532         ExtentChild* other = dynamic_cast<ExtentChild*>(&o);
03533   
03534         return other &&
03535           *_inner == *other->_inner &&
03536           *_outer == *other->_outer &&
03537           *_documentStructure == *other->_documentStructure;
03538       }
03539 
03540       void setDocumentStructure( DocumentStructureNode* docStruct ) {
03541         _documentStructure = docStruct;
03542       }
03543 
03544       DocumentStructureNode* getDocumentStructure() {
03545         return _documentStructure;
03546       }
03547      
03548       std::string typeName() const {
03549         return "ExtentChild";
03550       }
03551 
03552       UINT64 hashCode() const {
03553         return 129 + _inner->hashCode() + (_inner->hashCode() * 7);//???????????????
03554       }
03555 
03556 
03557       void walk( Walker& walker ) {
03558         walker.before(this);
03559         _inner->walk(walker);
03560         _outer->walk(walker);
03561         _documentStructure->walk(walker);
03562         walker.after(this);
03563       }
03564 
03565       Node* copy( Copier& copier ) {
03566         copier.before(this);
03567         
03568         RawExtentNode* newInner = dynamic_cast<RawExtentNode*>(_inner->copy(copier));
03569         RawExtentNode* newOuter = dynamic_cast<RawExtentNode*>(_outer->copy(copier));
03570         DocumentStructureNode * newDocStruct = dynamic_cast<DocumentStructureNode*>(_documentStructure->copy(copier));
03571         ExtentChild* extentInsideCopy = new ExtentChild( newInner, newOuter, newDocStruct );
03572         extentInsideCopy->setNodeName( nodeName() );
03573 
03574         return copier.after(this, extentInsideCopy);
03575       }
03576     };
03577 
03578     class ExtentParent : public ExtentInside {
03579     protected:
03580 
03581       DocumentStructureNode* _documentStructure;
03582       
03583     public:
03584       ExtentParent( RawExtentNode* inner, RawExtentNode* outer, DocumentStructureNode * docStruct ) :
03585         ExtentInside( inner, outer ),
03586         _documentStructure( docStruct)
03587       {
03588       }
03589 
03590       ExtentParent( Unpacker& unpacker ):
03591         ExtentInside( unpacker ) 
03592       { 
03593         _documentStructure = unpacker.getDocumentStructureNode( "documentStructureNode" );
03594       }
03595 
03596       bool operator== ( Node& o ) {
03597         ExtentParent* other = dynamic_cast<ExtentParent*>(&o);
03598   
03599         return other &&
03600           *_inner == *other->_inner &&
03601           *_outer == *other->_outer &&
03602           *_documentStructure == *other->_documentStructure;
03603       }
03604 
03605       void setDocumentStructure( DocumentStructureNode* docStruct ) {
03606         _documentStructure = docStruct;
03607       }
03608 
03609       DocumentStructureNode* getDocumentStructure() {
03610         return _documentStructure;
03611       }
03612      
03613       std::string typeName() const {
03614         return "ExtentParent";
03615       }
03616 
03617       UINT64 hashCode() const {
03618         return 129 + _inner->hashCode() + (_inner->hashCode() * 7);//???????????????
03619       }
03620 
03621 
03622       void walk( Walker& walker ) {
03623         walker.before(this);
03624         _inner->walk(walker);
03625         _outer->walk(walker);
03626         _documentStructure->walk(walker);
03627         walker.after(this);
03628       }
03629 
03630       Node* copy( Copier& copier ) {
03631         copier.before(this);
03632         
03633         RawExtentNode* newInner = dynamic_cast<RawExtentNode*>(_inner->copy(copier));
03634         RawExtentNode* newOuter = dynamic_cast<RawExtentNode*>(_outer->copy(copier));
03635         DocumentStructureNode * newDocStruct = dynamic_cast<DocumentStructureNode*>(_documentStructure->copy(copier));
03636         ExtentParent* extentInsideCopy = new ExtentParent( newInner, newOuter, newDocStruct );
03637         extentInsideCopy->setNodeName( nodeName() );
03638 
03639         return copier.after(this, extentInsideCopy);
03640       }
03641     }; // end ExtentParent
03642 
03643                 //
03644                 // WildcardTerm
03645                 //
03646     class WildcardTerm : public RawExtentNode {
03647     private:
03648                         std::string _normalizedTerm;
03649 
03650                         void normalizeTerm() {
03651                                 // remove the wildcard character
03652                                 std::string::size_type wpos=_normalizedTerm.rfind("*");
03653                                 if (wpos!=std::string::npos) {
03654           _normalizedTerm=_normalizedTerm.substr(0, wpos);
03655                                 }
03656 
03657                                 // lowercase the term
03658                                 for (size_t i=0; i < _normalizedTerm.size(); ++i) {
03659                                         _normalizedTerm[i]=tolower(_normalizedTerm[i]);
03660                                 }
03661                         }
03662 
03663     public:
03664       WildcardTerm() {}
03665                         WildcardTerm( std::string text) :
03666                         _normalizedTerm(text)
03667       {
03668         normalizeTerm();
03669       }
03670 
03671       WildcardTerm( Unpacker& unpacker ) {
03672         _normalizedTerm = unpacker.getString( "normalizedTerm" );
03673       } 
03674 
03675       std::string typeName() const {
03676         return "WildcardTerm";
03677       }
03678 
03679       std::string queryText() const {
03680                                 return (_normalizedTerm + "*");
03681       }
03682 
03683       UINT64 hashCode() const {
03684         int accumulator = 7;
03685 
03686         indri::utility::GenericHash<const char*> hash;
03687         return accumulator + hash( _normalizedTerm.c_str() );
03688       }
03689 
03690                         void setTerm( std::string term ) {
03691                                 _normalizedTerm=term;
03692                                 normalizeTerm();
03693       }
03694 
03695                         std::string getTerm() {
03696         return _normalizedTerm;
03697       }
03698 
03699       bool operator== ( Node& node ) {
03700         WildcardTerm* other = dynamic_cast<WildcardTerm*>(&node);
03701 
03702         if( other == this )
03703           return true;
03704 
03705         if( !other )
03706           return false;
03707 
03708         if (other->getTerm()==_normalizedTerm) 
03709                                         return true;
03710 
03711                                 return false;
03712       }
03713 
03714       void pack( Packer& packer ) {
03715         packer.before(this);
03716         packer.put( "normalizedTerm", _normalizedTerm );
03717         packer.after(this);
03718       }
03719 
03720       void walk( Walker& walker ) {
03721         walker.before(this);
03722         walker.after(this);
03723       }
03724 
03725       Node* copy( Copier& copier ) {
03726         copier.before(this);
03727         
03728         WildcardTerm* duplicate = new WildcardTerm(_normalizedTerm);
03729         duplicate->setNodeName( nodeName() );
03730 
03731         return copier.after(this, duplicate);
03732       }
03733     };
03734 
03735     class PlusNode : public UnweightedCombinationNode {
03736     public:
03737       PlusNode() {}
03738       PlusNode( Unpacker& unpacker ) {
03739         _unpack( unpacker );
03740       }
03741 
03742       std::string typeName() const {
03743         return "PlusNode";
03744       }
03745 
03746       std::string queryText() const {
03747         std::stringstream qtext;
03748         qtext << "#plus(";
03749         _childText(qtext);
03750         qtext << ")";
03751 
03752         return qtext.str();
03753       } 
03754 
03755       UINT64 hashCode() const {
03756         return 259 + _hashCode();
03757       }
03758 
03759       void walk( Walker& walker ) {
03760         _walk( this, walker );
03761       }
03762       
03763       Node* copy( Copier& copier ) {
03764         return _copy( this, copier );
03765       }
03766     };
03767 
03768     class WPlusNode : public WeightedCombinationNode {
03769     public:
03770       WPlusNode() {}
03771       WPlusNode( Unpacker& unpacker ) {
03772         _unpack( unpacker );
03773       }
03774 
03775       std::string typeName() const {
03776         return "WPlusNode";
03777       }
03778 
03779       std::string queryText() const {
03780         std::stringstream qtext;
03781         qtext << "#wplus(";
03782         _childText(qtext);
03783         qtext << ")";
03784 
03785         return qtext.str();
03786       } 
03787 
03788       UINT64 hashCode() const {
03789         return 261 + _hashCode();
03790       }
03791 
03792       void walk( Walker& walker ) {
03793         _walk( this, walker );
03794       }
03795       
03796       Node* copy( Copier& copier ) {
03797         return _copy( this, copier );
03798       }
03799     };
03800 
03801 
03802   }
03803 }
03804 
03805 #endif // INDRI_QUERYSPEC_HPP

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4