Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

ListIteratorNode.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 //
00014 // ListIteratorNode
00015 //
00016 // 26 January 2004 -- tds
00017 //
00018 
00019 #ifndef INDRI_LISTITERATORNODE_HPP
00020 #define INDRI_LISTITERATORNODE_HPP
00021 
00022 #include "indri/InferenceNetworkNode.hpp"
00023 #include "indri/Extent.hpp"
00024 #include <indri/greedy_vector>
00025 namespace indri
00026 {
00027   namespace infnet 
00028   {
00029     class ListIteratorNode : public InferenceNetworkNode {
00030     protected:
00031       indri::utility::greedy_vector<indri::index::Extent> _matches;
00032 
00033       // speedup for child siblings - 
00034       // stores last successful position of the matches(extent)
00035       // search; linear search assuming ascending order
00036       int _lastpos; 
00037       indri::index::Extent _lastExtent;
00038 
00039     public:
00041       void initpointer() {
00042         _lastpos=0;
00043       }
00044 
00046       void sortparent(indri::utility::greedy_vector<indri::index::Extent>& extents) {
00047         int sorted=0;
00048         int lastbegin = 0; int lastend = extents.size();
00049         while(lastbegin<lastend){
00050           int i=lastbegin;
00051           int end=lastend-1;
00052           lastbegin=lastend;
00053           lastend=i;
00054           for (;i<end;i++){
00055             if (extents[i].parent > extents[i+1].parent){
00056               indri::index::Extent x(extents[i]);
00057               extents[i]=extents[i+1];extents[i+1]=x;
00058               if(lastbegin>i)lastbegin=i;
00059               if(lastend<i+1)lastend=i+1;
00060             }
00061           }
00062         }
00063       }
00064 
00066       void sortbegin(indri::utility::greedy_vector<indri::index::Extent>& extents){
00067         int sorted=0;
00068         int lastbegin = 0; int lastend = extents.size();
00069         while(lastbegin<lastend){
00070           int i=lastbegin;
00071           int end=lastend-1;
00072           lastbegin=lastend;
00073           lastend=i;
00074           for (;i<end;i++){
00075             if (extents[i].begin > extents[i+1].begin){
00076               indri::index::Extent x(extents[i]);
00077               extents[i]=extents[i+1];extents[i+1]=x;
00078               if(lastbegin>i)lastbegin=i;
00079               if(lastend<i+1)lastend=i+1;
00080             }
00081           }
00082         }
00083       }
00084 
00086       virtual void prepare( lemur::api::DOCID_T documentID ) = 0;
00087 
00089       virtual const indri::utility::greedy_vector<indri::index::Extent>& extents() = 0;
00090 
00092       virtual void annotate( class Annotator& annotator, lemur::api::DOCID_T documentID, indri::index::Extent &extent ) = 0;
00093 
00094       // Moved work of identifying matches of an extent from ExtentRestriction to here.
00095       // This allows the computation for the list of matches for an extent to be 
00096       // overridden.  ExtentParent, ExtentChild, and ExtentDescendant are among the
00097       // classes that need to do this, as the parent/child/descedant relationships
00098       // are not based on extent containment - these relationships may be among
00099       // arbitrary fields in a document.
00100       virtual const indri::utility::greedy_vector<indri::index::Extent>& matches( indri::index::Extent &extent ) {
00101         int begin = extent.begin;
00102         int end = extent.end;
00103         _matches.clear();
00104         const indri::utility::greedy_vector<indri::index::Extent>& exts = extents();
00105 
00106         // if there's no extents or we have no length - just return
00107         if (begin == end || exts.size()==0) return _matches;
00108 
00109         // if we are dealing with child extents, we need to reverse the
00110         // list pointer to the last good position
00111         while((_lastpos > 0) && (exts[_lastpos-1].begin >= begin)){
00112           _lastpos--;
00113         }
00114 
00115         // now, we make sure we're in the correct position
00116         // after this loop, _lastpos->begin >= begin
00117         while((_lastpos < exts.size()) && (exts[_lastpos].begin < begin)){
00118           _lastpos++;
00119         }
00120 
00121         // for default DocListIteratorNode, any extent: begin+1 == end.
00122         while((_lastpos < exts.size()) && (exts[_lastpos].begin < end)) { 
00123           if(exts[_lastpos].end <= end) {
00124             indri::index::Extent ext(exts[_lastpos]);
00125             _matches.push_back(ext);
00126           } // end if(_exts[_lastpos].end<=end)
00127           _lastpos++;
00128         } // end while(_lastpos<_exts.size()&&_exts[_lastpos].begin<end)
00129 
00130 /***
00131  *** old method of matching child extents - deprecated 
00132  *
00133  *      for( size_t i = 0 ; i < exts.size(); i++ ) {
00134  *        if ( begin <= exts[i].begin && end >= exts[i].end ) {
00135  *          _matches.push_back( exts[i] );
00136  *        } else if ( exts[i].begin > end ) {
00137  *          break;
00138  *        }
00139  *      }
00140  **/
00141         return _matches;
00142       }
00143     };
00144   }
00145 }
00146 
00147 #endif // INDRI_LISTNODE_HPP
00148 

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4