Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

TermInfoList.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 #ifndef _TERMINFOLIST_HPP
00014 #define _TERMINFOLIST_HPP
00015 
00016 #include "IndexTypes.hpp"
00017 #include "Exception.hpp"
00018 
00019 namespace lemur
00020 {
00021   namespace api 
00022   {
00023     
00025 
00034     class TermInfo {
00035     public:
00036       TermInfo() {}
00037       TermInfo( TERMID_T termID, COUNT_T termCount) :
00038         tid(termID), tcount(termCount) {}
00039       virtual ~TermInfo() {}
00040 
00042       virtual TERMID_T termID() const {return tid;}
00043 
00045       virtual void termID(TERMID_T id) {tid = id;}
00046 
00048       virtual COUNT_T count() const {return tcount;}
00049 
00051       virtual void count(COUNT_T c) {tcount = c;}
00052 
00053       // Return list of positions this term occurs in this document
00054       // (can be a list of 1 item)
00055       // Default implementation to return NULL if no position information available for this TermInfo
00056       // List of positions is better used for bag of words support
00057       virtual const LOC_T* positions() const{ return NULL; }
00058 
00059       // Return position this term occurs in this document
00060       // Better for sequence of words support
00061       // When list of positions can be obtained, this returns the first item in the list
00062       // Default implementation to return -1 if no position information available for this TermInfo
00063       virtual LOC_T position() const { return -1; }
00064 
00065       virtual void position(LOC_T pos) {}
00066 
00067     protected:
00068       TERMID_T tid;
00069       COUNT_T tcount;
00070     };
00071 
00072 
00074 
00082     class TermInfoList {
00083     public:
00084       virtual ~TermInfoList() {}
00085 
00086     protected:
00087       // Helper functions for iterator, subclasses should override
00089       virtual TermInfo* newElement() const { return new TermInfo(); }
00091       virtual TermInfo* getElement(TermInfo* elem, POS_T position) const =0;
00094       virtual void assignElement(TermInfo* to, TermInfo* from) const { *to = *from; }
00096       virtual POS_T beginPosition() const =0;
00098       virtual POS_T endPosition() const =0;
00100       virtual POS_T nextPosition(POS_T position) const =0;
00101 
00102     public:
00103       // Single, internal iteration
00105       virtual void startIteration()const=0;
00107       virtual bool hasMore()const=0;
00109       virtual TermInfo *nextEntry()const=0;
00110 
00112       virtual int size()=0;
00113 
00116       virtual TermInfo* operator[](int index) const = 0;
00117 
00118       // C++ style forward input (readonly) iterator
00120       class iterator : std::iterator<std::input_iterator_tag, TermInfo> {
00121       public:
00122         iterator() : list(NULL), position(0), current(NULL) {}
00123         iterator(const iterator& other) {
00124           list = other.list;
00125           position = other.position;
00126           if ((list) && (other.current) ) {
00127             current = list->newElement();
00128             list->assignElement(current, other.current);  // list knows element class
00129           } else {
00130             current = NULL;
00131           }
00132         }
00133         iterator(const TermInfoList* til, POS_T pos) : list(til), position(pos) {
00134           if (list) {
00135             if (position != list->endPosition()) {
00136               current = list->newElement();   // get new element
00137               current = list->getElement(current, position);
00138             } else {
00139               current = NULL;
00140             }
00141           }
00142         }
00143 
00144         ~iterator() {
00145           delete(current);
00146         }
00147 
00148         TermInfo& operator*() { return *current; }
00149         TermInfo* operator->() { return current; }
00150         iterator& operator++() {
00151           position = list->nextPosition(position);
00152           if (position != list->endPosition())
00153             current = list->getElement(current, position);
00154           return *this;
00155         }
00156         // identical to prefix version
00157         iterator& operator++(int) {
00158           return operator++();
00159         }
00160         bool operator==(const iterator& other) const {
00161           return (list == other.list) && (position == other.position);
00162         }
00163         bool operator!=(const iterator& other) const {
00164           return (list != other.list) || (position != other.position);
00165         }
00166         iterator& operator=(const iterator& other) {
00167           list = other.list;
00168           position = other.position;
00169           if ((list) && (other.current)) {
00170             if (!current)
00171               current = list->newElement();
00172             list->assignElement(current, other.current);  // list knows element class
00173           } else {
00174             delete(current);
00175             current=NULL;
00176           }
00177           return *this;
00178         }
00181         void seek(POS_T pos) {
00182           position = pos;
00183           if (position != list->endPosition()) {
00184             if (!current)
00185               current = list->newElement();
00186             current = list->getElement(current, position);
00187           } else {
00188             delete(current);
00189             current = NULL;
00190           }
00191         }
00192 
00193       protected:
00194         const TermInfoList* list;  // list associated with this iterator
00195         POS_T position;     // current position in list
00196         TermInfo* current;   // current element of list
00197       }; // end of nested iterator declaration
00198  
00199       iterator& begin() const { 
00200         iterator it(this, beginPosition());
00201         itbegin = it;
00202         return itbegin;
00203       }
00204       iterator& end() const { 
00205         iterator it(this, endPosition());
00206         itend = it;
00207         return itend;
00208       }
00209 
00210     protected:
00211       mutable TermInfoList::iterator itbegin;  // iterator at head of list
00212       mutable TermInfoList::iterator itend;    // iterator at end of list
00213       friend class iterator;
00214     };
00215   }
00216 }
00217 
00218 
00219 #endif

Generated on Tue Jun 15 11:02:56 2010 for Lemur by doxygen 1.3.4