Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

DocOffsetParser.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002-2003 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 #ifndef _DOCOFFSETPARSER_HPP
00013 #define _DOCOFFSETPARSER_HPP
00014 #include "Parser.hpp"
00015 #include "Match.hpp"
00016 namespace lemur
00017 {
00019   namespace parse 
00020   {
00021     
00024     class DocOffsetParser : public lemur::api::TextHandler {
00025 
00026     public:
00028       DocOffsetParser(lemur::api::Parser *parser) : p(parser) { 
00029         p->setTextHandler(this);
00030       }
00031   
00032       virtual ~DocOffsetParser(){}
00033 
00035       virtual char *handleWord(char * word) {
00036         if (word != NULL) {
00037           int end = p->fileTell() - 1;
00038           int start = (end - strlen(word)) + 1;
00039           Match m;
00040           m.start = start;
00041           m.end = end;    
00042           offsets.push_back(m);
00043         }
00044         return word;
00045       }
00047       virtual void parseString(char *buffer) {
00048         offsets.clear();
00049         p->parseBuffer(buffer, strlen(buffer));
00050       }
00052       vector <Match> getOffsets() { return offsets; }
00053     private:
00055       vector <Match> offsets;
00056       lemur::api::Parser *p;
00057     };
00058   }
00059 }
00060 
00061 #endif

Generated on Tue Jun 15 11:02:53 2010 for Lemur by doxygen 1.3.4