Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

Parser.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2001 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 #ifndef _PARSER_HPP
00013 #define _PARSER_HPP
00014 
00015 #include "TextHandler.hpp"
00016 #include "WordSet.hpp"
00017 namespace lemur
00018 {
00019   namespace api
00020   {
00023 
00030     class Parser : public TextHandler {
            // Abstract parsing interface derived from TextHandler.
            // parseFile(), parseBuffer() and fileTell() are pure virtual, so
            // only concrete subclasses can be instantiated. The class also
            // carries bookkeeping shared by subclasses (docpos, parsefile)
            // and two acronym WordSet pointers.
00031     public:
            // Class-wide string constants; their values are defined outside
            // this header.
            // NOTE(review): presumably used to label this handler's type and
            // name within the TextHandler framework -- confirm in Parser.cpp.
00032       static const string category;
00033       static const string identifier;
00034 
            // Constructor and virtual destructor (bodies are not in this
            // header). The destructor is virtual, so a subclass object can be
            // destroyed through a Parser pointer.
00035       Parser();
00036       virtual ~Parser();
00037 
            // Parse the input named by filename. Virtual but not pure, so a
            // default implementation exists outside this header.
            // NOTE(review): presumably records the name and delegates to
            // parseFile() -- confirm in Parser.cpp.
00040       virtual void parse(const string &filename);
00041   
            // Parse the file named by filename. Pure virtual: every concrete
            // subclass must supply the implementation.
00044       virtual void parseFile(const string &filename) = 0;
00045 
            // Parse text held in memory. Pure virtual.
            // buf: pointer to the (non-const) character data.
            // len: NOTE(review): presumably the number of chars available in
            //      buf -- confirm against an implementing subclass.
00047       virtual void parseBuffer(char * buf, int len) = 0;
00048 
            // Supply the acronym list as an existing WordSet.
            // NOTE(review): the pointer is to const and there is a member
            // named borrowedacros, so this overload presumably does not take
            // ownership -- confirm in Parser.cpp before relying on lifetime.
00052       virtual void setAcroList(const lemur::utility::WordSet * acronyms);
00053 
            // Supply the acronym list by file name (argument taken by value).
            // NOTE(review): presumably loads the file into a WordSet held in
            // myacros -- confirm in Parser.cpp.
00055       virtual void setAcroList(string filename);
00056 
            // Pure virtual position query returning a long.
            // NOTE(review): presumably the current offset in the file being
            // parsed -- units and origin are defined by the subclass.
00058       virtual long fileTell() const = 0;
00059 
            // Returns the current value of docpos.
00061       virtual long getDocBytePos() const { return docpos; }
00062 
            // Returns a copy of parsefile.
00064       virtual const string getParseFile() const { return parsefile; }
00065 
00066     protected: 
            // Non-virtual helpers available to subclasses.
            // NOTE(review): isAcronym presumably tests word against the
            // configured acronym list(s) and clearAcros presumably discards
            // them; neither body is visible here -- confirm in Parser.cpp.
00069       bool isAcronym(const char * word);
00071       void clearAcros();
00072 
            // Value reported by getDocBytePos(). Nothing in this header
            // assigns it.
            // NOTE(review): presumably maintained by subclasses while parsing.
00073       long docpos; 
00074 
            // Value reported by getParseFile().
            // NOTE(review): presumably the name of the file being parsed.
00075       string parsefile;
00076     private:
            // Acronym WordSet pointers, one to non-const and one to const.
            // NOTE(review): the names suggest myacros is owned by this object
            // and borrowedacros is caller-owned -- confirm in Parser.cpp. If
            // myacros is owned, copying a Parser subclass would need care, as
            // no copy operations are declared here.
00078       lemur::utility::WordSet * myacros;
00079       const lemur::utility::WordSet* borrowedacros;
00080     };
00081   }
00082 }
00083 
00084 #endif

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4