Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

IndriTokenizer.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2003-2005 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 //
00014 // Tokenizer
00015 // 
00016 // 15 September 2005 -- mwb
00017 //
00018 
00019 #ifndef INDRI_TOKENIZER_HPP
00020 #define INDRI_TOKENIZER_HPP
00021 
00022 #include "indri/ObjectHandler.hpp"
00023 #include "indri/UnparsedDocument.hpp"
00024 #include "indri/TokenizedDocument.hpp"
00025 // #include <map>
00026 // #include <vector>
00027 
00028 namespace indri {
00029   namespace parse {
00030       
00031     class Tokenizer : public ObjectHandler<UnparsedDocument> { // Abstract tokenizer interface: tokenize(), handle() and setHandler() are pure virtual, so a concrete subclass must implement all three.
00032     public:
00033       Tokenizer() {} // Empty constructor; this interface declares no data members to initialize.
00034       virtual ~Tokenizer() {} // Virtual so that deleting a subclass through a Tokenizer* runs the subclass destructor.
00035 
00036       virtual TokenizedDocument* tokenize( UnparsedDocument* document ) = 0; // Takes an UnparsedDocument* and returns a TokenizedDocument*. NOTE(review): ownership of both pointers is not stated here -- confirm against implementations.
00037       virtual void handle( UnparsedDocument* document ) = 0; // Receives an UnparsedDocument* and returns nothing. NOTE(review): presumably the ObjectHandler<UnparsedDocument> entry point -- confirm against ObjectHandler.hpp.
00038       virtual void setHandler( ObjectHandler<TokenizedDocument>& handler ) = 0; // Accepts an ObjectHandler<TokenizedDocument> by reference. NOTE(review): presumably the downstream consumer of tokenized documents -- confirm against implementations.
00039     }; // class Tokenizer
00040   } // namespace parse
00041 } // namespace indri
00042 
00043 #endif // INDRI_TOKENIZER_HPP
00044 
00045 

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4