Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

IndriTextHandler.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2000-2004 Carnegie Mellon University.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software (and below), and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 #ifndef _INDRITEXTHANDLER_HPP
00013 #define _INDRITEXTHANDLER_HPP
00014 
00015 #include "Parser.hpp"
00016 #include "indri/ParsedDocument.hpp"
00017 #include "indri/IndexEnvironment.hpp"
00018 
00019 namespace lemur 
00020 {
00021   namespace parse 
00022   {
00023     
00029 #define DOCIDKEY "docno" // Key string "docno". NOTE(review): presumably the metadata key for the document id (see the `docid` member) -- confirm in the .cpp.
00030 
00031     class IndriTextHandler : public lemur::api::TextHandler {
            /// A lemur::api::TextHandler subclass that holds an
            /// indri::api::IndexEnvironment and an indri::api::ParsedDocument.
            // NOTE(review): only declarations are visible in this header;
            // presumably the handler feeds documents coming from a Lemur parser
            // into an Indri index -- confirm against the implementation file.
00032 
00033     public:
            /// Construct a handler.
            /// @param name    NOTE(review): presumably the name/path of the index
            ///                to build -- confirm.
            /// @param memory  NOTE(review): presumably a memory limit handed to the
            ///                Indri environment; units are not visible here -- confirm.
            /// @param p       Parser pointer (pointee is const). NOTE(review):
            ///                presumably stored in the `parser` member -- confirm.
00036       IndriTextHandler(const string &name, int memory, 
00037                        const lemur::api::Parser* p);
            /// Destructor. NOTE(review): whether it releases `curdocno` or closes
            /// the index is not visible from the declaration.
00038       ~IndriTextHandler();
00039 
            /// Handler taking a document number string.
            // NOTE(review): presumably invoked at the start of each document and
            // presumably overrides a TextHandler virtual -- confirm in base class.
00041       char * handleDoc(char * docno);
            /// Handler taking no arguments and returning nothing.
            // NOTE(review): presumably invoked when the current document ends,
            // which is likely where the document is added to the index -- confirm.
00043       void handleEndDoc();
            /// Handler taking a word, a const "original" string and a
            /// PropertyList pointer.
            // NOTE(review): presumably called once per token; meaning of the
            // returned pointer is defined by TextHandler -- confirm.
00045       char * handleWord(char * word, const char* original, PropertyList* list);
            /// Handler taking a tag name, a const "orig" string and a
            /// PropertyList pointer.
            // NOTE(review): presumably called when a tag opens -- confirm.
00046       char * handleBeginTag(char* tag, const char* orig, PropertyList* props);
            /// Handler with the same parameter list as handleBeginTag.
            // NOTE(review): presumably called when a tag closes -- confirm.
00047       char * handleEndTag(char* tag, const char* orig, PropertyList* props);
00048 
00049     protected:
            /// Indri index environment object, held by value.
00051       indri::api::IndexEnvironment env;
            /// Indri parsed-document object, held by value.
            // NOTE(review): presumably the document currently being assembled.
00053       indri::api::ParsedDocument document;
            /// Indri metadata pair.
            // NOTE(review): presumably the DOCIDKEY ("docno") entry -- confirm.
00055       indri::parse::MetadataPair docid;
            /// Raw pointer to a document-number string.
            // NOTE(review): ownership and lifetime are not visible here -- confirm
            // who allocates and frees this buffer.
00057       char* curdocno;
            /// Integer marking a document beginning.
            // NOTE(review): presumably an offset/position of the current
            // document's start; exact meaning not visible here -- confirm.
00059       int docbegin;
            /// Pointer to a const lemur::api::Parser.
            // NOTE(review): presumably non-owning (set from the constructor's
            // `p` argument) -- confirm.
00061       const lemur::api::Parser* parser;
00063     };
00064   }
00065 }
00066 
00067 #endif
00068 

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4