Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

PowerPointDocumentExtractor.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2003-2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 //
00014 // PowerPointDocumentExtractor
00015 //
00016 // 14 June 2004 -- tds
00017 //
00018 
00019 #ifndef INDRI_POWERPOINTDOCUMENTEXTRACTOR_HPP
00020 #define INDRI_POWERPOINTDOCUMENTEXTRACTOR_HPP
00021 #ifdef WIN32
00022 #include <string>
00023 #include "indri/Buffer.hpp"
00024 #include "indri/UnparsedDocument.hpp"
00025 #undef WIN32_LEAN_AND_MEAN
00026 #undef NOGDI
00027 #include <windows.h>
00028 #include <unknwn.h>
00029 #include <oaidl.h>
00030 #include "indri/DocumentIterator.hpp"
00031 #include "indri/OfficeHelper.hpp"
00032 
00033 namespace indri
00034 {
00035   namespace parse
00036   {
00037     
00038     class PowerPointDocumentExtractor : public DocumentIterator { // Windows-only DocumentIterator (the whole header is guarded by WIN32); judging by its name and COM members it presumably pulls document text out of PowerPoint files via COM automation -- confirm in the .cpp
00039     private:
00040       IUnknown* _powerPointUnknown; // raw COM IUnknown pointer; presumably the PowerPoint application object -- TODO confirm
00041       IDispatch* _powerPointDispatch; // raw COM IDispatch pointer; presumably the automation interface of that same object -- TODO confirm
00042       IDispatch* _presentationsDispatch; // raw COM IDispatch pointer; presumably the application's Presentations collection -- TODO confirm
00043       UnparsedDocument _unparsedDocument; // document record held by value; presumably what nextDocument() hands out a pointer to -- TODO confirm
00044 
00045       std::string _documentPath; // presumably the filename given to open() -- TODO confirm
00046       indri::utility::Buffer _documentBuffer; // buffer owned by this object; presumably backs the extracted text -- TODO confirm
00047       bool _documentWaiting; // presumably true while an opened file has not yet been returned by nextDocument() -- TODO confirm
00048       OfficeHelper _officeHelper; // helper declared in indri/OfficeHelper.hpp; its role is not visible from this header
00049 
00050     public:
00051       PowerPointDocumentExtractor(); // default constructor; definition is not in this header
00052       ~PowerPointDocumentExtractor(); // NOTE(review): raw COM pointer members plus a user-declared destructor, yet no copy constructor/assignment is declared, so the class is implicitly copyable -- verify that a copy cannot release the same interface twice
00053 
00054       void open( const std::string& filename ); // filename: path of the file to read; definition is not in this header
00055       UnparsedDocument* nextDocument(); // returns a pointer to an UnparsedDocument; ownership and the end-of-input value are not visible here -- check the .cpp
00056       void close(); // counterpart to open(); definition is not in this header
00057     };
00058   }
00059 }
00060 
00061 #endif // WIN32
00062 #endif // INDRI_POWERPOINTDOCUMENTEXTRACTOR_HPP

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4