Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

StopperTransformation.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2003-2004 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 
00013 //
00014 // StopperTransformation
00015 //
00016 // 13 May 2004 -- tds
00017 //
00018 
00019 #ifndef INDRI_STOPPERTRANSFORMATION_HPP
00020 #define INDRI_STOPPERTRANSFORMATION_HPP
00021 
00022 #include "indri/Transformation.hpp"
00023 #include <string>
00024 #include <vector>
00025 #include "indri/Parameters.hpp"
00026 
00027 #ifdef WIN32
00028 #include <hash_set>
00029 #else
00030 // TODO: move the HAVE_GCC_VERSION helper macro below into a shared header
00031 #ifndef HAVE_GCC_VERSION  // HAVE_GCC_VERSION(MAJOR, MINOR): true when the compiler reports GCC version MAJOR.MINOR or newer
00032 #define HAVE_GCC_VERSION(MAJOR, MINOR) \
00033   (__GNUC__ > (MAJOR) || (__GNUC__ == (MAJOR) && __GNUC_MINOR__ >= (MINOR)))
00034 #endif /* ! HAVE_GCC_VERSION */
00035 #if HAVE_GCC_VERSION(4,3)
00036 // if GCC 4.3+
00037 #include <tr1/unordered_set>
00038 #else
00039 #include <ext/hash_set>
00040 #endif
00041 // 3.3 does not use __gnu_cxx, 3.4+ does.
00042 using namespace __gnu_cxx;
00043 #endif
00044 
00045 namespace indri
00046 {
00047   namespace parse
00048   {
00049     
00050     class StopperTransformation : public Transformation { // Transformation over ParsedDocument objects that carries a table of C-string words (_table); presumably used to drop stopwords -- confirm in the .cpp
00051     private:
00052       ObjectHandler<indri::api::ParsedDocument>* _handler; // non-null handler is presumably the next stage fed by handle(); assigned through setHandler() -- confirm in the .cpp
00053 #ifdef WIN32 // Windows build: stdext::hash_set keyed on const char*
00054       struct ltstr { // strict-weak ordering of C strings by content (strcmp), not by pointer value
00055         bool operator()( const char* s1,  const char* s2) const {
00056           return (strcmp(s1, s2) < 0);
00057         }
00058       };
00059       // Visual Studio 7's hash_set takes a hash_compare rather than a hash functor,
00060       // so it needs a < predicate rather than an == predicate.
00061       typedef stdext::hash_set< const char *, stdext::hash_compare< const char *, ltstr> > dictTable;
00062 #else // non-Windows build: GCC hash containers keyed on char*
00063       struct eqstr { // equality of C strings by content (strcmp), not by pointer value
00064         bool operator()(const char* s1, const char* s2) const { // const char* like ltstr; the table's char* keys convert implicitly
00065           return strcmp(s1, s2) == 0;
00066         }
00067       };
00068 #if HAVE_GCC_VERSION(4,3)
00069       typedef std::tr1::unordered_set<char *, std::tr1::hash<std::string>, eqstr> dictTable; // char* keys reach hash<std::string> through implicit conversion to std::string, so hashing is by content
00070 #else
00071       typedef hash_set<char *, hash<char *>, eqstr> dictTable; // hash_set/hash are unqualified here; they rely on the using-directive for __gnu_cxx earlier in this header
00072 #endif
00073 #endif
00074 
00075       dictTable _table; // the word table; who allocates and frees the char* storage is not visible in this header -- see the .cpp
00076 
00077     public:
00078       StopperTransformation(); // default constructor; takes no word list
00079       StopperTransformation( const std::vector<std::string>& stopwords ); // the constructors below mirror the read() overloads; presumably each loads its argument into _table -- confirm in the .cpp
00080       StopperTransformation( const std::vector<const char*>& stopwords );
00081       StopperTransformation( const std::vector<char*>& stopwords );
00082       StopperTransformation( indri::api::Parameters& stopwords );
00083       ~StopperTransformation();
00084 
00085       void read( const std::vector<std::string>& stopwords ); // read(): accepts a word list as std::string, const char* or char* vectors
00086       void read( const std::vector<const char*>& stopwords );
00087       void read( const std::vector<char*>& stopwords );
00088       void read( const std::string& filename ); // takes a file name; the expected file format is defined in the .cpp
00089       void read( indri::api::Parameters& stopwords ); // takes an indri Parameters object; the expected layout is defined in the .cpp
00090 
00091       indri::api::ParsedDocument* transform( indri::api::ParsedDocument* document ); // takes and returns a ParsedDocument pointer; whether the document is edited in place is defined in the .cpp
00092 
00093       void handle( indri::api::ParsedDocument* document ); // handler-style entry point for one document; presumably transforms it and forwards to _handler -- confirm in the .cpp
00094       void setHandler( ObjectHandler<indri::api::ParsedDocument>& handler ); // registers the handler (taken by reference; _handler is stored as a pointer)
00095     };
00096   }
00097 }
00098 
00099 #endif // INDRI_STOPPERTRANSFORMATION_HPP
00100 

Generated on Tue Jun 15 11:02:55 2010 for Lemur by doxygen 1.3.4