Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

indri::parse::KrovetzStemmer Class Reference

#include <KrovetzStemmer.hpp>

List of all members.

Public Member Functions

 KrovetzStemmer ()
 ~KrovetzStemmer ()
char * kstem_stemmer (char *term)
 stem a term using the Krovetz algorithm. The stem returned may be longer than the input term. May return a pointer to the private attribute stem. Performs case normalization on its input argument. Return values should be copied before calling the method again.

int kstem_stem_tobuffer (char *term, char *buffer)
 stem a term using the Krovetz algorithm into the specified buffer. The stem returned may be longer than the input term. Performs case normalization on its input argument.

void kstem_add_table_entry (const char *variant, const char *word, bool exc=false)
 Add an entry to the stemmer's dictionary table.


Static Public Attributes

const int MAX_WORD_LENGTH = 25
 maximum number of characters in a word to be stemmed.


Private Types

typedef indri::parse::KrovetzStemmer::dictEntry dictEntry
 Dictionary table entry.

typedef indri::parse::KrovetzStemmer::cacheEntry cacheEntry
 Two term hashtable entry for caching across calls.

typedef hash_map< const char *, dictEntry, hash< const char * >, eqstr > dictTable

Private Member Functions

bool ends (const char *s, int sufflen)
void setsuff (const char *str, int length)
dictEntry * getdep (char *word)
bool lookup (char *word)
bool cons (int i)
bool vowelinstem ()
bool vowel (int i)
bool doublec (int i)
void plural ()
void past_tense ()
void aspect ()
void ion_endings ()
void er_and_or_endings ()
void ly_endings ()
void al_endings ()
void ive_endings ()
void ize_endings ()
void ment_endings ()
void ity_endings ()
void ble_endings ()
void ness_endings ()
void ism_endings ()
void ic_endings ()
void ncy_endings ()
void nce_endings ()
void loadTables ()

Private Attributes

indri::thread::Mutex _stemLock
 lock for protecting stem calls

dictTable dictEntries
cacheEntry * stemCache
int stemhtsize
int k
int j
char * word
char stem [MAX_WORD_LENGTH]


Member Typedef Documentation

typedef struct indri::parse::KrovetzStemmer::cacheEntry indri::parse::KrovetzStemmer::cacheEntry [private]
 

Two term hashtable entry for caching across calls.

typedef struct indri::parse::KrovetzStemmer::dictEntry indri::parse::KrovetzStemmer::dictEntry [private]
 

Dictionary table entry.

typedef hash_map<const char *, dictEntry, hash<const char *>, eqstr> indri::parse::KrovetzStemmer::dictTable [private]
 


Constructor & Destructor Documentation

indri::parse::KrovetzStemmer::KrovetzStemmer ()
 

indri::parse::KrovetzStemmer::~KrovetzStemmer ()
 


Member Function Documentation

void indri::parse::KrovetzStemmer::al_endings ()  [inline, private]
 

void indri::parse::KrovetzStemmer::aspect ()  [inline, private]
 

void indri::parse::KrovetzStemmer::ble_endings ()  [inline, private]
 

bool indri::parse::KrovetzStemmer::cons (int i)  [inline, private]
 

bool indri::parse::KrovetzStemmer::doublec (int i)  [inline, private]
 

bool indri::parse::KrovetzStemmer::ends (const char * s, int sufflen)  [inline, private]
 

void indri::parse::KrovetzStemmer::er_and_or_endings ()  [inline, private]
 

KrovetzStemmer::dictEntry * indri::parse::KrovetzStemmer::getdep (char * word)  [inline, private]
 

void indri::parse::KrovetzStemmer::ic_endings ()  [inline, private]
 

void indri::parse::KrovetzStemmer::ion_endings ()  [inline, private]
 

void indri::parse::KrovetzStemmer::ism_endings ()  [inline, private]
 

void indri::parse::KrovetzStemmer::ity_endings ()  [inline, private]
 

void indri::parse::KrovetzStemmer::ive_endings ()  [inline, private]
 

void indri::parse::KrovetzStemmer::ize_endings ()  [inline, private]
 

void indri::parse::KrovetzStemmer::kstem_add_table_entry (const char * variant, const char * word, bool exc = false)
 

Add an entry to the stemmer's dictionary table.

Parameters:
variant the spelling for the entry.
word the stem to use for the variant. If "", the variant stems to itself.
exc Is the word an exception to the spelling rules.

int indri::parse::KrovetzStemmer::kstem_stem_tobuffer (char * term, char * buffer)
 

stem a term using the Krovetz algorithm into the specified buffer. The stem returned may be longer than the input term. Performs case normalization on its input argument.

Parameters:
term the term to stem
buffer the buffer to hold the stemmed term. The buffer should be of size MAX_WORD_LENGTH or larger.
Returns:
the number of characters written to the buffer, including the terminating '\0'. If 0, the caller should use the value in term.

char * indri::parse::KrovetzStemmer::kstem_stemmer (char * term)
 

stem a term using the Krovetz algorithm. The stem returned may be longer than the input term. May return a pointer to the private attribute stem. Performs case normalization on its input argument. Return values should be copied before calling the method again.

Parameters:
term the term to stem
Returns:
the stemmed term or the original term if no stemming was performed.

void indri::parse::KrovetzStemmer::loadTables ()  [private]
 

bool indri::parse::KrovetzStemmer::lookup (char * word)  [inline, private]
 

void indri::parse::KrovetzStemmer::ly_endings ()  [inline, private]
 

void indri::parse::KrovetzStemmer::ment_endings ()  [inline, private]
 

void indri::parse::KrovetzStemmer::nce_endings ()  [inline, private]
 

void indri::parse::KrovetzStemmer::ncy_endings ()  [inline, private]
 

void indri::parse::KrovetzStemmer::ness_endings ()  [inline, private]
 

void indri::parse::KrovetzStemmer::past_tense ()  [inline, private]
 

void indri::parse::KrovetzStemmer::plural ()  [inline, private]
 

void indri::parse::KrovetzStemmer::setsuff (const char * str, int length)  [inline, private]
 

bool indri::parse::KrovetzStemmer::vowel (int i)  [inline, private]
 

bool indri::parse::KrovetzStemmer::vowelinstem ()  [inline, private]
 


Member Data Documentation

indri::thread::Mutex indri::parse::KrovetzStemmer::_stemLock [private]
 

lock for protecting stem calls

dictTable indri::parse::KrovetzStemmer::dictEntries [private]
 

int indri::parse::KrovetzStemmer::j [private]
 

int indri::parse::KrovetzStemmer::k [private]
 

const int indri::parse::KrovetzStemmer::MAX_WORD_LENGTH = 25 [static]
 

maximum number of characters in a word to be stemmed.

char indri::parse::KrovetzStemmer::stem[MAX_WORD_LENGTH] [private]
 

cacheEntry* indri::parse::KrovetzStemmer::stemCache [private]
 

int indri::parse::KrovetzStemmer::stemhtsize [private]
 

char* indri::parse::KrovetzStemmer::word [private]
 


The documentation for this class was generated from the following files:
Generated on Tue Jun 15 11:03:03 2010 for Lemur by doxygen 1.3.4