Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

lemur::parse::Arabic_Stemmer Class Reference

#include <Arabic_Stemmer.hpp>

List of all members.

Public Member Functions

 Arabic_Stemmer (std::string stemFunc)
 ~Arabic_Stemmer ()
void stemTerm (char *, char *)

Private Member Functions

void arabic_remove_diacritics (char *, char *)
void arabic_stop (char *, char *)
void no_stem (char *, char *)
void arabic_norm2 (char *, char *)
void arabic_norm2_stop (char *, char *)
void arabic_light10 (char *, char *)
void arabic_light10_stop (char *, char *)
bool on_stop_list (char *word)
int is_whitespace (const char c)
void remove_definite_articles (char *word, char *result)
void remove_all_suffixes (char *word, char *result, size_t lenlimit)

Private Attributes

void(Arabic_Stemmer::* stem_fct )(char *, char *)
std::set< const char *, ltstr > stop_words_ht

Static Private Attributes

stem_info_t stemtable []
const int ArabicVowel [256]
const int Norm3Char [256]
const int NormChar [256]
const int isWhitespace [256]
const char * stopwords []
const char * suffixes []
const char * defarticles []


Constructor & Destructor Documentation

lemur::parse::Arabic_Stemmer::Arabic_Stemmer (std::string stemFunc)
 

lemur::parse::Arabic_Stemmer::~Arabic_Stemmer ()
 


Member Function Documentation

void lemur::parse::Arabic_Stemmer::arabic_light10 (char *, char *) [private]
 

void lemur::parse::Arabic_Stemmer::arabic_light10_stop (char *, char *) [private]
 

void lemur::parse::Arabic_Stemmer::arabic_norm2 (char *, char *) [private]
 

void lemur::parse::Arabic_Stemmer::arabic_norm2_stop (char *, char *) [private]
 

void lemur::parse::Arabic_Stemmer::arabic_remove_diacritics (char *, char *) [private]
 

void lemur::parse::Arabic_Stemmer::arabic_stop (char *, char *) [private]
 

int lemur::parse::Arabic_Stemmer::is_whitespace (const char c) [private]
 

void lemur::parse::Arabic_Stemmer::no_stem (char *, char *) [private]
 

bool lemur::parse::Arabic_Stemmer::on_stop_list (char *word) [private]
 

void lemur::parse::Arabic_Stemmer::remove_all_suffixes (char *word, char *result, size_t lenlimit) [private]
 

void lemur::parse::Arabic_Stemmer::remove_definite_articles (char *word, char *result) [private]
 

void lemur::parse::Arabic_Stemmer::stemTerm (char *, char *)
 


Member Data Documentation

const int lemur::parse::Arabic_Stemmer::ArabicVowel [static, private]
 

Initial value:

 {
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0xc1,0xc2,0xc3,0xc4,0xc5,0xc6,0xc7,   0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0xe6,0,0,0,0,0,0xec,0xed,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}

const char * lemur::parse::Arabic_Stemmer::defarticles [static, private]
 

Initial value:

 {"ال", "وال","بال", "كال", 
                                           "فال", "لل", "\0"}

const int lemur::parse::Arabic_Stemmer::isWhitespace [static, private]
 

Initial value:

 {
      0,0,0,0,0,0,0,0,0,1,1,1,1,1,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,
      1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}

const int lemur::parse::Arabic_Stemmer::Norm3Char [static, private]
 

Initial value:

 {
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0x81,0,0,0,0,0,0,0,0,0x8a,0,0,0x8d,0x8e,0x8f,
      0x90,0,0,0,0,0,0,0,0x98,0,0x9a,0,0,0,0,0x9f,
      0,0,0,0,0,0,0,0,0,0,0xaa,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0xc0,0xc7,0xc7,0xc7,0xc7,0xc7,0xc7,0xc7,0xc8,0xe5,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
      0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,   0,0xd8,0xd9,0xda,0xdb,   0,0xdd,0xde,0xdf,
      0,0xe1,   0,0xe3,0xe4,0xe5,0xe6,   0,   0,   0,   0,   0,0xed, 0xed,   0,   0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}

const int lemur::parse::Arabic_Stemmer::NormChar [static, private]
 

Initial value:

 {
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0,0x81,0,0,0,0,0,0,0,0,0x8a,0,0,0x8d,0x8e,0x8f,
      0x90,0,0,0,0,0,0,0,0x98,0,0x9a,0,0,0,0,0x9f,
      0,0,0,0,0,0,0,0,0,0,0xaa,0,0,0,0,0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
      0xc0,0xc1,0xc7,0xc7,0xc4,0xc7,0xc6,0xc7,0xc8,0xe5,0xca,0xcb,0xcc,0xcd,0xce,0xcf,
      0xd0,0xd1,0xd2,0xd3,0xd4,0xd5,0xd6,   0,0xd8,0xd9,0xda,0xdb,   0,0xdd,0xde,0xdf,
      0,0xe1,   0,0xe3,0xe4,0xe5,0xe6,   0,   0,   0,   0,   0,0xed, 0xed,   0,   0,
      0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}

void(Arabic_Stemmer::* lemur::parse::Arabic_Stemmer::stem_fct)(char *, char *) [private]
 

Arabic_Stemmer::stem_info_t lemur::parse::Arabic_Stemmer::stemtable [static, private]
 

Initial value:

 {
      {"none", &Arabic_Stemmer::no_stem},
      {"arabic_stop", &Arabic_Stemmer::arabic_stop},
      {"arabic_norm2", &Arabic_Stemmer::arabic_norm2},
      {"arabic_norm2_stop", &Arabic_Stemmer::arabic_norm2_stop},
      {"arabic_light10", &Arabic_Stemmer::arabic_light10}, 
      {"arabic_light10_stop", &Arabic_Stemmer::arabic_light10_stop}
    }

std::set<const char *, ltstr> lemur::parse::Arabic_Stemmer::stop_words_ht [private]
 

const char * lemur::parse::Arabic_Stemmer::stopwords [static, private]
 

Initial value:

 
      { "ان","بعد", "ضد", "يلي", "الى", "في", "من", "حتى", "وهو", "يكون",
        "به", "وليس", "أحد", "على", "وكان", "تلك", "كذلك", "التي", "وبين",
        "فيها", "عليها", "إن", "وعلى", "لكن", "عن", "مساء", "ليس", "منذ",
        "الذي", "أما", "حين", "ومن", "لا", "ليسب", "وكانت", "أي", "ما", "عنه",
        "حول", "دون", "مع", "لكنه", "ولكن", "له", "هذا", "والتي","فقط", "ثم",
        "هذه", "أنه", "تكون", "قد", "بين", "جدا", "لن", "نحو", "كان", "لهم",
        "لأن", "اليوم", "لم", "هؤلاء", "فإن", "فيه", "ذلك", "لو", "عند",
        "اللذين", "كل", "بد", "لدى", "وثي", "أن", "ومع", "فقد", "بل", "هو",
        "عنها", "منه", "بها", "وفي", "فهو", "تحت", "لها", "أو", "إذ", "علي",
        "عليه", "كما", "كيف", "هنا", "وقد", "كانت", "لذلك", "أمام", "هناك",
        "قبل", "معه", "يوم", "منها", "إلى", "إذا", "هل", "حيث", "هي", "اذا",
        "او", "و", "ما", "لا", "الي", "إلي", "مازال", "لازال", "لايزال",
        "مايزال", "اصبح", "أصبح", "أمسى", "امسى", "أضحى", "اضحى", "ظل",
        "مابرح", "مافتئ", "ماانفك", "بات", "صار", "ليس", "إن", "كأن",
        "ليت", "لعل", "لاسيما", "ولايزال", "الحالي", "ضمن", "اول", "وله",
        "ذات", "اي", "بدلا", "اليها", "انه", "الذين", "فانه", "وان",
        "والذي", "وهذا", "لهذا", "الا", "فكان", "ستكون", "مما", "أبو",
        "بإن", "الذي", "اليه", "يمكن", "بهذا", "لدي", "وأن", "وهي", "وأبو",
        "آل", "الذي", "هن", "الذى", NULL }

const char * lemur::parse::Arabic_Stemmer::suffixes [static, private]
 

Initial value:

 {"ها","ان","ات","ون","ين","يه","ية",
                                        "ه","ة","ي","\0"}


The documentation for this class was generated from the following files:
Generated on Tue Jun 15 11:03:06 2010 for Lemur by doxygen 1.3.4