Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

FileClassEnvironmentFactory.cpp File Reference

#include "indri/FileClassEnvironmentFactory.hpp"
#include "indri/DocumentIteratorFactory.hpp"
#include "indri/ParserFactory.hpp"
#include "indri/TokenizerFactory.hpp"
#include <ctype.h>
#include <map>
#include <vector>

Classes

struct  extension_conflations
struct  file_class_environment_spec

Functions

std::string _canonicalExtension (const std::string &name)
void copy_strings_to_vector (std::vector< std::string > &vec, const char **array)
void copy_string_tuples_to_map (std::map< indri::parse::ConflationPattern *, std::string > &m, const char **array)
void cleanup_conflations_map (std::map< indri::parse::ConflationPattern *, std::string > &conflations)
indri::parse::FileClassEnvironment * build_file_class_environment (const file_class_environment_spec *spec)
indri::parse::FileClassEnvironment * build_file_class_environment (const indri::parse::FileClassEnvironmentFactory::Specification *spec)

Variables

const char * pdf_index_tags [] = { "title", "author", 0 }
const char * pdf_metadata_tags [] = { "title", "author", 0 }
const char * html_index_tags [] = { "title", "author", "h1", "h2", "h3", "h4", 0 }
const char * html_metadata_tags [] = { "title", "author", 0 }
const char * html_conflations [] = { "h1", NULL, NULL, "heading", "h2", NULL, NULL, "heading", "h3", NULL, NULL, "heading", "h4", NULL, NULL, "heading", 0, 0, 0, 0 }
const char * trec_include_tags [] = { "text", "hl", "head", "headline", "title", "ttl", "dd", "date", "date_time", "lp", "leadpara", 0 }
const char * trecalt_include_tags [] = { "text", 0 }
const char * trecalt_index_tags [] = { "text", 0 }
const char * trec_metadata_tags [] = { "docno", "title", "author", 0 }
const char * trec_conflations [] = { "hl", NULL, NULL, "headline", "head", NULL, NULL, "headline", "ttl", NULL, NULL, "title", "dd", NULL, NULL, "date", "date_time", NULL, NULL, "date", 0, 0, 0, 0 }
const char * trec_index_tags [] = { "author", "hl", "head", "headline", "title", "ttl", "dd", "date_time", "date", 0 }
const char * html_exclude_tags [] = { "script", "style", 0}
const char * _html [] = {"html", "htm", 0}
const char * _txt [] = {"txt", "text", 0}
const char * _doc [] = {"doc", "docx", 0}
const char * _ppt [] = {"ppt", "pptx", 0}
extension_conflations extensions []
file_class_environment_spec environments []

Function Documentation

std::string _canonicalExtension (const std::string & name) [static]
 

indri::parse::FileClassEnvironment* build_file_class_environment (const indri::parse::FileClassEnvironmentFactory::Specification * spec)
 

indri::parse::FileClassEnvironment* build_file_class_environment (const file_class_environment_spec * spec)
 

void cleanup_conflations_map (std::map< indri::parse::ConflationPattern *, std::string > & conflations) [static]
 

void copy_string_tuples_to_map (std::map< indri::parse::ConflationPattern *, std::string > & m,
const char ** array) [static]
 

void copy_strings_to_vector (std::vector< std::string > & vec,
const char ** array) [static]
 


Variable Documentation

const char* _doc[] = {"doc", "docx", 0} [static]
 

const char* _html[] = {"html", "htm", 0} [static]
 

const char* _ppt[] = {"ppt", "pptx", 0} [static]
 

const char* _txt[] = {"txt", "text", 0} [static]
 

file_class_environment_spec environments[] [static]
 

extension_conflations extensions[] [static]
 

Initial value:

 {
  _html, _txt, _doc, _ppt, {0}
}

const char* html_conflations[] = { "h1", NULL, NULL, "heading", "h2", NULL, NULL, "heading", "h3", NULL, NULL, "heading", "h4", NULL, NULL, "heading", 0, 0, 0, 0 } [static]
 

const char* html_exclude_tags[] = { "script", "style", 0} [static]
 

const char* html_index_tags[] = { "title", "author", "h1", "h2", "h3", "h4", 0 } [static]
 

const char* html_metadata_tags[] = { "title", "author", 0 } [static]
 

const char* pdf_index_tags[] = { "title", "author", 0 } [static]
 

const char* pdf_metadata_tags[] = { "title", "author", 0 } [static]
 

const char* trec_conflations[] = { "hl", NULL, NULL, "headline", "head", NULL, NULL, "headline", "ttl", NULL, NULL, "title", "dd", NULL, NULL, "date", "date_time", NULL, NULL, "date", 0, 0, 0, 0 } [static]
 

const char* trec_include_tags[] = { "text", "hl", "head", "headline", "title", "ttl", "dd", "date", "date_time", "lp", "leadpara", 0 } [static]
 

const char* trec_index_tags[] = { "author", "hl", "head", "headline", "title", "ttl", "dd", "date_time", "date", 0 } [static]
 

const char* trec_metadata_tags[] = { "docno", "title", "author", 0 } [static]
 

const char* trecalt_include_tags[] = { "text", 0 } [static]
 

const char* trecalt_index_tags[] = { "text", 0 } [static]
 


Generated on Tue Jun 15 11:02:56 2010 for Lemur by doxygen 1.3.4