|
Public Member Functions |
| | Combiner (int bins=10) |
| void | combineRedirectDestinationBuckets (const std::string &tmpPath) |
| void | combineBuckets (const std::string &outputPath, const std::string &tmpPath) |
| void | hashRedirectTargets (const std::string &bucketPath, const std::string &redirectsPath) |
| void | hashToBuckets (const std::string &bucketPath, const std::string &inputPath) |
| void | sortCorpusFiles (const std::string &outputPath, const std::string &preSortPath, const std::string &inputPath) |
Private Types |
| typedef indri::utility::HashTable< char *, url_entry *, strhash, strcompst > | UrlEntryTable |
| typedef indri::utility::HashTable< char *, std::vector< url_entry * >, strhash, strcompst > | UrlEntryVectorTable |
Private Member Functions |
| url_entry * | _newUrlEntry (const char *url, const char *corpusPath, const char *docNo) |
| void | _deleteUrlEntry (void *buffer) |
| void | _readLinks (UrlEntryTable &urlTable, std::ifstream &linkIn) |
| void | _readRedirects (UrlEntryTable &urlTable, const std::string &redirectPath, int number) |
| void | _writeCorpusTable (UrlEntryVectorTable &corpusTable, const std::string &outputPath) |
| void | _hashToCorpusTable (UrlEntryVectorTable &corpusTable, UrlEntryTable &urlTable) |
| void | _openWriteBuckets (std::vector< std::stringstream * > &buffers, std::vector< std::ofstream * > &buckets, const std::string &path, int bins) |
| void | _flushWriteBuffer (std::vector< std::stringstream * > &buffers, std::vector< std::ofstream * > &buckets, bool force, int i) |
| void | _flushWriteBuffers (std::vector< std::stringstream * > &buffers, std::vector< std::ofstream * > &buckets, bool force) |
| void | _closeWriteBuckets (std::vector< std::stringstream * > &buffers, std::vector< std::ofstream * > &buckets) |
| void | _openReadBuckets (std::vector< std::ifstream * > &buckets, const std::string &path, int bins) |
| void | _readDocBucket (UrlEntryTable &urlTable, std::ifstream &docIn) |
| int | hashString (const char *str) |
| void | hashToBuckets (std::ifstream &in, const std::string &path) |
| void | createBuckets (const std::string &tmpPath) |
| void | closeBuckets () |
| void | combineBucket (const std::string &outputPath, const std::string &tmpPath, int bucket) |
| void | hashToBuckets (const std::string &inputPath) |
| void | combineRedirectDestinationBucket (const std::string &tmpPath, int i, std::vector< std::stringstream * > &outBuffers, std::vector< std::ofstream * > &outputFiles) |
Private Attributes |
| std::vector< std::ofstream * > | _docBucketFiles |
| std::vector< std::ofstream * > | _linkBucketFiles |
| std::vector< std::stringstream * > | _docBuckets |
| std::vector< std::stringstream * > | _linkBuckets |
| int | _bins |