Main Page | Namespace List | Class Hierarchy | Class List | File List | Namespace Members | Class Members | File Members | Related Pages

OfflineCluster.hpp

Go to the documentation of this file.
00001 /*==========================================================================
00002  * Copyright (c) 2002 University of Massachusetts.  All Rights Reserved.
00003  *
00004  * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
00005  * is subject to the terms of the software license set forth in the LICENSE
00006  * file included with this software, and also available at
00007  * http://www.lemurproject.org/license.html
00008  *
00009  *==========================================================================
00010  */
00011 
00012 // David Fisher
00013 // init: 02/03/2003
00014 #ifndef _OFFLINECLUSTER_HPP
00015 #define _OFFLINECLUSTER_HPP
00016 #include <set>
00017 #include "common_headers.hpp"
00018 #include "Index.hpp"
00019 #include "ClusterParam.hpp"
00020 #include "ClusterFactory.hpp"
00021 #include "SimFactory.hpp"
00022 namespace lemur 
00023 {
00024   namespace cluster
00025   {
00026     
00028     class OfflineCluster
00029     {
            /// Declares offline (batch) clustering entry points: k-means over a
            /// list of document ids, k-means over an existing Cluster, and a
            /// bisecting k-means variant. The similarity type, cluster type and
            /// document mode are selected once, at construction.
            ///
            /// NOTE(review): `vector` and `Cluster` are used unqualified here;
            /// presumably `vector` is made visible by common_headers.hpp and
            /// `Cluster` by ClusterFactory.hpp -- confirm before reordering
            /// includes.
00030     public:
            /// Create a clusterer for the given index.
            /// @param ind         index to cluster over. The class keeps a
            ///                    reference member (`index`), so -- presumably
            ///                    bound to this argument -- the index must
            ///                    outlive this object (confirm in the .cpp).
            /// @param simType     similarity measure selector; defaults to
            ///                    ClusterParam::COS.
            /// @param clusterType cluster representation selector; defaults to
            ///                    ClusterParam::CENTROID.
            /// @param docMode     document scoring mode selector; defaults to
            ///                    ClusterParam::DMAX.
00032       OfflineCluster(const lemur::api::Index &ind, 
00033                      enum ClusterParam::simTypes simType = ClusterParam::COS,
00034                      enum ClusterParam::clusterTypes clusterType = ClusterParam::CENTROID,
00035                      enum ClusterParam::docModes docMode = ClusterParam::DMAX);
00036 
            /// Destructor; defined outside this header.
            /// NOTE(review): presumably releases `sim` and `factory` -- confirm.
            /// No copy constructor is declared, so the class is implicitly
            /// copy-constructible; verify that copying is safe or never done.
00038       ~OfflineCluster();
00039 
            /// Run k-means over a list of documents.
            /// @param docIds   ids of the documents to cluster (taken by value).
            /// @param numParts number of partitions requested; defaults to 2.
            /// @param maxIters defaults to 100; presumably the cap on
            ///                 iterations -- confirm in the implementation.
            /// @return pointer to a vector of Cluster pointers.
            ///         NOTE(review): ownership of the vector and of the
            ///         clusters is not visible from this declaration; confirm
            ///         whether the caller must delete them.
00042       vector<Cluster*> *kMeans(vector<lemur::api::DOCID_T> docIds, 
00043                                int numParts = 2, int maxIters = 100);
00044 
            /// Run k-means starting from an existing cluster instead of a list
            /// of document ids; presumably partitions that cluster's members.
            /// @param cluster  input cluster (non-owning as far as this
            ///                 declaration shows -- confirm).
            /// @param numParts number of partitions requested; defaults to 2.
            /// @param maxIters defaults to 100; presumably the iteration cap.
            /// @return pointer to a vector of Cluster pointers (ownership not
            ///         shown here -- confirm).
00046       vector<Cluster*> *kMeans(Cluster *cluster, int numParts = 2, 
00047                                int maxIters = 100);
00048 
            /// Run bisecting k-means over a list of documents.
            /// @param docIds   ids of the documents to cluster (taken by value).
            /// @param numParts number of partitions requested; defaults to 2.
            /// @param numIters defaults to 5; presumably the number of trial
            ///                 bisections per split -- confirm.
            /// @param maxIters defaults to 100; presumably the iteration cap
            ///                 for each inner k-means run -- confirm.
            /// @return pointer to a vector of Cluster pointers (ownership not
            ///         shown here -- confirm).
00051       vector<Cluster*> *bisecting_kMeans(vector<lemur::api::DOCID_T> docIds, 
00052                                          int numParts = 2, 
00053                                          int numIters = 5, int maxIters = 100);
00054 
00055     private:
            /// Similarity method used by the clusterer. NOTE(review): presumably
            /// created from `simType` via SimFactory -- confirm.
00057       const SimilarityMethod *sim;
            /// Factory for Cluster objects. NOTE(review): presumably configured
            /// from `clusterType`/`docMode` -- confirm.
00059       ClusterFactory *factory;
            /// The index being clustered; held by reference, not owned.
00061       const lemur::api::Index &index;
            /// Compare two arrays of n Cluster pointers.
            /// NOTE(review): presumably returns true when the two sets match
            /// (a convergence test) -- confirm the return convention.
00063       bool compareClusterSets(Cluster **, Cluster **, int n);
            /// Pick `num` document ids out of `docIds`; presumably the initial
            /// seeds for k-means. Selection strategy is not visible here.
00065       vector <lemur::api::DOCID_T> selectSeeds(vector<lemur::api::DOCID_T> docIds, int num);
            /// Choose one cluster from `working`; presumably the next cluster
            /// to bisect. Selection criterion is not visible here.
00067       Cluster *chooseSplit(vector<Cluster *> *working);  
            /// Compute a numeric score for the set of clusters in `working`.
            /// NOTE(review): scale and direction (higher/lower is better) are
            /// not visible from this declaration.
00069       double scoreSet(vector<Cluster *> *working);
00070     };
00071   }
00072 }
00073 
00074 #endif

Generated on Tue Jun 15 11:02:54 2010 for Lemur by doxygen 1.3.4