Use of weka.core.NormalizableDistance in the project iobserve-analysis by research-iobserve.
Shown: the method clusterSessionsWithXMeans of the class XMeansClustering.
/**
 * Clusters the given sessions with the X-Means algorithm using a Manhattan distance function
 * and computes similarity metrics for the resulting clustering.
 *
 * <p>
 * The permitted number of clusters is the range
 * {@code [numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups,
 * numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups]}. If the upper bound is below 2,
 * both bounds are set to 1; otherwise the lower bound is raised to at least 2. X-Means selects
 * the best fitting number of clusters within this range by itself.
 *
 * @param instances
 *            data to cluster in Weka format
 * @param numberOfUserGroupsFromInputUsageModel
 *            is the input number of clusters
 * @param varianceOfUserGroups
 *            enables the creation of a minimum and maximum number of clusters
 * @param seed
 *            states a random determination of the initial centroids
 * @return the clustering results that contain the number of clusters and the assignments, or
 *         {@code null} if an exception occurred during clustering (the stack trace is printed)
 */
public ClusteringResults clusterSessionsWithXMeans(final Instances instances, final int numberOfUserGroupsFromInputUsageModel, final int varianceOfUserGroups, final int seed) {
    ClusteringResults xMeansClusteringResults = null;
    try {
        final XMeans xmeans = new XMeans();
        xmeans.setSeed(seed);

        // "dontNormalize = false" leaves the distance function's normalization switched on.
        final NormalizableDistance manhattanDistance = new ManhattanDistance();
        manhattanDistance.setDontNormalize(false);
        manhattanDistance.setInstances(instances);
        xmeans.setDistanceF(manhattanDistance);

        // Determines the range of clusters
        // The X-Means clustering algorithm determines the best fitting number of clusters
        // within this range by itself
        int numberOfClustersMin = numberOfUserGroupsFromInputUsageModel - varianceOfUserGroups;
        int numberOfClustersMax = numberOfUserGroupsFromInputUsageModel + varianceOfUserGroups;
        if (numberOfClustersMax < 2) {
            // Not enough room for two clusters: fall back to a single cluster.
            numberOfClustersMax = 1;
            numberOfClustersMin = 1;
        } else if (numberOfClustersMin < 2) {
            numberOfClustersMin = 2;
        }
        xmeans.setMinNumClusters(numberOfClustersMin);
        xmeans.setMaxNumClusters(numberOfClustersMax);

        xmeans.buildClusterer(instances);

        // Assign every instance to its cluster; each instance is classified exactly once.
        final int numberOfInstances = instances.numInstances();
        final int[] assignments = new int[numberOfInstances];
        for (int s = 0; s < numberOfInstances; s++) {
            assignments[s] = xmeans.clusterInstance(instances.instance(s));
        }

        final Instances clusterCenters = xmeans.getClusterCenters();
        final ClusteringMetrics clusteringMetrics = new ClusteringMetrics(clusterCenters, instances, assignments);
        clusteringMetrics.calculateSimilarityMetrics();
        xMeansClusteringResults = new ClusteringResults("X-Means", clusterCenters.numInstances(), assignments, clusteringMetrics);
    } catch (final Exception e) {
        // NOPMD NOCS due to broken xmeans implementation triggering
        // Exception
        e.printStackTrace();
    }
    return xMeansClusteringResults;
}
Aggregations