Example use of ca.pfv.spmf.patterns.cluster.ClusterWithMean in the project legato by DOREMUS-ANR.
Class Clustering, method getClusters.
/**
 * Clusters the given document vectors with {@link HierarchicalClustering} and converts the
 * result into a {@link ClusterList}.
 *
 * <p>The clustering algorithm only hands back the raw vectors, so each vector is matched back to
 * its document by comparing array contents. Every document is assigned at most once: when several
 * documents carry identical vectors, each matching vector consumes one not-yet-assigned document
 * instead of adding all of them again.
 *
 * @param docs map from document key to that document's vector
 * @return the clusters of documents, after the list has been asked to update its centroids and
 *         exemplars
 * @throws NumberFormatException declared because {@code runAlgorithm} declares it
 * @throws IOException declared because {@code runAlgorithm} declares it
 */
public static ClusterList getClusters(HashMap<String, double[]> docs) throws NumberFormatException, IOException {
    // Note: 0.415 was recorded as the best threshold on DS_SM.
    double maxdistance = 0.2;
    DistanceFunction distanceFunction = new DistanceCorrelation();
    HierarchicalClustering algo = new HierarchicalClustering();
    // Use the list returned by the algorithm rather than reaching into its internal field.
    List<ClusterWithMean> clusters = algo.runAlgorithm(docs, maxdistance, distanceFunction);

    // Keys of documents already placed in a cluster; fully qualified so that no new import
    // is required in the enclosing file.
    java.util.Set<String> assignedKeys = new java.util.HashSet<String>();

    ClusterList clusterList = new ClusterList();
    // For each cluster
    for (ClusterWithMean clust : clusters) {
        Cluster cluster = new Cluster();
        // For each vector
        for (DoubleArray vector : clust.getVectors()) {
            // Find one unassigned document whose vector has the same contents.
            for (Entry<String, double[]> doc : docs.entrySet()) {
                if (!assignedKeys.contains(doc.getKey())
                        && Arrays.equals(doc.getValue(), vector.data)) {
                    assignedKeys.add(doc.getKey());
                    cluster.add(new DocVec(doc.getKey(), doc.getValue()));
                    // One vector corresponds to exactly one document.
                    break;
                }
            }
        }
        clusterList.add(cluster);
    }
    clusterList.updateCentroids();
    clusterList.updateExemplars();
    return clusterList;
}
Example use of ca.pfv.spmf.patterns.cluster.ClusterWithMean in the project legato by DOREMUS-ANR.
Class HierarchicalClustering, method mergeTheClosestCluster.
/**
 * Merge the two closest clusters in terms of distance.
 *
 * <p>All pairs of clusters are compared by the distance between their means. The pair with the
 * smallest distance that does not exceed {@code maxDistance} is merged: the vectors of the second
 * cluster are added to the first, the first cluster's mean is recomputed, and the second cluster
 * is removed from the list. On ties the first pair encountered wins.
 *
 * @return true if a merge was done, otherwise false.
 */
private boolean mergeTheClosestCluster() {
    // Indices of the two closest clusters that can be merged (-1 while none was found).
    int closestFirst = -1;
    int closestSecond = -1;
    double minClusterDistance = Double.POSITIVE_INFINITY;

    // Find the closest pair by comparing all pairs of clusters i and j.
    for (int i = 0; i < clusters.size(); i++) {
        ClusterWithMean first = clusters.get(i);
        for (int j = i + 1; j < clusters.size(); j++) {
            // calculate the distance between i and j
            double distance = distanceFunction.calculateDistance(first.getmean(), clusters.get(j).getmean());
            // Accept the first pair within maxDistance unconditionally, then only strictly
            // closer pairs. This avoids relying on a numeric sentinel for the minimum.
            if (distance <= maxDistance && (closestFirst < 0 || distance < minClusterDistance)) {
                minClusterDistance = distance;
                closestFirst = i;
                closestSecond = j;
            }
        }
    }

    // if no close clusters were found, return false
    if (closestFirst < 0) {
        return false;
    }

    // else, merge the two closest clusters
    ClusterWithMean target = clusters.get(closestFirst);
    ClusterWithMean absorbed = clusters.get(closestSecond);
    for (DoubleArray vector : absorbed.getVectors()) {
        target.addVector(vector);
    }
    // after merging, we need to recompute the mean of the resulting cluster
    target.recomputeClusterMean();
    // Delete the absorbed cluster by index, so exactly that element is removed without
    // depending on equals(). closestSecond > closestFirst, so the target's index is unaffected.
    clusters.remove(closestSecond);
    // increase iteration count for statistics
    iterationCount++;
    return true;
}
Example use of ca.pfv.spmf.patterns.cluster.ClusterWithMean in the project legato by DOREMUS-ANR.
Class HierarchicalClustering, method runAlgorithm.
/**
 * Runs the hierarchical clustering on the given document vectors.
 *
 * <p>Each vector starts in a cluster of its own. The two closest clusters are then merged
 * repeatedly, via {@code mergeTheClosestCluster()}, until no further merge is possible. Start
 * and end timestamps are recorded, and memory usage is checked after every merge attempt.
 *
 * @param docs map from document key to that document's vector; only the values are used here
 * @param maxDistance largest distance between two cluster means at which a merge is allowed
 * @param distanceFunction function used to measure the distance between cluster means
 * @return the list of clusters remaining once no more merges can be done
 * @throws NumberFormatException declared by the signature
 * @throws IOException declared by the signature
 */
public List<ClusterWithMean> runAlgorithm(HashMap<String, double[]> docs, double maxDistance, DistanceFunction distanceFunction) throws NumberFormatException, IOException {
    startTimestamp = System.currentTimeMillis();
    this.maxDistance = maxDistance;
    this.distanceFunction = distanceFunction;

    // (1) Start from an empty list and give every vector a cluster of its own.
    clusters = new ArrayList<ClusterWithMean>();
    for (double[] values : docs.values()) {
        DoubleArray wrapped = new DoubleArray(values);
        ClusterWithMean singleton = new ClusterWithMean(values.length);
        singleton.addVector(wrapped);
        // The mean of a one-element cluster is a copy of that element.
        singleton.setMean(wrapped.clone());
        clusters.add(singleton);
    }

    // (2) Keep combining the two closest clusters into a bigger cluster
    // until no clusters can be combined.
    while (true) {
        boolean merged = mergeTheClosestCluster();
        // Memory usage is recorded after every attempt, including the final failed one.
        MemoryLogger.getInstance().checkMemory();
        if (!merged) {
            break;
        }
    }

    // record end time
    endTimestamp = System.currentTimeMillis();
    return clusters;
}
Aggregations