Search in sources :

Example 1 with ClusterWithMean

use of ca.pfv.spmf.patterns.cluster.ClusterWithMean in project legato by DOREMUS-ANR.

The class Clustering, method getClusters.

/**
 * Clusters the given document vectors with {@code HierarchicalClustering} (using a
 * {@code DistanceCorrelation} distance and a fixed merge threshold) and converts the
 * result into a {@code ClusterList}.
 *
 * <p>Each vector of each resulting cluster is mapped back to a document by comparing
 * array contents. Every document is assigned to at most one cluster entry: when several
 * documents carry identical vectors, each matching vector consumes one not-yet-assigned
 * document instead of adding all of them again.
 *
 * @param docs document identifiers mapped to their vectors
 * @return the cluster list, after {@code updateCentroids()} and {@code updateExemplars()}
 *         have been called on it
 * @throws NumberFormatException declared by {@code HierarchicalClustering.runAlgorithm}
 * @throws IOException declared by {@code HierarchicalClustering.runAlgorithm}
 */
public static ClusterList getClusters(HashMap<String, double[]> docs) throws NumberFormatException, IOException {
    // Merge threshold passed to the algorithm.
    // Note from the original author: 0.415 was the best threshold on DS_SM.
    double maxdistance = 0.2;
    DistanceFunction distanceFunction = new DistanceCorrelation();
    HierarchicalClustering algo = new HierarchicalClustering();
    // Use the returned list rather than reaching into the algorithm's internal field.
    List<ClusterWithMean> clusters = algo.runAlgorithm(docs, maxdistance, distanceFunction);
    ClusterList clusterList = new ClusterList();
    // Keys of documents already placed in a cluster; fully qualified so that no
    // additional import is required by this method.
    java.util.Set<String> assignedKeys = new java.util.HashSet<String>();
    // For each cluster
    for (ClusterWithMean clust : clusters) {
        Cluster cluster = new Cluster();
        // For each vector
        for (DoubleArray vector : clust.getVectors()) {
            for (Entry<String, double[]> doc : docs.entrySet()) {
                // Set.add returns false when the key was already assigned, in which
                // case the search continues with the next document of equal content.
                if (Arrays.equals(doc.getValue(), vector.data) && assignedKeys.add(doc.getKey())) {
                    cluster.add(new DocVec(doc.getKey(), doc.getValue()));
                    // One vector corresponds to exactly one source document.
                    break;
                }
            }
        }
        clusterList.add(cluster);
    }
    clusterList.updateCentroids();
    clusterList.updateExemplars();
    return clusterList;
}
Also used : ClusterWithMean(ca.pfv.spmf.patterns.cluster.ClusterWithMean) DistanceCorrelation(ca.pfv.spmf.algorithms.clustering.distanceFunctions.DistanceCorrelation) DoubleArray(ca.pfv.spmf.patterns.cluster.DoubleArray) DistanceFunction(ca.pfv.spmf.algorithms.clustering.distanceFunctions.DistanceFunction)

Example 2 with ClusterWithMean

use of ca.pfv.spmf.patterns.cluster.ClusterWithMean in project legato by DOREMUS-ANR.

The class HierarchicalClustering, method mergeTheClosestCluster.

/**
 * Merge the two closest clusters in terms of distance.
 *
 * <p>The distance between two clusters is the distance between their means, as computed
 * by {@code distanceFunction}. Only pairs whose distance does not exceed
 * {@code maxDistance} are candidates. On a merge, the vectors of the later cluster are
 * added to the earlier one, its mean is recomputed, the later cluster is removed from
 * {@code clusters}, and {@code iterationCount} is incremented.
 *
 * @return true if a merge was done, otherwise false.
 */
private boolean mergeTheClosestCluster() {
    // Positions of the two closest clusters that can be merged (-1 = none found yet)
    int closestFirst = -1;
    int closestSecond = -1;
    // Start from a double sentinel so that any finite distance can become the minimum
    // (an int-sized sentinel would reject distances >= Integer.MAX_VALUE).
    double minClusterDistance = Double.POSITIVE_INFINITY;
    // Find the closest pair by comparing all pairs of clusters i and j
    for (int i = 0; i < clusters.size(); i++) {
        for (int j = i + 1; j < clusters.size(); j++) {
            // calculate the distance between i and j
            double distance = distanceFunction.calculateDistance(clusters.get(i).getmean(), clusters.get(j).getmean());
            // and if it is the smallest distance until now and within the threshold
            if (distance < minClusterDistance && distance <= maxDistance) {
                // record this pair of clusters
                minClusterDistance = distance;
                closestFirst = i;
                closestSecond = j;
            }
        }
    }
    // if no close clusters were found, return false
    if (closestFirst < 0) {
        return false;
    }
    // else, merge the two closest clusters
    ClusterWithMean clusterToMerge1 = clusters.get(closestFirst);
    ClusterWithMean clusterToMerge2 = clusters.get(closestSecond);
    for (DoubleArray vector : clusterToMerge2.getVectors()) {
        clusterToMerge1.addVector(vector);
    }
    // after merging, we need to recompute the mean of the resulting cluster
    clusterToMerge1.recomputeClusterMean();
    // delete the cluster that was merged; removing by position avoids an
    // equality-based linear search (closestSecond > closestFirst, so the
    // surviving cluster keeps its position)
    clusters.remove(closestSecond);
    // increase iteration count for statistics
    iterationCount++;
    return true;
}
Also used : ClusterWithMean(ca.pfv.spmf.patterns.cluster.ClusterWithMean) DoubleArray(ca.pfv.spmf.patterns.cluster.DoubleArray)

Example 3 with ClusterWithMean

use of ca.pfv.spmf.patterns.cluster.ClusterWithMean in project legato by DOREMUS-ANR.

The class HierarchicalClustering, method runAlgorithm.

/**
 * Runs the clustering on the given document vectors.
 *
 * <p>Every vector starts in its own cluster whose mean is a clone of that vector. The
 * two closest clusters are then merged repeatedly (see {@code mergeTheClosestCluster()})
 * until no further merge is possible. Start and end timestamps are recorded, and memory
 * usage is checked after every merge attempt.
 *
 * @param docs document identifiers mapped to their vectors; only the vectors are used here
 * @param maxDistance the distance threshold stored for the merge step
 * @param distanceFunction the distance function stored for the merge step
 * @return the list of clusters remaining once no more merges can be done
 * @throws NumberFormatException declared in the signature; not thrown directly by this body
 * @throws IOException declared in the signature; not thrown directly by this body
 */
public List<ClusterWithMean> runAlgorithm(HashMap<String, double[]> docs, double maxDistance, DistanceFunction distanceFunction) throws NumberFormatException, IOException {
    // record start time
    startTimestamp = System.currentTimeMillis();
    // remember the parameters for the merge step
    this.distanceFunction = distanceFunction;
    this.maxDistance = maxDistance;
    // (1) Start with one singleton cluster per input vector.
    clusters = new ArrayList<ClusterWithMean>();
    for (double[] values : docs.values()) {
        DoubleArray wrapped = new DoubleArray(values);
        ClusterWithMean singleton = new ClusterWithMean(values.length);
        singleton.addVector(wrapped);
        // the mean of a one-element cluster is a copy of that element
        singleton.setMean(wrapped.clone());
        clusters.add(singleton);
    }
    // (2) Keep merging the two closest clusters until no pair can be merged.
    while (true) {
        boolean merged = mergeTheClosestCluster();
        // record memory usage after every attempt, including the last failed one
        MemoryLogger.getInstance().checkMemory();
        if (!merged) {
            break;
        }
    }
    // record end time
    endTimestamp = System.currentTimeMillis();
    return clusters;
}
Also used : ClusterWithMean(ca.pfv.spmf.patterns.cluster.ClusterWithMean) DoubleArray(ca.pfv.spmf.patterns.cluster.DoubleArray)

Aggregations

ClusterWithMean (ca.pfv.spmf.patterns.cluster.ClusterWithMean)3 DoubleArray (ca.pfv.spmf.patterns.cluster.DoubleArray)3 DistanceCorrelation (ca.pfv.spmf.algorithms.clustering.distanceFunctions.DistanceCorrelation)1 DistanceFunction (ca.pfv.spmf.algorithms.clustering.distanceFunctions.DistanceFunction)1