Search in sources :

Example 1 with HopachablePAM

use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.pam.HopachablePAM in project clusterMaker2 by RBVI.

the class HopachablePAMTest method testCollapse.

/**
 * Checks that collapsing two clusters of a PAM clustering yields a
 * clustering with exactly one cluster fewer, and that an already
 * collapsed clustering can be collapsed once more.
 */
@Test
public void testCollapse() {
    // 12 values handed to the factory with dimensions 6 and 2
    Double[] data = { .9, .9, .8, .8, .4, .4, .5, .5, .1, .1, .0, .0 };
    int k = 3;
    CyMatrix mat = CyMatrixFactory.makeSmallMatrix(6, 2, data);
    HopachablePAM pam = new HopachablePAM(null, mat, DistanceMetric.CITYBLOCK);
    Clusters c1 = pam.cluster(k);
    // collapse every pair of the three initial clusters
    Clusters c2 = pam.collapse(0, 1, c1);
    Clusters c3 = pam.collapse(1, 2, c1);
    Clusters c4 = pam.collapse(0, 2, c1);
    // check that the size has reduced
    --k;
    // JUnit's signature is assertEquals(expected, actual); keeping that order
    // makes a failure report "expected:<k> but was:<size>" instead of the reverse
    assertEquals(k, c2.getSizes().length);
    assertEquals(k, c3.getSizes().length);
    assertEquals(k, c4.getSizes().length);
    // collapsing an already collapsed clustering removes one more cluster
    Clusters c5 = pam.collapse(0, 1, c2);
    --k;
    assertEquals(k, c5.getSizes().length);
}
Also used : CyMatrix(edu.ucsf.rbvi.clusterMaker2.internal.api.CyMatrix) Clusters(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters) Test(org.junit.Test)

Example 2 with HopachablePAM

use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.pam.HopachablePAM in project clusterMaker2 by RBVI.

the class HopachablePAMTest method testSubset.

/**
 * Checks that a subset of a HopachablePAM (either a permutation of all
 * samples or a strict subset) still clusters into the requested number of
 * clusters with the expected per-element assignments.
 */
@Test
public void testSubset() {
    Double[] data = { .9, .9, .8, .8, .4, .4, .5, .5, .1, .1, .0, .0 };
    int k = 3;
    // new order
    int[] index = { 0, 1, 5, 2, 4, 3 };
    // expected results based on new order
    int[] ans = { 0, 0, 1, 2, 1, 2 };
    CyMatrix mat = CyMatrixFactory.makeSmallMatrix(6, 2, data);
    HopachablePAM pam = new HopachablePAM(null, mat, DistanceMetric.CITYBLOCK);
    // permute sample order
    Hopachable pamPermuted = pam.subset(index);
    Clusters c = pamPermuted.cluster(k);
    // the number of clusters should not change because it should always
    // return the specified number of clusters
    // (JUnit argument order is assertEquals(expected, actual))
    assertEquals(k, c.getNumberOfClusters());
    // check that the clustering results match
    for (int i = 0; i < c.size(); ++i) {
        assertEquals(ans[i], c.getClusterIndex(i));
    }
    // minor test case
    // subset the last 4 elements
    int[] subsetIndex = { 2, 3, 4, 5 };
    int[] subsetAns = { 0, 0, 1, 1 };
    int subsetK = 2;
    Hopachable pamSubset = pam.subset(subsetIndex);
    Clusters c2 = pamSubset.cluster(subsetK);
    // check number of clusters
    assertEquals(subsetK, c2.getNumberOfClusters());
    // check cluster assignments
    for (int i = 0; i < c2.size(); ++i) {
        assertEquals(subsetAns[i], c2.getClusterIndex(i));
    }
}
Also used : CyMatrix(edu.ucsf.rbvi.clusterMaker2.internal.api.CyMatrix) Clusters(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters) Hopachable(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.hopach.types.Hopachable) Test(org.junit.Test)

Example 3 with HopachablePAM

use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.pam.HopachablePAM in project clusterMaker2 by RBVI.

the class HopachablePAMTest method testSplit.

/**
 * Checks that {@code split(false)} on a nine-sample data set, configured
 * with the average-split-silhouette cost and a median summarizer, produces
 * the expected number of clusters and the expected per-element assignments.
 */
@Test
public void testSplit() {
    Double[] data = { .2, .2, .8, .8, .82, .82, .4, .5, .5, .4, .15, .15, .81, .81, .14, .14, .45, .45 };
    int k = 3;
    int[] ans = { 0, 1, 1, 2, 2, 0, 1, 0, 2 };
    CyMatrix mat = CyMatrixFactory.makeSmallMatrix(9, 2, data);
    HopachablePAM pam = new HopachablePAM(null, mat, DistanceMetric.CITYBLOCK);
    pam.setParameters(9, 9, SplitCost.AVERAGE_SPLIT_SILHOUETTE, new MedianSummarizer());
    Clusters c = pam.split(false);
    // check that data are split into expected number of clusters
    // (JUnit argument order is assertEquals(expected, actual))
    assertEquals(k, c.getNumberOfClusters());
    // check cluster assignments
    for (int i = 0; i < c.size(); ++i) {
        assertEquals(ans[i], c.getClusterIndex(i));
    }
}
Also used : CyMatrix(edu.ucsf.rbvi.clusterMaker2.internal.api.CyMatrix) Clusters(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters) MedianSummarizer(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.numeric.MedianSummarizer) Test(org.junit.Test)

Example 4 with HopachablePAM

use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.pam.HopachablePAM in project clusterMaker2 by RBVI.

the class RunHopachPAM method kcluster.

/**
 * Clusters {@code matrix} with HOPACH-PAM and writes the resulting cluster
 * index of every element into {@code clusterId}.
 *
 * <p>The {@code nClusters} and {@code nIterations} arguments are not read by
 * this method; the partitioner is configured from the instance fields
 * instead.
 *
 * @param nClusters   not used by this implementation
 * @param nIterations not used by this implementation
 * @param matrix      data handed to the PAM partitioner
 * @param metric      distance metric handed to the PAM partitioner
 * @param clusterId   output array; entry {@code i} receives the cluster index of element {@code i}
 * @return the number of clusters reported by the HOPACH run
 */
@Override
public int kcluster(int nClusters, int nIterations, CyMatrix matrix, DistanceMetric metric, int[] clusterId) {
    monitor.setProgress(0);

    // Pick the matching pair of summarizers; anything other than MEDIAN
    // falls back to the mean.
    final Summarizer objectSummarizer;
    final PrimitiveSummarizer primitiveSummarizer;
    switch(summaryMethod) {
        case MEAN:
        default:
            objectSummarizer = new MeanSummarizer();
            primitiveSummarizer = new PrimitiveMeanSummarizer();
            break;
        case MEDIAN:
            objectSummarizer = new MedianSummarizer();
            primitiveSummarizer = new PrimitiveMedianSummarizer();
            break;
    }

    // Configure the PAM partitioner and wrap it in the HOPACH driver.
    HopachablePAM pam = new HopachablePAM(network, matrix, metric);
    pam.setParameters(K, L, splitCost, objectSummarizer);

    HopachPAM hopach = new HopachPAM(pam);
    hopach.setParameters(maxLevel, minCostReduction, forceInitSplit, primitiveSummarizer);

    Clusters result = hopach.run();

    // Transfer the per-element assignments into the caller's array.
    int element = 0;
    while (element < result.size()) {
        clusterId[element] = result.getClusterIndex(element);
        element++;
    }
    return result.getNumberOfClusters();
}
Also used : PrimitiveMeanSummarizer(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.numeric.PrimitiveMeanSummarizer) MeanSummarizer(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.numeric.MeanSummarizer) PrimitiveSummarizer(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.numeric.PrimitiveSummarizer) PrimitiveMedianSummarizer(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.numeric.PrimitiveMedianSummarizer) Clusters(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters) HopachablePAM(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.pam.HopachablePAM) MedianSummarizer(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.numeric.MedianSummarizer) Summarizer(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.numeric.Summarizer)

Example 5 with HopachablePAM

use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.pam.HopachablePAM in project clusterMaker2 by RBVI.

the class Hopach method nextLevel.

/**
 * Attempt to split the next level.
 *
 * <p>Every non-empty partition of the parent level ({@code level - 1}) is
 * split independently. When a split yields more than one sub-cluster and a
 * neighbouring partition exists, the sub-clusters are ordered by their
 * average segregation from that neighbour. The concatenated result is stored
 * as the split for {@code level}.
 *
 * @param level next level to split; must be at least 1
 * @return convergence: {@code true} if the number of clusters did not change
 *         or the new split is final
 * @throws IllegalArgumentException if {@code level < 1}
 */
boolean nextLevel(int level) {
    // nextLevel can only be invoked for level >= 1
    if (level < 1) {
        throw new IllegalArgumentException("nextlevel can only be invoked for level >= 1");
    }
    // clusters in parent level
    Clusters prevSplit = splits.get(level - 1);
    int[][] partitions = prevSplit.getPartitions();
    int nClusters = prevSplit.getNumberOfClusters();
    double[][] segregations = partitioner.segregations(prevSplit);
    // flattened array of all cluster index for each partition
    int[] clusterIndex = new int[partitioner.size()];
    // j indexes the first element of each cluster (global index)
    int j = 0;
    // running total number of partitions
    int k = 0;
    // cost of each subsplit
    double[] costs = new double[nClusters];
    // Attempt to split each partition
    for (int i = 0; i < nClusters; ++i) {
        // neighbour is on the right unless current partition is the last partition.
        // This must be decided from the partition position i; the element offset j
        // is unrelated to the number of clusters and could select a neighbour index
        // of nClusters (one past the last partition) when leading partitions are empty.
        boolean rightNeighbour = i < nClusters - 1;
        int[] partition = partitions[i];
        if (partition.length == 0) {
            // partition is empty (partitioner returned fewer partitions than requested)
            continue;
        }
        // for a single partition this is -1, which disables the ordering step below
        int neighbourIndex = rightNeighbour ? i + 1 : i - 1;
        // split partition
        // TODO cache sub-partitioner here and in MSS calculator
        // or, cache partition and split results
        Hopachable sub = partitioner.subset(partition);
        Clusters subsplit = sub.split(false);
        int subk = subsplit.getNumberOfClusters();
        if (subk > 1 && neighbourIndex >= 0) {
            int[][] subpartitions = subsplit.getPartitions();
            // create separation matrix for distance from each sub-cluster to neighbouring cluster
            // NB Pollard used medoid-separations for ordering initial level, but average-separations for subsequent levels (implemented here)
            // TODO Allow partitioner to handle different options of summarizing distance from new subclusters to neighbouring cluster?
            // e.g. Calculate distances between medoids instead of average distances
            // Consider: HopachablePAM will only use one type of separations (medoid-separations).
            double[] separations = new double[subpartitions.length];
            for (int c = 0; c < subpartitions.length; ++c) {
                // average distance across elements of sub-cluster
                double d = 0.0;
                for (int jj = 0; jj < subpartitions[c].length; ++jj) {
                    // local sub-cluster index is shifted by j to address the segregations row
                    d += segregations[subpartitions[c][jj] + j][neighbourIndex];
                }
                separations[c] = d / subpartitions[c].length;
            }
            // order sub-clusters
            sortSplit(subsplit, separations, rightNeighbour);
        // NB  ordered labels now stores the index of the medoids based on local index
        }
        costs[i] = subsplit.getCost();
        // copy over new cluster index, offset by the clusters created so far
        for (int jj = 0; jj < sub.size(); ++jj) {
            clusterIndex[partition[jj]] = subsplit.getClusterIndex(jj) + k;
        }
        j += sub.size();
        k += subk;
    }
    // store results for new level
    Clusters newSplit = new Clusters(clusterIndex, k, psummarizer.summarize(costs));
    // NB  now the orderedLabels store trivial labels...
    this.split = newSplit;
    this.splits.set(level, newSplit);
    // splitting has converged if k has not changed
    if (k == nClusters) {
        return true;
    }
    // splitting has converged if new split is final
    return splitIsFinal(newSplit);
}
Also used : Clusters(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters) Hopachable(edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.hopach.types.Hopachable)

Aggregations

Clusters (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters)5 CyMatrix (edu.ucsf.rbvi.clusterMaker2.internal.api.CyMatrix)3 Test (org.junit.Test)3 Hopachable (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.hopach.types.Hopachable)2 MedianSummarizer (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.numeric.MedianSummarizer)2 HopachablePAM (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.pam.HopachablePAM)1 MeanSummarizer (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.numeric.MeanSummarizer)1 PrimitiveMeanSummarizer (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.numeric.PrimitiveMeanSummarizer)1 PrimitiveMedianSummarizer (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.numeric.PrimitiveMedianSummarizer)1 PrimitiveSummarizer (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.numeric.PrimitiveSummarizer)1 Summarizer (edu.ucsf.rbvi.clusterMaker2.internal.algorithms.numeric.Summarizer)1