Use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters in project clusterMaker2 by RBVI.
The class HopachablePAM, method collapse.
/**
 * Collapses two clusters into one and recomputes the classification cost
 * of the resulting partition.
 *
 * @param i        index of the first cluster to merge
 * @param j        index of the second cluster to merge
 * @param clusters the current partition (left unmodified; a copy is made)
 * @return a new {@code Clusters} with i and j merged and its cost updated
 */
public Clusters collapse(int i, int j, Clusters clusters) {
    // NB In Pollard's implementation, the choice of the new medoid probably
    // does not change downstream results...
    Clusters merged = new Clusters(clusters);
    merged.merge(i, j);
    // Recompute the cost under the configured split-cost criterion.
    if (splitCost == SplitCost.AVERAGE_SILHOUETTE) {
        double avgSil =
            SilhouetteCalculator.silhouettes(this.segregations(merged), merged).getAverage(summarizer);
        merged.setCost(1 - avgSil);
    } else {
        // AVERAGE_SPLIT_SILHOUETTE is the default criterion.
        merged.setCost(MSplitSilhouetteCalculator.averageSplitSilhouette(this, merged, maxL, summarizer));
    }
    return merged;
}
Use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters in project clusterMaker2 by RBVI.
The class HopachablePAMTest, method testSplit.
/**
 * Verifies that HopachablePAM.split(false) partitions nine 2-D points
 * (city-block distance) into the expected three clusters with the
 * expected per-element assignments.
 */
@Test
public void testSplit() {
    Double[] data = { .2, .2, .8, .8, .82, .82, .4, .5, .5, .4, .15, .15, .81, .81, .14, .14, .45, .45 };
    int expectedClusters = 3;
    int[] expectedAssignment = { 0, 1, 1, 2, 2, 0, 1, 0, 2 };
    CyMatrix matrix = CyMatrixFactory.makeSmallMatrix(9, 2, data);
    HopachablePAM hopach = new HopachablePAM(null, matrix, DistanceMetric.CITYBLOCK);
    hopach.setParameters(9, 9, SplitCost.AVERAGE_SPLIT_SILHOUETTE, new MedianSummarizer());
    Clusters result = hopach.split(false);
    // The split must produce the expected number of clusters...
    assertEquals(result.getNumberOfClusters(), expectedClusters);
    // ...and each element must land in its expected cluster.
    int n = result.size();
    for (int idx = 0; idx < n; ++idx) {
        assertEquals(result.getClusterIndex(idx), expectedAssignment[idx]);
    }
}
Use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters in project clusterMaker2 by RBVI.
The class PAMTest, method testCluster.
/**
 * Exercises PAM.cluster(k) on five 2-D points (city-block distance) and
 * checks both the number of clusters and the per-element assignments.
 */
@Test
public void testCluster() {
    System.out.println("testCluster begin");
    Double[] data = { .9, .9, .8, .8, .4, .4, .5, .5, .1, .1 };
    int expectedClusters = 3;
    // {0, 0, 1, 1, 2, 2};
    int[] expectedAssignment = { 0, 0, 1, 1, 2 };
    // BaseMatrix mat = new BaseMatrix(4, 2, data);
    CyMatrix matrix = CyMatrixFactory.makeSmallMatrix(4, 2, data);
    PAM pam = new PAM(null, matrix, DistanceMetric.CITYBLOCK);
    Clusters result = pam.cluster(expectedClusters);
    System.out.println("testCluster end");
    assertEquals(result.getNumberOfClusters(), expectedClusters);
    // Dump the assignments to aid debugging on failure.
    for (int i = 0; i < result.size(); ++i) {
        System.out.println("c[" + i + "] = " + result.getClusterIndex(i));
    }
    for (int i = 0; i < result.size(); ++i) {
        assertEquals(result.getClusterIndex(i), expectedAssignment[i]);
    }
    // NB The current implementation fails the below test case:
    //   data = [.9, .9; .8, .8; .4, .4,; .5, .5; .1, .1]
    //   ans  = [ 0, 0, 1, 1, 2]
    //
    // Instead, PAM.cluster(...) yields:
    //   res  = [ 0, 0, 1, 2, 2]
    //
    // This discrepancy is due to the existence of singleton clusters.
    // During the build phase, element 2 is selected as a medoid, which
    // precludes element 4 from becoming a medoid.
    // During the swap phase, the current implementation fails to register
    // the (2, 4) swap as worthwhile, because the contribution to a potential
    // swap is calculated only from nonmedoids other than the candidate itself.
    // There is then no way to justify merging element 2 into cluster 1 and
    // creating a new singleton cluster headed by element 4.
    //
    // In contrast, R's cluster::pam passes this test case; the algorithm
    // therein likely differs from the one this implementation is based on.
}
Use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters in project clusterMaker2 by RBVI.
The class RunPAM, method kcluster.
/**
 * Runs PAM clustering on the given matrix and writes the per-element
 * cluster assignments into {@code clusterId}.
 *
 * @param nClusters   requested number of clusters (k)
 * @param nIterations unused by this implementation — PAM converges on its own
 * @param matrix      data matrix to cluster
 * @param metric      distance metric to use
 * @param clusterId   output array; assumes length >= number of clustered
 *                    elements — TODO confirm with callers
 * @return the number of clusters actually produced
 */
@Override
public int kcluster(int nClusters, int nIterations, CyMatrix matrix, DistanceMetric metric, int[] clusterId) {
    PAM pam = new PAM(network, matrix, metric);
    Clusters result = pam.cluster(nClusters);
    // Copy the assignments out to the caller-supplied array.
    int size = result.size();
    for (int idx = 0; idx < size; ++idx) {
        clusterId[idx] = result.getClusterIndex(idx);
    }
    return result.getNumberOfClusters();
}
Use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters in project clusterMaker2 by RBVI.
The class MSplitSilhouetteCalculator, method segregateByAverageSilhouette.
/**
 * Searches k = 2..K for the partition of {@code seg} that maximizes the
 * average silhouette, and returns it with its cost set to
 * (1 - average silhouette).
 *
 * @param seg        the segregatable data to partition
 * @param K          maximum number of clusters to try (clamped to size-1)
 * @param summarizer summarizer used to average the silhouettes
 * @return the best partition found, or {@code null} if no k in [2, K] was
 *         tried (i.e. K < 2 after clamping) — presumably intentional;
 *         callers should handle null
 */
public static Clusters segregateByAverageSilhouette(Segregatable seg, int K, Summarizer summarizer) {
    int m = seg.size();
    // Clamp K: at most m-1 clusters are meaningful for m elements here.
    int kMax = (K > m - 1) ? m - 1 : K;
    Clusters best = null;
    double bestAvgSil = Double.NEGATIVE_INFINITY;
    // Try each candidate k and keep the partition with the highest
    // average silhouette.
    for (int k = 2; k <= kMax; ++k) {
        Clusters candidate = seg.cluster(k);
        Silhouettes sils = SilhouetteCalculator.silhouettes(seg.segregations(candidate), candidate);
        double avg = sils.getAverage(summarizer);
        if (avg > bestAvgSil) {
            bestAvgSil = avg;
            best = candidate;
        }
    }
    if (best != null) {
        // Replace the classification cost with (1 - average silhouette).
        best.setCost(1 - bestAvgSil);
    }
    return best;
}
Aggregations