Use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters in project clusterMaker2 by RBVI.
From the class PAMTest, the method testSingletonCluster:
@Test
public void testSingletonCluster() {
    // a single 1x2 row clustered with k = 1; ans holds the expected (trivial) assignment
    Double[] data = { .9, .9 };
    int k = 1;
    int[] ans = { 0 };
    CyMatrix mat = CyMatrixFactory.makeSmallMatrix(1, 2, data);
    PAM pam = new PAM(null, mat, DistanceMetric.CITYBLOCK);
    Clusters c = pam.cluster(k);
    assertEquals(c.getNumberOfClusters(), k);
}
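For contrast, a hedged sketch of the same pattern with two well-separated rows and k = 2; the data values and the test name are hypothetical, and it reuses only calls shown in these examples (makeSmallMatrix, cluster, getNumberOfClusters, getClusterIndex) plus standard JUnit asserts:

    @Test
    public void testTwoClusters() {
        // two 1x2 rows that should each become their own cluster (hypothetical data)
        Double[] data = { 0.1, 0.1, 5.0, 5.0 };
        int k = 2;
        CyMatrix mat = CyMatrixFactory.makeSmallMatrix(2, 2, data);
        PAM pam = new PAM(null, mat, DistanceMetric.CITYBLOCK);
        Clusters c = pam.cluster(k);
        assertEquals(c.getNumberOfClusters(), k);
        // the two rows should land in different clusters
        assertTrue(c.getClusterIndex(0) != c.getClusterIndex(1));
    }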
Use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters in project clusterMaker2 by RBVI.
From the class PAMTest, the method testLarge:
@Test
public void testLarge() {
    System.out.println("testLarge begin");
    Double[] data;
    int k = 8;
    int[] ans = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7
    };
    BufferedReader reader = new BufferedReader(new InputStreamReader(PAMTest.class.getResourceAsStream("/pam_data.txt")));
    String line;
    ArrayList<Double> vectors = new ArrayList<Double>();
    int vectorWidth = 0;
    try {
        while ((line = reader.readLine()) != null) {
            String[] vector = line.split("\t");
            vectorWidth = vector.length;
            for (String v : vector) vectors.add(Double.parseDouble(v));
        }
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    data = new Double[vectors.size()];
    data = vectors.toArray(data);
    // BaseMatrix mat = new BaseMatrix(0, vectorWidth, data);
    CyMatrix mat = CyMatrixFactory.makeSmallMatrix(data.length / vectorWidth, vectorWidth, data);
    PAM pam = new PAM(null, mat, DistanceMetric.EUCLIDEAN);
    Clusters c = pam.cluster(k);
    System.out.println("testLarge end");
    assertEquals(c.getNumberOfClusters(), k);
    for (int i = 0; i < c.size(); ++i) {
        assertEquals(c.getClusterIndex(i), ans[i]);
    }
}
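The read loop above swallows the IOException and continues with whatever was parsed; a hedged alternative sketch of the same parsing using try-with-resources, failing the test instead of printing a stack trace (same resource path, standard JUnit fail):

    ArrayList<Double> vectors = new ArrayList<Double>();
    int vectorWidth = 0;
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(PAMTest.class.getResourceAsStream("/pam_data.txt")))) {
        String line;
        while ((line = reader.readLine()) != null) {
            String[] vector = line.split("\t");
            vectorWidth = vector.length;
            for (String v : vector) vectors.add(Double.parseDouble(v));
        }
    } catch (IOException e) {
        fail("could not read pam_data.txt: " + e.getMessage());
    }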
Use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters in project clusterMaker2 by RBVI.
From the class MSplitSilhouetteCalculator, the method splitByAverageSilhouette:
public static Clusters splitByAverageSilhouette(Segregatable seg, int K, boolean forceSplit, Summarizer summarizer) {
    Clusters split = segregateByAverageSilhouette(seg, K, summarizer);
    if (!forceSplit) {
        // consider no split (k = 1)
        if (split.getCost() >= 1) {
            // cost >= 1 => average silhouette < 0 => no splitting is warranted
            split = seg.cluster(1);
            split.setCost(1.0);
        }
    }
    return split;
}
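The cost stored on the Clusters object behaves as 1 minus the average silhouette, which is why a cost of 1 or more means the silhouette is not positive and the split is discarded. A hedged caller sketch, assuming a Segregatable seg and a Summarizer summarizer are already available:

    Clusters best = MSplitSilhouetteCalculator.splitByAverageSilhouette(seg, 8, false, summarizer);
    int k = best.getNumberOfClusters();        // falls back to 1 when no split beats the unsplit data
    double avgSilhouette = 1 - best.getCost(); // cost is treated as 1 - average silhouette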
Use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters in project clusterMaker2 by RBVI.
From the class MSplitSilhouetteCalculator, the method averageSilhouettes:
public static ArrayList<Double> averageSilhouettes(Subsegregatable sseg, Clusters clusters, int L, Summarizer summarizer) {
    int K = clusters.getNumberOfClusters();
    ArrayList<Double> splitSilhouettes = new ArrayList<Double>();
    int[][] partitions = clusters.getPartitions();
    // calculate the split silhouette of each cluster
    for (int kk = 0; kk < K; ++kk) {
        Clusters subclusters = segregateByAverageSilhouette(sseg.subset(partitions[kk]), L, summarizer);
        if (subclusters != null) {
            // cluster could be split further into subclusters
            splitSilhouettes.add(1 - subclusters.getCost());
        }
    }
    return splitSilhouettes;
}
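A hedged sketch of consuming the returned list, averaging the per-cluster split silhouettes by hand (in the project this kind of reduction presumably goes through the Summarizer, whose interface is not shown here):

    ArrayList<Double> sils = MSplitSilhouetteCalculator.averageSilhouettes(sseg, clusters, L, summarizer);
    double sum = 0;
    for (double v : sils) sum += v;
    double meanSplitSilhouette = sils.isEmpty() ? 0 : sum / sils.size();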
Use of edu.ucsf.rbvi.clusterMaker2.internal.algorithms.attributeClusterers.Clusters in project clusterMaker2 by RBVI.
From the class getClusters, the method findClusters:
public void findClusters(boolean general) {
    // nodes already visited by the DFS
    boolean[] used = new boolean[nodes.length];
    for (int i = 0; i < used.length; i++) used[i] = false;
    List<List<double[]>> clust = new ArrayList<List<double[]>>();
    List<List<String>> clustLabels = new ArrayList<List<String>>();
    List<List<Integer>> clustIDs = new ArrayList<List<Integer>>();
    List<List<Integer>> allNodes = new ArrayList<List<Integer>>();
    for (int j = 0; j < edges.length; j++) {
        if (edges[j][2] > maxDist)
            maxDist = edges[j][2];
    }
    int max = 0;
    // iterate across all rescaled SOM nodes
    for (int i = 0; i < nodes.length; i++) {
        // if node i has not been accessed, find all nodes connected to node i
        if (!used[i])
            connected(i);
        else
            // if node i has already been added to a cluster, continue
            continue;
        // populate cluster arrays
        List<double[]> allIndices = new ArrayList<double[]>();
        List<String> allLabels = new ArrayList<String>();
        List<Integer> allIDs = new ArrayList<Integer>();
        List<Integer> indices = new ArrayList<Integer>();
        for (int index : currClust) {
            if (!general) {
                // if TR clusters
                double[] info = new double[nodes[index].getPoint().length];
                for (int k = 0; k < info.length; k++) info[k] = nodes[index].getPoint()[k];
                allIndices.add(info);
            } else {
                // if general numerical clusters
                for (int h = 0; h < ids[index].size(); h++) {
                    double[] info = new double[nodes[index].getPoint().length + 1];
                    /* String lab = new StringTokenizer(labels[index].get(h).toString(),",").nextToken();
                    try{
                        int ID = Integer.parseInt(lab);
                        info[0] = ID;
                    }catch(NumberFormatException err) {info[0]=1;} */
                    // indices.add(index);
                    info[0] = index;
                    for (int k = 1; k < info.length; k++) info[k] = nodes[index].getPoint()[k - 1];
                    int id = ids[index].get(h).intValue();
                    String[] tokens = s.input[id].getDesc().split(",");
                    if (s.benchmark) {
                        int label = Integer.valueOf(tokens[0]);
                        // if clusters are known (benchmarking), how many are there?
                        if (label > max)
                            max = (int) label;
                    }
                    allIndices.add(info);
                    // allLabels.add(labels[index].get(h).toString());
                    allIDs.add(ids[index].get(h));
                }
            }
            // node 'index' is now used
            used[index] = true;
        }
        allNodes.add(indices);
        // add node indices to current cluster
        clust.add(allIndices);
        // add labels to current cluster
        clustLabels.add(allLabels);
        clustIDs.add(allIDs);
        currClust.clear();
    }
    clusters = new cluster[clust.size()];
    for (int i = 0; i < clusters.length; i++) {
        clusters[i] = new cluster(clust.get(i), clustLabels.get(i), clustIDs.get(i), allNodes.get(i));
        if (s.benchmark) {
            // if benchmarking, store original clusters
            origClusters = (List<Integer>[]) Array.newInstance(List.class, max);
            if (general) {
                for (int j = 0; j < clusters[i].labels.size(); j++) {
                    String[] tokens = clusters[i].labels.get(j).toString().split(",");
                    int label = Integer.valueOf(tokens[0]);
                    if (origClusters[label - 1] == null)
                        origClusters[label - 1] = new ArrayList<Integer>();
                    origClusters[label - 1].add(label);
                }
            }
        }
    }
    currClust.clear();
}
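findClusters leans on connected(i), which is expected to fill currClust with every node reachable from node i before the cluster arrays are populated. A hedged, self-contained sketch of that kind of component collection (illustrative names only, not the project's API; assumes the usual java.util imports):

    // illustrative only: iterative DFS that collects one connected component
    static List<Integer> component(int start, List<List<Integer>> adjacency, boolean[] used) {
        List<Integer> comp = new ArrayList<Integer>();
        Deque<Integer> stack = new ArrayDeque<Integer>();
        stack.push(start);
        while (!stack.isEmpty()) {
            int node = stack.pop();
            if (used[node]) continue;
            used[node] = true;
            comp.add(node);
            for (int next : adjacency.get(node))
                if (!used[next]) stack.push(next);
        }
        return comp;
    }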