Use of cern.colt.matrix.tdouble.DoubleMatrix1D in project clusterMaker2 by RBVI: the class ColtOps, method columnSum2.
/**
 * Computes the sum of squares of every cell in the given column.
 *
 * @param column index of the column to aggregate
 * @return the sum of the squared cell values of that column
 */
public double columnSum2(int column) {
	// aggregate(plus, square) folds square(cell) over all cells with +
	return getData().viewColumn(column).aggregate(DoubleFunctions.plus, DoubleFunctions.square);
}
Use of cern.colt.matrix.tdouble.DoubleMatrix1D in project clusterMaker2 by RBVI: the class ColtOps, method divideScalarColumn.
/**
 * Divides every cell in a column by the given value, in place. Used
 * primarily for normalization when the column's current sum is
 * already known.
 * Note: does not update the matrix's cached min/max values.
 *
 * @param column the column being divided
 * @param value the divisor applied to each cell of the column
 */
public void divideScalarColumn(int column, double value) {
	// DoubleFunctions.div(value) is the unary function v -> v / value
	getData().viewColumn(column).assign(DoubleFunctions.div(value));
}
Use of cern.colt.matrix.tdouble.DoubleMatrix1D in project clusterMaker2 by RBVI: the class RunSCPS, method getUMat.
/**
 * Builds the spectral embedding matrix U from the top K eigenvectors of L,
 * then normalizes each row of U to unit length.
 * <p>
 * NOTE(review): the returned matrix is a {@code viewPart} of {@code eigenVect},
 * so the in-place normalization also mutates the corresponding cells of the
 * input matrix — confirm callers do not reuse {@code eigenVect} afterwards.
 *
 * @param eigenVect matrix whose columns are eigenvectors, ordered by ascending
 *                  eigenvalue (so the top K are the last K columns)
 * @param k number of eigenvectors to keep
 * @return an n-row, k-column view with each row scaled to unit Euclidean length
 */
public DoubleMatrix2D getUMat(DoubleMatrix2D eigenVect, int k) {
	DoubleMatrix2D uMat;
	IntArrayList indexList = new IntArrayList();
	DoubleArrayList valueList = new DoubleArrayList();
	// construct matrix U from first K eigenvectors (ordered in ascending value by eigenvalue in eigenVect so start with the k-to-last column)
	uMat = eigenVect.viewPart(0, eigenVect.columns() - k, eigenVect.rows(), k);
	// Normalize each row of matrix U to have unit length.
	// BUG FIX: the original loop bound was uMat.columns() (== k), which only
	// normalized the first k rows whenever the matrix has more rows than k.
	for (int i = 0; i < uMat.rows(); i++) {
		DoubleMatrix1D row = uMat.viewRow(i);
		double rowLength = Math.sqrt(row.zDotProduct(row));
		// Skip all-zero rows: dividing by a zero length would produce NaN.
		if (rowLength == 0.0)
			continue;
		// getNonZeros clears and refills the lists, so they can be reused per row
		row.getNonZeros(indexList, valueList);
		// normalize each nonzero value in the row
		for (int j = 0; j < indexList.size(); j++) {
			int index = indexList.get(j);
			double value = valueList.get(j) / rowLength;
			uMat.set(i, index, value);
		}
	}
	return uMat;
}
Use of cern.colt.matrix.tdouble.DoubleMatrix1D in project clusterMaker2 by RBVI: the class RunSCPS, method redistributeMaxCluster.
// Takes the largest cluster obtained by K-means and redistributes some of its elements
// across the other clusters via the Kurucz algorithm: each node of the largest cluster
// is reassigned to the cluster with which it has the greatest average edge weight,
// iterating until no node moves.
//
// @param clusters cluster id per node (mutated in place as nodes are transferred)
// @param sMat     similarity matrix; row i holds edge weights from node i
// @param k        total number of clusters
// @return the redistributed cluster assignment array
public int[] redistributeMaxCluster(int[] clusters, DoubleMatrix2D sMat, int k) {
int maxClusterID = -1;
int maxClusterSize = -1;
int maxClusterConnection = -1;
double maxClusterConnectionSize = -1;
// reused scratch lists for sMat.getNonZeros (refilled on every call)
IntArrayList indexList = new IntArrayList();
DoubleArrayList valueList = new DoubleArrayList();
// Array of cluster sizes
int[] clusterSizeArray = new int[k];
// array of redistributed clusters
int[] redistribClusters = new int[clusters.length];
// array summing edge connections from node in largest cluster to all other clusters
double[] clusterConnectionCount = new double[k];
for (int i = 0; i < clusterSizeArray.length; i++) clusterSizeArray[i] = 0;
// compute size of each cluster
for (int i = 0; i < clusters.length; i++) {
int clusterID = clusters[i];
clusterSizeArray[clusterID] += 1;
}
// find max cluster size and max cluster id
for (int i = 0; i < clusterSizeArray.length; i++) {
int clusterSize = clusterSizeArray[i];
if (clusterSize > maxClusterSize) {
maxClusterSize = clusterSize;
maxClusterID = i;
}
}
// run loop until no changes observed in cluster transfers
while (true) {
int transfer_count = 0;
// loop through sMat and redistribute elements in the largest cluster based on edge weight connectivity
for (int i = 0; i < clusters.length; i++) {
// node belongs to one of the smaller clusters. Merely copy its existing cluster id into the redistributed cluster array
if (clusters[i] != maxClusterID) {
redistribClusters[i] = clusters[i];
continue;
}
// index corresponds to an element of the main cluster. Count the cluster connections from this node
for (int j = 0; j < k; j++) clusterConnectionCount[j] = 0;
maxClusterConnection = -1;
maxClusterConnectionSize = -1;
DoubleMatrix1D row = sMat.viewRow(i);
row.getNonZeros(indexList, valueList);
// loop through the node's existing edges and accumulate the total weight bridging to each cluster
for (int j = 0; j < indexList.size(); j++) {
int connectingNode = indexList.get(j);
int connectingNodeCluster = clusters[connectingNode];
clusterConnectionCount[connectingNodeCluster] += valueList.get(j);
}
// loop through the cluster connection totals and find the cluster with the greatest average edge weight
// (the +1 in the denominator counts the node itself as a prospective member, and avoids division by zero)
for (int j = 0; j < k; j++) {
double avgConnectionSize = clusterConnectionCount[j] / (double) (clusterSizeArray[j] + 1);
if (maxClusterConnectionSize < avgConnectionSize) {
maxClusterConnectionSize = avgConnectionSize;
maxClusterConnection = j;
}
}
// update redistributed cluster array to reflect maxClusterConnection
redistribClusters[i] = maxClusterConnection;
if (clusters[i] != redistribClusters[i]) {
transfer_count++;
System.out.println("Node " + i + " moved from " + clusters[i] + " to " + redistribClusters[i]);
}
}
// a transfer has occurred: update clusters and the size bookkeeping to match redistribClusters
if (transfer_count > 0) {
for (int i = 0; i < clusters.length; i++) if (clusters[i] != redistribClusters[i]) {
int clusterID = redistribClusters[i];
// only nodes of the max cluster can move, so the source of every transfer is maxClusterID
clusterSizeArray[maxClusterID]--;
clusterSizeArray[clusterID]++;
clusters[i] = redistribClusters[i];
}
System.out.println("Transfer Count " + transfer_count + " MaxClusterSize " + clusterSizeArray[maxClusterID]);
} else
// No transfer occurred. Break out of the loop
break;
}
return redistribClusters;
}
Use of cern.colt.matrix.tdouble.DoubleMatrix1D in project clusterMaker2 by RBVI: the class KCluster, method getDistance.
// Euclidean distance between a row of `matrix` and a row of `cdata`,
// delegating to Colt's built-in EUCLID distance function.
private static double getDistance(int row1_id, int row2_id, DoubleMatrix2D matrix, DoubleMatrix2D cdata) {
	return DoubleStatistic.EUCLID.apply(matrix.viewRow(row1_id), cdata.viewRow(row2_id));
}
Aggregations