use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
the class FuzzyCMeansExample method main.
/**
 * Executes example.
 *
 * <p>Starts an Ignite node from {@code examples/config/example-ignite.xml} and, inside an
 * {@code IgniteThread}, clusters 16 two-dimensional sample points into 4 clusters with the
 * distributed Fuzzy C-Means clusterer, then prints the computed centers.
 *
 * @param args Command line arguments, none required.
 * @throws InterruptedException If the calling thread is interrupted while waiting for the worker thread.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println(">>> Fuzzy C-Means usage example started.");

    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        // Start new Ignite thread.
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
            FuzzyCMeansExample.class.getSimpleName(), () -> {
                // Distance measure that computes distance between two points.
                DistanceMeasure distanceMeasure = new EuclideanDistance();

                // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means).
                double exponentialWeight = 2.0;

                // Condition that indicates when the algorithm must stop.
                // In this example the algorithm stops if memberships have changed insignificantly.
                BaseFuzzyCMeansClusterer.StopCondition stopCond =
                    BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS;

                // Maximum difference between new and old membership values with which algorithm will continue to work.
                double maxDelta = 0.01;

                // The maximum number of FCM iterations.
                int maxIterations = 50;

                // Value that is used to initialize random numbers generator. You can choose it randomly.
                Long seed = null;

                // Number of steps of primary centers selection (more steps more candidates).
                int initializationSteps = 2;

                // Number of K-Means iterations used to choose the required number of primary centers from candidates.
                int kMeansMaxIterations = 50;

                // Create new distributed clusterer with parameters described above.
                System.out.println(">>> Create new Distributed Fuzzy C-Means clusterer.");
                FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(distanceMeasure,
                    exponentialWeight, stopCond, maxDelta, maxIterations, seed, initializationSteps,
                    kMeansMaxIterations);

                // Create sample data: four groups of four points, one group per quadrant.
                double[][] points = new double[][] {
                    { -10, -10 }, { -9, -11 }, { -10, -9 }, { -11, -9 },
                    { 10, 10 }, { 9, 11 }, { 10, 9 }, { 11, 9 },
                    { -10, 10 }, { -9, 11 }, { -10, 9 }, { -11, 9 },
                    { 10, -10 }, { 9, -11 }, { 10, -9 }, { 11, -9 }
                };

                // Initialize matrix of data points. Each row contains one point.
                int rows = points.length;
                int cols = points[0].length;

                System.out.println(">>> Create the matrix that contains sample points.");
                SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(rows, cols,
                    StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

                // From here on the matrix exists, so destroy it even if a later step throws.
                try {
                    // Store points into matrix.
                    pntMatrix.assign(points);

                    // Call clusterization method with some number of centers.
                    // It returns model that can predict results for new points.
                    System.out.println(">>> Perform clusterization.");
                    int numCenters = 4;
                    FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters);

                    // You can also get centers of clusters that are computed by Fuzzy C-Means algorithm.
                    Vector[] centers = mdl.centers();

                    String res = ">>> Results:\n"
                        + ">>> 1st center: " + centers[0].get(0) + " " + centers[0].get(1) + "\n"
                        + ">>> 2nd center: " + centers[1].get(0) + " " + centers[1].get(1) + "\n"
                        + ">>> 3rd center: " + centers[2].get(0) + " " + centers[2].get(1) + "\n"
                        + ">>> 4th center: " + centers[3].get(0) + " " + centers[3].get(1) + "\n";

                    System.out.println(res);
                }
                finally {
                    pntMatrix.destroy();
                }
            });

        igniteThread.start();
        igniteThread.join();
    }
}
use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
the class FuzzyCMeansDistributedClustererTest method testTwoDimensionsLittleData.
/**
 * Test that algorithm gives correct results on a small sample - 4 centers on the plane.
 *
 * <p>Sixteen points are placed in four tight groups around (+-10, +-10). Each computed center
 * must lie within distance 1 of the corresponding group center.
 */
public void testTwoDimensionsLittleData() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(new EuclideanDistance(),
        2, BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS, 0.01, 500, null, 2, 50);

    double[][] points = new double[][] {
        { -10, -10 }, { -9, -11 }, { -10, -9 }, { -11, -9 },
        { 10, 10 }, { 9, 11 }, { 10, 9 }, { 11, 9 },
        { -10, 10 }, { -9, 11 }, { -10, 9 }, { -11, 9 },
        { 10, -10 }, { 9, -11 }, { 10, -9 }, { 11, -9 }
    };

    // Matrix shape is derived from the sample so the two cannot drift apart.
    SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(points.length, points[0].length,
        StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

    // Destroy the matrix even when clustering throws or an assertion fails.
    try {
        for (int i = 0; i < points.length; i++) {
            pntMatrix.setRow(i, points[i]);
        }

        FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, 4);

        Vector[] centers = mdl.centers();

        // Order centers by polar angle (atan2 ranges over (-pi, pi]) so that they line up with
        // the expected points below: (-10,-10), (10,-10), (10,10), (-10,10).
        Arrays.sort(centers, Comparator.comparing(vector -> Math.atan2(vector.get(1), vector.get(0))));

        DistanceMeasure measure = mdl.distanceMeasure();

        assertEquals(0, measure.compute(centers[0], new DenseLocalOnHeapVector(new double[] { -10, -10 })), 1);
        assertEquals(0, measure.compute(centers[1], new DenseLocalOnHeapVector(new double[] { 10, -10 })), 1);
        assertEquals(0, measure.compute(centers[2], new DenseLocalOnHeapVector(new double[] { 10, 10 })), 1);
        assertEquals(0, measure.compute(centers[3], new DenseLocalOnHeapVector(new double[] { -10, 10 })), 1);
    }
    finally {
        pntMatrix.destroy();
    }
}
use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
the class FuzzyCMeansDistributedClustererTest method performRandomTest.
/**
 * Test given clusterer on points placed randomly around vertexes of a regular polygon.
 *
 * <p>The polygon has radius 1000; every generated point lies within distance 100 of its vertex.
 * The test passes when each computed center is closer than 100 to some polygon vertex.
 *
 * @param distributedClusterer Tested clusterer.
 * @param seed Seed for the random numbers generator.
 */
private void performRandomTest(FuzzyCMeansDistributedClusterer distributedClusterer, long seed) {
    final int minNumCenters = 2;
    final int maxNumCenters = 5;
    final double maxRadius = 1000;
    final int maxPoints = 1000;
    final int minPoints = 300;

    Random random = new Random(seed);

    // Random.nextInt(bound) excludes the bound, so the number of centers is in [minNumCenters, maxNumCenters).
    int numCenters = random.nextInt(maxNumCenters - minNumCenters) + minNumCenters;

    // True centers are the vertexes of a regular polygon inscribed in a circle of radius maxRadius.
    double[][] centers = new double[numCenters][2];

    for (int i = 0; i < numCenters; i++) {
        double angle = Math.PI * 2.0 * i / numCenters;

        centers[i][0] = Math.cos(angle) * maxRadius;
        centers[i][1] = Math.sin(angle) * maxRadius;
    }

    int numPoints = minPoints + random.nextInt(maxPoints - minPoints);

    double[][] points = new double[numPoints][2];

    for (int i = 0; i < numPoints; i++) {
        int center = random.nextInt(numCenters);

        // Squaring the uniform value concentrates points near the vertex; radius is at most maxRadius / 10.
        double randomDouble = random.nextDouble();
        double radius = randomDouble * randomDouble * maxRadius / 10;
        double angle = random.nextDouble() * Math.PI * 2.0;

        points[i][0] = centers[center][0] + Math.cos(angle) * radius;
        points[i][1] = centers[center][1] + Math.sin(angle) * radius;
    }

    SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(numPoints, 2,
        StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

    // Destroy the matrix even when clustering throws or the final assertion fails.
    try {
        for (int i = 0; i < numPoints; i++) {
            pntMatrix.setRow(i, points[i]);
        }

        FuzzyCMeansModel mdl = distributedClusterer.cluster(pntMatrix, numCenters);

        Vector[] computedCenters = mdl.centers();
        DistanceMeasure measure = mdl.distanceMeasure();

        // Count down once for every computed center that has a true center within distance 100.
        int cntr = numCenters;

        for (int i = 0; i < numCenters; i++) {
            for (int j = 0; j < numCenters; j++) {
                if (measure.compute(computedCenters[i], new DenseLocalOnHeapVector(centers[j])) < 100) {
                    cntr--;

                    break;
                }
            }
        }

        assertEquals(0, cntr);
    }
    finally {
        pntMatrix.destroy();
    }
}
use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
the class KMeansDistributedClustererTestSingleNode method testClusterizationOnDatasetWithObviousStructure.
/**
 * Checks K-Means on 10000 points scattered near the four vertices of a square.
 *
 * <p>Points are generated around each vertex with a small random perturbation, and the mass center
 * of every generated group is recorded. The centers found by the clusterer must match those
 * mass centers within the tolerance passed to {@code checkIsInEpsilonNeighbourhood}.
 *
 * @throws IOException Declared by the signature; nothing in this method body throws it directly.
 */
public void testClusterizationOnDatasetWithObviousStructure() throws IOException {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    int ptsCnt = 10000;
    int squareSideLen = 10000;

    Random rnd = new Random(123456L);

    // Let centers be in the vertices of square.
    // The map key is the number of points generated around the center; the keys sum up to ptsCnt.
    Map<Integer, Vector> centers = new HashMap<>();
    centers.put(100, new DenseLocalOnHeapVector(new double[] { 0.0, 0.0 }));
    centers.put(900, new DenseLocalOnHeapVector(new double[] { squareSideLen, 0.0 }));
    centers.put(3000, new DenseLocalOnHeapVector(new double[] { 0.0, squareSideLen }));
    centers.put(6000, new DenseLocalOnHeapVector(new double[] { squareSideLen, squareSideLen }));

    SparseDistributedMatrix points = new SparseDistributedMatrix(ptsCnt, 2,
        StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

    // Destroy the matrix even when clustering throws or the final check fails.
    try {
        // Random row order, so points of one cluster are not stored in contiguous rows.
        List<Integer> permutation = IntStream.range(0, ptsCnt).boxed().collect(Collectors.toList());
        Collections.shuffle(permutation, rnd);

        int totalCnt = 0;

        List<Vector> massCenters = new ArrayList<>();

        for (Map.Entry<Integer, Vector> e : centers.entrySet()) {
            int cnt = e.getKey();

            // Fresh accumulator per cluster, so no vector instance is shared between clusters.
            Vector massCenter = VectorUtils.zeroes(2);

            for (int i = 0; i < cnt; i++) {
                Vector pnt = new DenseLocalOnHeapVector(2).assign(e.getValue());

                // Perturbate point on random value.
                pnt.map(val -> val + rnd.nextDouble() * squareSideLen / 100);

                massCenter = massCenter.plus(pnt);

                points.assignRow(permutation.get(totalCnt), pnt);

                totalCnt++;
            }

            massCenters.add(massCenter.times(1 / (double) cnt));
        }

        EuclideanDistance dist = new EuclideanDistance();

        // The same comparator orders expected and computed centers, so they can be compared pairwise.
        OrderedNodesComparator comp = new OrderedNodesComparator(centers.values().toArray(new Vector[0]), dist);

        massCenters.sort(comp);

        KMeansDistributedClusterer clusterer = new KMeansDistributedClusterer(dist, 3, 100, 1L);

        KMeansModel mdl = clusterer.cluster(points, 4);

        Vector[] resCenters = mdl.centers();
        Arrays.sort(resCenters, comp);

        checkIsInEpsilonNeighbourhood(resCenters, massCenters.toArray(new Vector[0]), 30.0);
    }
    finally {
        points.destroy();
    }
}
use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.
the class IgniteFuzzyCMeansDistributedClustererBenchmark method test.
/**
 * {@inheritDoc}
 *
 * <p>Runs one distributed Fuzzy C-Means clusterization of shuffled sample data inside an
 * {@code IgniteThread} and waits for it to finish.
 */
@Override
public boolean test(Map<Object, Object> ctx) throws Exception {
    // Create IgniteThread, we must work with SparseDistributedMatrix inside IgniteThread
    // because we create ignite cache internally.
    IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(),
        this.getClass().getSimpleName(), () -> {
            // IMPL NOTE originally taken from FuzzyCMeansExample.
            // Distance measure that computes distance between two points.
            DistanceMeasure distanceMeasure = new EuclideanDistance();

            // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means).
            double exponentialWeight = 2.0;

            // Condition that indicates when the algorithm must stop.
            // Here the algorithm stops if memberships have changed insignificantly.
            BaseFuzzyCMeansClusterer.StopCondition stopCond =
                BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS;

            // Maximum difference between new and old membership values with which algorithm will continue to work.
            double maxDelta = 0.01;

            // The maximum number of FCM iterations.
            int maxIterations = 50;

            // Number of steps of primary centers selection (more steps more candidates).
            int initializationSteps = 2;

            // Number of K-Means iterations used to choose the required number of primary centers from candidates.
            int kMeansMaxIterations = 50;

            // Create new distributed clusterer with parameters described above.
            FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(distanceMeasure,
                exponentialWeight, stopCond, maxDelta, maxIterations, null, initializationSteps,
                kMeansMaxIterations);

            // Create sample data.
            double[][] points = shuffle((int) (DataChanger.next()));

            // Initialize matrix of data points. Each row contains one point.
            int rows = points.length;
            int cols = points[0].length;

            // Create the matrix that contains sample points.
            SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(rows, cols,
                StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);

            // Destroy the matrix even if a step below throws, so failed iterations do not leak it.
            try {
                // Store points into matrix.
                pntMatrix.assign(points);

                // Call clusterization method with some number of centers.
                // It returns model that can predict results for new points.
                int numCenters = 4;
                FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters);

                // Get centers of clusters that are computed by Fuzzy C-Means algorithm.
                mdl.centers();
            }
            finally {
                pntMatrix.destroy();
            }
        });

    igniteThread.start();
    igniteThread.join();

    return true;
}
Aggregations