Search in sources :

Example 1 with DistanceMeasure

use of org.apache.ignite.ml.math.distances.DistanceMeasure in project ignite by apache.

the class FuzzyCMeansDistributedClustererTest method testTwoDimensionsLittleData.

/**
 * Test that algorithm gives correct results on a small sample - 16 points grouped around
 * 4 centers on the plane, one group per quadrant.
 */
public void testTwoDimensionsLittleData() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());
    FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(new EuclideanDistance(), 2, BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS, 0.01, 500, null, 2, 50);
    double[][] points = new double[][] { { -10, -10 }, { -9, -11 }, { -10, -9 }, { -11, -9 }, { 10, 10 }, { 9, 11 }, { 10, 9 }, { 11, 9 }, { -10, 10 }, { -9, 11 }, { -10, 9 }, { -11, 9 }, { 10, -10 }, { 9, -11 }, { 10, -9 }, { 11, -9 } };
    SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(points.length, 2, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
    // Release the distributed matrix even when clustering throws or an assertion fails.
    try {
        for (int i = 0; i < points.length; i++) {
            pntMatrix.setRow(i, points[i]);
        }
        FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, 4);
        Vector[] centers = mdl.centers();
        // Order the centers by polar angle so they can be compared with the expected quadrant
        // centers in a fixed order: (-10,-10), (10,-10), (10,10), (-10,10).
        Arrays.sort(centers, Comparator.comparing(vector -> Math.atan2(vector.get(1), vector.get(0))));
        DistanceMeasure measure = mdl.distanceMeasure();
        // Each computed center must lie within distance 1 of the expected one.
        assertEquals(0, measure.compute(centers[0], new DenseLocalOnHeapVector(new double[] { -10, -10 })), 1);
        assertEquals(0, measure.compute(centers[1], new DenseLocalOnHeapVector(new double[] { 10, -10 })), 1);
        assertEquals(0, measure.compute(centers[2], new DenseLocalOnHeapVector(new double[] { 10, 10 })), 1);
        assertEquals(0, measure.compute(centers[3], new DenseLocalOnHeapVector(new double[] { -10, 10 })), 1);
    } finally {
        pntMatrix.destroy();
    }
}
Also used : Arrays(java.util.Arrays) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Vector(org.apache.ignite.ml.math.Vector) IgniteUtils(org.apache.ignite.internal.util.IgniteUtils) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) Random(java.util.Random) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure) Ignite(org.apache.ignite.Ignite) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) Comparator(java.util.Comparator) StorageConstants(org.apache.ignite.ml.math.StorageConstants) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure)

Example 2 with DistanceMeasure

use of org.apache.ignite.ml.math.distances.DistanceMeasure in project ignite by apache.

the class FuzzyCMeansDistributedClustererTest method testTwoDimensionsRandomlyPlacedPointsAndCenters.

/**
 * Runs a fixed number of randomized rounds against one distributed clusterer configured with
 * the STABLE_CENTERS stop condition. The round index is handed to
 * {@code performRandomTest} as the seed of that round.
 */
public void testTwoDimensionsRandomlyPlacedPointsAndCenters() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    final int rounds = 5;
    final double fuzziness = 2.0;
    final double centersTolerance = 0.01;
    final int iterationLimit = 500;
    // Kept as a boxed Long, exactly as the clusterer constructor is invoked elsewhere with null.
    final Long clustererSeed = 1L;

    DistanceMeasure distance = new EuclideanDistance();
    FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(distance, fuzziness, BaseFuzzyCMeansClusterer.StopCondition.STABLE_CENTERS, centersTolerance, iterationLimit, clustererSeed, 2, 50);

    int round = 0;
    while (round < rounds) {
        performRandomTest(clusterer, round);
        round++;
    }
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure)

Example 3 with DistanceMeasure

use of org.apache.ignite.ml.math.distances.DistanceMeasure in project ignite by apache.

the class FuzzyCMeansDistributedClustererTest method performRandomTest.

/**
 * Test given clusterer on points placed randomly around vertexes of a regular polygon.
 * <p>
 * The expected centers are the vertexes of a regular polygon inscribed in a circle of radius
 * {@code maxRadius}; every generated point lies within {@code maxRadius / 10} of one of them.
 * The test passes when every computed center is closer than 100 to some expected center.
 *
 * @param distributedClusterer Tested clusterer.
 * @param seed Seed for the random numbers generator.
 */
private void performRandomTest(FuzzyCMeansDistributedClusterer distributedClusterer, long seed) {
    final int minNumCenters = 2;
    final int maxNumCenters = 5;
    final double maxRadius = 1000;
    final int maxPoints = 1000;
    final int minPoints = 300;
    Random random = new Random(seed);
    // The bound of nextInt is exclusive, so numCenters falls in [minNumCenters, maxNumCenters).
    int numCenters = random.nextInt(maxNumCenters - minNumCenters) + minNumCenters;
    double[][] centers = new double[numCenters][2];
    for (int i = 0; i < numCenters; i++) {
        double angle = Math.PI * 2.0 * i / numCenters;
        centers[i][0] = Math.cos(angle) * maxRadius;
        centers[i][1] = Math.sin(angle) * maxRadius;
    }
    int numPoints = minPoints + random.nextInt(maxPoints - minPoints);
    double[][] points = new double[numPoints][2];
    for (int i = 0; i < numPoints; i++) {
        int center = random.nextInt(numCenters);
        double randomDouble = random.nextDouble();
        // Squaring the uniform sample biases the points towards their center.
        double radius = randomDouble * randomDouble * maxRadius / 10;
        double angle = random.nextDouble() * Math.PI * 2.0;
        points[i][0] = centers[center][0] + Math.cos(angle) * radius;
        points[i][1] = centers[center][1] + Math.sin(angle) * radius;
    }
    SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(numPoints, 2, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
    // Release the distributed matrix even when clustering throws or the assertion fails.
    try {
        for (int i = 0; i < numPoints; i++) {
            pntMatrix.setRow(i, points[i]);
        }
        FuzzyCMeansModel mdl = distributedClusterer.cluster(pntMatrix, numCenters);
        Vector[] computedCenters = mdl.centers();
        DistanceMeasure measure = mdl.distanceMeasure();
        // Counts down once for every computed center that has an expected center nearby.
        int cntr = numCenters;
        for (int i = 0; i < numCenters; i++) {
            for (int j = 0; j < numCenters; j++) {
                if (measure.compute(computedCenters[i], new DenseLocalOnHeapVector(centers[j])) < 100) {
                    cntr--;
                    break;
                }
            }
        }
        assertEquals(0, cntr);
    } finally {
        pntMatrix.destroy();
    }
}
Also used : SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) Random(java.util.Random) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure)

Example 4 with DistanceMeasure

use of org.apache.ignite.ml.math.distances.DistanceMeasure in project ignite by apache.

the class IgniteFuzzyCMeansDistributedClustererBenchmark method test.

/**
 * {@inheritDoc}
 */
@Override
public boolean test(Map<Object, Object> ctx) throws Exception {
    // Create IgniteThread, we must work with SparseDistributedMatrix inside IgniteThread
    // because we create ignite cache internally.
    IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), this.getClass().getSimpleName(), new Runnable() {

        /**
         * {@inheritDoc}
         */
        @Override
        public void run() {
            // IMPL NOTE originally taken from FuzzyCMeansExample.
            // Distance measure that computes distance between two points.
            DistanceMeasure distanceMeasure = new EuclideanDistance();
            // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means).
            double exponentialWeight = 2.0;
            // Condition that indicates when the algorithm must stop.
            // Here the algorithm stops once memberships have changed insignificantly.
            BaseFuzzyCMeansClusterer.StopCondition stopCond = BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS;
            // Maximum difference between new and old membership values with which algorithm will continue to work.
            double maxDelta = 0.01;
            // The maximum number of FCM iterations.
            int maxIterations = 50;
            // Number of steps of primary centers selection (more steps more candidates).
            int initializationSteps = 2;
            // Number of K-Means iterations used to choose the required number of primary centers from candidates.
            int kMeansMaxIterations = 50;
            // Create new distributed clusterer with parameters described above.
            FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(distanceMeasure, exponentialWeight, stopCond, maxDelta, maxIterations, null, initializationSteps, kMeansMaxIterations);
            // Create sample data.
            double[][] points = shuffle((int) (DataChanger.next()));
            // Initialize matrix of data points. Each row contains one point.
            int rows = points.length;
            int cols = points[0].length;
            // Create the matrix that contains sample points.
            SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(rows, cols, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
            // Release the distributed matrix even when filling it or clustering throws.
            try {
                // Store points into matrix.
                pntMatrix.assign(points);
                // Call clusterization method with some number of centers.
                // It returns model that can predict results for new points.
                int numCenters = 4;
                FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters);
                // Get centers of clusters computed by the Fuzzy C-Means algorithm.
                mdl.centers();
            } finally {
                pntMatrix.destroy();
            }
        }
    });
    igniteThread.start();
    // Wait for the clustering run to finish before reporting the iteration as done.
    igniteThread.join();
    return true;
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) FuzzyCMeansModel(org.apache.ignite.ml.clustering.FuzzyCMeansModel) IgniteThread(org.apache.ignite.thread.IgniteThread) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure) FuzzyCMeansDistributedClusterer(org.apache.ignite.ml.clustering.FuzzyCMeansDistributedClusterer)

Example 5 with DistanceMeasure

use of org.apache.ignite.ml.math.distances.DistanceMeasure in project ignite by apache.

the class IgniteFuzzyCMeansLocalClustererBenchmark method test.

/**
 * {@inheritDoc}
 */
@Override
public boolean test(Map<Object, Object> ctx) throws Exception {
    // IMPL NOTE originally taken from FuzzyLocalCMeansExample.

    // Clusterer settings:
    //  - Euclidean distance between two points;
    //  - "fuzziness" constant used in membership calculation (1.0+-eps ~ K-Means);
    //  - stop once memberships change insignificantly;
    //  - largest membership change for which the algorithm keeps iterating;
    //  - cap on the number of FCM iterations.
    DistanceMeasure metric = new EuclideanDistance();
    double fuzziness = 2.0;
    BaseFuzzyCMeansClusterer.StopCondition stopRule = BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS;
    double membershipTolerance = 0.01;
    int iterationLimit = 50;

    FuzzyCMeansLocalClusterer localClusterer = new FuzzyCMeansLocalClusterer(metric, fuzziness, stopRule, membershipTolerance, iterationLimit, null);

    // Sample data wrapped into a local on-heap matrix.
    double[][] samples = shuffle((int) (DataChanger.next()));
    DenseLocalOnHeapMatrix sampleMatrix = new DenseLocalOnHeapMatrix(samples);

    // Cluster into a fixed number of centers and fetch them from the resulting model;
    // the centers themselves are not used by the benchmark.
    int clusterCnt = 4;
    localClusterer.cluster(sampleMatrix, clusterCnt).centers();

    return true;
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) FuzzyCMeansModel(org.apache.ignite.ml.clustering.FuzzyCMeansModel) FuzzyCMeansLocalClusterer(org.apache.ignite.ml.clustering.FuzzyCMeansLocalClusterer) BaseFuzzyCMeansClusterer(org.apache.ignite.ml.clustering.BaseFuzzyCMeansClusterer) DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure)

Aggregations

DistanceMeasure (org.apache.ignite.ml.math.distances.DistanceMeasure)8 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)7 Vector (org.apache.ignite.ml.math.Vector)5 FuzzyCMeansModel (org.apache.ignite.ml.clustering.FuzzyCMeansModel)4 SparseDistributedMatrix (org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix)4 BaseFuzzyCMeansClusterer (org.apache.ignite.ml.clustering.BaseFuzzyCMeansClusterer)3 DenseLocalOnHeapMatrix (org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix)3 DenseLocalOnHeapVector (org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)3 Arrays (java.util.Arrays)2 Comparator (java.util.Comparator)2 Random (java.util.Random)2 Ignite (org.apache.ignite.Ignite)2 FuzzyCMeansDistributedClusterer (org.apache.ignite.ml.clustering.FuzzyCMeansDistributedClusterer)2 FuzzyCMeansLocalClusterer (org.apache.ignite.ml.clustering.FuzzyCMeansLocalClusterer)2 IgniteThread (org.apache.ignite.thread.IgniteThread)2 ArrayList (java.util.ArrayList)1 Collections (java.util.Collections)1 IgniteUtils (org.apache.ignite.internal.util.IgniteUtils)1 StorageConstants (org.apache.ignite.ml.math.StorageConstants)1 MathIllegalArgumentException (org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException)1