Search in sources :

Example 6 with SparseDistributedMatrix

use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.

the class FuzzyCMeansExample method main.

/**
 * Executes example.
 *
 * <p>Starts an Ignite grid from {@code examples/config/example-ignite.xml}, runs distributed
 * Fuzzy C-Means clustering on a small hard-coded data set inside a dedicated Ignite thread,
 * prints the four computed centers and waits for that thread to finish before the grid is closed.
 *
 * @param args Command line arguments, none required.
 * @throws InterruptedException If the calling thread is interrupted while waiting for the worker thread.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println(">>> Fuzzy C-Means usage example started.");
    // Start ignite grid; try-with-resources closes it once the worker thread has been joined.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        // Start new Ignite thread.
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), FuzzyCMeansExample.class.getSimpleName(), () -> {
            // Distance measure that computes distance between two points.
            DistanceMeasure distanceMeasure = new EuclideanDistance();
            // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means).
            double exponentialWeight = 2.0;
            // Condition that indicates when the algorithm must stop.
            // In this example the algorithm stops if memberships have changed insignificantly.
            BaseFuzzyCMeansClusterer.StopCondition stopCond = BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS;
            // Maximum difference between new and old membership values with which algorithm will continue to work.
            double maxDelta = 0.01;
            // The maximum number of FCM iterations.
            int maxIterations = 50;
            // Value that is used to initialize random numbers generator. You can choose it randomly.
            Long seed = null;
            // Number of steps of primary centers selection (more steps more candidates).
            int initializationSteps = 2;
            // Number of K-Means iterations used to choose the required number of primary centers from candidates.
            int kMeansMaxIterations = 50;
            // Create new distributed clusterer with parameters described above.
            System.out.println(">>> Create new Distributed Fuzzy C-Means clusterer.");
            FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(distanceMeasure, exponentialWeight, stopCond, maxDelta, maxIterations, seed, initializationSteps, kMeansMaxIterations);
            // Create sample data: four groups of four points, one group per quadrant.
            double[][] points = new double[][] { { -10, -10 }, { -9, -11 }, { -10, -9 }, { -11, -9 }, { 10, 10 }, { 9, 11 }, { 10, 9 }, { 11, 9 }, { -10, 10 }, { -9, 11 }, { -10, 9 }, { -11, 9 }, { 10, -10 }, { 9, -11 }, { 10, -9 }, { 11, -9 } };
            // Initialize matrix of data points. Each row contains one point.
            int rows = points.length;
            int cols = points[0].length;
            System.out.println(">>> Create the matrix that contains sample points.");
            SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(rows, cols, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
            // From here on the matrix exists and must be destroyed even if clustering fails.
            try {
                // Store points into matrix.
                pntMatrix.assign(points);
                // Call clusterization method with some number of centers.
                // It returns model that can predict results for new points.
                System.out.println(">>> Perform clusterization.");
                int numCenters = 4;
                FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters);
                // You can also get centers of clusters that are computed by the Fuzzy C-Means algorithm.
                Vector[] centers = mdl.centers();
                String res = ">>> Results:\n"
                    + ">>> 1st center: " + centers[0].get(0) + " " + centers[0].get(1) + "\n"
                    + ">>> 2nd center: " + centers[1].get(0) + " " + centers[1].get(1) + "\n"
                    + ">>> 3rd center: " + centers[2].get(0) + " " + centers[2].get(1) + "\n"
                    + ">>> 4th center: " + centers[3].get(0) + " " + centers[3].get(1) + "\n";
                System.out.println(res);
            }
            finally {
                pntMatrix.destroy();
            }
        });
        igniteThread.start();
        // Wait for the worker so the grid is not closed while clustering is still running.
        igniteThread.join();
    }
}
Also used : SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) FuzzyCMeansModel(org.apache.ignite.ml.clustering.FuzzyCMeansModel) Ignite(org.apache.ignite.Ignite) IgniteThread(org.apache.ignite.thread.IgniteThread) BaseFuzzyCMeansClusterer(org.apache.ignite.ml.clustering.BaseFuzzyCMeansClusterer) Vector(org.apache.ignite.ml.math.Vector) FuzzyCMeansDistributedClusterer(org.apache.ignite.ml.clustering.FuzzyCMeansDistributedClusterer)

Example 7 with SparseDistributedMatrix

use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.

the class FuzzyCMeansDistributedClustererTest method testTwoDimensionsLittleData.

/**
 * Test that algorithm gives correct results on a small sample - 4 centers on the plane.
 *
 * <p>The sample consists of four groups of four points, one group per quadrant. Each computed
 * center must lie within distance 1 of the corresponding point (+-10, +-10).
 */
public void testTwoDimensionsLittleData() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());
    FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(new EuclideanDistance(), 2, BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS, 0.01, 500, null, 2, 50);
    double[][] points = new double[][] { { -10, -10 }, { -9, -11 }, { -10, -9 }, { -11, -9 }, { 10, 10 }, { 9, 11 }, { 10, 9 }, { 11, 9 }, { -10, 10 }, { -9, 11 }, { -10, 9 }, { -11, 9 }, { 10, -10 }, { 9, -11 }, { 10, -9 }, { 11, -9 } };
    // Matrix dimensions are derived from the sample so they cannot drift apart from the data.
    SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(points.length, points[0].length, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
    // Destroy the matrix even when an assertion below fails.
    try {
        for (int i = 0; i < points.length; i++) {
            pntMatrix.setRow(i, points[i]);
        }
        FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, 4);
        Vector[] centers = mdl.centers();
        // Order centers by polar angle in (-pi, pi]: the expected order is therefore
        // (-10,-10), (10,-10), (10,10), (-10,10).
        Arrays.sort(centers, Comparator.comparing(vector -> Math.atan2(vector.get(1), vector.get(0))));
        DistanceMeasure measure = mdl.distanceMeasure();
        assertEquals(0, measure.compute(centers[0], new DenseLocalOnHeapVector(new double[] { -10, -10 })), 1);
        assertEquals(0, measure.compute(centers[1], new DenseLocalOnHeapVector(new double[] { 10, -10 })), 1);
        assertEquals(0, measure.compute(centers[2], new DenseLocalOnHeapVector(new double[] { 10, 10 })), 1);
        assertEquals(0, measure.compute(centers[3], new DenseLocalOnHeapVector(new double[] { -10, 10 })), 1);
    }
    finally {
        pntMatrix.destroy();
    }
}
Also used : Arrays(java.util.Arrays) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Vector(org.apache.ignite.ml.math.Vector) IgniteUtils(org.apache.ignite.internal.util.IgniteUtils) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) Random(java.util.Random) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure) Ignite(org.apache.ignite.Ignite) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) Comparator(java.util.Comparator) StorageConstants(org.apache.ignite.ml.math.StorageConstants) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure)

Example 8 with SparseDistributedMatrix

use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.

the class FuzzyCMeansDistributedClustererTest method performRandomTest.

/**
 * Test given clusterer on points placed randomly around vertexes of a regular polygon.
 *
 * <p>The test passes when every computed center lies within distance 100 of at least one
 * of the polygon vertexes.
 *
 * @param distributedClusterer Tested clusterer.
 * @param seed Seed for the random numbers generator.
 */
private void performRandomTest(FuzzyCMeansDistributedClusterer distributedClusterer, long seed) {
    final int minNumCenters = 2;
    final int maxNumCenters = 5;
    final double maxRadius = 1000;
    final int maxPoints = 1000;
    final int minPoints = 300;
    Random random = new Random(seed);
    // Random.nextInt(bound) excludes the bound, so numCenters is in [minNumCenters, maxNumCenters).
    int numCenters = random.nextInt(maxNumCenters - minNumCenters) + minNumCenters;
    // True centers are the vertexes of a regular polygon inscribed in a circle of radius maxRadius.
    double[][] centers = new double[numCenters][2];
    for (int i = 0; i < numCenters; i++) {
        double angle = Math.PI * 2.0 * i / numCenters;
        centers[i][0] = Math.cos(angle) * maxRadius;
        centers[i][1] = Math.sin(angle) * maxRadius;
    }
    // Likewise numPoints is in [minPoints, maxPoints).
    int numPoints = minPoints + random.nextInt(maxPoints - minPoints);
    double[][] points = new double[numPoints][2];
    for (int i = 0; i < numPoints; i++) {
        int center = random.nextInt(numCenters);
        double randomDouble = random.nextDouble();
        // Squaring a value from [0, 1) concentrates points near their center; radius < maxRadius / 10.
        double radius = randomDouble * randomDouble * maxRadius / 10;
        double angle = random.nextDouble() * Math.PI * 2.0;
        points[i][0] = centers[center][0] + Math.cos(angle) * radius;
        points[i][1] = centers[center][1] + Math.sin(angle) * radius;
    }
    SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(numPoints, 2, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
    // Destroy the matrix even when clustering or the final assertion fails.
    try {
        for (int i = 0; i < numPoints; i++) {
            pntMatrix.setRow(i, points[i]);
        }
        FuzzyCMeansModel mdl = distributedClusterer.cluster(pntMatrix, numCenters);
        Vector[] computedCenters = mdl.centers();
        DistanceMeasure measure = mdl.distanceMeasure();
        // Count down once for each computed center that has a true center closer than 100.
        int cntr = numCenters;
        for (int i = 0; i < numCenters; i++) {
            for (int j = 0; j < numCenters; j++) {
                if (measure.compute(computedCenters[i], new DenseLocalOnHeapVector(centers[j])) < 100) {
                    cntr--;
                    break;
                }
            }
        }
        assertEquals(0, cntr);
    }
    finally {
        pntMatrix.destroy();
    }
}
Also used : SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) Random(java.util.Random) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure)

Example 9 with SparseDistributedMatrix

use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.

the class KMeansDistributedClustererTestSingleNode method testClusterizationOnDatasetWithObviousStructure.

/**
 * Test that K-Means finds the centers of four point clouds of different sizes placed at the
 * vertices of a square.
 *
 * <p>Points are generated around each vertex, written into the matrix in shuffled row order,
 * and the centers found by the clusterer are compared with the mass centers of the generated
 * clouds using an epsilon of 30.
 *
 * @throws IOException Declared by the original signature; nothing in this body throws it explicitly.
 */
public void testClusterizationOnDatasetWithObviousStructure() throws IOException {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());
    int ptsCnt = 10000;
    int squareSideLen = 10000;
    Random rnd = new Random(123456L);
    // Let centers be in the vertices of square.
    // Map key is the number of points generated around the center; keys sum up to ptsCnt
    // (100 + 900 + 3000 + 6000), so every matrix row receives exactly one point.
    Map<Integer, Vector> centers = new HashMap<>();
    centers.put(100, new DenseLocalOnHeapVector(new double[] { 0.0, 0.0 }));
    centers.put(900, new DenseLocalOnHeapVector(new double[] { squareSideLen, 0.0 }));
    centers.put(3000, new DenseLocalOnHeapVector(new double[] { 0.0, squareSideLen }));
    centers.put(6000, new DenseLocalOnHeapVector(new double[] { squareSideLen, squareSideLen }));
    int centersCnt = centers.size();
    SparseDistributedMatrix points = new SparseDistributedMatrix(ptsCnt, 2, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
    // Destroy the matrix even when clustering or the final check fails.
    try {
        // Random row order, so points of one cloud are not stored in consecutive rows.
        List<Integer> permutation = IntStream.range(0, ptsCnt).boxed().collect(Collectors.toList());
        Collections.shuffle(permutation, rnd);
        // All slots initially share one zero vector; each slot is replaced by the result of plus() below.
        // NOTE(review): this relies on plus() returning a new vector rather than mutating the shared one - confirm.
        Vector[] mc = new Vector[centersCnt];
        Arrays.fill(mc, VectorUtils.zeroes(2));
        int centIdx = 0;
        int totalCnt = 0;
        List<Vector> massCenters = new ArrayList<>();
        for (Map.Entry<Integer, Vector> entry : centers.entrySet()) {
            int count = entry.getKey();
            Vector center = entry.getValue();
            for (int i = 0; i < count; i++) {
                Vector pnt = new DenseLocalOnHeapVector(2).assign(center);
                // Shift every coordinate by a random value from [0, squareSideLen / 100).
                // NOTE(review): the result of map() is discarded, so it is presumably applied in place - confirm.
                pnt.map(val -> val + rnd.nextDouble() * squareSideLen / 100);
                mc[centIdx] = mc[centIdx].plus(pnt);
                points.assignRow(permutation.get(totalCnt), pnt);
                totalCnt++;
            }
            // Mass center of the cloud is the sum of its points divided by their number.
            massCenters.add(mc[centIdx].times(1 / (double) count));
            centIdx++;
        }
        EuclideanDistance dist = new EuclideanDistance();
        // The same comparator orders both expected and computed centers, so they can be compared index by index.
        OrderedNodesComparator comp = new OrderedNodesComparator(centers.values().toArray(new Vector[] {}), dist);
        massCenters.sort(comp);
        KMeansDistributedClusterer clusterer = new KMeansDistributedClusterer(dist, 3, 100, 1L);
        KMeansModel mdl = clusterer.cluster(points, 4);
        Vector[] resCenters = mdl.centers();
        Arrays.sort(resCenters, comp);
        checkIsInEpsilonNeighbourhood(resCenters, massCenters.toArray(new Vector[] {}), 30.0);
    }
    finally {
        points.destroy();
    }
}
Also used : SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) Random(java.util.Random) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)

Example 10 with SparseDistributedMatrix

use of org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix in project ignite by apache.

the class IgniteFuzzyCMeansDistributedClustererBenchmark method test.

/**
 * {@inheritDoc}
 *
 * <p>Runs one distributed Fuzzy C-Means clusterization over shuffled sample data inside an
 * {@link IgniteThread} and waits for it to finish.
 */
@Override
public boolean test(Map<Object, Object> ctx) throws Exception {
    // Create IgniteThread, we must work with SparseDistributedMatrix inside IgniteThread
    // because we create ignite cache internally.
    IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), this.getClass().getSimpleName(), new Runnable() {

        /**
         * {@inheritDoc}
         */
        @Override
        public void run() {
            // IMPL NOTE originally taken from FuzzyCMeansExample.
            // Distance measure that computes distance between two points.
            DistanceMeasure distanceMeasure = new EuclideanDistance();
            // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means).
            double exponentialWeight = 2.0;
            // Condition that indicates when the algorithm must stop.
            // In this example the algorithm stops if memberships have changed insignificantly.
            BaseFuzzyCMeansClusterer.StopCondition stopCond = BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS;
            // Maximum difference between new and old membership values with which algorithm will continue to work.
            double maxDelta = 0.01;
            // The maximum number of FCM iterations.
            int maxIterations = 50;
            // Number of steps of primary centers selection (more steps more candidates).
            int initializationSteps = 2;
            // Number of K-Means iterations used to choose the required number of primary centers from candidates.
            int kMeansMaxIterations = 50;
            // Create new distributed clusterer with parameters described above.
            FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(distanceMeasure, exponentialWeight, stopCond, maxDelta, maxIterations, null, initializationSteps, kMeansMaxIterations);
            // Create sample data.
            double[][] points = shuffle((int) (DataChanger.next()));
            // Initialize matrix of data points. Each row contains one point.
            int rows = points.length;
            int cols = points[0].length;
            // Create the matrix that contains sample points.
            SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(rows, cols, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
            // Destroy the matrix even when clustering fails, so repeated runs do not leak matrices.
            try {
                // Store points into matrix.
                pntMatrix.assign(points);
                // Call clusterization method with some number of centers.
                // It returns model that can predict results for new points.
                int numCenters = 4;
                FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters);
                // Get centers of clusters that are computed by the Fuzzy C-Means algorithm; the result is not used.
                mdl.centers();
            }
            finally {
                pntMatrix.destroy();
            }
        }
    });
    igniteThread.start();
    igniteThread.join();
    // NOTE(review): a failure inside run() does not appear to be propagated here; the method returns true regardless - confirm this is intended.
    return true;
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) FuzzyCMeansModel(org.apache.ignite.ml.clustering.FuzzyCMeansModel) IgniteThread(org.apache.ignite.thread.IgniteThread) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure) FuzzyCMeansDistributedClusterer(org.apache.ignite.ml.clustering.FuzzyCMeansDistributedClusterer)

Aggregations

SparseDistributedMatrix (org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix)29 Vector (org.apache.ignite.ml.math.Vector)18 DenseLocalOnHeapVector (org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)14 Random (java.util.Random)11 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)9 Ignite (org.apache.ignite.Ignite)8 IgniteThread (org.apache.ignite.thread.IgniteThread)8 HashMap (java.util.HashMap)7 List (java.util.List)7 Map (java.util.Map)7 Collectors (java.util.stream.Collectors)7 StorageConstants (org.apache.ignite.ml.math.StorageConstants)7 UUID (java.util.UUID)6 DistanceMeasure (org.apache.ignite.ml.math.distances.DistanceMeasure)6 IgniteFunction (org.apache.ignite.ml.math.functions.IgniteFunction)6 SparseDistributedMatrixStorage (org.apache.ignite.ml.math.impls.storage.matrix.SparseDistributedMatrixStorage)6 Collections (java.util.Collections)5 LinkedList (java.util.LinkedList)5 DoubleStream (java.util.stream.DoubleStream)5 IgniteUtils (org.apache.ignite.internal.util.IgniteUtils)5