Search in sources :

Example 16 with EuclideanDistance

Use of org.apache.ignite.ml.math.distances.EuclideanDistance in the Apache Ignite project.

From the class KMeansLocalClustererExample, method main:

/**
 * Runs the local K-means clusterer on a generated dataset and prints the mass centers
 * returned by the dataset generator next to the centers found by the clusterer.
 *
 * @param args Command line arguments, none required.
 */
public static void main(String[] args) {
    // IMPL NOTE based on KMeansDistributedClustererTestSingleNode#testClusterizationOnDatasetWithObviousStructure
    System.out.println(">>> K-means local clusterer example started.");

    // Matrix of two-column rows that receives the generated points.
    final int rowCnt = 10000;
    DenseLocalOnHeapMatrix pts = new DenseLocalOnHeapMatrix(rowCnt, 2);

    // generate(...) hands back the mass centers; presumably it also populates the matrix - TODO confirm.
    DatasetWithObviousStructure structuredData = new DatasetWithObviousStructure(10000);
    List<Vector> expCenters = structuredData.generate(pts);

    // The same comparator orders both the reference centers and the computed ones,
    // so the two printed lists can be compared position by position.
    EuclideanDistance metric = new EuclideanDistance();
    Vector[] refCenters = structuredData.centers().values().toArray(new Vector[0]);
    OrderedNodesComparator order = new OrderedNodesComparator(refCenters, metric);
    expCenters.sort(order);

    // Cluster the points into 4 groups.
    KMeansModel model = new KMeansLocalClusterer(metric, 100, 1L).cluster(pts, 4);
    Vector[] foundCenters = model.centers();
    Arrays.sort(foundCenters, order);

    System.out.println("Mass centers:");
    for (Vector center : expCenters) {
        Tracer.showAscii(center);
    }

    System.out.println("Cluster centers:");
    for (Vector center : foundCenters) {
        Tracer.showAscii(center);
    }

    System.out.println("\n>>> K-means local clusterer example completed.");
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) KMeansModel(org.apache.ignite.ml.clustering.KMeansModel) Tracer(org.apache.ignite.ml.math.Tracer) DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) Vector(org.apache.ignite.ml.math.Vector) KMeansLocalClusterer(org.apache.ignite.ml.clustering.KMeansLocalClusterer)

Example 17 with EuclideanDistance

Use of org.apache.ignite.ml.math.distances.EuclideanDistance in the Apache Ignite project.

From the class KNNClassificationExample, method main:

/**
 * Executes example: loads the iris dataset, splits it into train and test parts, classifies
 * the test part with a kNN model and prints the number of errors, the accuracy, the confusion
 * matrix and the per-class precision, recall and F-score.
 *
 * @param args Command line arguments, none required.
 * @throws InterruptedException If the calling thread is interrupted while waiting for the worker thread.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println(">>> kNN classification example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), KNNClassificationExample.class.getSimpleName(), () -> {
            try {
                // Prepare path to read
                File file = IgniteUtils.resolveIgnitePath(KNN_IRIS_TXT);
                if (file == null)
                    throw new RuntimeException("Can't find file: " + KNN_IRIS_TXT);
                Path path = file.toPath();
                // Read dataset from file
                LabeledDataset dataset = LabeledDatasetLoader.loadFromTxtFile(path, SEPARATOR, true, false);
                // Random splitting of iris data as 70% train and 30% test datasets
                LabeledDatasetTestTrainPair split = new LabeledDatasetTestTrainPair(dataset, 0.3);
                System.out.println("\n>>> Amount of observations in train dataset " + split.train().rowSize());
                System.out.println("\n>>> Amount of observations in test dataset " + split.test().rowSize());
                LabeledDataset test = split.test();
                LabeledDataset train = split.train();
                KNNModel knnMdl = new KNNModel(5, new EuclideanDistance(), KNNStrategy.SIMPLE, train);
                // Remember the true labels before predictions overwrite them in the test dataset.
                // NOTE(review): this relies on labels() returning a copy rather than a live view - confirm.
                final double[] labels = test.labels();
                // Save predicted classes to test dataset
                LabellingMachine.assignLabels(test, knnMdl);
                // Calculate amount of errors on test dataset
                int amountOfErrors = 0;
                for (int i = 0; i < test.rowSize(); i++) {
                    if (test.label(i) != labels[i])
                        amountOfErrors++;
                }
                System.out.println("\n>>> Absolute amount of errors " + amountOfErrors);
                // Accuracy is the share of correct predictions, i.e. one minus the error rate.
                System.out.println("\n>>> Accuracy " + (1 - amountOfErrors / (double) test.rowSize()));
                // Build confusion matrix. See https://en.wikipedia.org/wiki/Confusion_matrix
                // Rows are indexed by predicted class, columns by true class; labels are 1-based.
                int[][] confusionMtx = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } };
                for (int i = 0; i < test.rowSize(); i++) {
                    int idx1 = (int) test.label(i);
                    int idx2 = (int) labels[i];
                    confusionMtx[idx1 - 1][idx2 - 1]++;
                }
                System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(confusionMtx));
                // Calculate precision, recall and F-metric for each class
                for (int i = 0; i < 3; i++) {
                    // Precision: correct predictions of class i over everything predicted as class i (row sum).
                    double precision = 0.0;
                    for (int j = 0; j < 3; j++) precision += confusionMtx[i][j];
                    precision = confusionMtx[i][i] / precision;
                    double clsLb = (double) (i + 1);
                    System.out.println("\n>>> Precision for class " + clsLb + " is " + precision);
                    // Recall: correct predictions of class i over everything that truly is class i (column sum).
                    double recall = 0.0;
                    for (int j = 0; j < 3; j++) recall += confusionMtx[j][i];
                    recall = confusionMtx[i][i] / recall;
                    System.out.println("\n>>> Recall for class " + clsLb + " is " + recall);
                    // F-score: harmonic mean of precision and recall.
                    double fScore = 2 * precision * recall / (precision + recall);
                    System.out.println("\n>>> F-score for class " + clsLb + " is " + fScore);
                }
            } catch (IOException e) {
                e.printStackTrace();
                System.out.println("\n>>> Unexpected exception, check resources: " + e);
            } finally {
                System.out.println("\n>>> kNN classification example completed.");
            }
        });
        // Run the example body in the Ignite thread and wait for it before the grid is closed.
        igniteThread.start();
        igniteThread.join();
    }
}
Also used : Path(java.nio.file.Path) IOException(java.io.IOException) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) LabeledDatasetTestTrainPair(org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair) KNNModel(org.apache.ignite.ml.knn.models.KNNModel) Ignite(org.apache.ignite.Ignite) IgniteThread(org.apache.ignite.thread.IgniteThread) File(java.io.File)

Example 18 with EuclideanDistance

Use of org.apache.ignite.ml.math.distances.EuclideanDistance in the Apache Ignite project.

From the class FuzzyCMeansExample, method main:

/**
 * Executes example: starts an Ignite grid, clusters a small hard-coded set of 2D points with the
 * distributed Fuzzy C-Means clusterer inside an Ignite thread and prints the four computed centers.
 *
 * @param args Command line arguments, none required.
 * @throws InterruptedException If the calling thread is interrupted while waiting for the worker thread.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println(">>> Fuzzy C-Means usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        // Start new Ignite thread.
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), FuzzyCMeansExample.class.getSimpleName(), () -> {
            // Distance measure that computes distance between two points.
            DistanceMeasure distanceMeasure = new EuclideanDistance();
            // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means).
            double exponentialWeight = 2.0;
            // Condition that indicates when the algorithm must stop.
            // In this example the algorithm stops if memberships have changed insignificantly.
            BaseFuzzyCMeansClusterer.StopCondition stopCond = BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS;
            // Maximum difference between new and old membership values with which algorithm will continue to work.
            double maxDelta = 0.01;
            // The maximum number of FCM iterations.
            int maxIterations = 50;
            // Value that is used to initialize random numbers generator. You can choose it randomly.
            Long seed = null;
            // Number of steps of primary centers selection (more steps more candidates).
            int initializationSteps = 2;
            // Number of K-Means iterations used to choose the required number of primary centers from candidates.
            int kMeansMaxIterations = 50;
            // Create new distributed clusterer with parameters described above.
            System.out.println(">>> Create new Distributed Fuzzy C-Means clusterer.");
            FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(distanceMeasure, exponentialWeight, stopCond, maxDelta, maxIterations, seed, initializationSteps, kMeansMaxIterations);
            // Create sample data.
            double[][] points = new double[][] { { -10, -10 }, { -9, -11 }, { -10, -9 }, { -11, -9 }, { 10, 10 }, { 9, 11 }, { 10, 9 }, { 11, 9 }, { -10, 10 }, { -9, 11 }, { -10, 9 }, { -11, 9 }, { 10, -10 }, { 9, -11 }, { 10, -9 }, { 11, -9 } };
            // Initialize matrix of data points. Each row contains one point.
            int rows = points.length;
            int cols = points[0].length;
            System.out.println(">>> Create the matrix that contains sample points.");
            SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(rows, cols, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
            try {
                // Store points into matrix.
                pntMatrix.assign(points);
                // Call clusterization method with some number of centers.
                // It returns model that can predict results for new points.
                System.out.println(">>> Perform clusterization.");
                int numCenters = 4;
                FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters);
                // You can also get the centers of clusters computed by the Fuzzy C-Means algorithm.
                Vector[] centers = mdl.centers();
                String res = ">>> Results:\n" + ">>> 1st center: " + centers[0].get(0) + " " + centers[0].get(1) + "\n" + ">>> 2nd center: " + centers[1].get(0) + " " + centers[1].get(1) + "\n" + ">>> 3rd center: " + centers[2].get(0) + " " + centers[2].get(1) + "\n" + ">>> 4th center: " + centers[3].get(0) + " " + centers[3].get(1) + "\n";
                System.out.println(res);
            } finally {
                // Destroy the matrix even if assignment or clusterization fails.
                pntMatrix.destroy();
            }
        });
        // Run the example body in the Ignite thread and wait for it before the grid is closed.
        igniteThread.start();
        igniteThread.join();
    }
}
Also used : SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) FuzzyCMeansModel(org.apache.ignite.ml.clustering.FuzzyCMeansModel) Ignite(org.apache.ignite.Ignite) IgniteThread(org.apache.ignite.thread.IgniteThread) BaseFuzzyCMeansClusterer(org.apache.ignite.ml.clustering.BaseFuzzyCMeansClusterer) Vector(org.apache.ignite.ml.math.Vector) FuzzyCMeansDistributedClusterer(org.apache.ignite.ml.clustering.FuzzyCMeansDistributedClusterer)

Example 19 with EuclideanDistance

Use of org.apache.ignite.ml.math.distances.EuclideanDistance in the Apache Ignite project.

From the class KMeansDistributedClustererExample, method main:

/**
 * Executes example: starts an Ignite grid, runs the distributed K-means clusterer on a generated
 * dataset inside an Ignite thread and prints the mass centers returned by the dataset generator
 * next to the centers found by the clusterer.
 *
 * @param args Command line arguments, none required.
 * @throws InterruptedException If the calling thread is interrupted while waiting for the worker thread.
 */
public static void main(String[] args) throws InterruptedException {
    // IMPL NOTE based on KMeansDistributedClustererTestSingleNode#testClusterizationOnDatasetWithObviousStructure
    System.out.println(">>> K-means distributed clusterer example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        // Create IgniteThread, we must work with SparseDistributedMatrix inside IgniteThread
        // because we create ignite cache internally.
        // The thread is named after this example class, as the other examples do.
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), KMeansDistributedClustererExample.class.getSimpleName(), () -> {
            int ptsCnt = 10000;
            SparseDistributedMatrix points = new SparseDistributedMatrix(ptsCnt, 2, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
            try {
                DatasetWithObviousStructure dataset = new DatasetWithObviousStructure(10000);
                List<Vector> massCenters = dataset.generate(points);
                EuclideanDistance dist = new EuclideanDistance();
                KMeansDistributedClusterer clusterer = new KMeansDistributedClusterer(dist, 3, 100, 1L);
                // Cluster the points into 4 groups and take the resulting centers.
                Vector[] resCenters = clusterer.cluster(points, 4).centers();
                System.out.println("Mass centers:");
                massCenters.forEach(Tracer::showAscii);
                System.out.println("Cluster centers:");
                Arrays.asList(resCenters).forEach(Tracer::showAscii);
            } finally {
                // Destroy the matrix even if generation or clusterization fails.
                points.destroy();
            }
            System.out.println("\n>>> K-means distributed clusterer example completed.");
        });
        // Run the example body in the Ignite thread and wait for it before the grid is closed.
        igniteThread.start();
        igniteThread.join();
    }
}
Also used : SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) SparseDistributedMatrixExample(org.apache.ignite.examples.ml.math.matrix.SparseDistributedMatrixExample) Tracer(org.apache.ignite.ml.math.Tracer) KMeansDistributedClusterer(org.apache.ignite.ml.clustering.KMeansDistributedClusterer) Ignite(org.apache.ignite.Ignite) IgniteThread(org.apache.ignite.thread.IgniteThread) Vector(org.apache.ignite.ml.math.Vector)

Example 20 with EuclideanDistance

Use of org.apache.ignite.ml.math.distances.EuclideanDistance in the Apache Ignite project.

From the class FuzzyCMeansLocalExample, method main:

/**
 * Runs the local Fuzzy C-Means clusterer on a small hard-coded set of 2D points
 * and prints the coordinates of the four computed cluster centers.
 *
 * @param args Command line arguments, none required.
 */
public static void main(String[] args) {
    System.out.println(">>> Local Fuzzy C-Means usage example started.");

    // Metric used to compute the distance between two points.
    DistanceMeasure metric = new EuclideanDistance();
    // "Fuzziness" constant used in membership calculation (1.0+-eps ~ K-Means).
    double fuzziness = 2.0;
    // Stop rule: finish once memberships have changed insignificantly.
    BaseFuzzyCMeansClusterer.StopCondition stopRule = BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS;
    // Largest difference between new and old membership values at which the algorithm keeps working.
    double deltaLimit = 0.01;
    // Upper bound on the number of FCM iterations.
    int iterLimit = 50;
    // Seed for the random numbers generator; it can be chosen arbitrarily.
    Long rndSeed = null;

    // Build the local clusterer from the settings above.
    System.out.println(">>> Create new Local Fuzzy C-Means clusterer.");
    FuzzyCMeansLocalClusterer fcm = new FuzzyCMeansLocalClusterer(metric, fuzziness, stopRule, deltaLimit, iterLimit, rndSeed);

    // Sample data: one point per row.
    double[][] samples = {
        { -10, -10 }, { -9, -11 }, { -10, -9 }, { -11, -9 },
        { 10, 10 }, { 9, 11 }, { 10, 9 }, { 11, 9 },
        { -10, 10 }, { -9, 11 }, { -10, 9 }, { -11, 9 },
        { 10, -10 }, { 9, -11 }, { 10, -9 }, { 11, -9 }
    };

    // Wrap the sample points into a matrix.
    System.out.println(">>> Create the matrix that contains sample points.");
    DenseLocalOnHeapMatrix sampleMtx = new DenseLocalOnHeapMatrix(samples);

    // Cluster the points around the requested number of centers; the call returns a model.
    System.out.println(">>> Perform clusterization.");
    int centerCnt = 4;
    FuzzyCMeansModel model = fcm.cluster(sampleMtx, centerCnt);

    // Read the computed cluster centers from the model and report them.
    Vector[] found = model.centers();
    StringBuilder report = new StringBuilder(">>> Results:\n");
    report.append(">>> 1st center: ").append(found[0].get(0)).append(" ").append(found[0].get(1)).append("\n");
    report.append(">>> 2nd center: ").append(found[1].get(0)).append(" ").append(found[1].get(1)).append("\n");
    report.append(">>> 3rd center: ").append(found[2].get(0)).append(" ").append(found[2].get(1)).append("\n");
    report.append(">>> 4th center: ").append(found[3].get(0)).append(" ").append(found[3].get(1)).append("\n");
    System.out.println(report.toString());
}
Also used : DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) FuzzyCMeansModel(org.apache.ignite.ml.clustering.FuzzyCMeansModel) FuzzyCMeansLocalClusterer(org.apache.ignite.ml.clustering.FuzzyCMeansLocalClusterer) BaseFuzzyCMeansClusterer(org.apache.ignite.ml.clustering.BaseFuzzyCMeansClusterer) Vector(org.apache.ignite.ml.math.Vector)

Aggregations

EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance): 37; Vector (org.apache.ignite.ml.math.Vector): 22; DenseLocalOnHeapVector (org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector): 18; DenseLocalOnHeapMatrix (org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix): 14; LabeledDataset (org.apache.ignite.ml.structures.LabeledDataset): 13; DistanceMeasure (org.apache.ignite.ml.math.distances.DistanceMeasure): 10; Test (org.junit.Test): 10; KNNModel (org.apache.ignite.ml.knn.models.KNNModel): 9; SparseDistributedMatrix (org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix): 9; ArrayList (java.util.ArrayList): 6; IgniteThread (org.apache.ignite.thread.IgniteThread): 6; Arrays (java.util.Arrays): 5; Comparator (java.util.Comparator): 5; Collections (java.util.Collections): 4; Ignite (org.apache.ignite.Ignite): 4; FuzzyCMeansModel (org.apache.ignite.ml.clustering.FuzzyCMeansModel): 4; KNNMultipleLinearRegression (org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression): 4; MathIllegalArgumentException (org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException): 4; SparseBlockDistributedVector (org.apache.ignite.ml.math.impls.vector.SparseBlockDistributedVector): 4; Assert.assertEquals (org.junit.Assert.assertEquals): 4