Search in sources:

Example 1 with EuclideanDistance

use of org.apache.ignite.ml.math.distances.EuclideanDistance in project ignite by apache.

the class KMeansLocalClustererExample method main.

/**
 * Runs the local K-means clustering example: generates a synthetic dataset, clusters it into
 * four groups and prints both the generated mass centers and the centers found by the clusterer,
 * each sorted with the same comparator so the two listings can be compared side by side.
 *
 * @param args Command line arguments, none required.
 */
public static void main(String[] args) {
    // IMPL NOTE based on KMeansDistributedClustererTestSingleNode#testClusterizationOnDatasetWithObviousStructure
    System.out.println(">>> K-means local clusterer example started.");

    // Matrix that receives the generated points: one row per point, two columns per point.
    int ptsCnt = 10000;
    DenseLocalOnHeapMatrix pointsMtx = new DenseLocalOnHeapMatrix(ptsCnt, 2);

    // NOTE(review): the meaning of the constructor argument is not visible here - confirm before tying it to ptsCnt.
    DatasetWithObviousStructure generator = new DatasetWithObviousStructure(10000);

    // Fills the matrix and returns the mass centers of the generated groups.
    List<Vector> expCenters = generator.generate(pointsMtx);

    EuclideanDistance distance = new EuclideanDistance();

    // The same comparator orders both the expected and the computed centers.
    Vector[] refCenters = generator.centers().values().toArray(new Vector[0]);
    OrderedNodesComparator centersOrder = new OrderedNodesComparator(refCenters, distance);

    expCenters.sort(centersOrder);

    KMeansModel model = new KMeansLocalClusterer(distance, 100, 1L).cluster(pointsMtx, 4);

    Vector[] foundCenters = model.centers();
    Arrays.sort(foundCenters, centersOrder);

    System.out.println("Mass centers:");
    for (Vector center : expCenters)
        Tracer.showAscii(center);

    System.out.println("Cluster centers:");
    for (Vector center : foundCenters)
        Tracer.showAscii(center);

    System.out.println("\n>>> K-means local clusterer example completed.");
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) KMeansModel(org.apache.ignite.ml.clustering.KMeansModel) Tracer(org.apache.ignite.ml.math.Tracer) DenseLocalOnHeapMatrix(org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix) Vector(org.apache.ignite.ml.math.Vector) KMeansLocalClusterer(org.apache.ignite.ml.clustering.KMeansLocalClusterer)

Example 2 with EuclideanDistance

use of org.apache.ignite.ml.math.distances.EuclideanDistance in project ignite by apache.

the class KNNClassificationExample method main.

/**
 * Executes the kNN classification example.
 * <p>
 * Starts an Ignite grid and, inside an {@code IgniteThread}, loads a labeled dataset from
 * {@code KNN_IRIS_TXT}, splits it into train and test parts, builds a 5-nearest-neighbours model
 * on the train part, relabels the test part with the model's predictions and prints the number of
 * errors, the accuracy, the confusion matrix and per-class precision, recall and F-score.
 *
 * @param args Command line arguments, none required.
 * @throws InterruptedException If the calling thread is interrupted while waiting for the worker thread.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println(">>> kNN classification example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), KNNClassificationExample.class.getSimpleName(), () -> {
            try {
                // Prepare path to read
                File file = IgniteUtils.resolveIgnitePath(KNN_IRIS_TXT);
                if (file == null)
                    throw new RuntimeException("Can't find file: " + KNN_IRIS_TXT);
                Path path = file.toPath();
                // Read dataset from file
                LabeledDataset dataset = LabeledDatasetLoader.loadFromTxtFile(path, SEPARATOR, true, false);
                // Random splitting of iris data as 70% train and 30% test datasets
                LabeledDatasetTestTrainPair split = new LabeledDatasetTestTrainPair(dataset, 0.3);
                System.out.println("\n>>> Amount of observations in train dataset " + split.train().rowSize());
                System.out.println("\n>>> Amount of observations in test dataset " + split.test().rowSize());
                LabeledDataset test = split.test();
                LabeledDataset train = split.train();
                KNNModel knnMdl = new KNNModel(5, new EuclideanDistance(), KNNStrategy.SIMPLE, train);
                // Remember the true labels before they are overwritten by predictions.
                // NOTE(review): this relies on labels() returning a copy rather than the backing array - confirm.
                final double[] labels = test.labels();
                // Save predicted classes to test dataset
                LabellingMachine.assignLabels(test, knnMdl);
                // Calculate amount of errors on test dataset
                int amountOfErrors = 0;
                for (int i = 0; i < test.rowSize(); i++) {
                    if (test.label(i) != labels[i])
                        amountOfErrors++;
                }
                System.out.println("\n>>> Absolute amount of errors " + amountOfErrors);
                // Accuracy is the share of correct predictions, i.e. one minus the error rate.
                System.out.println("\n>>> Accuracy " + (1 - amountOfErrors / (double) test.rowSize()));
                // Build confusion matrix. See https://en.wikipedia.org/wiki/Confusion_matrix
                // Rows are indexed by predicted class, columns by true class.
                // NOTE(review): assumes labels take the values 1, 2 and 3 only - verify against the data file.
                int[][] confusionMtx = { { 0, 0, 0 }, { 0, 0, 0 }, { 0, 0, 0 } };
                for (int i = 0; i < test.rowSize(); i++) {
                    int idx1 = (int) test.label(i);
                    int idx2 = (int) labels[i];
                    confusionMtx[idx1 - 1][idx2 - 1]++;
                }
                System.out.println("\n>>> Confusion matrix is " + Arrays.deepToString(confusionMtx));
                // Calculate precision, recall and F-metric for each class
                for (int i = 0; i < 3; i++) {
                    // Precision: correct predictions of class i over everything predicted as class i (row sum).
                    double precision = 0.0;
                    for (int j = 0; j < 3; j++)
                        precision += confusionMtx[i][j];
                    precision = confusionMtx[i][i] / precision;
                    double clsLb = (double) (i + 1);
                    System.out.println("\n>>> Precision for class " + clsLb + " is " + precision);
                    // Recall: correct predictions of class i over everything that truly is class i (column sum).
                    double recall = 0.0;
                    for (int j = 0; j < 3; j++)
                        recall += confusionMtx[j][i];
                    recall = confusionMtx[i][i] / recall;
                    System.out.println("\n>>> Recall for class " + clsLb + " is " + recall);
                    // Harmonic mean of precision and recall.
                    double fScore = 2 * precision * recall / (precision + recall);
                    System.out.println("\n>>> F-score for class " + clsLb + " is " + fScore);
                }
            } catch (IOException e) {
                e.printStackTrace();
                System.out.println("\n>>> Unexpected exception, check resources: " + e);
            } finally {
                System.out.println("\n>>> kNN classification example completed.");
            }
        });
        igniteThread.start();
        // Wait for the worker to finish before the try-with-resources block closes the grid.
        igniteThread.join();
    }
}
Also used : Path(java.nio.file.Path) IOException(java.io.IOException) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) LabeledDatasetTestTrainPair(org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair) KNNModel(org.apache.ignite.ml.knn.models.KNNModel) Ignite(org.apache.ignite.Ignite) IgniteThread(org.apache.ignite.thread.IgniteThread) File(java.io.File)

Example 3 with EuclideanDistance

use of org.apache.ignite.ml.math.distances.EuclideanDistance in project ignite by apache.

the class FuzzyCMeansExample method main.

/**
 * Executes the Fuzzy C-Means example.
 * <p>
 * Starts an Ignite grid and, inside an {@code IgniteThread}, builds a distributed Fuzzy C-Means
 * clusterer, stores sixteen sample points in a distributed matrix, clusters them into four groups
 * and prints the coordinates of the resulting centers. The matrix is destroyed afterwards, also
 * when clustering fails.
 *
 * @param args Command line arguments, none required.
 * @throws InterruptedException If the calling thread is interrupted while waiting for the worker thread.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println(">>> Fuzzy C-Means usage example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        // Start new Ignite thread.
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), FuzzyCMeansExample.class.getSimpleName(), () -> {
            // Distance measure that computes distance between two points.
            DistanceMeasure distanceMeasure = new EuclideanDistance();
            // "Fuzziness" - specific constant that is used in membership calculation (1.0+-eps ~ K-Means).
            double exponentialWeight = 2.0;
            // Condition that indicates when the algorithm must stop.
            // In this example the algorithm stops if memberships have changed insignificantly.
            BaseFuzzyCMeansClusterer.StopCondition stopCond = BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS;
            // Maximum difference between new and old membership values with which algorithm will continue to work.
            double maxDelta = 0.01;
            // The maximum number of FCM iterations.
            int maxIterations = 50;
            // Value that is used to initialize random numbers generator. You can choose it randomly.
            Long seed = null;
            // Number of steps of primary centers selection (more steps more candidates).
            int initializationSteps = 2;
            // Number of K-Means iterations used to choose the required number of primary centers from candidates.
            int kMeansMaxIterations = 50;
            // Create new distributed clusterer with parameters described above.
            System.out.println(">>> Create new Distributed Fuzzy C-Means clusterer.");
            FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(distanceMeasure, exponentialWeight, stopCond, maxDelta, maxIterations, seed, initializationSteps, kMeansMaxIterations);
            // Create sample data.
            double[][] points = new double[][] { { -10, -10 }, { -9, -11 }, { -10, -9 }, { -11, -9 }, { 10, 10 }, { 9, 11 }, { 10, 9 }, { 11, 9 }, { -10, 10 }, { -9, 11 }, { -10, 9 }, { -11, 9 }, { 10, -10 }, { 9, -11 }, { 10, -9 }, { 11, -9 } };
            // Initialize matrix of data points. Each row contains one point.
            int rows = points.length;
            int cols = points[0].length;
            System.out.println(">>> Create the matrix that contains sample points.");
            SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(rows, cols, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
            try {
                // Store points into matrix.
                pntMatrix.assign(points);
                // Call clusterization method with some number of centers.
                // It returns model that can predict results for new points.
                System.out.println(">>> Perform clusterization.");
                int numCenters = 4;
                FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, numCenters);
                // You can also get the centers of clusters computed by the Fuzzy C-Means algorithm.
                Vector[] centers = mdl.centers();
                String res = ">>> Results:\n" + ">>> 1st center: " + centers[0].get(0) + " " + centers[0].get(1) + "\n" + ">>> 2nd center: " + centers[1].get(0) + " " + centers[1].get(1) + "\n" + ">>> 3rd center: " + centers[2].get(0) + " " + centers[2].get(1) + "\n" + ">>> 4th center: " + centers[3].get(0) + " " + centers[3].get(1) + "\n";
                System.out.println(res);
            }
            finally {
                // Destroy the matrix even when filling or clustering throws, so it is never left behind.
                pntMatrix.destroy();
            }
        });
        igniteThread.start();
        // Wait for the worker to finish before the try-with-resources block closes the grid.
        igniteThread.join();
    }
}
Also used : SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) FuzzyCMeansModel(org.apache.ignite.ml.clustering.FuzzyCMeansModel) Ignite(org.apache.ignite.Ignite) IgniteThread(org.apache.ignite.thread.IgniteThread) BaseFuzzyCMeansClusterer(org.apache.ignite.ml.clustering.BaseFuzzyCMeansClusterer) Vector(org.apache.ignite.ml.math.Vector) FuzzyCMeansDistributedClusterer(org.apache.ignite.ml.clustering.FuzzyCMeansDistributedClusterer)

Example 4 with EuclideanDistance

use of org.apache.ignite.ml.math.distances.EuclideanDistance in project ignite by apache.

the class LocalModelsTest method importExportKNNModelTest.

/**
 * Checks that a kNN model survives an export/import round trip: a model built on a small
 * two-class dataset is saved through a {@code FileExporter}, loaded back, rebuilt from the loaded
 * format and compared with the original model using {@code equals}.
 *
 * @throws IOException Declared for the helper that runs the test body.
 */
@Test
public void importExportKNNModelTest() throws IOException {
    executeModelTest(mdlFilePath -> {
        // Two groups of three points each, labelled 1.0 and 2.0.
        double[][] mtx = new double[][] { { 1.0, 1.0 }, { 1.0, 2.0 }, { 2.0, 1.0 }, { -1.0, -1.0 }, { -1.0, -2.0 }, { -2.0, -1.0 } };
        double[] lbs = new double[] { 1.0, 1.0, 1.0, 2.0, 2.0, 2.0 };
        LabeledDataset training = new LabeledDataset(mtx, lbs);
        KNNModel mdl = new KNNModel(3, new EuclideanDistance(), KNNStrategy.SIMPLE, training);
        Exporter<KNNModelFormat, String> exporter = new FileExporter<>();
        mdl.saveModel(exporter, mdlFilePath);
        KNNModelFormat load = exporter.load(mdlFilePath);
        Assert.assertNotNull(load);
        // Rebuild the model from the persisted format and compare it with the original.
        KNNModel importedMdl = new KNNModel(load.getK(), load.getDistanceMeasure(), load.getStgy(), load.getTraining());
        // assertEquals performs the same mdl.equals(importedMdl) check but reports both models on failure.
        Assert.assertEquals("Imported kNN model differs from the exported one", mdl, importedMdl);
        return null;
    });
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) KNNModelFormat(org.apache.ignite.ml.knn.models.KNNModelFormat) KNNModel(org.apache.ignite.ml.knn.models.KNNModel) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) Test(org.junit.Test)

Example 5 with EuclideanDistance

use of org.apache.ignite.ml.math.distances.EuclideanDistance in project ignite by apache.

the class FuzzyCMeansDistributedClustererTest method testTwoDimensionsLittleData.

/**
 * Test that algorithm gives correct results on a small sample - 4 centers on the plane.
 * <p>
 * Sixteen points are placed in four groups around (-10,-10), (10,-10), (10,10) and (-10,10).
 * The computed centers are sorted by polar angle and each one must lie within distance 1 of the
 * corresponding expected center. The distributed matrix is destroyed even when an assertion fails.
 */
public void testTwoDimensionsLittleData() {
    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());
    FuzzyCMeansDistributedClusterer clusterer = new FuzzyCMeansDistributedClusterer(new EuclideanDistance(), 2, BaseFuzzyCMeansClusterer.StopCondition.STABLE_MEMBERSHIPS, 0.01, 500, null, 2, 50);
    double[][] points = new double[][] { { -10, -10 }, { -9, -11 }, { -10, -9 }, { -11, -9 }, { 10, 10 }, { 9, 11 }, { 10, 9 }, { 11, 9 }, { -10, 10 }, { -9, 11 }, { -10, 9 }, { -11, 9 }, { 10, -10 }, { 9, -11 }, { 10, -9 }, { 11, -9 } };
    // Matrix dimensions follow the sample data instead of repeating its size as literals.
    SparseDistributedMatrix pntMatrix = new SparseDistributedMatrix(points.length, points[0].length, StorageConstants.ROW_STORAGE_MODE, StorageConstants.RANDOM_ACCESS_MODE);
    try {
        for (int i = 0; i < points.length; i++)
            pntMatrix.setRow(i, points[i]);
        FuzzyCMeansModel mdl = clusterer.cluster(pntMatrix, 4);
        Vector[] centers = mdl.centers();
        // Order centers by polar angle (atan2 is in (-pi, pi]) so they line up with the expected
        // sequence: third quadrant, fourth quadrant, first quadrant, second quadrant.
        Arrays.sort(centers, Comparator.comparingDouble(vector -> Math.atan2(vector.get(1), vector.get(0))));
        DistanceMeasure measure = mdl.distanceMeasure();
        assertEquals(0, measure.compute(centers[0], new DenseLocalOnHeapVector(new double[] { -10, -10 })), 1);
        assertEquals(0, measure.compute(centers[1], new DenseLocalOnHeapVector(new double[] { 10, -10 })), 1);
        assertEquals(0, measure.compute(centers[2], new DenseLocalOnHeapVector(new double[] { 10, 10 })), 1);
        assertEquals(0, measure.compute(centers[3], new DenseLocalOnHeapVector(new double[] { -10, 10 })), 1);
    }
    finally {
        // Destroy the matrix even when clustering or an assertion fails.
        pntMatrix.destroy();
    }
}
Also used : Arrays(java.util.Arrays) GridCommonAbstractTest(org.apache.ignite.testframework.junits.common.GridCommonAbstractTest) Vector(org.apache.ignite.ml.math.Vector) IgniteUtils(org.apache.ignite.internal.util.IgniteUtils) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) Random(java.util.Random) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure) Ignite(org.apache.ignite.Ignite) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) Comparator(java.util.Comparator) StorageConstants(org.apache.ignite.ml.math.StorageConstants) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) SparseDistributedMatrix(org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) Vector(org.apache.ignite.ml.math.Vector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) DistanceMeasure(org.apache.ignite.ml.math.distances.DistanceMeasure)

Aggregations

EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance): 37; Vector (org.apache.ignite.ml.math.Vector): 22; DenseLocalOnHeapVector (org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector): 18; DenseLocalOnHeapMatrix (org.apache.ignite.ml.math.impls.matrix.DenseLocalOnHeapMatrix): 14; LabeledDataset (org.apache.ignite.ml.structures.LabeledDataset): 13; DistanceMeasure (org.apache.ignite.ml.math.distances.DistanceMeasure): 10; Test (org.junit.Test): 10; KNNModel (org.apache.ignite.ml.knn.models.KNNModel): 9; SparseDistributedMatrix (org.apache.ignite.ml.math.impls.matrix.SparseDistributedMatrix): 9; ArrayList (java.util.ArrayList): 6; IgniteThread (org.apache.ignite.thread.IgniteThread): 6; Arrays (java.util.Arrays): 5; Comparator (java.util.Comparator): 5; Collections (java.util.Collections): 4; Ignite (org.apache.ignite.Ignite): 4; FuzzyCMeansModel (org.apache.ignite.ml.clustering.FuzzyCMeansModel): 4; KNNMultipleLinearRegression (org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression): 4; MathIllegalArgumentException (org.apache.ignite.ml.math.exceptions.MathIllegalArgumentException): 4; SparseBlockDistributedVector (org.apache.ignite.ml.math.impls.vector.SparseBlockDistributedVector): 4; Assert.assertEquals (org.junit.Assert.assertEquals): 4