Search in sources:

Example 1 with KNNMultipleLinearRegression

Use of org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression in the Apache Ignite project.

From class KNNMultipleLinearRegressionTest, method testLonglyWithWeightedStrategyAndNormalization.

/**
 * Checks a weighted 5-nearest-neighbour regression (Euclidean distance) built on a training
 * set that has been passed through {@code Normalizer.normalizeWithMiniMax}.
 * <p>
 * The last feature column of the training rows runs from 1947 to 1962 but skips 1956; the
 * query vector carries exactly that missing value, and the prediction is expected to be
 * within 2000 of 67857. The numbers are presumably the Longley data set (suggested by the
 * method name) - TODO confirm.
 */
public void testLonglyWithWeightedStrategyAndNormalization() {
    // Target values, one per training row.
    y = new double[] {
        60323, 61122, 60171, 61187, 63221,
        63639, 64989, 63761, 66019, 68169,
        66513, 68655, 69564, 69331, 70551
    };

    // Feature rows; each row has six features.
    x = new double[][] {
        { 83.0, 234289, 2356, 1590, 107608, 1947 },
        { 88.5, 259426, 2325, 1456, 108632, 1948 },
        { 88.2, 258054, 3682, 1616, 109773, 1949 },
        { 89.5, 284599, 3351, 1650, 110929, 1950 },
        { 96.2, 328975, 2099, 3099, 112075, 1951 },
        { 98.1, 346999, 1932, 3594, 113270, 1952 },
        { 99.0, 365385, 1870, 3547, 115094, 1953 },
        { 100.0, 363112, 3578, 3350, 116219, 1954 },
        { 101.2, 397469, 2904, 3048, 117388, 1955 },
        { 108.4, 442769, 2936, 2798, 120445, 1957 },
        { 110.8, 444546, 4681, 2637, 121950, 1958 },
        { 112.6, 482704, 3813, 2552, 123366, 1959 },
        { 114.2, 502601, 3931, 2514, 125368, 1960 },
        { 115.7, 518173, 4806, 2572, 127852, 1961 },
        { 116.9, 554894, 4007, 2827, 130081, 1962 }
    };

    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    LabeledDataset rawTraining = new LabeledDataset(x, y);
    final LabeledDataset normalizedTrainingDataset =
        (LabeledDataset) Normalizer.normalizeWithMiniMax(rawTraining);

    KNNMultipleLinearRegression knnMdl = new KNNMultipleLinearRegression(
        5, new EuclideanDistance(), KNNStrategy.WEIGHTED, normalizedTrainingDataset);

    // NOTE(review): the query is passed in raw units while the training set was normalized;
    // confirm whether the model rescales queries internally, otherwise the wide tolerance
    // below may be masking a scale mismatch.
    Vector query = new DenseLocalOnHeapVector(
        new double[] { 104.6, 419180, 2822, 2857, 118734, 1956 });

    System.out.println(knnMdl.apply(query));
    Assert.assertEquals(67857, knnMdl.apply(query), 2000);
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) KNNMultipleLinearRegression(org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) Vector(org.apache.ignite.ml.math.Vector) SparseBlockDistributedVector(org.apache.ignite.ml.math.impls.vector.SparseBlockDistributedVector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)

Example 2 with KNNMultipleLinearRegression

Use of org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression in the Apache Ignite project.

From class KNNMultipleLinearRegressionTest, method testLonglyWithNormalization.

/**
 * Checks a simple (unweighted-strategy) 5-nearest-neighbour regression with Euclidean
 * distance, built on a training set that has been passed through
 * {@code Normalizer.normalizeWithMiniMax}.
 * <p>
 * The last feature column of the training rows runs from 1947 to 1962 but skips 1956; the
 * query vector carries exactly that missing value, and the prediction is expected to be
 * within 2000 of 67857. The numbers are presumably the Longley data set (suggested by the
 * method name) - TODO confirm.
 */
public void testLonglyWithNormalization() {
    // Target values, one per training row.
    y = new double[] {
        60323, 61122, 60171, 61187, 63221,
        63639, 64989, 63761, 66019, 68169,
        66513, 68655, 69564, 69331, 70551
    };

    // Feature rows; each row has six features.
    x = new double[][] {
        { 83.0, 234289, 2356, 1590, 107608, 1947 },
        { 88.5, 259426, 2325, 1456, 108632, 1948 },
        { 88.2, 258054, 3682, 1616, 109773, 1949 },
        { 89.5, 284599, 3351, 1650, 110929, 1950 },
        { 96.2, 328975, 2099, 3099, 112075, 1951 },
        { 98.1, 346999, 1932, 3594, 113270, 1952 },
        { 99.0, 365385, 1870, 3547, 115094, 1953 },
        { 100.0, 363112, 3578, 3350, 116219, 1954 },
        { 101.2, 397469, 2904, 3048, 117388, 1955 },
        { 108.4, 442769, 2936, 2798, 120445, 1957 },
        { 110.8, 444546, 4681, 2637, 121950, 1958 },
        { 112.6, 482704, 3813, 2552, 123366, 1959 },
        { 114.2, 502601, 3931, 2514, 125368, 1960 },
        { 115.7, 518173, 4806, 2572, 127852, 1961 },
        { 116.9, 554894, 4007, 2827, 130081, 1962 }
    };

    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    LabeledDataset rawTraining = new LabeledDataset(x, y);
    final LabeledDataset normalizedTrainingDataset =
        (LabeledDataset) Normalizer.normalizeWithMiniMax(rawTraining);

    KNNMultipleLinearRegression knnMdl = new KNNMultipleLinearRegression(
        5, new EuclideanDistance(), KNNStrategy.SIMPLE, normalizedTrainingDataset);

    // NOTE(review): the query is passed in raw units while the training set was normalized;
    // confirm whether the model rescales queries internally, otherwise the wide tolerance
    // below may be masking a scale mismatch.
    Vector query = new DenseLocalOnHeapVector(
        new double[] { 104.6, 419180, 2822, 2857, 118734, 1956 });

    System.out.println(knnMdl.apply(query));
    Assert.assertEquals(67857, knnMdl.apply(query), 2000);
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) KNNMultipleLinearRegression(org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) Vector(org.apache.ignite.ml.math.Vector) SparseBlockDistributedVector(org.apache.ignite.ml.math.impls.vector.SparseBlockDistributedVector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)

Example 3 with KNNMultipleLinearRegression

Use of org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression in the Apache Ignite project.

From class KNNRegressionExample, method main.

/**
 * Executes example.
 * <p>
 * Starts an Ignite node, loads the labeled dataset from {@code KNN_CLEARED_MACHINES_TXT},
 * normalizes it, splits it into train and test parts, fits a weighted kNN regression on the
 * train part and prints MSE, MAE and R^2 measured on the test part.
 *
 * @param args Command line arguments, none required.
 * @throws InterruptedException If the calling thread is interrupted while waiting for the
 *      worker thread to finish.
 */
public static void main(String[] args) throws InterruptedException {
    System.out.println(">>> kNN regression example started.");
    // Start ignite grid.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");
        IgniteThread igniteThread = new IgniteThread(ignite.configuration().getIgniteInstanceName(), KNNRegressionExample.class.getSimpleName(), () -> {
            try {
                // Prepare path to read
                File file = IgniteUtils.resolveIgnitePath(KNN_CLEARED_MACHINES_TXT);
                if (file == null)
                    throw new RuntimeException("Can't find file: " + KNN_CLEARED_MACHINES_TXT);
                Path path = file.toPath();
                // Read dataset from file
                LabeledDataset dataset = LabeledDatasetLoader.loadFromTxtFile(path, SEPARATOR, false, false);
                // Normalize dataset
                Normalizer.normalizeWithMiniMax(dataset);
                // Random splitting of the dataset as 80% train and 20% test datasets
                LabeledDatasetTestTrainPair split = new LabeledDatasetTestTrainPair(dataset, 0.2);
                System.out.println("\n>>> Amount of observations in train dataset: " + split.train().rowSize());
                System.out.println("\n>>> Amount of observations in test dataset: " + split.test().rowSize());
                LabeledDataset test = split.test();
                LabeledDataset train = split.train();
                // Builds weighted kNN-regression with Manhattan Distance
                KNNMultipleLinearRegression knnMdl = new KNNMultipleLinearRegression(7, new ManhattanDistance(), KNNStrategy.WEIGHTED, train);
                // Keep the observed (ground-truth) labels before they are overwritten below.
                // NOTE(review): relies on labels() returning a copy rather than a live view
                // of the dataset (the original comment called this a clone) - confirm.
                final double[] labels = test.labels();
                // Save predicted values to test dataset; from here on test.label(i) is the
                // prediction and labels[i] is the observed value.
                LabellingMachine.assignLabels(test, knnMdl);
                final int rows = test.rowSize();
                // Calculate mean squared error (MSE)
                double mse = 0.0;
                for (int i = 0; i < rows; i++) {
                    mse += Math.pow(test.label(i) - labels[i], 2.0);
                }
                mse = mse / rows;
                System.out.println("\n>>> Mean squared error (MSE) " + mse);
                // Calculate mean absolute error (MAE)
                double mae = 0.0;
                for (int i = 0; i < rows; i++) {
                    mae += Math.abs(test.label(i) - labels[i]);
                }
                mae = mae / rows;
                System.out.println("\n>>> Mean absolute error (MAE) " + mae);
                // Calculate R^2 as 1 - RSS/TSS.
                // TSS must measure the spread of the OBSERVED labels around their own mean;
                // using the predicted labels here (as before) yields a number that is not R^2.
                double avg = 0.0;
                for (int i = 0; i < rows; i++) {
                    avg += labels[i];
                }
                avg = avg / rows;
                double rss = 0.0;
                double tss = 0.0;
                for (int i = 0; i < rows; i++) {
                    rss += Math.pow(test.label(i) - labels[i], 2.0);
                    tss += Math.pow(labels[i] - avg, 2.0);
                }
                double detCf = 1 - rss / tss;
                System.out.println("\n>>> R^2 " + detCf);
            } catch (IOException e) {
                e.printStackTrace();
                System.out.println("\n>>> Unexpected exception, check resources: " + e);
            } finally {
                System.out.println("\n>>> kNN regression example completed.");
            }
        });
        // Run the computation on an Ignite thread and wait for it before closing the node.
        igniteThread.start();
        igniteThread.join();
    }
}
Also used : Path(java.nio.file.Path) KNNMultipleLinearRegression(org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression) IOException(java.io.IOException) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) LabeledDatasetTestTrainPair(org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair) Ignite(org.apache.ignite.Ignite) IgniteThread(org.apache.ignite.thread.IgniteThread) File(java.io.File) ManhattanDistance(org.apache.ignite.ml.math.distances.ManhattanDistance)

Example 4 with KNNMultipleLinearRegression

Use of org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression in the Apache Ignite project.

From class IgniteKNNRegressionBenchmark, method test.

/**
 * {@inheritDoc}
 * <p>
 * One benchmark iteration: on a dedicated {@code IgniteThread}, obtains a shuffled dataset,
 * normalizes it, splits it into train and test parts, builds a weighted kNN regression on
 * the train part and applies it to rows of the test part. Always returns {@code true} once
 * the worker thread has finished.
 */
@Override
public boolean test(Map<Object, Object> ctx) throws Exception {
    // The work runs inside an IgniteThread: we must work with SparseDistributedMatrix inside
    // IgniteThread because an ignite cache is created internally.
    final String instanceName = ignite.configuration().getIgniteInstanceName();
    final String threadName = this.getClass().getSimpleName();

    IgniteThread worker = new IgniteThread(instanceName, threadName, () -> {
        // IMPL NOTE originally taken from KNNRegressionExample.
        // Obtain shuffled dataset.
        LabeledDataset shuffled = new Datasets().shuffleClearedMachines((int) (DataChanger.next()));

        // Normalize dataset.
        Normalizer.normalizeWithMiniMax(shuffled);

        // Random splitting of the dataset as 80% train and 20% test datasets.
        LabeledDatasetTestTrainPair parts = new LabeledDatasetTestTrainPair(shuffled, 0.2);
        LabeledDataset test = parts.test();
        LabeledDataset train = parts.train();

        // Builds weighted kNN-regression with Manhattan Distance.
        KNNModel knnMdl = new KNNMultipleLinearRegression(7, new ManhattanDistance(), KNNStrategy.WEIGHTED, train);

        // NOTE(review): the result of labels() is never read; kept only so the measured
        // workload is unchanged - confirm whether the call can be dropped.
        final double[] labels = test.labels();

        // Calculate predictions.
        // NOTE(review): the bound rowSize() - 1 leaves the last test row unpredicted; this
        // looks like an off-by-one, but is preserved to keep benchmark numbers comparable.
        int row = 0;
        while (row < test.rowSize() - 1) {
            knnMdl.apply(test.getRow(row).features());
            row++;
        }
    });

    worker.start();
    worker.join();
    return true;
}
Also used : KNNMultipleLinearRegression(org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression) LabeledDatasetTestTrainPair(org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair) KNNModel(org.apache.ignite.ml.knn.models.KNNModel) IgniteThread(org.apache.ignite.thread.IgniteThread) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) ManhattanDistance(org.apache.ignite.ml.math.distances.ManhattanDistance)

Example 5 with KNNMultipleLinearRegression

Use of org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression in the Apache Ignite project.

From class KNNMultipleLinearRegressionTest, method testSimpleRegressionWithOneNeighbour.

/**
 * Checks 1-nearest-neighbour regression (Euclidean distance, simple strategy) on a tiny
 * hand-made dataset.
 * <p>
 * The query vector is identical to training row 4, whose label is 15.0, so the prediction
 * is expected to equal 15 up to 1E-12.
 */
public void testSimpleRegressionWithOneNeighbour() {
    // Labels, one per training row.
    y = new double[] { 11.0, 12.0, 13.0, 14.0, 15.0, 16.0 };

    // Six training rows with five features each; row 0 is the origin and every other row
    // has exactly one non-zero coordinate.
    x = new double[][] {
        { 0, 0, 0, 0, 0 },
        { 2.0, 0, 0, 0, 0 },
        { 0, 3.0, 0, 0, 0 },
        { 0, 0, 4.0, 0, 0 },
        { 0, 0, 0, 5.0, 0 },
        { 0, 0, 0, 0, 6.0 }
    };

    IgniteUtils.setCurrentIgniteName(ignite.configuration().getIgniteInstanceName());

    LabeledDataset trainingSet = new LabeledDataset(x, y);
    KNNMultipleLinearRegression knnMdl = new KNNMultipleLinearRegression(
        1, new EuclideanDistance(), KNNStrategy.SIMPLE, trainingSet);

    // Same coordinates as training row 4.
    Vector query = new SparseBlockDistributedVector(new double[] { 0, 0, 0, 5.0, 0.0 });

    System.out.println(knnMdl.apply(query));
    Assert.assertEquals(15, knnMdl.apply(query), 1E-12);
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) KNNMultipleLinearRegression(org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression) SparseBlockDistributedVector(org.apache.ignite.ml.math.impls.vector.SparseBlockDistributedVector) LabeledDataset(org.apache.ignite.ml.structures.LabeledDataset) Vector(org.apache.ignite.ml.math.Vector) SparseBlockDistributedVector(org.apache.ignite.ml.math.impls.vector.SparseBlockDistributedVector) DenseLocalOnHeapVector(org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector)

Aggregations

KNNMultipleLinearRegression (org.apache.ignite.ml.knn.regression.KNNMultipleLinearRegression): 6; LabeledDataset (org.apache.ignite.ml.structures.LabeledDataset): 6; Vector (org.apache.ignite.ml.math.Vector): 4; EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance): 4; DenseLocalOnHeapVector (org.apache.ignite.ml.math.impls.vector.DenseLocalOnHeapVector): 4; SparseBlockDistributedVector (org.apache.ignite.ml.math.impls.vector.SparseBlockDistributedVector): 4; ManhattanDistance (org.apache.ignite.ml.math.distances.ManhattanDistance): 2; LabeledDatasetTestTrainPair (org.apache.ignite.ml.structures.LabeledDatasetTestTrainPair): 2; IgniteThread (org.apache.ignite.thread.IgniteThread): 2; File (java.io.File): 1; IOException (java.io.IOException): 1; Path (java.nio.file.Path): 1; Ignite (org.apache.ignite.Ignite): 1; KNNModel (org.apache.ignite.ml.knn.models.KNNModel): 1