Search in sources :

Example 1 with KNNRegressionTrainer

use of org.apache.ignite.ml.knn.regression.KNNRegressionTrainer in project ignite by apache.

Method main of the class KNNRegressionExample.

/**
 * Entry point of the kNN regression example: starts an Ignite node, loads the
 * {@code CLEARED_MACHINES} sandbox dataset into a cache, fits a kNN regression model on it
 * and prints the RMSE of that model evaluated over the same cache.
 *
 * @param args Command line arguments; not used.
 * @throws IOException Declared by the signature; presumably raised while the sandbox dataset
 *      is being loaded (TODO confirm against {@code SandboxMLCache}).
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> kNN regression over cached dataset usage example started.");

    // The node is closed by try-with-resources; stdout is flushed afterwards in every case.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> machinesCache = null;

        try {
            SandboxMLCache sandbox = new SandboxMLCache(ignite);
            machinesCache = sandbox.fillCacheWith(MLSandboxDatasets.CLEARED_MACHINES);

            // k = 5, Manhattan distance, ball-tree spatial index, weighting switched on.
            KNNRegressionTrainer knnTrainer = new KNNRegressionTrainer()
                .withK(5)
                .withDistanceMeasure(new ManhattanDistance())
                .withIdxType(SpatialIndexType.BALL_TREE)
                .withWeighted(true);

            // The label is taken from the coordinate selected by LabelCoordinate.FIRST.
            Vectorizer<Integer, Vector, Integer, Double> labeledVectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            KNNRegressionModel fittedMdl = knnTrainer.fit(ignite, machinesCache, labeledVectorizer);

            double rmse = Evaluator.evaluate(machinesCache, fittedMdl, labeledVectorizer, new Rmse());

            System.out.println("\n>>> Rmse = " + rmse);
        }
        finally {
            // Destroy the cache only if it was actually created.
            if (machinesCache != null) {
                machinesCache.destroy();
            }
        }
    }
    finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) KNNRegressionTrainer(org.apache.ignite.ml.knn.regression.KNNRegressionTrainer) KNNRegressionModel(org.apache.ignite.ml.knn.regression.KNNRegressionModel) Rmse(org.apache.ignite.ml.selection.scoring.metric.regression.Rmse) Ignite(org.apache.ignite.Ignite) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) ManhattanDistance(org.apache.ignite.ml.math.distances.ManhattanDistance)

Example 2 with KNNRegressionTrainer

use of org.apache.ignite.ml.knn.regression.KNNRegressionTrainer in project ignite by apache.

Method testLongly of the class KNNRegressionTest.

/**
 * Fits a kNN regression model (k = 3, Euclidean distance) on a 15-row in-memory dataset and
 * checks that the prediction for one six-feature vector is non-null and within 2000 of 67857.
 *
 * @param weighted Value forwarded to {@code KNNRegressionTrainer#withWeighted}.
 */
private void testLongly(boolean weighted) {
    // Seven values per row; the label is the one selected by LabelCoordinate.FIRST below.
    double[][] rows = {
        { 60323, 83.0, 234289, 2356, 1590, 107608, 1947 },
        { 61122, 88.5, 259426, 2325, 1456, 108632, 1948 },
        { 60171, 88.2, 258054, 3682, 1616, 109773, 1949 },
        { 61187, 89.5, 284599, 3351, 1650, 110929, 1950 },
        { 63221, 96.2, 328975, 2099, 3099, 112075, 1951 },
        { 63639, 98.1, 346999, 1932, 3594, 113270, 1952 },
        { 64989, 99.0, 365385, 1870, 3547, 115094, 1953 },
        { 63761, 100.0, 363112, 3578, 3350, 116219, 1954 },
        { 66019, 101.2, 397469, 2904, 3048, 117388, 1955 },
        { 68169, 108.4, 442769, 2936, 2798, 120445, 1957 },
        { 66513, 110.8, 444546, 4681, 2637, 121950, 1958 },
        { 68655, 112.6, 482704, 3813, 2552, 123366, 1959 },
        { 69564, 114.2, 502601, 3931, 2514, 125368, 1960 },
        { 69331, 115.7, 518173, 4806, 2572, 127852, 1961 },
        { 70551, 116.9, 554894, 4007, 2827, 130081, 1962 }
    };

    // Keys are the row indices 0..14.
    Map<Integer, double[]> data = new HashMap<>();
    for (int key = 0; key < rows.length; key++) {
        data.put(key, rows[key]);
    }

    KNNRegressionTrainer knnTrainer = new KNNRegressionTrainer()
        .withK(3)
        .withDistanceMeasure(new EuclideanDistance())
        .withWeighted(weighted);

    KNNRegressionModel knnMdl = knnTrainer.fit(
        new LocalDatasetBuilder<>(data, parts),
        new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST)
    );

    // Six feature values, no label.
    Vector unseen = VectorUtils.of(104.6, 419180.0, 2822.0, 2857.0, 118734.0, 1956.0);

    assertNotNull(knnMdl.predict(unseen));
    assertEquals(67857, knnMdl.predict(unseen), 2000);

    // NOTE(review): assertions that knnMdl.toString(), toString(true) and toString(false) contain
    // stgy.name() are disabled here; `stgy` is not declared in this method.
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) KNNRegressionTrainer(org.apache.ignite.ml.knn.regression.KNNRegressionTrainer) DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) HashMap(java.util.HashMap) KNNRegressionModel(org.apache.ignite.ml.knn.regression.KNNRegressionModel) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)

Example 3 with KNNRegressionTrainer

use of org.apache.ignite.ml.knn.regression.KNNRegressionTrainer in project ignite by apache.

Method testEvaluatorWithFilter of the class RegressionEvaluatorTest.

/**
 * Checks the evaluator together with the kNN regression trainer when the data is divided by a
 * train/test splitter: a model is fitted on one half of a 15-row dataset and its RSS score on
 * the other half is compared with a pinned expected value.
 */
@Test
public void testEvaluatorWithFilter() {
    // Seven values per row; the label is the one selected by LabelCoordinate.FIRST below.
    double[][] rows = {
        { 60323, 83.0, 234289, 2356, 1590, 107608, 1947 },
        { 61122, 88.5, 259426, 2325, 1456, 108632, 1948 },
        { 60171, 88.2, 258054, 3682, 1616, 109773, 1949 },
        { 61187, 89.5, 284599, 3351, 1650, 110929, 1950 },
        { 63221, 96.2, 328975, 2099, 3099, 112075, 1951 },
        { 63639, 98.1, 346999, 1932, 3594, 113270, 1952 },
        { 64989, 99.0, 365385, 1870, 3547, 115094, 1953 },
        { 63761, 100.0, 363112, 3578, 3350, 116219, 1954 },
        { 66019, 101.2, 397469, 2904, 3048, 117388, 1955 },
        { 68169, 108.4, 442769, 2936, 2798, 120445, 1957 },
        { 66513, 110.8, 444546, 4681, 2637, 121950, 1958 },
        { 68655, 112.6, 482704, 3813, 2552, 123366, 1959 },
        { 69564, 114.2, 502601, 3931, 2514, 125368, 1960 },
        { 69331, 115.7, 518173, 4806, 2572, 127852, 1961 },
        { 70551, 116.9, 554894, 4007, 2827, 130081, 1962 }
    };

    // Keys are the row indices 0..14.
    Map<Integer, Vector> data = new HashMap<>();
    for (int key = 0; key < rows.length; key++) {
        data.put(key, VectorUtils.of(rows[key]));
    }

    KNNRegressionTrainer knnTrainer = new KNNRegressionTrainer()
        .withK(3)
        .withDistanceMeasure(new EuclideanDistance());

    // Fixed seed (0) for the mapper's random source; split ratio 0.5.
    Random seededRnd = new Random(0);
    TrainTestSplit<Integer, Vector> split =
        new TrainTestDatasetSplitter<Integer, Vector>(new SHA256UniformMapper<>(seededRnd)).split(0.5);

    Vectorizer<Integer, Vector, Integer, Double> labeledVectorizer =
        new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

    // NOTE(review): the model is fitted on the *test* filter and scored on the *train* filter;
    // the expected score below is pinned to exactly this arrangement.
    KNNRegressionModel mdl = knnTrainer.fit(data, split.getTestFilter(), parts, labeledVectorizer);

    LocalDatasetBuilder<Integer, Vector> scoringData =
        new LocalDatasetBuilder<>(data, split.getTrainFilter(), parts);

    double score = Evaluator.evaluate(scoringData, mdl, labeledVectorizer, new Rss()).getSingle();

    assertEquals(4800164.444444457, score, 1e-4);
}
Also used : KNNRegressionTrainer(org.apache.ignite.ml.knn.regression.KNNRegressionTrainer) SHA256UniformMapper(org.apache.ignite.ml.selection.split.mapper.SHA256UniformMapper) HashMap(java.util.HashMap) KNNRegressionModel(org.apache.ignite.ml.knn.regression.KNNRegressionModel) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) Rss(org.apache.ignite.ml.selection.scoring.metric.regression.Rss) Random(java.util.Random) LocalDatasetBuilder(org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Example 4 with KNNRegressionTrainer

use of org.apache.ignite.ml.knn.regression.KNNRegressionTrainer in project ignite by apache.

Method main of the class RegressionMetricExample.

/**
 * Entry point of the regression metric example: starts an Ignite node, loads the
 * {@code CLEARED_MACHINES} sandbox dataset into a cache, fits a kNN regression model on it
 * and prints the value of the {@code MetricName.MAE} metric evaluated over the same cache.
 *
 * @param args Command line arguments; not used.
 * @throws IOException Declared by the signature; presumably raised while the sandbox dataset
 *      is being loaded (TODO confirm against {@code SandboxMLCache}).
 */
public static void main(String[] args) throws IOException {
    System.out.println();
    System.out.println(">>> kNN regression over cached dataset usage example started.");

    // The node is closed by try-with-resources; stdout is flushed afterwards in every case.
    try (Ignite ignite = Ignition.start("examples/config/example-ignite.xml")) {
        System.out.println(">>> Ignite grid started.");

        IgniteCache<Integer, Vector> machinesCache = null;

        try {
            SandboxMLCache sandbox = new SandboxMLCache(ignite);
            machinesCache = sandbox.fillCacheWith(MLSandboxDatasets.CLEARED_MACHINES);

            // k = 5, Manhattan distance, ball-tree spatial index, weighting switched on.
            KNNRegressionTrainer knnTrainer = new KNNRegressionTrainer()
                .withK(5)
                .withDistanceMeasure(new ManhattanDistance())
                .withIdxType(SpatialIndexType.BALL_TREE)
                .withWeighted(true);

            // The label is taken from the coordinate selected by LabelCoordinate.FIRST.
            Vectorizer<Integer, Vector, Integer, Double> labeledVectorizer =
                new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

            KNNRegressionModel fittedMdl = knnTrainer.fit(ignite, machinesCache, labeledVectorizer);

            double mae = Evaluator.evaluate(machinesCache, fittedMdl, labeledVectorizer, MetricName.MAE);

            System.out.println("\n>>> Mae " + mae);
        }
        finally {
            // Destroy the cache only if it was actually created.
            if (machinesCache != null) {
                machinesCache.destroy();
            }
        }
    }
    finally {
        System.out.flush();
    }
}
Also used : SandboxMLCache(org.apache.ignite.examples.ml.util.SandboxMLCache) KNNRegressionTrainer(org.apache.ignite.ml.knn.regression.KNNRegressionTrainer) Ignite(org.apache.ignite.Ignite) KNNRegressionModel(org.apache.ignite.ml.knn.regression.KNNRegressionModel) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) ManhattanDistance(org.apache.ignite.ml.math.distances.ManhattanDistance)

Example 5 with KNNRegressionTrainer

use of org.apache.ignite.ml.knn.regression.KNNRegressionTrainer in project ignite by apache.

Method testEvaluatorWithoutFilter of the class RegressionEvaluatorTest.

/**
 * Checks the evaluator together with the kNN regression trainer without any train/test
 * filtering: a model is fitted on the full 15-row dataset and its {@code MetricName.RSS}
 * score over the same data is compared with a pinned expected value.
 */
@Test
public void testEvaluatorWithoutFilter() {
    // Seven values per row; the label is the one selected by LabelCoordinate.FIRST below.
    double[][] rows = {
        { 60323, 83.0, 234289, 2356, 1590, 107608, 1947 },
        { 61122, 88.5, 259426, 2325, 1456, 108632, 1948 },
        { 60171, 88.2, 258054, 3682, 1616, 109773, 1949 },
        { 61187, 89.5, 284599, 3351, 1650, 110929, 1950 },
        { 63221, 96.2, 328975, 2099, 3099, 112075, 1951 },
        { 63639, 98.1, 346999, 1932, 3594, 113270, 1952 },
        { 64989, 99.0, 365385, 1870, 3547, 115094, 1953 },
        { 63761, 100.0, 363112, 3578, 3350, 116219, 1954 },
        { 66019, 101.2, 397469, 2904, 3048, 117388, 1955 },
        { 68169, 108.4, 442769, 2936, 2798, 120445, 1957 },
        { 66513, 110.8, 444546, 4681, 2637, 121950, 1958 },
        { 68655, 112.6, 482704, 3813, 2552, 123366, 1959 },
        { 69564, 114.2, 502601, 3931, 2514, 125368, 1960 },
        { 69331, 115.7, 518173, 4806, 2572, 127852, 1961 },
        { 70551, 116.9, 554894, 4007, 2827, 130081, 1962 }
    };

    // Keys are the row indices 0..14.
    Map<Integer, Vector> data = new HashMap<>();
    for (int key = 0; key < rows.length; key++) {
        data.put(key, VectorUtils.of(rows[key]));
    }

    KNNRegressionTrainer knnTrainer = new KNNRegressionTrainer()
        .withK(3)
        .withDistanceMeasure(new EuclideanDistance());

    Vectorizer<Integer, Vector, Integer, Double> labeledVectorizer =
        new DummyVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST);

    // Training goes through a local dataset builder; scoring is done directly on the map.
    KNNRegressionModel mdl = knnTrainer.fit(new LocalDatasetBuilder<>(data, parts), labeledVectorizer);

    double score = Evaluator.evaluate(data, mdl, labeledVectorizer, MetricName.RSS);

    assertEquals(5581012.666666679, score, 1e-4);
}
Also used : KNNRegressionTrainer(org.apache.ignite.ml.knn.regression.KNNRegressionTrainer) HashMap(java.util.HashMap) KNNRegressionModel(org.apache.ignite.ml.knn.regression.KNNRegressionModel) EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) LocalDatasetBuilder(org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Aggregations

KNNRegressionModel (org.apache.ignite.ml.knn.regression.KNNRegressionModel): 7; KNNRegressionTrainer (org.apache.ignite.ml.knn.regression.KNNRegressionTrainer): 7; Vector (org.apache.ignite.ml.math.primitives.vector.Vector): 6; HashMap (java.util.HashMap): 5; EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance): 5; TrainerTest (org.apache.ignite.ml.common.TrainerTest): 4; Test (org.junit.Test): 4; DoubleArrayVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer): 3; Ignite (org.apache.ignite.Ignite): 2; SandboxMLCache (org.apache.ignite.examples.ml.util.SandboxMLCache): 2; LocalDatasetBuilder (org.apache.ignite.ml.dataset.impl.local.LocalDatasetBuilder): 2; ManhattanDistance (org.apache.ignite.ml.math.distances.ManhattanDistance): 2; Random (java.util.Random): 1; Rmse (org.apache.ignite.ml.selection.scoring.metric.regression.Rmse): 1; Rss (org.apache.ignite.ml.selection.scoring.metric.regression.Rss): 1; SHA256UniformMapper (org.apache.ignite.ml.selection.split.mapper.SHA256UniformMapper): 1