Search in sources :

Example 6 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

In class StackingTest, method testSimpleStack:

/**
 * Trains a stacked model on the {@code xor} fixture and verifies its predictions.
 * <p>
 * A multilayer perceptron (adapted to a Vector -> Vector model) is the only submodel and a linear
 * regression trainer is the aggregator. The aggregator's labels are multiplied by a constant
 * factor, so every expected prediction is the XOR value multiplied by that factor.
 */
@Test
public void testSimpleStack() {
    // Captured by the label-converting lambda below, so it has to stay (effectively) final.
    final double scale = 3;

    UpdatesStrategy<SmoothParametrized, SimpleGDParameterUpdate> gdStgy = new UpdatesStrategy<>(
        new SimpleGDUpdateCalculator(0.2),
        SimpleGDParameterUpdate.SUM_LOCAL,
        SimpleGDParameterUpdate.AVG);

    MLPTrainer<SimpleGDParameterUpdate> perceptronTrainer = new MLPTrainer<>(
        new MLPArchitecture(2)
            .withAddedLayer(10, true, Activators.RELU)
            .withAddedLayer(1, false, Activators.SIGMOID),
        LossFunctions.MSE,
        gdStgy,
        3000,
        10,
        50,
        123L);

    // Adapt the perceptron trainer so that the trained model maps Vector -> Vector:
    // the input vector becomes a one-row matrix, and the first row of the output matrix is returned.
    DatasetTrainer<AdaptableDatasetModel<Vector, Vector, Matrix, Matrix, MultilayerPerceptron>, Double> submodelTrainer =
        AdaptableDatasetTrainer.of(perceptronTrainer)
            .beforeTrainedModel((Vector v) -> new DenseMatrix(v.asArray(), 1))
            .afterTrainedModel((Matrix mtx) -> mtx.getRow(0))
            .withConvertedLabels(VectorUtils::num2Arr);

    StackedDatasetTrainer<Vector, Vector, Double, LinearRegressionModel, Double> stackTrainer = new StackedDatasetTrainer<>();

    StackedModel<Vector, Vector, Double, LinearRegressionModel> stackedMdl = stackTrainer
        .withAggregatorTrainer(new LinearRegressionLSQRTrainer().withConvertedLabels(x -> x * scale))
        .addTrainer(submodelTrainer)
        .withAggregatorInputMerger(VectorUtils::concat)
        .withSubmodelOutput2VectorConverter(IgniteFunction.identity())
        .withVector2SubmodelInputConverter(IgniteFunction.identity())
        .withOriginalFeaturesKept(IgniteFunction.identity())
        .withEnvironmentBuilder(TestUtils.testEnvBuilder())
        .fit(getCacheMock(xor), parts, new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST));

    // XOR truth table: { first input, second input, expected (unscaled) output }.
    double[][] truthTable = {
        { 0.0, 0.0, 0.0 },
        { 0.0, 1.0, 1.0 },
        { 1.0, 0.0, 1.0 },
        { 1.0, 1.0, 0.0 }
    };

    for (double[] row : truthTable)
        assertEquals(row[2] * scale, stackedMdl.predict(VectorUtils.of(row[0], row[1])), 0.3);
}
Also used : VectorUtils(org.apache.ignite.ml.math.primitives.vector.VectorUtils) DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) LinearRegressionModel(org.apache.ignite.ml.regressions.linear.LinearRegressionModel) MLPArchitecture(org.apache.ignite.ml.nn.architecture.MLPArchitecture) MLPTrainer(org.apache.ignite.ml.nn.MLPTrainer) SimpleGDParameterUpdate(org.apache.ignite.ml.optimization.updatecalculators.SimpleGDParameterUpdate) DenseMatrix(org.apache.ignite.ml.math.primitives.matrix.impl.DenseMatrix) LinearRegressionLSQRTrainer(org.apache.ignite.ml.regressions.linear.LinearRegressionLSQRTrainer) DenseMatrix(org.apache.ignite.ml.math.primitives.matrix.impl.DenseMatrix) Matrix(org.apache.ignite.ml.math.primitives.matrix.Matrix) SimpleGDUpdateCalculator(org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator) AdaptableDatasetModel(org.apache.ignite.ml.trainers.AdaptableDatasetModel) StackedDatasetTrainer(org.apache.ignite.ml.composition.stacking.StackedDatasetTrainer) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) SmoothParametrized(org.apache.ignite.ml.optimization.SmoothParametrized) UpdatesStrategy(org.apache.ignite.ml.nn.UpdatesStrategy) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Example 7 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

In class KNNClassificationTest, method testBinaryClassificationFarPointsWithSimpleStrategy:

/**
 * Fits an unweighted 3-nearest-neighbours classifier (Euclidean distance) on six labeled points
 * and checks the class predicted for a query point lying next to {@code (-1, -1)}.
 * <p>
 * Each row holds two features followed by the label (the label is the last coordinate).
 */
@Test
public void testBinaryClassificationFarPointsWithSimpleStrategy() {
    double[][] rows = {
        { 10.0, 10.0, 1.0 },
        { 10.0, 20.0, 1.0 },
        { -1, -1, 1.0 },
        { -2, -2, 2.0 },
        { -1.0, -2.0, 2.0 },
        { -2.0, -1.0, 2.0 }
    };

    // Keys are simply the row indices 0..5.
    Map<Integer, double[]> data = new HashMap<>();

    for (int idx = 0; idx < rows.length; idx++)
        data.put(idx, rows[idx]);

    KNNClassificationTrainer knnTrainer = new KNNClassificationTrainer()
        .withK(3)
        .withDistanceMeasure(new EuclideanDistance())
        .withWeighted(false);

    KNNClassificationModel mdl = knnTrainer.fit(
        data,
        parts,
        new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST));

    // The closest point carries label 1.0, but two of the three nearest neighbours carry 2.0.
    assertEquals(2.0, mdl.predict(VectorUtils.of(-1.01, -1.01)), 0);
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) HashMap(java.util.HashMap) KNNClassificationTrainer(org.apache.ignite.ml.knn.classification.KNNClassificationTrainer) KNNClassificationModel(org.apache.ignite.ml.knn.classification.KNNClassificationModel) Test(org.junit.Test)

Example 8 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

In class KNNClassificationTest, method testBinaryClassification:

/**
 * Fits an unweighted 3-nearest-neighbours classifier (Euclidean distance) on two groups of
 * three points each and checks that a query point near each group gets that group's label.
 * Also checks that all {@code toString} variants of the model produce non-empty output.
 * <p>
 * Each row holds two features followed by the label (the label is the last coordinate).
 */
@Test
public void testBinaryClassification() {
    double[][] rows = {
        { 1.0, 1.0, 1.0 },
        { 1.0, 2.0, 1.0 },
        { 2.0, 1.0, 1.0 },
        { -1.0, -1.0, 2.0 },
        { -1.0, -2.0, 2.0 },
        { -2.0, -1.0, 2.0 }
    };

    // Keys are simply the row indices 0..5.
    Map<Integer, double[]> data = new HashMap<>();

    for (int idx = 0; idx < rows.length; idx++)
        data.put(idx, rows[idx]);

    KNNClassificationTrainer knnTrainer = new KNNClassificationTrainer()
        .withK(3)
        .withDistanceMeasure(new EuclideanDistance())
        .withWeighted(false);

    KNNClassificationModel mdl = knnTrainer.fit(
        data,
        parts,
        new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.LAST));

    // String representations must not be empty, whatever the flag value.
    assertTrue(!mdl.toString().isEmpty());
    assertTrue(!mdl.toString(true).isEmpty());
    assertTrue(!mdl.toString(false).isEmpty());

    // Query point on the side of the rows labeled 1.0.
    assertEquals(1.0, mdl.predict(VectorUtils.of(2.0, 2.0)), 0);

    // Query point on the side of the rows labeled 2.0.
    assertEquals(2.0, mdl.predict(VectorUtils.of(-2.0, -2.0)), 0);
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) HashMap(java.util.HashMap) KNNClassificationTrainer(org.apache.ignite.ml.knn.classification.KNNClassificationTrainer) KNNClassificationModel(org.apache.ignite.ml.knn.classification.KNNClassificationModel) Vector(org.apache.ignite.ml.math.primitives.vector.Vector) Test(org.junit.Test)

Example 9 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

In class ANNClassificationTest, method testUpdate:

/**
 * Checks that an ANN classification model can be updated both on the dataset it was
 * originally trained on and on an empty dataset.
 * <p>
 * For each updated model the test verifies that candidates are present and that every
 * {@code toString} variant reports {@code weighted = [false]}.
 */
@Test
public void testUpdate() {
    // Index the rows of the twoClusters fixture by their position.
    Map<Integer, double[]> cacheMock = new HashMap<>();
    for (int i = 0; i < twoClusters.length; i++)
        cacheMock.put(i, twoClusters[i]);

    ANNClassificationTrainer trainer = new ANNClassificationTrainer()
        .withK(10)
        .withMaxIterations(10)
        .withEpsilon(1e-4)
        .withDistance(new EuclideanDistance());

    // The initial fit reads the label from the FIRST coordinate of every row. Both updates below
    // must use the same label coordinate; otherwise the "same dataset" update would interpret
    // the rows differently from the original training run.
    ANNClassificationModel originalMdl = (ANNClassificationModel) trainer
        .fit(cacheMock, parts, new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST))
        .withK(3)
        .withDistanceMeasure(new EuclideanDistance())
        .withWeighted(false);

    ANNClassificationModel updatedOnSameDataset = (ANNClassificationModel) trainer
        .update(originalMdl, cacheMock, parts, new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST))
        .withK(3)
        .withDistanceMeasure(new EuclideanDistance())
        .withWeighted(false);

    ANNClassificationModel updatedOnEmptyDataset = (ANNClassificationModel) trainer
        .update(originalMdl, new HashMap<>(), parts, new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST))
        .withK(3)
        .withDistanceMeasure(new EuclideanDistance())
        .withWeighted(false);

    assertNotNull(updatedOnSameDataset.getCandidates());
    assertTrue(updatedOnSameDataset.toString().contains("weighted = [false]"));
    assertTrue(updatedOnSameDataset.toString(true).contains("weighted = [false]"));
    assertTrue(updatedOnSameDataset.toString(false).contains("weighted = [false]"));

    assertNotNull(updatedOnEmptyDataset.getCandidates());
    assertTrue(updatedOnEmptyDataset.toString().contains("weighted = [false]"));
    assertTrue(updatedOnEmptyDataset.toString(true).contains("weighted = [false]"));
    assertTrue(updatedOnEmptyDataset.toString(false).contains("weighted = [false]"));
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) HashMap(java.util.HashMap) ANNClassificationModel(org.apache.ignite.ml.knn.ann.ANNClassificationModel) ANNClassificationTrainer(org.apache.ignite.ml.knn.ann.ANNClassificationTrainer) TrainerTest(org.apache.ignite.ml.common.TrainerTest) Test(org.junit.Test)

Example 10 with DoubleArrayVectorizer

use of org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer in project ignite by apache.

In class KNNRegressionTest, method testLongly:

/**
 * Fits a 3-nearest-neighbours regression model (Euclidean distance) on fifteen seven-column rows
 * and checks the prediction for one six-feature query vector.
 * <p>
 * The label is the first coordinate of every row; the remaining six values are the features.
 *
 * @param weighted Value passed to {@code withWeighted} on the trainer.
 */
private void testLongly(boolean weighted) {
    double[][] rows = {
        { 60323, 83.0, 234289, 2356, 1590, 107608, 1947 },
        { 61122, 88.5, 259426, 2325, 1456, 108632, 1948 },
        { 60171, 88.2, 258054, 3682, 1616, 109773, 1949 },
        { 61187, 89.5, 284599, 3351, 1650, 110929, 1950 },
        { 63221, 96.2, 328975, 2099, 3099, 112075, 1951 },
        { 63639, 98.1, 346999, 1932, 3594, 113270, 1952 },
        { 64989, 99.0, 365385, 1870, 3547, 115094, 1953 },
        { 63761, 100.0, 363112, 3578, 3350, 116219, 1954 },
        { 66019, 101.2, 397469, 2904, 3048, 117388, 1955 },
        { 68169, 108.4, 442769, 2936, 2798, 120445, 1957 },
        { 66513, 110.8, 444546, 4681, 2637, 121950, 1958 },
        { 68655, 112.6, 482704, 3813, 2552, 123366, 1959 },
        { 69564, 114.2, 502601, 3931, 2514, 125368, 1960 },
        { 69331, 115.7, 518173, 4806, 2572, 127852, 1961 },
        { 70551, 116.9, 554894, 4007, 2827, 130081, 1962 }
    };

    // Keys are simply the row indices 0..14.
    Map<Integer, double[]> data = new HashMap<>();

    for (int idx = 0; idx < rows.length; idx++)
        data.put(idx, rows[idx]);

    KNNRegressionTrainer knnTrainer = new KNNRegressionTrainer()
        .withK(3)
        .withDistanceMeasure(new EuclideanDistance())
        .withWeighted(weighted);

    KNNRegressionModel mdl = knnTrainer.fit(
        new LocalDatasetBuilder<>(data, parts),
        new DoubleArrayVectorizer<Integer>().labeled(Vectorizer.LabelCoordinate.FIRST));

    // Query features whose last value (1956.0) does not occur in the last column of any training row.
    Vector query = VectorUtils.of(104.6, 419180.0, 2822.0, 2857.0, 118734.0, 1956.0);

    assertNotNull(mdl.predict(query));
    assertEquals(67857, mdl.predict(query), 2000);

    // NOTE: assertions that the model's toString() output contains the strategy name used to be
    // commented out here; they referenced `stgy`, which is not defined in this method.
}
Also used : EuclideanDistance(org.apache.ignite.ml.math.distances.EuclideanDistance) KNNRegressionTrainer(org.apache.ignite.ml.knn.regression.KNNRegressionTrainer) DoubleArrayVectorizer(org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer) HashMap(java.util.HashMap) KNNRegressionModel(org.apache.ignite.ml.knn.regression.KNNRegressionModel) Vector(org.apache.ignite.ml.math.primitives.vector.Vector)

Aggregations

DoubleArrayVectorizer (org.apache.ignite.ml.dataset.feature.extractor.impl.DoubleArrayVectorizer)30 Test (org.junit.Test)23 HashMap (java.util.HashMap)17 Vector (org.apache.ignite.ml.math.primitives.vector.Vector)14 TrainerTest (org.apache.ignite.ml.common.TrainerTest)11 EuclideanDistance (org.apache.ignite.ml.math.distances.EuclideanDistance)10 Ignite (org.apache.ignite.Ignite)5 RendezvousAffinityFunction (org.apache.ignite.cache.affinity.rendezvous.RendezvousAffinityFunction)5 CacheConfiguration (org.apache.ignite.configuration.CacheConfiguration)5 MeanAbsValueConvergenceCheckerFactory (org.apache.ignite.ml.composition.boosting.convergence.mean.MeanAbsValueConvergenceCheckerFactory)5 KNNClassificationModel (org.apache.ignite.ml.knn.classification.KNNClassificationModel)5 KNNClassificationTrainer (org.apache.ignite.ml.knn.classification.KNNClassificationTrainer)5 GDBModel (org.apache.ignite.ml.composition.boosting.GDBModel)4 GDBTrainer (org.apache.ignite.ml.composition.boosting.GDBTrainer)4 VectorUtils (org.apache.ignite.ml.math.primitives.vector.VectorUtils)4 SimpleGDUpdateCalculator (org.apache.ignite.ml.optimization.updatecalculators.SimpleGDUpdateCalculator)4 Path (java.nio.file.Path)3 Random (java.util.Random)3 KNNRegressionModel (org.apache.ignite.ml.knn.regression.KNNRegressionModel)3 KNNRegressionTrainer (org.apache.ignite.ml.knn.regression.KNNRegressionTrainer)3